communication_op.py

from typing import Any, Dict, Optional, Union

import torch
import torch.distributed

from .parallel_state import get_tp_group


def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor:
    """All-reduce the input tensor across model parallel group."""
    return get_tp_group().all_reduce(input_)


def tensor_model_parallel_all_gather(input_: torch.Tensor,
                                     dim: int = -1) -> torch.Tensor:
    """All-gather the input tensor across model parallel group."""
    return get_tp_group().all_gather(input_, dim)


def tensor_model_parallel_gather(input_: torch.Tensor,
                                 dst: int = 0,
                                 dim: int = -1) -> Optional[torch.Tensor]:
    """Gather the input tensor across model parallel group."""
    return get_tp_group().gather(input_, dst, dim)
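
# Usage sketch (illustrative, not part of the original module): a row-parallel
# linear layer is a typical caller of tensor_model_parallel_all_reduce. Each
# tensor-parallel rank multiplies against its local weight shard, and the
# partial outputs are summed across the group. The function name and arguments
# below are hypothetical and assume the TP group has already been initialized.
def _row_parallel_matmul_sketch(x: torch.Tensor,
                                weight_shard: torch.Tensor) -> torch.Tensor:
    # Partial result from this rank's shard of the weight matrix.
    partial = torch.matmul(x, weight_shard)
    # Sum the partial results so every rank holds the full output activation.
    return tensor_model_parallel_all_reduce(partial)
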
def broadcast_tensor_dict(tensor_dict: Optional[Dict[Any, Union[torch.Tensor,
                                                                 Any]]] = None,
                          src: int = 0):
    """Broadcast the input tensor dictionary across model parallel group.

    If torch.distributed is not initialized (e.g. a single-process run),
    the dictionary is returned unchanged.
    """
    if not torch.distributed.is_initialized():
        return tensor_dict
    return get_tp_group().broadcast_tensor_dict(tensor_dict, src)
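
# Usage sketch (illustrative, not part of the original module): the source
# rank can use broadcast_tensor_dict to send a dict of tensors and plain
# Python objects to the other ranks, which pass tensor_dict=None and receive
# the broadcast contents. The helper name and is_driver flag are hypothetical.
def _broadcast_inputs_sketch(
        is_driver: bool,
        inputs: Optional[Dict[str, torch.Tensor]] = None):
    if is_driver:
        # Source rank supplies the dictionary to broadcast.
        return broadcast_tensor_dict(inputs, src=0)
    # Non-source ranks receive the dictionary from rank 0.
    return broadcast_tensor_dict(None, src=0)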