ray_tools.py

from typing import Optional, List, Tuple, TYPE_CHECKING

from loguru import logger

from aphrodite.common.config import ParallelConfig
from aphrodite.common.utils import is_hip, set_cuda_visible_devices, get_ip

try:
    import ray

    class RayWorkerAphrodite:
        """Ray wrapper for aphrodite.task_handler.Worker, allowing Worker to
        be lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""

        def __init__(self, init_cached_hf_modules=False) -> None:
            if init_cached_hf_modules:
                # pylint: disable=import-outside-toplevel
                from transformers.dynamic_module_utils import init_hf_modules
                init_hf_modules()
            self.worker = None

        def init_worker(self, worker_init_fn):
            self.worker = worker_init_fn()

        def __getattr__(self, name):
            return getattr(self.worker, name)

        def execute_method(self, method, *args, **kwargs):
            executor = getattr(self, method)
            return executor(*args, **kwargs)

        def get_node_ip(self) -> str:
            return get_ip()

        def get_node_and_gpu_ids(self) -> Tuple[str, List[int]]:
            node_id = ray.get_runtime_context().get_node_id()
            gpu_ids = ray.get_gpu_ids()
            return node_id, gpu_ids

        def set_cuda_visible_devices(self, device_ids) -> None:
            set_cuda_visible_devices(device_ids)

except ImportError as e:
    logger.warning(f"Failed to import Ray with {e!r}. "
                   "For distributed inference, please install Ray with "
                   "`pip install ray`.")
    ray = None
    RayWorkerAphrodite = None
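

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal sketch, under assumed scheduling details, of how an engine might
# drive RayWorkerAphrodite: the class is turned into a Ray actor pinned to one
# GPU bundle of a placement group, and the wrapped Worker is only constructed
# via `init_worker` once Ray has set CUDA_VISIBLE_DEVICES inside the actor
# process. The helper name `_example_spawn_ray_worker` and its arguments are
# hypothetical, and it assumes Ray is installed (i.e. `ray` and
# `RayWorkerAphrodite` above are not None).
def _example_spawn_ray_worker(placement_group, bundle_index, worker_init_fn):
    from ray.util.scheduling_strategies import (
        PlacementGroupSchedulingStrategy)
    actor = ray.remote(
        num_cpus=0,
        num_gpus=1,
        scheduling_strategy=PlacementGroupSchedulingStrategy(
            placement_group=placement_group,
            placement_group_bundle_index=bundle_index,
        ),
    )(RayWorkerAphrodite).remote()
    # Build the wrapped Worker lazily, inside the actor process.
    ray.get(actor.init_worker.remote(worker_init_fn))
    return actor
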

if TYPE_CHECKING:
    from ray.util.placement_group import PlacementGroup


def initialize_cluster(
    parallel_config: ParallelConfig,
    engine_use_ray: bool = False,
    ray_address: Optional[str] = None,
) -> Optional["PlacementGroup"]:
    """Initialize the distributed cluster with Ray, if needed.

    Args:
        parallel_config: The configurations for parallel execution.
        engine_use_ray: Whether to use Ray for the async engine.
        ray_address: The address of the Ray cluster. If None, uses
            the default Ray cluster address.

    Returns:
        The Ray placement group for the distributed workers, or None if
        Ray is not used. The placement group includes the specification
        of the resources for each distributed worker.
    """
    if parallel_config.worker_use_ray or engine_use_ray:
        if ray is None:
            raise ImportError(
                "Ray is not installed. Please install Ray to use distributed "
                "serving.")
        # Connect to a Ray cluster.
        if is_hip():
            ray.init(address=ray_address,
                     ignore_reinit_error=True,
                     num_gpus=parallel_config.world_size)
        else:
            ray.init(address=ray_address, ignore_reinit_error=True)

    if not parallel_config.worker_use_ray:
        assert parallel_config.world_size == 1, (
            "Ray is required if parallel_config.world_size > 1.")
        return None

    # Create a placement group for the worker processes.
    current_placement_group = ray.util.get_current_placement_group()
    if current_placement_group:
        # We are in a placement group.
        bundles = current_placement_group.bundle_specs
        # Verify that we can use the placement group.
        gpu_bundles = 0
        for bundle in bundles:
            bundle_gpus = bundle.get("GPU", 0)
            if bundle_gpus > 1:
                raise ValueError(
                    "Placement group bundle cannot have more than 1 GPU.")
            if bundle_gpus:
                gpu_bundles += 1
        if parallel_config.world_size > gpu_bundles:
            raise ValueError(
                "The number of required GPUs exceeds the total number of "
                "available GPUs in the placement group.")
    else:
        num_gpus_in_cluster = ray.cluster_resources().get("GPU", 0)
        if parallel_config.world_size > num_gpus_in_cluster:
            raise ValueError(
                "The number of required GPUs exceeds the total number of "
                "available GPUs in the cluster.")
        # Create a new placement group with one GPU bundle per worker.
        placement_group_specs = ([{"GPU": 1}] * parallel_config.world_size)
        current_placement_group = ray.util.placement_group(
            placement_group_specs)
        # Wait until the placement group is ready. This blocks until all
        # requested resources are available, and will time out if they
        # cannot be provisioned.
        ray.get(current_placement_group.ready(), timeout=1800)

    return current_placement_group
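

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal sketch, assuming a ready-made `parallel_config`, of how an engine
# might call `initialize_cluster` and then identify one GPU bundle per
# distributed worker. The helper name `_example_initialize` is hypothetical.
def _example_initialize(parallel_config: ParallelConfig):
    placement_group = initialize_cluster(parallel_config)
    if placement_group is None:
        # world_size == 1 and Ray is not in use: run a single local worker.
        return None
    # One RayWorkerAphrodite actor would now be scheduled onto each GPU
    # bundle (see the sketch after the try/except block above).
    gpu_bundle_ids = [
        i for i, bundle in enumerate(placement_group.bundle_specs)
        if bundle.get("GPU", 0)
    ]
    assert len(gpu_bundle_ids) >= parallel_config.world_size
    return placement_group, gpu_bundle_ids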