ray_utils.py

from typing import List, Optional, Tuple

from aphrodite.common.config import ParallelConfig
from aphrodite.common.sequence import ExecuteModelRequest
from aphrodite.common.utils import get_ip, is_hip, is_xpu
from aphrodite.task_handler.worker_base import WorkerWrapperBase

try:
    import ray

    class RayWorkerWrapper(WorkerWrapperBase):
        """Ray wrapper for aphrodite.task_handler.Worker, allowing Worker to
        be lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""

        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            # The compiled DAG runs its main execution in a different thread
            # that calls cuda.set_device. This flag indicates whether
            # set_device has been called on that thread.
            self.compiled_dag_cuda_device_set = False

        def get_node_ip(self) -> str:
            return get_ip()

        def get_node_and_gpu_ids(self) -> Tuple[str, List[int]]:
            node_id = ray.get_runtime_context().get_node_id()
            gpu_ids = ray.get_gpu_ids()
            return node_id, gpu_ids

        def execute_model_spmd(self, execute_model_req: ExecuteModelRequest):
            """Used only when the SPMD worker and the compiled DAG are both
            enabled."""
            # TODO: This is needed right now because Ray DAG executes
            # on a background thread, so we need to reset torch's current
            # device.
            import torch
            if not self.compiled_dag_cuda_device_set:
                torch.cuda.set_device(self.worker.device)
                self.compiled_dag_cuda_device_set = True

            return self.worker._execute_model_spmd(execute_model_req)

    ray_import_err = None

except ImportError as e:
    ray = None  # type: ignore
    ray_import_err = e
    RayWorkerWrapper = None  # type: ignore
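
# Usage note (a hedged sketch, not part of the original file): the executor
# layer typically turns RayWorkerWrapper into a Ray actor so that each worker
# is pinned to one GPU bundle of the placement group created below, roughly:
#
#     worker = ray.remote(num_cpus=0, num_gpus=1)(RayWorkerWrapper).remote(...)
#     node_id, gpu_ids = ray.get(worker.get_node_and_gpu_ids.remote())
#
# The exact constructor arguments are defined by WorkerWrapperBase and are
# omitted here.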


def ray_is_available() -> bool:
    """Returns True if Ray is available."""
    return ray is not None


def assert_ray_available():
    """Raise an exception if Ray is not available."""
    if ray is None:
        raise ValueError("Failed to import Ray, please install Ray with "
                         "`pip install ray`.") from ray_import_err


def initialize_ray_cluster(
    parallel_config: ParallelConfig,
    ray_address: Optional[str] = None,
):
    """Initialize the distributed cluster with Ray.

    It will connect to the Ray cluster and create a placement group
    for the workers, which includes the specification of the resources
    for each distributed worker.

    Args:
        parallel_config: The configurations for parallel execution.
        ray_address: The address of the Ray cluster. If None, uses
            the default Ray cluster address.
    """
    assert_ray_available()

    # Connect to a ray cluster.
    if is_hip() or is_xpu():
        ray.init(address=ray_address,
                 ignore_reinit_error=True,
                 num_gpus=parallel_config.world_size)
    else:
        ray.init(address=ray_address, ignore_reinit_error=True)

    if parallel_config.placement_group:
        # Placement group is already set.
        return

    # Create placement group for worker processes
    current_placement_group = ray.util.get_current_placement_group()
    if current_placement_group:
        # We are in a placement group
        bundles = current_placement_group.bundle_specs
        # Verify that we can use the placement group.
        gpu_bundles = 0
        for bundle in bundles:
            bundle_gpus = bundle.get("GPU", 0)
            if bundle_gpus > 1:
                raise ValueError(
                    "Placement group bundle cannot have more than 1 GPU.")
            if bundle_gpus:
                gpu_bundles += 1
        if parallel_config.world_size > gpu_bundles:
            raise ValueError(
                "The number of required GPUs exceeds the total number of "
                "available GPUs in the placement group.")
    else:
        num_gpus_in_cluster = ray.cluster_resources().get("GPU", 0)
        if parallel_config.world_size > num_gpus_in_cluster:
            raise ValueError(
                "The number of required GPUs exceeds the total number of "
                "available GPUs in the cluster.")
        # Create a new placement group
        placement_group_specs = ([{"GPU": 1}] * parallel_config.world_size)
        current_placement_group = ray.util.placement_group(
            placement_group_specs)
        # Wait until PG is ready - this will block until all
        # requested resources are available, and will timeout
        # if they cannot be provisioned.
        ray.get(current_placement_group.ready(), timeout=1800)

    # Set the placement group in the parallel config
    parallel_config.placement_group = current_placement_group
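

# Illustrative usage (a hedged sketch, not part of the original module): this
# demonstrates the same Ray placement-group pattern that
# initialize_ray_cluster applies, using plain Ray calls. It assumes Ray is
# installed and at least one GPU is available; `demo_world_size` is an
# arbitrary value chosen for the demonstration.
if __name__ == "__main__":
    assert_ray_available()
    ray.init(ignore_reinit_error=True)
    demo_world_size = 1  # assumed; set to the number of GPU workers you want
    demo_pg = ray.util.placement_group([{"GPU": 1}] * demo_world_size)
    # Block until all bundles are scheduled, mirroring the 1800s timeout used
    # by initialize_ray_cluster above.
    ray.get(demo_pg.ready(), timeout=1800)
    print("Placement group ready:", demo_pg.bundle_specs)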