Переглянути джерело

fix: avoid unnecessary ray import warnings

AlpinDale 6 місяців тому
батько
коміт
5240c0da23

+ 7 - 2
aphrodite/common/config.py

@@ -762,11 +762,13 @@ class ParallelConfig:
 
             from aphrodite.executor import ray_utils
             backend = "mp"
-            ray_found = ray_utils.ray is not None
+            ray_found = ray_utils.ray_is_available()
             if cuda_device_count_stateless() < self.world_size:
                 if not ray_found:
                     raise ValueError("Unable to load Ray which is "
-                                     "required for multi-node inference")
+                                     "required for multi-node inference, "
+                                     "please install Ray with `pip install "
+                                     "ray`.") from ray_utils.ray_import_err
                 backend = "ray"
             elif ray_found:
                 if self.placement_group:
@@ -799,6 +801,9 @@ class ParallelConfig:
             raise ValueError(
                 "Unrecognized distributed executor backend. Supported values "
                 "are 'ray' or 'mp'.")
+        if self.distributed_executor_backend == "ray":
+            from aphrodite.executor import ray_utils
+            ray_utils.assert_ray_available()
         if not self.disable_custom_all_reduce and self.world_size > 1:
             if is_hip():
                 self.disable_custom_all_reduce = True

+ 5 - 0
aphrodite/engine/async_aphrodite.py

@@ -376,6 +376,11 @@ class AsyncAphrodite:
         """Creates an async LLM engine from the engine arguments."""
         # Create the engine configs.
         engine_config = engine_args.create_engine_config()
+
+        if engine_args.engine_use_ray:
+            from aphrodite.executor import ray_utils
+            ray_utils.assert_ray_available()
+
         distributed_executor_backend = (
             engine_config.parallel_config.distributed_executor_backend)
 

+ 16 - 5
aphrodite/executor/ray_utils.py

@@ -39,11 +39,26 @@ try:
             output = pickle.dumps(output)
             return output
 
+    ray_import_err = None
+
 except ImportError as e:
     ray = None  # type: ignore
+    ray_import_err = e
     RayWorkerWrapper = None  # type: ignore
 
 
+def ray_is_available() -> bool:
+    """Returns True if Ray is available."""
+    return ray is not None
+
+
+def assert_ray_available():
+    """Raise an exception if Ray is not available."""
+    if ray is None:
+        raise ValueError("Failed to import Ray, please install Ray with "
+                         "`pip install ray`.") from ray_import_err
+
+
 def initialize_ray_cluster(
     parallel_config: ParallelConfig,
     ray_address: Optional[str] = None,
@@ -59,11 +74,7 @@ def initialize_ray_cluster(
         ray_address: The address of the Ray cluster. If None, uses
             the default Ray cluster address.
     """
-    if ray is None:
-        raise ImportError(
-            "Ray is not installed. Please install Ray to use multi-node "
-            "serving. You can install Ray by running "
-            "`pip install aphrodite-engine[\"ray\"]`.")
+    assert_ray_available()
 
     # Connect to a ray cluster.
     if is_hip() or is_xpu():