@@ -6,8 +6,8 @@ from typing import Any, List, Optional

 from aphrodite.common.sequence import ExecuteModelRequest, SamplerOutput
 from aphrodite.common.utils import (cuda_device_count_stateless,
                                     get_aphrodite_instance_id,
-                                    get_distributed_init_method, get_ip,
-                                    get_open_port, make_async)
+                                    get_distributed_init_method, get_open_port,
+                                    make_async)
 from aphrodite.executor.distributed_gpu_executor import (  # yapf: disable
     DistributedGPUExecutor, DistributedGPUExecutorAsync)
 from aphrodite.executor.multiproc_worker_utils import (ProcessWorkerWrapper,
@@ -36,8 +36,11 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
         assert world_size <= cuda_device_count_stateless(), (
             "please set tensor_parallel_size to less than max local gpu count")

+        # Multiprocessing-based executor does not support multi-node setting.
+        # Since it only works for single node, we can use the loopback address
+        # 127.0.0.1 for communication.
         distributed_init_method = get_distributed_init_method(
-            get_ip(), get_open_port())
+            "127.0.0.1", get_open_port())

         if world_size == 1:
             self.workers = []