|
- import os
- import tempfile
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
# Static-analysis stubs only: this branch never runs at import time. At
# runtime every name below is resolved lazily through the module-level
# ``__getattr__`` and the ``environment_variables`` getters. The literal
# values mirror the defaults those getters fall back to when the variable
# is unset.
if TYPE_CHECKING:
    APHRODITE_HOST_IP: str = ""
    APHRODITE_PORT: Optional[int] = None
    APHRODITE_RPC_BASE_PATH: str = tempfile.gettempdir()
    APHRODITE_USE_MODELSCOPE: bool = False
    APHRODITE_RINGBUFFER_WARNING_INTERVAL: int = 60
    APHRODITE_INSTANCE_ID: Optional[str] = None
    # CUDA_HOME has a runtime getter, so it gets a stub as well.
    CUDA_HOME: Optional[str] = None
    APHRODITE_NCCL_SO_PATH: Optional[str] = None
    LD_LIBRARY_PATH: Optional[str] = None
    # The runtime getter defaults to "True".
    APHRODITE_USE_TRITON_FLASH_ATTN: bool = True
    LOCAL_RANK: int = 0
    CUDA_VISIBLE_DEVICES: Optional[str] = None
    APHRODITE_ENGINE_ITERATION_TIMEOUT_S: int = 60
    APHRODITE_API_KEY: Optional[str] = None
    APHRODITE_ADMIN_KEY: Optional[str] = None
    S3_ACCESS_KEY_ID: Optional[str] = None
    S3_SECRET_ACCESS_KEY: Optional[str] = None
    S3_ENDPOINT_URL: Optional[str] = None
    APHRODITE_CACHE_ROOT: str = os.path.expanduser("~/.cache/aphrodite")
    APHRODITE_CONFIG_ROOT: str = os.path.expanduser("~/.config/aphrodite")
    APHRODITE_CONFIGURE_LOGGING: int = 1
    APHRODITE_LOGGING_LEVEL: str = "INFO"
    APHRODITE_LOGGING_CONFIG_PATH: Optional[str] = None
    APHRODITE_TRACE_FUNCTION: int = 0
    APHRODITE_ATTENTION_BACKEND: Optional[str] = None
    APHRODITE_USE_SAMPLING_KERNELS: bool = False
    APHRODITE_PP_LAYER_PARTITION: Optional[str] = None
    APHRODITE_CPU_KVCACHE_SPACE: int = 0
    # The runtime getter defaults to "all".
    APHRODITE_CPU_OMP_THREADS_BIND: str = "all"
    APHRODITE_OPENVINO_KVCACHE_SPACE: int = 0
    APHRODITE_OPENVINO_CPU_KV_CACHE_PRECISION: Optional[str] = None
    APHRODITE_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: bool = False
    APHRODITE_XLA_CACHE_PATH: str = os.path.join(
        APHRODITE_CACHE_ROOT, "xla_cache")
    APHRODITE_FUSED_MOE_CHUNK_SIZE: int = 64 * 1024
    APHRODITE_USE_RAY_SPMD_WORKER: bool = False
    APHRODITE_USE_RAY_COMPILED_DAG: bool = False
    APHRODITE_USE_RAY_COMPILED_DAG_NCCL_CHANNEL: bool = True
    APHRODITE_WORKER_MULTIPROC_METHOD: str = "fork"
    APHRODITE_ASSETS_CACHE: str = os.path.join(APHRODITE_CACHE_ROOT, "assets")
    APHRODITE_IMAGE_FETCH_TIMEOUT: int = 5
    APHRODITE_AUDIO_FETCH_TIMEOUT: int = 5
    APHRODITE_TARGET_DEVICE: str = "cuda"
    MAX_JOBS: Optional[str] = None
    NVCC_THREADS: Optional[str] = None
    APHRODITE_USE_PRECOMPILED: bool = False
    APHRODITE_NO_DEPRECATION_WARNING: bool = False
    APHRODITE_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
    CMAKE_BUILD_TYPE: Optional[str] = None
    VERBOSE: bool = False
    APHRODITE_DYNAMIC_ROPE_SCALING: bool = False
    APHRODITE_TEST_FORCE_FP8_MARLIN: bool = False
    APHRODITE_PLUGINS: Optional[List[str]] = None
    APHRODITE_RPC_TIMEOUT: int = 5000
    APHRODITE_FORCE_SINGLE_USER_PREFIX_CACHE: bool = False
    APHRODITE_TEST_DYNAMO_GRAPH_CAPTURE: int = 0
    # The runtime getter returns a bool and is True unless the variable
    # is exactly "0".
    APHRODITE_TEST_DYNAMO_FULLGRAPH_CAPTURE: bool = True
    APHRODITE_USE_TRITON_AWQ: bool = False
    # The runtime getter defaults to "True".
    APHRODITE_DYNAMO_USE_CUSTOM_DISPATCHER: bool = True
    APHRODITE_USE_TRITON_LAYERNORM: bool = False
def get_default_cache_root() -> str:
    """Return the base directory under which cache data is placed.

    Returns:
        The value of ``XDG_CACHE_HOME`` when it is set to a non-empty
        string, otherwise ``<home>/.cache`` where ``<home>`` is
        ``os.path.expanduser("~")``.
    """
    # The XDG Base Directory spec treats an empty variable like an unset
    # one. Returning "" here would make every derived path relative to the
    # current working directory.
    return os.getenv("XDG_CACHE_HOME") or os.path.join(
        os.path.expanduser("~"), ".cache")
def get_default_config_root() -> str:
    """Return the base directory under which configuration is placed.

    Returns:
        The value of ``XDG_CONFIG_HOME`` when it is set to a non-empty
        string, otherwise ``<home>/.config`` where ``<home>`` is
        ``os.path.expanduser("~")``.
    """
    # The XDG Base Directory spec treats an empty variable like an unset
    # one. Returning "" here would make every derived path relative to the
    # current working directory.
    return os.getenv("XDG_CONFIG_HOME") or os.path.join(
        os.path.expanduser("~"), ".config")
# Spellings (compared after strip + lower-casing) that switch a loosely
# parsed boolean flag off.
_FALSE_FLAG_VALUES = frozenset({"", "0", "false", "no", "off"})


def _env_flag(name: str) -> bool:
    """Return True unless ``name`` is unset, empty, or an explicit "off".

    ``bool(os.getenv(name))`` treats every non-empty string as enabled, so
    ``NAME=0`` or ``NAME=false`` would switch the flag *on*. This helper
    keeps "any other non-empty value enables the flag" while honouring the
    explicit negative spellings in ``_FALSE_FLAG_VALUES``.
    """
    raw = os.environ.get(name, "")
    return raw.strip().lower() not in _FALSE_FLAG_VALUES


# Registry of supported environment variables. Each key maps to a
# zero-argument getter that reads ``os.environ`` at call time, so values
# are never cached here and changes to the environment are seen on the
# next lookup. Insertion order is what the module-level ``__dir__``
# reports.
environment_variables: Dict[str, Callable[[], Any]] = {
    # Target device name; "cuda" when unset.
    "APHRODITE_TARGET_DEVICE":
    lambda: os.getenv("APHRODITE_TARGET_DEVICE", "cuda"),

    # Raw string, or None when unset.
    "MAX_JOBS":
    lambda: os.getenv("MAX_JOBS", None),

    # Raw string, or None when unset.
    "NVCC_THREADS":
    lambda: os.getenv("NVCC_THREADS", None),

    # Boolean flag; off when unset, empty, or an explicit "off" spelling.
    "APHRODITE_USE_PRECOMPILED":
    lambda: _env_flag("APHRODITE_USE_PRECOMPILED"),

    # Raw string, or None when unset.
    "CMAKE_BUILD_TYPE":
    lambda: os.getenv("CMAKE_BUILD_TYPE"),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "VERBOSE":
    lambda: bool(int(os.getenv('VERBOSE', '0'))),

    # Config directory; defaults to "<config root>/aphrodite". A leading
    # "~" is expanded.
    "APHRODITE_CONFIG_ROOT":
    lambda: os.path.expanduser(
        os.getenv(
            "APHRODITE_CONFIG_ROOT",
            os.path.join(get_default_config_root(), "aphrodite"),
        )),

    # Cache directory; defaults to "<cache root>/aphrodite". A leading
    # "~" is expanded.
    "APHRODITE_CACHE_ROOT":
    lambda: os.path.expanduser(
        os.getenv(
            "APHRODITE_CACHE_ROOT",
            os.path.join(get_default_cache_root(), "aphrodite"),
        )),

    # Host IP string; falls back to HOST_IP when unset or empty, and to
    # "" when neither is set.
    'APHRODITE_HOST_IP':
    lambda: os.getenv('APHRODITE_HOST_IP', "") or os.getenv("HOST_IP", ""),

    # Port as an int, or None when the variable is not set at all.
    'APHRODITE_PORT':
    lambda: int(os.getenv('APHRODITE_PORT', '0'))
    if 'APHRODITE_PORT' in os.environ else None,

    # Path string; defaults to the system temporary directory.
    'APHRODITE_RPC_BASE_PATH':
    lambda: os.getenv('APHRODITE_RPC_BASE_PATH', tempfile.gettempdir()),

    # True only when the value is "true" (case-insensitive).
    "APHRODITE_USE_MODELSCOPE":
    lambda: os.environ.get(
        "APHRODITE_USE_MODELSCOPE", "False").lower() == "true",

    # Raw string, or None when unset.
    "APHRODITE_INSTANCE_ID":
    lambda: os.environ.get("APHRODITE_INSTANCE_ID", None),

    # Integer; default 60.
    "APHRODITE_RINGBUFFER_WARNING_INTERVAL":
    lambda: int(os.environ.get("APHRODITE_RINGBUFFER_WARNING_INTERVAL", "60")),

    # Raw string, or None when unset.
    "CUDA_HOME":
    lambda: os.environ.get("CUDA_HOME", None),

    # Raw string, or None when unset.
    "APHRODITE_NCCL_SO_PATH":
    lambda: os.environ.get("APHRODITE_NCCL_SO_PATH", None),

    # Raw string, or None when unset.
    "LD_LIBRARY_PATH":
    lambda: os.environ.get("LD_LIBRARY_PATH", None),

    # True when the value is "true" or "1" (case-insensitive); enabled
    # by default.
    "APHRODITE_USE_TRITON_FLASH_ATTN":
    lambda: (os.environ.get(
        "APHRODITE_USE_TRITON_FLASH_ATTN", "True").lower() in ("true", "1")),

    # Integer; default 0.
    "APHRODITE_TEST_DYNAMO_GRAPH_CAPTURE":
    lambda: int(os.environ.get("APHRODITE_TEST_DYNAMO_GRAPH_CAPTURE", "0")),

    # True when the value is "true" or "1" (case-insensitive); enabled
    # by default.
    "APHRODITE_DYNAMO_USE_CUSTOM_DISPATCHER":
    lambda:
    (os.environ.get("APHRODITE_DYNAMO_USE_CUSTOM_DISPATCHER", "True").lower() in
     ("true", "1")),

    # True unless the value is exactly "0"; enabled by default.
    "APHRODITE_TEST_DYNAMO_FULLGRAPH_CAPTURE":
    lambda: bool(
        os.environ.get("APHRODITE_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),

    # Integer; default 0.
    "LOCAL_RANK":
    lambda: int(os.environ.get("LOCAL_RANK", "0")),

    # Raw string, or None when unset.
    "CUDA_VISIBLE_DEVICES":
    lambda: os.environ.get("CUDA_VISIBLE_DEVICES", None),

    # Integer; default 60.
    "APHRODITE_ENGINE_ITERATION_TIMEOUT_S":
    lambda: int(os.environ.get("APHRODITE_ENGINE_ITERATION_TIMEOUT_S", "60")),

    # Raw string, or None when unset.
    "APHRODITE_API_KEY":
    lambda: os.environ.get("APHRODITE_API_KEY", None),

    # Raw string, or None when unset.
    "APHRODITE_ADMIN_KEY":
    lambda: os.environ.get("APHRODITE_ADMIN_KEY", None),

    # S3 settings: raw strings, or None when unset.
    "S3_ACCESS_KEY_ID":
    lambda: os.environ.get("S3_ACCESS_KEY_ID", None),
    "S3_SECRET_ACCESS_KEY":
    lambda: os.environ.get("S3_SECRET_ACCESS_KEY", None),
    "S3_ENDPOINT_URL":
    lambda: os.environ.get("S3_ENDPOINT_URL", None),

    # Integer; default 1.
    "APHRODITE_CONFIGURE_LOGGING":
    lambda: int(os.getenv("APHRODITE_CONFIGURE_LOGGING", "1")),

    # Raw string, or None when unset.
    "APHRODITE_LOGGING_CONFIG_PATH":
    lambda: os.getenv("APHRODITE_LOGGING_CONFIG_PATH"),

    # Log level name; "INFO" when unset.
    "APHRODITE_LOGGING_LEVEL":
    lambda: os.getenv("APHRODITE_LOGGING_LEVEL", "INFO"),

    # Integer; default 0.
    "APHRODITE_TRACE_FUNCTION":
    lambda: int(os.getenv("APHRODITE_TRACE_FUNCTION", "0")),

    # Raw string, or None when unset.
    "APHRODITE_ATTENTION_BACKEND":
    lambda: os.getenv("APHRODITE_ATTENTION_BACKEND", None),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_USE_SAMPLING_KERNELS":
    lambda: bool(int(os.getenv("APHRODITE_USE_SAMPLING_KERNELS", "0"))),

    # Raw string, or None when unset.
    "APHRODITE_PP_LAYER_PARTITION":
    lambda: os.getenv("APHRODITE_PP_LAYER_PARTITION", None),

    # Integer; default 0.
    "APHRODITE_CPU_KVCACHE_SPACE":
    lambda: int(os.getenv("APHRODITE_CPU_KVCACHE_SPACE", "0")),

    # Raw string; "all" when unset.
    "APHRODITE_CPU_OMP_THREADS_BIND":
    lambda: os.getenv("APHRODITE_CPU_OMP_THREADS_BIND", "all"),

    # Integer; default 0.
    "APHRODITE_OPENVINO_KVCACHE_SPACE":
    lambda: int(os.getenv("APHRODITE_OPENVINO_KVCACHE_SPACE", "0")),

    # Raw string, or None when unset.
    "APHRODITE_OPENVINO_CPU_KV_CACHE_PRECISION":
    lambda: os.getenv("APHRODITE_OPENVINO_CPU_KV_CACHE_PRECISION", None),

    # Boolean flag; off when unset, empty, or an explicit "off" spelling.
    "APHRODITE_OPENVINO_ENABLE_QUANTIZED_WEIGHTS":
    lambda: _env_flag("APHRODITE_OPENVINO_ENABLE_QUANTIZED_WEIGHTS"),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_USE_RAY_SPMD_WORKER":
    lambda: bool(int(os.getenv("APHRODITE_USE_RAY_SPMD_WORKER", "0"))),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_USE_RAY_COMPILED_DAG":
    lambda: bool(int(os.getenv("APHRODITE_USE_RAY_COMPILED_DAG", "0"))),

    # Integer-valued flag; enabled by default (default 1).
    "APHRODITE_USE_RAY_COMPILED_DAG_NCCL_CHANNEL":
    lambda: bool(int(
        os.getenv("APHRODITE_USE_RAY_COMPILED_DAG_NCCL_CHANNEL", "1"))),

    # Raw string; "fork" when unset.
    "APHRODITE_WORKER_MULTIPROC_METHOD":
    lambda: os.getenv("APHRODITE_WORKER_MULTIPROC_METHOD", "fork"),

    # Assets directory; defaults to "<cache root>/aphrodite/assets".
    # A leading "~" is expanded.
    "APHRODITE_ASSETS_CACHE":
    lambda: os.path.expanduser(
        os.getenv(
            "APHRODITE_ASSETS_CACHE",
            os.path.join(get_default_cache_root(), "aphrodite", "assets"),
        )),

    # Integer; default 5.
    "APHRODITE_IMAGE_FETCH_TIMEOUT":
    lambda: int(os.getenv("APHRODITE_IMAGE_FETCH_TIMEOUT", "5")),

    # Integer; default 5.
    "APHRODITE_AUDIO_FETCH_TIMEOUT":
    lambda: int(os.getenv("APHRODITE_AUDIO_FETCH_TIMEOUT", "5")),

    # XLA cache directory; defaults to "<cache root>/aphrodite/xla_cache".
    # A leading "~" is expanded.
    "APHRODITE_XLA_CACHE_PATH":
    lambda: os.path.expanduser(
        os.getenv(
            "APHRODITE_XLA_CACHE_PATH",
            os.path.join(get_default_cache_root(), "aphrodite", "xla_cache"),
        )),

    # Integer; default 65536.
    "APHRODITE_FUSED_MOE_CHUNK_SIZE":
    lambda: int(os.getenv("APHRODITE_FUSED_MOE_CHUNK_SIZE", "65536")),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_NO_DEPRECATION_WARNING":
    lambda: bool(int(os.getenv("APHRODITE_NO_DEPRECATION_WARNING", "0"))),

    # Boolean flag; off when unset, empty, or an explicit "off" spelling.
    "APHRODITE_KEEP_ALIVE_ON_ENGINE_DEATH":
    lambda: _env_flag("APHRODITE_KEEP_ALIVE_ON_ENGINE_DEATH"),

    # True when the stripped, lower-cased value is "1" or "true".
    "APHRODITE_DYNAMIC_ROPE_SCALING":
    lambda:
    (os.environ.get(
        "APHRODITE_DYNAMIC_ROPE_SCALING",
        "0").strip().lower() in ("1", "true")),

    # True when the stripped, lower-cased value is "1" or "true".
    "APHRODITE_TEST_FORCE_FP8_MARLIN":
    lambda:
    (os.environ.get("APHRODITE_TEST_FORCE_FP8_MARLIN", "0").strip().lower() in
     ("1", "true")),

    # Integer; default 5000.
    "APHRODITE_RPC_TIMEOUT":
    lambda: int(os.getenv("APHRODITE_RPC_TIMEOUT", "5000")),

    # Comma-separated list split into a list of strings, or None when
    # the variable is not set at all.
    "APHRODITE_PLUGINS":
    lambda: None if "APHRODITE_PLUGINS" not in os.environ else os.environ[
        "APHRODITE_PLUGINS"].split(","),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_FORCE_SINGLE_USER_PREFIX_CACHE":
    lambda: bool(int(os.getenv("APHRODITE_FORCE_SINGLE_USER_PREFIX_CACHE",
                               "0"))),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_USE_TRITON_AWQ":
    lambda: bool(int(os.getenv("APHRODITE_USE_TRITON_AWQ", "0"))),

    # Integer-valued flag: any non-zero integer enables it; default 0.
    "APHRODITE_USE_TRITON_LAYERNORM":
    lambda: bool(int(os.getenv("APHRODITE_USE_TRITON_LAYERNORM", "0"))),
}
def __getattr__(name: str):
    """Resolve a module attribute by evaluating its registered getter.

    Python calls this module-level hook only when normal attribute lookup
    on the module fails. The getter is invoked on every access, so the
    value reflects the environment at the time of the lookup.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        Whatever the getter registered under ``name`` in
        ``environment_variables`` returns.

    Raises:
        AttributeError: If ``name`` is not a key of
            ``environment_variables``.
    """
    if name not in environment_variables:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return environment_variables[name]()
def __dir__():
    """Return the names of all registered environment variables.

    The list holds the keys of ``environment_variables`` in insertion order.
    """
    return [*environment_variables]
|