il y a 7 mois · 6b1fdd07bd
--- a/aphrodite/adapter_commons/models.py
+++ b/aphrodite/adapter_commons/models.py
@@ -1,8 +1,8 @@
 
				 from abc import ABC, abstractmethod
			
 
				 from typing import Any, Callable, Dict, Hashable, Optional, TypeVar
			
 
				 
			
 
				-from torch import nn
			
 
				 from loguru import logger
			
 
				+from torch import nn
			
 
				 
			
 
				 from aphrodite.common.utils import LRUCache
			
 
				 
			
--- a/aphrodite/attention/__init__.py
+++ b/aphrodite/attention/__init__.py
@@ -1,8 +1,6 @@
 
				-from aphrodite.attention.backends.abstract import (
			
 
				-    AttentionBackend,
			
 
				-    AttentionMetadata,
			
 
				-    AttentionMetadataBuilder,
			
 
				-)
			
 
				+from aphrodite.attention.backends.abstract import (AttentionBackend,
			
 
				+                                                   AttentionMetadata,
			
 
				+                                                   AttentionMetadataBuilder)
			
 
				 from aphrodite.attention.layer import Attention
			
 
				 from aphrodite.attention.selector import get_attn_backend
			
 
				 
			
--- a/aphrodite/attention/backends/ipex_attn.py
+++ b/aphrodite/attention/backends/ipex_attn.py
@@ -10,9 +10,9 @@ from aphrodite.attention.backends.abstract import (AttentionBackend,
 
				                                                    AttentionImpl,
			
 
				                                                    AttentionMetadata,
			
 
				                                                    AttentionType)
			
 
				+from aphrodite.attention.backends.utils import CommonMetadataBuilder
			
 
				 from aphrodite.attention.ops.paged_attn import (PagedAttention,
			
 
				                                                 PagedAttentionMetadata)
			
 
				-from aphrodite.attention.backends.utils import CommonMetadataBuilder
			
 
				 
			
 
				 _PARTITION_SIZE = 512
			
 
				 
			
--- a/aphrodite/attention/ops/blocksparse_attention/interface.py
+++ b/aphrodite/attention/ops/blocksparse_attention/interface.py
@@ -4,8 +4,8 @@ import torch
 
				 
			
 
				 from aphrodite.attention.ops.blocksparse_attention.utils import (
			
 
				     dense_to_crow_col, get_head_sliding_step, get_sparse_attn_mask)
			
 
				-from aphrodite.platforms import current_platform
			
 
				 from aphrodite.common.utils import is_cpu, is_hip
			
 
				+from aphrodite.platforms import current_platform
			
 
				 
			
 
				 IS_COMPUTE_8_OR_ABOVE = (torch.cuda.is_available()
			
 
				                          and current_platform.get_device_capability()[0] >= 8)
			
--- a/aphrodite/common/connections.py
+++ b/aphrodite/common/connections.py
@@ -1,5 +1,5 @@
 
				 from pathlib import Path
			
 
				-from typing import Mapping, Optional, MutableMapping
			
 
				+from typing import Mapping, MutableMapping, Optional
			
 
				 from urllib.parse import urlparse
			
 
				 
			
 
				 import aiohttp
			
--- a/aphrodite/distributed/device_communicators/pynccl.py
+++ b/aphrodite/distributed/device_communicators/pynccl.py
@@ -4,8 +4,8 @@ from typing import Optional, Union
 
				 # ===================== import region =====================
			
 
				 import torch
			
 
				 import torch.distributed as dist
			
 
				-from torch.distributed import ProcessGroup, ReduceOp
			
 
				 from loguru import logger
			
 
				+from torch.distributed import ProcessGroup, ReduceOp
			
 
				 
			
 
				 from aphrodite.distributed.device_communicators.pynccl_wrapper import (
			
 
				     NCCLLibrary, buffer_type, cudaStream_t, ncclComm_t, ncclDataTypeEnum,
			
--- a/aphrodite/executor/multiproc_worker_utils.py
+++ b/aphrodite/executor/multiproc_worker_utils.py
@@ -11,6 +11,7 @@ from multiprocessing.connection import wait
 
				 from multiprocessing.process import BaseProcess
			
 
				 from typing import (Any, Callable, Dict, Generic, List, Optional, TextIO,
			
 
				                     TypeVar, Union)
			
 
				+
			
 
				 from loguru import logger
			
 
				 
			
 
				 T = TypeVar('T')
			
--- a/aphrodite/executor/openvino_executor.py
+++ b/aphrodite/executor/openvino_executor.py
@@ -1,5 +1,5 @@
 
				-from typing import List, Set, Tuple
			
 
				 import os
			
 
				+from typing import List, Set, Tuple
			
 
				 
			
 
				 import openvino as ov
			
 
				 import openvino.properties.hint as hints
			
--- a/aphrodite/modeling/guided_decoding/outlines_decoding.py
+++ b/aphrodite/modeling/guided_decoding/outlines_decoding.py
@@ -8,15 +8,10 @@ from typing import Tuple, Union
 
				 from pydantic import BaseModel
			
 
				 from transformers import PreTrainedTokenizerBase
			
 
				 
			
 
				-from aphrodite.endpoints.openai.protocol import (
			
 
				-    ChatCompletionRequest,
			
 
				-    CompletionRequest,
			
 
				-)
			
 
				+from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
			
 
				+                                                 CompletionRequest)
			
 
				 from aphrodite.modeling.guided_decoding.outlines_logits_processors import (
			
 
				-    CFGLogitsProcessor,
			
 
				-    JSONLogitsProcessor,
			
 
				-    RegexLogitsProcessor,
			
 
				-)
			
 
				+    CFGLogitsProcessor, JSONLogitsProcessor, RegexLogitsProcessor)
			
 
				 
			
 
				 
			
 
				 class GuidedDecodingMode(Enum):
			
--- a/aphrodite/modeling/layers/activation.py
+++ b/aphrodite/modeling/layers/activation.py
@@ -8,9 +8,9 @@ import torch.nn.functional as F
 
				 
			
 
				 from aphrodite.distributed import (divide, get_tensor_model_parallel_rank,
			
 
				                                    get_tensor_model_parallel_world_size)
			
 
				+from aphrodite.modeling._custom_op import CustomOp
			
 
				 from aphrodite.modeling.utils import set_weight_attrs
			
 
				 from aphrodite.quantization import QuantizationConfig
			
 
				-from aphrodite.modeling._custom_op import CustomOp
			
 
				 
			
 
				 
			
 
				 class SiluAndMul(CustomOp):
			
--- a/aphrodite/modeling/layers/linear.py
+++ b/aphrodite/modeling/layers/linear.py
@@ -6,11 +6,16 @@ import torch.nn.functional as F
 
				 from loguru import logger
			
 
				 from torch.nn.parameter import Parameter
			
 
				 
			
 
				-from aphrodite.distributed import (
			
 
				-    divide, get_current_tp_rank_partition_offset,
			
 
				-    get_current_tp_rank_partition_size, get_tensor_model_parallel_rank,
			
 
				-    get_tensor_model_parallel_world_size, split_tensor_along_last_dim,
			
 
				-    tensor_model_parallel_all_gather, tensor_model_parallel_all_reduce)
			
 
				+# yapf: disable
			
 
				+from aphrodite.distributed import (divide,
			
 
				+                                   get_current_tp_rank_partition_offset,
			
 
				+                                   get_current_tp_rank_partition_size,
			
 
				+                                   get_tensor_model_parallel_rank,
			
 
				+                                   get_tensor_model_parallel_world_size,
			
 
				+                                   split_tensor_along_last_dim,
			
 
				+                                   tensor_model_parallel_all_gather,
			
 
				+                                   tensor_model_parallel_all_reduce)
			
 
				+# yapf: enable
			
 
				 from aphrodite.modeling.utils import set_weight_attrs
			
 
				 from aphrodite.quantization.base_config import (QuantizationConfig,
			
 
				                                                 QuantizeMethodBase)
			
--- a/aphrodite/modeling/models/__init__.py
+++ b/aphrodite/modeling/models/__init__.py
@@ -3,8 +3,8 @@ import importlib
 
				 from typing import Dict, List, Optional, Type
			
 
				 
			
 
				 import torch.nn as nn
			
 
				-
			
 
				 from loguru import logger
			
 
				+
			
 
				 from aphrodite.common.utils import is_hip
			
 
				 
			
 
				 # Architecture -> (module, class).
			
--- a/aphrodite/modeling/models/llava_next.py
+++ b/aphrodite/modeling/models/llava_next.py
@@ -10,9 +10,9 @@ from typing_extensions import NotRequired
 
				 
			
 
				 from aphrodite.attention import AttentionMetadata
			
 
				 from aphrodite.common.config import CacheConfig, MultiModalConfig
			
 
				+from aphrodite.common.sequence import IntermediateTensors, SamplerOutput
			
 
				 from aphrodite.inputs import INPUT_REGISTRY, InputContext, LLMInputs
			
 
				 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
			
 
				-from aphrodite.quantization.base_config import (QuantizationConfig)
			
 
				 from aphrodite.modeling.layers.sampler import Sampler
			
 
				 from aphrodite.modeling.layers.vocab_parallel_embedding import ParallelLMHead
			
 
				 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
			
@@ -20,7 +20,7 @@ from aphrodite.modeling.models.clip import CLIPVisionModel
 
				 from aphrodite.modeling.models.llama import LlamaModel
			
 
				 from aphrodite.modeling.sampling_metadata import SamplingMetadata
			
 
				 from aphrodite.multimodal import MULTIMODAL_REGISTRY, BatchedTensors
			
 
				-from aphrodite.common.sequence import IntermediateTensors, SamplerOutput
			
 
				+from aphrodite.quantization.base_config import QuantizationConfig
			
 
				 
			
 
				 from .clip import (dummy_image_for_clip, dummy_seq_data_for_clip,
			
 
				                    get_clip_patch_grid_length, input_processor_for_clip)
			
--- a/aphrodite/modeling/models/qwen2.py
+++ b/aphrodite/modeling/models/qwen2.py
@@ -32,8 +32,8 @@ from aphrodite.attention import Attention, AttentionMetadata
 
				 from aphrodite.common.config import CacheConfig, LoRAConfig
			
 
				 from aphrodite.common.sequence import IntermediateTensors, SamplerOutput
			
 
				 from aphrodite.distributed import (get_current_tp_rank_partition_size,
			
 
				-                                   get_tensor_model_parallel_world_size,
			
 
				-                                   get_tensor_model_parallel_rank)
			
 
				+                                   get_tensor_model_parallel_rank,
			
 
				+                                   get_tensor_model_parallel_world_size)
			
 
				 from aphrodite.modeling.layers.activation import SiluAndMul
			
 
				 from aphrodite.modeling.layers.layernorm import RMSNorm
			
 
				 from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear,
			
--- a/aphrodite/processing/embedding_model_block_manager.py
+++ b/aphrodite/processing/embedding_model_block_manager.py
@@ -1,7 +1,7 @@
 
				 from typing import List, Tuple
			
 
				 
			
 
				-from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
			
 
				 from aphrodite.common.sequence import Sequence, SequenceGroup
			
 
				+from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
			
 
				 
			
 
				 
			
 
				 class EmbeddingModelBlockSpaceManager(BlockSpaceManager):
			
--- a/aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py
+++ b/aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py
@@ -4,9 +4,9 @@ import torch
 
				 import torch.nn.functional as F
			
 
				 from torch.nn import Parameter
			
 
				 
			
 
				-from aphrodite.quantization.compressed_tensors.schemes import (
			
 
				-    CompressedTensorsScheme)
			
 
				 from aphrodite.modeling.utils import set_weight_attrs
			
 
				+from aphrodite.quantization.compressed_tensors.schemes import \
			
 
				+    CompressedTensorsScheme
			
 
				 
			
 
				 __all__ = ["CompressedTensorsUnquantized"]
			
 
				 
			
--- a/aphrodite/quantization/deepspeedfp.py
+++ b/aphrodite/quantization/deepspeedfp.py
@@ -5,8 +5,8 @@ import torch.nn as nn
 
				 import torch.nn.functional as F
			
 
				 
			
 
				 from aphrodite.modeling.layers.linear import LinearBase, LinearMethodBase
			
 
				-from aphrodite.quantization.base_config import (QuantizationConfig)
			
 
				 from aphrodite.modeling.utils import set_weight_attrs
			
 
				+from aphrodite.quantization.base_config import QuantizationConfig
			
 
				 
			
 
				 
			
 
				 class DeepSpeedFPConfig(QuantizationConfig):
			
--- a/aphrodite/spec_decode/ngram_worker.py
+++ b/aphrodite/spec_decode/ngram_worker.py
@@ -13,7 +13,7 @@ class NGramWorker(NonLLMProposerWorkerBase):
 
				     """NGramWorker provides a light drafter without need for model.
			
 
				 
			
 
				     Current NGramWorker only implement prompt lookup decoding,
			
 
				-    and in future we may also do RAG type drafter and other scenerios
			
 
				+    and in future we may also do RAG type drafter and other scenarios
			
 
				     which don't rely on LLM model to give proposals.
			
 
				     """
			
 
				 
			
--- a/aphrodite/transformers_utils/tokenizer_group/__init__.py
+++ b/aphrodite/transformers_utils/tokenizer_group/__init__.py
@@ -1,15 +1,15 @@
 
				 from typing import Optional, Type
			
 
				 
			
 
				 from aphrodite.common.config import TokenizerPoolConfig
			
 
				-from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (
			
 
				-    BaseTokenizerGroup)
			
 
				-from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import (
			
 
				-    TokenizerGroup)
			
 
				 from aphrodite.executor.ray_utils import ray
			
 
				+from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
			
 
				+    BaseTokenizerGroup
			
 
				+from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import \
			
 
				+    TokenizerGroup
			
 
				 
			
 
				 if ray:
			
 
				-    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import (  # noqa: E501
			
 
				-        RayTokenizerGroupPool)
			
 
				+    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import \
			
 
				+        RayTokenizerGroupPool  # noqa: E501
			
 
				 else:
			
 
				     RayTokenizerGroupPool = None
			
 
				 
			
--- a/aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py
+++ b/aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py
@@ -7,9 +7,10 @@ try:
 
				 except ImportError:
			
 
				     # For older versions of Ray
			
 
				     from ray.exceptions import RayActorError as ActorDiedError
			
 
				+
			
 
				+from loguru import logger
			
 
				 from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
			
 
				 from transformers import PreTrainedTokenizer
			
 
				-from loguru import logger
			
 
				 
			
 
				 from aphrodite.common.config import TokenizerPoolConfig
			
 
				 from aphrodite.executor.ray_utils import ray
			
--- a/examples/openai_api/vision.py
+++ b/examples/openai_api/vision.py
@@ -4,8 +4,8 @@ Launch the Aphrodite server with the following command:
 
				 aphrodite run llava-hf/llava-1.5-7b-hf \
			
 
				     --chat-template template_llava.jinja
			
 
				 """
			
 
				-import os
			
 
				 import base64
			
 
				+import os
			
 
				 
			
 
				 from openai import OpenAI
			
 
				 
			
--- a/examples/vision/vision_example.py
+++ b/examples/vision/vision_example.py
@@ -6,8 +6,8 @@ on HuggingFace model repository.
 
				 """
			
 
				 import os
			
 
				 
			
 
				-from transformers import AutoTokenizer
			
 
				 from PIL import Image
			
 
				+from transformers import AutoTokenizer
			
 
				 
			
 
				 from aphrodite import LLM, SamplingParams
			
 
				 from aphrodite.common.utils import FlexibleArgumentParser
			
--- a/formatting.sh
+++ b/formatting.sh
@@ -25,6 +25,7 @@ YAPF_VERSION=$(yapf --version | awk '{print $2}')
 
				 RUFF_VERSION=$(ruff --version | awk '{print $2}')
			
 
				 MYPY_VERSION=$(mypy --version | awk '{print $2}')
			
 
				 CODESPELL_VERSION=$(codespell --version)
			
 
				+ISORT_VERSION=$(isort --vn)
			
 
				 CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}')
			
 
				 
			
 
				 # # params: tool name, tool version, required version
			
@@ -35,11 +36,11 @@ tool_version_check() {
 
				     fi
			
 
				 }
			
 
				 
			
 
				-tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)"
			
 
				-tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)"
			
 
				-tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)"
			
 
				-tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)"
			
 
				-tool_version_check "clang-format" "$CLANGFORMAT_VERSION" "$(grep clang-format requirements-dev.txt | cut -d'=' -f3)"
			
 
				+tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-lint.txt | cut -d'=' -f3)"
			
 
				+tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-lint.txt | cut -d'=' -f3)"
			
 
				+tool_version_check "isort" "$ISORT_VERSION" "$(grep isort requirements-lint.txt | cut -d'=' -f3)"
			
 
				+tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-lint.txt | cut -d'=' -f3)"
			
 
				+tool_version_check "clang-format" "$CLANGFORMAT_VERSION" "$(grep clang-format requirements-lint.txt | cut -d'=' -f3)"
			
 
				 
			
 
				 YAPF_FLAGS=(
			
 
				     '--recursive'
			
@@ -92,8 +93,12 @@ else
 
				 fi
			
 
				 echo 'Aphrodite yapf: Done'
			
 
				 
			
 
				+
			
 
				+# If git diff returns a file that is in the skip list, the file may be checked anyway:
			
 
				+# https://github.com/codespell-project/codespell/issues/1915
			
 
				+# Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem
			
 
				 CODESPELL_EXCLUDES=(
			
 
				-    '--skip' '*docs/source/_build/**'
			
 
				+    '--skip' './tests/benchmarks/sonnet.txt,build/**'
			
 
				 )
			
 
				 
			
 
				 # check spelling of specified files
			
@@ -114,10 +119,9 @@ spell_check_changed() {
 
				     # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
			
 
				     # exist on both branches.
			
 
				     MERGEBASE="$(git merge-base origin/main HEAD)"
			
 
				-
			
 
				     if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
			
 
				         git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
			
 
				-             codespell "${CODESPELL_EXCLUDES[@]}"
			
 
				+            codespell "${CODESPELL_EXCLUDES[@]}"
			
 
				     fi
			
 
				 }
			
 
				 
			
@@ -161,7 +165,6 @@ lint_changed() {
 
				 }
			
 
				 
			
 
				 # Run Ruff
			
 
				-echo 'Aphrodite ruff:'
			
 
				 ### This flag lints individual files. --files *must* be the first command line
			
 
				 ### arg to use this option.
			
 
				 if [[ "$1" == '--files' ]]; then
			
@@ -174,14 +177,55 @@ else
 
				    # Format only the files that changed in last commit.
			
 
				    lint_changed
			
 
				 fi
			
 
				+echo 'Aphrodite ruff: Done'
			
 
				+
			
 
				+# run isort on the specified files
			
 
				+isort_check() {
			
 
				+    isort "$@"
			
 
				+}
			
 
				+
			
 
				+isort_check_all(){
			
 
				+  isort .
			
 
				+}
			
 
				+
			
 
				+# Run isort on files that differ from main branch.
			
 
				+isort_check_changed() {
			
 
				+    # The `if` guard ensures that the list of filenames is not empty;
			
 
				+    # otherwise xargs would invoke isort with 0 positional arguments
			
 
				+    # instead of the list of changed files.
			
 
				+    #
			
 
				+    # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
			
 
				+    # exist on both branches.
			
 
				+    MERGEBASE="$(git merge-base origin/main HEAD)"
			
 
				+
			
 
				+    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
			
 
				+        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
			
 
				+             isort
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+# Run Isort
			
 
				+# This flag runs isort on individual files. --files *must* be the first command line
			
 
				+# arg to use this option.
			
 
				+if [[ "$1" == '--files' ]]; then
			
 
				+   isort_check "${@:2}"
			
 
				+   # If `--all` is passed, then any further arguments are ignored and the
			
 
				+   # entire python directory is linted.
			
 
				+elif [[ "$1" == '--all' ]]; then
			
 
				+   isort_check_all
			
 
				+else
			
 
				+   # Run isort only on the files that differ from the main branch.
			
 
				+   isort_check_changed
			
 
				+fi
			
 
				+echo 'Aphrodite isort: Done'
			
 
				 
			
 
				 # Clang-format section
			
 
				 # Exclude some files for formatting because they are vendored
			
 
				 # NOTE: Keep up to date with .github/workflows/clang-format.yml
			
 
				 CLANG_FORMAT_EXCLUDES=(
			
 
				-    'kernels/moe/topk_softmax_kernels.cu'
			
 
				-    'kerneks/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
			
 
				-    'kerneks/punica/bgmv/bgmv_config.h'
			
 
				+    'kernels/moe/softmax.cu'
			
 
				+    'kernels/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
			
 
				+    'kernels/punica/bgmv/bgmv_config.h'
			
 
				     'kernels/punica/bgmv/bgmv_impl.cuh'
			
 
				     'kernels/punica/bgmv/vec_dtypes.cuh'
			
 
				     'kernels/punica/punica_ops.cu'
			
@@ -227,6 +271,7 @@ else
 
				 fi
			
 
				 echo 'Aphrodite clang-format: Done'
			
 
				 
			
 
				+
			
 
				 if ! git diff --quiet &>/dev/null; then
			
 
				     echo 'Reformatted files. Please review and stage the changes.'
			
 
				     echo 'Changes not staged for commit:'
			
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,30 +1,5 @@
 
				-# formatting
			
 
				-yapf==0.32.0
			
 
				-toml==0.10.2
			
 
				-tomli==2.0.1
			
 
				-ruff==0.1.5
			
 
				-codespell==2.2.6
			
 
				-clang-format==18.1.5
			
 
				+-r requirements-lint.txt
			
 
				+-r requirements-test.txt
			
 
				 
			
 
				-# type checking
			
 
				-mypy==0.991
			
 
				-types-PyYAML
			
 
				-types-requests
			
 
				-types-setuptools
			
 
				-
			
 
				-# testing
			
 
				-pytest
			
 
				-pytest-forked
			
 
				-pytest-asyncio
			
 
				-pytest-rerunfailures
			
 
				-httpx
			
 
				-einops # required for MPT
			
 
				-openai
			
 
				-requests
			
 
				-ray
			
 
				-peft
			
 
				-
			
 
				-# Benchmarking
			
 
				-aiohttp
			
 
				-
			
 
				-bitsandbytes==0.42.0
			
 
				+# Avoid adding requirements directly to this file.
			
 
				+# Instead, modify the two files referenced above.
			
--- a/requirements-lint.txt
+++ b/requirements-lint.txt
@@ -0,0 +1,14 @@
 
				+# formatting
			
 
				+yapf==0.32.0
			
 
				+toml==0.10.2
			
 
				+tomli==2.0.1
			
 
				+ruff==0.1.5
			
 
				+codespell==2.3.0
			
 
				+isort==5.13.2
			
 
				+clang-format==18.1.5
			
 
				+
			
 
				+# type checking
			
 
				+mypy==1.9.0
			
 
				+types-PyYAML
			
 
				+types-requests
			
 
				+types-setuptools
			
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -0,0 +1,25 @@
 
				+# testing
			
 
				+pytest
			
 
				+tensorizer>=2.9.0
			
 
				+pytest-forked
			
 
				+pytest-asyncio
			
 
				+pytest-rerunfailures
			
 
				+pytest-shard
			
 
				+
			
 
				+# testing utils
			
 
				+awscli
			
 
				+einops # required for MPT
			
 
				+httpx
			
 
				+peft
			
 
				+requests
			
 
				+ray
			
 
				+sentence-transformers # required for embedding
			
 
				+sparseml==1.8.0 # required for compressed-tensors
			
 
				+compressed-tensors==0.4.0 # required for compressed-tensors
			
 
				+timm # required for internvl test
			
 
				+
			
 
				+# Benchmarking
			
 
				+aiohttp
			
 
				+
			
 
				+# quantization
			
 
				+bitsandbytes==0.42.0
			
--- a/tests/benchmarks/engine/latency.py
+++ b/tests/benchmarks/engine/latency.py
@@ -10,10 +10,10 @@ import torch
 
				 from tqdm import tqdm
			
 
				 
			
 
				 from aphrodite import LLM, SamplingParams
			
 
				+from aphrodite.common.utils import FlexibleArgumentParser
			
 
				 from aphrodite.engine.args_tools import EngineArgs
			
 
				 from aphrodite.inputs import PromptStrictInputs
			
 
				 from aphrodite.quantization import QUANTIZATION_METHODS
			
 
				-from aphrodite.common.utils import FlexibleArgumentParser
			
 
				 
			
 
				 
			
 
				 def main(args: argparse.Namespace):
			
--- a/tests/samplers/test_rejection_sampling.py
+++ b/tests/samplers/test_rejection_sampling.py
@@ -1,13 +1,12 @@
 
				 """Tests for rejection sampling."""
			
 
				-import pytest
			
 
				 from typing import List, Tuple
			
 
				 
			
 
				+import pytest
			
 
				 import torch
			
 
				 import torch.nn.functional as F
			
 
				 
			
 
				-from aphrodite.modeling.utils import set_random_seed
			
 
				-
			
 
				 from aphrodite.modeling.layers.rejection_sampler import RejectionSampler
			
 
				+from aphrodite.modeling.utils import set_random_seed
			
 
				 
			
 
				 
			
 
				 def mock_causal_accepted_tensor(