Browse Source

ci: take one of fixing lint issues

AlpinDale 6 months ago
parent
commit
4d4e767838
65 changed files with 192 additions and 188 deletions
  1. 2 2
      aphrodite/attention/backends/abstract.py
  2. 2 2
      aphrodite/attention/backends/rocm_flash_attn.py
  3. 2 2
      aphrodite/attention/ops/blocksparse_attention/interface.py
  4. 12 12
      aphrodite/attention/selector.py
  5. 2 2
      aphrodite/common/config.py
  6. 2 2
      aphrodite/distributed/device_communicators/custom_all_reduce.py
  7. 2 2
      aphrodite/distributed/device_communicators/custom_all_reduce_utils.py
  8. 8 8
      aphrodite/distributed/parallel_state.py
  9. 4 4
      aphrodite/endpoints/chat_utils.py
  10. 4 4
      aphrodite/endpoints/openai/api_server.py
  11. 2 2
      aphrodite/endpoints/openai/rpc/client.py
  12. 2 2
      aphrodite/endpoints/openai/serving_engine.py
  13. 6 6
      aphrodite/engine/aphrodite_engine.py
  14. 2 2
      aphrodite/engine/args_tools.py
  15. 8 8
      aphrodite/engine/async_aphrodite.py
  16. 4 4
      aphrodite/engine/output_processor/interfaces.py
  17. 2 2
      aphrodite/engine/output_processor/multi_step.py
  18. 2 2
      aphrodite/engine/output_processor/single_step.py
  19. 2 2
      aphrodite/lora/layers.py
  20. 6 6
      aphrodite/modeling/guided_decoding/__init__.py
  21. 2 2
      aphrodite/modeling/guided_decoding/lm_format_enforcer_decoding.py
  22. 2 2
      aphrodite/modeling/guided_decoding/outlines_decoding.py
  23. 2 2
      aphrodite/modeling/layers/logits_processor.py
  24. 2 2
      aphrodite/modeling/layers/typical_acceptance_sampler.py
  25. 2 2
      aphrodite/modeling/model_loader/tensorizer.py
  26. 2 2
      aphrodite/modeling/models/bloom.py
  27. 2 2
      aphrodite/modeling/models/falcon.py
  28. 2 2
      aphrodite/modeling/models/gemma.py
  29. 2 2
      aphrodite/modeling/models/gemma2.py
  30. 2 2
      aphrodite/modeling/models/gpt2.py
  31. 2 2
      aphrodite/modeling/models/gpt_bigcode.py
  32. 2 2
      aphrodite/modeling/models/jais.py
  33. 2 2
      aphrodite/modeling/models/llama.py
  34. 2 2
      aphrodite/modeling/models/mpt.py
  35. 2 2
      aphrodite/modeling/models/opt.py
  36. 2 2
      aphrodite/modeling/models/siglip.py
  37. 2 2
      aphrodite/processing/block/cpu_gpu_block_allocator.py
  38. 2 2
      aphrodite/processing/block_manager_v1.py
  39. 4 4
      aphrodite/processing/block_manager_v2.py
  40. 6 6
      aphrodite/processing/interfaces.py
  41. 2 2
      aphrodite/prompt_adapter/layers.py
  42. 2 2
      aphrodite/prompt_adapter/models.py
  43. 2 2
      aphrodite/quantization/__init__.py
  44. 2 2
      aphrodite/quantization/compressed_tensors/compressed_tensors.py
  45. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py
  46. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py
  47. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
  48. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
  49. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
  50. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
  51. 2 2
      aphrodite/quantization/fp8.py
  52. 2 2
      aphrodite/quantization/gguf.py
  53. 2 2
      aphrodite/spec_decode/draft_model_runner.py
  54. 2 2
      aphrodite/spec_decode/metrics.py
  55. 4 4
      aphrodite/spec_decode/spec_decode_worker.py
  56. 2 2
      aphrodite/task_handler/model_runner.py
  57. 2 2
      aphrodite/transformers_utils/config.py
  58. 2 2
      aphrodite/transformers_utils/configs/__init__.py
  59. 2 2
      aphrodite/transformers_utils/detokenizer.py
  60. 2 2
      aphrodite/transformers_utils/tokenizer_group/__init__.py
  61. 4 4
      aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py
  62. 2 2
      aphrodite/triton_utils/__init__.py
  63. 12 10
      examples/aphrodite_engine_example.py
  64. 2 0
      pyproject.toml
  65. 2 2
      tests/benchmarks/kernels/marlin.py

+ 2 - 2
aphrodite/attention/backends/abstract.py

@@ -7,8 +7,8 @@ from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Set,
 import torch
 import torch
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
-    from aphrodite.task_handler.model_runner_base import \
-        ModelRunnerInputBuilderBase
+    from aphrodite.task_handler.model_runner_base import (
+        ModelRunnerInputBuilderBase)
 
 
 
 
 class AttentionType(Enum):
 class AttentionType(Enum):

+ 2 - 2
aphrodite/attention/backends/rocm_flash_attn.py

@@ -279,8 +279,8 @@ class ROCmFlashAttentionImpl(AttentionImpl):
             "APHRODITE_USE_TRITON_FLASH_ATTN", "True").lower()
             "APHRODITE_USE_TRITON_FLASH_ATTN", "True").lower()
                                       in ("true", "1"))
                                       in ("true", "1"))
         if self.use_triton_flash_attn:
         if self.use_triton_flash_attn:
-            from aphrodite.attention.ops.triton_flash_attn import \
-                triton_attention  # noqa: F401
+            from aphrodite.attention.ops.triton_flash_attn import (  # noqa: F401
+                triton_attention)
             self.attn_func = triton_attention
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
             logger.debug("Using Triton FA in ROCmBackend")
             if self.sliding_window != (-1, -1):
             if self.sliding_window != (-1, -1):

+ 2 - 2
aphrodite/attention/ops/blocksparse_attention/interface.py

@@ -11,8 +11,8 @@ IS_COMPUTE_8_OR_ABOVE = (torch.cuda.is_available()
                          and current_platform.get_device_capability()[0] >= 8)
                          and current_platform.get_device_capability()[0] >= 8)
 
 
 if IS_COMPUTE_8_OR_ABOVE:
 if IS_COMPUTE_8_OR_ABOVE:
-    from aphrodite.attention.ops.blocksparse_attention.blocksparse_attention_kernel import \
-        blocksparse_flash_attn_varlen_fwd  # noqa: E501
+    from aphrodite.attention.ops.blocksparse_attention.blocksparse_attention_kernel import (  # noqa: E501
+        blocksparse_flash_attn_varlen_fwd)
 
 
 
 
 class LocalStridedBlockSparseAttn(torch.nn.Module):
 class LocalStridedBlockSparseAttn(torch.nn.Module):

+ 12 - 12
aphrodite/attention/selector.py

@@ -38,8 +38,8 @@ def get_attn_backend(
 
 
     if is_blocksparse:
     if is_blocksparse:
         logger.info("Using BlocksparseFlashAttention backend.")
         logger.info("Using BlocksparseFlashAttention backend.")
-        from aphrodite.attention.backends.blocksparse_attn import \
-            BlocksparseFlashAttentionBackend
+        from aphrodite.attention.backends.blocksparse_attn import (
+            BlocksparseFlashAttentionBackend)
         return BlocksparseFlashAttentionBackend
         return BlocksparseFlashAttentionBackend
     """Determine which attention backend to use and only import
     """Determine which attention backend to use and only import
     the selected backend module.
     the selected backend module.
@@ -48,18 +48,18 @@ def get_attn_backend(
                                 sliding_window, dtype, kv_cache_dtype,
                                 sliding_window, dtype, kv_cache_dtype,
                                 block_size)
                                 block_size)
     if backend == _Backend.FLASH_ATTN:
     if backend == _Backend.FLASH_ATTN:
-        from aphrodite.attention.backends.flash_attn import \
-            FlashAttentionBackend  # noqa: F401
+        from aphrodite.attention.backends.flash_attn import (  # noqa: F401
+            FlashAttentionBackend)
         return FlashAttentionBackend
         return FlashAttentionBackend
     if backend == _Backend.XFORMERS:
     if backend == _Backend.XFORMERS:
         logger.info("Using XFormers backend.")
         logger.info("Using XFormers backend.")
-        from aphrodite.attention.backends.xformers import \
-            XFormersBackend  # noqa: F401
+        from aphrodite.attention.backends.xformers import (  # noqa: F401
+            XFormersBackend)
         return XFormersBackend
         return XFormersBackend
     elif backend == _Backend.ROCM_FLASH:
     elif backend == _Backend.ROCM_FLASH:
         logger.info("Using ROCmFlashAttention backend.")
         logger.info("Using ROCmFlashAttention backend.")
-        from aphrodite.attention.backends.rocm_flash_attn import \
-            ROCmFlashAttentionBackend  # noqa: F401
+        from aphrodite.attention.backends.rocm_flash_attn import (  # noqa: F401
+            ROCmFlashAttentionBackend)
         return ROCmFlashAttentionBackend
         return ROCmFlashAttentionBackend
     elif backend == _Backend.TORCH_SDPA:
     elif backend == _Backend.TORCH_SDPA:
         assert is_cpu(), RuntimeError(
         assert is_cpu(), RuntimeError(
@@ -69,8 +69,8 @@ def get_attn_backend(
         return TorchSDPABackend
         return TorchSDPABackend
     elif backend == _Backend.OPENVINO:
     elif backend == _Backend.OPENVINO:
         logger.info("Using OpenVINO attention backend.")
         logger.info("Using OpenVINO attention backend.")
-        from aphrodite.attention.backends.openvino import \
-            OpenVINOAttentionBackend
+        from aphrodite.attention.backends.openvino import (
+            OpenVINOAttentionBackend)
         return OpenVINOAttentionBackend
         return OpenVINOAttentionBackend
     elif backend == _Backend.IPEX:
     elif backend == _Backend.IPEX:
         assert is_xpu(), RuntimeError(
         assert is_xpu(), RuntimeError(
@@ -177,8 +177,8 @@ def which_attn_to_use(
         try:
         try:
             import aphrodite_flash_attn  # noqa: F401
             import aphrodite_flash_attn  # noqa: F401
 
 
-            from aphrodite.attention.backends.flash_attn import \
-                FlashAttentionBackend  # noqa: F401
+            from aphrodite.attention.backends.flash_attn import (  # noqa: F401
+                FlashAttentionBackend)
 
 
             supported_sizes = FlashAttentionBackend.get_supported_head_sizes()
             supported_sizes = FlashAttentionBackend.get_supported_head_sizes()
             if head_size not in supported_sizes:
             if head_size not in supported_sizes:

+ 2 - 2
aphrodite/common/config.py

@@ -23,8 +23,8 @@ if TYPE_CHECKING:
 
 
     from aphrodite.executor.executor_base import ExecutorBase
     from aphrodite.executor.executor_base import ExecutorBase
     from aphrodite.modeling.model_loader.loader import BaseModelLoader
     from aphrodite.modeling.model_loader.loader import BaseModelLoader
-    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-        BaseTokenizerGroup  # noqa: E501
+    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (  # noqa: E501
+        BaseTokenizerGroup)
 
 
 # If true, will load models from ModelScope instead of Hugging Face Hub.
 # If true, will load models from ModelScope instead of Hugging Face Hub.
 APHRODITE_USE_MODELSCOPE = os.environ.get("APHRODITE_USE_MODELSCOPE",
 APHRODITE_USE_MODELSCOPE = os.environ.get("APHRODITE_USE_MODELSCOPE",

+ 2 - 2
aphrodite/distributed/device_communicators/custom_all_reduce.py

@@ -9,8 +9,8 @@ from torch.distributed import ProcessGroup
 
 
 from aphrodite import _custom_ops as ops
 from aphrodite import _custom_ops as ops
 from aphrodite.common.utils import cuda_device_count_stateless, is_full_nvlink
 from aphrodite.common.utils import cuda_device_count_stateless, is_full_nvlink
-from aphrodite.distributed.device_communicators.custom_all_reduce_utils import \
-    gpu_p2p_access_check
+from aphrodite.distributed.device_communicators.custom_all_reduce_utils import (
+    gpu_p2p_access_check)
 from aphrodite.distributed.parallel_state import in_the_same_node_as
 from aphrodite.distributed.parallel_state import in_the_same_node_as
 
 
 try:
 try:

+ 2 - 2
aphrodite/distributed/device_communicators/custom_all_reduce_utils.py

@@ -13,8 +13,8 @@ from loguru import logger
 
 
 from aphrodite.common.utils import (cuda_device_count_stateless,
 from aphrodite.common.utils import (cuda_device_count_stateless,
                                     update_environment_variables)
                                     update_environment_variables)
-from aphrodite.distributed.device_communicators.cuda_wrapper import \
-    CudaRTLibrary
+from aphrodite.distributed.device_communicators.cuda_wrapper import (
+    CudaRTLibrary)
 
 
 
 
 def producer(batch_src: Sequence[int],
 def producer(batch_src: Sequence[int],

+ 8 - 8
aphrodite/distributed/parallel_state.py

@@ -144,10 +144,10 @@ class GroupCoordinator:
         self.use_tpu_communicator = use_tpu_communicator
         self.use_tpu_communicator = use_tpu_communicator
 
 
         # lazy import to avoid documentation build error
         # lazy import to avoid documentation build error
-        from aphrodite.distributed.device_communicators.custom_all_reduce import \
-            CustomAllreduce  # noqa: E501
-        from aphrodite.distributed.device_communicators.pynccl import \
-            PyNcclCommunicator
+        from aphrodite.distributed.device_communicators.custom_all_reduce import (  # noqa: E501
+            CustomAllreduce)
+        from aphrodite.distributed.device_communicators.pynccl import (
+            PyNcclCommunicator)
 
 
         self.pynccl_comm: Optional[PyNcclCommunicator]
         self.pynccl_comm: Optional[PyNcclCommunicator]
         if use_pynccl and self.world_size > 1:
         if use_pynccl and self.world_size > 1:
@@ -168,14 +168,14 @@ class GroupCoordinator:
         else:
         else:
             self.ca_comm = None
             self.ca_comm = None
 
 
-        from aphrodite.distributed.device_communicators.tpu_communicator import \
-            TpuCommunicator  # noqa: E501
+        from aphrodite.distributed.device_communicators.tpu_communicator import (  # noqa: E501
+            TpuCommunicator)
         self.tpu_communicator: Optional[TpuCommunicator]
         self.tpu_communicator: Optional[TpuCommunicator]
         if use_tpu_communicator and self.world_size > 1:
         if use_tpu_communicator and self.world_size > 1:
             self.tpu_communicator = TpuCommunicator(group=self.cpu_group)
             self.tpu_communicator = TpuCommunicator(group=self.cpu_group)
 
 
-        from aphrodite.distributed.device_communicators.shm_broadcast import \
-            MessageQueue
+        from aphrodite.distributed.device_communicators.shm_broadcast import (
+            MessageQueue)
         self.mq_broadcaster: Optional[MessageQueue] = None
         self.mq_broadcaster: Optional[MessageQueue] = None
         if use_message_queue_broadcaster and self.world_size > 1:
         if use_message_queue_broadcaster and self.world_size > 1:
             self.mq_broadcaster = MessageQueue.create_from_process_group(
             self.mq_broadcaster = MessageQueue.create_from_process_group(

+ 4 - 4
aphrodite/endpoints/chat_utils.py

@@ -10,11 +10,11 @@ from loguru import logger
 # yapf conflicts with isort for this block
 # yapf conflicts with isort for this block
 # yapf: disable
 # yapf: disable
 from openai.types.chat import ChatCompletionContentPartImageParam
 from openai.types.chat import ChatCompletionContentPartImageParam
-from openai.types.chat import \
-    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam
+from openai.types.chat import (
+    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam)
 from openai.types.chat import ChatCompletionContentPartTextParam
 from openai.types.chat import ChatCompletionContentPartTextParam
-from openai.types.chat import \
-    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
+from openai.types.chat import (
+    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam)
 # yapf: enable
 # yapf: enable
 # pydantic needs the TypedDict from typing_extensions
 # pydantic needs the TypedDict from typing_extensions
 from pydantic import ConfigDict
 from pydantic import ConfigDict

+ 4 - 4
aphrodite/endpoints/openai/api_server.py

@@ -42,11 +42,11 @@ from aphrodite.endpoints.openai.rpc.client import AsyncEngineRPCClient
 from aphrodite.endpoints.openai.rpc.server import run_rpc_server
 from aphrodite.endpoints.openai.rpc.server import run_rpc_server
 # yapf: enable
 # yapf: enable
 from aphrodite.endpoints.openai.serving_chat import OpenAIServingChat
 from aphrodite.endpoints.openai.serving_chat import OpenAIServingChat
-from aphrodite.endpoints.openai.serving_completions import \
-    OpenAIServingCompletion
+from aphrodite.endpoints.openai.serving_completions import (
+    OpenAIServingCompletion)
 from aphrodite.endpoints.openai.serving_embedding import OpenAIServingEmbedding
 from aphrodite.endpoints.openai.serving_embedding import OpenAIServingEmbedding
-from aphrodite.endpoints.openai.serving_tokenization import \
-    OpenAIServingTokenization
+from aphrodite.endpoints.openai.serving_tokenization import (
+    OpenAIServingTokenization)
 from aphrodite.engine.args_tools import AsyncEngineArgs
 from aphrodite.engine.args_tools import AsyncEngineArgs
 from aphrodite.engine.async_aphrodite import AsyncAphrodite
 from aphrodite.engine.async_aphrodite import AsyncAphrodite
 from aphrodite.engine.protocol import AsyncEngineClient
 from aphrodite.engine.protocol import AsyncEngineClient

+ 2 - 2
aphrodite/endpoints/openai/rpc/client.py

@@ -17,8 +17,8 @@ from aphrodite.endpoints.openai.rpc import (APHRODITE_RPC_HEALTHY_STR,
 from aphrodite.inputs import PromptInputs
 from aphrodite.inputs import PromptInputs
 from aphrodite.lora.request import LoRARequest
 from aphrodite.lora.request import LoRARequest
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
-from aphrodite.transformers_utils.tokenizer_group import \
-    init_tokenizer_from_configs
+from aphrodite.transformers_utils.tokenizer_group import (
+    init_tokenizer_from_configs)
 
 
 
 
 class AsyncEngineRPCClient:
 class AsyncEngineRPCClient:

+ 2 - 2
aphrodite/endpoints/openai/serving_engine.py

@@ -29,8 +29,8 @@ from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
 from aphrodite.engine.protocol import AsyncEngineClient
 from aphrodite.engine.protocol import AsyncEngineClient
 from aphrodite.inputs import parse_and_batch_prompt
 from aphrodite.inputs import parse_and_batch_prompt
 from aphrodite.lora.request import LoRARequest
 from aphrodite.lora.request import LoRARequest
-from aphrodite.modeling.guided_decoding import \
-    get_guided_decoding_logits_processor
+from aphrodite.modeling.guided_decoding import (
+    get_guided_decoding_logits_processor)
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
 
 
 
 

+ 6 - 6
aphrodite/engine/aphrodite_engine.py

@@ -26,11 +26,11 @@ from aphrodite.common.utils import Counter
 from aphrodite.engine.args_tools import EngineArgs
 from aphrodite.engine.args_tools import EngineArgs
 from aphrodite.engine.metrics import (LoggingStatLogger, PrometheusStatLogger,
 from aphrodite.engine.metrics import (LoggingStatLogger, PrometheusStatLogger,
                                       StatLoggerBase, Stats)
                                       StatLoggerBase, Stats)
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.engine.output_processor.stop_checker import StopChecker
-from aphrodite.engine.output_processor.util import \
-    create_output_by_sequence_group
+from aphrodite.engine.output_processor.util import (
+    create_output_by_sequence_group)
 from aphrodite.executor.executor_base import ExecutorBase
 from aphrodite.executor.executor_base import ExecutorBase
 from aphrodite.executor.ray_utils import initialize_ray_cluster
 from aphrodite.executor.ray_utils import initialize_ray_cluster
 from aphrodite.inputs import INPUT_REGISTRY, LLMInputs, PromptInputs
 from aphrodite.inputs import INPUT_REGISTRY, LLMInputs, PromptInputs
@@ -368,8 +368,8 @@ class AphroditeEngine:
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutor
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutor
             executor_class = RayGPUExecutor
             executor_class = RayGPUExecutor
         elif distributed_executor_backend == "mp":
         elif distributed_executor_backend == "mp":
-            from aphrodite.executor.multiproc_gpu_executor import \
-                MultiprocessingGPUExecutor
+            from aphrodite.executor.multiproc_gpu_executor import (
+                MultiprocessingGPUExecutor)
             assert not APHRODITE_USE_RAY_SPMD_WORKER, (
             assert not APHRODITE_USE_RAY_SPMD_WORKER, (
                 "multiprocessing distributed executor backend does not "
                 "multiprocessing distributed executor backend does not "
                 "support APHRODITE_USE_RAY_SPMD_WORKER=1")
                 "support APHRODITE_USE_RAY_SPMD_WORKER=1")

+ 2 - 2
aphrodite/engine/args_tools.py

@@ -17,8 +17,8 @@ from aphrodite.executor.executor_base import ExecutorBase
 from aphrodite.quantization import QUANTIZATION_METHODS
 from aphrodite.quantization import QUANTIZATION_METHODS
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
-    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-        BaseTokenizerGroup  # noqa: E501
+    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (  # noqa: E501
+        BaseTokenizerGroup)
 
 
 
 
 @dataclass
 @dataclass

+ 8 - 8
aphrodite/engine/async_aphrodite.py

@@ -408,8 +408,8 @@ class AsyncAphrodite:
         elif engine_config.device_config.device_type == "tpu":
         elif engine_config.device_config.device_type == "tpu":
             if distributed_executor_backend == "ray":
             if distributed_executor_backend == "ray":
                 initialize_ray_cluster(engine_config.parallel_config)
                 initialize_ray_cluster(engine_config.parallel_config)
-                from aphrodite.executor.ray_tpu_executor import \
-                    RayTPUExecutorAsync
+                from aphrodite.executor.ray_tpu_executor import (
+                    RayTPUExecutorAsync)
                 executor_class = RayTPUExecutorAsync
                 executor_class = RayTPUExecutorAsync
             else:
             else:
                 assert distributed_executor_backend is None
                 assert distributed_executor_backend is None
@@ -422,8 +422,8 @@ class AsyncAphrodite:
             assert distributed_executor_backend is None, (
             assert distributed_executor_backend is None, (
                 "Distributed execution is not supported with the OpenVINO "
                 "Distributed execution is not supported with the OpenVINO "
                 "backend.")
                 "backend.")
-            from aphrodite.executor.openvino_executor import \
-                OpenVINOExecutorAsync
+            from aphrodite.executor.openvino_executor import (
+                OpenVINOExecutorAsync)
             executor_class = OpenVINOExecutorAsync
             executor_class = OpenVINOExecutorAsync
         elif engine_config.device_config.device_type == "xpu":
         elif engine_config.device_config.device_type == "xpu":
             if distributed_executor_backend is None:
             if distributed_executor_backend is None:
@@ -431,8 +431,8 @@ class AsyncAphrodite:
                 executor_class = XPUExecutorAsync
                 executor_class = XPUExecutorAsync
             elif distributed_executor_backend == "ray":
             elif distributed_executor_backend == "ray":
                 initialize_ray_cluster(engine_config.parallel_config)
                 initialize_ray_cluster(engine_config.parallel_config)
-                from aphrodite.executor.ray_xpu_executor import \
-                    RayXPUExecutorAsync
+                from aphrodite.executor.ray_xpu_executor import (
+                    RayXPUExecutorAsync)
                 executor_class = RayXPUExecutorAsync
                 executor_class = RayXPUExecutorAsync
             else:
             else:
                 raise RuntimeError(
                 raise RuntimeError(
@@ -442,8 +442,8 @@ class AsyncAphrodite:
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutorAsync
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutorAsync
             executor_class = RayGPUExecutorAsync
             executor_class = RayGPUExecutorAsync
         elif distributed_executor_backend == "mp":
         elif distributed_executor_backend == "mp":
-            from aphrodite.executor.multiproc_gpu_executor import \
-                MultiprocessingGPUExecutorAsync
+            from aphrodite.executor.multiproc_gpu_executor import (
+                MultiprocessingGPUExecutorAsync)
             executor_class = MultiprocessingGPUExecutorAsync
             executor_class = MultiprocessingGPUExecutorAsync
         else:
         else:
             from aphrodite.executor.gpu_executor import GPUExecutorAsync
             from aphrodite.executor.gpu_executor import GPUExecutorAsync

+ 4 - 4
aphrodite/engine/output_processor/interfaces.py

@@ -40,8 +40,8 @@ class SequenceGroupOutputProcessor(ABC):
         """
         """
         if scheduler_config.num_lookahead_slots == 0:
         if scheduler_config.num_lookahead_slots == 0:
             # Importing here to avoid cycle.
             # Importing here to avoid cycle.
-            from aphrodite.engine.output_processor.single_step import \
-                SingleStepOutputProcessor
+            from aphrodite.engine.output_processor.single_step import (
+                SingleStepOutputProcessor)
             return SingleStepOutputProcessor(
             return SingleStepOutputProcessor(
                 scheduler_config,
                 scheduler_config,
                 detokenizer,
                 detokenizer,
@@ -51,8 +51,8 @@ class SequenceGroupOutputProcessor(ABC):
             )
             )
         else:
         else:
             # Importing here to avoid cycle.
             # Importing here to avoid cycle.
-            from aphrodite.engine.output_processor.multi_step import \
-                MultiStepOutputProcessor
+            from aphrodite.engine.output_processor.multi_step import (
+                MultiStepOutputProcessor)
             return MultiStepOutputProcessor(
             return MultiStepOutputProcessor(
                 detokenizer,
                 detokenizer,
                 scheduler,
                 scheduler,

+ 2 - 2
aphrodite/engine/output_processor/multi_step.py

@@ -9,8 +9,8 @@ from aphrodite.common.sequence import (Sequence, SequenceGroup,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceStatus)
                                        SequenceStatus)
 from aphrodite.common.utils import Counter
 from aphrodite.common.utils import Counter
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.transformers_utils.detokenizer import Detokenizer
 from aphrodite.transformers_utils.detokenizer import Detokenizer

+ 2 - 2
aphrodite/engine/output_processor/single_step.py

@@ -6,8 +6,8 @@ from aphrodite.common.sequence import (Sequence, SequenceGroup,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceStatus)
                                        SequenceStatus)
 from aphrodite.common.utils import Counter
 from aphrodite.common.utils import Counter
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.transformers_utils.detokenizer import Detokenizer
 from aphrodite.transformers_utils.detokenizer import Detokenizer

+ 2 - 2
aphrodite/lora/layers.py

@@ -26,8 +26,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import (
 from aphrodite.modeling.layers.rotary_embedding import (
     LinearScalingRotaryEmbedding, RotaryEmbedding)
     LinearScalingRotaryEmbedding, RotaryEmbedding)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
     pass
     pass

+ 6 - 6
aphrodite/modeling/guided_decoding/__init__.py

@@ -4,8 +4,8 @@ from aphrodite.common.sampling_params import LogitsProcessorFunc
 from aphrodite.endpoints.openai.protocol import (
 from aphrodite.endpoints.openai.protocol import (
     ChatCompletionNamedToolChoiceParam, ChatCompletionRequest,
     ChatCompletionNamedToolChoiceParam, ChatCompletionRequest,
     CompletionRequest)
     CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.outlines_decoding import (
 from aphrodite.modeling.guided_decoding.outlines_decoding import (
     get_local_outlines_guided_decoding_logits_processor,
     get_local_outlines_guided_decoding_logits_processor,
     get_outlines_guided_decoding_logits_processor)
     get_outlines_guided_decoding_logits_processor)
@@ -20,8 +20,8 @@ async def get_guided_decoding_logits_processor(
         return await get_outlines_guided_decoding_logits_processor(
         return await get_outlines_guided_decoding_logits_processor(
             request, tokenizer)
             request, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
     if guided_decoding_backend == 'lm-format-enforcer':
-        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import \
-            get_lm_format_enforcer_guided_decoding_logits_processor  # noqa
+        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import (  # noqa
+            get_lm_format_enforcer_guided_decoding_logits_processor)
         return await get_lm_format_enforcer_guided_decoding_logits_processor(
         return await get_lm_format_enforcer_guided_decoding_logits_processor(
             request, tokenizer)
             request, tokenizer)
 
 
@@ -39,8 +39,8 @@ def get_local_guided_decoding_logits_processor(
         return get_local_outlines_guided_decoding_logits_processor(
         return get_local_outlines_guided_decoding_logits_processor(
             guided_options, tokenizer)
             guided_options, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
     if guided_decoding_backend == 'lm-format-enforcer':
-        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import \
-            get_local_lm_format_enforcer_guided_decoding_logits_processor  # noqa
+        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import (  # noqa
+            get_local_lm_format_enforcer_guided_decoding_logits_processor)
         return get_local_lm_format_enforcer_guided_decoding_logits_processor(
         return get_local_lm_format_enforcer_guided_decoding_logits_processor(
             guided_options, tokenizer)
             guided_options, tokenizer)
 
 

+ 2 - 2
aphrodite/modeling/guided_decoding/lm_format_enforcer_decoding.py

@@ -11,8 +11,8 @@ from transformers import PreTrainedTokenizerBase
 from aphrodite.common.sampling_params import LogitsProcessorFunc
 from aphrodite.common.sampling_params import LogitsProcessorFunc
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
                                                  CompletionRequest)
                                                  CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.lm_format_enforcer_logits_processors import (  # noqa: E501
 from aphrodite.modeling.guided_decoding.lm_format_enforcer_logits_processors import (  # noqa: E501
     build_aphrodite_logits_processor,
     build_aphrodite_logits_processor,
     build_aphrodite_token_enforcer_tokenizer_data)
     build_aphrodite_token_enforcer_tokenizer_data)

+ 2 - 2
aphrodite/modeling/guided_decoding/outlines_decoding.py

@@ -10,8 +10,8 @@ from transformers import PreTrainedTokenizerBase
 
 
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
                                                  CompletionRequest)
                                                  CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.outlines_logits_processors import (
 from aphrodite.modeling.guided_decoding.outlines_logits_processors import (
     CFGLogitsProcessor, JSONLogitsProcessor, RegexLogitsProcessor)
     CFGLogitsProcessor, JSONLogitsProcessor, RegexLogitsProcessor)
 
 

+ 2 - 2
aphrodite/modeling/layers/logits_processor.py

@@ -7,8 +7,8 @@ import torch.nn as nn
 
 
 from aphrodite.distributed import (tensor_model_parallel_all_gather,
 from aphrodite.distributed import (tensor_model_parallel_all_gather,
                                    tensor_model_parallel_gather)
                                    tensor_model_parallel_gather)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.platforms import current_platform
 from aphrodite.platforms import current_platform
 
 

+ 2 - 2
aphrodite/modeling/layers/typical_acceptance_sampler.py

@@ -1,8 +1,8 @@
 import torch
 import torch
 import torch.jit
 import torch.jit
 
 
-from aphrodite.modeling.layers.spec_decode_base_sampler import \
-    SpecDecodeDeterministicBaseSampler
+from aphrodite.modeling.layers.spec_decode_base_sampler import (
+    SpecDecodeDeterministicBaseSampler)
 
 
 
 
 class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):
 class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):

+ 2 - 2
aphrodite/modeling/model_loader/tensorizer.py

@@ -16,8 +16,8 @@ from transformers import PretrainedConfig
 from aphrodite.common.config import ModelConfig, ParallelConfig
 from aphrodite.common.config import ModelConfig, ParallelConfig
 from aphrodite.engine.aphrodite_engine import AphroditeEngine
 from aphrodite.engine.aphrodite_engine import AphroditeEngine
 from aphrodite.engine.args_tools import EngineArgs
 from aphrodite.engine.args_tools import EngineArgs
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig
 
 
 tensorizer_error_msg = None
 tensorizer_error_msg = None

+ 2 - 2
aphrodite/modeling/models/bloom.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/falcon.py

@@ -39,8 +39,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import get_rope
 from aphrodite.modeling.layers.rotary_embedding import get_rope
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gemma.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gemma2.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gpt2.py

@@ -33,8 +33,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gpt_bigcode.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/jais.py

@@ -35,8 +35,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/llama.py

@@ -53,8 +53,8 @@ from aphrodite.modeling.models.utils import (PPMissingLayer,
                                              make_layers)
                                              make_layers)
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig
-from aphrodite.quantization.compressed_tensors.utils import \
-    get_compressed_tensors_cache_scale
+from aphrodite.quantization.compressed_tensors.utils import (
+    get_compressed_tensors_cache_scale)
 
 
 
 
 class LlamaMLP(nn.Module):
 class LlamaMLP(nn.Module):

+ 2 - 2
aphrodite/modeling/models/mpt.py

@@ -17,8 +17,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/opt.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/siglip.py

@@ -20,8 +20,8 @@ from aphrodite.modeling.layers.activation import get_act_fn
 from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
 from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               QKVParallelLinear,
                                               QKVParallelLinear,
                                               RowParallelLinear)
                                               RowParallelLinear)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.multimodal.image import (cached_get_tokenizer,
 from aphrodite.multimodal.image import (cached_get_tokenizer,
                                         repeat_and_pad_image_tokens)
                                         repeat_and_pad_image_tokens)
 from aphrodite.quantization import QuantizationConfig
 from aphrodite.quantization import QuantizationConfig

+ 2 - 2
aphrodite/processing/block/cpu_gpu_block_allocator.py

@@ -6,8 +6,8 @@ from aphrodite.processing.block.interfaces import (Block, BlockAllocator,
                                                    DeviceAwareBlockAllocator)
                                                    DeviceAwareBlockAllocator)
 from aphrodite.processing.block.naive_block import (NaiveBlock,
 from aphrodite.processing.block.naive_block import (NaiveBlock,
                                                     NaiveBlockAllocator)
                                                     NaiveBlockAllocator)
-from aphrodite.processing.block.prefix_caching_block import \
-    PrefixCachingBlockAllocator
+from aphrodite.processing.block.prefix_caching_block import (
+    PrefixCachingBlockAllocator)
 
 
 
 
 class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):
 class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):

+ 2 - 2
aphrodite/processing/block_manager_v1.py

@@ -12,8 +12,8 @@ from loguru import logger
 from aphrodite.common.block import BlockTable, PhysicalTokenBlock
 from aphrodite.common.block import BlockTable, PhysicalTokenBlock
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.utils import Device
 from aphrodite.common.utils import Device
-from aphrodite.processing.block.utils import \
-    check_no_caching_or_swa_for_blockmgr_encdec
+from aphrodite.processing.block.utils import (
+    check_no_caching_or_swa_for_blockmgr_encdec)
 from aphrodite.processing.evictor_v1 import (EvictionPolicy, Evictor,
 from aphrodite.processing.evictor_v1 import (EvictionPolicy, Evictor,
                                              make_evictor)
                                              make_evictor)
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager

+ 4 - 4
aphrodite/processing/block_manager_v2.py

@@ -7,13 +7,13 @@ from typing import Tuple
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.utils import Device
 from aphrodite.common.utils import Device
 from aphrodite.processing.block.block_table import BlockTable
 from aphrodite.processing.block.block_table import BlockTable
-from aphrodite.processing.block.cpu_gpu_block_allocator import \
-    CpuGpuBlockAllocator
+from aphrodite.processing.block.cpu_gpu_block_allocator import (
+    CpuGpuBlockAllocator)
 from aphrodite.processing.block.interfaces import Block
 from aphrodite.processing.block.interfaces import Block
 from aphrodite.processing.block.prefix_caching_block import (
 from aphrodite.processing.block.prefix_caching_block import (
     ComputedBlocksTracker, LastAccessBlocksTracker)
     ComputedBlocksTracker, LastAccessBlocksTracker)
-from aphrodite.processing.block.utils import \
-    check_no_caching_or_swa_for_blockmgr_encdec
+from aphrodite.processing.block.utils import (
+    check_no_caching_or_swa_for_blockmgr_encdec)
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
 
 
 SeqId = int
 SeqId = int

+ 6 - 6
aphrodite/processing/interfaces.py

@@ -28,18 +28,18 @@ class BlockSpaceManager(ABC):
         version = version.lower()
         version = version.lower()
 
 
         if version == "v1":
         if version == "v1":
-            from aphrodite.processing.block_manager_v1 import \
-                BlockSpaceManagerV1
+            from aphrodite.processing.block_manager_v1 import (
+                BlockSpaceManagerV1)
             return BlockSpaceManagerV1
             return BlockSpaceManagerV1
 
 
         if version == "v2":
         if version == "v2":
-            from aphrodite.processing.block_manager_v2 import \
-                BlockSpaceManagerV2
+            from aphrodite.processing.block_manager_v2 import (
+                BlockSpaceManagerV2)
             return BlockSpaceManagerV2
             return BlockSpaceManagerV2
 
 
         if version == "embedding":
         if version == "embedding":
-            from aphrodite.processing.embedding_model_block_manager import \
-                EmbeddingModelBlockSpaceManager
+            from aphrodite.processing.embedding_model_block_manager import (
+                EmbeddingModelBlockSpaceManager)
             return EmbeddingModelBlockSpaceManager
             return EmbeddingModelBlockSpaceManager
 
 
         raise ValueError(f"Unknown version {version=}")
         raise ValueError(f"Unknown version {version=}")

+ 2 - 2
aphrodite/prompt_adapter/layers.py

@@ -6,8 +6,8 @@ from torch import nn
 
 
 from aphrodite.adapter_commons.layers import AdapterMapping
 from aphrodite.adapter_commons.layers import AdapterMapping
 from aphrodite.common.config import PromptAdapterConfig
 from aphrodite.common.config import PromptAdapterConfig
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 
 
 
 
 @dataclass
 @dataclass

+ 2 - 2
aphrodite/prompt_adapter/models.py

@@ -12,8 +12,8 @@ from aphrodite.adapter_commons.utils import (add_adapter, deactivate_adapter,
                                              remove_adapter,
                                              remove_adapter,
                                              set_adapter_mapping)
                                              set_adapter_mapping)
 from aphrodite.common.config import PromptAdapterConfig
 from aphrodite.common.config import PromptAdapterConfig
-from aphrodite.prompt_adapter.layers import \
-    VocabParallelEmbeddingWithPromptAdapter  # yapf: disable
+from aphrodite.prompt_adapter.layers import (
+    VocabParallelEmbeddingWithPromptAdapter)  # yapf: disable
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)

+ 2 - 2
aphrodite/quantization/__init__.py

@@ -6,8 +6,8 @@ from aphrodite.quantization.awq import AWQConfig
 from aphrodite.quantization.awq_marlin import AWQMarlinConfig
 from aphrodite.quantization.awq_marlin import AWQMarlinConfig
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.bitsandbytes import BitsAndBytesConfig
 from aphrodite.quantization.bitsandbytes import BitsAndBytesConfig
-from aphrodite.quantization.compressed_tensors.compressed_tensors import \
-    CompressedTensorsConfig
+from aphrodite.quantization.compressed_tensors.compressed_tensors import (
+    CompressedTensorsConfig)
 from aphrodite.quantization.deepspeedfp import DeepSpeedFPConfig
 from aphrodite.quantization.deepspeedfp import DeepSpeedFPConfig
 from aphrodite.quantization.eetq import EETQConfig
 from aphrodite.quantization.eetq import EETQConfig
 from aphrodite.quantization.exl2 import Exl2Config
 from aphrodite.quantization.exl2 import Exl2Config

+ 2 - 2
aphrodite/quantization/compressed_tensors/compressed_tensors.py

@@ -57,8 +57,8 @@ class CompressedTensorsConfig(QuantizationConfig):
         layer: torch.nn.Module,
         layer: torch.nn.Module,
         prefix: str,
         prefix: str,
     ) -> Optional["QuantizeMethodBase"]:
     ) -> Optional["QuantizeMethodBase"]:
-        from aphrodite.attention.layer import \
-            Attention  # Avoid circular import
+        from aphrodite.attention.layer import (
+            Attention)  # Avoid circular import
         if isinstance(layer, LinearBase):
         if isinstance(layer, LinearBase):
             return CompressedTensorsLinearMethod(self)
             return CompressedTensorsLinearMethod(self)
         if isinstance(layer, Attention):
         if isinstance(layer, Attention):

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py

@@ -5,8 +5,8 @@ import torch.nn.functional as F
 from torch.nn import Parameter
 from torch.nn import Parameter
 
 
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 
 
 __all__ = ["CompressedTensorsUnquantized"]
 __all__ = ["CompressedTensorsUnquantized"]
 
 

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py

@@ -5,8 +5,8 @@ from torch.nn import Parameter
 
 
 from aphrodite import _custom_ops as ops
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 from aphrodite.quantization.gptq_marlin_24 import (GPTQ_MARLIN_24_MAX_PARALLEL,
 from aphrodite.quantization.gptq_marlin_24 import (GPTQ_MARLIN_24_MAX_PARALLEL,
                                                    GPTQ_MARLIN_24_MIN_THREAD_N)
                                                    GPTQ_MARLIN_24_MIN_THREAD_N)
 from aphrodite.scalar_type import scalar_types
 from aphrodite.scalar_type import scalar_types

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py

@@ -3,10 +3,10 @@ from typing import Callable, List, Optional
 import torch
 import torch
 
 
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.marlin_utils_fp8 import (
 from aphrodite.quantization.utils.marlin_utils_fp8 import (
     apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin)
     apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin)
 from aphrodite.quantization.utils.w8a8_utils import (
 from aphrodite.quantization.utils.w8a8_utils import (

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py

@@ -4,10 +4,10 @@ import torch
 from torch.nn import Parameter
 from torch.nn import Parameter
 
 
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.w8a8_utils import (
 from aphrodite.quantization.utils.w8a8_utils import (
     apply_fp8_linear, create_per_channel_scale_param,
     apply_fp8_linear, create_per_channel_scale_param,
     create_per_tensor_scale_param, cutlass_fp8_supported,
     create_per_tensor_scale_param, cutlass_fp8_supported,

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py

@@ -4,10 +4,10 @@ import torch
 from torch.nn import Parameter
 from torch.nn import Parameter
 
 
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.w8a8_utils import (
 from aphrodite.quantization.utils.w8a8_utils import (
     apply_int8_linear, convert_to_channelwise, create_per_channel_scale_param,
     apply_int8_linear, convert_to_channelwise, create_per_channel_scale_param,
     create_per_tensor_scale_param)
     create_per_tensor_scale_param)

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py

@@ -5,8 +5,8 @@ from torch.nn import Parameter
 
 
 from aphrodite import _custom_ops as ops
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 from aphrodite.quantization.utils.marlin_utils import (
 from aphrodite.quantization.utils.marlin_utils import (
     apply_gptq_marlin_linear, marlin_make_empty_g_idx, marlin_make_workspace,
     apply_gptq_marlin_linear, marlin_make_empty_g_idx, marlin_make_workspace,
     marlin_permute_scales, replace_tensor, verify_marlin_supported,
     marlin_permute_scales, replace_tensor, verify_marlin_supported,

+ 2 - 2
aphrodite/quantization/fp8.py

@@ -76,8 +76,8 @@ class Fp8Config(QuantizationConfig):
 
 
     def get_quant_method(self, layer: torch.nn.Module,
     def get_quant_method(self, layer: torch.nn.Module,
                          prefix: str) -> Optional["QuantizeMethodBase"]:
                          prefix: str) -> Optional["QuantizeMethodBase"]:
-        from aphrodite.attention.layer import \
-            Attention  # Avoid circular import
+        from aphrodite.attention.layer import (
+            Attention)  # Avoid circular import
 
 
         if isinstance(layer, LinearBase):
         if isinstance(layer, LinearBase):
             if is_layer_skipped(prefix, self.ignored_layers):
             if is_layer_skipped(prefix, self.ignored_layers):

+ 2 - 2
aphrodite/quantization/gguf.py

@@ -6,8 +6,8 @@ from torch.nn.parameter import Parameter, UninitializedParameter
 
 
 from aphrodite import _custom_ops as ops
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.layers.linear import LinearBase, LinearMethodBase
 from aphrodite.modeling.layers.linear import LinearBase, LinearMethodBase
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.quantization.base_config import (QuantizationConfig,
 from aphrodite.quantization.base_config import (QuantizationConfig,
                                                 QuantizeMethodBase)
                                                 QuantizeMethodBase)

+ 2 - 2
aphrodite/spec_decode/draft_model_runner.py

@@ -9,8 +9,8 @@ try:
     from aphrodite.attention.backends.flash_attn import FlashAttentionMetadata
     from aphrodite.attention.backends.flash_attn import FlashAttentionMetadata
 except ModuleNotFoundError:
 except ModuleNotFoundError:
     # aphrodite_flash_attn is not installed, use the identical ROCm FA metadata
     # aphrodite_flash_attn is not installed, use the identical ROCm FA metadata
-    from aphrodite.attention.backends.rocm_flash_attn import \
-        ROCmFlashAttentionMetadata as FlashAttentionMetadata
+    from aphrodite.attention.backends.rocm_flash_attn import (
+        ROCmFlashAttentionMetadata as FlashAttentionMetadata)
 
 
 try:
 try:
     from flashinfer import BatchDecodeWithPagedKVCacheWrapper
     from flashinfer import BatchDecodeWithPagedKVCacheWrapper

+ 2 - 2
aphrodite/spec_decode/metrics.py

@@ -5,8 +5,8 @@ from typing import Callable, Optional
 import torch
 import torch
 
 
 from aphrodite.common.utils import is_pin_memory_available
 from aphrodite.common.utils import is_pin_memory_available
-from aphrodite.modeling.layers.spec_decode_base_sampler import \
-    SpecDecodeBaseSampler
+from aphrodite.modeling.layers.spec_decode_base_sampler import (
+    SpecDecodeBaseSampler)
 
 
 
 
 @dataclass
 @dataclass

+ 4 - 4
aphrodite/spec_decode/spec_decode_worker.py

@@ -15,8 +15,8 @@ from aphrodite.distributed.communication_op import broadcast_tensor_dict
 from aphrodite.modeling.layers.rejection_sampler import RejectionSampler
 from aphrodite.modeling.layers.rejection_sampler import RejectionSampler
 from aphrodite.modeling.layers.spec_decode_base_sampler import (
 from aphrodite.modeling.layers.spec_decode_base_sampler import (
     SpecDecodeBaseSampler, SpecDecodeStochasticBaseSampler)
     SpecDecodeBaseSampler, SpecDecodeStochasticBaseSampler)
-from aphrodite.modeling.layers.typical_acceptance_sampler import \
-    TypicalAcceptanceSampler
+from aphrodite.modeling.layers.typical_acceptance_sampler import (
+    TypicalAcceptanceSampler)
 from aphrodite.spec_decode.batch_expansion import BatchExpansionTop1Scorer
 from aphrodite.spec_decode.batch_expansion import BatchExpansionTop1Scorer
 from aphrodite.spec_decode.draft_model_runner import TP1DraftModelRunner
 from aphrodite.spec_decode.draft_model_runner import TP1DraftModelRunner
 from aphrodite.spec_decode.interfaces import (SpeculativeProposals,
 from aphrodite.spec_decode.interfaces import (SpeculativeProposals,
@@ -28,8 +28,8 @@ from aphrodite.spec_decode.mlp_speculator_worker import MLPSpeculatorWorker
 from aphrodite.spec_decode.multi_step_worker import MultiStepWorker
 from aphrodite.spec_decode.multi_step_worker import MultiStepWorker
 from aphrodite.spec_decode.ngram_worker import NGramWorker
 from aphrodite.spec_decode.ngram_worker import NGramWorker
 from aphrodite.spec_decode.proposer_worker_base import ProposerWorkerBase
 from aphrodite.spec_decode.proposer_worker_base import ProposerWorkerBase
-from aphrodite.spec_decode.smaller_tp_proposer_worker import \
-    SmallerTpProposerWorker
+from aphrodite.spec_decode.smaller_tp_proposer_worker import (
+    SmallerTpProposerWorker)
 from aphrodite.spec_decode.target_model_runner import TargetModelRunner
 from aphrodite.spec_decode.target_model_runner import TargetModelRunner
 from aphrodite.spec_decode.util import (Timer, create_sequence_group_output,
 from aphrodite.spec_decode.util import (Timer, create_sequence_group_output,
                                         get_all_num_logprobs,
                                         get_all_num_logprobs,

+ 2 - 2
aphrodite/task_handler/model_runner.py

@@ -53,8 +53,8 @@ from aphrodite.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
                                   MultiModalInputs)
                                   MultiModalInputs)
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
-from aphrodite.prompt_adapter.worker_manager import \
-    LRUCacheWorkerPromptAdapterManager
+from aphrodite.prompt_adapter.worker_manager import (
+    LRUCacheWorkerPromptAdapterManager)
 from aphrodite.task_handler.model_runner_base import (
 from aphrodite.task_handler.model_runner_base import (
     ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
     ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
     _add_attn_metadata_broadcastable_dict,
     _add_attn_metadata_broadcastable_dict,

+ 2 - 2
aphrodite/transformers_utils/config.py

@@ -5,8 +5,8 @@ from typing import Dict, Optional, Type, Union
 
 
 from loguru import logger
 from loguru import logger
 from transformers import GenerationConfig, PretrainedConfig
 from transformers import GenerationConfig, PretrainedConfig
-from transformers.models.auto.modeling_auto import \
-    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 
 
 from aphrodite.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
 from aphrodite.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                                   InternVLChatConfig,
                                                   InternVLChatConfig,

+ 2 - 2
aphrodite/transformers_utils/configs/__init__.py

@@ -7,8 +7,8 @@ from aphrodite.transformers_utils.configs.falcon import RWConfig
 from aphrodite.transformers_utils.configs.internvl import InternVLChatConfig
 from aphrodite.transformers_utils.configs.internvl import InternVLChatConfig
 from aphrodite.transformers_utils.configs.jais import JAISConfig
 from aphrodite.transformers_utils.configs.jais import JAISConfig
 from aphrodite.transformers_utils.configs.medusa import MedusaConfig
 from aphrodite.transformers_utils.configs.medusa import MedusaConfig
-from aphrodite.transformers_utils.configs.mlp_speculator import \
-    MLPSpeculatorConfig
+from aphrodite.transformers_utils.configs.mlp_speculator import (
+    MLPSpeculatorConfig)
 from aphrodite.transformers_utils.configs.mpt import MPTConfig
 from aphrodite.transformers_utils.configs.mpt import MPTConfig
 
 
 __all__ = [
 __all__ = [

+ 2 - 2
aphrodite/transformers_utils/detokenizer.py

@@ -4,8 +4,8 @@ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
 
 
 from aphrodite.common.sequence import (Logprob, SamplingParams, Sequence,
 from aphrodite.common.sequence import (Logprob, SamplingParams, Sequence,
                                        SequenceGroup)
                                        SequenceGroup)
-from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-    BaseTokenizerGroup
+from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (
+    BaseTokenizerGroup)
 
 
 # Used eg. for marking rejected tokens in spec decoding.
 # Used eg. for marking rejected tokens in spec decoding.
 INVALID_TOKEN_ID = -1
 INVALID_TOKEN_ID = -1

+ 2 - 2
aphrodite/transformers_utils/tokenizer_group/__init__.py

@@ -8,8 +8,8 @@ from .base_tokenizer_group import AnyTokenizer, BaseTokenizerGroup
 from .tokenizer_group import TokenizerGroup
 from .tokenizer_group import TokenizerGroup
 
 
 if ray:
 if ray:
-    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import \
-        RayTokenizerGroupPool  # noqa E501
+    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import (  # noqa E501
+        RayTokenizerGroupPool)
 else:
 else:
     RayTokenizerGroupPool = None  # type: ignore
     RayTokenizerGroupPool = None  # type: ignore
 
 

+ 4 - 4
aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py

@@ -15,10 +15,10 @@ from transformers import PreTrainedTokenizer
 from aphrodite.common.config import TokenizerPoolConfig
 from aphrodite.common.config import TokenizerPoolConfig
 from aphrodite.executor.ray_utils import ray
 from aphrodite.executor.ray_utils import ray
 from aphrodite.lora.request import LoRARequest
 from aphrodite.lora.request import LoRARequest
-from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-    BaseTokenizerGroup
-from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import \
-    TokenizerGroup
+from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (
+    BaseTokenizerGroup)
+from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import (
+    TokenizerGroup)
 
 
 
 
 class RayTokenizerGroupPool(BaseTokenizerGroup):
 class RayTokenizerGroupPool(BaseTokenizerGroup):

+ 2 - 2
aphrodite/triton_utils/__init__.py

@@ -4,8 +4,8 @@ __all__ = ["HAS_TRITON"]
 
 
 if HAS_TRITON:
 if HAS_TRITON:
 
 
-    from aphrodite.triton_utils.custom_cache_manager import \
-        maybe_set_triton_cache_manager
+    from aphrodite.triton_utils.custom_cache_manager import (
+        maybe_set_triton_cache_manager)
     from aphrodite.triton_utils.libentry import libentry
     from aphrodite.triton_utils.libentry import libentry
 
 
     __all__ += ["maybe_set_triton_cache_manager", "libentry"]
     __all__ += ["maybe_set_triton_cache_manager", "libentry"]

+ 12 - 10
examples/aphrodite_engine_example.py

@@ -1,6 +1,6 @@
 import argparse
 import argparse
 
 
-from aphrodite import EngineArgs, AphroditeEngine, SamplingParams
+from aphrodite import AphroditeEngine, EngineArgs, SamplingParams
 
 
 
 
 def main(args: argparse.Namespace):
 def main(args: argparse.Namespace):
@@ -12,14 +12,16 @@ def main(args: argparse.Namespace):
     test_prompts = [
     test_prompts = [
         ("<|system|>Enter chat mode.<|user|>Hello!<|model|>",
         ("<|system|>Enter chat mode.<|user|>Hello!<|model|>",
          SamplingParams(temperature=0.0)),
          SamplingParams(temperature=0.0)),
-        ("<|system|>Enter RP mode.<|model|>Hello!<|user|>What are you doing?<|model|>",
-         SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
-        ("<|system|>Enter chat mode.<|user|>What is the meaning of life?<|model|>",
-         SamplingParams(n=2,
-                        best_of=5,
-                        temperature=0.8,
-                        top_p=0.95,
-                        frequency_penalty=0.1)),
+        (
+            "<|system|>Enter RP mode.<|model|>Hello!<|user|>What are you doing?<|model|>",  # noqa: E501
+            SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
+        (
+            "<|system|>Enter chat mode.<|user|>What is the meaning of life?<|model|>",  # noqa: E501
+            SamplingParams(n=2,
+                           best_of=5,
+                           temperature=0.8,
+                           top_p=0.95,
+                           frequency_penalty=0.1)),
         ("<|system|>Enter QA mode.<|user|>What is a man?<|model|>A miserable",
         ("<|system|>Enter QA mode.<|user|>What is a man?<|model|>A miserable",
          SamplingParams(n=3, best_of=3, use_beam_search=True,
          SamplingParams(n=3, best_of=3, use_beam_search=True,
                         temperature=0.0)),
                         temperature=0.0)),
@@ -48,4 +50,4 @@ if __name__ == '__main__':
         description='Demo on using the AphroditeEngine class directly')
         description='Demo on using the AphroditeEngine class directly')
     parser = EngineArgs.add_cli_args(parser)
     parser = EngineArgs.add_cli_args(parser)
     args = parser.parse_args()
     args = parser.parse_args()
-    main(args)
+    main(args)

+ 2 - 0
pyproject.toml

@@ -41,6 +41,8 @@ ignore = [
     "E731",
     "E731",
     # Loop control variable not used within loop body
     # Loop control variable not used within loop body
     "B007",
     "B007",
+    # f-strings in logger
+    "G004",
 ]
 ]
 
 
 [tool.codespell]
 [tool.codespell]

+ 2 - 2
tests/benchmarks/kernels/marlin.py

@@ -14,8 +14,8 @@ from aphrodite.quantization.utils.marlin_utils import (
     GPTQ_MARLIN_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_SUPPORTED_NUM_BITS)
     GPTQ_MARLIN_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_SUPPORTED_NUM_BITS)
 from aphrodite.quantization.utils.marlin_utils_test import (MarlinWorkspace,
 from aphrodite.quantization.utils.marlin_utils_test import (MarlinWorkspace,
                                                             marlin_quantize)
                                                             marlin_quantize)
-from aphrodite.quantization.utils.marlin_utils_test_24 import \
-    marlin_24_quantize
+from aphrodite.quantization.utils.marlin_utils_test_24 import (
+    marlin_24_quantize)
 from aphrodite.quantization.utils.quant_utils import (gptq_pack,
 from aphrodite.quantization.utils.quant_utils import (gptq_pack,
                                                       quantize_weights,
                                                       quantize_weights,
                                                       sort_weights)
                                                       sort_weights)