Jelajahi Sumber

ci: take one of fixing lint issues

AlpinDale 6 bulan lalu
induk
komit
4d4e767838
65 file diubah dengan 192 tambahan dan 188 penghapusan
  1. 2 2
      aphrodite/attention/backends/abstract.py
  2. 2 2
      aphrodite/attention/backends/rocm_flash_attn.py
  3. 2 2
      aphrodite/attention/ops/blocksparse_attention/interface.py
  4. 12 12
      aphrodite/attention/selector.py
  5. 2 2
      aphrodite/common/config.py
  6. 2 2
      aphrodite/distributed/device_communicators/custom_all_reduce.py
  7. 2 2
      aphrodite/distributed/device_communicators/custom_all_reduce_utils.py
  8. 8 8
      aphrodite/distributed/parallel_state.py
  9. 4 4
      aphrodite/endpoints/chat_utils.py
  10. 4 4
      aphrodite/endpoints/openai/api_server.py
  11. 2 2
      aphrodite/endpoints/openai/rpc/client.py
  12. 2 2
      aphrodite/endpoints/openai/serving_engine.py
  13. 6 6
      aphrodite/engine/aphrodite_engine.py
  14. 2 2
      aphrodite/engine/args_tools.py
  15. 8 8
      aphrodite/engine/async_aphrodite.py
  16. 4 4
      aphrodite/engine/output_processor/interfaces.py
  17. 2 2
      aphrodite/engine/output_processor/multi_step.py
  18. 2 2
      aphrodite/engine/output_processor/single_step.py
  19. 2 2
      aphrodite/lora/layers.py
  20. 6 6
      aphrodite/modeling/guided_decoding/__init__.py
  21. 2 2
      aphrodite/modeling/guided_decoding/lm_format_enforcer_decoding.py
  22. 2 2
      aphrodite/modeling/guided_decoding/outlines_decoding.py
  23. 2 2
      aphrodite/modeling/layers/logits_processor.py
  24. 2 2
      aphrodite/modeling/layers/typical_acceptance_sampler.py
  25. 2 2
      aphrodite/modeling/model_loader/tensorizer.py
  26. 2 2
      aphrodite/modeling/models/bloom.py
  27. 2 2
      aphrodite/modeling/models/falcon.py
  28. 2 2
      aphrodite/modeling/models/gemma.py
  29. 2 2
      aphrodite/modeling/models/gemma2.py
  30. 2 2
      aphrodite/modeling/models/gpt2.py
  31. 2 2
      aphrodite/modeling/models/gpt_bigcode.py
  32. 2 2
      aphrodite/modeling/models/jais.py
  33. 2 2
      aphrodite/modeling/models/llama.py
  34. 2 2
      aphrodite/modeling/models/mpt.py
  35. 2 2
      aphrodite/modeling/models/opt.py
  36. 2 2
      aphrodite/modeling/models/siglip.py
  37. 2 2
      aphrodite/processing/block/cpu_gpu_block_allocator.py
  38. 2 2
      aphrodite/processing/block_manager_v1.py
  39. 4 4
      aphrodite/processing/block_manager_v2.py
  40. 6 6
      aphrodite/processing/interfaces.py
  41. 2 2
      aphrodite/prompt_adapter/layers.py
  42. 2 2
      aphrodite/prompt_adapter/models.py
  43. 2 2
      aphrodite/quantization/__init__.py
  44. 2 2
      aphrodite/quantization/compressed_tensors/compressed_tensors.py
  45. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py
  46. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py
  47. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
  48. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
  49. 4 4
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
  50. 2 2
      aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
  51. 2 2
      aphrodite/quantization/fp8.py
  52. 2 2
      aphrodite/quantization/gguf.py
  53. 2 2
      aphrodite/spec_decode/draft_model_runner.py
  54. 2 2
      aphrodite/spec_decode/metrics.py
  55. 4 4
      aphrodite/spec_decode/spec_decode_worker.py
  56. 2 2
      aphrodite/task_handler/model_runner.py
  57. 2 2
      aphrodite/transformers_utils/config.py
  58. 2 2
      aphrodite/transformers_utils/configs/__init__.py
  59. 2 2
      aphrodite/transformers_utils/detokenizer.py
  60. 2 2
      aphrodite/transformers_utils/tokenizer_group/__init__.py
  61. 4 4
      aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py
  62. 2 2
      aphrodite/triton_utils/__init__.py
  63. 12 10
      examples/aphrodite_engine_example.py
  64. 2 0
      pyproject.toml
  65. 2 2
      tests/benchmarks/kernels/marlin.py

+ 2 - 2
aphrodite/attention/backends/abstract.py

@@ -7,8 +7,8 @@ from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Set,
 import torch
 
 if TYPE_CHECKING:
-    from aphrodite.task_handler.model_runner_base import \
-        ModelRunnerInputBuilderBase
+    from aphrodite.task_handler.model_runner_base import (
+        ModelRunnerInputBuilderBase)
 
 
 class AttentionType(Enum):

+ 2 - 2
aphrodite/attention/backends/rocm_flash_attn.py

@@ -279,8 +279,8 @@ class ROCmFlashAttentionImpl(AttentionImpl):
             "APHRODITE_USE_TRITON_FLASH_ATTN", "True").lower()
                                       in ("true", "1"))
         if self.use_triton_flash_attn:
-            from aphrodite.attention.ops.triton_flash_attn import \
-                triton_attention  # noqa: F401
+            from aphrodite.attention.ops.triton_flash_attn import (  # noqa: F401
+                triton_attention)
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
             if self.sliding_window != (-1, -1):

+ 2 - 2
aphrodite/attention/ops/blocksparse_attention/interface.py

@@ -11,8 +11,8 @@ IS_COMPUTE_8_OR_ABOVE = (torch.cuda.is_available()
                          and current_platform.get_device_capability()[0] >= 8)
 
 if IS_COMPUTE_8_OR_ABOVE:
-    from aphrodite.attention.ops.blocksparse_attention.blocksparse_attention_kernel import \
-        blocksparse_flash_attn_varlen_fwd  # noqa: E501
+    from aphrodite.attention.ops.blocksparse_attention.blocksparse_attention_kernel import (  # noqa: E501
+        blocksparse_flash_attn_varlen_fwd)
 
 
 class LocalStridedBlockSparseAttn(torch.nn.Module):

+ 12 - 12
aphrodite/attention/selector.py

@@ -38,8 +38,8 @@ def get_attn_backend(
 
     if is_blocksparse:
         logger.info("Using BlocksparseFlashAttention backend.")
-        from aphrodite.attention.backends.blocksparse_attn import \
-            BlocksparseFlashAttentionBackend
+        from aphrodite.attention.backends.blocksparse_attn import (
+            BlocksparseFlashAttentionBackend)
         return BlocksparseFlashAttentionBackend
     """Determine which attention backend to use and only import
     the selected backend module.
@@ -48,18 +48,18 @@ def get_attn_backend(
                                 sliding_window, dtype, kv_cache_dtype,
                                 block_size)
     if backend == _Backend.FLASH_ATTN:
-        from aphrodite.attention.backends.flash_attn import \
-            FlashAttentionBackend  # noqa: F401
+        from aphrodite.attention.backends.flash_attn import (  # noqa: F401
+            FlashAttentionBackend)
         return FlashAttentionBackend
     if backend == _Backend.XFORMERS:
         logger.info("Using XFormers backend.")
-        from aphrodite.attention.backends.xformers import \
-            XFormersBackend  # noqa: F401
+        from aphrodite.attention.backends.xformers import (  # noqa: F401
+            XFormersBackend)
         return XFormersBackend
     elif backend == _Backend.ROCM_FLASH:
         logger.info("Using ROCmFlashAttention backend.")
-        from aphrodite.attention.backends.rocm_flash_attn import \
-            ROCmFlashAttentionBackend  # noqa: F401
+        from aphrodite.attention.backends.rocm_flash_attn import (  # noqa: F401
+            ROCmFlashAttentionBackend)
         return ROCmFlashAttentionBackend
     elif backend == _Backend.TORCH_SDPA:
         assert is_cpu(), RuntimeError(
@@ -69,8 +69,8 @@ def get_attn_backend(
         return TorchSDPABackend
     elif backend == _Backend.OPENVINO:
         logger.info("Using OpenVINO attention backend.")
-        from aphrodite.attention.backends.openvino import \
-            OpenVINOAttentionBackend
+        from aphrodite.attention.backends.openvino import (
+            OpenVINOAttentionBackend)
         return OpenVINOAttentionBackend
     elif backend == _Backend.IPEX:
         assert is_xpu(), RuntimeError(
@@ -177,8 +177,8 @@ def which_attn_to_use(
         try:
             import aphrodite_flash_attn  # noqa: F401
 
-            from aphrodite.attention.backends.flash_attn import \
-                FlashAttentionBackend  # noqa: F401
+            from aphrodite.attention.backends.flash_attn import (  # noqa: F401
+                FlashAttentionBackend)
 
             supported_sizes = FlashAttentionBackend.get_supported_head_sizes()
             if head_size not in supported_sizes:

+ 2 - 2
aphrodite/common/config.py

@@ -23,8 +23,8 @@ if TYPE_CHECKING:
 
     from aphrodite.executor.executor_base import ExecutorBase
     from aphrodite.modeling.model_loader.loader import BaseModelLoader
-    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-        BaseTokenizerGroup  # noqa: E501
+    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (  # noqa: E501
+        BaseTokenizerGroup)
 
 # If true, will load models from ModelScope instead of Hugging Face Hub.
 APHRODITE_USE_MODELSCOPE = os.environ.get("APHRODITE_USE_MODELSCOPE",

+ 2 - 2
aphrodite/distributed/device_communicators/custom_all_reduce.py

@@ -9,8 +9,8 @@ from torch.distributed import ProcessGroup
 
 from aphrodite import _custom_ops as ops
 from aphrodite.common.utils import cuda_device_count_stateless, is_full_nvlink
-from aphrodite.distributed.device_communicators.custom_all_reduce_utils import \
-    gpu_p2p_access_check
+from aphrodite.distributed.device_communicators.custom_all_reduce_utils import (
+    gpu_p2p_access_check)
 from aphrodite.distributed.parallel_state import in_the_same_node_as
 
 try:

+ 2 - 2
aphrodite/distributed/device_communicators/custom_all_reduce_utils.py

@@ -13,8 +13,8 @@ from loguru import logger
 
 from aphrodite.common.utils import (cuda_device_count_stateless,
                                     update_environment_variables)
-from aphrodite.distributed.device_communicators.cuda_wrapper import \
-    CudaRTLibrary
+from aphrodite.distributed.device_communicators.cuda_wrapper import (
+    CudaRTLibrary)
 
 
 def producer(batch_src: Sequence[int],

+ 8 - 8
aphrodite/distributed/parallel_state.py

@@ -144,10 +144,10 @@ class GroupCoordinator:
         self.use_tpu_communicator = use_tpu_communicator
 
         # lazy import to avoid documentation build error
-        from aphrodite.distributed.device_communicators.custom_all_reduce import \
-            CustomAllreduce  # noqa: E501
-        from aphrodite.distributed.device_communicators.pynccl import \
-            PyNcclCommunicator
+        from aphrodite.distributed.device_communicators.custom_all_reduce import (  # noqa: E501
+            CustomAllreduce)
+        from aphrodite.distributed.device_communicators.pynccl import (
+            PyNcclCommunicator)
 
         self.pynccl_comm: Optional[PyNcclCommunicator]
         if use_pynccl and self.world_size > 1:
@@ -168,14 +168,14 @@ class GroupCoordinator:
         else:
             self.ca_comm = None
 
-        from aphrodite.distributed.device_communicators.tpu_communicator import \
-            TpuCommunicator  # noqa: E501
+        from aphrodite.distributed.device_communicators.tpu_communicator import (  # noqa: E501
+            TpuCommunicator)
         self.tpu_communicator: Optional[TpuCommunicator]
         if use_tpu_communicator and self.world_size > 1:
             self.tpu_communicator = TpuCommunicator(group=self.cpu_group)
 
-        from aphrodite.distributed.device_communicators.shm_broadcast import \
-            MessageQueue
+        from aphrodite.distributed.device_communicators.shm_broadcast import (
+            MessageQueue)
         self.mq_broadcaster: Optional[MessageQueue] = None
         if use_message_queue_broadcaster and self.world_size > 1:
             self.mq_broadcaster = MessageQueue.create_from_process_group(

+ 4 - 4
aphrodite/endpoints/chat_utils.py

@@ -10,11 +10,11 @@ from loguru import logger
 # yapf conflicts with isort for this block
 # yapf: disable
 from openai.types.chat import ChatCompletionContentPartImageParam
-from openai.types.chat import \
-    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam
+from openai.types.chat import (
+    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam)
 from openai.types.chat import ChatCompletionContentPartTextParam
-from openai.types.chat import \
-    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam
+from openai.types.chat import (
+    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam)
 # yapf: enable
 # pydantic needs the TypedDict from typing_extensions
 from pydantic import ConfigDict

+ 4 - 4
aphrodite/endpoints/openai/api_server.py

@@ -42,11 +42,11 @@ from aphrodite.endpoints.openai.rpc.client import AsyncEngineRPCClient
 from aphrodite.endpoints.openai.rpc.server import run_rpc_server
 # yapf: enable
 from aphrodite.endpoints.openai.serving_chat import OpenAIServingChat
-from aphrodite.endpoints.openai.serving_completions import \
-    OpenAIServingCompletion
+from aphrodite.endpoints.openai.serving_completions import (
+    OpenAIServingCompletion)
 from aphrodite.endpoints.openai.serving_embedding import OpenAIServingEmbedding
-from aphrodite.endpoints.openai.serving_tokenization import \
-    OpenAIServingTokenization
+from aphrodite.endpoints.openai.serving_tokenization import (
+    OpenAIServingTokenization)
 from aphrodite.engine.args_tools import AsyncEngineArgs
 from aphrodite.engine.async_aphrodite import AsyncAphrodite
 from aphrodite.engine.protocol import AsyncEngineClient

+ 2 - 2
aphrodite/endpoints/openai/rpc/client.py

@@ -17,8 +17,8 @@ from aphrodite.endpoints.openai.rpc import (APHRODITE_RPC_HEALTHY_STR,
 from aphrodite.inputs import PromptInputs
 from aphrodite.lora.request import LoRARequest
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
-from aphrodite.transformers_utils.tokenizer_group import \
-    init_tokenizer_from_configs
+from aphrodite.transformers_utils.tokenizer_group import (
+    init_tokenizer_from_configs)
 
 
 class AsyncEngineRPCClient:

+ 2 - 2
aphrodite/endpoints/openai/serving_engine.py

@@ -29,8 +29,8 @@ from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
 from aphrodite.engine.protocol import AsyncEngineClient
 from aphrodite.inputs import parse_and_batch_prompt
 from aphrodite.lora.request import LoRARequest
-from aphrodite.modeling.guided_decoding import \
-    get_guided_decoding_logits_processor
+from aphrodite.modeling.guided_decoding import (
+    get_guided_decoding_logits_processor)
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
 
 

+ 6 - 6
aphrodite/engine/aphrodite_engine.py

@@ -26,11 +26,11 @@ from aphrodite.common.utils import Counter
 from aphrodite.engine.args_tools import EngineArgs
 from aphrodite.engine.metrics import (LoggingStatLogger, PrometheusStatLogger,
                                       StatLoggerBase, Stats)
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
-from aphrodite.engine.output_processor.util import \
-    create_output_by_sequence_group
+from aphrodite.engine.output_processor.util import (
+    create_output_by_sequence_group)
 from aphrodite.executor.executor_base import ExecutorBase
 from aphrodite.executor.ray_utils import initialize_ray_cluster
 from aphrodite.inputs import INPUT_REGISTRY, LLMInputs, PromptInputs
@@ -368,8 +368,8 @@ class AphroditeEngine:
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutor
             executor_class = RayGPUExecutor
         elif distributed_executor_backend == "mp":
-            from aphrodite.executor.multiproc_gpu_executor import \
-                MultiprocessingGPUExecutor
+            from aphrodite.executor.multiproc_gpu_executor import (
+                MultiprocessingGPUExecutor)
             assert not APHRODITE_USE_RAY_SPMD_WORKER, (
                 "multiprocessing distributed executor backend does not "
                 "support APHRODITE_USE_RAY_SPMD_WORKER=1")

+ 2 - 2
aphrodite/engine/args_tools.py

@@ -17,8 +17,8 @@ from aphrodite.executor.executor_base import ExecutorBase
 from aphrodite.quantization import QUANTIZATION_METHODS
 
 if TYPE_CHECKING:
-    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-        BaseTokenizerGroup  # noqa: E501
+    from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (  # noqa: E501
+        BaseTokenizerGroup)
 
 
 @dataclass

+ 8 - 8
aphrodite/engine/async_aphrodite.py

@@ -408,8 +408,8 @@ class AsyncAphrodite:
         elif engine_config.device_config.device_type == "tpu":
             if distributed_executor_backend == "ray":
                 initialize_ray_cluster(engine_config.parallel_config)
-                from aphrodite.executor.ray_tpu_executor import \
-                    RayTPUExecutorAsync
+                from aphrodite.executor.ray_tpu_executor import (
+                    RayTPUExecutorAsync)
                 executor_class = RayTPUExecutorAsync
             else:
                 assert distributed_executor_backend is None
@@ -422,8 +422,8 @@ class AsyncAphrodite:
             assert distributed_executor_backend is None, (
                 "Distributed execution is not supported with the OpenVINO "
                 "backend.")
-            from aphrodite.executor.openvino_executor import \
-                OpenVINOExecutorAsync
+            from aphrodite.executor.openvino_executor import (
+                OpenVINOExecutorAsync)
             executor_class = OpenVINOExecutorAsync
         elif engine_config.device_config.device_type == "xpu":
             if distributed_executor_backend is None:
@@ -431,8 +431,8 @@ class AsyncAphrodite:
                 executor_class = XPUExecutorAsync
             elif distributed_executor_backend == "ray":
                 initialize_ray_cluster(engine_config.parallel_config)
-                from aphrodite.executor.ray_xpu_executor import \
-                    RayXPUExecutorAsync
+                from aphrodite.executor.ray_xpu_executor import (
+                    RayXPUExecutorAsync)
                 executor_class = RayXPUExecutorAsync
             else:
                 raise RuntimeError(
@@ -442,8 +442,8 @@ class AsyncAphrodite:
             from aphrodite.executor.ray_gpu_executor import RayGPUExecutorAsync
             executor_class = RayGPUExecutorAsync
         elif distributed_executor_backend == "mp":
-            from aphrodite.executor.multiproc_gpu_executor import \
-                MultiprocessingGPUExecutorAsync
+            from aphrodite.executor.multiproc_gpu_executor import (
+                MultiprocessingGPUExecutorAsync)
             executor_class = MultiprocessingGPUExecutorAsync
         else:
             from aphrodite.executor.gpu_executor import GPUExecutorAsync

+ 4 - 4
aphrodite/engine/output_processor/interfaces.py

@@ -40,8 +40,8 @@ class SequenceGroupOutputProcessor(ABC):
         """
         if scheduler_config.num_lookahead_slots == 0:
             # Importing here to avoid cycle.
-            from aphrodite.engine.output_processor.single_step import \
-                SingleStepOutputProcessor
+            from aphrodite.engine.output_processor.single_step import (
+                SingleStepOutputProcessor)
             return SingleStepOutputProcessor(
                 scheduler_config,
                 detokenizer,
@@ -51,8 +51,8 @@ class SequenceGroupOutputProcessor(ABC):
             )
         else:
             # Importing here to avoid cycle.
-            from aphrodite.engine.output_processor.multi_step import \
-                MultiStepOutputProcessor
+            from aphrodite.engine.output_processor.multi_step import (
+                MultiStepOutputProcessor)
             return MultiStepOutputProcessor(
                 detokenizer,
                 scheduler,

+ 2 - 2
aphrodite/engine/output_processor/multi_step.py

@@ -9,8 +9,8 @@ from aphrodite.common.sequence import (Sequence, SequenceGroup,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceStatus)
 from aphrodite.common.utils import Counter
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.transformers_utils.detokenizer import Detokenizer

+ 2 - 2
aphrodite/engine/output_processor/single_step.py

@@ -6,8 +6,8 @@ from aphrodite.common.sequence import (Sequence, SequenceGroup,
                                        SequenceGroupOutput, SequenceOutput,
                                        SequenceStatus)
 from aphrodite.common.utils import Counter
-from aphrodite.engine.output_processor.interfaces import \
-    SequenceGroupOutputProcessor
+from aphrodite.engine.output_processor.interfaces import (
+    SequenceGroupOutputProcessor)
 from aphrodite.engine.output_processor.stop_checker import StopChecker
 from aphrodite.processing.scheduler import Scheduler
 from aphrodite.transformers_utils.detokenizer import Detokenizer

+ 2 - 2
aphrodite/lora/layers.py

@@ -26,8 +26,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import (
     LinearScalingRotaryEmbedding, RotaryEmbedding)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 
 if TYPE_CHECKING:
     pass

+ 6 - 6
aphrodite/modeling/guided_decoding/__init__.py

@@ -4,8 +4,8 @@ from aphrodite.common.sampling_params import LogitsProcessorFunc
 from aphrodite.endpoints.openai.protocol import (
     ChatCompletionNamedToolChoiceParam, ChatCompletionRequest,
     CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.outlines_decoding import (
     get_local_outlines_guided_decoding_logits_processor,
     get_outlines_guided_decoding_logits_processor)
@@ -20,8 +20,8 @@ async def get_guided_decoding_logits_processor(
         return await get_outlines_guided_decoding_logits_processor(
             request, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
-        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import \
-            get_lm_format_enforcer_guided_decoding_logits_processor  # noqa
+        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import (  # noqa
+            get_lm_format_enforcer_guided_decoding_logits_processor)
         return await get_lm_format_enforcer_guided_decoding_logits_processor(
             request, tokenizer)
 
@@ -39,8 +39,8 @@ def get_local_guided_decoding_logits_processor(
         return get_local_outlines_guided_decoding_logits_processor(
             guided_options, tokenizer)
     if guided_decoding_backend == 'lm-format-enforcer':
-        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import \
-            get_local_lm_format_enforcer_guided_decoding_logits_processor  # noqa
+        from aphrodite.modeling.guided_decoding.lm_format_enforcer_decoding import (  # noqa
+            get_local_lm_format_enforcer_guided_decoding_logits_processor)
         return get_local_lm_format_enforcer_guided_decoding_logits_processor(
             guided_options, tokenizer)
 

+ 2 - 2
aphrodite/modeling/guided_decoding/lm_format_enforcer_decoding.py

@@ -11,8 +11,8 @@ from transformers import PreTrainedTokenizerBase
 from aphrodite.common.sampling_params import LogitsProcessorFunc
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
                                                  CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.lm_format_enforcer_logits_processors import (  # noqa: E501
     build_aphrodite_logits_processor,
     build_aphrodite_token_enforcer_tokenizer_data)

+ 2 - 2
aphrodite/modeling/guided_decoding/outlines_decoding.py

@@ -10,8 +10,8 @@ from transformers import PreTrainedTokenizerBase
 
 from aphrodite.endpoints.openai.protocol import (ChatCompletionRequest,
                                                  CompletionRequest)
-from aphrodite.modeling.guided_decoding.guided_fields import \
-    GuidedDecodingRequest
+from aphrodite.modeling.guided_decoding.guided_fields import (
+    GuidedDecodingRequest)
 from aphrodite.modeling.guided_decoding.outlines_logits_processors import (
     CFGLogitsProcessor, JSONLogitsProcessor, RegexLogitsProcessor)
 

+ 2 - 2
aphrodite/modeling/layers/logits_processor.py

@@ -7,8 +7,8 @@ import torch.nn as nn
 
 from aphrodite.distributed import (tensor_model_parallel_all_gather,
                                    tensor_model_parallel_gather)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.platforms import current_platform
 

+ 2 - 2
aphrodite/modeling/layers/typical_acceptance_sampler.py

@@ -1,8 +1,8 @@
 import torch
 import torch.jit
 
-from aphrodite.modeling.layers.spec_decode_base_sampler import \
-    SpecDecodeDeterministicBaseSampler
+from aphrodite.modeling.layers.spec_decode_base_sampler import (
+    SpecDecodeDeterministicBaseSampler)
 
 
 class TypicalAcceptanceSampler(SpecDecodeDeterministicBaseSampler):

+ 2 - 2
aphrodite/modeling/model_loader/tensorizer.py

@@ -16,8 +16,8 @@ from transformers import PretrainedConfig
 from aphrodite.common.config import ModelConfig, ParallelConfig
 from aphrodite.engine.aphrodite_engine import AphroditeEngine
 from aphrodite.engine.args_tools import EngineArgs
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.quantization.base_config import QuantizationConfig
 
 tensorizer_error_msg = None

+ 2 - 2
aphrodite/modeling/models/bloom.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/falcon.py

@@ -39,8 +39,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import get_rope
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gemma.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gemma2.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (MergedColumnParallelLinear,
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.rotary_embedding import GemmaRotaryEmbedding
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gpt2.py

@@ -33,8 +33,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/gpt_bigcode.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/jais.py

@@ -35,8 +35,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/llama.py

@@ -53,8 +53,8 @@ from aphrodite.modeling.models.utils import (PPMissingLayer,
                                              make_layers)
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig
-from aphrodite.quantization.compressed_tensors.utils import \
-    get_compressed_tensors_cache_scale
+from aphrodite.quantization.compressed_tensors.utils import (
+    get_compressed_tensors_cache_scale)
 
 
 class LlamaMLP(nn.Module):

+ 2 - 2
aphrodite/modeling/models/mpt.py

@@ -17,8 +17,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/opt.py

@@ -34,8 +34,8 @@ from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               RowParallelLinear)
 from aphrodite.modeling.layers.logits_processor import LogitsProcessor
 from aphrodite.modeling.layers.sampler import Sampler
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.model_loader.weight_utils import default_weight_loader
 from aphrodite.modeling.sampling_metadata import SamplingMetadata
 from aphrodite.quantization.base_config import QuantizationConfig

+ 2 - 2
aphrodite/modeling/models/siglip.py

@@ -20,8 +20,8 @@ from aphrodite.modeling.layers.activation import get_act_fn
 from aphrodite.modeling.layers.linear import (ColumnParallelLinear,
                                               QKVParallelLinear,
                                               RowParallelLinear)
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.multimodal.image import (cached_get_tokenizer,
                                         repeat_and_pad_image_tokens)
 from aphrodite.quantization import QuantizationConfig

+ 2 - 2
aphrodite/processing/block/cpu_gpu_block_allocator.py

@@ -6,8 +6,8 @@ from aphrodite.processing.block.interfaces import (Block, BlockAllocator,
                                                    DeviceAwareBlockAllocator)
 from aphrodite.processing.block.naive_block import (NaiveBlock,
                                                     NaiveBlockAllocator)
-from aphrodite.processing.block.prefix_caching_block import \
-    PrefixCachingBlockAllocator
+from aphrodite.processing.block.prefix_caching_block import (
+    PrefixCachingBlockAllocator)
 
 
 class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):

+ 2 - 2
aphrodite/processing/block_manager_v1.py

@@ -12,8 +12,8 @@ from loguru import logger
 from aphrodite.common.block import BlockTable, PhysicalTokenBlock
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.utils import Device
-from aphrodite.processing.block.utils import \
-    check_no_caching_or_swa_for_blockmgr_encdec
+from aphrodite.processing.block.utils import (
+    check_no_caching_or_swa_for_blockmgr_encdec)
 from aphrodite.processing.evictor_v1 import (EvictionPolicy, Evictor,
                                              make_evictor)
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager

+ 4 - 4
aphrodite/processing/block_manager_v2.py

@@ -7,13 +7,13 @@ from typing import Tuple
 from aphrodite.common.sequence import Sequence, SequenceGroup, SequenceStatus
 from aphrodite.common.utils import Device
 from aphrodite.processing.block.block_table import BlockTable
-from aphrodite.processing.block.cpu_gpu_block_allocator import \
-    CpuGpuBlockAllocator
+from aphrodite.processing.block.cpu_gpu_block_allocator import (
+    CpuGpuBlockAllocator)
 from aphrodite.processing.block.interfaces import Block
 from aphrodite.processing.block.prefix_caching_block import (
     ComputedBlocksTracker, LastAccessBlocksTracker)
-from aphrodite.processing.block.utils import \
-    check_no_caching_or_swa_for_blockmgr_encdec
+from aphrodite.processing.block.utils import (
+    check_no_caching_or_swa_for_blockmgr_encdec)
 from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
 
 SeqId = int

+ 6 - 6
aphrodite/processing/interfaces.py

@@ -28,18 +28,18 @@ class BlockSpaceManager(ABC):
         version = version.lower()
 
         if version == "v1":
-            from aphrodite.processing.block_manager_v1 import \
-                BlockSpaceManagerV1
+            from aphrodite.processing.block_manager_v1 import (
+                BlockSpaceManagerV1)
             return BlockSpaceManagerV1
 
         if version == "v2":
-            from aphrodite.processing.block_manager_v2 import \
-                BlockSpaceManagerV2
+            from aphrodite.processing.block_manager_v2 import (
+                BlockSpaceManagerV2)
             return BlockSpaceManagerV2
 
         if version == "embedding":
-            from aphrodite.processing.embedding_model_block_manager import \
-                EmbeddingModelBlockSpaceManager
+            from aphrodite.processing.embedding_model_block_manager import (
+                EmbeddingModelBlockSpaceManager)
             return EmbeddingModelBlockSpaceManager
 
         raise ValueError(f"Unknown version {version=}")

+ 2 - 2
aphrodite/prompt_adapter/layers.py

@@ -6,8 +6,8 @@ from torch import nn
 
 from aphrodite.adapter_commons.layers import AdapterMapping
 from aphrodite.common.config import PromptAdapterConfig
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 
 
 @dataclass

+ 2 - 2
aphrodite/prompt_adapter/models.py

@@ -12,8 +12,8 @@ from aphrodite.adapter_commons.utils import (add_adapter, deactivate_adapter,
                                              remove_adapter,
                                              set_adapter_mapping)
 from aphrodite.common.config import PromptAdapterConfig
-from aphrodite.prompt_adapter.layers import \
-    VocabParallelEmbeddingWithPromptAdapter  # yapf: disable
+from aphrodite.prompt_adapter.layers import (
+    VocabParallelEmbeddingWithPromptAdapter)  # yapf: disable
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 
 logger = logging.getLogger(__name__)

+ 2 - 2
aphrodite/quantization/__init__.py

@@ -6,8 +6,8 @@ from aphrodite.quantization.awq import AWQConfig
 from aphrodite.quantization.awq_marlin import AWQMarlinConfig
 from aphrodite.quantization.base_config import QuantizationConfig
 from aphrodite.quantization.bitsandbytes import BitsAndBytesConfig
-from aphrodite.quantization.compressed_tensors.compressed_tensors import \
-    CompressedTensorsConfig
+from aphrodite.quantization.compressed_tensors.compressed_tensors import (
+    CompressedTensorsConfig)
 from aphrodite.quantization.deepspeedfp import DeepSpeedFPConfig
 from aphrodite.quantization.eetq import EETQConfig
 from aphrodite.quantization.exl2 import Exl2Config

+ 2 - 2
aphrodite/quantization/compressed_tensors/compressed_tensors.py

@@ -57,8 +57,8 @@ class CompressedTensorsConfig(QuantizationConfig):
         layer: torch.nn.Module,
         prefix: str,
     ) -> Optional["QuantizeMethodBase"]:
-        from aphrodite.attention.layer import \
-            Attention  # Avoid circular import
+        from aphrodite.attention.layer import (
+            Attention)  # Avoid circular import
         if isinstance(layer, LinearBase):
             return CompressedTensorsLinearMethod(self)
         if isinstance(layer, Attention):

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_unquantized.py

@@ -5,8 +5,8 @@ import torch.nn.functional as F
 from torch.nn import Parameter
 
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 
 __all__ = ["CompressedTensorsUnquantized"]
 

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py

@@ -5,8 +5,8 @@ from torch.nn import Parameter
 
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 from aphrodite.quantization.gptq_marlin_24 import (GPTQ_MARLIN_24_MAX_PARALLEL,
                                                    GPTQ_MARLIN_24_MIN_THREAD_N)
 from aphrodite.scalar_type import scalar_types

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py

@@ -3,10 +3,10 @@ from typing import Callable, List, Optional
 import torch
 
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.marlin_utils_fp8 import (
     apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin)
 from aphrodite.quantization.utils.w8a8_utils import (

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py

@@ -4,10 +4,10 @@ import torch
 from torch.nn import Parameter
 
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.w8a8_utils import (
     apply_fp8_linear, create_per_channel_scale_param,
     create_per_tensor_scale_param, cutlass_fp8_supported,

+ 4 - 4
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py

@@ -4,10 +4,10 @@ import torch
 from torch.nn import Parameter
 
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
-from aphrodite.quantization.compressed_tensors.utils import \
-    QuantizationStrategy
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
+from aphrodite.quantization.compressed_tensors.utils import (
+    QuantizationStrategy)
 from aphrodite.quantization.utils.w8a8_utils import (
     apply_int8_linear, convert_to_channelwise, create_per_channel_scale_param,
     create_per_tensor_scale_param)

+ 2 - 2
aphrodite/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py

@@ -5,8 +5,8 @@ from torch.nn import Parameter
 
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.utils import set_weight_attrs
-from aphrodite.quantization.compressed_tensors.schemes import \
-    CompressedTensorsScheme
+from aphrodite.quantization.compressed_tensors.schemes import (
+    CompressedTensorsScheme)
 from aphrodite.quantization.utils.marlin_utils import (
     apply_gptq_marlin_linear, marlin_make_empty_g_idx, marlin_make_workspace,
     marlin_permute_scales, replace_tensor, verify_marlin_supported,

+ 2 - 2
aphrodite/quantization/fp8.py

@@ -76,8 +76,8 @@ class Fp8Config(QuantizationConfig):
 
     def get_quant_method(self, layer: torch.nn.Module,
                          prefix: str) -> Optional["QuantizeMethodBase"]:
-        from aphrodite.attention.layer import \
-            Attention  # Avoid circular import
+        from aphrodite.attention.layer import (
+            Attention)  # Avoid circular import
 
         if isinstance(layer, LinearBase):
             if is_layer_skipped(prefix, self.ignored_layers):

+ 2 - 2
aphrodite/quantization/gguf.py

@@ -6,8 +6,8 @@ from torch.nn.parameter import Parameter, UninitializedParameter
 
 from aphrodite import _custom_ops as ops
 from aphrodite.modeling.layers.linear import LinearBase, LinearMethodBase
-from aphrodite.modeling.layers.vocab_parallel_embedding import \
-    VocabParallelEmbedding
+from aphrodite.modeling.layers.vocab_parallel_embedding import (
+    VocabParallelEmbedding)
 from aphrodite.modeling.utils import set_weight_attrs
 from aphrodite.quantization.base_config import (QuantizationConfig,
                                                 QuantizeMethodBase)

+ 2 - 2
aphrodite/spec_decode/draft_model_runner.py

@@ -9,8 +9,8 @@ try:
     from aphrodite.attention.backends.flash_attn import FlashAttentionMetadata
 except ModuleNotFoundError:
     # aphrodite_flash_attn is not installed, use the identical ROCm FA metadata
-    from aphrodite.attention.backends.rocm_flash_attn import \
-        ROCmFlashAttentionMetadata as FlashAttentionMetadata
+    from aphrodite.attention.backends.rocm_flash_attn import (
+        ROCmFlashAttentionMetadata as FlashAttentionMetadata)
 
 try:
     from flashinfer import BatchDecodeWithPagedKVCacheWrapper

+ 2 - 2
aphrodite/spec_decode/metrics.py

@@ -5,8 +5,8 @@ from typing import Callable, Optional
 import torch
 
 from aphrodite.common.utils import is_pin_memory_available
-from aphrodite.modeling.layers.spec_decode_base_sampler import \
-    SpecDecodeBaseSampler
+from aphrodite.modeling.layers.spec_decode_base_sampler import (
+    SpecDecodeBaseSampler)
 
 
 @dataclass

+ 4 - 4
aphrodite/spec_decode/spec_decode_worker.py

@@ -15,8 +15,8 @@ from aphrodite.distributed.communication_op import broadcast_tensor_dict
 from aphrodite.modeling.layers.rejection_sampler import RejectionSampler
 from aphrodite.modeling.layers.spec_decode_base_sampler import (
     SpecDecodeBaseSampler, SpecDecodeStochasticBaseSampler)
-from aphrodite.modeling.layers.typical_acceptance_sampler import \
-    TypicalAcceptanceSampler
+from aphrodite.modeling.layers.typical_acceptance_sampler import (
+    TypicalAcceptanceSampler)
 from aphrodite.spec_decode.batch_expansion import BatchExpansionTop1Scorer
 from aphrodite.spec_decode.draft_model_runner import TP1DraftModelRunner
 from aphrodite.spec_decode.interfaces import (SpeculativeProposals,
@@ -28,8 +28,8 @@ from aphrodite.spec_decode.mlp_speculator_worker import MLPSpeculatorWorker
 from aphrodite.spec_decode.multi_step_worker import MultiStepWorker
 from aphrodite.spec_decode.ngram_worker import NGramWorker
 from aphrodite.spec_decode.proposer_worker_base import ProposerWorkerBase
-from aphrodite.spec_decode.smaller_tp_proposer_worker import \
-    SmallerTpProposerWorker
+from aphrodite.spec_decode.smaller_tp_proposer_worker import (
+    SmallerTpProposerWorker)
 from aphrodite.spec_decode.target_model_runner import TargetModelRunner
 from aphrodite.spec_decode.util import (Timer, create_sequence_group_output,
                                         get_all_num_logprobs,

+ 2 - 2
aphrodite/task_handler/model_runner.py

@@ -53,8 +53,8 @@ from aphrodite.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
                                   MultiModalInputs)
 from aphrodite.prompt_adapter.layers import PromptAdapterMapping
 from aphrodite.prompt_adapter.request import PromptAdapterRequest
-from aphrodite.prompt_adapter.worker_manager import \
-    LRUCacheWorkerPromptAdapterManager
+from aphrodite.prompt_adapter.worker_manager import (
+    LRUCacheWorkerPromptAdapterManager)
 from aphrodite.task_handler.model_runner_base import (
     ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,
     _add_attn_metadata_broadcastable_dict,

+ 2 - 2
aphrodite/transformers_utils/config.py

@@ -5,8 +5,8 @@ from typing import Dict, Optional, Type, Union
 
 from loguru import logger
 from transformers import GenerationConfig, PretrainedConfig
-from transformers.models.auto.modeling_auto import \
-    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
 
 from aphrodite.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                                   InternVLChatConfig,

+ 2 - 2
aphrodite/transformers_utils/configs/__init__.py

@@ -7,8 +7,8 @@ from aphrodite.transformers_utils.configs.falcon import RWConfig
 from aphrodite.transformers_utils.configs.internvl import InternVLChatConfig
 from aphrodite.transformers_utils.configs.jais import JAISConfig
 from aphrodite.transformers_utils.configs.medusa import MedusaConfig
-from aphrodite.transformers_utils.configs.mlp_speculator import \
-    MLPSpeculatorConfig
+from aphrodite.transformers_utils.configs.mlp_speculator import (
+    MLPSpeculatorConfig)
 from aphrodite.transformers_utils.configs.mpt import MPTConfig
 
 __all__ = [

+ 2 - 2
aphrodite/transformers_utils/detokenizer.py

@@ -4,8 +4,8 @@ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from aphrodite.common.sequence import (Logprob, SamplingParams, Sequence,
                                        SequenceGroup)
-from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-    BaseTokenizerGroup
+from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (
+    BaseTokenizerGroup)
 
 # Used eg. for marking rejected tokens in spec decoding.
 INVALID_TOKEN_ID = -1

+ 2 - 2
aphrodite/transformers_utils/tokenizer_group/__init__.py

@@ -8,8 +8,8 @@ from .base_tokenizer_group import AnyTokenizer, BaseTokenizerGroup
 from .tokenizer_group import TokenizerGroup
 
 if ray:
-    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import \
-        RayTokenizerGroupPool  # noqa E501
+    from aphrodite.transformers_utils.tokenizer_group.ray_tokenizer_group import (  # noqa E501
+        RayTokenizerGroupPool)
 else:
     RayTokenizerGroupPool = None  # type: ignore
 

+ 4 - 4
aphrodite/transformers_utils/tokenizer_group/ray_tokenizer_group.py

@@ -15,10 +15,10 @@ from transformers import PreTrainedTokenizer
 from aphrodite.common.config import TokenizerPoolConfig
 from aphrodite.executor.ray_utils import ray
 from aphrodite.lora.request import LoRARequest
-from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import \
-    BaseTokenizerGroup
-from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import \
-    TokenizerGroup
+from aphrodite.transformers_utils.tokenizer_group.base_tokenizer_group import (
+    BaseTokenizerGroup)
+from aphrodite.transformers_utils.tokenizer_group.tokenizer_group import (
+    TokenizerGroup)
 
 
 class RayTokenizerGroupPool(BaseTokenizerGroup):

+ 2 - 2
aphrodite/triton_utils/__init__.py

@@ -4,8 +4,8 @@ __all__ = ["HAS_TRITON"]
 
 if HAS_TRITON:
 
-    from aphrodite.triton_utils.custom_cache_manager import \
-        maybe_set_triton_cache_manager
+    from aphrodite.triton_utils.custom_cache_manager import (
+        maybe_set_triton_cache_manager)
     from aphrodite.triton_utils.libentry import libentry
 
     __all__ += ["maybe_set_triton_cache_manager", "libentry"]

+ 12 - 10
examples/aphrodite_engine_example.py

@@ -1,6 +1,6 @@
 import argparse
 
-from aphrodite import EngineArgs, AphroditeEngine, SamplingParams
+from aphrodite import AphroditeEngine, EngineArgs, SamplingParams
 
 
 def main(args: argparse.Namespace):
@@ -12,14 +12,16 @@ def main(args: argparse.Namespace):
     test_prompts = [
         ("<|system|>Enter chat mode.<|user|>Hello!<|model|>",
          SamplingParams(temperature=0.0)),
-        ("<|system|>Enter RP mode.<|model|>Hello!<|user|>What are you doing?<|model|>",
-         SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
-        ("<|system|>Enter chat mode.<|user|>What is the meaning of life?<|model|>",
-         SamplingParams(n=2,
-                        best_of=5,
-                        temperature=0.8,
-                        top_p=0.95,
-                        frequency_penalty=0.1)),
+        (
+            "<|system|>Enter RP mode.<|model|>Hello!<|user|>What are you doing?<|model|>",  # noqa: E501
+            SamplingParams(temperature=0.8, top_k=5, presence_penalty=0.2)),
+        (
+            "<|system|>Enter chat mode.<|user|>What is the meaning of life?<|model|>",  # noqa: E501
+            SamplingParams(n=2,
+                           best_of=5,
+                           temperature=0.8,
+                           top_p=0.95,
+                           frequency_penalty=0.1)),
         ("<|system|>Enter QA mode.<|user|>What is a man?<|model|>A miserable",
          SamplingParams(n=3, best_of=3, use_beam_search=True,
                         temperature=0.0)),
@@ -48,4 +50,4 @@ if __name__ == '__main__':
         description='Demo on using the AphroditeEngine class directly')
     parser = EngineArgs.add_cli_args(parser)
     args = parser.parse_args()
-    main(args)
\ No newline at end of file
+    main(args)

+ 2 - 0
pyproject.toml

@@ -41,6 +41,8 @@ ignore = [
     "E731",
     # Loop control variable not used within loop body
     "B007",
+    # f-strings in logger
+    "G004",
 ]
 
 [tool.codespell]

+ 2 - 2
tests/benchmarks/kernels/marlin.py

@@ -14,8 +14,8 @@ from aphrodite.quantization.utils.marlin_utils import (
     GPTQ_MARLIN_SUPPORTED_GROUP_SIZES, GPTQ_MARLIN_SUPPORTED_NUM_BITS)
 from aphrodite.quantization.utils.marlin_utils_test import (MarlinWorkspace,
                                                             marlin_quantize)
-from aphrodite.quantization.utils.marlin_utils_test_24 import \
-    marlin_24_quantize
+from aphrodite.quantization.utils.marlin_utils_test_24 import (
+    marlin_24_quantize)
 from aphrodite.quantization.utils.quant_utils import (gptq_pack,
                                                       quantize_weights,
                                                       sort_weights)