Browse Source

fix: guard for lora + chunked prefill

AlpinDale 7 months ago
parent
commit
ee174ea4fd
1 changed file with 2 additions and 0 deletions
  1. 2 0
      aphrodite/common/config.py

+ 2 - 0
aphrodite/common/config.py

@@ -1171,6 +1171,8 @@ class LoRAConfig:
                 "Due to limitations of the custom LoRA CUDA kernel, "
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError("LoRA is not supported with chunked prefill yet.")
 
 
 @dataclass