Bläddra i källkod

feat: enforce the max possible seqlen

AlpinDale 7 månader sedan
förälder
incheckning
eb2c5c77df
1 ändrad fil med 19 tillägg och 5 borttagningar
  1. aphrodite/task_handler/worker.py (+19 −5)

+ 19 - 5
aphrodite/task_handler/worker.py

@@ -374,9 +374,23 @@ def raise_if_cache_size_invalid(num_gpu_blocks, block_size,
     logger.info(f"Maximum sequence length allowed in the cache: "
     logger.info(f"Maximum sequence length allowed in the cache: "
                 f"{max_seq_len}")
                 f"{max_seq_len}")
     if max_model_len > max_seq_len:
     if max_model_len > max_seq_len:
-        raise ValueError(
-            f"The model's max seq len ({max_model_len}) "
+        original_max_model_len = max_model_len
+        max_model_len = max_seq_len
+        # raise ValueError(
+        #     f"The model's max seq len ({max_model_len}) "
+        #     "is larger than the maximum number of tokens that can be "
+        #     f"stored in KV cache ({max_seq_len}). Try increasing "
+        #     "`gpu_memory_utilization` or decreasing `max_model_len` when "
+        #     "initializing the engine.")
+        # set the max_model_len to the max_seq_len, but raise a logger.error
+        # so the user is made aware of this
+        logger.error(
+            f"The model's max seq len ({original_max_model_len}) "
             "is larger than the maximum number of tokens that can be "
             "is larger than the maximum number of tokens that can be "
-            f"stored in KV cache ({max_seq_len}). Try increasing "
-            "`gpu_memory_utilization` or decreasing `max_model_len` when "
-            "initializing the engine.")
+            f"stored in KV cache ({max_seq_len}). "
+            "Try increasing "
+            "`gpu_memory_utilization`, setting "
+            "`--enable-chunked-prefill`, or `--kv-cache-dtype fp8` "
+            "when initializing the engine. The last two are currently "
+            "mutually exclusive.\n"
+            f"Forcing max_model_len to {max_seq_len}.")