# utils.py
  1. from loguru import logger
  2. def check_block_size_valid(num_gpu_blocks, block_size, max_model_len) -> None:
  3. if num_gpu_blocks <= 0:
  4. raise ValueError("No available memory for the cache blocks. "
  5. "Try increasing `gpu_memory_utilization` when "
  6. "initializing the engine.")
  7. max_seq_len = block_size * num_gpu_blocks
  8. logger.info(f"Maximum sequence length allowed in the cache: "
  9. f"{max_seq_len}")
  10. if max_model_len > max_seq_len:
  11. raise ValueError(
  12. f"The model's max seq len ({max_model_len}) "
  13. "is larger than the maximum number of tokens that can be "
  14. f"stored in KV cache ({max_seq_len}). Try increasing "
  15. "`gpu_memory_utilization` or decreasing `max_model_len` when "
  16. "initializing the engine.")