12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- from abc import ABC, abstractmethod
- from typing import Dict, List, Optional
- from aphrodite.common.config import (
- CacheConfig,
- DeviceConfig,
- ModelConfig,
- ParallelConfig,
- SchedulerConfig,
- LoRAConfig,
- )
- from aphrodite.lora.request import LoRARequest
- from aphrodite.common.sequence import SamplerOutput, SequenceGroupMetadata
class ExecutorBase(ABC):
    """Abstract interface that every executor implements.

    An executor runs the model on one particular kind of device (for
    example CPU, GPU or Neuron). A distributed executor that drives the
    model across several devices implements this same interface.
    """

    @abstractmethod
    def __init__(
        self,
        model_config: ModelConfig,
        cache_config: CacheConfig,
        parallel_config: ParallelConfig,
        scheduler_config: SchedulerConfig,
        device_config: DeviceConfig,
        lora_config: Optional[LoRAConfig],
    ) -> None:
        """Build the executor from the engine configuration objects.

        The constructor is abstract, so each concrete executor has to
        supply its own.

        Args:
            model_config: Model configuration.
            cache_config: Cache configuration.
            parallel_config: Parallelism configuration.
            scheduler_config: Scheduler configuration.
            device_config: Device configuration.
            lora_config: LoRA configuration, or ``None``.
                NOTE(review): ``None`` presumably means LoRA is
                disabled -- confirm against the concrete executors.
        """
        raise NotImplementedError

    @abstractmethod
    def execute_model(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
        blocks_to_swap_in: Dict[int, int],
        blocks_to_swap_out: Dict[int, int],
        blocks_to_copy: Dict[int, List[int]],
    ) -> SamplerOutput:
        """Run a single model step over the supplied sequences.

        Args:
            seq_group_metadata_list: Metadata of the sequence groups to
                process in this step.
            blocks_to_swap_in: Integer-to-integer block mapping.
                NOTE(review): presumably source block -> destination
                block for swap-in -- confirm with the scheduler.
            blocks_to_swap_out: Integer-to-integer block mapping.
                NOTE(review): presumably the swap-out counterpart --
                confirm with the scheduler.
            blocks_to_copy: Mapping from one block number to a list of
                block numbers.
                NOTE(review): presumably source -> copy targets --
                confirm with the scheduler.

        Returns:
            The sampler output of the step.
        """
        raise NotImplementedError

    @abstractmethod
    def add_lora(self, lora_request: LoRARequest) -> bool:
        """Add the LoRA described by ``lora_request``.

        Returns:
            A bool. NOTE(review): presumably whether the adapter was
            added -- confirm against the concrete executors.
        """
        raise NotImplementedError

    @abstractmethod
    def remove_lora(self, lora_id: int) -> bool:
        """Remove the LoRA identified by ``lora_id``.

        Returns:
            A bool. NOTE(review): presumably whether an adapter was
            removed -- confirm against the concrete executors.
        """
        raise NotImplementedError

    @abstractmethod
    def list_loras(self) -> List[int]:
        """Return a list of integer LoRA ids.

        NOTE(review): presumably the ids of the currently registered
        adapters -- confirm against the concrete executors.
        """
        raise NotImplementedError

    @abstractmethod
    def check_health(self) -> None:
        """Verify that the executor is healthy.

        Implementations should raise an exception when it is not.
        """
        raise NotImplementedError
class ExecutorAsyncBase(ExecutorBase):
    """Executor interface extended with coroutine entry points.

    On top of everything ``ExecutorBase`` requires, subclasses must
    implement the ``async`` methods declared here.
    """

    @abstractmethod
    async def execute_model_async(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
        blocks_to_swap_in: Dict[int, int],
        blocks_to_swap_out: Dict[int, int],
        blocks_to_copy: Dict[int, List[int]],
    ) -> SamplerOutput:
        """Run a single model step over the supplied sequences.

        Coroutine with the same parameters and return type as
        ``execute_model``.

        Args:
            seq_group_metadata_list: Metadata of the sequence groups to
                process in this step.
            blocks_to_swap_in: Integer-to-integer block mapping.
                NOTE(review): presumably source block -> destination
                block for swap-in -- confirm with the scheduler.
            blocks_to_swap_out: Integer-to-integer block mapping.
                NOTE(review): presumably the swap-out counterpart --
                confirm with the scheduler.
            blocks_to_copy: Mapping from one block number to a list of
                block numbers.
                NOTE(review): presumably source -> copy targets --
                confirm with the scheduler.

        Returns:
            The sampler output of the step.
        """
        raise NotImplementedError

    @abstractmethod
    async def check_health_async(self) -> None:
        """Verify that the executor is healthy.

        Implementations should raise an exception when it is not.
        """
        raise NotImplementedError
|