123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
"""
These types are defined in this file to avoid importing
aphrodite.engine.metrics and therefore importing prometheus_client.

This is required because Prometheus multiprocess mode is used to enable
metrics after splitting out the uvicorn process from the engine process.

Prometheus multiprocess mode requires setting PROMETHEUS_MULTIPROC_DIR
before prometheus_client is imported. Typically, this is done by setting
the environment variable before launch, but since we are a library, we
need to do this in Python code and lazily import prometheus_client.
"""
- import time
- from abc import ABC, abstractmethod
- from dataclasses import dataclass
- from typing import Dict, List, Optional, Protocol
- from aphrodite.spec_decode.metrics import SpecDecodeWorkerMetrics
@dataclass
class Stats:
    """Statistics record built by AphroditeEngine and consumed by StatLogger.

    Naming convention used by the fields below: system-level values are
    meant to end in ``_sys``, per-iteration values in ``_iter`` and
    per-request values in ``_requests``.
    """

    # NOTE(review): presumably the wall-clock time at which the record was
    # produced -- confirm against the code that builds Stats.
    now: float

    # ---- System stats (``_sys`` suffix expected) ----
    #   Scheduler state.
    num_running_sys: int
    num_waiting_sys: int
    num_swapped_sys: int
    #   KV cache usage, in %.
    gpu_cache_usage_sys: float
    cpu_cache_usage_sys: float
    #   Block hit rate of prefix caching.
    cpu_prefix_cache_hit_rate: float
    gpu_prefix_cache_hit_rate: float

    # ---- Iteration stats (``_iter`` suffix expected) ----
    num_prompt_tokens_iter: int
    num_generation_tokens_iter: int
    time_to_first_tokens_iter: List[float]
    time_per_output_tokens_iter: List[float]
    num_preemption_iter: int

    # ---- Request stats (``_requests`` suffix expected) ----
    #   Latency.
    time_e2e_requests: List[float]
    #   Metadata.
    num_prompt_tokens_requests: List[int]
    num_generation_tokens_requests: List[int]
    best_of_requests: List[int]
    n_requests: List[int]
    finished_reason_requests: List[str]

    # Speculative-decoding metrics; the only optional field, absent (None)
    # unless the producer supplies them.
    spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
class SupportsMetricsInfo(Protocol):
    """Structural type for objects that expose a ``metrics_info`` method."""

    def metrics_info(self) -> Dict[str, str]:
        """Return a string-to-string mapping (see the return annotation)."""
        ...
class StatLoggerBase(ABC):
    """Abstract parent of StatLogger implementations.

    Holds the bookkeeping shared by loggers: the token counts tracked over
    the current local logging interval, the time of the last local log and
    the most recently seen speculative-decoding metrics.
    """

    def __init__(self, local_interval: float) -> None:
        # Local logging interval and the time the current one started
        # (wall clock at construction).
        # NOTE(review): interval is presumably in seconds -- confirm.
        self.local_interval = local_interval
        self.last_local_log = time.time()

        # Stats tracked over the current local logging interval.
        self.num_prompt_tokens: List[int] = []
        self.num_generation_tokens: List[int] = []

        # Latest spec decode metrics seen so far; None until some are saved.
        self.spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None

    @abstractmethod
    def log(self, stats: Stats) -> None:
        """Handle one ``Stats`` record; subclasses must override."""
        raise NotImplementedError

    @abstractmethod
    def info(self, type: str, obj: SupportsMetricsInfo) -> None:
        """Handle metrics info for ``obj`` under ``type``; must be overridden."""
        raise NotImplementedError

    def maybe_update_spec_decode_metrics(self, stats: Stats):
        """Keep the spec decode metrics carried by ``stats``, if there are any.

        They are saved on the logger because they are unlikely to be emitted
        at the same time as the log interval. A ``stats`` without metrics
        leaves the previously saved value untouched.
        """
        latest = stats.spec_decode_metrics
        if latest is None:
            return
        self.spec_decode_metrics = latest
|