# metrics_types.py (2.8 KB)
"""
These types are defined in this file to avoid importing
aphrodite.engine.metrics and therefore importing prometheus_client.

This is required due to usage of Prometheus multiprocess mode to enable
metrics after splitting out the uvicorn process from the engine process.

Prometheus multiprocess mode requires setting PROMETHEUS_MULTIPROC_DIR
before prometheus_client is imported. Typically, this is done by setting
the environment variable before launch, but since we are a library, we
need to do this in Python code and lazily import prometheus_client.
"""
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Optional, Protocol

from aphrodite.spec_decode.metrics import SpecDecodeWorkerMetrics
  16. @dataclass
  17. class Stats:
  18. """Created by AphroditeEngine for use by StatLogger."""
  19. now: float
  20. # System stats (should have _sys suffix)
  21. # Scheduler State
  22. num_running_sys: int
  23. num_waiting_sys: int
  24. num_swapped_sys: int
  25. # KV Cache Usage in %
  26. gpu_cache_usage_sys: float
  27. cpu_cache_usage_sys: float
  28. # Prefix caching block hit rate
  29. cpu_prefix_cache_hit_rate: float
  30. gpu_prefix_cache_hit_rate: float
  31. # Iteration stats (should have _iter suffix)
  32. num_prompt_tokens_iter: int
  33. num_generation_tokens_iter: int
  34. time_to_first_tokens_iter: List[float]
  35. time_per_output_tokens_iter: List[float]
  36. num_preemption_iter: int
  37. # Request stats (should have _requests suffix)
  38. # Latency
  39. time_e2e_requests: List[float]
  40. # Metadata
  41. num_prompt_tokens_requests: List[int]
  42. num_generation_tokens_requests: List[int]
  43. best_of_requests: List[int]
  44. n_requests: List[int]
  45. finished_reason_requests: List[str]
  46. spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
  47. class SupportsMetricsInfo(Protocol):
  48. def metrics_info(self) -> Dict[str, str]:
  49. ...
  50. class StatLoggerBase(ABC):
  51. """Base class for StatLogger."""
  52. def __init__(self, local_interval: float) -> None:
  53. # Tracked stats over current local logging interval.
  54. self.num_prompt_tokens: List[int] = []
  55. self.num_generation_tokens: List[int] = []
  56. self.last_local_log = time.time()
  57. self.local_interval = local_interval
  58. self.spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
  59. @abstractmethod
  60. def log(self, stats: Stats) -> None:
  61. raise NotImplementedError
  62. @abstractmethod
  63. def info(self, type: str, obj: SupportsMetricsInfo) -> None:
  64. raise NotImplementedError
  65. def maybe_update_spec_decode_metrics(self, stats: Stats):
  66. """Save spec decode metrics (since they are unlikely
  67. to be emitted at same time as log interval)."""
  68. if stats.spec_decode_metrics is not None:
  69. self.spec_decode_metrics = stats.spec_decode_metrics