metrics_types.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
"""
These types are defined in this file to avoid importing
aphrodite.engine.metrics and therefore importing prometheus_client.
This is required due to usage of Prometheus multiprocess mode to enable
metrics after splitting out the uvicorn process from the engine process.
Prometheus multiprocess mode requires setting PROMETHEUS_MULTIPROC_DIR
before prometheus_client is imported. Typically, this is done by setting
the env variable before launch, but since we are a library, we need to
do this in Python code and lazily import prometheus_client.
"""
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, List, Optional, Protocol

from aphrodite.spec_decode.metrics import SpecDecodeWorkerMetrics


  16. @dataclass
  17. class Stats:
  18. """Created by AphroditeEngine for use by StatLogger."""
  19. now: float
  20. # System stats (should have _sys suffix)
  21. # Scheduler State
  22. num_running_sys: int
  23. num_waiting_sys: int
  24. num_swapped_sys: int
  25. # KV Cache Usage in %
  26. gpu_cache_usage_sys: float
  27. cpu_cache_usage_sys: float
  28. # Iteration stats (should have _iter suffix)
  29. num_prompt_tokens_iter: int
  30. num_generation_tokens_iter: int
  31. time_to_first_tokens_iter: List[float]
  32. time_per_output_tokens_iter: List[float]
  33. num_preemption_iter: int
  34. # Request stats (should have _requests suffix)
  35. # Latency
  36. time_e2e_requests: List[float]
  37. # Metadata
  38. num_prompt_tokens_requests: List[int]
  39. num_generation_tokens_requests: List[int]
  40. best_of_requests: List[int]
  41. n_requests: List[int]
  42. finished_reason_requests: List[str]
  43. spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
  44. class SupportsMetricsInfo(Protocol):
  45. def metrics_info(self) -> Dict[str, str]:
  46. ...
  47. class StatLoggerBase(ABC):
  48. """Base class for StatLogger."""
  49. def __init__(self, local_interval: float) -> None:
  50. # Tracked stats over current local logging interval.
  51. self.num_prompt_tokens: List[int] = []
  52. self.num_generation_tokens: List[int] = []
  53. self.last_local_log = time.time()
  54. self.local_interval = local_interval
  55. self.spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
  56. @abstractmethod
  57. def log(self, stats: Stats) -> None:
  58. raise NotImplementedError
  59. @abstractmethod
  60. def info(self, type: str, obj: SupportsMetricsInfo) -> None:
  61. raise NotImplementedError
  62. def maybe_update_spec_decode_metrics(self, stats: Stats):
  63. """Save spec decode metrics (since they are unlikely
  64. to be emitted at same time as log interval)."""
  65. if stats.spec_decode_metrics is not None:
  66. self.spec_decode_metrics = stats.spec_decode_metrics