metrics.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. from aioprometheus import Gauge
  2. # The begin-* and end* here are used by the documentation generator
  3. # to extract the metrics definitions.
  4. # begin-metrics-definitions
  5. gauge_avg_prompt_throughput = Gauge(
  6. "aphrodite:avg_prompt_throughput_toks_per_s",
  7. "Average prefill throughput in tokens/s.")
  8. gauge_avg_generation_throughput = Gauge(
  9. "aphrodite:avg_generation_throughput_toks_per_s",
  10. "Average generation throughput in tokens/s.")
  11. gauge_scheduler_running = Gauge(
  12. "aphrodite:num_requests_running",
  13. "Number of requests that is currently running for inference.")
  14. gauge_scheduler_swapped = Gauge("aphrodite:num_requests_swapped",
  15. "Number requests swapped to CPU.")
  16. gauge_scheduler_waiting = Gauge("aphrodite:num_requests_waiting",
  17. "Number of requests waiting to be processed.")
  18. gauge_gpu_cache_usage = Gauge(
  19. "aphrodite:gpu_cache_usage_perc",
  20. "GPU KV-cache usage. 1 means 100 percent usage.")
  21. gauge_cpu_cache_usage = Gauge(
  22. "aphrodite:cpu_cache_usage_perc",
  23. "CPU KV-cache usage. 1 means 100 percent usage.")
  24. # end-metrics-definitions
  25. labels = {}
  26. def add_global_metrics_labels(**kwargs):
  27. labels.update(kwargs)
  28. def record_metrics(
  29. avg_prompt_throughput: float,
  30. avg_generation_throughput: float,
  31. scheduler_running: int,
  32. scheduler_swapped: int,
  33. scheduler_waiting: int,
  34. gpu_cache_usage: float,
  35. cpu_cache_usage: float,
  36. ):
  37. gauge_avg_prompt_throughput.set(labels, avg_prompt_throughput)
  38. gauge_avg_generation_throughput.set(labels, avg_generation_throughput)
  39. gauge_scheduler_running.set(labels, scheduler_running)
  40. gauge_scheduler_swapped.set(labels, scheduler_swapped)
  41. gauge_scheduler_waiting.set(labels, scheduler_waiting)
  42. gauge_gpu_cache_usage.set(labels, gpu_cache_usage)
  43. gauge_cpu_cache_usage.set(labels, cpu_cache_usage)