from aioprometheus import Gauge

# The begin-* and end-* markers below are used by the documentation generator
# to extract the metrics definitions.

# begin-metrics-definitions
gauge_avg_prompt_throughput = Gauge(
    "aphrodite:avg_prompt_throughput_toks_per_s",
    "Average prefill throughput in tokens/s.")
gauge_avg_generation_throughput = Gauge(
    "aphrodite:avg_generation_throughput_toks_per_s",
    "Average generation throughput in tokens/s.")

gauge_scheduler_running = Gauge(
    "aphrodite:num_requests_running",
    "Number of requests that is currently running for inference.")
gauge_scheduler_swapped = Gauge("aphrodite:num_requests_swapped",
                                "Number requests swapped to CPU.")
gauge_scheduler_waiting = Gauge("aphrodite:num_requests_waiting",
                                "Number of requests waiting to be processed.")

gauge_gpu_cache_usage = Gauge(
    "aphrodite:gpu_cache_usage_perc",
    "GPU KV-cache usage. 1 means 100 percent usage.")
gauge_cpu_cache_usage = Gauge(
    "aphrodite:cpu_cache_usage_perc",
    "CPU KV-cache usage. 1 means 100 percent usage.")
# end-metrics-definitions

# Module-level label set; the same dict object is passed to every
# ``Gauge.set`` call made by ``record_metrics``.
labels = {}


def add_global_metrics_labels(**kwargs):
    """Merge the given keyword arguments into the module-level ``labels``.

    Existing keys are overwritten (plain ``dict.update`` semantics). The
    change is visible to every later ``record_metrics`` call, because that
    function passes the same ``labels`` dict to each gauge.
    """
    labels.update(kwargs)


def record_metrics(
    avg_prompt_throughput: float,
    avg_generation_throughput: float,
    scheduler_running: int,
    scheduler_swapped: int,
    scheduler_waiting: int,
    gpu_cache_usage: float,
    cpu_cache_usage: float,
):
    """Write one sample to each module-level gauge using the global labels.

    Each argument is stored in the gauge of the matching name, e.g.
    ``avg_prompt_throughput`` goes to ``gauge_avg_prompt_throughput``.
    The meaning and units of each value are given by the help text of the
    corresponding gauge definition in this module.

    Args:
        avg_prompt_throughput: Value for ``gauge_avg_prompt_throughput``.
        avg_generation_throughput: Value for
            ``gauge_avg_generation_throughput``.
        scheduler_running: Value for ``gauge_scheduler_running``.
        scheduler_swapped: Value for ``gauge_scheduler_swapped``.
        scheduler_waiting: Value for ``gauge_scheduler_waiting``.
        gpu_cache_usage: Value for ``gauge_gpu_cache_usage``.
        cpu_cache_usage: Value for ``gauge_cpu_cache_usage``.
    """
    # Pair every gauge with its new value; the gauges are updated in the
    # order listed here.
    samples = (
        (gauge_avg_prompt_throughput, avg_prompt_throughput),
        (gauge_avg_generation_throughput, avg_generation_throughput),
        (gauge_scheduler_running, scheduler_running),
        (gauge_scheduler_swapped, scheduler_swapped),
        (gauge_scheduler_waiting, scheduler_waiting),
        (gauge_gpu_cache_usage, gpu_cache_usage),
        (gauge_cpu_cache_usage, cpu_cache_usage),
    )
    for gauge, value in samples:
        gauge.set(labels, value)