{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "description": "Monitoring Aphrodite Inference Server", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 29, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "End-to-end request latency measured in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, "id": 9, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { 
"type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "rate(aphrodite:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "E" } ], "title": "E2E Request Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Number of tokens processed per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, "id": 8, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "rate(aphrodite:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Prompt Tokens/Sec", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "rate(aphrodite:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "Generation Tokens/Sec", "range": true, "refId": "B", "useBackend": false } ], "title": "Token Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Inter token latency in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": 
false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, "id": 10, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": 
false, "editorMode": "builder", "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "rate(aphrodite:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "E" } ], "title": "Time Per Output Token Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Number of requests in the RUNNING, WAITING, and SWAPPED states.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "id": 3, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": 
"builder", "expr": "aphrodite:num_requests_running{model_name=\"$model_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Running", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "aphrodite:num_requests_swapped{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Swapped", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "aphrodite:num_requests_waiting{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Num Waiting", "range": true, "refId": "C", "useBackend": false } ], "title": "Scheduler State", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "P50, P90, P95, P99, and average time to first token (TTFT) latency in seconds.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", 
"showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P95", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P90", "range": true, "refId": "C", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, "legendFormat": "P50", "range": true, "refId": "D", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": 
"rate(aphrodite:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", "range": true, "refId": "E" } ], "title": "Time To First Token Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Percentage of used cache blocks by Aphrodite.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, "id": 4, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "aphrodite:gpu_cache_usage_perc{model_name=\"$model_name\"}", "instant": false, "legendFormat": "GPU Cache Usage", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "aphrodite:cpu_cache_usage_perc{model_name=\"$model_name\"}", "hide": false, "instant": false, "legendFormat": "CPU Cache Usage", "range": true, "refId": "B" } ], "title": "Cache Utilization", "type": "timeseries" }, { 
"type": "heatmap", "title": "Request Prompt Length", "description": "Heatmap of request prompt length", "gridPos": { "x": 0, "y": 24, "w": 12, "h": 8 }, "datasource": { "uid": "prometheus", "type": "prometheus" }, "id": 12, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "refId": "A", "expr": "sum by(le) (increase(aphrodite:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", "range": true, "instant": false, "editorMode": "builder", "legendFormat": "{{le}}", "useBackend": false, "disableTextWrap": false, "fullMetaSearch": false, "includeNullMetadata": true, "format": "heatmap" } ], "options": { "calculate": false, "yAxis": { "axisPlacement": "left", "reverse": false, "unit": "none", "axisLabel": "Prompt Length" }, "rowsFrame": { "layout": "auto", "value": "Request count" }, "color": { "mode": "scheme", "fill": "dark-orange", "scale": "exponential", "exponent": 0.5, "scheme": "Spectral", "steps": 64, "reverse": false, "min": 0 }, "cellGap": 1, "filterValues": { "le": 1e-9 }, "tooltip": { "show": true, "yHistogram": true }, "legend": { "show": true }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "cellValues": { "unit": "none" } }, "fieldConfig": { "defaults": { "custom": { "scaleDistribution": { "type": "linear" }, "hideFrom": { "tooltip": false, "viz": false, "legend": false } } }, "overrides": [] }, "pluginVersion": "10.2.0" }, { "datasource": { "uid": "prometheus", "type": "prometheus" }, "type": "heatmap", "title": "Request Generation Length", "description": "Heatmap of request generation length", "gridPos": { "x": 12, "y": 24, "w": 12, "h": 8 }, "id": 13, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "refId": "A", "expr": "sum by(le) (increase(aphrodite:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", "range": true, "instant": false, "editorMode": "builder", "legendFormat": "{{le}}", "useBackend": false, "disableTextWrap": false, 
"fullMetaSearch": false, "includeNullMetadata": true, "format": "heatmap" } ], "options": { "calculate": false, "yAxis": { "axisPlacement": "left", "reverse": false, "unit": "none", "axisLabel": "Generation Length" }, "rowsFrame": { "layout": "auto", "value": "Request count" }, "color": { "mode": "scheme", "fill": "dark-orange", "scale": "exponential", "exponent": 0.5, "scheme": "Spectral", "steps": 64, "reverse": false, "min": 0 }, "cellGap": 1, "filterValues": { "le": 1e-9 }, "tooltip": { "show": true, "yHistogram": true }, "legend": { "show": true }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "cellValues": { "unit": "none" } }, "fieldConfig": { "defaults": { "custom": { "scaleDistribution": { "type": "linear" }, "hideFrom": { "tooltip": false, "viz": false, "legend": false } } }, "overrides": [] }, "pluginVersion": "10.2.0" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "lineInterpolation": "linear", "barAlignment": 0, "lineWidth": 1, "fillOpacity": 0, "gradientMode": "none", "spanNulls": false, "insertNulls": false, "showPoints": "auto", "pointSize": 5, "stacking": { "mode": "none", "group": "A" }, "axisPlacement": "auto", "axisLabel": "", "axisColorMode": "text", "axisBorderShow": false, "scaleDistribution": { "type": "linear" }, "axisCenteredZero": false, "hideFrom": { "tooltip": false, "viz": false, "legend": false }, "thresholdsStyle": { "mode": "off" } }, "color": { "mode": "palette-classic" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 32 }, "id": 11, "options": { "tooltip": { "mode": "single", "sort": "none" }, "legend": { "showLegend": true, "displayMode": "list", "placement": "bottom", "calcs": [] } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, 
"editorMode": "builder", "expr": "sum by(finished_reason) (increase(aphrodite:request_success_total{model_name=\"$model_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "interval": "", "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Finish Reason", "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.", "type": "timeseries" } ], "refresh": "", "schemaVersion": 37, "style": "dark", "tags": [], "templating": { "list": [ { "current": { "selected": false, "text": "aphrodite", "value": "aphrodite" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, "definition": "label_values(model_name)", "hide": 0, "includeAll": false, "label": "model_name", "multi": false, "name": "model_name", "options": [], "query": { "query": "label_values(model_name)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" } ] }, "time": { "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Aphrodite Engine", "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b", "version": 2, "weekStart": "" }