12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254 |
- {
- "__inputs": [
- ],
- "__elements": {},
- "__requires": [
- {
- "type": "grafana",
- "id": "grafana",
- "name": "Grafana",
- "version": "10.4.2"
- },
- {
- "type": "panel",
- "id": "heatmap",
- "name": "Heatmap",
- "version": ""
- },
- {
- "type": "datasource",
- "id": "prometheus",
- "name": "Prometheus",
- "version": "1.0.0"
- },
- {
- "type": "panel",
- "id": "timeseries",
- "name": "Time series",
- "version": ""
- }
- ],
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": {
- "type": "grafana",
- "uid": "-- Grafana --"
- },
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "target": {
- "limit": 100,
- "matchAny": false,
- "tags": [],
- "type": "dashboard"
- },
- "type": "dashboard"
- }
- ]
- },
- "description": "Monitoring Aphrodite Inference Server",
- "editable": true,
- "fiscalYearStartMonth": 0,
- "graphTooltip": 0,
- "id": null,
- "links": [],
- "liveNow": false,
- "panels": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "End-to-end request latency, measured in seconds.",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 0
- },
- "id": 9,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P99",
- "range": true,
- "refId": "A",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P95",
- "range": true,
- "refId": "B",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P90",
- "range": true,
- "refId": "C",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P50",
- "range": true,
- "refId": "D",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "editorMode": "code",
- "expr": "rate(aphrodite:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
- "hide": false,
- "instant": false,
- "legendFormat": "Average",
- "range": true,
- "refId": "E"
- }
- ],
- "title": "E2E Request Latency",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Number of tokens processed per second",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 0
- },
- "id": 8,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "rate(aphrodite:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
- "fullMetaSearch": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "Prompt Tokens/Sec",
- "range": true,
- "refId": "A",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "rate(aphrodite:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "Generation Tokens/Sec",
- "range": true,
- "refId": "B",
- "useBackend": false
- }
- ],
- "title": "Token Throughput",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Inter-token latency (time per output token) in seconds.",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 8
- },
- "id": 10,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P99",
- "range": true,
- "refId": "A",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P95",
- "range": true,
- "refId": "B",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P90",
- "range": true,
- "refId": "C",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P50",
- "range": true,
- "refId": "D",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "editorMode": "code",
- "expr": "rate(aphrodite:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
- "hide": false,
- "instant": false,
- "legendFormat": "Average",
- "range": true,
- "refId": "E"
- }
- ],
- "title": "Time Per Output Token Latency",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Number of requests in RUNNING, WAITING, and SWAPPED state",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "none"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 8
- },
- "id": 3,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "aphrodite:num_requests_running{model_name=\"$model_name\"}",
- "fullMetaSearch": false,
- "includeNullMetadata": true,
- "instant": false,
- "legendFormat": "Num Running",
- "range": true,
- "refId": "A",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "aphrodite:num_requests_swapped{model_name=\"$model_name\"}",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": true,
- "instant": false,
- "legendFormat": "Num Swapped",
- "range": true,
- "refId": "B",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "aphrodite:num_requests_waiting{model_name=\"$model_name\"}",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": true,
- "instant": false,
- "legendFormat": "Num Waiting",
- "range": true,
- "refId": "C",
- "useBackend": false
- }
- ],
- "title": "Scheduler State",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "P50, P90, P95, P99, and average time to first token (TTFT) latency in seconds.",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 16
- },
- "id": 5,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P99",
- "range": true,
- "refId": "A",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P95",
- "range": true,
- "refId": "B",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P90",
- "range": true,
- "refId": "C",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
- "fullMetaSearch": false,
- "hide": false,
- "includeNullMetadata": false,
- "instant": false,
- "legendFormat": "P50",
- "range": true,
- "refId": "D",
- "useBackend": false
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "editorMode": "code",
- "expr": "rate(aphrodite:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
- "hide": false,
- "instant": false,
- "legendFormat": "Average",
- "range": true,
- "refId": "E"
- }
- ],
- "title": "Time To First Token Latency",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Percentage of GPU and CPU cache blocks used by Aphrodite.",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "percentunit"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 16
- },
- "id": 4,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "editorMode": "code",
- "expr": "aphrodite:gpu_cache_usage_perc{model_name=\"$model_name\"}",
- "instant": false,
- "legendFormat": "GPU Cache Usage",
- "range": true,
- "refId": "A"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "editorMode": "code",
- "expr": "aphrodite:cpu_cache_usage_perc{model_name=\"$model_name\"}",
- "hide": false,
- "instant": false,
- "legendFormat": "CPU Cache Usage",
- "range": true,
- "refId": "B"
- }
- ],
- "title": "Cache Utilization",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Heatmap of request prompt length",
- "fieldConfig": {
- "defaults": {
- "custom": {
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "scaleDistribution": {
- "type": "linear"
- }
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 24
- },
- "id": 12,
- "options": {
- "calculate": false,
- "cellGap": 1,
- "cellValues": {
- "unit": "none"
- },
- "color": {
- "exponent": 0.5,
- "fill": "dark-orange",
- "min": 0,
- "mode": "scheme",
- "reverse": false,
- "scale": "exponential",
- "scheme": "Spectral",
- "steps": 64
- },
- "exemplars": {
- "color": "rgba(255,0,255,0.7)"
- },
- "filterValues": {
- "le": 1e-9
- },
- "legend": {
- "show": true
- },
- "rowsFrame": {
- "layout": "auto",
- "value": "Request count"
- },
- "tooltip": {
- "mode": "single",
- "showColorScale": false,
- "yHistogram": true
- },
- "yAxis": {
- "axisLabel": "Prompt Length",
- "axisPlacement": "left",
- "reverse": false,
- "unit": "none"
- }
- },
- "pluginVersion": "10.4.2",
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "sum by(le) (increase(aphrodite:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
- "format": "heatmap",
- "fullMetaSearch": false,
- "includeNullMetadata": true,
- "instant": false,
- "legendFormat": "{{le}}",
- "range": true,
- "refId": "A",
- "useBackend": false
- }
- ],
- "title": "Request Prompt Length",
- "type": "heatmap"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Heatmap of request generation length",
- "fieldConfig": {
- "defaults": {
- "custom": {
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "scaleDistribution": {
- "type": "linear"
- }
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 24
- },
- "id": 13,
- "options": {
- "calculate": false,
- "cellGap": 1,
- "cellValues": {
- "unit": "none"
- },
- "color": {
- "exponent": 0.5,
- "fill": "dark-orange",
- "min": 0,
- "mode": "scheme",
- "reverse": false,
- "scale": "exponential",
- "scheme": "Spectral",
- "steps": 64
- },
- "exemplars": {
- "color": "rgba(255,0,255,0.7)"
- },
- "filterValues": {
- "le": 1e-9
- },
- "legend": {
- "show": true
- },
- "rowsFrame": {
- "layout": "auto",
- "value": "Request count"
- },
- "tooltip": {
- "mode": "single",
- "showColorScale": false,
- "yHistogram": true
- },
- "yAxis": {
- "axisLabel": "Generation Length",
- "axisPlacement": "left",
- "reverse": false,
- "unit": "none"
- }
- },
- "pluginVersion": "10.4.2",
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "sum by(le) (increase(aphrodite:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
- "format": "heatmap",
- "fullMetaSearch": false,
- "includeNullMetadata": true,
- "instant": false,
- "legendFormat": "{{le}}",
- "range": true,
- "refId": "A",
- "useBackend": false
- }
- ],
- "title": "Request Generation Length",
- "type": "heatmap"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisBorderShow": false,
- "axisCenteredZero": false,
- "axisColorMode": "text",
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 0,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "viz": false
- },
- "insertNulls": false,
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "auto",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 32
- },
- "id": 11,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom",
- "showLegend": true
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "${DS_PROMETHEUS}"
- },
- "disableTextWrap": false,
- "editorMode": "builder",
- "expr": "sum by(finished_reason) (increase(aphrodite:request_success_total{model_name=\"$model_name\"}[$__rate_interval]))",
- "fullMetaSearch": false,
- "includeNullMetadata": true,
- "instant": false,
- "interval": "",
- "legendFormat": "__auto",
- "range": true,
- "refId": "A",
- "useBackend": false
- }
- ],
- "title": "Finish Reason",
- "type": "timeseries"
- }
- ],
- "refresh": "",
- "schemaVersion": 39,
- "tags": [],
- "templating": {
- "list": [
- {
- "type": "datasource",
- "name": "DS_PROMETHEUS",
- "label": "datasource",
- "current": {},
- "hide": 0,
- "includeAll": false,
- "multi": false,
- "options": [],
- "query": "prometheus",
- "queryValue": "",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false
- },
- {
- "definition": "label_values(model_name)",
- "hide": 0,
- "includeAll": false,
- "label": "model_name",
- "multi": false,
- "name": "model_name",
- "options": [],
- "query": {
- "query": "label_values(model_name)",
- "refId": "StandardVariableQuery"
- },
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "sort": 0,
- "type": "query"
- }
- ]
- },
- "time": {
- "from": "now-5m",
- "to": "now"
- },
- "timepicker": {},
- "timezone": "",
- "title": "Aphrodite",
- "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b",
- "version": 1,
- "weekStart": ""
- }
|