grafana.json 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254
  1. {
  2. "__inputs": [
  3. ],
  4. "__elements": {},
  5. "__requires": [
  6. {
  7. "type": "grafana",
  8. "id": "grafana",
  9. "name": "Grafana",
  10. "version": "10.4.2"
  11. },
  12. {
  13. "type": "panel",
  14. "id": "heatmap",
  15. "name": "Heatmap",
  16. "version": ""
  17. },
  18. {
  19. "type": "datasource",
  20. "id": "prometheus",
  21. "name": "Prometheus",
  22. "version": "1.0.0"
  23. },
  24. {
  25. "type": "panel",
  26. "id": "timeseries",
  27. "name": "Time series",
  28. "version": ""
  29. }
  30. ],
  31. "annotations": {
  32. "list": [
  33. {
  34. "builtIn": 1,
  35. "datasource": {
  36. "type": "grafana",
  37. "uid": "-- Grafana --"
  38. },
  39. "enable": true,
  40. "hide": true,
  41. "iconColor": "rgba(0, 211, 255, 1)",
  42. "name": "Annotations & Alerts",
  43. "target": {
  44. "limit": 100,
  45. "matchAny": false,
  46. "tags": [],
  47. "type": "dashboard"
  48. },
  49. "type": "dashboard"
  50. }
  51. ]
  52. },
  53. "description": "Monitoring Aphrodite Inference Server",
  54. "editable": true,
  55. "fiscalYearStartMonth": 0,
  56. "graphTooltip": 0,
  57. "id": null,
  58. "links": [],
  59. "liveNow": false,
  60. "panels": [
  61. {
  62. "datasource": {
  63. "type": "prometheus",
  64. "uid": "${DS_PROMETHEUS}"
  65. },
  66. "description": "End-to-end request latency measured in seconds.",
  67. "fieldConfig": {
  68. "defaults": {
  69. "color": {
  70. "mode": "palette-classic"
  71. },
  72. "custom": {
  73. "axisBorderShow": false,
  74. "axisCenteredZero": false,
  75. "axisColorMode": "text",
  76. "axisLabel": "",
  77. "axisPlacement": "auto",
  78. "barAlignment": 0,
  79. "drawStyle": "line",
  80. "fillOpacity": 0,
  81. "gradientMode": "none",
  82. "hideFrom": {
  83. "legend": false,
  84. "tooltip": false,
  85. "viz": false
  86. },
  87. "insertNulls": false,
  88. "lineInterpolation": "linear",
  89. "lineWidth": 1,
  90. "pointSize": 5,
  91. "scaleDistribution": {
  92. "type": "linear"
  93. },
  94. "showPoints": "auto",
  95. "spanNulls": false,
  96. "stacking": {
  97. "group": "A",
  98. "mode": "none"
  99. },
  100. "thresholdsStyle": {
  101. "mode": "off"
  102. }
  103. },
  104. "mappings": [],
  105. "thresholds": {
  106. "mode": "absolute",
  107. "steps": [
  108. {
  109. "color": "green",
  110. "value": null
  111. },
  112. {
  113. "color": "red",
  114. "value": 80
  115. }
  116. ]
  117. },
  118. "unit": "s"
  119. },
  120. "overrides": []
  121. },
  122. "gridPos": {
  123. "h": 8,
  124. "w": 12,
  125. "x": 0,
  126. "y": 0
  127. },
  128. "id": 9,
  129. "options": {
  130. "legend": {
  131. "calcs": [],
  132. "displayMode": "list",
  133. "placement": "bottom",
  134. "showLegend": true
  135. },
  136. "tooltip": {
  137. "mode": "single",
  138. "sort": "none"
  139. }
  140. },
  141. "targets": [
  142. {
  143. "datasource": {
  144. "type": "prometheus",
  145. "uid": "${DS_PROMETHEUS}"
  146. },
  147. "disableTextWrap": false,
  148. "editorMode": "builder",
  149. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  150. "fullMetaSearch": false,
  151. "includeNullMetadata": false,
  152. "instant": false,
  153. "legendFormat": "P99",
  154. "range": true,
  155. "refId": "A",
  156. "useBackend": false
  157. },
  158. {
  159. "datasource": {
  160. "type": "prometheus",
  161. "uid": "${DS_PROMETHEUS}"
  162. },
  163. "disableTextWrap": false,
  164. "editorMode": "builder",
  165. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  166. "fullMetaSearch": false,
  167. "hide": false,
  168. "includeNullMetadata": false,
  169. "instant": false,
  170. "legendFormat": "P95",
  171. "range": true,
  172. "refId": "B",
  173. "useBackend": false
  174. },
  175. {
  176. "datasource": {
  177. "type": "prometheus",
  178. "uid": "${DS_PROMETHEUS}"
  179. },
  180. "disableTextWrap": false,
  181. "editorMode": "builder",
  182. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  183. "fullMetaSearch": false,
  184. "hide": false,
  185. "includeNullMetadata": false,
  186. "instant": false,
  187. "legendFormat": "P90",
  188. "range": true,
  189. "refId": "C",
  190. "useBackend": false
  191. },
  192. {
  193. "datasource": {
  194. "type": "prometheus",
  195. "uid": "${DS_PROMETHEUS}"
  196. },
  197. "disableTextWrap": false,
  198. "editorMode": "builder",
  199. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  200. "fullMetaSearch": false,
  201. "hide": false,
  202. "includeNullMetadata": false,
  203. "instant": false,
  204. "legendFormat": "P50",
  205. "range": true,
  206. "refId": "D",
  207. "useBackend": false
  208. },
  209. {
  210. "datasource": {
  211. "type": "prometheus",
  212. "uid": "${DS_PROMETHEUS}"
  213. },
  214. "editorMode": "code",
  215. "expr": "rate(aphrodite:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  216. "hide": false,
  217. "instant": false,
  218. "legendFormat": "Average",
  219. "range": true,
  220. "refId": "E"
  221. }
  222. ],
  223. "title": "E2E Request Latency",
  224. "type": "timeseries"
  225. },
  226. {
  227. "datasource": {
  228. "type": "prometheus",
  229. "uid": "${DS_PROMETHEUS}"
  230. },
  231. "description": "Number of prompt and generation tokens processed per second.",
  232. "fieldConfig": {
  233. "defaults": {
  234. "color": {
  235. "mode": "palette-classic"
  236. },
  237. "custom": {
  238. "axisBorderShow": false,
  239. "axisCenteredZero": false,
  240. "axisColorMode": "text",
  241. "axisLabel": "",
  242. "axisPlacement": "auto",
  243. "barAlignment": 0,
  244. "drawStyle": "line",
  245. "fillOpacity": 0,
  246. "gradientMode": "none",
  247. "hideFrom": {
  248. "legend": false,
  249. "tooltip": false,
  250. "viz": false
  251. },
  252. "insertNulls": false,
  253. "lineInterpolation": "linear",
  254. "lineWidth": 1,
  255. "pointSize": 5,
  256. "scaleDistribution": {
  257. "type": "linear"
  258. },
  259. "showPoints": "auto",
  260. "spanNulls": false,
  261. "stacking": {
  262. "group": "A",
  263. "mode": "none"
  264. },
  265. "thresholdsStyle": {
  266. "mode": "off"
  267. }
  268. },
  269. "mappings": [],
  270. "thresholds": {
  271. "mode": "absolute",
  272. "steps": [
  273. {
  274. "color": "green",
  275. "value": null
  276. },
  277. {
  278. "color": "red",
  279. "value": 80
  280. }
  281. ]
  282. }
  283. },
  284. "overrides": []
  285. },
  286. "gridPos": {
  287. "h": 8,
  288. "w": 12,
  289. "x": 12,
  290. "y": 0
  291. },
  292. "id": 8,
  293. "options": {
  294. "legend": {
  295. "calcs": [],
  296. "displayMode": "list",
  297. "placement": "bottom",
  298. "showLegend": true
  299. },
  300. "tooltip": {
  301. "mode": "single",
  302. "sort": "none"
  303. }
  304. },
  305. "targets": [
  306. {
  307. "datasource": {
  308. "type": "prometheus",
  309. "uid": "${DS_PROMETHEUS}"
  310. },
  311. "disableTextWrap": false,
  312. "editorMode": "builder",
  313. "expr": "rate(aphrodite:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
  314. "fullMetaSearch": false,
  315. "includeNullMetadata": false,
  316. "instant": false,
  317. "legendFormat": "Prompt Tokens/Sec",
  318. "range": true,
  319. "refId": "A",
  320. "useBackend": false
  321. },
  322. {
  323. "datasource": {
  324. "type": "prometheus",
  325. "uid": "${DS_PROMETHEUS}"
  326. },
  327. "disableTextWrap": false,
  328. "editorMode": "builder",
  329. "expr": "rate(aphrodite:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
  330. "fullMetaSearch": false,
  331. "hide": false,
  332. "includeNullMetadata": false,
  333. "instant": false,
  334. "legendFormat": "Generation Tokens/Sec",
  335. "range": true,
  336. "refId": "B",
  337. "useBackend": false
  338. }
  339. ],
  340. "title": "Token Throughput",
  341. "type": "timeseries"
  342. },
  343. {
  344. "datasource": {
  345. "type": "prometheus",
  346. "uid": "${DS_PROMETHEUS}"
  347. },
  348. "description": "Inter-token latency (time per output token) in seconds.",
  349. "fieldConfig": {
  350. "defaults": {
  351. "color": {
  352. "mode": "palette-classic"
  353. },
  354. "custom": {
  355. "axisBorderShow": false,
  356. "axisCenteredZero": false,
  357. "axisColorMode": "text",
  358. "axisLabel": "",
  359. "axisPlacement": "auto",
  360. "barAlignment": 0,
  361. "drawStyle": "line",
  362. "fillOpacity": 0,
  363. "gradientMode": "none",
  364. "hideFrom": {
  365. "legend": false,
  366. "tooltip": false,
  367. "viz": false
  368. },
  369. "insertNulls": false,
  370. "lineInterpolation": "linear",
  371. "lineWidth": 1,
  372. "pointSize": 5,
  373. "scaleDistribution": {
  374. "type": "linear"
  375. },
  376. "showPoints": "auto",
  377. "spanNulls": false,
  378. "stacking": {
  379. "group": "A",
  380. "mode": "none"
  381. },
  382. "thresholdsStyle": {
  383. "mode": "off"
  384. }
  385. },
  386. "mappings": [],
  387. "thresholds": {
  388. "mode": "absolute",
  389. "steps": [
  390. {
  391. "color": "green",
  392. "value": null
  393. },
  394. {
  395. "color": "red",
  396. "value": 80
  397. }
  398. ]
  399. },
  400. "unit": "s"
  401. },
  402. "overrides": []
  403. },
  404. "gridPos": {
  405. "h": 8,
  406. "w": 12,
  407. "x": 0,
  408. "y": 8
  409. },
  410. "id": 10,
  411. "options": {
  412. "legend": {
  413. "calcs": [],
  414. "displayMode": "list",
  415. "placement": "bottom",
  416. "showLegend": true
  417. },
  418. "tooltip": {
  419. "mode": "single",
  420. "sort": "none"
  421. }
  422. },
  423. "targets": [
  424. {
  425. "datasource": {
  426. "type": "prometheus",
  427. "uid": "${DS_PROMETHEUS}"
  428. },
  429. "disableTextWrap": false,
  430. "editorMode": "builder",
  431. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  432. "fullMetaSearch": false,
  433. "includeNullMetadata": false,
  434. "instant": false,
  435. "legendFormat": "P99",
  436. "range": true,
  437. "refId": "A",
  438. "useBackend": false
  439. },
  440. {
  441. "datasource": {
  442. "type": "prometheus",
  443. "uid": "${DS_PROMETHEUS}"
  444. },
  445. "disableTextWrap": false,
  446. "editorMode": "builder",
  447. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  448. "fullMetaSearch": false,
  449. "hide": false,
  450. "includeNullMetadata": false,
  451. "instant": false,
  452. "legendFormat": "P95",
  453. "range": true,
  454. "refId": "B",
  455. "useBackend": false
  456. },
  457. {
  458. "datasource": {
  459. "type": "prometheus",
  460. "uid": "${DS_PROMETHEUS}"
  461. },
  462. "disableTextWrap": false,
  463. "editorMode": "builder",
  464. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  465. "fullMetaSearch": false,
  466. "hide": false,
  467. "includeNullMetadata": false,
  468. "instant": false,
  469. "legendFormat": "P90",
  470. "range": true,
  471. "refId": "C",
  472. "useBackend": false
  473. },
  474. {
  475. "datasource": {
  476. "type": "prometheus",
  477. "uid": "${DS_PROMETHEUS}"
  478. },
  479. "disableTextWrap": false,
  480. "editorMode": "builder",
  481. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  482. "fullMetaSearch": false,
  483. "hide": false,
  484. "includeNullMetadata": false,
  485. "instant": false,
  486. "legendFormat": "P50",
  487. "range": true,
  488. "refId": "D",
  489. "useBackend": false
  490. },
  491. {
  492. "datasource": {
  493. "type": "prometheus",
  494. "uid": "${DS_PROMETHEUS}"
  495. },
  496. "editorMode": "code",
  497. "expr": "rate(aphrodite:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  498. "hide": false,
  499. "instant": false,
  500. "legendFormat": "Average",
  501. "range": true,
  502. "refId": "E"
  503. }
  504. ],
  505. "title": "Time Per Output Token Latency",
  506. "type": "timeseries"
  507. },
  508. {
  509. "datasource": {
  510. "type": "prometheus",
  511. "uid": "${DS_PROMETHEUS}"
  512. },
  513. "description": "Number of requests in RUNNING, WAITING, and SWAPPED state",
  514. "fieldConfig": {
  515. "defaults": {
  516. "color": {
  517. "mode": "palette-classic"
  518. },
  519. "custom": {
  520. "axisBorderShow": false,
  521. "axisCenteredZero": false,
  522. "axisColorMode": "text",
  523. "axisLabel": "",
  524. "axisPlacement": "auto",
  525. "barAlignment": 0,
  526. "drawStyle": "line",
  527. "fillOpacity": 0,
  528. "gradientMode": "none",
  529. "hideFrom": {
  530. "legend": false,
  531. "tooltip": false,
  532. "viz": false
  533. },
  534. "insertNulls": false,
  535. "lineInterpolation": "linear",
  536. "lineWidth": 1,
  537. "pointSize": 5,
  538. "scaleDistribution": {
  539. "type": "linear"
  540. },
  541. "showPoints": "auto",
  542. "spanNulls": false,
  543. "stacking": {
  544. "group": "A",
  545. "mode": "none"
  546. },
  547. "thresholdsStyle": {
  548. "mode": "off"
  549. }
  550. },
  551. "mappings": [],
  552. "thresholds": {
  553. "mode": "absolute",
  554. "steps": [
  555. {
  556. "color": "green",
  557. "value": null
  558. },
  559. {
  560. "color": "red",
  561. "value": 80
  562. }
  563. ]
  564. },
  565. "unit": "none"
  566. },
  567. "overrides": []
  568. },
  569. "gridPos": {
  570. "h": 8,
  571. "w": 12,
  572. "x": 12,
  573. "y": 8
  574. },
  575. "id": 3,
  576. "options": {
  577. "legend": {
  578. "calcs": [],
  579. "displayMode": "list",
  580. "placement": "bottom",
  581. "showLegend": true
  582. },
  583. "tooltip": {
  584. "mode": "single",
  585. "sort": "none"
  586. }
  587. },
  588. "targets": [
  589. {
  590. "datasource": {
  591. "type": "prometheus",
  592. "uid": "${DS_PROMETHEUS}"
  593. },
  594. "disableTextWrap": false,
  595. "editorMode": "builder",
  596. "expr": "aphrodite:num_requests_running{model_name=\"$model_name\"}",
  597. "fullMetaSearch": false,
  598. "includeNullMetadata": true,
  599. "instant": false,
  600. "legendFormat": "Num Running",
  601. "range": true,
  602. "refId": "A",
  603. "useBackend": false
  604. },
  605. {
  606. "datasource": {
  607. "type": "prometheus",
  608. "uid": "${DS_PROMETHEUS}"
  609. },
  610. "disableTextWrap": false,
  611. "editorMode": "builder",
  612. "expr": "aphrodite:num_requests_swapped{model_name=\"$model_name\"}",
  613. "fullMetaSearch": false,
  614. "hide": false,
  615. "includeNullMetadata": true,
  616. "instant": false,
  617. "legendFormat": "Num Swapped",
  618. "range": true,
  619. "refId": "B",
  620. "useBackend": false
  621. },
  622. {
  623. "datasource": {
  624. "type": "prometheus",
  625. "uid": "${DS_PROMETHEUS}"
  626. },
  627. "disableTextWrap": false,
  628. "editorMode": "builder",
  629. "expr": "aphrodite:num_requests_waiting{model_name=\"$model_name\"}",
  630. "fullMetaSearch": false,
  631. "hide": false,
  632. "includeNullMetadata": true,
  633. "instant": false,
  634. "legendFormat": "Num Waiting",
  635. "range": true,
  636. "refId": "C",
  637. "useBackend": false
  638. }
  639. ],
  640. "title": "Scheduler State",
  641. "type": "timeseries"
  642. },
  643. {
  644. "datasource": {
  645. "type": "prometheus",
  646. "uid": "${DS_PROMETHEUS}"
  647. },
  648. "description": "P50, P90, P95, P99, and average time to first token (TTFT) latency in seconds.",
  649. "fieldConfig": {
  650. "defaults": {
  651. "color": {
  652. "mode": "palette-classic"
  653. },
  654. "custom": {
  655. "axisBorderShow": false,
  656. "axisCenteredZero": false,
  657. "axisColorMode": "text",
  658. "axisLabel": "",
  659. "axisPlacement": "auto",
  660. "barAlignment": 0,
  661. "drawStyle": "line",
  662. "fillOpacity": 0,
  663. "gradientMode": "none",
  664. "hideFrom": {
  665. "legend": false,
  666. "tooltip": false,
  667. "viz": false
  668. },
  669. "insertNulls": false,
  670. "lineInterpolation": "linear",
  671. "lineWidth": 1,
  672. "pointSize": 5,
  673. "scaleDistribution": {
  674. "type": "linear"
  675. },
  676. "showPoints": "auto",
  677. "spanNulls": false,
  678. "stacking": {
  679. "group": "A",
  680. "mode": "none"
  681. },
  682. "thresholdsStyle": {
  683. "mode": "off"
  684. }
  685. },
  686. "mappings": [],
  687. "thresholds": {
  688. "mode": "absolute",
  689. "steps": [
  690. {
  691. "color": "green",
  692. "value": null
  693. },
  694. {
  695. "color": "red",
  696. "value": 80
  697. }
  698. ]
  699. },
  700. "unit": "s"
  701. },
  702. "overrides": []
  703. },
  704. "gridPos": {
  705. "h": 8,
  706. "w": 12,
  707. "x": 0,
  708. "y": 16
  709. },
  710. "id": 5,
  711. "options": {
  712. "legend": {
  713. "calcs": [],
  714. "displayMode": "list",
  715. "placement": "bottom",
  716. "showLegend": true
  717. },
  718. "tooltip": {
  719. "mode": "single",
  720. "sort": "none"
  721. }
  722. },
  723. "targets": [
  724. {
  725. "datasource": {
  726. "type": "prometheus",
  727. "uid": "${DS_PROMETHEUS}"
  728. },
  729. "disableTextWrap": false,
  730. "editorMode": "builder",
  731. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  732. "fullMetaSearch": false,
  733. "hide": false,
  734. "includeNullMetadata": false,
  735. "instant": false,
  736. "legendFormat": "P99",
  737. "range": true,
  738. "refId": "A",
  739. "useBackend": false
  740. },
  741. {
  742. "datasource": {
  743. "type": "prometheus",
  744. "uid": "${DS_PROMETHEUS}"
  745. },
  746. "disableTextWrap": false,
  747. "editorMode": "builder",
  748. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  749. "fullMetaSearch": false,
  750. "includeNullMetadata": false,
  751. "instant": false,
  752. "legendFormat": "P95",
  753. "range": true,
  754. "refId": "B",
  755. "useBackend": false
  756. },
  757. {
  758. "datasource": {
  759. "type": "prometheus",
  760. "uid": "${DS_PROMETHEUS}"
  761. },
  762. "disableTextWrap": false,
  763. "editorMode": "builder",
  764. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  765. "fullMetaSearch": false,
  766. "hide": false,
  767. "includeNullMetadata": false,
  768. "instant": false,
  769. "legendFormat": "P90",
  770. "range": true,
  771. "refId": "C",
  772. "useBackend": false
  773. },
  774. {
  775. "datasource": {
  776. "type": "prometheus",
  777. "uid": "${DS_PROMETHEUS}"
  778. },
  779. "disableTextWrap": false,
  780. "editorMode": "builder",
  781. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  782. "fullMetaSearch": false,
  783. "hide": false,
  784. "includeNullMetadata": false,
  785. "instant": false,
  786. "legendFormat": "P50",
  787. "range": true,
  788. "refId": "D",
  789. "useBackend": false
  790. },
  791. {
  792. "datasource": {
  793. "type": "prometheus",
  794. "uid": "${DS_PROMETHEUS}"
  795. },
  796. "editorMode": "code",
  797. "expr": "rate(aphrodite:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  798. "hide": false,
  799. "instant": false,
  800. "legendFormat": "Average",
  801. "range": true,
  802. "refId": "E"
  803. }
  804. ],
  805. "title": "Time To First Token Latency",
  806. "type": "timeseries"
  807. },
  808. {
  809. "datasource": {
  810. "type": "prometheus",
  811. "uid": "${DS_PROMETHEUS}"
  812. },
  813. "description": "Percentage of GPU and CPU cache blocks in use by Aphrodite.",
  814. "fieldConfig": {
  815. "defaults": {
  816. "color": {
  817. "mode": "palette-classic"
  818. },
  819. "custom": {
  820. "axisBorderShow": false,
  821. "axisCenteredZero": false,
  822. "axisColorMode": "text",
  823. "axisLabel": "",
  824. "axisPlacement": "auto",
  825. "barAlignment": 0,
  826. "drawStyle": "line",
  827. "fillOpacity": 0,
  828. "gradientMode": "none",
  829. "hideFrom": {
  830. "legend": false,
  831. "tooltip": false,
  832. "viz": false
  833. },
  834. "insertNulls": false,
  835. "lineInterpolation": "linear",
  836. "lineWidth": 1,
  837. "pointSize": 5,
  838. "scaleDistribution": {
  839. "type": "linear"
  840. },
  841. "showPoints": "auto",
  842. "spanNulls": false,
  843. "stacking": {
  844. "group": "A",
  845. "mode": "none"
  846. },
  847. "thresholdsStyle": {
  848. "mode": "off"
  849. }
  850. },
  851. "mappings": [],
  852. "thresholds": {
  853. "mode": "absolute",
  854. "steps": [
  855. {
  856. "color": "green",
  857. "value": null
  858. },
  859. {
  860. "color": "red",
  861. "value": 80
  862. }
  863. ]
  864. },
  865. "unit": "percentunit"
  866. },
  867. "overrides": []
  868. },
  869. "gridPos": {
  870. "h": 8,
  871. "w": 12,
  872. "x": 12,
  873. "y": 16
  874. },
  875. "id": 4,
  876. "options": {
  877. "legend": {
  878. "calcs": [],
  879. "displayMode": "list",
  880. "placement": "bottom",
  881. "showLegend": true
  882. },
  883. "tooltip": {
  884. "mode": "single",
  885. "sort": "none"
  886. }
  887. },
  888. "targets": [
  889. {
  890. "datasource": {
  891. "type": "prometheus",
  892. "uid": "${DS_PROMETHEUS}"
  893. },
  894. "editorMode": "code",
  895. "expr": "aphrodite:gpu_cache_usage_perc{model_name=\"$model_name\"}",
  896. "instant": false,
  897. "legendFormat": "GPU Cache Usage",
  898. "range": true,
  899. "refId": "A"
  900. },
  901. {
  902. "datasource": {
  903. "type": "prometheus",
  904. "uid": "${DS_PROMETHEUS}"
  905. },
  906. "editorMode": "code",
  907. "expr": "aphrodite:cpu_cache_usage_perc{model_name=\"$model_name\"}",
  908. "hide": false,
  909. "instant": false,
  910. "legendFormat": "CPU Cache Usage",
  911. "range": true,
  912. "refId": "B"
  913. }
  914. ],
  915. "title": "Cache Utilization",
  916. "type": "timeseries"
  917. },
  918. {
  919. "datasource": {
  920. "type": "prometheus",
  921. "uid": "${DS_PROMETHEUS}"
  922. },
  923. "description": "Heatmap of request prompt length",
  924. "fieldConfig": {
  925. "defaults": {
  926. "custom": {
  927. "hideFrom": {
  928. "legend": false,
  929. "tooltip": false,
  930. "viz": false
  931. },
  932. "scaleDistribution": {
  933. "type": "linear"
  934. }
  935. }
  936. },
  937. "overrides": []
  938. },
  939. "gridPos": {
  940. "h": 8,
  941. "w": 12,
  942. "x": 0,
  943. "y": 24
  944. },
  945. "id": 12,
  946. "options": {
  947. "calculate": false,
  948. "cellGap": 1,
  949. "cellValues": {
  950. "unit": "none"
  951. },
  952. "color": {
  953. "exponent": 0.5,
  954. "fill": "dark-orange",
  955. "min": 0,
  956. "mode": "scheme",
  957. "reverse": false,
  958. "scale": "exponential",
  959. "scheme": "Spectral",
  960. "steps": 64
  961. },
  962. "exemplars": {
  963. "color": "rgba(255,0,255,0.7)"
  964. },
  965. "filterValues": {
  966. "le": 1e-9
  967. },
  968. "legend": {
  969. "show": true
  970. },
  971. "rowsFrame": {
  972. "layout": "auto",
  973. "value": "Request count"
  974. },
  975. "tooltip": {
  976. "mode": "single",
  977. "showColorScale": false,
  978. "yHistogram": true
  979. },
  980. "yAxis": {
  981. "axisLabel": "Prompt Length",
  982. "axisPlacement": "left",
  983. "reverse": false,
  984. "unit": "none"
  985. }
  986. },
  987. "pluginVersion": "10.4.2",
  988. "targets": [
  989. {
  990. "datasource": {
  991. "type": "prometheus",
  992. "uid": "${DS_PROMETHEUS}"
  993. },
  994. "disableTextWrap": false,
  995. "editorMode": "builder",
  996. "expr": "sum by(le) (increase(aphrodite:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
  997. "format": "heatmap",
  998. "fullMetaSearch": false,
  999. "includeNullMetadata": true,
  1000. "instant": false,
  1001. "legendFormat": "{{le}}",
  1002. "range": true,
  1003. "refId": "A",
  1004. "useBackend": false
  1005. }
  1006. ],
  1007. "title": "Request Prompt Length",
  1008. "type": "heatmap"
  1009. },
  1010. {
  1011. "datasource": {
  1012. "type": "prometheus",
  1013. "uid": "${DS_PROMETHEUS}"
  1014. },
  1015. "description": "Heatmap of request generation length",
  1016. "fieldConfig": {
  1017. "defaults": {
  1018. "custom": {
  1019. "hideFrom": {
  1020. "legend": false,
  1021. "tooltip": false,
  1022. "viz": false
  1023. },
  1024. "scaleDistribution": {
  1025. "type": "linear"
  1026. }
  1027. }
  1028. },
  1029. "overrides": []
  1030. },
  1031. "gridPos": {
  1032. "h": 8,
  1033. "w": 12,
  1034. "x": 12,
  1035. "y": 24
  1036. },
  1037. "id": 13,
  1038. "options": {
  1039. "calculate": false,
  1040. "cellGap": 1,
  1041. "cellValues": {
  1042. "unit": "none"
  1043. },
  1044. "color": {
  1045. "exponent": 0.5,
  1046. "fill": "dark-orange",
  1047. "min": 0,
  1048. "mode": "scheme",
  1049. "reverse": false,
  1050. "scale": "exponential",
  1051. "scheme": "Spectral",
  1052. "steps": 64
  1053. },
  1054. "exemplars": {
  1055. "color": "rgba(255,0,255,0.7)"
  1056. },
  1057. "filterValues": {
  1058. "le": 1e-9
  1059. },
  1060. "legend": {
  1061. "show": true
  1062. },
  1063. "rowsFrame": {
  1064. "layout": "auto",
  1065. "value": "Request count"
  1066. },
  1067. "tooltip": {
  1068. "mode": "single",
  1069. "showColorScale": false,
  1070. "yHistogram": true
  1071. },
  1072. "yAxis": {
  1073. "axisLabel": "Generation Length",
  1074. "axisPlacement": "left",
  1075. "reverse": false,
  1076. "unit": "none"
  1077. }
  1078. },
  1079. "pluginVersion": "10.4.2",
  1080. "targets": [
  1081. {
  1082. "datasource": {
  1083. "type": "prometheus",
  1084. "uid": "${DS_PROMETHEUS}"
  1085. },
  1086. "disableTextWrap": false,
  1087. "editorMode": "builder",
  1088. "expr": "sum by(le) (increase(aphrodite:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
  1089. "format": "heatmap",
  1090. "fullMetaSearch": false,
  1091. "includeNullMetadata": true,
  1092. "instant": false,
  1093. "legendFormat": "{{le}}",
  1094. "range": true,
  1095. "refId": "A",
  1096. "useBackend": false
  1097. }
  1098. ],
  1099. "title": "Request Generation Length",
  1100. "type": "heatmap"
  1101. },
  1102. {
  1103. "datasource": {
  1104. "type": "prometheus",
  1105. "uid": "${DS_PROMETHEUS}"
  1106. },
  1107. "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.",
  1108. "fieldConfig": {
  1109. "defaults": {
  1110. "color": {
  1111. "mode": "palette-classic"
  1112. },
  1113. "custom": {
  1114. "axisBorderShow": false,
  1115. "axisCenteredZero": false,
  1116. "axisColorMode": "text",
  1117. "axisLabel": "",
  1118. "axisPlacement": "auto",
  1119. "barAlignment": 0,
  1120. "drawStyle": "line",
  1121. "fillOpacity": 0,
  1122. "gradientMode": "none",
  1123. "hideFrom": {
  1124. "legend": false,
  1125. "tooltip": false,
  1126. "viz": false
  1127. },
  1128. "insertNulls": false,
  1129. "lineInterpolation": "linear",
  1130. "lineWidth": 1,
  1131. "pointSize": 5,
  1132. "scaleDistribution": {
  1133. "type": "linear"
  1134. },
  1135. "showPoints": "auto",
  1136. "spanNulls": false,
  1137. "stacking": {
  1138. "group": "A",
  1139. "mode": "none"
  1140. },
  1141. "thresholdsStyle": {
  1142. "mode": "off"
  1143. }
  1144. },
  1145. "mappings": [],
  1146. "thresholds": {
  1147. "mode": "absolute",
  1148. "steps": [
  1149. {
  1150. "color": "green",
  1151. "value": null
  1152. },
  1153. {
  1154. "color": "red",
  1155. "value": 80
  1156. }
  1157. ]
  1158. }
  1159. },
  1160. "overrides": []
  1161. },
  1162. "gridPos": {
  1163. "h": 8,
  1164. "w": 12,
  1165. "x": 0,
  1166. "y": 32
  1167. },
  1168. "id": 11,
  1169. "options": {
  1170. "legend": {
  1171. "calcs": [],
  1172. "displayMode": "list",
  1173. "placement": "bottom",
  1174. "showLegend": true
  1175. },
  1176. "tooltip": {
  1177. "mode": "single",
  1178. "sort": "none"
  1179. }
  1180. },
  1181. "targets": [
  1182. {
  1183. "datasource": {
  1184. "type": "prometheus",
  1185. "uid": "${DS_PROMETHEUS}"
  1186. },
  1187. "disableTextWrap": false,
  1188. "editorMode": "builder",
  1189. "expr": "sum by(finished_reason) (increase(aphrodite:request_success_total{model_name=\"$model_name\"}[$__rate_interval]))",
  1190. "fullMetaSearch": false,
  1191. "includeNullMetadata": true,
  1192. "instant": false,
  1193. "interval": "",
  1194. "legendFormat": "__auto",
  1195. "range": true,
  1196. "refId": "A",
  1197. "useBackend": false
  1198. }
  1199. ],
  1200. "title": "Finish Reason",
  1201. "type": "timeseries"
  1202. }
  1203. ],
  1204. "refresh": "",
  1205. "schemaVersion": 39,
  1206. "tags": [],
  1207. "templating": {
  1208. "list": [
  1209. {
  1210. "type": "datasource",
  1211. "name": "DS_PROMETHEUS",
  1212. "label": "datasource",
  1213. "current": {},
  1214. "hide": 0,
  1215. "includeAll": false,
  1216. "multi": false,
  1217. "options": [],
  1218. "query": "prometheus",
  1219. "queryValue": "",
  1220. "refresh": 1,
  1221. "regex": "",
  1222. "skipUrlSync": false
  1223. },
  1224. {
  1225. "definition": "label_values(model_name)",
  1226. "hide": 0,
  1227. "includeAll": false,
  1228. "label": "model_name",
  1229. "multi": false,
  1230. "name": "model_name",
  1231. "options": [],
  1232. "query": {
  1233. "query": "label_values(model_name)",
  1234. "refId": "StandardVariableQuery"
  1235. },
  1236. "refresh": 1,
  1237. "regex": "",
  1238. "skipUrlSync": false,
  1239. "sort": 0,
  1240. "type": "query"
  1241. }
  1242. ]
  1243. },
  1244. "time": {
  1245. "from": "now-5m",
  1246. "to": "now"
  1247. },
  1248. "timepicker": {},
  1249. "timezone": "",
  1250. "title": "Aphrodite",
  1251. "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b",
  1252. "version": 1,
  1253. "weekStart": ""
  1254. }