grafana.json 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. {
  2. "annotations": {
  3. "list": [
  4. {
  5. "builtIn": 1,
  6. "datasource": {
  7. "type": "grafana",
  8. "uid": "-- Grafana --"
  9. },
  10. "enable": true,
  11. "hide": true,
  12. "iconColor": "rgba(0, 211, 255, 1)",
  13. "name": "Annotations & Alerts",
  14. "target": {
  15. "limit": 100,
  16. "matchAny": false,
  17. "tags": [],
  18. "type": "dashboard"
  19. },
  20. "type": "dashboard"
  21. }
  22. ]
  23. },
  24. "description": "Monitoring Aphrodite Inference Server",
  25. "editable": true,
  26. "fiscalYearStartMonth": 0,
  27. "graphTooltip": 0,
  28. "id": 29,
  29. "links": [],
  30. "liveNow": false,
  31. "panels": [
  32. {
  33. "datasource": {
  34. "type": "prometheus",
  35. "uid": "prometheus"
  36. },
  37. "description": "End to end request latency measured in seconds.",
  38. "fieldConfig": {
  39. "defaults": {
  40. "color": {
  41. "mode": "palette-classic"
  42. },
  43. "custom": {
  44. "axisCenteredZero": false,
  45. "axisColorMode": "text",
  46. "axisLabel": "",
  47. "axisPlacement": "auto",
  48. "barAlignment": 0,
  49. "drawStyle": "line",
  50. "fillOpacity": 0,
  51. "gradientMode": "none",
  52. "hideFrom": {
  53. "legend": false,
  54. "tooltip": false,
  55. "viz": false
  56. },
  57. "lineInterpolation": "linear",
  58. "lineWidth": 1,
  59. "pointSize": 5,
  60. "scaleDistribution": {
  61. "type": "linear"
  62. },
  63. "showPoints": "auto",
  64. "spanNulls": false,
  65. "stacking": {
  66. "group": "A",
  67. "mode": "none"
  68. },
  69. "thresholdsStyle": {
  70. "mode": "off"
  71. }
  72. },
  73. "mappings": [],
  74. "thresholds": {
  75. "mode": "absolute",
  76. "steps": [
  77. {
  78. "color": "green",
  79. "value": null
  80. },
  81. {
  82. "color": "red",
  83. "value": 80
  84. }
  85. ]
  86. },
  87. "unit": "s"
  88. },
  89. "overrides": []
  90. },
  91. "gridPos": {
  92. "h": 8,
  93. "w": 12,
  94. "x": 0,
  95. "y": 0
  96. },
  97. "id": 9,
  98. "options": {
  99. "legend": {
  100. "calcs": [],
  101. "displayMode": "list",
  102. "placement": "bottom",
  103. "showLegend": true
  104. },
  105. "tooltip": {
  106. "mode": "single",
  107. "sort": "none"
  108. }
  109. },
  110. "targets": [
  111. {
  112. "datasource": {
  113. "type": "prometheus",
  114. "uid": "prometheus"
  115. },
  116. "disableTextWrap": false,
  117. "editorMode": "builder",
  118. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  119. "fullMetaSearch": false,
  120. "includeNullMetadata": false,
  121. "instant": false,
  122. "legendFormat": "P99",
  123. "range": true,
  124. "refId": "A",
  125. "useBackend": false
  126. },
  127. {
  128. "datasource": {
  129. "type": "prometheus",
  130. "uid": "prometheus"
  131. },
  132. "disableTextWrap": false,
  133. "editorMode": "builder",
  134. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  135. "fullMetaSearch": false,
  136. "hide": false,
  137. "includeNullMetadata": false,
  138. "instant": false,
  139. "legendFormat": "P95",
  140. "range": true,
  141. "refId": "B",
  142. "useBackend": false
  143. },
  144. {
  145. "datasource": {
  146. "type": "prometheus",
  147. "uid": "prometheus"
  148. },
  149. "disableTextWrap": false,
  150. "editorMode": "builder",
  151. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  152. "fullMetaSearch": false,
  153. "hide": false,
  154. "includeNullMetadata": false,
  155. "instant": false,
  156. "legendFormat": "P90",
  157. "range": true,
  158. "refId": "C",
  159. "useBackend": false
  160. },
  161. {
  162. "datasource": {
  163. "type": "prometheus",
  164. "uid": "prometheus"
  165. },
  166. "disableTextWrap": false,
  167. "editorMode": "builder",
  168. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  169. "fullMetaSearch": false,
  170. "hide": false,
  171. "includeNullMetadata": false,
  172. "instant": false,
  173. "legendFormat": "P50",
  174. "range": true,
  175. "refId": "D",
  176. "useBackend": false
  177. },
  178. {
  179. "datasource": {
  180. "type": "prometheus",
  181. "uid": "prometheus"
  182. },
  183. "editorMode": "code",
  184. "expr": "rate(aphrodite:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  185. "hide": false,
  186. "instant": false,
  187. "legendFormat": "Average",
  188. "range": true,
  189. "refId": "E"
  190. }
  191. ],
  192. "title": "E2E Request Latency",
  193. "type": "timeseries"
  194. },
  195. {
  196. "datasource": {
  197. "type": "prometheus",
  198. "uid": "prometheus"
  199. },
  200. "description": "Number of prompt and generation tokens processed per second.",
  201. "fieldConfig": {
  202. "defaults": {
  203. "color": {
  204. "mode": "palette-classic"
  205. },
  206. "custom": {
  207. "axisCenteredZero": false,
  208. "axisColorMode": "text",
  209. "axisLabel": "",
  210. "axisPlacement": "auto",
  211. "barAlignment": 0,
  212. "drawStyle": "line",
  213. "fillOpacity": 0,
  214. "gradientMode": "none",
  215. "hideFrom": {
  216. "legend": false,
  217. "tooltip": false,
  218. "viz": false
  219. },
  220. "lineInterpolation": "linear",
  221. "lineWidth": 1,
  222. "pointSize": 5,
  223. "scaleDistribution": {
  224. "type": "linear"
  225. },
  226. "showPoints": "auto",
  227. "spanNulls": false,
  228. "stacking": {
  229. "group": "A",
  230. "mode": "none"
  231. },
  232. "thresholdsStyle": {
  233. "mode": "off"
  234. }
  235. },
  236. "mappings": [],
  237. "thresholds": {
  238. "mode": "absolute",
  239. "steps": [
  240. {
  241. "color": "green",
  242. "value": null
  243. },
  244. {
  245. "color": "red",
  246. "value": 80
  247. }
  248. ]
  249. }
  250. },
  251. "overrides": []
  252. },
  253. "gridPos": {
  254. "h": 8,
  255. "w": 12,
  256. "x": 12,
  257. "y": 0
  258. },
  259. "id": 8,
  260. "options": {
  261. "legend": {
  262. "calcs": [],
  263. "displayMode": "list",
  264. "placement": "bottom",
  265. "showLegend": true
  266. },
  267. "tooltip": {
  268. "mode": "single",
  269. "sort": "none"
  270. }
  271. },
  272. "targets": [
  273. {
  274. "datasource": {
  275. "type": "prometheus",
  276. "uid": "prometheus"
  277. },
  278. "disableTextWrap": false,
  279. "editorMode": "builder",
  280. "expr": "rate(aphrodite:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
  281. "fullMetaSearch": false,
  282. "includeNullMetadata": false,
  283. "instant": false,
  284. "legendFormat": "Prompt Tokens/Sec",
  285. "range": true,
  286. "refId": "A",
  287. "useBackend": false
  288. },
  289. {
  290. "datasource": {
  291. "type": "prometheus",
  292. "uid": "prometheus"
  293. },
  294. "disableTextWrap": false,
  295. "editorMode": "builder",
  296. "expr": "rate(aphrodite:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
  297. "fullMetaSearch": false,
  298. "hide": false,
  299. "includeNullMetadata": false,
  300. "instant": false,
  301. "legendFormat": "Generation Tokens/Sec",
  302. "range": true,
  303. "refId": "B",
  304. "useBackend": false
  305. }
  306. ],
  307. "title": "Token Throughput",
  308. "type": "timeseries"
  309. },
  310. {
  311. "datasource": {
  312. "type": "prometheus",
  313. "uid": "prometheus"
  314. },
  315. "description": "Inter-token latency (time per output token) in seconds.",
  316. "fieldConfig": {
  317. "defaults": {
  318. "color": {
  319. "mode": "palette-classic"
  320. },
  321. "custom": {
  322. "axisCenteredZero": false,
  323. "axisColorMode": "text",
  324. "axisLabel": "",
  325. "axisPlacement": "auto",
  326. "barAlignment": 0,
  327. "drawStyle": "line",
  328. "fillOpacity": 0,
  329. "gradientMode": "none",
  330. "hideFrom": {
  331. "legend": false,
  332. "tooltip": false,
  333. "viz": false
  334. },
  335. "lineInterpolation": "linear",
  336. "lineWidth": 1,
  337. "pointSize": 5,
  338. "scaleDistribution": {
  339. "type": "linear"
  340. },
  341. "showPoints": "auto",
  342. "spanNulls": false,
  343. "stacking": {
  344. "group": "A",
  345. "mode": "none"
  346. },
  347. "thresholdsStyle": {
  348. "mode": "off"
  349. }
  350. },
  351. "mappings": [],
  352. "thresholds": {
  353. "mode": "absolute",
  354. "steps": [
  355. {
  356. "color": "green",
  357. "value": null
  358. },
  359. {
  360. "color": "red",
  361. "value": 80
  362. }
  363. ]
  364. },
  365. "unit": "s"
  366. },
  367. "overrides": []
  368. },
  369. "gridPos": {
  370. "h": 8,
  371. "w": 12,
  372. "x": 0,
  373. "y": 8
  374. },
  375. "id": 10,
  376. "options": {
  377. "legend": {
  378. "calcs": [],
  379. "displayMode": "list",
  380. "placement": "bottom",
  381. "showLegend": true
  382. },
  383. "tooltip": {
  384. "mode": "single",
  385. "sort": "none"
  386. }
  387. },
  388. "targets": [
  389. {
  390. "datasource": {
  391. "type": "prometheus",
  392. "uid": "prometheus"
  393. },
  394. "disableTextWrap": false,
  395. "editorMode": "builder",
  396. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  397. "fullMetaSearch": false,
  398. "includeNullMetadata": false,
  399. "instant": false,
  400. "legendFormat": "P99",
  401. "range": true,
  402. "refId": "A",
  403. "useBackend": false
  404. },
  405. {
  406. "datasource": {
  407. "type": "prometheus",
  408. "uid": "prometheus"
  409. },
  410. "disableTextWrap": false,
  411. "editorMode": "builder",
  412. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  413. "fullMetaSearch": false,
  414. "hide": false,
  415. "includeNullMetadata": false,
  416. "instant": false,
  417. "legendFormat": "P95",
  418. "range": true,
  419. "refId": "B",
  420. "useBackend": false
  421. },
  422. {
  423. "datasource": {
  424. "type": "prometheus",
  425. "uid": "prometheus"
  426. },
  427. "disableTextWrap": false,
  428. "editorMode": "builder",
  429. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  430. "fullMetaSearch": false,
  431. "hide": false,
  432. "includeNullMetadata": false,
  433. "instant": false,
  434. "legendFormat": "P90",
  435. "range": true,
  436. "refId": "C",
  437. "useBackend": false
  438. },
  439. {
  440. "datasource": {
  441. "type": "prometheus",
  442. "uid": "prometheus"
  443. },
  444. "disableTextWrap": false,
  445. "editorMode": "builder",
  446. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  447. "fullMetaSearch": false,
  448. "hide": false,
  449. "includeNullMetadata": false,
  450. "instant": false,
  451. "legendFormat": "P50",
  452. "range": true,
  453. "refId": "D",
  454. "useBackend": false
  455. },
  456. {
  457. "datasource": {
  458. "type": "prometheus",
  459. "uid": "prometheus"
  460. },
  461. "editorMode": "code",
  462. "expr": "rate(aphrodite:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  463. "hide": false,
  464. "instant": false,
  465. "legendFormat": "Average",
  466. "range": true,
  467. "refId": "E"
  468. }
  469. ],
  470. "title": "Time Per Output Token Latency",
  471. "type": "timeseries"
  472. },
  473. {
  474. "datasource": {
  475. "type": "prometheus",
  476. "uid": "prometheus"
  477. },
  478. "description": "Number of requests in the RUNNING, WAITING, and SWAPPED states.",
  479. "fieldConfig": {
  480. "defaults": {
  481. "color": {
  482. "mode": "palette-classic"
  483. },
  484. "custom": {
  485. "axisCenteredZero": false,
  486. "axisColorMode": "text",
  487. "axisLabel": "",
  488. "axisPlacement": "auto",
  489. "barAlignment": 0,
  490. "drawStyle": "line",
  491. "fillOpacity": 0,
  492. "gradientMode": "none",
  493. "hideFrom": {
  494. "legend": false,
  495. "tooltip": false,
  496. "viz": false
  497. },
  498. "lineInterpolation": "linear",
  499. "lineWidth": 1,
  500. "pointSize": 5,
  501. "scaleDistribution": {
  502. "type": "linear"
  503. },
  504. "showPoints": "auto",
  505. "spanNulls": false,
  506. "stacking": {
  507. "group": "A",
  508. "mode": "none"
  509. },
  510. "thresholdsStyle": {
  511. "mode": "off"
  512. }
  513. },
  514. "mappings": [],
  515. "thresholds": {
  516. "mode": "absolute",
  517. "steps": [
  518. {
  519. "color": "green",
  520. "value": null
  521. },
  522. {
  523. "color": "red",
  524. "value": 80
  525. }
  526. ]
  527. },
  528. "unit": "none"
  529. },
  530. "overrides": []
  531. },
  532. "gridPos": {
  533. "h": 8,
  534. "w": 12,
  535. "x": 12,
  536. "y": 8
  537. },
  538. "id": 3,
  539. "options": {
  540. "legend": {
  541. "calcs": [],
  542. "displayMode": "list",
  543. "placement": "bottom",
  544. "showLegend": true
  545. },
  546. "tooltip": {
  547. "mode": "single",
  548. "sort": "none"
  549. }
  550. },
  551. "targets": [
  552. {
  553. "datasource": {
  554. "type": "prometheus",
  555. "uid": "prometheus"
  556. },
  557. "disableTextWrap": false,
  558. "editorMode": "builder",
  559. "expr": "aphrodite:num_requests_running{model_name=\"$model_name\"}",
  560. "fullMetaSearch": false,
  561. "includeNullMetadata": true,
  562. "instant": false,
  563. "legendFormat": "Num Running",
  564. "range": true,
  565. "refId": "A",
  566. "useBackend": false
  567. },
  568. {
  569. "datasource": {
  570. "type": "prometheus",
  571. "uid": "prometheus"
  572. },
  573. "disableTextWrap": false,
  574. "editorMode": "builder",
  575. "expr": "aphrodite:num_requests_swapped{model_name=\"$model_name\"}",
  576. "fullMetaSearch": false,
  577. "hide": false,
  578. "includeNullMetadata": true,
  579. "instant": false,
  580. "legendFormat": "Num Swapped",
  581. "range": true,
  582. "refId": "B",
  583. "useBackend": false
  584. },
  585. {
  586. "datasource": {
  587. "type": "prometheus",
  588. "uid": "prometheus"
  589. },
  590. "disableTextWrap": false,
  591. "editorMode": "builder",
  592. "expr": "aphrodite:num_requests_waiting{model_name=\"$model_name\"}",
  593. "fullMetaSearch": false,
  594. "hide": false,
  595. "includeNullMetadata": true,
  596. "instant": false,
  597. "legendFormat": "Num Waiting",
  598. "range": true,
  599. "refId": "C",
  600. "useBackend": false
  601. }
  602. ],
  603. "title": "Scheduler State",
  604. "type": "timeseries"
  605. },
  606. {
  607. "datasource": {
  608. "type": "prometheus",
  609. "uid": "prometheus"
  610. },
  611. "description": "P50, P90, P95, P99, and average time to first token (TTFT) latency in seconds.",
  612. "fieldConfig": {
  613. "defaults": {
  614. "color": {
  615. "mode": "palette-classic"
  616. },
  617. "custom": {
  618. "axisCenteredZero": false,
  619. "axisColorMode": "text",
  620. "axisLabel": "",
  621. "axisPlacement": "auto",
  622. "barAlignment": 0,
  623. "drawStyle": "line",
  624. "fillOpacity": 0,
  625. "gradientMode": "none",
  626. "hideFrom": {
  627. "legend": false,
  628. "tooltip": false,
  629. "viz": false
  630. },
  631. "lineInterpolation": "linear",
  632. "lineWidth": 1,
  633. "pointSize": 5,
  634. "scaleDistribution": {
  635. "type": "linear"
  636. },
  637. "showPoints": "auto",
  638. "spanNulls": false,
  639. "stacking": {
  640. "group": "A",
  641. "mode": "none"
  642. },
  643. "thresholdsStyle": {
  644. "mode": "off"
  645. }
  646. },
  647. "mappings": [],
  648. "thresholds": {
  649. "mode": "absolute",
  650. "steps": [
  651. {
  652. "color": "green",
  653. "value": null
  654. },
  655. {
  656. "color": "red",
  657. "value": 80
  658. }
  659. ]
  660. },
  661. "unit": "s"
  662. },
  663. "overrides": []
  664. },
  665. "gridPos": {
  666. "h": 8,
  667. "w": 12,
  668. "x": 0,
  669. "y": 16
  670. },
  671. "id": 5,
  672. "options": {
  673. "legend": {
  674. "calcs": [],
  675. "displayMode": "list",
  676. "placement": "bottom",
  677. "showLegend": true
  678. },
  679. "tooltip": {
  680. "mode": "single",
  681. "sort": "none"
  682. }
  683. },
  684. "targets": [
  685. {
  686. "datasource": {
  687. "type": "prometheus",
  688. "uid": "prometheus"
  689. },
  690. "disableTextWrap": false,
  691. "editorMode": "builder",
  692. "expr": "histogram_quantile(0.99, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  693. "fullMetaSearch": false,
  694. "hide": false,
  695. "includeNullMetadata": false,
  696. "instant": false,
  697. "legendFormat": "P99",
  698. "range": true,
  699. "refId": "A",
  700. "useBackend": false
  701. },
  702. {
  703. "datasource": {
  704. "type": "prometheus",
  705. "uid": "prometheus"
  706. },
  707. "disableTextWrap": false,
  708. "editorMode": "builder",
  709. "expr": "histogram_quantile(0.95, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  710. "fullMetaSearch": false,
  711. "includeNullMetadata": false,
  712. "instant": false,
  713. "legendFormat": "P95",
  714. "range": true,
  715. "refId": "B",
  716. "useBackend": false
  717. },
  718. {
  719. "datasource": {
  720. "type": "prometheus",
  721. "uid": "prometheus"
  722. },
  723. "disableTextWrap": false,
  724. "editorMode": "builder",
  725. "expr": "histogram_quantile(0.9, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  726. "fullMetaSearch": false,
  727. "hide": false,
  728. "includeNullMetadata": false,
  729. "instant": false,
  730. "legendFormat": "P90",
  731. "range": true,
  732. "refId": "C",
  733. "useBackend": false
  734. },
  735. {
  736. "datasource": {
  737. "type": "prometheus",
  738. "uid": "prometheus"
  739. },
  740. "disableTextWrap": false,
  741. "editorMode": "builder",
  742. "expr": "histogram_quantile(0.5, sum by(le) (rate(aphrodite:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
  743. "fullMetaSearch": false,
  744. "hide": false,
  745. "includeNullMetadata": false,
  746. "instant": false,
  747. "legendFormat": "P50",
  748. "range": true,
  749. "refId": "D",
  750. "useBackend": false
  751. },
  752. {
  753. "datasource": {
  754. "type": "prometheus",
  755. "uid": "prometheus"
  756. },
  757. "editorMode": "code",
  758. "expr": "rate(aphrodite:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(aphrodite:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
  759. "hide": false,
  760. "instant": false,
  761. "legendFormat": "Average",
  762. "range": true,
  763. "refId": "E"
  764. }
  765. ],
  766. "title": "Time To First Token Latency",
  767. "type": "timeseries"
  768. },
  769. {
  770. "datasource": {
  771. "type": "prometheus",
  772. "uid": "prometheus"
  773. },
  774. "description": "Percentage of GPU and CPU cache blocks used by Aphrodite.",
  775. "fieldConfig": {
  776. "defaults": {
  777. "color": {
  778. "mode": "palette-classic"
  779. },
  780. "custom": {
  781. "axisCenteredZero": false,
  782. "axisColorMode": "text",
  783. "axisLabel": "",
  784. "axisPlacement": "auto",
  785. "barAlignment": 0,
  786. "drawStyle": "line",
  787. "fillOpacity": 0,
  788. "gradientMode": "none",
  789. "hideFrom": {
  790. "legend": false,
  791. "tooltip": false,
  792. "viz": false
  793. },
  794. "lineInterpolation": "linear",
  795. "lineWidth": 1,
  796. "pointSize": 5,
  797. "scaleDistribution": {
  798. "type": "linear"
  799. },
  800. "showPoints": "auto",
  801. "spanNulls": false,
  802. "stacking": {
  803. "group": "A",
  804. "mode": "none"
  805. },
  806. "thresholdsStyle": {
  807. "mode": "off"
  808. }
  809. },
  810. "mappings": [],
  811. "thresholds": {
  812. "mode": "absolute",
  813. "steps": [
  814. {
  815. "color": "green",
  816. "value": null
  817. },
  818. {
  819. "color": "red",
  820. "value": 80
  821. }
  822. ]
  823. },
  824. "unit": "percentunit"
  825. },
  826. "overrides": []
  827. },
  828. "gridPos": {
  829. "h": 8,
  830. "w": 12,
  831. "x": 12,
  832. "y": 16
  833. },
  834. "id": 4,
  835. "options": {
  836. "legend": {
  837. "calcs": [],
  838. "displayMode": "list",
  839. "placement": "bottom",
  840. "showLegend": true
  841. },
  842. "tooltip": {
  843. "mode": "single",
  844. "sort": "none"
  845. }
  846. },
  847. "targets": [
  848. {
  849. "datasource": {
  850. "type": "prometheus",
  851. "uid": "prometheus"
  852. },
  853. "editorMode": "code",
  854. "expr": "aphrodite:gpu_cache_usage_perc{model_name=\"$model_name\"}",
  855. "instant": false,
  856. "legendFormat": "GPU Cache Usage",
  857. "range": true,
  858. "refId": "A"
  859. },
  860. {
  861. "datasource": {
  862. "type": "prometheus",
  863. "uid": "prometheus"
  864. },
  865. "editorMode": "code",
  866. "expr": "aphrodite:cpu_cache_usage_perc{model_name=\"$model_name\"}",
  867. "hide": false,
  868. "instant": false,
  869. "legendFormat": "CPU Cache Usage",
  870. "range": true,
  871. "refId": "B"
  872. }
  873. ],
  874. "title": "Cache Utilization",
  875. "type": "timeseries"
  876. },
  877. {
  878. "type": "heatmap",
  879. "title": "Request Prompt Length",
  880. "description": "Heatmap of request prompt length",
  881. "gridPos": {
  882. "x": 0,
  883. "y": 24,
  884. "w": 12,
  885. "h": 8
  886. },
  887. "datasource": {
  888. "uid": "prometheus",
  889. "type": "prometheus"
  890. },
  891. "id": 12,
  892. "targets": [
  893. {
  894. "datasource": {
  895. "type": "prometheus",
  896. "uid": "prometheus"
  897. },
  898. "refId": "A",
  899. "expr": "sum by(le) (increase(aphrodite:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
  900. "range": true,
  901. "instant": false,
  902. "editorMode": "builder",
  903. "legendFormat": "{{le}}",
  904. "useBackend": false,
  905. "disableTextWrap": false,
  906. "fullMetaSearch": false,
  907. "includeNullMetadata": true,
  908. "format": "heatmap"
  909. }
  910. ],
  911. "options": {
  912. "calculate": false,
  913. "yAxis": {
  914. "axisPlacement": "left",
  915. "reverse": false,
  916. "unit": "none",
  917. "axisLabel": "Prompt Length"
  918. },
  919. "rowsFrame": {
  920. "layout": "auto",
  921. "value": "Request count"
  922. },
  923. "color": {
  924. "mode": "scheme",
  925. "fill": "dark-orange",
  926. "scale": "exponential",
  927. "exponent": 0.5,
  928. "scheme": "Spectral",
  929. "steps": 64,
  930. "reverse": false,
  931. "min": 0
  932. },
  933. "cellGap": 1,
  934. "filterValues": {
  935. "le": 1e-9
  936. },
  937. "tooltip": {
  938. "show": true,
  939. "yHistogram": true
  940. },
  941. "legend": {
  942. "show": true
  943. },
  944. "exemplars": {
  945. "color": "rgba(255,0,255,0.7)"
  946. },
  947. "cellValues": {
  948. "unit": "none"
  949. }
  950. },
  951. "fieldConfig": {
  952. "defaults": {
  953. "custom": {
  954. "scaleDistribution": {
  955. "type": "linear"
  956. },
  957. "hideFrom": {
  958. "tooltip": false,
  959. "viz": false,
  960. "legend": false
  961. }
  962. }
  963. },
  964. "overrides": []
  965. },
  966. "pluginVersion": "10.2.0"
  967. },
  968. {
  969. "datasource": {
  970. "uid": "prometheus",
  971. "type": "prometheus"
  972. },
  973. "type": "heatmap",
  974. "title": "Request Generation Length",
  975. "description": "Heatmap of request generation length",
  976. "gridPos": {
  977. "x": 12,
  978. "y": 24,
  979. "w": 12,
  980. "h": 8
  981. },
  982. "id": 13,
  983. "targets": [
  984. {
  985. "datasource": {
  986. "type": "prometheus",
  987. "uid": "prometheus"
  988. },
  989. "refId": "A",
  990. "expr": "sum by(le) (increase(aphrodite:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
  991. "range": true,
  992. "instant": false,
  993. "editorMode": "builder",
  994. "legendFormat": "{{le}}",
  995. "useBackend": false,
  996. "disableTextWrap": false,
  997. "fullMetaSearch": false,
  998. "includeNullMetadata": true,
  999. "format": "heatmap"
  1000. }
  1001. ],
  1002. "options": {
  1003. "calculate": false,
  1004. "yAxis": {
  1005. "axisPlacement": "left",
  1006. "reverse": false,
  1007. "unit": "none",
  1008. "axisLabel": "Generation Length"
  1009. },
  1010. "rowsFrame": {
  1011. "layout": "auto",
  1012. "value": "Request count"
  1013. },
  1014. "color": {
  1015. "mode": "scheme",
  1016. "fill": "dark-orange",
  1017. "scale": "exponential",
  1018. "exponent": 0.5,
  1019. "scheme": "Spectral",
  1020. "steps": 64,
  1021. "reverse": false,
  1022. "min": 0
  1023. },
  1024. "cellGap": 1,
  1025. "filterValues": {
  1026. "le": 1e-9
  1027. },
  1028. "tooltip": {
  1029. "show": true,
  1030. "yHistogram": true
  1031. },
  1032. "legend": {
  1033. "show": true
  1034. },
  1035. "exemplars": {
  1036. "color": "rgba(255,0,255,0.7)"
  1037. },
  1038. "cellValues": {
  1039. "unit": "none"
  1040. }
  1041. },
  1042. "fieldConfig": {
  1043. "defaults": {
  1044. "custom": {
  1045. "scaleDistribution": {
  1046. "type": "linear"
  1047. },
  1048. "hideFrom": {
  1049. "tooltip": false,
  1050. "viz": false,
  1051. "legend": false
  1052. }
  1053. }
  1054. },
  1055. "overrides": []
  1056. },
  1057. "pluginVersion": "10.2.0"
  1058. },
  1059. {
  1060. "datasource": {
  1061. "type": "prometheus",
  1062. "uid": "prometheus"
  1063. },
  1064. "fieldConfig": {
  1065. "defaults": {
  1066. "custom": {
  1067. "drawStyle": "line",
  1068. "lineInterpolation": "linear",
  1069. "barAlignment": 0,
  1070. "lineWidth": 1,
  1071. "fillOpacity": 0,
  1072. "gradientMode": "none",
  1073. "spanNulls": false,
  1074. "insertNulls": false,
  1075. "showPoints": "auto",
  1076. "pointSize": 5,
  1077. "stacking": {
  1078. "mode": "none",
  1079. "group": "A"
  1080. },
  1081. "axisPlacement": "auto",
  1082. "axisLabel": "",
  1083. "axisColorMode": "text",
  1084. "axisBorderShow": false,
  1085. "scaleDistribution": {
  1086. "type": "linear"
  1087. },
  1088. "axisCenteredZero": false,
  1089. "hideFrom": {
  1090. "tooltip": false,
  1091. "viz": false,
  1092. "legend": false
  1093. },
  1094. "thresholdsStyle": {
  1095. "mode": "off"
  1096. }
  1097. },
  1098. "color": {
  1099. "mode": "palette-classic"
  1100. },
  1101. "mappings": [],
  1102. "thresholds": {
  1103. "mode": "absolute",
  1104. "steps": [
  1105. {
  1106. "color": "green",
  1107. "value": null
  1108. },
  1109. {
  1110. "color": "red",
  1111. "value": 80
  1112. }
  1113. ]
  1114. }
  1115. },
  1116. "overrides": []
  1117. },
  1118. "gridPos": {
  1119. "h": 8,
  1120. "w": 12,
  1121. "x": 0,
  1122. "y": 32
  1123. },
  1124. "id": 11,
  1125. "options": {
  1126. "tooltip": {
  1127. "mode": "single",
  1128. "sort": "none"
  1129. },
  1130. "legend": {
  1131. "showLegend": true,
  1132. "displayMode": "list",
  1133. "placement": "bottom",
  1134. "calcs": []
  1135. }
  1136. },
  1137. "targets": [
  1138. {
  1139. "datasource": {
  1140. "type": "prometheus",
  1141. "uid": "prometheus"
  1142. },
  1143. "disableTextWrap": false,
  1144. "editorMode": "builder",
  1145. "expr": "sum by(finished_reason) (increase(aphrodite:request_success_total{model_name=\"$model_name\"}[$__rate_interval]))",
  1146. "fullMetaSearch": false,
  1147. "includeNullMetadata": true,
  1148. "instant": false,
  1149. "interval": "",
  1150. "legendFormat": "__auto",
  1151. "range": true,
  1152. "refId": "A",
  1153. "useBackend": false
  1154. }
  1155. ],
  1156. "title": "Finish Reason",
  1157. "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.",
  1158. "type": "timeseries"
  1159. }
  1160. ],
  1161. "refresh": "",
  1162. "schemaVersion": 37,
  1163. "style": "dark",
  1164. "tags": [],
  1165. "templating": {
  1166. "list": [
  1167. {
  1168. "current": {
  1169. "selected": false,
  1170. "text": "aphrodite",
  1171. "value": "aphrodite"
  1172. },
  1173. "datasource": {
  1174. "type": "prometheus",
  1175. "uid": "prometheus"
  1176. },
  1177. "definition": "label_values(model_name)",
  1178. "hide": 0,
  1179. "includeAll": false,
  1180. "label": "model_name",
  1181. "multi": false,
  1182. "name": "model_name",
  1183. "options": [],
  1184. "query": {
  1185. "query": "label_values(model_name)",
  1186. "refId": "StandardVariableQuery"
  1187. },
  1188. "refresh": 1,
  1189. "regex": "",
  1190. "skipUrlSync": false,
  1191. "sort": 0,
  1192. "type": "query"
  1193. }
  1194. ]
  1195. },
  1196. "time": {
  1197. "from": "now-5m",
  1198. "to": "now"
  1199. },
  1200. "timepicker": {},
  1201. "timezone": "",
  1202. "title": "Aphrodite Engine",
  1203. "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b",
  1204. "version": 2,
  1205. "weekStart": ""
  1206. }