Przeglądaj źródła

feat: add `/metrics` route for kobold (#229)

* remove second redefinition of app

* formatting
AlpinDale 1 rok temu
rodzic
commit
59df05f341
1 zmieniony plik z 12 dodaniami i 3 usunięciami
  1. aphrodite/endpoints/kobold/api_server.py (+12 -3)

+ 12 - 3
aphrodite/endpoints/kobold/api_server.py

@@ -8,13 +8,17 @@ import os
 from http import HTTPStatus
 from typing import List, Tuple, AsyncGenerator
 
+from aioprometheus import MetricsMiddleware
+from aioprometheus.asgi.starlette import metrics
 import uvicorn
-from fastapi import FastAPI, APIRouter, Request, Response
+import fastapi
+from fastapi import APIRouter, Request, Response
 from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 
 from aphrodite.engine.args_tools import AsyncEngineArgs
 from aphrodite.engine.async_aphrodite import AsyncAphrodite
+from aphrodite.engine.metrics import add_global_metrics_labels
 from aphrodite.common.logger import init_logger
 from aphrodite.common.outputs import RequestOutput
 from aphrodite.common.sampling_params import SamplingParams, _SAMPLING_EPS
@@ -28,9 +32,13 @@ logger = init_logger(__name__)
 served_model: str = "Read Only"
 engine: AsyncAphrodite = None
 gen_cache: dict = {}
+app = fastapi.FastAPI()
 
 badwordsids: List[int] = []
 
+app.add_middleware(MetricsMiddleware)  # trace HTTP server metrics
+app.add_route("/metrics", metrics)
+
 
 def _set_badwords(tokenizer, hf_config):  # pylint: disable=redefined-outer-name
     global badwordsids
@@ -47,7 +55,6 @@ def _set_badwords(tokenizer, hf_config):  # pylint: disable=redefined-outer-name
     badwordsids.append(tokenizer.eos_token_id)
 
 
-app = FastAPI()
 kai_api = APIRouter()
 extra_api = APIRouter()
 kobold_lite_ui = ""
@@ -336,7 +343,7 @@ if __name__ == "__main__":
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncAphrodite.from_engine_args(engine_args)
     engine_model_config = asyncio.run(engine.get_model_config())
-    max_model_len = engine_model_config.get_max_model_len()
+    max_model_len = engine_model_config.max_model_len
 
     # A separate tokenizer to map token IDs to strings.
     tokenizer = get_tokenizer(engine_args.tokenizer,
@@ -345,6 +352,8 @@ if __name__ == "__main__":
 
     _set_badwords(tokenizer, engine_model_config.hf_config)
 
+    add_global_metrics_labels(model_name=engine_args.model)
+
     uvicorn.run(app,
                 host=args.host,
                 port=args.port,