|
@@ -8,13 +8,17 @@ import os
|
|
|
from http import HTTPStatus
|
|
|
from typing import List, Tuple, AsyncGenerator
|
|
|
|
|
|
+from aioprometheus import MetricsMiddleware
|
|
|
+from aioprometheus.asgi.starlette import metrics
|
|
|
import uvicorn
|
|
|
-from fastapi import FastAPI, APIRouter, Request, Response
|
|
|
+import fastapi
|
|
|
+from fastapi import APIRouter, Request, Response
|
|
|
from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
from aphrodite.engine.args_tools import AsyncEngineArgs
|
|
|
from aphrodite.engine.async_aphrodite import AsyncAphrodite
|
|
|
+from aphrodite.engine.metrics import add_global_metrics_labels
|
|
|
from aphrodite.common.logger import init_logger
|
|
|
from aphrodite.common.outputs import RequestOutput
|
|
|
from aphrodite.common.sampling_params import SamplingParams, _SAMPLING_EPS
|
|
@@ -28,9 +32,13 @@ logger = init_logger(__name__)
|
|
|
served_model: str = "Read Only"
|
|
|
engine: AsyncAphrodite = None
|
|
|
gen_cache: dict = {}
|
|
|
+app = fastapi.FastAPI()
|
|
|
|
|
|
badwordsids: List[int] = []
|
|
|
|
|
|
+app.add_middleware(MetricsMiddleware) # trace HTTP server metrics
|
|
|
+app.add_route("/metrics", metrics)
|
|
|
+
|
|
|
|
|
|
def _set_badwords(tokenizer, hf_config): # pylint: disable=redefined-outer-name
|
|
|
global badwordsids
|
|
@@ -47,7 +55,6 @@ def _set_badwords(tokenizer, hf_config): # pylint: disable=redefined-outer-name
|
|
|
badwordsids.append(tokenizer.eos_token_id)
|
|
|
|
|
|
|
|
|
-app = FastAPI()
|
|
|
kai_api = APIRouter()
|
|
|
extra_api = APIRouter()
|
|
|
kobold_lite_ui = ""
|
|
@@ -336,7 +343,7 @@ if __name__ == "__main__":
|
|
|
engine_args = AsyncEngineArgs.from_cli_args(args)
|
|
|
engine = AsyncAphrodite.from_engine_args(engine_args)
|
|
|
engine_model_config = asyncio.run(engine.get_model_config())
|
|
|
- max_model_len = engine_model_config.get_max_model_len()
|
|
|
+ max_model_len = engine_model_config.max_model_len
|
|
|
|
|
|
# A separate tokenizer to map token IDs to strings.
|
|
|
tokenizer = get_tokenizer(engine_args.tokenizer,
|
|
@@ -345,6 +352,8 @@ if __name__ == "__main__":
|
|
|
|
|
|
_set_badwords(tokenizer, engine_model_config.hf_config)
|
|
|
|
|
|
+ add_global_metrics_labels(model_name=engine_args.model)
|
|
|
+
|
|
|
uvicorn.run(app,
|
|
|
host=args.host,
|
|
|
port=args.port,
|