123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051 |
- """aphrodite.endpoints.api_server with some extra logging for testing."""
- from typing import Any, Dict, Iterable
- import uvicorn
- from fastapi.responses import JSONResponse, Response
- import aphrodite.endpoints.api_server
- from aphrodite.engine.args_tools import AsyncEngineArgs
- from aphrodite.engine.async_aphrodite import AsyncAphrodite
- from aphrodite.common.utils import FlexibleArgumentParser
# Reuse the FastAPI application object exposed by the stock API server
# module so that routes registered in this file are attached to that app.
app = aphrodite.endpoints.api_server.app
class AsyncAphroditeWithStats(AsyncAphrodite):
    """AsyncAphrodite subclass that keeps a tally of aborted request ids."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the parent and start the abort tally at 0."""
        super().__init__(*args, **kwargs)
        # Running total of request ids handed to ``_engine_abort``.
        self._num_aborts = 0

    async def _engine_abort(self, request_ids: Iterable[str]):
        """Add the number of ids to the tally, then delegate to the parent.

        Args:
            request_ids: Ids of the requests being aborted.
        """
        # Materialize once: the ids are both counted here and forwarded to
        # the parent, which a single-pass iterable could not support.
        pending = [*request_ids]
        self._num_aborts = self._num_aborts + len(pending)
        await super()._engine_abort(pending)

    def testing_stats(self) -> Dict[str, Any]:
        """Return the abort tally under the ``num_aborted_requests`` key."""
        report: Dict[str, Any] = {}
        report["num_aborted_requests"] = self._num_aborts
        return report
@app.get("/stats")
def stats() -> Response:
    """Serve the engine's testing statistics as a JSON response."""
    # ``engine`` is the module-level name bound in the ``__main__`` block.
    payload = engine.testing_stats()
    return JSONResponse(payload)
if __name__ == "__main__":
    # Server options are declared here; AsyncEngineArgs then adds its own
    # options to the same parser.
    cli_parser = FlexibleArgumentParser()
    cli_parser.add_argument("--host", type=str, default="localhost")
    cli_parser.add_argument("--port", type=int, default=8000)
    cli_parser = AsyncEngineArgs.add_cli_args(cli_parser)
    args = cli_parser.parse_args()

    # ``engine`` must remain a module-level name: the /stats handler reads it.
    engine_args = AsyncEngineArgs.from_cli_args(args)
    engine = AsyncAphroditeWithStats.from_engine_args(engine_args)

    # Install the stats-tracking engine on the stock API server module.
    api_server_module = aphrodite.endpoints.api_server
    api_server_module.engine = engine

    server_options = {
        "host": args.host,
        "port": args.port,
        "log_level": "debug",
        "timeout_keep_alive": api_server_module.TIMEOUT_KEEP_ALIVE,
    }
    uvicorn.run(app, **server_options)
|