Przeglądaj źródła

feat: add uvloop (#550)

Uvloop is a high-performance asyncio event loop replacement. Add
the option to use it at the risk of possible breakage.

Signed-off-by: kingbri <bdashore3@proton.me>
Brian Dashore 7 miesięcy temu
rodzic
commit
5533ab845e

+ 4 - 0
aphrodite/endpoints/openai/api_server.py

@@ -4,6 +4,7 @@ import inspect
 import json
 import os
 import re
+import uvloop
 from contextlib import asynccontextmanager
 from http import HTTPStatus
 from typing import AsyncGenerator, List, Optional, Set, Tuple
@@ -520,6 +521,9 @@ def run_server(args):
     else:
         served_model_names = [args.model]
 
+    if args.uvloop:
+        uvloop.install()
+
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncAphrodite.from_engine_args(engine_args)
     tokenizer = get_tokenizer(

+ 8 - 0
aphrodite/engine/args_tools.py

@@ -691,6 +691,7 @@ class AsyncEngineArgs(EngineArgs):
     engine_use_ray: bool = False
     disable_log_requests: bool = False
     max_log_len: int = 0
+    uvloop: bool = False
 
     @staticmethod
     def add_cli_args(
@@ -715,4 +716,11 @@ class AsyncEngineArgs(EngineArgs):
             "ID numbers being printed in log. "
             "Default: unlimited.",
         )
+        parser.add_argument(
+            "--uvloop",
+            action="store_true",
+            default=False,
+            help="Use the Uvloop asyncio event loop to possibly increase "
+            "performance"
+        )
         return parser

+ 2 - 1
requirements-common.txt

@@ -24,4 +24,5 @@ hf_transfer # for faster downloads
 sentence-transformers # for embeddings
 tiktoken == 0.6.0 # for DBRX tokenizer
 outlines >= 0.0.27
-lm-format-enforcer >= 0.10.1
+lm-format-enforcer >= 0.10.1
+uvloop