浏览代码

feat: add uvloop (#550)

Uvloop is a high-performance asyncio event loop replacement. Add
the option to use it at the risk of possible breakage.

Signed-off-by: kingbri <bdashore3@proton.me>
Brian Dashore 7 月之前
父节点
当前提交
5533ab845e
共有 3 个文件被更改,包括 14 次插入和 1 次删除
  1. 4 0
      aphrodite/endpoints/openai/api_server.py
  2. 8 0
      aphrodite/engine/args_tools.py
  3. 2 1
      requirements-common.txt

+ 4 - 0
aphrodite/endpoints/openai/api_server.py

@@ -4,6 +4,7 @@ import inspect
 import json
 import os
 import re
+import uvloop
 from contextlib import asynccontextmanager
 from http import HTTPStatus
 from typing import AsyncGenerator, List, Optional, Set, Tuple
@@ -520,6 +521,9 @@ def run_server(args):
     else:
         served_model_names = [args.model]
 
+    if args.uvloop:
+        uvloop.install()
+
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncAphrodite.from_engine_args(engine_args)
     tokenizer = get_tokenizer(

+ 8 - 0
aphrodite/engine/args_tools.py

@@ -691,6 +691,7 @@ class AsyncEngineArgs(EngineArgs):
     engine_use_ray: bool = False
     disable_log_requests: bool = False
     max_log_len: int = 0
+    uvloop: bool = False
 
     @staticmethod
     def add_cli_args(
@@ -715,4 +716,11 @@ class AsyncEngineArgs(EngineArgs):
             "ID numbers being printed in log. "
             "Default: unlimited.",
         )
+        parser.add_argument(
+            "--uvloop",
+            action="store_true",
+            default=AsyncEngineArgs.uvloop,
+            help="Use the Uvloop asyncio event loop to possibly increase "
+            "performance"
+        )
         return parser

+ 2 - 1
requirements-common.txt

@@ -24,4 +24,5 @@ hf_transfer # for faster downloads
 sentence-transformers # for embeddings
 tiktoken == 0.6.0 # for DBRX tokenizer
 outlines >= 0.0.27
-lm-format-enforcer >= 0.10.1
+lm-format-enforcer >= 0.10.1
+uvloop