فهرست منبع

fix: typos in the attention file

AlpinDale 1 سال پیش
والد
کامیت
16df1763c8
3 فایلهای تغییر یافته به همراه 8 افزوده شده و 7 حذف شده
  1. 2 1
      .gitignore
  2. 3 3
      aphrodite/__init__.py
  3. 3 3
      aphrodite/modeling/layers/attention.py

+ 2 - 1
.gitignore

@@ -3,4 +3,5 @@ __pycache__
 repos
 .vscode
 *.env
-c_cpp_properties.json
+*egg-info
+*.so

+ 3 - 3
aphrodite/__init__.py

@@ -2,7 +2,7 @@ from aphrodite.engine.args_tools import AsyncEngineArgs, EngineArgs
 from aphrodite.engine.async_aphrodite import AsyncAphrodite
 from aphrodite.engine.aphrodite import AphroditeEngine
 from aphrodite.engine.ray_tools import initialize_cluster
-from aphrodite.common.outputs import ChatCompletionOutput, RequestOutput
+from aphrodite.common.outputs import CompletionOutput, RequestOutput
 from aphrodite.common.sampling_params import SamplingParams
 
 __version__ = "0.0"
@@ -15,5 +15,5 @@ __all__ = [
     "initialize_cluster",
     "SamplingParams",
     "RequestOutput",
-    "ChatCompletionOutput",
-]
+    "CompletionOutput",
+]

+ 3 - 3
aphrodite/modeling/layers/attention.py

@@ -13,7 +13,7 @@ from aphrodite import cache_ops
 from aphrodite import pos_encoding_ops
 from aphrodite.modeling.metadata import InputMetadata
 
-_SUPPORTED_HEAD_SIZES = 
+_SUPPORTED_HEAD_SIZES = [64, 80, 96, 112, 128]
 
 class PagedAttention(nn.Module):
     """GPT-style multi-head PagedAttention.
@@ -150,8 +150,8 @@ class PagedAttention(nn.Module):
             )
 
             self.single_query_cached_kv_attention(
-                output[num_prompt_tokens:num_valid_tokens]
-                query[num_prompt_tokens:num_valid_tokens]
+                output[num_prompt_tokens:num_valid_tokens],
+                query[num_prompt_tokens:num_valid_tokens],
                 key_cache,
                 value_cache,
                 input_metadata)