
fix: Phi3.5 Mini and MoE LoRA inference (#1070)

AlpinDale, 2 months ago
parent commit ec17b6c4d0

+ 1 - 1
aphrodite/modeling/models/__init__.py

@@ -47,7 +47,7 @@ _GENERATION_MODELS = {
     "OPTForCausalLM": ("opt", "OPTForCausalLM"),
     "OrionForCausalLM": ("orion", "OrionForCausalLM"),
     "PhiForCausalLM": ("phi", "PhiForCausalLM"),
-    "Phi3ForCausalLM": ("llama", "LlamaForCausalLM"),
+    "Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"),
     "PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"),
     "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
     "Qwen2MoeForCausalLM": ("qwen2_moe", "Qwen2MoeForCausalLM"),

+ 15 - 0
aphrodite/modeling/models/phi3.py

@@ -0,0 +1,15 @@
+# coding=utf-8
+# Adapted from llama.py
+"""Inference-only Phi3 model code inherit from Llama.py"""
+from aphrodite.modeling.models.llama import LlamaForCausalLM
+
+
+class Phi3ForCausalLM(LlamaForCausalLM):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "qkv_proj",
+        ],
+        "gate_up_proj": [
+            "gate_up_proj",
+        ],
+    }
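The new class only overrides packed_modules_mapping. Llama's mapping packs separate q_proj/k_proj/v_proj (and gate_proj/up_proj) checkpoint tensors into fused modules at load time, but Phi-3 checkpoints already ship qkv_proj and gate_up_proj fused, so LoRA adapters trained on Phi-3 target the fused names directly. An identity mapping keeps the LoRA weight matching from looking for sub-projections that don't exist. A rough sketch of how such a mapping is consumed, assuming a hypothetical subweights_for helper (not Aphrodite's actual LoRA machinery):

```python
# Llama: q/k/v and gate/up tensors are separate in the checkpoint
# and get packed into fused modules at load time.
LLAMA_MAPPING = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}
# Phi-3: the checkpoint (and hence any LoRA adapter) is already fused.
PHI3_MAPPING = {
    "qkv_proj": ["qkv_proj"],
    "gate_up_proj": ["gate_up_proj"],
}

def subweights_for(packed_name: str, mapping: dict) -> list:
    """Names of the adapter tensors expected to feed one fused module."""
    return mapping.get(packed_name, [packed_name])

# Under the old registry entry, Phi-3 used Llama's mapping, so the loader
# expected q_proj/k_proj/v_proj LoRA tensors that a Phi-3 adapter
# does not contain:
assert subweights_for("qkv_proj", LLAMA_MAPPING) == ["q_proj", "k_proj", "v_proj"]
assert subweights_for("qkv_proj", PHI3_MAPPING) == ["qkv_proj"]
```

Subclassing LlamaForCausalLM keeps the rest of the Phi-3 forward pass identical to Llama's, so the fix stays at fifteen lines.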

+ 4 - 0
aphrodite/modeling/models/phimoe.py

@@ -490,6 +490,10 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA):
         "o_proj",
         "embed_tokens",
         "lm_head",
+        "w1",
+        "w2",
+        "w3",
+        "gate",
     ]
     embedding_modules = {
         "embed_tokens": "input_embeddings",