Browse Source

fix: epsilon variance value for RMSNorm

AlpinDale 1 year ago
parent
commit
f2fe14ec1c
2 changed files with 21 additions and 1 deletion
  1. 1 1
      aphrodite/modeling/layers/layernorm.py
  2. 20 0
      aphrodite/modeling/models/gpt_j.py

+ 1 - 1
aphrodite/modeling/layers/layernorm.py

@@ -14,7 +14,7 @@ class RMSNorm(nn.Module):
     def __init__(
         self,
         hidden_size: int,
-        eps: float = 1e-8, # Refer to https://github.com/bzhangGo/rmsnorm/blob/2e726f1a3f106bb719056422f4f9b6aca03d3ce6/rmsnorm_torch.py#L13
+        eps: float = 1e-6, # the epsilon value used by llama models
     ) -> None:
         super().__init__()
         self.weight = nn.Parameter(torch.ones(hidden_size))

+ 20 - 0
aphrodite/modeling/models/gpt_j.py

@@ -0,0 +1,20 @@
+from typing import Dict, List, Optional, Tuple
+
+import torch
+from torch import nn
+from transformers import GPTJConfig
+
+from aphrodite.modeling.metadata import InputMetadata
+from aphrodite.modeling.layers.activation import get_act_fn
+from aphrodite.modeling.layers.attention import PagedAttentionWithRoPE
+from aphrodite.modeling.layers.sampler import Sampler
+from aphrodite.modeling.hf_downloader import hf_model_weights_iterator, load_tensor_parallel_weights
+from aphrodite.modeling.megatron.parallel_state import (
+    get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
+from aphrodite.modeling.megatron.tensor_parallel import (
+    VocabParallelEmbedding, ColumnParallelLinear, RowParallelLinear)
+from aphrodite.common.sequence import SequenceOutputs
+
+KVCache = Tuple[torch.Tensor, torch.Tensor]
+
+class GPTJAttention(nn.Module):