yi.py

  1. """ Yi model configuration"""
  2. from transformers.configuration_utils import PretrainedConfig
  3. from transformers.utils import logging
  4. logger = logging.get_logger(__name__)
  5. Yi_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
  6. class YiConfig(PretrainedConfig):
  7. r"""
  8. Reference:
  9. https://huggingface.co/01-ai/Yi-6B/blob/main/configuration_yi.py
  10. """
  11. model_type = "Yi"
  12. keys_to_ignore_at_inference = ["past_key_values"]
  13. def __init__(
  14. self,
  15. vocab_size=64000,
  16. hidden_size=4096,
  17. intermediate_size=11008,
  18. num_hidden_layers=32,
  19. num_attention_heads=32,
  20. num_key_value_heads=4,
  21. hidden_act="silu",
  22. max_position_embeddings=4096,
  23. initializer_range=0.02,
  24. rms_norm_eps=1e-5,
  25. use_cache=True,
  26. pad_token_id=0,
  27. bos_token_id=1,
  28. eos_token_id=2,
  29. tie_word_embeddings=False,
  30. output_attentions=False,
  31. rope_theta=5000000.0,
  32. **kwargs,
  33. ):
  34. self.vocab_size = vocab_size
  35. self.max_position_embeddings = max_position_embeddings
  36. self.hidden_size = hidden_size
  37. self.intermediate_size = intermediate_size
  38. self.num_hidden_layers = num_hidden_layers
  39. self.num_attention_heads = num_attention_heads
  40. # for backward compatibility
  41. if num_key_value_heads is None:
  42. num_key_value_heads = num_attention_heads
  43. self.num_key_value_heads = num_key_value_heads
  44. self.hidden_act = hidden_act
  45. self.initializer_range = initializer_range
  46. self.rms_norm_eps = rms_norm_eps
  47. self.use_cache = use_cache
  48. self.output_attentions = output_attentions
  49. self.rope_theta = rope_theta
  50. super().__init__(
  51. pad_token_id=pad_token_id,
  52. bos_token_id=bos_token_id,
  53. eos_token_id=eos_token_id,
  54. tie_word_embeddings=tie_word_embeddings,
  55. **kwargs,
  56. )
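

# --- Usage sketch (not part of the upstream file): a minimal example of how a
# PretrainedConfig subclass like this is typically exercised, using only the
# standard transformers config API. Guarded so it only runs when the module is
# executed directly, never on import.
if __name__ == "__main__":
    import tempfile

    # Defaults mirror Yi-6B: 32 attention heads sharing 4 KV heads (GQA).
    config = YiConfig()
    print(config.num_attention_heads, config.num_key_value_heads)  # 32 4

    # Any keyword can be overridden, as with every PretrainedConfig subclass;
    # the sizes below are arbitrary, chosen only for illustration.
    tiny = YiConfig(num_hidden_layers=2, hidden_size=256, intermediate_size=688)

    # Standard save/load round trip via the PretrainedConfig API.
    with tempfile.TemporaryDirectory() as tmp:
        tiny.save_pretrained(tmp)
        reloaded = YiConfig.from_pretrained(tmp)
        assert reloaded.hidden_size == 256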