gpt2-hf.yaml

defaults:
  - _self_
  - gpt2model: gpt2-small

_target_: transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel
_recursive_: True
config:
  _target_: transformers.GPT2Config
  # Mistral's config: https://github.com/stanford-crfm/mistral/blob/main/conf/models/gpt2-small.yaml
  # However, reorder_and_upcast_attn slows things down
  reorder_and_upcast_attn: false
  scale_attn_by_inverse_layer_idx: true
  n_positions: ${datamodule.max_length}
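
For reference, a minimal sketch of how hydra.utils.instantiate would turn this file into a model. The filename path, the stand-in datamodule node, and the use of OmegaConf.load instead of Hydra's full composition are assumptions for illustration: in a real run the defaults list (including the gpt2model group) is resolved by Hydra's config composition rather than popped by hand.

# Sketch only: instantiate this config outside a full Hydra run.
from hydra.utils import instantiate
from omegaconf import OmegaConf

model_cfg = OmegaConf.load("gpt2-hf.yaml")      # assumed path to this file
model_cfg.pop("defaults")                        # handled by Hydra composition in a real run

cfg = OmegaConf.create({
    "datamodule": {"max_length": 1024},          # stand-in so ${datamodule.max_length} resolves
    "model": model_cfg,
})

# _recursive_: True makes instantiate() build the nested `config` node first
# (a transformers.GPT2Config), then pass it to GPT2LMHeadModel(config=...).
model = instantiate(cfg.model)
print(type(model).__name__, model.config.n_positions)   # GPT2LMHeadModel 1024

Note that without the gpt2model/gpt2-small group pulled in by the defaults list, GPT2Config falls back to its own defaults for n_embd, n_layer, and n_head; the composed config in an actual training run supplies those values.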