# Hydra config that builds a Hugging Face GPT-2 language-modeling head model.

# Composition order: this file's own keys (_self_) come first, followed by the
# gpt2-small option of the gpt2model config group.
defaults:
  - _self_
  - gpt2model: gpt2-small

_target_: transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel
# Nested nodes that carry their own _target_ (the config node below) are
# instantiated as well.
_recursive_: true
config:
  _target_: transformers.GPT2Config
  # Mistral's config:
  # https://github.com/stanford-crfm/mistral/blob/main/conf/models/gpt2-small.yaml
  # However, reorder_and_upcast_attn slows things down, so it is turned off here.
  reorder_and_upcast_attn: false
  scale_attn_by_inverse_layer_idx: true
  # Interpolated from the datamodule's max_length setting.
  n_positions: ${datamodule.max_length}