# @package _global_
# Experiment override: reuses the gpt2m OpenWebText experiment and swaps in the
# gpt2-xlarge model config and the adamw-zero optimizer config.
# NOTE: keep the `@package` directive as the first line of the file.
defaults:
  - /experiment/owt/gpt2m.yaml
  - override /model/gpt2model: gpt2-xlarge
  - override /optimizer: adamw-zero

datamodule:
  batch_size: 2  # Per GPU

trainer:
  strategy:
    # Class instantiated for the trainer strategy. Presumably a DDP strategy
    # with ZeRO stage-1 style optimizer sharding, judging by the name --
    # confirm against src/utils/ddp_zero1.py.
    _target_: src.utils.ddp_zero1.DDPStrategyZero1
    # NOTE(review): the two flags below look like the usual
    # DistributedDataParallel keyword arguments passed through the strategy;
    # verify that DDPStrategyZero1 forwards them.
    find_unused_parameters: false
    gradient_as_bucket_view: true