# @package _global_
# Hydra package directive above: the keys in this file are merged at the root
# of the composed config. Keep it as the first line of the file.

# Start from the referenced OWT experiment config, then swap in the
# gpt2-xlarge model and the adamw-zero optimizer config groups.
defaults:
  - /experiment/owt/gpt2m.yaml
  - override /model/gpt2model: gpt2-xlarge
  - override /optimizer: adamw-zero

datamodule:
  batch_size: 2  # Per GPU

trainer:
  strategy:
    # Class that Hydra instantiates for the trainer strategy.
    _target_: src.utils.ddp_zero1.DDPStrategyZero1
    # NOTE(review): the two flags below are presumably forwarded to
    # torch DistributedDataParallel by the strategy -- confirm in
    # src/utils/ddp_zero1.py.
    find_unused_parameters: false
    gradient_as_bucket_view: true