# s1longer.yaml
#
# Training configuration, grouped into five sections: train, optimizer,
# data, model and inference. All values are plain scalars; numeric-looking
# values are intentionally left unquoted so they load as numbers.
---
# Training-loop settings.
train:
  seed: 1234
  epochs: 20
  batch_size: 8
  save_every_n_epoch: 1
  # Loads as the string "16-mixed" (not a number).
  precision: 16-mixed
  gradient_clip: 1.0

# Optimizer / learning-rate schedule settings.
# NOTE(review): presumably lr_init -> lr over warmup_steps, then decaying to
# lr_end by decay_steps -- confirm against the scheduler implementation.
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000

# Dataset / dataloader settings.
data:
  max_eval_sample: 8
  # NOTE(review): unit is presumably seconds -- confirm with the data loader.
  max_sec: 54
  num_workers: 4
  # Must stay equal to model.EOS (both are 1024).
  pad_val: 1024

# Model hyper-parameters.
model:
  # NOTE(review): equals EOS + 1, presumably the EOS id is the last
  # vocabulary entry -- confirm.
  vocab_size: 1025
  phoneme_vocab_size: 512
  embedding_dim: 512
  hidden_dim: 512
  head: 16
  linear_units: 2048
  n_layer: 24
  dropout: 0
  # Must stay equal to data.pad_val.
  EOS: 1024
  random_bert: 0

# Inference-time settings.
inference:
  top_k: 5