40k.json 1.1 KB

{
  "train": {
    "log_interval": 200,
    "seed": 1234,
    "epochs": 20000,
    "learning_rate": 1e-4,
    "betas": [0.8, 0.99],
    "eps": 1e-9,
    "batch_size": 4,
    "fp16_run": false,
    "lr_decay": 0.999875,
    "segment_size": 12800,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "c_kl": 1.0
  },
  "data": {
    "max_wav_value": 32768.0,
    "sampling_rate": 40000,
    "filter_length": 2048,
    "hop_length": 400,
    "win_length": 2048,
    "n_mel_channels": 125,
    "mel_fmin": 0.0,
    "mel_fmax": null
  },
  "model": {
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0,
    "resblock": "1",
    "resblock_kernel_sizes": [3,7,11],
    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
    "upsample_rates": [10,10,2,2],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [16,16,4,4],
    "use_spectral_norm": false,
    "gin_channels": 256,
    "spk_embed_dim": 109
  }
}