"""hparams.py -- hyperparameters for the WaveRNN vocoder.

The audio settings are copied from the synthesizer's hyperparameters so
that they match the values of the synthesizer. The ``voc_*`` settings
configure the vocoder model, its training and its generation.
"""
from synthesizer.hparams import hparams as _syn_hp


# Audio settings ---------------------------------------------------------------
# Match the values of the synthesizer.
sample_rate = _syn_hp.sample_rate
n_fft = _syn_hp.n_fft
num_mels = _syn_hp.num_mels
hop_length = _syn_hp.hop_size
win_length = _syn_hp.win_size
fmin = _syn_hp.fmin
min_level_db = _syn_hp.min_level_db
ref_level_db = _syn_hp.ref_level_db
mel_max_abs_value = _syn_hp.max_abs_value
preemphasis = _syn_hp.preemphasis
apply_preemphasis = _syn_hp.preemphasize

# Bit depth of signal.
bits = 9
# Recommended to suppress noise if using raw bits in hp.voc_mode below.
mu_law = True


# WAVERNN / VOCODER ------------------------------------------------------------
# Either 'RAW' (softmax on raw bits) or 'MOL' (sample from mixture of
# logistics).
voc_mode = 'RAW'
# NB - this needs to correctly factorise hop_length.
voc_upsample_factors = (5, 5, 8)
voc_rnn_dims = 512
voc_fc_dims = 512
voc_compute_dims = 128
voc_res_out_dims = 128
voc_res_blocks = 10

# Training
voc_batch_size = 100
voc_lr = 1e-4
# Number of samples to generate at each checkpoint.
voc_gen_at_checkpoint = 5
# This will pad the input so that the resnet can 'see' wider than input
# length.
voc_pad = 2
# Must be a multiple of hop_length.
voc_seq_len = hop_length * 5

# Generating / Synthesizing
# Very fast (realtime+) single utterance batched generation.
voc_gen_batched = True
# Target number of samples to be generated in each batch entry.
voc_target = 8000
# Number of samples for crossfading between batches.
voc_overlap = 400