inference.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. from vocoder.models.fatchord_version import WaveRNN
  2. from vocoder import hparams as hp
  3. import torch
# Module-level holder for the vocoder network. It stays None until load_model()
# assigns a WaveRNN instance; is_loaded() and infer_waveform() both check it.
_model = None # type: WaveRNN
  5. def load_model(weights_fpath, verbose=True):
  6. global _model, _device
  7. if verbose:
  8. print("Building Wave-RNN")
  9. _model = WaveRNN(
  10. rnn_dims=hp.voc_rnn_dims,
  11. fc_dims=hp.voc_fc_dims,
  12. bits=hp.bits,
  13. pad=hp.voc_pad,
  14. upsample_factors=hp.voc_upsample_factors,
  15. feat_dims=hp.num_mels,
  16. compute_dims=hp.voc_compute_dims,
  17. res_out_dims=hp.voc_res_out_dims,
  18. res_blocks=hp.voc_res_blocks,
  19. hop_length=hp.hop_length,
  20. sample_rate=hp.sample_rate,
  21. mode=hp.voc_mode
  22. )
  23. if torch.cuda.is_available():
  24. _model = _model.cuda()
  25. _device = torch.device('cuda')
  26. else:
  27. _device = torch.device('cpu')
  28. if verbose:
  29. print("Loading model weights at %s" % weights_fpath)
  30. checkpoint = torch.load(weights_fpath, _device)
  31. _model.load_state_dict(checkpoint['model_state'])
  32. _model.eval()
  33. def is_loaded():
  34. return _model is not None
  35. def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
  36. progress_callback=None):
  37. """
  38. Infers the waveform of a mel spectrogram output by the synthesizer (the format must match
  39. that of the synthesizer!)
  40. :param normalize:
  41. :param batched:
  42. :param target:
  43. :param overlap:
  44. :return:
  45. """
  46. if _model is None:
  47. raise Exception("Please load Wave-RNN in memory before using it")
  48. if normalize:
  49. mel = mel / hp.mel_max_abs_value
  50. mel = torch.from_numpy(mel[None, ...])
  51. wav = _model.generate(mel, batched, target, overlap, hp.mu_law, progress_callback)
  52. return wav