# synthesizer_preprocess_embeds.py (1.2 KB)

import argparse
from pathlib import Path

from synthesizer.preprocess import create_embeddings
from utils.argutils import print_args
  5. if __name__ == "__main__":
  6. parser = argparse.ArgumentParser(
  7. description="Creates embeddings for the synthesizer from the LibriSpeech utterances.",
  8. formatter_class=argparse.ArgumentDefaultsHelpFormatter
  9. )
  10. parser.add_argument("synthesizer_root", type=Path, help=\
  11. "Path to the synthesizer training data that contains the audios and the train.txt file. "
  12. "If you let everything as default, it should be <datasets_root>/SV2TTS/synthesizer/.")
  13. parser.add_argument("-e", "--encoder_model_fpath", type=Path,
  14. default="saved_models/default/encoder.pt", help=\
  15. "Path your trained encoder model.")
  16. parser.add_argument("-n", "--n_processes", type=int, default=4, help= \
  17. "Number of parallel processes. An encoder is created for each, so you may need to lower "
  18. "this value on GPUs with low memory. Set it to 1 if CUDA is unhappy.")
  19. args = parser.parse_args()
  20. # Preprocess the dataset
  21. print_args(args, parser)
  22. create_embeddings(**vars(args))