# symbols.py
"""
Defines the set of symbols used in text input to the model.

The default is a set of ASCII characters that works well for English or text that has been run
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
"""
# from . import cmudict

# Special symbols. They are placed ahead of the regular characters when the
# exported list is built below, so `_pad` is at index 0 and `_eos` at index 1.
_pad = "_"
_eos = "~"

# Regular input characters: upper- and lower-case ASCII letters, followed by
# basic punctuation and the space character.
_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'\"(),-.:;? "

# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
# _arpabet = ["@" + s for s in cmudict.valid_symbols]

# Export all symbols. The order is significant: a symbol's position in this
# list is fixed by the concatenation below (specials first, then characters).
# To enable ARPAbet input, restore the cmudict import and the `_arpabet` line
# above, then append `_arpabet` here.
symbols = [_pad, _eos] + list(_characters)  # + _arpabet