# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/text_processing/symbols.py
"""Symbol inventory and symbol <-> integer ID lookup tables.

The inventory is the padding symbol, followed by punctuation, ASCII letters
and IPA characters, in that order. A symbol's ID is its position in
``SYMBOLS``, so the order and content of the strings below must not change
if existing IDs are to stay valid.
"""

# Padding symbol; it is placed first in SYMBOLS and therefore has ID 0.
PAD = "_"

# Punctuation marks; the final character is a plain space.
PUNCTUATION = ';:,.!?¡¿—…"«»“” '

# ASCII letters, upper case first.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# IPA characters and marks.
# NOTE: the apostrophe occurs twice in this string (on either side of a
# combining mark), so SYMBOLS contains a duplicate entry. SYMBOL_TO_ID keeps
# the later of the two positions, while ID_TO_SYMBOL maps both positions back
# to the apostrophe. It is deliberately left untouched here: removing either
# occurrence would shift the ID of every symbol that follows it.
IPA_LETTERS = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

# Full inventory: one single-character string per entry, in ID order.
SYMBOLS = [PAD, *PUNCTUATION, *LETTERS, *IPA_LETTERS]

# ID of the space character (first occurrence of " " in SYMBOLS).
SPACE_ID = SYMBOLS.index(" ")

# Symbol -> ID. For a symbol that appears more than once, the last position wins.
SYMBOL_TO_ID = {symbol: index for index, symbol in enumerate(SYMBOLS)}

# ID -> symbol, defined for every position in SYMBOLS.
ID_TO_SYMBOL = dict(enumerate(SYMBOLS))