# modified from https://github.com/feng-yufei/shared_debugging_code/blob/main/text_processing/symbols.py
#
# Symbol inventory used to convert text / phoneme strings to integer IDs.
# An ID is simply the position of a symbol in SYMBOLS, so the ORDER of the
# characters in the strings below is significant: inserting, removing or
# reordering a character renumbers every symbol that follows it.

# Padding symbol; it is placed first in SYMBOLS and therefore has ID 0.
PAD = "_"

# Punctuation marks, ending with the plain space character.
PUNCTUATION = ';:,.!?¡¿—…"«»“” '

# ASCII letters, upper case first.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# IPA letters, stress/length marks, modifiers and intonation arrows.
# NOTE: the ASCII apostrophe (') occurs twice near the end of this string, so
# SYMBOLS contains it at two positions. Do not "clean this up" without
# accepting that the IDs of the following symbols change.
IPA_LETTERS = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

# Full inventory, one single-character string per entry:
# pad, then punctuation, then ASCII letters, then IPA symbols.
SYMBOLS = [PAD] + list(PUNCTUATION) + list(LETTERS) + list(IPA_LETTERS)

# ID of the plain space character (first occurrence in SYMBOLS).
SPACE_ID = SYMBOLS.index(" ")

# Symbol -> ID. For a symbol that occurs more than once in SYMBOLS the LAST
# position wins, because later entries overwrite earlier ones in the dict.
SYMBOL_TO_ID = {s: i for i, s in enumerate(SYMBOLS)}

# ID -> symbol; has exactly one entry per position in SYMBOLS.
ID_TO_SYMBOL = dict(enumerate(SYMBOLS))