test_tokenizer.py 649 B

1234567891011121314151617181920
  1. import pytest
  2. from transformers import PreTrainedTokenizerBase
  3. from aphrodite.transformers_utils.tokenizer import get_tokenizer
  # Hugging Face model identifiers whose tokenizers the parametrized tests
  # below are run against (one test case per entry).
  TOKENIZER_NAMES = [
      "facebook/opt-125m",
      "gpt2",
  ]
  @pytest.mark.parametrize("tokenizer_name", TOKENIZER_NAMES)
  def test_tokenizer_revision(tokenizer_name: str) -> None:
      """Check that ``get_tokenizer`` honours the ``revision`` argument.

      For each name in ``TOKENIZER_NAMES`` the test first requests the
      ``"main"`` revision and expects a ``PreTrainedTokenizerBase`` instance,
      then requests the ``"never"`` revision and expects an ``OSError`` whose
      message matches ``'not a valid git identifier'``.

      Args:
          tokenizer_name: Model identifier passed straight to
              ``get_tokenizer``.

      NOTE(review): presumably this resolves the revisions against the remote
      model repository and therefore needs network access (or a warm cache)
      -- confirm against ``get_tokenizer``.
      """
      # Assume that "main" branch always exists
      tokenizer = get_tokenizer(tokenizer_name, revision="main")
      assert isinstance(tokenizer, PreTrainedTokenizerBase)

      # Assume that "never" branch always does not exist
      with pytest.raises(OSError, match='not a valid git identifier'):
          get_tokenizer(tokenizer_name, revision="never")