# tokenizer.py
  1. import os
  2. from typing import Optional, Union
  3. import huggingface_hub
  4. from loguru import logger
  5. from transformers import (AutoTokenizer, PreTrainedTokenizer,
  6. PreTrainedTokenizerFast)
  7. from aphrodite.common.config import APHRODITE_USE_MODELSCOPE
  8. from aphrodite.common.utils import make_async
  9. from aphrodite.lora.request import LoRARequest
  10. from aphrodite.transformers_utils.tokenizers import BaichuanTokenizer
  11. def get_cached_tokenizer(
  12. tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
  13. ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
  14. """Get tokenizer with cached properties.
  15. This will patch the tokenizer object in place.
  16. By default, transformers will recompute multiple tokenizer properties
  17. each time they are called, leading to a significant slowdown. This
  18. function caches these properties for faster access."""
  19. tokenizer_all_special_ids = set(tokenizer.all_special_ids)
  20. tokenizer_all_special_tokens_extended = (
  21. tokenizer.all_special_tokens_extended)
  22. tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
  23. tokenizer_len = len(tokenizer)
  24. class CachedTokenizer(tokenizer.__class__): # type: ignore
  25. @property
  26. def all_special_ids(self):
  27. return tokenizer_all_special_ids
  28. @property
  29. def all_special_tokens(self):
  30. return tokenizer_all_special_tokens
  31. @property
  32. def all_special_tokens_extended(self):
  33. return tokenizer_all_special_tokens_extended
  34. def __len__(self):
  35. return tokenizer_len
  36. CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
  37. tokenizer.__class__ = CachedTokenizer
  38. return tokenizer
  39. def get_tokenizer(
  40. tokenizer_name: str,
  41. *args,
  42. tokenizer_mode: str = "auto",
  43. trust_remote_code: bool = False,
  44. revision: Optional[str] = None,
  45. download_dir: Optional[str] = None,
  46. **kwargs,
  47. ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
  48. """Gets a tokenizer for the given model name via Huggingface/modelscope."""
  49. if APHRODITE_USE_MODELSCOPE:
  50. # download model from ModelScope hub,
  51. # lazy import so that modelscope is not required for normal use.
  52. # pylint: disable=C.
  53. from modelscope.hub.snapshot_download import snapshot_download
  54. # Only set the tokenizer here, model will be downloaded on the workers.
  55. if not os.path.exists(tokenizer_name):
  56. tokenizer_path = snapshot_download(
  57. model_id=tokenizer_name,
  58. cache_dir=download_dir,
  59. revision=revision,
  60. local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
  61. # Ignore weights - we only need the tokenizer.
  62. ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
  63. tokenizer_name = tokenizer_path
  64. if tokenizer_mode == "slow":
  65. if kwargs.get("use_fast", False):
  66. raise ValueError(
  67. "Cannot use the fast tokenizer in slow tokenizer mode.")
  68. kwargs["use_fast"] = False
  69. if "truncation_side" not in kwargs:
  70. kwargs["truncation_side"] = "left"
  71. try:
  72. tokenizer = AutoTokenizer.from_pretrained(
  73. tokenizer_name,
  74. *args,
  75. trust_remote_code=trust_remote_code,
  76. revision=revision,
  77. **kwargs)
  78. except ValueError as e:
  79. # If the error pertains to the tokenizer class not existing or not
  80. # currently being imported, suggest using the --trust-remote-code flag.
  81. if (not trust_remote_code and
  82. ("does not exist or is not currently imported." in str(e)
  83. or "requires you to execute the tokenizer file" in str(e))):
  84. err_msg = (
  85. "Failed to load the tokenizer. If the tokenizer is a custom "
  86. "tokenizer not yet available in the HuggingFace transformers "
  87. "library, consider setting `trust_remote_code=True` in LLM "
  88. "or using the `--trust-remote-code` flag in the CLI.")
  89. raise RuntimeError(err_msg) from e
  90. else:
  91. raise e
  92. except AttributeError as e:
  93. if "BaichuanTokenizer" in str(e):
  94. # This is for the error "'BaichuanTokenizer' object has no
  95. # attribute 'sp_model'".
  96. tokenizer = BaichuanTokenizer.from_pretrained(
  97. tokenizer_name,
  98. *args,
  99. trust_remote_code=trust_remote_code,
  100. revision=revision,
  101. **kwargs)
  102. else:
  103. raise e
  104. if not isinstance(tokenizer, PreTrainedTokenizerFast):
  105. logger.warning(
  106. "Using a slow tokenizer. This might cause a significant "
  107. "slowdown. Consider using a fast tokenizer instead.")
  108. return get_cached_tokenizer(tokenizer)
  109. def get_lora_tokenizer(lora_request: LoRARequest, *args,
  110. **kwargs) -> Optional[PreTrainedTokenizer]:
  111. if lora_request is None:
  112. return None
  113. try:
  114. tokenizer = get_tokenizer(lora_request.lora_local_path, *args,
  115. **kwargs)
  116. except OSError as e:
  117. # No tokenizer was found in the LoRA folder,
  118. # use base model tokenizer
  119. logger.warning(
  120. f"No tokenizer found in {lora_request.lora_local_path}, "
  121. "using base model tokenizer instead. "
  122. f"(Exception: {str(e)})")
  123. tokenizer = None
  124. return tokenizer
# Async-callable variant of get_lora_tokenizer, built with make_async
# (presumably dispatches the blocking load to an executor — confirm in
# aphrodite.common.utils).
get_lora_tokenizer_async = make_async(get_lora_tokenizer)