processor.py

from typing import cast


def get_processor(
    processor_name: str,
    *args,
    trust_remote_code: bool = False,
    **kwargs,
):
    """Gets a processor for the given model name via HuggingFace."""
    # Don't put this import at the top level:
    # it will call torch.cuda.device_count().
    from transformers import AutoProcessor
    from transformers.processing_utils import ProcessorMixin

    try:
        processor = AutoProcessor.from_pretrained(
            processor_name,
            *args,
            trust_remote_code=trust_remote_code,
            **kwargs,
        )
    except ValueError as e:
        # If the error pertains to the processor class not existing or not
        # currently being imported, suggest using the --trust-remote-code flag.
        # Unlike AutoTokenizer, AutoProcessor does not separate such errors.
        if not trust_remote_code:
            err_msg = (
                "Failed to load the processor. If the processor is "
                "a custom processor not yet available in the HuggingFace "
                "transformers library, consider setting "
                "`trust_remote_code=True` in LLM or using the "
                "`--trust-remote-code` flag in the CLI."
            )
            raise RuntimeError(err_msg) from e
        else:
            raise e

    return cast(ProcessorMixin, processor)
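

# Usage sketch, not part of the original file: "llava-hf/llava-1.5-7b-hf" is
# only an illustrative Hub checkpoint that ships a multimodal processor; any
# processor-bearing model id works. The first call downloads the processor
# config, so network access is assumed.
if __name__ == "__main__":
    processor = get_processor("llava-hf/llava-1.5-7b-hf")
    # ProcessorMixin subclasses bundle the tokenizer and image processor;
    # print the concrete class to confirm what was loaded.
    print(type(processor).__name__)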