internvl.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
# Adapted from
# https://huggingface.co/OpenGVLab/InternVL2-1B/blob/main/configuration_internvl_chat.py
# --------------------------------------------------------
# InternVL
# Copyright (c) 2024 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
  8. from transformers.configuration_utils import PretrainedConfig
  9. class InternVLChatConfig(PretrainedConfig):
  10. model_type = 'internvl_chat'
  11. is_composition = True
  12. def __init__(self,
  13. vision_config=None,
  14. llm_config=None,
  15. use_backbone_lora=0,
  16. use_llm_lora=0,
  17. select_layer=-1,
  18. force_image_size=None,
  19. downsample_ratio=0.5,
  20. template=None,
  21. dynamic_image_size=False,
  22. use_thumbnail=False,
  23. ps_version='v1',
  24. min_dynamic_patch=1,
  25. max_dynamic_patch=6,
  26. **kwargs):
  27. super().__init__(**kwargs)
  28. if vision_config is None:
  29. vision_config = {}
  30. if llm_config is None:
  31. llm_config = {}
  32. self.vision_config = PretrainedConfig(**vision_config)
  33. self.text_config = PretrainedConfig(**llm_config)
  34. self.use_backbone_lora = use_backbone_lora
  35. self.use_llm_lora = use_llm_lora
  36. self.select_layer = select_layer
  37. self.force_image_size = force_image_size
  38. self.downsample_ratio = downsample_ratio
  39. self.template = template
  40. self.dynamic_image_size = dynamic_image_size
  41. self.use_thumbnail = use_thumbnail
  42. self.ps_version = ps_version # pixel shuffle version
  43. self.min_dynamic_patch = min_dynamic_patch
  44. self.max_dynamic_patch = max_dynamic_patch