paligemma_example.py 857 B

12345678910111213141516171819202122232425262728293031323334353637383940
  1. import os
  2. from PIL import Image
  3. from aphrodite import LLM, SamplingParams
  4. def run_paligemma():
  5. llm = LLM(model="google/paligemma-3b-mix-224")
  6. prompt = "caption es"
  7. image_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
  8. "burg.jpg")
  9. image = Image.open(image_path)
  10. sampling_params = SamplingParams(temperature=1.1,
  11. min_p=0.06,
  12. max_tokens=512)
  13. outputs = llm.generate(
  14. {
  15. "prompt": prompt,
  16. "multi_modal_data": {
  17. "image": image
  18. },
  19. },
  20. sampling_params=sampling_params)
  21. for o in outputs:
  22. generated_text = o.outputs[0].text
  23. print(generated_text)
  24. def main():
  25. run_paligemma()
  26. if __name__ == "__main__":
  27. main()