# draw_flag_bmp.py

  1. from evaluator import *
  2. DESCRIPTION = "Test if the model can write a C program that draws an image. This test requires the ability to understand the .bmp specification, and draw a flag that can be correctly parsed and seen by the evaluator."
  3. TAGS = ['code', 'c', 'visual']
  4. question = """
  5. Write me a C program that emits a .bmp that's a depiction of an american flag to stdout.
  6. """
  7. TestFlagDrawBMP = question >> LLMRun() >> ExtractCode(keep_main=True) >> CRun(out_bytes=True) >> LLMVisionRun("What flag is shown in this image?") >> (SubstringEvaluator("United States") | SubstringEvaluator("USA") | SubstringEvaluator("America"))
  8. question2 = """
  9. Write me a C program that emits a .bmp that's a depiction of an italian flag to stdout.
  10. """
  11. TestEasyFlagDrawBMP = question2 >> LLMRun() >> ExtractCode(keep_main=True) >> CRun(out_bytes=True) >> LLMVisionRun("What flag is shown in this image?") >> (SubstringEvaluator("Italy") | SubstringEvaluator("Italian"))
  12. if __name__ == "__main__":
  13. print(run_test(TestFlagDrawBMP))