12345678910111213141516171819202122 |
- from evaluator import *
- DESCRIPTION = "Test if the model can write a C program that draws an image. This test requires the ability to understand the .bmp specification, and draw a flag that can be correctly parsed and seen by the evaluator."
- TAGS = ['code', 'c', 'visual']
- question = """
- Write me a C program that emits a .bmp that's a depiction of an american flag to stdout.
- """
- TestFlagDrawBMP = question >> LLMRun() >> ExtractCode(keep_main=True) >> CRun(out_bytes=True) >> LLMVisionRun("What flag is shown in this image?") >> (SubstringEvaluator("United States") | SubstringEvaluator("USA") | SubstringEvaluator("America"))
- question2 = """
- Write me a C program that emits a .bmp that's a depiction of an italian flag to stdout.
- """
- TestEasyFlagDrawBMP = question2 >> LLMRun() >> ExtractCode(keep_main=True) >> CRun(out_bytes=True) >> LLMVisionRun("What flag is shown in this image?") >> (SubstringEvaluator("Italy") | SubstringEvaluator("Italian"))
- if __name__ == "__main__":
- print(run_test(TestFlagDrawBMP))
|