# c_weird_expression.py (1.4 KB)

from evaluator import *
  2. DESCRIPTION = "This test case is meant to test if the model can correctly evaluate a complex C expression."
  3. TAGS = ['explain', 'c']
  4. evaluation = """
  5. Below is a student's answer to a question about what a particular C expression evaluates to: <A>
  6. Does the student's final answer say that the expression evaluates to *x+2 (or even v+2 is okay)?
  7. Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that the answer is *x+2. Giving something that's equal but more complicated is wrong.
  8. If the answer is correct say "The student passes" otherwise "The student fails".
  9. """
  10. def parse(x):
  11. return x.lower().split("final answer:")[-1]
  12. question = """
  13. What does the c expression `-~++*x--` evaluate to in simplest form? Ignore side effects. Think out loud then give a final summary. Write "Final answer:" and then give your answer.
  14. """
  15. TestExplainWeirdCEasy = question >> LLMRun() >> PyFunc(parse) >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
  16. question_harder = """
  17. What does the c expression `-~++*x--` evaluate to in simplest form? Ignore side effects. Write "Final answer:" and then give your answer.
  18. """
  19. TestExplainWeirdC = question_harder >> LLMRun() >> PyFunc(parse) >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
  20. if __name__ == "__main__":
  21. print(run_test(TestExplainWeirdCEasy))