vague_sum_data.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. from evaluator import *
  2. DESCRIPTION = "Test if the model can infer what data to sum and what to ignore by example with vague instructions."
  3. TAGS = ['code', 'python']
  4. question = '''
  5. Here's some data
  6. QUERY A361
  7. Found at: 160392127418
  8. Number of times present: 4
  9. Found at: 150158692866
  10. Number of times present: 3
  11. Found at: 122340445248
  12. Number of times present: 17
  13. Found at: 137248651778
  14. Number of times present: 84
  15. Found at: 131891981708
  16. Number of times present: 66
  17. QUERY 32B57
  18. Found at: 194561247604
  19. Number of times present: 4
  20. Found at: 82888490886
  21. Number of times present: 3
  22. Found at: 115836535414
  23. Number of times present: 19
  24. Found at: 79615924948
  25. Number of times present: 80
  26. Write a python function called `process` that will take data like this as an argument and return a dict where the keys are the like A361/32B57 and the values are how often its present in total.
  27. '''
  28. data = "QUERY A361\\nFound at: 160392127418\\nNumber of times present: 4\\n\\n\\nFound at: 150158692866\\nNumber of times present: 4\\n\\n\\nFound at: 122340445248\\nNumber of times present: 17\\n\\n\\nFound at: 137248651778\\nNumber of times present: 84\\n\\n\\nFound at: 131891981708\\nNumber of times present: 66\\n\\n\\nQUERY 32B57\\nFound at: 194561247604\\nNumber of times present: 4\\n\\n\\nFound at: 82888490886\\nNumber of times present: 3\\n\\n\\nFound at: 115836535414\\nNumber of times present: 19\\n\\n\\nFound at: 79615924948\\nNumber of times present: 81\\n"
  29. test_case, answer = make_python_test([(f"process('{data}')", "{'A361': 175, '32B57': 107}")])
  30. TestSumSomeData = question >> LLMRun() >> ExtractCode(keep_main=True) >> PythonRun(test_case) >> SubstringEvaluator(answer)
  31. if __name__ == "__main__":
  32. print(run_test(TestSumSomeData))