# fix_tokenizer.py
"""Benchmark case: ask a model to repair a buggy regex-based tokenizer.

The prompt embeds a small ``Tokenizer`` class whose ``re.findall`` pattern is
deliberately wrong.  The expected result is the token list for the expression
``sin(3+2*4)-cos(15)``.
"""

# Star import kept exactly as in the original.  Every helper used below
# (make_python_test, LLMRun, ExtractCode, PythonRun, SubstringEvaluator,
# run_test) is not defined in this file and is expected to come from the
# project's ``evaluator`` module.
from evaluator import *

DESCRIPTION = "Test if the model can identify and fix an issue with a tokenizer in a Python code snippet. Identifying the problem is in the regex, and fixing the regex, are both hard."

TAGS = ['code', 'fix', 'python']

# Raw literal on purpose: the embedded regex contains \d, \+, \-, \*, \( and
# \), which are invalid escape sequences in a normal string literal and make
# newer Python versions emit a SyntaxWarning.  The resulting text is the same.
#
# NOTE: the regex inside the prompt is the intentional bug under test -- do
# not "fix" it here.  Because the pattern contains a capturing group,
# re.findall returns the group's text, so matches of the \d+ alternative come
# back as '' instead of the number.
question = r"""
Fix this tokenizer keeping it as similar as possible

class Tokenizer:
    def __init__(self, input_str):
        self.tokens = re.findall(r'\d+|(\+|\-|\*|/|sin|cos|\(|\))', input_str)
        self.position = 0

    def get_next_token(self):
        if self.position >= len(self.tokens):
            return None
        token = self.tokens[self.position]
        self.position += 1
        return token

    def peek_next_token(self):
        if self.position >= len(self.tokens):
            return None
        return self.tokens[self.position]
"""

# One (expression, expected output) pair: the expression is evaluated against
# the model's code and the second string is the token list it should produce.
# (Exact semantics of make_python_test live in ``evaluator`` -- TODO confirm.)
test_case, answer = make_python_test([
    (
        "Tokenizer('sin(3+2*4)-cos(15)').tokens",
        "['sin', '(', '3', '+', '2', '*', '4', ')', '-', 'cos', '(', '15', ')']",
    ),
])

# Pipeline stages are chained with ``>>``.  Judging by their names: query the
# model, extract its code, run it with the test case, then check that the
# expected answer appears in the output.
TestSimpleFix = (
    question
    >> LLMRun()
    >> ExtractCode()
    >> PythonRun(test_case)
    >> SubstringEvaluator(answer)
)

if __name__ == "__main__":
    print(run_test(TestSimpleFix))