git_cherrypick.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. from evaluator import *
  2. DESCRIPTION = "Test if the model can guide a user through a series of git commands to identify and cherrypick a specific commit from a branch onto the main branch."
  3. TAGS = ['bash', "git", "agent"]
  4. question = """
  5. You are in a git repository. I have two branches, main and some other one I forgot the name of. Take just the first commit that differs between main and the other branch, and commit it onto main.
  6. On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:
  7. INPUT: `pwd`
  8. OUTPUT: `/usr/src/app`
  9. INPUT: `ls`
  10. OUTPUT: `main.py a b`
  11. Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""
  12. def test_ok():
  13. import os
  14. try:
  15. return open("e").read().strip() == "e" and not os.path.exists("d") and open("c").read().strip() == "c"
  16. except:
  17. return False
  18. return success
  19. def setup():
  20. import os
  21. print(os.popen("""
  22. git config --global user.name "Your Name"
  23. git config --global user.email "you@example.com"
  24. git config --global init.defaultBranch main
  25. echo "a" > a
  26. echo "b" > b
  27. git init
  28. git add a b
  29. git commit -a -m "Initial commit"
  30. git branch other
  31. git checkout other
  32. echo "c" > c
  33. git add c
  34. git commit -a -m "Add c"
  35. echo "d" > d
  36. git add d
  37. git commit -a -m "Add d"
  38. git log
  39. git checkout main
  40. echo "e" > e
  41. git add e
  42. git commit -a -m "Add e"
  43. git log
  44. """).read())
  45. def do_extract(x):
  46. if 'INPUT:' in x:
  47. x = x.split("INPUT:")[1]
  48. try:
  49. to_send = x.split("`")[1]
  50. except:
  51. to_send = x.split("\n")[0]
  52. return to_send
  53. def do_prepare(x):
  54. print("Preparing to pass back", x)
  55. return f"I get an OUTPUT: `{x}`.\n\nWhat is the exact command I should run next? Start your response with INPUT:"
  56. TestGitCherrypick = Setup(setup) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok), (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok)
  57. if __name__ == "__main__":
  58. print(run_test(TestGitCherrypick))