# git_merge.py (3.6 KB as listed on the hosting page)
  1. from evaluator import *
  # Benchmark metadata: a one-line summary of the scenario and its category tags.
  DESCRIPTION = (
      "Test if the model can guide a user through a series of git commands "
      "to merge a specific branch into the main branch."
  )
  TAGS = ["bash", "git", "agent"]

  # Opening prompt handed to the model. It sets up a turn-based protocol in
  # which the model answers with one command per round, prefixed by "INPUT:".
  # The wording (including its typos) is runtime text and is kept verbatim.
  # NOTE(review): this text was recovered from a numbered listing that may have
  # dropped blank lines -- confirm the paragraph spacing against the canonical
  # prompt.
  question = (
      "\n"
      "You are in a git repository. I have two branches, main and some other one I forgot the name of. Merge the other branch into main.\n"
      "On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:\n"
      "INPUT: `pwd`\n"
      "OUTPUT: `/usr/src/app`\n"
      "INPUT: `ls`\n"
      "OUTPUT: `main.py a b`\n"
      "Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"
  )
  def test_ok_merge():
      """Report whether the current directory holds the fully merged tree.

      Reads ``math.py``, ``README.md`` and ``mul.py`` from the working
      directory and compares each one, with surrounding whitespace stripped,
      against the content expected once both branches built by
      ``setup_merge`` have been combined.

      Returns:
          bool: True only when all three files match; False when any file
          differs or cannot be read.
      """
      expected = {
          "math.py": "x + y\nx - y",
          "README.md": "A repo about something\nYou run it like this",
          "mul.py": "x * y",
      }
      try:
          for name, want in expected.items():
              # The context manager closes each handle instead of leaving it
              # to the garbage collector.
              with open(name) as handle:
                  if handle.read().strip() != want:
                      return False
      except Exception:
          # Best-effort check: a missing or unreadable file simply means the
          # merge has not landed. Unlike a bare ``except`` this still lets
          # KeyboardInterrupt and SystemExit propagate.
          return False
      return True
  def test_merge_conflict_resolved():
      """Report whether the conflicting merge was resolved in favour of ``other``.

      Checks, in the current directory, that ``sum.py`` holds ``x + y`` (the
      version ``setup_merge_conflict`` commits on the ``other`` branch) and
      that the untouched file ``b`` still holds ``b``. Surrounding whitespace
      is ignored.

      Returns:
          bool: True when both files match; False when either differs or
          cannot be read.
      """
      expected = {"sum.py": "x + y", "b": "b"}
      try:
          for name, want in expected.items():
              # Close each handle deterministically rather than leaking it.
              with open(name) as handle:
                  if handle.read().strip() != want:
                      return False
      except Exception:
          # Best-effort check: unreadable files count as "not resolved", while
          # KeyboardInterrupt and SystemExit are no longer swallowed.
          return False
      return True
  def setup_merge():
      """Build a git repository whose two branches merge without conflicts.

      Runs a shell script through ``os.popen`` in the current directory and
      prints whatever the script writes to stdout. The script:

      * sets the global git user name, e-mail and default branch name;
      * commits ``math.py`` and ``README.md`` on ``main``;
      * on a new branch ``other``, appends a second line to each of those
        files in two separate commits;
      * back on ``main``, commits a new file ``mul.py``.

      The repository is left checked out on ``main``.
      """
      # Imported locally, as in the rest of this file.
      # NOTE(review): presumably because the harness runs this function's
      # source in isolation -- confirm in ``evaluator``.
      import os

      commands = [
          'git config --global user.name "Your Name"',
          'git config --global user.email "you@example.com"',
          'git config --global init.defaultBranch main',
          'echo "x + y" > math.py',
          'echo "A repo about something" > README.md',
          'git init',
          'git add math.py README.md',
          'git commit -a -m "Initial commit"',
          'git branch other',
          'git checkout other',
          # The "\n" here is a real newline inside the shell's double quotes,
          # so echo writes a two-line file.
          'echo "x + y\nx - y" > math.py',
          'git add math.py',
          'git commit -a -m "Subtraction"',
          'echo "A repo about something\nYou run it like this" > README.md',
          'git add README.md',
          'git commit -a -m "README"',
          'git log',
          'git checkout main',
          'echo "x * y" > mul.py',
          'git add mul.py',
          'git commit -a -m "Multiplication"',
          'git log',
      ]
      # The script starts and ends with a newline.
      script = "\n" + "\n".join(commands) + "\n"
      transcript = os.popen(script).read()
      print(transcript)
  def setup_merge_conflict():
      """Build a git repository whose two branches edit the same file differently.

      Runs a shell script through ``os.popen`` in the current directory and
      prints whatever the script writes to stdout. The script:

      * sets the global git user name, e-mail and default branch name;
      * commits ``sum.py`` (content ``a``) and ``b`` (content ``b``) on
        ``main``;
      * on a new branch ``other``, rewrites ``sum.py`` to ``x + y``;
      * back on ``main``, rewrites ``sum.py`` to ``x - y``.

      Both branches therefore change the same line of ``sum.py``, so merging
      them is expected to conflict. The repository is left checked out on
      ``main``.
      """
      # Imported locally, as in the rest of this file.
      # NOTE(review): presumably because the harness runs this function's
      # source in isolation -- confirm in ``evaluator``.
      import os

      commands = [
          'git config --global user.name "Your Name"',
          'git config --global user.email "you@example.com"',
          'git config --global init.defaultBranch main',
          'echo "a" > sum.py',
          'echo "b" > b',
          'git init',
          'git add sum.py b',
          'git commit -a -m "Initial commit"',
          'git branch other',
          'git checkout other',
          'echo "x + y" > sum.py',
          'git add sum.py',
          'git commit -a -m "addition"',
          'git log',
          'git checkout main',
          'echo "x - y" > sum.py',
          'git add sum.py',
          'git commit -a -m "subtraction"',
          'git log',
      ]
      # The script starts and ends with a newline.
      script = "\n" + "\n".join(commands) + "\n"
      transcript = os.popen(script).read()
      print(transcript)
  def do_extract(x):
      """Pull the single shell command out of a model reply.

      Args:
          x: The reply text (a ``str``).

      Returns:
          str: If the reply contains ``INPUT:``, only the text after the first
          marker (and before any second one) is considered. Within that text
          the result is whatever sits between the first and second backtick,
          or after a lone backtick. With no backtick at all, the first line is
          returned unstripped.
      """
      if "INPUT:" in x:
          x = x.split("INPUT:")[1]
      try:
          return x.split("`")[1]
      except IndexError:
          # No backtick in the text: fall back to the first line. IndexError is
          # the only failure the lookup above can produce for a str, so the
          # former bare ``except`` is narrowed to it.
          return x.split("\n")[0]
  def do_prepare(x):
      """Wrap command output in the follow-up message for the next round.

      Logs the value being passed back to stdout, then returns it quoted in
      backticks inside a message that asks for the next command.

      Args:
          x: The output to report back.

      Returns:
          str: The follow-up prompt containing ``x``.
      """
      print("Preparing to pass back", x)
      template = (
          "I get an OUTPUT: `{}`.\n\n"
          "What is the exact command I should run next? "
          "Start your response with INPUT:"
      )
      return template.format(x)
  # Clean-merge scenario: prepare the repository, send the opening prompt, then
  # repeat "ask model -> extract command -> run it -> feed output back" and
  # finish with the same check used as the stop condition.
  # NOTE(review): the node classes come from ``evaluator``; presumably
  # UntilDone stops once its evaluator passes or after ``max_iters`` rounds --
  # confirm there.
  TestGitMerge = (
      Setup(setup_merge)
      >> Echo()
      >> question
      >> UntilDone(
          PyEvaluator(test_ok_merge),
          (
              LLMConversation()
              >> PyFunc(do_extract)
              >> TerminalRun()
              >> PyFunc(do_prepare)
          ),
          max_iters=6,
      )
      >> PyEvaluator(test_ok_merge)
  )

  # Conflict scenario: same pipeline shape, with the conflicting repository,
  # the conflict-resolution check and a larger round budget.
  TestGitMergeConflict = (
      Setup(setup_merge_conflict)
      >> Echo()
      >> question
      >> UntilDone(
          PyEvaluator(test_merge_conflict_resolved),
          (
              LLMConversation()
              >> PyFunc(do_extract)
              >> TerminalRun()
              >> PyFunc(do_prepare)
          ),
          max_iters=10,
      )
      >> PyEvaluator(test_merge_conflict_resolved)
  )
  if __name__ == "__main__":
      # Run both scenarios in declaration order and print each result.
      for scenario in (TestGitMerge, TestGitMergeConflict):
          print(run_test(scenario))