1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- from evaluator import *
- DESCRIPTION = "Test if the model can guide a user through a series of git commands to identify and cherrypick a specific commit from a branch onto the main branch."
- TAGS = ['bash', "git", "agent"]
- question = """
- You are in a git repository. I have two branches, main and some other one I forgot the name of. Take just the first commit that differs between main and the other branch, and commit it onto main.
- On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:
- INPUT: `pwd`
- OUTPUT: `/usr/src/app`
- INPUT: `ls`
- OUTPUT: `main.py a b`
- Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""
- def test_ok():
- import os
- try:
- return open("e").read().strip() == "e" and not os.path.exists("d") and open("c").read().strip() == "c"
- except:
- return False
-
- return success
- def setup():
- import os
- print(os.popen("""
- git config --global user.name "Your Name"
- git config --global user.email "you@example.com"
- git config --global init.defaultBranch main
- echo "a" > a
- echo "b" > b
- git init
- git add a b
- git commit -a -m "Initial commit"
- git branch other
- git checkout other
- echo "c" > c
- git add c
- git commit -a -m "Add c"
- echo "d" > d
- git add d
- git commit -a -m "Add d"
- git log
- git checkout main
- echo "e" > e
- git add e
- git commit -a -m "Add e"
- git log
- """).read())
- def do_extract(x):
- if 'INPUT:' in x:
- x = x.split("INPUT:")[1]
- try:
- to_send = x.split("`")[1]
- except:
- to_send = x.split("\n")[0]
- return to_send
- def do_prepare(x):
- print("Preparing to pass back", x)
- return f"I get an OUTPUT: `{x}`.\n\nWhat is the exact command I should run next? Start your response with INPUT:"
-
- TestGitCherrypick = Setup(setup) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok), (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok)
-
- if __name__ == "__main__":
- print(run_test(TestGitCherrypick))
|