brain-assistant
/
yet-another-applied-llm-benchmark


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
							from evaluator import *

# One-sentence summary of what this test case measures.
DESCRIPTION = "Test if the model can guide a user through a series of git commands to identify and cherrypick a specific commit from a branch onto the main branch."

# Labels attached to this test case.
TAGS = ['bash', "git", "agent"]


# Opening prompt of the conversation. It states the task (take the first commit
# that differs between main and an unnamed second branch and commit it onto
# main), fixes the protocol (exactly one command per round, run verbatim, the
# exact output reported back), and seeds the dialogue with two example
# INPUT/OUTPUT rounds. The text is sent as-is, so it must not be edited
# casually: any change alters what the model sees.
question = """
You are in a git repository. I have two branches, main and some other one I forgot the name of. Take just the first commit that differs between main and the other branch, and commit it onto main.

On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:

INPUT: `pwd`

OUTPUT: `/usr/src/app`

INPUT: `ls`

OUTPUT: `main.py a b`

Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""


def test_ok():
    """Report whether the working directory is in the expected end state.

    The check passes only when all three conditions hold, relative to the
    current working directory:

    * file ``e`` exists and its stripped content is ``"e"``,
    * no path named ``d`` exists,
    * file ``c`` exists and its stripped content is ``"c"``.

    With the repository built by ``setup`` this corresponds to main still
    having its own "Add e" commit, plus "Add c" from the other branch, but
    not "Add d".

    Returns:
        bool: True when the state matches, False otherwise. A missing or
        unreadable file counts as a failed check rather than an error.
    """
    # Kept function-local as in the original.
    # NOTE(review): presumably the harness needs this function to be
    # self-contained - confirm against `evaluator` before hoisting.
    import os

    def _file_holds(path, expected):
        # True when `path` can be read and its stripped text equals `expected`.
        try:
            with open(path) as handle:
                return handle.read().strip() == expected
        except (OSError, UnicodeDecodeError):
            # Absent, unreadable, or non-text file: the check simply fails.
            return False

    # Same evaluation order and short-circuiting as before: e, then d, then c.
    return _file_holds("e", "e") and not os.path.exists("d") and _file_holds("c", "c")

def setup():
    """Build the git repository the test runs against and print the log.

    The shell script below, executed in the current working directory:

    * sets a global git user name, e-mail and default branch name (main),
    * creates files ``a`` and ``b``, initialises a repository and commits
      them as "Initial commit",
    * creates and checks out branch ``other``, then commits "Add c" and
      "Add d" (one new file each),
    * returns to ``main`` and commits "Add e".

    Whatever the script writes to the pipe is printed once it has finished.
    """
    # Function-local import, as in the original.
    import os

    repo_script = """
    git config --global user.name "Your Name"
    git config --global user.email "you@example.com"
    git config --global init.defaultBranch main
    echo "a" > a
    echo "b" > b
    git init
    git add a b
    git commit -a -m "Initial commit"
    git branch other
    git checkout other
    echo "c" > c
    git add c
    git commit -a -m "Add c"
    echo "d" > d
    git add d
    git commit -a -m "Add d"
    git log
    git checkout main
    echo "e" > e
    git add e
    git commit -a -m "Add e"
    git log
    """
    shell_pipe = os.popen(repo_script)
    captured = shell_pipe.read()
    print(captured)

def do_extract(x):
    """Pull the single shell command out of a model reply.

    When the reply contains ``INPUT:``, only the text between the first and
    second occurrence of that marker (or up to the end, if it appears once)
    is considered. From that text:

    * if it contains a backtick, the text between the first and second
      backtick (or up to the end, if there is only one) is returned
      unchanged;
    * otherwise the first non-blank line is returned with surrounding
      whitespace removed.

    Args:
        x: Raw reply text.

    Returns:
        str: The command to run. Empty when the considered text is blank.
    """
    if 'INPUT:' in x:
        x = x.split("INPUT:")[1]
    try:
        return x.split("`")[1]
    except IndexError:
        # No backtick at all. Strip first so that a reply such as
        # "INPUT:\ngit status" yields the command instead of the empty
        # text sitting before the first newline.
        return x.strip().split("\n")[0].strip()

def do_prepare(x):
    """Wrap command output in the next prompt for the model.

    Prints a trace line showing the value being passed back, then returns a
    message that quotes ``x`` in backticks as the OUTPUT and asks for the next
    command, again to be prefixed with ``INPUT:``.

    Args:
        x: Output of the command that was just run.

    Returns:
        str: The follow-up prompt.
    """
    print("Preparing to pass back", x)
    observed = f"I get an OUTPUT: `{x}`."
    next_request = "What is the exact command I should run next? Start your response with INPUT:"
    return "\n\n".join((observed, next_request))
    
# The test case itself: a pipeline of stages from `evaluator`, chained with `>>`.
# NOTE(review): the stage classes are defined in `evaluator`, not in this file.
# Going by their names and arguments, this presumably runs `setup`, sends
# `question` to the model, then repeats "model reply -> do_extract ->
# TerminalRun -> do_prepare" until `test_ok` passes or 6 iterations have been
# used, and finally scores the run with `test_ok` - confirm against `evaluator`.
TestGitCherrypick = Setup(setup) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok), (LLMConversation() >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok)
    

# When executed as a script, run only this test case and print whatever
# `run_test` (from `evaluator`) returns for it.
if __name__ == "__main__":
    print(run_test(TestGitCherrypick))