brain-assistant
/
yet-another-applied-llm-benchmark


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
							from evaluator import *

DESCRIPTION = "Test if the model can guide a user through a series of git commands to merge a specific branch into the main branch."

TAGS = ['bash', "git", "agent"]

question = """
You are in a git repository. I have two branches, main and some other one I forgot the name of. Merge the other branch into main.

On each round you should just give me just one command to type, I'll run it exactly as-is, and give you the exact output. I will not make any changes. So do not write any placholder text. I'll get started:

INPUT: `pwd`

OUTPUT: `/usr/src/app`

INPUT: `ls`

OUTPUT: `main.py a b`

Now your turn. What do you want to run? Do not explain yourself, just give me exactly one command. Start your response with INPUT:"""


def test_ok_merge():
    try:
        math_content = open("math.py").read().strip()
        readme_content = open("README.md").read().strip()
        mul_content = open("mul.py").read().strip()

        return (
            math_content == "x + y\nx - y"
            and readme_content == "A repo about something\nYou run it like this"
            and mul_content == "x * y"
        )
    except:
        return False


def test_merge_conflict_resolved():
    import os
    try:
        return open("sum.py").read().strip() == "x + y" and open("b").read().strip() == "b"
    except:
        return False


def setup_merge():
    import os
    print(os.popen("""
    git config --global user.name "Your Name"
    git config --global user.email "you@example.com"
    git config --global init.defaultBranch main
    echo "x + y" > math.py
    echo "A repo about something" > README.md
    git init
    git add math.py README.md
    git commit -a -m "Initial commit"
    git branch other
    git checkout other
    echo "x + y\nx - y" > math.py
    git add math.py
    git commit -a -m "Subtraction"
    echo "A repo about something\nYou run it like this" > README.md
    git add README.md
    git commit -a -m "README"
    git log
    git checkout main
    echo "x * y" > mul.py
    git add mul.py
    git commit -a -m "Multiplication"
    git log
    """).read())


def setup_merge_conflict():
    import os
    print(os.popen("""
    git config --global user.name "Your Name"
    git config --global user.email "you@example.com"
    git config --global init.defaultBranch main
    echo "a" > sum.py
    echo "b" > b
    git init
    git add sum.py b
    git commit -a -m "Initial commit"
    git branch other
    git checkout other
    echo "x + y" > sum.py
    git add sum.py
    git commit -a -m "addition"
    git log
    git checkout main
    echo "x - y" > sum.py
    git add sum.py
    git commit -a -m "subtraction"
    git log
    """).read())


def do_extract(x):
    if 'INPUT:' in x:
        x = x.split("INPUT:")[1]
    try:
        to_send = x.split("`")[1]
    except:
        to_send = x.split("\n")[0]
    return to_send


def do_prepare(x):
    print("Preparing to pass back", x)
    return f"I get an OUTPUT: `{x}`.\n\nWhat is the exact command I should run next? Start your response with INPUT:"


TestGitMerge = Setup(setup_merge) >> Echo() >> question >> UntilDone(PyEvaluator(test_ok_merge), (LLMConversation(
) >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=6) >> PyEvaluator(test_ok_merge)

TestGitMergeConflict = Setup(setup_merge_conflict) >> Echo() >> question >> UntilDone(PyEvaluator(test_merge_conflict_resolved), (LLMConversation(
) >> PyFunc(do_extract) >> TerminalRun() >> PyFunc(do_prepare)), max_iters=10) >> PyEvaluator(test_merge_conflict_resolved)

if __name__ == "__main__":
    print(run_test(TestGitMerge))
    print(run_test(TestGitMergeConflict))