123456789101112131415161718192021222324252627282930313233 |
- from evaluator import *
- DESCRIPTION = "Test if a model can run an incremental update of a bash command without overwriting files that already exist"
- TAGS = ['bash']
- question = 'Run find . -name "*.pdf" -exec python3 process.py {} txt/{}.txt \; but don\'t rewrite files that already exist. Instead of .pdf.txt make it just .txt'
- def setup_fn():
- open("process.py", "w").write("import sys\nopen(sys.argv[2], 'w').write(open(sys.argv[1]).read())\n")
- open("a.pdf", "w").write("hello1")
- open("b.pdf", "w").write("hello2")
- open("c.pdf", "w").write("hello3")
- import os
- os.mkdir("txt")
- open("txt/a.txt", "w").write("done1")
- open("txt/c.txt", "w").write("done3")
- def check_ok():
- import os
- return open("txt/a.txt").read() == "done1" and open("txt/b.txt").read() == "hello2" and open("txt/c.txt").read() == "done3" and len(os.listdir("txt")) == 3
-
- TestBashIncrementalUpdate = Setup(setup_fn) >> question >> LLMRun() >> ExtractCode(keep_main=True) >> BashRun() >> PyEvaluator(check_ok)
- if __name__ == "__main__":
- print(run_test(TestBashIncrementalUpdate))
|