123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- from evaluator import *
# Human-readable summary of what this test measures.
DESCRIPTION = (
    "Test if the model can write a program in a new assembly language. "
    "This ability to learn a new language on-the-fly is important for many tasks."
)

# Category labels attached to this test.
TAGS = ["code"]
class AssemblyEmulator:
    """Interpreter for a small line-oriented assembly language.

    Machine state:
      * ``registers`` -- eight integer registers named R1..R8, initially 0.
      * ``memory`` -- 100 integer cells, initially 0.
      * ``flag`` -- boolean written by the comparison instructions.
      * ``instruction_pointer`` -- index of the next source line to execute.

    Each source line is split on whitespace after stripping a trailing
    ``// comment`` and removing every comma. Lines whose first token is not
    a known opcode (label lines such as ``loop:`` included) are skipped.
    """

    # Number of processed lines after which the program is assumed to loop
    # forever.
    MAX_STEPS = 10 ** 6

    # Three-operand arithmetic: OP dest, lhs, rhs.
    BIN_OPS = {
        "ADD": lambda a, b: a + b,
        "SUB": lambda a, b: a - b,
        "MUL": lambda a, b: a * b,
        "DIV": lambda a, b: a // b,  # floor division
        "MOD": lambda a, b: a % b,
    }

    # Two-operand comparisons; the result is stored in the flag.
    CMP_OPS = {
        "EQ": lambda a, b: a == b,
        "NEQ": lambda a, b: a != b,
        "LT": lambda a, b: a < b,
        "LTE": lambda a, b: a <= b,
        "GT": lambda a, b: a > b,
        "GTE": lambda a, b: a >= b,
    }

    def __init__(self, instructions):
        """Prepare a fresh machine for the program text ``instructions``.

        The program is split into lines on ``"\\n"`` and echoed to stdout.
        """
        self.registers = {"R1": 0, "R2": 0, "R3": 0, "R4": 0,
                          "R5": 0, "R6": 0, "R7": 0, "R8": 0}
        self.memory = [0] * 100
        self.instruction_pointer = 0
        self.instructions = instructions.split("\n")
        self.flag = False
        print(instructions)

    def run(self):
        """Execute the program until ``HCF`` or the end of the source.

        Raises:
            Exception: "Infinite loop detected" once more than ``MAX_STEPS``
                lines have been processed.
            ValueError: a jump names a label that does not exist, or an
                operand is neither a register name nor an integer literal.
            KeyError: an operand starts with ``R`` but is not a register.
            IndexError: a memory address is past the end of memory, or an
                instruction is missing operands.
            ZeroDivisionError: ``DIV``/``MOD`` by zero.
        """
        def lookup(register_or_const):
            # Operands beginning with 'R' are register reads; anything else
            # must parse as an integer constant.
            if register_or_const.startswith('R'):
                return self.registers[register_or_const]
            return int(register_or_const)

        steps = 0
        while self.instruction_pointer < len(self.instructions):
            steps += 1
            if steps > self.MAX_STEPS:
                raise Exception("Infinite loop detected")

            line = self.instructions[self.instruction_pointer]
            parts = line.split("//")[0].replace(",", "").split()
            if not parts:
                # Blank or comment-only line.
                self.instruction_pointer += 1
                continue
            instruction, args = parts[0], parts[1:]

            take_jump = (
                instruction == "JMP"
                or (instruction == "JT" and self.flag)
                or (instruction == "JF" and not self.flag)
            )
            if take_jump:
                # Land on the label line itself; it is skipped as a
                # non-instruction on the next iteration.
                self.instruction_pointer = self.find_label(args[0])
                continue

            if instruction == "SET":
                self.registers[args[0]] = lookup(args[1])
            elif instruction in self.BIN_OPS:
                operation = self.BIN_OPS[instruction]
                self.registers[args[0]] = operation(lookup(args[1]), lookup(args[2]))
            elif instruction in self.CMP_OPS:
                self.flag = self.CMP_OPS[instruction](lookup(args[0]), lookup(args[1]))
            elif instruction == "INC":
                self.registers[args[0]] += 1
            elif instruction == "DEC":
                self.registers[args[0]] -= 1
            elif instruction == "LOAD":
                # LOAD Rx M: read memory cell M into register Rx.
                # (Previously this duplicated STORE and wrote to memory.)
                self.registers[args[0]] = self.memory[lookup(args[1])]
            elif instruction == "STORE":
                # STORE Rx M: write the value of Rx into memory cell M.
                # NOTE: memory is a plain list, so a negative address
                # indexes from the end rather than raising.
                self.memory[lookup(args[1])] = lookup(args[0])
            elif instruction == "HCF":
                return
            # Untaken JT/JF, labels and unknown opcodes fall through here.
            self.instruction_pointer += 1

    def find_label(self, label):
        """Return the index of the first line that starts with ``label:``.

        Leading whitespace on the line is ignored.

        Raises:
            ValueError: no line defines ``label``.
        """
        marker = label + ':'
        for index, line in enumerate(self.instructions):
            if line.strip().startswith(marker):
                return index
        raise ValueError("Unknown label: {}".format(label))
-
# Prompt template describing the assembly language to the model. The single
# "{}" placeholder is filled with the task statement via str.format.
question = """Here is the description of a new assembly language:
* 8 registers (R1, R2, R3, R4, R5, R6, R7, R8) that can hold integers.
* 1 flag that can hold a boolean value (True or False).
* 100 memory addresses (0-99) that can hold integers.
* 1 instruction pointer that points to the current instruction being executed.
Each instruction is of the form
OP ARG1 ARG2 ...
where ARGn can be either a register (e.g., R1) or a constant (e.g., 10).
Labels are written with a lowercase word followed by colon.
The assembly language supports the following instructions:
* SET Rx C: Assigns the value C to register Rx.
* ADD Rx Ry Rz: Adds the values of Ry and Rz and stores the result in Rx.
* (similarly for SUB, MUL, DIV, MOD)
* EQ Rx Ry: Sets the flag to True if Rx and Ry are equal, False otherwise.
* (similarly for NEQ, LT (Rx < Ry), LTE, GT, GTE)
* INC/DEC Rx: Increments/Decrements the value of Rx by one.
* JMP L: Jumps to label L unconditionally.
* JT/JF (jump if true / jump if false) L: Jumps to label L if the flag is set or not set.
* LOAD Rx M: Loads the value at memory address M into register Rx.
* STORE Rx M: Stores the value of register Rx into memory address M.
* HCF: Stops the program (with pizzazz)
Given this assembly language, write a program that {}"""
def expect(answer):
    """Build a checker that runs a program and validates its final memory.

    ``answer`` is a callable that receives the emulator's memory list after
    the program has run and returns the verdict.

    The returned ``evaluate(code)`` function yields a 2-tuple:
      * ``(answer(memory), "")`` when the program ran without raising;
      * ``(False, str(error))`` when constructing or running the emulator
        raised, in which case the error is also printed.
    """
    def evaluate(code):
        try:
            vm = AssemblyEmulator(code)
            vm.run()
        except Exception as err:
            print("EXCEPTION", err)
            failure = (False, str(err))
            return failure
        else:
            # Only reached when the program completed; errors raised by
            # ``answer`` itself are not swallowed.
            verdict = answer(vm.memory)
            return verdict, ""

    return evaluate
# Task: fill memory cells 0..19 with 1, 4, 9, ..., 400.
TestNewAssemblySquareNumbers = (
    question.format("puts the first 20 square (1, 4, 9...) numbers in memory cells 0..19")
    >> LLMRun()
    >> ExtractCode(lang="a new assembly language I just made up")
    >> PyFunc(expect(lambda m: m[:20] == [i*i for i in range(1,21)]))
)

# Task: fill memory cells 0..19 with the first 20 primes. The expected list
# must hold 20 values; the earlier 10-element list could never equal the
# 20-element slice m[:20], so this test always failed.
TestNewAssemblyPrimeNumbers = (
    question.format("puts the first 20 prime numbers in memory cells 0..19")
    >> LLMRun()
    >> ExtractCode(lang="a new assembly language I just made up")
    >> PyFunc(expect(lambda m: m[:20] == [
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
        31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
    ]))
)

if __name__ == "__main__":
    print(run_test(TestNewAssemblySquareNumbers))
-
|