program_in_new_assembly.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. from evaluator import *
  2. DESCRIPTION = "Test if the model can write a program in a new assembly language. This ability to learn a new language on-the-fly is important for many tasks."
  3. TAGS = ['code']
  4. class AssemblyEmulator:
  5. def __init__(self, instructions):
  6. self.registers = {"R1": 0, "R2": 0, "R3": 0, "R4": 0, "R5": 0, "R6": 0, "R7": 0, "R8": 0}
  7. self.memory = [0] * 100
  8. self.instruction_pointer = 0
  9. self.instructions = instructions.split("\n")
  10. self.flag = False
  11. print(instructions)
  12. def run(self):
  13. def lookup(register_or_const):
  14. if register_or_const.startswith('R'):
  15. return self.registers[register_or_const]
  16. else:
  17. return int(register_or_const)
  18. bin_op = {
  19. "ADD": lambda a, b: a + b,
  20. "SUB": lambda a, b: a - b,
  21. "MUL": lambda a, b: a * b,
  22. "DIV": lambda a, b: a // b,
  23. "MOD": lambda a, b: a % b,
  24. }
  25. cmp_op = {
  26. "EQ": lambda a, b: a == b,
  27. "NEQ": lambda a, b: a != b,
  28. "LT": lambda a, b: a < b,
  29. "LTE": lambda a, b: a <= b,
  30. "GT": lambda a, b: a > b,
  31. "GTE": lambda a, b: a >= b,
  32. }
  33. ctr = 0
  34. while self.instruction_pointer < len(self.instructions):
  35. ctr += 1
  36. if ctr > 1e6:
  37. raise Exception("Infinite loop detected")
  38. parts = self.instructions[self.instruction_pointer].split("//")[0].replace(",","").split()
  39. if len(parts) == 0:
  40. self.instruction_pointer += 1
  41. continue
  42. instruction, args = parts[0], parts[1:]
  43. if instruction == "SET":
  44. self.registers[args[0]] = lookup(args[1])
  45. elif instruction in bin_op:
  46. self.registers[args[0]] = bin_op[instruction](lookup(args[1]), lookup(args[2]))
  47. elif instruction in cmp_op:
  48. self.flag = cmp_op[instruction](lookup(args[0]), lookup(args[1]))
  49. elif instruction == "INC":
  50. self.registers[args[0]] += 1
  51. elif instruction == "DEC":
  52. self.registers[args[0]] -= 1
  53. elif instruction == "JT" and self.flag:
  54. self.instruction_pointer = self.find_label(args[0])
  55. continue
  56. elif instruction == "JF" and not self.flag:
  57. self.instruction_pointer = self.find_label(args[0])
  58. continue
  59. elif instruction == "JMP":
  60. self.instruction_pointer = self.find_label(args[0])
  61. continue
  62. elif instruction == "LOAD":
  63. self.memory[lookup(args[1])] = lookup(args[0])
  64. elif instruction == "STORE":
  65. self.memory[lookup(args[1])] = lookup(args[0])
  66. elif instruction == "HCF":
  67. return
  68. self.instruction_pointer += 1
  69. def find_label(self, label):
  70. return next(i for i, instruction in enumerate(self.instructions) if instruction.strip().startswith(label + ':'))
  71. question = """Here is the description of a new assembly language:
  72. * 8 registers (R1, R2, R3, R4, R5, R6, R7, R8) that can hold integers.
  73. * 1 flag that can hold a boolean value (True or False).
  74. * 100 memory addresses (0-99) that can hold integers.
  75. * 1 instruction pointer that points to the current instruction being executed.
  76. Each instruction is of the form
  77. OP ARG1 ARG2 ...
  78. where ARGn can be either a register (e.g., R1) or a constant (e.g., 10).
  79. Labels are written with a lowercase word followed by colon.
  80. The assembly language supports the following instructions:
  81. * SET Rx C: Assigns the value C to register Rx.
  82. * ADD Rx Ry Rz: Adds the values of Ry and Rz and stores the result in Rx.
  83. * (similarly for SUB, MUL, DIV, MOD)
  84. * EQ Rx Ry: Sets the flag to True if Rx and Ry are equal, False otherwise.
  85. * (similarly for NEQ, LT (Rx < Ry), LTE, GT, GTE)
  86. * INC/DEC Rx: Increments/Decrements the value of Rx by one.
  87. * JMP L: Jumps to label L unconditionally.
  88. * JT/JF (jump if true / jump if false) L: Jumps to label L if the is set or not set.
  89. * LOAD Rx M: Loads the value at memory address M into register Rx.
  90. * STORE Rx M: Stores the value of register Rx into memory address M.
  91. * HCF: Stops the program (with pizzazz)
  92. Given this assembly language, write a program that {}"""
  93. def expect(answer):
  94. def evaluate(code):
  95. try:
  96. emulator = AssemblyEmulator(code)
  97. emulator.run()
  98. except Exception as e:
  99. print("EXCEPTION", e)
  100. return False, str(e)
  101. return answer(emulator.memory), ""
  102. return evaluate
  103. TestNewAssemblySquareNumbers = question.format("puts the first 20 square (1, 4, 9...) numbers in memory cells 0..19") >> LLMRun() >> ExtractCode(lang="a new assembly language I just made up") >> PyFunc(expect(lambda m: m[:20] == [i*i for i in range(1,21)]))
  104. TestNewAssemblyPrimeNumbers = question.format("puts the first 20 prime numbers in memory cells 0..19") >> LLMRun() >> ExtractCode(lang="a new assembly language I just made up") >> PyFunc(expect(lambda m: m[:20] == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]))
  105. if __name__ == "__main__":
  106. print(run_test(TestNewAssemblySquareNumbers))