implement_assembly_interpreter.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. from evaluator import *
  2. DESCRIPTION = "Test if the model can implement an interpreter for a new assembly language from a text description."
  3. TAGS = ['code', 'python']
  4. question = """Here is the description of a new assembly language:
  5. * 8 registers (R1, R2, R3, R4, R5, R6, R7, R8) that can hold integers.
  6. * 1 flag that can hold a boolean value (True or False).
  7. * 100 memory addresses (0-99) that can hold integers.
  8. * 1 instruction pointer that points to the current instruction being executed.
  9. Each instruction is of the form
  10. OP ARG1 ARG2 ...
  11. where ARGn can be either a register (e.g., R1) or a constant (e.g., 10).
  12. Labels are written with a lowercase word followed by colon.
  13. The assembly language supports the following instructions:
  14. * SET Rx C: Assigns the value C to register Rx.
  15. * ADD Rx Ry Rz: Adds the values of Ry and Rz and stores the result in Rx.
  16. * (similarly for SUB, MUL, DIV, MOD)
  17. * EQ Rx Ry: Sets the flag to True if Rx and Ry are equal, False otherwise.
  18. * (similarly for NEQ, LT (Rx < Ry), LTE, GT, GTE)
  19. * INC/DEC Rx: Increments/Decrements the value of Rx by one.
  20. * JMP L: Jumps to label L unconditionally.
  21. * JT/JF (jump if true / jump if false) L: Jumps to label L if the is set or not set.
  22. * LOAD Rx M: Loads the value at memory address M into register Rx.
  23. * STORE Rx M: Stores the value of register Rx into memory address M.
  24. * HCF: Stops the program (with pizzazz)
  25. For example here is a program to compute the first 20 square numbers (1, 4, 9, 16, 25, ...):
  26. SET R1 0 // Counter for storing squares
  27. SET R2 1 // Number to square
  28. loop:
  29. MUL R3 R2 R2 // R3 = R2 * R2
  30. STORE R3 R1 // Store R3 at address R1
  31. INC R1 // Increment address
  32. INC R2 // Increment number
  33. SET R3 20
  34. EQ R1 R3 // Check if 20 squares are found
  35. JF loop // If not 20 squares found yet, continue finding
  36. end:
  37. HCF // Stop program
  38. Write me a python interpreter `evaluate(str)` that returns the resulting memory state after running the program. For example, `evaluate(program)` should return `[1, 4, 9, 16, 25, ...]` for the above program.
  39. """
  40. primes = """
  41. SET R1 2 // Starting number to check for prime
  42. start_find_primes:
  43. JMP is_prime // Control will return after executing is_prime with R1 as input and R2 containing the result
  44. ready_prime:
  45. SET R7 1
  46. EQ R2 R7 // Check if R2 is 1 (prime)
  47. JF increment // If not prime, skip storing and increment the number
  48. // Store prime number in memory and increment count
  49. STORE R1 R8 // Store prime number at address pointed by R8
  50. INC R8 // Increment prime count
  51. // Check if 100 primes are found
  52. SET R7 100
  53. EQ R8 R7
  54. JF increment // If not 100 primes found yet, continue finding
  55. JMP end // If 100 primes found, end program
  56. increment:
  57. INC R1 // Increment number to check for prime
  58. JMP start_find_primes // Check next number
  59. is_prime:
  60. SET R2 1 // Assume number is prime initially
  61. SET R3 2 // Start divisor from 2
  62. start_loop: // Label to start the loop
  63. // Check if we have exceeded the square root of R1
  64. MUL R4 R3 R3 // R4 = R3 * R3
  65. GT R4 R1 // Set flag if R4 > R1
  66. JT is_prime_end // If not exceeded, continue; else, end loop
  67. MOD R6 R1 R3 // R6 = R1 % R3
  68. SET R7 0
  69. EQ R7 R6 // Check if R6 is 0
  70. JT not_prime // If yes, number is not prime
  71. INC R3 // Increment divisor
  72. JMP start_loop // Repeat loop
  73. not_prime:
  74. SET R2 0 // Set result to 0 (not prime)
  75. is_prime_end:
  76. JMP ready_prime
  77. end:
  78. """
  79. code = """
  80. SET R1 0
  81. SET R2 1
  82. loop:
  83. MUL R3 R2 R2
  84. STORE R3 R1
  85. INC R1
  86. INC R2
  87. SET R3 20
  88. EQ R1 R3
  89. JF loop
  90. """
  91. test_case, answer = make_python_test([(f'evaluate("""{code}""")[:10]', "[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]"),
  92. (f'evaluate("""{primes}""")[:10]', "[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]")
  93. ])
  94. TestImplementAssembly = question >> LLMRun() >> ExtractCode(lang="python") >> PythonRun(test_case) >> SubstringEvaluator(answer)
  95. if __name__ == "__main__":
  96. print(run_test(TestImplementAssembly))