basic_code_understanding.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
from evaluator import *
  2. DESCRIPTION = "Test if a model can solve a simple capture-the-flag like entry in C."
  3. TAGS = ['c', "explain"]
  4. INPUT = '''#include <stdio.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #define MAX_INPUTS 10
  8. #define DATA_LENGTH 100
  9. #define ENTRY_LENGTH 5
  10. #define COMMAND_LENGTH 4
  11. static const char* flag = "SECRET";
  12. static char data[MAX_INPUTS][DATA_LENGTH];
  13. static int input_lengths[MAX_INPUTS];
  14. static int inputs = 0;
  15. void data_write() {
  16. char input[DATA_LENGTH], len[ENTRY_LENGTH];
  17. long length;
  18. printf("Enter data for storage:\n"); // Updated message
  19. fgets(input, DATA_LENGTH, stdin);
  20. input[strcspn(input, "\n")] = 0;
  21. do {
  22. printf("Input the data size:\n"); // Updated message
  23. fgets(len, ENTRY_LENGTH, stdin);
  24. length = strtol(len, NULL, 10);
  25. if (!length) puts("Invalid data length, try again."); // Updated message
  26. } while (!length);
  27. if (inputs >= MAX_INPUTS) inputs = 0;
  28. strcpy(data[inputs], input);
  29. input_lengths[inputs++] = length;
  30. printf("Data stored at position: %d\n", inputs); // Updated message
  31. }
  32. void data_read() {
  33. char entry[ENTRY_LENGTH], output[DATA_LENGTH] = {'\0'};
  34. printf("Which data position to read?\n"); // Updated message
  35. fgets(entry, ENTRY_LENGTH, stdin);
  36. entry[strcspn(entry, "\n")] = 0;
  37. long entry_number = strtol(entry, NULL, 10);
  38. if (!entry_number) {
  39. puts(flag);
  40. fseek(stdin, 0, SEEK_END);
  41. exit(0);
  42. }
  43. strncpy(output, data[--entry_number], input_lengths[entry_number]);
  44. puts(output);
  45. }
  46. int main() {
  47. char input[COMMAND_LENGTH];
  48. long command;
  49. puts("Welcome to the Data Echo System!"); // Updated message
  50. while (1) {
  51. printf("Choose option (1-Write, 2-Read, 3-Exit):\n"); // Updated message
  52. fgets(input, COMMAND_LENGTH, stdin);
  53. input[strcspn(input, "\n")] = 0;
  54. command = strtol(input, NULL, 10);
  55. if (!command) {
  56. puts("Invalid command, enter 1, 2 or 3."); // Updated message
  57. } else if (command == 1) {
  58. data_write();
  59. puts("Data written. Perform another action?"); // Updated message
  60. } else if (command == 2) {
  61. if (!inputs) {
  62. puts("Database is empty."); // Updated message
  63. continue;
  64. }
  65. data_read();
  66. puts("Data read. Perform another action?"); // Updated message
  67. } else if (command == 3) {
  68. puts("Exiting. Thank you!"); // Updated message
  69. break;
  70. } else {
  71. puts("Please enter a valid command: 1, 2, or 3."); // Updated message
  72. }
  73. }
  74. return 0;
  75. }
  76. '''
  77. question = f"Give me sequence of the inputs that can make the code output the value of the flag. Here is the code: \n {INPUT}"
  78. evaluation = """
  79. Below is a student's answer to a question about functionality of the code in C: <A>
  80. Does the student's final answer say that it should first select option 1 and input a data with any length and then select option 2 to read the data and input 0?
  81. Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that it should first input some data by selecting option 1 and then select option 2 to read and input 0. Giving something that's equal but more complicated is wrong.
  82. If the answer is correct say "The student passes" otherwise "The student fails".
  83. """
  84. TestCodeUnderstanding = question >> LLMRun() >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
  85. if __name__ == "__main__":
  86. print(run_test(TestCodeUnderstanding))