from evaluator import * DESCRIPTION = "Test if a model can solve a simple capture-the-flag like entry in C." TAGS = ['c', "explain"] INPUT = '''#include #include #include #define MAX_INPUTS 10 #define DATA_LENGTH 100 #define ENTRY_LENGTH 5 #define COMMAND_LENGTH 4 static const char* flag = "SECRET"; static char data[MAX_INPUTS][DATA_LENGTH]; static int input_lengths[MAX_INPUTS]; static int inputs = 0; void data_write() { char input[DATA_LENGTH], len[ENTRY_LENGTH]; long length; printf("Enter data for storage:\n"); // Updated message fgets(input, DATA_LENGTH, stdin); input[strcspn(input, "\n")] = 0; do { printf("Input the data size:\n"); // Updated message fgets(len, ENTRY_LENGTH, stdin); length = strtol(len, NULL, 10); if (!length) puts("Invalid data length, try again."); // Updated message } while (!length); if (inputs >= MAX_INPUTS) inputs = 0; strcpy(data[inputs], input); input_lengths[inputs++] = length; printf("Data stored at position: %d\n", inputs); // Updated message } void data_read() { char entry[ENTRY_LENGTH], output[DATA_LENGTH] = {'\0'}; printf("Which data position to read?\n"); // Updated message fgets(entry, ENTRY_LENGTH, stdin); entry[strcspn(entry, "\n")] = 0; long entry_number = strtol(entry, NULL, 10); if (!entry_number) { puts(flag); fseek(stdin, 0, SEEK_END); exit(0); } strncpy(output, data[--entry_number], input_lengths[entry_number]); puts(output); } int main() { char input[COMMAND_LENGTH]; long command; puts("Welcome to the Data Echo System!"); // Updated message while (1) { printf("Choose option (1-Write, 2-Read, 3-Exit):\n"); // Updated message fgets(input, COMMAND_LENGTH, stdin); input[strcspn(input, "\n")] = 0; command = strtol(input, NULL, 10); if (!command) { puts("Invalid command, enter 1, 2 or 3."); // Updated message } else if (command == 1) { data_write(); puts("Data written. Perform another action?"); // Updated message } else if (command == 2) { if (!inputs) { puts("Database is empty."); // Updated message continue; } data_read(); puts("Data read. Perform another action?"); // Updated message } else if (command == 3) { puts("Exiting. Thank you!"); // Updated message break; } else { puts("Please enter a valid command: 1, 2, or 3."); // Updated message } } return 0; } ''' question = f"Give me sequence of the inputs that can make the code output the value of the flag. Here is the code: \n {INPUT}" evaluation = """ Below is a student's answer to a question about functionality of the code in C: Does the student's final answer say that it should first select option 1 and input a data with any length and then select option 2 to read the data and input 0? Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that it should first input some data by selecting option 1 and then select option 2 to read and input 0. Giving something that's equal but more complicated is wrong. If the answer is correct say "The student passes" otherwise "The student fails". """ TestCodeUnderstanding = question >> LLMRun() >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes") if __name__ == "__main__": print(run_test(TestCodeUnderstanding))