123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- from evaluator import *
- DESCRIPTION = "Test if a model can solve a simple capture-the-flag like entry in C."
- TAGS = ['c', "explain"]
- INPUT = '''#include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #define MAX_INPUTS 10
- #define DATA_LENGTH 100
- #define ENTRY_LENGTH 5
- #define COMMAND_LENGTH 4
- static const char* flag = "SECRET";
- static char data[MAX_INPUTS][DATA_LENGTH];
- static int input_lengths[MAX_INPUTS];
- static int inputs = 0;
- void data_write() {
- char input[DATA_LENGTH], len[ENTRY_LENGTH];
- long length;
- printf("Enter data for storage:\n"); // Updated message
- fgets(input, DATA_LENGTH, stdin);
- input[strcspn(input, "\n")] = 0;
- do {
- printf("Input the data size:\n"); // Updated message
- fgets(len, ENTRY_LENGTH, stdin);
- length = strtol(len, NULL, 10);
- if (!length) puts("Invalid data length, try again."); // Updated message
- } while (!length);
- if (inputs >= MAX_INPUTS) inputs = 0;
- strcpy(data[inputs], input);
- input_lengths[inputs++] = length;
- printf("Data stored at position: %d\n", inputs); // Updated message
- }
- void data_read() {
- char entry[ENTRY_LENGTH], output[DATA_LENGTH] = {'\0'};
- printf("Which data position to read?\n"); // Updated message
- fgets(entry, ENTRY_LENGTH, stdin);
- entry[strcspn(entry, "\n")] = 0;
- long entry_number = strtol(entry, NULL, 10);
- if (!entry_number) {
- puts(flag);
- fseek(stdin, 0, SEEK_END);
- exit(0);
- }
- strncpy(output, data[--entry_number], input_lengths[entry_number]);
- puts(output);
- }
- int main() {
- char input[COMMAND_LENGTH];
- long command;
- puts("Welcome to the Data Echo System!"); // Updated message
- while (1) {
- printf("Choose option (1-Write, 2-Read, 3-Exit):\n"); // Updated message
- fgets(input, COMMAND_LENGTH, stdin);
- input[strcspn(input, "\n")] = 0;
- command = strtol(input, NULL, 10);
- if (!command) {
- puts("Invalid command, enter 1, 2 or 3."); // Updated message
- } else if (command == 1) {
- data_write();
- puts("Data written. Perform another action?"); // Updated message
- } else if (command == 2) {
- if (!inputs) {
- puts("Database is empty."); // Updated message
- continue;
- }
- data_read();
- puts("Data read. Perform another action?"); // Updated message
- } else if (command == 3) {
- puts("Exiting. Thank you!"); // Updated message
- break;
- } else {
- puts("Please enter a valid command: 1, 2, or 3."); // Updated message
- }
- }
- return 0;
- }
- '''
- question = f"Give me sequence of the inputs that can make the code output the value of the flag. Here is the code: \n {INPUT}"
- evaluation = """
- Below is a student's answer to a question about functionality of the code in C: <A>
- Does the student's final answer say that it should first select option 1 and input a data with any length and then select option 2 to read the data and input 0?
- Ignore their reasoning. Just look at the final answer, if they say **EXPLICITLY** that it should first input some data by selecting option 1 and then select option 2 to read and input 0. Giving something that's equal but more complicated is wrong.
- If the answer is correct say "The student passes" otherwise "The student fails".
- """
- TestCodeUnderstanding = question >> LLMRun() >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
-
- if __name__ == "__main__":
- print(run_test(TestCodeUnderstanding))
|