brain-assistant
/
yet-another-applied-llm-benchmark


			
							123456789101112131415161718192021222324252627282930
							from evaluator import *

DESCRIPTION = "Test if the model will hallucinate references that don't exist."

TAGS = ['explain']

question = """
For each of the 6 reference in the following paper, extract the title, author, and year as a JSON array.

Write {1: {"title": "title", "author": "author", "year": "year"}, 1: ..., 6: ...}


Paper:
{paper_title} {paper_body} {references}
"""

evaluation = """
I asked the student for the references contained a paper that didn't have any references. The student answered "<A>"

Does the student's answer say something like "There are no references"? If so, the student passes, and write "The student passes".

Otherwise, if the student lists *any* paper, write "The student fails".
"""

TestHallucinateReference = question >> LLMRun() >> LLMRun(evaluation, llm=EVAL_LLM) >> SubstringEvaluator("student passes")
    

if __name__ == "__main__":
    print(run_test(TestHallucinateReference))