import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer
from aphrodite import LLM, SamplingParams

# Load the wikitext2 dataset.
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1')

# Get the first 2000 elements from the 'train' split.
prompts = dataset['train']['text'][:2000]

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Create a tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Tokenize the prompts, truncating any prompt longer than 4096 tokens.
tokenized_prompts = [
    tokenizer.encode(prompt, truncation=True, max_length=4096)
    for prompt in prompts
]

# Detokenize the prompts.
detokenized_prompts = [tokenizer.decode(tokens) for tokens in tokenized_prompts]

# Create a sampling params object.
# prompt_logprobs=1 makes the engine return a log-probability for every
# prompt token, which is what the perplexity computation below relies on.
sampling_params = SamplingParams(
    temperature=0.0,
    ignore_eos=True,
    max_tokens=10,
    skip_special_tokens=False,
    spaces_between_special_tokens=False,
    logprobs=1,
    prompt_logprobs=1,
)

# Create an LLM.
llm = LLM(model=model_id)

# Generate texts from the detokenized prompts.
outputs = llm.generate(detokenized_prompts, sampling_params)

# Collect the per-token prompt logprobs. The first entry of prompt_logprobs
# is skipped because the first token has no preceding context.
all_logprobs = []
for output in outputs:
    all_logprobs.extend(
        [next(iter(lp.values())) for lp in output.prompt_logprobs[1:]]
    )
all_logprobs = np.array([lp.logprob for lp in all_logprobs])

# NOTE: we need to divide by 2 to match the perplexity results
# for the same model on llama.cpp. I'm unsure if this
# approach to ppx measurement is correct.
perplexity = np.exp(-all_logprobs.mean()) / 2
print(f"Perplexity: {perplexity}")
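For reference, the conventional definition of perplexity is simply the exponential of the mean negative log-likelihood of the scored tokens, with no extra factor of 2. A sketch of that standard formula, assuming the engine returns natural-log probabilities:

$$
\mathrm{PPL} = \exp\left(-\frac{1}{N}\sum_{i=1}^{N} \log p\left(x_i \mid x_{<i}\right)\right)
$$

where $N$ is the number of prompt tokens that receive a logprob (the first token is excluded above). The division by 2 in the script is an empirical adjustment to match llama.cpp's reported numbers and is not part of this definition.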