@@ -5,10 +5,10 @@ from typing import Dict, List, Literal, Optional, Union
 
 from pydantic import (AliasChoices, BaseModel, Field, model_validator,
                       root_validator)
-import torch
 
 from aphrodite.common.utils import random_uuid
 from aphrodite.common.sampling_params import SamplingParams
+from aphrodite.common.logits_processor import BiasLogitsProcessor
 
 
 class ErrorResponse(BaseModel):
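The `BiasLogitsProcessor` class itself is not part of this diff. A minimal sketch of what it plausibly looks like, assuming it mirrors the inline closure being removed below and keeps the `(token_ids, logits) -> logits` callable signature that `SamplingParams` accepts:

```python
# Hypothetical sketch of aphrodite.common.logits_processor.BiasLogitsProcessor;
# its body is not shown in this diff. It packages the behavior of the removed
# inline closure into a reusable callable.
from typing import Dict, List

import torch


class BiasLogitsProcessor:
    """Applies a fixed per-token-id bias to the logits at every sampling step."""

    def __init__(self, biases: Dict[int, float]):
        # Biases are assumed to be pre-clamped/pre-filtered by the caller,
        # as the new to_sampling_params() code below does.
        self.biases = biases

    def __call__(self, token_ids: List[int],
                 logits: torch.Tensor) -> torch.Tensor:
        for token_id, bias in self.biases.items():
            logits[token_id] += bias
        return logits
```

The real implementation may well vectorize this with a precomputed bias tensor instead of a Python loop; the sketch only illustrates the contract.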
@@ -103,23 +103,18 @@ class ChatCompletionRequest(BaseModel):
     guided_regex: Optional[str] = None
     guided_choice: Optional[List[str]] = None
 
-    def to_sampling_params(self) -> SamplingParams:
+    def to_sampling_params(self, vocab_size: int) -> SamplingParams:
         if self.logprobs and not self.top_logprobs:
             raise ValueError("Top logprobs must be set when logprobs is.")
 
-        logits_processors = None
+        logits_processors = []
         if self.logit_bias:
-
-            def logit_bias_logits_processor(
-                    token_ids: List[int],
-                    logits: torch.Tensor) -> torch.Tensor:
-                for token_id, bias in self.logit_bias.items():
-                    # Clamp the bias between -100 and 100 per OpenAI API spec
-                    bias = min(100, max(-100, bias))
-                    logits[int(token_id)] += bias
-                return logits
-
-            logits_processors = [logit_bias_logits_processor]
+            biases = {
+                int(tok): min(100, max(float(bias), -100))
+                for tok, bias in self.logit_bias.items()
+                if 0 < int(tok) < vocab_size
+            }
+            logits_processors.append(BiasLogitsProcessor(biases))
 
         return SamplingParams(
             n=self.n,
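With the new `vocab_size` parameter, call sites in the serving layer must now supply the model's vocabulary size. A hedged usage sketch, running in the context of the module modified above; the field values and the vocabulary size are illustrative assumptions (in the server this would typically come from the engine's tokenizer, e.g. `tokenizer.vocab_size`):

```python
# Illustrative only: field values and vocab_size are placeholders,
# not taken from this diff.
request = ChatCompletionRequest(
    model="some-model",
    messages=[{"role": "user", "content": "Hello!"}],
    logit_bias={"50256": -100},  # OpenAI-style token suppression
)
sampling_params = request.to_sampling_params(vocab_size=32000)
```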
@@ -230,21 +225,18 @@ class CompletionRequest(BaseModel):
     guided_regex: Optional[str] = None
     guided_choice: Optional[List[str]] = None
 
-    def to_sampling_params(self) -> SamplingParams:
+    def to_sampling_params(self, vocab_size: int) -> SamplingParams:
         echo_without_generation = self.echo and self.max_tokens == 0
 
-        logits_processors = None
+        logits_processors = []
         if self.logit_bias:
+            biases = {
+                int(tok): min(100, max(float(bias), -100))
+                for tok, bias in self.logit_bias.items()
+                if 0 < int(tok) < vocab_size
+            }
+            logits_processors.append(BiasLogitsProcessor(biases))
 
-            def logit_bias_logits_processor(
-                    token_ids: List[int],
-                    logits: torch.Tensor) -> torch.Tensor:
-                for token_id, bias in self.logit_bias.items():
-                    bias = min(100, max(-100, bias))
-                    logits[int(token_id)] += bias
-                return logits
-
-            logits_processors = [logit_bias_logits_processor]
         return SamplingParams(
             n=self.n,
             max_tokens=self.max_tokens if not echo_without_generation else 1,
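A worked example of the new bias construction, using hypothetical values: biases are clamped to the OpenAI-spec range [-100, 100], and token ids outside `(0, vocab_size)` are dropped. Note that the strict lower bound also filters out token id 0:

```python
# Hypothetical inputs demonstrating the dict comprehension from the diff.
vocab_size = 50257
logit_bias = {"100": 150, "50256": -200, "999999": 5}

biases = {
    int(tok): min(100, max(float(bias), -100))
    for tok, bias in logit_bias.items()
    if 0 < int(tok) < vocab_size
}

# 150 clamps to 100, -200 clamps to -100, and 999999 is out of range.
assert biases == {100: 100, 50256: -100}
```

Compared with the removed closures, this clamps and validates once at request-construction time rather than on every sampling step, and it deduplicates the logic that was previously copy-pasted between `ChatCompletionRequest` and `CompletionRequest`.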