Prechádzať zdrojové kódy

fix: skip special tokens

AlpinDale 1 rok pred
rodič
commit
11c01cfd0a
Zmenil 1 súbor, kde vykonal 5 pridaní a 1 odobranie
  1. 5 1
      aphrodite/transformers_utils/tokenizer.py

+ 5 - 1
aphrodite/transformers_utils/tokenizer.py

@@ -118,7 +118,11 @@ def detokenize_incrementally(
         # tokenizers (bigger = more conservative).
         # Subtract 1 extra to account for the generated token.
         prefix_offset = max(len(output_tokens) - 6, 0)
-        read_offset = max(len(output_tokens) - 1, 0)
+        # If the first new token is a special token, we can't skip 1 extra token
+        if skip_special_tokens and new_token_id in tokenizer.all_special_ids:
+            read_offset = max(len(output_tokens), 0)
+        else:
+            read_offset = max(len(output_tokens) - 1, 0)
     else:
         # Put new_token_id in a list so skip_special_tokens is respected
         new_tokens = tokenizer.convert_ids_to_tokens(