Browse Source

fix memory usage with ngram spec decoding

AlpinDale 8 months ago
parent
commit
6e63d7a9db
1 changed file with 1 addition and 1 deletion
  1. 1 1
      aphrodite/spec_decode/ngram_worker.py

+ 1 - 1
aphrodite/spec_decode/ngram_worker.py

@@ -137,7 +137,7 @@ class NGramWorker(LoraNotSupportedWorkerBase):
                 SamplerOutput(
                     outputs=None,
                     sampled_token_probs=token_probs[i],
-                    logprobs=token_logprobs,
+                    logprobs=token_logprobs[i],
                     sampled_token_ids=token_ids[i],
                 ))
         return outputs, False