1
0
Эх сурвалжийг харах

Fix memory usage with n-gram speculative decoding: pass token_logprobs[i] instead of the whole token_logprobs tensor to each SamplerOutput

AlpinDale 8 сар өмнө
parent
commit
6e63d7a9db

+ 1 - 1
aphrodite/spec_decode/ngram_worker.py

@@ -137,7 +137,7 @@ class NGramWorker(LoraNotSupportedWorkerBase):
                 SamplerOutput(
                     outputs=None,
                     sampled_token_probs=token_probs[i],
-                    logprobs=token_logprobs,
+                    logprobs=token_logprobs[i],
                     sampled_token_ids=token_ids[i],
                 ))
         return outputs, False