// decoder_xqa_impl_precompiled.h
  #pragma once

  #include "decoder_xqa_impl.h"
  3. class DecoderXQAImplPrecompiled : public DecoderXQAImpl {
  4. public:
  5. DecoderXQAImplPrecompiled(DecoderXQARunner* runner)
  6. : DecoderXQAImpl(runner) {}
  7. void runWithKVBlockArray(XQAParams const& xqa_params,
  8. KVCacheListParams const& kv_block_array,
  9. cudaStream_t const& stream) override;
  10. private:
  11. void runDispatchBuffer(XQAParams const& xqa_params,
  12. KVCacheListParams const& kv_cache_buffer,
  13. cudaStream_t const& stream);
  14. };