123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- // Adapted from turboderp exllama: https://github.com/turboderp/exllama
- #define _cuda_buffers_cu
- #include "cuda_buffers.cuh"
// Global table holding one CudaBuffers pointer per device slot. Every slot
// starts out null; slots are filled by prepare_buffers_cuda() and released by
// cleanup_buffers_cuda().
CudaBuffers* g_buffers[CUDA_MAX_DEVICES] = {};
- // __constant__ half2 q4_table[16][256];
- // half2 q4_table_host[16][256];
- // bool q4_table_init = false;
// Constructs the buffer record for one device.
//
// _device          : device index; stored and passed to cudaSetDevice().
// _temp_state_size : stored as-is in temp_state_size.
// _temp_state      : pointer stored as-is in temp_state (nothing is allocated
//                    or copied here).
// _temp_dq         : pointer stored as-is in temp_dq (nothing is allocated or
//                    copied here).
//
// Side effects: calls cudaSetDevice(_device) and does not restore whichever
// device was selected before, then creates three streams followed by three
// events. Return codes of the CUDA calls are not inspected.
CudaBuffers::CudaBuffers(int _device, int _temp_state_size, half* _temp_state, half* _temp_dq)
    : device(_device),
      temp_state_size(_temp_state_size),
      temp_state(_temp_state),
      temp_dq(_temp_dq)
{
    // Select the target device before creating the streams and events below.
    cudaSetDevice(_device);

    // Alternate streams.
    cudaStreamCreate(&alt_stream_1);
    cudaStreamCreate(&alt_stream_2);
    cudaStreamCreate(&alt_stream_3);

    // One "done" event per alternate stream.
    cudaEventCreate(&alt_stream_1_done);
    cudaEventCreate(&alt_stream_2_done);
    cudaEventCreate(&alt_stream_3_done);
}
// Releases the three streams and three events created by the constructor.
// temp_state and temp_dq are not freed here. Return codes of the CUDA calls
// are not inspected.
CudaBuffers::~CudaBuffers()
{
    // Streams first, in creation order.
    cudaStreamDestroy(alt_stream_1);
    cudaStreamDestroy(alt_stream_2);
    cudaStreamDestroy(alt_stream_3);

    // Then the matching "done" events.
    cudaEventDestroy(alt_stream_1_done);
    cudaEventDestroy(alt_stream_2_done);
    cudaEventDestroy(alt_stream_3_done);
}
// Looks up the CudaBuffers registered for a device slot.
//
// device_index : index into the global g_buffers table.
//
// Returns the pointer stored in that slot, which is NULL when nothing is
// currently registered there. An index outside [0, CUDA_MAX_DEVICES) also
// yields NULL instead of reading past either end of the table.
CudaBuffers* get_buffers(const int device_index)
{
    // Reject indices that do not name a slot of g_buffers.
    if (device_index < 0 || device_index >= CUDA_MAX_DEVICES) return NULL;

    return g_buffers[device_index];
}
// Creates a CudaBuffers for `_device` and registers it in g_buffers.
//
// _device          : slot of g_buffers to (re)populate; must be a valid index
//                    into that table, i.e. in [0, CUDA_MAX_DEVICES).
// _temp_state_size : forwarded unchanged to the CudaBuffers constructor.
// _temp_state      : forwarded unchanged to the CudaBuffers constructor.
// _temp_dq         : forwarded unchanged to the CudaBuffers constructor.
//
// If the slot already holds an instance, that instance is deleted once the
// replacement has been constructed, so repeated calls for the same device no
// longer leak the old object together with its streams and events. Pointers
// obtained earlier from get_buffers() for this device are invalid afterwards.
void prepare_buffers_cuda
(
    int _device,
    int _temp_state_size,
    half* _temp_state,
    half* _temp_dq
)
{
    // Construct the replacement first: should allocation throw, the currently
    // registered instance is left untouched.
    CudaBuffers* buffers = new CudaBuffers
    (
        _device,
        _temp_state_size,
        _temp_state,
        _temp_dq
    );

    // Release the previous occupant of the slot (delete on NULL is a no-op).
    delete g_buffers[_device];

    g_buffers[_device] = buffers;
}
// Deletes every CudaBuffers currently registered in g_buffers and resets the
// corresponding slots to NULL. Slots that are already NULL are left alone.
void cleanup_buffers_cuda()
{
    for (int device = 0; device < CUDA_MAX_DEVICES; ++device)
    {
        CudaBuffers*& slot = g_buffers[device];
        if (slot)
        {
            delete slot;
            // Clear the slot so it no longer refers to the freed object.
            slot = NULL;
        }
    }
}
|