.. |
__init__.py
|
5884e0b904
add bitnetforcausallm support
|
6 kuukautta sitten |
arctic.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
baichuan.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
bitnet.py
|
9bbc75d2e3
wip
|
6 kuukautta sitten |
bloom.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
chatglm.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
commandr.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
dbrx.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
decilm.py
|
50b7c13db0
refactor: attention selector (#552)
|
6 kuukautta sitten |
deepseek.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
falcon.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
gemma.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
gpt2.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
gpt_bigcode.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
gpt_j.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
gpt_neox.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
internlm2.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
jais.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
llama.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |
llama_embedding.py
|
50b7c13db0
refactor: attention selector (#552)
|
6 kuukautta sitten |
llava.py
|
24a2d9c2c8
minor llava refactoring
|
6 kuukautta sitten |
minicpm.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
mixtral.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |
mixtral_quant.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |
mpt.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
olmo.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
opt.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
orion.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
phi.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
phi3_small.py
|
696f2cd59c
add phi3_small support with blocksparse attention
|
6 kuukautta sitten |
qwen.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
qwen2.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |
qwen2_moe.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
stablelm.py
|
656459fd84
make fp8_e4m3 work on nvidia
|
6 kuukautta sitten |
starcoder2.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |
vlm_base.py
|
f970f3f3fb
add base class for VLMs
|
6 kuukautta sitten |
xverse.py
|
ac79d115b3
add guards for prefix caching, fp8, chunked, etc
|
6 kuukautta sitten |