.. |
__init__.py
|
c2aaaefd57
allow out-of-tree model registry
|
пре 9 месеци |
baichuan.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
bloom.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
chatglm.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
cohere.py
|
a3b1602391
fix: rope scaling for cohere and qwen (#436)
|
пре 9 месеци |
dbrx.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
decilm.py
|
e31c6f0b45
feat: refactor modeling logic and support more models (#274)
|
пре 11 месеци |
deepseek.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
falcon.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
gemma.py
|
0d0c6b313c
fix: linear bias of qkv layers in models (#430)
|
пре 9 месеци |
gpt2.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
gpt_bigcode.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
gpt_j.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
gpt_neox.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
internlm2.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
llama.py
|
0d0c6b313c
fix: linear bias of qkv layers in models (#430)
|
пре 9 месеци |
llava.py
|
4d33ce60da
feat: Triton flash attention backend for ROCm (#407)
|
пре 9 месеци |
mixtral.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
mpt.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
olmo.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
opt.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
phi.py
|
50c2434267
move megatron to a top-level directory
|
пре 9 месеци |
qwen.py
|
a3b1602391
fix: rope scaling for cohere and qwen (#436)
|
пре 9 месеци |
qwen2.py
|
a3b1602391
fix: rope scaling for cohere and qwen (#436)
|
пре 9 месеци |
qwen2moe.py
|
a3b1602391
fix: rope scaling for cohere and qwen (#436)
|
пре 9 месеци |
stablelm.py
|
0d0c6b313c
fix: linear bias of qkv layers in models (#430)
|
пре 9 месеци |