.. |
__init__.py
|
ec17b6c4d0
fix: Phi3.5 Mini and MoE LoRA inference (#1070)
|
2 месяцев назад |
arctic.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
baichuan.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
bart.py
|
a985143768
core: add cuda graph support for encoder-decoder models (#1051)
|
2 месяцев назад |
blip.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
blip2.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
bloom.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
chameleon.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
chatglm.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
clip.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
commandr.py
|
135dfd648b
fix: LoRA support for Cohere and Jamba models (#1004)
|
2 месяцев назад |
dbrx.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
decilm.py
|
9022c6d869
remove progress_bar imports
|
4 месяцев назад |
deepseek.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
deepseek_v2.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
eagle.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
exaone.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
falcon.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
fuyu.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
gemma.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
gemma2.py
|
b33cf04386
quants: add bitsandbytes support for gemma2 model (#1026)
|
2 месяцев назад |
gpt2.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
gpt_bigcode.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
gpt_j.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
gpt_neox.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
granite.py
|
11f49b5341
fix: granite logit scale in logit computation (#1054)
|
2 месяцев назад |
idefics2_vision_model.py
|
f1d0b77c92
[0.6.0] Release Candidate (#481)
|
6 месяцев назад |
interfaces.py
|
0b8b407b6d
feat: support profiling with multiple multi-modal inputs per prompt (#712)
|
6 месяцев назад |
intern_vit.py
|
f56d6b396a
vlm: fallback to SDPA for ViT models on CPU backend (#982)
|
2 месяцев назад |
internlm2.py
|
7632f91429
fix: InternLM2 model with Tensor Parallel (#980)
|
2 месяцев назад |
internvl.py
|
41ceb754a6
vlm: fix internvl2 inference with various num_patches (#1030)
|
2 месяцев назад |
jais.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
jamba.py
|
135dfd648b
fix: LoRA support for Cohere and Jamba models (#1004)
|
2 месяцев назад |
llama.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
llama_embedding.py
|
9022c6d869
remove progress_bar imports
|
4 месяцев назад |
llava.py
|
4d14bd1fe5
vlm: add multi-input support for LLaVA and InternVL models (#1002)
|
2 месяцев назад |
llava_next.py
|
766ea79b89
vlm: fix feature size calculation for llava-next models (#1079)
|
2 месяцев назад |
llava_next_video.py
|
be59e30139
vlm: add support for video modality + llava next video (#1014)
|
2 месяцев назад |
mamba.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
2 месяцев назад |
mamba_cache.py
|
a113309876
kernel: add meta functions for ops to prevent graph breaks (#1019)
|
2 месяцев назад |
medusa.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
minicpm.py
|
ce7b602f03
model: add support for MiniCPM-3 (#1044)
|
2 месяцев назад |
minicpm3.py
|
ce7b602f03
model: add support for MiniCPM-3 (#1044)
|
2 месяцев назад |
minicpmv.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
mixtral.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
mixtral_quant.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
mlp_speculator.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
molmo.py
|
acc0c727c8
vlm: add support for molmo vision model (#1069)
|
2 месяцев назад |
mpt.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
na_vit.py
|
9f3e7c86e2
feat: add fused Marlin MoE kernel (#934)
|
2 месяцев назад |
nemotron.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
olmo.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
olmoe.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
opt.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
orion.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
paligemma.py
|
46d577f019
vlm: fix siglip layernorm and paligemma weight loading (#991)
|
2 месяцев назад |
persimmon.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
phi.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
phi3.py
|
ec17b6c4d0
fix: Phi3.5 Mini and MoE LoRA inference (#1070)
|
2 месяцев назад |
phi3_small.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
phi3v.py
|
4d14bd1fe5
vlm: add multi-input support for LLaVA and InternVL models (#1002)
|
2 месяцев назад |
phimoe.py
|
ec17b6c4d0
fix: Phi3.5 Mini and MoE LoRA inference (#1070)
|
2 месяцев назад |
pixtral.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
qwen.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
qwen2.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
qwen2_moe.py
|
5224389dae
chore: skip loading extra bias for qwen2 moe GPTQ (#1011)
|
2 месяцев назад |
qwen2_vl.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
siglip.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
solar.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
stablelm.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
starcoder2.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |
ultravox.py
|
651678d2df
VLM: use `SequenceData.from_token_counts` to create dummy data (#1093)
|
1 месяц назад |
utils.py
|
a8bdd488b9
distributed: support pipeline parallelism for internvl and internlm2 (#965)
|
2 месяцев назад |
xverse.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
2 месяцев назад |