.. |
__init__.py
|
04b53d2db5
chore: add initializer files
|
1 year ago |
cache_engine.py
|
bf88c8567e
feat: mamba model support (#674)
|
4 months ago |
cpu_model_runner.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
cpu_worker.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
embedding_model_runner.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
enc_dec_model_runner.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
model_runner.py
|
0a369f9171
feat: support chunked prefill with LoRA (#823)
|
1 month ago |
model_runner_base.py
|
48a8693aed
feat: multi-step scheduling (#831)
|
1 month ago |
multi_step_model_runner.py
|
48a8693aed
feat: multi-step scheduling (#831)
|
1 month ago |
multi_step_worker.py
|
48a8693aed
feat: multi-step scheduling (#831)
|
1 month ago |
neuron_model_runner.py
|
008e646c7e
chore: add support for up to 2048 block size (#715)
|
3 months ago |
neuron_worker.py
|
008e646c7e
chore: add support for up to 2048 block size (#715)
|
3 months ago |
openvino_model_runner.py
|
bf88c8567e
feat: mamba model support (#674)
|
4 months ago |
openvino_worker.py
|
bf88c8567e
feat: mamba model support (#674)
|
4 months ago |
tpu_model_runner.py
|
81c5f196eb
chore: various TPU fixes and optimizations (#746)
|
3 months ago |
tpu_worker.py
|
81c5f196eb
chore: various TPU fixes and optimizations (#746)
|
3 months ago |
utils.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
worker.py
|
48a8693aed
feat: multi-step scheduling (#831)
|
1 month ago |
worker_base.py
|
48a8693aed
feat: multi-step scheduling (#831)
|
1 month ago |
xpu_model_runner.py
|
89a2c6dee1
chore: refactor `MultiModalConfig` initialization and profiling (#745)
|
3 months ago |
xpu_worker.py
|
f1d0b77c92
[0.6.0] Release Candidate (#481)
|
4 months ago |