.. |
__init__.py
|
f8dfac6372
chore: attention refactor and upstream sync apr01 (#365)
|
vor 9 Monaten |
cpu_executor.py
|
f2b6dc3872
cpu: add support for W8A8 quantization via compressed-tensor (#1017)
|
vor 2 Wochen |
distributed_gpu_executor.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
vor 2 Wochen |
executor_base.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
vor 2 Wochen |
gpu_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
msgspec_utils.py
|
2f61644f6e
SPMD optimizations (#824)
|
vor 1 Monat |
multiproc_gpu_executor.py
|
0dfa6b60ec
core: support logprobs with multi-step scheduling (#963)
|
vor 2 Wochen |
multiproc_worker_utils.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
multiproc_xpu_executor.py
|
15cb8d5c26
xpu: support pipeline parallel (#932)
|
vor 2 Wochen |
neuron_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
openvino_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
ray_gpu_executor.py
|
4737c22ab3
fix: pass `APHRODITE_ATTENTION_BACKEND` to ray workers (#1009)
|
vor 2 Wochen |
ray_tpu_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
ray_utils.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
ray_xpu_executor.py
|
673621a3d2
xpu: refactor the model runner for tensor parallelism (#910)
|
vor 3 Wochen |
tpu_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |
xpu_executor.py
|
3bb0f07461
chore: rename `task_handler` to `worker` (#985)
|
vor 2 Wochen |