pybind.cpp 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. #include "../cache.h"
  2. #include "../cuda_utils.h"
  3. #include "../ops.h"
  4. #include <torch/extension.h>
  5. PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  6. // Aphrodite custom ops
  7. pybind11::module ops = m.def_submodule("ops", "Aphrodite custom operators");
  8. // Attention ops
  9. ops.def(
  10. "paged_attention_v1",
  11. &paged_attention_v1,
  12. "Compute the attention between an input query and the cached keys/values using PagedAttention.");
  13. ops.def(
  14. "paged_attention_v2",
  15. &paged_attention_v2,
  16. "PagedAttention V2.");
  17. // Activation ops
  18. ops.def(
  19. "silu_and_mul",
  20. &silu_and_mul,
  21. "Activation function used in SwiGLU.");
  22. ops.def(
  23. "gelu_and_mul",
  24. &gelu_and_mul,
  25. "Activation function used in GeGLU with `none` approximation.");
  26. ops.def(
  27. "gelu_tanh_and_mul",
  28. &gelu_tanh_and_mul,
  29. "Activation function used in GeGLU with `tanh` approximation.");
  30. ops.def(
  31. "gelu_new",
  32. &gelu_new,
  33. "GELU implementation used in GPT-2.");
  34. ops.def(
  35. "gelu_fast",
  36. &gelu_fast,
  37. "Approximate GELU implementation.");
  38. // Layernorm
  39. ops.def(
  40. "rms_norm",
  41. &rms_norm,
  42. "Apply Root Mean Square (RMS) Normalization to the input tensor.");
  43. ops.def(
  44. "fused_add_rms_norm",
  45. &fused_add_rms_norm,
  46. "In-place fused Add and RMS Normalization");
  47. // Rotary embedding
  48. ops.def(
  49. "rotary_embedding",
  50. &rotary_embedding,
  51. "Apply GPT-NeoX or GPT-J style rotary embedding to query and key");
  52. // Cache ops
  53. pybind11::module cache_ops = m.def_submodule("cache_ops", "Aphrodite cache ops");
  54. cache_ops.def(
  55. "swap_blocks",
  56. &swap_blocks,
  57. "Swap in (out) the cache blocks from src to dst");
  58. cache_ops.def(
  59. "copy_blocks",
  60. &copy_blocks,
  61. "Copy the cache blocks from src to dst");
  62. cache_ops.def(
  63. "reshape_and_cache",
  64. &reshape_and_cache,
  65. "Reshape the key and value tensors and cache them");
  66. }