# benchmark_shapes.py (1.6 KB)

  1. WEIGHT_SHAPES = {
  2. "ideal": [[4 * 256 * 32, 256 * 32]],
  3. "mistralai/Mistral-7B-v0.1/TP1": [
  4. [4096, 6144],
  5. [4096, 4096],
  6. [4096, 28672],
  7. [14336, 4096],
  8. ],
  9. "mistralai/Mistral-7B-v0.1/TP2": [
  10. [4096, 3072],
  11. [2048, 4096],
  12. [4096, 14336],
  13. [7168, 4096],
  14. ],
  15. "mistralai/Mistral-7B-v0.1/TP4": [
  16. [4096, 1536],
  17. [1024, 4096],
  18. [4096, 7168],
  19. [3584, 4096],
  20. ],
  21. "meta-llama/Llama-2-7b-hf/TP1": [
  22. [4096, 12288],
  23. [4096, 4096],
  24. [4096, 22016],
  25. [11008, 4096],
  26. ],
  27. "meta-llama/Llama-2-7b-hf/TP2": [
  28. [4096, 6144],
  29. [2048, 4096],
  30. [4096, 11008],
  31. [5504, 4096],
  32. ],
  33. "meta-llama/Llama-2-7b-hf/TP4": [
  34. [4096, 3072],
  35. [1024, 4096],
  36. [4096, 5504],
  37. [2752, 4096],
  38. ],
  39. "meta-llama/Llama-2-13b-hf/TP1": [
  40. [5120, 15360],
  41. [5120, 5120],
  42. [5120, 27648],
  43. [13824, 5120],
  44. ],
  45. "meta-llama/Llama-2-13b-hf/TP2": [
  46. [5120, 7680],
  47. [2560, 5120],
  48. [5120, 13824],
  49. [6912, 5120],
  50. ],
  51. "meta-llama/Llama-2-13b-hf/TP4": [
  52. [5120, 3840],
  53. [1280, 5120],
  54. [5120, 6912],
  55. [3456, 5120],
  56. ],
  57. "meta-llama/Llama-2-70b-hf/TP1": [
  58. [8192, 10240],
  59. [8192, 8192],
  60. [8192, 57344],
  61. [28672, 8192],
  62. ],
  63. "meta-llama/Llama-2-70b-hf/TP2": [
  64. [8192, 5120],
  65. [4096, 8192],
  66. [8192, 28672],
  67. [14336, 8192],
  68. ],
  69. "meta-llama/Llama-2-70b-hf/TP4": [
  70. [8192, 2560],
  71. [2048, 8192],
  72. [8192, 14336],
  73. [7168, 8192],
  74. ],
  75. }