test_tokenization.py

import openai  # use the official client for correctness check
import pytest
import requests

from aphrodite.transformers_utils.tokenizer import get_tokenizer

from ...utils import RemoteOpenAIServer
from .test_completion import zephyr_lora_added_tokens_files  # noqa: F401
from .test_completion import zephyr_lora_files  # noqa: F401

# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"


@pytest.fixture(scope="module")
def server(zephyr_lora_added_tokens_files: str):  # noqa: F811
    args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "8192",
        "--enforce-eager",
        "--max-num-seqs",
        "128",
        # lora config
        "--enable-lora",
        "--lora-modules",
        f"zephyr-lora2={zephyr_lora_added_tokens_files}",
        "--max-lora-rank",
        "64",
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def tokenizer_name(model_name: str,
                   zephyr_lora_added_tokens_files: str):  # noqa: F811
    return zephyr_lora_added_tokens_files if (
        model_name == "zephyr-lora2") else model_name


@pytest.fixture(scope="module")
def client(server):
    return server.get_async_client()
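

# Each test below computes the expected result locally with the HF tokenizer,
# then asserts that the server's tokenization endpoints return the same thing.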
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
    indirect=["tokenizer_name"],
)
async def test_tokenize_completions(client: openai.AsyncOpenAI,
                                    model_name: str, tokenizer_name: str):
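    # the OpenAI client's base_url ends in "/v1/"; trim it back to the server
    # root so "/v1/tokenize" can be appended explicitly below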
    base_url = str(client.base_url)[:-3].strip("/")
    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
                              tokenizer_mode="fast")

    for add_special in [False, True]:
        prompt = "aphrodite1 This is a test prompt."
        tokens = tokenizer.encode(prompt, add_special_tokens=add_special)

        response = requests.post(base_url + "/v1/tokenize",
                                 json={
                                     "add_special_tokens": add_special,
                                     "model": model_name,
                                     "prompt": prompt
                                 })
        response.raise_for_status()

        assert response.json() == {
            "tokens": tokens,
            "count": len(tokens),
            "max_model_len": 8192
        }


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
    indirect=["tokenizer_name"],
)
async def test_tokenize_chat(client: openai.AsyncOpenAI, model_name: str,
                             tokenizer_name: str):
    base_url = str(client.base_url)[:-3].strip("/")
    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
                              tokenizer_mode="fast")

    for add_generation in [False, True]:
        for add_special in [False, True]:
            conversation = [{
                "role": "user",
                "content": "Hi there!"
            }, {
                "role": "assistant",
                "content": "Nice to meet you!"
            }, {
                "role": "user",
                "content": "Can I ask a question? aphrodite1"
            }]
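
            # render the chat template locally to get the prompt the server
            # is expected to build from the same messages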
            prompt = tokenizer.apply_chat_template(
                add_generation_prompt=add_generation,
                conversation=conversation,
                tokenize=False)
            tokens = tokenizer.encode(prompt, add_special_tokens=add_special)

            response = requests.post(base_url + "/v1/tokenize",
                                     json={
                                         "add_generation_prompt":
                                         add_generation,
                                         "add_special_tokens": add_special,
                                         "messages": conversation,
                                         "model": model_name
                                     })
            response.raise_for_status()

            assert response.json() == {
                "tokens": tokens,
                "count": len(tokens),
                "max_model_len": 8192
            }


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name,tokenizer_name",
    [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")],
    indirect=["tokenizer_name"],
)
async def test_detokenize(client: openai.AsyncOpenAI, model_name: str,
                          tokenizer_name: str):
    base_url = str(client.base_url)[:-3].strip("/")
    tokenizer = get_tokenizer(tokenizer_name=tokenizer_name,
                              tokenizer_mode="fast")
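
    # round trip: tokens produced by the local tokenizer should detokenize
    # back to the original prompt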
    prompt = "This is a test prompt. aphrodite1"
    tokens = tokenizer.encode(prompt, add_special_tokens=False)

    print(f"CALLING {base_url} FOR {model_name}")
    response = requests.post(base_url + "/v1/detokenize",
                             json={
                                 "model": model_name,
                                 "tokens": tokens
                             })
    response.raise_for_status()

    assert response.json() == {"prompt": prompt}