test_chat.py 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. # imports for guided decoding tests
  2. import json
  3. import re
  4. from typing import List
  5. import jsonschema
  6. import openai # use the official client for correctness check
  7. import pytest
  8. import torch
  9. from openai import BadRequestError
  10. from ...utils import RemoteOpenAIServer
  11. from .test_completion import zephyr_lora_added_tokens_files # noqa: F401
  12. from .test_completion import zephyr_lora_files # noqa: F401
# Base model served for the tests in this file.
# any model with a chat template should work here
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# LoRA adapter identifier.
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME = "typeof/zephyr-7b-beta-lora"
  18. @pytest.fixture(scope="module")
  19. def server(zephyr_lora_files, zephyr_lora_added_tokens_files): # noqa: F811
  20. args = [
  21. # use half precision for speed and memory savings in CI environment
  22. "--dtype",
  23. "bfloat16",
  24. "--max-model-len",
  25. "8192",
  26. "--enforce-eager",
  27. # lora config below
  28. "--enable-lora",
  29. "--lora-modules",
  30. f"zephyr-lora={zephyr_lora_files}",
  31. f"zephyr-lora2={zephyr_lora_added_tokens_files}",
  32. "--max-lora-rank",
  33. "64",
  34. "--max-cpu-loras",
  35. "2",
  36. "--max-num-seqs",
  37. "128",
  38. ]
  39. with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
  40. yield remote_server
  41. @pytest.fixture(scope="module")
  42. def client(server):
  43. return server.get_async_client()
  44. @pytest.mark.asyncio
  45. @pytest.mark.parametrize(
  46. # first test base model, then test loras
  47. "model_name",
  48. [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
  49. )
  50. async def test_no_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
  51. messages = [{
  52. "role": "system",
  53. "content": "you are a helpful assistant"
  54. }, {
  55. "role": "user",
  56. "content": "what is 1+1?"
  57. }]
  58. chat_completion = await client.chat.completions.create(model=model_name,
  59. messages=messages,
  60. max_tokens=5,
  61. temperature=0.0,
  62. logprobs=False)
  63. choice = chat_completion.choices[0]
  64. assert choice.logprobs is None
  65. @pytest.mark.asyncio
  66. @pytest.mark.parametrize(
  67. # just test 1 lora hereafter
  68. "model_name",
  69. [MODEL_NAME, "zephyr-lora"],
  70. )
  71. async def test_zero_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
  72. messages = [{
  73. "role": "system",
  74. "content": "you are a helpful assistant"
  75. }, {
  76. "role": "user",
  77. "content": "what is 1+1?"
  78. }]
  79. chat_completion = await client.chat.completions.create(model=model_name,
  80. messages=messages,
  81. max_tokens=5,
  82. temperature=0.0,
  83. logprobs=True,
  84. top_logprobs=0)
  85. choice = chat_completion.choices[0]
  86. assert choice.logprobs is not None
  87. assert choice.logprobs.content is not None
  88. assert len(choice.logprobs.content[0].top_logprobs) == 0
  89. @pytest.mark.asyncio
  90. @pytest.mark.parametrize(
  91. "model_name",
  92. [MODEL_NAME, "zephyr-lora"],
  93. )
  94. async def test_some_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
  95. messages = [{
  96. "role": "system",
  97. "content": "you are a helpful assistant"
  98. }, {
  99. "role": "user",
  100. "content": "what is 1+1?"
  101. }]
  102. chat_completion = await client.chat.completions.create(model=model_name,
  103. messages=messages,
  104. max_tokens=5,
  105. temperature=0.0,
  106. logprobs=True,
  107. top_logprobs=5)
  108. choice = chat_completion.choices[0]
  109. assert choice.logprobs is not None
  110. assert choice.logprobs.content is not None
  111. assert len(choice.logprobs.content[0].top_logprobs) == 5
  112. @pytest.mark.asyncio
  113. @pytest.mark.parametrize(
  114. "model_name",
  115. [MODEL_NAME, "zephyr-lora"],
  116. )
  117. async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
  118. model_name: str):
  119. messages = [{
  120. "role": "system",
  121. "content": "you are a helpful assistant"
  122. }, {
  123. "role": "user",
  124. "content": "what is 1+1?"
  125. }]
  126. # Default max_logprobs is 20, so this should raise an error
  127. with pytest.raises((openai.BadRequestError, openai.APIError)):
  128. stream = await client.chat.completions.create(model=model_name,
  129. messages=messages,
  130. max_tokens=10,
  131. logprobs=True,
  132. top_logprobs=21,
  133. stream=True)
  134. async for chunk in stream:
  135. ...
  136. with pytest.raises(openai.BadRequestError):
  137. await client.chat.completions.create(model=model_name,
  138. messages=messages,
  139. max_tokens=10,
  140. logprobs=True,
  141. top_logprobs=30,
  142. stream=False)
  143. # the server should still work afterwards
  144. chat_completion = await client.chat.completions.create(model=model_name,
  145. messages=messages,
  146. max_tokens=10,
  147. stream=False)
  148. message = chat_completion.choices[0].message
  149. assert message.content is not None and len(message.content) >= 0
  150. @pytest.mark.asyncio
  151. @pytest.mark.parametrize(
  152. "model_name",
  153. [MODEL_NAME, "zephyr-lora"],
  154. )
  155. async def test_single_chat_session(client: openai.AsyncOpenAI,
  156. model_name: str):
  157. messages = [{
  158. "role": "system",
  159. "content": "you are a helpful assistant"
  160. }, {
  161. "role": "user",
  162. "content": "what is 1+1?"
  163. }]
  164. # test single completion
  165. chat_completion = await client.chat.completions.create(model=model_name,
  166. messages=messages,
  167. max_tokens=10,
  168. logprobs=True,
  169. top_logprobs=5)
  170. assert chat_completion.id is not None
  171. assert len(chat_completion.choices) == 1
  172. choice = chat_completion.choices[0]
  173. assert choice.finish_reason == "length"
  174. assert chat_completion.usage == openai.types.CompletionUsage(
  175. completion_tokens=10, prompt_tokens=37, total_tokens=47)
  176. message = choice.message
  177. assert message.content is not None and len(message.content) >= 10
  178. assert message.role == "assistant"
  179. messages.append({"role": "assistant", "content": message.content})
  180. # test multi-turn dialogue
  181. messages.append({"role": "user", "content": "express your result in json"})
  182. chat_completion = await client.chat.completions.create(
  183. model=model_name,
  184. messages=messages,
  185. max_tokens=10,
  186. )
  187. message = chat_completion.choices[0].message
  188. assert message.content is not None and len(message.content) >= 0
  189. @pytest.mark.asyncio
  190. @pytest.mark.parametrize(
  191. # just test 1 lora hereafter
  192. "model_name",
  193. [MODEL_NAME, "zephyr-lora"],
  194. )
  195. async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
  196. messages = [{
  197. "role": "system",
  198. "content": "you are a helpful assistant"
  199. }, {
  200. "role": "user",
  201. "content": "what is 1+1?"
  202. }]
  203. # test single completion
  204. chat_completion = await client.chat.completions.create(
  205. model=model_name,
  206. messages=messages,
  207. max_tokens=10,
  208. temperature=0.0,
  209. )
  210. output = chat_completion.choices[0].message.content
  211. stop_reason = chat_completion.choices[0].finish_reason
  212. # test streaming
  213. stream = await client.chat.completions.create(
  214. model=model_name,
  215. messages=messages,
  216. max_tokens=10,
  217. temperature=0.0,
  218. stream=True,
  219. )
  220. chunks: List[str] = []
  221. finish_reason_count = 0
  222. async for chunk in stream:
  223. delta = chunk.choices[0].delta
  224. if delta.role:
  225. assert delta.role == "assistant"
  226. if delta.content:
  227. chunks.append(delta.content)
  228. if chunk.choices[0].finish_reason is not None:
  229. finish_reason_count += 1
  230. # finish reason should only return in last block
  231. assert finish_reason_count == 1
  232. assert chunk.choices[0].finish_reason == stop_reason
  233. assert delta.content
  234. assert "".join(chunks) == output
  235. @pytest.mark.asyncio
  236. @pytest.mark.parametrize(
  237. "model_name",
  238. ["HuggingFaceH4/zephyr-7b-beta", "zephyr-lora"],
  239. )
  240. async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
  241. model_name: str):
  242. messages = [{
  243. "role": "system",
  244. "content": "You are a helpful assistant."
  245. }, {
  246. "role": "user",
  247. "content": "What is the capital of France?"
  248. }]
  249. # Test stream=True, stream_options={"include_usage": False}
  250. stream = await client.chat.completions.create(
  251. model=model_name,
  252. messages=messages,
  253. max_tokens=10,
  254. temperature=0.0,
  255. stream=True,
  256. stream_options={"include_usage": False})
  257. async for chunk in stream:
  258. assert chunk.usage is None
  259. # Test stream=True, stream_options={"include_usage": True,
  260. # "continuous_usage_stats": False}}
  261. stream = await client.chat.completions.create(model=model_name,
  262. messages=messages,
  263. max_tokens=10,
  264. temperature=0.0,
  265. stream=True,
  266. stream_options={
  267. "include_usage":
  268. True,
  269. "continuous_usage_stats":
  270. False
  271. })
  272. async for chunk in stream:
  273. if chunk.choices[0].finish_reason is None:
  274. assert chunk.usage is None
  275. else:
  276. assert chunk.usage is None
  277. final_chunk = await stream.__anext__()
  278. assert final_chunk.usage is not None
  279. assert final_chunk.usage.prompt_tokens > 0
  280. assert final_chunk.usage.completion_tokens > 0
  281. assert final_chunk.usage.total_tokens == (
  282. final_chunk.usage.prompt_tokens +
  283. final_chunk.usage.completion_tokens)
  284. assert final_chunk.choices == []
  285. # Test stream=False, stream_options={"include_usage": None}
  286. with pytest.raises(BadRequestError):
  287. await client.chat.completions.create(
  288. model=model_name,
  289. messages=messages,
  290. max_tokens=10,
  291. temperature=0.0,
  292. stream=False,
  293. stream_options={"include_usage": None})
  294. # Test stream=False, stream_options={"include_usage": True}
  295. with pytest.raises(BadRequestError):
  296. await client.chat.completions.create(
  297. model=model_name,
  298. messages=messages,
  299. max_tokens=10,
  300. temperature=0.0,
  301. stream=False,
  302. stream_options={"include_usage": True})
  303. # Test stream=True, stream_options={"include_usage": True,
  304. # "continuous_usage_stats": True}
  305. stream = await client.chat.completions.create(
  306. model=model_name,
  307. messages=messages,
  308. max_tokens=10,
  309. temperature=0.0,
  310. stream=True,
  311. stream_options={
  312. "include_usage": True,
  313. "continuous_usage_stats": True
  314. },
  315. )
  316. async for chunk in stream:
  317. assert chunk.usage.prompt_tokens >= 0
  318. assert chunk.usage.completion_tokens >= 0
  319. assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens +
  320. chunk.usage.completion_tokens)
  321. # NOTE: Not sure why, but when I place this after `test_guided_regex_chat`
  322. # (i.e. using the same ordering as in the Completions API tests), the test
  323. # will fail on the second `guided_decoding_backend` even when I swap their order
  324. # (ref: https://github.com/aphrodite-project/aphrodite/pull/5526#issuecomment-2173772256)
  325. @pytest.mark.asyncio
  326. @pytest.mark.parametrize("guided_decoding_backend",
  327. ["outlines", "lm-format-enforcer"])
  328. async def test_guided_choice_chat(client: openai.AsyncOpenAI,
  329. guided_decoding_backend: str,
  330. sample_guided_choice):
  331. messages = [{
  332. "role": "system",
  333. "content": "you are a helpful assistant"
  334. }, {
  335. "role":
  336. "user",
  337. "content":
  338. "The best language for type-safe systems programming is "
  339. }]
  340. chat_completion = await client.chat.completions.create(
  341. model=MODEL_NAME,
  342. messages=messages,
  343. max_tokens=10,
  344. extra_body=dict(guided_choice=sample_guided_choice,
  345. guided_decoding_backend=guided_decoding_backend))
  346. choice1 = chat_completion.choices[0].message.content
  347. assert choice1 in sample_guided_choice
  348. messages.append({"role": "assistant", "content": choice1})
  349. messages.append({
  350. "role": "user",
  351. "content": "I disagree, pick another one"
  352. })
  353. chat_completion = await client.chat.completions.create(
  354. model=MODEL_NAME,
  355. messages=messages,
  356. max_tokens=10,
  357. extra_body=dict(guided_choice=sample_guided_choice,
  358. guided_decoding_backend=guided_decoding_backend))
  359. choice2 = chat_completion.choices[0].message.content
  360. assert choice2 in sample_guided_choice
  361. assert choice1 != choice2
  362. @pytest.mark.asyncio
  363. @pytest.mark.parametrize("guided_decoding_backend",
  364. ["outlines", "lm-format-enforcer"])
  365. async def test_guided_json_chat(client: openai.AsyncOpenAI,
  366. guided_decoding_backend: str,
  367. sample_json_schema):
  368. messages = [{
  369. "role": "system",
  370. "content": "you are a helpful assistant"
  371. }, {
  372. "role":
  373. "user",
  374. "content":
  375. f"Give an example JSON for an employee profile that "
  376. f"fits this schema: {sample_json_schema}"
  377. }]
  378. chat_completion = await client.chat.completions.create(
  379. model=MODEL_NAME,
  380. messages=messages,
  381. max_tokens=1000,
  382. extra_body=dict(guided_json=sample_json_schema,
  383. guided_decoding_backend=guided_decoding_backend))
  384. message = chat_completion.choices[0].message
  385. assert message.content is not None
  386. json1 = json.loads(message.content)
  387. jsonschema.validate(instance=json1, schema=sample_json_schema)
  388. messages.append({"role": "assistant", "content": message.content})
  389. messages.append({
  390. "role":
  391. "user",
  392. "content":
  393. "Give me another one with a different name and age"
  394. })
  395. chat_completion = await client.chat.completions.create(
  396. model=MODEL_NAME,
  397. messages=messages,
  398. max_tokens=1000,
  399. extra_body=dict(guided_json=sample_json_schema,
  400. guided_decoding_backend=guided_decoding_backend))
  401. message = chat_completion.choices[0].message
  402. assert message.content is not None
  403. json2 = json.loads(message.content)
  404. jsonschema.validate(instance=json2, schema=sample_json_schema)
  405. assert json1["name"] != json2["name"]
  406. assert json1["age"] != json2["age"]
  407. @pytest.mark.asyncio
  408. @pytest.mark.parametrize("guided_decoding_backend",
  409. ["outlines", "lm-format-enforcer"])
  410. async def test_guided_regex_chat(client: openai.AsyncOpenAI,
  411. guided_decoding_backend: str, sample_regex):
  412. messages = [{
  413. "role": "system",
  414. "content": "you are a helpful assistant"
  415. }, {
  416. "role":
  417. "user",
  418. "content":
  419. f"Give an example IP address with this regex: {sample_regex}"
  420. }]
  421. chat_completion = await client.chat.completions.create(
  422. model=MODEL_NAME,
  423. messages=messages,
  424. max_tokens=20,
  425. extra_body=dict(guided_regex=sample_regex,
  426. guided_decoding_backend=guided_decoding_backend))
  427. ip1 = chat_completion.choices[0].message.content
  428. assert ip1 is not None
  429. assert re.fullmatch(sample_regex, ip1) is not None
  430. messages.append({"role": "assistant", "content": ip1})
  431. messages.append({"role": "user", "content": "Give me a different one"})
  432. chat_completion = await client.chat.completions.create(
  433. model=MODEL_NAME,
  434. messages=messages,
  435. max_tokens=20,
  436. extra_body=dict(guided_regex=sample_regex,
  437. guided_decoding_backend=guided_decoding_backend))
  438. ip2 = chat_completion.choices[0].message.content
  439. assert ip2 is not None
  440. assert re.fullmatch(sample_regex, ip2) is not None
  441. assert ip1 != ip2
  442. @pytest.mark.asyncio
  443. async def test_guided_decoding_type_error(client: openai.AsyncOpenAI):
  444. messages = [{
  445. "role": "system",
  446. "content": "you are a helpful assistant"
  447. }, {
  448. "role":
  449. "user",
  450. "content":
  451. "The best language for type-safe systems programming is "
  452. }]
  453. with pytest.raises(openai.BadRequestError):
  454. _ = await client.chat.completions.create(model=MODEL_NAME,
  455. messages=messages,
  456. extra_body=dict(guided_regex={
  457. 1: "Python",
  458. 2: "C++"
  459. }))
  460. @pytest.mark.asyncio
  461. @pytest.mark.parametrize("guided_decoding_backend",
  462. ["outlines", "lm-format-enforcer"])
  463. async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
  464. guided_decoding_backend: str,
  465. sample_guided_choice):
  466. messages = [{
  467. "role": "system",
  468. "content": "you are a helpful assistant"
  469. }, {
  470. "role":
  471. "user",
  472. "content":
  473. "The best language for type-safe systems programming is "
  474. }]
  475. chat_completion = await client.chat.completions.create(
  476. model=MODEL_NAME,
  477. messages=messages,
  478. max_tokens=10,
  479. logprobs=True,
  480. top_logprobs=5,
  481. extra_body=dict(guided_choice=sample_guided_choice,
  482. guided_decoding_backend=guided_decoding_backend))
  483. assert chat_completion.choices[0].logprobs is not None
  484. assert chat_completion.choices[0].logprobs.content is not None
  485. top_logprobs = chat_completion.choices[0].logprobs.content[0].top_logprobs
  486. # -9999.0 is the minimum logprob returned by OpenAI
  487. for item in top_logprobs:
  488. assert item.logprob >= -9999.0, f"Failed (top_logprobs={top_logprobs})"
  489. @pytest.mark.asyncio
  490. @pytest.mark.parametrize("guided_decoding_backend",
  491. ["outlines", "lm-format-enforcer"])
  492. async def test_named_tool_use(client: openai.AsyncOpenAI,
  493. guided_decoding_backend: str,
  494. sample_json_schema):
  495. messages = [{
  496. "role": "system",
  497. "content": "you are a helpful assistant"
  498. }, {
  499. "role":
  500. "user",
  501. "content":
  502. f"Give an example JSON for an employee profile that "
  503. f"fits this schema: {sample_json_schema}"
  504. }]
  505. # non-streaming
  506. chat_completion = await client.chat.completions.create(
  507. model=MODEL_NAME,
  508. messages=messages,
  509. max_tokens=1000,
  510. tools=[{
  511. "type": "function",
  512. "function": {
  513. "name": "dummy_function_name",
  514. "description": "This is a dummy function",
  515. "parameters": sample_json_schema
  516. }
  517. }],
  518. tool_choice={
  519. "type": "function",
  520. "function": {
  521. "name": "dummy_function_name"
  522. }
  523. })
  524. message = chat_completion.choices[0].message
  525. assert len(message.content) == 0
  526. json_string = message.tool_calls[0].function.arguments
  527. json1 = json.loads(json_string)
  528. jsonschema.validate(instance=json1, schema=sample_json_schema)
  529. messages.append({"role": "assistant", "content": json_string})
  530. messages.append({
  531. "role":
  532. "user",
  533. "content":
  534. "Give me another one with a different name and age"
  535. })
  536. # streaming
  537. stream = await client.chat.completions.create(
  538. model=MODEL_NAME,
  539. messages=messages,
  540. max_tokens=1000,
  541. tools=[{
  542. "type": "function",
  543. "function": {
  544. "name": "dummy_function_name",
  545. "description": "This is a dummy function",
  546. "parameters": sample_json_schema
  547. }
  548. }],
  549. tool_choice={
  550. "type": "function",
  551. "function": {
  552. "name": "dummy_function_name"
  553. }
  554. },
  555. stream=True)
  556. output = []
  557. finish_reason_count = 0
  558. async for chunk in stream:
  559. delta = chunk.choices[0].delta
  560. if delta.role:
  561. assert delta.role == "assistant"
  562. assert delta.content is None or len(delta.content) == 0
  563. if delta.tool_calls:
  564. output.append(delta.tool_calls[0].function.arguments)
  565. if chunk.choices[0].finish_reason is not None:
  566. finish_reason_count += 1
  567. # finish reason should only return in last block
  568. assert finish_reason_count == 1
  569. json2 = json.loads("".join(output))
  570. jsonschema.validate(instance=json2, schema=sample_json_schema)
  571. assert json1["name"] != json2["name"]
  572. assert json1["age"] != json2["age"]
  573. @pytest.mark.asyncio
  574. @pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
  575. async def test_required_tool_use_not_yet_supported(
  576. client: openai.AsyncOpenAI, guided_decoding_backend: str,
  577. sample_json_schema):
  578. messages = [{
  579. "role": "system",
  580. "content": "you are a helpful assistant"
  581. }, {
  582. "role":
  583. "user",
  584. "content":
  585. f"Give an example JSON for an employee profile that "
  586. f"fits this schema: {sample_json_schema}"
  587. }]
  588. with pytest.raises(openai.BadRequestError):
  589. await client.chat.completions.create(
  590. model=MODEL_NAME,
  591. messages=messages,
  592. max_tokens=1000,
  593. tools=[{
  594. "type": "function",
  595. "function": {
  596. "name": "dummy_function_name",
  597. "description": "This is a dummy function",
  598. "parameters": sample_json_schema
  599. }
  600. }],
  601. tool_choice="required")
  602. with pytest.raises(openai.BadRequestError):
  603. await client.chat.completions.create(
  604. model=MODEL_NAME,
  605. messages=messages,
  606. max_tokens=1000,
  607. tools=[{
  608. "type": "function",
  609. "function": {
  610. "name": "dummy_function_name",
  611. "description": "This is a dummy function",
  612. "parameters": sample_json_schema
  613. }
  614. }],
  615. tool_choice="auto")
  616. @pytest.mark.asyncio
  617. @pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
  618. async def test_inconsistent_tool_choice_and_tools(client: openai.AsyncOpenAI,
  619. guided_decoding_backend: str,
  620. sample_json_schema):
  621. messages = [{
  622. "role": "system",
  623. "content": "you are a helpful assistant"
  624. }, {
  625. "role":
  626. "user",
  627. "content":
  628. f"Give an example JSON for an employee profile that "
  629. f"fits this schema: {sample_json_schema}"
  630. }]
  631. with pytest.raises(openai.BadRequestError):
  632. await client.chat.completions.create(model=MODEL_NAME,
  633. messages=messages,
  634. max_tokens=1000,
  635. tool_choice={
  636. "type": "function",
  637. "function": {
  638. "name":
  639. "dummy_function_name"
  640. }
  641. })
  642. with pytest.raises(openai.BadRequestError):
  643. await client.chat.completions.create(
  644. model=MODEL_NAME,
  645. messages=messages,
  646. max_tokens=1000,
  647. tools=[{
  648. "type": "function",
  649. "function": {
  650. "name": "dummy_function_name",
  651. "description": "This is a dummy function",
  652. "parameters": sample_json_schema
  653. }
  654. }],
  655. tool_choice={
  656. "type": "function",
  657. "function": {
  658. "name": "nondefined_function_name"
  659. }
  660. })
  661. @pytest.mark.asyncio
  662. async def test_response_format_json_object(client: openai.AsyncOpenAI):
  663. for _ in range(2):
  664. resp = await client.chat.completions.create(
  665. model=MODEL_NAME,
  666. messages=[{
  667. "role":
  668. "user",
  669. "content": ('what is 1+1? please respond with a JSON object, '
  670. 'the format is {"result": 2}')
  671. }],
  672. response_format={"type": "json_object"})
  673. content = resp.choices[0].message.content
  674. assert content is not None
  675. loaded = json.loads(content)
  676. assert loaded == {"result": 2}, loaded
  677. @pytest.mark.asyncio
  678. async def test_extra_fields(client: openai.AsyncOpenAI):
  679. with pytest.raises(BadRequestError) as exc_info:
  680. await client.chat.completions.create(
  681. model=MODEL_NAME,
  682. messages=[{
  683. "role": "system",
  684. "content": "You are a helpful assistant.",
  685. "extra_field": "0",
  686. }], # type: ignore
  687. temperature=0,
  688. seed=0)
  689. assert "extra_forbidden" in exc_info.value.message
  690. @pytest.mark.asyncio
  691. async def test_complex_message_content(client: openai.AsyncOpenAI):
  692. resp = await client.chat.completions.create(
  693. model=MODEL_NAME,
  694. messages=[{
  695. "role":
  696. "user",
  697. "content": [{
  698. "type":
  699. "text",
  700. "text":
  701. "what is 1+1? please provide the result without any other text."
  702. }]
  703. }],
  704. temperature=0,
  705. seed=0)
  706. content = resp.choices[0].message.content
  707. assert content == "2"
  708. @pytest.mark.asyncio
  709. async def test_custom_role(client: openai.AsyncOpenAI):
  710. # Not sure how the model handles custom roles so we just check that
  711. # both string and complex message content are handled in the same way
  712. resp1 = await client.chat.completions.create(
  713. model=MODEL_NAME,
  714. messages=[{
  715. "role": "my-custom-role",
  716. "content": "what is 1+1?",
  717. }], # type: ignore
  718. temperature=0,
  719. seed=0)
  720. resp2 = await client.chat.completions.create(
  721. model=MODEL_NAME,
  722. messages=[{
  723. "role": "my-custom-role",
  724. "content": [{
  725. "type": "text",
  726. "text": "what is 1+1?"
  727. }]
  728. }], # type: ignore
  729. temperature=0,
  730. seed=0)
  731. content1 = resp1.choices[0].message.content
  732. content2 = resp2.choices[0].message.content
  733. assert content1 == content2
  734. @pytest.mark.asyncio
  735. async def test_long_seed(client: openai.AsyncOpenAI):
  736. for seed in [
  737. torch.iinfo(torch.long).min - 1,
  738. torch.iinfo(torch.long).max + 1
  739. ]:
  740. with pytest.raises(BadRequestError) as exc_info:
  741. await client.chat.completions.create(
  742. model=MODEL_NAME,
  743. messages=[{
  744. "role": "system",
  745. "content": "You are a helpful assistant.",
  746. }],
  747. temperature=0,
  748. seed=seed)
  749. assert ("greater_than_equal" in exc_info.value.message
  750. or "less_than_equal" in exc_info.value.message)