gradio_server.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import argparse
  2. import json
  3. import gradio as gr
  4. import requests
  5. def http_bot(prompt):
  6. headers = {"User-Agent": "Aphrodite Client"}
  7. pload = {
  8. "prompt": prompt,
  9. "stream": True,
  10. "max_tokens": 512,
  11. }
  12. response = requests.post(args.model_url,
  13. headers=headers,
  14. json=pload,
  15. stream=True)
  16. for chunk in response.iter_lines(chunk_size=8192,
  17. decode_unicode=False,
  18. delimiter=b"\0"):
  19. if chunk:
  20. data = json.loads(chunk.decode("utf-8"))
  21. output = data["text"][0]
  22. yield output
  23. def build_demo():
  24. with gr.Blocks() as demo:
  25. gr.Markdown("# Aphrodite text completion demo\n")
  26. inputbox = gr.Textbox(label="Input",
  27. placeholder="Enter text and press ENTER")
  28. outputbox = gr.Textbox(label="Output",
  29. placeholder="Generated result from the model")
  30. inputbox.submit(http_bot, [inputbox], [outputbox])
  31. return demo
  32. if __name__ == "__main__":
  33. parser = argparse.ArgumentParser()
  34. parser.add_argument("--host", type=str, default="localhost")
  35. parser.add_argument("--port", type=int, default=8001)
  36. parser.add_argument("--model-url",
  37. type=str,
  38. default="http://localhost:2242/api/v1/generate")
  39. args = parser.parse_args()
  40. demo = build_demo()
  41. demo.queue(concurrency_count=100).launch(server_name=args.host,
  42. server_port=args.port,
  43. share=True)