gradio_server.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import argparse
  2. import json
  3. import gradio as gr
  4. import requests
  5. def http_bot(prompt):
  6. headers = {"User-Agent": "Aphrodite Client"}
  7. pload = {
  8. "prompt": prompt,
  9. "stream": True,
  10. "max_tokens": 512,
  11. }
  12. response = requests.post(args.model_url, headers=headers, json=pload, stream=True)
  13. for chunk in response.iter_lines(chunk_size=8192, decode_unicode=False, delimiter=b"\0"):
  14. if chunk:
  15. data = json.loads(chunk.decode("utf-8"))
  16. output = data["text"][0]
  17. yield output
  18. def build_demo():
  19. with gr.Blocks() as demo:
  20. gr.Markdown(
  21. "# Aphrodite text completion demo\n"
  22. )
  23. inputbox = gr.Textbox(label="Input", placeholder="Enter text and press ENTER")
  24. outputbox = gr.Textbox(label="Output", placeholder="Generated result from the model")
  25. inputbox.submit(http_bot, [inputbox], [outputbox])
  26. return demo
  27. if __name__ == "__main__":
  28. parser = argparse.ArgumentParser()
  29. parser.add_argument("--host", type=str, default="localhost")
  30. parser.add_argument("--port", type=int, default=8001)
  31. parser.add_argument("--model-url", type=str, default="http://localhost:2242/api/v1/generate")
  32. args = parser.parse_args()
  33. demo = build_demo()
  34. demo.queue(concurrency_count=100).launch(server_name=args.host,
  35. server_port=args.port,
  36. share=True)