1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- import argparse
- import json
- import gradio as gr
- import requests
def http_bot(prompt):
    """Stream a completion for ``prompt`` from the generation endpoint.

    Sends a streaming POST request to ``args.model_url`` (``args`` is the
    module-level namespace produced by the command-line parser) and yields
    the text carried by each message the server sends back.

    Args:
        prompt: Text forwarded as the ``"prompt"`` field of the request.

    Yields:
        The first entry of the ``"text"`` list of each decoded message.
    """
    headers = {"User-Agent": "Aphrodite Client"}
    pload = {
        "prompt": prompt,
        "stream": True,
        "max_tokens": 512,
    }
    # With stream=True the connection is only released once the body is fully
    # consumed or the response is closed. The context manager guarantees the
    # close even if the consumer abandons this generator early or a message
    # fails to parse.
    with requests.post(args.model_url,
                       headers=headers,
                       json=pload,
                       stream=True) as response:
        # Messages arrive as NUL-delimited JSON documents.
        for chunk in response.iter_lines(chunk_size=8192,
                                         decode_unicode=False,
                                         delimiter=b"\0"):
            if chunk:
                data = json.loads(chunk.decode("utf-8"))
                output = data["text"][0]
                yield output
def build_demo():
    """Assemble the Gradio Blocks UI for the text completion demo.

    The page holds a heading, an input textbox and an output textbox.
    Submitting the input textbox calls ``http_bot`` with the input's value
    and routes its output to the output textbox.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as ui:
        gr.Markdown("# Aphrodite text completion demo\n")
        prompt_field = gr.Textbox(
            label="Input",
            placeholder="Enter text and press ENTER",
        )
        result_field = gr.Textbox(
            label="Output",
            placeholder="Generated result from the model",
        )
        # Pressing ENTER in the input box triggers the generation call.
        prompt_field.submit(
            fn=http_bot,
            inputs=[prompt_field],
            outputs=[result_field],
        )
    return ui
if __name__ == "__main__":
    # Command-line options: where this UI listens and which endpoint it calls.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="localhost")
    parser.add_argument("--port", type=int, default=8001)
    parser.add_argument(
        "--model-url",
        type=str,
        default="http://localhost:2242/api/v1/generate",
    )
    # Must stay bound to the module-level name ``args``: http_bot reads
    # args.model_url when it issues its request.
    args = parser.parse_args()

    demo = build_demo()
    launch_options = {
        "server_name": args.host,
        "server_port": args.port,
        "share": True,
    }
    demo.queue(concurrency_count=100).launch(**launch_options)
|