diff --git a/README.md b/README.md
index 7d3c7b20..267fad32 100644
--- a/README.md
+++ b/README.md
@@ -238,6 +238,7 @@ CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path lmsys
 ```bash
 python3 -m fastchat.serve.gradio_web_server_multi
 ```
+- The default model worker based on huggingface/transformers has great compatibility but can be slow. If you want high-throughput serving, you can try [vLLM integration](docs/vllm_integration.md).
 
 ## API
 ### OpenAI-Compatible RESTful APIs & SDK
diff --git a/fastchat/serve/controller.py b/fastchat/serve/controller.py
index 3c0518e8..a67da62c 100644
--- a/fastchat/serve/controller.py
+++ b/fastchat/serve/controller.py
@@ -337,12 +337,12 @@ def create_controller():
     args, controller = create_controller()
     if args.ssl:
         uvicorn.run(
-            app,
-            host=args.host,
-            port=args.port,
-            log_level="info",
-            ssl_keyfile=os.environ["SSL_KEYFILE"],
-            ssl_certfile=os.environ["SSL_CERTFILE"]
+            app,
+            host=args.host,
+            port=args.port,
+            log_level="info",
+            ssl_keyfile=os.environ["SSL_KEYFILE"],
+            ssl_certfile=os.environ["SSL_CERTFILE"],
         )
     else:
         uvicorn.run(app, host=args.host, port=args.port, log_level="info")
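For quick reference, here is one way the two changes above can be exercised. This is a sketch, not part of the patch: the `--ssl` flag and the `SSL_KEYFILE`/`SSL_CERTFILE` environment variable names are taken from the controller.py hunk, the `fastchat.serve.vllm_worker` command follows docs/vllm_integration.md, and the model name and certificate paths are placeholders.

```bash
# High-throughput serving via the vLLM worker (see docs/vllm_integration.md);
# the model path below is only an example.
python3 -m fastchat.serve.vllm_worker --model-path lmsys/vicuna-7b-v1.3

# Serving the controller over HTTPS: the --ssl flag takes the uvicorn branch
# shown in the hunk above, which reads the key/cert paths from the environment.
export SSL_KEYFILE=/path/to/server.key   # placeholder path
export SSL_CERTFILE=/path/to/server.crt  # placeholder path
python3 -m fastchat.serve.controller --ssl
```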