|
60 | 60 | StatefulSemaphore, |
61 | 61 | api_server_logger, |
62 | 62 | console_logger, |
63 | | - is_package_installed, |
64 | 63 | is_port_available, |
65 | 64 | retrive_model_from_server, |
66 | 65 | ) |
67 | 66 |
|
68 | 67 | parser = FlexibleArgumentParser() |
69 | 68 | parser.add_argument("--port", default=8000, type=int, help="port to the http server") |
70 | 69 | parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server") |
71 | | -parser.add_argument("--workers", default=None, type=int, help="number of workers") |
| 70 | +parser.add_argument("--workers", default=1, type=int, help="number of workers") |
72 | 71 | parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server") |
73 | 72 | parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server") |
74 | 73 | parser.add_argument( |
|
83 | 82 | ) |
84 | 83 | parser = EngineArgs.add_cli_args(parser) |
85 | 84 | args = parser.parse_args() |
86 | | - |
87 | | - |
88 | | -if args.workers is None: |
89 | | - # In GPU, the workers of uvicorn will be set according to the parameter `max-num-seqs` |
90 | | - if is_package_installed("paddlepaddle-gpu"): |
91 | | - args.workers = max(min(int(args.max_num_seqs // 32), 8), 1) |
92 | | - else: |
93 | | - args.workers = 1 |
94 | | -console_logger.info(f"Number of api-server workers: {args.workers}.") |
95 | | - |
96 | 85 | args.model = retrive_model_from_server(args.model, args.revision) |
97 | 86 | chat_template = load_chat_template(args.chat_template, args.model) |
98 | 87 | if args.tool_parser_plugin: |
|
0 commit comments