Skip to content

Commit 4bc8dc3

Browse files
committed
update code comments
1 parent 8500f5d commit 4bc8dc3

23 files changed

Lines changed: 749 additions & 576 deletions

llm/client/fastdeploy_client/chatbot.py

Lines changed: 45 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -60,28 +60,25 @@ def stream_generate(self,
6060
Streaming interface
6161
6262
Args:
63-
message (Union[str, List[str], ChatMessage]): 消息内容或ChatMessage对象
64-
max_dec_len (int, optional): 最大解码长度. Defaults to 1024.
65-
min_dec_len (int, optional): 最小解码长度. Defaults to 1.
66-
topp (float, optional): 控制随机性参数,数值越大则随机性越大,范围是0~1. Defaults to 0.7.
67-
temperature (float, optional): 温度值. Defaults to 0.95.
68-
frequency_score (float, optional): 频率分数. Defaults to 0.0.
69-
penalty_score (float, optional): 惩罚分数. Defaults to 1.0.
70-
presence_score (float, optional): 存在分数. Defaults to 0.0.
71-
system (str, optional): 系统设定. Defaults to None.
72-
**kwargs: 其他参数
73-
req_id (str, optional): 请求ID,用于区分不同的请求. Defaults to None.
74-
eos_token_ids (List[int], optional): 指定结束的token id. Defaults to None.
75-
benchmark (bool, optional): 设置benchmark模式,如果是则返回完整的response. Defaults to False.
76-
timeout (int, optional): 请求超时时间,不设置则使用120s. Defaults to None.
63+
message (Union[str, List[str], ChatMessage]): message or ChatMessage object
64+
max_dec_len (int, optional): max decoding length. Defaults to 1024.
65+
min_dec_len (int, optional): min decoding length. Defaults to 1.
66+
topp (float, optional): randomness of the generated tokens. Defaults to 0.7.
67+
temperature (float, optional): temperature. Defaults to 0.95.
68+
frequency_score (float, optional): frequency score. Defaults to 0.0.
69+
penalty_score (float, optional): penalty score. Defaults to 1.0.
70+
presence_score (float, optional): presence score. Defaults to 0.0.
71+
system (str, optional): system settings. Defaults to None.
72+
**kwargs: others
73+
74+
For more details, please refer to https://github.com/PaddlePaddle/FastDeploy/blob/develop/llm/docs/FastDeploy_usage_tutorial.md#%E8%AF%B7%E6%B1%82%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D
7775
7876
Returns:
79-
返回一个生成器,每次yield返回一个字典。
80-
正常情况下,生成器返回字典的示例{"req_id": "xxx", "token": "好的", "is_end": 0},其中token为生成的字符,is_end表明是否为最后一个字符(0表示否,1表示是)
81-
错误情况下,生成器返回错误信息的字典,示例 {"req_id": "xxx", "error_msg": "error message"}
77+
return a generator object, which yields a dict.
78+
Normal, return {'token': xxx, 'is_end': xxx, 'send_idx': xxx, ..., 'error_msg': '', 'error_code': 0}
79+
Others, return {'error_msg': xxx, 'error_code': xxx}, error_msg not None, error_code != 0
8280
"""
8381
try:
84-
# 准备输入
8582
model_name = "model"
8683
inputs = [grpcclient.InferInput("IN", [1], triton_utils.np_to_triton_dtype(np.object_))]
8784
outputs = [grpcclient.InferRequestedOutput("OUT")]
@@ -96,14 +93,11 @@ def stream_generate(self,
9693
timeout = kwargs.get("timeout", self.timeout)
9794

9895
with grpcclient.InferenceServerClient(url=self.url, verbose=False) as triton_client:
99-
# 建立连接
10096
triton_client.start_stream(callback=partial(triton_callback, output_data))
101-
# 发送请求
10297
triton_client.async_stream_infer(model_name=model_name,
10398
inputs=inputs,
10499
request_id=req_id,
105100
outputs=outputs)
106-
# 处理结果
107101
answer_str = ""
108102
enable_benchmark = is_enable_benchmark(**kwargs)
109103
while True:
@@ -129,7 +123,6 @@ def stream_generate(self,
129123
yield response
130124
if response.get("is_end") == 1 or response.get("error_msg") is not None:
131125
break
132-
# 手动关闭
133126
triton_client.stop_stream(cancel_requests=True)
134127
triton_client.close()
135128

@@ -150,27 +143,26 @@ def generate(self,
150143
system=None,
151144
**kwargs):
152145
"""
153-
整句返回,直接使用流式返回的接口。
146+
Return the entire sentence using the streaming interface.
154147
155148
Args:
156-
message (Union[str, List[str], ChatMessage]): 消息内容或ChatMessage对象
157-
max_dec_len (int, optional): 最大解码长度. Defaults to 1024.
158-
min_dec_len (int, optional): 最小解码长度. Defaults to 1.
159-
topp (float, optional): 控制随机性参数,数值越大则随机性越大,范围是0~1. Defaults to 0.7.
160-
temperature (float, optional): 温度值. Defaults to 0.95.
161-
frequency_score (float, optional): 频率分数. Defaults to 0.0.
162-
penalty_score (float, optional): 惩罚分数. Defaults to 1.0.
163-
presence_score (float, optional): 存在分数. Defaults to 0.0.
164-
system (str, optional): 系统设定. Defaults to None.
165-
**kwargs: 其他参数
166-
req_id (str, optional): 请求ID,用于区分不同的请求. Defaults to None.
167-
eos_token_ids (List[int], optional): 指定结束的token id. Defaults to None.
168-
timeout (int, optional): 请求超时时间,不设置则使用120s. Defaults to None.
149+
message (Union[str, List[str], ChatMessage]): message or ChatMessage object
150+
max_dec_len (int, optional): max decoding length. Defaults to 1024.
151+
min_dec_len (int, optional): min decoding length. Defaults to 1.
152+
topp (float, optional): randomness of the generated tokens. Defaults to 0.7.
153+
temperature (float, optional): temperature. Defaults to 0.95.
154+
frequency_score (float, optional): frequency score. Defaults to 0.0.
155+
penalty_score (float, optional): penalty score. Defaults to 1.0.
156+
presence_score (float, optional): presence score. Defaults to 0.0.
157+
system (str, optional): system settings. Defaults to None.
158+
**kwargs: others
159+
160+
For more details, please refer to https://github.com/PaddlePaddle/FastDeploy/blob/develop/llm/docs/FastDeploy_usage_tutorial.md#%E8%AF%B7%E6%B1%82%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D
169161
170162
Returns:
171-
返回一个字典。
172-
正常情况下,返回字典的示例{"req_id": "xxx", "results": "好的,我知道了。"}
173-
错误情况下,返回错误信息的字典,示例 {"req_id": "xxx", "error_msg": "error message"}
163+
return the entire sentence or error message.
164+
Normal, return {'tokens_all': xxx, ..., 'error_msg': '', 'error_code': 0}
165+
Others, return {'error_msg': xxx, 'error_code': xxx}, error_msg not None, error_code != 0
174166
"""
175167
stream_response = self.stream_generate(message, max_dec_len,
176168
min_dec_len, topp, temperature,
@@ -205,7 +197,7 @@ def _prepare_input_data(self,
205197
system=None,
206198
**kwargs):
207199
"""
208-
准备输入数据。
200+
Prepare the input data.
209201
"""
210202
inputs = {
211203
"max_dec_len": max_dec_len,
@@ -248,7 +240,7 @@ def _prepare_input_data(self,
248240

249241
def _format_response(self, response, req_id):
250242
"""
251-
对服务返回字段进行格式化
243+
Format the service return fields
252244
"""
253245
response = json.loads(response.as_numpy("OUT")[0])
254246
if isinstance(response, (list, tuple)):
@@ -273,13 +265,17 @@ def _format_response(self, response, req_id):
273265

274266

275267
class OutputData:
276-
"""接收Triton服务返回的数据"""
268+
"""
269+
Receive data returned by Triton service
270+
"""
277271
def __init__(self):
278272
self._completed_requests = queue.Queue()
279273

280274

281275
def triton_callback(output_data, result, error):
282-
"""Triton客户端的回调函数"""
276+
"""
277+
Callback function for the Triton client.
278+
"""
283279
if error:
284280
output_data._completed_requests.put(error)
285281
else:
@@ -288,17 +284,17 @@ def triton_callback(output_data, result, error):
288284

289285
class ChatBot(object):
290286
"""
291-
对外的接口,用于创建ChatBotForPushMode的示例
287+
External interface for creating a ChatBotForPushMode client object.
292288
"""
293289
def __new__(cls, hostname, port, timeout=120):
294290
"""
295-
初始化函数,用于创建一个GRPCInferenceService客户端对象
291+
initialize a GRPCInferenceService client
296292
Args:
297-
hostname (str): 服务器的地址
298-
port (int): 服务器的端口号
299-
timeout (int): 请求超时时间,单位为秒,默认120秒
293+
hostname (str): server hostname
294+
port (int): GRPC port
295+
timeout (int): timeout(s), default 120 seconds
300296
Returns:
301-
ChatBotClass: 返回一个BaseChatBot对象
297+
ChatBotClass: BaseChatBot object
302298
"""
303299
if not isinstance(hostname, str) or not hostname:
304300
raise ValueError("Invalid hostname")

llm/client/fastdeploy_client/command.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121

2222
def _get_service_configuration():
2323
"""
24-
从环境变量获取服务配置信息
24+
Get the service URL from the environment variables.
25+
26+
Returns:
27+
tuple: (hostname, port)
2528
"""
2629
url = os.getenv("FASTDEPLOY_MODEL_URL")
2730

@@ -38,7 +41,7 @@ def _get_service_configuration():
3841

3942
def stream_generate(prompt):
4043
"""
41-
命令工具:流式返回
44+
Streaming interface
4245
"""
4346
hostname, port = _get_service_configuration()
4447
chatbot = ChatBot(hostname=hostname, port=port)
@@ -49,7 +52,7 @@ def stream_generate(prompt):
4952

5053
def generate(prompt):
5154
"""
52-
命令工具:整句返回
55+
Non-streaming interface that returns the entire sentence.
5356
"""
5457
hostname, port = _get_service_configuration()
5558
chatbot = ChatBot(hostname=hostname, port=port)
@@ -58,9 +61,6 @@ def generate(prompt):
5861

5962

6063
def main():
61-
"""
62-
命令工具主入口
63-
"""
6464
if len(sys.argv) < 2 or sys.argv[1] not in ["generate", "stream_generate"]:
6565
logging.error("Usage 1: fdclient generate \"Hello, How are you?\"")
6666
return

llm/client/fastdeploy_client/message.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@
1414

1515
class ChatMessage(object):
1616
"""
17-
多轮对话数据结构,当使用这个与ChatBot对话时
18-
会将对话记录存储在此结构体内,支持多轮
17+
Multi-turn chat message structure used when conversing with ChatBot.
1918
"""
2019
def __init__(self, prompt=None):
2120
if prompt is not None:
@@ -25,7 +24,7 @@ def __init__(self, prompt=None):
2524

2625
def add_user_message(self, content):
2726
"""
28-
添加一个用户消息
27+
add user message
2928
"""
3029
if len(self.message) > 0 and self.message[-1]["role"] != "assistant":
3130
raise Exception("Cannot add user message, because the role of the "
@@ -34,7 +33,7 @@ def add_user_message(self, content):
3433

3534
def add_assistant_message(self, content):
3635
"""
37-
添加一个assistant消息
36+
add assistant message
3837
"""
3938
if len(self.message) > 0 and self.message[-1]["role"] != "user":
4039
raise Exception("Cannot add user message, because the role of the "
@@ -43,7 +42,7 @@ def add_assistant_message(self, content):
4342

4443
def next_prompt(self, content):
4544
"""
46-
添加一个新的对话,保留用于兼容。
45+
add user message and return a new prompt
4746
"""
4847
self.add_user_message(content)
4948

llm/client/fastdeploy_client/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,7 @@
1313
# limitations under the License.
1414

1515
def is_enable_benchmark(**kwargs):
16-
"""是否是benchmark模式"""
16+
"""
17+
Check whether benchmark mode is enabled.
18+
"""
1719
return "benchmark" in kwargs and kwargs["benchmark"] == 1

llm/dockerfiles/Dockerfile_serving_cuda118_cudnn8

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda11.8-cudnn8-nccl2.15.5
22

33
WORKDIR /opt/output/
4-
COPY ./server/ /opt/output/Serving
4+
COPY ./server/ /opt/output/Serving/
55
COPY ./client/ /opt/output/client/
66

7+
ENV LD_LIBRARY_PATH "/usr/local/cuda-11.8/compat/:$LD_LIBRARY_PATH"
8+
79
RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu118/ \
810
&& python3 -m pip install paddlenlp==3.0.0b0 \
911
&& python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \
1012
&& apt-get clean && rm -rf /var/lib/apt/lists/*
1113

12-
ENV LD_LIBRARY_PATH "/usr/local/cuda-11.8/compat/:$LD_LIBRARY_PATH"
1314
RUN git clone https://gitee.com/paddlepaddle/PaddleNLP.git && cd PaddleNLP/csrc \
1415
&& python3 setup_cuda.py build && python3 setup_cuda.py install --user \
1516
&& cp -r /opt/output/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \

llm/dockerfiles/Dockerfile_serving_cuda123_cudnn9

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
FROM registry.baidubce.com/paddlepaddle/fastdeploy:llm-base-gcc12.3-cuda12.3-cudnn9-nccl2.15.5
22

33
WORKDIR /opt/output/
4-
COPY ./server/ /opt/output/Serving
4+
COPY ./server/ /opt/output/Serving/
55
COPY ./client/ /opt/output/client/
66

7+
ENV LD_LIBRARY_PATH "/usr/local/cuda-12.3/compat/:$LD_LIBRARY_PATH"
8+
79
RUN python3 -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu123/ \
810
&& python3 -m pip install paddlenlp==3.0.0b0 \
911
&& python3 -m pip install --no-cache-dir sentencepiece pycryptodome tritonclient[all]==2.41.1 \
1012
&& apt-get clean && rm -rf /var/lib/apt/lists/*
1113

12-
ENV LD_LIBRARY_PATH "/usr/local/cuda-12.3/compat/:$LD_LIBRARY_PATH"
1314
RUN git clone https://gitee.com/paddlepaddle/PaddleNLP.git && cd PaddleNLP/csrc \
1415
&& python3 setup_cuda.py build && python3 setup_cuda.py install --user \
1516
&& cp -r /opt/output/PaddleNLP/paddlenlp /usr/local/lib/python3.10/dist-packages/ \

0 commit comments

Comments
 (0)