
Commit fd44c00

Merge pull request #2562 from kevincheng2/develop
[LLM] update v1.2 images
2 parents: 3bb05ac + 203f3ae

2 files changed: 4 additions & 4 deletions

llm/README.md (2 additions & 2 deletions)

````diff
@@ -15,9 +15,9 @@
 # Mount the model files
 export MODEL_PATH=${PWD}/Llama-3-8B-A8W8C8
 
-docker run --gpus all --shm-size 5G --network=host \
+docker run --gpus all --shm-size 5G --network=host --privileged --cap-add=SYS_PTRACE \
 -v ${MODEL_PATH}:/models/ \
--dit registry.baidubce.com/paddlepaddle/fastdeploy:llm-serving-cuda123-cudnn9-v1.0 \
+-dit registry.baidubce.com/paddlepaddle/fastdeploy:llm-serving-cuda123-cudnn9-v1.2 \
 bash -c 'export USE_CACHE_KV_INT8=1 && cd /opt/output/Serving && bash start_server.sh; exec bash'
 ```
 
````
llm/docs/FastDeploy_usage_tutorial.md (2 additions & 2 deletions)

````diff
@@ -144,7 +144,7 @@ health endpoint: (whether the model is ready for inference)
 from fastdeploy_client.chatbot import ChatBot
 
 hostname = "127.0.0.1"  # hostname where the service is deployed
-port = 8000  # the GRPC_PORT configured for the service
+port = 8811  # the GRPC_PORT configured for the service
 
 chatbot = ChatBot(hostname=hostname, port=port)
 
@@ -153,7 +153,7 @@ result = chatbot.generate("你好", topp=0.8, max_dec_len=128, timeout=120)
 print(result)
 
 # Streaming interface
-chatbot = ChatBot(hostname=hostname, port=port, model_id=model_id, mode=mode)
+chatbot = ChatBot(hostname=hostname, port=port)
 stream_result = chatbot.stream_generate("你好", max_dec_len=128, timeout=120)
 for res in stream_result:
     print(res)
````
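For context, the two tutorial hunks above assemble into the following client usage. This is a hedged sketch, not part of the commit: it assumes a FastDeploy serving container is already running with GRPC_PORT 8811, and the wrapper function name `query_service` is invented here for illustration; the individual calls are taken verbatim from the updated tutorial.

```python
# Sketch of the updated fastdeploy_client usage shown in the diff above.
# Assumes a running FastDeploy serving container (GRPC_PORT=8811); the
# network calls are wrapped in a function so nothing connects on import.

def query_service(hostname: str = "127.0.0.1", port: int = 8811) -> None:
    # Imported lazily: fastdeploy_client ships inside the serving image
    # and may not be installed where this sketch is read.
    from fastdeploy_client.chatbot import ChatBot

    chatbot = ChatBot(hostname=hostname, port=port)

    # Blocking call: returns the complete generation in one result.
    result = chatbot.generate("你好", topp=0.8, max_dec_len=128, timeout=120)
    print(result)

    # Streaming call: after this commit the ChatBot is constructed with
    # hostname/port only (no model_id or mode arguments).
    stream_result = chatbot.stream_generate("你好", max_dec_len=128, timeout=120)
    for res in stream_result:
        print(res)
```

Call `query_service()` once the container from the README command is up; the defaults mirror the tutorial's hostname and the new port.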
