Skip to content

Commit bba279c

Browse files
ltd0924 and yuanlehome authored
[Feature] support rdma IB transfer (#4123)
* Update serving_chat.py
* Update serving_completion.py
* Update serving_completion.py
* mv connection_manager init
* [BugFix] fix kv cache
* fix format

---------

Co-authored-by: Yuanle Liu <yuanlehome@163.com>
1 parent 4f460db commit bba279c

4 files changed

Lines changed: 60 additions & 16 deletions

File tree

fastdeploy/cache_manager/transfer_factory/ipc_cache_transfer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def __init__(self, rank_id_, remote_gpu_id_, layer_num, local_gpu_id_):
4545
self.local_gpu_id = int(local_gpu_id_)
4646
tmp = paddle.ones([1, 1])
4747
logger.info(f"init ipc rank{self.rank_id} with remote {self.remote_gpu_id} {self.local_gpu_id}")
48+
paddle.set_device(f"gpu:{self.local_gpu_id}")
4849
for layer_id in range(layer_num):
4950
key_unique_name = f"key_caches_{layer_id}_rank{self.rank_id}.device{self.remote_gpu_id}"
5051
value_unique_name = f"value_caches_{layer_id}_rank{self.rank_id}.device{self.remote_gpu_id}"

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/include/kvcache_connection.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,13 @@ struct IbDeviceInfo {
7373
int realPort;
7474
int maxQp;
7575
};
76-
7776
/// @brief Queue Pair information for RDMA
7877
struct QpInfo {
7978
uint32_t lid;
8079
uint32_t qpn;
8180
uint32_t psn;
81+
uint8_t sl; // Service Level for IB networks
82+
uint8_t path_bits; // Path Bits for IB networks
8283
union ibv_gid gid;
8384
enum ibv_mtu mtu;
8485

@@ -88,7 +89,10 @@ struct QpInfo {
8889
intBuffer[0] = htonl(lid);
8990
intBuffer[1] = htonl(qpn);
9091
intBuffer[2] = htonl(psn);
91-
memcpy(buffer + 12, gid.raw, sizeof(gid.raw));
92+
// Pack SL and Path Bits into the 4th uint32_t
93+
uint32_t sl_path = (static_cast<uint32_t>(sl) << 8) | static_cast<uint32_t>(path_bits);
94+
intBuffer[3] = htonl(sl_path);
95+
memcpy(buffer + 16, gid.raw, sizeof(gid.raw));
9296
// NOTE(review): gid.raw was just copied to buffer + 16. If intBuffer is a
// uint32_t view of buffer (declaration not visible here) and gid.raw is
// 16 bytes, index 7 is byte offset 28, which lies inside the GID region and
// would overwrite its last 4 bytes. Index 8 (offset 32) looks intended, given
// the declared size of 16 + sizeof(gid.raw) + 4 — confirm.
intBuffer[7] = htonl(static_cast<uint32_t>(mtu));
9397
}
9498

@@ -98,11 +102,14 @@ struct QpInfo {
98102
lid = ntohl(intBuffer[0]);
99103
qpn = ntohl(intBuffer[1]);
100104
psn = ntohl(intBuffer[2]);
101-
memcpy(gid.raw, buffer + 12, sizeof(gid.raw));
105+
uint32_t sl_path = ntohl(intBuffer[3]);
106+
sl = static_cast<uint8_t>((sl_path >> 8) & 0xFF);
107+
path_bits = static_cast<uint8_t>(sl_path & 0xFF);
108+
memcpy(gid.raw, buffer + 16, sizeof(gid.raw));
102109
// NOTE(review): gid.raw was just read from buffer + 16. If intBuffer is a
// uint32_t view of buffer (declaration not visible here) and gid.raw is
// 16 bytes, index 7 is byte offset 28, i.e. the last 4 bytes of the GID region
// rather than a separate MTU slot. Index 8 (offset 32) looks intended — confirm.
mtu = static_cast<ibv_mtu>(ntohl(intBuffer[7]));
103110
}
104111

105-
static const size_t size = 12 + sizeof(gid.raw) + 4;
112+
static const size_t size = 16 + sizeof(gid.raw) + 4;
106113
};
107114

108115
/// @brief RDMA connection context

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/include/util.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ class KVCacheConfig {
156156
const char* error_file_path_;
157157
bool relax_ordering_enabled_;
158158
int ib_timeout_;
159+
int ib_service_level_;
160+
int ib_src_path_bits_;
159161
const char* rdma_nics_;
160162

161163
// Private constructor for singleton pattern
@@ -213,6 +215,18 @@ class KVCacheConfig {
213215
"KVCACHE_IB_TIMEOUT"
214216
);
215217

218+
ib_service_level_ = parse_int_value(
219+
std::getenv("KVCACHE_IB_SERVICE_LEVEL"),
220+
0,
221+
"KVCACHE_IB_SERVICE_LEVEL"
222+
);
223+
224+
ib_src_path_bits_ = parse_int_value(
225+
std::getenv("KVCACHE_IB_SRC_PATH_BITS"),
226+
0,
227+
"KVCACHE_IB_SRC_PATH_BITS"
228+
);
229+
216230
rdma_nics_ = std::getenv("KVCACHE_RDMA_NICS");
217231
}
218232

@@ -255,6 +269,8 @@ class KVCacheConfig {
255269
}
256270

257271
int get_ib_timeout() const { return ib_timeout_; }
272+
int get_ib_service_level() const { return ib_service_level_; }
273+
int get_ib_src_path_bits() const { return ib_src_path_bits_; }
258274

259275
// Configuration retrieval methods
260276
int get_rdma_gid_index() const { return rdma_gid_index_; }

fastdeploy/cache_manager/transfer_factory/kvcache_transfer/src/kvcache_connection.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,12 @@ int parse_port_ib_info() {
169169
dev_info.maxQp = dev_attr.max_qp;
170170
strncpy(dev_info.devName, dev_name, MAXNAMESIZE);
171171

172-
INFO("Adding device %s port %d (%s)", dev_name, port_num,
173-
port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND ? "IB" : "RoCE");
172+
if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
173+
INFO("Adding IB device %s port %d (LID:0x%x Rate:%dGbps)",
174+
// NOTE(review): the value logged as "Rate:%dGbps" is port_attr.active_speed / 10.
// active_speed is presumably the encoded speed code reported by ibv_query_port,
// not a value in units of 0.1 Gbps — verify the conversion.
dev_name, port_num, port_attr.lid, port_attr.active_speed/10);
175+
} else {
176+
INFO("Adding RoCE device %s port %d", dev_name, port_num);
177+
}
174178

175179
g_ib_all_devs.push_back(dev_info);
176180
++g_kvcache_ib_dev_nums;
@@ -304,14 +308,24 @@ QpStatus modify_qp_to_rts(
304308
attr.max_dest_rd_atomic = 1;
305309
attr.min_rnr_timer = 12;
306310

307-
attr.ah_attr.is_global = 1;
308-
attr.ah_attr.grh.hop_limit = 255;
309-
attr.ah_attr.grh.flow_label = 0;
310-
attr.ah_attr.grh.traffic_class = 0;
311-
attr.ah_attr.grh.dgid.global.subnet_prefix = (dest->gid.global.subnet_prefix);
312-
attr.ah_attr.grh.dgid.global.interface_id = (dest->gid.global.interface_id);
313-
attr.ah_attr.grh.sgid_index = sgid_id;
311+
bool use_grh = (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET);
314312

313+
if (use_grh) {
314+
attr.ah_attr.is_global = 1;
315+
attr.ah_attr.grh.hop_limit = 255;
316+
attr.ah_attr.grh.flow_label = 0;
317+
attr.ah_attr.grh.traffic_class = 0;
318+
attr.ah_attr.grh.dgid.global.subnet_prefix = (dest->gid.global.subnet_prefix);
319+
attr.ah_attr.grh.dgid.global.interface_id = (dest->gid.global.interface_id);
320+
attr.ah_attr.grh.sgid_index = sgid_id;
321+
} else {
322+
attr.ah_attr.is_global = 0;
323+
attr.ah_attr.dlid = dest->lid;
324+
attr.ah_attr.sl = KVCacheConfig::getInstance().get_ib_service_level(); // Service level taken from the configuration
325+
if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
326+
attr.ah_attr.src_path_bits = KVCacheConfig::getInstance().get_ib_src_path_bits(); // IB-specific source path bits
327+
}
328+
}
315329

316330
// NOTE(review): this unconditional assignment runs after the link-layer branch
// above and resets src_path_bits to 0, discarding the value obtained from
// get_ib_src_path_bits() in the InfiniBand branch. Confirm whether this line
// should be removed or moved before the branch.
attr.ah_attr.src_path_bits = 0;
317331
attr.ah_attr.port_num = port;
@@ -602,11 +616,17 @@ bool client_exchange_destinations(
602616

603617
my_dest.lid = ctx->portinfo.lid;
604618
my_dest.mtu = ctx->portinfo.active_mtu;
619+
my_dest.sl = KVCacheConfig::getInstance().get_ib_service_level();
620+
my_dest.path_bits = KVCacheConfig::getInstance().get_ib_src_path_bits();
605621

606622
// Validate LID for InfiniBand
607-
if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest.lid) {
608-
ERR("Invalid LID 0x%04x for non-Ethernet link layer", my_dest.lid);
609-
return false;
623+
if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET) {
624+
if (!my_dest.lid) {
625+
ERR("Invalid LID 0x%04x for IB network", my_dest.lid);
626+
return false;
627+
}
628+
LOGD("IB network detected - LID:0x%04x SL:%d PathBits:%d",
629+
my_dest.lid, my_dest.sl, my_dest.path_bits);
610630
}
611631

612632
// Get GID if specified

0 commit comments

Comments
 (0)