We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent bba279c, commit 7bdc6f4 (Copy full SHA for 7bdc6f4)
1 file changed
fastdeploy/model_executor/models/glm4_moe.py
@@ -160,7 +160,7 @@ def forward(self, x):
160
out = out + shared_experts_out
161
# We do the TP all-reduce after summing the expert outputs.
162
if self.tensor_parallel_size > 1:
163
- tensor_model_parallel_all_reduce(out)
+ tensor_model_parallel_all_reduce(out, self.tp_group)
164
return out
165
166
0 commit comments