@@ -23,7 +23,7 @@
 from paddle.autograd import PyLayer
 from paddle.distributed.fleet.utils import recompute
 
-from fastdeploy.model_executor.layers.utils import _set_var_distributed, get_tensor
+from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.models.ernie4_5_vl.dist_utils import (
     RowSequenceParallelLinear,
     all_gather_group,
@@ -197,19 +197,7 @@ def __init__(
         self.after_norm = RMSNorm(out_config)
 
         if self.tensor_parallel_degree > 1:
-            for idx in [2, 3]:
-                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].weight)
-                mark_as_sequence_parallel_parameter(self.spatial_linear[idx].bias)
-                _set_var_distributed(self.spatial_linear[idx].weight, split_axis=0)
-                _set_var_distributed(self.spatial_linear[idx].bias, split_axis=0)
-            if self.use_temporal_conv:
-                for idx in [0, 2, 3]:
-                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].weight)
-                    mark_as_sequence_parallel_parameter(self.temporal_linear[idx].bias)
-
-            mark_as_sequence_parallel_parameter(self.mlp.weight)
-            mark_as_sequence_parallel_parameter(self.mlp.bias)
-            mark_as_sequence_parallel_parameter(self.after_norm.weight)
+
             set_weight_attrs(self.spatial_linear[0].weight, {"output_dim": False})
 
     def spatial_conv_reshape(self, x, spatial_conv_size):
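For reference, a minimal sketch of the sequence-parallel bookkeeping the removed hunk performed, restated as a standalone function. The helper calls and attribute names are taken directly from the diff; the function name is illustrative, and the paddlenlp import path for mark_as_sequence_parallel_parameter is an assumption (the original file's import for it is not visible in this hunk).

    # Sketch only: mirrors the removed hunk, not the repository's current code.
    from paddlenlp.transformers.sequence_parallel_utils import (
        mark_as_sequence_parallel_parameter,  # import path assumed
    )

    from fastdeploy.model_executor.layers.utils import _set_var_distributed

    def mark_resampler_sequence_parallel(model) -> None:
        # Later spatial projector layers: sync their (replicated) grads across
        # the sequence-parallel group, and record axis 0 as the tensor-parallel
        # split axis so sharding/weight-loading tools handle them correctly.
        for idx in [2, 3]:
            layer = model.spatial_linear[idx]
            mark_as_sequence_parallel_parameter(layer.weight)
            mark_as_sequence_parallel_parameter(layer.bias)
            _set_var_distributed(layer.weight, split_axis=0)
            _set_var_distributed(layer.bias, split_axis=0)

        # Temporal branch, when enabled, gets the same sequence-parallel marking.
        if model.use_temporal_conv:
            for idx in [0, 2, 3]:
                layer = model.temporal_linear[idx]
                mark_as_sequence_parallel_parameter(layer.weight)
                mark_as_sequence_parallel_parameter(layer.bias)

        # The output MLP and final norm are likewise replicated parameters.
        mark_as_sequence_parallel_parameter(model.mlp.weight)
        mark_as_sequence_parallel_parameter(model.mlp.bias)
        mark_as_sequence_parallel_parameter(model.after_norm.weight)

After this change, the only tensor-parallel handling left in __init__ is the set_weight_attrs call on spatial_linear[0].weight shown in the diff.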