From b7e1010b391304617e9f6e45df5d9c636a5d591f Mon Sep 17 00:00:00 2001 From: Connor Holmes Date: Fri, 23 Jun 2023 09:33:11 -0700 Subject: [PATCH] Missing strided copy for gated MLP (#3788) Co-authored-by: Ammar Ahmad Awan Co-authored-by: Jeff Rasley Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> --- deepspeed/module_inject/containers/features/gated_mlp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/deepspeed/module_inject/containers/features/gated_mlp.py b/deepspeed/module_inject/containers/features/gated_mlp.py index a2a99786..24f0826d 100644 --- a/deepspeed/module_inject/containers/features/gated_mlp.py +++ b/deepspeed/module_inject/containers/features/gated_mlp.py @@ -48,7 +48,14 @@ class HybridGatedMLPContainer(HybridEngineContainer): int8=reversed_dim, allocate_tensor=reversed_dim) if src is not None else None else: - super().mlp_inter_mp(mp_replace) + self.module.mlp.inter_w = mp_replace.strided_copy(self.module.mlp.inter_w, + self._h4h_w, + num_splits=2, + int8=reversed_dim) + self.module.mlp.inter_b = mp_replace.strided_copy(self.module.mlp.inter_b, + self._h4h_b, + num_splits=2, + int8=reversed_dim) def release_mlp(self): super().release_mlp() -- GitLab