未验证 提交 6366e0a9 编写于 作者: R Roc 提交者: GitHub

fix recompute (#42128) (#42216)

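* fix recompute: bypass the recompute path in MoELayer and call experts_fwd directly when the input has zero rows (x.shape[0] == 0)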

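* modify return: for empty input, experts_fwd returns x itself instead of a new paddle.empty(x.shape, x.dtype) tensor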
上级 40ac92b2
...@@ -399,7 +399,7 @@ class MoELayer(nn.Layer): ...@@ -399,7 +399,7 @@ class MoELayer(nn.Layer):
def experts_fwd(x, fwd_expert_count, experts): def experts_fwd(x, fwd_expert_count, experts):
if x.shape[0] == 0: if x.shape[0] == 0:
return paddle.empty(x.shape, x.dtype) return x
y = [] y = []
last_index = 0 last_index = 0
assert isinstance(fwd_expert_count, np.ndarray) assert isinstance(fwd_expert_count, np.ndarray)
...@@ -411,7 +411,7 @@ class MoELayer(nn.Layer): ...@@ -411,7 +411,7 @@ class MoELayer(nn.Layer):
last_index = expert_count + last_index last_index = expert_count + last_index
return paddle.concat(y, axis=0) return paddle.concat(y, axis=0)
if self.recompute_interval <= 0: if self.recompute_interval <= 0 or x.shape[0] == 0:
x = experts_fwd(x, fwd_expert_count.numpy(), self.experts) x = experts_fwd(x, fwd_expert_count.numpy(), self.experts)
else: else:
x = _hp_recompute(experts_fwd, x, x = _hp_recompute(experts_fwd, x,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册