提交 f0470e05 编写于 作者: H Hui Zhang

Do not dump all gradient info, since it slows down the training process

上级 8fef667f
...@@ -47,7 +47,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): ...@@ -47,7 +47,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
sum_square = layers.reduce_sum(square) sum_square = layers.reduce_sum(square)
sum_square_list.append(sum_square) sum_square_list.append(sum_square)
# debug log # debug log, not dump all since slow down train process
if i < 10:
logger.debug( logger.debug(
f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }") f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }")
...@@ -75,7 +76,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm): ...@@ -75,7 +76,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
new_grad = layers.elementwise_mul(x=g, y=clip_var) new_grad = layers.elementwise_mul(x=g, y=clip_var)
params_and_grads.append((p, new_grad)) params_and_grads.append((p, new_grad))
# debug log # debug log, not dump all since slow down train process
if i < 10:
logger.debug( logger.debug(
f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}" f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}"
) )
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册