未验证 提交 926b8861 编写于 作者: R ronnywang 提交者: GitHub

[CustomDevice] fix BatchNorm (#53820)

上级 434343c6
......@@ -975,6 +975,29 @@ class BatchNorm(Layer):
)
self._variance.stop_gradient = True
# TODO(qili93): temporary workaround for Ascend NPU performance; to be removed along with the npu_identity op
if (
_global_flags()['FLAGS_npu_storage_format']
and 'npu' in get_all_custom_device_type()
):
with no_grad():
weight_trans = _C_ops.npu_identity(
self.weight, 3
) # ACL_FORMAT_NC1HWC0 = 3
bias_trans = _C_ops.npu_identity(
self.bias, 3
) # ACL_FORMAT_NC1HWC0 = 3
mean_trans = _C_ops.npu_identity(
self._mean, 3
) # ACL_FORMAT_NC1HWC0 = 3
var_trans = _C_ops.npu_identity(
self._variance, 3
) # ACL_FORMAT_NC1HWC0 = 3
weight_trans._share_underline_tensor_to(self.weight)
bias_trans._share_underline_tensor_to(self.bias)
mean_trans._share_underline_tensor_to(self._mean)
var_trans._share_underline_tensor_to(self._variance)
self._in_place = in_place
self._data_layout = data_layout
self._momentum = momentum
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册