Unverified commit 3662fb71, authored by Zeng Jinle, committed by GitHub

remove eval() calls in Eigen, test=develop (#21498)

Parent a3535812
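This change removes explicit .eval() calls from Eigen expressions in several CPU kernels. With Eigen's expression templates, an arithmetic expression stays lazy until it is assigned, and the assignment fuses the whole computation into a single pass over the data; calling .eval() in the middle forces the intermediate result into a temporary, which costs an extra allocation and an extra pass. A minimal sketch of the difference on a plain Eigen Array (illustrative only, not taken from the patch):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::ArrayXf var(4);
  var << 1.f, 2.f, 3.f, 4.f;
  const float epsilon = 1e-5f;

  // Lazy: the whole expression is fused into one loop at assignment time.
  Eigen::ArrayXf inv_std = (var + epsilon).sqrt().inverse();

  // Eager: .eval() first materializes the same values into a temporary.
  Eigen::ArrayXf inv_std_eager = (var + epsilon).sqrt().inverse().eval();

  // Both forms produce identical results; only the temporary differs.
  std::cout << (inv_std - inv_std_eager).abs().maxCoeff() << "\n";  // 0
  return 0;
}
```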
@@ -522,7 +522,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
       EigenVectorArrayMap<T> inv_var_tmp(running_inv_var_data, C);
       ConstEigenVectorArrayMap<T> var_arr(running_variance->data<T>(), C);
-      inv_var_tmp = (var_arr + epsilon).sqrt().inverse().eval();
+      inv_var_tmp = (var_arr + epsilon).sqrt().inverse();
       inv_var_data = running_inv_var_data;
     }
...
@@ -70,7 +70,7 @@ class BilinearTensorProductKernel : public framework::OpKernel<T> {
     if (bias) {
       auto bias_vec = EigenMatrix<T>::From(*bias);
       Eigen::DSizes<int, 2> bcast(batch_size, 1);
-      output_mat.device(place) = bias_vec.broadcast(bcast).eval() + output_mat;
+      output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
     }
   }
 };
@@ -143,8 +143,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
     if (d_x) {
       y_scale_mat.device(place) =
           output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
-              .broadcast(bcast_for_x)
-              .eval() *
+              .broadcast(bcast_for_x) *
           y_mat;
       blas.GEMM(CblasNoTrans, CblasTrans, batch_size, x_dim, y_dim, 1,
                 y_scale.data<T>(), weight_i.data<T>(), 1, d_x->data<T>());
@@ -153,8 +152,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
     if (d_y || d_weight) {
       auto output_vec_y =
           output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
-              .broadcast(bcast_for_y)
-              .eval();
+              .broadcast(bcast_for_y);
       x_scale_mat.device(place) = output_vec_y * x_mat;
       if (d_y) {
         blas.GEMM(CblasNoTrans, CblasNoTrans, batch_size, y_dim, x_dim, 1,
...
@@ -75,7 +75,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    auto temp = (x_norm <= max_norm).template cast<T>().eval();
+    auto temp = (x_norm <= max_norm).template cast<T>();
     auto scaling = temp + (static_cast<T>(1) - temp) * max_norm / x_norm;
     Eigen::array<int, 1> one_dim{{1}};
     Eigen::DSizes<int, 1> m_dsize(input->numel());
...
@@ -491,7 +491,7 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
               sample_size * inv_var_tile_data * inv_var_tile_data *
                   (ddx_arr.colwise().sum() / sample_size - ddx_arr);
-      dx_arr = scale_tile_data * dx_arr.eval();
+      dx_arr = scale_tile_data * dx_arr;
     }
     if (ddScale) {
       ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
@@ -532,7 +532,7 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
               x_sub_mean_mul_invstd_arr *
                   (dy_arr * x_sub_mean_mul_invstd_arr).colwise().sum() /
                   sample_size);
-      first_grad_arr = first_grad_arr.eval() * ddx_arr;
+      first_grad_arr = first_grad_arr * ddx_arr;
       for (int nc = 0; nc < NxC; ++nc) {
         int c = nc % C;
         dscale_arr(c) += first_grad_arr.colwise().sum()(nc);
...
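In the instance-norm hunks the destination array also appears on the right-hand side (e.g. dx_arr = scale_tile_data * dx_arr.eval()). Dropping .eval() is still safe there because the product is purely coefficient-wise: each output coefficient depends only on the corresponding input coefficients, so no aliasing hazard arises. A small standalone sketch of that pattern (variable names and shapes are made up):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  // Stand-ins for scale_tile_data and dx_arr from the kernel.
  Eigen::ArrayXXf scale(2, 3), dx(2, 3);
  scale.setConstant(3.f);
  dx.setConstant(2.f);

  // Coefficient-wise product: writing back into dx without .eval() is safe,
  // since each result coefficient reads only the matching input coefficients.
  dx = scale * dx;

  std::cout << dx << "\n";  // every entry is 6
  return 0;
}
```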
@@ -71,7 +71,7 @@ class KLDivLossKernel : public framework::OpKernel<T> {
     if ("none" == reduction) {
       loss_t.device(place) = output;
     } else if ("batchmean" == reduction) {
-      auto output_sum = output.sum().eval();
+      auto output_sum = output.sum();
       loss_t.device(place) = output_sum / output_sum.constant(n);
     } else if ("mean" == reduction) {
       loss_t.device(place) = output.mean();
...
@@ -90,7 +90,7 @@ struct SumGradFunctor {
             typename DY, typename Dim>
   void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy,
                   const Dim& dim, int size) {
-    dx->device(place) = dy->eval().broadcast(dim);
+    dx->device(place) = dy->broadcast(dim);
   }
 };
...
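The kldiv_loss and reduce-grad hunks use Eigen's unsupported Tensor module, where the same idea applies: a sum() or broadcast() expression assigned through .device(place) is evaluated directly into the destination, so the intermediate .eval() only added a temporary. A minimal sketch of the broadcast case on the default CPU device (illustrative only, not taken from the patch):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> dy(1, 3);
  dy.setValues({{1.f, 2.f, 3.f}});

  // Broadcast the 1x3 gradient to 4x3; the expression is evaluated lazily
  // into dx by the device assignment, with no intermediate .eval() tensor.
  Eigen::array<Eigen::Index, 2> bcast{{4, 1}};
  Eigen::Tensor<float, 2> dx(4, 3);
  Eigen::DefaultDevice dev;
  dx.device(dev) = dy.broadcast(bcast);

  std::cout << dx << "\n";  // four rows, each equal to dy
  return 0;
}
```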