Unverified commit 3662fb71, authored by Zeng Jinle, committed by GitHub

remove eval() calls in Eigen, test=develop (#21498)

Parent a3535812
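For context, a minimal standalone sketch (illustrative only, not part of this patch) of what `.eval()` does in Eigen: it forces an expression to be materialized into a temporary, whereas without it the expression stays lazy and is evaluated directly into its destination, so dropping a redundant `.eval()` avoids an extra temporary and an extra copy.

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::ArrayXf var_arr = Eigen::ArrayXf::Constant(4, 3.0f);
  const float epsilon = 1.0f;

  // With .eval(): the right-hand side is first materialized into a temporary
  // array, which is then copied into the destination.
  Eigen::ArrayXf with_eval = (var_arr + epsilon).sqrt().inverse().eval();

  // Without .eval(): the same expression stays lazy and is evaluated
  // coefficient-by-coefficient directly into the destination.
  Eigen::ArrayXf without_eval = (var_arr + epsilon).sqrt().inverse();

  std::cout << (with_eval - without_eval).abs().maxCoeff() << "\n";  // prints 0
  return 0;
}
```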
@@ -522,7 +522,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
EigenVectorArrayMap<T> inv_var_tmp(running_inv_var_data, C);
ConstEigenVectorArrayMap<T> var_arr(running_variance->data<T>(), C);
- inv_var_tmp = (var_arr + epsilon).sqrt().inverse().eval();
+ inv_var_tmp = (var_arr + epsilon).sqrt().inverse();
inv_var_data = running_inv_var_data;
}
@@ -70,7 +70,7 @@ class BilinearTensorProductKernel : public framework::OpKernel<T> {
if (bias) {
auto bias_vec = EigenMatrix<T>::From(*bias);
Eigen::DSizes<int, 2> bcast(batch_size, 1);
- output_mat.device(place) = bias_vec.broadcast(bcast).eval() + output_mat;
+ output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
}
}
};
@@ -143,8 +143,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
if (d_x) {
y_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
- .broadcast(bcast_for_x)
- .eval() *
+ .broadcast(bcast_for_x) *
y_mat;
blas.GEMM(CblasNoTrans, CblasTrans, batch_size, x_dim, y_dim, 1,
y_scale.data<T>(), weight_i.data<T>(), 1, d_x->data<T>());
@@ -153,8 +152,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
if (d_y || d_weight) {
auto output_vec_y =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
- .broadcast(bcast_for_y)
- .eval();
+ .broadcast(bcast_for_y);
x_scale_mat.device(place) = output_vec_y * x_mat;
if (d_y) {
blas.GEMM(CblasNoTrans, CblasNoTrans, batch_size, y_dim, x_dim, 1,
@@ -75,7 +75,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
- auto temp = (x_norm <= max_norm).template cast<T>().eval();
+ auto temp = (x_norm <= max_norm).template cast<T>();
auto scaling = temp + (static_cast<T>(1) - temp) * max_norm / x_norm;
Eigen::array<int, 1> one_dim{{1}};
Eigen::DSizes<int, 1> m_dsize(input->numel());
@@ -491,7 +491,7 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
sample_size * inv_var_tile_data * inv_var_tile_data *
(ddx_arr.colwise().sum() / sample_size - ddx_arr);
- dx_arr = scale_tile_data * dx_arr.eval();
+ dx_arr = scale_tile_data * dx_arr;
}
if (ddScale) {
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
@@ -532,7 +532,7 @@ class InstanceNormDoubleGradKernel<platform::CPUDeviceContext, T>
x_sub_mean_mul_invstd_arr *
(dy_arr * x_sub_mean_mul_invstd_arr).colwise().sum() /
sample_size);
- first_grad_arr = first_grad_arr.eval() * ddx_arr;
+ first_grad_arr = first_grad_arr * ddx_arr;
for (int nc = 0; nc < NxC; ++nc) {
int c = nc % C;
dscale_arr(c) += first_grad_arr.colwise().sum()(nc);
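A note on the two InstanceNorm changes above, where the destination also appears on the right-hand side (e.g. `dx_arr = scale_tile_data * dx_arr`): for coefficient-wise Eigen array products this self-assignment has no aliasing problem, since each output coefficient reads only the matching input coefficients, which is presumably why the `.eval()` temporary can be dropped. A minimal sketch of the same pattern (names and shapes are illustrative, not from the patch):

```cpp
#include <Eigen/Dense>
#include <cassert>

int main() {
  Eigen::ArrayXXf dx = Eigen::ArrayXXf::Random(3, 4);
  Eigen::ArrayXXf scale = Eigen::ArrayXXf::Random(3, 4);

  // Old form: materialize a copy of dx first, then multiply.
  Eigen::ArrayXXf expected = scale * dx.eval();

  // New form: coefficient-wise product assigned back onto itself; each
  // coefficient of dx is read exactly once before it is overwritten, so no
  // temporary is needed and the result is identical.
  dx = scale * dx;

  assert(((dx - expected).abs() < 1e-6f).all());
  return 0;
}
```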
@@ -71,7 +71,7 @@ class KLDivLossKernel : public framework::OpKernel<T> {
if ("none" == reduction) {
loss_t.device(place) = output;
} else if ("batchmean" == reduction) {
- auto output_sum = output.sum().eval();
+ auto output_sum = output.sum();
loss_t.device(place) = output_sum / output_sum.constant(n);
} else if ("mean" == reduction) {
loss_t.device(place) = output.mean();
@@ -90,7 +90,7 @@ struct SumGradFunctor {
typename DY, typename Dim>
void operator()(const DeviceContext& place, X* x, Y* y, DX* dx, DY* dy,
const Dim& dim, int size) {
- dx->device(place) = dy->eval().broadcast(dim);
+ dx->device(place) = dy->broadcast(dim);
}
};
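The SumGradFunctor hunk removes `.eval()` from a tensor-expression broadcast. A minimal, self-contained sketch of that pattern with Eigen's Tensor module (shapes and names are illustrative, not taken from the kernel):

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> dy(1, 4);
  dy.setConstant(2.0f);

  Eigen::Tensor<float, 2> dx(3, 4);
  Eigen::array<Eigen::Index, 2> bcast{{3, 1}};

  // dy.broadcast(bcast) is a lazy expression; assigning it evaluates the
  // broadcast directly into dx. The removed .eval() only materialized dy
  // into a temporary before broadcasting, which is unnecessary here.
  dx = dy.broadcast(bcast);

  std::cout << dx(2, 3) << "\n";  // prints 2
  return 0;
}
```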