From 36e26a53b03ff4e61b45404cb00247a53c05591c Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Wed, 28 Nov 2018 11:53:40 +0800 Subject: [PATCH] Optimize bilinear tensor product op (#14485) * optimize bilinear_tensor_product * add set zero to set grad to 0. --- .../operators/bilinear_tensor_product_op.h | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/paddle/fluid/operators/bilinear_tensor_product_op.h b/paddle/fluid/operators/bilinear_tensor_product_op.h index f23336f7b98..5017c3a457a 100644 --- a/paddle/fluid/operators/bilinear_tensor_product_op.h +++ b/paddle/fluid/operators/bilinear_tensor_product_op.h @@ -70,7 +70,7 @@ class BilinearTensorProductKernel : public framework::OpKernel { if (bias) { auto bias_vec = EigenMatrix::From(*bias); Eigen::DSizes bcast(batch_size, 1); - output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat; + output_mat.device(place) = bias_vec.broadcast(bcast).eval() + output_mat; } } }; @@ -99,13 +99,13 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { auto d_out_mat = EigenMatrix::From(*d_out); auto& place = *ctx.template device_context().eigen_device(); auto& dev_ctx = ctx.template device_context(); - // Create the intermediate variable to caculate the Output(Y@Grad). + // Create the intermediate variable to calculate the Output(Y@Grad). Tensor x_scale; x_scale.mutable_data(framework::make_ddim({batch_size, x_dim}), ctx.GetPlace()); auto x_scale_mat = EigenMatrix::From(x_scale); - // Create the intermediate variable to caculate the Output(X@Grad). + // Create the intermediate variable to calculate the Output(X@Grad). Tensor y_scale; y_scale.mutable_data(framework::make_ddim({batch_size, y_dim}), ctx.GetPlace()); @@ -113,65 +113,64 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { math::SetConstant set_zero; - // Set Output(X@Grad) be zero. 
if (d_x) { d_x->mutable_data(ctx.GetPlace()); set_zero(dev_ctx, d_x, static_cast(0)); } - // Set Output(Y@Grad) be zero. if (d_y) { d_y->mutable_data(ctx.GetPlace()); set_zero(dev_ctx, d_y, static_cast(0)); } + if (d_weight) { + d_weight->mutable_data(ctx.GetPlace()); + } + auto blas = math::GetBlas(ctx); // Caculate the Output(X@Grad) and Output(Y@Grad). - if (d_x || d_y) { + if (d_x || d_y || d_weight) { Eigen::DSizes bcast_for_x(1, y_dim); Eigen::DSizes bcast_for_y(1, x_dim); + Eigen::DSizes bcast_for_weight(1, x_dim); + for (int i = 0; i < out_dim; ++i) { Tensor weight_i = weight->Slice(i, i + 1).Resize( framework::make_ddim({x_dim, y_dim})); auto output_vec = d_out_mat.chip(i, 1); + if (d_x) { y_scale_mat.device(place) = output_vec.reshape(Eigen::DSizes(batch_size, 1)) - .broadcast(bcast_for_x) * + .broadcast(bcast_for_x) + .eval() * y_mat; blas.GEMM(CblasNoTrans, CblasTrans, batch_size, x_dim, y_dim, 1, y_scale.data(), weight_i.data(), 1, d_x->data()); } - if (d_y) { - x_scale_mat.device(place) = + + if (d_y || d_weight) { + auto output_vec_y = output_vec.reshape(Eigen::DSizes(batch_size, 1)) - .broadcast(bcast_for_y) * - x_mat; - blas.GEMM(CblasNoTrans, CblasNoTrans, batch_size, y_dim, x_dim, 1, - x_scale.data(), weight_i.data(), 1, d_y->data()); + .broadcast(bcast_for_y) + .eval(); + x_scale_mat.device(place) = output_vec_y * x_mat; + if (d_y) { + blas.GEMM(CblasNoTrans, CblasNoTrans, batch_size, y_dim, x_dim, 1, + x_scale.data(), weight_i.data(), 1, d_y->data()); + } + if (d_weight) { + Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( + framework::make_ddim({x_dim, y_dim})); + blas.GEMM(CblasTrans, CblasNoTrans, x_dim, y_dim, batch_size, 1, + x_scale.data(), y->data(), 0, d_weight_i.data()); + } } } } - // Caculate the gradient of Input(Weight). 
- if (d_weight) { - d_weight->mutable_data(ctx.GetPlace()); - Eigen::DSizes bcast_for_weight(1, x_dim); - for (int i = 0; i < out_dim; ++i) { - Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize( - framework::make_ddim({x_dim, y_dim})); - auto output_vec = d_out_mat.chip(i, 1); - x_scale_mat.device(place) = - output_vec.reshape(Eigen::DSizes(batch_size, 1)) - .broadcast(bcast_for_weight) * - x_mat; - blas.GEMM(CblasTrans, CblasNoTrans, x_dim, y_dim, batch_size, 1, - x_scale.data(), y->data(), 0, d_weight_i.data()); - } - } - - // Caculate the gradient of Input(Bias). + // Calculate the gradient of Input(Bias). if (d_bias) { d_bias->mutable_data(ctx.GetPlace()); auto d_bias_mat = framework::EigenVector::Flatten(*d_bias); -- GitLab