diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 912415192659dc004f54a76e9cd1a20581d512a6..2e31d1c9c708225135e27c93ba94722794c4b282 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -865,8 +865,8 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
     auto temp1 = static_cast<T>(1) /
                  (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
-    auto temp2 = temp1 * (static_cast<T>(1) - (beta * out));
-    dx.device(d) = dout * ((beta * out) + temp2);
+    auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out));
+    dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2);
   }
 };
diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc
index a02128c5a54c80ca7ccf9db347cd53f28bbb50f8..39b0c856996c11c6efdb530f1396afd5731c778d 100644
--- a/paddle/fluid/operators/attention_lstm_op.cc
+++ b/paddle/fluid/operators/attention_lstm_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/attention_lstm_op.h"
-#include <sys/time.h>
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/cpu_vec.h"
diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h
index 2d9faed648aef78da60706e13db3862080c96514..f18d09d33e9052929b1ff9b36bb2b371fb513d37 100644
--- a/paddle/fluid/operators/gru_unit_op.h
+++ b/paddle/fluid/operators/gru_unit_op.h
@@ -92,12 +92,12 @@ class GRUUnitKernel : public framework::OpKernel<T> {
         gate_data, frame_size * 3);
 
     // calculate activited gate
-    Eigen::array<int, 2> extents({{batch_size, frame_size}});
-    Eigen::array<int, 2> u_offsets({{0, 0}});
+    Eigen::array<int, 2> extents = {batch_size, frame_size};
+    Eigen::array<int, 2> u_offsets = {0, 0};
     ActCompute(context.Attr<int>("gate_activation"), place,
                g.slice(u_offsets, extents), g.slice(u_offsets, extents));
     auto u = g.slice(u_offsets, extents);  // update gate
-    Eigen::array<int, 2> r_offsets({{0, frame_size}});
+    Eigen::array<int, 2> r_offsets = {0, frame_size};
     ActCompute(context.Attr<int>("gate_activation"), place,
                g.slice(r_offsets, extents), g.slice(r_offsets, extents));
     auto r = g.slice(r_offsets, extents);  // reset gate
@@ -107,7 +107,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
         weight_data + frame_size * frame_size * 2, frame_size, 1,
         gate_data + frame_size * 2, frame_size * 3);
 
-    Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
+    Eigen::array<int, 2> c_offsets = {0, frame_size * 2};
     ActCompute(context.Attr<int>("activation"), place,
                g.slice(c_offsets, extents), g.slice(c_offsets, extents));
     auto c = g.slice(c_offsets, extents);  // output candidate
@@ -171,12 +171,12 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
     int batch_size = input->dims()[0];
     int frame_size = hidden_prev->dims()[1];
 
-    Eigen::array<int, 2> extents({{batch_size, frame_size}});
-    Eigen::array<int, 2> u_offsets({{0, 0}});
+    Eigen::array<int, 2> extents = {batch_size, frame_size};
+    Eigen::array<int, 2> u_offsets = {0, 0};
     auto u = g.slice(u_offsets, extents);  // update gate
-    Eigen::array<int, 2> r_offsets({{0, frame_size}});
+    Eigen::array<int, 2> r_offsets = {0, frame_size};
     auto r = g.slice(r_offsets, extents);  // reset gate
-    Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
+    Eigen::array<int, 2> c_offsets = {0, frame_size * 2};
     auto c = g.slice(c_offsets, extents);  // output candidate
 
     // backward for unactivated update gate
diff --git a/paddle/fluid/operators/label_smooth_op.h b/paddle/fluid/operators/label_smooth_op.h
index f56fd95e96526c59e040fbbd2812360e59570a08..f3da17de011053fa118b5a4257bb5c3b00084741 100644
--- a/paddle/fluid/operators/label_smooth_op.h
+++ b/paddle/fluid/operators/label_smooth_op.h
@@ -38,7 +38,8 @@ class LabelSmoothKernel : public framework::OpKernel<T> {
       auto dist = framework::EigenVector<T>::Flatten(*dist_t);
       out.device(dev) =
           static_cast<T>(1 - epsilon) * in +
-          epsilon * dist.broadcast(Eigen::DSizes<int, 1>(in_t->numel()));
+          static_cast<T>(epsilon) *
+              dist.broadcast(Eigen::DSizes<int, 1>(in_t->numel()));
     } else {
       out.device(dev) = static_cast<T>(1 - epsilon) * in +
                         static_cast<T>(epsilon / label_dim);
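A note on the activation_op.h hunk: `beta` is a `float` member of the functor, while the Eigen expressions run in the kernel's scalar type `T`. Without the explicit `static_cast<T>(beta)`, the mixed-type products can fail to compile or silently promote for types such as `double` or `platform::float16`, depending on `T` and the Eigen version. Below is a minimal scalar sketch (not Paddle's kernel; names are illustrative) of the same gradient with the cast applied once up front:

```cpp
#include <cmath>
#include <cstdio>

// Scalar stand-in for SwishGradFunctor: swish(x) = x * sigmoid(beta * x), so
// d/dx swish(x) = beta * swish(x) + sigmoid(beta * x) * (1 - beta * swish(x)).
template <typename T>
T swish_grad(T x, float beta) {
  const T b = static_cast<T>(beta);  // cast once; every product below stays in T
  const T temp1 = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-b * x));
  const T out = x * temp1;                                // swish(x)
  const T temp2 = temp1 * (static_cast<T>(1) - b * out);
  return b * out + temp2;                                 // dout taken as 1
}

int main() {
  std::printf("%.4f\n", swish_grad<double>(1.0, 1.0f));  // prints 0.9277
  return 0;
}
```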
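On the gru_unit_op.h hunks: with C++11 available, `Eigen::array` is an alias for `std::array`, an aggregate with no user-declared constructors. My assumption (the diff itself doesn't say) is that the parenthesized `({{...}})` spelling was rejected or warned about by some toolchains, whereas plain copy-list-initialization is well-formed on any conforming compiler. A standalone sketch contrasting the two spellings:

```cpp
#include <cstdio>
#include <unsupported/Eigen/CXX11/Tensor>  // defines Eigen::array

int main() {
  int batch_size = 4, frame_size = 8;
  // Old spelling: constructor-style call wrapping a braced list.
  // Eigen::array<int, 2> extents({{batch_size, frame_size}});
  // New spelling: aggregate copy-list-initialization.
  Eigen::array<int, 2> extents = {batch_size, frame_size};
  Eigen::array<int, 2> u_offsets = {0, 0};
  std::printf("offsets (%d, %d), extents (%d, %d)\n",
              u_offsets[0], u_offsets[1], extents[0], extents[1]);
  return 0;
}
```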
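And for label_smooth_op.h: the kernel computes `out = (1 - epsilon) * in + epsilon * dist` when a prior distribution is supplied, falling back to a uniform `epsilon / label_dim` otherwise; the new `static_cast<T>(epsilon)` applies the same scalar-type discipline as the swish fix. A rough scalar sketch of the uniform branch (hypothetical helper, not the Paddle kernel):

```cpp
#include <cstdio>
#include <vector>

// label_smooth: out[i] = (1 - eps) * in[i] + eps / K, K = number of classes.
template <typename T>
std::vector<T> label_smooth(const std::vector<T>& onehot, float epsilon) {
  const T eps = static_cast<T>(epsilon);  // keep the arithmetic in T
  const T k = static_cast<T>(onehot.size());
  std::vector<T> out(onehot.size());
  for (std::size_t i = 0; i < onehot.size(); ++i) {
    out[i] = (static_cast<T>(1) - eps) * onehot[i] + eps / k;
  }
  return out;
}

int main() {
  for (double v : label_smooth<double>({0.0, 1.0, 0.0, 0.0}, 0.1f)) {
    std::printf("%.4f ", v);  // 0.0250 0.9250 0.0250 0.0250
  }
  std::printf("\n");
  return 0;
}
```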