diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
index cff8cdd8f59fcdfe8ca6b96973f2021ad56cb900..c73dfd65e76a563ef697b56c517860e42a35ff96 100644
--- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -131,8 +131,10 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     if (axis != -1 && axis != rank - 1) {
       X_trans.mutable_data<T>(framework::make_ddim(shape), ctx.GetPlace());
       Out_trans.mutable_data<T>(framework::make_ddim(shape), ctx.GetPlace());
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *X, &X_trans, perm);
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *X, &X_trans,
+                                                  perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *Out,
+                                                  &Out_trans, perm);
       auto dims = X_trans.dims();
       auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
       X_2d.ShareDataWith(X_trans).Resize(flattened_dims);
@@ -202,7 +204,8 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     }
 
     if (axis != -1 && axis != rank - 1) {
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, Out_trans, Out, perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, Out_trans, Out,
+                                                  perm);
     }
   }
 };
@@ -241,9 +244,12 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
       dX_trans.mutable_data<T>(framework::make_ddim(shape), ctx.GetPlace());
       Out_trans.mutable_data<T>(framework::make_ddim(shape), ctx.GetPlace());
       dOut_trans.mutable_data<T>(framework::make_ddim(shape), ctx.GetPlace());
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *dX, &dX_trans, perm);
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *dOut, &dOut_trans, perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *dX, &dX_trans,
+                                                  perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *Out,
+                                                  &Out_trans, perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, *dOut,
+                                                  &dOut_trans, perm);
       auto dims = dX_trans.dims();
       auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
       dX_2d.ShareDataWith(dX_trans).Resize(flattened_dims);
@@ -308,7 +314,8 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
     stream(stream::kind::eager).submit(pipeline).wait();
 
     if (axis != -1 && axis != rank - 1) {
-      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, dX_trans, dX, perm);
+      TransCompute<platform::CPUDeviceContext, T>(rank, dev_ctx, dX_trans, dX,
+                                                  perm);
     }
   }
 };
diff --git a/paddle/fluid/operators/softmax_cudnn_op.cu.cc b/paddle/fluid/operators/softmax_cudnn_op.cu.cc
index dc5b7bb0af4a484bc97821bf65f12c82340dd273..9e24c76793cb8e4b915db1af1710477e95d5ceb6 100644
--- a/paddle/fluid/operators/softmax_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/softmax_cudnn_op.cu.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/math/softmax.h"
-#include "paddle/fluid/operators/softmax_op.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/softmax_op.h"
 
 namespace paddle {
 namespace operators {
@@ -25,7 +25,8 @@ template <typename T>
 class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto& dev_ctx = context.template device_context<platform::CUDADeviceContext>();
+    auto& dev_ctx =
+        context.template device_context<platform::CUDADeviceContext>();
     auto* X = context.Input<Tensor>("X");
     auto* Out = context.Output<Tensor>("Out");
     const int axis = context.Attr<int>("axis");
@@ -41,9 +42,12 @@ class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
     Tensor X_trans, Out_trans;
     if (axis != -1 && axis != rank - 1) {
       X_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      Out_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *X, &X_trans, perm);
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
+      Out_trans.mutable_data<T>(framework::make_ddim(shape),
+                                context.GetPlace());
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *X, &X_trans,
+                                                   perm);
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *Out,
+                                                   &Out_trans, perm);
       X_2d = framework::ReshapeToMatrix(X_trans, rank - 1);
       Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);
     } else {
@@ -52,11 +56,12 @@ class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
     }
 
     math::SoftmaxCUDNNFunctor<T>()(
-        context.template device_context<platform::CUDADeviceContext>(),
-        &X_2d, &Out_2d);
+        context.template device_context<platform::CUDADeviceContext>(), &X_2d,
+        &Out_2d);
 
     if (axis != -1 && axis != rank - 1) {
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, Out_trans, Out, perm);
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, Out_trans,
+                                                   Out, perm);
     }
   }
 };
@@ -65,7 +70,8 @@ template <typename T>
 class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto& dev_ctx = context.template device_context<platform::CUDADeviceContext>();
+    auto& dev_ctx =
+        context.template device_context<platform::CUDADeviceContext>();
     auto* Out = context.Input<Tensor>("Out");
     auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* dX = context.Output<Tensor>(framework::GradVarName("X"));
@@ -82,11 +88,16 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
     Tensor dX_trans, Out_trans, dOut_trans;
     if (axis != -1 && axis != rank - 1) {
       dX_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      Out_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      dOut_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *dX, &dX_trans, perm);
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *dOut, &dOut_trans, perm);
+      Out_trans.mutable_data<T>(framework::make_ddim(shape),
+                                context.GetPlace());
+      dOut_trans.mutable_data<T>(framework::make_ddim(shape),
+                                 context.GetPlace());
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *dX,
+                                                   &dX_trans, perm);
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *Out,
+                                                   &Out_trans, perm);
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, *dOut,
+                                                   &dOut_trans, perm);
       dX_2d = framework::ReshapeToMatrix(dX_trans, rank - 1);
       Out_2d = framework::ReshapeToMatrix(Out_trans, rank - 1);
       dOut_2d = framework::ReshapeToMatrix(dOut_trans, rank - 1);
@@ -97,11 +108,12 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
     }
 
     math::SoftmaxGradCUDNNFunctor<T>()(
-        context.template device_context<platform::CUDADeviceContext>(),
-        &Out_2d, &dOut_2d, &dX_2d);
+        context.template device_context<platform::CUDADeviceContext>(), &Out_2d,
+        &dOut_2d, &dX_2d);
 
     if (axis != -1 && axis != rank - 1) {
-      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, dX_trans, dX, perm);
+      TransCompute<platform::CUDADeviceContext, T>(rank, dev_ctx, dX_trans, dX,
+                                                   perm);
     }
   }
 };
diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc
index 02f256fa64427a900cb73cc08ef3e737388fbae1..f04c5db9e115d79b8e4412d69ca4837cf5aaaa57 100644
--- a/paddle/fluid/operators/softmax_op.cc
+++ b/paddle/fluid/operators/softmax_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/softmax_op.h"
+#include <memory>
 #include <string>
 
 #ifdef PADDLE_WITH_CUDA
diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h
index 1810b23e0d456245a0a6e5bbec4c9e36850a433f..10b3f63339fcbf4919e127f2bf5d3fd62e00f5dc 100644
--- a/paddle/fluid/operators/softmax_op.h
+++ b/paddle/fluid/operators/softmax_op.h
@@ -24,7 +24,8 @@ namespace operators {
 using Tensor = framework::Tensor;
 
 static inline void CalcTransPermAndShapeByAxis(const Tensor& x, const int axis,
-    std::vector<int>* perm, std::vector<int>* shape) {
+                                               std::vector<int>* perm,
+                                               std::vector<int>* shape) {
   auto dim_x = x.dims();
   int rank = dim_x.size();
@@ -65,7 +66,8 @@ class SoftmaxKernel : public framework::OpKernel<T> {
     Tensor X_trans, Out_trans;
     if (axis != -1 && axis != rank - 1) {
       X_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      Out_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
+      Out_trans.mutable_data<T>(framework::make_ddim(shape),
+                                context.GetPlace());
       TransCompute<DeviceContext, T>(rank, dev_ctx, *X, &X_trans, perm);
       TransCompute<DeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
       X_2d = framework::ReshapeToMatrix(X_trans, rank - 1);
@@ -75,7 +77,6 @@ class SoftmaxKernel : public framework::OpKernel<T> {
       Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
     }
 
-
 #ifdef PADDLE_ON_INFERENCE
     math::SoftmaxFunctor<DeviceContext, T, true>()(
         context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
@@ -111,8 +112,10 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
     Tensor dX_trans, Out_trans, dOut_trans;
     if (axis != -1 && axis != rank - 1) {
       dX_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      Out_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
-      dOut_trans.mutable_data<T>(framework::make_ddim(shape), context.GetPlace());
+      Out_trans.mutable_data<T>(framework::make_ddim(shape),
+                                context.GetPlace());
+      dOut_trans.mutable_data<T>(framework::make_ddim(shape),
+                                 context.GetPlace());
       TransCompute<DeviceContext, T>(rank, dev_ctx, *dX, &dX_trans, perm);
       TransCompute<DeviceContext, T>(rank, dev_ctx, *Out, &Out_trans, perm);
       TransCompute<DeviceContext, T>(rank, dev_ctx, *dOut, &dOut_trans, perm);
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 273d74ca6ed19af945cc83545c42e90ef9656c4c..276344df58e2e5060e10a4e352a2868b8f6b10a8 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1872,10 +1872,8 @@ def softmax(input, use_cudnn=False, name=None, axis=-1):
         type="softmax",
         inputs={"X": input},
         outputs={"Out": softmax_out},
-        attrs={
-            "axis": axis,
-            "use_cudnn": use_cudnn
-        })
+        attrs={"axis": axis,
+               "use_cudnn": use_cudnn})
     return softmax_out
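
Note: the hunks above are formatting-only cleanups of the earlier softmax `axis` change (clang-format line wrapping, include reordering, a dropped blank line, and a tightened `attrs` literal in nn.py); no kernel logic changes. The logic being reformatted handles a non-last `axis` by transposing that dimension to the end (`CalcTransPermAndShapeByAxis` plus `TransCompute`), flattening the result to 2-D, running the existing last-axis softmax, and transposing the output back. Below is a minimal NumPy sketch of that flow; it is illustrative only. `softmax_along_axis` is a hypothetical helper, not a Paddle API, and it assumes the permutation simply swaps `axis` with the last dimension, which is consistent with the kernels reusing `perm` for the inverse transpose.

```python
import numpy as np


def softmax_along_axis(x, axis=-1):
    """Transpose `axis` to the last position, flatten to 2-D, apply a
    row-wise softmax, then undo the transpose (hypothetical helper)."""
    rank = x.ndim
    if axis < 0:
        axis += rank
    # Assumed permutation: swap `axis` with the last dimension. A swap is its
    # own inverse, so the same perm undoes the transpose at the end, mirroring
    # how the kernels reuse `perm` for the final TransCompute call.
    perm = list(range(rank))
    perm[axis], perm[-1] = perm[-1], perm[axis]
    x_trans = np.transpose(x, perm)                   # X -> X_trans
    x_2d = x_trans.reshape(-1, x_trans.shape[-1])     # flatten_to_2d / ReshapeToMatrix
    shifted = x_2d - x_2d.max(axis=1, keepdims=True)  # stabilize before exp
    e = np.exp(shifted)
    out_2d = e / e.sum(axis=1, keepdims=True)         # softmax over the last dim
    out_trans = out_2d.reshape(x_trans.shape)
    return np.transpose(out_trans, perm)              # Out_trans -> Out


x = np.random.rand(2, 3, 4).astype("float32")
y = softmax_along_axis(x, axis=1)
print(y.shape, np.allclose(y.sum(axis=1), 1.0))       # (2, 3, 4) True
```

On the Python side the attribute is exposed through the signature visible in the nn.py hunk, `fluid.layers.softmax(input, use_cudnn=False, name=None, axis=-1)`, which forwards `axis` to the op's attrs unchanged.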