diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cfec8e70b4a3d166e3b45048408d7f5e45ce6e4..c62cc9bfd70d72d926eeee5eb52a69428855eb9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -302,6 +302,14 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") +if (ON_INFER) + message(STATUS "On inference mode, will take place some specific optimization.") + add_definitions(-DPADDLE_ON_INFERENCE) +else() + #TODO(luotao), combine this warning with `make inference_lib_dist` command. + message(WARNING "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only.") +endif() + add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) @@ -312,10 +320,3 @@ if(WITH_DOC) find_python_module(recommonmark REQUIRED) add_subdirectory(doc) endif() - -if (ON_INFER) - message(STATUS "On inference mode, will take place some specific optimization.") -else() - #TODO(luotao), combine this warning with `make inference_lib_dist` command. - message(WARNING "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only.") -endif() diff --git a/paddle/fluid/operators/math/softmax.h b/paddle/fluid/operators/math/softmax.h index bf698dc2f753f0002557af07ad7ea976c85edada..089458e957dfaac1cbc3bf1bc2b4be4877e702c9 100644 --- a/paddle/fluid/operators/math/softmax.h +++ b/paddle/fluid/operators/math/softmax.h @@ -19,7 +19,8 @@ namespace paddle { namespace operators { namespace math { -template +template class SoftmaxFunctor { public: void operator()(const DeviceContext& context, const framework::Tensor* X, diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h index 7cf98f27251db3cfe5e8e295ed21056f6e5a2963..0f3e5b20086378da8ef1138a5f5c005b724f7fa2 100644 --- a/paddle/fluid/operators/math/softmax_impl.h +++ b/paddle/fluid/operators/math/softmax_impl.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/blas.h" namespace paddle { namespace operators { namespace math { @@ -32,8 +33,8 @@ struct ValueClip { } }; -template -void SoftmaxFunctor::operator()( +template +void SoftmaxFunctor::operator()( const DeviceContext& context, const framework::Tensor* X, framework::Tensor* Y) { auto logits = EigenMatrix::From(*X); @@ -65,36 +66,46 @@ void SoftmaxFunctor::operator()( .broadcast(one_by_class)); } -template -class SoftmaxFunctor { +template +using enable_if_CPU = typename std::enable_if< + std::is_same::value>::type; + +template +class SoftmaxFunctor> { void operator()(const DeviceContext& context, const framework::Tensor* X, framework::Tensor* Y) { - auto logits = EigenMatrix::From(*X); - auto softmax = EigenMatrix::From(*Y); - + auto in_dims = X->dims(); + auto out_dims = Y->dims(); + const float* in_data = X->data(); + float* out_data = Y->data(); const int kBatchDim = 0; const int kClassDim = 1; - - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); - - Eigen::DSizes along_class(kClassDim); - Eigen::DSizes batch_by_one(batch_size, 1); - Eigen::DSizes one_by_class(1, num_classes); - - auto shifted_logits = (logits - - logits.maximum(along_class) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); - - softmax.device(*context.eigen_device()) = shifted_logits.exp(); - softmax.device(*context.eigen_device()) = (softmax * - softmax.sum(along_class) - .inverse() - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)); + // 2D data. Batch x C + const int batch_size = in_dims[kBatchDim]; + const int num_classes = in_dims[kClassDim]; + std::vector entities(batch_size); + auto blas = math::GetBlas(context); + for (int n = 0; n < batch_size; ++n) { + entities[n] = in_data[n * num_classes]; + for (int c = 1; c < num_classes; ++c) { + entities[n] = in_data[n * num_classes + c] > entities[n] + ? in_data[n * num_classes + c] + : entities[n]; + } + for (int c = 0; c < num_classes; ++c) { + out_data[n * num_classes + c] = + in_data[n * num_classes + c] - entities[n]; + } + } + + blas.VEXP(num_classes * batch_size, out_data, out_data); + for (int n = 0; n < batch_size; ++n) { + entities[n] = out_data[n * num_classes]; + for (int c = 1; c < num_classes; ++c) { + entities[n] += out_data[n * num_classes + c]; + } + blas.SCAL(num_classes, 1.0f / entities[n], &out_data[n * num_classes]); + } } }; diff --git a/paddle/fluid/operators/softmax_op.h b/paddle/fluid/operators/softmax_op.h index 2fea8a65bc5141b11549ef400f11b54278be35f9..8eb5c7691efe930e9f79ad6a381cb290107d1a14 100644 --- a/paddle/fluid/operators/softmax_op.h +++ b/paddle/fluid/operators/softmax_op.h @@ -35,8 +35,10 @@ class SoftmaxKernel : public framework::OpKernel { Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1); Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); -#ifdef ON_INFER - math::SoftmaxFunctor()( +#ifdef PADDLE_ON_INFERENCE + math::SoftmaxFunctor< + DeviceContext, T, + std::is_same::value>()( context.template device_context(), &X_2d, &Out_2d); #else math::SoftmaxFunctor()(