From b5c410a290418962836b9f554c4f41edad33a772 Mon Sep 17 00:00:00 2001 From: lijianshe02 <48898730+lijianshe02@users.noreply.github.com> Date: Fri, 31 May 2019 09:55:51 +0800 Subject: [PATCH] =?UTF-8?q?add=20ops=20and=20kernels=20that=20mul,=20scale?= =?UTF-8?q?,=20fc,=20relu,=20softmax,=20dropout,=20elem=E2=80=A6=20(#17711?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix conflicts * fix kernel registry related bugs test=develop --- paddle/fluid/framework/framework.proto | 2 +- paddle/fluid/lite/api/CMakeLists.txt | 12 +- paddle/fluid/lite/api/cxx_api_test.cc | 31 ++++- paddle/fluid/lite/core/context.h | 6 +- .../core/mir/runtime_context_assign_pass.cc | 13 +- paddle/fluid/lite/core/op_registry.cc | 4 + paddle/fluid/lite/kernels/x86/CMakeLists.txt | 15 ++- .../fluid/lite/kernels/x86/dropout_compute.cc | 87 ++++++++++++++ .../lite/kernels/x86/elementwise_compute.cc | 36 +++++- paddle/fluid/lite/kernels/x86/fc_compute.cc | 112 ++++++++++++++++++ .../lite/kernels/x86/fill_constant_compute.cc | 2 +- paddle/fluid/lite/kernels/x86/mean_compute.cc | 4 +- paddle/fluid/lite/kernels/x86/mul_compute.cc | 4 +- paddle/fluid/lite/kernels/x86/relu_compute.cc | 56 +++++++++ .../fluid/lite/kernels/x86/scale_compute.cc | 62 ++++++++++ .../fluid/lite/kernels/x86/softmax_compute.cc | 90 ++++++++++++++ paddle/fluid/lite/operators/CMakeLists.txt | 7 +- paddle/fluid/lite/operators/dropout_op.cc | 75 ++++++++++++ .../fluid/lite/operators/elementwise_ops.cc | 6 +- paddle/fluid/lite/operators/fc_op_test.cc | 8 +- paddle/fluid/lite/operators/op_params.h | 58 ++++++++- paddle/fluid/lite/operators/relu_op.cc | 5 +- paddle/fluid/lite/operators/relu_op.h | 2 +- paddle/fluid/lite/utils/varient.h | 3 +- paddle/fluid/memory/CMakeLists.txt | 3 +- 25 files changed, 668 insertions(+), 35 deletions(-) create mode 100644 paddle/fluid/lite/kernels/x86/dropout_compute.cc create mode 100644 paddle/fluid/lite/kernels/x86/fc_compute.cc create 
mode 100644 paddle/fluid/lite/kernels/x86/relu_compute.cc create mode 100644 paddle/fluid/lite/kernels/x86/scale_compute.cc create mode 100644 paddle/fluid/lite/kernels/x86/softmax_compute.cc create mode 100644 paddle/fluid/lite/operators/dropout_op.cc diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index efdabffb9b3..6c60a041a19 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ syntax = "proto2"; -option optimize_for = LITE_RUNTIME; +// option optimize_for = LITE_RUNTIME; package paddle.framework.proto; // Any incompatible changes to ProgramDesc and its dependencies should diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt index d39950f2a03..689eb658ae1 100644 --- a/paddle/fluid/lite/api/CMakeLists.txt +++ b/paddle/fluid/lite/api/CMakeLists.txt @@ -25,10 +25,14 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING "A path setting inference demo download directories.") -# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc -# DEPS cxx_api_lite model_parser_lite target_wrapper_host -# ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model -# --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) +if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) +lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc + DEPS cxx_api_lite model_parser_lite target_wrapper_host + ${ops_lite} ${host_kernels} ${x86_kernels} + ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model + --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) +add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) +endif(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) if(WITH_TESTING) 
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz") diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc index 7a73982e9f7..dbc364e1dde 100644 --- a/paddle/fluid/lite/api/cxx_api_test.cc +++ b/paddle/fluid/lite/api/cxx_api_test.cc @@ -32,7 +32,8 @@ namespace lite { TEST(CXXApi, test) { lite::ExecutorLite predictor; #ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kX86), PRECISION(kFloat)}}); #else std::vector valid_places({ Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, @@ -44,7 +45,8 @@ TEST(CXXApi, test) { }); #endif - predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, + predictor.Build(FLAGS_model_dir, + Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda valid_places); auto* input_tensor = predictor.GetInput(0); @@ -69,7 +71,8 @@ TEST(CXXApi, test) { #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(CXXApi, save_model) { lite::ExecutorLite predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kX86), PRECISION(kFloat)}}); predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, valid_places); @@ -78,7 +81,7 @@ TEST(CXXApi, save_model) { #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -TEST(CXXTrainer, train) { +/*TEST(CXXTrainer, train) { Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); std::vector valid_places({prefer_place}); auto scope = std::make_shared(); @@ -108,7 +111,7 @@ TEST(CXXTrainer, train) { data0[0] = 0; exe.Run(); -} +}*/ #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK } // namespace lite @@ -116,13 +119,31 @@ TEST(CXXTrainer, train) { USE_LITE_OP(mul); USE_LITE_OP(fc); +USE_LITE_OP(relu); USE_LITE_OP(scale); USE_LITE_OP(feed); 
USE_LITE_OP(fetch); USE_LITE_OP(io_copy); +USE_LITE_OP(elementwise_add) +USE_LITE_OP(elementwise_sub) +USE_LITE_OP(square) +USE_LITE_OP(softmax) +USE_LITE_OP(dropout) USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); +#ifdef LITE_WITH_X86 +USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def); +USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def); +#endif + #ifdef LITE_WITH_CUDA USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device); diff --git a/paddle/fluid/lite/core/context.h b/paddle/fluid/lite/core/context.h index e09a03f55bd..9d5decfdbed 100644 --- a/paddle/fluid/lite/core/context.h +++ b/paddle/fluid/lite/core/context.h @@ -95,7 +95,11 @@ struct CUDAContext { #ifdef LITE_WITH_X86 struct X86Context { // overall information - + X86Context() { + x86_device_context.reset(new ::paddle::platform::CPUDeviceContext); + x86_execution_context.reset( + new ::paddle::framework::ExecutionContext(*x86_device_context)); + } // kernel information // legacy info. 
diff --git a/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc b/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc index ecca00e33c5..f7c983b675f 100644 --- a/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc +++ b/paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc @@ -34,9 +34,13 @@ class RuntimeContextAssignPass : public StmtPass { auto& inst = node.AsStmt(); switch (inst.picked_kernel().target()) { case TARGET(kHost): - case TARGET(kX86): inst.picked_kernel().SetContext(NewHostContext()); break; +#ifdef LITE_WITH_X86 + case TARGET(kX86): + inst.picked_kernel().SetContext(NewX86Context()); + break; +#endif #ifdef LITE_WITH_CUDA case TARGET(kCUDA): inst.picked_kernel().SetContext(NewCudaContext()); @@ -61,6 +65,13 @@ class RuntimeContextAssignPass : public StmtPass { return ctx; } +#ifdef LITE_WITH_X86 + std::unique_ptr NewX86Context() { + std::unique_ptr ctx(new KernelContext); + ctx->As(); + return ctx; + } +#endif #ifdef LITE_WITH_ARM std::unique_ptr NewARMContext() { diff --git a/paddle/fluid/lite/core/op_registry.cc b/paddle/fluid/lite/core/op_registry.cc index 94d487d724b..8c3e44733df 100644 --- a/paddle/fluid/lite/core/op_registry.cc +++ b/paddle/fluid/lite/core/op_registry.cc @@ -91,6 +91,10 @@ KernelRegistry::KernelRegistry() INIT_FOR(kHost, kAny, kNCHW); INIT_FOR(kHost, kAny, kAny); + INIT_FOR(kX86, kFloat, kNCHW); + INIT_FOR(kX86, kAny, kNCHW); + INIT_FOR(kX86, kAny, kAny); + INIT_FOR(kARM, kFloat, kNCHW); INIT_FOR(kARM, kAny, kNCHW); INIT_FOR(kARM, kAny, kAny); diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt index 4fd44525a40..aa9896b0c26 100644 --- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt @@ -3,18 +3,29 @@ if(NOT LITE_WITH_X86) endif() cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op) -cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS 
${lite_kernel_deps}) cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps}) cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps}) -cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps}) cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps}) +cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps}) +cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps}) +cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps}) +cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps}) +cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op) +cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax) +cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} ) + set(x86_kernels activation_compute_x86 elementwise_compute_x86 mean_compute_x86 fill_constant_compute_x86 mul_compute_x86 + relu_compute_x86 + fc_compute_x86 + scale_compute_x86 + softmax_compute_x86 + dropout_compute_x86 ) set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels") diff --git a/paddle/fluid/lite/kernels/x86/dropout_compute.cc b/paddle/fluid/lite/kernels/x86/dropout_compute.cc new file mode 100644 index 00000000000..d762ec2a06f --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/dropout_compute.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +using EigenMatrix = framework::EigenMatrix; + +template +class DropoutCompute : public KernelLite { + public: + using param_t = operators::DropoutParam; + void Run() override { + auto& param = *param_.get_mutable(); + const auto* x_data = param.x->data(); + auto* out_data = param.output->template mutable_data(); + if (!param.is_test) { + auto* mask_data = param.mask->template mutable_data(); + std::random_device rnd; + std::minstd_rand engine; + int seed = param.fix_seed ? 
param.seed : rnd(); + engine.seed(seed); + std::uniform_real_distribution dist(0, 1); + + size_t size = framework::product(param.mask->dims().data()); + for (size_t i = 0; i < size; ++i) { + if (dist(engine) < param.dropout_prob) { + mask_data[i] = 0; + out_data[i] = 0; + } else { + if (param.dropout_implementation == "upscale_in_train") { + mask_data[i] = 1.0f / static_cast(1.0f - param.dropout_prob); + out_data[i] = x_data[i] / static_cast(1.0f - param.dropout_prob); + } else { + mask_data[i] = 1; + out_data[i] = x_data[i]; + } + } + } + } else { + auto X = EigenMatrix::Reshape(param.x->raw_tensor(), 1); + auto Y = EigenMatrix::Reshape(param.output->raw_tensor(), 1); + auto& place = *platform::CPUDeviceContext().eigen_device(); + if (param.dropout_implementation == "upscale_in_train") { + Y.device(place) = X; + } else { + Y.device(place) = X * static_cast(1.0f - param.dropout_prob); + } + } + } + + virtual ~DropoutCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::DropoutCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Mask", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc index 9e9b7a86b39..b1326ee730f 100644 --- a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc +++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc @@ -30,6 +30,11 @@ struct SubFunctor { inline HOSTDEVICE T operator()(T a, T b) const { return a - b; } }; +template +struct AddFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return a + b; } +}; + template class ElementwiseSubCompute : public KernelLite { @@ -67,10 +72,9 @@ class ElementwiseSubGradCompute : public KernelLite { public: using param_t = 
operators::ElementwiseGradParam; - void Run() override { auto& param = *param_.get_mutable(); - auto& context = context_->As(); + auto& context = ctx_->As(); CHECK(context.x86_device_context); param.X_grad->template mutable_data(); @@ -89,6 +93,26 @@ class ElementwiseSubGradCompute virtual ~ElementwiseSubGradCompute() = default; }; +template +class ElementwiseAddCompute + : public KernelLite { + public: + using param_t = operators::ElementwiseParam; + void Run() override { + auto& param = *param_.get_mutable(); + auto& context = ctx_->As(); + CHECK(context.x86_device_context); + param.Out->template mutable_data(); + paddle::operators::ElementwiseComputeEx, + platform::CPUDeviceContext, T>( + *context.x86_execution_context, ¶m.X->raw_tensor(), + ¶m.Y->raw_tensor(), param.axis, AddFunctor(), + ¶m.Out->raw_tensor()); + } + + virtual ~ElementwiseAddCompute() = default; +}; + } // namespace x86 } // namespace kernels } // namespace lite @@ -113,3 +137,11 @@ REGISTER_LITE_KERNEL(elementwise_sub_grad, kX86, kFloat, kNCHW, .BindOutput(paddle::framework::GradVarName("Y"), {LiteType::GetTensorTy(TARGET(kX86))}) .Finalize(); + +REGISTER_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::ElementwiseAddCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/fc_compute.cc b/paddle/fluid/lite/kernels/x86/fc_compute.cc new file mode 100644 index 00000000000..c89f0f19dad --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/fc_compute.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/fc_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +void fc_compute_eigen(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + using matrix_t = + Eigen::Matrix; + + Eigen::Map X(x, x_h, x_w); + Eigen::Map W(w, w_h, w_w); + Eigen::Map Out(out, x_h, w_h); + + Out = X * W.transpose(); + + if (b) { + Eigen::Map> B(b, w_h); + Out = Out.array().rowwise() + B.transpose().array(); + } +} + +template +__attribute__((optimize("unroll-loops"))) // +T dot(const T* x, const T* y, int dim) { + T out{}; + for (int i = 0; i < dim; i++) { + out += x[i] * y[i]; + } + return out; +} + +template +void fc_compute_naive(const T* x, int x_w, int x_h, // + const T* w, int w_w, int w_h, // + const T* b, // + T* out) { + CHECK_EQ(x_w, w_w); + // out shape: (x_h, w_w) + memset(out, 0, x_h * w_h * sizeof(T)); + + for (int r = 0; r < x_h; r++) { + for (int c = 0; c < w_h; c++) { + out[r * w_h + c] = dot(&x[r * x_w], &w[c * w_w], w_w) + b[c]; + } + } +} + +template +class FcCompute : public KernelLite { + public: + using param_t = operators::FcParam; + + void Run() override { + auto& param = *param_.get_mutable(); + CHECK_GE(param.input->dims().size(), 2UL); + 
CHECK_EQ(param.output->dims().size(), 2UL); + + fc_compute_eigen( + param.input->data(), // x + param.input->dims().Slice(0, param.in_num_col_dims).production(), + param.input->dims() + .Slice(param.in_num_col_dims, param.input->dims().size()) + .production(), + param.w->data(), // w + param.w->dims()[1], // w_w + param.w->dims()[0], // w_h + param.bias->data(), // b + param.output->mutable_data()); + } + + virtual ~FcCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(fc, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::FcCompute, def) + .BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("W", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/fill_constant_compute.cc b/paddle/fluid/lite/kernels/x86/fill_constant_compute.cc index 3f8a3fe11c4..d0b03c78ee0 100644 --- a/paddle/fluid/lite/kernels/x86/fill_constant_compute.cc +++ b/paddle/fluid/lite/kernels/x86/fill_constant_compute.cc @@ -31,7 +31,7 @@ class FillConstantCompute : public KernelLite { void Run() override { auto& param = *param_.get_mutable(); - auto& context = context_->As(); + auto& context = ctx_->As(); CHECK(context.x86_device_context); param.Out->template mutable_data(); diff --git a/paddle/fluid/lite/kernels/x86/mean_compute.cc b/paddle/fluid/lite/kernels/x86/mean_compute.cc index f1dbc4d53fc..95cb0c89e03 100644 --- a/paddle/fluid/lite/kernels/x86/mean_compute.cc +++ b/paddle/fluid/lite/kernels/x86/mean_compute.cc @@ -37,7 +37,7 @@ class MeanCompute : public KernelLite { void Run() override { auto& param = *param_.get_mutable(); - auto& context = context_->As(); + auto& context = ctx_->As(); CHECK(context.x86_device_context); param.Out->template mutable_data(); @@ -59,7 +59,7 @@ class MeanGradCompute : public KernelLite { void 
Run() override { auto& param = *param_.get_mutable(); - auto& context = context_->As(); + auto& context = ctx_->As(); CHECK_EQ(param.Out_grad->raw_tensor().numel(), 1); CHECK(context.x86_device_context); diff --git a/paddle/fluid/lite/kernels/x86/mul_compute.cc b/paddle/fluid/lite/kernels/x86/mul_compute.cc index f0c962347fb..a099a2fdf13 100644 --- a/paddle/fluid/lite/kernels/x86/mul_compute.cc +++ b/paddle/fluid/lite/kernels/x86/mul_compute.cc @@ -30,7 +30,7 @@ class MulCompute : public KernelLite { using param_t = operators::MulParam; void Run() override { - auto& context = context_->As(); + auto& context = ctx_->As(); auto& param = *param_.get_mutable(); CHECK(context.x86_device_context); @@ -68,7 +68,7 @@ template class MulGradCompute : public KernelLite { public: void Run() override { - auto& context = context_->As(); + auto& context = ctx_->As(); auto& param = *param_.get_mutable(); CHECK(context.x86_device_context); diff --git a/paddle/fluid/lite/kernels/x86/relu_compute.cc b/paddle/fluid/lite/kernels/x86/relu_compute.cc new file mode 100644 index 00000000000..44b1f525ab0 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/relu_compute.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/relu_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class ReluCompute : public KernelLite { + public: + using param_t = operators::ReluParam; + + void Run() override { + auto& param = *param_.get_mutable(); + auto n = param.input->dims().production(); + const float* input = param.input->data(); + float* output = param.output->mutable_data(); + for (int i = 0; i < n; i++) { + output[i] = std::max(0.f, input[i]); + } + } + + virtual ~ReluCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(relu, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::ReluCompute, def) + .BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/scale_compute.cc b/paddle/fluid/lite/kernels/x86/scale_compute.cc new file mode 100644 index 00000000000..0135a6f614e --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/scale_compute.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/relu_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +void scale_compute(const T* x, T* out, int size, float scale, float bias, + bool bias_before) { + if (bias_before) bias *= scale; + for (int i = 0; i < size; i++) { + out[i] = x[i] * scale + bias; + } +} + +template +class ScaleCompute : public KernelLite { + public: + using param_t = operators::ScaleParam; + + void Run() override { + auto& param = *param_.get_mutable(); + scale_compute(param.x->data(), param.output->mutable_data(), + param.x->dims().production(), param.scale, param.bias, + param.bias_after_scale); + } + + virtual ~ScaleCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(scale, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::ScaleCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/softmax_compute.cc b/paddle/fluid/lite/kernels/x86/softmax_compute.cc new file mode 100644 index 00000000000..fe408aa3c84 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/softmax_compute.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/math/softmax.h" +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, lite::DDim dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, lite::DDim dims) { + int size = 1; + for (int i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +template +class SoftmaxCompute : public KernelLite { + public: + using param_t = operators::SoftmaxParam; + + void Run() override { + auto& param = *param_.get_mutable(); + // auto& context = context_->As(); + CHECK(param.output); + CHECK(param.x); + const int rank = param.x->dims().size(); + const int axis = CanonicalAxis(param.axis, rank); + int axis_dim = param.x->dims()[axis]; + const int n = SizeToAxis(axis, param.x->dims()); + const int d = SizeFromAxis(axis, param.x->dims()); + std::vector shape{n, d}; + + lite::Tensor input_2d, out_2d; + input_2d.ShareDataWith(*param.x); + input_2d.Resize(lite::DDim(shape)); + out_2d.ShareDataWith(*param.output); + out_2d.Resize(lite::DDim(shape)); + + paddle::operators::math::SoftmaxFunctor()( + 
platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(), + &out_2d.raw_tensor()); + } + + virtual ~SoftmaxCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, + paddle::lite::kernels::x86::SoftmaxCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) + .Finalize(); diff --git a/paddle/fluid/lite/operators/CMakeLists.txt b/paddle/fluid/lite/operators/CMakeLists.txt index fe690973b60..13b1c9f7668 100644 --- a/paddle/fluid/lite/operators/CMakeLists.txt +++ b/paddle/fluid/lite/operators/CMakeLists.txt @@ -13,8 +13,9 @@ cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS}) cc_library(mean_op_lite SRCS mean_op.cc DEPS ${op_DEPS}) cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS}) #cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS}) - cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite) +cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS}) + set(ops_lite fc_op_lite relu_op_lite @@ -27,7 +28,9 @@ set(ops_lite elementwise_ops_lite mean_op_lite fill_constant_op_lite + activation_ops_lite + dropout_op_lite PARENT_SCOPE) -lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite) +lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite fc_compute_x86) lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite) diff --git a/paddle/fluid/lite/operators/dropout_op.cc b/paddle/fluid/lite/operators/dropout_op.cc new file mode 100644 index 00000000000..cc0761b2bc7 --- /dev/null +++ b/paddle/fluid/lite/operators/dropout_op.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +class DropoutOpLite : public OpLite { + public: + explicit DropoutOpLite(const std::string& type) : OpLite(type) {} + + bool CheckShape() const override { + CHECK_OR_FALSE(param_.x); + return true; + } + + bool InferShape() const override { + const auto x_dims = param_.x->dims(); + param_.output->Resize(x_dims); + if (param_.is_test == false) { + param_.mask->Resize(x_dims); + } + // share LoD + // param_.output->set_lod(param_.input->lod()); + return true; + } + + void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); } + // TODO(Superjomn) replace framework::OpDesc with a lite one. 
+ bool AttachImpl(const OpDesc& op_desc, lite::Scope* scope) override { + auto input = op_desc.Input("X").front(); + auto out = op_desc.Output("Out").front(); + auto Mask = op_desc.Output("Mask").front(); + + param_.x = GetVar(scope, input); + param_.output = GetMutableVar(scope, out); + param_.mask = GetMutableVar(scope, Mask); + + param_.dropout_prob = boost::get(op_desc.GetAttr("dropout_prob")); + if (op_desc.HasAttr("axis")) { + param_.is_test = boost::get(op_desc.GetAttr("is_test")); + } + param_.fix_seed = boost::get(op_desc.GetAttr("fix_seed")); + param_.seed = boost::get(op_desc.GetAttr("seed")); + param_.dropout_implementation = + boost::get(op_desc.GetAttr("dropout_implementation")); + return true; + } + + std::string DebugString() const override { return "dropout"; } + + private: + mutable DropoutParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(dropout, paddle::lite::operators::DropoutOpLite); diff --git a/paddle/fluid/lite/operators/elementwise_ops.cc b/paddle/fluid/lite/operators/elementwise_ops.cc index 9e1267ad01f..0ca89cccf30 100644 --- a/paddle/fluid/lite/operators/elementwise_ops.cc +++ b/paddle/fluid/lite/operators/elementwise_ops.cc @@ -43,9 +43,8 @@ class ElementwiseOp : public OpLite { param_.X = GetVar(scope, X_name); param_.Y = GetVar(scope, Y_name); - param_.Out = GetMutableVar(scope, Out_name); - param_.axis = GetAttr(opdesc.GetAttr("axis")); - + param_.Out = GetMutableVar(scope, Out_name); + param_.axis = boost::get(opdesc.GetAttr("axis")); return true; } @@ -110,3 +109,4 @@ REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp); REGISTER_LITE_OP(elementwise_sub_grad, paddle::lite::operators::ElementwiseGradExplicitOp); #endif +REGISTER_LITE_OP(elementwise_add, paddle::lite::operators::ElementwiseOp); diff --git a/paddle/fluid/lite/operators/fc_op_test.cc b/paddle/fluid/lite/operators/fc_op_test.cc index bdeb5bb9476..0d12024acab 100644 --- 
a/paddle/fluid/lite/operators/fc_op_test.cc +++ b/paddle/fluid/lite/operators/fc_op_test.cc @@ -57,10 +57,16 @@ TEST(fc_op_lite, test) { FcOpLite fc("fc"); - fc.SetValidPlaces({Place{TARGET(kHost), PRECISION(kFloat)}}); + fc.SetValidPlaces({Place{TARGET(kX86), PRECISION(kFloat)}}); fc.Attach(desc, &scope); + auto kernels = fc.CreateKernels({Place{TARGET(kX86), PRECISION(kFloat)}}); + ASSERT_FALSE(kernels.empty()); } } // namespace operators } // namespace lite } // namespace paddle +#ifdef LITE_WITH_X86 + +USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def); +#endif diff --git a/paddle/fluid/lite/operators/op_params.h b/paddle/fluid/lite/operators/op_params.h index d3093c48894..d65a096d2d8 100644 --- a/paddle/fluid/lite/operators/op_params.h +++ b/paddle/fluid/lite/operators/op_params.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include "paddle/fluid/lite/core/compatible_tensor.h" #include "paddle/fluid/lite/core/framework.pb.h" @@ -94,14 +95,67 @@ struct ScaleParam { bool bias_after_scale{true}; }; -// For Softmax Op +// For Softmax op struct SoftmaxParam { lite::Tensor* x{}; lite::Tensor* output{}; - int axis{-1}; }; +// For Convolution op +struct ConvParam { + lite::Tensor* x{}; + lite::Tensor* filter{}; + lite::Tensor* bias{}; + lite::Tensor* residualData{}; + lite::Tensor* output{}; + std::vector strides{1, 1}; + std::vector paddings{0, 0}; + int groups{1}; + std::vector dilations{1, 1}; + bool fuse_relu_before_depthwise_conv{false}; + bool use_mkldnn{false}; + bool fuse_relu{false}; // only used in mkldnn kernel + bool use_quantizer{ + false}; // set true for op that should be quantized, only used for cpu + bool fuse_residual_connection{false}; + float scale_in{1.0f}; // only used with mkl-dnn int8 + float scale_out{1.0f}; // only used with mkl-dnn int8 + float scale_in_eltwise{1.0f}; // only used with mkl-dnn int8 + float scale_weights{1.0f}; // only used with mkl-dnn int8 + bool force_fp32_output{false}; // only used in 
mkl-dnn int8 + std::string data_format{"Anylayout"}; +}; + +// For Pooling op +struct PoolParam { + lite::Tensor* x{}; + lite::Tensor* output{}; + std::string pooling_type{""}; + std::vector ksize{}; + bool global_pooling{ + false}; // if true, knernel size and paddings will be ignored + std::vector strides{1, 1}; + std::vector paddings{0, 0}; + bool exclusive{true}; + bool adaptive{false}; + bool ceil_mode{false}; + bool use_quantizer{false}; + std::string data_format{"AnyLayout"}; +}; + +// For Dropout op +struct DropoutParam { + const lite::Tensor* x{}; + lite::Tensor* output{}; + lite::Tensor* mask{}; + float dropout_prob{.5f}; + bool is_test{false}; + bool fix_seed{false}; + int seed{0}; + std::string dropout_implementation{"downgrade_in_infer"}; +}; + /// ----------------------- element wise operators ---------------------- struct ElementwiseParam { const lite::Tensor* X{}; diff --git a/paddle/fluid/lite/operators/relu_op.cc b/paddle/fluid/lite/operators/relu_op.cc index 8f6ffd13992..4fa02c5eb94 100644 --- a/paddle/fluid/lite/operators/relu_op.cc +++ b/paddle/fluid/lite/operators/relu_op.cc @@ -25,7 +25,6 @@ bool ReluOp::InferShape() const { CHECK_OR_FALSE(param_.output); // TODO(Superjomn) Enable data sharing. 
param_.output->Resize(param_.input->dims()); - // param_.output->ShareDataWith(*param_.input); // share lod // param_.output->set_lod(param_.input->lod()); return true; @@ -42,8 +41,8 @@ bool ReluOp::AttachImpl(const OpDesc &opdesc, lite::Scope *scope) { return true; } -REGISTER_LITE_OP(relu, ReluOp); - } // namespace operators } // namespace lite } // namespace paddle + +REGISTER_LITE_OP(relu, paddle::lite::operators::ReluOp); diff --git a/paddle/fluid/lite/operators/relu_op.h b/paddle/fluid/lite/operators/relu_op.h index a6204a107d8..ffb03368788 100644 --- a/paddle/fluid/lite/operators/relu_op.h +++ b/paddle/fluid/lite/operators/relu_op.h @@ -35,7 +35,7 @@ class ReluOp : public OpLite { bool AttachImpl(const OpDesc &opdesc, lite::Scope *scope) override; void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } - std::string DebugString() const override { return "tanh"; } + std::string DebugString() const override { return "relu"; } private: mutable ReluParam param_; diff --git a/paddle/fluid/lite/utils/varient.h b/paddle/fluid/lite/utils/varient.h index 97285c508d6..2d2a3061108 100644 --- a/paddle/fluid/lite/utils/varient.h +++ b/paddle/fluid/lite/utils/varient.h @@ -128,8 +128,9 @@ struct variant { if (type_id == typeid(T).hash_code()) return reinterpret_cast(&data); else - LOG(FATAL) << "unmatched type get, should be " << type_id << " but get " + LOG(ERROR) << "unmatched type get, should be " << type_id << " but get " << typeid(T).name(); + throw std::invalid_argument("unmatched type"); } ~variant() { helper_t::destroy(type_id, &data); } }; diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 7eb663ea280..0d4c5c37e1d 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -6,7 +6,8 @@ cc_library(memcpy SRCS memcpy.cc DEPS place) cc_library(memory DEPS malloc - memcpy) + memcpy + ) #if (WITH_GPU) # nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory) 
#endif() -- GitLab