From e9f333207d65b01c508654efeab293ae4746dc6a Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Thu, 16 May 2019 14:53:23 +0800
Subject: [PATCH] Add some ops for training (#17442)

---
 paddle/fluid/framework/operator.h             |  18 +++
 paddle/fluid/incubate/CMakeLists.txt          |   1 +
 paddle/fluid/lite/api/cxx_api.cc              |   2 +-
 paddle/fluid/lite/api/cxx_api.h               |  68 ++++++++++-
 paddle/fluid/lite/api/cxx_api_bin.cc          |   2 +-
 paddle/fluid/lite/api/cxx_api_test.cc         |  44 ++++++-
 paddle/fluid/lite/core/context.h              |  10 +-
 paddle/fluid/lite/core/hvy_tensor.h           |  16 ++-
 paddle/fluid/lite/core/naive_test_model.py    |  46 ++++++++
 paddle/fluid/lite/core/op_lite.cc             |   2 +-
 paddle/fluid/lite/core/op_lite.h              |  16 +++
 paddle/fluid/lite/core/program.h              |   3 +-
 paddle/fluid/lite/kernels/CMakeLists.txt      |   1 +
 paddle/fluid/lite/kernels/x86/CMakeLists.txt  |   6 +
 .../lite/kernels/x86/activation_compute.cc    | 108 ++++++++++++++++++
 .../lite/kernels/x86/elementwise_compute.cc   |  56 +++++++++
 .../fluid/lite/model_parser/model_parser.cc   |   2 +-
 paddle/fluid/lite/model_parser/model_parser.h |   2 +
 paddle/fluid/lite/operators/CMakeLists.txt    |   2 +
 paddle/fluid/lite/operators/activation_ops.cc |  37 ++++++
 .../fluid/lite/operators/elementwise_ops.cc   |  47 ++++++++
 paddle/fluid/lite/operators/feed_op.cc        |   1 -
 paddle/fluid/lite/operators/mul_op.h          |   9 +-
 paddle/fluid/lite/operators/op_params.h       |  42 ++++++-
 paddle/fluid/lite/utils/all.h                 |   1 +
 paddle/fluid/lite/utils/factory.h             |   5 +-
 26 files changed, 522 insertions(+), 25 deletions(-)
 create mode 100644 paddle/fluid/incubate/CMakeLists.txt
 create mode 100644 paddle/fluid/lite/core/naive_test_model.py
 create mode 100644 paddle/fluid/lite/kernels/x86/CMakeLists.txt
 create mode 100644 paddle/fluid/lite/kernels/x86/activation_compute.cc
 create mode 100644 paddle/fluid/lite/kernels/x86/elementwise_compute.cc
 create mode 100644 paddle/fluid/lite/operators/activation_ops.cc
 create mode 100644 paddle/fluid/lite/operators/elementwise_ops.cc

diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 4bc94b4c5cd..5b4bfc1eb47 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -232,6 +232,17 @@ using OpKernelConfigsMap =
     std::unordered_map<OpKernelType, std::vector<KernelConfig>,
                        OpKernelType::Hash>;
 
+class OpDuppy : public OperatorBase {
+ public:
+  OpDuppy() : OperatorBase("duppy", {}, {}, {}) {}
+
+  void RunImpl(const Scope& scope,
+               const platform::Place& place) const override {}
+};
+OpDuppy op_duppy;
+Scope scope_duppy;
+RuntimeContext runtime_context_duppy({}, {});
+
 class ExecutionContext {
  public:
   ExecutionContext(const OperatorBase& op, const Scope& scope,
@@ -244,6 +255,13 @@ class ExecutionContext {
         ctx_(ctx),
         kernel_configs_(configs) {}
 
+  ExecutionContext(const platform::DeviceContext& device_context)
+      : op_(op_duppy),
+        scope_(scope_duppy),
+        device_context_(device_context),
+        ctx_(runtime_context_duppy),
+        kernel_configs_(nullptr) {}
+
   const OperatorBase& op() const { return op_; }
 
   const Scope& scope() const { return scope_; }
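A minimal sketch (not part of the patch) of what the new single-argument constructor enables. The lite x86 kernels added later in this patch hold a CPUDeviceContext and wrap it in such a "dummy" ExecutionContext so that existing fluid helpers can be called without a real operator, scope, or RuntimeContext:

    #include "paddle/fluid/framework/operator.h"
    #include "paddle/fluid/platform/device_context.h"

    int main() {
      paddle::platform::CPUDeviceContext cpu_dev_ctx;
      // op()/scope() resolve to the op_duppy/scope_duppy placeholders above;
      // only the device context carries real information for the callee.
      paddle::framework::ExecutionContext exe_ctx(cpu_dev_ctx);
      (void)exe_ctx;
      return 0;
    }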
diff --git a/paddle/fluid/incubate/CMakeLists.txt b/paddle/fluid/incubate/CMakeLists.txt
new file mode 100644
index 00000000000..a6ded520492
--- /dev/null
+++ b/paddle/fluid/incubate/CMakeLists.txt
@@ -0,0 +1 @@
+include_directories(lite)
\ No newline at end of file
diff --git a/paddle/fluid/lite/api/cxx_api.cc b/paddle/fluid/lite/api/cxx_api.cc
index 6bf789c35fe..1ea8be2c0b5 100644
--- a/paddle/fluid/lite/api/cxx_api.cc
+++ b/paddle/fluid/lite/api/cxx_api.cc
@@ -25,7 +25,7 @@ namespace paddle {
 namespace lite {
 
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-void CXXPredictor::SaveModel(const std::string &dir) {
+void ExecutorLite::SaveModel(const std::string &dir) {
   MkDirRecursively(dir.c_str());
   program_->PersistModel(dir, program_desc_);
 }
diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h
index 72e1bea6141..13679413958 100644
--- a/paddle/fluid/lite/api/cxx_api.h
+++ b/paddle/fluid/lite/api/cxx_api.h
@@ -28,14 +28,24 @@ namespace lite {
 
 struct Config {};
 
-class CXXPredictor {
+class ExecutorLite {
  public:
-  CXXPredictor() { scope_ = std::make_shared<lite::Scope>(); }
+  ExecutorLite() { scope_ = std::make_shared<lite::Scope>(); }
+  explicit ExecutorLite(const std::shared_ptr<lite::Scope>& root_scope) {
+    scope_ = root_scope;
+  }
 
   void Build(const std::string& model_path, const Place& prefer_place,
              const std::vector<Place>& valid_places) {
     LoadModel(model_path, scope_.get(), &program_desc_);
-    Program program(program_desc_, scope_, valid_places);
+    Build(program_desc_, prefer_place, valid_places);
+  }
+
+  void Build(const framework::proto::ProgramDesc& desc,
+             const Place& prefer_place,
+             const std::vector<Place>& valid_places) {
+    program_desc_ = desc;
+    Program program(desc, scope_, valid_places);
 
     optimizer_.KernelPickPreferPlace(prefer_place);
     core::KernelPickFactor factor;
@@ -81,5 +91,57 @@ class CXXPredictor {
   std::unique_ptr<RuntimeProgram> program_;
 };
 
+/*
+ * An executor for training.
+ *
+ * Usage:
+ *
+ * CXXTrainer trainer(...);
+ * trainer.RunStartupProgram(...);
+ * auto& exe = trainer.BuildMainProgramExecutor(...);
+ *
+ * for (auto& epoch : epochs) {
+ *   auto* tensor0 = exe.GetInput(...);
+ *   // fill data for tensor0
+ *   exe.Run();
+ * }
+ */
+class CXXTrainer {
+ public:
+  CXXTrainer(const std::shared_ptr<lite::Scope>& root_scope,
+             const Place& preferred_place,
+             const std::vector<Place>& valid_places)
+      : scope_(root_scope),
+        preferred_place_(preferred_place),
+        valid_places_(valid_places),
+        main_program_executor_(ExecutorLite(scope_)) {}
+
+  // Build the RuntimeProgram cache for the main program. The cache will run
+  // multiple times, once per epoch.
+  // NOTE Only the 0-th block can be executed currently.
+  ExecutorLite& BuildMainProgramExecutor(
+      const framework::proto::ProgramDesc& desc, int block_id = 0) {
+    main_program_executor_.Build(desc, preferred_place_, valid_places_);
+    return main_program_executor_;
+  }
+
+  // Run the startup program. It executes just once, no cache needed.
+  void RunStartupProgram(const framework::proto::ProgramDesc& desc,
+                         int block_id = 0) {
+    ExecutorLite exe(scope_);
+    exe.Build(desc, preferred_place_, valid_places_);
+    exe.Run();
+  }
+
+ private:
+  std::shared_ptr<lite::Scope> scope_;
+
+  Place preferred_place_;
+  std::vector<Place> valid_places_;
+
+  // The training program.
+  ExecutorLite main_program_executor_;
+};
+
 }  // namespace lite
 }  // namespace paddle
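A minimal sketch (not part of the patch) of driving the new training API end to end. TrainDemo and its constants are hypothetical; the types and calls come from cxx_api.h and model_parser.h in this patch:

    #include <memory>
    #include <string>
    #include <vector>
    #include "paddle/fluid/lite/api/cxx_api.h"
    #include "paddle/fluid/lite/model_parser/model_parser.h"

    void TrainDemo(const std::string& startup_path,
                   const std::string& main_path) {
      using namespace paddle;  // NOLINT
      lite::Place place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
      auto scope = std::make_shared<lite::Scope>();
      lite::CXXTrainer trainer(scope, place, std::vector<lite::Place>({place}));

      // Deserialize the startup/main ProgramDescs exported by a script such
      // as naive_test_model.py further down in this patch.
      std::string startup_pb, main_pb;
      lite::ReadBinaryFile(startup_path, &startup_pb);
      lite::ReadBinaryFile(main_path, &main_pb);
      framework::proto::ProgramDesc startup_desc, main_desc;
      startup_desc.ParseFromString(startup_pb);
      main_desc.ParseFromString(main_pb);

      // Parameters are initialized once; the main program is cached and
      // re-run every iteration.
      trainer.RunStartupProgram(startup_desc);
      auto& exe = trainer.BuildMainProgramExecutor(main_desc);
      for (int iter = 0; iter < 10; ++iter) {
        auto* input = exe.GetInput(0);
        input->Resize(std::vector<int64_t>({100, 100}));
        float* data = input->mutable_data<float>();
        for (int i = 0; i < 100 * 100; ++i) data[i] = 1.f;
        exe.Run();
      }
    }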
diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc
index 4340387f853..f7e7426a45a 100644
--- a/paddle/fluid/lite/api/cxx_api_bin.cc
+++ b/paddle/fluid/lite/api/cxx_api_bin.cc
@@ -20,7 +20,7 @@ namespace paddle {
 namespace lite {
 
 void Run(const char* model_dir) {
-  lite::CXXPredictor predictor;
+  lite::ExecutorLite predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
 #else
diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc
index bbd19340a14..ae78a0c177f 100644
--- a/paddle/fluid/lite/api/cxx_api_test.cc
+++ b/paddle/fluid/lite/api/cxx_api_test.cc
@@ -22,11 +22,15 @@
 DEFINE_string(model_dir, "", "");
 DEFINE_string(optimized_model, "", "");
 
+// For training.
+DEFINE_string(startup_program_path, "", "");
+DEFINE_string(main_program_path, "", "");
+
 namespace paddle {
 namespace lite {
 
 TEST(CXXApi, test) {
-  lite::CXXPredictor predictor;
+  lite::ExecutorLite predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
 #else
@@ -64,14 +68,48 @@ TEST(CXXApi, test) {
 
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 TEST(CXXApi, save_model) {
-  lite::CXXPredictor predictor;
+  lite::ExecutorLite predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
                   valid_places);
 
   predictor.SaveModel(FLAGS_optimized_model);
 }
-#endif
+#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+
+#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+TEST(CXXTrainer, train) {
+  Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
+  std::vector<Place> valid_places({prefer_place});
+  auto scope = std::make_shared<lite::Scope>();
+
+  CXXTrainer trainer(scope, prefer_place, valid_places);
+
+  std::string main_program_pb, startup_program_pb;
+  ReadBinaryFile(FLAGS_main_program_path, &main_program_pb);
+  ReadBinaryFile(FLAGS_startup_program_path, &startup_program_pb);
+  framework::proto::ProgramDesc main_program_desc, startup_program_desc;
+  main_program_desc.ParseFromString(main_program_pb);
+  startup_program_desc.ParseFromString(startup_program_pb);
+
+  LOG(INFO) << main_program_desc.DebugString();
+
+  for (const auto& op : main_program_desc.blocks(0).ops()) {
+    LOG(INFO) << "get op " << op.type();
+  }
+
+  return;
+
+  trainer.RunStartupProgram(startup_program_desc);
+  auto& exe = trainer.BuildMainProgramExecutor(main_program_desc);
+  auto* tensor0 = exe.GetInput(0);
+  tensor0->Resize(std::vector<int64_t>({100, 100}));
+  auto* data0 = tensor0->mutable_data<float>();
+  data0[0] = 0;
+
+  exe.Run();
+}
+#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 
 }  // namespace lite
 }  // namespace paddle
diff --git a/paddle/fluid/lite/core/context.h b/paddle/fluid/lite/core/context.h
index d255645c72c..01253e0de19 100644
--- a/paddle/fluid/lite/core/context.h
+++ b/paddle/fluid/lite/core/context.h
@@ -16,9 +16,13 @@
 #include "paddle/fluid/lite/utils/any.h"
 #ifdef LITE_WITH_CUDA
-#include <cuda.h>
+#include "paddle/fluid/lite/cuda/blas.h"
 #include "paddle/fluid/lite/cuda/cuda_utils.h"
 #endif
+#ifdef LITE_WITH_X86
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/platform/device_context.h"
+#endif
 #include <memory>
 #include <set>
 #include <vector>
@@ -54,6 +58,10 @@ struct X86Context {
   // overall information
 
   // kernel information
+
+  // legacy info.
+  std::unique_ptr<::paddle::platform::CPUDeviceContext> x86_device_context;
+  std::unique_ptr<::paddle::framework::ExecutionContext> x86_execution_context;
 };
 #endif
diff --git a/paddle/fluid/lite/core/hvy_tensor.h b/paddle/fluid/lite/core/hvy_tensor.h
index 5344c96e671..1fa8dbbee33 100644
--- a/paddle/fluid/lite/core/hvy_tensor.h
+++ b/paddle/fluid/lite/core/hvy_tensor.h
@@ -29,7 +29,7 @@ namespace lite {
 class DDimHvy : public DDimBase<DDimHvy> {
  public:
   DDimHvy() = default;
-  explicit DDimHvy(const std::vector<int64_t>& x) : DDimBase<DDimHvy>() {
+  DDimHvy(const std::vector<int64_t>& x) : DDimBase<DDimHvy>() {  // NOLINT
     ConstructFrom(x);
   }
   explicit DDimHvy(const framework::DDim& x) : data_(x) {}
@@ -47,6 +47,14 @@ class DDimHvy : public DDimBase<DDimHvy> {
   size_t size() const { return data_.size(); }
   bool empty() const { return data_.size() == 0; }
 
+  bool operator==(const DDimHvy& other) {
+    if (data_.size() != other.data_.size()) return false;
+    for (int i = 0; i < data_.size(); i++) {
+      if (data_[i] != other.data_[i]) return false;
+    }
+    return true;
+  }
+
  private:
   framework::DDim data_;
 };
@@ -85,8 +93,7 @@ class TensorHvy : public TensorBase<TensorHvy> {
 
   const void* raw_data() const { return data_.raw_data(); }
 
-  template <typename DimT>
-  void Resize(const DimT& dims) {
+  void Resize(const DDimHvy& dims) {
     LOG(INFO) << "dims.size " << dims.size();
     data_.Resize(framework::make_ddim(dims.Vectorize()));
   }
@@ -103,6 +110,9 @@ class TensorHvy : public TensorBase<TensorHvy> {
   const framework::LoD& lod() const { return data_.lod(); }
   framework::LoD* mutable_lod() { return data_.mutable_lod(); }
 
+  const framework::LoDTensor& raw_tensor() const { return data_; }
+  framework::LoDTensor& raw_tensor() { return data_; }
+
  private:
   framework::LoDTensor data_;
 };
diff --git a/paddle/fluid/lite/core/naive_test_model.py b/paddle/fluid/lite/core/naive_test_model.py
new file mode 100644
index 00000000000..f4bbdefceca
--- /dev/null
+++ b/paddle/fluid/lite/core/naive_test_model.py
@@ -0,0 +1,46 @@
+import numpy
+import sys, os
+import numpy as np
+import paddle.fluid as fluid
+from paddle.fluid.backward import append_backward
+
+a = fluid.layers.data(name="a", shape=[100], dtype='float32')
+label = fluid.layers.data(name="label", shape=[100], dtype='float32')
+
+a1 = fluid.layers.fc(input=a, size=500, act=None, bias_attr=False)
+
+cost = fluid.layers.square_error_cost(a1, label)
+avg_cost = fluid.layers.mean(cost)
+
+optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+optimizer.minimize(cost)
+
+cpu = fluid.core.CPUPlace()
+loss = exe = fluid.Executor(cpu)
+
+exe.run(fluid.default_startup_program())
+with open('startup_program.pb', 'wb') as f:
+    f.write(fluid.default_startup_program().desc.serialize_to_string())
+
+data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
+
+#fluid.default_main_program().desc.
+
+
+
+#prog = fluid.compiler.CompiledProgram(fluid.default_main_program())
+prog = fluid.default_main_program()
+
+#append_backward(loss)
+
+with open('main_program.pb', 'wb') as f:
+    f.write(prog.desc.serialize_to_string())
+
+
+#outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
+
+sys.exit(0)
+fluid.io.save_inference_model("./model2", [a.name], [a1], exe)
+
+print(numpy.array(outs))
+
diff --git a/paddle/fluid/lite/core/op_lite.cc b/paddle/fluid/lite/core/op_lite.cc
index 47e0c441e75..e54053026d9 100644
--- a/paddle/fluid/lite/core/op_lite.cc
+++ b/paddle/fluid/lite/core/op_lite.cc
@@ -71,7 +71,7 @@ bool OpLite::Run() {
 bool OpLite::Attach(const OpDesc &opdesc, lite::Scope *scope) {
   // valid_places_.clear();
   CHECK(scope != nullptr);
-  // CHECK(!op_info_.get());
+  //CHECK(!op_info_.get());
   scope_ = scope;
   op_info_.reset(new OpInfo);  // Force clean the out-of-date information.
   op_info_->Build(opdesc.ReadonlyProto());
diff --git a/paddle/fluid/lite/core/op_lite.h b/paddle/fluid/lite/core/op_lite.h
index 9230ec8c954..2f878905ca3 100644
--- a/paddle/fluid/lite/core/op_lite.h
+++ b/paddle/fluid/lite/core/op_lite.h
@@ -116,6 +116,22 @@ class OpLite : public Registry {
   friend class mir::Node;
   friend class mir::SSAGraph;
 
+ protected:
+  // some helper functions.
+  template <typename T>
+  const T *GetVar(Scope *scope, const std::string &name) {
+    auto *var = scope->FindVar(name);
+    CHECK(var) << "No var found for " << name;
+    return &var->Get<T>();
+  }
+  template <typename T>
+  T *GetMutableVar(Scope *scope, const std::string &name) {
+    auto *var = scope->FindVar(name);
+    CHECK(var) << "No var found for " << name;
+    return var->GetMutable<T>();
+  }
+
  protected:
   lite::Scope *scope_{};
   std::unique_ptr<KernelBase> kernel_;
diff --git a/paddle/fluid/lite/core/program.h b/paddle/fluid/lite/core/program.h
index 33f2f2a7789..b25b6ae7d16 100644
--- a/paddle/fluid/lite/core/program.h
+++ b/paddle/fluid/lite/core/program.h
@@ -62,13 +62,13 @@ struct Program {
   // Build from a program and scope.
   void Build(const framework::proto::ProgramDesc& program) {
     CHECK(ops.empty()) << "Executor duplicate Build found";
-    // Create operators.
     for (const auto& proto_op_desc : program.blocks(0).ops()) {
       lite::OpDesc op_desc(proto_op_desc);
       auto op_type = op_desc.Type();
       // if (op_type == "feed" || op_type == "fetch") continue;
       VLOG(4) << "create Op [" << op_type << "]";
+      LOG(INFO) << "create Op [" << op_type << "]";
       auto op = LiteOpRegistry::Global().Create(op_type);
       CHECK(op) << "no Op found for " << op_type;
       ops.emplace_back(std::move(op));
@@ -86,6 +86,7 @@ struct Program {
     tmp_vars.push_back("feed");
     tmp_vars.push_back("fetch");
+    CHECK(!program.blocks().empty());
     for (auto proto_var_desc : program.blocks(0).vars()) {
       lite::VarDesc var_desc(proto_var_desc);
       if (!var_desc.Persistable()) {
diff --git a/paddle/fluid/lite/kernels/CMakeLists.txt b/paddle/fluid/lite/kernels/CMakeLists.txt
index cf9da97084a..877ac7e05e3 100644
--- a/paddle/fluid/lite/kernels/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/CMakeLists.txt
@@ -3,3 +3,4 @@ set(lite_kernel_deps type_system kernel_lite op_lite op_registry_lite ${tensor_lite})
 add_subdirectory(host)
 add_subdirectory(arm)
 add_subdirectory(cuda)
+add_subdirectory(x86)
diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
new file mode 100644
index 00000000000..90e3d20a27e
--- /dev/null
+++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -0,0 +1,6 @@
+if(NOT LITE_WITH_X86)
+  return()
+endif()
+
+cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
+cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op)
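For orientation (a sketch, not in the patch): the Activate/ActivateGrad helpers in the file below only require that a functor expose an ELEMENT_TYPE and an Eigen-style call operator. This is roughly the shape of the fluid functors they consume, simplified and renamed from paddle/fluid/operators/activation_op.h:

    template <typename T>
    struct SquareFunctorSketch {
      using ELEMENT_TYPE = T;  // read by Activate via Functor::ELEMENT_TYPE
      template <typename Device, typename X, typename Out>
      void operator()(Device d, X x, Out out) const {
        out.device(d) = x.square();  // element-wise square on the Eigen device
      }
    };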
diff --git a/paddle/fluid/lite/kernels/x86/activation_compute.cc b/paddle/fluid/lite/kernels/x86/activation_compute.cc
new file mode 100644
index 00000000000..4873a30ba4c
--- /dev/null
+++ b/paddle/fluid/lite/kernels/x86/activation_compute.cc
@@ -0,0 +1,108 @@
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/operators/activation_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace x86 {
+
+template <typename Functor>
+void Activate(const platform::CPUDeviceContext& context,
+              const framework::LoDTensor* X, framework::LoDTensor* Out) {
+  using T = typename Functor::ELEMENT_TYPE;
+  auto* place = context.eigen_device();
+  auto x =
+      framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(X));
+  auto out =
+      framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(Out));
+  Functor()(*place, x, out);
+}
+
+template <typename Functor>
+void ActivateGrad(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor* X,
+                  const framework::LoDTensor* Out,
+                  const framework::LoDTensor* Out_grad,
+                  framework::LoDTensor* X_grad) {
+  using T = typename Functor::ELEMENT_TYPE;
+  auto* place = context.eigen_device();
+  auto x =
+      framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(X));
+  auto out =
+      framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(Out));
+  auto x_grad = framework::EigenVector<T>::Flatten(
+      paddle::operators::detail::Ref(X_grad));
+  auto out_grad = framework::EigenVector<T>::Flatten(
+      paddle::operators::detail::Ref(Out_grad));
+  Functor()(*place, x, out, out_grad, x_grad);
+}
+
+template <typename T>
+class SquareCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationParam;
+
+  void Run() override {
+    auto& context = context_->As<X86Context>();
+    auto& param = *param_.get_mutable<param_t>();
+    CHECK(context.x86_device_context);
+
+    param.Out->template mutable_data<T>();
+    Activate<paddle::operators::SquareFunctor<T>>(*context.x86_device_context,
+                                                  &param.X->raw_tensor(),
+                                                  &param.Out->raw_tensor());
+  }
+
+  // TargetType target() const override;
+  // PrecisionType precision() const override;
+
+  virtual ~SquareCompute() = default;
+};
+
+template <typename T>
+class SquareGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationGradParam;
+
+  void Run() override {
+    auto& context = context_->As<X86Context>();
+    auto& param = *param_.get_mutable<param_t>();
+    CHECK(context.x86_device_context);
+    param.X_grad->template mutable_data<T>();
+
+    ActivateGrad<paddle::operators::SquareGradFunctor<T>>(
+        *context.x86_device_context, &param.X->raw_tensor(),
+        &param.Out->raw_tensor(), &param.Out_grad->raw_tensor(),
+        &param.X_grad->raw_tensor());
+  }
+
+  // TargetType target() const override;
+  // PrecisionType precision() const override;
+
+  virtual ~SquareGradCompute() = default;
+};
+
+}  // namespace x86
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// float
+REGISTER_LITE_KERNEL(square, kX86, kFloat, kNCHW,
+                     paddle::lite::kernels::x86::SquareCompute<float>, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("W", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(square_grad, kX86, kFloat, kNCHW,
+                     paddle::lite::kernels::x86::SquareGradCompute<float>, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("W", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
+    .Finalize();
diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc
new file mode 100644
index 00000000000..e5fabd87323
--- /dev/null
+++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc
@@ -0,0 +1,56 @@
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace x86 {
+
+template <typename T>
+struct SubFunctor {
+  inline HOSTDEVICE T operator()(T a, T b) const { return a - b; }
+};
+
+template <typename T>
+class ElementwiseSubCompute
+    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ElementwiseParam;
+
+  void Run() override {
+    auto& param = *param_.get_mutable<param_t>();
+    auto& context = context_->As<X86Context>();
+    CHECK(context.x86_execution_context);
+
+    param.Out->template mutable_data<T>();
+    paddle::operators::ElementwiseComputeEx<SubFunctor<T>,
+                                            platform::CPUDeviceContext, T>(
+        *context.x86_execution_context, &param.X->raw_tensor(),
+        &param.Y->raw_tensor(), param.axis, SubFunctor<T>(),
+        &param.Out->raw_tensor());
+  }
+
+  // TargetType target() const override;
+  // PrecisionType precision() const override;
+
+  virtual ~ElementwiseSubCompute() = default;
+};
+
+}  // namespace x86
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// float
+REGISTER_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW,
+                     paddle::lite::kernels::x86::ElementwiseSubCompute<float>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
+    .Finalize();
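A sketch (not in this patch) of the same scaffolding reused for a hypothetical elementwise_add kernel, sharing the includes and namespaces of elementwise_compute.cc above. Only the functor changes; broadcasting still comes from fluid's ElementwiseComputeEx:

    template <typename T>
    struct AddFunctor {
      inline HOSTDEVICE T operator()(T a, T b) const { return a + b; }
    };

    template <typename T>
    class ElementwiseAddCompute
        : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
     public:
      using param_t = operators::ElementwiseParam;

      void Run() override {
        auto& param = *param_.get_mutable<param_t>();
        auto& context = context_->As<X86Context>();
        CHECK(context.x86_execution_context);

        param.Out->template mutable_data<T>();
        // Identical call shape to ElementwiseSubCompute, with AddFunctor
        // supplying the element-wise operation.
        paddle::operators::ElementwiseComputeEx<AddFunctor<T>,
                                                platform::CPUDeviceContext, T>(
            *context.x86_execution_context, &param.X->raw_tensor(),
            &param.Y->raw_tensor(), param.axis, AddFunctor<T>(),
            &param.Out->raw_tensor());
      }
    };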
diff --git a/paddle/fluid/lite/model_parser/model_parser.cc b/paddle/fluid/lite/model_parser/model_parser.cc
index 2addf5ff911..c829259ee42 100644
--- a/paddle/fluid/lite/model_parser/model_parser.cc
+++ b/paddle/fluid/lite/model_parser/model_parser.cc
@@ -114,7 +114,7 @@ void LoadLoDTensor(std::istream &is, Variable *var) {
 
 void ReadBinaryFile(const std::string &filename, std::string *contents) {
   std::ifstream fin(filename, std::ios::in | std::ios::binary);
-  CHECK(fin.is_open()) << "Cannot open file " << filename;
+  CHECK(fin.is_open()) << "Cannot open file: " << filename;
   fin.seekg(0, std::ios::end);
   auto size = fin.tellg();
   contents->clear();
diff --git a/paddle/fluid/lite/model_parser/model_parser.h b/paddle/fluid/lite/model_parser/model_parser.h
index de40c82ab7d..8934546a200 100644
--- a/paddle/fluid/lite/model_parser/model_parser.h
+++ b/paddle/fluid/lite/model_parser/model_parser.h
@@ -47,5 +47,7 @@ void SerializeTensor(std::ostream& os, const lite::Scope& scope,
 // LoDTensor to ostream
 void TensorToStream(std::ostream& os, const lite::Tensor& tensor);
 
+void ReadBinaryFile(const std::string& filename, std::string* contents);
+
 }  // namespace lite
 }  // namespace paddle
diff --git a/paddle/fluid/lite/operators/CMakeLists.txt b/paddle/fluid/lite/operators/CMakeLists.txt
index ba9ecb77e43..184acb8485d 100644
--- a/paddle/fluid/lite/operators/CMakeLists.txt
+++ b/paddle/fluid/lite/operators/CMakeLists.txt
@@ -7,6 +7,8 @@ cc_library(scale_op_lite SRCS scale_op.cc DEPS ${op_DEPS})
 cc_library(feed_op_lite SRCS feed_op.cc DEPS ${op_DEPS})
 cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS})
 cc_library(io_copy_op_lite SRCS io_copy_op.cc DEPS ${op_DEPS})
+cc_library(activation_ops_lite SRCS activation_ops.cc DEPS ${op_DEPS})
+cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
 cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite})
 
 set(ops_lite
diff --git a/paddle/fluid/lite/operators/activation_ops.cc b/paddle/fluid/lite/operators/activation_ops.cc
new file mode 100644
index 00000000000..1e824e8580e
--- /dev/null
+++ b/paddle/fluid/lite/operators/activation_ops.cc
@@ -0,0 +1,37 @@
+#include "paddle/fluid/lite/core/op_lite.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+class ActivationOp : public OpLite {
+ public:
+  explicit ActivationOp(const std::string& type) : OpLite(type) {}
+
+  bool CheckShape() const override { return true; }
+
+  bool InferShape() const override {
+    param_.Out->Resize(param_.X->dims());
+    return true;
+  }
+
+  bool AttachImpl(const OpDesc& opdesc, lite::Scope* scope) override {
+    auto X_name = opdesc.Input("X").front();
+    auto Out_name = opdesc.Output("Out").front();
+
+    param_.X = GetVar<lite::Tensor>(scope, X_name);
+    param_.Out = GetMutableVar<lite::Tensor>(scope, Out_name);
+    return true;
+  }
+
+  void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
+
+ private:
+  mutable ActivationParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_OP(square, paddle::lite::operators::ActivationOp);
diff --git a/paddle/fluid/lite/operators/elementwise_ops.cc b/paddle/fluid/lite/operators/elementwise_ops.cc
new file mode 100644
index 00000000000..f4a22c6fcd8
--- /dev/null
+++ b/paddle/fluid/lite/operators/elementwise_ops.cc
@@ -0,0 +1,47 @@
+#include "paddle/fluid/lite/core/op_lite.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+class ElementwiseOp : public OpLite {
+ public:
+  explicit ElementwiseOp(const std::string& type) : OpLite(type) {}
+
+  bool CheckShape() const override {
+    CHECK_OR_FALSE(param_.X);
+    CHECK_OR_FALSE(param_.Y);
+    CHECK_OR_FALSE(param_.Out);
+    return true;
+  }
+
+  bool InferShape() const override {
+    CHECK_OR_FALSE(param_.X->dims() == param_.Y->dims());
+    param_.Out->Resize(param_.X->dims());
+    return true;
+  }
+
+  bool AttachImpl(const OpDesc& opdesc, lite::Scope* scope) override {
+    CHECK_EQ(opdesc.Inputs().size(), 2UL);
+    auto X_name = opdesc.Input("X").front();
+    auto Y_name = opdesc.Input("Y").front();
+    auto Out_name = opdesc.Output("Out").front();
+
+    param_.X = GetVar<lite::Tensor>(scope, X_name);
+    param_.Y = GetVar<lite::Tensor>(scope, Y_name);
+    param_.Out = GetMutableVar<lite::Tensor>(scope, Out_name);
+    param_.axis = boost::get<int>(opdesc.GetAttr("axis"));
+    return true;
+  }
+
+  void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
+
+ private:
+  mutable operators::ElementwiseParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp);
diff --git a/paddle/fluid/lite/operators/feed_op.cc b/paddle/fluid/lite/operators/feed_op.cc
index 26ca59dd0d5..45a7c198cb6 100644
--- a/paddle/fluid/lite/operators/feed_op.cc
+++ b/paddle/fluid/lite/operators/feed_op.cc
@@ -31,7 +31,6 @@ class FeedOp : public OpLite {
 
   bool InferShape() const override { return true; }
 
- protected:
   void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); }
 
  protected:
diff --git a/paddle/fluid/lite/operators/mul_op.h b/paddle/fluid/lite/operators/mul_op.h
index 613e8c9f0c1..806fdb01f9b 100644
--- a/paddle/fluid/lite/operators/mul_op.h
+++ b/paddle/fluid/lite/operators/mul_op.h
@@ -41,9 +41,12 @@ class MulOpLite : public OpLite {
     auto input = op_desc.Input("X").front();
     auto W = op_desc.Input("Y").front();
     auto out = op_desc.Output("Out").front();
-
-    param_.x = scope->FindVar(input)->GetMutable<lite::Tensor>();
-    param_.y = scope->FindVar(W)->GetMutable<lite::Tensor>();
+    auto *var = scope->FindVar(input);
+    CHECK(var);
+    param_.x = var->GetMutable<lite::Tensor>();
+    var = scope->FindVar(W);
+    CHECK(var);
+    param_.y = var->GetMutable<lite::Tensor>();
     CHECK(scope->FindVar(out));
     param_.output = scope->FindVar(out)->GetMutable<lite::Tensor>();
     param_.x_num_col_dims = GetAttr<int>(op_desc.GetAttr("x_num_col_dims"));
diff --git a/paddle/fluid/lite/operators/op_params.h b/paddle/fluid/lite/operators/op_params.h
index a080bafead6..d21c0e3135d 100644
--- a/paddle/fluid/lite/operators/op_params.h
+++ b/paddle/fluid/lite/operators/op_params.h
@@ -25,6 +25,9 @@ namespace paddle {
 namespace lite {
 namespace operators {
 
+using param_t = Any;
+
+/// ----------------------- Functional operators ------------------------------
 struct FeedParam {
   const std::vector<lite::Tensor>* feed_list{};
   lite::Tensor* out{};
@@ -37,6 +40,14 @@ struct FetchParam {
   int col;
 };
 
+// Helper op for lite framework
+struct IoCopyParam {
+  const lite::Tensor* x{};
+  lite::Tensor* y{};
+};
+
+/// -------------------------- NN operators -----------------------------------
+
 struct FcParam {
   lite::Tensor* input{};
   lite::Tensor* w{};
@@ -71,13 +82,34 @@ struct ScaleParam {
   bool bias_after_scale{true};
 };
 
-struct IoCopyParam {
-  const lite::Tensor* x{};
-  lite::Tensor* y{};
+/// ----------------------- element wise operators ----------------------------
+struct ElementwiseParam {
+  const lite::Tensor* X{};
+  const lite::Tensor* Y{};
+  lite::Tensor* Out{};
+  int axis{-1};  // for broadcasting.
+};
+
+struct ElementwiseGradParam {
+  const lite::Tensor* X_grad{};
+  const lite::Tensor* Y_grad{};
+  lite::Tensor* Out_grad{};
+  int axis{-1};  // for broadcasting.
 };
 
-using param_t = variant<FeedParam, FetchParam, FcParam, MulParam, ScaleParam,
-                        IoCopyParam>;
+/// ----------------------- activation operators ------------------------------
+struct ActivationParam {
+  const lite::Tensor* X{};
+  lite::Tensor* Out{};
+};
+
+struct ActivationGradParam {
+  const lite::Tensor* X{};
+  const lite::Tensor* Out{};
+  // for backward
+  lite::Tensor* X_grad{};
+  const lite::Tensor* Out_grad{};
+};
 
 }  // namespace operators
 }  // namespace lite
diff --git a/paddle/fluid/lite/utils/all.h b/paddle/fluid/lite/utils/all.h
index 2ee0e078c38..70e71ae3008 100644
--- a/paddle/fluid/lite/utils/all.h
+++ b/paddle/fluid/lite/utils/all.h
@@ -21,3 +21,4 @@
 #include "paddle/fluid/lite/utils/io.h"
 #include "paddle/fluid/lite/utils/macros.h"
 #include "paddle/fluid/lite/utils/varient.h"
+#include "paddle/fluid/lite/utils/any.h"
diff --git a/paddle/fluid/lite/utils/factory.h b/paddle/fluid/lite/utils/factory.h
index 3f86df842f1..f8db0bc42fe 100644
--- a/paddle/fluid/lite/utils/factory.h
+++ b/paddle/fluid/lite/utils/factory.h
@@ -55,7 +55,10 @@ class Factory {
   }
 
   item_ptr_t Create(const std::string& op_type) const {
-    return std::move(Creates(op_type).front());
+    auto res = Creates(op_type);
+    if (res.empty()) return nullptr;
+    CHECK_EQ(res.size(), 1UL) << "Get multiple Op for type " << op_type;
+    return std::move(res.front());
   }
 
   std::list<item_ptr_t> Creates(const std::string& op_type) const {
-- 
GitLab
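The Factory::Create change above makes the failure mode explicit: instead of calling .front() on a possibly empty list (undefined behavior), Create now returns nullptr when nothing is registered, which is what lets Program::Build report "no Op found for ...". A sketch of the resulting calling pattern (DemoCreate is hypothetical; it mirrors the CHECK already done in Program::Build):

    #include <string>
    #include "paddle/fluid/lite/core/op_lite.h"
    #include "paddle/fluid/lite/core/op_registry.h"

    void DemoCreate(const std::string& type) {
      auto op = paddle::lite::LiteOpRegistry::Global().Create(type);
      CHECK(op) << "no Op found for " << type;  // Create can now return nullptr
    }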