From 01e1cdac7755bf9d15e386b913fa7ccfc2b65009 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Mon, 10 Jun 2019 15:56:47 +0800 Subject: [PATCH] init code gen for light framework(#17905) --- paddle/fluid/lite/CMakeLists.txt | 42 ++- paddle/fluid/lite/api/cxx_api_test.cc | 1 + paddle/fluid/lite/core/CMakeLists.txt | 5 +- paddle/fluid/lite/core/context.h | 5 + paddle/fluid/lite/core/hvy_tensor.h | 10 +- paddle/fluid/lite/core/kernel.h | 2 +- paddle/fluid/lite/core/lite_tensor.h | 8 + paddle/fluid/lite/core/op_lite.cc | 14 +- paddle/fluid/lite/core/op_registry.h | 9 +- paddle/fluid/lite/core/scope.cc | 8 +- paddle/fluid/lite/core/target_wrapper.h | 41 ++- paddle/fluid/lite/core/tensor.h | 30 ++- paddle/fluid/lite/core/variable.h | 2 +- paddle/fluid/lite/gen_code/CMakeLists.txt | 24 ++ paddle/fluid/lite/gen_code/gen_code.cc | 193 +++++++++++++ paddle/fluid/lite/gen_code/gen_code.h | 254 ++++++++++++++++++ paddle/fluid/lite/gen_code/gen_code_test.cc | 139 ++++++++++ .../lite/gen_code/generated_code_test.cc | 46 ++++ paddle/fluid/lite/gen_code/paddle_infer.cc | 139 ++++++++++ paddle/fluid/lite/gen_code/paddle_infer.h | 70 +++++ paddle/fluid/lite/kernels/x86/CMakeLists.txt | 2 +- paddle/fluid/lite/model_parser/cpp/op_desc.cc | 14 +- paddle/fluid/lite/model_parser/pb/op_desc.cc | 2 +- paddle/fluid/lite/operators/feed_op.cc | 4 +- paddle/fluid/lite/operators/mul_op.h | 7 +- paddle/fluid/lite/tools/build.sh | 34 ++- paddle/fluid/lite/utils/CMakeLists.txt | 2 +- paddle/fluid/lite/utils/string.cc | 19 ++ paddle/fluid/lite/utils/string.h | 77 ++++++ 29 files changed, 1145 insertions(+), 58 deletions(-) create mode 100644 paddle/fluid/lite/gen_code/CMakeLists.txt create mode 100644 paddle/fluid/lite/gen_code/gen_code.cc create mode 100644 paddle/fluid/lite/gen_code/gen_code.h create mode 100644 paddle/fluid/lite/gen_code/gen_code_test.cc create mode 100644 paddle/fluid/lite/gen_code/generated_code_test.cc create mode 100644 paddle/fluid/lite/gen_code/paddle_infer.cc create mode 100644 paddle/fluid/lite/gen_code/paddle_infer.h create mode 100644 paddle/fluid/lite/utils/string.cc create mode 100644 paddle/fluid/lite/utils/string.h diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index 028384cca07..2c263cc4f68 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -34,7 +34,7 @@ endfunction() function (lite_deps TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS ARGS) cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps ${lite_deps_DEPS}) @@ -63,14 +63,35 @@ function (lite_deps TARGET) endforeach(var) endif() + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + foreach(var ${lite_deps_LIGHT_DEPS}) + set(deps ${deps} ${var}) + endforeach(var) + endif() + + if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + foreach(var ${lite_deps_HVY_DEPS}) + set(deps ${deps} ${var}) + endforeach(var) + endif() + set(${TARGET} ${deps} PARENT_SCOPE) endfunction() +# cc_library with branch support. +# The branches: +# X86_DEPS: works only when LITE_WITH_X86 is ON. 
+# CUDA_DEPS: LITE_WITH_CUDA +# ARM_DEPS: LITE_WITH_ARM +# PROFILE_DEPS: LITE_WITH_PROFILE +# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK function(lite_cc_library TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS + HVY_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -79,7 +100,10 @@ function(lite_cc_library TARGET) X86_DEPS ${args_X86_DEPS} CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} - PROFILE_DEPS ${args_PROFILE_DEPS}) + PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} + ) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -87,7 +111,8 @@ endfunction() function(lite_cc_binary TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -97,6 +122,8 @@ function(lite_cc_binary TARGET) CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} ) cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -112,7 +139,9 @@ endfunction() function(lite_cc_test TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS + ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -122,6 +151,8 @@ function(lite_cc_test TARGET) CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} ) _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS}) register_test_offline("${TARGET}") @@ -137,3 +168,4 @@ add_subdirectory(kernels) add_subdirectory(model_parser) add_subdirectory(utils) add_subdirectory(api) +add_subdirectory(gen_code) diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc index 05630384044..c818f33e029 100644 --- a/paddle/fluid/lite/api/cxx_api_test.cc +++ b/paddle/fluid/lite/api/cxx_api_test.cc @@ -76,6 +76,7 @@ TEST(CXXApi, save_model) { predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, valid_places); + LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model); } #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 94085934112..e2ab2354e9c 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -8,7 +8,7 @@ lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc lite_cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite) lite_cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite) if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor) + lite_cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor HVY_DEPS framework_proto) endif() 
 if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
@@ -26,8 +26,7 @@ cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
 cc_library(cpu_info_lite SRCS cpu_info.cc)
 cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite)
 cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
-    cpp_op_desc_lite
-    ${tensor_lite})
+    cpp_op_desc_lite ${tensor_lite})
 cc_library(types_lite SRCS types.cc)
 cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
diff --git a/paddle/fluid/lite/core/context.h b/paddle/fluid/lite/core/context.h
index 4702512af3a..483f5154144 100644
--- a/paddle/fluid/lite/core/context.h
+++ b/paddle/fluid/lite/core/context.h
@@ -173,6 +173,11 @@ class Context<TargetType::kX86> {
         new ::paddle::framework::ExecutionContext(*x86_device_context_));
   }
 
+  Context(Context&& ctx) {
+    x86_device_context_ = std::move(ctx.x86_device_context_);
+    x86_execution_context_ = std::move(ctx.x86_execution_context_);
+  }
+
   // NOTE: InitOnce should only be used by ContextScheduler
   void InitOnce() {}
 
diff --git a/paddle/fluid/lite/core/hvy_tensor.h b/paddle/fluid/lite/core/hvy_tensor.h
index f86ca272921..21bfa2b48e2 100644
--- a/paddle/fluid/lite/core/hvy_tensor.h
+++ b/paddle/fluid/lite/core/hvy_tensor.h
@@ -21,6 +21,7 @@
 #pragma once
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/lite/core/target_wrapper.h"
 #include "paddle/fluid/lite/core/tensor.h"
 
 namespace paddle {
@@ -65,6 +66,14 @@ class TensorHvy : public TensorBase<TensorHvy> {
   using DDimT = DDimHvy;
   using LoDT = framework::LoD;
 
+  template <typename DType, typename DimT, TargetType Target>
+  void Assign(DType* data, const DimT& dim) {
+    Resize(dim);
+    auto* dst = mutable_data<DType>(Target);
+    CopySync<Target>(dst, data, dim.production() * sizeof(DType),
+                     IoDirection::HtoD);
+  }
+
   TargetType target() const {
     if (platform::is_gpu_place(data_.place())) {
       return TARGET(kCUDA);
@@ -95,7 +104,6 @@ class TensorHvy : public TensorBase<TensorHvy> {
   const void* raw_data() const { return data_.raw_data(); }
 
   void Resize(const DDimHvy& dims) {
-    LOG(INFO) << "dims.size " << dims.size();
     data_.Resize(framework::make_ddim(dims.Vectorize()));
   }
 
diff --git a/paddle/fluid/lite/core/kernel.h b/paddle/fluid/lite/core/kernel.h
index 629da86bbdd..d7b296eec12 100644
--- a/paddle/fluid/lite/core/kernel.h
+++ b/paddle/fluid/lite/core/kernel.h
@@ -150,7 +150,7 @@ class KernelBase {
   void Torch() {}
 
  protected:
-  std::unique_ptr<KernelContext> ctx_;
+  std::unique_ptr<KernelContext> ctx_{nullptr};
   mutable operators::param_t param_;
   // The corresponding op type.
   std::string op_type_{};
diff --git a/paddle/fluid/lite/core/lite_tensor.h b/paddle/fluid/lite/core/lite_tensor.h
index 79f1352cb06..6cccdc0dd03 100644
--- a/paddle/fluid/lite/core/lite_tensor.h
+++ b/paddle/fluid/lite/core/lite_tensor.h
@@ -61,6 +61,14 @@ class TensorLite : public TensorBase<TensorLite> {
 
   TensorLite() : buffer_(std::make_shared<Buffer>()) {}
 
+  template <typename DType, typename DimT, TargetType Target>
+  void Assign(DType *data, const DimT &dim) {
+    Resize(dim);
+    auto *dst = mutable_data<DType>(Target);
+    CopySync<Target>(dst, data, dim.production() * sizeof(DType),
+                     IoDirection::HtoD);
+  }
+
   template <typename T>
   const T *data() const {
     return static_cast<const T *>(buffer_->data());
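A usage sketch (editorial, not part of the patch) of the Assign helper added above: it resizes the tensor and then CopySyncs a host buffer into the tensor's storage. The sketch assumes an x86 host build where the lite::Tensor and lite::DDim aliases come from compatible_tensor.h, and mirrors the manual gen_code test further below.

```cpp
// Hedged sketch; FillWeight is a hypothetical helper, not in the patch.
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"

void FillWeight(paddle::lite::Tensor* w) {
  std::vector<float> host({0.f, 1.f, 2.f, 3.f});
  // Resize to 2x2, then copy the host buffer into the tensor (HtoD direction
  // degenerates to a host memcpy on x86).
  w->Assign<float, paddle::lite::DDim, TARGET(kX86)>(
      host.data(), paddle::lite::DDim{std::vector<int64_t>({2, 2})});
}
```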
diff --git a/paddle/fluid/lite/core/op_lite.cc b/paddle/fluid/lite/core/op_lite.cc
index dc22e4fb4b4..bc30a00a497 100644
--- a/paddle/fluid/lite/core/op_lite.cc
+++ b/paddle/fluid/lite/core/op_lite.cc
@@ -28,15 +28,23 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
   CHECK(!op_type_.empty()) << "op_type_ should be set first";
 
   auto pick_kernel = [&](const Place &place) {
-    auto ks = KernelRegistry::Global().Create(
-        (kernel_type.empty() ? op_type_ : kernel_type), place.target,
-        place.precision, place.layout);
+    auto ks = KernelRegistry::Global().Create(op_type_, place.target,
+                                              place.precision, place.layout);
     for (auto &&it : ks) {
       AttachKernel(it.get());
       kernels.emplace_back(std::move(it));
     }
   };
 
+  if (!kernel_type.empty()) {
+    Place place;
+    std::string op_type, alias;
+    KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
+    pick_kernel(place);
+    CHECK(!kernels.empty()) << "no kernel for kernel type " << kernel_type;
+    return kernels;
+  }
+
   std::set<Place> place_set;
   for (auto place : places) {
     place_set.insert(place);
diff --git a/paddle/fluid/lite/core/op_registry.h b/paddle/fluid/lite/core/op_registry.h
index 1052419ecda..49332262deb 100644
--- a/paddle/fluid/lite/core/op_registry.h
+++ b/paddle/fluid/lite/core/op_registry.h
@@ -91,9 +91,9 @@ class KernelRegistry final {
   void Register(const std::string &name,
                 typename KernelRegistryForTarget<Target, Precision,
                                                  Layout>::creator_t &&creator) {
-    VLOG(3) << "register for " << TargetToStr(Target) << ":"
-            << PrecisionToStr(Precision) << "//"
-            << GetKernelOffset<Target, Precision, Layout>();
+    // VLOG(3) << "register for " << TargetToStr(Target) << ":"
+    //<< PrecisionToStr(Precision) << "//"
+    //<< GetKernelOffset<Target, Precision, Layout>();
     using kernel_registor_t =
         KernelRegistryForTarget<Target, Precision, Layout>;
     auto &varient = registries_[GetKernelOffset<Target, Precision, Layout>()];
@@ -153,9 +153,6 @@ class KernelRegistor : public lite::Registor<KernelType> {
  public:
   KernelRegistor(const std::string &op_type, const std::string &alias)
       : Registor<KernelType>([=] {
-        VLOG(3) << "Register kernel " << op_type << " for "
-                << TargetToStr(target) << " " << PrecisionToStr(precision)
-                << " " << DataLayoutToStr(layout) << " alias " << alias;
         KernelRegistry::Global().Register<target, precision, layout>(
             op_type, [=]() -> std::unique_ptr<KernelBase> {
               std::unique_ptr<KernelBase> x(new KernelType);
diff --git a/paddle/fluid/lite/core/scope.cc b/paddle/fluid/lite/core/scope.cc
index 053803b00a0..fbb837aedd3 100644
--- a/paddle/fluid/lite/core/scope.cc
+++ b/paddle/fluid/lite/core/scope.cc
@@ -17,7 +17,13 @@
 namespace paddle {
 namespace lite {
 
-Scope::~Scope() {}
+Scope::~Scope() {
+  for (auto *x : kids_) {
+    if (x) {
+      delete x;
+    }
+  }
+}
 
 Scope &Scope::NewScope() const {
   kids_.push_back(new Scope);
diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h
index 1f0d1ecf140..1029bf5300e 100644
--- a/paddle/fluid/lite/core/target_wrapper.h
+++ b/paddle/fluid/lite/core/target_wrapper.h
@@ -63,7 +63,8 @@ static const std::string& TargetToStr(TargetType target) {
 }
 
 static const std::string& PrecisionToStr(PrecisionType precision) {
-  static const std::string precision2string[] = {"unk", "float", "int8", "any"};
+  static const std::string precision2string[] = {"unk", "float", "int8_t",
+                                                 "any"};
   auto x = static_cast<int>(precision);
   CHECK_LT(x, static_cast<int>(PRECISION(NUM)));
   return precision2string[x];
@@ -76,6 +77,29 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
   return datalayout2string[x];
 }
 
+static const std::string& TargetRepr(TargetType target) {
+  static const std::string target2string[] = {"kUnk", "kHost", "kX86", "kCUDA",
+                                              "kAny"};
+  auto x = static_cast<int>(target);
+  CHECK_LT(x, static_cast<int>(TARGET(NUM)));
+  return target2string[x];
+}
+
+static const std::string& PrecisionRepr(PrecisionType precision) {
+  static const std::string precision2string[] = {"kUnk", "kFloat", "kInt8",
+                                                 "kAny"};
+  auto x = static_cast<int>(precision);
+  CHECK_LT(x, static_cast<int>(PRECISION(NUM)));
+  return precision2string[x];
+}
+
+static const std::string& DataLayoutRepr(DataLayoutType layout) {
+  static const std::string datalayout2string[] =
{"kUnk", "kNCHW", "kAny"}; + auto x = static_cast(layout); + CHECK_LT(x, static_cast(DATALAYOUT(NUM))); + return datalayout2string[x]; +} + /* * Place specifies the execution context of a Kernel or input/output for a * kernel. It is used to make the analysis of the MIR more clear and accurate. @@ -228,5 +252,20 @@ class TargetWrapper { }; #endif // LITE_WITH_CUDA +template +void CopySync(void* dst, void* src, size_t size, IoDirection dir) { + switch (Target) { + case TARGET(kX86): + case TARGET(kHost): + case TARGET(kARM): + TargetWrapperX86::MemcpySync(dst, src, size, IoDirection::HtoH); + break; +#ifdef LITE_WITH_CUDA + case TARGET(kCUDA): + TargetWrapperCuda::MemcpySync(dst, src, size, dir); +#endif + } +} + } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/core/tensor.h b/paddle/fluid/lite/core/tensor.h index 11b682a617c..d6980ff8898 100644 --- a/paddle/fluid/lite/core/tensor.h +++ b/paddle/fluid/lite/core/tensor.h @@ -47,7 +47,8 @@ class DDimBase { DDimBase() = default; explicit DDimBase(const std::vector &x) { self()->ConstructFrom(x); } - value_type operator[](int offset) const { return (*self())[offset]; } + value_type operator[](int offset) const { return (*const_self())[offset]; } + value_type &operator[](int offset) { return (*self())[offset]; } std::vector Vectorize() const { return self()->Vectorize(); } size_t size() const { return const_self()->size(); } bool empty() const { return const_self()->empty(); } @@ -73,18 +74,19 @@ class DDimBase { {Slice(0, col).production(), Slice(col, size()).production()})); } - friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) { - if (dims.empty()) { - os << "[]"; - return os; + std::string repr() const { + std::stringstream ss; + ss << "{"; + for (size_t i = 0; i < this->size() - 1; i++) { + ss << (*this)[i] << ","; } + if (!this->empty()) ss << (*this)[size() - 1]; + ss << "}"; + return ss.str(); + } - os << "["; - for (size_t i = 0; i < dims.size() - 1; i++) { - os << dims[i] << " "; - } - if (!dims.empty()) os << dims[dims.size() - 1]; - os << "]"; + friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) { + os << dims.repr(); return os; } @@ -102,6 +104,12 @@ template class TensorBase { public: TensorBase() = default; + + template + void Assign(T *data, const DimT &dim) { + self()->Assign(data, dim); + } + TargetType target() const { return self()->target(); } template diff --git a/paddle/fluid/lite/core/variable.h b/paddle/fluid/lite/core/variable.h index c83871446d2..d52a813a09c 100644 --- a/paddle/fluid/lite/core/variable.h +++ b/paddle/fluid/lite/core/variable.h @@ -24,7 +24,7 @@ namespace lite { class Variable { public: template - const T& Get() { + const T& Get() const { return blob_.get(); } diff --git a/paddle/fluid/lite/gen_code/CMakeLists.txt b/paddle/fluid/lite/gen_code/CMakeLists.txt new file mode 100644 index 00000000000..5d09dd567ea --- /dev/null +++ b/paddle/fluid/lite/gen_code/CMakeLists.txt @@ -0,0 +1,24 @@ +lite_cc_library(gen_code_lite SRCS gen_code.cc + DEPS program_lite op_lite scope + cpp_op_desc_lite + HVY_DEPS operator) +lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite) + +lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite} + mul_op_lite + compatible_pb_lite + model_parser_lite + X86_DEPS mul_compute_x86 + ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + +lite_cc_library(__generated_code__ + SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc 
+ DEPS scope_lite op_lite kernel_lite paddle_infer_gencode +) + +lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_code__ + ${ops_lite} ${host_kernels} + X86_DEPS ${x86_kernels} + ) + +add_dependencies(__generated_code__ test_gen_code_lite) diff --git a/paddle/fluid/lite/gen_code/gen_code.cc b/paddle/fluid/lite/gen_code/gen_code.cc new file mode 100644 index 00000000000..a50241bb715 --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code.cc @@ -0,0 +1,193 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/gen_code/gen_code.h" +#include +#include +#include + +namespace paddle { +namespace lite { +namespace gencode { + +void Module::AddWeight(const std::string &name, const TensorRepr &tensor) { + auto w_name = WeightUniqueName(); + Line(string_format("// Create weight: %s", name.c_str())); + // auto* w0 = scope.Var("w0")->GetMutable(); + Line(string_format("auto* %s = scope->Var(%s)->GetMutable();", + w_name.c_str(), Repr(name).c_str())); + // lite::DDim w_ddim({1, 2}) + Line(string_format("lite::DDim %s_ddim(std::vector(%s));", + w_name.c_str(), tensor.ddim.repr().c_str())); + // std::vector w_data({}); + auto w_data_repr = DataRepr( + std::string(static_cast(tensor.raw_data), tensor.num_bytes), + tensor.dtype); + Line(string_format("std::vector<%s> %s_data({%s});", + PrecisionToStr(tensor.dtype).c_str(), w_name.c_str(), + w_data_repr.c_str())); + // w0->Assign(w0_data.data(), w0_ddim); + Line(string_format( + "%s->Assign<%s, lite::DDim, TARGET(kX86)>(%s_data.data(), %s_ddim);", + w_name.c_str(), PrecisionToStr(tensor.dtype).c_str(), w_name.c_str(), + w_name.c_str())); + Line(""); +} + +void Module::AddHeaderIncludeGenCode() { + Line(""); + Line("#include "); + Line("#include "); + Line("#include \"paddle/fluid/lite/core/compatible_tensor.h\""); + Line("#include \"paddle/fluid/lite/core/context.h\""); + Line("#include \"paddle/fluid/lite/gen_code/paddle_infer.h\""); + Line("#include \"paddle/fluid/lite/core/op_registry.h\""); + Line("#include \"paddle/fluid/lite/core/scope.h\""); + Line("#include \"paddle/fluid/lite/model_parser/cpp/op_desc.h\""); + Line(""); + Line(""); +} + +std::string Module::DataRepr(const std::string &raw_data, PrecisionType dtype) { + std::stringstream ss; + switch (dtype) { + case PRECISION(kFloat): { + const float *raw = reinterpret_cast(raw_data.c_str()); + int num_elems = raw_data.size() / sizeof(float); + if (num_elems) { + for (int i = 0; i < num_elems - 1; i++) { + ss << raw[i] << ","; + } + ss << raw[num_elems - 1]; + } + } break; + + default: + LOG(FATAL) << "Unsupported type " << PrecisionToStr(dtype); + } + return ss.str(); +} + +void Module::AddOpDescHelper(const std::string &op_id, + const cpp::OpDesc &desc) { + std::string desc_var = op_id + "_desc"; + Line(string_format("lite::cpp::OpDesc %s;", desc_var.c_str())); + auto vec_str_repr = [](const std::vector &vec) { + return Repr(vec); + }; + for (auto &item : 
desc.inputs()) { + Line(string_format("%s.SetInput(%s, %s);", desc_var.c_str(), + Repr(item.first).c_str(), + vec_str_repr(item.second).c_str())); + } + + for (auto &item : desc.outputs()) { + Line(string_format("%s.SetOutput(%s, %s);", desc_var.c_str(), + Repr(item.first).c_str(), + vec_str_repr(item.second).c_str())); + } + + auto attr_repr = [&](const std::string &name) -> std::string { + using AttrType = OpDescAPI::AttrType; + auto type = desc.GetAttrType(name); + + switch (type) { + case AttrType::INT: + return std::to_string(desc.GetAttr(name)); + case AttrType::FLOAT: + return std::to_string(desc.GetAttr(name)); + case AttrType::BOOLEAN: + return std::to_string(desc.GetAttr(name)); + case AttrType::STRING: + return "\"" + desc.GetAttr(name) + "\""; + case AttrType::STRINGS: { + std::vector tmp; + auto vals = desc.GetAttr>(name); + std::transform(vals.begin(), vals.end(), std::back_inserter(tmp), + [](const std::string &x) { return Repr(x); }); + return "{" + Join(tmp, ",") + "}"; + } + default: + LOG(FATAL) << "Unsupported attribute type: " << static_cast(type); + } + return ""; + }; + + auto attr_type_repr = [&](const std::string &name) -> std::string { + using AttrType = OpDescAPI::AttrType; + auto type = desc.GetAttrType(name); + + switch (type) { + case AttrType::INT: + return "int"; + case AttrType::FLOAT: + return "float"; + case AttrType::BOOLEAN: + return "bool"; + case AttrType::STRING: + return "std::string"; + case AttrType::STRINGS: + return "std::vector"; + default: + LOG(FATAL) << "Unsupported attribute type: " << static_cast(type); + } + + return "unk_t"; + }; + for (auto &item : desc.AttrNames()) { + // Drop the python information. + if (item == "op_callstack") continue; + auto attr_type = attr_type_repr(item); + auto attr_val = attr_repr(item); + Line(string_format("%s.SetAttr<%s>(%s, %s);", // + desc_var.c_str(), attr_type.c_str(), Repr(item).c_str(), + attr_val.c_str())); + } +} + +void Module::AddOp(const cpp::OpDesc &op) { + auto op_name = OpUniqueName(); + AddOpDescHelper(op_name, op); + + Line(string_format("// Create Op: %s", op.Type().c_str())); + + Line(string_format("auto %s = lite::LiteOpRegistry::Global().Create(\"%s\");", + op_name.c_str(), op.Type().c_str())); + + CHECK(op.HasAttr(kKernelTypeAttr)) + << "the kernel type should be specified before generate code."; + auto kernel_type = op.GetAttr(kKernelTypeAttr); + Line(string_format("%s->Attach(%s, exec_scope);", op_name.c_str(), + (op_name + "_desc").c_str())); + + // Create kernel + auto kernel_name = KernelUniqueName(); + Line(string_format( + "auto %s = std::move(%s->CreateKernels(valid_places, \"%s\").front());", + kernel_name.c_str(), op_name.c_str(), kernel_type.c_str())); + + // Set Context for kernel + // clang-format off + Line(string_format("%s->SetContext(lite::ContextScheduler::Global().NewContext(%s->target()));", kernel_name.c_str(), kernel_name.c_str())); // NOLINT + // clang-format on + + Line(string_format("ops.push_back(%s);", op_name.c_str())); + Line(string_format("kernels.push_back(std::move(%s));", kernel_name.c_str())); + + op_kinds_.insert(op.Type()); + kernel_kinds_.insert(kernel_type); +} +} // namespace gencode +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/gen_code/gen_code.h b/paddle/fluid/lite/gen_code/gen_code.h new file mode 100644 index 00000000000..1a55483f03a --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code.h @@ -0,0 +1,254 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/framework.pb.h" +#include "paddle/fluid/lite/core/program.h" +#include "paddle/fluid/lite/core/target_wrapper.h" +#include "paddle/fluid/lite/model_parser/cpp/op_desc.h" +#include "paddle/fluid/lite/model_parser/desc_apis.h" +#include "paddle/fluid/lite/utils/string.h" + +namespace paddle { +namespace lite { +namespace gencode { + +struct TensorRepr { + TensorRepr() = default; + TensorRepr(PrecisionType dtype, const std::vector &ddim, + void *raw_data, size_t num_bytes) + : dtype(dtype), ddim(ddim), raw_data(raw_data), num_bytes(num_bytes) {} + + PrecisionType dtype; + lite::DDim ddim; + const void *raw_data; + size_t num_bytes{}; +}; + +class Module { + std::vector ops; + std::vector weights; + std::vector tmp_vars_; + std::stringstream stream_; + std::set kernel_kinds_; + std::set op_kinds_; + + int line_indent_{}; + const int indent_unit_{2}; + + public: + void NewOp(const cpp::OpDesc &desc) { ops.push_back(desc); } + void NewWeight(const TensorRepr &x) { weights.push_back(x); } + void NewTmpVar(const std::string &x) { tmp_vars_.push_back(x); } + + std::stringstream &stream() { return stream_; } + + void AddHeaderIncludeGenCode(); + + void AddNamespaceBegin() { + Line("namespace paddle {"); + Line("namespace gencode{"); + Line(""); + } + + void AddNamespaceEnd() { + Line(""); + Line("} // namespace gencode"); + Line("} // namespace paddle"); + } + + void AddInitFuncBegin() { + Line("void PaddlePredictor::Init() {"); + Line(""); + IncIndent(); + } + + void AddInitFuncEnd() { + DecIndent(); + Line(""); + Line("}"); + } + + void AddScopeDecl() { + Line("lite::Scope* scope = static_cast(raw_scope_);"); + + // clang-format off + Line("lite::Scope* exec_scope = static_cast(raw_exe_scope_);"); // NOLINT + // clang-format on + + // Create feed and fetch in exec_scope. 
+ Line(string_format("exec_scope->Var(%s);", Repr("feed").c_str())); + Line(string_format("exec_scope->Var(%s);", Repr("fetch").c_str())); + } + + void AddValidPlaceDecl() { + // clang-format off + Line("std::vector valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)}), lite::Place({TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)})});"); // NOLINT + // clang-format on + } + + void AddMemberCast() { + Line("// Cast the raw members"); + // clang-format off + Line(string_format("auto& ops = *static_cast>*>(raw_ops_);")); // NOLINT + Line(string_format("auto& kernels = *static_cast>*>(raw_kernels_);")); // NOLINT + // clang-format on + Line(""); + } + + void AddWeight(const std::string &name, const TensorRepr &tensor); + + void AddTmpVar(const std::string &x) { + Line(string_format("// Create temporary variable: %s", x.c_str())); + Line(string_format("exec_scope->Var(%s);", Repr(x).c_str())); + Line(""); + } + + void AddOp(const cpp::OpDesc &op); + + void AddOpDescHelper(const std::string &op_id, const cpp::OpDesc &desc); + + void AddOpCompileDeps() { + Line(""); + Line("// Add Operator compile deps"); + for (auto &op_type : op_kinds_) { + Line(string_format("USE_LITE_OP(%s)", op_type.c_str())); + } + Line(""); + } + void AddKernelCompileDeps() { + Line("// Add Kernel compile deps"); + + std::string op_type, alias; + Place place; + for (auto &kernel_type : kernel_kinds_) { + KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); + Line(string_format("USE_LITE_KERNEL(%s, %s, %s, %s, %s)", // + op_type.c_str(), // + TargetRepr(place.target).c_str(), + PrecisionRepr(place.precision).c_str(), + DataLayoutRepr(place.layout).c_str(), alias.c_str())); + } + } + + private: + std::string WeightUniqueName() const { + return "w_" + std::to_string(weight_counter_++); + } + std::string TmpVarUniqueName() const { + return "tmp_" + std::to_string(tmp_var_counter_++); + } + std::string OpUniqueName() const { + return "op_" + std::to_string(op_counter_++); + } + std::string KernelUniqueName() const { + return "kernel_" + std::to_string(kernel_counter_++); + } + + std::string DataRepr(const std::string &raw_data, PrecisionType dtype); + + void IncIndent() { line_indent_++; } + void DecIndent() { line_indent_--; } + + void Line(const std::string &x) { + std::string indent_str(line_indent_ * indent_unit_, ' '); + stream() << indent_str << x << "\n"; + } + + private: + mutable int weight_counter_{}; + mutable int tmp_var_counter_{}; + mutable int op_counter_{}; + mutable int kernel_counter_{}; +}; + +class ProgramCodeGenerator { + public: + ProgramCodeGenerator(const framework::proto::ProgramDesc &program, + const lite::Scope &exec_scope) + : program_(program), exec_scope_(exec_scope) { + LOG(INFO) << program.DebugString(); + } + + std::string GenCode() { + Module m; + m.AddHeaderIncludeGenCode(); + m.AddNamespaceBegin(); + m.AddInitFuncBegin(); + m.AddMemberCast(); + m.AddScopeDecl(); + m.AddValidPlaceDecl(); + + AddWeights(&m); + AddTmpVars(&m); + AddOps(&m); + + m.AddInitFuncEnd(); + m.AddNamespaceEnd(); + + m.AddOpCompileDeps(); + m.AddKernelCompileDeps(); + + return m.stream().str(); + } + + void AddWeights(Module *m) { + for (auto &var : program_.blocks(0).vars()) { + if (var.persistable()) { + auto name = var.name(); + if (name == "feed" || name == "fetch") continue; + const auto &tensor = exec_scope_.FindVar(name)->Get(); + TensorRepr repr; + TensorToRepr(tensor, &repr); + m->AddWeight(name, repr); + } + } + } + void AddTmpVars(Module *m) { + for (auto &var : 
program_.blocks(0).vars()) { + if (!var.persistable()) { + m->AddTmpVar(var.name()); + } + } + } + void AddOps(Module *m) { + for (auto &op : program_.blocks(0).ops()) { + pb::OpDesc pb_desc(op); + cpp::OpDesc cpp_desc; + TransformOpDescPbToCpp(pb_desc, &cpp_desc); + m->AddOp(cpp_desc); + } + } + + private: + void TensorToRepr(const lite::Tensor &tensor, TensorRepr *repr) { + repr->ddim = tensor.dims(); + // TODO(Superjomn) support other types. + repr->dtype = PRECISION(kFloat); + repr->raw_data = tensor.data(); + repr->num_bytes = repr->ddim.production() * sizeof(float); + } + + private: + const framework::proto::ProgramDesc &program_; + const lite::Scope &exec_scope_; +}; + +} // namespace gencode +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/gen_code/gen_code_test.cc b/paddle/fluid/lite/gen_code/gen_code_test.cc new file mode 100644 index 00000000000..96ef56e857e --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code_test.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/gen_code/gen_code.h" +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/context.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/scope.h" +#include "paddle/fluid/lite/model_parser/cpp/op_desc.h" +#include "paddle/fluid/lite/model_parser/model_parser.h" + +DEFINE_string(optimized_model, "", ""); +DEFINE_string(generated_code_file, "__generated_code__.cc", ""); + +namespace paddle { +namespace lite { +namespace gencode { + +// Manually construct a program. +TEST(gen_code, manual) { + // For holding the weights. + lite::Scope scope; + // For holding the temporary variables. + auto &tmp_scope = scope.NewScope(); + + // Create weight variables. + auto *w0 = scope.Var("w0")->GetMutable(); + // Create temporary variables. + auto *a = tmp_scope.Var("x")->GetMutable(); + tmp_scope.Var("out")->GetMutable(); + + // Set weights. 
+ std::vector w0_data({0, 1, 2, 3}); + w0->Assign( + w0_data.data(), lite::DDim{std::vector({2, 2})}); + + std::vector a_data({0, 1, 2, 3}); + a->Assign( + a_data.data(), lite::DDim{std::vector({2, 2})}); + + std::vector valid_places({ + Place{TARGET(kX86), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kAny)}, + }); + auto mul_op = LiteOpRegistry::Global().Create("mul"); + cpp::OpDesc mul_op_desc; + mul_op_desc.SetType("mul"); + mul_op_desc.SetInput("X", {"x"}); + mul_op_desc.SetInput("Y", {"w0"}); + mul_op_desc.SetAttr("x_num_col_dims", 1); + mul_op_desc.SetAttr("y_num_col_dims", 1); + mul_op_desc.SetOutput("Out", {"out"}); + + mul_op->Attach(mul_op_desc, &tmp_scope); + auto mul_kernel = std::move(mul_op->CreateKernels(valid_places).front()); + auto fc_ctx = ContextScheduler::Global().NewContext(TARGET(kX86)); + mul_op->CheckShape(); + mul_op->InferShape(); + mul_kernel->SetContext(std::move(fc_ctx)); + mul_kernel->Launch(); +} + +TEST(gen_code, auto_gen) { + std::vector w0_data({0, 1, 2, 3}); + TensorRepr w0(PRECISION(kFloat), std::vector({2, 2}), w0_data.data(), + w0_data.size() * sizeof(float)); + + std::vector w1_data({0.01, 1.2, 2.3, 3.4, 1.1, 2.2}); + TensorRepr w1(PRECISION(kFloat), std::vector({3, 2}), w1_data.data(), + w1_data.size() * sizeof(float)); + + cpp::OpDesc op0; + op0.SetType("mul"); + op0.SetInput("X", {"a", "b"}); + op0.SetOutput("Out", {"out0"}); + op0.SetAttr("desc", "this is a desc"); + op0.SetAttr("x_col", 1); + op0.SetAttr("y_col", 2); + op0.SetAttr(kKernelTypeAttr, "x86"); + + gencode::Module module; + module.AddHeaderIncludeGenCode(); + + module.AddNamespaceBegin(); + module.AddInitFuncBegin(); + + module.AddMemberCast(); + + module.AddWeight("w0", w0); + module.AddWeight("w1", w1); + module.AddTmpVar("a"); + module.AddTmpVar("b"); + + module.AddOp(op0); + + module.AddInitFuncEnd(); + module.AddNamespaceEnd(); + + LOG(INFO) << module.stream().str(); +} + +TEST(gen_code, optimized_program) { + lite::Scope scope; + framework::proto::ProgramDesc desc; + LoadModel(FLAGS_optimized_model, &scope, &desc); + + ProgramCodeGenerator codegen(desc, scope); + + std::ofstream file(FLAGS_generated_code_file); + + file << codegen.GenCode(); + + file.close(); +} + +} // namespace gencode +} // namespace lite +} // namespace paddle + +USE_LITE_OP(mul); +USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/gen_code/generated_code_test.cc b/paddle/fluid/lite/gen_code/generated_code_test.cc new file mode 100644 index 00000000000..e5874a2e149 --- /dev/null +++ b/paddle/fluid/lite/gen_code/generated_code_test.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include <gtest/gtest.h>
+#include <vector>
+#include "paddle/fluid/lite/gen_code/paddle_infer.h"
+
+namespace paddle {
+namespace lite {
+
+TEST(PaddlePredictor, Init) {
+  gencode::PaddlePredictor predictor;
+  predictor.Init();
+}
+
+TEST(PaddlePredictor, Run) {
+  gencode::PaddlePredictor predictor;
+  predictor.Init();
+
+  LOG(INFO) << "run the generated code";
+  auto input_tensor = predictor.GetInput(0);
+  input_tensor->Resize(std::vector<int64_t>({100, 100}));
+  auto* data = input_tensor->mutable_data<float>();
+  for (int i = 0; i < 100 * 100; i++) {
+    data[i] = i;
+  }
+
+  predictor.Run();
+
+  auto output_tensor = predictor.GetOutput(0);
+  LOG(INFO) << "output: " << output_tensor->data<float>()[0];
+}
+
+}  // namespace lite
+}  // namespace paddle
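An editorial sketch (not in the patch) of the feed/fetch convention the tests above rely on: GetInput(i) maps to element i of the "feed" variable and grows the feed list on demand, as implemented in paddle_infer.cc below.

```cpp
// Hedged sketch; FeedTwoInputs is a hypothetical helper, not in the patch.
#include <vector>
#include "paddle/fluid/lite/gen_code/paddle_infer.h"

void FeedTwoInputs(paddle::gencode::PaddlePredictor* predictor) {
  for (size_t slot = 0; slot < 2; slot++) {
    // Slot i corresponds to element i of the "feed" variable; GetInput
    // resizes the feed list if the slot does not exist yet.
    auto tensor = predictor->GetInput(slot);
    tensor->Resize(std::vector<int64_t>({1, 4}));
    auto* data = tensor->mutable_data<float>();
    for (int i = 0; i < 4; i++) data[i] = static_cast<float>(slot);
  }
}
```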
diff --git a/paddle/fluid/lite/gen_code/paddle_infer.cc b/paddle/fluid/lite/gen_code/paddle_infer.cc
new file mode 100644
index 00000000000..ac4e99cb714
--- /dev/null
+++ b/paddle/fluid/lite/gen_code/paddle_infer.cc
@@ -0,0 +1,139 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/gen_code/paddle_infer.h"
+#include "paddle/fluid/lite/core/compatible_tensor.h"
+#include "paddle/fluid/lite/core/op_lite.h"
+
+namespace paddle {
+namespace gencode {
+
+void Tensor::Resize(const Tensor::ddim_t &shape) {
+  CHECK(raw_mutable_tensor_);
+  auto *tensor = static_cast<lite::Tensor *>(raw_mutable_tensor_);
+  tensor->Resize(shape);
+}
+
+#define FOR_EACH_TYPE(HANDLE) \
+  HANDLE(int);                \
+  HANDLE(float);              \
+  HANDLE(int8_t);             \
+  HANDLE(int64_t);
+
+#define IMPL_DATA(T)                                                     \
+  template <>                                                            \
+  const T *Tensor::data<T>() const {                                     \
+    CHECK(raw_tensor_);                                                  \
+    const auto *tensor = static_cast<const lite::Tensor *>(raw_tensor_); \
+    return tensor->data<T>();                                            \
+  }
+FOR_EACH_TYPE(IMPL_DATA);
+#undef IMPL_DATA
+
+#define IMPL_MUTABLE_DATA(T)                                         \
+  template <>                                                        \
+  T *Tensor::mutable_data<T>() {                                     \
+    CHECK(raw_mutable_tensor_);                                      \
+    auto *tensor = static_cast<lite::Tensor *>(raw_mutable_tensor_); \
+    return tensor->mutable_data<T>();                                \
+  }
+FOR_EACH_TYPE(IMPL_MUTABLE_DATA);
+#undef IMPL_MUTABLE_DATA
+
+PaddlePredictor::PaddlePredictor() {
+  raw_ops_ = new std::vector<std::shared_ptr<lite::OpLite>>;
+  raw_kernels_ = new std::vector<std::unique_ptr<lite::KernelBase>>;
+  raw_scope_ = new lite::Scope;
+  raw_exe_scope_ = &(static_cast<lite::Scope *>(raw_scope_)->NewScope());
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetTensor(
+    const std::string &id) const {
+  auto *exe_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  const auto *var = exe_scope->FindVar(id);
+  const auto &tensor = var->Get<lite::Tensor>();
+  return std::unique_ptr<Tensor>(new Tensor(&tensor, nullptr));
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetMutableTensor(
+    const std::string &id) {
+  auto *exe_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  auto *var = exe_scope->FindVar(id);
+  auto *tensor = var->GetMutable<lite::Tensor>();
+  return std::unique_ptr<Tensor>(new Tensor(nullptr, tensor));
+}
+
+#define CAST_OPS \
+  auto *ops =    \
+      static_cast<std::vector<std::shared_ptr<lite::OpLite>> *>(raw_ops_);
+#define CAST_KERNELS                                                 \
+  auto *kernels =                                                    \
+      static_cast<std::vector<std::unique_ptr<lite::KernelBase>> *>( \
+          raw_kernels_);
+#define CAST_SCOPE auto *scope = static_cast<lite::Scope *>(raw_scope_);
+
+PaddlePredictor::~PaddlePredictor() {
+  CAST_OPS
+  CAST_KERNELS
+  CAST_SCOPE
+
+  if (ops) {
+    delete ops;
+  }
+  if (kernels) {
+    delete kernels;
+  }
+  if (scope) {
+    delete scope;
+  }
+}
+
+void PaddlePredictor::Run() {
+  CAST_OPS
+  CAST_KERNELS
+
+  CHECK(ops);
+  CHECK(kernels);
+  CHECK_EQ(ops->size(), kernels->size());
+
+  for (size_t i = 0; i < ops->size(); i++) {
+    LOG(INFO) << "Running the " << i << "-th operator";
+    ops->at(i)->InferShape();
+    kernels->at(i)->Launch();
+  }
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetInput(size_t offset) {
+  auto *exec_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  auto *_feed_list = exec_scope->FindVar("feed");
+  CHECK(_feed_list) << "no feed variable in exec_scope";
+  auto *feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
+  if (offset >= feed_list->size()) {
+    feed_list->resize(offset + 1);
+  }
+
+  return std::unique_ptr<Tensor>(new Tensor(nullptr, &feed_list->at(offset)));
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetOutput(size_t offset) {
+  auto *exec_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  auto *_fetch_list = exec_scope->FindVar("fetch");
+  CHECK(_fetch_list) << "no fetch variable in exec_scope";
+  auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
+  CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
+  return std::unique_ptr<Tensor>(new Tensor(&fetch_list.at(offset), nullptr));
+}
+
+}  // namespace gencode
+}  // namespace paddle
diff --git a/paddle/fluid/lite/gen_code/paddle_infer.h b/paddle/fluid/lite/gen_code/paddle_infer.h
new file mode 100644
index 00000000000..99158b0503c
--- /dev/null
+++ b/paddle/fluid/lite/gen_code/paddle_infer.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace gencode {
+
+/// Zero Copy Tensor.
+class Tensor {
+ public:
+  using ddim_t = std::vector<int64_t>;
+
+  Tensor(const void *raw_tensor, void *raw_mutable_tensor)
+      : raw_tensor_(raw_tensor), raw_mutable_tensor_(raw_mutable_tensor) {}
+
+  void Resize(const ddim_t &shape);
+  template <typename T>
+  const T *data() const;
+  template <typename T>
+  T *mutable_data();
+
+ private:
+  const void *raw_tensor_;
+  void *raw_mutable_tensor_{};
+};
+
+/*
+ * Predictor for the generated code.
+ */
+class PaddlePredictor {
+ public:
+  void Init();
+
+  std::unique_ptr<Tensor> GetTensor(const std::string &id) const;
+  std::unique_ptr<Tensor> GetMutableTensor(const std::string &id);
+
+  // Get offset-th col of feed.
+  std::unique_ptr<Tensor> GetInput(size_t offset);
+
+  std::unique_ptr<Tensor> GetOutput(size_t offset);
+
+  void Run();
+
+  PaddlePredictor();
+  ~PaddlePredictor();
+
+ private:
+  void *raw_ops_;
+  void *raw_kernels_;
+  void *raw_scope_{};
+  void *raw_exe_scope_{};  // raw_exe_scope is not owned.
+};
+
+}  // namespace gencode
+}  // namespace paddle
diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
index 1c2937df5be..62db7a0a226 100644
--- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -25,7 +25,7 @@ set(x86_kernels
     relu_compute_x86
     fc_compute_x86
     scale_compute_x86
-    softmax_compute_x86
+    softmax_compute_x86 dropout_compute_x86
     concat_compute_x86
     )
 
diff --git a/paddle/fluid/lite/model_parser/cpp/op_desc.cc b/paddle/fluid/lite/model_parser/cpp/op_desc.cc
index 01ee4703143..b6b854d72af 100644
--- a/paddle/fluid/lite/model_parser/cpp/op_desc.cc
+++ b/paddle/fluid/lite/model_parser/cpp/op_desc.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/lite/model_parser/cpp/op_desc.h"
 #include <string>
+#include <utility>
 
 namespace paddle {
 namespace lite {
@@ -44,12 +45,13 @@ FindAttr(const cpp::OpDesc& desc, const std::string& name) {
   return std::make_pair(it, attr_it);
 }
 
-#define GET_IMPL_ONE(T, repr__)                                           \
-  template <>                                                             \
-  T OpDesc::GetAttr<T>(const std::string& name) const {                   \
-    auto pair = FindAttr(*this, name);                                    \
-    CHECK(pair.second->second == AttrType::repr__);                       \
-    return pair.first->second.get<T>();                                   \
+#define GET_IMPL_ONE(T, repr__)                                           \
+  template <>                                                             \
+  T OpDesc::GetAttr<T>(const std::string& name) const {                   \
+    auto pair = FindAttr(*this, name);                                    \
+    CHECK(pair.second->second == AttrType::repr__)                        \
+        << "required type is " << #repr__ << " not match the true type";  \
+    return pair.first->second.get<T>();                                   \
   }
 
 GET_IMPL_ONE(int32_t, INT);
diff --git a/paddle/fluid/lite/model_parser/pb/op_desc.cc b/paddle/fluid/lite/model_parser/pb/op_desc.cc
index 1de4fb275e4..7f84510a3fa 100644
--- a/paddle/fluid/lite/model_parser/pb/op_desc.cc
+++ b/paddle/fluid/lite/model_parser/pb/op_desc.cc
@@ -44,7 +44,7 @@ FindAttr(framework::proto::OpDesc *desc, const std::string &name) {
 }
 SET_IMPL_ONE(int, INT, i);
 SET_IMPL_ONE(float, FLOAT, f);
-SET_IMPL_ONE(bool, FLOAT, f);
+SET_IMPL_ONE(bool, BOOLEAN, b);
 
 template <>
 void OpDesc::SetAttr<std::vector<std::string>>(const std::string &name,
diff --git a/paddle/fluid/lite/operators/feed_op.cc b/paddle/fluid/lite/operators/feed_op.cc
index 8c7d33e9e59..c977adfd4b3 100644
--- a/paddle/fluid/lite/operators/feed_op.cc
+++ b/paddle/fluid/lite/operators/feed_op.cc
@@ -38,8 +38,8 @@ class FeedOp : public OpLite {
     auto feed_var_name = opdesc.Input("X").front();
     auto* feed_var = scope->FindVar(feed_var_name);
     CHECK(feed_var);
-    auto& feed_tensor_list = feed_var->Get<std::vector<lite::Tensor>>();
-    param_.feed_list = &feed_tensor_list;
+    auto* feed_tensor_list = feed_var->GetMutable<std::vector<lite::Tensor>>();
+    param_.feed_list = feed_tensor_list;
 
     auto out_name = opdesc.Output("Out").front();
     auto* out_var = scope->FindVar(out_name);
diff --git a/paddle/fluid/lite/operators/mul_op.h b/paddle/fluid/lite/operators/mul_op.h
index e21540d2c6f..7aa1581bb2a 100644
--- a/paddle/fluid/lite/operators/mul_op.h
+++ b/paddle/fluid/lite/operators/mul_op.h
@@ -45,10 +45,11 @@ class MulOpLite : public OpLite {
     CHECK(var);
     param_.x = var->GetMutable<Tensor>();
     var = scope->FindVar(W);
-    CHECK(var);
+    CHECK(var) << "no var called " << W;
     param_.y = var->GetMutable<Tensor>();
-    CHECK(scope->FindVar(out));
-    param_.output = scope->FindVar(out)->GetMutable<Tensor>();
+    var = scope->FindVar(out);
+    CHECK(var) << "no var called " << out;
+    param_.output = var->GetMutable<Tensor>();
 
     param_.x_num_col_dims = op_desc.GetAttr<int>("x_num_col_dims");
     param_.y_num_col_dims = op_desc.GetAttr<int>("y_num_col_dims");
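Since the attribute getters above now fail loudly on a type mismatch, a hedged sketch (editorial, not in the patch; it mirrors the mul op setup in gen_code_test.cc) of the typed cpp::OpDesc API may help:

```cpp
// BuildMulDesc is a hypothetical helper, not in the patch.
#include "paddle/fluid/lite/model_parser/cpp/op_desc.h"

void BuildMulDesc(paddle::lite::cpp::OpDesc* desc) {
  desc->SetType("mul");
  desc->SetInput("X", {"x"});
  desc->SetInput("Y", {"w0"});
  desc->SetOutput("Out", {"out"});
  desc->SetAttr<int>("x_num_col_dims", 1);
  desc->SetAttr<int>("y_num_col_dims", 1);
  // Reading back with the wrong type now fails with a descriptive message:
  // desc->GetAttr<float>("x_num_col_dims");  // CHECK failure
}
```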
a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -4,15 +4,26 @@ set -ex
 TESTS_FILE="./lite_tests.txt"
 
 readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
+
+# For code gen, a source file is generated after a test runs, but some cmake
+# targets depend on it. Here we fake an empty file to make cmake work.
+function prepare_for_codegen {
+    # in build directory
+    mkdir -p ./paddle/fluid/lite/gen_code
+    touch ./paddle/fluid/lite/gen_code/__generated_code__.cc
+}
 function cmake_x86 {
+    prepare_for_codegen
     cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
 }
 
 function cmake_x86_for_CI {
+    prepare_for_codegen
     cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} -DLITE_WITH_PROFILE=ON
 }
 
 function cmake_gpu {
+    prepare_for_codegen
     cmake .. " -DWITH_GPU=ON ${common_flags} -DLITE_WITH_GPU=ON"
 }
@@ -31,18 +42,19 @@ function cmake_arm {
         -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
 }
 
-function build {
-    file=$1
-    for _test in $(cat $file); do
-        make $_test -j$(expr $(nproc) - 2)
-    done
-}
+# function build {
+#     file=$1
+#     for _test in $(cat $file); do
+#         make $_test -j$(expr $(nproc) - 2)
+#     done
+# }
 
 # It will eagerly test all lite related unittests.
 function test_lite {
     local file=$1
     echo "file: ${file}"
 
     for _test in $(cat $file); do
+        make $_test -j$(expr $(nproc) - 2)
         ctest -R $_test -V
     done
 }
@@ -86,7 +98,7 @@ function build_test_server {
     cd ./build
     export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
     cmake_x86_for_CI
-    build $TESTS_FILE
+    #build $TESTS_FILE
 
     test_lite $TESTS_FILE
 }
@@ -165,10 +177,10 @@ function main {
                 TESTS_FILE="${i#*=}"
                 shift
                 ;;
-            build)
-                build $TESTS_FILE
-                shift
-                ;;
+            # build)
+            #     build $TESTS_FILE
+            #     shift
+            #     ;;
             cmake_x86)
                 cmake_x86
                 shift
diff --git a/paddle/fluid/lite/utils/CMakeLists.txt b/paddle/fluid/lite/utils/CMakeLists.txt
index bd161555f08..08eeaa54f8e 100644
--- a/paddle/fluid/lite/utils/CMakeLists.txt
+++ b/paddle/fluid/lite/utils/CMakeLists.txt
@@ -8,4 +8,4 @@ set(utils_DEPS glog)
 lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite)
 
 cc_library(any_lite SRCS any.cc)
-cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite)
+cc_library(utils_lite SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any_lite)
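The code emitter leans on a few small string helpers that land in string.h below; a short editorial illustration (not in the patch) of the two that gen_code.cc uses most:

```cpp
// Repr quotes a token for emission into C++ source; Join splices a list.
#include <iostream>
#include <string>
#include <vector>
#include "paddle/fluid/lite/utils/string.h"

int main() {
  std::vector<std::string> args({"x", "w0"});
  std::cout << paddle::lite::Repr("feed") << "\n";     // "feed"
  std::cout << paddle::lite::Repr(args) << "\n";       // {"x","w0"}
  std::cout << paddle::lite::Join(args, ",") << "\n";  // x,w0
  return 0;
}
```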
diff --git a/paddle/fluid/lite/utils/string.cc b/paddle/fluid/lite/utils/string.cc
new file mode 100644
index 00000000000..c608c31fb9f
--- /dev/null
+++ b/paddle/fluid/lite/utils/string.cc
@@ -0,0 +1,19 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/utils/string.h"
+
+namespace paddle {
+namespace lite {}  // namespace lite
+}  // namespace paddle
diff --git a/paddle/fluid/lite/utils/string.h b/paddle/fluid/lite/utils/string.h
new file mode 100644
index 00000000000..3e06f93a252
--- /dev/null
+++ b/paddle/fluid/lite/utils/string.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <stdarg.h>  // For va_start, etc.
+#include <algorithm>
+#include <cstring>
+#include <memory>  // For std::unique_ptr
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace lite {
+
+static std::string string_format(const std::string fmt_str, ...) {
+  /* Reserve two times as much as the length of the fmt_str */
+  int final_n, n = (static_cast<int>(fmt_str.size())) * 2;
+  std::unique_ptr<char[]> formatted;
+  va_list ap;
+  while (1) {
+    formatted.reset(
+        new char[n]); /* Wrap the plain char array into the unique_ptr */
+    std::strcpy(&formatted[0], fmt_str.c_str());  // NOLINT
+    va_start(ap, fmt_str);
+    final_n = vsnprintf(&formatted[0], n, fmt_str.c_str(), ap);
+    va_end(ap);
+    if (final_n < 0 || final_n >= n)
+      n += abs(final_n - n + 1);
+    else
+      break;
+  }
+  return std::string(formatted.get());
+}
+
+template <typename T>
+static std::string to_string_with_precision(const T& v, const int n = 6) {
+  std::stringstream ss;
+  ss.precision(n);
+  ss << std::fixed << v;
+  return ss.str();
+}
+
+static std::string Join(const std::vector<std::string>& vec,
+                        const std::string& delim) {
+  if (vec.empty()) return "";
+
+  std::stringstream ss;
+  for (size_t i = 0; i < vec.size() - 1; i++) ss << vec[i] << delim;
+  if (!vec.empty()) {
+    ss << vec.back();
+  }
+  return ss.str();
+}
+
+static std::string Repr(const std::string& x) { return "\"" + x + "\""; }
+
+static std::string Repr(const std::vector<std::string>& v) {
+  std::vector<std::string> tmp;
+  std::transform(v.begin(), v.end(), std::back_inserter(tmp),
+                 [](const std::string& x) { return Repr(x); });
+  return "{" + Join(tmp, ",") + "}";
+}
+
+}  // namespace lite
+}  // namespace paddle
-- 
GitLab
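Taken together, the pieces above form a two-stage flow: generate C++ from an optimized model, then compile and drive it through gencode::PaddlePredictor. A condensed, editorial sketch of the generation step, mirroring TEST(gen_code, optimized_program); the model path is a placeholder:

```cpp
#include <fstream>
#include "paddle/fluid/lite/gen_code/gen_code.h"
#include "paddle/fluid/lite/model_parser/model_parser.h"

int main() {
  paddle::lite::Scope scope;
  paddle::framework::proto::ProgramDesc desc;
  // Load a model previously optimized and saved by the CXX API (see the
  // SaveModel call in cxx_api_test.cc above).
  paddle::lite::LoadModel("lite_naive_model_opt", &scope, &desc);

  paddle::lite::gencode::ProgramCodeGenerator codegen(desc, scope);
  std::ofstream file("__generated_code__.cc");
  file << codegen.GenCode();  // emits PaddlePredictor::Init for this model
  return 0;
}
```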