diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h index ba2d784b942c04c169a19d4747352d9048fd6ff2..915a469a58765f102ff01c28ed9856d185311168 100644 --- a/paddle/fluid/lite/api/cxx_api.h +++ b/paddle/fluid/lite/api/cxx_api.h @@ -50,6 +50,7 @@ class ExecutorLite { optimizer_.KernelPickPreferPlace(prefer_place); core::KernelPickFactor factor; factor.ConsiderTarget(); + factor.ConsiderPrecision(); optimizer_.Run(std::move(program), valid_places, factor); program_ = optimizer_.GenRuntimeProgram(); } diff --git a/paddle/fluid/lite/arm/math/CMakeLists.txt b/paddle/fluid/lite/arm/math/CMakeLists.txt index dd439bbf0f6e23b721c1f61fb5e39d821b79fb26..32f367f703e6cdf1484a2bf2e53edcf38f879357 100644 --- a/paddle/fluid/lite/arm/math/CMakeLists.txt +++ b/paddle/fluid/lite/arm/math/CMakeLists.txt @@ -35,6 +35,8 @@ cc_library(math_arm SRCS split.cc activation.cc dropout.cc + gemm_prepacked_int8.cc + gemv_arm_int8.cc DEPS ${lite_kernel_deps} eigen3 framework_proto_lite) # TODO(TJ): fix me do not deps proto diff --git a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h index b4778aab182abf368461984bbfb9ef827b6c0fb9..29ff767e772cdd63149c965107d1c448788dc9db 100644 --- a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h +++ b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h @@ -25,7 +25,7 @@ namespace fusion { /* The model trained by fluid quantization is a simulation of real int8. * The quantized Ops(conv2d, mul, depthwise conv2d etc) have fake_quantop - * in front and fake_dequantop behind. + * in front and fake_dequantop behind. 
* * When in int8 mode, the pattern like "fake_quant + quantized_op + * fake_dequant" diff --git a/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc b/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc index 9f0b2e1f3225d708f0e71c255bad2eec71628f76..322ddb29064de5eb8771f50508d20ba9ba7f053c 100644 --- a/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc +++ b/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc @@ -41,7 +41,7 @@ void FuseBase::DeleteInterNodes(SSAGraph *graph) { } } - LOG(INFO) << "keys: " << key2nodes_.size(); + VLOG(4) << "keys: " << key2nodes_.size(); std::unordered_set nodes2rm; for (auto &matched : key2nodes_) { for (const auto &key : keys) { diff --git a/paddle/fluid/lite/core/op_registry.h b/paddle/fluid/lite/core/op_registry.h index 1052419ecda8bcad8d919c0d8f8e2ab3f969440f..fc4cd25fa56eec295c522857a67e17315ed49ba8 100644 --- a/paddle/fluid/lite/core/op_registry.h +++ b/paddle/fluid/lite/core/op_registry.h @@ -80,6 +80,8 @@ class KernelRegistry final { KernelRegistryForTarget *, // KernelRegistryForTarget *, // + KernelRegistryForTarget * // >; diff --git a/paddle/fluid/lite/core/optimizer.h b/paddle/fluid/lite/core/optimizer.h index ea65329b668c89405ca43f55121f2ca1790539c0..c42699ff10a6e9e926693c46b38f3cd6343a4dd0 100644 --- a/paddle/fluid/lite/core/optimizer.h +++ b/paddle/fluid/lite/core/optimizer.h @@ -58,7 +58,6 @@ class Optimizer { #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "lite_elementwise_add_activation_fuse_pass", // #endif - "lite_fc_fuse_pass", // "static_kernel_pick_pass", // "variable_place_inference_pass", // "argument_type_display_pass", // diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h index c4a870ab83f0c61fc4a5116f8c3dd379e8ead9db..66fbc652203dc4045aeae5eca87df856e76febbe 100644 --- a/paddle/fluid/lite/core/target_wrapper.h +++ b/paddle/fluid/lite/core/target_wrapper.h @@ -38,6 +38,7 @@ enum class PrecisionType : int { kUnk = 0, kFloat, kInt8, + kInt32, kAny, // any 
precision
   NUM, // number of fields.
 };
@@ -48,6 +49,19 @@ enum class DataLayoutType : int {
   NUM, // number of fields.
 };
 
+static size_t PrecisionTypeLength(PrecisionType type) {  // Maps a PrecisionType to a fixed length: 4 for kFloat/kInt32, 1 for kInt8.
+  switch (type) {  // NOTE(review): the values look like per-element sizes in bytes — confirm the unit with callers.
+    case PrecisionType::kFloat:
+      return 4;
+    case PrecisionType::kInt8:
+      return 1;
+    case PrecisionType::kInt32:
+      return 4;
+    default:  // Reached for kUnk, kAny, NUM and any out-of-range value.
+      return 4;  // NOTE(review): unknown precisions silently report 4 — confirm this fallback is intended.
+  }
+}
+
 // Some helper macro to get a specific TargetType.
 #define TARGET(item__) paddle::lite::TargetType::item__
 // Some helper macro to get a specific PrecisionType.
@@ -87,7 +101,7 @@ static const std::string& TargetRepr(TargetType target) {
 
 static const std::string& PrecisionRepr(PrecisionType precision) {
   static const std::string precision2string[] = {"kUnk", "kFloat", "kInt8",
-                                                 "kAny"};
+                                                 "kInt32", "kAny"};  // Indexed by the enum value, so entries must follow PrecisionType enumerator order.
   auto x = static_cast(precision);
   CHECK_LT(x, static_cast(PRECISION(NUM)));
   return precision2string[x];
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc
index 5e9ddb6271684120c8cab68e6e10bade3a3ab015..af8f8e1242a32f58727ad1658b7db2cefbc1b653 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc
@@ -92,6 +92,9 @@ void ConvCompute::Run() {
   // }
 }
 
+void ConvComputeInt8::PrepareForRun() {}  // Empty body: no preparation is performed.
+void ConvComputeInt8::Run() {}  // Empty body. NOTE(review): this class is registered below as the kInt8 conv2d kernel yet computes nothing — confirm it is a placeholder.
+
 } // namespace arm
 } // namespace kernels
 } // namespace lite
@@ -112,3 +115,23 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,  // Registers ConvComputeInt8 for conv2d with target kARM, precision kInt8, layout kNCHW.
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})  // Bias is the only argument bound as kInt32; the others are kInt8.
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kInt8, kNCHW,  // Registers ConvComputeInt8 for depthwise_conv2d with target kARM, precision kInt8, layout kNCHW.
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})  // Bias is the only argument bound as kInt32; the others are kInt8.
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.h b/paddle/fluid/lite/kernels/arm/conv_compute.h
index 21fabf8c3e8f7983a891265135c39b96aaf42e8d..e5d5721a3b30256bd14a165400723cc4563cd942 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute.h
+++ b/paddle/fluid/lite/kernels/arm/conv_compute.h
@@ -41,6 +41,25 @@ class ConvCompute : public KernelLite {
       nullptr};
 };
 
+class ConvComputeInt8 : public KernelLite {  // Kernel class named in the kInt8 REGISTER_LITE_KERNEL entries of conv_compute.cc.
+ public:
+  using param_t = operators::ConvParam;  // Parameter type alias for this kernel.
+
+  void PrepareForRun() override;  // Defined in conv_compute.cc.
+
+  void Run() override;  // Defined in conv_compute.cc.
+
+  ~ConvComputeInt8() {  // Releases impl_, which is held as a raw pointer and deleted here.
+    if (impl_ != nullptr) {  // Redundant guard: delete on a null pointer is a no-op.
+      delete impl_;
+    }
+  }
+
+ private:
+  lite::arm::math::ImplBase* impl_{  // NOTE(review): raw pointer deleted in the destructor with no copy control declared here — a copied instance would double-delete unless the base class forbids copying; confirm.
+      nullptr};
+};
+
 } // namespace arm
 } // namespace kernels
 } // namespace lite