diff --git a/CMakeLists.txt b/CMakeLists.txt index 03275b1a8d9943f66246463ea80081dc6bc6b0db..3643379acb32320c710e786c18c37424313e726e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,7 @@ lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF IF LITE_WITH_PROFILE) lite_option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF) lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF) +lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF) # publish options lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF) @@ -104,6 +105,9 @@ if (LITE_ON_TINY_PUBLISH) endif() include_directories("${PADDLE_SOURCE_DIR}") +# the generated header files. +set(LITE_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}") +include_directories("${LITE_GENERATED_INCLUDE_DIR}") # for mobile if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index b919c147c7064f39e964b0d30e522303168c291b..67830fe2e0ec3c35064acb4c00ec152989ddb655 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -34,33 +34,6 @@ elseif(SSE3_FOUND) set(SIMD_FLAG ${SSE3_FLAG}) endif() -if(WIN32) - # windows header option for all targets. - add_definitions(-D_XKEYCHECK_H) - # Use symbols instead of absolute path, reduce the cmake link command length. - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) - SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") - SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") - - # Specify the program to use when building static libraries - SET(CMAKE_C_CREATE_STATIC_LIBRARY " lib ") - SET(CMAKE_CXX_CREATE_STATIC_LIBRARY " lib ") - - # set defination for the dll export - if (NOT MSVC) - message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.") - endif(NOT MSVC) -endif(WIN32) - -if(WITH_PSLIB) - add_definitions(-DPADDLE_WITH_PSLIB) -endif() - if(LITE_WITH_CUDA) add_definitions(-DLITE_WITH_CUDA) add_definitions(-DEIGEN_USE_GPU) @@ -180,3 +153,8 @@ endif() if (LITE_ON_TINY_PUBLISH) add_definitions("-DLITE_ON_TINY_PUBLISH") endif() + +if (LITE_ON_MODEL_OPTIMIZE_TOOL) + add_definitions("-DLITE_ON_MODEL_OPTIMIZE_TOOL") +endif(LITE_ON_MODEL_OPTIMIZE_TOOL) + diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 84be88226f4144c30840fe5a37d35d54b357630c..2a88cf0321fa42d358fb3fc9d3555e5cabb8c4a6 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -185,6 +185,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) SET(SOURCE_DIR "${CMAKE_SOURCE_DIR}/third-party/protobuf-host") IF(BUILD_FOR_HOST) + # set for server compile. + if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + set(HOST_C_COMPILER "${CMAKE_C_COMPILER}") + set(HOST_CXX_COMPILER "${CMAKE_CXX_COMPILER}") + endif() + SET(OPTIONAL_ARGS "-DCMAKE_C_COMPILER=${HOST_C_COMPILER}" "-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}" @@ -276,7 +282,11 @@ IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) ENDIF() IF(NOT PROTOBUF_FOUND) - build_protobuf(extern_protobuf FALSE) + if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + build_protobuf(extern_protobuf FALSE) + else() + build_protobuf(extern_protobuf TRUE) + endif() SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} CACHE PATH "protobuf include directory." FORCE) diff --git a/cmake/lite.cmake b/cmake/lite.cmake index 2c839d36e27429672b1098bae4d5cbed16731115..707982a3e7030aabe1733e015792d542eaa9f152 100644 --- a/cmake/lite.cmake +++ b/cmake/lite.cmake @@ -240,6 +240,21 @@ function(add_kernel TARGET device level) return() endif() + if (LITE_ON_MODEL_OPTIMIZE_TOOL) + # the source list will collect for model_optimize_tool to fake kernel generation. + foreach(src ${args_SRCS}) + file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() + return() + endif() + + # when compiling the model_optimize_tool, a source file with all the fake kernel definitions will be generated, + # no need to continue the compilation of the true kernel source. + if (LITE_ON_MODEL_OPTIMIZE_TOOL) + return() + endif(LITE_ON_MODEL_OPTIMIZE_TOOL) + + if ("${device}" STREQUAL "Host") set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "") endif() @@ -274,6 +289,7 @@ function(add_kernel TARGET device level) set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "") endif() + # the source list will collect for paddle_use_kernel.h code generation. foreach(src ${args_SRCS}) file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") endforeach() diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index cc958f1b59d439e57e1b0ec093ffad9345687476..937781293a77732fa6c115327b1a4c824c1f0930 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -69,12 +69,12 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include" COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib" COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib" - COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin" + #COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin" COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin" COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin" ) if(NOT IOS) - add_dependencies(publish_inference_cxx_lib model_optimize_tool) + #add_dependencies(publish_inference_cxx_lib model_optimize_tool) add_dependencies(publish_inference_cxx_lib paddle_code_generator) add_dependencies(publish_inference_cxx_lib bundle_full_api) add_dependencies(publish_inference_cxx_lib bundle_light_api) diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt index dc31164c0eed754c6599abd25a46a1b8c83eaea6..7767458b3789eeb6c5775ae0f86da121aee10820 100644 --- a/lite/api/CMakeLists.txt +++ b/lite/api/CMakeLists.txt @@ -195,6 +195,14 @@ endif() if (LITE_ON_TINY_PUBLISH) return() endif() + +if (LITE_ON_MODEL_OPTIMIZE_TOOL) + message(STATUS "Compiling model_optimize_tool") + lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc + DEPS gflags kernel op optimizer mir_passes utils) + add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc) +endif(LITE_ON_MODEL_OPTIMIZE_TOOL) + lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light ${ops} ARM_DEPS ${arm_kernels} @@ -209,14 +217,14 @@ endif() # Some bins if(NOT IOS) - lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags + lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags utils ${ops} ARM_DEPS ${arm_kernels} NPU_DEPS ${npu_kernels} CL_DEPS ${opencl_kernels} FPGA_DEPS ${fpga_kernels} X86_DEPS ${x86_kernels}) - lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags + lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils ${ops} ARM_DEPS ${arm_kernels} NPU_DEPS ${npu_kernels} @@ -229,7 +237,3 @@ endif() #X86_DEPS operator #DEPS light_api model_parser target_wrapper_host mir_passes #ARM_DEPS ${arm_kernels}) NPU_DEPS ${npu_kernels}) - -lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc - DEPS paddle_api_full gflags - CL_DEPS ${opencl_kernels}) diff --git a/lite/api/model_optimize_tool.cc b/lite/api/model_optimize_tool.cc index 7124e38ac20d597f809ea9eaf3c853db53a83aea..37c09b344698f1cd3382fa47e8f66359a05615c0 100644 --- a/lite/api/model_optimize_tool.cc +++ b/lite/api/model_optimize_tool.cc @@ -16,10 +16,11 @@ #ifdef PADDLE_WITH_TESTING #include #endif +#include "all_kernel_faked.cc" // NOLINT #include "lite/api/paddle_api.h" -#include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/api/paddle_use_passes.h" +#include "lite/core/op_registry.h" #include "lite/utils/cp_logging.h" #include "lite/utils/string.h" @@ -33,6 +34,7 @@ DEFINE_string( optimize_out_type, "protobuf", "store type of the output optimized model. protobuf/naive_buffer"); +DEFINE_bool(display_kernels, false, "Display kernel information"); DEFINE_string(optimize_out, "", "path of the output optimized model"); DEFINE_string(valid_targets, "arm", @@ -43,12 +45,22 @@ DEFINE_bool(prefer_int8_kernel, false, "Prefer to run model with int8 kernels"); namespace paddle { namespace lite_api { +//! Display the kernel information. +void DisplayKernels() { + LOG(INFO) << ::paddle::lite::KernelRegistry::Global().DebugString(); +} + void Main() { if (!FLAGS_model_file.empty() && !FLAGS_param_file.empty()) { LOG(WARNING) << "Load combined-param model. Option model_dir will be ignored"; } + if (FLAGS_display_kernels) { + DisplayKernels(); + exit(0); + } + lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_model_file(FLAGS_model_file); @@ -75,6 +87,7 @@ void Main() { CHECK(!valid_places.empty()) << "At least one target should be set, should set the " "command argument 'valid_targets'"; + if (FLAGS_prefer_int8_kernel) { LOG(WARNING) << "Int8 mode is only support by ARM target"; valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)}); diff --git a/lite/api/paddle_lite_factory_helper.h b/lite/api/paddle_lite_factory_helper.h index 544cd0e313034ef4a8c378298f4e86c9597d6a98..e99127e233bc4adf159a6a567dfb15f6fd784a27 100644 --- a/lite/api/paddle_lite_factory_helper.h +++ b/lite/api/paddle_lite_factory_helper.h @@ -25,7 +25,7 @@ #define USE_LITE_KERNEL(op_type__, target__, precision__, layout__, alias__) \ extern int touch_##op_type__##target__##precision__##layout__##alias__(); \ - int op_type__##target__##precision__##layout__##alias__ \ + int op_type__##target__##precision__##layout__##alias__##__use_lite_kernel \ __attribute__((unused)) = \ touch_##op_type__##target__##precision__##layout__##alias__(); diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index 19d973fc1e3c19f94c32e4c8f5390b8a4916f1c0..ff80accbb73d03c0f538aeaa86d88ed334fe48ce 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -53,8 +53,16 @@ add_custom_command( ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h OUTPUT ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h ) +# generate fake kernels for memory_optimize_tool +add_custom_command( + COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py + ${kernels_src_list} + ${CMAKE_BINARY_DIR}/all_kernel_faked.cc + OUTPUT ${CMAKE_BINARY_DIR}/all_kernel_faked.cc + ) add_custom_target(op_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h) add_custom_target(kernel_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h) +add_custom_target(all_kernel_faked_cc DEPENDS ${CMAKE_BINARY_DIR}/all_kernel_faked.cc) #----------------------------------------------- NOT CHANGE ----------------------------------------------- lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor diff --git a/lite/core/context.h b/lite/core/context.h index 4109c3333410604f03eaf3818adf183ff407a26f..bac0e3a627199c54dd744120bb664dcf76b2ce6d 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -356,7 +356,10 @@ class ContextScheduler { break; #endif default: +#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL LOG(FATAL) << "unsupported target " << TargetToStr(target); +#endif + break; } return ctx; } diff --git a/lite/core/op_registry.h b/lite/core/op_registry.h index 60d82a89cac6808b7d4d2ee5bc1e5d06a3231b02..5b48c251c80db32305079bcfa933edd6f512f083 100644 --- a/lite/core/op_registry.h +++ b/lite/core/op_registry.h @@ -15,9 +15,11 @@ #pragma once #include +#include #include #include #include +#include #include #include #include @@ -26,6 +28,7 @@ #include "lite/core/op_lite.h" #include "lite/core/target_wrapper.h" #include "lite/utils/all.h" +#include "lite/utils/macros.h" using LiteType = paddle::lite::Type; @@ -159,6 +162,10 @@ class KernelRegistry final { auto *reg = varient.template get(); CHECK(reg) << "Can not be empty of " << name; reg->Register(name, std::move(creator)); +#ifdef LITE_ON_MODEL_OPTIMIZE_TOOL + kernel_info_map_[name].push_back( + std::make_tuple(Target, Precision, Layout)); +#endif // LITE_ON_MODEL_OPTIMIZE_TOOL } template :\n"; - constexpr TargetType tgt = TARGET(kHost); - constexpr PrecisionType dt = PRECISION(kFloat); - constexpr DataLayoutType lt = DATALAYOUT(kNCHW); - constexpr DataLayoutType kany = DATALAYOUT(kAny); - using kernel_registor_t = KernelRegistryForTarget; - auto *reg = registries_[GetKernelOffset()] - .template get(); - ss << reg->DebugString() << "\n"; + ss << "\n"; + ss << "Count of kernel kinds: "; + int count = 0; + for (auto &item : kernel_info_map_) { + for (auto &kernel : item.second) ++count; + } + ss << count << "\n"; + + ss << "Count of registered kernels: " << kernel_info_map_.size() << "\n"; + for (auto &item : kernel_info_map_) { + ss << "op: " << item.first << "\n"; + for (auto &kernel : item.second) { + ss << " - (" << TargetToStr(std::get<0>(kernel)) << ","; + ss << PrecisionToStr(std::get<1>(kernel)) << ","; + ss << DataLayoutToStr(std::get<2>(kernel)); + ss << ")"; + ss << "\n"; + } + } + return ss.str(); - return ""; +#endif // LITE_ON_MODEL_OPTIMIZE_TOOL } private: mutable std::vector registries_; +#ifndef LITE_ON_TINY_PUBLISH + mutable std::map< + std::string, + std::vector>> + kernel_info_map_; +#endif }; template ::Run() { } // namespace kernels } // namespace lite } // namespace paddle -REGISTER_LITE_KERNEL( - logical_xor, - kARM, - kFloat, - kNCHW, - paddle::lite::kernels::arm::BinaryLogicalCompute< - paddle::lite::kernels::arm::_LogicalXorFunctor>, - // paddle::lite::kernels::arm::BinaryLogicalCompute>, - def) + +REGISTER_LITE_KERNEL(logical_xor, + kARM, + kFloat, + kNCHW, + paddle::lite::kernels::arm::BinaryLogicalCompute< + paddle::lite::kernels::arm::_LogicalXorFunctor>, + def) .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) .Finalize(); -REGISTER_LITE_KERNEL( - logical_and, - kARM, - kFloat, - kNCHW, - // paddle::lite::kernels::arm::BinaryLogicalCompute>, - paddle::lite::kernels::arm::BinaryLogicalCompute< - paddle::lite::kernels::arm::_LogicalAndFunctor>, - def) +REGISTER_LITE_KERNEL(logical_and, + kARM, + kFloat, + kNCHW, + paddle::lite::kernels::arm::BinaryLogicalCompute< + paddle::lite::kernels::arm::_LogicalAndFunctor>, + def) .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))}) diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index ff950be06048a99a6f122655b52edd8fcf064400..428cc213ce63b8d24193a44f23d61fea78f63d6a 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -5,5 +5,5 @@ add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kerne add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op) add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps}) -lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any) +#lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any) #lite_cc_test(test_multiclass_nms_compute_host SRCS multiclass_nms_compute_test.cc DEPS multiclass_nms_compute_host any) diff --git a/lite/kernels/x86/CMakeLists.txt b/lite/kernels/x86/CMakeLists.txt index 7941cde03e78e01fa97b2565b4c6b4d642badd00..48165ead936ad1a33a80f0b2b55ec55ccdf7eced 100644 --- a/lite/kernels/x86/CMakeLists.txt +++ b/lite/kernels/x86/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LITE_WITH_X86) - return() -endif() - # lite_cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op) # lite_cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps}) # lite_cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps}) @@ -38,6 +34,10 @@ add_kernel(shape_compute_x86 X86 basic SRCS shape_compute.cc DEPS ${lite_kernel_ add_kernel(sequence_pool_compute_x86 X86 basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} sequence_pooling) add_kernel(softmax_compute_x86 X86 basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax) +if(NOT LITE_WITH_X86) + return() +endif() + lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86) lite_cc_test(test_slice_compute_x86 SRCS slice_compute_test.cc DEPS slice_compute_x86) lite_cc_test(test_squeeze_compute_x86 SRCS squeeze_compute_test.cc DEPS squeeze_compute_x86) diff --git a/lite/kernels/x86/mul_compute.cc b/lite/kernels/x86/mul_compute.cc index 3e5fccfc3a4b76412cdf32bb9eada75686185dbe..64558f66772381ad402a3eb203bb6efd9fceff60 100644 --- a/lite/kernels/x86/mul_compute.cc +++ b/lite/kernels/x86/mul_compute.cc @@ -25,20 +25,20 @@ REGISTER_LITE_KERNEL(mul, .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) .Finalize(); -#ifdef LITE_WITH_TRAIN -REGISTER_LITE_KERNEL(mul_grad, - kX86, - kFloat, - kNCHW, - paddle::lite::kernels::x86::MulGradCompute, - def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) - .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))}) - .BindInput(paddle::framework::GradVarName("Out"), - {LiteType::GetTensorTy(TARGET(kX86))}) - .BindOutput(paddle::framework::GradVarName("X"), - {LiteType::GetTensorTy(TARGET(kX86))}) - .BindOutput(paddle::framework::GradVarName("Y"), - {LiteType::GetTensorTy(TARGET(kX86))}) - .Finalize(); -#endif +// #ifdef LITE_WITH_TRAIN +// REGISTER_LITE_KERNEL(mul_grad, +// kX86, +// kFloat, +// kNCHW, +// paddle::lite::kernels::x86::MulGradCompute, +// def) +// .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) +// .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))}) +// .BindInput(paddle::framework::GradVarName("Out"), +// {LiteType::GetTensorTy(TARGET(kX86))}) +// .BindOutput(paddle::framework::GradVarName("X"), +// {LiteType::GetTensorTy(TARGET(kX86))}) +// .BindOutput(paddle::framework::GradVarName("Y"), +// {LiteType::GetTensorTy(TARGET(kX86))}) +// .Finalize(); +// #endif diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh index c04bbb7c62919e39e7f980843e86692c7a5068b6..7e9c5068d5e44bf9c9e6819f5dcf91ba9b9592af 100755 --- a/lite/tools/ci_build.sh +++ b/lite/tools/ci_build.sh @@ -224,6 +224,7 @@ function build_test_server { build test_server + test_model_optimize_tool_compile } function build_test_train { @@ -393,20 +394,27 @@ function test_arm_model { adb -s emulator-${port} shell "${adb_work_dir}/${test_name} --model_dir=$adb_model_path" } -function _test_model_optimize_tool { - local port=$1 - local remote_model_path=$ADB_WORK_DIR/lite_naive_model - local remote_test=$ADB_WORK_DIR/model_optimize_tool - local adb="adb -s emulator-${port}" - +# function _test_model_optimize_tool { +# local port=$1 +# local remote_model_path=$ADB_WORK_DIR/lite_naive_model +# local remote_test=$ADB_WORK_DIR/model_optimize_tool +# local adb="adb -s emulator-${port}" + +# make model_optimize_tool -j$NUM_CORES_FOR_COMPILE +# local test_path=$(find . -name model_optimize_tool | head -n1) +# local model_path=$(find . -name lite_naive_model | head -n1) +# $adb push ${test_path} ${ADB_WORK_DIR} +# $adb shell mkdir -p $remote_model_path +# $adb push $model_path/* $remote_model_path +# $adb shell $remote_test --model_dir $remote_model_path --optimize_out ${remote_model_path}.opt \ +# --valid_targets "arm" +# } + +function test_model_optimize_tool_compile { + cd $workspace + cd build + cmake .. -DWITH_LITE=ON -DLITE_ON_MODEL_OPTIMIZE_TOOL=ON -DWITH_TESTING=OFF -DLITE_BUILD_EXTRA=ON make model_optimize_tool -j$NUM_CORES_FOR_COMPILE - local test_path=$(find . -name model_optimize_tool | head -n1) - local model_path=$(find . -name lite_naive_model | head -n1) - $adb push ${test_path} ${ADB_WORK_DIR} - $adb shell mkdir -p $remote_model_path - $adb push $model_path/* $remote_model_path - $adb shell $remote_test --model_dir $remote_model_path --optimize_out ${remote_model_path}.opt \ - --valid_targets "arm" } function _test_paddle_code_generator { @@ -558,8 +566,8 @@ function test_arm { # test finally test_arm_api $port - _test_model_optimize_tool $port - _test_paddle_code_generator $port + # _test_model_optimize_tool $port + # _test_paddle_code_generator $port } function prepare_emulator { diff --git a/lite/tools/cmake_tools/ast.py b/lite/tools/cmake_tools/ast.py new file mode 100644 index 0000000000000000000000000000000000000000..7df41cbc8faf610b981d5201629d0bd1c8700ba4 --- /dev/null +++ b/lite/tools/cmake_tools/ast.py @@ -0,0 +1,321 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +class SyntaxParser(object): + def __init__(self, str): + self.str = str + self.cur_pos = 0 + self.N = len(self.str) + self.token = '' + + def eat_char(self): + self.cur_pos += 1 + + def eat_str(self): + ''' + "xx" + ''' + self.token = '' + assert self.cur == '"'; + self.cur_pos += 1; + + assert self.cur_pos < self.N + while self.cur != '"': + self.token += self.cur + self.cur_pos += 1 + assert self.cur_pos < self.N + assert self.cur == '"' + self.cur_pos += 1 + #logging.warning('get: %s' % self.token) + + def eat_word(self): + self.token = '' + str = '' + while self.cur.isalnum() or self.cur in ('_', ':',): + self.token += self.cur + self.forward() + + #logging.warning('get: %s' % self.token) + + def eat_left_parentheses(self): + ''' + ( + ''' + self.assert_is('(') + self.token = '(' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_right_parentheses(self): + ''' + ) + ''' + self.assert_is(')') + self.token = ')' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_left_brace(self): + ''' + { + ''' + self.assert_is('{') + self.token = '{' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_right_brace(self): + ''' + } + ''' + self.assert_is('}') + self.token = '}' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_comma(self): + ''' + , + ''' + self.assert_is(',') + self.token = ',' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_spaces(self): + ''' + eat space like string. + ''' + while self.cur_pos < len(self.str): + if self.cur in (' ', '\t', '\n'): + self.forward() + else: + break + + def eat_point(self): + ''' + . + ''' + self.assert_is('.') + self.token = '.' + self.forward() + #logging.warning('get: %s' % self.token) + + def eat_any_but_brace(self): + ''' + anything but {} + ''' + start = self.cur_pos + while self.cur not in ('{', '}'): + self.cur_pos += 1 + + self.token = self.str[start:self.cur_pos] + #logging.warning('get: %s' % self.token) + + def eat_semicolon(self): + ''' + ; + ''' + self.assert_is(';') + self.token = ';' + self.forward() + #logging.warning('get: %s' % self.token) + + def assert_is(self, w): + assert self.cur == w, "token should be %s, but get %s" % (w, self.cur) + + @property + def cur(self): + assert self.cur_pos < self.N + return self.str[self.cur_pos] + #logging.warning('get: %s' % self.token) + + def forward(self): + self.cur_pos += 1 + + +class IO: + def __init__(self): + self.name = '' + self.type = '' + + def __repr__(self): + return "- %s: %s" % (self.name, self.type) + + +class KernelRegistry: + def __init__(self): + self.op_type = '' + self.target = '' + self.precision = '' + self.data_layout = '' + self.class_ = '' + self.alias = '' + self.inputs = [] + self.outputs = [] + + def __repr__(self): + str = "Kernel({op_type}, {target}, {precision}, {data_layout}, {alias}):".format( + op_type = self.op_type, + target = self.target, + precision = self.precision, + data_layout = self.data_layout, + alias = self.alias, + ) + + str += '\n' + '\n'.join(repr(io) for io in self.inputs) + str += '\n' + '\n'.join(repr(io) for io in self.outputs) + str += '\n' + return str + + +class RegisterLiteKernelParser(SyntaxParser): + + KEYWORD = 'REGISTER_LITE_KERNEL' + + def __init__(self, str): + super(RegisterLiteKernelParser, self).__init__(str) + + self.kernels = [] + + def parse(self): + find_registry_command = False + + while self.cur_pos < len(self.str): + start = self.str.find(self.KEYWORD, self.cur_pos) + if start != -1: + #print 'str ', start, self.str[start-2: start] + if start != 0 and '/' in self.str[start-2: start]: + ''' + skip commented code + ''' + self.cur_pos = start + 1 + continue + self.cur_pos = start + k = KernelRegistry() + self.kernels.append(self.parse_register(k)) + else: + break + + def eat_class(self): + start = self.cur_pos + self.eat_word() + stack = '' + if self.cur == '<': + stack = stack + '<' + self.forward() + while stack: + if self.cur == '<': + stack = stack + '<' + elif self.cur == '>': + stack = stack[1:] + else: + pass + self.forward() + self.token = self.str[start:self.cur_pos] + + + def parse_register(self, k): + + self.eat_word() + assert self.token == self.KEYWORD + self.eat_spaces() + + self.eat_left_parentheses() + self.eat_spaces() + + self.eat_word() + k.op_type = self.token + self.eat_comma() + self.eat_spaces() + + + self.eat_word() + k.target = self.token + self.eat_comma() + self.eat_spaces() + + self.eat_word() + k.precision = self.token + self.eat_comma() + self.eat_spaces() + + self.eat_word() + k.data_layout = self.token + self.eat_comma() + self.eat_spaces() + + self.eat_class() + k.class_ = self.token + self.eat_comma() + self.eat_spaces() + + self.eat_word() + k.alias = self.token + self.eat_spaces() + + self.eat_right_parentheses() + self.eat_spaces() + + + def eat_io(is_input, io): + self.eat_left_parentheses() + self.eat_str() + io.name = self.token + self.eat_comma() + self.eat_spaces() + + self.eat_left_brace() + self.eat_any_but_brace() + io.type = self.token + self.eat_right_brace() + self.eat_spaces() + self.eat_right_parentheses() + self.eat_spaces() + + + # eat input and output + while self.cur_pos < len(self.str): + self.eat_point() + self.eat_spaces() + self.eat_word() + assert self.token in ('BindInput', 'BindOutput', 'Finalize') + io = IO() + + if self.token == 'BindInput': + eat_io(True, io) + k.inputs.append(io) + elif self.token == 'BindOutput': + eat_io(False, io) + k.outputs.append(io) + else: + self.eat_left_parentheses() + self.eat_right_parentheses() + self.eat_semicolon() + self.eat_spaces() + return k + break + + +if __name__ == '__main__': + with open('/home/chunwei/project2/Paddle-Lite/lite/kernels/arm/activation_compute.cc') as f: + c = f.read() + kernel_parser = RegisterLiteKernelParser(c) + + kernel_parser.parse() + + for k in kernel_parser.kernels: + print k diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..7031c8d18b91361dad664e297a47b9bb54f50bb9 --- /dev/null +++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py @@ -0,0 +1,104 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import logging +from ast import RegisterLiteKernelParser +from utils import * + +ops_list_path = sys.argv[1] +dest_path = sys.argv[2] + +out_lines = [ + '#pragma once', + '#include "lite/core/op_registry.h"', + '#include "lite/core/kernel.h"', + '#include "lite/core/type_system.h"', + '', +] + +fake_kernel = ''' + +namespace paddle { +namespace lite { + +class %s : public KernelLite { + public: + void PrepareForRun() override {} + + void Run() override {} + + virtual ~%s() = default; +}; + +} // namespace lite +} // namespace paddle +''' + + + +with open(ops_list_path) as f: + paths = set([path for path in f]) + for path in paths: + print 'path', path + with open(path.strip()) as g: + c = g.read() + kernel_parser = RegisterLiteKernelParser(c) + kernel_parser.parse() + + for k in kernel_parser.kernels: + kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format( + op_type = k.op_type, + target = k.target, + precision = k.precision, + data_layout = k.data_layout, + alias = k.alias, + ) + + kernel_define = fake_kernel % ( + kernel_name, + k.target, + k.precision, + k.data_layout, + kernel_name, + ) + + out_lines.append(kernel_define) + out_lines.append("") + + + key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % ( + k.op_type, + k.target, + k.precision, + k.data_layout, + '::paddle::lite::' + kernel_name, + k.alias, + ) + out_lines.append(key) + + for input in k.inputs: + io = ' .BindInput("%s", {%s})' % (input.name, input.type) + out_lines.append(io) + for output in k.outputs: + io = ' .BindOutput("%s", {%s})' % (output.name, output.type) + out_lines.append(io) + out_lines.append(" .Finalize();") + out_lines.append("") + out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias)) + + +with open(dest_path, 'w') as f: + logging.info("write kernel list to %s" % dest_path) + f.write('\n'.join(out_lines)) diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py index 623d58190acda4c4e2fcd304dd92272bacd73945..b9bfbb2692403c44240c58fca55a2491928f95ad 100644 --- a/lite/tools/cmake_tools/parse_kernel_registry.py +++ b/lite/tools/cmake_tools/parse_kernel_registry.py @@ -14,6 +14,7 @@ import sys import logging +from ast import RegisterLiteKernelParser ops_list_path = sys.argv[1] dest_path = sys.argv[2] @@ -24,56 +25,25 @@ out_lines = [ '', ] -left_pattern = 'REGISTER_LITE_KERNEL(' -right_pattern = ')' -def find_right_pattern(context, start): - if start >= len(context): return -1 - fake_left_num = 0 - while start < len(context): - if context[start] == right_pattern: - if fake_left_num == 0: - return start - else: - fake_left_num -= 1 - elif context[start] == '(': - fake_left_num += 1 - start += 1 - return -1 - -lines = set() with open(ops_list_path) as f: - for line in f: - lines.add(line.strip()) - -for line in lines: - path = line.strip() - - status = '' - with open(path) as g: - context = ''.join([item.strip() for item in g]) - index = 0 - cxt_len = len(context) - while index < cxt_len and index >= 0: - left_index = context.find(left_pattern, index) - if left_index < 0: break - right_index = find_right_pattern(context, left_index+len(left_pattern)) - if right_index < 0: - raise ValueError("Left Pattern and Right Pattern does not match") - tmp = context[left_index+len(left_pattern) : right_index] - index = right_index + 1 - if tmp.startswith('/'): continue - fields = [item.strip() for item in tmp.split(',')] - if len(fields) < 6: - raise ValueError("Invalid REGISTER_LITE_KERNEL format") - - op, target, precision, layout = fields[:4] - alias = fields[-1] - key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % ( - op, target, precision, layout, alias) - if "_grad" in key: continue - out_lines.append(key) - + paths = set([path for path in f]) + for path in paths: + with open(path.strip()) as g: + print 'path: ', path + c = g.read() + kernel_parser = RegisterLiteKernelParser(c) + kernel_parser.parse() + + for k in kernel_parser.kernels: + key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % ( + k.op_type, + k.target, + k.precision, + k.data_layout, + k.alias, + ) + out_lines.append(key) with open(dest_path, 'w') as f: logging.info("write kernel list to %s" % dest_path) diff --git a/lite/tools/cmake_tools/utils.py b/lite/tools/cmake_tools/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..832ead301b6bb8d2d260e3867031582fd9b5330d --- /dev/null +++ b/lite/tools/cmake_tools/utils.py @@ -0,0 +1,18 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def gen_use_kernel_statement(op_type, target, precision, layout, alias): + return 'USE_LITE_KERNEL(%s, %s, %s, %s, %s);' %( + op_type, target, precision, layout, alias + ) diff --git a/lite/utils/CMakeLists.txt b/lite/utils/CMakeLists.txt index 7ab0c61b8f022e0f2a3c91a01dbe0d5730b51c62..6337085d829b115dc6d2553473ddcef8ac5115f8 100644 --- a/lite/utils/CMakeLists.txt +++ b/lite/utils/CMakeLists.txt @@ -3,23 +3,23 @@ # else() # endif() -if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) +if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR LITE_ON_MODEL_OPTIMIZE_TOOL) lite_cc_library(logging SRCS logging.cc) set(utils_DEPS logging) lite_cc_test(test_logging SRCS logging_test.cc DEPS ${utils_DEPS}) else() - set(utils_DEPS glog) -endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + set(utils_DEPS glog) +endif() lite_cc_test(test_varient SRCS varient_test.cc DEPS utils) lite_cc_library(any SRCS any.cc) -if(LITE_ON_TINY_PUBLISH) -lite_cc_library(stream SRCS replace_stl/stream.cc) +if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL) + lite_cc_library(stream SRCS replace_stl/stream.cc) endif() #lite_cc_library(utils SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any) -if(LITE_ON_TINY_PUBLISH) +if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL) lite_cc_library(utils SRCS string.cc DEPS ${utils_DEPS} any stream) else() lite_cc_library(utils SRCS string.cc DEPS ${utils_DEPS} any) diff --git a/lite/utils/cp_logging.h b/lite/utils/cp_logging.h index c756832a873ef4051fdf68ff903be6316315bd14..cc10bece471af7a99f3b271990dd13731c08b9f8 100644 --- a/lite/utils/cp_logging.h +++ b/lite/utils/cp_logging.h @@ -13,7 +13,8 @@ // limitations under the License. #pragma once -#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +#if defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) || \ + defined(LITE_ON_MODEL_OPTIMIZE_TOOL) #include "lite/utils/logging.h" #else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK #include diff --git a/lite/utils/logging.cc b/lite/utils/logging.cc index 9a4cad34f74a6346b293cad6948f237bc1d09c75..c83dd79eb8d902bb1fc181de86e950c715bcbd4c 100644 --- a/lite/utils/logging.cc +++ b/lite/utils/logging.cc @@ -19,7 +19,8 @@ #include "lite/utils/logging.h" -#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +#if defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) || \ + defined(LITE_ON_MODEL_OPTIMIZE_TOOL) #ifndef LITE_SHUTDOWN_LOG namespace paddle { @@ -48,7 +49,7 @@ void gen_log(STL::ostream& log_stream_, << tv.tv_usec / 1000 << " "; if (len > kMaxLen) { - log_stream_ << "..." << file + len - kMaxLen << " " << func << ":" << lineno + log_stream_ << "..." << file + len - kMaxLen << ":" << lineno << " " << func << "] "; } else { log_stream_ << file << " " << func << ":" << lineno << "] "; diff --git a/lite/utils/logging.h b/lite/utils/logging.h index 8dbb7a9752fb5905168b9c6eb2280f6f025a7309..85c716d52ff9ce31be8d3236573e130f33fcb8ca 100644 --- a/lite/utils/logging.h +++ b/lite/utils/logging.h @@ -81,7 +81,7 @@ void gen_log(STL::ostream& log_stream_, const char* func, int lineno, const char* level, - const int kMaxLen = 20); + const int kMaxLen = 40); // LogMessage class LogMessage {