Unverified commit 83d4b0e8, authored by Yan Chunwei and committed by GitHub

make model_optimize_tool run on host (#1990)

Parent 487d6089
......@@ -82,6 +82,7 @@ lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF IF LITE_WITH_PROFILE)
lite_option(LITE_SHUTDOWN_LOG "Shut down the log system or not." OFF)
lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
# publish options
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
......@@ -104,6 +105,9 @@ if (LITE_ON_TINY_PUBLISH)
endif()
include_directories("${PADDLE_SOURCE_DIR}")
# the generated header files.
set(LITE_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}")
include_directories("${LITE_GENERATED_INCLUDE_DIR}")
# for mobile
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
......
......@@ -34,33 +34,6 @@ elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG})
endif()
if(WIN32)
# windows header option for all targets.
add_definitions(-D_XKEYCHECK_H)
# Use response files instead of absolute paths to reduce the cmake link command length.
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
# Specify the program to use when building static libraries
SET(CMAKE_C_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
SET(CMAKE_CXX_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
# set definition for the dll export
if (NOT MSVC)
message(FATAL_ERROR "Windows builds only support MSVC, which is required by the NVIDIA nvcc compiler.")
endif(NOT MSVC)
endif(WIN32)
if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()
if(LITE_WITH_CUDA)
add_definitions(-DLITE_WITH_CUDA)
add_definitions(-DEIGEN_USE_GPU)
......@@ -180,3 +153,8 @@ endif()
if (LITE_ON_TINY_PUBLISH)
add_definitions("-DLITE_ON_TINY_PUBLISH")
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
add_definitions("-DLITE_ON_MODEL_OPTIMIZE_TOOL")
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
......@@ -185,6 +185,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET(SOURCE_DIR "${CMAKE_SOURCE_DIR}/third-party/protobuf-host")
IF(BUILD_FOR_HOST)
# for a host (server) build, use the current compilers.
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
set(HOST_C_COMPILER "${CMAKE_C_COMPILER}")
set(HOST_CXX_COMPILER "${CMAKE_CXX_COMPILER}")
endif()
SET(OPTIONAL_ARGS
"-DCMAKE_C_COMPILER=${HOST_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}"
......@@ -276,7 +282,11 @@ IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
ENDIF()
IF(NOT PROTOBUF_FOUND)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
build_protobuf(extern_protobuf FALSE)
else()
build_protobuf(extern_protobuf TRUE)
endif()
SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR}
CACHE PATH "protobuf include directory." FORCE)
......
......@@ -240,6 +240,21 @@ function(add_kernel TARGET device level)
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
# collect the source list used by model_optimize_tool for fake kernel generation.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
# when compiling model_optimize_tool, a source file with all the fake kernel definitions is generated,
# so there is no need to compile the real kernel sources.
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
return()
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
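# In this mode add_kernel only records each kernel source path in ${kernels_src_list}
# (one absolute path per line, e.g. something like .../lite/kernels/arm/fc_compute.cc;
# the path is illustrative) and returns early, so the Python generators shown later
# in this diff consume that list instead of the real kernels being compiled.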
if ("${device}" STREQUAL "Host")
set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "")
endif()
......@@ -274,6 +289,7 @@ function(add_kernel TARGET device level)
set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "")
endif()
# collect the source list for paddle_use_kernels.h code generation.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
......
......@@ -69,12 +69,12 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin"
#COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
)
if(NOT IOS)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
#add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
......
......@@ -195,6 +195,14 @@ endif()
if (LITE_ON_TINY_PUBLISH)
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
message(STATUS "Compiling model_optimize_tool")
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
DEPS gflags kernel op optimizer mir_passes utils)
add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc)
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
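# A host-only build of the tool can be configured roughly the way the CI hook
# test_model_optimize_tool_compile (later in this diff) does:
#   cmake .. -DWITH_LITE=ON -DLITE_ON_MODEL_OPTIMIZE_TOOL=ON -DWITH_TESTING=OFF -DLITE_BUILD_EXTRA=ON
#   make model_optimize_tool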
lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
${ops}
ARM_DEPS ${arm_kernels}
......@@ -209,14 +217,14 @@ endif()
# Some bins
if(NOT IOS)
lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags
lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels})
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
......@@ -229,7 +237,3 @@ endif()
#X86_DEPS operator
#DEPS light_api model_parser target_wrapper_host mir_passes
#ARM_DEPS ${arm_kernels}) NPU_DEPS ${npu_kernels})
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc
DEPS paddle_api_full gflags
CL_DEPS ${opencl_kernels})
......@@ -16,10 +16,11 @@
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#endif
#include "all_kernel_faked.cc" // NOLINT
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/op_registry.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/string.h"
......@@ -33,6 +34,7 @@ DEFINE_string(
optimize_out_type,
"protobuf",
"store type of the output optimized model. protobuf/naive_buffer");
DEFINE_bool(display_kernels, false, "Display kernel information");
DEFINE_string(optimize_out, "", "path of the output optimized model");
DEFINE_string(valid_targets,
"arm",
......@@ -43,12 +45,22 @@ DEFINE_bool(prefer_int8_kernel, false, "Prefer to run model with int8 kernels");
namespace paddle {
namespace lite_api {
//! Display the kernel information.
void DisplayKernels() {
LOG(INFO) << ::paddle::lite::KernelRegistry::Global().DebugString();
}
void Main() {
if (!FLAGS_model_file.empty() && !FLAGS_param_file.empty()) {
LOG(WARNING)
<< "Load combined-param model. Option model_dir will be ignored";
}
if (FLAGS_display_kernels) {
DisplayKernels();
exit(0);
}
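// Usage sketch: pass --display_kernels in standard gflags syntax, e.g.
// `model_optimize_tool --display_kernels=true` (the binary path depends on
// the build tree), to print the registered kernel table and exit.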
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_model_file(FLAGS_model_file);
......@@ -75,6 +87,7 @@ void Main() {
CHECK(!valid_places.empty())
<< "At least one target should be set, should set the "
"command argument 'valid_targets'";
if (FLAGS_prefer_int8_kernel) {
LOG(WARNING) << "Int8 mode is only support by ARM target";
valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
......
......@@ -25,7 +25,7 @@
#define USE_LITE_KERNEL(op_type__, target__, precision__, layout__, alias__) \
extern int touch_##op_type__##target__##precision__##layout__##alias__(); \
int op_type__##target__##precision__##layout__##alias__ \
int op_type__##target__##precision__##layout__##alias__##__use_lite_kernel \
__attribute__((unused)) = \
touch_##op_type__##target__##precision__##layout__##alias__();
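// The new __use_lite_kernel suffix keeps this global's name distinct from other
// symbols derived from the same (op, target, precision, layout, alias) tuple,
// presumably needed now that all_kernel_faked.cc is included in the same
// translation unit as paddle_use_kernels.h. Expansion sketch with illustrative
// arguments, USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def):
//   extern int touch_fckARMkFloatkNCHWdef();
//   int fckARMkFloatkNCHWdef__use_lite_kernel __attribute__((unused)) =
//       touch_fckARMkFloatkNCHWdef();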
......
......@@ -53,8 +53,16 @@ add_custom_command(
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
OUTPUT ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
)
# generate fake kernels for model_optimize_tool
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
${kernels_src_list}
${CMAKE_BINARY_DIR}/all_kernel_faked.cc
OUTPUT ${CMAKE_BINARY_DIR}/all_kernel_faked.cc
)
add_custom_target(op_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h)
add_custom_target(kernel_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS ${CMAKE_BINARY_DIR}/all_kernel_faked.cc)
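# For reference, a registration such as fc on ARM (illustrative) is rewritten by
# create_fake_kernel_registry.py into roughly the following stanza in
# all_kernel_faked.cc, per the fake_kernel template later in this diff (the Bind
# names are illustrative and copied from the parsed real registration):
#   namespace paddle { namespace lite {
#   class fc_kARM_kFloat_kNCHW_def_class
#       : public KernelLite<TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)> {
#    public:
#     void PrepareForRun() override {}
#     void Run() override {}
#     virtual ~fc_kARM_kFloat_kNCHW_def_class() = default;
#   };
#   }}  // namespace paddle::lite
#   REGISTER_LITE_KERNEL(fc, kARM, kFloat, kNCHW,
#                        ::paddle::lite::fc_kARM_kFloat_kNCHW_def_class, def)
#       .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
#       .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
#       .Finalize();
#   USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);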
#----------------------------------------------- NOT CHANGE -----------------------------------------------
lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor
......
......@@ -356,7 +356,10 @@ class ContextScheduler {
break;
#endif
default:
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
LOG(FATAL) << "unsupported target " << TargetToStr(target);
#endif
break;
}
return ctx;
}
......
......@@ -15,9 +15,11 @@
#pragma once
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
......@@ -26,6 +28,7 @@
#include "lite/core/op_lite.h"
#include "lite/core/target_wrapper.h"
#include "lite/utils/all.h"
#include "lite/utils/macros.h"
using LiteType = paddle::lite::Type;
......@@ -159,6 +162,10 @@ class KernelRegistry final {
auto *reg = varient.template get<kernel_registor_t *>();
CHECK(reg) << "Can not be empty of " << name;
reg->Register(name, std::move(creator));
#ifdef LITE_ON_MODEL_OPTIMIZE_TOOL
kernel_info_map_[name].push_back(
std::make_tuple(Target, Precision, Layout));
#endif // LITE_ON_MODEL_OPTIMIZE_TOOL
}
template <TargetType Target,
......@@ -190,22 +197,42 @@ class KernelRegistry final {
}
std::string DebugString() const {
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
return "No more debug info";
#else // LITE_ON_MODEL_OPTIMIZE_TOOL
STL::stringstream ss;
ss << "KernelCreator<host, float>:\n";
constexpr TargetType tgt = TARGET(kHost);
constexpr PrecisionType dt = PRECISION(kFloat);
constexpr DataLayoutType lt = DATALAYOUT(kNCHW);
constexpr DataLayoutType kany = DATALAYOUT(kAny);
using kernel_registor_t = KernelRegistryForTarget<tgt, dt, lt>;
auto *reg = registries_[GetKernelOffset<tgt, dt, kany>()]
.template get<kernel_registor_t *>();
ss << reg->DebugString() << "\n";
ss << "\n";
ss << "Count of kernel kinds: ";
int count = 0;
for (auto &item : kernel_info_map_) {
for (auto &kernel : item.second) ++count;
}
ss << count << "\n";
ss << "Count of registered kernels: " << kernel_info_map_.size() << "\n";
for (auto &item : kernel_info_map_) {
ss << "op: " << item.first << "\n";
for (auto &kernel : item.second) {
ss << " - (" << TargetToStr(std::get<0>(kernel)) << ",";
ss << PrecisionToStr(std::get<1>(kernel)) << ",";
ss << DataLayoutToStr(std::get<2>(kernel));
ss << ")";
ss << "\n";
}
}
return ss.str();
return "";
#endif // LITE_ON_MODEL_OPTIMIZE_TOOL
}
private:
mutable std::vector<any_kernel_registor_t> registries_;
#ifndef LITE_ON_TINY_PUBLISH
mutable std::map<
std::string,
std::vector<std::tuple<TargetType, PrecisionType, DataLayoutType>>>
kernel_info_map_;
#endif
};
template <TargetType target,
......
if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
return()
endif()
message(STATUS "compile with lite ARM kernels")
add_kernel(fc_compute_arm ARM basic SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(activation_compute_arm ARM basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mul_compute_arm ARM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
......@@ -73,6 +67,15 @@ add_kernel(fill_constant_compute_arm ARM extra SRCS fill_constant_compute.cc DEP
add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(is_empty_compute_arm ARM extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps} math_arm)
# NOTE: we leave these add_kernel calls unguarded by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels
# are registered with model_optimize_tool.
if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
return()
endif()
message(STATUS "compile with lite ARM kernels")
lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm)
lite_cc_test(test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm)
......
......@@ -82,25 +82,22 @@ void UnaryLogicalCompute<Functor>::Run() {
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
logical_xor,
REGISTER_LITE_KERNEL(logical_xor,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::BinaryLogicalCompute<
paddle::lite::kernels::arm::_LogicalXorFunctor>,
// paddle::lite::kernels::arm::BinaryLogicalCompute<paddle::lite::kernels::arm::_LogicalXorFunctor<bool>>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(
logical_and,
REGISTER_LITE_KERNEL(logical_and,
kARM,
kFloat,
kNCHW,
// paddle::lite::kernels::arm::BinaryLogicalCompute<paddle::lite::kernels::arm::_LogicalAndFunctor<bool>>,
paddle::lite::kernels::arm::BinaryLogicalCompute<
paddle::lite::kernels::arm::_LogicalAndFunctor>,
def)
......
......@@ -5,5 +5,5 @@ add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kerne
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op)
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any)
#lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any)
#lite_cc_test(test_multiclass_nms_compute_host SRCS multiclass_nms_compute_test.cc DEPS multiclass_nms_compute_host any)
if(NOT LITE_WITH_X86)
return()
endif()
# lite_cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
# lite_cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
......@@ -38,6 +34,10 @@ add_kernel(shape_compute_x86 X86 basic SRCS shape_compute.cc DEPS ${lite_kernel_
add_kernel(sequence_pool_compute_x86 X86 basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} sequence_pooling)
add_kernel(softmax_compute_x86 X86 basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
if(NOT LITE_WITH_X86)
return()
endif()
lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86)
lite_cc_test(test_slice_compute_x86 SRCS slice_compute_test.cc DEPS slice_compute_x86)
lite_cc_test(test_squeeze_compute_x86 SRCS squeeze_compute_test.cc DEPS squeeze_compute_x86)
......
......@@ -25,20 +25,20 @@ REGISTER_LITE_KERNEL(mul,
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
#ifdef LITE_WITH_TRAIN
REGISTER_LITE_KERNEL(mul_grad,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::MulGradCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput(paddle::framework::GradVarName("Out"),
{LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput(paddle::framework::GradVarName("X"),
{LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput(paddle::framework::GradVarName("Y"),
{LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
#endif
// #ifdef LITE_WITH_TRAIN
// REGISTER_LITE_KERNEL(mul_grad,
// kX86,
// kFloat,
// kNCHW,
// paddle::lite::kernels::x86::MulGradCompute<float>,
// def)
// .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
// .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
// .BindInput(paddle::framework::GradVarName("Out"),
// {LiteType::GetTensorTy(TARGET(kX86))})
// .BindOutput(paddle::framework::GradVarName("X"),
// {LiteType::GetTensorTy(TARGET(kX86))})
// .BindOutput(paddle::framework::GradVarName("Y"),
// {LiteType::GetTensorTy(TARGET(kX86))})
// .Finalize();
// #endif
......@@ -224,6 +224,7 @@ function build_test_server {
build
test_server
test_model_optimize_tool_compile
}
function build_test_train {
......@@ -393,20 +394,27 @@ function test_arm_model {
adb -s emulator-${port} shell "${adb_work_dir}/${test_name} --model_dir=$adb_model_path"
}
function _test_model_optimize_tool {
local port=$1
local remote_model_path=$ADB_WORK_DIR/lite_naive_model
local remote_test=$ADB_WORK_DIR/model_optimize_tool
local adb="adb -s emulator-${port}"
make model_optimize_tool -j$NUM_CORES_FOR_COMPILE
local test_path=$(find . -name model_optimize_tool | head -n1)
local model_path=$(find . -name lite_naive_model | head -n1)
$adb push ${test_path} ${ADB_WORK_DIR}
$adb shell mkdir -p $remote_model_path
$adb push $model_path/* $remote_model_path
$adb shell $remote_test --model_dir $remote_model_path --optimize_out ${remote_model_path}.opt \
--valid_targets "arm"
}
# function _test_model_optimize_tool {
# local port=$1
# local remote_model_path=$ADB_WORK_DIR/lite_naive_model
# local remote_test=$ADB_WORK_DIR/model_optimize_tool
# local adb="adb -s emulator-${port}"
# make model_optimize_tool -j$NUM_CORES_FOR_COMPILE
# local test_path=$(find . -name model_optimize_tool | head -n1)
# local model_path=$(find . -name lite_naive_model | head -n1)
# $adb push ${test_path} ${ADB_WORK_DIR}
# $adb shell mkdir -p $remote_model_path
# $adb push $model_path/* $remote_model_path
# $adb shell $remote_test --model_dir $remote_model_path --optimize_out ${remote_model_path}.opt \
# --valid_targets "arm"
# }
function test_model_optimize_tool_compile {
cd $workspace
cd build
cmake .. -DWITH_LITE=ON -DLITE_ON_MODEL_OPTIMIZE_TOOL=ON -DWITH_TESTING=OFF -DLITE_BUILD_EXTRA=ON
make model_optimize_tool -j$NUM_CORES_FOR_COMPILE
}
function _test_paddle_code_generator {
......@@ -558,8 +566,8 @@ function test_arm {
# test finally
test_arm_api $port
_test_model_optimize_tool $port
_test_paddle_code_generator $port
# _test_model_optimize_tool $port
# _test_paddle_code_generator $port
}
function prepare_emulator {
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
class SyntaxParser(object):
def __init__(self, str):
self.str = str
self.cur_pos = 0
self.N = len(self.str)
self.token = ''
def eat_char(self):
self.cur_pos += 1
def eat_str(self):
'''
"xx"
'''
self.token = ''
assert self.cur == '"';
self.cur_pos += 1;
assert self.cur_pos < self.N
while self.cur != '"':
self.token += self.cur
self.cur_pos += 1
assert self.cur_pos < self.N
assert self.cur == '"'
self.cur_pos += 1
#logging.warning('get: %s' % self.token)
def eat_word(self):
self.token = ''
while self.cur.isalnum() or self.cur in ('_', ':',):
self.token += self.cur
self.forward()
#logging.warning('get: %s' % self.token)
def eat_left_parentheses(self):
'''
(
'''
self.assert_is('(')
self.token = '('
self.forward()
#logging.warning('get: %s' % self.token)
def eat_right_parentheses(self):
'''
)
'''
self.assert_is(')')
self.token = ')'
self.forward()
#logging.warning('get: %s' % self.token)
def eat_left_brace(self):
'''
{
'''
self.assert_is('{')
self.token = '{'
self.forward()
#logging.warning('get: %s' % self.token)
def eat_right_brace(self):
'''
}
'''
self.assert_is('}')
self.token = '}'
self.forward()
#logging.warning('get: %s' % self.token)
def eat_comma(self):
'''
,
'''
self.assert_is(',')
self.token = ','
self.forward()
#logging.warning('get: %s' % self.token)
def eat_spaces(self):
'''
consume consecutive whitespace (spaces, tabs, newlines).
'''
while self.cur_pos < len(self.str):
if self.cur in (' ', '\t', '\n'):
self.forward()
else:
break
def eat_point(self):
'''
.
'''
self.assert_is('.')
self.token = '.'
self.forward()
#logging.warning('get: %s' % self.token)
def eat_any_but_brace(self):
'''
anything but {}
'''
start = self.cur_pos
while self.cur not in ('{', '}'):
self.cur_pos += 1
self.token = self.str[start:self.cur_pos]
#logging.warning('get: %s' % self.token)
def eat_semicolon(self):
'''
;
'''
self.assert_is(';')
self.token = ';'
self.forward()
#logging.warning('get: %s' % self.token)
def assert_is(self, w):
assert self.cur == w, "token should be %s, but got %s" % (w, self.cur)
@property
def cur(self):
assert self.cur_pos < self.N
return self.str[self.cur_pos]
#logging.warning('get: %s' % self.token)
def forward(self):
self.cur_pos += 1
class IO:
def __init__(self):
self.name = ''
self.type = ''
def __repr__(self):
return "- %s: %s" % (self.name, self.type)
class KernelRegistry:
def __init__(self):
self.op_type = ''
self.target = ''
self.precision = ''
self.data_layout = ''
self.class_ = ''
self.alias = ''
self.inputs = []
self.outputs = []
def __repr__(self):
str = "Kernel({op_type}, {target}, {precision}, {data_layout}, {alias}):".format(
op_type = self.op_type,
target = self.target,
precision = self.precision,
data_layout = self.data_layout,
alias = self.alias,
)
str += '\n' + '\n'.join(repr(io) for io in self.inputs)
str += '\n' + '\n'.join(repr(io) for io in self.outputs)
str += '\n'
return str
class RegisterLiteKernelParser(SyntaxParser):
KEYWORD = 'REGISTER_LITE_KERNEL'
def __init__(self, str):
super(RegisterLiteKernelParser, self).__init__(str)
self.kernels = []
def parse(self):
find_registry_command = False
while self.cur_pos < len(self.str):
start = self.str.find(self.KEYWORD, self.cur_pos)
if start != -1:
#print 'str ', start, self.str[start-2: start]
if start != 0 and '/' in self.str[start-2: start]:
'''
skip commented code
'''
self.cur_pos = start + 1
continue
self.cur_pos = start
k = KernelRegistry()
self.kernels.append(self.parse_register(k))
else:
break
def eat_class(self):
start = self.cur_pos
self.eat_word()
stack = ''
if self.cur == '<':
stack = stack + '<'
self.forward()
while stack:
if self.cur == '<':
stack = stack + '<'
elif self.cur == '>':
stack = stack[1:]
else:
pass
self.forward()
self.token = self.str[start:self.cur_pos]
def parse_register(self, k):
self.eat_word()
assert self.token == self.KEYWORD
self.eat_spaces()
self.eat_left_parentheses()
self.eat_spaces()
self.eat_word()
k.op_type = self.token
self.eat_comma()
self.eat_spaces()
self.eat_word()
k.target = self.token
self.eat_comma()
self.eat_spaces()
self.eat_word()
k.precision = self.token
self.eat_comma()
self.eat_spaces()
self.eat_word()
k.data_layout = self.token
self.eat_comma()
self.eat_spaces()
self.eat_class()
k.class_ = self.token
self.eat_comma()
self.eat_spaces()
self.eat_word()
k.alias = self.token
self.eat_spaces()
self.eat_right_parentheses()
self.eat_spaces()
def eat_io(is_input, io):
self.eat_left_parentheses()
self.eat_str()
io.name = self.token
self.eat_comma()
self.eat_spaces()
self.eat_left_brace()
self.eat_any_but_brace()
io.type = self.token
self.eat_right_brace()
self.eat_spaces()
self.eat_right_parentheses()
self.eat_spaces()
# eat input and output
while self.cur_pos < len(self.str):
self.eat_point()
self.eat_spaces()
self.eat_word()
assert self.token in ('BindInput', 'BindOutput', 'Finalize')
io = IO()
if self.token == 'BindInput':
eat_io(True, io)
k.inputs.append(io)
elif self.token == 'BindOutput':
eat_io(False, io)
k.outputs.append(io)
else:
self.eat_left_parentheses()
self.eat_right_parentheses()
self.eat_semicolon()
self.eat_spaces()
return k
if __name__ == '__main__':
with open('/home/chunwei/project2/Paddle-Lite/lite/kernels/arm/activation_compute.cc') as f:
c = f.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
print k
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import logging
from ast import RegisterLiteKernelParser
from utils import *
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
out_lines = [
'#pragma once',
'#include "lite/core/op_registry.h"',
'#include "lite/core/kernel.h"',
'#include "lite/core/type_system.h"',
'',
]
fake_kernel = '''
namespace paddle {
namespace lite {
class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
public:
void PrepareForRun() override {}
void Run() override {}
virtual ~%s() = default;
};
} // namespace lite
} // namespace paddle
'''
with open(ops_list_path) as f:
paths = set([path for path in f])
for path in paths:
print 'path', path
with open(path.strip()) as g:
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
kernel_name = "{op_type}_{target}_{precision}_{data_layout}_{alias}_class".format(
op_type = k.op_type,
target = k.target,
precision = k.precision,
data_layout = k.data_layout,
alias = k.alias,
)
kernel_define = fake_kernel % (
kernel_name,
k.target,
k.precision,
k.data_layout,
kernel_name,
)
out_lines.append(kernel_define)
out_lines.append("")
key = "REGISTER_LITE_KERNEL(%s, %s, %s, %s, %s, %s)" % (
k.op_type,
k.target,
k.precision,
k.data_layout,
'::paddle::lite::' + kernel_name,
k.alias,
)
out_lines.append(key)
for input in k.inputs:
io = ' .BindInput("%s", {%s})' % (input.name, input.type)
out_lines.append(io)
for output in k.outputs:
io = ' .BindOutput("%s", {%s})' % (output.name, output.type)
out_lines.append(io)
out_lines.append(" .Finalize();")
out_lines.append("")
out_lines.append(gen_use_kernel_statement(k.op_type, k.target, k.precision, k.data_layout, k.alias))
with open(dest_path, 'w') as f:
logging.info("write kernel list to %s" % dest_path)
f.write('\n'.join(out_lines))
......@@ -14,6 +14,7 @@
import sys
import logging
from ast import RegisterLiteKernelParser
ops_list_path = sys.argv[1]
dest_path = sys.argv[2]
......@@ -24,57 +25,26 @@ out_lines = [
'',
]
left_pattern = 'REGISTER_LITE_KERNEL('
right_pattern = ')'
def find_right_pattern(context, start):
if start >= len(context): return -1
fake_left_num = 0
while start < len(context):
if context[start] == right_pattern:
if fake_left_num == 0:
return start
else:
fake_left_num -= 1
elif context[start] == '(':
fake_left_num += 1
start += 1
return -1
lines = set()
with open(ops_list_path) as f:
for line in f:
lines.add(line.strip())
for line in lines:
path = line.strip()
status = ''
with open(path) as g:
context = ''.join([item.strip() for item in g])
index = 0
cxt_len = len(context)
while index < cxt_len and index >= 0:
left_index = context.find(left_pattern, index)
if left_index < 0: break
right_index = find_right_pattern(context, left_index+len(left_pattern))
if right_index < 0:
raise ValueError("Left Pattern and Right Pattern does not match")
tmp = context[left_index+len(left_pattern) : right_index]
index = right_index + 1
if tmp.startswith('/'): continue
fields = [item.strip() for item in tmp.split(',')]
if len(fields) < 6:
raise ValueError("Invalid REGISTER_LITE_KERNEL format")
op, target, precision, layout = fields[:4]
alias = fields[-1]
paths = set([path for path in f])
for path in paths:
with open(path.strip()) as g:
print 'path: ', path
c = g.read()
kernel_parser = RegisterLiteKernelParser(c)
kernel_parser.parse()
for k in kernel_parser.kernels:
key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
op, target, precision, layout, alias)
if "_grad" in key: continue
k.op_type,
k.target,
k.precision,
k.data_layout,
k.alias,
)
out_lines.append(key)
with open(dest_path, 'w') as f:
logging.info("write kernel list to %s" % dest_path)
f.write('\n'.join(out_lines))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def gen_use_kernel_statement(op_type, target, precision, layout, alias):
return 'USE_LITE_KERNEL(%s, %s, %s, %s, %s);' %(
op_type, target, precision, layout, alias
)
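# Example with illustrative values:
#   gen_use_kernel_statement('fc', 'kARM', 'kFloat', 'kNCHW', 'def')
#   returns 'USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);'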
......@@ -3,23 +3,23 @@
# else()
# endif()
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_library(logging SRCS logging.cc)
set(utils_DEPS logging)
lite_cc_test(test_logging SRCS logging_test.cc DEPS ${utils_DEPS})
else()
set(utils_DEPS glog)
endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
endif()
lite_cc_test(test_varient SRCS varient_test.cc DEPS utils)
lite_cc_library(any SRCS any.cc)
if(LITE_ON_TINY_PUBLISH)
lite_cc_library(stream SRCS replace_stl/stream.cc)
if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_library(stream SRCS replace_stl/stream.cc)
endif()
#lite_cc_library(utils SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any)
if(LITE_ON_TINY_PUBLISH)
if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_library(utils SRCS string.cc DEPS ${utils_DEPS} any stream)
else()
lite_cc_library(utils SRCS string.cc DEPS ${utils_DEPS} any)
......
......@@ -13,7 +13,8 @@
// limitations under the License.
#pragma once
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#if defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) || \
defined(LITE_ON_MODEL_OPTIMIZE_TOOL)
#include "lite/utils/logging.h"
#else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include <glog/logging.h>
......
......@@ -19,7 +19,8 @@
#include "lite/utils/logging.h"
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#if defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) || \
defined(LITE_ON_MODEL_OPTIMIZE_TOOL)
#ifndef LITE_SHUTDOWN_LOG
namespace paddle {
......@@ -48,7 +49,7 @@ void gen_log(STL::ostream& log_stream_,
<< tv.tv_usec / 1000 << " ";
if (len > kMaxLen) {
log_stream_ << "..." << file + len - kMaxLen << " " << func << ":" << lineno
log_stream_ << "..." << file + len - kMaxLen << ":" << lineno << " " << func
<< "] ";
} else {
log_stream_ << file << " " << func << ":" << lineno << "] ";
......
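With the reordered fields, a truncated log prefix now reads like "...e/kernels/arm/fc_compute.cc:42 Run] " instead of "...e/kernels/arm/fc_compute.cc Run:42] " (path, line number, and function are illustrative), matching the usual file:line convention.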
......@@ -81,7 +81,7 @@ void gen_log(STL::ostream& log_stream_,
const char* func,
int lineno,
const char* level,
const int kMaxLen = 20);
const int kMaxLen = 40);
// LogMessage
class LogMessage {
......