提交 9fa47bf7 编写于 作者: S Shixiaowei02

Merge branch 'incubate/lite' of http://10.87.145.36/inference/paddlelite into temp/debug1

......@@ -147,6 +147,7 @@ endif()
# for lite, both server and mobile framework.
option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
......
# Bundle several static libraries into one.
# bundle_static_library(tgt_name bundled_tgt_name fake_target)
# Merges <tgt_name> and all of its transitive STATIC_LIBRARY dependencies into
# a single archive lib<bundled_tgt_name>.a via an ar MRI script, then exposes
# the result as an IMPORTED static library named <bundled_tgt_name>.
# <fake_target> is the driver custom target that actually runs the bundling.
function(bundle_static_library tgt_name bundled_tgt_name fake_target)
list(APPEND static_libs ${tgt_name})
# Walks LINK_LIBRARIES (or INTERFACE_LINK_LIBRARIES for interface targets)
# of `input_target` and accumulates every STATIC_LIBRARY target reachable
# from it into `static_libs`. Global properties named
# _<tgt_name>_static_bundle_<dep> mark already-visited dependencies so
# diamonds and cycles are traversed only once.
function(_recursively_collect_dependencies input_target)
set(_input_link_libraries LINK_LIBRARIES)
get_target_property(_input_type ${input_target} TYPE)
if (${_input_type} STREQUAL "INTERFACE_LIBRARY")
set(_input_link_libraries INTERFACE_LINK_LIBRARIES)
endif()
get_target_property(public_dependencies ${input_target} ${_input_link_libraries})
foreach(dependency IN LISTS public_dependencies)
if(TARGET ${dependency})
# Resolve ALIAS targets to the real target they name.
get_target_property(alias ${dependency} ALIASED_TARGET)
if (TARGET ${alias})
set(dependency ${alias})
endif()
get_target_property(_type ${dependency} TYPE)
if (${_type} STREQUAL "STATIC_LIBRARY")
list(APPEND static_libs ${dependency})
endif()
get_property(library_already_added
GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency})
if (NOT library_already_added)
set_property(GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency} ON)
_recursively_collect_dependencies(${dependency})
endif()
endif()
endforeach()
set(static_libs ${static_libs} PARENT_SCOPE)
endfunction()
_recursively_collect_dependencies(${tgt_name})
list(REMOVE_DUPLICATES static_libs)
set(bundled_tgt_full_name
${CMAKE_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX})
message(STATUS "+++++ bundled_tgt_full_name: ${bundled_tgt_full_name}")
# Write an ar MRI script (CREATE/ADDLIB/SAVE/END). file(GENERATE) is needed
# so the $<TARGET_FILE:...> generator expressions resolve at generate time.
file(WRITE ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in
"CREATE ${bundled_tgt_full_name}\n" )
foreach(tgt IN LISTS static_libs)
file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in
"ADDLIB $<TARGET_FILE:${tgt}>\n")
endforeach()
file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in "SAVE\n")
file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in "END\n")
file(GENERATE
OUTPUT ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar
INPUT ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in)
# Under interprocedural optimization the compiler-provided ar wrapper is
# used instead of plain CMAKE_AR (presumably so LTO objects are handled --
# confirm against the toolchain in use).
set(ar_tool ${CMAKE_AR})
if (CMAKE_INTERPROCEDURAL_OPTIMIZATION)
set(ar_tool ${CMAKE_CXX_COMPILER_AR})
endif()
# Run ar in MRI-script mode (-M) to produce the merged archive.
add_custom_command(
COMMAND ${ar_tool} -M < ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar
OUTPUT ${bundled_tgt_full_name}
COMMENT "Bundling ${bundled_tgt_name}"
VERBATIM)
add_custom_target(${fake_target} ALL DEPENDS ${bundled_tgt_full_name})
add_dependencies(${fake_target} ${tgt_name})
# Publish the bundle as an IMPORTED static library that inherits the
# original target's interface include directories.
add_library(${bundled_tgt_name} STATIC IMPORTED)
set_target_properties(${bundled_tgt_name}
PROPERTIES
IMPORTED_LOCATION ${bundled_tgt_full_name}
INTERFACE_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:${tgt_name},INTERFACE_INCLUDE_DIRECTORIES>)
add_dependencies(${bundled_tgt_name} ${fake_target})
endfunction()
......@@ -2,6 +2,8 @@ if (NOT WITH_LITE)
return()
endif()
include(lite)
message(WARNING "Lite enabled!")
message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
......@@ -85,9 +87,9 @@ function (lite_deps TARGET)
endif()
set(${TARGET} ${deps} PARENT_SCOPE)
endfunction()
# A fake target that aggregates all the libraries and tests the lite module depends on.
add_custom_target(lite_compile_deps COMMAND echo 1)
......@@ -95,6 +97,10 @@ add_custom_target(lite_compile_deps COMMAND echo 1)
# the whole fluid project to accelerate the compile speed.
set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt")
file(WRITE ${offline_lib_registry_file} "") # clean
set(__lite_cc_files "";"")
set(__lite_cc_files "${CMAKE_BINARY_DIR}/lite_cc_files.txt")
file(WRITE ${__lite_cc_files} "") # clean
# cc_library with branch support.
# The branches:
# X86_DEPS: works only when LITE_WITH_X86 is ON.
......@@ -104,7 +110,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
function(lite_cc_library TARGET)
set(options STATIC static SHARED shared)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS ARGS)
......@@ -120,14 +126,24 @@ function(lite_cc_library TARGET)
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
# Dispatch on the requested library kind.
# FIX: CMake variables are case-sensitive and cmake_parse_arguments stores
# the parsed options under the lower-case `args_` prefix, so the previous
# `ARGS_shared` / `ARGS_module` spellings were always undefined (the
# lower-case option keywords could never take effect).
if (args_SHARED OR args_shared)
  cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
elseif (args_MODULE OR args_module)
  # MODULE libraries (e.g. the JNI .so) are created directly;
  # add_dependencies only orders the build -- the caller is expected to do
  # the actual linking with target_link_libraries.
  add_library(${TARGET} MODULE ${args_SRCS})
  add_dependencies(${TARGET} ${deps} ${args_DEPS})
else()
  cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endif()
foreach(cc_file ${args_SRCS})
file(APPEND ${__lite_cc_files} "${cc_file}\n")
endforeach()
# collect targets need to compile for lite
add_dependencies(lite_compile_deps ${TARGET})
if (args_SRCS)
add_dependencies(lite_compile_deps ${TARGET})
endif()
# register a library name.
file(APPEND ${offline_lib_registry_file} "${TARGET}\n")
......@@ -224,9 +240,9 @@ add_custom_target(publish_inference_cxx_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/paddle/fluid/lite/api/libpaddle_api_full.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
)
add_dependencies(publish_inference_cxx_lib paddle_api_full)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_lite publish_inference_cxx_lib)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
......@@ -235,9 +251,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
add_custom_target(publish_inference_mobile_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/mobile/lib"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/mobile/include"
COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/paddle/fluid/lite/api/libpaddle_api_light.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/mobile/include"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/mobile/lib"
)
add_dependencies(publish_inference_mobile_lib paddle_api_light)
add_dependencies(publish_inference_mobile_lib paddle_api_light bundle_light_api)
add_dependencies(publish_inference_lite publish_inference_mobile_lib)
endif()
......@@ -102,18 +102,39 @@ lite_cc_test(test_apis_lite SRCS apis_test.cc
lite_cc_library(paddle_api_lite SRCS paddle_api.cc DEPS op_params_lite)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite)
#-----------------------------------------------------------------------------------------------------
# The final inference library for both CxxConfig and MobileConfig.
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite
${ops_lite}
ARM_DEPS ${arm_kernels}
)
# The final inference library for just MobileConfig.
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api_lite paddle_api_lite)
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
#-----------------------------------------------------------------------------------------------------
lite_cc_test(test_paddle_api_lite SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
${ops_lite}
ARM_DEPS ${arm_kernels}
X86_DEPS ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
lite_cc_test(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light
${ops_lite}
ARM_DEPS ${arm_kernels}
X86_DEPS ${x86_kernels})
if (WITH_TESTING)
add_dependencies(test_paddle_api_lite extern_lite_download_lite_naive_model_tar_gz)
endif()
if (LITE_WITH_JAVA AND LITE_WITH_ARM)
add_subdirectory(android)
endif()
#lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
#X86_DEPS operator
#DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
......
# Descend into the JNI bindings only when both Java and ARM support are on.
if (NOT (LITE_WITH_JAVA AND LITE_WITH_ARM))
  return()
endif()
add_subdirectory(jni)
/PaddleListTest.class
/PaddleLite.class
/bin/
# Android JNI bindings for Paddle-Lite: builds PaddlePredictor.jar, generates
# the JNI header with javah, and builds the paddle_lite_jni native module.
if ((NOT LITE_WITH_ARM) OR (NOT LITE_WITH_JAVA))
  return()
endif()
include(UseJava)
find_package(Java REQUIRED)
# We are only interested in finding jni.h: we do not care about extended JVM
# functionality or the AWT library.
set(JAVA_AWT_LIBRARY NotNeeded)
set(JAVA_JVM_LIBRARY NotNeeded)
set(JAVA_INCLUDE_PATH2 NotNeeded)
set(JAVA_AWT_INCLUDE_PATH NotNeeded)
find_package(JNI REQUIRED)
# Generate PaddlePredictor.jar
include_directories(${JNI_INCLUDE_DIRS})
add_jar(PaddlePredictor
        src/com/baidu/paddle/lite/PaddlePredictor.java
        src/com/baidu/paddle/lite/Place.java)
get_target_property(_jarFile PaddlePredictor JAR_FILE)
get_target_property(_classDir PaddlePredictor CLASSDIR)
set(_stubDir "${CMAKE_CURRENT_BINARY_DIR}")
# Generate paddle_lite_jni.h from the compiled Java class.
# NOTE(review): javah was removed in JDK 10 -- confirm the toolchain pins an
# older JDK, or migrate to `javac -h`.
add_custom_target(
    paddle_lite_jni_header ALL
    COMMAND ${Java_JAVAH_EXECUTABLE} -verbose
            -classpath ${_classDir}
            -o paddle_lite_jni.h
            -jni
            com.baidu.paddle.lite.PaddlePredictor
    DEPENDS PaddlePredictor
)
# Generate paddle_lite_jni.so
include_directories(${JNI_INCLUDE_DIRS} ${_classDir} ${_stubDir})
lite_cc_library(paddle_lite_jni MODULE SRCS paddle_lite_jni.cc
                DEPS light_api_lite cxx_api_lite
                paddle_api_full paddle_api_lite paddle_api_light op_registry_lite
                ${ops_lite} ${lite_kernel_deps}
                ARM_DEPS ${arm_kernels})
if (APPLE)
  # MacOS only accepts JNI lib ends with .jnilib or .dylib
  set_target_properties(paddle_lite_jni PROPERTIES SUFFIX ".jnilib")
elseif (WIN32)
  # Windows only accepts JNI lib ends with .dll
  set_target_properties(paddle_lite_jni PROPERTIES SUFFIX ".dll")
endif()
target_link_libraries(paddle_lite_jni light_api_lite cxx_api_lite
                      paddle_api_full paddle_api_lite paddle_api_light op_registry_lite
                      ${ops_lite} ${arm_kernels} ${lite_kernel_deps})
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/api/android/jni/paddle_lite_jni.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/api/paddle_api.h"
#include "paddle/fluid/lite/api/paddle_lite_factory_helper.h"
#include "paddle/fluid/lite/api/paddle_place.h"
#include "paddle/fluid/lite/api/paddle_use_kernels.h"
#include "paddle/fluid/lite/api/paddle_use_ops.h"
#include "paddle/fluid/lite/api/paddle_use_passes.h"
#include "paddle/fluid/lite/kernels/arm/activation_compute.h"
#include "paddle/fluid/lite/kernels/arm/batch_norm_compute.h"
#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
#include "paddle/fluid/lite/kernels/arm/concat_compute.h"
#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
#include "paddle/fluid/lite/kernels/arm/dropout_compute.h"
#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h"
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include "paddle/fluid/lite/kernels/arm/mul_compute.h"
#include "paddle/fluid/lite/kernels/arm/pool_compute.h"
#include "paddle/fluid/lite/kernels/arm/scale_compute.h"
#include "paddle/fluid/lite/kernels/arm/softmax_compute.h"
#include "paddle/fluid/lite/kernels/arm/split_compute.h"
#include "paddle/fluid/lite/kernels/arm/transpose_compute.h"
// Instantiates a throwaway unique_ptr of the given ARM kernel class. The
// object itself is unused; per the comment on use_arm_kernels, without these
// instantiations KernelRegistor reportedly throws a null pointer error.
#define ARM_KERNEL_POINTER(kernel_class_name__) \
std::unique_ptr<paddle::lite::kernels::arm::kernel_class_name__> \
p##kernel_class_name__( \
new paddle::lite::kernels::arm::kernel_class_name__);
#ifdef __cplusplus
extern "C" {
#endif
using paddle::lite_api::CxxConfig;
using paddle::lite_api::MobileConfig;
using paddle::lite_api::PaddlePredictor;
using paddle::lite_api::Place;
using paddle::lite_api::Tensor;

// The single process-wide predictor behind every JNI entry point: the
// load*Model functions create it, clear() resets it.
// NOTE(review): accessed without synchronization -- confirm the Java side
// serializes calls when multiple threads use PaddlePredictor.
static std::shared_ptr<PaddlePredictor> predictor;
/**
 * For reasons not yet understood, we have to instantiate a pointer to each
 * kernel class first; otherwise KernelRegistor throws a null pointer error.
 */
// Force-instantiates one object of every ARM kernel class (see the comment
// above); called by both load paths before a predictor is created.
static void use_arm_kernels() {
ARM_KERNEL_POINTER(BatchNormCompute);
ARM_KERNEL_POINTER(CalibComputeFp32ToInt8);
ARM_KERNEL_POINTER(CalibComputeInt8ToFp32);
ARM_KERNEL_POINTER(ConvCompute);
ARM_KERNEL_POINTER(ConcatCompute);
ARM_KERNEL_POINTER(ElementwiseAddCompute);
ARM_KERNEL_POINTER(DropoutCompute);
ARM_KERNEL_POINTER(FcCompute);
ARM_KERNEL_POINTER(MulCompute);
ARM_KERNEL_POINTER(PoolCompute);
ARM_KERNEL_POINTER(ReluCompute);
ARM_KERNEL_POINTER(ScaleCompute);
ARM_KERNEL_POINTER(SoftmaxCompute);
ARM_KERNEL_POINTER(SplitCompute);
ARM_KERNEL_POINTER(TransposeCompute);
ARM_KERNEL_POINTER(Transpose2Compute);
}
// Converts a Java String (UTF-16 internally) into a std::string of UTF-8
// bytes by calling String.getBytes("UTF-8") on the Java side, so the result
// matches Java's own encoder. A null jstring converts to "".
inline std::string jstring_to_cpp_string(JNIEnv *env, jstring jstr) {
// In java, a unicode char will be encoded using 2 bytes (utf16).
// so jstring will contain characters utf16. std::string in c++ is
// essentially a string of bytes, not characters, so if we want to
// pass jstring from JNI to c++, we have convert utf16 to bytes.
if (!jstr) {
return "";
}
const jclass stringClass = env->GetObjectClass(jstr);
const jmethodID getBytes =
env->GetMethodID(stringClass, "getBytes", "(Ljava/lang/String;)[B");
const jbyteArray stringJbytes = (jbyteArray)env->CallObjectMethod(
jstr, getBytes, env->NewStringUTF("UTF-8"));
size_t length = (size_t)env->GetArrayLength(stringJbytes);
jbyte *pBytes = env->GetByteArrayElements(stringJbytes, NULL);
std::string ret = std::string(reinterpret_cast<char *>(pBytes), length);
// JNI_ABORT: release the pinned/copied bytes without writing back.
env->ReleaseByteArrayElements(stringJbytes, pBytes, JNI_ABORT);
env->DeleteLocalRef(stringJbytes);
env->DeleteLocalRef(stringClass);
return ret;
}
// Copies `len` floats from a native buffer into a newly allocated Java
// float[].
inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env, const float *buf,
                                            int64_t len) {
  jfloatArray java_arr = env->NewFloatArray(len);
  env->SetFloatArrayRegion(java_arr, 0, len, buf);
  return java_arr;
}
// Copies `len` ints from a native buffer into a newly allocated Java int[].
inline jintArray cpp_array_to_jintarray(JNIEnv *env, const int *buf,
                                        int64_t len) {
  jintArray java_arr = env->NewIntArray(len);
  env->SetIntArrayRegion(java_arr, 0, len, buf);
  return java_arr;
}
// Copies `len` bytes (int8) from a native buffer into a newly allocated Java
// byte[].
inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env, const int8_t *buf,
                                          int64_t len) {
  jbyteArray java_arr = env->NewByteArray(len);
  env->SetByteArrayRegion(java_arr, 0, len, buf);
  return java_arr;
}
// Widens a Java int[] into a C++ vector<int64_t> (used for tensor dims).
inline std::vector<int64_t> jintarray_to_int64_vector(JNIEnv *env,
                                                      jintArray dims) {
  const int count = env->GetArrayLength(dims);
  jint *elems = env->GetIntArrayElements(dims, nullptr);
  std::vector<int64_t> result(elems, elems + count);
  // Mode 0: copy back (no-op here, we never wrote) and free the native copy.
  env->ReleaseIntArrayElements(dims, elems, 0);
  return result;
}
/**
* Converts Java com.baidu.paddle.lite.Place to c++ paddle::lite_api::Place.
*/
// Converts a Java com.baidu.paddle.lite.Place into the C++ lite_api::Place
// by calling its four int accessors and casting to the lite_api enums.
// NOTE(review): relies on the Java enum int values staying in sync with the
// C++ TargetType/PrecisionType/DataLayoutType values -- confirm when either
// side changes.
inline static Place jplace_to_cpp_place(JNIEnv *env, jobject java_place) {
jclass place_jclazz = env->GetObjectClass(java_place);
jmethodID target_method =
env->GetMethodID(place_jclazz, "getTargetInt", "()I");
jmethodID precision_method =
env->GetMethodID(place_jclazz, "getPrecisionInt", "()I");
jmethodID data_layout_method =
env->GetMethodID(place_jclazz, "getDataLayoutInt", "()I");
jmethodID device_method = env->GetMethodID(place_jclazz, "getDevice", "()I");
int target = env->CallIntMethod(java_place, target_method);
int precision = env->CallIntMethod(java_place, precision_method);
int data_layout = env->CallIntMethod(java_place, data_layout_method);
int device = env->CallIntMethod(java_place, device_method);
return Place(static_cast<paddle::lite_api::TargetType>(target),
static_cast<paddle::lite_api::PrecisionType>(precision),
static_cast<paddle::lite_api::DataLayoutType>(data_layout),
device);
}
inline static int64_t product(const std::vector<int64_t> &vec) {
if (vec.empty()) {
return 0;
}
int64_t result = 1;
for (int64_t d : vec) {
result *= d;
}
return result;
}
// JNI: PaddlePredictor.loadCxxModel(String, Place, Place[]).
// Builds a CxxConfig from the Java arguments and creates the global
// predictor. Returns JNI_FALSE if a predictor is already loaded or creation
// fails.
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_loadCxxModel(
JNIEnv *env, jclass thiz, jstring model_path, jobject preferred_place,
jobjectArray valid_places) {
if (predictor != nullptr) {
return JNI_FALSE;
}
use_arm_kernels();
// Convert the Java Place[] element-by-element.
int valid_place_count = env->GetArrayLength(valid_places);
std::vector<Place> cpp_valid_places;
for (int i = 0; i < valid_place_count; ++i) {
jobject jplace = env->GetObjectArrayElement(valid_places, i);
cpp_valid_places.push_back(jplace_to_cpp_place(env, jplace));
}
CxxConfig config;
config.set_model_dir(jstring_to_cpp_string(env, model_path));
config.set_preferred_place(jplace_to_cpp_place(env, preferred_place));
config.set_valid_places(cpp_valid_places);
predictor = paddle::lite_api::CreatePaddlePredictor(config);
return predictor == nullptr ? JNI_FALSE : JNI_TRUE;
}
// JNI: PaddlePredictor.loadMobileModel(String).
// Creates the global predictor from an already-optimized model dir.
// Returns JNI_FALSE if a predictor is already loaded or creation fails.
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_loadMobileModel(JNIEnv *env,
jclass thiz,
jstring model_path) {
if (predictor != nullptr) {
return JNI_FALSE;
}
use_arm_kernels();
MobileConfig config;
config.set_model_dir(jstring_to_cpp_string(env, model_path));
predictor = paddle::lite_api::CreatePaddlePredictor(config);
return predictor == nullptr ? JNI_FALSE : JNI_TRUE;
}
// JNI: PaddlePredictor.saveOptimizedModel(String).
// Saves the loaded model after optimization passes; JNI_FALSE when no model
// is loaded.
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_saveOptimizedModel(
JNIEnv *env, jclass thiz, jstring model_path) {
if (predictor == nullptr) {
return JNI_FALSE;
}
predictor->SaveOptimizedModel(jstring_to_cpp_string(env, model_path));
return JNI_TRUE;
}
// JNI: PaddlePredictor.clear().
// Releases the global predictor; returns JNI_FALSE when none was loaded.
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_clear(JNIEnv *env, jclass thiz) {
if (predictor == nullptr) {
return JNI_FALSE;
}
predictor.reset();
return JNI_TRUE;
}
// JNI: PaddlePredictor.setInput(int, int[], float[]).
// Resizes the offset-th input tensor to `dims` and copies `buf` into it.
// Returns JNI_FALSE when no model is loaded or buf's length does not equal
// product(dims).
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3F(
    JNIEnv *env, jclass thiz, jint offset, jintArray dims, jfloatArray buf) {
  if (predictor == nullptr) {
    return JNI_FALSE;
  }
  std::vector<int64_t> ddim = jintarray_to_int64_vector(env, dims);
  int len = env->GetArrayLength(buf);
  if ((int64_t)len != product(ddim)) {
    return JNI_FALSE;
  }
  float *buffer = env->GetFloatArrayElements(buf, nullptr);
  std::unique_ptr<Tensor> tensor =
      predictor->GetInput(static_cast<int>(offset));
  tensor->Resize(ddim);
  float *input = tensor->mutable_data<float>();
  for (int i = 0; i < len; ++i) {
    input[i] = buffer[i];
  }
  // FIX: the original never released the elements returned by
  // GetFloatArrayElements, leaking the pinned/copied array on every call.
  // JNI_ABORT frees the native copy without writing back (read-only use).
  env->ReleaseFloatArrayElements(buf, buffer, JNI_ABORT);
  return JNI_TRUE;
}
// JNI: PaddlePredictor.setInput(int, int[], byte[]).
// Resizes the offset-th input tensor to `dims` and copies `buf` into it as
// int8 data. Returns JNI_FALSE when no model is loaded or buf's length does
// not equal product(dims).
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3B(
    JNIEnv *env, jclass thiz, jint offset, jintArray dims, jbyteArray buf) {
  if (predictor == nullptr) {
    return JNI_FALSE;
  }
  std::vector<int64_t> ddim = jintarray_to_int64_vector(env, dims);
  int len = env->GetArrayLength(buf);
  if ((int64_t)len != product(ddim)) {
    return JNI_FALSE;
  }
  jbyte *buffer = env->GetByteArrayElements(buf, nullptr);
  std::unique_ptr<Tensor> tensor =
      predictor->GetInput(static_cast<int>(offset));
  tensor->Resize(ddim);
  int8_t *input = tensor->mutable_data<int8_t>();
  for (int i = 0; i < len; ++i) {
    input[i] = (int8_t)buffer[i];
  }
  // FIX: the original never released the elements returned by
  // GetByteArrayElements, leaking the pinned/copied array on every call.
  // JNI_ABORT frees the native copy without writing back (read-only use).
  env->ReleaseByteArrayElements(buf, buffer, JNI_ABORT);
  return JNI_TRUE;
}
// JNI: PaddlePredictor.run().
// Executes one inference pass; JNI_FALSE when no model is loaded.
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_run(JNIEnv *, jclass) {
if (predictor == nullptr) {
return JNI_FALSE;
}
predictor->Run();
return JNI_TRUE;
}
// JNI: PaddlePredictor.getFloatOutput(int).
// Returns the offset-th output tensor as a Java float[], or null when no
// model is loaded.
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_getFloatOutput(JNIEnv *env,
                                                          jclass thiz,
                                                          jint offset) {
  // FIX: the original dereferenced `predictor` unconditionally; every other
  // entry point guards the unloaded case. Returning nullptr surfaces as
  // Java null instead of crashing the JVM.
  if (predictor == nullptr) {
    return nullptr;
  }
  std::unique_ptr<const Tensor> tensor =
      predictor->GetOutput(static_cast<int>(offset));
  int64_t len = product(tensor->shape());
  return cpp_array_to_jfloatarray(env, tensor->data<float>(), len);
}
// JNI: PaddlePredictor.getByteOutput(int).
// Returns the offset-th output tensor as a Java byte[], or null when no
// model is loaded.
JNIEXPORT jbyteArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_getByteOutput(JNIEnv *env,
                                                         jclass thiz,
                                                         jint offset) {
  // FIX: guard against a missing predictor (the original dereferenced it
  // unconditionally, unlike the other entry points).
  if (predictor == nullptr) {
    return nullptr;
  }
  std::unique_ptr<const Tensor> tensor =
      predictor->GetOutput(static_cast<int>(offset));
  int64_t len = product(tensor->shape());
  return cpp_array_to_jbytearray(env, tensor->data<int8_t>(), len);
}
// JNI: PaddlePredictor.fetchFloat(String).
// Returns the named tensor's values as a Java float[], or null when no
// model is loaded.
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_fetchFloat(JNIEnv *env, jclass thiz,
                                                      jstring name) {
  // FIX: guard against a missing predictor (the original dereferenced it
  // unconditionally, unlike the other entry points).
  if (predictor == nullptr) {
    return nullptr;
  }
  std::string cpp_name = jstring_to_cpp_string(env, name);
  std::unique_ptr<const Tensor> tensor = predictor->GetTensor(cpp_name);
  int64_t len = product(tensor->shape());
  return cpp_array_to_jfloatarray(env, tensor->data<float>(), len);
}
// JNI: PaddlePredictor.fetchByte(String).
// Returns the named tensor's values as a Java byte[], or null when no model
// is loaded.
JNIEXPORT jbyteArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_fetchByte(JNIEnv *env, jclass thiz,
                                                     jstring name) {
  // FIX: guard against a missing predictor (the original dereferenced it
  // unconditionally, unlike the other entry points).
  if (predictor == nullptr) {
    return nullptr;
  }
  std::string cpp_name = jstring_to_cpp_string(env, name);
  std::unique_ptr<const Tensor> tensor = predictor->GetTensor(cpp_name);
  int64_t len = product(tensor->shape());
  return cpp_array_to_jbytearray(env, tensor->data<int8_t>(), len);
}
#ifdef __cplusplus
}
#endif
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class com_baidu_paddle_lite_PaddlePredictor */
#ifndef PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
#define PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: loadCxxModel
* Signature:
* (Ljava/lang/String;Lcom/baidu/paddle/lite/Place;[Lcom/baidu/paddle/lite/Place;)Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_loadCxxModel(JNIEnv *, jclass,
jstring, jobject,
jobjectArray);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: loadMobileModel
* Signature: (Ljava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_loadMobileModel(JNIEnv *, jclass,
jstring);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: saveOptimizedModel
* Signature: (Ljava/lang/String;)Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_saveOptimizedModel(JNIEnv *, jclass,
jstring);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: clear
* Signature: ()Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_clear(JNIEnv *, jclass);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: setInput
* Signature: (I[I[F)Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3F(JNIEnv *, jclass,
jint, jintArray,
jfloatArray);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: setInput
* Signature: (I[I[B)Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3B(JNIEnv *, jclass,
jint, jintArray,
jbyteArray);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: run
* Signature: ()Z
*/
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_run(JNIEnv *, jclass);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: getFloatOutput
* Signature: (I)[F
*/
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_getFloatOutput(JNIEnv *, jclass,
jint);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: getByteOutput
* Signature: (I)[B
*/
JNIEXPORT jbyteArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_getByteOutput(JNIEnv *, jclass,
jint);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: fetchFloat
* Signature: (Ljava/lang/String;)[F
*/
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_fetchFloat(JNIEnv *, jclass,
jstring);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: fetchByte
* Signature: (Ljava/lang/String;)[B
*/
JNIEXPORT jbyteArray JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_fetchByte(JNIEnv *, jclass, jstring);
#ifdef __cplusplus
}
#endif
#endif // PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
package com.baidu.paddle.lite;
/** Java Native Interface (JNI) class for Paddle Lite APIs */
public class PaddlePredictor {
/** Name of the C++ JNI shared library, loaded in the static initializer. */
private final static String JNI_LIB_NAME = "paddle_lite_jni";
/* Load the C++ JNI library once, when this class is first referenced. */
static {
System.loadLibrary(JNI_LIB_NAME);
}
/**
 * Loads the cxx model, i.e. the model before the optimizing passes. The cxx
 * model lets callers manage hardware resources: each Place carries Target,
 * DataLayout, Precision, and Device ID. See the Paddle-Mobile documentation
 * for details on the four fields.
 *
 * @param modelPath      model file path
 * @param preferredPlace preferred place to run the cxx model
 * @param validPlaces    valid places to run the cxx model
 * @return true if loaded successfully
 */
public static native boolean loadCxxModel(String modelPath, Place preferredPlace, Place[] validPlaces);
/**
 * Loads the mobile lite model, i.e. the model after the optimizing passes.
 *
 * @param modelPath model file path
 * @return true if loaded successfully
 */
public static native boolean loadMobileModel(String modelPath);
/**
 * Saves an optimized model, which can later be loaded by
 * {@link #loadMobileModel}.
 *
 * @param modelPath model file path
 * @return true if saved successfully
 */
public static native boolean saveOptimizedModel(String modelPath);
/**
 * Clears the currently loaded model.
 *
 * @return true if a loaded model has been cleared
 */
public static native boolean clear();
/**
 * Sets float input data on the offset-th column of feed data.
 *
 * @param offset the offset-th column of feed data to set
 * @param dims   dimensions of the input data
 * @param buf    the input data
 * @return true if set successfully
 */
public static native boolean setInput(int offset, int[] dims, float[] buf);
/**
 * Sets byte (int8 on the C++ side) input data on the offset-th column of
 * feed data.
 *
 * @param offset the offset-th column of feed data to set
 * @param dims   dimensions of the input data
 * @param buf    the input data
 * @return true if set successfully
 */
public static native boolean setInput(int offset, int[] dims, byte[] buf);
/**
 * Runs the loaded predict model.
 *
 * @return true if run successfully
 */
public static native boolean run();
/**
 * Gets the offset-th column of output data as float.
 *
 * @param offset the offset-th column of output data to return
 * @return model predict output
 */
public static native float[] getFloatOutput(int offset);
/**
 * Gets the offset-th column of output data as byte (int8 on the C++ side).
 *
 * @param offset the offset-th column of output data to return
 * @return model predict output
 */
public static native byte[] getByteOutput(int offset);
/**
 * Fetches a tensor's value as float data.
 *
 * @param name the tensor's name
 * @return values of the tensor
 */
public static native float[] fetchFloat(String name);
/**
 * Fetches a tensor's value as byte data (int8 on the C++ side).
 *
 * @param name the tensor's name
 * @return values of the tensor
 */
public static native byte[] fetchByte(String name);
/**
 * Smoke-test entry point: reaching the print proves the native library
 * loaded in the static initializer.
 */
public static void main(String[] args) {
System.out.println("Load native library successfully");
}
}
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
package com.baidu.paddle.lite;
/**
* Place specifies the execution context of a Kernel or input/output for a
* kernel. It is used to make the analysis of the MIR more clear and accurate.
*/
/**
 * Place describes the execution context of a kernel or of a kernel's
 * input/output: hardware target, numeric precision, data layout, and device
 * id. It is used to make the analysis of the MIR clearer and more accurate.
 */
public class Place {

    /** Hardware target. Int values must stay in sync with the C++ enum. */
    public enum TargetType {
        UNKNOWN(0), HOST(1), X86(2), CUDA(3), ARM(4), OPEN_CL(5), ANY(6);

        public final int value;

        private TargetType(int value) {
            this.value = value;
        }
    }

    /** Numeric precision. Int values must stay in sync with the C++ enum. */
    public enum PrecisionType {
        UNKNOWN(0), FLOAT(1), INT8(2), INT32(3), ANY(4);

        public final int value;

        private PrecisionType(int value) {
            this.value = value;
        }
    }

    /** Data layout. Int values must stay in sync with the C++ enum. */
    public enum DataLayoutType {
        UNKNOWN(0), NCHW(1), ANY(2);

        public final int value;

        private DataLayoutType(int value) {
            this.value = value;
        }
    }

    public TargetType target;
    public PrecisionType precision;
    public DataLayoutType layout;
    public int device;

    /** All fields UNKNOWN, device 0. */
    public Place() {
        this(TargetType.UNKNOWN, PrecisionType.UNKNOWN, DataLayoutType.UNKNOWN, 0);
    }

    /** Defaults to FLOAT precision, NCHW layout, device 0. */
    public Place(TargetType target) {
        this(target, PrecisionType.FLOAT, DataLayoutType.NCHW, 0);
    }

    /** Defaults to NCHW layout, device 0. */
    public Place(TargetType target, PrecisionType precision) {
        this(target, precision, DataLayoutType.NCHW, 0);
    }

    /** Defaults to device 0. */
    public Place(TargetType target, PrecisionType precision, DataLayoutType layout) {
        this(target, precision, layout, 0);
    }

    public Place(TargetType target, PrecisionType precision, DataLayoutType layout, int device) {
        this.target = target;
        this.precision = precision;
        this.layout = layout;
        this.device = device;
    }

    /** A place is valid when none of target/precision/layout is UNKNOWN. */
    public boolean isValid() {
        boolean anyUnknown = target == TargetType.UNKNOWN
                || precision == PrecisionType.UNKNOWN
                || layout == DataLayoutType.UNKNOWN;
        return !anyUnknown;
    }

    /** Int value of the target, for handing across JNI. */
    public int getTargetInt() {
        return target.value;
    }

    /** Int value of the precision, for handing across JNI. */
    public int getPrecisionInt() {
        return precision.value;
    }

    /** Int value of the layout, for handing across JNI. */
    public int getDataLayoutInt() {
        return layout.value;
    }

    /** Device id. */
    public int getDevice() {
        return device;
    }
}
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
package com.baidu.paddle.lite;
import org.junit.jupiter.api.Test;
import static org.junit.Assert.assertEquals;
class PaddlePredictorTest {

    // NOTE(review): the imports mix JUnit 5's @Test with JUnit 4's
    // org.junit.Assert -- pick one framework (both must currently be on the
    // classpath for this to compile).
    /**
     * End-to-end smoke test: loads the default mobile model, feeds a
     * 100x100 ramp input, runs inference, and checks the first outputs.
     * Requires the paddle_lite_jni native library.
     */
    @Test
    public void run_defaultModel() {
        PaddlePredictor.loadMobileModel("");

        float[] inputBuffer = new float[10000];
        for (int i = 0; i < 10000; ++i) {
            inputBuffer[i] = i;
        }
        int[] dims = { 100, 100 };
        PaddlePredictor.setInput(0, dims, inputBuffer);
        PaddlePredictor.run();
        float[] output = PaddlePredictor.getFloatOutput(0);

        // FIX: JUnit's assertEquals takes (expected, actual[, delta]); the
        // original passed them swapped, which corrupts failure messages.
        assertEquals(50000, output.length);
        assertEquals(50.2132f, output[0], 1e-3f);
        assertEquals(-28.8729f, output[1], 1e-3f);

        PaddlePredictor.clear();
    }
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <string>
#include <vector>
#include "paddle/fluid/lite/api/paddle_api.h"
#include "paddle/fluid/lite/api/paddle_use_kernels.h"
#include "paddle/fluid/lite/api/paddle_use_ops.h"
#include "paddle/fluid/lite/api/paddle_use_passes.h"
#include "paddle/fluid/lite/api/test_helper.h"
#include "paddle/fluid/lite/core/cpu_info.h"
#include "paddle/fluid/lite/utils/string.h"
namespace paddle {
namespace lite_api {
// Loads the model from |load_model_dir|, runs a single inference with a
// deterministic ramp input of shape |input_shape|, and saves the
// optimized model to |save_optimized_model_dir| (any previous copy at
// that path is removed first).
void OutputOptModel(const std::string& load_model_dir,
                    const std::string& save_optimized_model_dir,
                    const std::vector<int64_t>& input_shape) {
  lite_api::CxxConfig config;
  config.set_model_dir(load_model_dir);
  config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
  config.set_valid_places({
      Place{TARGET(kX86), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kFloat)},
  });
  auto predictor = lite_api::CreatePaddlePredictor(config);

  auto input_tensor = predictor->GetInput(0);
  input_tensor->Resize(input_shape);
  auto* data = input_tensor->mutable_data<float>();
  // Fix: accumulate in int64_t (dims are int64_t; a 32-bit int can
  // overflow for large shapes) and use size_t for the index to avoid
  // the signed/unsigned comparison of the original.
  int64_t input_num = 1;
  for (size_t i = 0; i < input_shape.size(); ++i) {
    input_num *= input_shape[i];
  }
  // Fill the input with a deterministic ramp so runs are reproducible.
  for (int64_t i = 0; i < input_num; ++i) {
    data[i] = i;
  }
  predictor->Run();

  // Delete any stale optimized model before saving the new one.
  int ret = system(
      paddle::lite::string_format("rm -rf %s", save_optimized_model_dir.c_str())
          .c_str());
  if (ret == 0) {
    LOG(INFO) << "delete old optimized model " << save_optimized_model_dir;
  }
  predictor->SaveOptimizedModel(save_optimized_model_dir);
  LOG(INFO) << "Load model from " << load_model_dir;
  LOG(INFO) << "Save optimized model to " << save_optimized_model_dir;
}
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// Benchmarks the optimized (mobile) model in |model_dir|:
// |warmup_times| untimed runs followed by |repeat| timed runs with
// |thread_num| threads; logs the average latency and the first output
// values.
void Run(const std::vector<int64_t>& input_shape, const std::string& model_dir,
         const int repeat, const int thread_num, const int warmup_times = 10) {
  lite::DeviceInfo::Init();
  lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
  lite_api::MobileConfig config;
  config.set_model_dir(model_dir);
  auto predictor = lite_api::CreatePaddlePredictor(config);

  auto input_tensor = predictor->GetInput(0);
  input_tensor->Resize(input_shape);
  float* input_data = input_tensor->mutable_data<float>();
  // int64_t accumulator + size_t index: matches the dim type and avoids
  // the signed/unsigned comparison of the original.
  int64_t input_num = 1;
  for (size_t i = 0; i < input_shape.size(); ++i) {
    input_num *= input_shape[i];
  }
  for (int64_t i = 0; i < input_num; ++i) {
    input_data[i] = i;
  }

  for (int i = 0; i < warmup_times; ++i) {
    predictor->Run();
  }
  auto start = lite::GetCurrentUS();
  for (int i = 0; i < repeat; ++i) {
    predictor->Run();
  }
  auto end = lite::GetCurrentUS();

  LOG(INFO) << "================== Speed Report ===================";
  // Fix: guard the division so repeat == 0 does not divide by zero.
  LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num
            << ", warmup: " << warmup_times << ", repeats: " << repeat
            << ", spend "
            << (repeat > 0 ? (end - start) / repeat / 1000.0 : 0.0)
            << " ms in average.";

  auto output = predictor->GetOutput(0);
  auto output_shape = output->shape();
  int64_t output_num = 1;
  for (size_t i = 0; i < output_shape.size(); ++i) {
    output_num *= output_shape[i];
  }
  // Fix: the original read out[0] and out[1] unconditionally, which is
  // out of bounds for outputs with fewer than two elements.
  const float* out = output->data<float>();
  if (output_num > 0) {
    LOG(INFO) << "out " << out[0];
  }
  if (output_num > 1) {
    LOG(INFO) << "out " << out[1];
  }
  LOG(INFO) << "output_num: " << output_num;
}
#endif
} // namespace lite_api
} // namespace paddle
int main(int argc, char** argv) {
if (argc < 4) {
LOG(INFO) << "usage: " << argv[0] << " <model_dir> <repeat> <thread_num>";
exit(0);
}
std::string load_model_dir = argv[1];
std::string save_optimized_model_dir = load_model_dir + "opt2";
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
int repeat = std::stoi(argv[2]);
int thread_num = std::stoi(argv[3]);
#endif
std::vector<int64_t> input_shape{1, 3, 224, 224};
// Output optimized model
paddle::lite_api::OutputOptModel(load_model_dir, save_optimized_model_dir,
input_shape);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// Run inference using optimized model
paddle::lite_api::Run(input_shape, save_optimized_model_dir, repeat,
thread_num);
#endif
return 0;
}
......@@ -15,6 +15,7 @@
#pragma once
#include <gflags/gflags.h>
#include <sys/time.h>
#include <time.h>
// for eval
......
......@@ -469,6 +469,389 @@ TEST(conv_arm_int8, int8_fp32) {
}
}
// Exhaustive check of the ARM int8 direct-convolution kernel
// (DirectConvInt8) against a naive reference (conv_compute_ref):
// fp32 data is quantized to int8, the reference produces an int32
// baseline, and the optimized kernel is verified for int32, int8 and
// fp32 output precisions over a grid of shapes, strides and bias flags.
TEST(conv_direct_int8, compute) {
DeviceInfo::Init();
for (auto n : {1, 2}) {
for (auto ic : {1, 3, 8}) {
for (auto oc : {1, 3, 8}) {
for (auto ih : {5, 15, 28}) {
for (auto iw : {5, 15, 28}) {
for (auto flag_bias : {false, true}) {
// NOTE(review): flag_relu is iterated but never read -- param.fuse_relu
// is hard-coded to false below, so this loop only doubles the runtime.
// Confirm whether relu coverage was intended.
for (auto flag_relu : {false, true}) {
for (auto depthwise : {false, /*true*/}) {
for (auto dilation : {1}) {
for (auto stride : {1, 2}) {
for (auto padding : {1}) {
for (auto ks : {3}) {
int group = 1;
if (depthwise) { // depthwise convolution: one group per channel
group = oc = ic;
}
// Effective (dilated) kernel size and resulting output spatial dims.
const int dks = dilation * (ks - 1) + 1;
int oh = (ih + 2 * padding - dks) / stride + 1;
int ow = (iw + 2 * padding - dks) / stride + 1;
std::vector<int64_t> input_shape = {n, ic, ih, iw};
std::vector<int64_t> filter_shape = {oc, ic / group,
ks, ks};
std::vector<int64_t> bias_shape({1, oc, 1, 1});
std::vector<int64_t> output_shape({n, oc, oh, ow});
Tensor input_fp32, input_int8;
Tensor filter_fp32, filter_int8;
Tensor bias_int32;
Tensor output_int32_ref, output_int32;
Tensor output_fp32_ref, output_fp32;
Tensor output_int8_ref, output_int8;
input_fp32.Resize(input_shape);
input_int8.Resize(input_shape);
filter_fp32.Resize(filter_shape);
filter_int8.Resize(filter_shape);
bias_int32.Resize(bias_shape);
output_int32.Resize(output_shape);
output_int32_ref.Resize(output_shape);
output_fp32_ref.Resize(output_shape);
output_fp32.Resize(output_shape);
output_int8_ref.Resize(output_shape);
output_int8.Resize(output_shape);
float* input_fp32_data =
input_fp32.mutable_data<float>();
int8_t* input_int8_data =
input_int8.mutable_data<int8_t>();
float* filter_fp32_data =
filter_fp32.mutable_data<float>();
int8_t* filter_int8_data =
filter_int8.mutable_data<int8_t>();
int* bias_int32_data =
bias_int32.mutable_data<int32_t>();
// Deterministic fill: i % 10 scaled by (i % 3 - 1) in {-1, 0, 1}.
for (int i = 0; i < input_fp32.dims().production();
i++) {
input_fp32_data[i] = i % 10 * (i % 3 - 1);
}
for (int i = 0; i < filter_fp32.dims().production();
i++) {
filter_fp32_data[i] = i % 10 * (i % 3 - 1);
}
for (int i = 0; i < bias_int32.dims().production();
i++) {
bias_int32_data[i] = i % 10 * (i % 3 - 1);
}
// Quantize the input with a single per-tensor scale.
std::vector<float> in_scale;
lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
input_fp32, &in_scale, -1, 127.f);
lite::arm::math::trans_tensor_fp32_to_int8(
&input_fp32, &input_int8, in_scale[0]);
std::vector<float> w_scale;
lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
filter_fp32, &w_scale, -1, 127.f);
int axis_size = oc;
int inner_size = ic / group * ks * ks;
// NOTE(review): w_scale is immediately overwritten by the
// per-output-channel variant below, so the get_tensor_scale call
// above looks redundant -- confirm before removing.
w_scale = lite::arm::math::get_tensor_scale_n(
filter_fp32_data, axis_size, inner_size, 127.f);
lite::arm::math::fp32_to_int8(
filter_fp32_data, filter_int8_data,
w_scale.data(), axis_size, 1, inner_size);
operators::ConvParam param;
param.x = &input_int8;
param.filter = &filter_int8;
if (flag_bias) {
param.bias = &bias_int32;
}
param.fuse_relu = false;
param.paddings = std::vector<int>({padding, padding});
param.strides = std::vector<int>({stride, stride});
param.dilations =
std::vector<int>({dilation, dilation});
param.groups = group;
// Reference int32 result: baseline for all three output precisions.
param.output = &output_int32_ref;
conv_compute_ref<int8_t, int>(param);
int* output_int32_ref_data =
output_int32_ref.mutable_data<int>();
// ============ int8direct_int32 ============
param.output = &output_int32;
std::unique_ptr<KernelContext> ctx_int32(
new KernelContext);
lite::arm::math::DirectConvInt8<PRECISION(kInt32)>
int8direct_int32;
int8direct_int32.init(param,
&ctx_int32->As<ARMContext>());
int8direct_int32.create(param,
&ctx_int32->As<ARMContext>());
int8direct_int32.run(param);
int* output_int32_data =
output_int32.mutable_data<int>();
for (int i = 0; i < output_int32.dims().production();
i++) {
EXPECT_NEAR(output_int32_data[i],
output_int32_ref_data[i], 1e-3);
}
// ============ int8direct_int8 ============
// The int8 baseline is the reference int32 output requantized.
int8_t* output_int8_ref_data =
output_int8_ref.mutable_data<int8_t>();
lite::arm::math::trans_tensor_int32_to_int8(
&output_int32_ref, &output_int8_ref, in_scale[0],
1, w_scale);
param.output = &output_int8;
param.input_scale = in_scale[0];
param.output_scale = 1;
param.weight_scale = w_scale;
std::unique_ptr<KernelContext> ctx_int8(
new KernelContext);
lite::arm::math::DirectConvInt8<PRECISION(kInt8)>
int8direct_int8;
int8direct_int8.init(param,
&ctx_int8->As<ARMContext>());
int8direct_int8.create(param,
&ctx_int8->As<ARMContext>());
int8direct_int8.run(param);
int8_t* output_int8_data =
output_int8.mutable_data<int8_t>();
for (int i = 0; i < output_int8.dims().production();
i++) {
EXPECT_NEAR(output_int8_data[i],
output_int8_ref_data[i], 1e-3);
}
// ============ int8direct_float32 ============
// The fp32 baseline is the reference int32 output dequantized.
float* output_fp32_ref_data =
output_fp32_ref.mutable_data<float>();
lite::arm::math::trans_tensor_int32_to_fp32(
&output_int32_ref, &output_fp32_ref, in_scale[0],
w_scale);
param.output = &output_fp32;
param.input_scale = in_scale[0];
param.output_scale = 1;
param.weight_scale = w_scale;
std::unique_ptr<KernelContext> ctx_fp32(
new KernelContext);
lite::arm::math::DirectConvInt8<PRECISION(kFloat)>
int8direct_fp32;
int8direct_fp32.init(param,
&ctx_fp32->As<ARMContext>());
int8direct_fp32.create(param,
&ctx_fp32->As<ARMContext>());
int8direct_fp32.run(param);
float* output_fp32_data =
output_fp32.mutable_data<float>();
for (int i = 0; i < output_fp32.dims().production();
i++) {
EXPECT_NEAR(output_fp32_data[i],
output_fp32_ref_data[i], 1e-3);
}
}
}
}
}
}
}
}
}
}
}
}
}
}
// Same scheme as conv_direct_int8 but for the depthwise int8 kernel
// (DepthwiseConvInt8): only combinations supported by the optimized
// 3x3 (stride 1/2, pad 1) and 5x5 (stride 1, pad 2) depthwise paths
// are exercised; everything else is skipped.
TEST(conv_depthwise_int8, compute) {
DeviceInfo::Init();
for (auto n : {1, 2}) {
for (auto ic : {1, 3, 8}) {
for (auto ih : {5, 15, 28}) {
for (auto iw : {5, 15, 28}) {
for (auto flag_bias : {false, true}) {
// NOTE(review): flag_relu is never read (param.fuse_relu is fixed to
// false below); confirm whether relu coverage was intended.
for (auto flag_relu : {false, true}) {
for (auto dilation : {1}) {
for (auto stride : {1, 2}) {
for (auto padding : {1, 2}) {
for (auto ks : {3, /*5 */}) {
// Depthwise: one group per input channel, oc == ic.
int group = ic;
int oc = ic;
// Skip shapes the optimized depthwise kernels do not support.
bool flag_dw_3x3 = (ks == 3) && (padding == 1) &&
(stride == 1 || stride == 2);
bool flag_dw_5x5 =
(ks == 5 && stride == 1 && padding == 2);
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (!flag_dw) continue;
// Effective (dilated) kernel size and output spatial dims.
const int dks = dilation * (ks - 1) + 1;
int oh = (ih + 2 * padding - dks) / stride + 1;
int ow = (iw + 2 * padding - dks) / stride + 1;
std::vector<int64_t> input_shape = {n, ic, ih, iw};
std::vector<int64_t> filter_shape = {oc, ic / group, ks,
ks};
std::vector<int64_t> bias_shape({1, oc, 1, 1});
std::vector<int64_t> output_shape({n, oc, oh, ow});
Tensor input_fp32, input_int8;
Tensor filter_fp32, filter_int8;
Tensor bias_int32;
Tensor output_int32_ref, output_int32;
Tensor output_fp32_ref, output_fp32;
Tensor output_int8_ref, output_int8;
input_fp32.Resize(input_shape);
input_int8.Resize(input_shape);
filter_fp32.Resize(filter_shape);
filter_int8.Resize(filter_shape);
bias_int32.Resize(bias_shape);
output_int32.Resize(output_shape);
output_int32_ref.Resize(output_shape);
output_fp32_ref.Resize(output_shape);
output_fp32.Resize(output_shape);
output_int8_ref.Resize(output_shape);
output_int8.Resize(output_shape);
float* input_fp32_data = input_fp32.mutable_data<float>();
int8_t* input_int8_data =
input_int8.mutable_data<int8_t>();
float* filter_fp32_data =
filter_fp32.mutable_data<float>();
int8_t* filter_int8_data =
filter_int8.mutable_data<int8_t>();
int* bias_int32_data = bias_int32.mutable_data<int32_t>();
// Deterministic fill pattern, identical to the direct-conv test.
for (int i = 0; i < input_fp32.dims().production(); i++) {
input_fp32_data[i] = i % 10 * (i % 3 - 1);
}
for (int i = 0; i < filter_fp32.dims().production();
i++) {
filter_fp32_data[i] = i % 10 * (i % 3 - 1);
}
for (int i = 0; i < bias_int32.dims().production(); i++) {
bias_int32_data[i] = i % 10 * (i % 3 - 1);
}
// Per-tensor input scale, per-output-channel weight scales.
std::vector<float> in_scale;
lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
input_fp32, &in_scale, -1, 127.f);
lite::arm::math::trans_tensor_fp32_to_int8(
&input_fp32, &input_int8, in_scale[0]);
std::vector<float> w_scale;
lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
filter_fp32, &w_scale, -1, 127.f);
int axis_size = oc;
int inner_size = ic / group * ks * ks;
// NOTE(review): w_scale is immediately overwritten here; the
// get_tensor_scale call above looks redundant -- confirm.
w_scale = lite::arm::math::get_tensor_scale_n(
filter_fp32_data, axis_size, inner_size, 127.f);
lite::arm::math::fp32_to_int8(
filter_fp32_data, filter_int8_data, w_scale.data(),
axis_size, 1, inner_size);
operators::ConvParam param;
param.x = &input_int8;
param.filter = &filter_int8;
if (flag_bias) {
param.bias = &bias_int32;
}
param.fuse_relu = false;
param.paddings = std::vector<int>({padding, padding});
param.strides = std::vector<int>({stride, stride});
param.dilations = std::vector<int>({dilation, dilation});
param.groups = group;
// Reference int32 result: baseline for all output precisions.
param.output = &output_int32_ref;
conv_compute_ref<int8_t, int>(param);
int* output_int32_ref_data =
output_int32_ref.mutable_data<int>();
// ============ int8depthwise_int32 ============
param.output = &output_int32;
std::unique_ptr<KernelContext> ctx_int32(
new KernelContext);
lite::arm::math::DepthwiseConvInt8<PRECISION(kInt32)>
int8depthwise_int32;
int8depthwise_int32.init(param,
&ctx_int32->As<ARMContext>());
int8depthwise_int32.create(param,
&ctx_int32->As<ARMContext>());
int8depthwise_int32.run(param);
int* output_int32_data = output_int32.mutable_data<int>();
for (int i = 0; i < output_int32.dims().production();
i++) {
EXPECT_NEAR(output_int32_data[i],
output_int32_ref_data[i], 1e-3);
}
// ============ int8depthwise_int8============
// The int8 baseline is the reference int32 output requantized.
int8_t* output_int8_ref_data =
output_int8_ref.mutable_data<int8_t>();
lite::arm::math::trans_tensor_int32_to_int8(
&output_int32_ref, &output_int8_ref, in_scale[0], 1,
w_scale);
param.output = &output_int8;
param.input_scale = in_scale[0];
param.output_scale = 1;
param.weight_scale = w_scale;
std::unique_ptr<KernelContext> ctx_int8(
new KernelContext);
lite::arm::math::DepthwiseConvInt8<PRECISION(kInt8)>
int8depthwise_int8;
int8depthwise_int8.init(param,
&ctx_int8->As<ARMContext>());
int8depthwise_int8.create(param,
&ctx_int8->As<ARMContext>());
int8depthwise_int8.run(param);
int8_t* output_int8_data =
output_int8.mutable_data<int8_t>();
for (int i = 0; i < output_int8.dims().production();
i++) {
EXPECT_NEAR(output_int8_data[i],
output_int8_ref_data[i], 1e-3);
}
// ============int8depthwise_float32 ============
// The fp32 baseline is the reference int32 output dequantized.
float* output_fp32_ref_data =
output_fp32_ref.mutable_data<float>();
lite::arm::math::trans_tensor_int32_to_fp32(
&output_int32_ref, &output_fp32_ref, in_scale[0],
w_scale);
param.output = &output_fp32;
param.input_scale = in_scale[0];
param.output_scale = 1;
param.weight_scale = w_scale;
std::unique_ptr<KernelContext> ctx_fp32(
new KernelContext);
lite::arm::math::DepthwiseConvInt8<PRECISION(kFloat)>
int8depthwise_fp32;
int8depthwise_fp32.init(param,
&ctx_fp32->As<ARMContext>());
int8depthwise_fp32.create(param,
&ctx_fp32->As<ARMContext>());
int8depthwise_fp32.run(param);
float* output_fp32_data =
output_fp32.mutable_data<float>();
for (int i = 0; i < output_fp32.dims().production();
i++) {
EXPECT_NEAR(output_fp32_data[i],
output_fp32_ref_data[i], 1e-3);
}
}
}
}
}
}
}
}
}
}
}
}
TEST(conv_arm, compute) {
DeviceInfo::Init();
#if 1
......
......@@ -295,7 +295,7 @@ function test_arm {
echo "android do not need armv7hf"
return 0
fi
echo "test file: ${TESTS_FILE}"
for _test in $(cat $TESTS_FILE); do
test_arm_android $_test $port
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册