diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59f565014b59f1393243a892f81f2069edd6eb9e..f7a5bd1ef3a03e3fd5aad5bc17f51593d4247bfb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,6 +147,7 @@ endif()
 
 # for lite, both server and mobile framework.
 option(WITH_LITE "Enable lite framework" OFF)
+option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
 option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
 option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
 option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
diff --git a/cmake/lite.cmake b/cmake/lite.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..35d5f7e287128d9ffbb15ad795974be2556a13c4
--- /dev/null
+++ b/cmake/lite.cmake
@@ -0,0 +1,79 @@
+# Bundle several static libraries into one.
+function(bundle_static_library tgt_name bundled_tgt_name fake_target)
+  list(APPEND static_libs ${tgt_name})
+
+  function(_recursively_collect_dependencies input_target)
+    set(_input_link_libraries LINK_LIBRARIES)
+    get_target_property(_input_type ${input_target} TYPE)
+    if (${_input_type} STREQUAL "INTERFACE_LIBRARY")
+      set(_input_link_libraries INTERFACE_LINK_LIBRARIES)
+    endif()
+    get_target_property(public_dependencies ${input_target} ${_input_link_libraries})
+    foreach(dependency IN LISTS public_dependencies)
+      if(TARGET ${dependency})
+        get_target_property(alias ${dependency} ALIASED_TARGET)
+        if (TARGET ${alias})
+          set(dependency ${alias})
+        endif()
+        get_target_property(_type ${dependency} TYPE)
+        if (${_type} STREQUAL "STATIC_LIBRARY")
+          list(APPEND static_libs ${dependency})
+        endif()
+
+        get_property(library_already_added
+          GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency})
+        if (NOT library_already_added)
+          set_property(GLOBAL PROPERTY _${tgt_name}_static_bundle_${dependency} ON)
+          _recursively_collect_dependencies(${dependency})
+        endif()
+      endif()
+    endforeach()
+    set(static_libs ${static_libs} PARENT_SCOPE)
+  endfunction()
+
+  _recursively_collect_dependencies(${tgt_name})
+
+  list(REMOVE_DUPLICATES static_libs)
+
+  set(bundled_tgt_full_name
+    ${CMAKE_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${bundled_tgt_name}${CMAKE_STATIC_LIBRARY_SUFFIX})
+
+  message(STATUS "+++++ bundled_tgt_full_name: ${bundled_tgt_full_name}")
+
+  file(WRITE ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in
+    "CREATE ${bundled_tgt_full_name}\n" )
+
+  foreach(tgt IN LISTS static_libs)
+    file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in
+      "ADDLIB $<TARGET_FILE:${tgt}>\n")
+  endforeach()
+
+  file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in "SAVE\n")
+  file(APPEND ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in "END\n")
+
+  file(GENERATE
+    OUTPUT ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar
+    INPUT ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in)
+
+  set(ar_tool ${CMAKE_AR})
+  if (CMAKE_INTERPROCEDURAL_OPTIMIZATION)
+    set(ar_tool ${CMAKE_CXX_COMPILER_AR})
+  endif()
+
+  add_custom_command(
+    COMMAND ${ar_tool} -M < ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar
+    OUTPUT ${bundled_tgt_full_name}
+    COMMENT "Bundling ${bundled_tgt_name}"
+    VERBATIM)
+
+  add_custom_target(${fake_target} ALL DEPENDS ${bundled_tgt_full_name})
+  add_dependencies(${fake_target} ${tgt_name})
+
+  add_library(${bundled_tgt_name} STATIC IMPORTED)
+  set_target_properties(${bundled_tgt_name}
+    PROPERTIES
+      IMPORTED_LOCATION ${bundled_tgt_full_name}
+      INTERFACE_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:${tgt_name},INTERFACE_INCLUDE_DIRECTORIES>)
+  add_dependencies(${bundled_tgt_name} ${fake_target})
+
+endfunction()
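Usage sketch for bundle_static_library (illustrative; these are the exact target names this patch registers later in paddle/fluid/lite/api/CMakeLists.txt):

    # Merge libpaddle_api_full.a and every static library it links, transitively,
    # into ${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a; 'bundle_full_api' is
    # the custom target that other rules can depend on.
    bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)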
diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt
index aae03d3aba8a908d5678bf04bb4c04eb60028de3..b636d5c690184a30d12871b7001dd6d195c06865 100644
--- a/paddle/fluid/lite/CMakeLists.txt
+++ b/paddle/fluid/lite/CMakeLists.txt
@@ -2,6 +2,8 @@ if (NOT WITH_LITE)
   return()
 endif()
 
+include(lite)
+
 message(WARNING "Lite enabled!")
 message(STATUS "LIGHT_FRAMEWORK:\t${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}")
 message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
@@ -85,9 +87,9 @@ function (lite_deps TARGET)
   endif()
 
   set(${TARGET} ${deps} PARENT_SCOPE)
-
 endfunction()
 
+
 # A fake target to include all the libraries and tests the lite module depends.
 add_custom_target(lite_compile_deps COMMAND echo 1)
@@ -95,6 +97,10 @@ add_custom_target(lite_compile_deps COMMAND echo 1)
 # the whole fluid project to accelerate the compile speed.
 set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt")
 file(WRITE ${offline_lib_registry_file} "") # clean
+
+set(__lite_cc_files "${CMAKE_BINARY_DIR}/lite_cc_files.txt")
+file(WRITE ${__lite_cc_files} "") # clean
+
 # cc_library with branch support.
 # The branches:
 #  X86_DEPS: works only when LITE_WITH_X86 is ON.
@@ -104,7 +110,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
 #  LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #  HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 function(lite_cc_library TARGET)
-  set(options STATIC static SHARED shared)
+  set(options SHARED shared STATIC static MODULE module)
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
     HVY_DEPS ARGS)
@@ -120,14 +126,24 @@ function(lite_cc_library TARGET)
     LIGHT_DEPS ${args_LIGHT_DEPS}
     HVY_DEPS ${args_HVY_DEPS}
     )
-  if (${args_SHARED} OR ${args_shared})
+
+  if (args_SHARED OR args_shared)
     cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
+  elseif (args_MODULE OR args_module)
+    add_library(${TARGET} MODULE ${args_SRCS})
+    add_dependencies(${TARGET} ${deps} ${args_DEPS})
   else()
     cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
   endif()
+
+  foreach(cc_file ${args_SRCS})
+    file(APPEND ${__lite_cc_files} "${cc_file}\n")
+  endforeach()
+
   # collect targets need to compile for lite
-  add_dependencies(lite_compile_deps ${TARGET})
+  if (args_SRCS)
+    add_dependencies(lite_compile_deps ${TARGET})
+  endif()
 
   # register a library name.
   file(APPEND ${offline_lib_registry_file} "${TARGET}\n")
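The new MODULE branch builds a loadable plugin (a plain add_library MODULE) instead of an archive; a minimal sketch of the intended call, using the JNI target this patch declares later:

    lite_cc_library(paddle_lite_jni MODULE SRCS paddle_lite_jni.cc
                    DEPS light_api_lite cxx_api_lite
                    ARM_DEPS ${arm_kernels})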
@@ -224,9 +240,9 @@ add_custom_target(publish_inference_cxx_lib ${TARGET}
     COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
     COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
     COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
-    COMMAND cp "${CMAKE_BINARY_DIR}/paddle/fluid/lite/api/libpaddle_api_full.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
+    COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
     )
-add_dependencies(publish_inference_cxx_lib paddle_api_full)
+add_dependencies(publish_inference_cxx_lib bundle_full_api)
 add_dependencies(publish_inference_lite publish_inference_cxx_lib)
 
 if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
@@ -235,9 +251,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
     add_custom_target(publish_inference_mobile_lib ${TARGET}
         COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/mobile/lib"
         COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/mobile/include"
-        COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
-        COMMAND cp "${CMAKE_BINARY_DIR}/paddle/fluid/lite/api/libpaddle_api_light.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
+        COMMAND cp "${CMAKE_SOURCE_DIR}/paddle/fluid/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/mobile/include"
+        COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/mobile/lib"
         )
-    add_dependencies(publish_inference_mobile_lib paddle_api_light)
+    add_dependencies(publish_inference_mobile_lib paddle_api_light bundle_light_api)
     add_dependencies(publish_inference_lite publish_inference_mobile_lib)
 endif()
diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt
index 02c86017089e9600906cc370fbc0a8534f7507aa..8c356e914dd4974915df1ea80c4ecf5ea97d3724 100644
--- a/paddle/fluid/lite/api/CMakeLists.txt
+++ b/paddle/fluid/lite/api/CMakeLists.txt
@@ -102,18 +102,39 @@ lite_cc_test(test_apis_lite SRCS apis_test.cc
 
 lite_cc_library(paddle_api_lite SRCS paddle_api.cc DEPS op_params_lite)
 
-lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite)
+#-----------------------------------------------------------------------------------------------------
+# The final inference library for both CxxConfig and MobileConfig.
+lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite
+  ${ops_lite}
+  ARM_DEPS ${arm_kernels}
+  )
+# The final inference library for just MobileConfig.
 lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api_lite paddle_api_lite)
+bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
+bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
+#-----------------------------------------------------------------------------------------------------
+
+
 lite_cc_test(test_paddle_api_lite SRCS paddle_api_test.cc
   DEPS paddle_api_full paddle_api_light
   ${ops_lite}
   ARM_DEPS ${arm_kernels}
   X86_DEPS ${x86_kernels}
   ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
+
+lite_cc_test(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light
+  ${ops_lite}
+  ARM_DEPS ${arm_kernels}
+  X86_DEPS ${x86_kernels})
+
 if (WITH_TESTING)
   add_dependencies(test_paddle_api_lite extern_lite_download_lite_naive_model_tar_gz)
 endif()
+
+if (LITE_WITH_JAVA AND LITE_WITH_ARM)
+  add_subdirectory(android)
+endif()
+
 #lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
 #X86_DEPS operator
 #DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
diff --git a/paddle/fluid/lite/api/android/CMakeLists.txt b/paddle/fluid/lite/api/android/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7f31f7e9479580d9e7a47804db2a45b4f6d4e1bb
--- /dev/null
+++ b/paddle/fluid/lite/api/android/CMakeLists.txt
@@ -0,0 +1,5 @@
+if ((NOT LITE_WITH_JAVA) OR (NOT LITE_WITH_ARM))
+  return()
+endif()
+
+add_subdirectory(jni)
diff --git a/paddle/fluid/lite/api/android/jni/.gitignore b/paddle/fluid/lite/api/android/jni/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1299d2738c0d3321a46024d31e24049bef9ace9a
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/.gitignore
@@ -0,0 +1,3 @@
+/PaddleListTest.class
+/PaddleLite.class
+/bin/
diff --git a/paddle/fluid/lite/api/android/jni/CMakeLists.txt b/paddle/fluid/lite/api/android/jni/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0ce3b606e80143c8d5e44a9d7619d626c45d4dd1
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/CMakeLists.txt
@@ -0,0 +1,52 @@
+if ((NOT LITE_WITH_ARM) OR (NOT LITE_WITH_JAVA))
+  return()
+endif()
+
+include(UseJava)
+find_package(Java REQUIRED)
+
+# We are only interested in finding jni.h: we do not care about extended JVM
+# functionality or the AWT library.
+set(JAVA_AWT_LIBRARY NotNeeded)
+set(JAVA_JVM_LIBRARY NotNeeded)
+set(JAVA_INCLUDE_PATH2 NotNeeded)
+set(JAVA_AWT_INCLUDE_PATH NotNeeded)
+find_package(JNI REQUIRED)
+
+# Generate PaddlePredictor.jar
+include_directories(${JNI_INCLUDE_DIRS})
+add_jar(PaddlePredictor
+        src/com/baidu/paddle/lite/PaddlePredictor.java
+        src/com/baidu/paddle/lite/Place.java)
+get_target_property(_jarFile PaddlePredictor JAR_FILE)
+get_target_property(_classDir PaddlePredictor CLASSDIR)
+set(_stubDir "${CMAKE_CURRENT_BINARY_DIR}")
+
+# Generate paddle_lite_jni.h
+add_custom_target(
+    paddle_lite_jni_header ALL
+    COMMAND ${Java_JAVAH_EXECUTABLE} -verbose
+            -classpath ${_classDir}
+            -o paddle_lite_jni.h
+            -jni
+            com.baidu.paddle.lite.PaddlePredictor
+    DEPENDS PaddlePredictor
+)
+
+# Generate paddle_lite_jni.so
+include_directories(${JNI_INCLUDE_DIRS} ${_classDir} ${_stubDir})
+lite_cc_library(paddle_lite_jni MODULE SRCS paddle_lite_jni.cc
+                DEPS light_api_lite cxx_api_lite
+                     paddle_api_full paddle_api_lite paddle_api_light op_registry_lite
+                     ${ops_lite} ${lite_kernel_deps}
+                ARM_DEPS ${arm_kernels})
+if (APPLE)
+  # macOS only accepts a JNI lib ending with .jnilib or .dylib
+  set_target_properties(paddle_lite_jni PROPERTIES SUFFIX ".jnilib")
+elseif (WIN32)
+  # Windows only accepts a JNI lib ending with .dll
+  set_target_properties(paddle_lite_jni PROPERTIES SUFFIX ".dll")
+endif (APPLE)
+target_link_libraries(paddle_lite_jni light_api_lite cxx_api_lite
+                      paddle_api_full paddle_api_lite paddle_api_light op_registry_lite
+                      ${ops_lite} ${arm_kernels} ${lite_kernel_deps})
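The MODULE target above produces libpaddle_lite_jni.so, which the Java side loads by its bare name; this is exactly what the PaddlePredictor class added below does in its static initializer:

    // Resolves libpaddle_lite_jni.so (or .jnilib on macOS) via java.library.path.
    System.loadLibrary("paddle_lite_jni");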
diff --git a/paddle/fluid/lite/api/android/jni/paddle_lite_jni.cc b/paddle/fluid/lite/api/android/jni/paddle_lite_jni.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4ef0c2ee55d42cb9583cb66ac5c493232247085e
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/paddle_lite_jni.cc
@@ -0,0 +1,331 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/lite/api/android/jni/paddle_lite_jni.h"
+
+#include <jni.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/fluid/lite/api/light_api.h"
+#include "paddle/fluid/lite/api/paddle_api.h"
+#include "paddle/fluid/lite/api/paddle_lite_factory_helper.h"
+#include "paddle/fluid/lite/api/paddle_place.h"
+#include "paddle/fluid/lite/api/paddle_use_kernels.h"
+#include "paddle/fluid/lite/api/paddle_use_ops.h"
+#include "paddle/fluid/lite/api/paddle_use_passes.h"
+#include "paddle/fluid/lite/kernels/arm/activation_compute.h"
+#include "paddle/fluid/lite/kernels/arm/batch_norm_compute.h"
+#include "paddle/fluid/lite/kernels/arm/calib_compute.h"
+#include "paddle/fluid/lite/kernels/arm/concat_compute.h"
+#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
+#include "paddle/fluid/lite/kernels/arm/dropout_compute.h"
+#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h"
+#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
+#include "paddle/fluid/lite/kernels/arm/mul_compute.h"
+#include "paddle/fluid/lite/kernels/arm/pool_compute.h"
+#include "paddle/fluid/lite/kernels/arm/scale_compute.h"
+#include "paddle/fluid/lite/kernels/arm/softmax_compute.h"
+#include "paddle/fluid/lite/kernels/arm/split_compute.h"
+#include "paddle/fluid/lite/kernels/arm/transpose_compute.h"
+
+#define ARM_KERNEL_POINTER(kernel_class_name__)                     \
+  std::unique_ptr<paddle::lite::kernels::arm::kernel_class_name__> \
+      p##kernel_class_name__(                                       \
+          new paddle::lite::kernels::arm::kernel_class_name__);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+using paddle::lite_api::CxxConfig;
+using paddle::lite_api::MobileConfig;
+using paddle::lite_api::PaddlePredictor;
+using paddle::lite_api::Place;
+using paddle::lite_api::Tensor;
+
+static std::shared_ptr<PaddlePredictor> predictor;
+
+/**
+ * For reasons not yet understood, we have to instantiate a pointer to each
+ * kernel first; otherwise KernelRegistor throws a null-pointer error.
+ */
+static void use_arm_kernels() {
+  ARM_KERNEL_POINTER(BatchNormCompute);
+  ARM_KERNEL_POINTER(CalibComputeFp32ToInt8);
+  ARM_KERNEL_POINTER(CalibComputeInt8ToFp32);
+  ARM_KERNEL_POINTER(ConvCompute);
+  ARM_KERNEL_POINTER(ConcatCompute);
+  ARM_KERNEL_POINTER(ElementwiseAddCompute);
+  ARM_KERNEL_POINTER(DropoutCompute);
+  ARM_KERNEL_POINTER(FcCompute);
+  ARM_KERNEL_POINTER(MulCompute);
+  ARM_KERNEL_POINTER(PoolCompute);
+  ARM_KERNEL_POINTER(ReluCompute);
+  ARM_KERNEL_POINTER(ScaleCompute);
+  ARM_KERNEL_POINTER(SoftmaxCompute);
+  ARM_KERNEL_POINTER(SplitCompute);
+  ARM_KERNEL_POINTER(TransposeCompute);
+  ARM_KERNEL_POINTER(Transpose2Compute);
+}
+
+inline std::string jstring_to_cpp_string(JNIEnv *env, jstring jstr) {
+  // In Java a Unicode char is encoded with 2 bytes (UTF-16), so a jstring
+  // holds UTF-16 characters, whereas std::string in C++ is essentially a
+  // string of bytes, not characters. To pass a jstring from JNI to C++ we
+  // therefore have to convert the UTF-16 data to bytes.
+  if (!jstr) {
+    return "";
+  }
+  const jclass stringClass = env->GetObjectClass(jstr);
+  const jmethodID getBytes =
+      env->GetMethodID(stringClass, "getBytes", "(Ljava/lang/String;)[B");
+  const jbyteArray stringJbytes = (jbyteArray)env->CallObjectMethod(
+      jstr, getBytes, env->NewStringUTF("UTF-8"));
+
+  size_t length = (size_t)env->GetArrayLength(stringJbytes);
+  jbyte *pBytes = env->GetByteArrayElements(stringJbytes, NULL);
+
+  std::string ret = std::string(reinterpret_cast<char *>(pBytes), length);
+  env->ReleaseByteArrayElements(stringJbytes, pBytes, JNI_ABORT);
+
+  env->DeleteLocalRef(stringJbytes);
+  env->DeleteLocalRef(stringClass);
+  return ret;
+}
+
+inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env, const float *buf,
+                                            int64_t len) {
+  jfloatArray result = env->NewFloatArray(len);
+  env->SetFloatArrayRegion(result, 0, len, buf);
+  return result;
+}
+
+inline jintArray cpp_array_to_jintarray(JNIEnv *env, const int *buf,
+                                        int64_t len) {
+  jintArray result = env->NewIntArray(len);
+  env->SetIntArrayRegion(result, 0, len, buf);
+  return result;
+}
+
+inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env, const int8_t *buf,
+                                          int64_t len) {
+  jbyteArray result = env->NewByteArray(len);
+  env->SetByteArrayRegion(result, 0, len, buf);
+  return result;
+}
+
+inline std::vector<int64_t> jintarray_to_int64_vector(JNIEnv *env,
+                                                      jintArray dims) {
+  int dim_size = env->GetArrayLength(dims);
+  jint *dim_nums = env->GetIntArrayElements(dims, nullptr);
+  std::vector<int64_t> dim_vec(dim_nums, dim_nums + dim_size);
+  env->ReleaseIntArrayElements(dims, dim_nums, 0);
+  return dim_vec;
+}
+
+/**
+ * Converts Java com.baidu.paddle.lite.Place to c++ paddle::lite_api::Place.
+ */
+inline static Place jplace_to_cpp_place(JNIEnv *env, jobject java_place) {
+  jclass place_jclazz = env->GetObjectClass(java_place);
+
+  jmethodID target_method =
+      env->GetMethodID(place_jclazz, "getTargetInt", "()I");
+  jmethodID precision_method =
+      env->GetMethodID(place_jclazz, "getPrecisionInt", "()I");
+  jmethodID data_layout_method =
+      env->GetMethodID(place_jclazz, "getDataLayoutInt", "()I");
+  jmethodID device_method = env->GetMethodID(place_jclazz, "getDevice", "()I");
+
+  int target = env->CallIntMethod(java_place, target_method);
+  int precision = env->CallIntMethod(java_place, precision_method);
+  int data_layout = env->CallIntMethod(java_place, data_layout_method);
+  int device = env->CallIntMethod(java_place, device_method);
+
+  return Place(static_cast<paddle::lite_api::TargetType>(target),
+               static_cast<paddle::lite_api::PrecisionType>(precision),
+               static_cast<paddle::lite_api::DataLayoutType>(data_layout),
+               device);
+}
+
+inline static int64_t product(const std::vector<int64_t> &vec) {
+  if (vec.empty()) {
+    return 0;
+  }
+  int64_t result = 1;
+  for (int64_t d : vec) {
+    result *= d;
+  }
+  return result;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_loadCxxModel(
+    JNIEnv *env, jclass thiz, jstring model_path, jobject preferred_place,
+    jobjectArray valid_places) {
+  if (predictor != nullptr) {
+    return JNI_FALSE;
+  }
+  use_arm_kernels();
+
+  int valid_place_count = env->GetArrayLength(valid_places);
+  std::vector<Place> cpp_valid_places;
+  for (int i = 0; i < valid_place_count; ++i) {
+    jobject jplace = env->GetObjectArrayElement(valid_places, i);
+    cpp_valid_places.push_back(jplace_to_cpp_place(env, jplace));
+  }
+
+  CxxConfig config;
+  config.set_model_dir(jstring_to_cpp_string(env, model_path));
+  config.set_preferred_place(jplace_to_cpp_place(env, preferred_place));
+  config.set_valid_places(cpp_valid_places);
+
+  predictor = paddle::lite_api::CreatePaddlePredictor(config);
+  return predictor == nullptr ? JNI_FALSE : JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_loadMobileModel(JNIEnv *env,
+                                                           jclass thiz,
+                                                           jstring model_path) {
+  if (predictor != nullptr) {
+    return JNI_FALSE;
+  }
+  use_arm_kernels();
+  MobileConfig config;
+
+  config.set_model_dir(jstring_to_cpp_string(env, model_path));
+  predictor = paddle::lite_api::CreatePaddlePredictor(config);
+  return predictor == nullptr ? JNI_FALSE : JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_saveOptimizedModel(
+    JNIEnv *env, jclass thiz, jstring model_path) {
+  if (predictor == nullptr) {
+    return JNI_FALSE;
+  }
+  predictor->SaveOptimizedModel(jstring_to_cpp_string(env, model_path));
+  return JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_clear(JNIEnv *env, jclass thiz) {
+  if (predictor == nullptr) {
+    return JNI_FALSE;
+  }
+  predictor.reset();
+  return JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3F(
+    JNIEnv *env, jclass thiz, jint offset, jintArray dims, jfloatArray buf) {
+  if (predictor == nullptr) {
+    return JNI_FALSE;
+  }
+  std::vector<int64_t> ddim = jintarray_to_int64_vector(env, dims);
+
+  int len = env->GetArrayLength(buf);
+  if ((int64_t)len != product(ddim)) {
+    return JNI_FALSE;
+  }
+
+  float *buffer = env->GetFloatArrayElements(buf, nullptr);
+  std::unique_ptr<Tensor> tensor =
+      predictor->GetInput(static_cast<int>(offset));
+  tensor->Resize(ddim);
+  float *input = tensor->mutable_data<float>();
+  for (int i = 0; i < len; ++i) {
+    input[i] = buffer[i];
+  }
+  return JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3B(
+    JNIEnv *env, jclass thiz, jint offset, jintArray dims, jbyteArray buf) {
+  if (predictor == nullptr) {
+    return JNI_FALSE;
+  }
+  std::vector<int64_t> ddim = jintarray_to_int64_vector(env, dims);
+
+  int len = env->GetArrayLength(buf);
+  if ((int64_t)len != product(ddim)) {
+    return JNI_FALSE;
+  }
+
+  jbyte *buffer = env->GetByteArrayElements(buf, nullptr);
+  std::unique_ptr<Tensor> tensor =
+      predictor->GetInput(static_cast<int>(offset));
+  tensor->Resize(ddim);
+  int8_t *input = tensor->mutable_data<int8_t>();
+  for (int i = 0; i < len; ++i) {
+    input[i] = (int8_t)buffer[i];
+  }
+
+  return JNI_TRUE;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_run(JNIEnv *, jclass) {
+  if (predictor == nullptr) {
+    return JNI_FALSE;
+  }
+  predictor->Run();
+  return JNI_TRUE;
+}
+
+JNIEXPORT jfloatArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_getFloatOutput(JNIEnv *env,
+                                                          jclass thiz,
+                                                          jint offset) {
+  std::unique_ptr<const Tensor> tensor =
+      predictor->GetOutput(static_cast<int>(offset));
+  int64_t len = product(tensor->shape());
+  return cpp_array_to_jfloatarray(env, tensor->data<float>(), len);
+}
+
+JNIEXPORT jbyteArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_getByteOutput(JNIEnv *env,
+                                                         jclass thiz,
+                                                         jint offset) {
+  std::unique_ptr<const Tensor> tensor =
+      predictor->GetOutput(static_cast<int>(offset));
+  int64_t len = product(tensor->shape());
+  return cpp_array_to_jbytearray(env, tensor->data<int8_t>(), len);
+}
+
+JNIEXPORT jfloatArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_fetchFloat(JNIEnv *env, jclass thiz,
+                                                      jstring name) {
+  std::string cpp_name = jstring_to_cpp_string(env, name);
+  std::unique_ptr<const Tensor> tensor = predictor->GetTensor(cpp_name);
+  int64_t len = product(tensor->shape());
+  return cpp_array_to_jfloatarray(env, tensor->data<float>(), len);
+}
+
+JNIEXPORT jbyteArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_fetchByte(JNIEnv *env, jclass thiz,
+                                                     jstring name) {
+  std::string cpp_name = jstring_to_cpp_string(env, name);
+  std::unique_ptr<const Tensor> tensor = predictor->GetTensor(cpp_name);
+  int64_t len = product(tensor->shape());
+  return cpp_array_to_jbytearray(env, tensor->data<int8_t>(), len);
+}
+
+#ifdef __cplusplus
+}
+#endif
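Taken together, the exported functions give the Java side a load / set input / run / read output lifecycle; a rough sketch (the model path is a placeholder, and dims must multiply to the buffer length or setInput returns false):

    float[] input = new float[1 * 3 * 224 * 224];
    int[] dims = {1, 3, 224, 224};
    PaddlePredictor.loadMobileModel("/sdcard/lite_naive_model");  // placeholder path
    PaddlePredictor.setInput(0, dims, input);
    PaddlePredictor.run();
    float[] output = PaddlePredictor.getFloatOutput(0);
    PaddlePredictor.clear();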
diff --git a/paddle/fluid/lite/api/android/jni/paddle_lite_jni.h b/paddle/fluid/lite/api/android/jni/paddle_lite_jni.h
new file mode 100644
index 0000000000000000000000000000000000000000..f860801c716ee24b9437917d1068de4eeda7ee1d
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/paddle_lite_jni.h
@@ -0,0 +1,127 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class com_baidu_paddle_lite_PaddlePredictor */
+
+#ifndef PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
+#define PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    loadCxxModel
+ * Signature:
+ * (Ljava/lang/String;Lcom/baidu/paddle/lite/Place;[Lcom/baidu/paddle/lite/Place;)Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_loadCxxModel(JNIEnv *, jclass,
+                                                        jstring, jobject,
+                                                        jobjectArray);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    loadMobileModel
+ * Signature: (Ljava/lang/String;)Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_loadMobileModel(JNIEnv *, jclass,
+                                                           jstring);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    saveOptimizedModel
+ * Signature: (Ljava/lang/String;)Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_saveOptimizedModel(JNIEnv *, jclass,
+                                                              jstring);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    clear
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_clear(JNIEnv *, jclass);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    setInput
+ * Signature: (I[I[F)Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3F(JNIEnv *, jclass,
+                                                             jint, jintArray,
+                                                             jfloatArray);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    setInput
+ * Signature: (I[I[B)Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_setInput__I_3I_3B(JNIEnv *, jclass,
+                                                             jint, jintArray,
+                                                             jbyteArray);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    run
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_run(JNIEnv *, jclass);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    getFloatOutput
+ * Signature: (I)[F
+ */
+JNIEXPORT jfloatArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_getFloatOutput(JNIEnv *, jclass,
+                                                          jint);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    getByteOutput
+ * Signature: (I)[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_getByteOutput(JNIEnv *, jclass,
+                                                         jint);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    fetchFloat
+ * Signature: (Ljava/lang/String;)[F
+ */
+JNIEXPORT jfloatArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_fetchFloat(JNIEnv *, jclass,
+                                                      jstring);
+
+/*
+ * Class:     com_baidu_paddle_lite_PaddlePredictor
+ * Method:    fetchByte
+ * Signature: (Ljava/lang/String;)[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_com_baidu_paddle_lite_PaddlePredictor_fetchByte(JNIEnv *, jclass, jstring);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // PADDLE_FLUID_LITE_API_ANDROID_JNI_PADDLE_LITE_JNI_H_
diff --git a/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/.gitignore b/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..870ec275e827c663c24ab374bbec8c37c8f3d8b0
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/.gitignore
@@ -0,0 +1,2 @@
+/PaddleLite.class
+/PaddleLiteTest.class
diff --git a/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/PaddlePredictor.java b/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/PaddlePredictor.java
new file mode 100644
index 0000000000000000000000000000000000000000..f8957f948040ba4e59397098989fe55e17e60236
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/src/com/baidu/paddle/lite/PaddlePredictor.java
@@ -0,0 +1,130 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+package com.baidu.paddle.lite;
+
+/** Java Native Interface (JNI) class for Paddle Lite APIs */
+public class PaddlePredictor {
+
+    /** Name of the C++ JNI lib */
+    private final static String JNI_LIB_NAME = "paddle_lite_jni";
+
+    /* Load the C++ JNI lib */
+    static {
+        System.loadLibrary(JNI_LIB_NAME);
+    }
+
+    /**
+     * Loads the mobile cxx model, i.e. the model before the optimizing passes.
+     * The cxx model allows users to manage hardware place resources: the caller
+     * uses a Place on the Java side to control Target, DataLayout, Precision,
+     * and Device ID. For more details about these four fields, see our
+     * Paddle-Mobile documentation.
+     *
+     * @param modelPath      model file path
+     * @param preferredPlace preferred place to run the cxx model
+     * @param validPlaces    valid places to run the cxx model
+     * @return true if loaded successfully
+     */
+    public static native boolean loadCxxModel(String modelPath, Place preferredPlace, Place[] validPlaces);
+
+    /**
+     * Loads the mobile lite model, i.e. the model after the optimizing passes.
+     *
+     * @param modelPath model file path
+     * @return true if loaded successfully
+     */
+    public static native boolean loadMobileModel(String modelPath);
+
+    /**
+     * Saves the optimized model, which can later be loaded by
+     * {@link #loadMobileModel}.
+     *
+     * @param modelPath model file path
+     * @return true if saved successfully
+     */
+    public static native boolean saveOptimizedModel(String modelPath);
+
+    /**
+     * Clears the currently loaded model.
+     *
+     * @return true if a loaded model has been cleared
+     */
+    public static native boolean clear();
+
+    /**
+     * Sets the input data at the offset-th column of feed data.
+     *
+     * @param offset the offset-th column of feed data to set
+     * @param dims   dimensions of the input data
+     * @param buf    the input data
+     * @return true if set successfully
+     */
+    public static native boolean setInput(int offset, int[] dims, float[] buf);
+
+    /**
+     * Sets the input data at the offset-th column of feed data.
+     *
+     * @param offset the offset-th column of feed data to set
+     * @param dims   dimensions of the input data
+     * @param buf    the input data
+     * @return true if set successfully
+     */
+    public static native boolean setInput(int offset, int[] dims, byte[] buf);
+
+    /**
+     * Runs the predict model.
+     *
+     * @return true if run successfully
+     */
+    public static native boolean run();
+
+    /**
+     * Gets the offset-th column of output data as float.
+     *
+     * @param offset the offset-th column of output data to return
+     * @return model predict output
+     */
+    public static native float[] getFloatOutput(int offset);
+
+    /**
+     * Gets the offset-th column of output data as byte (int8 on the C++ side).
+     *
+     * @param offset the offset-th column of output data to return
+     * @return model predict output
+     */
+    public static native byte[] getByteOutput(int offset);
+
+    /**
+     * Fetches a tensor's value as float data.
+     *
+     * @param name the tensor's name
+     * @return values of the tensor
+     */
+    public static native float[] fetchFloat(String name);
+
+    /**
+     * Fetches a tensor's value as byte data (int8 on the C++ side).
+     *
+     * @param name the tensor's name
+     * @return values of the tensor
+     */
+    public static native byte[] fetchByte(String name);
+
+    /**
+     * Main function for test.
+     */
+    public static void main(String[] args) {
+        System.out.println("Load native library successfully");
+    }
+}
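For the cxx path, the caller supplies a preferred Place plus an array of valid Places (the Place class follows below); an illustrative sketch with placeholder model paths:

    Place preferred = new Place(Place.TargetType.ARM, Place.PrecisionType.FLOAT);
    Place[] valid = {preferred};
    PaddlePredictor.loadCxxModel("/sdcard/lite_naive_model", preferred, valid);
    // The saved model can later be loaded with loadMobileModel.
    PaddlePredictor.saveOptimizedModel("/sdcard/lite_naive_model_opt");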
+ */ +public class Place { + public enum TargetType { + UNKNOWN(0), HOST(1), X86(2), CUDA(3), ARM(4), OPEN_CL(5), ANY(6); + + public final int value; + + private TargetType(int value) { + this.value = value; + } + } + + public enum PrecisionType { + UNKNOWN(0), FLOAT(1), INT8(2), INT32(3), ANY(4); + + public final int value; + + private PrecisionType(int value) { + this.value = value; + } + } + + public enum DataLayoutType { + UNKNOWN(0), NCHW(1), ANY(2); + + public final int value; + + private DataLayoutType(int value) { + this.value = value; + } + } + + public TargetType target; + public PrecisionType precision; + public DataLayoutType layout; + public int device; + + public Place() { + target = TargetType.UNKNOWN; + precision = PrecisionType.UNKNOWN; + layout = DataLayoutType.UNKNOWN; + device = 0; + } + + public Place(TargetType target) { + this(target, PrecisionType.FLOAT); + } + + public Place(TargetType target, PrecisionType precision) { + this(target, precision, DataLayoutType.NCHW); + } + + public Place(TargetType target, PrecisionType precision, DataLayoutType layout) { + this(target, precision, layout, 0); + } + + public Place(TargetType target, PrecisionType precision, DataLayoutType layout, int device) { + this.target = target; + this.precision = precision; + this.layout = layout; + this.device = device; + } + + public boolean isValid() { + return target != TargetType.UNKNOWN && precision != PrecisionType.UNKNOWN && layout != DataLayoutType.UNKNOWN; + } + + public int getTargetInt() { + return target.value; + } + + public int getPrecisionInt() { + return precision.value; + } + + public int getDataLayoutInt() { + return layout.value; + } + + public int getDevice() { + return device; + } + +} diff --git a/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java b/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java new file mode 100644 index 0000000000000000000000000000000000000000..b328c347ffd650ac2d40e05a361fd72c33bfc95c --- /dev/null +++ b/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java @@ -0,0 +1,43 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java b/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..b328c347ffd650ac2d40e05a361fd72c33bfc95c
--- /dev/null
+++ b/paddle/fluid/lite/api/android/jni/test/com/baidu/paddle/lite/PaddlePredictorTest.java
@@ -0,0 +1,43 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+package com.baidu.paddle.lite;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.Assert.assertEquals;
+
+class PaddlePredictorTest {
+
+    @Test
+    public void run_defaultModel() {
+        PaddlePredictor.loadMobileModel("");
+
+        float[] inputBuffer = new float[10000];
+        for (int i = 0; i < 10000; ++i) {
+            inputBuffer[i] = i;
+        }
+        int[] dims = { 100, 100 };
+
+        PaddlePredictor.setInput(0, dims, inputBuffer);
+        PaddlePredictor.run();
+        float[] output = PaddlePredictor.getFloatOutput(0);
+
+        assertEquals(output.length, 50000);
+        assertEquals(output[0], 50.2132f, 1e-3f);
+        assertEquals(output[1], -28.8729f, 1e-3f);
+
+        PaddlePredictor.clear();
+    }
+
+}
diff --git a/paddle/fluid/lite/api/model_test.cc b/paddle/fluid/lite/api/model_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..78fe52394c8b966da0a5087b6d0af15a60085a85
--- /dev/null
+++ b/paddle/fluid/lite/api/model_test.cc
@@ -0,0 +1,143 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdlib>
+#include <string>
+#include <vector>
+#include "paddle/fluid/lite/api/paddle_api.h"
+#include "paddle/fluid/lite/api/paddle_use_kernels.h"
+#include "paddle/fluid/lite/api/paddle_use_ops.h"
+#include "paddle/fluid/lite/api/paddle_use_passes.h"
+#include "paddle/fluid/lite/api/test_helper.h"
+#include "paddle/fluid/lite/core/cpu_info.h"
+#include "paddle/fluid/lite/utils/string.h"
+
+namespace paddle {
+namespace lite_api {
+
+void OutputOptModel(const std::string& load_model_dir,
+                    const std::string& save_optimized_model_dir,
+                    const std::vector<int64_t>& input_shape) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(load_model_dir);
+  config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
+  config.set_valid_places({
+      Place{TARGET(kX86), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kFloat)},
+  });
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  auto input_tensor = predictor->GetInput(0);
+  input_tensor->Resize(input_shape);
+  auto* data = input_tensor->mutable_data<float>();
+  int input_num = 1;
+  for (int i = 0; i < input_shape.size(); ++i) {
+    input_num *= input_shape[i];
+  }
+  for (int i = 0; i < input_num; ++i) {
+    data[i] = i;
+  }
+  predictor->Run();
+  // delete old optimized model
+  int ret = system(
+      paddle::lite::string_format("rm -rf %s", save_optimized_model_dir.c_str())
+          .c_str());
+  if (ret == 0) {
+    LOG(INFO) << "delete old optimized model " << save_optimized_model_dir;
+  }
+  predictor->SaveOptimizedModel(save_optimized_model_dir);
+  LOG(INFO) << "Load model from " << load_model_dir;
+  LOG(INFO) << "Save optimized model to " << save_optimized_model_dir;
+}
+
+#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+void Run(const std::vector<int64_t>& input_shape, const std::string& model_dir,
+         const int repeat, const int thread_num, const int warmup_times = 10) {
+  lite::DeviceInfo::Init();
+  lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
+  lite_api::MobileConfig config;
+  config.set_model_dir(model_dir);
+
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  auto input_tensor = predictor->GetInput(0);
+  input_tensor->Resize(input_shape);
+  float* input_data = input_tensor->mutable_data<float>();
+  int input_num = 1;
+  for (int i = 0; i < input_shape.size(); ++i) {
+    input_num *= input_shape[i];
+  }
+  for (int i = 0; i < input_num; ++i) {
+    input_data[i] = i;
+  }
+
+  for (int i = 0; i < warmup_times; ++i) {
+    predictor->Run();
+  }
+
+  auto start = lite::GetCurrentUS();
+  for (int i = 0; i < repeat; ++i) {
+    predictor->Run();
+  }
+  auto end = lite::GetCurrentUS();
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num
+            << ", warmup: " << warmup_times << ", repeats: " << repeat
+            << ", spend " << (end - start) / repeat / 1000.0
+            << " ms in average.";
+
+  auto output = predictor->GetOutput(0);
+  const float* out = output->data<float>();
+  LOG(INFO) << "out " << out[0];
+  LOG(INFO) << "out " << out[1];
+  auto output_shape = output->shape();
+  int output_num = 1;
+  for (int i = 0; i < output_shape.size(); ++i) {
+    output_num *= output_shape[i];
+  }
+  LOG(INFO) << "output_num: " << output_num;
+}
+#endif
+
+}  // namespace lite_api
+}  // namespace paddle
+
+int main(int argc, char** argv) {
+  if (argc < 4) {
+    LOG(INFO) << "usage: " << argv[0] << " <model_dir> <repeat> <thread_num>";
+    exit(0);
+  }
+  std::string load_model_dir = argv[1];
+  std::string save_optimized_model_dir = load_model_dir + "opt2";
+
+#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+  int repeat = std::stoi(argv[2]);
+  int thread_num = std::stoi(argv[3]);
+#endif
+
+  std::vector<int64_t> input_shape{1, 3, 224, 224};
+
+  // Output optimized model
+  paddle::lite_api::OutputOptModel(load_model_dir, save_optimized_model_dir,
+                                   input_shape);
+
+#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+  // Run inference using optimized model
+  paddle::lite_api::Run(input_shape, save_optimized_model_dir, repeat,
+                        thread_num);
+#endif
+
+  return 0;
+}
diff --git a/paddle/fluid/lite/api/test_helper.h b/paddle/fluid/lite/api/test_helper.h
index 4d184eeb169c4f1c7f1de968e373137c4e9ffcc6..1a5ab31abd3e97c5bfc484547af5d36d53e49b39 100644
--- a/paddle/fluid/lite/api/test_helper.h
+++ b/paddle/fluid/lite/api/test_helper.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <gflags/gflags.h>
+#include <sys/time.h>
 #include <time.h>  // for eval
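A plausible invocation of the resulting test_model_bin binary on a device, matching the argv handling in main() above (the model directory is a placeholder):

    # argv[1] = model dir, argv[2] = repeat count, argv[3] = thread count
    ./test_model_bin /data/local/tmp/mobilenet_v1 100 1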
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
index c14845f74ec7579c1dd0a31cc0381d6a94ccd1e6..9745e04238343324f5a4cb929a1235e01795f60d 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
@@ -469,6 +469,389 @@ TEST(conv_arm_int8, int8_fp32) {
   }
 }
 
+TEST(conv_direct_int8, compute) {
+  DeviceInfo::Init();
+  for (auto n : {1, 2}) {
+    for (auto ic : {1, 3, 8}) {
+      for (auto oc : {1, 3, 8}) {
+        for (auto ih : {5, 15, 28}) {
+          for (auto iw : {5, 15, 28}) {
+            for (auto flag_bias : {false, true}) {
+              for (auto flag_relu : {false, true}) {
+                for (auto depthwise : {false, /*true*/}) {
+                  for (auto dilation : {1}) {
+                    for (auto stride : {1, 2}) {
+                      for (auto padding : {1}) {
+                        for (auto ks : {3}) {
+                          int group = 1;
+                          if (depthwise) {  // depthwise convolution ?
+                            group = oc = ic;
+                          }
+
+                          const int dks = dilation * (ks - 1) + 1;
+                          int oh = (ih + 2 * padding - dks) / stride + 1;
+                          int ow = (iw + 2 * padding - dks) / stride + 1;
+                          std::vector<int64_t> input_shape = {n, ic, ih, iw};
+                          std::vector<int64_t> filter_shape = {oc, ic / group,
+                                                               ks, ks};
+                          std::vector<int64_t> bias_shape({1, oc, 1, 1});
+                          std::vector<int64_t> output_shape({n, oc, oh, ow});
+
+                          Tensor input_fp32, input_int8;
+                          Tensor filter_fp32, filter_int8;
+                          Tensor bias_int32;
+                          Tensor output_int32_ref, output_int32;
+                          Tensor output_fp32_ref, output_fp32;
+                          Tensor output_int8_ref, output_int8;
+
+                          input_fp32.Resize(input_shape);
+                          input_int8.Resize(input_shape);
+                          filter_fp32.Resize(filter_shape);
+                          filter_int8.Resize(filter_shape);
+                          bias_int32.Resize(bias_shape);
+                          output_int32.Resize(output_shape);
+                          output_int32_ref.Resize(output_shape);
+                          output_fp32_ref.Resize(output_shape);
+                          output_fp32.Resize(output_shape);
+                          output_int8_ref.Resize(output_shape);
+                          output_int8.Resize(output_shape);
+
+                          float* input_fp32_data =
+                              input_fp32.mutable_data<float>();
+                          int8_t* input_int8_data =
+                              input_int8.mutable_data<int8_t>();
+
+                          float* filter_fp32_data =
+                              filter_fp32.mutable_data<float>();
+                          int8_t* filter_int8_data =
+                              filter_int8.mutable_data<int8_t>();
+
+                          int* bias_int32_data =
+                              bias_int32.mutable_data<int>();
+
+                          for (int i = 0; i < input_fp32.dims().production();
+                               i++) {
+                            input_fp32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+                          for (int i = 0; i < filter_fp32.dims().production();
+                               i++) {
+                            filter_fp32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+                          for (int i = 0; i < bias_int32.dims().production();
+                               i++) {
+                            bias_int32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+
+                          std::vector<float> in_scale;
+                          lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                              input_fp32, &in_scale, -1, 127.f);
+                          lite::arm::math::trans_tensor_fp32_to_int8(
+                              &input_fp32, &input_int8, in_scale[0]);
+
+                          std::vector<float> w_scale;
+                          lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                              filter_fp32, &w_scale, -1, 127.f);
+                          int axis_size = oc;
+                          int inner_size = ic / group * ks * ks;
+                          w_scale = lite::arm::math::get_tensor_scale_n(
+                              filter_fp32_data, axis_size, inner_size, 127.f);
+                          lite::arm::math::fp32_to_int8(
+                              filter_fp32_data, filter_int8_data,
+                              w_scale.data(), axis_size, 1, inner_size);
+
+                          operators::ConvParam param;
+                          param.x = &input_int8;
+                          param.filter = &filter_int8;
+                          if (flag_bias) {
+                            param.bias = &bias_int32;
+                          }
+                          param.fuse_relu = false;
+                          param.paddings = std::vector<int>({padding, padding});
+                          param.strides = std::vector<int>({stride, stride});
+                          param.dilations =
+                              std::vector<int>({dilation, dilation});
+                          param.groups = group;
+                          param.output = &output_int32_ref;
+                          conv_compute_ref<int8_t, int>(param);
+
+                          int* output_int32_ref_data =
+                              output_int32_ref.mutable_data<int>();
+
+                          // ============ int8direct_int32 ============
+                          param.output = &output_int32;
+                          std::unique_ptr<KernelContext> ctx_int32(
+                              new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kInt32)>
+                              int8direct_int32;
+                          int8direct_int32.init(param,
+                                                &ctx_int32->As<ARMContext>());
+                          int8direct_int32.create(param,
+                                                  &ctx_int32->As<ARMContext>());
+                          int8direct_int32.run(param);
+                          int* output_int32_data =
+                              output_int32.mutable_data<int>();
+                          for (int i = 0; i < output_int32.dims().production();
+                               i++) {
+                            EXPECT_NEAR(output_int32_data[i],
+                                        output_int32_ref_data[i], 1e-3);
+                          }
+
+                          // ============ int8direct_int8 ============
+                          int8_t* output_int8_ref_data =
+                              output_int8_ref.mutable_data<int8_t>();
+                          lite::arm::math::trans_tensor_int32_to_int8(
+                              &output_int32_ref, &output_int8_ref, in_scale[0],
+                              1, w_scale);
+                          param.output = &output_int8;
+                          param.input_scale = in_scale[0];
+                          param.output_scale = 1;
+                          param.weight_scale = w_scale;
+                          std::unique_ptr<KernelContext> ctx_int8(
+                              new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kInt8)>
+                              int8direct_int8;
+                          int8direct_int8.init(param,
+                                               &ctx_int8->As<ARMContext>());
+                          int8direct_int8.create(param,
+                                                 &ctx_int8->As<ARMContext>());
+                          int8direct_int8.run(param);
+                          int8_t* output_int8_data =
+                              output_int8.mutable_data<int8_t>();
+                          for (int i = 0; i < output_int8.dims().production();
+                               i++) {
+                            EXPECT_NEAR(output_int8_data[i],
+                                        output_int8_ref_data[i], 1e-3);
+                          }
+
+                          // ============ int8direct_float32 ============
+                          float* output_fp32_ref_data =
+                              output_fp32_ref.mutable_data<float>();
+                          lite::arm::math::trans_tensor_int32_to_fp32(
+                              &output_int32_ref, &output_fp32_ref, in_scale[0],
+                              w_scale);
+                          param.output = &output_fp32;
+                          param.input_scale = in_scale[0];
+                          param.output_scale = 1;
+                          param.weight_scale = w_scale;
+                          std::unique_ptr<KernelContext> ctx_fp32(
+                              new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kFloat)>
+                              int8direct_fp32;
+                          int8direct_fp32.init(param,
+                                               &ctx_fp32->As<ARMContext>());
+                          int8direct_fp32.create(param,
+                                                 &ctx_fp32->As<ARMContext>());
+                          int8direct_fp32.run(param);
+                          float* output_fp32_data =
+                              output_fp32.mutable_data<float>();
+                          for (int i = 0; i < output_fp32.dims().production();
+                               i++) {
+                            EXPECT_NEAR(output_fp32_data[i],
+                                        output_fp32_ref_data[i], 1e-3);
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
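Both tests quantize with a symmetric scale derived from the tensor's maximum absolute value divided by 127 (see get_tensor_scale / get_tensor_scale_n above). A standalone sketch of that idea; the helper names below are illustrative, not the lite::arm::math API:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Symmetric per-tensor quantization: scale = max|x| / 127,
    // q = clamp(round(x / scale), -127, 127). Assumes scale > 0.
    static float tensor_scale(const std::vector<float>& x) {
      float max_abs = 0.f;
      for (float v : x) max_abs = std::max(max_abs, std::fabs(v));
      return max_abs / 127.f;
    }

    static std::vector<int8_t> quantize(const std::vector<float>& x,
                                        float scale) {
      std::vector<int8_t> q(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        int v = static_cast<int>(std::round(x[i] / scale));
        q[i] = static_cast<int8_t>(std::min(127, std::max(-127, v)));
      }
      return q;
    }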
+
+TEST(conv_depthwise_int8, compute) {
+  DeviceInfo::Init();
+  for (auto n : {1, 2}) {
+    for (auto ic : {1, 3, 8}) {
+      for (auto ih : {5, 15, 28}) {
+        for (auto iw : {5, 15, 28}) {
+          for (auto flag_bias : {false, true}) {
+            for (auto flag_relu : {false, true}) {
+              for (auto dilation : {1}) {
+                for (auto stride : {1, 2}) {
+                  for (auto padding : {1, 2}) {
+                    for (auto ks : {3, /*5 */}) {
+                      int group = ic;
+                      int oc = ic;
+
+                      bool flag_dw_3x3 = (ks == 3) && (padding == 1) &&
+                                         (stride == 1 || stride == 2);
+                      bool flag_dw_5x5 =
+                          (ks == 5 && stride == 1 && padding == 2);
+                      bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
+                      if (!flag_dw) continue;
+
+                      const int dks = dilation * (ks - 1) + 1;
+                      int oh = (ih + 2 * padding - dks) / stride + 1;
+                      int ow = (iw + 2 * padding - dks) / stride + 1;
+                      std::vector<int64_t> input_shape = {n, ic, ih, iw};
+                      std::vector<int64_t> filter_shape = {oc, ic / group, ks,
+                                                           ks};
+                      std::vector<int64_t> bias_shape({1, oc, 1, 1});
+                      std::vector<int64_t> output_shape({n, oc, oh, ow});
+
+                      Tensor input_fp32, input_int8;
+                      Tensor filter_fp32, filter_int8;
+                      Tensor bias_int32;
+                      Tensor output_int32_ref, output_int32;
+                      Tensor output_fp32_ref, output_fp32;
+                      Tensor output_int8_ref, output_int8;
+
+                      input_fp32.Resize(input_shape);
+                      input_int8.Resize(input_shape);
+                      filter_fp32.Resize(filter_shape);
+                      filter_int8.Resize(filter_shape);
+                      bias_int32.Resize(bias_shape);
+
+                      output_int32.Resize(output_shape);
+                      output_int32_ref.Resize(output_shape);
+                      output_fp32_ref.Resize(output_shape);
+                      output_fp32.Resize(output_shape);
+                      output_int8_ref.Resize(output_shape);
+                      output_int8.Resize(output_shape);
+
+                      float* input_fp32_data = input_fp32.mutable_data<float>();
+                      int8_t* input_int8_data =
+                          input_int8.mutable_data<int8_t>();
+                      float* filter_fp32_data =
+                          filter_fp32.mutable_data<float>();
+                      int8_t* filter_int8_data =
+                          filter_int8.mutable_data<int8_t>();
+
+                      int* bias_int32_data = bias_int32.mutable_data<int>();
+
+                      for (int i = 0; i < input_fp32.dims().production(); i++) {
+                        input_fp32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+                      for (int i = 0; i < filter_fp32.dims().production();
+                           i++) {
+                        filter_fp32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+                      for (int i = 0; i < bias_int32.dims().production(); i++) {
+                        bias_int32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+
+                      std::vector<float> in_scale;
+                      lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                          input_fp32, &in_scale, -1, 127.f);
+                      lite::arm::math::trans_tensor_fp32_to_int8(
+                          &input_fp32, &input_int8, in_scale[0]);
+
+                      std::vector<float> w_scale;
+                      lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                          filter_fp32, &w_scale, -1, 127.f);
+                      int axis_size = oc;
+                      int inner_size = ic / group * ks * ks;
+                      w_scale = lite::arm::math::get_tensor_scale_n(
+                          filter_fp32_data, axis_size, inner_size, 127.f);
+                      lite::arm::math::fp32_to_int8(
+                          filter_fp32_data, filter_int8_data, w_scale.data(),
+                          axis_size, 1, inner_size);
+
+                      operators::ConvParam param;
+                      param.x = &input_int8;
+                      param.filter = &filter_int8;
+                      if (flag_bias) {
+                        param.bias = &bias_int32;
+                      }
+                      param.fuse_relu = false;
+                      param.paddings = std::vector<int>({padding, padding});
+                      param.strides = std::vector<int>({stride, stride});
+                      param.dilations = std::vector<int>({dilation, dilation});
+                      param.groups = group;
+                      param.output = &output_int32_ref;
+                      conv_compute_ref<int8_t, int>(param);
+
+                      int* output_int32_ref_data =
+                          output_int32_ref.mutable_data<int>();
+
+                      // ============ int8depthwise_int32 ============
+                      param.output = &output_int32;
+                      std::unique_ptr<KernelContext> ctx_int32(
+                          new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kInt32)>
+                          int8depthwise_int32;
+                      int8depthwise_int32.init(param,
+                                               &ctx_int32->As<ARMContext>());
+                      int8depthwise_int32.create(param,
+                                                 &ctx_int32->As<ARMContext>());
+                      int8depthwise_int32.run(param);
+                      int* output_int32_data = output_int32.mutable_data<int>();
+                      for (int i = 0; i < output_int32.dims().production();
+                           i++) {
+                        EXPECT_NEAR(output_int32_data[i],
+                                    output_int32_ref_data[i], 1e-3);
+                      }
+
+                      // ============ int8depthwise_int8 ============
+                      int8_t* output_int8_ref_data =
+                          output_int8_ref.mutable_data<int8_t>();
+                      lite::arm::math::trans_tensor_int32_to_int8(
+                          &output_int32_ref, &output_int8_ref, in_scale[0], 1,
+                          w_scale);
+                      param.output = &output_int8;
+                      param.input_scale = in_scale[0];
+                      param.output_scale = 1;
+                      param.weight_scale = w_scale;
+                      std::unique_ptr<KernelContext> ctx_int8(
+                          new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kInt8)>
+                          int8depthwise_int8;
+                      int8depthwise_int8.init(param,
+                                              &ctx_int8->As<ARMContext>());
+                      int8depthwise_int8.create(param,
+                                                &ctx_int8->As<ARMContext>());
+                      int8depthwise_int8.run(param);
+                      int8_t* output_int8_data =
+                          output_int8.mutable_data<int8_t>();
+                      for (int i = 0; i < output_int8.dims().production();
+                           i++) {
+                        EXPECT_NEAR(output_int8_data[i],
+                                    output_int8_ref_data[i], 1e-3);
+                      }
+
+                      // ============ int8depthwise_float32 ============
+                      float* output_fp32_ref_data =
+                          output_fp32_ref.mutable_data<float>();
+                      lite::arm::math::trans_tensor_int32_to_fp32(
+                          &output_int32_ref, &output_fp32_ref, in_scale[0],
+                          w_scale);
+                      param.output = &output_fp32;
+                      param.input_scale = in_scale[0];
+                      param.output_scale = 1;
+                      param.weight_scale = w_scale;
+                      std::unique_ptr<KernelContext> ctx_fp32(
+                          new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kFloat)>
+                          int8depthwise_fp32;
+                      int8depthwise_fp32.init(param,
+                                              &ctx_fp32->As<ARMContext>());
+                      int8depthwise_fp32.create(param,
+                                                &ctx_fp32->As<ARMContext>());
+                      int8depthwise_fp32.run(param);
+                      float* output_fp32_data =
+                          output_fp32.mutable_data<float>();
+                      for (int i = 0; i < output_fp32.dims().production();
+                           i++) {
+                        EXPECT_NEAR(output_fp32_data[i],
+                                    output_fp32_ref_data[i], 1e-3);
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 TEST(conv_arm, compute) {
   DeviceInfo::Init();
 #if 1
diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh
index 3bb4b219a2c077bb3a40af2e5c9c9abf062d56fd..c832c6304b5932b36be615aaa9d6515eb69f0270 100755
--- a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -295,7 +295,7 @@ function test_arm {
         echo "android do not need armv7hf"
         return 0
     fi
-
+    echo "test file: ${TESTS_FILE}"
${TESTS_FILE}" for _test in $(cat $TESTS_FILE); do test_arm_android $_test $port