Commit b5c77670, authored by DannyIsFunny

test=develop

......@@ -369,6 +369,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/test_cv/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/test_cv/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mask_detection" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mask_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mask_detection/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/test_libs" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/test_libs/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/test_libs/Makefile"
)
add_dependencies(publish_inference_android_cxx_demos logging gflags)
add_dependencies(publish_inference_cxx_lib publish_inference_android_cxx_demos)
......
......@@ -48,6 +48,7 @@ USE_LITE_OP(concat)
USE_LITE_OP(conv2d)
USE_LITE_OP(depthwise_conv2d)
USE_LITE_OP(pool2d)
USE_LITE_OP(max_pool2d_with_index)
USE_LITE_OP(batch_norm)
USE_LITE_OP(fusion_elementwise_sub_activation)
USE_LITE_OP(transpose)
......
......@@ -71,10 +71,11 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
threads_ = config.threads();
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
// set_thread_by input is disabled here, because this inference is proved unstable
// int num_threads = config.x86_math_library_num_threads();
// int real_num_threads = num_threads > 1 ? num_threads : 1;
int real_num_threads=1;
// set_thread_by input is disabled here, because this inference is proved
// unstable
// int num_threads = config.x86_math_library_num_threads();
// int real_num_threads = num_threads > 1 ? num_threads : 1;
int real_num_threads = 1;
paddle::lite::x86::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(real_num_threads);
VLOG(3) << "set_x86_math_library_math_threads() is set successfully and the "
......
......@@ -36,7 +36,7 @@ DEFINE_string(model_dir_0, "", "model_dir_0");
DEFINE_string(input_shape_0,
"1,3,224,224",
"input shapes another, separated by colon and comma");
DEFINE_string(target, "arm", "main target for Predictor: arm, opencl");
DEFINE_bool(use_optimize_nb,
false,
"optimized & naive buffer model for mobile devices");
......@@ -51,9 +51,19 @@ void OutputOptModel(const std::string& load_model_dir,
const std::vector<std::vector<int64_t>>& input_shapes) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_valid_places({
Place{TARGET(kARM), PRECISION(kFloat)},
});
if (FLAGS_target == "arm") {
config.set_valid_places({
Place{TARGET(kARM), PRECISION(kFloat)},
});
} else if (FLAGS_target == "opencl") {
config.set_valid_places({
Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)},
Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)},
Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kARM)}, // enable kARM CPU kernel when no opencl kernel
});
}
auto predictor = lite_api::CreatePaddlePredictor(config);
// delete old optimized model
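As a side note, the order of the valid places above determines kernel preference, with the trailing kARM entry acting as the CPU fallback described in the comment. A minimal standalone sketch (assuming only the public paddle_api.h API, not this benchmark file) of producing an OpenCL-ready .nb model:

#include <string>
#include "paddle_api.h"  // NOLINT

void SaveOpenCLModel(const std::string& model_dir, const std::string& out_path) {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir(model_dir);
  config.set_valid_places({
      paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)},
      paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)},
      paddle::lite_api::Place{TARGET(kARM)},  // CPU fallback when no OpenCL kernel exists
  });
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  predictor->SaveOptimizedModel(out_path,
                                paddle::lite_api::LiteModelType::kNaiveBuffer);
}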
......@@ -78,7 +88,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
int tid,
const int warmup_times = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_model_from_file(model_dir + ".nb");
config.set_power_mode(power_mode);
config.set_threads(thread_num);
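The switch from set_model_dir to set_model_from_file means the light runtime now consumes a single optimized .nb file produced by the step above. A minimal sketch of that loading path (file name hypothetical):

#include <string>
#include "paddle_api.h"  // NOLINT

void RunFromNaiveBuffer(const std::string& nb_path /* e.g. "model_opt.nb" */) {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file(nb_path);
  config.set_threads(1);
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});
  auto* data = input->mutable_data<float>();
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i) data[i] = 1.f;  // dummy input
  predictor->Run();
}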
......@@ -197,7 +207,7 @@ void RunTestType_10(const std::vector<std::vector<int64_t>>& input_shapes,
const int repeat,
int warmup = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_model_from_file(model_dir + ".nb");
config.set_power_mode(power_mode);
config.set_threads(thread_num);
......@@ -218,13 +228,13 @@ void RunTestType_11(const std::vector<std::vector<int64_t>>& input_shapes,
const int repeat,
int warmup = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_model_from_file(model_dir + ".nb");
config.set_power_mode(power_mode);
config.set_threads(thread_num);
auto predictor = lite_api::CreatePaddlePredictor(config);
config.set_model_dir(model_dir_0);
config.set_model_from_file(model_dir_0 + ".nb");
auto predictor_0 = lite_api::CreatePaddlePredictor(config);
for (int i = 0; i < 2 * repeat; i += 2) {
......@@ -246,7 +256,8 @@ int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
LOG(INFO) << "usage: "
<< "--model_dir /path/to/your/model";
<< "--model_dir /path/to/your/model --model_dir_0 "
"/path/to/your/model0 --target `arm` or `opencl`";
exit(0);
}
std::string save_optimized_model_dir = "";
......
......@@ -15,6 +15,7 @@
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <thread> //NOLINT
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
......@@ -30,14 +31,18 @@ DEFINE_string(input_img_txt_path,
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places) {
const int g_batch_size = 1;
const int g_thread_num = 1;
void instance_run() {
lite::Predictor predictor;
std::vector<std::string> passes;
std::vector<Place> valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir, "", "", valid_places, passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(
std::vector<DDim::value_type>({1, 3, FLAGS_im_height, FLAGS_im_width})));
input_tensor->Resize(DDim(std::vector<DDim::value_type>(
{g_batch_size, 3, FLAGS_im_height, FLAGS_im_width})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
if (FLAGS_input_img_txt_path.empty()) {
......@@ -45,12 +50,15 @@ void TestModel(const std::vector<Place>& valid_places) {
data[i] = 1;
}
} else {
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size; i++) {
fs >> data[i];
for (int j = 0; j < g_batch_size; j++) {
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size / g_batch_size; i++) {
fs >> data[i];
}
data += j * item_size / g_batch_size;
}
}
for (int i = 0; i < FLAGS_warmup; ++i) {
......@@ -72,6 +80,7 @@ void TestModel(const std::vector<Place>& valid_places) {
FILE* fp = fopen("result.txt", "wb");
for (int i = 0; i < out.size(); i++) {
auto* out_data = out[i]->data<float>();
LOG(INFO) << out[i]->numel();
for (int j = 0; j < out[i]->numel(); j++) {
fprintf(fp, "%f\n", out_data[j]);
}
......@@ -79,6 +88,16 @@ void TestModel(const std::vector<Place>& valid_places) {
fclose(fp);
}
void TestModel(const std::vector<Place>& valid_places) {
std::vector<std::unique_ptr<std::thread>> instances_vec;
for (int i = 0; i < g_thread_num; ++i) {
instances_vec.emplace_back(new std::thread(&instance_run));
}
for (int i = 0; i < g_thread_num; ++i) {
instances_vec[i]->join();
}
}
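Since every worker is joined in the same scope, a plain std::vector<std::thread> (both headers are already included above) would avoid the unique_ptr indirection; a minimal sketch assuming the same instance_run:

void TestModelAlt() {  // hypothetical variant, same behaviour as TestModel above
  std::vector<std::thread> workers;
  for (int i = 0; i < g_thread_num; ++i) {
    workers.emplace_back(instance_run);
  }
  for (auto& w : workers) {
    w.join();
  }
}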
TEST(Classify, test_bm) {
std::vector<Place> valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
......
......@@ -128,8 +128,7 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
T* output_data = output->template mutable_data<T>();
for (int64_t i = 0; i < in_dims[0]; ++i) {
for (int64_t j = 0; j < size; ++j) {
output_data[i * size + j] =
input_data[i * size + j] + vector_data[j];
output_data[i * size + j] = input_data[i * size + j] + vector_data[j];
}
}
}
......
......@@ -103,9 +103,12 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
std::string conv_weight_name = matched.at("conv_weight")->arg()->name;
auto conv_weight_t =
scope->FindVar(conv_weight_name)->GetMutable<lite::Tensor>();
auto groups = conv_op_desc->GetAttr<int>("groups");
bool depthwise = false;
if (conv_type_ == "conv2d_transpose") {
depthwise = (conv_weight_t->dims()[0] == conv_weight_t->dims()[1] * groups);
CHECK_EQ(static_cast<size_t>(bn_scale_t->data_size()),
static_cast<size_t>(conv_weight_t->dims()[1]))
static_cast<size_t>(conv_weight_t->dims()[1] * groups))
<< "The BN bias's size should be equal to the size of the first "
<< "dim size of the conv weights";
} else {
......@@ -159,7 +162,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
// compute new conv_weight for int8
auto weight_scale =
conv_op_desc->GetAttr<std::vector<float>>("weight_scale");
if (conv_type_ == "conv2d_transpose") {
if (conv_type_ == "conv2d_transpose" && !depthwise) {
int c_size = conv_weight_t->dims()[1] * conv_weight_t->dims()[2] *
conv_weight_t->dims()[3];
int hw = conv_weight_t->dims()[2] * conv_weight_t->dims()[3];
......@@ -199,7 +202,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
} else {
// compute new conv_weight
auto conv_weight_d = conv_weight_t->mutable_data<float>();
if (conv_type_ == "conv2d_transpose") {
if (conv_type_ == "conv2d_transpose" && !depthwise) {
int c_size = conv_weight_t->dims()[1] * conv_weight_t->dims()[2] *
conv_weight_t->dims()[3];
int hw = conv_weight_t->dims()[2] * conv_weight_t->dims()[3];
......
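For context on the depthwise check added above: the conv2d_transpose weight is stored as [Cin, Cout/groups, kh, kw] (the same layout the c_size/hw arithmetic relies on), so a depthwise transpose (groups == Cin, one filter per channel) has dims[1] == 1 and therefore dims[0] == dims[1] * groups. A standalone illustration with a hypothetical 16-channel weight:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> dims = {16, 1, 3, 3};  // [Cin, Cout/groups, kh, kw]
  int groups = 16;                            // depthwise: groups == Cin
  bool depthwise = (dims[0] == dims[1] * groups);
  assert(depthwise);
  return 0;
}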
ARM_ABI = arm7
export ARM_ABI
include ../Makefile.def
LITE_ROOT=../../../
THIRD_PARTY_DIR=${LITE_ROOT}/third_party
OPENCV_VERSION=opencv4.1.0
OPENCV_LIBS = ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_imgcodecs.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_imgproc.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_core.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libtegra_hal.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibjpeg-turbo.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibwebp.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibpng.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibjasper.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibtiff.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libIlmImf.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libtbb.a \
../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libcpufeatures.a
OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/armeabi-v7a/include
CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include -I${THIRD_PARTY_DIR}/gflags/include
CXX_LIBS = ${OPENCV_LIBS} ${THIRD_PARTY_DIR}/gflags/lib/libgflags.a $(SYSTEM_LIBS)
LITE_FULL_SHAPRED_LIBS=-L$(LITE_ROOT)/cxx/lib/ -lpaddle_full_api_shared
LITE_FULL_STATIC_LIBS=$(LITE_ROOT)/cxx/lib/libpaddle_api_full_bundled.a
LITE_LIGHT_SHAPRED_LIBS=-L$(LITE_ROOT)/cxx/lib/ -lpaddle_light_api_shared
LITE_LIGHT_STATIC_LIBS=$(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a
##########
fetch_opencv:
@ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR}
@ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \
(echo "fetch opencv libs" && \
wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz)
@ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \
tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR}
test_helper.o: test_helper.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_helper.o -c test_helper.cc
classification_full.o: classification_full.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o classification_full.o -c classification_full.cc
classification_light.o: classification_light.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o classification_light.o -c classification_light.cc
classification_full_shared: fetch_opencv classification_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_full.o test_helper.o -o classification_full_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_FULL_SHAPRED_LIBS}
classification_full_static: fetch_opencv classification_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_full.o test_helper.o -o classification_full_static ${LITE_FULL_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
classification_light_shared: fetch_opencv classification_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_light.o test_helper.o -o classification_light_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_LIGHT_SHAPRED_LIBS}
classification_light_static: fetch_opencv classification_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_light.o test_helper.o -o classification_light_static ${LITE_LIGHT_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
######
yolov3_full.o: yolov3_full.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o yolov3_full.o -c yolov3_full.cc
yolov3_light.o: yolov3_light.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o yolov3_light.o -c yolov3_light.cc
yolov3_full_shared: fetch_opencv yolov3_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_full.o test_helper.o -o yolov3_full_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_FULL_SHAPRED_LIBS}
yolov3_full_static: fetch_opencv yolov3_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_full.o test_helper.o -o yolov3_full_static ${LITE_FULL_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
yolov3_light_shared: fetch_opencv yolov3_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_light.o test_helper.o -o yolov3_light_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_LIGHT_SHAPRED_LIBS}
yolov3_light_static: fetch_opencv yolov3_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_light.o test_helper.o -o yolov3_light_static ${LITE_LIGHT_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
#####
all: classification_full_shared classification_full_static classification_light_shared classification_light_static yolov3_full_shared yolov3_full_static yolov3_light_shared yolov3_light_static
clean:
rm -f *.o
rm -f classification_full_shared
rm -f classification_full_static
rm -f classification_light_shared
rm -f classification_light_static
rm -f yolov3_full_shared
rm -f yolov3_full_static
rm -f yolov3_light_shared
rm -f yolov3_light_static
ARM_ABI = arm8
export ARM_ABI
include ../Makefile.def
LITE_ROOT=../../../
THIRD_PARTY_DIR=${LITE_ROOT}/third_party
OPENCV_VERSION=opencv4.1.0
OPENCV_LIBS = ../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgcodecs.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgproc.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_core.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtegra_hal.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjpeg-turbo.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibwebp.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibpng.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjasper.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibtiff.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libIlmImf.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtbb.a \
../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libcpufeatures.a
OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/arm64-v8a/include
CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include -I${THIRD_PARTY_DIR}/gflags/include
CXX_LIBS = ${OPENCV_LIBS} ${THIRD_PARTY_DIR}/gflags/lib/libgflags.a $(SYSTEM_LIBS)
LITE_FULL_SHAPRED_LIBS=-L$(LITE_ROOT)/cxx/lib/ -lpaddle_full_api_shared
LITE_FULL_STATIC_LIBS=$(LITE_ROOT)/cxx/lib/libpaddle_api_full_bundled.a
LITE_LIGHT_SHAPRED_LIBS=-L$(LITE_ROOT)/cxx/lib/ -lpaddle_light_api_shared
LITE_LIGHT_STATIC_LIBS=$(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a
##########
fetch_opencv:
@ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR}
@ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \
(echo "fetch opencv libs" && \
wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz)
@ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \
tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR}
test_helper.o: test_helper.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_helper.o -c test_helper.cc
classification_full.o: classification_full.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o classification_full.o -c classification_full.cc
classification_light.o: classification_light.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o classification_light.o -c classification_light.cc
classification_full_shared: fetch_opencv classification_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_full.o test_helper.o -o classification_full_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_FULL_SHAPRED_LIBS}
classification_full_static: fetch_opencv classification_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_full.o test_helper.o -o classification_full_static ${LITE_FULL_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
classification_light_shared: fetch_opencv classification_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_light.o test_helper.o -o classification_light_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_LIGHT_SHAPRED_LIBS}
classification_light_static: fetch_opencv classification_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) classification_light.o test_helper.o -o classification_light_static ${LITE_LIGHT_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
######
yolov3_full.o: yolov3_full.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o yolov3_full.o -c yolov3_full.cc
yolov3_light.o: yolov3_light.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o yolov3_light.o -c yolov3_light.cc
yolov3_full_shared: fetch_opencv yolov3_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_full.o test_helper.o -o yolov3_full_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_FULL_SHAPRED_LIBS}
yolov3_full_static: fetch_opencv yolov3_full.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_full.o test_helper.o -o yolov3_full_static ${LITE_FULL_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
yolov3_light_shared: fetch_opencv yolov3_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_light.o test_helper.o -o yolov3_light_shared $(CXX_LIBS) $(LDFLAGS) ${LITE_LIGHT_SHAPRED_LIBS}
yolov3_light_static: fetch_opencv yolov3_light.o test_helper.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) yolov3_light.o test_helper.o -o yolov3_light_static ${LITE_LIGHT_STATIC_LIBS} $(CXX_LIBS) $(LDFLAGS)
#####
all: classification_full_shared classification_full_static classification_light_shared classification_light_static yolov3_full_shared yolov3_full_static yolov3_light_shared yolov3_light_static
clean:
rm -f *.o
rm -f classification_full_shared
rm -f classification_full_static
rm -f classification_light_shared
rm -f classification_light_static
rm -f yolov3_full_shared
rm -f yolov3_full_static
rm -f yolov3_light_shared
rm -f yolov3_light_static
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <fstream>
#include <iostream>
#include "paddle_api.h" // NOLINT
#include "test_helper.h" // NOLINT
DEFINE_string(model_dir,
"",
"the path of the model, the model and param files is under "
"model_dir.");
DEFINE_string(model_filename,
"",
"the filename of model file. When the model is combined formate, "
"please set model_file.");
DEFINE_string(param_filename,
"",
"the filename of param file, set param_file when the model is "
"combined formate.");
DEFINE_string(img_path, "", "the path of input image");
DEFINE_string(img_txt_path,
"",
"the path of input image, the image is processed "
" and saved in txt file");
DEFINE_double(out_max_value, 0.0, "The max value in output tensor");
DEFINE_double(threshold,
1e-3,
"If the max value diff is smaller than threshold, pass test");
DEFINE_int32(out_max_value_index, 65, "The max value index in output tensor");
// Optimize model for ARM CPU.
// If the model is not in combined format, leave model_filename and params_filename empty
void OptModel(const std::string& load_model_dir,
const std::string& model_filename,
const std::string& params_filename,
const std::string& save_model_path) {
paddle::lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
if (!model_filename.empty() && !params_filename.empty()) {
config.set_model_file(load_model_dir + "/" + model_filename);
config.set_param_file(load_model_dir + "/" + params_filename);
}
std::vector<paddle::lite_api::Place> valid_places = {
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt32)},
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt64)},
};
config.set_valid_places(valid_places);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
std::string cmd_str = "rm -rf " + save_model_path;
int ret = system(cmd_str.c_str());
if (ret == 0) {
std::cout << "Delete old optimized model " << save_model_path << std::endl;
}
predictor->SaveOptimizedModel(save_model_path,
paddle::lite_api::LiteModelType::kNaiveBuffer);
std::cout << "Load model from " << load_model_dir << std::endl;
std::cout << "Save optimized model to " << save_model_path << std::endl;
}
void Run(const std::string& model_path,
const std::string& img_path,
const std::string& img_txt_path,
const float out_max_value,
const int out_max_value_index,
const float threshold,
const int height,
const int width) {
// set config and create predictor
paddle::lite_api::MobileConfig config;
config.set_threads(3);
config.set_model_from_file(model_path);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
// set input
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize({1, 3, height, width});
auto input_data = input_tensor->mutable_data<float>();
if (img_txt_path.size() > 0) {
std::fstream fs(img_txt_path);
if (!fs.is_open()) {
std::cerr << "Fail to open img txt file:" << img_txt_path << std::endl;
}
int num = 1 * 3 * height * width;
for (int i = 0; i < num; i++) {
fs >> input_data[i];
}
} else {
cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
if (!img.data) {
std::cerr << "Fail to open img:" << img_path << std::endl;
exit(1);
}
float means[3] = {0.485f, 0.456f, 0.406f};
float scales[3] = {0.229f, 0.224f, 0.225f};
process_img(img, width, height, input_data, means, scales);
}
predictor->Run();
auto out_tensor = predictor->GetOutput(0);
auto* out_data = out_tensor->data<float>();
int64_t output_num = ShapeProduction(out_tensor->shape());
float max_value = out_data[0];
int max_index = 0;
for (int i = 0; i < output_num; i++) {
if (max_value < out_data[i]) {
max_value = out_data[i];
max_index = i;
}
}
std::cout << "max_value:" << max_value << std::endl;
std::cout << "max_index:" << max_index << std::endl;
std::cout << "max_value_ground_truth:" << out_max_value << std::endl;
std::cout << "max_index_ground_truth:" << out_max_value_index << std::endl;
if (max_index != out_max_value_index ||
fabs(max_value - out_max_value) > threshold) {
std::cerr << "----------Fail Test.---------- \n\n";
} else {
std::cout << "----------Pass Test.---------- \n\n";
}
}
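The argmax scan in Run could also be expressed with std::max_element; a minimal equivalent helper, shown only as a sketch (the manual loop above avoids the extra header):

#include <algorithm>  // would be needed for std::max_element

// Hypothetical helper equivalent to the manual max scan in Run().
static int ArgMax(const float* data, int64_t n) {
  return static_cast<int>(std::max_element(data, data + n) - data);
}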
int main(int argc, char** argv) {
// Check inputs
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir.empty() ||
(FLAGS_img_path.empty() && FLAGS_img_txt_path.empty())) {
std::cerr << "Input error." << std::endl;
std::cerr
<< "Usage: " << argv[0] << std::endl
<< "--model_dir: the path of not optimized model \n"
"--model_filename: the model filename of not optimized model \n"
"--param_filename: the param filename of not optimized model \n"
"--img_txt_path: the path of input image, the image is processed \n"
" and saved in txt file \n"
"--img_path: the path of input image \n"
"--out_max_value: The max value in output tensor \n"
"--threshold: If the max value diff is smaller than threshold,\n"
" pass test. Default 1e-3.\n"
"--out_max_value_index: The max value index in output tensor \n";
exit(1);
}
const int height = 224;
const int width = 224;
std::string model_dir = FLAGS_model_dir;
if (model_dir.back() == '/') {
model_dir.pop_back();
}
std::string optimized_model_path = model_dir + "_opt2";
OptModel(FLAGS_model_dir,
FLAGS_model_filename,
FLAGS_param_filename,
optimized_model_path);
std::string run_model_path = optimized_model_path + ".nb";
// Run test
Run(run_model_path,
FLAGS_img_path,
FLAGS_img_txt_path,
FLAGS_out_max_value,
FLAGS_out_max_value_index,
FLAGS_threshold,
height,
width);
return 0;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <fstream>
#include <iostream>
#include "paddle_api.h" // NOLINT
#include "test_helper.h" // NOLINT
DEFINE_string(optimized_model_path, "", "the path of optimized model");
DEFINE_string(img_path, "", "the path of input image");
DEFINE_string(img_txt_path,
"",
"the path of input image, the image is processed "
" and saved in txt file");
DEFINE_double(out_max_value, 0.0, "The max value in output tensor");
DEFINE_double(threshold,
1e-3,
"If the max value diff is smaller than threshold, pass test");
DEFINE_int32(out_max_value_index, -1, "The max value index in output tensor");
void Run(const std::string& model_path,
const std::string& img_path,
const std::string& img_txt_path,
const float out_max_value,
const int out_max_value_index,
const float threshold,
const int height,
const int width) {
// set config and create predictor
paddle::lite_api::MobileConfig config;
config.set_threads(3);
config.set_model_from_file(model_path);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
// set input
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize({1, 3, height, width});
auto input_data = input_tensor->mutable_data<float>();
if (img_txt_path.size() > 0) {
std::fstream fs(img_txt_path);
if (!fs.is_open()) {
std::cerr << "Fail to open img txt file:" << img_txt_path << std::endl;
}
int num = 1 * 3 * height * width;
for (int i = 0; i < num; i++) {
fs >> input_data[i];
}
} else {
cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
if (!img.data) {
std::cerr << "Fail to open img:" << img_path << std::endl;
exit(1);
}
float means[3] = {0.485f, 0.456f, 0.406f};
float scales[3] = {0.229f, 0.224f, 0.225f};
process_img(img, width, height, input_data, means, scales);
}
predictor->Run();
auto out_tensor = predictor->GetOutput(0);
auto* out_data = out_tensor->data<float>();
int64_t output_num = ShapeProduction(out_tensor->shape());
float max_value = out_data[0];
int max_index = 0;
for (int i = 0; i < output_num; i++) {
if (max_value < out_data[i]) {
max_value = out_data[i];
max_index = i;
}
}
std::cout << "max_value:" << max_value << std::endl;
std::cout << "max_index:" << max_index << std::endl;
std::cout << "max_value_ground_truth:" << out_max_value << std::endl;
std::cout << "max_index_ground_truth:" << out_max_value_index << std::endl;
if (max_index != out_max_value_index ||
fabs(max_value - out_max_value) > threshold) {
std::cerr << "----------Fail Test---------- \n\n";
} else {
std::cout << "----------Pass Test---------- \n\n";
}
}
int main(int argc, char** argv) {
// Check inputs
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_optimized_model_path.empty() ||
(FLAGS_img_path.empty() && FLAGS_img_txt_path.empty())) {
std::cerr << "Input error." << std::endl;
std::cerr
<< "Usage: " << argv[0] << std::endl
<< "--optimized_model_path: the path of optimized model \n"
"--img_txt_path: the path of input image, the image is processed \n"
" and saved in txt file \n"
"--img_path: the path of input image \n"
"--out_max_value: The max value in output tensor \n"
"--threshold: If the max value diff is smaller than threshold,\n"
" pass test. Default 1e-3.\n"
"--out_max_value_index: The max value index in output tensor \n";
exit(1);
}
const int height = 224;
const int width = 224;
// Run test
Run(FLAGS_optimized_model_path,
FLAGS_img_path,
FLAGS_img_txt_path,
FLAGS_out_max_value,
FLAGS_out_max_value_index,
FLAGS_threshold,
height,
width);
return 0;
}
make clean
make all -j
gf=test_lite_lib_files
if [ -d ${gf} ];then
rm -rf ${gf}
fi
mkdir ${gf}
mv classification_full_shared ${gf}
mv classification_full_static ${gf}
mv classification_light_shared ${gf}
mv classification_light_static ${gf}
mv yolov3_full_shared ${gf}
mv yolov3_full_static ${gf}
mv yolov3_light_shared ${gf}
mv yolov3_light_static ${gf}
cp run.sh ${gf}
make clean
cp -r ../../../cxx/ ${gf}
mv ${gf}/cxx ${gf}/lite
if [ ! -f "test_libs_models_imgs.tgz" ];then
wget https://paddle-inference-dist.cdn.bcebos.com/PaddleLite/test_libs_models_imgs.tgz
fi
tar zxvf test_libs_models_imgs.tgz
mv test_libs_models_imgs ${gf}
mv ${gf}/test_libs_models_imgs ${gf}/models_imgs
export LD_LIBRARY_PATH=$PWD/lite/lib/:${LD_LIBRARY_PATH}
# mobilenetv1
./classification_light_shared \
--optimized_model_path=models_imgs/models/mobilenetv1.nb \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.936887 \
--out_max_value_index=65
./classification_light_static \
--optimized_model_path=models_imgs/models/mobilenetv1.nb \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.936887 \
--out_max_value_index=65
./classification_full_static \
--model_dir=models_imgs/models/mobilenetv1 \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.936887 \
--out_max_value_index=65
./classification_full_shared \
--model_dir=models_imgs/models/mobilenetv1 \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.936887 \
--out_max_value_index=65
# mobilenetv2
./classification_light_shared \
--optimized_model_path=models_imgs/models/mobilenetv2.nb \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.868888 \
--out_max_value_index=65
./classification_light_static \
--optimized_model_path=models_imgs/models/mobilenetv2.nb \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.868888 \
--out_max_value_index=65
./classification_full_static \
--model_dir=models_imgs/models/mobilenetv2 \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.868888 \
--out_max_value_index=65
./classification_full_shared \
--model_dir=models_imgs/models/mobilenetv2 \
--img_txt_path=models_imgs/images/classification.jpg.txt \
--out_max_value=0.868888 \
--out_max_value_index=65
# yolov3
./yolov3_light_shared \
--optimized_model_path=models_imgs/models/yolov3_mobilenetv1.nb \
--img_txt_path=models_imgs/images/yolov3.jpg.txt \
--out_values=0,0.153605,174.494,199.729,562.075,604.014
./yolov3_light_static \
--optimized_model_path=models_imgs/models/yolov3_mobilenetv1.nb \
--img_txt_path=models_imgs/images/yolov3.jpg.txt \
--out_values=0,0.153605,174.494,199.729,562.075,604.014
./yolov3_full_static \
--model_dir=models_imgs/models/yolov3_mobilenetv1 \
--img_txt_path=models_imgs/images/yolov3.jpg.txt \
--out_values=0,0.153605,174.494,199.729,562.075,604.014
./yolov3_full_shared \
--model_dir=models_imgs/models/yolov3_mobilenetv1 \
--img_txt_path=models_imgs/images/yolov3.jpg.txt \
--out_values=0,0.153605,174.494,199.729,562.075,604.014
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_helper.h" // NOLINT
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
double GetCurrentUS() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
}
int64_t ShapeProduction(const std::vector<int64_t>& shape) {
int64_t num = 1;
for (auto i : shape) {
num *= i;
}
return num;
}
std::vector<int64_t> GetIntNumsFromStr(const std::string& str) {
std::vector<int64_t> nums;
std::string tmp_str = str;
while (!tmp_str.empty()) {
int num = atoi(tmp_str.data());
nums.push_back(num);
size_t next_offset = tmp_str.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp_str = tmp_str.substr(next_offset + 1);
}
}
return nums;
}
std::vector<double> GetDoubleNumsFromStr(const std::string& str) {
std::vector<double> nums;
std::string tmp_str = str;
while (!tmp_str.empty()) {
double num = atof(tmp_str.data());
nums.push_back(num);
size_t next_offset = tmp_str.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp_str = tmp_str.substr(next_offset + 1);
}
}
return nums;
}
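A usage sketch for the parser above (values taken from the --out_values flag passed in run.sh; both helpers split on commas only):

// Usage sketch (hypothetical caller): parse ground-truth values from a flag string.
static std::vector<double> ParseGroundTruthExample() {
  return GetDoubleNumsFromStr("0,0.153605,174.494,199.729,562.075,604.014");
  // -> {0, 0.153605, 174.494, 199.729, 562.075, 604.014}
}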
// Apply mean/scale and transform layout NHWC -> NCHW (NEON accelerated)
void neon_mean_scale(
const float* din, float* dout, int size, float* mean, float* scale) {
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
float32x4_t vmean1 = vdupq_n_f32(mean[1]);
float32x4_t vmean2 = vdupq_n_f32(mean[2]);
float32x4_t vscale0 = vdupq_n_f32(1.f / scale[0]);
float32x4_t vscale1 = vdupq_n_f32(1.f / scale[1]);
float32x4_t vscale2 = vdupq_n_f32(1.f / scale[2]);
float* dout_c0 = dout;
float* dout_c1 = dout + size;
float* dout_c2 = dout + size * 2;
int i = 0;
for (; i < size - 3; i += 4) {
float32x4x3_t vin3 = vld3q_f32(din);
float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
vst1q_f32(dout_c0, vs0);
vst1q_f32(dout_c1, vs1);
vst1q_f32(dout_c2, vs2);
din += 12;
dout_c0 += 4;
dout_c1 += 4;
dout_c2 += 4;
}
for (; i < size; i++) {
// Tail elements: each channel goes to its own output plane.
*(dout_c0++) = (*(din++) - mean[0]) / scale[0];
*(dout_c1++) = (*(din++) - mean[1]) / scale[1];
*(dout_c2++) = (*(din++) - mean[2]) / scale[2];
}
}
// Process img and set it as input
void process_img(const cv::Mat& img,
int width,
int height,
float* dest_data,
float* means,
float* scales) {
cv::Mat rgb_img;
cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize(rgb_img, rgb_img, cv::Size(width, height), 0.f, 0.f);
cv::Mat imgf;
rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
const float* dimg = reinterpret_cast<const float*>(imgf.data);
neon_mean_scale(dimg, dest_data, width * height, means, scales);
}
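A usage sketch for process_img (path hypothetical): read an image with OpenCV and fill a planar NCHW float buffer using the ImageNet mean/scale values from the demos above:

static void ProcessImgExample() {
  cv::Mat img = cv::imread("demo.jpg", cv::IMREAD_COLOR);  // hypothetical path
  float means[3] = {0.485f, 0.456f, 0.406f};
  float scales[3] = {0.229f, 0.224f, 0.225f};
  std::vector<float> dest(3 * 224 * 224);  // 3 planes of 224 x 224
  process_img(img, 224, 224, dest.data(), means, scales);
}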
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
double GetCurrentUS();
int64_t ShapeProduction(const std::vector<int64_t>& shape);
std::vector<int64_t> GetIntNumsFromStr(const std::string& str);
std::vector<double> GetDoubleNumsFromStr(const std::string& str);
void neon_mean_scale(
const float* din, float* dout, int size, float* mean, float* scale);
void process_img(const cv::Mat& img,
int width,
int height,
float* dst_data,
float* means,
float* scales);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <fstream>
#include <iostream>
#include "paddle_api.h" // NOLINT
#include "test_helper.h" // NOLINT
DEFINE_string(model_dir,
"",
"the path of the model, the model and param files is under "
"model_dir.");
DEFINE_string(model_filename,
"",
"the filename of model file. When the model is combined formate, "
"please set model_file.");
DEFINE_string(param_filename,
"",
"the filename of param file, set param_file when the model is "
"combined formate.");
DEFINE_string(img_path, "", "the path of input image");
DEFINE_string(img_txt_path,
"",
"the path of input image, the image is processed "
" and saved in txt file");
DEFINE_string(out_values,
"",
"The output values, separated by colon and comma");
DEFINE_double(threshold,
1e-3,
"If the output value diff is smaller than threshold, pass test");
void OptModel(const std::string& load_model_dir,
const std::string& model_filename,
const std::string& params_filename,
const std::string& save_model_path) {
paddle::lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
if (!model_filename.empty() && !params_filename.empty()) {
config.set_model_file(load_model_dir + "/" + model_filename);
config.set_param_file(load_model_dir + "/" + params_filename);
}
std::vector<paddle::lite_api::Place> valid_places = {
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt32)},
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt64)},
};
config.set_valid_places(valid_places);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
std::string cmd_str = "rm -rf " + save_model_path;
int ret = system(cmd_str.c_str());
if (ret == 0) {
std::cout << "Delete old optimized model " << save_model_path << std::endl;
}
predictor->SaveOptimizedModel(save_model_path,
paddle::lite_api::LiteModelType::kNaiveBuffer);
std::cout << "Load model from " << load_model_dir << std::endl;
std::cout << "Save optimized model to " << save_model_path << std::endl;
}
void Run(const std::string& model_path,
const std::string& img_path,
const std::string& img_txt_path,
const std::vector<double>& out_values,
const float threshold,
const int height,
const int width) {
// set config and create predictor
paddle::lite_api::MobileConfig config;
config.set_threads(3);
config.set_model_from_file(model_path);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
// set input
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize({1, 3, height, width});
auto input_data = input_tensor->mutable_data<float>();
if (img_txt_path.size() > 0) {
std::fstream fs(img_txt_path);
if (!fs.is_open()) {
std::cerr << "Fail to open img txt file:" << img_txt_path << std::endl;
}
int num = 1 * 3 * height * width;
for (int i = 0; i < num; i++) {
fs >> input_data[i];
}
} else {
cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
if (!img.data) {
std::cerr << "Fail to open img:" << img_path << std::endl;
exit(1);
}
float means[3] = {0.485f, 0.456f, 0.406f};
float scales[3] = {0.229f, 0.224f, 0.225f};
process_img(img, width, height, input_data, means, scales);
}
auto shape_tensor = predictor->GetInput(1);
shape_tensor->Resize({1, 2});
auto* shape_data = shape_tensor->mutable_data<int>();
shape_data[0] = height;
shape_data[1] = width;
predictor->Run();
auto out_tensor = predictor->GetOutput(0);
auto* out_data = out_tensor->data<float>();
int64_t output_num = ShapeProduction(out_tensor->shape());
bool is_pass = true;
for (int i = 0; i < output_num && i < out_values.size(); i++) {
std::cout << "id:" << i << " out_data:" << out_data[i]
<< " gt_data:" << out_values[i] << std::endl;
if (fabs(out_data[i] - out_values[i]) > threshold) {
is_pass = false;
}
}
if (is_pass) {
std::cout << "----------Pass test---------- \n\n";
} else {
std::cout << "----------Fail test---------- \n\n";
}
}
int main(int argc, char** argv) {
// Check inputs
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir.empty() ||
(FLAGS_img_path.empty() && FLAGS_img_txt_path.empty())) {
std::cerr << "Input error." << std::endl;
std::cerr
<< "Usage: " << argv[0] << std::endl
<< "--model_dir: the path of not optimized model \n"
"--model_filename: the model filename of not optimized model \n"
"--param_filename: the param filename of not optimized model \n"
"--img_txt_path: the path of input image, the image is processed \n"
" and saved in txt file \n"
"--img_path: the path of input image \n"
"--out_values: The output values, separated by colon and comma.\n"
"--threshold: If the out value diff is smaller than threshold,\n"
" pass test. Default 1e-3.\n";
exit(1);
}
const int height = 608;
const int width = 608;
std::vector<double> out_values = GetDoubleNumsFromStr(FLAGS_out_values);
std::string model_dir = FLAGS_model_dir;
if (model_dir.back() == '/') {
model_dir.pop_back();
}
std::string optimized_model_path = model_dir + "_opt2";
OptModel(FLAGS_model_dir,
FLAGS_model_filename,
FLAGS_param_filename,
optimized_model_path);
std::string run_model_path = optimized_model_path + ".nb";
// Run test
Run(run_model_path,
FLAGS_img_path,
FLAGS_img_txt_path,
out_values,
FLAGS_threshold,
height,
width);
return 0;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <fstream>
#include <iostream>
#include "paddle_api.h" // NOLINT
#include "test_helper.h" // NOLINT
DEFINE_string(optimized_model_path, "", "the path of the optimized model");
DEFINE_string(img_path, "", "the path of input image");
DEFINE_string(img_txt_path,
"",
"the path of input image, the image is processed "
" and saved in txt file");
DEFINE_string(out_values,
"",
"The output values, separated by colon and comma");
DEFINE_double(threshold,
1e-3,
"If the output value diff is smaller than threshold, pass test");
void Run(const std::string& model_path,
const std::string& img_path,
const std::string& img_txt_path,
const std::vector<double>& out_values,
const float threshold,
const int height,
const int width) {
// set config and create predictor
paddle::lite_api::MobileConfig config;
config.set_threads(3);
config.set_model_from_file(model_path);
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
// set input
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize({1, 3, height, width});
auto input_data = input_tensor->mutable_data<float>();
if (img_txt_path.size() > 0) {
std::fstream fs(img_txt_path);
if (!fs.is_open()) {
std::cerr << "Fail to open img txt file:" << img_txt_path << std::endl;
}
int num = 1 * 3 * height * width;
for (int i = 0; i < num; i++) {
fs >> input_data[i];
}
} else {
cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
if (!img.data) {
std::cerr << "Fail to open img:" << img_path << std::endl;
exit(1);
}
float means[3] = {0.485f, 0.456f, 0.406f};
float scales[3] = {0.229f, 0.224f, 0.225f};
process_img(img, width, height, input_data, means, scales);
}
auto shape_tensor = predictor->GetInput(1);
shape_tensor->Resize({1, 2});
auto* shape_data = shape_tensor->mutable_data<int>();
shape_data[0] = height;
shape_data[1] = width;
predictor->Run();
auto out_tensor = predictor->GetOutput(0);
auto* out_data = out_tensor->data<float>();
int64_t output_num = ShapeProduction(out_tensor->shape());
bool is_pass = true;
for (int i = 0; i < output_num && i < out_values.size(); i++) {
std::cout << "id:" << i << " out_data:" << out_data[i]
<< " gt_data:" << out_values[i] << std::endl;
if (fabs(out_data[i] - out_values[i]) > threshold) {
is_pass = false;
}
}
if (is_pass) {
std::cout << "----------Pass test---------- \n\n";
} else {
std::cout << "----------Fail test---------- \n\n";
}
}
int main(int argc, char** argv) {
// Check inputs
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_optimized_model_path.empty() ||
(FLAGS_img_path.empty() && FLAGS_img_txt_path.empty())) {
std::cerr << "Input error." << std::endl;
std::cerr
<< "Usage: " << argv[0] << std::endl
<< "--optimized_model_path: the path of optimized model \n"
"--img_txt_path: the path of input image, the image is processed \n"
" and saved in txt file \n"
"--img_path: the path of input image \n"
"--out_values: The output values, separated by colon and comma.\n"
"--threshold: If the out value diff is smaller than threshold,\n"
" pass test. Default 1e-3.\n";
exit(1);
}
const int height = 608;
const int width = 608;
std::vector<double> out_values = GetDoubleNumsFromStr(FLAGS_out_values);
// Run test
Run(FLAGS_optimized_model_path,
FLAGS_img_path,
FLAGS_img_txt_path,
out_values,
FLAGS_threshold,
height,
width);
return 0;
}
......@@ -36,6 +36,7 @@ lite_cc_library(subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS ${bm_subgraph_
lite_cc_library(subgraph_bridge_split_op_bm SRCS split_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS ${bm_subgraph_bridge_deps})
set(bm_subgraph_bridges
subgraph_bridge_registry
subgraph_bridge_engine
......
......@@ -54,6 +54,8 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
active_type_id = ACTIVE_SQRT;
} else if (op_type == "square") {
active_type_id = ACTIVE_SQUARE;
} else if (op_type == "sigmoid") {
active_type_id = ACTIVE_SIGMOID;
} else {
LOG(FATAL) << "[BM] unsupport act type";
return FAILED;
......@@ -102,3 +104,6 @@ REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(sqrt, kBM, paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(square, kBM, paddle::lite::subgraph::bm::ActConverter);
REGISTER_SUBGRAPH_BRIDGE(sigmoid,
kBM,
paddle::lite::subgraph::bm::ActConverter);
......@@ -20,11 +20,14 @@ namespace lite {
namespace subgraph {
namespace bm {
pthread_mutex_t Graph::mutex_compiler_ = PTHREAD_MUTEX_INITIALIZER;
void Graph::AddNode(const std::string& name) {
nodes_.insert(std::make_pair(name, name));
}
void Graph::CreateCompilerHandle() {
pthread_mutex_lock(&mutex_compiler_);
#ifdef BM1682
compiler_handle_ = create_bmcompiler("BM1682");
#else
......@@ -33,6 +36,8 @@ void Graph::CreateCompilerHandle() {
CHECK(compiler_handle_ != nullptr);
}
void Graph::UnlockCompilerMutex() { pthread_mutex_unlock(&mutex_compiler_); }
} // namespace bm
} // namespace subgraph
} // namespace lite
......
......@@ -14,6 +14,7 @@
#pragma once
#include <pthread.h>
#include <memory>
#include <string>
#include <unordered_map>
......@@ -36,10 +37,12 @@ class Graph {
}
void CreateCompilerHandle();
void* GetCompilerHandle() { return compiler_handle_; }
void UnlockCompilerMutex();
private:
std::unordered_map<std::string, std::string> nodes_;
void* compiler_handle_;
static pthread_mutex_t mutex_compiler_;
};
} // namespace bm
......
......@@ -58,3 +58,5 @@ USE_SUBGRAPH_BRIDGE(depthwise_conv2d_transpose, kBM);
USE_SUBGRAPH_BRIDGE(shape, kBM);
USE_SUBGRAPH_BRIDGE(split, kBM);
USE_SUBGRAPH_BRIDGE(matmul, kBM);
USE_SUBGRAPH_BRIDGE(max_pool2d_with_index, kBM);
USE_SUBGRAPH_BRIDGE(sigmoid, kBM);
......@@ -11,7 +11,10 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include <bmcompiler_if_lite.h>
#include <user_bmcpu_common.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
......@@ -54,46 +57,84 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
shape[0] = &i_output_shape_data[0];
name[0] = static_cast<const char*>(output_var_name.c_str());
dim[0] = output_dims.size();
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
std::string pooling_type;
if (op_info->HasAttr("pooling_type")) {
pooling_type = op_info->GetAttr<std::string>("pooling_type");
} else if (op_type == "max_pool2d_with_index") {
pooling_type = "max";
}
CHECK(pooling_type == "max" || pooling_type == "avg");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
bool ceil_mode = false;
if (op_info->HasAttr("ceil_mode")) {
ceil_mode = op_info->GetAttr<bool>("ceil_mode");
}
bool adaptive = false;
if (op_info->HasAttr("adaptive")) {
adaptive = op_info->GetAttr<bool>("adaptive");
}
bool average_exclusive = false;
if (pooling_type == "avg") {
average_exclusive = op_info->GetAttr<bool>("exclusive");
}
if (output_dims[2] == 1 && output_dims[3] == 1) {
global_pooling = true;
}
if (global_pooling) {
paddings[0] = 0;
paddings[1] = 0;
ksize[0] = i_x_shape_data[2];
ksize[1] = i_x_shape_data[3];
}
add_pooling_layer(
graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
static_cast<const char*>(x_var_name.c_str()),
1,
shape,
dim,
name,
ksize[0],
ksize[1],
paddings[0],
paddings[0],
paddings[1],
paddings[1],
strides[0],
strides[1],
(ksize[0] > 1 && ksize[1] > 1) && pooling_type == "max" ? 0 : 1,
static_cast<int>(average_exclusive),
static_cast<int>(global_pooling),
static_cast<int>(ceil_mode),
static_cast<const char*>(unique_op_name.c_str()),
nullptr);
bool is_max = (pooling_type == "max");
if (adaptive && !global_pooling) {
user_cpu_param_t bm_param;
bm_param.op_type = USER_PADDLE_ADAPTIVE_POOL;
bm_param.u.adaptive_pool_parm.is_avg = !is_max;
int32_t* in_shape[1];
int32_t in_dim[1];
const char* in_name[1];
in_shape[0] = &i_x_shape_data[0];
in_name[0] = static_cast<const char*>(x_var_name.c_str());
in_dim[0] = x_dims.size();
add_user_cpu_layer(graph->GetCompilerHandle(),
1,
in_shape,
in_dim,
in_name,
1,
shape,
dim,
name,
&bm_param,
static_cast<int>(sizeof(bm_param)));
} else {
add_pooling_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
static_cast<const char*>(x_var_name.c_str()),
1,
shape,
dim,
name,
ksize[0],
ksize[1],
paddings[0],
paddings[0],
paddings[1],
paddings[1],
strides[0],
strides[1],
is_max ? 0 : 1,
static_cast<int>(average_exclusive),
static_cast<int>(global_pooling),
static_cast<int>(ceil_mode),
static_cast<const char*>(unique_op_name.c_str()),
nullptr);
}
graph->AddNode(output_var_name);
return SUCCESS;
}
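For reference on why the adaptive case bypasses add_pooling_layer: that call only accepts fixed kernel/stride/padding values, whereas adaptive pooling derives a separate window for each output index from the input/output size ratio. A sketch of the usual window bounds (stated as an assumption about the adaptive-pool semantics, not taken from this bridge):

#include <cmath>

static inline int AdaptiveStart(int out_idx, int in_size, int out_size) {
  return static_cast<int>(std::floor(static_cast<double>(out_idx * in_size) / out_size));
}
static inline int AdaptiveEnd(int out_idx, int in_size, int out_size) {
  return static_cast<int>(std::ceil(static_cast<double>((out_idx + 1) * in_size) / out_size));
}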
......@@ -105,3 +146,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
REGISTER_SUBGRAPH_BRIDGE(pool2d,
kBM,
paddle::lite::subgraph::bm::PoolConverter);
REGISTER_SUBGRAPH_BRIDGE(max_pool2d_with_index,
kBM,
paddle::lite::subgraph::bm::PoolConverter);
......@@ -40,6 +40,7 @@ int SubgraphEngine::BuildDeviceProgram() {
op->CheckShape();
op->InferShape();
std::string op_type = op->op_info()->Type();
LOG(INFO) << op_type;
if (!bridges.Exists(op_type, TARGET(kBM))) {
return subgraph::FAILED;
}
......@@ -59,6 +60,7 @@ int SubgraphEngine::BuildDeviceProgram() {
unsigned int data_size = 0;
bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
graph.UnlockCompilerMutex();
bmrt_hd_ = bmrt_create(bm_hd_);
if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
return subgraph::FAILED;
......
......@@ -82,7 +82,7 @@ class FCFunctor {
memcpy(X1_data + i * KK, X + i * K, K * sizeof(T));
}
};
parallel_memcpy_x(0,M);
parallel_memcpy_x(0, M);
blas.GEMM(false,
false,
M,
......@@ -103,14 +103,14 @@ class FCFunctor {
memcpy(Y + i * N, Y1_data + i * NN, N * sizeof(T));
}
};
parallel_memcpy_y(0,M);
parallel_memcpy_y(0, M);
return;
}
parallel_compute(0,M);
parallel_compute(0, M);
} else {
blas.MatMul(M, N, K, X, W, Y);
if (!B) {
if (!B) {
return;
}
parallel_compute(0, M);
......
......@@ -13,11 +13,11 @@
// limitations under the License.
#pragma once
#include <chrono>
#include "lite/backends/x86/math/blas.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include <chrono>
#include "lite/fluid/eigen.h"
namespace paddle {
namespace lite {
......@@ -68,11 +68,13 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
y_matrix = *y;
}
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat_test(x_matrix.mutable_data<T>(), x_matrix.dims()[0], x_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat1_test(y_matrix.mutable_data<T>(), y_matrix.dims()[0], y_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> out_test(z->mutable_data<T>(), z->dims()[0], z->dims()[1]);
out_test = mat_test * mat1_test;
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat_test(
x_matrix.mutable_data<T>(), x_matrix.dims()[0], x_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> mat1_test(
y_matrix.mutable_data<T>(), y_matrix.dims()[0], y_matrix.dims()[1]);
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> out_test(
z->mutable_data<T>(), z->dims()[0], z->dims()[1]);
out_test = mat_test * mat1_test;
}
virtual ~MulCompute() = default;
......
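The Eigen::Map idiom above wraps raw tensor buffers in matrix views so the product writes straight into the output buffer; a self-contained sketch of the same pattern with hypothetical 2x3 and 3x4 operands:

#include <Eigen/Dense>
#include <vector>

int main() {
  using Mat = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>;
  std::vector<float> x(2 * 3, 1.f), y(3 * 4, 2.f), z(2 * 4, 0.f);
  Eigen::Map<Mat> mx(x.data(), 2, 3);
  Eigen::Map<Mat> my(y.data(), 3, 4);
  Eigen::Map<Mat> mz(z.data(), 2, 4);
  mz = mx * my;  // every entry becomes 3 * (1 * 2) = 6
  return 0;
}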
......@@ -56,8 +56,7 @@ class SequenceReshapeCompute
out_lod[0][i + 1] = out_lod[0][i] + offset;
}
}
out->Resize(std::vector<int64_t>{in->numel()/out_width,
out_width});
out->Resize(std::vector<int64_t>{in->numel() / out_width, out_width});
auto* dst_ptr = out->template mutable_data<T>();
auto size = in->numel() * sizeof(T);
std::memcpy(dst_ptr, in->template data<T>(), size);
......@@ -76,8 +75,8 @@ class SequenceReshapeFloatCompute
auto& param = *param_.get_mutable<operators::SequenceReshapeParam>();
auto* in = param.x;
auto* out = param.output;
auto out_data= out->mutable_data<T>();
for(int i=0;i<out->dims().production(); i++){
auto out_data = out->mutable_data<T>();
for (int i = 0; i < out->dims().production(); i++) {
out_data[i] = 0;
}
int out_width = param.new_dim;
......@@ -103,8 +102,7 @@ class SequenceReshapeFloatCompute
out_lod[0][i + 1] = out_lod[0][i] + offset;
}
}
out->Resize(std::vector<int64_t>{in->numel()/out_width,
out_width});
out->Resize(std::vector<int64_t>{in->numel() / out_width, out_width});
auto* dst_ptr = out->mutable_data<T>();
auto size = in->numel() * sizeof(T);
std::memcpy(dst_ptr, in->data<T>(), size);
......
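A quick worked example of the reshape above: with in->numel() == 12 and param.new_dim (out_width) == 4, the output is resized to {12 / 4, 4} = {3, 4} and all 12 elements are copied through unchanged by the memcpy.

// e.g. in->numel() == 12, out_width == 4
// out->Resize({12 / 4, 4});            // dims become {3, 4}
// std::memcpy(dst_ptr, src_ptr, 12 * sizeof(T));  // data is unchanged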
......@@ -108,6 +108,7 @@ add_operator(collect_fpn_proposals_op_lite extra SRCS collect_fpn_proposals_op.c
add_operator(distribute_fpn_proposals_op_lite extra SRCS distribute_fpn_proposals_op.cc DEPS ${op_DEPS})
add_operator(crf_decoding_op_lite extra SRCS crf_decoding_op.cc DEPS ${op_DEPS})
add_operator(ctc_align_op_lite extra SRCS ctc_align_op.cc DEPS ${op_DEPS})
add_operator(max_pool_with_index_op extra SRCS max_pool_with_index_op.cc DEPS ${op_DEPS})
# for OCR specific
add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/max_pool_with_index_op.h"
#include <algorithm>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool MaxPoolWithIndexOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.x);
CHECK_OR_FALSE(param_.output);
const auto& x_dims = param_.x->dims();
const auto& strides = param_.strides;
const auto& ksize = param_.ksize;
const auto& paddings = *param_.paddings;
// "Pooling intput should be 4-D or 5-D tensor."
CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5);
// Input size and pooling size should be consistent.
CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U);
// Strides size and pooling size should be the same.
CHECK_OR_FALSE(ksize.size() == strides.size());
// Paddings size must be 4.
CHECK_OR_FALSE(paddings.size() == 4L);
return true;
}
inline int MaxPoolOutputSize(int input_size,
int filter_size,
int padding,
int stride) {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
return output_size;
}
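// Worked example (illustrative values, not part of this patch): with
// input_size = 7, filter_size = 3, padding = 1 and stride = 2,
// output_size = (7 - 3 + 2 * 1) / 2 + 1 = 4.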
bool MaxPoolWithIndexOpLite::InferShapeImpl() const {
const auto x_dims = param_.x->dims();
const auto ksize = param_.ksize;
std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
const auto& strides = param_.strides;
const auto& paddings = *param_.paddings;
const auto adaptive = param_.adaptive;
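// When "adaptive" is set, the kernel size is taken directly as the output
// spatial size; otherwise each output dim is derived from the pooling
// formula in MaxPoolOutputSize above.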
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (size_t i = 0; i < ksize.size(); ++i) {
output_shape.push_back(
MaxPoolOutputSize(x_dims[i + 2], ksize[i], paddings[i], strides[i]));
}
}
param_.output->Resize(lite::DDim(output_shape));
return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(max_pool2d_with_index,
paddle::lite::operators::MaxPoolWithIndexOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/core/tensor.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
class MaxPoolWithIndexOpLite : public OpLite {
public:
MaxPoolWithIndexOpLite() {}
explicit MaxPoolWithIndexOpLite(const std::string &type) : OpLite(type) {}
bool CheckShape() const override;
bool InferShapeImpl() const override;
// TODO(Superjomn) replace framework::OpDesc with a lite one.
bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override {
auto x = op_desc.Input("X").front();
auto out = op_desc.Output("Out").front();
auto mask = op_desc.Output("Mask").front();
CHECK(scope->FindVar(x));
CHECK(scope->FindVar(out));
CHECK(scope->FindVar(mask));
param_.x = scope->FindVar(x)->GetMutable<lite::Tensor>();
param_.output = scope->FindVar(out)->GetMutable<lite::Tensor>();
param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
if (op_desc.HasAttr("adaptive")) {
param_.adaptive = op_desc.GetAttr<bool>("adaptive");
}
// 2-pad to 4-pad
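// e.g. (illustrative values only): paddings {1, 2} is expanded below to
// {1, 1, 2, 2}, duplicating each of the two pads in place.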
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
} else {
if (paddings.size() != 4L) {
LOG(FATAL)
<< "Paddings size should be the same or twice as the inputs size.";
}
}
param_.paddings = std::make_shared<std::vector<int>>(paddings);
return true;
}
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "max_pool2d_with_index"; }
private:
mutable PoolParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -2,7 +2,7 @@
setlocal
setlocal enabledelayedexpansion
set source_path=%~dp0
set source_path=%~dp0\\..\\..\\
rem global variables
set BUILD_EXTRA=OFF
set BUILD_JAVA=ON
......@@ -92,16 +92,16 @@ goto:eof
) else (
echo "The directory of third_party exists, the third-party-05b862.tar.gz exists."
call:rm_rebuild_dir "%workspace%\third-party"
!python_path! %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
!python_path! %workspace%\lite\tools\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
)
) else (
if NOT EXIST "%workspace%\third-party-05b862.tar.gz" (
echo "The directory of third_party not exists, the third-party-05b862.tar.gz not exists."
call:download_third_party
!python_path! %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
!python_path! %workspace%\lite\tools\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
) else (
echo "The directory of third_party not exists, the third-party-05b862.tar.gz exists."
!python_path! %workspace%\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
!python_path! %workspace%\lite\tools\untar.py %source_path%\third-party-05b862.tar.gz %workspace%
)
)
......@@ -131,4 +131,4 @@ if "%tmp_var:~-1%"==" " (
set "tmp_var=%tmp_var:~0,-1%"
goto remove_left_space
)
goto:eof
\ No newline at end of file
goto:eof
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile, os
import sys
def untar(fname, dirs):
"""
extract the tar.gz file
:param fname: the name of tar.gz file
:param dirs: the path of decompressed file
:return: bool
"""
try:
t = tarfile.open(name=fname, mode='r:gz')
t.extractall(path=dirs)
return True
except Exception as e:
print(e)
return False
untar(sys.argv[1], sys.argv[2])
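# Usage sketch (paths are illustrative): the Windows build script above
# invokes this as
#   python untar.py third-party-05b862.tar.gz <workspace>
# to unpack the third-party archive into the workspace directory.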
......@@ -494,6 +494,7 @@ class ConvParam : public OpParam {
EXEC_DEPTHWISE3x3_FLOAT,
EXEC_SLIDINGWINDOW1x1_FLOAT,
EXEC_SLIDINGWINDOW3x3_FLOAT,
EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT,
EXEC_SLIDINGWINDOW5x5_FLOAT,
EXEC_SLIDINGWINDOW7x7_FLOAT,
EXEC_GEMM1x1s1_FLOAT,
......
......@@ -549,6 +549,9 @@ if (ENABLE_ALL_TEST)
ADD_EXECUTABLE(test-net-performance net/test_net_performance.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-net-performance paddle-mobile)
ADD_EXECUTABLE(test-infer-imfix net/test_inference_imfix.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-infer-imfix paddle-mobile)
# ADD_EXECUTABLE(test-inference-ercy net/test_inference_ercy.cpp test_helper.h test_include.h executor_for_test.h)
# target_link_libraries(test-inference-api-v2 paddle-mobile)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "io/paddle_inference_api.h"
using namespace paddle_mobile; // NOLINT
PaddleMobileConfig GetConfig() {
PaddleMobileConfig config;
config.precision = PaddleMobileConfig::FP32;
config.device = PaddleMobileConfig::kGPU_CL;
config.pre_post_type = PaddleMobileConfig::NONE_PRE_POST;
config.prog_file = "../models/imagefixmodel/model";
config.param_file = "../models/imagefixmodel/params";
config.lod_mode = false;
config.load_when_predict = false;
return config;
}
int main() {
PaddleMobileConfig config = GetConfig();
auto predictor =
CreatePaddlePredictor<PaddleMobileConfig,
PaddleEngineKind::kPaddleMobile>(config);
// factor
int input_rgb_len = 1 * 3 * 256 * 256;
std::vector<float> input_rgb_v(input_rgb_len, 1);
// SetupData<float>(input_rgb_v.data(), input_rgb_len, 0.f, 1.f);
PaddleTensor input_rgb;
input_rgb.shape = std::vector<int>({1, 3, 256, 256});
input_rgb.data = PaddleBuf(input_rgb_v.data(), input_rgb_len * sizeof(float));
input_rgb.dtype = PaddleDType::FLOAT32;
input_rgb.layout = LayoutType::LAYOUT_CHW;
// remap
int input_mask_len = 1 * 3 * 256 * 256;
std::vector<float> input_mask_v(input_mask_len, 1);
// SetupData<float>(input_mask_v.data(), input_mask_len, 0.f, 1.f);
PaddleTensor input_mask;
input_mask.shape = std::vector<int>({1, 3, 256, 256});
input_mask.data =
PaddleBuf(input_mask_v.data(), input_mask_len * sizeof(float));
input_mask.dtype = PaddleDType::FLOAT32;
input_mask.layout = LayoutType::LAYOUT_CHW;
PaddleTensor output0;
output0.shape = std::vector<int>({});
output0.data = PaddleBuf();
output0.dtype = PaddleDType::FLOAT32;
output0.layout = LayoutType::LAYOUT_CHW;
// PaddleTensor output1;
// output1.shape = std::vector<int>({});
// output1.data = PaddleBuf();
// output1.dtype = PaddleDType::FLOAT32;
// output1.layout = LayoutType::LAYOUT_CHW;
// PaddleTensor output2;
// output2.shape = std::vector<int>({});
// output2.data = PaddleBuf();
// output2.dtype = PaddleDType::FLOAT32;
// output2.layout = LayoutType::LAYOUT_CHW;
// PaddleTensor output3;
// output3.shape = std::vector<int>({});
// output3.data = PaddleBuf();
// output3.dtype = PaddleDType::FLOAT32;
// output3.layout = LayoutType::LAYOUT_CHW;
std::cout << "feed : " << std::endl;
predictor->Feed("input_rgb", input_rgb);
std::cout << "feed : " << std::endl;
predictor->Feed("input_mask", input_mask);
std::cout << "run : " << std::endl;
predictor->Run();
std::cout << "fetch : " << std::endl;
predictor->Fetch("save_infer_model/scale_0", &output0);
float* out_ptr0 = reinterpret_cast<float*>(output0.data.data());
std::cout << " print output0 : " << std::endl;
int numel = output0.data.length() / sizeof(float);
int stride = numel / 20;
stride = stride > 0 ? stride : 1;
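// Print roughly 20 evenly spaced samples of the output tensor as a quick
// sanity check of the inference result.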
for (size_t j = 0; j < numel; j += stride) {
std::cout << out_ptr0[j] << " ";
}
std::cout << std::endl;
return 0;
}