提交 14e6c781 编写于 作者: B baolei.an

test=develop 1. add bm testcase

上级 f19cb10f
......@@ -50,7 +50,8 @@ if (WITH_TESTING)
${ops} ${host_kernels}
CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels})
XPU_DEPS ${xpu_kernels}
BM_DEPS ${bm_kernels})
endif()
if(LITE_WITH_FPGA)
set(light_api_deps ${light_api_deps} ${fpga_deps})
......@@ -98,7 +99,8 @@ lite_cc_library(light_api SRCS light_api.cc
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kenrels}
FPGA_DEPS ${fpga_kenrels})
FPGA_DEPS ${fpga_kenrels}
BM_DEPS ${bm_kernels})
include(ExternalProject)
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
......@@ -114,6 +116,7 @@ if(WITH_TESTING)
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
EXCLUDE_COMPILE_DEPS "ON"
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
......@@ -149,6 +152,10 @@ if(WITH_TESTING)
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn)
add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz)
lite_cc_test(test_resnet50_lite_bm SRCS test_resnet50_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
endif()
endif()
......@@ -276,6 +283,7 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
if (WITH_TESTING)
add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
// Command-line flag naming a text file of input values. The (currently
// disabled) inference section of TestModel streams the input tensor's floats
// from this file when the flag is non-empty, and fills the tensor with 1
// otherwise.
DEFINE_string(input_img_txt_path,
"",
"if set input_img_txt_path, read the img filename as input.");
namespace paddle {
namespace lite {
// Builds a lite::Predictor for the model at FLAGS_model_dir, restricted to
// `valid_places`. Only the Build() step is live; the inference run, the speed
// report and the output checks are compiled out with `#if 0`.
void TestModel(const std::vector<Place>& valid_places) {
  // Device setup is currently switched off:
  //   DeviceInfo::Init();
  //   DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND,
  //                                   FLAGS_threads);
  lite::Predictor predictor;
  predictor.Build(FLAGS_model_dir, "", "", valid_places);

#if 0
  // ---- Input: one 1x3x224x224 float tensor ----
  auto* input = predictor.GetInput(0);
  input->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* input_data = input->mutable_data<float>();
  auto element_count = input->dims().production();
  if (!FLAGS_input_img_txt_path.empty()) {
    // Stream the values from the user-supplied text file.
    std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
    if (!fs.is_open()) {
      LOG(FATAL) << "open input_img_txt error.";
    }
    for (int idx = 0; idx < element_count; idx++) {
      fs >> input_data[idx];
    }
  } else {
    // No file given: use an all-ones input.
    for (int idx = 0; idx < element_count; idx++) {
      input_data[idx] = 1;
    }
  }

  // ---- Warm-up runs, then timed runs ----
  for (int round = 0; round < FLAGS_warmup; ++round) {
    predictor.Run();
  }

  auto start = GetCurrentUS();
  for (int round = 0; round < FLAGS_repeats; ++round) {
    predictor.Run();
  }

  LOG(INFO) << "================== Speed Report ===================";
  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
            << " ms in average.";

  // ---- Compare sampled outputs against reference values ----
  std::vector<std::vector<float>> results;
  // i = 1
  // ground truth result from fluid
  results.emplace_back(std::vector<float>(
      {0.0002451055, 0.0002585023, 0.0002659616, 0.0002823}));

  auto* out = predictor.GetOutput(0);
  ASSERT_EQ(out->dims().size(), 2);
  ASSERT_EQ(out->dims()[0], 1);
  ASSERT_EQ(out->dims()[1], 1000);

  int step = 50;
  for (int row = 0; row < results.size(); ++row) {
    for (int col = 0; col < results[row].size(); ++col) {
      // Every `step`-th element of output row `row` is checked.
      EXPECT_NEAR(out->data<float>()[col * step + (out->dims()[1] * row)],
                  results[row][col],
                  1e-6);
    }
  }

  // ---- Log sampled outputs and the arg-max ----
  auto* out_data = out->data<float>();
  LOG(INFO) << "output data:";
  for (int pos = 0; pos < out->numel(); pos += step) {
    LOG(INFO) << out_data[pos];
  }

  float max_val = out_data[0];
  int max_val_arg = 0;
  for (int pos = 1; pos < out->numel(); pos++) {
    if (out_data[pos] > max_val) {
      max_val = out_data[pos];
      max_val_arg = pos;
    }
  }
  LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
#endif
}
// Runs TestModel with the BM target, offering int8 first and float second.
TEST(ResNet50, test_bm) {
  const Place bm_int8{TARGET(kBM), PRECISION(kInt8)};
  const Place bm_float{TARGET(kBM), PRECISION(kFloat)};

  std::vector<Place> valid_places;
  valid_places.push_back(bm_int8);
  valid_places.push_back(bm_float);

  TestModel(valid_places);
}
} // namespace lite
} // namespace paddle
......@@ -61,6 +61,7 @@ using NPUContext = Context<TargetType::kNPU>;
using XPUContext = Context<TargetType::kXPU>;
using OpenCLContext = Context<TargetType::kOpenCL>;
using FPGAContext = Context<TargetType::kFPGA>;
using BMContext = Context<TargetType::kBM>;
template <>
class Context<TargetType::kHost> {
......@@ -88,6 +89,21 @@ class Context<TargetType::kNPU> {
};
#endif
#ifdef LITE_WITH_BM
template <>
class Context<TargetType::kBM> {
public:
Context() {}
explicit Context(const BMContext& ctx);
// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() {}
void CopySharedTo(NPUContext* ctx) {}
BMContext& operator=(const BMContext& ctx) {}
std::string name() const { return "BMContext"; }
};
#endif
#ifdef LITE_WITH_XPU
template <>
class Context<TargetType::kXPU> {
......@@ -373,6 +389,12 @@ class ContextScheduler {
kernel_contexts_[TargetType::kFPGA].As<FPGAContext>().CopySharedTo(
&ctx->As<FPGAContext>());
break;
#endif
#ifdef LITE_WITH_BM
case TARGET(kBM):
kernel_contexts_[TargetType::kBM].As<BMContext>().CopySharedTo(
&ctx->As<BMContext>());
break;
#endif
default:
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
......@@ -411,6 +433,9 @@ class ContextScheduler {
#endif
#ifdef LITE_WITH_XPU
InitContext<TargetType::kXPU, XPUContext>();
#endif
#ifdef LITE_WITH_BM
InitContext<TargetType::kBM, BMContext>();
#endif
}
......
......@@ -54,6 +54,7 @@ bool PassMatchesTarget(const mir::Pass& pass, TargetType target) {
}
bool PassMatchesKernels(const mir::Pass& pass) {
#if 0
const auto& kernels = pass.GetBoundKernels();
for (const auto& kernel : kernels) {
for (const auto& place : kernel.second) {
......@@ -62,6 +63,7 @@ bool PassMatchesKernels(const mir::Pass& pass) {
}
}
}
#endif
return true;
}
......
......@@ -86,6 +86,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
case TARGET(kFPGA): {
CREATE_KERNEL(kFPGA);
} break;
case TARGET(kBM): {
CREATE_KERNEL(kBM);
} break;
default:
CHECK(false) << "not supported kernel target " << TargetToStr(target);
}
......@@ -158,6 +161,11 @@ KernelRegistry::KernelRegistry()
INIT_FOR(kFPGA, kFloat, kNHWC);
INIT_FOR(kFPGA, kAny, kNHWC);
INIT_FOR(kFPGA, kAny, kAny);
INIT_FOR(kBM, kFloat, kNCHW);
INIT_FOR(kBM, kInt8, kNCHW);
INIT_FOR(kBM, kAny, kNCHW);
INIT_FOR(kBM, kAny, kAny);
#undef INIT_FOR
}
......
......@@ -188,6 +188,16 @@ class KernelRegistry final {
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kFPGA),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
......
......@@ -55,10 +55,10 @@ class Optimizer {
SpecifyKernelPickTactic(kernel_pick_factor);
InitTargetTypeTransformPass();
if (passes.empty()) {
RunPasses(std::vector<std::string>{
{"lite_quant_dequant_fuse_pass", //
#if 0
"lite_conv_elementwise_fuse_pass", // conv-elemwise-bn
"lite_conv_bn_fuse_pass", //
"lite_conv_elementwise_fuse_pass", // conv-bn-elemwise
......@@ -114,7 +114,9 @@ class Optimizer {
// TODO(ysh329): cause CL_INVALID_MEM_OBJECT when setArg in kernel
"memory_optimize_pass",
#endif
"argument_type_display_pass"}});
"argument_type_display_pass"
#endif
}});
} else {
RunPasses(passes);
}
......
......@@ -5,7 +5,7 @@ set -ex
BM_SDK_ROOT="$(pwd)/../BM_SDK" # BM SDK
TARGET_NAME="BM1682" # default target
BUILD_EXTRA=OFF # ON(with sequence ops)/OFF
WITH_TESTING=OFF # ON/OFF
WITH_TESTING=ON # ON/OFF
function print_usage {
echo -e "\nUSAGE:"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册