提交 14e6c781 编写于 作者: B baolei.an

test=develop 1. add bm testcase

上级 f19cb10f
...@@ -50,7 +50,8 @@ if (WITH_TESTING) ...@@ -50,7 +50,8 @@ if (WITH_TESTING)
${ops} ${host_kernels} ${ops} ${host_kernels}
CUDA_DEPS ${cuda_kernels} CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels}) XPU_DEPS ${xpu_kernels}
BM_DEPS ${bm_kernels})
endif() endif()
if(LITE_WITH_FPGA) if(LITE_WITH_FPGA)
set(light_api_deps ${light_api_deps} ${fpga_deps}) set(light_api_deps ${light_api_deps} ${fpga_deps})
...@@ -98,7 +99,8 @@ lite_cc_library(light_api SRCS light_api.cc ...@@ -98,7 +99,8 @@ lite_cc_library(light_api SRCS light_api.cc
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kenrels} CL_DEPS ${opencl_kenrels}
FPGA_DEPS ${fpga_kenrels}) FPGA_DEPS ${fpga_kenrels}
BM_DEPS ${bm_kenrels})
include(ExternalProject) include(ExternalProject)
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
...@@ -114,6 +116,7 @@ if(WITH_TESTING) ...@@ -114,6 +116,7 @@ if(WITH_TESTING)
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
EXCLUDE_COMPILE_DEPS "ON" EXCLUDE_COMPILE_DEPS "ON"
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
...@@ -149,6 +152,10 @@ if(WITH_TESTING) ...@@ -149,6 +152,10 @@ if(WITH_TESTING)
${ops} ${host_kernels} ${x86_kernels} ${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn) ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn)
add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz) add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz)
lite_cc_test(test_resnet50_lite_bm SRCS test_resnet50_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
endif() endif()
endif() endif()
...@@ -276,6 +283,7 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle ...@@ -276,6 +283,7 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL) ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
if (WITH_TESTING) if (WITH_TESTING)
add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz) add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
// Optional path to a text file of whitespace-separated float values used to
// fill the input tensor; when empty, the (currently disabled) input-feeding
// code in TestModel fills the tensor with ones instead.
DEFINE_string(input_img_txt_path,
"",
"if set input_img_txt_path, read the img filename as input.");
namespace paddle {
namespace lite {
// Builds a Predictor for FLAGS_model_dir restricted to the given valid_places.
// NOTE(review): everything past Build() — input feeding, timing, and output
// checking — is currently compiled out with `#if 0`, so this test only
// exercises model loading/compilation on the BM target.
void TestModel(const std::vector<Place>& valid_places) {
// Device setup intentionally disabled; presumably not needed (or not yet
// supported) for the BM backend — TODO confirm before re-enabling.
//DeviceInfo::Init();
//DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
// Empty model_file/param_file arguments: load from the model directory.
predictor.Build(FLAGS_model_dir, "", "", valid_places);
#if 0
// ---- Disabled: feed input, benchmark, and validate output ----
auto* input_tensor = predictor.GetInput(0);
// Assumes a single NCHW image input of 1x3x224x224 (standard ResNet50 shape).
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
if (FLAGS_input_img_txt_path.empty()) {
// No image supplied: use an all-ones tensor (matches the hard-coded
// ground-truth values below).
for (int i = 0; i < item_size; i++) {
data[i] = 1;
}
} else {
// Read whitespace-separated floats from the text file, one per element.
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size; i++) {
fs >> data[i];
}
}
// Warm-up runs are excluded from timing.
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
std::vector<std::vector<float>> results;
// i = 1
// ground truth result from fluid
// NOTE(review): reference values presumably correspond to the all-ones
// input above — confirm against the fluid baseline before re-enabling.
results.emplace_back(std::vector<float>(
{0.0002451055, 0.0002585023, 0.0002659616, 0.0002823}));
auto* out = predictor.GetOutput(0);
// Expect a 1x1000 classification logit/probability vector.
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
ASSERT_EQ(out->dims()[1], 1000);
// Compare every `step`-th element against the reference values.
int step = 50;
for (int i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) {
EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
results[i][j],
1e-6);
}
}
// Dump a sparse sample of the output and report the argmax for eyeballing.
auto* out_data = out->data<float>();
LOG(INFO) << "output data:";
for (int i = 0; i < out->numel(); i += step) {
LOG(INFO) << out_data[i];
}
float max_val = out_data[0];
int max_val_arg = 0;
for (int i = 1; i < out->numel(); i++) {
if (max_val < out_data[i]) {
max_val = out_data[i];
max_val_arg = i;
}
}
LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
#endif
}
// Smoke test for ResNet50 on the BM target: prefer int8 kernels, fall back
// to float kernels when no int8 implementation exists.
TEST(ResNet50, test_bm) {
  std::vector<Place> places;
  places.push_back(Place{TARGET(kBM), PRECISION(kInt8)});
  places.push_back(Place{TARGET(kBM), PRECISION(kFloat)});
  TestModel(places);
}
} // namespace lite
} // namespace paddle
...@@ -61,6 +61,7 @@ using NPUContext = Context<TargetType::kNPU>; ...@@ -61,6 +61,7 @@ using NPUContext = Context<TargetType::kNPU>;
using XPUContext = Context<TargetType::kXPU>; using XPUContext = Context<TargetType::kXPU>;
using OpenCLContext = Context<TargetType::kOpenCL>; using OpenCLContext = Context<TargetType::kOpenCL>;
using FPGAContext = Context<TargetType::kFPGA>; using FPGAContext = Context<TargetType::kFPGA>;
using BMContext = Context<TargetType::kBM>;
template <> template <>
class Context<TargetType::kHost> { class Context<TargetType::kHost> {
...@@ -88,6 +89,21 @@ class Context<TargetType::kNPU> { ...@@ -88,6 +89,21 @@ class Context<TargetType::kNPU> {
}; };
#endif #endif
#ifdef LITE_WITH_BM
template <>
class Context<TargetType::kBM> {
public:
Context() {}
explicit Context(const BMContext& ctx);
// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() {}
void CopySharedTo(NPUContext* ctx) {}
BMContext& operator=(const BMContext& ctx) {}
std::string name() const { return "BMContext"; }
};
#endif
#ifdef LITE_WITH_XPU #ifdef LITE_WITH_XPU
template <> template <>
class Context<TargetType::kXPU> { class Context<TargetType::kXPU> {
...@@ -373,6 +389,12 @@ class ContextScheduler { ...@@ -373,6 +389,12 @@ class ContextScheduler {
kernel_contexts_[TargetType::kFPGA].As<FPGAContext>().CopySharedTo( kernel_contexts_[TargetType::kFPGA].As<FPGAContext>().CopySharedTo(
&ctx->As<FPGAContext>()); &ctx->As<FPGAContext>());
break; break;
#endif
#ifdef LITE_WITH_BM
case TARGET(kBM):
kernel_contexts_[TargetType::kBM].As<BMContext>().CopySharedTo(
&ctx->As<BMContext>());
break;
#endif #endif
default: default:
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL #ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
...@@ -411,6 +433,9 @@ class ContextScheduler { ...@@ -411,6 +433,9 @@ class ContextScheduler {
#endif #endif
#ifdef LITE_WITH_XPU #ifdef LITE_WITH_XPU
InitContext<TargetType::kXPU, XPUContext>(); InitContext<TargetType::kXPU, XPUContext>();
#endif
#ifdef LITE_WITH_BM
InitContext<TargetType::kBM, BMContext>();
#endif #endif
} }
......
...@@ -54,6 +54,7 @@ bool PassMatchesTarget(const mir::Pass& pass, TargetType target) { ...@@ -54,6 +54,7 @@ bool PassMatchesTarget(const mir::Pass& pass, TargetType target) {
} }
bool PassMatchesKernels(const mir::Pass& pass) { bool PassMatchesKernels(const mir::Pass& pass) {
#if 0
const auto& kernels = pass.GetBoundKernels(); const auto& kernels = pass.GetBoundKernels();
for (const auto& kernel : kernels) { for (const auto& kernel : kernels) {
for (const auto& place : kernel.second) { for (const auto& place : kernel.second) {
...@@ -62,6 +63,7 @@ bool PassMatchesKernels(const mir::Pass& pass) { ...@@ -62,6 +63,7 @@ bool PassMatchesKernels(const mir::Pass& pass) {
} }
} }
} }
#endif
return true; return true;
} }
......
...@@ -86,6 +86,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create( ...@@ -86,6 +86,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
case TARGET(kFPGA): { case TARGET(kFPGA): {
CREATE_KERNEL(kFPGA); CREATE_KERNEL(kFPGA);
} break; } break;
case TARGET(kBM): {
CREATE_KERNEL(kBM);
} break;
default: default:
CHECK(false) << "not supported kernel target " << TargetToStr(target); CHECK(false) << "not supported kernel target " << TargetToStr(target);
} }
...@@ -158,6 +161,11 @@ KernelRegistry::KernelRegistry() ...@@ -158,6 +161,11 @@ KernelRegistry::KernelRegistry()
INIT_FOR(kFPGA, kFloat, kNHWC); INIT_FOR(kFPGA, kFloat, kNHWC);
INIT_FOR(kFPGA, kAny, kNHWC); INIT_FOR(kFPGA, kAny, kNHWC);
INIT_FOR(kFPGA, kAny, kAny); INIT_FOR(kFPGA, kAny, kAny);
INIT_FOR(kBM, kFloat, kNCHW);
INIT_FOR(kBM, kInt8, kNCHW);
INIT_FOR(kBM, kAny, kNCHW);
INIT_FOR(kBM, kAny, kAny);
#undef INIT_FOR #undef INIT_FOR
} }
......
...@@ -188,6 +188,16 @@ class KernelRegistry final { ...@@ -188,6 +188,16 @@ class KernelRegistry final {
PRECISION(kInt8), PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, // DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kFPGA), KernelRegistryForTarget<TARGET(kFPGA),
PRECISION(kFloat), PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, // DATALAYOUT(kNCHW)> *, //
......
...@@ -55,10 +55,10 @@ class Optimizer { ...@@ -55,10 +55,10 @@ class Optimizer {
SpecifyKernelPickTactic(kernel_pick_factor); SpecifyKernelPickTactic(kernel_pick_factor);
InitTargetTypeTransformPass(); InitTargetTypeTransformPass();
if (passes.empty()) { if (passes.empty()) {
RunPasses(std::vector<std::string>{ RunPasses(std::vector<std::string>{
{"lite_quant_dequant_fuse_pass", // {"lite_quant_dequant_fuse_pass", //
#if 0
"lite_conv_elementwise_fuse_pass", // conv-elemwise-bn "lite_conv_elementwise_fuse_pass", // conv-elemwise-bn
"lite_conv_bn_fuse_pass", // "lite_conv_bn_fuse_pass", //
"lite_conv_elementwise_fuse_pass", // conv-bn-elemwise "lite_conv_elementwise_fuse_pass", // conv-bn-elemwise
...@@ -114,7 +114,9 @@ class Optimizer { ...@@ -114,7 +114,9 @@ class Optimizer {
// TODO(ysh329): cause CL_INVALID_MEM_OBJECT when setArg in kernel // TODO(ysh329): cause CL_INVALID_MEM_OBJECT when setArg in kernel
"memory_optimize_pass", "memory_optimize_pass",
#endif #endif
"argument_type_display_pass"}}); "argument_type_display_pass"
#endif
}});
} else { } else {
RunPasses(passes); RunPasses(passes);
} }
......
...@@ -5,7 +5,7 @@ set -ex ...@@ -5,7 +5,7 @@ set -ex
BM_SDK_ROOT="$(pwd)/../BM_SDK" # BM SDK BM_SDK_ROOT="$(pwd)/../BM_SDK" # BM SDK
TARGET_NAME="BM1682" # default target TARGET_NAME="BM1682" # default target
BUILD_EXTRA=OFF # ON(with sequence ops)/OFF BUILD_EXTRA=OFF # ON(with sequence ops)/OFF
WITH_TESTING=OFF # ON/OFF WITH_TESTING=ON # ON/OFF
function print_usage { function print_usage {
echo -e "\nUSAGE:" echo -e "\nUSAGE:"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册