提交 ca7fefa1 编写于 作者: L liu zhengxi 提交者: GitHub

Make inceptionv4, resnet50 and googlenet run on the x86 platform (#2250)

* make inceptionv4, resnet50 and googlenet run on the x86 platform and fix the compare part in x86 unittests, test=develop

* fix googlenet tests for benchmark record, test=develop

* [framework][profile] fix profile dump bug when op is feed and fetch test=develop (sangoly)
上级 e328ed89
......@@ -119,6 +119,11 @@ if(WITH_TESTING)
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/inception_v4_simple)
add_dependencies(test_inceptionv4_lite_x86 extern_lite_download_inception_v4_simple_tar_gz)
lite_cc_test(test_resnet50_lite_x86 SRCS test_resnet50_lite_x86.cc
DEPS cxx_api mir_passes lite_api_test_helper
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
add_dependencies(test_resnet50_lite_x86 extern_lite_download_resnet50_tar_gz)
endif()
endif()
......
......@@ -12,20 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
......@@ -34,12 +20,10 @@
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
// for googlenet
DEFINE_string(model_dir, "", "");
namespace paddle {
namespace lite {
#ifdef LITE_WITH_X86
......@@ -57,7 +41,21 @@ TEST(CXXApi, test_lite_googlenet) {
for (int i = 0; i < input_tensor->dims().production(); i++) {
data[i] = 1;
}
predictor.Run();
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
auto* out = predictor.GetOutput(0);
std::vector<float> results(
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
// Runs ResNet-50 inference on the x86 backend with an all-ones input,
// reports the average latency over FLAGS_repeats runs, and compares a
// strided sample of the first output tensor against recorded golden values.
TEST(Resnet50, test_resnet50_lite_x86) {
  // DeviceInfo::Init();
  // DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
  lite::Predictor predictor;
  // Kernels may be picked from the x86 target or fall back to host; float only.
  std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)},
                                   Place{TARGET(kHost), PRECISION(kFloat)}});
  std::string model_dir = FLAGS_model_dir;
  // Minimal optimization pipeline: kernel picking, repeated place inference
  // around target casting and io-copy picking, then runtime context binding.
  std::vector<std::string> passes({"static_kernel_pick_pass",
                                   "variable_place_inference_pass",
                                   "type_target_cast_pass",
                                   "variable_place_inference_pass",
                                   "io_copy_kernel_pick_pass",
                                   "variable_place_inference_pass",
                                   "runtime_context_assign_pass"});
  predictor.Build(model_dir, "", "", valid_places, passes);

  // Fill the 1x3x224x224 input with ones so the output is deterministic.
  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
  auto item_size = input_tensor->dims().production();
  // production() yields a 64-bit element count; keep the index the same type
  // to avoid a signed/unsigned (and narrowing) comparison.
  for (decltype(item_size) i = 0; i < item_size; i++) {
    data[i] = 1;
  }

  // Warm up, then time the measured runs.
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor.Run();
  }
  auto start = GetCurrentUS();
  for (int i = 0; i < FLAGS_repeats; ++i) {
    predictor.Run();
  }
  LOG(INFO) << "================== Speed Report ===================";
  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
            << " ms in average.";

  // Golden values: every `step`-th element of the 1x1000 output row,
  // recorded from a reference run of this model.
  std::vector<std::vector<float>> results;
  // i = 1
  results.emplace_back(std::vector<float>(
      {0.00024139918, 0.00020566184, 0.00022418296, 0.00041731037,
       0.0005366107,  0.00016948722, 0.00028638865, 0.0009257241,
       0.00072681636, 8.531815e-05,  0.0002129998,  0.0021168243,
       0.006387163,   0.0037145028,  0.0012812682,  0.00045948103,
       0.00013535398, 0.0002483765,  0.00076759676, 0.0002773295}));
  auto* out = predictor.GetOutput(0);
  ASSERT_EQ(out->dims().size(), 2);
  ASSERT_EQ(out->dims()[0], 1);
  ASSERT_EQ(out->dims()[1], 1000);
  const int step = 50;
  // size_t indices: results.size()/results[i].size() are unsigned, so an
  // `int` index here triggered signed/unsigned comparison warnings.
  for (size_t i = 0; i < results.size(); ++i) {
    for (size_t j = 0; j < results[i].size(); ++j) {
      EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
                  results[i][j],
                  1e-6);
    }
  }
}
} // namespace lite
} // namespace paddle
......@@ -82,8 +82,9 @@ class KernelBase {
#endif
#ifdef LITE_WITH_PROFILE
CHECK_GE(profile_id_, 0) << "Must set profile id first";
profile::ProfileBlock x(profile_id_, "kernel");
if (profile_id_ >= 0) {
profile::ProfileBlock x(profile_id_, "kernel");
}
#endif
Run();
}
......
......@@ -184,7 +184,9 @@ void Instruction::Run() {
CHECK(op_) << "op null";
CHECK(kernel_) << "kernel null";
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_, "instruction");
if (profile_id_ >= 0) {
profile::ProfileBlock x(profile_id_, "instruction");
}
#endif // LITE_WITH_PROFILE
if (first_epoch_) {
first_epoch_ = false;
......
......@@ -89,16 +89,18 @@ struct Instruction {
std::unique_ptr<KernelBase>&& kernel)
: op_(op), kernel_(std::move(kernel)) {
#ifdef LITE_WITH_PROFILE
profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd(kernel_->SerializedKernelType())
.id();
kernel_->SetProfileID(profile_id_);
// Set profile custom info
auto& profiler =
*profile::BasicProfiler<profile::BasicTimer>::Global().mutable_record(
profile_id_);
profiler.SetCustomInfo("op_type", op_->Type());
profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
if (op_->Type() != "feed" && op_->Type() != "fetch") {
profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd(kernel_->SerializedKernelType())
.id();
kernel_->SetProfileID(profile_id_);
// Set profile custom info
auto& profiler =
*profile::BasicProfiler<profile::BasicTimer>::Global().mutable_record(
profile_id_);
profiler.SetCustomInfo("op_type", op_->Type());
profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
}
#endif // LITE_WITH_PROFILE
}
......
......@@ -46,6 +46,7 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override {
// auto &context = ctx_->As<X86Context>();
auto &param = *param_.get_mutable<operators::BatchNormParam>();
param.is_test = true;
bool global_stats = param.is_test || param.use_global_stats;
const auto *x = param.x;
......
......@@ -104,7 +104,7 @@ TEST(batch_norm_x86, run_test) {
operators::BatchNormParam param;
param.x = &x;
param.is_test = false;
param.is_test = true;
param.scale = &scale;
param.bias = &bias;
param.mean = &mean;
......
......@@ -84,8 +84,9 @@ TEST(conv2d_x86, run_test) {
conv2d.Run();
LOG(INFO) << "output: ";
float ref_result[1] = {27.};
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i] << " ";
EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
}
}
......
......@@ -70,8 +70,11 @@ TEST(pool2d_x86, run_test) {
pool2d.Run();
LOG(INFO) << "output: ";
float ref_result[12] = {
5., 7., 13., 15., 21., 23., 29., 31., 37., 39., 45., 47.};
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i];
EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
}
}
......
......@@ -194,9 +194,9 @@ function build {
function test_server {
# Due to the missing of x86 kernels, we skip the following tests temporarily.
# TODO(xxx) clear the skip list latter
local skip_list=("test_paddle_api" "test_cxx_api" "test_googlenet"
local skip_list=("test_paddle_api" "test_cxx_api"
"test_mobilenetv1_lite_x86" "test_mobilenetv2_lite_x86"
"test_inceptionv4_lite_x86" "test_light_api"
"test_light_api"
"test_apis" "test_model_bin"
)
local to_skip=0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册