提交 e9833139 编写于 作者: C chonwhite

mobilenet works

上级 b65c1191
......@@ -199,6 +199,11 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
lite_cc_test(test_inceptionv3_fpga SRCS inceptionv3_test_fpga.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
lite_cc_test(test_inceptionv4 SRCS inceptionv4_test.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
#ifdef LITE_WITH_FPGA
// Smoke/latency test: loads a combined-param model from FLAGS_model_dir,
// runs it on the FPGA (with Host/ARM fallbacks) with an all-ones input,
// and reports the average inference latency.
TEST(ResNet50, test) {
  lite::Predictor predictor;
  // Preferred order: FPGA FP16/NHWC first, then Host/ARM float fallbacks
  // for ops without an FPGA kernel.
  std::vector<Place> valid_places({
      Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
      Place{TARGET(kHost), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kFloat)},
  });
  // Combined-parameter format: empty model_dir, explicit model/params files.
  predictor.Build("",
                  FLAGS_model_dir + "/model",
                  FLAGS_model_dir + "/params",
                  valid_places);

  // Fill the single NCHW 1x3x224x224 input with a constant value; this test
  // only measures speed, not numerical accuracy.
  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
  auto item_size = input_tensor->dims().production();
  // production() returns a 64-bit element count; match its width.
  for (int64_t i = 0; i < item_size; i++) {
    data[i] = 1;
  }

  // Untimed warm-up iterations (first runs pay one-off setup costs).
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor.Run();
  }

  // Timed iterations. Previously `start` was captured but never reported,
  // so the speed report logged nothing; now we emit the average latency.
  constexpr int kRepeats = 2;
  auto start = GetCurrentUS();
  for (int i = 0; i < kRepeats; ++i) {
    predictor.Run();
  }
  auto end = GetCurrentUS();

  LOG(INFO) << "================== Speed Report ===================";
  LOG(INFO) << "model: " << FLAGS_model_dir << ", warmup: " << FLAGS_warmup
            << ", repeats: " << kRepeats << ", avg latency: "
            << (end - start) / static_cast<double>(kRepeats) / 1000.0
            << " ms";
}
#endif
} // namespace lite
} // namespace paddle
......@@ -31,11 +31,7 @@ TEST(ResNet50, test) {
std::vector<Place> valid_places(
{Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -70,29 +70,25 @@ TEST(ResNet50, test) {
Place{TARGET(kARM), PRECISION(kFloat)},
});
predictor.Build(FLAGS_model_dir, "", "", valid_places);
// predictor.Build(FLAGS_model_dir,
// FLAGS_model_dir + "/model",
// FLAGS_model_dir + "/params",
// Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
// valid_places);
// predictor.Build(FLAGS_model_dir, "", "", valid_places);
predictor.Build("",
FLAGS_model_dir + "/model",
FLAGS_model_dir + "/params",
valid_places);
auto* input_tensor = predictor.GetInput(0);
int width = 300;
int height = 300;
// std::ifstream file_stream(FLAGS_input_file);
// if (!file_stream.good()) {
// std::cout << "file: " << FLAGS_input_file << " dones not exist!\n";
// exit(-1);
// return;
// }
int width = 416;
int height = 416;
std::ifstream file_stream(FLAGS_input_file);
// file_stream.open(path);
if (!file_stream.good()) {
std::cout << "file: " << FLAGS_input_file << " dones not exist!\n";
exit(-1);
return;
}
file_stream >> height;
file_stream >> width;
// file_stream >> height;
// file_stream >> width;
input_tensor->Resize(
DDim(std::vector<DDim::value_type>({1, 3, height, width})));
......@@ -107,12 +103,12 @@ TEST(ResNet50, test) {
int num = 3 * width * height;
for (int i = 0; i < num; ++i) {
float value = 0;
file_stream >> value;
data[i] = value;
}
file_stream.close();
// for (int i = 0; i < num; ++i) {
// float value = 0;
// file_stream >> value;
// data[i] = value;
// }
// file_stream.close();
for (int i = 0; i < 2; ++i) {
predictor.Run();
......@@ -123,12 +119,6 @@ TEST(ResNet50, test) {
std::cout << ":" << out->data<float>()[i] << std::endl;
}
// std::cout << "-------\n";
// auto* out1 = predictor.GetOutput(1);
// for (int i = 0;i < out1->dims().production();i++) {
// std::cout << ":" << out1->data<float>()[i] << std::endl;
// }
std::string file = "output/" + FLAGS_input_file.substr(6);
std::cout << "file:::" << file << std::endl;
......
......@@ -34,8 +34,11 @@ class Debugger {
void registerOutput(std::string op_type, zynqmp::Tensor* tensor) {
// tensor->printScale();
if (op_type != "conv") {
// tensor->saveToFile(op_type, true);
// if (op_type != "conv") {
// // tensor->saveToFile(op_type, true);
// }
if (op_config[op_type]) {
tensor->saveToFile(op_type, true);
}
}
......@@ -45,6 +48,9 @@ class Debugger {
op_config["concat"] = true;
op_config["conv"] = true;
op_config["crop"] = true;
op_config["fetch"] = true;
op_config["fc"] = true;
op_config["softmax"] = true;
}
};
......
......@@ -122,7 +122,7 @@ void RuntimeProgram::Run() {
#ifdef LITE_WITH_PROFILE
#ifdef LITE_WITH_PRECISION_PROFILE
#ifndef LITE_WITH_FPGA
LITE_PRECISION_PROFILE(inst)
// LITE_PRECISION_PROFILE(inst)
#endif
#endif // LITE_WITH_PRECISION_PROFILE
#endif // LITE_WITH_PROFILE
......
......@@ -7,7 +7,7 @@ set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
......@@ -22,11 +22,11 @@ add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps})
add_kernel(prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS ${fpga_deps})
# add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps})
# add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
# add_kernel(transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS ${fpga_deps})
add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
add_kernel(transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS ${fpga_deps})
add_kernel(io_copy_compute_fpga FPGA basic SRCS io_copy_compute.cc DEPS ${fpga_deps})
add_kernel(calib_compute_fpga FPGA basic SRCS calib_compute.cc DEPS ${fpga_deps})
......
......@@ -24,10 +24,6 @@ namespace kernels {
namespace fpga {
using float16 = zynqmp::float16;
// void CalibComputeFp32ToFP16::PrepareForRun() {
// }
void CalibComputeFp32ToFP16::Run() {
auto& param = this->Param<operators::CalibParam>();
const auto* din = param.input->data<float>();
......@@ -38,10 +34,6 @@ void CalibComputeFp32ToFP16::Run() {
return;
}
// void CalibComputeFP16ToFp32::PrepareForRun() {
// }
void CalibComputeFP16ToFp32::Run() {
auto& param = this->Param<operators::CalibParam>();
const auto* din = param.input->data<float16>();
......
......@@ -26,8 +26,6 @@ class CalibComputeFp32ToFP16
public:
using param_t = operators::CalibParam;
// void PrepareForRun() override;
void Run() override;
~CalibComputeFp32ToFP16() override{};
......@@ -40,8 +38,6 @@ class CalibComputeFP16ToFp32
public:
using param_t = operators::CalibParam;
// void PrepareForRun() override;
void Run() override;
~CalibComputeFP16ToFp32() override{};
......
文件模式从 100644 更改为 100755
......@@ -59,7 +59,7 @@ void FeedCompute::Run() {
REGISTER_LITE_KERNEL(
feed, kFPGA, kFP16, kNHWC, paddle::lite::kernels::fpga::FeedCompute, def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kFPGA),
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kNHWC))})
.BindOutput("Out",
......
......@@ -57,12 +57,34 @@ void FetchCompute::Run() {
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
fetch, kFPGA, kFP16, kNHWC, paddle::lite::kernels::fpga::FetchCompute, def)
REGISTER_LITE_KERNEL(fetch,
kFPGA,
kFP16,
kNHWC,
paddle::lite::kernels::fpga::FetchCompute,
fpga_host)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(fetch,
kFPGA,
kFP16,
kNHWC,
paddle::lite::kernels::fpga::FetchCompute,
host_host)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
......@@ -160,6 +160,22 @@ REGISTER_LITE_KERNEL(io_copy,
DATALAYOUT(kNHWC))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy,
kFPGA,
kAny,
kAny,
paddle::lite::kernels::fpga::IoCopyFpgaToHostCompute,
device_to_host_22)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kARM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.Finalize();
REGISTER_LITE_KERNEL(io_copy_once,
kFPGA,
kAny,
......
......@@ -81,7 +81,6 @@ void mul(MulCompute* k) {
void MulCompute::Run() {
pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR
zynqmp::FullyConnectedParam& fc_param = pe_.param();
Debugger::get_instance().registerOutput("mul", fc_param.output);
......
......@@ -411,3 +411,20 @@ REGISTER_LITE_KERNEL(multiclass_nms,
.BindInput("Scores", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
REGISTER_LITE_KERNEL(multiclass_nms,
kFPGA,
kFP16,
kNHWC,
paddle::lite::kernels::fpga::MulticlassNmsCompute,
def2)
.BindInput("BBoxes",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindInput("Scores",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
......@@ -27,7 +27,8 @@ void ReshapeCompute::Run() {
auto& param = Param<operators::ReshapeParam>();
param.output->mutable_data<float16>();
auto x = param.x;
auto actual_shape = param.actual_shape;
// auto actual_shape = param.actual_shape;
Tensor* actual_shape = nullptr; // TODO(chonwhite) change it.
auto output = param.output;
bool inplace = param.inplace;
auto x_dims = x->dims();
......
......@@ -34,9 +34,11 @@ void SoftmaxCompute::PrepareForRun() {
}
void SoftmaxCompute::Run() {
zynqmp::SoftmaxParam& softmax_param = pe_.param();
pe_.dispatch();
// softmax_param.output->saveToFile("softmax.txt");
#ifdef FPGA_PRINT_TENSOR
zynqmp::SoftmaxParam& softmax_param = pe_.param();
Debugger::get_instance().registerOutput("softmax", softmax_param.output);
#endif
}
} // namespace fpga
......
......@@ -30,14 +30,14 @@ using float16 = zynqmp::float16;
// Transpose
void TransposeCompute::Run() {
auto& param = this->Param<param_t>();
param.output->mutable_data<float16>();
// param.output->mutable_data<float16>();
}
// Transpose2
void Transpose2Compute::Run() {
auto& param = this->Param<param_t>();
param.output->mutable_data<float>();
param.x->ZynqTensor()->invalidate();
param.x->ZynqTensor()->unalignImage();
if (param.x->dims().size() != 4) {
// TransposeCompute<float>(param);
......@@ -54,9 +54,10 @@ void Transpose2Compute::Run() {
// index++;
// }
// }
} else {
param.x->ZynqTensor()->saveToFile("tx", true);
param.output->ZynqTensor()->copyFrom(param.x->ZynqTensor());
param.output->ZynqTensor()->saveToFile("to", true);
}
}
......
......@@ -46,19 +46,43 @@ REGISTER_LITE_KERNEL(reshape,
paddle::lite::kernels::host::ReshapeCompute,
def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("ShapeTensor",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindInput("Shape",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kAny),
DATALAYOUT(kAny))})
.Finalize();
// REGISTER_LITE_KERNEL(reshape,
// kFPGA,
// kFP16,
// kNHWC,
// paddle::lite::kernels::host::ReshapeCompute,
// def)
// .BindInput("X",
// {LiteType::GetTensorTy(
// TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC))})
// .BindInput("ShapeTensor",
// {LiteType::GetTensorTy(
// TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
// .BindInput("Shape",
// {LiteType::GetTensorTy(
// TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
// .BindOutput("Out",
// {LiteType::GetTensorTy(
// TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW))})
// .Finalize();
REGISTER_LITE_KERNEL(reshape2,
kHost,
kAny,
......
......@@ -27,5 +27,5 @@ cmake .. \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_PROFILE=ON
make -j4
make -j42
cd -
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册