提交 87cec058 编写于 作者: J jameswu2014

support marker-net

上级 ac58218b
......@@ -30,9 +30,10 @@ void format_image(framework::Tensor *image_tensor) {
auto data_ptr = image_tensor->data<float>();
auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
float *old_p = p_data;
image::format_image(&p_data, channel, height, width);
if (old_p != p_data) {
if (p_data != data_ptr && external_ptr == nullptr) {
image_tensor->reset_data_ptr(p_data);
}
}
......@@ -48,9 +49,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
auto dims = ofm_tensor->dims();
size_t memory_size = 0;
if (dims.size() == 4) {
auto channel = dims[1], height = dims[2], width = dims[3];
memory_size =
height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
auto channel = dims[1], height = dims[2], width = dims[3], num = dims[0];
memory_size = num * height * align_to_x(channel * width, IMAGE_ALIGNMENT) *
sizeof(half);
} else if (dims.size() == 2) {
memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half);
} else {
......@@ -713,7 +714,6 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
}
for (int j = 0; j < split_num; ++j) {
// arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
activation_enable;
arg->split_conv_args[i]
......@@ -775,19 +775,6 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
fpga_flush(arg->split_conv_args[i]->conv_arg[j].filter_address,
filter_size);
/*{
static int cnt = 0;
std::string str = "deconv_filter";
if(cnt <= 1){
cnt++;
str += std::to_string(cnt);
int8_t result = 0;
fpga::savefile<int8_t>(str,
arg->split_conv_args[i]->conv_arg[j].filter_address, filter_size, result);
}
}*/
size_t bs_align_num = align_to_x(
arg->split_conv_args[i]->conv_arg[j].filter_num, BS_NUM_ALIGNMENT);
size_t bs_size = 2 * bs_align_num * sizeof(float);
......@@ -803,20 +790,6 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
memcpy(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_head, bs_size);
fpga_flush(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_size);
/* {
static int cnt = 0;
std::string str = "deconv_sb";
if(cnt <= 1){
cnt++;
str += std::to_string(cnt);
float result = 0;
fpga::savefile<float>(str,
arg->split_conv_args[i]->conv_arg[j].sb_address, 2 * bs_align_num,
result);
}
}*/
if (split_num == 1) {
arg->split_conv_args[i]->conv_arg[j].output.address =
arg->split_conv_args[i]->output.address;
......@@ -863,10 +836,13 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto deleter = [](void *p) { fpga_free(p); };
arg->vector_dwconv_space.push_back(
std::shared_ptr<char>(reinterpret_cast<char *>(bias_ptr), deleter));
auto filter_ptr = filter->data<int16_t>();
auto input_ptr = input->data<half>();
auto output_ptr = out->data<half>();
auto output_ptr = out->mutable_data<half>();
arg->sub_conv_num = 1;
// arg->relu_enabled = relu_enabled;
arg->output.activation.activation_type = activation_enable;
......
......@@ -49,6 +49,9 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
fpga::format_image(input);
auto input_ptr = input->data<float>();
auto external_ptr = reinterpret_cast<float *>(input->external_data);
float *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
auto output_ptr = output->data<half>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
......@@ -57,7 +60,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = input_ptr;
args.image.address = p_data;
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
......
......@@ -56,8 +56,9 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
return;
}
fpga::BypassArgs args = param.fpga_bypass_args;
auto data = (input->mutable_data<half>());
args.image.address = static_cast<void *>(data);
auto input_address = (input->data<half>());
args.image.address = static_cast<void*>(input_address);
fpga::PerformBypass(args);
fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
param.fpga_bypass_args.image.channels * sizeof(float));
......
......@@ -77,6 +77,10 @@ if (CON GREATER -1)
ADD_EXECUTABLE(test-rfcn fpga/test_rfcn.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-rfcn paddle-mobile)
ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-marker paddle-mobile)
set(FOUND_MATCH ON)
endif ()
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
#ifdef PADDLE_MOBILE_FPGA_V1
#include "fpga/V1/api.h"
#endif
#ifdef PADDLE_MOBILE_FPGA_V2
#include "fpga/V2/api.h"
#endif
#include <iostream>
#include <string>
void readStream(std::string filename, char *buf) {
std::ifstream in;
in.open(filename, std::ios::in|std::ios::binary);
if (!in.is_open()) {
std::cout << "open File Failed." << std::endl;
return;
}
in.seekg(0, std::ios::end); // go to the end
auto length = in.tellg(); // report location (this is the length)
in.seekg(0, std::ios::beg); // go back to the beginning
in.read(buf, length);
DLOG << length;
in.close();
}
void convert_to_chw(int16_t **data_in, int channel, int height, int width,int num,
int16_t *data_tmp) {
int64_t amount_per_side = width * height;
for(int n = 0; n < num; n++){
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
for (int c = 0; c < channel; c++) {
*(data_tmp + n* amount_per_side*channel + c * amount_per_side + width * h + w) = *((*data_in)++);
}
}
}
}
}
void dump_stride_half(std::string filename, Tensor input_tensor,
const int dumpnum, bool use_chw) {
// bool use_chw = true;
if(input_tensor.dims().size()!=4)return;
int c = (input_tensor.dims())[1];
int h = (input_tensor.dims())[2];
int w = (input_tensor.dims())[3];
int n = (input_tensor.dims())[0];
auto data_ptr = input_tensor.get_data();
auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
auto data_tmp = data_ptr_16;
if (use_chw){
data_tmp =
reinterpret_cast<half *>(malloc(n * c * h * w * sizeof(int16_t)));
convert_to_chw(&data_ptr_16, c, h, w,n, data_tmp);
}
std::ofstream out(filename.c_str());
float result = 0;
int stride = input_tensor.numel() / dumpnum;
stride = stride > 0 ? stride : 1;
for (int i = 0; i < input_tensor.numel(); i += stride) {
result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
out << result << std::endl;
}
out.close();
if(data_tmp!=data_ptr_16){free(data_tmp);}
}
void dump_stride_float(std::string filename, Tensor input_tensor,
const int dumpnum) {
auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
std::ofstream out(filename.c_str());
float result = 0;
int stride = input_tensor.numel() / dumpnum;
stride = stride > 0 ? stride : 1;
for (int i = 0; i < input_tensor.numel(); i += stride) {
result = data_ptr[i];
out << result << std::endl;
}
out.close();
}
void dump_stride(std::string filename, Tensor input_tensor,
const int dumpnum, bool use_chw) {
static int i=0;
if (input_tensor.numel() == 0) {
return;
}
if(input_tensor.type() == typeid(float)){
DLOG << "op: " <<i++ << ", float data "<< input_tensor.numel(); ;
dump_stride_float(filename, input_tensor,dumpnum);
}
else{
DLOG << "op: " <<i++ << ", half data "<< input_tensor.numel();;
dump_stride_half(filename, input_tensor,dumpnum, use_chw);
}
DLOG << "dump input address: " << input_tensor.get_data();
}
static const char *g_marker_combine = "../models/marker/model";
static const char *g_image_src_float = "../models/marker/model/input_0.bin";
int main() {
paddle_mobile::fpga::open_device();
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
//if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model",
// std::string(g_rfcn_combine) + "/params", true, false,
// 1, true)) {
if(paddle_mobile.Load(std::string(g_marker_combine),true)){
float img_info[3] = {720, 1280, 800.0f / 960.0f};
auto img = reinterpret_cast<float*>(fpga::fpga_malloc(720 * 1280 * 3 * sizeof(float)));
readStream(g_image_src_float, reinterpret_cast<char *>(img));
std::vector<void *> v(3, nullptr);
paddle_mobile.FeedData({ img});
paddle_mobile.Predict_To(-1);
for (int i = 47; i < 52; i++) {
auto tensor_ptr = paddle_mobile.FetchResult(i);
std::string saveName = "marker_" + std::to_string(i);
//if(i != 58)
paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),tensor_ptr->numel() * sizeof(float));
// tensor_ptr->numel() * sizeof(float));
dump_stride(saveName, (*tensor_ptr),tensor_ptr->numel(), true);//20);//tensor_ptr->numel());
/* float result = 0;
std::string str = "softmax_input_data";
float* data = static_cast<float*>(fpga::fpga_malloc(tensor_ptr->numel() * sizeof(float)));
str = "softmax_output_data";
auto output_ptr = static_cast<half*>((*tensor_ptr).get_data());
for (int idx = 0; idx < tensor_ptr->numel(); ++idx)
{
data[idx] = fpga::fp16_2_fp32(output_ptr[idx]);
}
fpga::savefile<float>(str,data, tensor_ptr->numel(), result ); */
}
// paddle_mobile.GetResults(&v);
DLOG << "Computation done";
fpga::fpga_free(img);
}
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册