提交 e48e7398 编写于 作者: Z zhangyang

change format for FPGA track

上级 70a079c5
...@@ -24,23 +24,25 @@ namespace paddle_mobile { ...@@ -24,23 +24,25 @@ namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace deconv_bias_scale { namespace deconv_bias_scale {
void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n){ void deconv_bias_scale_expand(float** bias_scale_array, int num,
int sub_conv_n) {
int sub_num = num * sub_conv_n; int sub_num = num * sub_conv_n;
float* ptr_tmp = *bias_scale_array; float* ptr_tmp = *bias_scale_array;
float*ptr_bias_scale_expand = (float*)fpga_malloc(sizeof(float) * sub_num * 2); float* ptr_bias_scale_expand =
(float*)fpga_malloc(sizeof(float) * sub_num * 2);
int scale_base_offset = sub_num; int scale_base_offset = sub_num;
for (int i = 0; i < sub_conv_n; ++i) for (int i = 0; i < sub_conv_n; ++i) {
{
int offset = num * i; int offset = num * i;
//copy bias // copy bias
fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp,num * sizeof(float)); fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp, num * sizeof(float));
//copy scale // copy scale
fpga_copy(ptr_bias_scale_expand + scale_base_offset+ offset, ptr_tmp + num,num * sizeof(float)); fpga_copy(ptr_bias_scale_expand + scale_base_offset + offset, ptr_tmp + num,
num * sizeof(float));
} }
*bias_scale_array = ptr_bias_scale_expand; *bias_scale_array = ptr_bias_scale_expand;
fpga_free(ptr_tmp); fpga_free(ptr_tmp);
} }
} // namespace bias_scale } // namespace deconv_bias_scale
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -20,8 +20,9 @@ namespace paddle_mobile { ...@@ -20,8 +20,9 @@ namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace deconv_bias_scale { namespace deconv_bias_scale {
// Replaces *bias_scale_array (num bias values followed by num scale values)
// with a buffer that repeats the bias values and the scale values
// sub_conv_n times each.
void deconv_bias_scale_expand(float** bias_scale_array, int num,
                              int sub_conv_n);
} // namespace bias_scale } // namespace deconv_bias_scale
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "fpga/V1/deconv_filter.h"
#include <memory.h> #include <memory.h>
#include <algorithm> #include <algorithm>
#include "fpga/V1/deconv_filter.h"
// #include "deconv_filter.h" // #include "deconv_filter.h"
#include "fpga/V1/filter.h" #include "fpga/V1/filter.h"
// #include "filter.h" // #include "filter.h"
#include "fpga/V1/api.h" #include "fpga/V1/api.h"
// #include "fpga_api.h" // #include "fpga_api.h"
//just for test // just for test
//#include <string> //#include <string>
//#include "deconv.h" //#include "deconv.h"
//#include "deconv_api.h" //#include "deconv_api.h"
//using namespace std; // using namespace std;
//using namespace paddle_mobile::fpga; // using namespace paddle_mobile::fpga;
//using namespace baidu::fpga::deconv::api; // using namespace baidu::fpga::deconv::api;
//namespace api = baidu::fpga::deconv::api; // namespace api = baidu::fpga::deconv::api;
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
...@@ -23,105 +37,106 @@ namespace deconv_filter { ...@@ -23,105 +37,106 @@ namespace deconv_filter {
/*
 Inverse the kernel weights of each channel for every filter.

 The buffer is treated as num * channel consecutive planes of
 height * width floats, and the element order inside every plane is
 reversed. The reversal is done in place, so *data_in keeps pointing at the
 same buffer and no temporary allocation is needed.

 Nothing is done when data_in or *data_in is null, or when any dimension is
 not positive.
*/
void deconv_inverse_filter(float** data_in, int num, int channel, int width,
                           int height) {
  if (data_in == nullptr || *data_in == nullptr) {
    return;
  }
  if (num <= 0 || channel <= 0 || width <= 0 || height <= 0) {
    return;
  }

  // size_t arithmetic keeps the offsets from overflowing int on big filters.
  const size_t hw_len = static_cast<size_t>(height) * static_cast<size_t>(width);
  const size_t plane_count =
      static_cast<size_t>(num) * static_cast<size_t>(channel);

  float* plane = *data_in;
  for (size_t p = 0; p < plane_count; ++p) {
    std::reverse(plane, plane + hw_len);
    plane += hw_len;
  }
}
/*
 Calculate the sub padding number: (filter_axis - pad - 1) / stride.

 Returns 0 when the inputs are invalid, i.e. when stride is not positive or
 when pad is so large that filter_axis - pad - 1 is negative.
*/
int deconv_calc_sub_pad(int filter_axis, int pad, int stride) {
  const int remaining = filter_axis - pad - 1;
  if (stride <= 0 || remaining < 0) {
    // error
    return 0;
  }
  return remaining / stride;
}
// Returns filter_axis / stride (integer division).
// Returns 0 when stride is not positive, instead of dividing by zero.
int deconv_get_sub_filter_axis(int filter_axis, int stride) {
  if (stride <= 0) {
    // error
    return 0;
  }
  return filter_axis / stride;
}
// Returns image_axis + 2 * sub_pad - sub_filter_axis + 1.
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis) {
  const int padded_axis = image_axis + 2 * sub_pad;
  return (padded_axis - sub_filter_axis) + 1;
}
/*
 (filter_width - pad, filter_width - pad) is the first pixel of the sub-pixel
 image position, so the number of omitted rows or columns is (stride - idx),
 where idx is the value in [1, stride] with
 idx == (filter_width - pad) modulo stride.

 Returns 0 on invalid input: stride <= 0, pad < 0, or filter_width - pad <= 0.
*/
int deconv_get_omit(int stride, int filter_width, int pad) {
  const int first_pixel = filter_width - pad;
  if (stride <= 0 || pad < 0 || first_pixel <= 0) {
    // error
    return 0;
  }
  // Closed form of the search "smallest idx in [1, stride] such that
  // idx + k * stride == first_pixel for some k >= 0".
  const int idx = ((first_pixel - 1) % stride) + 1;
  return stride - idx;
}
// Returns the number of sub filters: filter_num * stride.
int deconv_get_sub_filter_num(int filter_num, int stride) {
  return filter_num * stride;
}
void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ){ void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel) {
char* ptr_tmp = *data_in; char* ptr_tmp = *data_in;
int sub_num = kernel_num * sub_conv_n; int sub_num = kernel_num * sub_conv_n;
int sub_h = height /sub_conv_n; int sub_h = height / sub_conv_n;
int sub_w = width / sub_conv_n; int sub_w = width / sub_conv_n;
int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; int sub_filter_size =
kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
char *ptr_sub_filter = (char *)fpga_malloc(sub_filter_size * sizeof(char)); char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
for (int idx = 0; idx < sub_conv_n; ++idx) { for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn =0; nn < sub_num; ++nn) { for (int nn = 0; nn < sub_num; ++nn) {
int ni = nn % kernel_num; int ni = nn % kernel_num;
int woff = sub_conv_n - 1 - (nn / kernel_num);// int woff = sub_conv_n - 1 - (nn / kernel_num); //
for (int hh =0; hh < sub_h; ++hh) { for (int hh = 0; hh < sub_h; ++hh) {
int hi = hh * sub_conv_n + idx % sub_conv_n; int hi = hh * sub_conv_n + idx % sub_conv_n;
for (int ww =0; ww < sub_w; ++ww) { for (int ww = 0; ww < sub_w; ++ww) {
int wi = ww * sub_conv_n + woff;//1 0 int wi = ww * sub_conv_n + woff; // 1 0
int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;// int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel; //
int kidx = ((ni * height + hi) * width + wi) * channel;// int kidx = ((ni * height + hi) * width + wi) * channel; //
fpga_copy(ptr_sub_filter+idx*sub_h*sub_w*channel*sub_num + sidx, (*data_in)+kidx, channel*sizeof(char)); fpga_copy(
ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
(*data_in) + kidx, channel * sizeof(char));
// for (int cc =0; cc < channel; ++cc) { // for (int cc =0; cc < channel; ++cc) {
// ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = (*data_in)[kidx + cc]; // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
// (*data_in)[kidx + cc];
// } // }
} }
} }
...@@ -131,28 +146,25 @@ void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n ...@@ -131,28 +146,25 @@ void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n
fpga_free(ptr_tmp); fpga_free(ptr_tmp);
} }
void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw){ void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
int hw) {
float* tmp = *filter_in; float* tmp = *filter_in;
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(hw * kernel_num * channels * sizeof(float))); float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
hw * kernel_num * channels * sizeof(float)));
for(int c = 0; c < channels; ++c)
{ for (int c = 0; c < channels; ++c) {
for (int n = 0; n < kernel_num ; ++n) for (int n = 0; n < kernel_num; ++n) {
{ paddle_mobile::fpga::fpga_copy(ptr_filter + n * hw + kernel_num * hw * c,
paddle_mobile::fpga::fpga_copy(ptr_filter + n*hw + kernel_num * hw * c, tmp + n * channels * hw + c * hw , hw * sizeof(float)); tmp + n * channels * hw + c * hw,
hw * sizeof(float));
} }
} }
*filter_in = ptr_filter; *filter_in = ptr_filter;
paddle_mobile::fpga::fpga_free(tmp); paddle_mobile::fpga::fpga_free(tmp);
} }
void deconv_format_filter(float** data_in, int num, int channel, int height, void deconv_format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max,int stride){ int width, int group_num, float max, int stride) {
int data_size = channel * height * width * num; int data_size = channel * height * width * num;
/*{ /*{
...@@ -175,24 +187,25 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -175,24 +187,25 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
string filename = "quantize_filter_data"; string filename = "quantize_filter_data";
api::savefile<char>(filename, (void *)*data_in, data_size, result2); api::savefile<char>(filename, (void *)*data_in, data_size, result2);
}*/ }*/
char **quantize_data = (char **)data_in; // NOLINT char** quantize_data = (char**)data_in; // NOLINT
filter::convert_to_hwc(quantize_data, num, channel, height, width); filter::convert_to_hwc(quantize_data, num, channel, height, width);
/*{ /*{
char result2 = (char)0; char result2 = (char)0;
string filename = "convert_to_hwc_filter_data"; string filename = "convert_to_hwc_filter_data";
api::savefile<char>(filename, (void *)*quantize_data, data_size, result2); api::savefile<char>(filename, (void *)*quantize_data, data_size,
result2);
}*/ }*/
deconv_get_sub_filter(quantize_data, height, width, stride, num, channel ); deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
/*{ /*{
char result2 = (char)0; char result2 = (char)0;
string filename = "sub_filter_filter_data"; string filename = "sub_filter_filter_data";
api::savefile<char>(filename, (void *)*quantize_data, data_size, result2); api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
}*/ }*/
int sub_conv_n = stride; int sub_conv_n = stride;
int sub_h = height/sub_conv_n; int sub_h = height / sub_conv_n;
int sub_w = width / sub_conv_n; int sub_w = width / sub_conv_n;
int sub_chw = sub_h * sub_w * channel; int sub_chw = sub_h * sub_w * channel;
int sub_num = sub_conv_n * num; int sub_num = sub_conv_n * num;
...@@ -201,34 +214,37 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -201,34 +214,37 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
filter::calc_num_per_div(sub_num, group_num, division_capacity); filter::calc_num_per_div(sub_num, group_num, division_capacity);
int num_per_div_after_alignment = int num_per_div_after_alignment =
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT); align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
int div_num = int div_num = (sub_num + num_per_div_before_alignment - 1) /
(sub_num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; num_per_div_before_alignment;
int residual = (sub_num) % num_per_div_before_alignment; int residual = (sub_num) % num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment * int num_after_alignment = num_per_div_after_alignment *
((residual == 0) ? div_num : (div_num - 1)) + ((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT); align_to_x(residual, FILTER_NUM_ALIGNMENT);
char**ptr_ptr_data = (char**)fpga_malloc(sub_conv_n*sizeof(char*)); char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
int origin_offset = sub_chw * sub_num; int origin_offset = sub_chw * sub_num;
for (int i = 0; i < sub_conv_n; ++i){ for (int i = 0; i < sub_conv_n; ++i) {
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset*sizeof(char)); (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
fpga_copy((ptr_ptr_data)[i], (*quantize_data)+origin_offset*i, origin_offset*sizeof(char)); fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
origin_offset * sizeof(char));
/* char result2 = (char)0; /* char result2 = (char)0;
string filename = "ptr_ptr_data" + to_string(i); string filename = "ptr_ptr_data" + to_string(i);
api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset, result2); api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset,
result2);
*/ */
} }
// char result2 = (char)0; // char result2 = (char)0;
// string filename = "interleave"; // string filename = "interleave";
// api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset, result2); // api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset,
// result2);
fpga_free(*quantize_data); fpga_free(*quantize_data);
int align_offset =
int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) *num_after_alignment; align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset*sizeof(char));//continuous space char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
for (int i = 0; i < sub_conv_n; ++i) sizeof(char)); // continuous space
{ for (int i = 0; i < sub_conv_n; ++i) {
int offset = i * origin_offset; int offset = i * origin_offset;
char* ptr_tmp = (ptr_ptr_data)[i]; char* ptr_tmp = (ptr_ptr_data)[i];
...@@ -242,7 +258,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -242,7 +258,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
string filename = "interleave" + to_string(i); string filename = "interleave" + to_string(i);
api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2); api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2);
*/ */
fpga_copy(ptr_space + i*align_offset,ptr_tmp,align_offset); fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
fpga_free(ptr_tmp); fpga_free(ptr_tmp);
} }
*data_in = (float*)ptr_space; *data_in = (float*)ptr_space;
...@@ -250,9 +266,10 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -250,9 +266,10 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
/* { /* {
char result2 = (char)0; char result2 = (char)0;
string filename = "ptr_space"; string filename = "ptr_space";
api::savefile<char>(filename, (void *)ptr_space, sub_conv_n * align_offset, result2); api::savefile<char>(filename, (void *)ptr_space, sub_conv_n *
align_offset, result2);
}*/ }*/
fpga_flush(ptr_space, sub_conv_n * align_offset*sizeof(char)); fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
} }
} // namespace deconv_filter } // namespace deconv_filter
......
#pragma once /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace deconv_filter { namespace deconv_filter {
// Reverses the element order of every height * width plane of the filter.
void deconv_inverse_filter(float** data_in, int num, int channel, int width,
                           int height);
// Returns (filter_axis - pad - 1) / stride, or 0 on invalid input.
int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
// Returns filter_num * stride.
int deconv_get_sub_filter_num(int filter_num, int stride);
// Returns filter_axis / stride.
int deconv_get_sub_filter_axis(int filter_axis, int stride);
// Returns image_axis + 2 * sub_pad - sub_filter_axis + 1.
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
// Returns the number of rows or columns to omit for the given stride.
int deconv_get_omit(int stride, int filter_width, int pad);
// Rearranges *data_in into sub filters; replaces *data_in with a new buffer.
void deconv_get_sub_filter(char** data_in, int height, int width,
                           int sub_conv_n, int kernel_num, int channel);
// Formats the filter data for the FPGA; replaces *data_in with a new buffer.
void deconv_format_filter(float** data_in, int num, int channel, int height,
                          int width, int group_num, float max, int stride);
// Swaps the kernel_num and channels dimensions of *filter_in.
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
} // namespace deconv_filter } // namespace deconv_filter
} // namespace fpga } // namespace fpga
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册