提交 e48e7398 编写于 作者: Z zhangyang

change format for FPGA track

上级 70a079c5
...@@ -24,23 +24,25 @@ namespace paddle_mobile { ...@@ -24,23 +24,25 @@ namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace deconv_bias_scale { namespace deconv_bias_scale {
void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n){ void deconv_bias_scale_expand(float** bias_scale_array, int num,
int sub_conv_n) {
int sub_num = num * sub_conv_n; int sub_num = num * sub_conv_n;
float* ptr_tmp = *bias_scale_array; float* ptr_tmp = *bias_scale_array;
float*ptr_bias_scale_expand = (float*)fpga_malloc(sizeof(float) * sub_num * 2); float* ptr_bias_scale_expand =
int scale_base_offset = sub_num; (float*)fpga_malloc(sizeof(float) * sub_num * 2);
for (int i = 0; i < sub_conv_n; ++i) int scale_base_offset = sub_num;
{ for (int i = 0; i < sub_conv_n; ++i) {
int offset = num * i; int offset = num * i;
//copy bias // copy bias
fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp,num * sizeof(float)); fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp, num * sizeof(float));
//copy scale // copy scale
fpga_copy(ptr_bias_scale_expand + scale_base_offset+ offset, ptr_tmp + num,num * sizeof(float)); fpga_copy(ptr_bias_scale_expand + scale_base_offset + offset, ptr_tmp + num,
num * sizeof(float));
} }
*bias_scale_array = ptr_bias_scale_expand; *bias_scale_array = ptr_bias_scale_expand;
fpga_free(ptr_tmp); fpga_free(ptr_tmp);
} }
} // namespace bias_scale } // namespace deconv_bias_scale
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -20,8 +20,9 @@ namespace paddle_mobile { ...@@ -20,8 +20,9 @@ namespace paddle_mobile {
namespace fpga { namespace fpga {
namespace deconv_bias_scale { namespace deconv_bias_scale {
void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n); void deconv_bias_scale_expand(float** bias_scale_array, int num,
int sub_conv_n);
} // namespace bias_scale } // namespace deconv_bias_scale
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
#include <memory.h> /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#include <algorithm>
#include "fpga/V1/deconv_filter.h" Licensed under the Apache License, Version 2.0 (the "License");
// #include "deconv_filter.h" you may not use this file except in compliance with the License.
#include "fpga/V1/filter.h" You may obtain a copy of the License at
// #include "filter.h"
#include "fpga/V1/api.h" http://www.apache.org/licenses/LICENSE-2.0
// #include "fpga_api.h"
Unless required by applicable law or agreed to in writing, software
//just for test distributed under the License is distributed on an "AS IS" BASIS,
//#include <string> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//#include "deconv.h" See the License for the specific language governing permissions and
//#include "deconv_api.h" limitations under the License. */
//using namespace std;
//using namespace paddle_mobile::fpga; #include "fpga/V1/deconv_filter.h"
//using namespace baidu::fpga::deconv::api; #include <memory.h>
//namespace api = baidu::fpga::deconv::api; #include <algorithm>
// #include "deconv_filter.h"
namespace paddle_mobile { #include "fpga/V1/filter.h"
namespace fpga { // #include "filter.h"
namespace deconv_filter { #include "fpga/V1/api.h"
// #include "fpga_api.h"
/*
inverse kernel weights of each channel for every filter // just for test
*/ //#include <string>
void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height){ //#include "deconv.h"
float *tmp = *data_in; //#include "deconv_api.h"
// float fix_range = 127;// float scale = fix_range / max; // using namespace std;
int data_size = num * channel * width * height; // using namespace paddle_mobile::fpga;
int hw_len = height * width; // using namespace baidu::fpga::deconv::api;
float *tmp_data = (float *)fpga_malloc(data_size * sizeof(float)); // namespace api = baidu::fpga::deconv::api;
for (int i = 0; i < num; ++i) {
for(int j = 0; j < channel; ++j){ namespace paddle_mobile {
for (int k = 0; k < hw_len; ++k) namespace fpga {
{ namespace deconv_filter {
tmp_data[i*channel*hw_len + j*hw_len + k] = (*data_in)[i*channel*hw_len + j*hw_len + hw_len - k-1];
} /*
} inverse kernel weights of each channel for every filter
*/
} void deconv_inverse_filter(float** data_in, int num, int channel, int width,
*data_in = (float *)tmp_data; // int height) {
fpga_free(tmp); float* tmp = *data_in;
} // float fix_range = 127;// float scale = fix_range / max;
int data_size = num * channel * width * height;
/* int hw_len = height * width;
calculate sub padding number float* tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
*/ for (int i = 0; i < num; ++i) {
int deconv_calc_sub_pad(int filter_axis, int pad, int stride){ for (int j = 0; j < channel; ++j) {
if(stride == 0 || ((filter_axis -pad-1)< 0)){ for (int k = 0; k < hw_len; ++k) {
//error tmp_data[i * channel * hw_len + j * hw_len + k] =
return 0; (*data_in)[i * channel * hw_len + j * hw_len + hw_len - k - 1];
} }
return (filter_axis - pad -1)/stride; }
} }
int deconv_get_sub_filter_axis(int filter_axis, int stride){ *data_in = (float*)tmp_data; //
fpga_free(tmp);
return (filter_axis/stride); }
}
/*
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis){ calculate sub padding number
return ((image_axis + 2*sub_pad -sub_filter_axis ) + 1); */
} int deconv_calc_sub_pad(int filter_axis, int pad, int stride) {
if (stride == 0 || ((filter_axis - pad - 1) < 0)) {
/* // error
(filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image position. return 0;
so the omit rows or columns is (stride - ) }
*/ return (filter_axis - pad - 1) / stride;
int deconv_get_omit(int stride, int filter_width, int pad){ }
if( ((filter_width-pad) <= 0) ){// ((filter_width-pad) > stride) || int deconv_get_sub_filter_axis(int filter_axis, int stride) {
//error return (filter_axis / stride);
return 0; }
}
int idx = 1; int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis) {
bool flag = false; return ((image_axis + 2 * sub_pad - sub_filter_axis) + 1);
for(idx = 1; idx <= stride; ++idx){ }
int j = idx;
for(;j <= filter_width;){ /*
if(j == filter_width - pad){ (filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image
flag = true; position. so the omit rows or columns is (stride - )
break; */
} int deconv_get_omit(int stride, int filter_width, int pad) {
j = j + stride; if (((filter_width - pad) <= 0)) { // ((filter_width-pad) > stride) ||
} // error
if (flag) return 0;
{ }
break; int idx = 1;
} bool flag = false;
for (idx = 1; idx <= stride; ++idx) {
} int j = idx;
for (; j <= filter_width;) {
return (stride - idx); if (j == filter_width - pad) {
} flag = true;
break;
int deconv_get_sub_filter_num(int filter_num, int stride){ }
return filter_num * stride; j = j + stride;
} }
if (flag) {
void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ){ break;
}
char* ptr_tmp = *data_in; }
int sub_num = kernel_num * sub_conv_n;
int sub_h = height /sub_conv_n; return (stride - idx);
int sub_w = width / sub_conv_n; }
int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; int deconv_get_sub_filter_num(int filter_num, int stride) {
return filter_num * stride;
char *ptr_sub_filter = (char *)fpga_malloc(sub_filter_size * sizeof(char)); }
for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn =0; nn < sub_num; ++nn) { void deconv_get_sub_filter(char** data_in, int height, int width,
int ni = nn % kernel_num; int sub_conv_n, int kernel_num, int channel) {
char* ptr_tmp = *data_in;
int woff = sub_conv_n - 1 - (nn / kernel_num);// int sub_num = kernel_num * sub_conv_n;
int sub_h = height / sub_conv_n;
for (int hh =0; hh < sub_h; ++hh) { int sub_w = width / sub_conv_n;
int hi = hh * sub_conv_n + idx % sub_conv_n;
for (int ww =0; ww < sub_w; ++ww) { int sub_filter_size =
int wi = ww * sub_conv_n + woff;//1 0 kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;// char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
int kidx = ((ni * height + hi) * width + wi) * channel;// for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn = 0; nn < sub_num; ++nn) {
fpga_copy(ptr_sub_filter+idx*sub_h*sub_w*channel*sub_num + sidx, (*data_in)+kidx, channel*sizeof(char)); int ni = nn % kernel_num;
// for (int cc =0; cc < channel; ++cc) {
// ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = (*data_in)[kidx + cc]; int woff = sub_conv_n - 1 - (nn / kernel_num); //
// }
} for (int hh = 0; hh < sub_h; ++hh) {
} int hi = hh * sub_conv_n + idx % sub_conv_n;
} for (int ww = 0; ww < sub_w; ++ww) {
} int wi = ww * sub_conv_n + woff; // 1 0
*data_in = ptr_sub_filter;
fpga_free(ptr_tmp); int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel; //
} int kidx = ((ni * height + hi) * width + wi) * channel; //
void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw){ fpga_copy(
float* tmp = *filter_in; ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(hw * kernel_num * channels * sizeof(float))); (*data_in) + kidx, channel * sizeof(char));
// for (int cc =0; cc < channel; ++cc) {
for(int c = 0; c < channels; ++c) // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
{ // (*data_in)[kidx + cc];
for (int n = 0; n < kernel_num ; ++n) // }
{ }
paddle_mobile::fpga::fpga_copy(ptr_filter + n*hw + kernel_num * hw * c, tmp + n * channels * hw + c * hw , hw * sizeof(float)); }
} }
} }
*filter_in = ptr_filter; *data_in = ptr_sub_filter;
paddle_mobile::fpga::fpga_free(tmp); fpga_free(ptr_tmp);
} }
void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
void deconv_format_filter(float** data_in, int num, int channel, int height, int hw) {
int width, int group_num, float max,int stride){ float* tmp = *filter_in;
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
hw * kernel_num * channels * sizeof(float)));
for (int c = 0; c < channels; ++c) {
int data_size = channel * height * width * num; for (int n = 0; n < kernel_num; ++n) {
paddle_mobile::fpga::fpga_copy(ptr_filter + n * hw + kernel_num * hw * c,
/*{ tmp + n * channels * hw + c * hw,
float result2 = (float)0; hw * sizeof(float));
string filename = "origin_filter_data"; }
api::savefile<float>(filename, (void *)*data_in, data_size, result2); }
}*/ *filter_in = ptr_filter;
paddle_mobile::fpga::fpga_free(tmp);
deconv_inverse_filter(data_in, num, channel, width, height); }
/* { void deconv_format_filter(float** data_in, int num, int channel, int height,
float result2 = (float)0; int width, int group_num, float max, int stride) {
string filename = "inverse_filter_data"; int data_size = channel * height * width * num;
api::savefile<float>(filename, (void *)*data_in, data_size, result2);
}*/ /*{
float result2 = (float)0;
filter::quantize(data_in, data_size, max); string filename = "origin_filter_data";
/* { api::savefile<float>(filename, (void *)*data_in, data_size, result2);
char result2 = (char)0; }*/
string filename = "quantize_filter_data";
api::savefile<char>(filename, (void *)*data_in, data_size, result2); deconv_inverse_filter(data_in, num, channel, width, height);
}*/
char **quantize_data = (char **)data_in; // NOLINT /* {
float result2 = (float)0;
filter::convert_to_hwc(quantize_data, num, channel, height, width); string filename = "inverse_filter_data";
/*{ api::savefile<float>(filename, (void *)*data_in, data_size, result2);
char result2 = (char)0; }*/
string filename = "convert_to_hwc_filter_data";
api::savefile<char>(filename, (void *)*quantize_data, data_size, result2); filter::quantize(data_in, data_size, max);
}*/ /* {
char result2 = (char)0;
deconv_get_sub_filter(quantize_data, height, width, stride, num, channel ); string filename = "quantize_filter_data";
/*{ api::savefile<char>(filename, (void *)*data_in, data_size, result2);
char result2 = (char)0; }*/
string filename = "sub_filter_filter_data"; char** quantize_data = (char**)data_in; // NOLINT
api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
}*/ filter::convert_to_hwc(quantize_data, num, channel, height, width);
/*{
int sub_conv_n = stride; char result2 = (char)0;
int sub_h = height/sub_conv_n; string filename = "convert_to_hwc_filter_data";
int sub_w = width / sub_conv_n; api::savefile<char>(filename, (void *)*quantize_data, data_size,
int sub_chw = sub_h * sub_w * channel; result2);
int sub_num = sub_conv_n * num; }*/
int division_capacity = filter::calc_division_capacity(sub_chw);
int num_per_div_before_alignment = deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
filter::calc_num_per_div(sub_num, group_num, division_capacity); /*{
int num_per_div_after_alignment = char result2 = (char)0;
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT); string filename = "sub_filter_filter_data";
int div_num = api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
(sub_num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; }*/
int residual = (sub_num) % num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment * int sub_conv_n = stride;
((residual == 0) ? div_num : (div_num - 1)) + int sub_h = height / sub_conv_n;
align_to_x(residual, FILTER_NUM_ALIGNMENT); int sub_w = width / sub_conv_n;
int sub_chw = sub_h * sub_w * channel;
char**ptr_ptr_data = (char**)fpga_malloc(sub_conv_n*sizeof(char*)); int sub_num = sub_conv_n * num;
int origin_offset = sub_chw * sub_num; int division_capacity = filter::calc_division_capacity(sub_chw);
for (int i = 0; i < sub_conv_n; ++i){ int num_per_div_before_alignment =
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset*sizeof(char)); filter::calc_num_per_div(sub_num, group_num, division_capacity);
fpga_copy((ptr_ptr_data)[i], (*quantize_data)+origin_offset*i, origin_offset*sizeof(char)); int num_per_div_after_alignment =
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
/* char result2 = (char)0; int div_num = (sub_num + num_per_div_before_alignment - 1) /
string filename = "ptr_ptr_data" + to_string(i); num_per_div_before_alignment;
api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset, result2); int residual = (sub_num) % num_per_div_before_alignment;
*/ int num_after_alignment = num_per_div_after_alignment *
} ((residual == 0) ? div_num : (div_num - 1)) +
// char result2 = (char)0; align_to_x(residual, FILTER_NUM_ALIGNMENT);
// string filename = "interleave";
// api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset, result2); char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
fpga_free(*quantize_data); int origin_offset = sub_chw * sub_num;
for (int i = 0; i < sub_conv_n; ++i) {
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) *num_after_alignment; fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset*sizeof(char));//continuous space origin_offset * sizeof(char));
for (int i = 0; i < sub_conv_n; ++i)
{ /* char result2 = (char)0;
int offset = i * origin_offset; string filename = "ptr_ptr_data" + to_string(i);
char* ptr_tmp = (ptr_ptr_data)[i]; api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset,
result2);
filter::align_element(&ptr_tmp, sub_num, sub_chw); */
filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw); }
// char result2 = (char)0;
filter::reorder(&ptr_tmp, num_after_alignment, sub_chw); // string filename = "interleave";
filter::interleave(&ptr_tmp, num_after_alignment, sub_chw); // api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset,
// result2);
/* char result2 = (char)0; fpga_free(*quantize_data);
string filename = "interleave" + to_string(i);
api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2); int align_offset =
*/ align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
fpga_copy(ptr_space + i*align_offset,ptr_tmp,align_offset); char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
fpga_free(ptr_tmp); sizeof(char)); // continuous space
} for (int i = 0; i < sub_conv_n; ++i) {
*data_in = (float*)ptr_space; int offset = i * origin_offset;
char* ptr_tmp = (ptr_ptr_data)[i];
/* {
char result2 = (char)0; filter::align_element(&ptr_tmp, sub_num, sub_chw);
string filename = "ptr_space"; filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw);
api::savefile<char>(filename, (void *)ptr_space, sub_conv_n * align_offset, result2);
}*/ filter::reorder(&ptr_tmp, num_after_alignment, sub_chw);
fpga_flush(ptr_space, sub_conv_n * align_offset*sizeof(char)); filter::interleave(&ptr_tmp, num_after_alignment, sub_chw);
}
/* char result2 = (char)0;
} // namespace deconv_filter string filename = "interleave" + to_string(i);
} // namespace fpga api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2);
} // namespace paddle_mobile */
fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
fpga_free(ptr_tmp);
}
*data_in = (float*)ptr_space;
/* {
char result2 = (char)0;
string filename = "ptr_space";
api::savefile<char>(filename, (void *)ptr_space, sub_conv_n *
align_offset, result2);
}*/
fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
}
} // namespace deconv_filter
} // namespace fpga
} // namespace paddle_mobile
#pragma once /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
namespace paddle_mobile { You may obtain a copy of the License at
namespace fpga {
namespace deconv_filter { http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height); distributed under the License is distributed on an "AS IS" BASIS,
int deconv_calc_sub_pad(int filter_axis, int pad, int stride); WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
int deconv_get_sub_filter_num(int filter_num, int stride); See the License for the specific language governing permissions and
int deconv_get_sub_filter_axis(int filter_axis, int stride); limitations under the License. */
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
int deconv_get_omit(int stride, int filter_width, int pad); #pragma once
void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel );
void deconv_format_filter(float** data_in, int num, int channel, int height, namespace paddle_mobile {
int width, int group_num, float max,int stride); namespace fpga {
void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw); namespace deconv_filter {
} // namespace deconv_filter void deconv_inverse_filter(float** data_in, int num, int channel, int width,
} // namespace fpga int height);
} // namespace paddle_mobile int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
\ No newline at end of file int deconv_get_sub_filter_num(int filter_num, int stride);
int deconv_get_sub_filter_axis(int filter_axis, int stride);
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
int deconv_get_omit(int stride, int filter_width, int pad);
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel);
void deconv_format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max, int stride);
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
} // namespace deconv_filter
} // namespace fpga
} // namespace paddle_mobile
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册