From e48e7398b24214e1f5f5d5a4f25d5486dcece979 Mon Sep 17 00:00:00 2001 From: zhangyang Date: Wed, 5 Dec 2018 10:11:55 +0800 Subject: [PATCH] change format for FPGA track --- src/fpga/V1/deconv_bias_scale.cpp | 22 +- src/fpga/V1/deconv_bias_scale.h | 5 +- src/fpga/V1/deconv_filter.cpp | 537 +++++++++++++++--------------- src/fpga/V1/deconv_filter.h | 59 ++-- 4 files changed, 328 insertions(+), 295 deletions(-) diff --git a/src/fpga/V1/deconv_bias_scale.cpp b/src/fpga/V1/deconv_bias_scale.cpp index 1a962358cd..0bcc91ddd2 100644 --- a/src/fpga/V1/deconv_bias_scale.cpp +++ b/src/fpga/V1/deconv_bias_scale.cpp @@ -24,23 +24,25 @@ namespace paddle_mobile { namespace fpga { namespace deconv_bias_scale { -void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n){ +void deconv_bias_scale_expand(float** bias_scale_array, int num, + int sub_conv_n) { int sub_num = num * sub_conv_n; float* ptr_tmp = *bias_scale_array; - float*ptr_bias_scale_expand = (float*)fpga_malloc(sizeof(float) * sub_num * 2); - int scale_base_offset = sub_num; - for (int i = 0; i < sub_conv_n; ++i) - { + float* ptr_bias_scale_expand = + (float*)fpga_malloc(sizeof(float) * sub_num * 2); + int scale_base_offset = sub_num; + for (int i = 0; i < sub_conv_n; ++i) { int offset = num * i; - //copy bias - fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp,num * sizeof(float)); - //copy scale - fpga_copy(ptr_bias_scale_expand + scale_base_offset+ offset, ptr_tmp + num,num * sizeof(float)); + // copy bias + fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp, num * sizeof(float)); + // copy scale + fpga_copy(ptr_bias_scale_expand + scale_base_offset + offset, ptr_tmp + num, + num * sizeof(float)); } *bias_scale_array = ptr_bias_scale_expand; fpga_free(ptr_tmp); } -} // namespace bias_scale +} // namespace deconv_bias_scale } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/V1/deconv_bias_scale.h b/src/fpga/V1/deconv_bias_scale.h index bccd922d09..7b9aaff756 100644 --- a/src/fpga/V1/deconv_bias_scale.h +++ b/src/fpga/V1/deconv_bias_scale.h @@ -20,8 +20,9 @@ namespace paddle_mobile { namespace fpga { namespace deconv_bias_scale { -void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n); +void deconv_bias_scale_expand(float** bias_scale_array, int num, + int sub_conv_n); -} // namespace bias_scale +} // namespace deconv_bias_scale } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/V1/deconv_filter.cpp b/src/fpga/V1/deconv_filter.cpp index 2db9e42cb1..96634e520f 100644 --- a/src/fpga/V1/deconv_filter.cpp +++ b/src/fpga/V1/deconv_filter.cpp @@ -1,260 +1,277 @@ -#include -#include -#include "fpga/V1/deconv_filter.h" -// #include "deconv_filter.h" -#include "fpga/V1/filter.h" -// #include "filter.h" -#include "fpga/V1/api.h" -// #include "fpga_api.h" - -//just for test -//#include -//#include "deconv.h" -//#include "deconv_api.h" -//using namespace std; -//using namespace paddle_mobile::fpga; -//using namespace baidu::fpga::deconv::api; -//namespace api = baidu::fpga::deconv::api; - -namespace paddle_mobile { -namespace fpga { -namespace deconv_filter { - -/* -inverse kernel weights of each channel for every filter -*/ -void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height){ - float *tmp = *data_in; - // float fix_range = 127;// float scale = fix_range / max; - int data_size = num * channel * width * height; - int hw_len = height * width; - float *tmp_data = (float *)fpga_malloc(data_size * sizeof(float)); - for (int i = 0; i < num; ++i) { - for(int j = 0; j < channel; ++j){ - for (int k = 0; k < hw_len; ++k) - { - tmp_data[i*channel*hw_len + j*hw_len + k] = (*data_in)[i*channel*hw_len + j*hw_len + hw_len - k-1]; - } - } - - } - *data_in = (float *)tmp_data; // - fpga_free(tmp); -} - -/* - calculate sub padding number -*/ - int deconv_calc_sub_pad(int filter_axis, int pad, int stride){ - if(stride == 0 || ((filter_axis -pad-1)< 0)){ - //error - return 0; - } - return (filter_axis - pad -1)/stride; -} -int deconv_get_sub_filter_axis(int filter_axis, int stride){ - - return (filter_axis/stride); -} - -int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis){ - return ((image_axis + 2*sub_pad -sub_filter_axis ) + 1); -} - -/* - (filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image position. - so the omit rows or columns is (stride - ) -*/ -int deconv_get_omit(int stride, int filter_width, int pad){ - if( ((filter_width-pad) <= 0) ){// ((filter_width-pad) > stride) || - //error - return 0; - } - int idx = 1; - bool flag = false; - for(idx = 1; idx <= stride; ++idx){ - int j = idx; - for(;j <= filter_width;){ - if(j == filter_width - pad){ - flag = true; - break; - } - j = j + stride; - } - if (flag) - { - break; - } - - } - - return (stride - idx); -} - -int deconv_get_sub_filter_num(int filter_num, int stride){ - return filter_num * stride; -} - -void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ){ - - char* ptr_tmp = *data_in; - int sub_num = kernel_num * sub_conv_n; - int sub_h = height /sub_conv_n; - int sub_w = width / sub_conv_n; - - int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; - - char *ptr_sub_filter = (char *)fpga_malloc(sub_filter_size * sizeof(char)); - for (int idx = 0; idx < sub_conv_n; ++idx) { - for (int nn =0; nn < sub_num; ++nn) { - int ni = nn % kernel_num; - - int woff = sub_conv_n - 1 - (nn / kernel_num);// - - for (int hh =0; hh < sub_h; ++hh) { - int hi = hh * sub_conv_n + idx % sub_conv_n; - for (int ww =0; ww < sub_w; ++ww) { - int wi = ww * sub_conv_n + woff;//1 0 - - int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;// - int kidx = ((ni * height + hi) * width + wi) * channel;// - - fpga_copy(ptr_sub_filter+idx*sub_h*sub_w*channel*sub_num + sidx, (*data_in)+kidx, channel*sizeof(char)); - // for (int cc =0; cc < channel; ++cc) { - // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = (*data_in)[kidx + cc]; - // } - } - } - } - } - *data_in = ptr_sub_filter; - fpga_free(ptr_tmp); -} - -void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw){ - float* tmp = *filter_in; - float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(hw * kernel_num * channels * sizeof(float))); - - for(int c = 0; c < channels; ++c) - { - for (int n = 0; n < kernel_num ; ++n) - { - paddle_mobile::fpga::fpga_copy(ptr_filter + n*hw + kernel_num * hw * c, tmp + n * channels * hw + c * hw , hw * sizeof(float)); - } - } - *filter_in = ptr_filter; - paddle_mobile::fpga::fpga_free(tmp); -} - - -void deconv_format_filter(float** data_in, int num, int channel, int height, - int width, int group_num, float max,int stride){ - - - - - int data_size = channel * height * width * num; - - /*{ - float result2 = (float)0; - string filename = "origin_filter_data"; - api::savefile(filename, (void *)*data_in, data_size, result2); - }*/ - - deconv_inverse_filter(data_in, num, channel, width, height); - - /* { - float result2 = (float)0; - string filename = "inverse_filter_data"; - api::savefile(filename, (void *)*data_in, data_size, result2); - }*/ - - filter::quantize(data_in, data_size, max); - /* { - char result2 = (char)0; - string filename = "quantize_filter_data"; - api::savefile(filename, (void *)*data_in, data_size, result2); - }*/ - char **quantize_data = (char **)data_in; // NOLINT - - filter::convert_to_hwc(quantize_data, num, channel, height, width); - /*{ - char result2 = (char)0; - string filename = "convert_to_hwc_filter_data"; - api::savefile(filename, (void *)*quantize_data, data_size, result2); - }*/ - - deconv_get_sub_filter(quantize_data, height, width, stride, num, channel ); - /*{ - char result2 = (char)0; - string filename = "sub_filter_filter_data"; - api::savefile(filename, (void *)*quantize_data, data_size, result2); - }*/ - - int sub_conv_n = stride; - int sub_h = height/sub_conv_n; - int sub_w = width / sub_conv_n; - int sub_chw = sub_h * sub_w * channel; - int sub_num = sub_conv_n * num; - int division_capacity = filter::calc_division_capacity(sub_chw); - int num_per_div_before_alignment = - filter::calc_num_per_div(sub_num, group_num, division_capacity); - int num_per_div_after_alignment = - align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT); - int div_num = - (sub_num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; - int residual = (sub_num) % num_per_div_before_alignment; - int num_after_alignment = num_per_div_after_alignment * - ((residual == 0) ? div_num : (div_num - 1)) + - align_to_x(residual, FILTER_NUM_ALIGNMENT); - - char**ptr_ptr_data = (char**)fpga_malloc(sub_conv_n*sizeof(char*)); - int origin_offset = sub_chw * sub_num; - for (int i = 0; i < sub_conv_n; ++i){ - (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset*sizeof(char)); - fpga_copy((ptr_ptr_data)[i], (*quantize_data)+origin_offset*i, origin_offset*sizeof(char)); - - /* char result2 = (char)0; - string filename = "ptr_ptr_data" + to_string(i); - api::savefile(filename, (void *)(ptr_ptr_data[i]), origin_offset, result2); - */ - } - // char result2 = (char)0; - // string filename = "interleave"; - // api::savefile(filename, (void *)*ptr_ptr_data, origin_offset, result2); - fpga_free(*quantize_data); - - - int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) *num_after_alignment; - char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset*sizeof(char));//continuous space - for (int i = 0; i < sub_conv_n; ++i) - { - int offset = i * origin_offset; - char* ptr_tmp = (ptr_ptr_data)[i]; - - filter::align_element(&ptr_tmp, sub_num, sub_chw); - filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw); - - filter::reorder(&ptr_tmp, num_after_alignment, sub_chw); - filter::interleave(&ptr_tmp, num_after_alignment, sub_chw); - - /* char result2 = (char)0; - string filename = "interleave" + to_string(i); - api::savefile(filename, (void *)ptr_tmp, align_offset, result2); -*/ - fpga_copy(ptr_space + i*align_offset,ptr_tmp,align_offset); - fpga_free(ptr_tmp); - } - *data_in = (float*)ptr_space; - - /* { - char result2 = (char)0; - string filename = "ptr_space"; - api::savefile(filename, (void *)ptr_space, sub_conv_n * align_offset, result2); - }*/ - fpga_flush(ptr_space, sub_conv_n * align_offset*sizeof(char)); -} - -} // namespace deconv_filter -} // namespace fpga -} // namespace paddle_mobile +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "fpga/V1/deconv_filter.h" +#include +#include +// #include "deconv_filter.h" +#include "fpga/V1/filter.h" +// #include "filter.h" +#include "fpga/V1/api.h" +// #include "fpga_api.h" + +// just for test +//#include +//#include "deconv.h" +//#include "deconv_api.h" +// using namespace std; +// using namespace paddle_mobile::fpga; +// using namespace baidu::fpga::deconv::api; +// namespace api = baidu::fpga::deconv::api; + +namespace paddle_mobile { +namespace fpga { +namespace deconv_filter { + +/* +inverse kernel weights of each channel for every filter +*/ +void deconv_inverse_filter(float** data_in, int num, int channel, int width, + int height) { + float* tmp = *data_in; + // float fix_range = 127;// float scale = fix_range / max; + int data_size = num * channel * width * height; + int hw_len = height * width; + float* tmp_data = (float*)fpga_malloc(data_size * sizeof(float)); + for (int i = 0; i < num; ++i) { + for (int j = 0; j < channel; ++j) { + for (int k = 0; k < hw_len; ++k) { + tmp_data[i * channel * hw_len + j * hw_len + k] = + (*data_in)[i * channel * hw_len + j * hw_len + hw_len - k - 1]; + } + } + } + *data_in = (float*)tmp_data; // + fpga_free(tmp); +} + +/* + calculate sub padding number +*/ +int deconv_calc_sub_pad(int filter_axis, int pad, int stride) { + if (stride == 0 || ((filter_axis - pad - 1) < 0)) { + // error + return 0; + } + return (filter_axis - pad - 1) / stride; +} +int deconv_get_sub_filter_axis(int filter_axis, int stride) { + return (filter_axis / stride); +} + +int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis) { + return ((image_axis + 2 * sub_pad - sub_filter_axis) + 1); +} + +/* + (filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image + position. so the omit rows or columns is (stride - ) +*/ +int deconv_get_omit(int stride, int filter_width, int pad) { + if (((filter_width - pad) <= 0)) { // ((filter_width-pad) > stride) || + // error + return 0; + } + int idx = 1; + bool flag = false; + for (idx = 1; idx <= stride; ++idx) { + int j = idx; + for (; j <= filter_width;) { + if (j == filter_width - pad) { + flag = true; + break; + } + j = j + stride; + } + if (flag) { + break; + } + } + + return (stride - idx); +} + +int deconv_get_sub_filter_num(int filter_num, int stride) { + return filter_num * stride; +} + +void deconv_get_sub_filter(char** data_in, int height, int width, + int sub_conv_n, int kernel_num, int channel) { + char* ptr_tmp = *data_in; + int sub_num = kernel_num * sub_conv_n; + int sub_h = height / sub_conv_n; + int sub_w = width / sub_conv_n; + + int sub_filter_size = + kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; + + char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char)); + for (int idx = 0; idx < sub_conv_n; ++idx) { + for (int nn = 0; nn < sub_num; ++nn) { + int ni = nn % kernel_num; + + int woff = sub_conv_n - 1 - (nn / kernel_num); // + + for (int hh = 0; hh < sub_h; ++hh) { + int hi = hh * sub_conv_n + idx % sub_conv_n; + for (int ww = 0; ww < sub_w; ++ww) { + int wi = ww * sub_conv_n + woff; // 1 0 + + int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel; // + int kidx = ((ni * height + hi) * width + wi) * channel; // + + fpga_copy( + ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx, + (*data_in) + kidx, channel * sizeof(char)); + // for (int cc =0; cc < channel; ++cc) { + // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = + // (*data_in)[kidx + cc]; + // } + } + } + } + } + *data_in = ptr_sub_filter; + fpga_free(ptr_tmp); +} + +void deconv_NC_convert(float** filter_in, int kernel_num, int channels, + int hw) { + float* tmp = *filter_in; + float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc( + hw * kernel_num * channels * sizeof(float))); + + for (int c = 0; c < channels; ++c) { + for (int n = 0; n < kernel_num; ++n) { + paddle_mobile::fpga::fpga_copy(ptr_filter + n * hw + kernel_num * hw * c, + tmp + n * channels * hw + c * hw, + hw * sizeof(float)); + } + } + *filter_in = ptr_filter; + paddle_mobile::fpga::fpga_free(tmp); +} + +void deconv_format_filter(float** data_in, int num, int channel, int height, + int width, int group_num, float max, int stride) { + int data_size = channel * height * width * num; + + /*{ + float result2 = (float)0; + string filename = "origin_filter_data"; + api::savefile(filename, (void *)*data_in, data_size, result2); + }*/ + + deconv_inverse_filter(data_in, num, channel, width, height); + + /* { + float result2 = (float)0; + string filename = "inverse_filter_data"; + api::savefile(filename, (void *)*data_in, data_size, result2); + }*/ + + filter::quantize(data_in, data_size, max); + /* { + char result2 = (char)0; + string filename = "quantize_filter_data"; + api::savefile(filename, (void *)*data_in, data_size, result2); + }*/ + char** quantize_data = (char**)data_in; // NOLINT + + filter::convert_to_hwc(quantize_data, num, channel, height, width); + /*{ + char result2 = (char)0; + string filename = "convert_to_hwc_filter_data"; + api::savefile(filename, (void *)*quantize_data, data_size, + result2); + }*/ + + deconv_get_sub_filter(quantize_data, height, width, stride, num, channel); + /*{ + char result2 = (char)0; + string filename = "sub_filter_filter_data"; + api::savefile(filename, (void *)*quantize_data, data_size, result2); +}*/ + + int sub_conv_n = stride; + int sub_h = height / sub_conv_n; + int sub_w = width / sub_conv_n; + int sub_chw = sub_h * sub_w * channel; + int sub_num = sub_conv_n * num; + int division_capacity = filter::calc_division_capacity(sub_chw); + int num_per_div_before_alignment = + filter::calc_num_per_div(sub_num, group_num, division_capacity); + int num_per_div_after_alignment = + align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT); + int div_num = (sub_num + num_per_div_before_alignment - 1) / + num_per_div_before_alignment; + int residual = (sub_num) % num_per_div_before_alignment; + int num_after_alignment = num_per_div_after_alignment * + ((residual == 0) ? div_num : (div_num - 1)) + + align_to_x(residual, FILTER_NUM_ALIGNMENT); + + char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*)); + int origin_offset = sub_chw * sub_num; + for (int i = 0; i < sub_conv_n; ++i) { + (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char)); + fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i, + origin_offset * sizeof(char)); + + /* char result2 = (char)0; + string filename = "ptr_ptr_data" + to_string(i); + api::savefile(filename, (void *)(ptr_ptr_data[i]), origin_offset, + result2); + */ + } + // char result2 = (char)0; + // string filename = "interleave"; + // api::savefile(filename, (void *)*ptr_ptr_data, origin_offset, + // result2); + fpga_free(*quantize_data); + + int align_offset = + align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment; + char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset * + sizeof(char)); // continuous space + for (int i = 0; i < sub_conv_n; ++i) { + int offset = i * origin_offset; + char* ptr_tmp = (ptr_ptr_data)[i]; + + filter::align_element(&ptr_tmp, sub_num, sub_chw); + filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw); + + filter::reorder(&ptr_tmp, num_after_alignment, sub_chw); + filter::interleave(&ptr_tmp, num_after_alignment, sub_chw); + + /* char result2 = (char)0; + string filename = "interleave" + to_string(i); + api::savefile(filename, (void *)ptr_tmp, align_offset, result2); +*/ + fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset); + fpga_free(ptr_tmp); + } + *data_in = (float*)ptr_space; + + /* { + char result2 = (char)0; + string filename = "ptr_space"; + api::savefile(filename, (void *)ptr_space, sub_conv_n * + align_offset, result2); + }*/ + fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char)); +} + +} // namespace deconv_filter +} // namespace fpga +} // namespace paddle_mobile diff --git a/src/fpga/V1/deconv_filter.h b/src/fpga/V1/deconv_filter.h index 802b2c7822..e89ebe5087 100644 --- a/src/fpga/V1/deconv_filter.h +++ b/src/fpga/V1/deconv_filter.h @@ -1,23 +1,36 @@ -#pragma once - - - -namespace paddle_mobile { -namespace fpga { -namespace deconv_filter { - - -void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height); -int deconv_calc_sub_pad(int filter_axis, int pad, int stride); -int deconv_get_sub_filter_num(int filter_num, int stride); -int deconv_get_sub_filter_axis(int filter_axis, int stride); -int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis); -int deconv_get_omit(int stride, int filter_width, int pad); -void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ); -void deconv_format_filter(float** data_in, int num, int channel, int height, - int width, int group_num, float max,int stride); -void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw); - -} // namespace deconv_filter -} // namespace fpga -} // namespace paddle_mobile \ No newline at end of file +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +namespace paddle_mobile { +namespace fpga { +namespace deconv_filter { + +void deconv_inverse_filter(float** data_in, int num, int channel, int width, + int height); +int deconv_calc_sub_pad(int filter_axis, int pad, int stride); +int deconv_get_sub_filter_num(int filter_num, int stride); +int deconv_get_sub_filter_axis(int filter_axis, int stride); +int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis); +int deconv_get_omit(int stride, int filter_width, int pad); +void deconv_get_sub_filter(char** data_in, int height, int width, + int sub_conv_n, int kernel_num, int channel); +void deconv_format_filter(float** data_in, int num, int channel, int height, + int width, int group_num, float max, int stride); +void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw); + +} // namespace deconv_filter +} // namespace fpga +} // namespace paddle_mobile -- GitLab