change format for FPGA track

e48e7398 · zhangyang · 70a079c5 · e48e7398 · e48e7398 · e48e7398
4 changed files
--- a/src/fpga/V1/deconv_bias_scale.cpp
+++ b/src/fpga/V1/deconv_bias_scale.cpp
@@ -24,23 +24,25 @@ namespace paddle_mobile {
 namespace fpga {
 namespace deconv_bias_scale {
-void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n){
+void deconv_bias_scale_expand(float** bias_scale_array, int num,
+                              int sub_conv_n) {
  int sub_num = num * sub_conv_n;
  float* ptr_tmp = *bias_scale_array;
-  float*ptr_bias_scale_expand = (float*)fpga_malloc(sizeof(float) * sub_num * 2);
+  float* ptr_bias_scale_expand =
+      (float*)fpga_malloc(sizeof(float) * sub_num * 2);
  int scale_base_offset = sub_num;
-  for (int i = 0; i < sub_conv_n; ++i)
+  for (int i = 0; i < sub_conv_n; ++i) {
-  {
    int offset = num * i;
-     //copy bias
+    // copy bias
-     fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp,num * sizeof(float));
+    fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp, num * sizeof(float));
-       //copy scale
+    // copy scale
-     fpga_copy(ptr_bias_scale_expand + scale_base_offset+ offset, ptr_tmp + num,num * sizeof(float));
+    fpga_copy(ptr_bias_scale_expand + scale_base_offset + offset, ptr_tmp + num,
+              num * sizeof(float));
  }
  *bias_scale_array = ptr_bias_scale_expand;
  fpga_free(ptr_tmp);
 }
-}  // namespace bias_scale
+}  // namespace deconv_bias_scale
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/V1/deconv_bias_scale.h
+++ b/src/fpga/V1/deconv_bias_scale.h
@@ -20,8 +20,9 @@ namespace paddle_mobile {
 namespace fpga {
 namespace deconv_bias_scale {
-void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n);
+void deconv_bias_scale_expand(float** bias_scale_array, int num,
+                              int sub_conv_n);
-}  // namespace bias_scale
+}  // namespace deconv_bias_scale
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/V1/deconv_filter.cpp
+++ b/src/fpga/V1/deconv_filter.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "fpga/V1/deconv_filter.h"
 #include <memory.h>
 #include <algorithm>
-#include "fpga/V1/deconv_filter.h"
 // #include "deconv_filter.h"
 #include "fpga/V1/filter.h"
 // #include "filter.h"
 #include "fpga/V1/api.h"
 // #include "fpga_api.h"
-//just for test
+// just for test
 //#include <string>
 //#include "deconv.h"
 //#include "deconv_api.h"
-//using namespace std;
+// using namespace std;
-//using namespace paddle_mobile::fpga;
+// using namespace paddle_mobile::fpga;
-//using namespace baidu::fpga::deconv::api;
+// using namespace baidu::fpga::deconv::api;
-//namespace api = baidu::fpga::deconv::api;
+// namespace api = baidu::fpga::deconv::api;
 namespace paddle_mobile {
 namespace fpga {
@@ -23,105 +37,106 @@ namespace deconv_filter {
 /*
 inverse kernel weights of each channel for every filter
 */
-void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height){
+void deconv_inverse_filter(float** data_in, int num, int channel, int width,
-  float *tmp = *data_in;
+                           int height) {
+  float* tmp = *data_in;
  // float fix_range = 127;//  float scale = fix_range / max;
  int data_size = num * channel * width * height;
  int hw_len = height * width;
-  float *tmp_data = (float *)fpga_malloc(data_size * sizeof(float));
+  float* tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
  for (int i = 0; i < num; ++i) {
-  	for(int j = 0; j < channel; ++j){
+    for (int j = 0; j < channel; ++j) {
-  		for (int k = 0; k < hw_len; ++k)
+      for (int k = 0; k < hw_len; ++k) {
-  		{
+        tmp_data[i * channel * hw_len + j * hw_len + k] =
-  			tmp_data[i*channel*hw_len + j*hw_len + k] = (*data_in)[i*channel*hw_len + j*hw_len + hw_len - k-1];
+            (*data_in)[i * channel * hw_len + j * hw_len + hw_len - k - 1];
      }
    }
  }
-  *data_in = (float *)tmp_data;  //
+  *data_in = (float*)tmp_data;  //
  fpga_free(tmp);
 }
 /*
    calculate sub padding number
 */
- int deconv_calc_sub_pad(int filter_axis, int pad, int stride){
+int deconv_calc_sub_pad(int filter_axis, int pad, int stride) {
-	if(stride == 0 || ((filter_axis -pad-1)< 0)){
+  if (stride == 0 || ((filter_axis - pad - 1) < 0)) {
-		//error
+    // error
    return 0;
  }
-	return (filter_axis - pad -1)/stride;
+  return (filter_axis - pad - 1) / stride;
 }
-int deconv_get_sub_filter_axis(int filter_axis, int stride){
+int deconv_get_sub_filter_axis(int filter_axis, int stride) {
+  return (filter_axis / stride);
-	return (filter_axis/stride);
 }
-int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis){
+int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis) {
-	return ((image_axis + 2*sub_pad -sub_filter_axis ) + 1);
+  return ((image_axis + 2 * sub_pad - sub_filter_axis) + 1);
 }
 /*
-	(filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image position.
+    (filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image
-	so the omit rows or columns is (stride - )
+   position. so the omit rows or columns is (stride - )
 */
-int deconv_get_omit(int stride, int filter_width, int pad){
+int deconv_get_omit(int stride, int filter_width, int pad) {
-	if( ((filter_width-pad) <= 0) ){// ((filter_width-pad) > stride) ||
+  if (((filter_width - pad) <= 0)) {  // ((filter_width-pad) > stride) ||
-		//error
+    // error
    return 0;
  }
  int idx = 1;
  bool flag = false;
-	for(idx = 1; idx <= stride; ++idx){
+  for (idx = 1; idx <= stride; ++idx) {
    int j = idx;
-		for(;j <= filter_width;){
+    for (; j <= filter_width;) {
-			if(j == filter_width - pad){
+      if (j == filter_width - pad) {
        flag = true;
        break;
      }
      j = j + stride;
    }
-		if (flag)
+    if (flag) {
-		{
      break;
    }
  }
  return (stride - idx);
 }
-int deconv_get_sub_filter_num(int filter_num, int stride){
+int deconv_get_sub_filter_num(int filter_num, int stride) {
  return filter_num * stride;
 }
-void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ){
+void deconv_get_sub_filter(char** data_in, int height, int width,
+                           int sub_conv_n, int kernel_num, int channel) {
  char* ptr_tmp = *data_in;
  int sub_num = kernel_num * sub_conv_n;
-	int sub_h = height /sub_conv_n;
+  int sub_h = height / sub_conv_n;
  int sub_w = width / sub_conv_n;
-	int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
+  int sub_filter_size =
+      kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
-	char *ptr_sub_filter = (char *)fpga_malloc(sub_filter_size * sizeof(char));
+  char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
  for (int idx = 0; idx < sub_conv_n; ++idx) {
-        for (int nn =0; nn < sub_num; ++nn) {
+    for (int nn = 0; nn < sub_num; ++nn) {
      int ni = nn % kernel_num;
-            int woff = sub_conv_n - 1 - (nn / kernel_num);//
+      int woff = sub_conv_n - 1 - (nn / kernel_num);  //
-            for (int hh =0; hh < sub_h; ++hh) {
+      for (int hh = 0; hh < sub_h; ++hh) {
        int hi = hh * sub_conv_n + idx % sub_conv_n;
-                for (int ww =0; ww < sub_w; ++ww) {
+        for (int ww = 0; ww < sub_w; ++ww) {
-                    int wi = ww * sub_conv_n + woff;//1 0 
+          int wi = ww * sub_conv_n + woff;  // 1 0
-                    int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;//
+          int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;   //
-                    int kidx = ((ni * height + hi) * width + wi) * channel;//
+          int kidx = ((ni * height + hi) * width + wi) * channel;  //
-                    fpga_copy(ptr_sub_filter+idx*sub_h*sub_w*channel*sub_num + sidx, (*data_in)+kidx, channel*sizeof(char));
+          fpga_copy(
+              ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
+              (*data_in) + kidx, channel * sizeof(char));
          // for (int cc =0; cc < channel; ++cc) {
-                    //     ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = (*data_in)[kidx + cc];
+          //     ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
+          //     (*data_in)[kidx + cc];
          // }
        }
      }
@@ -131,28 +146,25 @@ void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n
  fpga_free(ptr_tmp);
 }
-void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw){
+void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
+                       int hw) {
  float* tmp = *filter_in;
-	float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(hw * kernel_num * channels * sizeof(float)));
+  float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
+      hw * kernel_num * channels * sizeof(float)));
-	for(int c = 0; c < channels; ++c)
-	{
+  for (int c = 0; c < channels; ++c) {
-		for (int n = 0; n < kernel_num ; ++n)
+    for (int n = 0; n < kernel_num; ++n) {
-		{
+      paddle_mobile::fpga::fpga_copy(ptr_filter + n * hw + kernel_num * hw * c,
-			paddle_mobile::fpga::fpga_copy(ptr_filter + n*hw + kernel_num * hw * c, tmp + n * channels * hw + c * hw , hw * sizeof(float));
+                                     tmp + n * channels * hw + c * hw,
+                                     hw * sizeof(float));
    }
  }
  *filter_in = ptr_filter;
  paddle_mobile::fpga::fpga_free(tmp);
 }
 void deconv_format_filter(float** data_in, int num, int channel, int height,
-                      int width, int group_num, float max,int stride){
+                          int width, int group_num, float max, int stride) {
  int data_size = channel * height * width * num;
  /*{
@@ -175,24 +187,25 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
        string filename = "quantize_filter_data";
        api::savefile<char>(filename, (void *)*data_in, data_size, result2);
 }*/
-  char **quantize_data = (char **)data_in;  // NOLINT
+  char** quantize_data = (char**)data_in;  // NOLINT
  filter::convert_to_hwc(quantize_data, num, channel, height, width);
  /*{
       char result2 = (char)0;
       string filename = "convert_to_hwc_filter_data";
-		 api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
+       api::savefile<char>(filename, (void *)*quantize_data, data_size,
+  result2);
  }*/
-  deconv_get_sub_filter(quantize_data, height, width,  stride, num, channel );
+  deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
  /*{
     char result2 = (char)0;
     string filename = "sub_filter_filter_data";
     api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
-    }*/
+}*/
  int sub_conv_n = stride;
-  int  sub_h = height/sub_conv_n;
+  int sub_h = height / sub_conv_n;
  int sub_w = width / sub_conv_n;
  int sub_chw = sub_h * sub_w * channel;
  int sub_num = sub_conv_n * num;
@@ -201,34 +214,37 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
      filter::calc_num_per_div(sub_num, group_num, division_capacity);
  int num_per_div_after_alignment =
      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
-  int div_num =
+  int div_num = (sub_num + num_per_div_before_alignment - 1) /
-      (sub_num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
+                num_per_div_before_alignment;
  int residual = (sub_num) % num_per_div_before_alignment;
  int num_after_alignment = num_per_div_after_alignment *
                                ((residual == 0) ? div_num : (div_num - 1)) +
                            align_to_x(residual, FILTER_NUM_ALIGNMENT);
-  char**ptr_ptr_data = (char**)fpga_malloc(sub_conv_n*sizeof(char*));
+  char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
  int origin_offset = sub_chw * sub_num;
-  for (int i = 0; i < sub_conv_n; ++i){
+  for (int i = 0; i < sub_conv_n; ++i) {
-      (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset*sizeof(char));
+    (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
-      fpga_copy((ptr_ptr_data)[i], (*quantize_data)+origin_offset*i, origin_offset*sizeof(char));
+    fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
+              origin_offset * sizeof(char));
    /* char result2 = (char)0;
     string filename = "ptr_ptr_data" + to_string(i);
-         api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset, result2);
+     api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset,
+     result2);
     */
  }
  // char result2 = (char)0;
  //      string filename = "interleave";
-   //      api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset, result2);
+  //      api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset,
+  //      result2);
  fpga_free(*quantize_data);
+  int align_offset =
-  int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) *num_after_alignment;
+      align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
-  char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset*sizeof(char));//continuous space
+  char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
-	for (int i = 0; i < sub_conv_n; ++i)
+                                       sizeof(char));  // continuous space
-	{
+  for (int i = 0; i < sub_conv_n; ++i) {
    int offset = i * origin_offset;
    char* ptr_tmp = (ptr_ptr_data)[i];
@@ -242,7 +258,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
       string filename = "interleave" + to_string(i);
       api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2);
 */
-    fpga_copy(ptr_space + i*align_offset,ptr_tmp,align_offset);
+    fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
    fpga_free(ptr_tmp);
  }
  *data_in = (float*)ptr_space;
@@ -250,9 +266,10 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
  /*    {
        char result2 = (char)0;
         string filename = "ptr_space";
-         api::savefile<char>(filename, (void *)ptr_space, sub_conv_n * align_offset, result2);
+         api::savefile<char>(filename, (void *)ptr_space, sub_conv_n *
+     align_offset, result2);
      }*/
- fpga_flush(ptr_space, sub_conv_n * align_offset*sizeof(char));
+  fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
 }
 }  // namespace deconv_filter

--- a/src/fpga/V1/deconv_filter.h
+++ b/src/fpga/V1/deconv_filter.h
-#pragma once
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
 namespace paddle_mobile {
 namespace fpga {
 namespace deconv_filter {
+void deconv_inverse_filter(float** data_in, int num, int channel, int width,
-void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height);
+                           int height);
 int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
 int deconv_get_sub_filter_num(int filter_num, int stride);
 int deconv_get_sub_filter_axis(int filter_axis, int stride);
 int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
 int deconv_get_omit(int stride, int filter_width, int pad);
-void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel );
+void deconv_get_sub_filter(char** data_in, int height, int width,
+                           int sub_conv_n, int kernel_num, int channel);
 void deconv_format_filter(float** data_in, int num, int channel, int height,
-                      int width, int group_num, float max,int stride);
+                          int width, int group_num, float max, int stride);
-void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw);
+void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
 }  // namespace deconv_filter
 }  // namespace fpga