change format for FPGA track

e48e7398 · zhangyang · 70a079c5 · e48e7398 · e48e7398 · e48e7398
4 changed files
--- a/src/fpga/V1/deconv_bias_scale.cpp
+++ b/src/fpga/V1/deconv_bias_scale.cpp
@@ -24,23 +24,25 @@ namespace paddle_mobile {
 namespace fpga {
 namespace deconv_bias_scale {
-void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n){
+void deconv_bias_scale_expand(float** bias_scale_array, int num,
+                              int sub_conv_n) {
  int sub_num = num * sub_conv_n;
  float* ptr_tmp = *bias_scale_array;
-  float*ptr_bias_scale_expand = (float*)fpga_malloc(sizeof(float) * sub_num * 2);
+  float* ptr_bias_scale_expand =
- int scale_base_offset = sub_num;
+      (float*)fpga_malloc(sizeof(float) * sub_num * 2);
-  for (int i = 0; i < sub_conv_n; ++i)
+  int scale_base_offset = sub_num;
-  {
+  for (int i = 0; i < sub_conv_n; ++i) {
    int offset = num * i;
-     //copy bias
+    // copy bias
-     fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp,num * sizeof(float));
+    fpga_copy(ptr_bias_scale_expand + offset, ptr_tmp, num * sizeof(float));
-       //copy scale
+    // copy scale
-     fpga_copy(ptr_bias_scale_expand + scale_base_offset+ offset, ptr_tmp + num,num * sizeof(float));
+    fpga_copy(ptr_bias_scale_expand + scale_base_offset + offset, ptr_tmp + num,
+              num * sizeof(float));
  }
  *bias_scale_array = ptr_bias_scale_expand;
  fpga_free(ptr_tmp);
 }
-}  // namespace bias_scale
+}  // namespace deconv_bias_scale
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/V1/deconv_bias_scale.h
+++ b/src/fpga/V1/deconv_bias_scale.h
@@ -20,8 +20,9 @@ namespace paddle_mobile {
 namespace fpga {
 namespace deconv_bias_scale {
-void deconv_bias_scale_expand(float** bias_scale_array,int num,int sub_conv_n);
+void deconv_bias_scale_expand(float** bias_scale_array, int num,
+                              int sub_conv_n);
-}  // namespace bias_scale
+}  // namespace deconv_bias_scale
 }  // namespace fpga
 }  // namespace paddle_mobile
--- a/src/fpga/V1/deconv_filter.cpp
+++ b/src/fpga/V1/deconv_filter.cpp
-#include <memory.h>
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#include <algorithm>
-#include "fpga/V1/deconv_filter.h"
+Licensed under the Apache License, Version 2.0 (the "License");
-// #include "deconv_filter.h"
+you may not use this file except in compliance with the License.
-#include "fpga/V1/filter.h"
+You may obtain a copy of the License at
-// #include "filter.h"
-#include "fpga/V1/api.h"
+    http://www.apache.org/licenses/LICENSE-2.0
-// #include "fpga_api.h"
+Unless required by applicable law or agreed to in writing, software
-//just for test
+distributed under the License is distributed on an "AS IS" BASIS,
-//#include <string>
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-//#include "deconv.h"
+See the License for the specific language governing permissions and
-//#include "deconv_api.h"
+limitations under the License. */
-//using namespace std;
-//using namespace paddle_mobile::fpga;
+#include "fpga/V1/deconv_filter.h"
-//using namespace baidu::fpga::deconv::api;
+#include <memory.h>
-//namespace api = baidu::fpga::deconv::api;
+#include <algorithm>
+// #include "deconv_filter.h"
-namespace paddle_mobile {
+#include "fpga/V1/filter.h"
-namespace fpga {
+// #include "filter.h"
-namespace deconv_filter {
+#include "fpga/V1/api.h"
+// #include "fpga_api.h"
-/*
-inverse kernel weights of each channel for every filter 
+// just for test
-*/
+//#include <string>
-void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height){
+//#include "deconv.h"
-  float *tmp = *data_in;
+//#include "deconv_api.h"
- // float fix_range = 127;//  float scale = fix_range / max;
+// using namespace std;
-  int data_size = num * channel * width * height;
+// using namespace paddle_mobile::fpga;
-  int hw_len = height * width;
+// using namespace baidu::fpga::deconv::api;
-  float *tmp_data = (float *)fpga_malloc(data_size * sizeof(float));
+// namespace api = baidu::fpga::deconv::api;
-  for (int i = 0; i < num; ++i) {
-  	for(int j = 0; j < channel; ++j){
+namespace paddle_mobile {
-  		for (int k = 0; k < hw_len; ++k)
+namespace fpga {
-  		{
+namespace deconv_filter {
-  			tmp_data[i*channel*hw_len + j*hw_len + k] = (*data_in)[i*channel*hw_len + j*hw_len + hw_len - k-1];
-  		}
+/*
-  	}
+inverse kernel weights of each channel for every filter
+*/
-  }
+void deconv_inverse_filter(float** data_in, int num, int channel, int width,
-  *data_in = (float *)tmp_data;  //
+                           int height) {
-  fpga_free(tmp);
+  float* tmp = *data_in;
-}
+  // float fix_range = 127;//  float scale = fix_range / max;
+  int data_size = num * channel * width * height;
-/*
+  int hw_len = height * width;
-	calculate sub padding number
+  float* tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
-*/
+  for (int i = 0; i < num; ++i) {
- int deconv_calc_sub_pad(int filter_axis, int pad, int stride){
+    for (int j = 0; j < channel; ++j) {
-	if(stride == 0 || ((filter_axis -pad-1)< 0)){
+      for (int k = 0; k < hw_len; ++k) {
-		//error
+        tmp_data[i * channel * hw_len + j * hw_len + k] =
-		return 0;
+            (*data_in)[i * channel * hw_len + j * hw_len + hw_len - k - 1];
-	}
+      }
-	return (filter_axis - pad -1)/stride;
+    }
-}
+  }
-int deconv_get_sub_filter_axis(int filter_axis, int stride){
+  *data_in = (float*)tmp_data;  //
+  fpga_free(tmp);
-	return (filter_axis/stride);
+}
-}
+/*
-int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis){
+    calculate sub padding number
-	return ((image_axis + 2*sub_pad -sub_filter_axis ) + 1);
+*/
-}
+int deconv_calc_sub_pad(int filter_axis, int pad, int stride) {
+  if (stride == 0 || ((filter_axis - pad - 1) < 0)) {
-/*
+    // error
-	(filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image position.
+    return 0;
-	so the omit rows or columns is (stride - )
+  }
-*/
+  return (filter_axis - pad - 1) / stride;
-int deconv_get_omit(int stride, int filter_width, int pad){
+}
-	if( ((filter_width-pad) <= 0) ){// ((filter_width-pad) > stride) ||
+int deconv_get_sub_filter_axis(int filter_axis, int stride) {
-		//error
+  return (filter_axis / stride);
-		return 0;
+}
-	}
-	int idx = 1;
+int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis) {
-	bool flag = false;
+  return ((image_axis + 2 * sub_pad - sub_filter_axis) + 1);
-	for(idx = 1; idx <= stride; ++idx){
+}
-		int j = idx;
-		for(;j <= filter_width;){
+/*
-			if(j == filter_width - pad){
+    (filter_width-pad,filter_width-pad) is the first pixel of sub-pixel image
-				flag = true;
+   position. so the omit rows or columns is (stride - )
-				break;
+*/
-			}
+int deconv_get_omit(int stride, int filter_width, int pad) {
-			j = j + stride;
+  if (((filter_width - pad) <= 0)) {  // ((filter_width-pad) > stride) ||
-		}
+    // error
-		if (flag)
+    return 0;
-		{
+  }
-			break;
+  int idx = 1;
-		}
+  bool flag = false;
+  for (idx = 1; idx <= stride; ++idx) {
-	}
+    int j = idx;
+    for (; j <= filter_width;) {
-    return (stride - idx);
+      if (j == filter_width - pad) {
-}
+        flag = true;
+        break;
-int deconv_get_sub_filter_num(int filter_num, int stride){
+      }
-	return filter_num * stride;
+      j = j + stride;
-}
+    }
+    if (flag) {
-void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel ){
+      break;
+    }
-	char* ptr_tmp = *data_in;
+  }
-	int sub_num = kernel_num * sub_conv_n;
-	int sub_h = height /sub_conv_n;
+  return (stride - idx);
-	int sub_w = width / sub_conv_n;
+}
-	int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
+int deconv_get_sub_filter_num(int filter_num, int stride) {
+  return filter_num * stride;
-	char *ptr_sub_filter = (char *)fpga_malloc(sub_filter_size * sizeof(char));
+}
-	for (int idx = 0; idx < sub_conv_n; ++idx) {
-        for (int nn =0; nn < sub_num; ++nn) {
+void deconv_get_sub_filter(char** data_in, int height, int width,
-            int ni = nn % kernel_num;
+                           int sub_conv_n, int kernel_num, int channel) {
+  char* ptr_tmp = *data_in;
-            int woff = sub_conv_n - 1 - (nn / kernel_num);//
+  int sub_num = kernel_num * sub_conv_n;
+  int sub_h = height / sub_conv_n;
-            for (int hh =0; hh < sub_h; ++hh) {
+  int sub_w = width / sub_conv_n;
-                int hi = hh * sub_conv_n + idx % sub_conv_n;
-                for (int ww =0; ww < sub_w; ++ww) {
+  int sub_filter_size =
-                    int wi = ww * sub_conv_n + woff;//1 0 
+      kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
-                    int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;//
+  char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
-                    int kidx = ((ni * height + hi) * width + wi) * channel;//
+  for (int idx = 0; idx < sub_conv_n; ++idx) {
+    for (int nn = 0; nn < sub_num; ++nn) {
-                    fpga_copy(ptr_sub_filter+idx*sub_h*sub_w*channel*sub_num + sidx, (*data_in)+kidx, channel*sizeof(char));
+      int ni = nn % kernel_num;
-                    // for (int cc =0; cc < channel; ++cc) {
-                    //     ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = (*data_in)[kidx + cc];
+      int woff = sub_conv_n - 1 - (nn / kernel_num);  //
-                    // }
-                }
+      for (int hh = 0; hh < sub_h; ++hh) {
-            }
+        int hi = hh * sub_conv_n + idx % sub_conv_n;
-        }
+        for (int ww = 0; ww < sub_w; ++ww) {
-    }
+          int wi = ww * sub_conv_n + woff;  // 1 0
-    *data_in = ptr_sub_filter;
-    fpga_free(ptr_tmp);
+          int sidx = ((nn * sub_h + hh) * sub_w + ww) * channel;   //
-}
+          int kidx = ((ni * height + hi) * width + wi) * channel;  //
-void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw){
+          fpga_copy(
-	float* tmp = *filter_in;
+              ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
-	float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(hw * kernel_num * channels * sizeof(float)));
+              (*data_in) + kidx, channel * sizeof(char));
+          // for (int cc =0; cc < channel; ++cc) {
-	for(int c = 0; c < channels; ++c)
+          //     ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
-	{
+          //     (*data_in)[kidx + cc];
-		for (int n = 0; n < kernel_num ; ++n)
+          // }
-		{
+        }
-			paddle_mobile::fpga::fpga_copy(ptr_filter + n*hw + kernel_num * hw * c, tmp + n * channels * hw + c * hw , hw * sizeof(float));
+      }
-		}
+    }
-	}
+  }
-	*filter_in = ptr_filter; 
+  *data_in = ptr_sub_filter;
-	paddle_mobile::fpga::fpga_free(tmp);
+  fpga_free(ptr_tmp);
 }
+void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
-void deconv_format_filter(float** data_in, int num, int channel, int height,
+                       int hw) {
-                      int width, int group_num, float max,int stride){
+  float* tmp = *filter_in;
+  float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
+      hw * kernel_num * channels * sizeof(float)));
+  for (int c = 0; c < channels; ++c) {
-  int data_size = channel * height * width * num;
+    for (int n = 0; n < kernel_num; ++n) {
+      paddle_mobile::fpga::fpga_copy(ptr_filter + n * hw + kernel_num * hw * c,
-  /*{
+                                     tmp + n * channels * hw + c * hw,
-	   float result2 = (float)0;
+                                     hw * sizeof(float));
-	   string filename = "origin_filter_data";
+    }
-	   api::savefile<float>(filename, (void *)*data_in, data_size, result2);
+  }
-    }*/
+  *filter_in = ptr_filter;
+  paddle_mobile::fpga::fpga_free(tmp);
-  deconv_inverse_filter(data_in, num, channel, width, height);
+}
- /* {
+void deconv_format_filter(float** data_in, int num, int channel, int height,
-		 float result2 = (float)0;
+                          int width, int group_num, float max, int stride) {
-		 string filename = "inverse_filter_data";
+  int data_size = channel * height * width * num;
-		 api::savefile<float>(filename, (void *)*data_in, data_size, result2);
-  }*/
+  /*{
+       float result2 = (float)0;
-  filter::quantize(data_in, data_size, max);
+       string filename = "origin_filter_data";
-   /* {
+       api::savefile<float>(filename, (void *)*data_in, data_size, result2);
-		 char result2 = (char)0;
+    }*/
-		 string filename = "quantize_filter_data";
-		 api::savefile<char>(filename, (void *)*data_in, data_size, result2);
+  deconv_inverse_filter(data_in, num, channel, width, height);
-  }*/
-  char **quantize_data = (char **)data_in;  // NOLINT
+  /* {
+          float result2 = (float)0;
-  filter::convert_to_hwc(quantize_data, num, channel, height, width);
+          string filename = "inverse_filter_data";
-    /*{
+          api::savefile<float>(filename, (void *)*data_in, data_size, result2);
-		 char result2 = (char)0;
+   }*/
-		 string filename = "convert_to_hwc_filter_data";
-		 api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
+  filter::quantize(data_in, data_size, max);
-    }*/
+  /* {
+        char result2 = (char)0;
-  deconv_get_sub_filter(quantize_data, height, width,  stride, num, channel );
+        string filename = "quantize_filter_data";
-      /*{
+        api::savefile<char>(filename, (void *)*data_in, data_size, result2);
-		 char result2 = (char)0;
+ }*/
-		 string filename = "sub_filter_filter_data";
+  char** quantize_data = (char**)data_in;  // NOLINT
-		 api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
-    }*/
+  filter::convert_to_hwc(quantize_data, num, channel, height, width);
+  /*{
-  int sub_conv_n = stride;
+       char result2 = (char)0;
-  int  sub_h = height/sub_conv_n;
+       string filename = "convert_to_hwc_filter_data";
-  int sub_w = width / sub_conv_n;
+       api::savefile<char>(filename, (void *)*quantize_data, data_size,
-  int sub_chw = sub_h * sub_w  * channel;
+  result2);
-  int sub_num = sub_conv_n * num;
+  }*/
-   int division_capacity = filter::calc_division_capacity(sub_chw);
-   int num_per_div_before_alignment =
+  deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
-       filter::calc_num_per_div(sub_num, group_num, division_capacity);
+  /*{
-  int num_per_div_after_alignment =
+     char result2 = (char)0;
-      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
+     string filename = "sub_filter_filter_data";
-  int div_num =
+     api::savefile<char>(filename, (void *)*quantize_data, data_size, result2);
-      (sub_num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
+}*/
-  int residual = (sub_num) % num_per_div_before_alignment;
-  int num_after_alignment = num_per_div_after_alignment *
+  int sub_conv_n = stride;
-                                ((residual == 0) ? div_num : (div_num - 1)) +
+  int sub_h = height / sub_conv_n;
-                            align_to_x(residual, FILTER_NUM_ALIGNMENT);
+  int sub_w = width / sub_conv_n;
+  int sub_chw = sub_h * sub_w * channel;
-  char**ptr_ptr_data = (char**)fpga_malloc(sub_conv_n*sizeof(char*));
+  int sub_num = sub_conv_n * num;
-  int origin_offset = sub_chw * sub_num;  
+  int division_capacity = filter::calc_division_capacity(sub_chw);
-  for (int i = 0; i < sub_conv_n; ++i){
+  int num_per_div_before_alignment =
-      (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset*sizeof(char));
+      filter::calc_num_per_div(sub_num, group_num, division_capacity);
-      fpga_copy((ptr_ptr_data)[i], (*quantize_data)+origin_offset*i, origin_offset*sizeof(char));
+  int num_per_div_after_alignment =
+      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
-		/* char result2 = (char)0;
+  int div_num = (sub_num + num_per_div_before_alignment - 1) /
-         string filename = "ptr_ptr_data" + to_string(i);
+                num_per_div_before_alignment;
-         api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset, result2);
+  int residual = (sub_num) % num_per_div_before_alignment;
-         */
+  int num_after_alignment = num_per_div_after_alignment *
-  }
+                                ((residual == 0) ? div_num : (div_num - 1)) +
-   // char result2 = (char)0;
+                            align_to_x(residual, FILTER_NUM_ALIGNMENT);
-   //      string filename = "interleave";
-   //      api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset, result2);
+  char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
-  fpga_free(*quantize_data);
+  int origin_offset = sub_chw * sub_num;
+  for (int i = 0; i < sub_conv_n; ++i) {
+    (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
-  int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) *num_after_alignment;
+    fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
-  char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset*sizeof(char));//continuous space
+              origin_offset * sizeof(char));
-	for (int i = 0; i < sub_conv_n; ++i)
-	{
+    /* char result2 = (char)0;
-	  int offset = i * origin_offset;
+     string filename = "ptr_ptr_data" + to_string(i);
-	  char* ptr_tmp = (ptr_ptr_data)[i];
+     api::savefile<char>(filename, (void *)(ptr_ptr_data[i]), origin_offset,
+     result2);
-	  filter::align_element(&ptr_tmp, sub_num, sub_chw);
+     */
-	  filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw);
+  }
+  // char result2 = (char)0;
-	  filter::reorder(&ptr_tmp, num_after_alignment, sub_chw);
+  //      string filename = "interleave";
-	  filter::interleave(&ptr_tmp, num_after_alignment, sub_chw);
+  //      api::savefile<char>(filename, (void *)*ptr_ptr_data, origin_offset,
+  //      result2);
-	  /*   char result2 = (char)0;
+  fpga_free(*quantize_data);
-         string filename = "interleave" + to_string(i);
-         api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2);
+  int align_offset =
-*/
+      align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
-    fpga_copy(ptr_space + i*align_offset,ptr_tmp,align_offset);
+  char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
-    fpga_free(ptr_tmp);
+                                       sizeof(char));  // continuous space
-	}
+  for (int i = 0; i < sub_conv_n; ++i) {
-  *data_in = (float*)ptr_space;
+    int offset = i * origin_offset;
+    char* ptr_tmp = (ptr_ptr_data)[i];
-  /*	{
-	    char result2 = (char)0;
+    filter::align_element(&ptr_tmp, sub_num, sub_chw);
-         string filename = "ptr_space";
+    filter::align_num(&ptr_tmp, num_per_div_before_alignment, sub_num, sub_chw);
-         api::savefile<char>(filename, (void *)ptr_space, sub_conv_n * align_offset, result2);
-  	}*/
+    filter::reorder(&ptr_tmp, num_after_alignment, sub_chw);
- fpga_flush(ptr_space, sub_conv_n * align_offset*sizeof(char));
+    filter::interleave(&ptr_tmp, num_after_alignment, sub_chw);
-}
+    /*   char result2 = (char)0;
-}  // namespace deconv_filter
+       string filename = "interleave" + to_string(i);
-}  // namespace fpga
+       api::savefile<char>(filename, (void *)ptr_tmp, align_offset, result2);
-}  // namespace paddle_mobile
+*/
+    fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
+    fpga_free(ptr_tmp);
+  }
+  *data_in = (float*)ptr_space;
+  /*    {
+        char result2 = (char)0;
+         string filename = "ptr_space";
+         api::savefile<char>(filename, (void *)ptr_space, sub_conv_n *
+     align_offset, result2);
+      }*/
+  fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
+}
+}  // namespace deconv_filter
+}  // namespace fpga
+}  // namespace paddle_mobile
--- a/src/fpga/V1/deconv_filter.h
+++ b/src/fpga/V1/deconv_filter.h
-#pragma once
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
-namespace paddle_mobile {
+You may obtain a copy of the License at
-namespace fpga {
-namespace deconv_filter {
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
-void deconv_inverse_filter(float** data_in, int num, int channel, int width, int height);
+distributed under the License is distributed on an "AS IS" BASIS,
-int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-int deconv_get_sub_filter_num(int filter_num, int stride);
+See the License for the specific language governing permissions and
-int deconv_get_sub_filter_axis(int filter_axis, int stride);
+limitations under the License. */
-int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
-int deconv_get_omit(int stride, int filter_width, int pad);
+#pragma once
-void deconv_get_sub_filter(char** data_in, int height, int width, int sub_conv_n, int kernel_num, int channel );
-void deconv_format_filter(float** data_in, int num, int channel, int height,
+namespace paddle_mobile {
-                      int width, int group_num, float max,int stride);
+namespace fpga {
-void deconv_NC_convert(float**filter_in, int kernel_num, int channels, int hw);
+namespace deconv_filter {
-}  // namespace deconv_filter
+void deconv_inverse_filter(float** data_in, int num, int channel, int width,
-}  // namespace fpga
+                           int height);
-}  // namespace paddle_mobile
+int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
\ No newline at end of file
+int deconv_get_sub_filter_num(int filter_num, int stride);
+int deconv_get_sub_filter_axis(int filter_axis, int stride);
+int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
+int deconv_get_omit(int stride, int filter_width, int pad);
+void deconv_get_sub_filter(char** data_in, int height, int width,
+                           int sub_conv_n, int kernel_num, int channel);
+void deconv_format_filter(float** data_in, int num, int channel, int height,
+                          int width, int group_num, float max, int stride);
+void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
+}  // namespace deconv_filter
+}  // namespace fpga
+}  // namespace paddle_mobile