api.h 4.6 KB
Newer Older
H
hanbuhe 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Z
zhangyang 已提交
17
#include <stdint.h>
H
hanbuhe 已提交
18 19 20
#include <cstddef>
#include <iostream>
#include <limits>
Z
zhangyang 已提交
21 22
#include "fpga/V2/driver/driver.h"
#include "fpga/V2/driver/pe.h"
Z
zhangyang 已提交
23
#include "framework/tensor.h"
H
hanbuhe 已提交
24

Z
zhangyang 已提交
25

Z
zhangyang 已提交
26
namespace paddle_mobile {
H
hanbuhe 已提交
27 28
namespace fpga {

H
hanbuhe 已提交
29 30 31
// Element data type of a tensor buffer.
// The numeric values are assigned explicitly (FP16 is 0, FP32 is 1).
// NOTE(review): presumably these values are consumed outside this header
// (e.g. by the driver), so keep them stable - confirm before renumbering.
enum DataType {
  DATA_TYPE_FP32 = 1,
  DATA_TYPE_FP16 = 0,
};

H
hanbuhe 已提交
34 35 36
// Memory layout of a tensor buffer.
// The numeric values are assigned explicitly (HWC is 0, CHW is 1).
// NOTE(review): presumably these values are consumed outside this header
// (e.g. by the driver), so keep them stable - confirm before renumbering.
enum LayoutType {
  LAYOUT_CHW = 1,
  LAYOUT_HWC = 0,
};

Z
zhangyang 已提交
39

H
hanbuhe 已提交
40
// Kernel (window) geometry. Embedded in both ConvArgs and PoolingArgs.
// NOTE(review): the struct is presumably handed to the driver as-is, so the
// field order and widths should not be changed - confirm against the driver.
struct KernelArgs {
  uint32_t width;     // kernel width
  uint32_t height;    // kernel height
  uint32_t stride_w;  // stride along the width axis
  uint32_t stride_h;  // stride along the height axis
};

H
hanbuhe 已提交
47
// Describes an input feature map passed to an FPGA operation.
// The struct only stores the pointers; nothing in this header says who owns
// the memory they refer to.
struct ImageInputArgs {
  void* address;         // input feature map virtual address
  float* scale_address;  // input scale address
  uint32_t channels;     // number of channels
  uint32_t width;        // feature map width
  uint32_t height;       // feature map height
  uint32_t pad_width;    // padding width
  uint32_t pad_height;   // padding height
};

// Describes where an FPGA operation places its result.
// The struct only stores the pointers; nothing in this header says who owns
// the memory they refer to.
struct ImageOutputArgs {
  void* address;         // output result address
  float* scale_address;  // output scale address
  uint64_t timer_cnt;    // time counter for FPGA computation
};
Z
zhangyang 已提交
62

H
hanbuhe 已提交
63
struct ConvArgs {
Z
zhangyang 已提交
64
  bool relu_enabled;
H
hanbuhe 已提交
65
  void* sb_address;  // scale and bias are interlaced;
H
hanbuhe 已提交
66
  void* filter_address;
67
  float* filter_scale_address;
Z
zhangyang 已提交
68 69 70
  uint32_t filter_num;
  uint32_t group_num;

H
hanbuhe 已提交
71
  struct KernelArgs kernel;
H
hanbuhe 已提交
72
  struct ImageInputArgs image;  // input image;
H
hanbuhe 已提交
73
  struct ImageOutputArgs output;
Z
zhangyang 已提交
74 75
};

Z
zhangyang 已提交
76 77 78 79 80 81 82
// Arguments for concatenating several input images into one output image.
// NOTE(review): the pointer-to-array members (images_in, scales_in,
// channel_num, aligned_channel_num) presumably hold image_num entries each -
// confirm against the code that fills this struct.
struct ConcatArgs {
  uint32_t image_num;             // number of input images
  half** images_in;               // input image pointers
  float** scales_in;              // input scale pointers
  void* image_out;                // output image address
  float* scale_out;               // output scale address
  uint32_t* channel_num;          // channel counts of the inputs
  uint32_t* aligned_channel_num;  // aligned channel counts of the inputs
  uint32_t out_channel;           // channel count of the output
  uint32_t height;                // image height
  uint32_t width;                 // image width
};

Z
zhangyang 已提交
89
// Arguments for a convolution carried out as several ConvArgs pieces plus a
// concat description.
// NOTE(review): conv_args presumably points to split_num elements - confirm
// against the code that fills this struct (see fill_split_arg).
struct SplitConvArgs {
  uint32_t split_num;             // number of splits
  uint32_t group_num;             // number of groups
  uint32_t filter_num;            // number of filters
  struct ImageOutputArgs output;  // output descriptor
  struct ConvArgs* conv_args;     // per-split convolution arguments
  struct ConcatArgs concat_arg;   // concat arguments
};

H
hanbuhe 已提交
98
struct PoolingArgs {
Z
zhangyang 已提交
99 100
  int16_t mode;  // mode: 0:max, 1:avg
  half kernel_reciprocal;
H
hanbuhe 已提交
101
  struct KernelArgs kernel;
H
hanbuhe 已提交
102 103
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
Z
zhangyang 已提交
104 105
};

H
hanbuhe 已提交
106
struct EWAddArgs {
Z
zhangyang 已提交
107
  bool relu_enabled;
H
hanbuhe 已提交
108

Z
zhangyang 已提交
109 110
  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
  uint32_t const1;
H
hanbuhe 已提交
111 112 113
  struct ImageInputArgs image0;
  struct ImageInputArgs image1;
  struct ImageOutputArgs output;
H
hanbuhe 已提交
114 115
};

H
hanbuhe 已提交
116
struct BypassArgs {
H
hanbuhe 已提交
117 118 119 120
  enum DataType input_data_type;
  enum DataType output_data_type;
  enum LayoutType input_layout_type;
  enum LayoutType output_layout_type;
H
hanbuhe 已提交
121 122 123 124
  struct ImageInputArgs image;
  struct ImageOutputArgs output;
};

H
hanbuhe 已提交
125 126


H
hanbuhe 已提交
127 128 129 130
// Device and buffer management entry points. Only the declarations are
// visible in this header.
// NOTE(review): the meaning of the int results of open_device/close_device is
// not documented here - confirm against the implementation.
int open_device();
int close_device();
// NOTE(review): presumably memory obtained from fpga_malloc must be released
// with fpga_free (not free/delete) - confirm against the implementation.
void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
H
hanbuhe 已提交
131

Z
zhangyang 已提交
132
// Rounds num up to the nearest multiple of x (for non-negative num and
// positive x) using integer arithmetic. x is used as a divisor and must not
// be zero.
static inline int align_to_x(int num, int x) {
  const int chunks = (num + x - 1) / x;
  return chunks * x;
}
133

Z
zhangyang 已提交
134 135 136 137 138
// Filter / channel query helpers. Only the declarations are visible here; the
// notes below are inferred from the names and should be confirmed against the
// implementation.
// NOTE(review): presumably returns the largest magnitude found in the filter
// data; the result type matches the max_value parameter of format_filter.
float filter_find_max(framework::Tensor* filter_tensor);
// NOTE(review): presumably rounds channel_num up to a hardware alignment.
int get_aligned_channel_num(int channel_num);
int get_aligned_filter_num(framework::Tensor* filter_tensor);
int get_conv_output_channel(framework::Tensor* filter_tensor);

Z
zhangyang 已提交
139
// Takes a non-const tensor pointer and returns nothing.
// NOTE(review): presumably rearranges image_tensor in place into the layout
// the FPGA expects - confirm against the implementation.
void format_image(framework::Tensor* image_tensor);
Z
zhangyang 已提交
140 141 142
// Output feature map (ofm) preparation for fp16 and fp32 outputs.
// NOTE(review): aligned_channel is presumably the channel count after
// alignment (see get_aligned_channel_num) - confirm against the callers.
void format_fp16_ofm(framework::Tensor* ofm_tensor,
                     int aligned_channel);  // only allocate memory
void format_fp32_ofm(framework::Tensor* ofm_tensor, int aligned_channel);
143

Z
zhangyang 已提交
144 145
// Takes a non-const filter tensor, a max value and a group count.
// NOTE(review): presumably reformats filter_tensor in place, with max_value
// coming from filter_find_max - confirm against the implementation.
void format_filter(framework::Tensor* filter_tensor, float max_value,
                   int group_num);
Z
zhangyang 已提交
146
// Same parameters as format_filter, minus the group count.
// NOTE(review): presumably the fully-connected variant of format_filter -
// confirm against the implementation.
void format_fc_filter(framework::Tensor* filter_tensor, float max_value);
Z
zhangyang 已提交
147 148
// Takes the bias/scale array by pointer-to-pointer.
// NOTE(review): the double pointer suggests the function may replace
// *bias_scale_array with a newly formatted buffer - confirm against the
// implementation, including who frees the old and new buffers.
void format_bias_scale_array(float** bias_scale_array, int filter_num,
                             int filter_channel);
Z
zhangyang 已提交
149
void format_concat_output(framework::Tensor* out, int height, int width,
Z
zhangyang 已提交
150 151 152 153 154
                          uint32_t out_channel);
// Take the filter tensor, the output feature map tensor and a bias/scale
// pointer; the conv variant additionally takes a group count.
// NOTE(review): presumably these bundle the individual format_* steps; the
// meaning of the int result is not documented here - confirm against the
// implementation.
int format_conv_data(framework::Tensor* filter_tensor,
                     framework::Tensor* ofm_tensor, float* bs_ptr, int group);
int format_fc_data(framework::Tensor* filter_tensor,
                   framework::Tensor* ofm_tensor, float* bs_ptr);
Z
zhangyang 已提交
155 156 157 158
// Takes a SplitConvArgs pointer plus the input, output and filter tensors and
// the convolution parameters (ReLU flag, group count, strides, paddings,
// bias/scale pointer).
// NOTE(review): presumably populates *arg from the other parameters; whether
// it allocates arg->conv_args (and who frees it) is not visible here -
// confirm against the implementation.
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                    framework::Tensor* out, framework::Tensor* filter,
                    bool relu_enabled, int group_num, int stride_h,
                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
159

Z
zhangyang 已提交
160 161 162
// Conversions between float and the project's `half` type (declared outside
// this header).
// NOTE(review): rounding and overflow behaviour are not visible here -
// confirm against the implementation.
half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);

H
hanbuhe 已提交
163
}  // namespace fpga
Z
zhangyang 已提交
164
}  // namespace paddle_mobile