/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stdint.h>
#include <cstddef>
#include <iostream>
#include <limits>
#include "fpga/V2/driver/driver.h"
#include "fpga/V2/driver/pe.h"
#include "framework/tensor.h"

namespace paddle_mobile {
namespace fpga {

enum DataType {
  DATA_TYPE_FP32 = 1,
  DATA_TYPE_FP16 = 0,
};

enum LayoutType {
  LAYOUT_CHW = 1,
  LAYOUT_HWC = 0,
};

struct KernelArgs {
  uint32_t width;
  uint32_t height;
  uint32_t stride_w;
  uint32_t stride_h;
};

struct ImageInputArgs {
  void* address;         // input featuremap virtual address
  float* scale_address;  // input scale address;
  uint32_t channels;
  uint32_t width;  // featuremap width
  uint32_t height;
  uint32_t pad_width;  // padding width;
  uint32_t pad_height;
};

struct ImageOutputArgs {
  void* address;         // output result address;
  float* scale_address;  // output scale address;
  uint64_t timer_cnt;    // time counter for FPGA computation
};

struct ConvArgs {
  bool relu_enabled;
  void* sb_address;  // scale and bias are interlaced;
  void* filter_address;
  float* filter_scale_address;
  uint32_t filter_num;
  uint32_t group_num;

  struct KernelArgs kernel;
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
};

// Concatenate image_num feature maps along the channel dimension.
struct ConcatArgs {
  uint32_t image_num;
  half** images_in;
  float** scales_in;
  void* image_out;
  float* scale_out;
  uint32_t* channel_num;
  uint32_t* aligned_channel_num;
  uint32_t out_channel;
  uint32_t height;
  uint32_t width;
};

// A convolution executed as split_num separate ConvArgs launches whose
// partial outputs are joined via concat_arg.
struct SplitConvArgs {
  uint32_t split_num;
  uint32_t group_num;
  uint32_t filter_num;
  struct ImageOutputArgs output;
  struct ConvArgs* conv_args;
  struct ConcatArgs concat_arg;
};

struct PoolingArgs {
  int16_t mode;  // mode: 0:max, 1:avg
  half kernel_reciprocal;
  struct KernelArgs kernel;
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
};

struct EWAddArgs {
  bool relu_enabled;

  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
  uint32_t const1;
  struct ImageInputArgs image0;
  struct ImageInputArgs image1;
  struct ImageOutputArgs output;
};

// Pass-through that converts the data type and/or layout of a feature map.
struct BypassArgs {
  enum DataType input_data_type;
  enum DataType output_data_type;
  enum LayoutType input_layout_type;
  enum LayoutType output_layout_type;
  struct ImageInputArgs image;
  struct ImageOutputArgs output;
};

int open_device();
int close_device();
void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
void fpga_copy(void* dest, const void* src, size_t num);
int fpga_flush(void* address, size_t size);
int fpga_invalidate(void* address, size_t size);

// Round num up to the nearest multiple of x.
static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
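// Minimal usage sketch for the device and memory helpers above (assumes the
// FPGA driver is available; error handling omitted, sizes are illustrative):
//
//   open_device();
//   size_t size = 1024 * sizeof(int16_t);
//   void* buf = fpga_malloc(size);
//   // ... fill buf on the CPU ...
//   fpga_flush(buf, size);       // make CPU writes visible to the FPGA
//   // ... run a PE job that reads/writes buf ...
//   fpga_invalidate(buf, size);  // make FPGA writes visible to the CPU
//   fpga_free(buf);
//   close_device();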

float filter_find_max(framework::Tensor* filter_tensor);
int get_aligned_channel_num(int channel_num);
int get_aligned_filter_num(framework::Tensor* filter_tensor);
int get_conv_output_channel(framework::Tensor* filter_tensor);

void format_image(framework::Tensor* image_tensor);
void format_fp16_ofm(framework::Tensor* ofm_tensor,
                     int aligned_channel);  // only allocate memory
void format_fp32_ofm(framework::Tensor* ofm_tensor, int aligned_channel);

void format_filter(framework::Tensor* filter_tensor, float max_value,
                   int group_num);
void format_fc_filter(framework::Tensor* filter_tensor, float max_value);
void format_bias_scale_array(float** bias_scale_array, int filter_num,
                             int filter_channel);
void format_concat_output(framework::Tensor* out, int height, int width,
                          uint32_t out_channel);
int format_conv_data(framework::Tensor* filter_tensor,
                     framework::Tensor* ofm_tensor, float* bs_ptr, int group);
int format_fc_data(framework::Tensor* filter_tensor,
                   framework::Tensor* ofm_tensor, float* bs_ptr);
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                    framework::Tensor* out, framework::Tensor* filter,
                    bool relu_enabled, int group_num, int stride_h,
                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
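// Rough sketch of how an operator might prepare a split convolution with the
// helpers above (variable names are placeholders, not part of this API; the
// exact sequence depends on the operator implementation):
//
//   float max_value = filter_find_max(filter);
//   format_filter(filter, max_value, group_num);
//   format_bias_scale_array(&bs_ptr, filter_num, filter_channel);
//   format_fp16_ofm(out, get_aligned_channel_num(out_channel));
//
//   SplitConvArgs conv_arg;
//   fill_split_arg(&conv_arg, input, out, filter, relu_enabled, group_num,
//                  stride_h, stride_w, padding_h, padding_w, bs_ptr);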

half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);
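// Example (sketch): round-trip a value through the FPGA half-precision format.
//   half h = fp32_2_fp16(0.5f);
//   float f = fp16_2_fp32(h);  // ~0.5f, up to fp16 rounding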

}  // namespace fpga
}  // namespace paddle_mobile