api.h 4.6 KB
Newer Older
H
hanbuhe 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

Z
zhangyang 已提交
17
#include <stdint.h>
H
hanbuhe 已提交
18 19 20
#include <cstddef>
#include <iostream>
#include <limits>
Z
zhangyang 已提交
21 22
#include "fpga/V2/driver/driver.h"
#include "fpga/V2/driver/pe.h"
Z
zhangyang 已提交
23
#include "framework/tensor.h"
H
hanbuhe 已提交
24

Z
zhangyang 已提交
25
namespace paddle_mobile {
H
hanbuhe 已提交
26 27
namespace fpga {

H
hanbuhe 已提交
28 29 30
// Element data type tags. BypassArgs uses them to describe the type of its
// input and output data (input_data_type / output_data_type).
// The numeric values are explicit and intentionally not in declaration
// order: FP16 is 0 and FP32 is 1.
enum DataType {
  DATA_TYPE_FP32 = 1,
  DATA_TYPE_FP16 = 0,
};

H
hanbuhe 已提交
33 34 35
// Memory layout tags. BypassArgs uses them to describe the layout of its
// input and output data (input_layout_type / output_layout_type).
// The numeric values are explicit: HWC is 0 and CHW is 1.
// NOTE(review): presumably C/H/W stand for channel/height/width — confirm.
enum LayoutType {
  LAYOUT_CHW = 1,
  LAYOUT_HWC = 0,
};

H
hanbuhe 已提交
38
// Kernel (window) geometry. Embedded as the `kernel` member of ConvArgs and
// PoolingArgs. Kept as a plain aggregate so it can be brace-initialized.
struct KernelArgs {
  uint32_t width;     // kernel width
  uint32_t height;    // kernel height
  uint32_t stride_w;  // stride (presumably along the width axis — confirm)
  uint32_t stride_h;  // stride (presumably along the height axis — confirm)
};

H
hanbuhe 已提交
45
// Description of an input feature map handed to an FPGA operation.
// Embedded in ConvArgs, PoolingArgs, EWAddArgs and BypassArgs.
// Kept as a plain aggregate so it can be brace-initialized.
struct ImageInputArgs {
  void* address;         // input featuremap virtual address
  float* scale_address;  // input scale address
  uint32_t channels;     // number of channels
  uint32_t width;        // featuremap width
  uint32_t height;       // featuremap height
  uint32_t pad_width;    // padding width
  uint32_t pad_height;   // padding height
};

// Description of where an FPGA operation writes its result.
// Embedded in ConvArgs, SplitConvArgs, PoolingArgs, EWAddArgs and BypassArgs.
// Kept as a plain aggregate so it can be brace-initialized.
struct ImageOutputArgs {
  void* address;         // output result address
  float* scale_address;  // output scale address
  uint64_t timer_cnt;    // time counter for FPGA computation
};
Z
zhangyang 已提交
60

H
hanbuhe 已提交
61
struct ConvArgs {
Z
zhangyang 已提交
62
  bool relu_enabled;
H
hanbuhe 已提交
63
  void* sb_address;  // scale and bias are interlaced;
H
hanbuhe 已提交
64
  void* filter_address;
65
  float* filter_scale_address;
Z
zhangyang 已提交
66 67 68
  uint32_t filter_num;
  uint32_t group_num;

H
hanbuhe 已提交
69
  struct KernelArgs kernel;
H
hanbuhe 已提交
70
  struct ImageInputArgs image;  // input image;
H
hanbuhe 已提交
71
  struct ImageOutputArgs output;
Z
zhangyang 已提交
72 73
};

Z
zhangyang 已提交
74 75 76 77 78 79 80
// Arguments for concatenating several images into one output image.
// NOTE(review): the pointer members look like per-input arrays, presumably
// of length image_num — confirm against the implementation.
// `half` is declared outside this header's visible text.
struct ConcatArgs {
  uint32_t image_num;             // number of input images
  half** images_in;               // input image pointers
  float** scales_in;              // input scale pointers
  void* image_out;                // output image address
  float* scale_out;               // output scale address
  uint32_t* channel_num;          // channel counts
  uint32_t* aligned_channel_num;  // aligned channel counts
  uint32_t out_channel;           // output channel count
  uint32_t height;
  uint32_t width;
};

Z
zhangyang 已提交
87
// Arguments for a convolution that is split into several ConvArgs passes
// with an accompanying ConcatArgs; filled in by fill_split_arg().
// NOTE(review): conv_args presumably points at split_num entries whose
// results are joined via concat_arg — confirm against the implementation.
struct SplitConvArgs {
  uint32_t split_num;              // number of splits
  uint32_t group_num;              // number of groups
  uint32_t filter_num;             // number of filters
  struct ImageOutputArgs output;   // overall output destination
  struct ConvArgs* conv_args;      // per-split convolution arguments
  struct ConcatArgs concat_arg;    // concatenation arguments
};

H
hanbuhe 已提交
96
struct PoolingArgs {
Z
zhangyang 已提交
97 98
  int16_t mode;  // mode: 0:max, 1:avg
  half kernel_reciprocal;
H
hanbuhe 已提交
99
  struct KernelArgs kernel;
H
hanbuhe 已提交
100 101
  struct ImageInputArgs image;  // input image;
  struct ImageOutputArgs output;
Z
zhangyang 已提交
102 103
};

H
hanbuhe 已提交
104
struct EWAddArgs {
Z
zhangyang 已提交
105
  bool relu_enabled;
H
hanbuhe 已提交
106

Z
zhangyang 已提交
107 108
  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
  uint32_t const1;
H
hanbuhe 已提交
109 110 111
  struct ImageInputArgs image0;
  struct ImageInputArgs image1;
  struct ImageOutputArgs output;
H
hanbuhe 已提交
112 113
};

H
hanbuhe 已提交
114
struct BypassArgs {
H
hanbuhe 已提交
115 116 117 118
  enum DataType input_data_type;
  enum DataType output_data_type;
  enum LayoutType input_layout_type;
  enum LayoutType output_layout_type;
H
hanbuhe 已提交
119 120 121 122
  struct ImageInputArgs image;
  struct ImageOutputArgs output;
};

H
hanbuhe 已提交
123 124 125 126
// Device lifecycle and buffer management entry points. Only the prototypes
// are visible in this header; the notes below are inferred from the names.
// NOTE(review): the meaning of the int return codes is not shown here —
// confirm against the implementation before relying on a specific value.
int open_device();
int close_device();
// Presumably allocates `size` bytes of FPGA-usable memory; behavior on
// failure is not visible here — TODO confirm.
void* fpga_malloc(size_t size);
// Presumably releases a buffer obtained from fpga_malloc — TODO confirm.
void fpga_free(void* ptr);
H
hanbuhe 已提交
127

Z
zhangyang 已提交
128
// Rounds `num` up to a multiple of `x` using truncating integer division.
// `x` must be non-zero because it is used as a divisor. For non-negative
// `num` and positive `x` the result is the smallest multiple of `x` that is
// >= `num`.
static inline int align_to_x(int num, int x) {
  const int bumped = num + x - 1;  // push num past the next boundary
  const int blocks = bumped / x;   // whole blocks of size x (truncating)
  return blocks * x;
}
129

Z
zhangyang 已提交
130 131 132 133 134
// Filter / channel query helpers. Only the prototypes are visible here; the
// descriptions are inferred from the names — confirm against the
// implementation.
// Presumably returns the maximum value found in the filter tensor.
float filter_find_max(framework::Tensor* filter_tensor);
// Presumably returns channel_num rounded up to the hardware alignment.
int get_aligned_channel_num(int channel_num);
// Presumably returns the filter count rounded up to the hardware alignment.
int get_aligned_filter_num(framework::Tensor* filter_tensor);
// Presumably returns the output channel count implied by the filter tensor.
int get_conv_output_channel(framework::Tensor* filter_tensor);


Z
zhangyang 已提交
135
// Formats an image (feature map) tensor for the FPGA. Takes a non-const
// pointer, so presumably works in place — exact transformation is not
// visible in this header; TODO confirm.
void format_image(framework::Tensor* image_tensor);
Z
zhangyang 已提交
136 137 138
// Output feature map ("ofm") preparation for fp16 and fp32 outputs.
// NOTE(review): aligned_channel is presumably the channel count after
// alignment (cf. get_aligned_channel_num) — confirm.
void format_fp16_ofm(framework::Tensor* ofm_tensor,
                     int aligned_channel);  // only allocate memory
// NOTE(review): whether this one also only allocates memory is not stated
// here — confirm against the implementation.
void format_fp32_ofm(framework::Tensor* ofm_tensor, int aligned_channel);
139

Z
zhangyang 已提交
140 141
// Formats a convolution filter tensor for the FPGA.
// NOTE(review): max_value is presumably the result of filter_find_max() and
// group_num the convolution group count — confirm against the caller.
void format_filter(framework::Tensor* filter_tensor, float max_value,
                   int group_num);
Z
zhangyang 已提交
142
// Formats a fully-connected ("fc") filter tensor for the FPGA.
// NOTE(review): max_value is presumably the result of filter_find_max() —
// confirm against the caller.
void format_fc_filter(framework::Tensor* filter_tensor, float max_value);
Z
zhangyang 已提交
143 144
// Formats a bias/scale array for the FPGA. The array is passed as float**,
// so the callee can replace the caller's pointer.
// NOTE(review): whether it actually reallocates, and who owns the resulting
// buffer, is not visible here — confirm.
void format_bias_scale_array(float** bias_scale_array, int filter_num,
                             int filter_channel);
Z
zhangyang 已提交
145
void format_concat_output(framework::Tensor* out, int height, int width,
Z
zhangyang 已提交
146 147 148 149 150
                          uint32_t out_channel);
// Combined formatting helpers for convolution and fully-connected layers:
// each takes the filter tensor, the output feature map tensor and a
// bias/scale pointer.
// NOTE(review): the meaning of the int return value is not visible here, and
// bs_ptr is presumably the interlaced scale/bias buffer (cf.
// ConvArgs::sb_address) — confirm against the implementation.
int format_conv_data(framework::Tensor* filter_tensor,
                     framework::Tensor* ofm_tensor, float* bs_ptr, int group);
int format_fc_data(framework::Tensor* filter_tensor,
                   framework::Tensor* ofm_tensor, float* bs_ptr);
Z
zhangyang 已提交
151 152 153 154
// Populates *arg (a SplitConvArgs) from the input/output/filter tensors and
// the convolution parameters (ReLU flag, group count, strides, paddings).
// NOTE(review): bs_ptr is presumably the interlaced scale/bias buffer (cf.
// ConvArgs::sb_address); ownership of anything allocated for
// arg->conv_args is not visible here — confirm against the implementation.
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                    framework::Tensor* out, framework::Tensor* filter,
                    bool relu_enabled, int group_num, int stride_h,
                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
155

Z
zhangyang 已提交
156 157 158
// Scalar conversions between float and `half` (a type declared outside this
// header's visible text).
// NOTE(review): rounding and overflow behavior are not visible here —
// confirm against the implementation.
half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);

H
hanbuhe 已提交
159
}  // namespace fpga
Z
zhangyang 已提交
160
}  // namespace paddle_mobile