Commit 0e554f5a authored by S smilejames, committed by GitHub

Merge branch 'develop' into develop

@@ -14,36 +14,35 @@ limitations under the License. */
#pragma once
#include <stdint.h>
#include <cstddef>
#include <iostream>
#include <limits>
// memory management;
-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
namespace fpga {
namespace api {
int open_device();
int close_device();
-void *fpga_malloc(size_t size);
-void fpga_free(void *ptr);
-void fpga_copy(void *dst, const void *src, size_t num);
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs {
-  void *buf;
+  void* buf;
};
struct MemoryToPhysicalArgs {
-  const void *src;
+  const void* src;
uint64_t physical;
};
struct MemoryCopyArgs {
-  void *src;
-  void *dst;
+  void* src;
+  void* dst;
size_t size;
};
@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
float scale;
};
-struct FpgaBNArgs {};
+struct FpgaBNArgs {
+  bool enabled = false;
+  void* bias_addr;
+  void* scale_addr;
+};
+struct FpgaKernelArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t stride_h;
+  uint32_t stride_w;
+};
+struct FpgaImageArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t channels;
+  uint32_t pad_h;
+  uint32_t pad_w;
+};
struct FpgaConvArgs {
-  bool enable_BN = false;
-  bool enable_Relu = false;
-  struct FpgaBNParam bn_parm;
+  bool relu_enabled;
+  struct FpgaBNArgs BNargs;
+  void* image_addr;
+  void* filter_addr;
+  void* bias_addr;
+  void* output_addr;
+  float quant_scale;
+  struct FpgaImageArgs image;
+  uint32_t filter_num;
+  uint32_t group_num;
+  struct FpgaKernelArgs kernel;
};
struct FpgaPoolArgs {
-  bool enable_BN = false;
-  struct FpgaBNParam bn_parm;
+  void* image_addr;
+  void* output_addr;
+  struct FpgaImageArgs image;
+  struct FpgaKernelArgs kernel;
};
-struct FpgaEWAddArgs {  // only support X + Y
-  bool enable_Relu = false;
+struct FpgaEWAddArgs {
+  bool relu_enabled;
+  void* image0_addr;
+  void* image1_addr;
+  void* result_addr;
+  uint32_t const0;
+  uint32_t const1;
+  uint32_t data_len;  // aligned element count
};
-int ComputeFpgaConv(struct FpgaConvArgs);
-int ComputeFpgaPool(struct FpgaPoolArgs);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
+int ComputeFpgaConv(struct FpgaConvArgs args);
+int ComputeFpgaPool(struct FpgaPoolArgs args);
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
-#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
+#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace api
} // namespace fpga
-} // namespace mobile
-} // namespace paddle
+} // namespace paddle_mobile
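
Taken together, the reworked header exposes a small malloc/copy/compute surface over the FPGA driver. Below is a minimal usage sketch, not taken from the repository: buffer sizes, shapes, strides, element width, and the quantization scale are illustrative placeholders, and a zero return from open_device()/ComputeFpgaConv() signaling success is an assumption.

// Hypothetical end-to-end use of the API above
// (assumes #include "fpga/api/fpga_api.h").
using namespace paddle_mobile::fpga::api;

int RunConvOnce() {
  if (open_device() != 0) return -1;  // assumed: 0 means success

  // Device-visible buffers; 2-byte (FP16) elements are an assumption.
  void* image  = fpga_malloc(224 * 224 * 3 * sizeof(int16_t));
  void* filter = fpga_malloc(3 * 3 * 3 * 32 * sizeof(int16_t));
  void* bias   = fpga_malloc(32 * sizeof(int16_t));
  void* output = fpga_malloc(112 * 112 * 32 * sizeof(int16_t));

  struct FpgaConvArgs args;
  args.relu_enabled = true;
  args.BNargs.enabled = false;        // skip fused batch-norm
  args.image_addr = image;
  args.filter_addr = filter;
  args.bias_addr = bias;
  args.output_addr = output;
  args.quant_scale = 1.0f;
  args.image = {224, 224, 3, 1, 1};   // width, height, channels, pad_h, pad_w
  args.filter_num = 32;
  args.group_num = 1;
  args.kernel = {3, 3, 2, 2};         // width, height, stride_h, stride_w

  int ret = ComputeFpgaConv(args);    // struct is passed by value

  fpga_free(output);
  fpga_free(bias);
  fpga_free(filter);
  fpga_free(image);
  close_device();
  return ret;
}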
@@ -419,7 +419,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
}
template class Executor<CPU, Precision::FP32>;
-template class Executor<FPGA, Precision::FP32>;
-template class Executor<GPU_MALI, Precision::FP16>;
+template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<FPGA, Precision::FP16>;
} // namespace paddle_mobile
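
The instantiation change pairs GPU_MALI with FP32 and leaves the FPGA path instantiated at FP16, presumably matching the half-precision data the FPGA kernels consume; the CPU/FP32 instantiation is unchanged.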
@@ -22,6 +22,9 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif
namespace paddle_mobile {
namespace operators {
@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
Tensor *input_y_;
Tensor *out_;
int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaEWAddArgs fpga_EW_add_args;
+ public:
+  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+#endif
};
#ifdef FUSION_ELEMENTWISEADDRELU_OP
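
The pattern for all of these param extensions is the same: the kernel resolves device pointers once, fills the matching args struct, and stashes it on the param via SetFpgaArgs. A hypothetical sketch for element-wise add follows, using the fpga:: qualification as op_param.h does; the buffer sources, coefficient values, and the op_param.h include path are assumptions.

// Hypothetical wiring of FpgaEWAddArgs into ElementwiseAddParam.
#include "fpga/api/fpga_api.h"
#include "operators/op_param.h"  // assumed header location

namespace paddle_mobile {
namespace operators {

void WireUpEWAdd(ElementwiseAddParam* param, void* x, void* y, void* out,
                 uint32_t aligned_count) {
  fpga::FpgaEWAddArgs args;
  args.relu_enabled = false;      // plain add, no fused ReLU
  args.image0_addr = x;           // device-visible input buffers
  args.image1_addr = y;
  args.result_addr = out;
  args.const0 = 1;                // assumed per-input coefficients
  args.const1 = 1;
  args.data_len = aligned_count;  // aligned element count, per the struct
  param->SetFpgaArgs(args);
}

}  // namespace operators
}  // namespace paddle_mobile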
@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
vector<int> paddings_;
bool ceil_mode_;
bool global_pooling_ = false;
-};
-#endif
-#ifdef FUSION_POOLBN_OP
-class FusionPoolBNParam : OpParam {
- public:
-  FusionPoolBNParam(const VariableNameMap &inputs,
-                    const VariableNameMap &outputs, const AttributeMap &attrs,
-                    const Scope &scope) {
-    input_ = InputXFrom<LoDTensor>(inputs, scope);
-    pooling_type_ = GetAttr<string>("pooling_type", attrs);
-    ksize_ = GetAttr<vector<int>>("ksize", attrs);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
-    global_pooling_ = GetAttr<bool>("global_pooling", attrs);
-    output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
-    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
-    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
-    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
-    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
-  }
-  const Tensor *Input() const { return input_; }
-  const string &PoolingType() const { return pooling_type_; }
-  const vector<int> &Ksize() const { return ksize_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  bool isCeilMode() const { return ceil_mode_; }
-  bool isGlobalPooling() const { return global_pooling_; }
-  Tensor *OutputY() const { return output_y_; }
-  const Tensor *InputBias() const { return input_bias_; }
-  const Tensor *InputMean() const { return input_mean_; }
-  const Tensor *InputScale() const { return input_scale_; }
-  const Tensor *InputVariance() const { return input_variance_; }
-  const float &Epsilon() const { return epsilon_; }
-  const float &Momentum() const { return momentum_; }
-  const bool &IsTest() const { return is_test_; }
-  const string &DataFormat() const { return data_format_; }
+#ifdef PADDLE_MOBILE_FPGA
private:
-  Tensor *input_;
-  string pooling_type_;
-  vector<int> ksize_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  bool ceil_mode_;
-  bool global_pooling_ = false;
-  Tensor *output_y_;
-  Tensor *input_bias_;
-  Tensor *input_mean_;
-  Tensor *input_scale_;
-  Tensor *input_variance_;
-  float epsilon_;
-  float momentum_;
-  bool is_test_;
-  string data_format_;
+  fpga::FpgaPoolArgs fpga_pool_args;
+ public:
+  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+#endif
};
#endif
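
So the FusionPoolBNParam class is dropped entirely and plain PoolParam gains the FPGA hooks instead. Population is analogous to the element-wise case; a sketch under assumed shapes (a 2x2 window at stride 2 over a 112x112x32 feature map), with hypothetical buffer arguments:

// Hypothetical FpgaPoolArgs setup; all shapes are placeholders.
fpga::FpgaPoolArgs MakePoolArgs(void* in, void* out) {
  fpga::FpgaPoolArgs args;
  args.image_addr = in;
  args.output_addr = out;
  args.image.width = 112;
  args.image.height = 112;
  args.image.channels = 32;
  args.image.pad_h = 0;
  args.image.pad_w = 0;
  args.kernel.width = 2;
  args.kernel.height = 2;
  args.kernel.stride_h = 2;
  args.kernel.stride_w = 2;
  return args;
}

A pool kernel would then call param.SetFpgaArgs(MakePoolArgs(in, out)) once at init time and ComputeFpgaPool(param.FpgaArgs()) on each run.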
@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-          const AttributeMap &attrs, Scope &scope) {
+          const AttributeMap &attrs, Scope const &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
auto var = scope.Var("batch_size");
@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
int x_num_col_dims_;
int y_num_col_dims_;
int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
};
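
Note that the fully-connected fusion reuses fpga::FpgaConvArgs rather than introducing a dedicated FC struct; presumably the hardware executes a fully-connected layer as a 1x1 convolution, though the diff itself does not state this.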
#ifdef FUSION_FCRELU_OP
@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
vector<int> paddings_;
vector<int> dilations_;
int groups;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
};
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
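
On the execute side, each fused conv kernel can simply replay the stored args. A minimal sketch, assuming the kernel lives inside paddle_mobile::operators and that a zero return from ComputeFpgaConv signals success:

#ifdef PADDLE_MOBILE_FPGA
// Hypothetical FPGA compute path for the fused conv-add kernel:
// args were prepared once via SetFpgaArgs and are replayed per run.
bool RunConvAddOnFpga(const FusionConvAddParam &param) {
  return fpga::ComputeFpgaConv(param.FpgaArgs()) == 0;  // assumed: 0 == OK
}
#endif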
@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
};
#endif
@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
};
#endif
@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
};
#endif
-#ifdef REGION_OP
-class RegionParam : public OpParam {};
-#endif
} // namespace operators
} // namespace paddle_mobile
@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
set(FUSION_CONVADDRELU_OP ON)
set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDBN_OP ON)
-  set(FUSION_POOLBN_OP ON)
set(FUSION_ELEMENTWISEADDRELU_OP ON)
set(FUSION_FC_OP ON)
set(FUSION_FCRELU_OP ON)
-  set(REGION_OP ON)
set(POOL_OP ON)
set(CONCAT_OP ON)
set(SOFTMAX_OP ON)
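For reference, these switches take effect when the build is configured with the FPGAnets net list via the NET variable tested above, e.g. cmake -DNET=FPGAnets ..; the exact invocation is an assumption, as it comes from the project's build scripts rather than this diff.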