Commit 0e554f5a authored by smilejames, committed by GitHub

Merge branch 'develop' into develop

@@ -14,36 +14,35 @@ limitations under the License. */
 #pragma once
+#include <stdint.h>
 #include <cstddef>
 #include <iostream>
 #include <limits>
 // memory management;
-namespace paddle {
-namespace mobile {
+namespace paddle_mobile {
 namespace fpga {
-namespace api {
 int open_device();
 int close_device();
-void *fpga_malloc(size_t size);
-void fpga_free(void *ptr);
-void fpga_copy(void *dst, const void *src, size_t num);
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+void fpga_copy(void* dst, const void* src, size_t num);
 struct FpgaVersionArgs {
-  void *buf;
+  void* buf;
 };
 struct MemoryToPhysicalArgs {
-  const void *src;
+  const void* src;
   uint64_t physical;
 };
 struct MemoryCopyArgs {
-  void *src;
-  void *dst;
+  void* src;
+  void* dst;
   size_t size;
 };
@@ -51,38 +50,71 @@ struct FpgaQuantArgs {
   float scale;
 };
-struct FpgaBNArgs {};
+struct FpgaBNArgs {
+  bool enabled = false;
+  void* bias_addr;
+  void* scale_addr;
+};
+struct FpgaKernelArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t stride_h;
+  uint32_t stride_w;
+};
+struct FpgaImageArgs {
+  uint32_t width;
+  uint32_t height;
+  uint32_t channels;
+  uint32_t pad_h;
+  uint32_t pad_w;
+};
 struct FpgaConvArgs {
-  bool enable_BN = false;
-  bool enable_Relu = false;
-  struct FpgaBNParam bn_parm;
+  bool relu_enabled;
+  struct FpgaBNArgs BNargs;
+  void* image_addr;
+  void* filter_addr;
+  void* bias_addr;
+  void* output_addr;
+  float quant_scale;
+  struct FpgaImageArgs image;
+  uint32_t filter_num;
+  uint32_t group_num;
+  struct FpgaKernelArgs kernel;
 };
 struct FpgaPoolArgs {
-  bool enable_BN = false;
-  struct FpgaBNParam bn_parm;
+  void* image_addr;
+  void* output_addr;
+  struct FpgaImageArgs image;
+  struct FpgaKernelArgs kernel;
 };
-struct FpgaEWAddArgs {  // only support X + Y
-  bool enable_Relu = false;
+struct FpgaEWAddArgs {
+  bool relu_enabled;
+  void* image0_addr;
+  void* image1_addr;
+  void* result_addr;
+  uint32_t const0;
+  uint32_t const1;
+  uint32_t data_len;  // aligned element count
 };
-int ComputeFpgaConv(struct FpgaConvArgs);
-int ComputeFpgaPool(struct FpgaPoolArgs);
-int ComputeFpgaEWAdd(struct FpgaEWAddArgs);
+int ComputeFpgaConv(struct FpgaConvArgs args);
+int ComputeFpgaPool(struct FpgaPoolArgs args);
+int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
-#define IOCTL_FPGA_MAGIC 'FPGA'
+#define IOCTL_FPGA_MAGIC 'CNN'
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
 #define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaArgs)
+#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
 #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
+#define IOCTL_MEM_TOPHY _IOW(IOCTL_FPGA_MAGIC, 12, struct MemoryToPhysicalArgs)
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
-}  // namespace api
 }  // namespace fpga
-}  // namespace mobile
-}  // namespace paddle
+}  // namespace paddle_mobile
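Since this header is the whole surface of the new FPGA runtime API, a usage sketch may help. This is a minimal sketch under assumptions: the buffer sizes, image geometry, filter count, and quantization scale below are placeholders, and the helper name is invented; only the types and functions come from the header above.

// Hypothetical driver for one convolution; all numeric values are illustrative.
#include "fpga/api/fpga_api.h"

int RunConvOnce(const void* image_host, size_t image_bytes,
                const void* filter_host, size_t filter_bytes,
                size_t output_bytes) {
  using namespace paddle_mobile::fpga;
  if (open_device() != 0) return -1;

  // Stage inputs in FPGA-visible memory.
  void* image = fpga_malloc(image_bytes);
  void* filter = fpga_malloc(filter_bytes);
  void* output = fpga_malloc(output_bytes);
  fpga_copy(image, image_host, image_bytes);
  fpga_copy(filter, filter_host, filter_bytes);

  // Describe the convolution; geometry and scale are placeholders.
  FpgaConvArgs args = {};
  args.relu_enabled = true;
  args.BNargs.enabled = false;
  args.image_addr = image;
  args.filter_addr = filter;
  args.bias_addr = nullptr;
  args.output_addr = output;
  args.quant_scale = 1.0f;
  args.image = {224, 224, 3, 1, 1};  // width, height, channels, pad_h, pad_w
  args.filter_num = 64;
  args.group_num = 1;
  args.kernel = {3, 3, 2, 2};        // width, height, stride_h, stride_w

  int ret = ComputeFpgaConv(args);

  fpga_free(image);
  fpga_free(filter);
  fpga_free(output);
  close_device();
  return ret;
}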
@@ -419,7 +419,7 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
 }
 template class Executor<CPU, Precision::FP32>;
-template class Executor<FPGA, Precision::FP32>;
-template class Executor<GPU_MALI, Precision::FP16>;
+template class Executor<GPU_MALI, Precision::FP32>;
+template class Executor<FPGA, Precision::FP16>;
 }  // namespace paddle_mobile
@@ -22,6 +22,9 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "framework/variable.h"
+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif
 namespace paddle_mobile {
 namespace operators {
@@ -256,6 +259,15 @@ class ElementwiseAddParam : OpParam {
   Tensor *input_y_;
   Tensor *out_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaEWAddArgs fpga_EW_add_args;
+
+ public:
+  const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; }
+  void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; }
+#endif
 };
 #ifdef FUSION_ELEMENTWISEADDRELU_OP
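To make the intent of this accessor pair concrete, here is a minimal sketch of how a caller might park element-wise arguments on the param object. The helper name, the device-buffer parameters, and the coefficient values are assumptions for illustration; only SetFpgaArgs/FpgaArgs and FpgaEWAddArgs come from this diff.

// Hypothetical wiring: x_dev/y_dev/out_dev are assumed to be buffers already
// staged with fpga_malloc/fpga_copy.
void ConfigureEWAdd(paddle_mobile::operators::ElementwiseAddParam *param,
                    void *x_dev, void *y_dev, void *out_dev,
                    uint32_t aligned_len) {
  paddle_mobile::fpga::FpgaEWAddArgs args = {};
  args.relu_enabled = false;
  args.image0_addr = x_dev;
  args.image1_addr = y_dev;
  args.result_addr = out_dev;
  args.const0 = 1;              // placeholder coefficients for X + Y
  args.const1 = 1;
  args.data_len = aligned_len;  // aligned element count, per the struct comment
  param->SetFpgaArgs(args);
}

// An FPGA kernel would later dispatch with:
//   paddle_mobile::fpga::ComputeFpgaEWAdd(param->FpgaArgs());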
@@ -450,80 +462,15 @@ class PoolParam : public OpParam {
   vector<int> paddings_;
   bool ceil_mode_;
   bool global_pooling_ = false;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaPoolArgs fpga_pool_args;
+
+ public:
+  const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; }
+  void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; }
+#endif
 };
 #endif
-#ifdef FUSION_POOLBN_OP
-class FusionPoolBNParam : OpParam {
- public:
-  FusionPoolBNParam(const VariableNameMap &inputs,
-                    const VariableNameMap &outputs, const AttributeMap &attrs,
-                    const Scope &scope) {
-    input_ = InputXFrom<LoDTensor>(inputs, scope);
-    pooling_type_ = GetAttr<string>("pooling_type", attrs);
-    ksize_ = GetAttr<vector<int>>("ksize", attrs);
-    strides_ = GetAttr<vector<int>>("strides", attrs);
-    paddings_ = GetAttr<vector<int>>("paddings", attrs);
-    ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
-    global_pooling_ = GetAttr<bool>("global_pooling", attrs);
-    output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
-    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
-    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
-    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
-    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
-    epsilon_ = GetAttr<float>("epsilon", attrs);
-    momentum_ = GetAttr<float>("momentum", attrs);
-    // is_test_ = GetAttr<bool>("is_test", attrs);
-  }
-  const Tensor *Input() const { return input_; }
-  const string &PoolingType() const { return pooling_type_; }
-  const vector<int> &Ksize() const { return ksize_; }
-  const vector<int> &Strides() const { return strides_; }
-  const vector<int> &Paddings() const { return paddings_; }
-  bool isCeilMode() const { return ceil_mode_; }
-  bool isGlobalPooling() const { return global_pooling_; }
-  Tensor *OutputY() const { return output_y_; }
-  const Tensor *InputBias() const { return input_bias_; }
-  const Tensor *InputMean() const { return input_mean_; }
-  const Tensor *InputScale() const { return input_scale_; }
-  const Tensor *InputVariance() const { return input_variance_; }
-  const float &Epsilon() const { return epsilon_; }
-  const float &Momentum() const { return momentum_; }
-  const bool &IsTest() const { return is_test_; }
-  const string &DataFormat() const { return data_format_; }
-
- private:
-  Tensor *input_;
-  string pooling_type_;
-  vector<int> ksize_;
-  vector<int> strides_;
-  vector<int> paddings_;
-  bool ceil_mode_;
-  bool global_pooling_ = false;
-  Tensor *output_y_;
-  Tensor *input_bias_;
-  Tensor *input_mean_;
-  Tensor *input_scale_;
-  Tensor *input_variance_;
-  float epsilon_;
-  float momentum_;
-  bool is_test_;
-  string data_format_;
-};
-#endif
@@ -704,7 +651,7 @@ class MultiClassNMSParam : public OpParam {
 class FeedParam : public OpParam {
  public:
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope &scope) {
+            const AttributeMap &attrs, Scope const &scope) {
     input_x_ = InputXFrom<LoDTensor>(inputs, scope);
     out_ = OutFrom<LoDTensor>(outputs, scope);
     auto var = scope.Var("batch_size");
@@ -983,6 +930,15 @@ class FusionFcParam : public OpParam {
   int x_num_col_dims_;
   int y_num_col_dims_;
   int axis_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #ifdef FUSION_FCRELU_OP
@@ -1032,6 +988,15 @@ class FusionConvAddParam : public OpParam {
   vector<int> paddings_;
   vector<int> dilations_;
   int groups;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
@@ -1128,6 +1093,15 @@ class FusionConvAddBNReluParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
@@ -1213,6 +1187,15 @@ class FusionConvAddBNParam : public OpParam {
   bool is_test_;
   Tensor *new_bias_;
   Tensor *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+ private:
+  fpga::FpgaConvArgs fpga_conv_args;
+
+ public:
+  const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
@@ -1426,9 +1409,5 @@ class DropoutParam : public OpParam {
 };
 #endif
-#ifdef REGION_OP
-class RegionParam : public OpParam {};
-#endif
 }  // namespace operators
 }  // namespace paddle_mobile
@@ -75,11 +75,9 @@ if ("FPGAnets" IN_LIST NET)
   set(FUSION_CONVADDRELU_OP ON)
   set(FUSION_CONVADDBNRELU_OP ON)
   set(FUSION_CONVADDBN_OP ON)
-  set(FUSION_POOLBN_OP ON)
   set(FUSION_ELEMENTWISEADDRELU_OP ON)
   set(FUSION_FC_OP ON)
   set(FUSION_FCRELU_OP ON)
-  set(REGION_OP ON)
   set(POOL_OP ON)
   set(CONCAT_OP ON)
   set(SOFTMAX_OP ON)
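For context, these operator switches are chosen at configure time; a minimal sketch, assuming the build is driven by a NET cache variable as the IN_LIST check above implies (the exact flag spelling is an assumption, not taken from this commit):

# Hypothetical configure step: selects the FPGA operator set by putting
# "FPGAnets" into the NET list that this file tests with IN_LIST.
cmake -DNET="FPGAnets" ..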
......