提交 a74ccea0 编写于 作者: Z zhangyang0701 提交者: GitHub

Merge pull request #675 from chonwhite/develop

#674 tensor增加fpga量化支持
...@@ -31,90 +31,110 @@ void* fpga_malloc(size_t size); ...@@ -31,90 +31,110 @@ void* fpga_malloc(size_t size);
void fpga_free(void* ptr); void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num); void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs { struct VersionArgs {
void* buf; void* buffer;
};
struct MemoryToPhysicalArgs {
const void* src;
uint64_t physical;
}; };
struct MemoryCopyArgs { struct MemoryCopyArgs {
void* src; void* src;
void* dst; void* dest;
size_t size; size_t size;
}; };
struct FpgaQuantArgs { struct BNArgs {
float scale; bool enabled;
}; void* bias_address;
void* scale_address;
struct FpgaBNArgs {
bool enabled = false;
void* bias_addr;
void* scale_addr;
}; };
struct FpgaKernelArgs { /**
Conv and Pooling kernel
*/
struct KernelArgs {
uint32_t width; uint32_t width;
uint32_t height; uint32_t height;
uint32_t stride_h;
uint32_t stride_w; uint32_t stride_w;
uint32_t stride_h;
}; };
struct FpgaImageArgs { struct ImageInputArgs {
uint32_t width; void* address; // input featuremap virtual address
uint32_t height;
uint32_t channels; uint32_t channels;
uint32_t pad_h; uint32_t width; // featuremap width
uint32_t pad_w; uint32_t height;
uint32_t pad_width; // padding width;
uint32_t pad_height;
};
struct ImageOutputArgs {
void* address; // output result address;
float* scale_address; // output scale address;
}; };
struct FpgaConvArgs { struct ConvArgs {
bool relu_enabled; bool relu_enabled;
struct FpgaBNArgs BNargs; float scale; // input scale;
void* image_addr; void* bias_address;
void* filter_addr; void* filter_address;
void* bias_addr;
void* output_addr;
float quant_scale;
struct FpgaImageArgs image;
uint32_t filter_num; uint32_t filter_num;
uint32_t group_num; uint32_t group_num;
struct FpgaKernelArgs kernel; struct BNArgs bn;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
struct KernelArgs kernel;
}; };
struct FpgaPoolArgs { struct PoolingArgs {
void* image_addr; float scale;
void* output_addr; struct ImageInputArgs image; // input image;
struct FpgaImageArgs image; struct ImageOutputArgs output;
struct FpgaKernelArgs kernel; struct KernelArgs kernel;
}; };
struct FpgaEWAddArgs { // elementwise add arguments
struct EWAddArgs {
bool relu_enabled; bool relu_enabled;
void* image0_addr; float scale;
void* image1_addr;
void* result_addr; float const0; // output0 = const0 x input0 + const1 x input1;
uint32_t const0; float const1;
uint32_t const1; struct ImageInputArgs image0;
uint32_t data_len; // aligned element count struct ImageInputArgs image1;
struct ImageOutputArgs output;
}; };
// Argument block named by the IOCTL_FPGA_REG_WRITE request code defined in
// this header. Holds two 64-bit fields.
// NOTE(review): field semantics below are inferred from the names only —
// confirm against the FPGA driver before relying on them.
struct FpgaRegWriteArgs {
uint64_t address; // presumably the target register address — TODO confirm
uint64_t value; // presumably the value to store at `address` — TODO confirm
};
// Argument block named by the IOCTL_FPGA_REG_READ request code defined in
// this header. Same two 64-bit fields as FpgaRegWriteArgs.
// NOTE(review): field semantics below are inferred from the names only —
// confirm against the FPGA driver before relying on them.
struct FpgaRegReadArgs {
uint64_t address; // presumably the register address to read — TODO confirm
uint64_t value; // presumably filled in with the value read — TODO confirm
};
// ioctl request codes for the FPGA device. Each code is built with _IOW from
// the shared magic, a per-command number, and the argument struct type.
//
// NOTE(review): 'FPGA' is a multi-character literal. Its value is
// implementation-defined in C++ and is wider than a single character, so the
// resulting request codes depend on the compiler — confirm the driver is
// built from this same definition.
#define IOCTL_FPGA_MAGIC 'FPGA'
// Command numbers 01-03 are written with a leading zero, which makes them
// octal literals; their values are still 1, 2 and 3.
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
// NOTE(review): declared with _IOW although the name indicates a read —
// confirm the intended transfer direction with the driver.
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
// Plain integer, not a request code. Presumably marks the boundary before the
// memory command numbers (11 and up) — TODO confirm.
#define IOCTL_SEPARATOR_0 10
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
// Plain integer, not a request code. Presumably marks the boundary before the
// operator-configuration command numbers (21 and up) — TODO confirm.
#define IOCTL_SEPARATOR_1 20
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
//============================== API =============================
int ComputeFpgaConv(struct FpgaConvArgs args); int ComputeFpgaConv(struct FpgaConvArgs args);
int ComputeFpgaPool(struct FpgaPoolArgs args); int ComputeFpgaPool(struct FpgaPoolArgs args);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs args); int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -253,6 +253,18 @@ class Tensor { ...@@ -253,6 +253,18 @@ class Tensor {
"Tensor's dims_ is out of bound. "); "Tensor's dims_ is out of bound. ");
} }
#ifdef PADDLE_MOBILE_FPGA
struct FPGAArgs {
float scale;
inline float *scale_pointer() { return &scale; }
};
struct &fpga_args() const {
return fpgaArgs_;
}
#endif
private: private:
/** /**
* @note Placeholder hides type T, so it doesn't appear as a * @note Placeholder hides type T, so it doesn't appear as a
...@@ -319,6 +331,10 @@ class Tensor { ...@@ -319,6 +331,10 @@ class Tensor {
* begins. * begins.
*/ */
size_t offset_; size_t offset_;
#ifdef PADDLE_MOBILE_FPGA
FPGAArgs fpgaArgs_;
#endif
}; };
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
......
...@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const { ...@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
for (int i = 0; i < inputs.size(); ++i) { for (int i = 0; i < inputs.size(); ++i) {
auto input = inputs[i]; auto input = inputs[i];
auto channels = input[3]; auto channels = input->dims()[3];
out_offset += channels; out_offset += channels;
auto src = input->data<half>(); auto src = input->data<half>();
for (int j = 0; j < pixels; ++j) { for (int j = 0; j < pixels; ++j) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册