Commit a74ccea0 authored by: Z zhangyang0701, committed by: GitHub

Merge pull request #675 from chonwhite/develop

#674 Add FPGA quantization support to tensor
......@@ -31,90 +31,110 @@ void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
void fpga_copy(void* dst, const void* src, size_t num);
struct FpgaVersionArgs {
void* buf;
};
struct MemoryToPhysicalArgs {
const void* src;
uint64_t physical;
struct VersionArgs {
void* buffer;
};
struct MemoryCopyArgs {
void* src;
void* dst;
void* dest;
size_t size;
};
struct FpgaQuantArgs {
float scale;
};
struct FpgaBNArgs {
bool enabled = false;
void* bias_addr;
void* scale_addr;
struct BNArgs {
bool enabled;
void* bias_address;
void* scale_address;
};
struct FpgaKernelArgs {
/**
Conv and Pooling kernel
*/
struct KernelArgs {
uint32_t width;
uint32_t height;
uint32_t stride_h;
uint32_t stride_w;
uint32_t stride_h;
};
struct FpgaImageArgs {
uint32_t width;
uint32_t height;
struct ImageInputArgs {
void* address; // input featuremap virtual address
uint32_t channels;
uint32_t pad_h;
uint32_t pad_w;
uint32_t width; // featuremap width
uint32_t height;
uint32_t pad_width; // padding width;
uint32_t pad_height;
};
struct ImageOutputArgs {
void* address; // output result address;
float* scale_address; // output scale address;
};
struct FpgaConvArgs {
struct ConvArgs {
bool relu_enabled;
struct FpgaBNArgs BNargs;
void* image_addr;
void* filter_addr;
void* bias_addr;
void* output_addr;
float quant_scale;
struct FpgaImageArgs image;
float scale; // input scale;
void* bias_address;
void* filter_address;
uint32_t filter_num;
uint32_t group_num;
struct FpgaKernelArgs kernel;
struct BNArgs bn;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
struct KernelArgs kernel;
};
struct FpgaPoolArgs {
void* image_addr;
void* output_addr;
struct FpgaImageArgs image;
struct FpgaKernelArgs kernel;
struct PoolingArgs {
float scale;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
struct KernelArgs kernel;
};
struct FpgaEWAddArgs {
// elementwise add arguments
struct EWAddArgs {
bool relu_enabled;
void* image0_addr;
void* image1_addr;
void* result_addr;
uint32_t const0;
uint32_t const1;
uint32_t data_len; // aligned element count
float scale;
float const0; // output0 = const0 x input0 + const1 x input1;
float const1;
struct ImageInputArgs image0;
struct ImageInputArgs image1;
struct ImageOutputArgs output;
};
struct FpgaRegWriteArgs {
uint64_t address; //
uint64_t value;
};
struct FpgaRegReadArgs {
uint64_t address;
uint64_t value;
};
#define IOCTL_FPGA_MAGIC 'FPGA'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
#define IOCTL_SEPARATOR_0 10
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_SEPARATOR_1 20
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
//============================== API =============================
int ComputeFpgaConv(struct FpgaConvArgs args);
int ComputeFpgaPool(struct FpgaPoolArgs args);
int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
#define IOCTL_FPGA_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
} // namespace fpga
} // namespace paddle_mobile
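The renamed argument structs compose into one descriptor per op: ConvArgs bundles the input ImageInputArgs (virtual address, shape, padding), the filter and bias addresses, an optional fused BNArgs, the KernelArgs geometry, and an ImageOutputArgs whose scale_address is where the quantization scale for the result comes back. Below is a minimal usage sketch, assuming the field set visible in this diff, that ComputeFpgaConv now takes the renamed ConvArgs, and that every buffer was obtained with fpga_malloc; the include path, function name, and shapes are illustrative, not from the PR:

```cpp
// Sketch only: relies on the ConvArgs/ImageInputArgs/ImageOutputArgs/KernelArgs
// fields shown in the diff above; the include path and all sizes are made up.
#include "fpga/api/fpga_api.h"  // hypothetical include path

using namespace paddle_mobile::fpga;

void RunConv3x3(void* input, void* filter, void* bias, void* output,
                float* out_scale, float in_scale) {
  struct ConvArgs args = {};
  args.relu_enabled = true;
  args.scale = in_scale;            // input scale (see "float scale" above)
  args.bn.enabled = false;          // no fused batch-norm for this call
  args.filter_address = filter;
  args.bias_address = bias;
  args.filter_num = 32;             // output channel count
  args.group_num = 1;

  args.image.address = input;       // input featuremap (virtual address)
  args.image.channels = 16;
  args.image.width = 224;           // featuremap width
  args.image.height = 224;
  args.image.pad_width = 1;
  args.image.pad_height = 1;

  args.kernel.width = 3;
  args.kernel.height = 3;
  args.kernel.stride_w = 1;
  args.kernel.stride_h = 1;

  args.output.address = output;          // result buffer from fpga_malloc
  args.output.scale_address = out_scale; // driver reports the output scale here

  ComputeFpgaConv(args);  // presumably issues IOCTL_CONFIG_CONV under the hood
}
```

Moving the scale into ImageOutputArgs (and the input scale into ConvArgs) appears to replace the old global IOCTL_GET_QUANT/IOCTL_SET_QUANT pair, so each op call carries its own quantization state.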
......@@ -253,6 +253,18 @@ class Tensor {
"Tensor's dims_ is out of bound. ");
}
#ifdef PADDLE_MOBILE_FPGA
struct FPGAArgs {
float scale;
inline float *scale_pointer() { return &scale; }
};
const struct FPGAArgs &fpga_args() const {
return fpgaArgs_;
}
#endif
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a
......@@ -319,6 +331,10 @@ class Tensor {
* begins.
*/
size_t offset_;
#ifdef PADDLE_MOBILE_FPGA
FPGAArgs fpgaArgs_;
#endif
};
#ifdef PADDLE_MOBILE_DEBUG
......
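With the FPGAArgs member in place, every Tensor carries its own quantization scale: fpga_args() exposes it to FPGA-side code, and scale_pointer() gives the driver a writable address for it. A hedged sketch of reading that scale after an op has run, assuming Tensor lives in paddle_mobile::framework and the accessor returns the struct shown above; the include path and function are illustrative:

```cpp
// Sketch: reading the per-tensor FPGA quantization scale added in this diff.
// Tensor::fpga_args() and FPGAArgs come from the diff; the rest is illustrative.
#include "framework/tensor.h"  // hypothetical include path

using paddle_mobile::framework::Tensor;

float OutputScaleOf(const Tensor& out) {
#ifdef PADDLE_MOBILE_FPGA
  // The FPGA driver writes the output scale through scale_pointer();
  // downstream ops read it back via the const accessor.
  return out.fpga_args().scale;
#else
  return 1.0f;  // no quantization scale outside the FPGA build
#endif
}
```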
......@@ -39,7 +39,7 @@ void ConcatKernel<FPGA, half>::Compute(const ConcatParam &param) const {
for (int i = 0; i < inputs.size(); ++i) {
auto input = inputs[i];
auto channels = input[3];
auto channels = input->dims()[3];
out_offset += channels;
auto src = input->data<half>();
for (int j = 0; j < pixels; ++j) {
......
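The one-line ConcatKernel change is a type fix: inputs[i] is a Tensor pointer, so the old input[3] indexed past the pointer instead of reading the channel dimension, while input->dims()[3] pulls the NHWC channel count that the running output offset needs. A standalone sketch of the same channel-wise concat pattern, with simplified offset bookkeeping and stand-in types (none of the names below are the kernel's actual code):

```cpp
// Illustrative NHWC channel concat: each input contributes dims()[3] channels
// at a growing channel offset inside every output pixel row.
#include <cstdint>
#include <vector>

typedef int16_t half;  // stand-in for the fp16 storage type

struct Dims {
  int64_t d[4];
  int64_t operator[](int i) const { return d[i]; }
};

struct FakeTensor {  // minimal stand-in for framework::Tensor
  Dims dims_;
  std::vector<half> buf_;
  const Dims& dims() const { return dims_; }
  const half* data() const { return buf_.data(); }
};

void ConcatChannels(const std::vector<FakeTensor*>& inputs, half* out,
                    int out_channels, int pixels) {
  int out_offset = 0;
  for (size_t i = 0; i < inputs.size(); ++i) {
    const FakeTensor* input = inputs[i];
    int channels = static_cast<int>(input->dims()[3]);  // the fixed lookup
    const half* src = input->data();
    for (int j = 0; j < pixels; ++j) {
      // copy this input's channel block into its slice of the output row
      for (int c = 0; c < channels; ++c) {
        out[j * out_channels + out_offset + c] = src[j * channels + c];
      }
    }
    out_offset += channels;  // the next input starts after these channels
  }
}
```

For two inputs of 3 and 5 channels, out_channels must be 8, and the second input lands at channel offset 3 of every pixel row.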