diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h
index 42e99f4e4238d6974d23c1fb33bf238ca8a8626d..3192b4780784a755f9fd3dcae9d4d687f8276400 100644
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -31,90 +31,109 @@
 void* fpga_malloc(size_t size);
 void fpga_free(void* ptr);
 void fpga_copy(void* dst, const void* src, size_t num);
 
-struct FpgaVersionArgs {
-  void* buf;
-};
-
-struct MemoryToPhysicalArgs {
-  const void* src;
-  uint64_t physical;
+struct VersionArgs {
+  void* buffer;
 };
 
 struct MemoryCopyArgs {
   void* src;
-  void* dst;
+  void* dest;
   size_t size;
 };
 
-struct FpgaQuantArgs {
-  float scale;
-};
-
-struct FpgaBNArgs {
-  bool enabled = false;
-  void* bias_addr;
-  void* scale_addr;
+struct BNArgs {
+  bool enabled;
+  void* bias_address;
+  void* scale_address;
 };
 
-struct FpgaKernelArgs {
+/**
+Conv and Pooling kernel
+*/
+struct KernelArgs {
   uint32_t width;
   uint32_t height;
-  uint32_t stride_h;
   uint32_t stride_w;
+  uint32_t stride_h;
 };
 
-struct FpgaImageArgs {
-  uint32_t width;
-  uint32_t height;
+struct ImageInputArgs {
+  void* address;  // input featuremap virtual address
   uint32_t channels;
-  uint32_t pad_h;
-  uint32_t pad_w;
+  uint32_t width;  // featuremap width
+  uint32_t height;
+  uint32_t pad_width;  // padding width;
+  uint32_t pad_height;
+};
+
+struct ImageOutputArgs {
+  void* address;         // output result address;
+  float* scale_address;  // output scale address;
 };
 
-struct FpgaConvArgs {
+struct ConvArgs {
   bool relu_enabled;
-  struct FpgaBNArgs BNargs;
-  void* image_addr;
-  void* filter_addr;
-  void* bias_addr;
-  void* output_addr;
-  float quant_scale;
-  struct FpgaImageArgs image;
+  float scale;  // input scale;
+  void* bias_address;
+  void* filter_address;
   uint32_t filter_num;
   uint32_t group_num;
-  struct FpgaKernelArgs kernel;
+  struct BNArgs bn;
+  struct ImageInputArgs image;  // input image;
+  struct KernelArgs kernel;
 };
 
-struct FpgaPoolArgs {
-  void* image_addr;
-  void* output_addr;
-  struct FpgaImageArgs image;
-  struct FpgaKernelArgs kernel;
+struct PoolingArgs {
+  float scale;
+  struct ImageInputArgs image;  // input image;
+  struct ImageOutputArgs output;
+  struct KernelArgs kernel;
 };
 
-struct FpgaEWAddArgs {
+// elementwise add arguments
+struct EWAddArgs {
   bool relu_enabled;
-  void* image0_addr;
-  void* image1_addr;
-  void* result_addr;
-  uint32_t const0;
-  uint32_t const1;
-  uint32_t data_len;  // aligned element count
+  float scale;
+
+  float const0;  // output0 = const0 x input0 + const1 x input1;
+  float const1;
+  struct ImageInputArgs image0;
+  struct ImageInputArgs image1;
+  struct ImageOutputArgs output;
 };
 
+struct FpgaRegWriteArgs {
+  uint64_t address;  //
+  uint64_t value;
+};
+
+struct FpgaRegReadArgs {
+  uint64_t address;
+  uint64_t value;
+};
+
+#define IOCTL_FPGA_MAGIC 'FPGA'
+
+#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
+#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs)
+#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs)
+
+#define IOCTL_SEPARATOR_0 10
+
+#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
+
+#define IOCTL_SEPARATOR_1 20
+
+#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
+#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
+#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+
+//============================== API =============================
+
 int ComputeFpgaConv(struct FpgaConvArgs args);
 int ComputeFpgaPool(struct FpgaPoolArgs args);
 int ComputeFpgaEWAdd(struct FpgaEWAddArgs args);
 
-#define IOCTL_FPGA_MAGIC 'CNN'
-#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs)
-#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs)
-#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs)
-#define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs)
-#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs)
-#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs)
-#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs)
-
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 954a65a3605c4d0204890d9414aeb074371b0d69..388788216fe45b66441a0390e2ef09c2d51c16dc 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -253,6 +253,18 @@ class Tensor {
                           "Tensor's dims_ is out of bound. ");
   }
 
+#ifdef PADDLE_MOBILE_FPGA
+  struct FPGAArgs {
+    float scale;
+
+    inline float *scale_pointer() { return &scale; }
+  };
+
+  // Accessors for the FPGA-specific arguments stored in fpgaArgs_.
+  const FPGAArgs &fpga_args() const { return fpgaArgs_; }
+  FPGAArgs &fpga_args() { return fpgaArgs_; }
+#endif
+
  private:
   /**
    * @note    Placeholder hides type T, so it doesn't appear as a
@@ -319,6 +331,10 @@ class Tensor {
    *          begins.
    */
   size_t offset_;
+
+#ifdef PADDLE_MOBILE_FPGA
+  FPGAArgs fpgaArgs_;
+#endif
 };
 
 #ifdef PADDLE_MOBILE_DEBUG
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index c691988f4a388c7835a7016602d7a1ac9cb5f9b6..627a94242ca4638640a7961120b36c9f763a0e85 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -39,7 +39,7 @@ void ConcatKernel::Compute(const ConcatParam &param) const {
 
   for (int i = 0; i < inputs.size(); ++i) {
     auto input = inputs[i];
-    auto channels = input[3];
+    auto channels = input->dims()[3];
     out_offset += channels;
     auto src = input->data();
     for (int j = 0; j < pixels; ++j) {