From ae826b3800c55d6bac555ff0beaad5dc086d0448 Mon Sep 17 00:00:00 2001
From: zhangyang
Date: Thu, 11 Oct 2018 15:18:48 +0800
Subject: [PATCH] fix bugs for FPGA track

---
 src/fpga/api.cpp                              | 33 ++++++++++++++++---
 src/fpga/api.h                                |  9 +++--
 src/fpga/filter.cpp                           | 14 ++++++--
 .../fpga/elementwise_add_relu_kernel.cpp      |  4 +--
 src/operators/kernel/fpga/pool_kernel.cpp     |  4 +++
 5 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp
index f10aee5014..32930b29b2 100644
--- a/src/fpga/api.cpp
+++ b/src/fpga/api.cpp
@@ -22,7 +22,7 @@ limitations under the License. */
 #include "filter.h"
 #include "image.h"
 #define FPGA_TEST_MODE
-#define PADDLE_MOBILE_OS_LINUX
+//#define PADDLE_MOBILE_OS_LINUX
 
 namespace paddle_mobile {
 namespace fpga {
@@ -59,8 +59,8 @@ void *fpga_malloc(size_t size) {
 #endif
   counter += size;
   memory_map.insert(std::make_pair(ptr, size));
-  DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
-       << counter << " bytes";
+  // DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
+  // << counter << " bytes";
   return ptr;
 }
 
@@ -78,8 +78,8 @@ void fpga_free(void *ptr) {
     free(ptr);
 #endif
     counter += size;
-    DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
-         << counter << " bytes";
+    // DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
+    // << counter << " bytes";
   } else {
     DLOG << "Invalid pointer";
   }
@@ -103,6 +103,27 @@ int fpga_invalidate(void *address, size_t size) {
   return do_ioctl(IOCTL_MEMCACHE_INVAL, &args);
 }
 
+half fp32_2_fp16(float fp32_num) {
+  unsigned long tmp = *(unsigned long *)(&fp32_num);
+  half t = ((tmp & 0x007fffff) >> 13) | ((tmp & 0x80000000) >> 16) |
+           (((tmp & 0x7f800000) >> 13) - (112 << 10));
+  if (tmp & 0x1000) {
+    t++; // roundoff
+  }
+  return t;
+}
+
+float fp16_2_fp32(half fp16_num) {
+  int frac = (fp16_num & 0x3ff);
+  int exp = ((fp16_num & 0x7c00) >> 10) + 112;
+  int s = fp16_num & 0x8000;
+  int tmp = 0;
+  float fp32_num;
+  tmp = s << 16 | exp << 23 | frac << 13;
+  fp32_num = *(float *)&tmp;
+  return fp32_num;
+}
+
 int ComputeBasicConv(const struct ConvArgs &args) {
   DLOG << "======Compute Basic Conv======";
   DLOG << " relu_enabled:" << args.relu_enabled
@@ -148,6 +169,8 @@ int ComputeFpgaConv(const struct WrapperConvArgs &args) {
 int ComputeFpgaPool(const struct PoolingArgs &args) {
 #ifdef FPGA_TEST_MODE
   DLOG << "=============ComputeFpgaPool===========";
+  DLOG << " mode:" << args.mode
+       << " kernel_reciprocal:" << fp16_2_fp32(args.kernel_reciprocal);
   DLOG << " image_address:" << args.image.address
        << " image_scale_address:" << args.image.scale_address
        << " image_channels:" << args.image.channels
diff --git a/src/fpga/api.h b/src/fpga/api.h
index f5fa05b675..d180959623 100644
--- a/src/fpga/api.h
+++ b/src/fpga/api.h
@@ -99,6 +99,8 @@ struct WrapperConvArgs {
 };
 
 struct PoolingArgs {
+  int16_t mode; // mode: 0:max, 1:avg
+  half kernel_reciprocal;
   struct KernelArgs kernel;
   struct ImageInputArgs image; // input image;
   struct ImageOutputArgs output;
@@ -107,8 +109,8 @@ struct PoolingArgs {
 
 struct EWAddArgs {
   bool relu_enabled;
-  uint32_t const0; // output0 = const0 x input0 + const1 x input1;
-  uint32_t const1;
+  half const0; // output0 = const0 x input0 + const1 x input1;
+  half const1;
   struct ImageInputArgs image0;
   struct ImageInputArgs image1;
   struct ImageOutputArgs output;
@@ -222,5 +224,8 @@ void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input,
                    bool relu_enabled, int group_num, int stride_h, int stride_w,
                    int padding_h, int padding_w, float* bs_ptr);
 
+half fp32_2_fp16(float fp32_num);
+float fp16_2_fp32(half fp16_num);
+
 } // namespace fpga
 } // namespace paddle_mobile
diff --git a/src/fpga/filter.cpp b/src/fpga/filter.cpp
index 3b09ede10d..5f74fe1ae6 100644
--- a/src/fpga/filter.cpp
+++ b/src/fpga/filter.cpp
@@ -83,14 +83,24 @@ float find_max(float *data_in, int data_size) {
   return max;
 }
 
+signed char float_to_int8(float fdata) {
+  if (fdata < 0.0) {
+    fdata -= 0.5;
+  } else {
+    fdata += 0.5;
+  }
+  return (signed char)fdata;
+}
+
 void quantize(float **data_in, int data_size, float max) {
   float *tmp = *data_in;
   float fix_range = 127;
   float scale = fix_range / max;
 
-  char *tmp_data = (char *)fpga_malloc(data_size * sizeof(char));
+  signed char *tmp_data = (signed char *)fpga_malloc(data_size * sizeof(char));
   for (int i = 0; i < data_size; i++) {
-    tmp_data[i] = (char)((*data_in)[i] * scale);
+    tmp_data[i] = float_to_int8(
+        (*data_in)[i] * scale); // (signed char)((*data_in)[i] * scale);
   }
   *data_in = (float *)tmp_data;
   fpga_free(tmp);
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index f0d8533641..b592dd6d59 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -32,8 +32,8 @@ bool ElementwiseAddReluKernel::Init(
 
   fpga::EWAddArgs ewaddArgs = {0};
   ewaddArgs.relu_enabled = relu_enabled;
-  ewaddArgs.const0 = 1;
-  ewaddArgs.const1 = 1;
+  ewaddArgs.const0 = 0x3c00; // =1
+  ewaddArgs.const1 = 0x3c00; // =1
   ewaddArgs.image0.address = input_x_ptr;
   ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
   ewaddArgs.image0.scale_address = input_x->scale;
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 4dad2f789b..6269506836 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -29,8 +29,12 @@ bool PoolKernel::Init(PoolParam *param) {
   vector ksize = param->Ksize();
   vector strides = param->Strides();
   vector paddings = param->Paddings();
+  std::string pooling_type = param->PoolingType();
 
   fpga::PoolingArgs poolArgs = {0};
+  poolArgs.mode = pooling_type == "max" ? 0 : 1; // max:0, avg:1
+  poolArgs.kernel_reciprocal =
+      fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1])));
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
--
GitLab