提交 ae826b38 编写于 作者: Z zhangyang

fix bugs for FPGA track

上级 f9a22863
......@@ -22,7 +22,7 @@ limitations under the License. */
#include <cstdint>
#include <cstring>

#include "filter.h"
#include "image.h"
#define FPGA_TEST_MODE
#define PADDLE_MOBILE_OS_LINUX
//#define PADDLE_MOBILE_OS_LINUX
namespace paddle_mobile {
namespace fpga {
......@@ -59,8 +59,8 @@ void *fpga_malloc(size_t size) {
#endif
counter += size;
memory_map.insert(std::make_pair(ptr, size));
DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
<< counter << " bytes";
// DLOG << "Address: " << ptr << ", " << size << " bytes allocated. Total "
// << counter << " bytes";
return ptr;
}
......@@ -78,8 +78,8 @@ void fpga_free(void *ptr) {
free(ptr);
#endif
counter += size;
DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
<< counter << " bytes";
// DLOG << "Address: " << ptr << ", " << size << " bytes freed. Total "
// << counter << " bytes";
} else {
DLOG << "Invalid pointer";
}
......@@ -103,6 +103,27 @@ int fpga_invalidate(void *address, size_t size) {
return do_ioctl(IOCTL_MEMCACHE_INVAL, &args);
}
// Converts an IEEE-754 binary32 value to a 16-bit half-precision bit pattern
// (1 sign bit, 5 exponent bits, 10 fraction bits; 0x3c00 encodes 1.0).
//
// @param fp32_num  value to convert.
// @return the half-precision bit pattern stored in a `half`.
//
// Behaviour:
//  - Values inside the half normal range keep the conversion this function
//    always performed: the fraction is truncated to 10 bits and the result is
//    incremented when the first dropped bit (bit 12) is set.
//  - Zero and magnitudes below the smallest normal half are flushed to a
//    signed zero. Previously the exponent re-bias underflowed and produced an
//    unrelated bit pattern (0.0f became 0x4000, i.e. 2.0).
//  - Magnitudes above the half range, infinities and NaN saturate to a signed
//    infinity. Previously the exponent overflowed into the sign bit.
half fp32_2_fp16(float fp32_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp32_2_fp16 requires a 32-bit float");

  // Copy the bits out instead of dereferencing a casted pointer: the old
  // `unsigned long *` cast broke strict aliasing and read 8 bytes from a
  // 4-byte object on LP64 targets.
  uint32_t bits = 0;
  std::memcpy(&bits, &fp32_num, sizeof(bits));

  const uint32_t sign = (bits & 0x80000000u) >> 16;
  const uint32_t exponent32 = (bits & 0x7f800000u) >> 23;
  const uint32_t fraction = (bits & 0x007fffffu) >> 13;

  // 112 = 127 - 15 is the difference between the two exponent biases.
  const uint32_t kBiasDelta = 112;

  if (exponent32 <= kBiasDelta) {
    // Too small for a normal half (this also covers +/-0): signed zero.
    return static_cast<half>(sign);
  }
  if (exponent32 >= kBiasDelta + 31) {
    // Too large for a finite half (this also covers inf/NaN): signed infinity.
    return static_cast<half>(sign | 0x7c00u);
  }

  uint32_t result = sign | ((exponent32 - kBiasDelta) << 10) | fraction;
  if (bits & 0x1000u) {
    // Round up on the first discarded fraction bit. A carry out of the
    // fraction correctly bumps the exponent.
    ++result;
  }
  return static_cast<half>(result);
}
// Converts a 16-bit half-precision bit pattern (1 sign bit, 5 exponent bits,
// 10 fraction bits; 0x3c00 encodes 1.0) to a binary32 float.
//
// @param fp16_num  half-precision bit pattern.
// @return the decoded value.
//
// Behaviour:
//  - Normal numbers are re-biased by 112 (= 127 - 15) and the fraction is
//    widened from 10 to 23 bits, exactly as before.
//  - An exponent field of 0 now decodes to signed zero / subnormal values
//    (fraction * 2^-24). Previously a zero pattern decoded to 2^-15.
//  - An exponent field of 31 now decodes to infinity / NaN instead of a large
//    finite number.
float fp16_2_fp32(half fp16_num) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "fp16_2_fp32 requires a 32-bit float");

  // Work on an unsigned 16-bit view so a negative `half` is not sign-extended
  // and no shift ever moves a bit into the sign position of a signed int.
  const uint32_t bits = static_cast<uint32_t>(fp16_num) & 0xffffu;
  const uint32_t sign = (bits & 0x8000u) << 16;
  const uint32_t exponent16 = (bits & 0x7c00u) >> 10;
  const uint32_t fraction = bits & 0x03ffu;

  if (exponent16 == 0) {
    // Zero or subnormal: there is no implicit leading one, the value is
    // fraction * 2^-24 with the sign applied (this keeps -0.0f for 0x8000).
    const float magnitude = static_cast<float>(fraction) * (1.0f / 16777216.0f);
    return sign != 0 ? -magnitude : magnitude;
  }

  // Exponent field 31 is inf/NaN and maps to the all-ones float exponent.
  const uint32_t exponent32 = (exponent16 == 0x1fu) ? 0xffu : exponent16 + 112u;
  const uint32_t packed = sign | (exponent32 << 23) | (fraction << 13);

  // Copy the bits instead of dereferencing a casted pointer (strict aliasing).
  float fp32_num = 0.0f;
  std::memcpy(&fp32_num, &packed, sizeof(fp32_num));
  return fp32_num;
}
int ComputeBasicConv(const struct ConvArgs &args) {
DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled
......@@ -148,6 +169,8 @@ int ComputeFpgaConv(const struct WrapperConvArgs &args) {
int ComputeFpgaPool(const struct PoolingArgs &args) {
#ifdef FPGA_TEST_MODE
DLOG << "=============ComputeFpgaPool===========";
DLOG << " mode:" << args.mode
<< " kernel_reciprocal:" << fp16_2_fp32(args.kernel_reciprocal);
DLOG << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
......
......@@ -99,6 +99,8 @@ struct WrapperConvArgs {
};
struct PoolingArgs {
int16_t mode; // mode: 0:max, 1:avg
half kernel_reciprocal;
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
......@@ -107,8 +109,8 @@ struct PoolingArgs {
struct EWAddArgs {
bool relu_enabled;
uint32_t const0; // output0 = const0 x input0 + const1 x input1;
uint32_t const1;
half const0; // output0 = const0 x input0 + const1 x input1;
half const1;
struct ImageInputArgs image0;
struct ImageInputArgs image1;
struct ImageOutputArgs output;
......@@ -222,5 +224,8 @@ void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input,
bool relu_enabled, int group_num, int stride_h, int stride_w,
int padding_h, int padding_w, float* bs_ptr);
half fp32_2_fp16(float fp32_num);
float fp16_2_fp32(half fp16_num);
} // namespace fpga
} // namespace paddle_mobile
......@@ -83,14 +83,24 @@ float find_max(float *data_in, int data_size) {
return max;
}
// Rounds a float to the nearest integer (halves away from zero) and returns
// it as a signed 8-bit value.
//
// @param fdata  value to quantize.
// @return the rounded value, saturated to [-128, 127]; 0 when fdata is NaN.
//
// Converting a float whose truncated value does not fit the destination
// integer type is undefined behaviour, so out-of-range inputs and NaN are
// handled explicitly. In-range inputs give the same result as before.
signed char float_to_int8(float fdata) {
  if (fdata != fdata) {
    // NaN has no meaningful integer value.
    return 0;
  }

  // Shift by half a unit towards the sign so that the truncating cast below
  // rounds to nearest. The sum is formed in double and stored back to float.
  const float shifted = (fdata < 0.0) ? fdata - 0.5 : fdata + 0.5;

  if (shifted >= 128.0f) {
    return 127;
  }
  if (shifted <= -129.0f) {
    return -128;
  }
  return static_cast<signed char>(shifted);
}
void quantize(float **data_in, int data_size, float max) {
float *tmp = *data_in;
float fix_range = 127;
float scale = fix_range / max;
char *tmp_data = (char *)fpga_malloc(data_size * sizeof(char));
signed char *tmp_data = (signed char *)fpga_malloc(data_size * sizeof(char));
for (int i = 0; i < data_size; i++) {
tmp_data[i] = (char)((*data_in)[i] * scale);
tmp_data[i] = float_to_int8(
(*data_in)[i] * scale); // (signed char)((*data_in)[i] * scale);
}
*data_in = (float *)tmp_data;
fpga_free(tmp);
......
......@@ -32,8 +32,8 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.const0 = 1;
ewaddArgs.const1 = 1;
ewaddArgs.const0 = 0x3c00; // =1
ewaddArgs.const1 = 0x3c00; // =1
ewaddArgs.image0.address = input_x_ptr;
ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
ewaddArgs.image0.scale_address = input_x->scale;
......
......@@ -29,8 +29,12 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
vector<int> ksize = param->Ksize();
vector<int> strides = param->Strides();
vector<int> paddings = param->Paddings();
std::string pooling_type = param->PoolingType();
fpga::PoolingArgs poolArgs = {0};
poolArgs.mode = pooling_type == "max" ? 0 : 1; // max:0, avg:1
poolArgs.kernel_reciprocal =
fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1])));
poolArgs.image.address = input_ptr;
poolArgs.image.channels = (uint32_t)input->dims()[1];
poolArgs.image.height = (uint32_t)input->dims()[2];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册