Commit 31b578e0 authored by zhangyang

correct concat bug for FPGA track

Parent 766672cc
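
What the fix does: every FPGA argument struct touched below (MemoryCacheArgs, BypassArgs, ConcatArgs, WrapperConvArgs, EWAddArgs, PoolingArgs) was previously declared without an initializer before being handed to the driver, so any member the kernel did not assign carried indeterminate garbage; each declaration now gets an aggregate initializer such as = {0}. On top of that, concat_images now fills both entries of scale_out (the maximum input scale and its reciprocal), and format_concat_output pins the batch dimension to 1 instead of -1. A minimal sketch of the uninitialized-struct hazard, using a hypothetical FooArgs and submit() rather than the real MemoryCacheArgs/do_ioctl:

#include <cstddef>

// Hypothetical aggregate standing in for the driver's argument structs.
struct FooArgs {
  void *address;
  size_t size;
  int reserved;  // a field the caller below never assigns
};

// Stand-in for do_ioctl(); the driver would read every field.
int submit(const FooArgs &args) {
  return static_cast<int>(args.size) + args.reserved;
}

int flush_like(void *address, size_t size) {
  FooArgs args = {nullptr};  // first member listed, the rest zero-initialized
  args.address = address;
  args.size = size;
  return submit(args);  // args.reserved is a well-defined 0, not garbage
}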
@@ -86,14 +86,14 @@ void fpga_copy(void *dest, const void *src, size_t num) {
 }
 int fpga_flush(void *address, size_t size) {
-  struct MemoryCacheArgs args;
+  struct MemoryCacheArgs args = {nullptr};
   args.address = address;
   args.size = size;
   return do_ioctl(IOCTL_MEMCACHE_FLUSH, &args);
 }
 int fpga_invalidate(void *address, size_t size) {
-  struct MemoryCacheArgs args;
+  struct MemoryCacheArgs args = {nullptr};
   args.address = address;
   args.size = size;
   return do_ioctl(IOCTL_MEMCACHE_INVAL, &args);
@@ -332,7 +332,7 @@ void format_concat_output(framework::Tensor *out, int height, int width,
   sum_cw = align_to_x(width * sum_channel, IMAGE_ALIGNMENT);
   auto data_ptr = fpga_malloc(height * sum_cw * sizeof(half));
-  auto ddim = framework::make_ddim({-1, sum_channel, height, width});
+  auto ddim = framework::make_ddim({1, sum_channel, height, width});
   out->Resize(ddim);
   out->reset_data_ptr(data_ptr);
 }
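
The ddim change above swaps the -1 placeholder batch dimension for a concrete 1: the concat output buffer is allocated by hand through fpga_malloc, and a negative dimension would poison any element count later derived from the tensor's dims. For reference, align_to_x presumably rounds a count up to the next multiple of the alignment; a hedged sketch of that helper (the real one is defined elsewhere in the fpga API):

static inline int align_to_x(int num, int x) {
  return (num + x - 1) / x * x;  // round num up to a multiple of x
}
// e.g. assuming IMAGE_ALIGNMENT == 16: align_to_x(7 * 3, 16) == 32,
// so a 7x3 (width x channel) row occupies 32 half elements per line.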
...
@@ -74,15 +74,17 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   int align_each_in_area_cw = 0;
   int align_each_out_area_cw_differ = 0;
   int tmp_channel = 0;
-  *scale_out = 0;
+  scale_out[0] = 0.0;
+  scale_out[1] = 0.0;
   for (i = 0; i < image_num; i++) {
     each_out_line_channel += channel_num[i];
-    *scale_out = std::max(*scale_out, scales_in[i][0]);
+    scale_out[0] = std::max(*scale_out, scales_in[i][0]);
     fpga_invalidate(images_in[i],
                     height *
                         align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT) *
                         sizeof(int16_t));
   }
+  scale_out[1] = 1 / scale_out[0];
   align_each_out_area_cw =
       align_to_x(each_out_line_channel * width, IMAGE_ALIGNMENT);
   align_each_out_area_cw_differ =
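
This hunk makes scale_out carry a (scale, inverse-scale) pair instead of a single float: scale_out[0] ends up as the maximum of the input scales and scale_out[1] caches its reciprocal, sparing downstream consumers a per-element divide when dequantizing. A self-contained sketch of that bookkeeping, assuming scale_out points at a float[2]:

#include <algorithm>

// Sketch of the new scale bookkeeping; assumes scale_out is a float[2]
// holding (max scale, reciprocal of max scale).
void fill_scale_pair(float *scale_out, float **scales_in, int image_num) {
  scale_out[0] = 0.0f;
  scale_out[1] = 0.0f;
  for (int i = 0; i < image_num; i++) {
    scale_out[0] = std::max(scale_out[0], scales_in[i][0]);
  }
  scale_out[1] = 1.0f / scale_out[0];  // cached reciprocal for dequantization
}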
...
@@ -55,7 +55,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     Tensor *output = param_.Out();
     auto output_ptr = output->data<half>();
-    fpga::BypassArgs args;
+    fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
     args.input_data_type = fpga::DATA_TYPE_FP32;
     args.output_data_type = fpga::DATA_TYPE_FP16;
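
Note on = {fpga::DATA_TYPE_FP32}: C++ aggregate initialization value-initializes every member after the last one listed, so naming only the first field still zeroes the whole BypassArgs. A tiny illustration with a hypothetical Args of the same shape:

// Hypothetical aggregate; only the first member is listed in the braces,
// so the remaining members are value-initialized (zero/null).
struct Args {
  int input_data_type;
  int output_data_type;
  void *image;
};

Args a = {1};  // a.output_data_type == 0, a.image == nullptr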
...
@@ -43,7 +43,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
   fpga::format_concat_output(out, (int)height, (int)width, (int)image_num,
                              channel_num);
-  fpga::ConcatArgs concatArgs;
+  fpga::ConcatArgs concatArgs = {0};
   concatArgs.image_num = (uint32_t)image_num;
   concatArgs.images_in = images_in;
   concatArgs.scales_in = scales_in;
...
@@ -66,7 +66,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -64,7 +64,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -46,7 +46,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -58,7 +58,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -58,7 +58,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
                       param->Groups(), param->Strides()[0], param->Strides()[1],
                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
...
@@ -30,7 +30,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data<float>();
-  fpga::EWAddArgs ewaddArgs;
+  fpga::EWAddArgs ewaddArgs = {0};
   ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.const0 = 1;
   ewaddArgs.const1 = 1;
...
@@ -51,7 +51,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
                       0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
...
@@ -52,7 +52,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
   fpga::format_fp16_ofm(out);
-  fpga::WrapperConvArgs conv_arg;
+  fpga::WrapperConvArgs conv_arg = {0};
   fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
                       0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
...
@@ -30,7 +30,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
-  fpga::PoolingArgs poolArgs;
+  fpga::PoolingArgs poolArgs = {0};
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
...
@@ -29,7 +29,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto float_input = new Tensor(*input);
   fpga::format_fp32_ofm(float_input);
-  fpga::BypassArgs args;
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
   args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
...