diff --git a/src/fpga/V1/image.cpp b/src/fpga/V1/image.cpp
index 312af1d00b5f6dfa25f33ce93a25d55577b92818..f4142ad58a273691c84db9dd585518e7edcff8a6 100644
--- a/src/fpga/V1/image.cpp
+++ b/src/fpga/V1/image.cpp
@@ -111,6 +111,42 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t));
 }
 
+// Split a featuremap along the channel direction into image_num outputs.
+// Layout assumed (matching the per-chunk memcpy below): within a row the
+// channels of one pixel are contiguous, and consecutive rows are
+// align_to_x(channels * width, IMAGE_ALIGNMENT) elements apart, for the input
+// (total channel count) and for each output (its own channel count).
+// scale_in[0] and scale_in[1] are copied to every output scale.
+// NOTE(review): concat_images calls fpga_flush() on its output buffer;
+// confirm whether the buffers written here need the same.
+void split_image(int16_t *image_in, float *scale_in, void **images_out,
+                 float **scales_out, int image_num, uint32_t *channel_nums,
+                 int height, int width) {
+  int total_channel = 0;
+  for (int i = 0; i < image_num; i++) {
+    scales_out[i][0] = scale_in[0];
+    scales_out[i][1] = scale_in[1];
+    total_channel += channel_nums[i];
+  }
+
+  const int src_row_stride =
+      align_to_x(total_channel * width, IMAGE_ALIGNMENT);
+  for (int h = 0; h < height; h++) {
+    for (int w = 0; w < width; w++) {
+      // Every pixel of the row has to be visited; copying once per row
+      // would split only the first pixel and leave the rest unwritten.
+      int src_offset = h * src_row_stride + w * total_channel;
+      for (int i = 0; i < image_num; i++) {
+        int des_offset =
+            h * align_to_x(channel_nums[i] * width, IMAGE_ALIGNMENT) +
+            w * channel_nums[i];
+        memcpy((int16_t *)images_out[i] + des_offset, image_in + src_offset,
+               channel_nums[i] * sizeof(int16_t));
+        src_offset += channel_nums[i];
+      }
+    }
+  }
+}
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/image.h b/src/fpga/V1/image.h
index 7e004916118ae97d60d24e798300d66a98191211..321967bbe233c5bec889aeb63f98dc23779b4918 100644
--- a/src/fpga/V1/image.h
+++ b/src/fpga/V1/image.h
@@ -28,6 +28,10 @@ void concat_images(int16_t** images_in, float** scales_in, void* image_out,
                    float* scale_out, int image_num, uint32_t* channel_num,
                    int height, int width);
 // Concat featuremaps along channel direction
+void split_image(int16_t* image_in, float* scale_in, void** images_out,
+                 float** scales_out, int image_num, uint32_t* channel_nums,
+                 int height, int width);
+// Split featuremap along channel direction
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp
index 9adea7e0962243d46fa6060b4deae6df371567c8..1f0e5768a7017a4c7f928fea86b8f4ef3cdbae3d 100644
--- 
a/src/fpga/V1/pe.cpp +++ b/src/fpga/V1/pe.cpp @@ -138,13 +138,11 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) { DLOG << "=============ComputeFpgaConcat==========="; DLOG << " Image_num: " << args.image_num << " out_address:" << args.image_out - << " out_scale_address:" << args.scale_out - << " out_channel:" << args.out_channel; + << " out_scale_address:" << args.scale_out; DLOG << " image_height:" << args.height << " image_width:" << args.width; for (int i = 0; i < args.image_num; i++) { DLOG << " " << i << "th: "; DLOG << " channel_num:" << args.channel_num[i] - << " aligned_channel_num:" << args.aligned_channel_num[i] << " image_address:" << args.images_in[i] << " image_scale_address:" << args.scales_in[i]; } @@ -156,5 +154,25 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) { return 0; } +/* Splits args.image_in into args.images_out by forwarding every field of args to image::split_image; returns 0. */ int ComputeFPGASplit(const struct SplitArgs &args) { +#ifdef FPGA_PRINT_MODE /* the argument dump below is compiled only when FPGA_PRINT_MODE is defined */ + DLOG << "=============ComputeFpgaSplit==========="; + DLOG << " Image_num: " << args.image_num + << " in_address:" << args.image_in + << " in_scale_address:" << args.scale_in; + DLOG << " image_height:" << args.height << " image_width:" << args.width; + for (int i = 0; i < args.image_num; i++) { /* one log entry per output image */ + DLOG << " " << i << "th: "; + DLOG << " channel_num:" << args.out_channel_nums[i] + << " image_address:" << args.images_out[i] + << " image_scale_address:" << args.scales_out[i]; + } +#endif /* FPGA_PRINT_MODE */ + image::split_image(args.image_in, args.scale_in, args.images_out, + args.scales_out, args.image_num, args.out_channel_nums, + args.height, args.width); + return 0; +} + } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h index 430014ef654ec2f00eeb2548012e4ae716f4aa8b..b3f619f2f24aba47d99f7f427c4b67af8c0d430d 100644 --- a/src/fpga/common/fpga_common.h +++ b/src/fpga/common/fpga_common.h @@ -74,8 +74,19 @@ struct ConcatArgs { void* image_out; float* scale_out; uint32_t* channel_num; - uint32_t* aligned_channel_num; - uint32_t 
out_channel; + // uint32_t* aligned_channel_num; + // uint32_t out_channel; + uint32_t height; + uint32_t width; +}; + +struct SplitArgs { /* arguments taken by ComputeFPGASplit and forwarded to image::split_image */ + uint32_t image_num; /* number of output images */ + int16_t* image_in; /* input image that is split */ + float* scale_in; /* input scale; split_image reads elements [0] and [1] */ + void** images_out; /* one destination image pointer per output */ + float** scales_out; /* one scale pointer per output; split_image writes elements [0] and [1] of each */ + uint32_t* out_channel_nums; /* channel count of each output */ uint32_t height; uint32_t width; }; diff --git a/src/fpga/common/pe.h b/src/fpga/common/pe.h index 0da13b8396b7f6a7960dfbb36337f3b38c7ac865..ae773f25b4171df3e552aaa07bb05af8564d872a 100644 --- a/src/fpga/common/pe.h +++ b/src/fpga/common/pe.h @@ -25,6 +25,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs& args); int ComputeFpgaConv(const struct SplitConvArgs& args); int ComputeFPGAConcat(const struct ConcatArgs& args); +int ComputeFPGASplit(const struct SplitArgs& args); /* splits args.image_in into args.images_out; returns 0 */ } // namespace fpga } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/V1/split_kernel.cpp b/src/operators/kernel/fpga/V1/split_kernel.cpp index faa1da9186d2a74961450925dea6e3d0f98856bc..b8c0bb3be64d2393b61b0f82375c695000f52b65 100644 --- a/src/operators/kernel/fpga/V1/split_kernel.cpp +++ b/src/operators/kernel/fpga/V1/split_kernel.cpp @@ -19,11 +19,45 @@ limitations under the License. 
*/
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool SplitKernel::Init(SplitParam* param) {
+bool SplitKernel::Init(SplitParam *param) {  // builds the fpga::SplitArgs once and stores them on param
+  auto *in = const_cast(param->InputX());  // NOTE(review): template argument lists (here and on the casts/accessors below) seem to be missing from this listing - confirm against the original file
+  auto outs = param->Outs();
+  auto sections = param->Sections();
+  int axis = param->Axis();
+  PADDLE_MOBILE_ENFORCE(axis == 1, "Only support split in channel dimension");
+  PADDLE_MOBILE_ENFORCE(outs.size() == sections.size(),
+                        "Output number should be equal to section number");
+  auto image_num = (uint32_t)outs.size();
+  auto images_out =  // the three per-output tables are allocated here and not released in this function; their pointers are kept in the stored args
+      reinterpret_cast(fpga::fpga_malloc(image_num * sizeof(void *)));
+  auto scales_out = reinterpret_cast(
+      fpga::fpga_malloc(image_num * sizeof(float *)));
+  auto out_channels = reinterpret_cast(
+      fpga::fpga_malloc(image_num * sizeof(uint32_t)));
+  for (int i = 0; i < image_num; i++) {  // fill the tables from each output tensor and its section size
+    fpga::format_fp16_ofm(outs[i]);
+    images_out[i] = outs[i]->mutable_data();
+    scales_out[i] = outs[i]->scale;
+    out_channels[i] = (uint32_t)sections[i];
+  }
+
+  fpga::SplitArgs arg = {0};
+  arg.image_num = image_num;
+  arg.image_in = (half *)in->data();
+  arg.scale_in = in->scale;
+  arg.images_out = images_out;
+  arg.scales_out = scales_out;
+  arg.out_channel_nums = out_channels;
+  arg.height = (uint32_t)in->dims()[2];  // dims()[2] and dims()[3] are taken as height and width; axis 1 is the channel dimension per the check above
+  arg.width = (uint32_t)in->dims()[3];
+
+  param->SetFpgaArgs(arg);
   return true;
 }
 template <>
-void SplitKernel::Compute(const SplitParam& param) {}
+void SplitKernel::Compute(const SplitParam &param) {  // runs the split with the arguments stored by Init
+  fpga::ComputeFPGASplit(param.FpgaArgs());
+}
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
index 585cc52947fa5de991fee446ba3c0098ae99d0af..4505401f434c320003e8122a3a0e197441ae8921 100644
--- a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
@@ -21,6 +21,7 @@ namespace operators {
 
 template <>
 bool Transpose2Kernel::Init(Transpose2Param *param) {
+  
param->Out()->ShareDataWith(*param->InputX()); /* NOTE(review): presumably makes Out share InputX's storage - confirm ShareDataWith semantics */ return true; } diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 5a2305876bae2f53327b0ae75cc498ff585ad4f0..fe5cce379d199be5d3931308513823c7279c21ff 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -2421,6 +2421,15 @@ class SplitParam : public OpParam { int num; std::vector sections; // std::vector out_ts_; +#ifdef PADDLE_MOBILE_FPGA /* members below exist only when PADDLE_MOBILE_FPGA is defined */ + + private: + fpga::SplitArgs fpga_split_args; /* written by SetFpgaArgs, read back through FpgaArgs */ + + public: + const fpga::SplitArgs &FpgaArgs() const { return fpga_split_args; } /* read-only access to the stored arguments */ + void SetFpgaArgs(const fpga::SplitArgs &args) { fpga_split_args = args; } /* stores a copy of args */ +#endif /* PADDLE_MOBILE_FPGA */ }; #endif