提交 b55c8914 编写于 作者: Z zhangyang

add split and transpose kernels for FPGA track

上级 777e7fd4
...@@ -111,6 +111,27 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out, ...@@ -111,6 +111,27 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t)); fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t));
} }
// Splits one feature map into `image_num` feature maps along the channel
// direction.
//
// Addressing used below: row h of an image with C channels starts at
// h * align_to_x(C * width, IMAGE_ALIGNMENT) int16 elements, and inside a row
// the C channels of pixel w are stored contiguously starting at w * C.
//
//   image_in     - source feature map
//   scale_in     - two floats; both are copied into every scales_out[i]
//   images_out   - image_num destination buffers, written as int16_t
//   scales_out   - image_num destination scale arrays, two floats each
//   image_num    - number of outputs
//   channel_nums - channel count of each output; their sum is used as the
//                  channel count of the input
//   height/width - spatial size shared by the input and all outputs
//
// NOTE(review): concat_images ends with an fpga_flush of its output buffer;
// confirm whether the buffers written here need equivalent cache maintenance.
void split_image(int16_t *image_in, float *scale_in, void **images_out,
                 float **scales_out, int image_num, uint32_t *channel_nums,
                 int height, int width) {
  int total_channel = 0;
  for (int i = 0; i < image_num; i++) {
    scales_out[i][0] = scale_in[0];
    scales_out[i][1] = scale_in[1];
    total_channel += channel_nums[i];
  }

  // The source row stride does not depend on h, so compute it once.
  const int src_row_stride =
      align_to_x(total_channel * width, IMAGE_ALIGNMENT);

  for (int h = 0; h < height; h++) {
    const int16_t *src_row = image_in + h * src_row_stride;
    // First input channel that belongs to output i.
    int channel_begin = 0;
    for (int i = 0; i < image_num; i++) {
      const int channels = static_cast<int>(channel_nums[i]);
      int16_t *des_row =
          static_cast<int16_t *>(images_out[i]) +
          h * align_to_x(channel_nums[i] * width, IMAGE_ALIGNMENT);
      // Every pixel of the row must be visited: the previous version copied
      // only the channels of pixel w == 0 and left the rest of each output
      // row unwritten.
      for (int w = 0; w < width; w++) {
        memcpy(des_row + w * channels,
               src_row + w * total_channel + channel_begin,
               channels * sizeof(int16_t));
      }
      channel_begin += channels;
    }
  }
}
} // namespace image } // namespace image
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -28,6 +28,9 @@ void concat_images(int16_t** images_in, float** scales_in, void* image_out, ...@@ -28,6 +28,9 @@ void concat_images(int16_t** images_in, float** scales_in, void* image_out,
float* scale_out, int image_num, uint32_t* channel_num, float* scale_out, int image_num, uint32_t* channel_num,
int height, int height,
int width); // Concat featuremaps along channel direction int width); // Concat featuremaps along channel direction
void split_image(int16_t* image_in, float* scale_in, void** images_out,
float** scales_out, int image_num, uint32_t* channel_nums,
int height, int width);  // Split a featuremap along channel direction
} // namespace image } // namespace image
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -138,13 +138,11 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) { ...@@ -138,13 +138,11 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) {
DLOG << "=============ComputeFpgaConcat==========="; DLOG << "=============ComputeFpgaConcat===========";
DLOG << " Image_num: " << args.image_num DLOG << " Image_num: " << args.image_num
<< " out_address:" << args.image_out << " out_address:" << args.image_out
<< " out_scale_address:" << args.scale_out << " out_scale_address:" << args.scale_out;
<< " out_channel:" << args.out_channel;
DLOG << " image_height:" << args.height << " image_width:" << args.width; DLOG << " image_height:" << args.height << " image_width:" << args.width;
for (int i = 0; i < args.image_num; i++) { for (int i = 0; i < args.image_num; i++) {
DLOG << " " << i << "th: "; DLOG << " " << i << "th: ";
DLOG << " channel_num:" << args.channel_num[i] DLOG << " channel_num:" << args.channel_num[i]
<< " aligned_channel_num:" << args.aligned_channel_num[i]
<< " image_address:" << args.images_in[i] << " image_address:" << args.images_in[i]
<< " image_scale_address:" << args.scales_in[i]; << " image_scale_address:" << args.scales_in[i];
} }
...@@ -156,5 +154,25 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) { ...@@ -156,5 +154,25 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) {
return 0; return 0;
} }
// Splits args.image_in into args.images_out by delegating to
// image::split_image. When FPGA_PRINT_MODE is defined, the arguments are
// logged first. Always returns 0.
int ComputeFPGASplit(const struct SplitArgs &args) {
#ifdef FPGA_PRINT_MODE
  // Dump the shared input description, then one entry per output image.
  DLOG << "=============ComputeFpgaSplit===========";
  DLOG << " Image_num: " << args.image_num
       << " in_address:" << args.image_in
       << " in_scale_address:" << args.scale_in;
  DLOG << " image_height:" << args.height << " image_width:" << args.width;
  for (int idx = 0; idx < args.image_num; idx++) {
    DLOG << " " << idx << "th: ";
    DLOG << " channel_num:" << args.out_channel_nums[idx]
         << " image_address:" << args.images_out[idx]
         << " image_scale_address:" << args.scales_out[idx];
  }
#endif

  image::split_image(args.image_in, args.scale_in,    // input image + scale
                     args.images_out, args.scales_out,  // outputs + scales
                     args.image_num, args.out_channel_nums,
                     args.height, args.width);
  return 0;
}
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -74,8 +74,19 @@ struct ConcatArgs { ...@@ -74,8 +74,19 @@ struct ConcatArgs {
void* image_out; void* image_out;
float* scale_out; float* scale_out;
uint32_t* channel_num; uint32_t* channel_num;
uint32_t* aligned_channel_num; // uint32_t* aligned_channel_num;
uint32_t out_channel; // uint32_t out_channel;
uint32_t height;
uint32_t width;
};
// Arguments consumed by ComputeFPGASplit, which forwards them to
// image::split_image.
struct SplitArgs {
// Number of output images.
uint32_t image_num;
// Input feature map.
int16_t* image_in;
// Input scale; split_image reads elements [0] and [1].
float* scale_in;
// Array of image_num output buffers.
void** images_out;
// Array of image_num output scale arrays; split_image writes [0] and [1].
float** scales_out;
// Channel count of each output image.
uint32_t* out_channel_nums;
// Height and width of the input feature map.
uint32_t height; uint32_t height;
uint32_t width; uint32_t width;
}; };
......
...@@ -25,6 +25,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs& args); ...@@ -25,6 +25,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs& args);
int ComputeFpgaConv(const struct SplitConvArgs& args); int ComputeFpgaConv(const struct SplitConvArgs& args);
int ComputeFPGAConcat(const struct ConcatArgs& args); int ComputeFPGAConcat(const struct ConcatArgs& args);
int ComputeFPGASplit(const struct SplitArgs& args);
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -19,11 +19,45 @@ limitations under the License. */ ...@@ -19,11 +19,45 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
// Prepares the FPGA split: checks the op attributes, formats every output
// tensor, collects the per-output pointers into a fpga::SplitArgs and stores
// it on the param. Returns true.
template <> template <>
bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) { bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA> *param) {
auto *in = const_cast<Tensor *>(param->InputX());
auto outs = param->Outs();
auto sections = param->Sections();
int axis = param->Axis();
// Only a split along axis 1 with one section per output is accepted.
PADDLE_MOBILE_ENFORCE(axis == 1, "Only support split in channel dimension");
PADDLE_MOBILE_ENFORCE(outs.size() == sections.size(),
"Output number should be equal to section number");
auto image_num = (uint32_t)outs.size();
// Per-output pointer/size tables handed to the split routine.
// NOTE(review): nothing in this function frees these allocations — confirm
// who owns and releases them.
auto images_out =
reinterpret_cast<void **>(fpga::fpga_malloc(image_num * sizeof(void *)));
auto scales_out = reinterpret_cast<float **>(
fpga::fpga_malloc(image_num * sizeof(float *)));
auto out_channels = reinterpret_cast<uint32_t *>(
fpga::fpga_malloc(image_num * sizeof(uint32_t)));
for (int i = 0; i < image_num; i++) {
// Format the output tensor before taking its data pointer.
fpga::format_fp16_ofm(outs[i]);
images_out[i] = outs[i]->mutable_data<float>();
scales_out[i] = outs[i]->scale;
// Section i is used as the channel count of output i.
out_channels[i] = (uint32_t)sections[i];
}
fpga::SplitArgs arg = {0};
arg.image_num = image_num;
// The input buffer is requested as float but handed on as half data.
arg.image_in = (half *)in->data<float>();
arg.scale_in = in->scale;
arg.images_out = images_out;
arg.scales_out = scales_out;
arg.out_channel_nums = out_channels;
// assumes the input dims are ordered N, C, H, W — TODO confirm
arg.height = (uint32_t)in->dims()[2];
arg.width = (uint32_t)in->dims()[3];
param->SetFpgaArgs(arg);
return true; return true;
} }
// Runs the split with the arguments held by the param (param.FpgaArgs()).
// The return value of ComputeFPGASplit is ignored.
template <> template <>
void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {} void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA> &param) {
fpga::ComputeFPGASplit(param.FpgaArgs());
}
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -21,6 +21,7 @@ namespace operators { ...@@ -21,6 +21,7 @@ namespace operators {
// Sets up the FPGA transpose2 kernel. Returns true.
template <> template <>
bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) { bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
// The output is made to share the input tensor's data; no element
// reordering is performed here.
// NOTE(review): presumably Out() then aliases the input buffer — confirm
// against Tensor::ShareDataWith.
param->Out()->ShareDataWith(*param->InputX());
return true; return true;
} }
......
...@@ -2421,6 +2421,15 @@ class SplitParam : public OpParam { ...@@ -2421,6 +2421,15 @@ class SplitParam : public OpParam {
int num; int num;
std::vector<int> sections; std::vector<int> sections;
// std::vector<GType> out_ts_; // std::vector<GType> out_ts_;
#ifdef PADDLE_MOBILE_FPGA

 private:
  // Split arguments kept for the FPGA kernel.
  fpga::SplitArgs fpga_split_args;

 public:
  // Read-only access to the stored FPGA split arguments.
  const fpga::SplitArgs &FpgaArgs() const {
    return this->fpga_split_args;
  }

  // Replaces the stored FPGA split arguments with a copy of `args`.
  void SetFpgaArgs(const fpga::SplitArgs &args) {
    this->fpga_split_args = args;
  }
#endif
}; };
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册