diff --git a/src/fpga/V1/image.cpp b/src/fpga/V1/image.cpp
index 312af1d00b5f6dfa25f33ce93a25d55577b92818..f4142ad58a273691c84db9dd585518e7edcff8a6 100644
--- a/src/fpga/V1/image.cpp
+++ b/src/fpga/V1/image.cpp
@@ -111,6 +111,49 @@ void concat_images(int16_t **images_in, float **scales_in, void *image_out,
   fpga_flush(image_out, height * align_each_out_area_cw * sizeof(int16_t));
 }
 
+// Splits a channel-concatenated featuremap into image_num featuremaps along
+// the channel direction.
+//   image_in     - source image; a row holds
+//                  align_to_x(total_channel * width, IMAGE_ALIGNMENT) elements
+//   scale_in     - two-element scale of the source, copied to every output
+//   images_out   - image_num destination buffers, written as int16_t data
+//   scales_out   - image_num two-element scale arrays, one per output
+//   image_num    - number of outputs
+//   channel_nums - channel count of each output
+//   height/width - spatial size shared by the source and all outputs
+// NOTE(review): concat_images flushes its output with fpga_flush; confirm
+// whether the buffers written here need the same treatment.
+void split_image(int16_t *image_in, float *scale_in, void **images_out,
+                 float **scales_out, int image_num, uint32_t *channel_nums,
+                 int height, int width) {
+  int total_channel = 0;
+  for (int i = 0; i < image_num; i++) {
+    scales_out[i][0] = scale_in[0];
+    scales_out[i][1] = scale_in[1];
+    total_channel += channel_nums[i];
+  }
+
+  // The source row stride does not depend on the loop variables.
+  const int src_row_stride =
+      align_to_x(total_channel * width, IMAGE_ALIGNMENT);
+  for (int h = 0; h < height; h++) {
+    for (int w = 0; w < width; w++) {
+      // Rows are taken to be width * channel elements (as the row stride
+      // implies), so every pixel must be visited; copying once per row
+      // would fill only the first pixel of each output row.
+      int src_offset = h * src_row_stride + w * total_channel;
+      for (int i = 0; i < image_num; i++) {
+        const int channel = static_cast<int>(channel_nums[i]);
+        const int des_offset =
+            h * align_to_x(channel * width, IMAGE_ALIGNMENT) + w * channel;
+        memcpy(reinterpret_cast<int16_t *>(images_out[i]) + des_offset,
+               image_in + src_offset, channel * sizeof(int16_t));
+        src_offset += channel;
+      }
+    }
+  }
+}
+
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/image.h b/src/fpga/V1/image.h
index 7e004916118ae97d60d24e798300d66a98191211..321967bbe233c5bec889aeb63f98dc23779b4918 100644
--- a/src/fpga/V1/image.h
+++ b/src/fpga/V1/image.h
@@ -28,6 +28,9 @@ void concat_images(int16_t** images_in, float** scales_in, void* image_out,
                    float* scale_out, int image_num, uint32_t* channel_num,
                    int height,
                    int width);  // Concat featuremaps along channel direction
+void split_image(int16_t* image_in, float* scale_in, void** images_out,
+                 float** scales_out, int image_num, uint32_t* channel_nums,
+                 int height, int width);  // Split along channel direction
 }  // namespace image
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp
index 9adea7e0962243d46fa6060b4deae6df371567c8..1f0e5768a7017a4c7f928fea86b8f4ef3cdbae3d 100644
--- a/src/fpga/V1/pe.cpp
+++ b/src/fpga/V1/pe.cpp
@@ -138,13 +138,11 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) {
   DLOG << "=============ComputeFpgaConcat===========";
   DLOG << "   Image_num: " << args.image_num
        << "   out_address:" << args.image_out
-       << "   out_scale_address:" << args.scale_out
-       << "   out_channel:" << args.out_channel;
+       << "   out_scale_address:" << args.scale_out;
   DLOG << "   image_height:" << args.height << "   image_width:" << args.width;
   for (int i = 0; i < args.image_num; i++) {
     DLOG << "   " << i << "th:        ";
     DLOG << "   channel_num:" << args.channel_num[i]
-         << "   aligned_channel_num:" << args.aligned_channel_num[i]
          << "   image_address:" << args.images_in[i]
          << "   image_scale_address:" << args.scales_in[i];
   }
@@ -156,5 +154,36 @@ int ComputeFPGAConcat(const struct ConcatArgs &args) {
   return 0;
 }
 
+// Runs the FPGA split operation described by args by forwarding its fields
+// to image::split_image. With FPGA_PRINT_MODE defined the arguments are
+// logged first. Always returns 0.
+int ComputeFPGASplit(const struct SplitArgs &args) {
+#ifdef FPGA_PRINT_MODE
+  DLOG << "=============ComputeFpgaSplit===========";
+  DLOG << "   Image_num: " << args.image_num
+       << "   in_address:" << args.image_in
+       << "   in_scale_address:" << args.scale_in;
+  DLOG << "   image_height:" << args.height << "   image_width:" << args.width;
+  int idx = 0;
+  while (idx < args.image_num) {
+    // One header line and one detail line per output image.
+    DLOG << "   " << idx << "th:        ";
+    DLOG << "   channel_num:" << args.out_channel_nums[idx]
+         << "   image_address:" << args.images_out[idx]
+         << "   image_scale_address:" << args.scales_out[idx];
+    ++idx;
+  }
+#endif
+  // Unpack the argument struct into the positional form split_image expects.
+  int16_t *const src_image = args.image_in;
+  float *const src_scale = args.scale_in;
+  void **const dst_images = args.images_out;
+  float **const dst_scales = args.scales_out;
+  uint32_t *const dst_channels = args.out_channel_nums;
+  image::split_image(src_image, src_scale, dst_images, dst_scales,
+                     args.image_num, dst_channels, args.height, args.width);
+  return 0;
+}
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h
index 430014ef654ec2f00eeb2548012e4ae716f4aa8b..b3f619f2f24aba47d99f7f427c4b67af8c0d430d 100644
--- a/src/fpga/common/fpga_common.h
+++ b/src/fpga/common/fpga_common.h
@@ -74,8 +74,19 @@ struct ConcatArgs {
   void* image_out;
   float* scale_out;
   uint32_t* channel_num;
-  uint32_t* aligned_channel_num;
-  uint32_t out_channel;
+  //  uint32_t* aligned_channel_num;
+  //  uint32_t out_channel;
+  uint32_t height;
+  uint32_t width;
+};
+
+struct SplitArgs {  // Arguments consumed by ComputeFPGASplit
+  uint32_t image_num;          // number of output images
+  int16_t* image_in;           // input image data
+  float* scale_in;             // input scale; elements [0] and [1] are read
+  void** images_out;           // image_num output buffers
+  float** scales_out;          // image_num output scale arrays
+  uint32_t* out_channel_nums;  // channel count of each output
   uint32_t height;
   uint32_t width;
 };
diff --git a/src/fpga/common/pe.h b/src/fpga/common/pe.h
index 0da13b8396b7f6a7960dfbb36337f3b38c7ac865..ae773f25b4171df3e552aaa07bb05af8564d872a 100644
--- a/src/fpga/common/pe.h
+++ b/src/fpga/common/pe.h
@@ -25,6 +25,7 @@ int ComputeFpgaEWAdd(const struct EWAddArgs& args);
 
 int ComputeFpgaConv(const struct SplitConvArgs& args);
 int ComputeFPGAConcat(const struct ConcatArgs& args);
+int ComputeFPGASplit(const struct SplitArgs& args);  // Channel-wise split
 
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/V1/split_kernel.cpp b/src/operators/kernel/fpga/V1/split_kernel.cpp
index faa1da9186d2a74961450925dea6e3d0f98856bc..b8c0bb3be64d2393b61b0f82375c695000f52b65 100644
--- a/src/operators/kernel/fpga/V1/split_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/split_kernel.cpp
@@ -19,11 +19,45 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
+bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA> *param) {  // setup args
+  auto *in = const_cast<Tensor *>(param->InputX());  // tensor to be split
+  auto outs = param->Outs();  // tensors receiving the parts
+  auto sections = param->Sections();  // channel count per output
+  int axis = param->Axis();
+  PADDLE_MOBILE_ENFORCE(axis == 1, "Only support split in channel dimension");
+  PADDLE_MOBILE_ENFORCE(outs.size() == sections.size(),
+                        "Output number should be equal to section number");
+  auto image_num = (uint32_t)outs.size();
+  auto images_out =  // image_num data pointers, one per output
+      reinterpret_cast<void **>(fpga::fpga_malloc(image_num * sizeof(void *)));
+  auto scales_out = reinterpret_cast<float **>(  // scale pointers
+      fpga::fpga_malloc(image_num * sizeof(float *)));
+  auto out_channels = reinterpret_cast<uint32_t *>(  // channel counts
+      fpga::fpga_malloc(image_num * sizeof(uint32_t)));
+  for (int i = 0; i < image_num; i++) {
+    fpga::format_fp16_ofm(outs[i]);  // runs before mutable_data below
+    images_out[i] = outs[i]->mutable_data<float>();
+    scales_out[i] = outs[i]->scale;
+    out_channels[i] = (uint32_t)sections[i];
+  }
+  // NOTE(review): no release of the three fpga_malloc buffers is visible here.
+  fpga::SplitArgs arg = {0};
+  arg.image_num = image_num;
+  arg.image_in = (half *)in->data<float>();  // data reinterpreted as half
+  arg.scale_in = in->scale;
+  arg.images_out = images_out;
+  arg.scales_out = scales_out;
+  arg.out_channel_nums = out_channels;
+  arg.height = (uint32_t)in->dims()[2];  // assumes NCHW dims - TODO confirm
+  arg.width = (uint32_t)in->dims()[3];
+  // Stored on the param; Compute() reads it back through FpgaArgs().
+  param->SetFpgaArgs(arg);
   return true;
 }
 template <>
-void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}
+void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA> &param) {
+  fpga::ComputeFPGASplit(param.FpgaArgs());  // args stored by Init()
+}
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
index 585cc52947fa5de991fee446ba3c0098ae99d0af..4505401f434c320003e8122a3a0e197441ae8921 100644
--- a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
@@ -21,6 +21,7 @@ namespace operators {
 
 template <>
 bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
+  param->Out()->ShareDataWith(*param->InputX());  // Out shares X's data
   return true;
 }
 
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 5a2305876bae2f53327b0ae75cc498ff585ad4f0..fe5cce379d199be5d3931308513823c7279c21ff 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -2421,6 +2421,15 @@ class SplitParam : public OpParam {
   int num;
   std::vector<int> sections;
   //  std::vector<GType> out_ts_;
+#ifdef PADDLE_MOBILE_FPGA
+  // FPGA-only members: the SplitArgs handed to fpga::ComputeFPGASplit.
+ private:
+  fpga::SplitArgs fpga_split_args;  // written by SetFpgaArgs
+  // FpgaArgs returns a reference to the stored args; SetFpgaArgs copies in.
+ public:
+  const fpga::SplitArgs &FpgaArgs() const { return fpga_split_args; }
+  void SetFpgaArgs(const fpga::SplitArgs &args) { fpga_split_args = args; }
+#endif
 };
 #endif