提交 ec3bb70c 编写于 作者: L liuruilong

for

上级 c93b7403
cmake_minimum_required(VERSION 3.0)
project(paddle-mobile)

# Build switches. DEBUGING selects the Debug build type further below.
# (Removed a duplicated, stale `option(DEBUGING ... OFF)` line; note that
# option() only takes effect the first time, so the duplicate was dead.)
option(DEBUGING "enable debug mode" ON)
option(USE_OPENMP "openmp support" OFF)
option(USE_EXCEPTION "use std exception" ON)
option(CPU "cpu" ON)
option(MALI_GPU "mali gpu" OFF)
option(FPGA "fpga" ON)

# NOTE(review): the elseif chain means only ONE backend macro is ever
# defined, even though CPU and FPGA both default to ON — confirm that a
# single-backend build is the intent.
if (CPU)
add_definitions(-DPADDLE_MOBILE_CPU)
elseif (MALI_GPU)
add_definitions(-DPADDLE_MOBILE_MALI_GPU)
elseif(FPGA)
add_definitions(-DPADDLE_MOBILE_FPGA)
endif()
if (DEBUGING)
set(CMAKE_BUILD_TYPE Debug)
else()
......
......@@ -96,24 +96,73 @@ class OpRegistry {
}
};
#ifdef PADDLE_MOBILE_CPU
// Registers `op_class` under the name `op_type` for the CPU device.
// Expands to: (1) a thin subclass so each registration has a distinct type,
// (2) a file-scope OperatorRegistrar whose constructor inserts the op into
// the registry, and (3) a TouchOpRegistrar_* function that USE_OP_CPU calls
// from other translation units to keep the registrar from being stripped.
// (Removed a stale, dangling `#define REGISTER_OPERATOR(...) \` left over
// from the pre-change version: its trailing backslash merged the following
// `#ifdef` into the macro, which is ill-formed.)
#define REGISTER_OPERATOR_CPU(op_type, op_class)                               \
  template <typename Dtype, typename T>                                        \
  class _OpClass_##op_type##_cpu : public op_class<Dtype, T> {                 \
   public:                                                                     \
    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_cpu, op_class);                 \
  };                                                                           \
  static paddle_mobile::framework::OperatorRegistrar<                          \
      paddle_mobile::CPU, _OpClass_##op_type##_cpu<paddle_mobile::CPU, float>> \
      __op_registrar_##op_type##__cpu(#op_type);                               \
  int TouchOpRegistrar_##op_type##_cpu() {                                     \
    __op_registrar_##op_type##__cpu.Touch();                                   \
    return 0;                                                                  \
  }

// Pulls in the CPU registrar for `op_type` from its defining translation
// unit so the linker does not discard the registration object.
#define USE_OP_CPU(op_type)                                       \
  extern int TouchOpRegistrar_##op_type##_cpu();                  \
  static int use_op_itself_##op_type##_ __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_cpu()
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
// Registers `op_class` under the name `op_type` for the Mali GPU build.
// Same shape as REGISTER_OPERATOR_CPU: helper subclass + file-scope
// registrar + Touch function for linkage.
// (Removed the stale pre-change macro lines that were interleaved with this
// version — e.g. `_OpClass_##op_type##_` / `__op_registrar_##op_type##__` —
// which made the macro invalid as scraped.)
// NOTE(review): the registrar is instantiated with paddle_mobile::CPU as
// the device type, apparently copy-pasted from the CPU macro — confirm
// whether this should be paddle_mobile::GPU_MALI.
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class)                 \
  template <typename Dtype, typename T>                               \
  class _OpClass_##op_type##_mali_gpu : public op_class<Dtype, T> {   \
   public:                                                            \
    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_mali_gpu, op_class);   \
  };                                                                  \
  static paddle_mobile::framework::OperatorRegistrar<                 \
      paddle_mobile::CPU,                                             \
      _OpClass_##op_type##_mali_gpu<paddle_mobile::CPU, float>>       \
      __op_registrar_##op_type##__mali_gpu(#op_type);                 \
  int TouchOpRegistrar_##op_type##_mali_gpu() {                       \
    __op_registrar_##op_type##__mali_gpu.Touch();                     \
    return 0;                                                         \
  }

// Forces the Mali GPU registrar for `op_type` to be linked in.
#define USE_OP_MALI_GPU(op_type)                                  \
  extern int TouchOpRegistrar_##op_type##_mali_gpu();             \
  static int use_op_itself_##op_type##_ __attribute__((unused)) = \
      TouchOpRegistrar_##op_type##_mali_gpu()
#endif
#ifdef PADDLE_MOBILE_FPGA
// Registers `op_class` under the name `op_type` for the FPGA build:
// defines a thin subclass, a file-scope OperatorRegistrar that performs the
// registration at static-init time, and a TouchOpRegistrar_*_fpga function
// used by USE_OP_FPGA to force linkage of the registrar object.
// NOTE(review): the registrar's device template argument is
// paddle_mobile::CPU, seemingly copy-pasted from the CPU macro — confirm
// whether it should be paddle_mobile::FPGA.
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
template <typename Dtype, typename T> \
class _OpClass_##op_type##_fpga : public op_class<Dtype, T> { \
public: \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_fpga, op_class); \
}; \
static paddle_mobile::framework::OperatorRegistrar< \
paddle_mobile::CPU, _OpClass_##op_type##_fpga<paddle_mobile::CPU, float>> \
__op_registrar_##op_type##__fpga(#op_type); \
int TouchOpRegistrar_##op_type##_fpga() { \
__op_registrar_##op_type##__fpga.Touch(); \
return 0; \
}
// Pulls the FPGA registrar for `op_type` into the current translation unit
// so the linker keeps the registration object.
#define USE_OP_FPGA(op_type) \
extern int TouchOpRegistrar_##op_type##_fpga(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_fpga()
#endif
} // namespace framework
} // namespace paddle_mobile
......@@ -58,7 +58,12 @@ void OperatorBase<Dtype>::Run() const {
}
// Explicit template instantiations: emit the operator base classes for
// every supported device type in this translation unit, so other TUs can
// link against them without seeing the definitions.
template class OperatorBase<CPU>;
template class OperatorBase<FPGA>;
template class OperatorBase<GPU_MALI>;
template class OperatorWithKernel<CPU>;
template class OperatorWithKernel<FPGA>;
template class OperatorWithKernel<GPU_MALI>;
} // namespace framework
} // namespace paddle_mobile
......@@ -137,6 +137,7 @@ class OpKernelBase {
std::shared_ptr<::paddle_mobile::framework::Scope> scope) \
: parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
class FusionOpMatcher {
public:
FusionOpMatcher() {}
......@@ -153,6 +154,7 @@ class FusionOpMatcher {
std::string BeginType() { return node_.Type(); }
// virtual bool Fusion();
protected:
Node node_;
std::string type_;
......
......@@ -31,7 +31,14 @@ template class BatchNormOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers batch_norm for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls — those macros were
// replaced by the per-device *_CPU/*_MALI_GPU/*_FPGA variants.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(batch_norm);
REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -52,7 +52,13 @@ template class BoxCoderOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers box_coder for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(box_coder);
REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -61,8 +61,15 @@ template class ConcatOp<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers concat for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(concat);
REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -53,7 +53,19 @@ template class ConvOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// conv2d is registered for every backend the build enables.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(conv2d);
REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU(conv2d);
REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(conv2d);
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
#endif
#endif
......@@ -54,7 +54,13 @@ template class DepthwiseConvOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers depthwise_conv2d.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(depthwise_conv2d);
REGISTER_OPERATOR_CPU(depthwise_conv2d, ops::DepthwiseConvOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -29,7 +29,13 @@ template class ElementwiseAddOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers elementwise_add.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(elementwise_add);
REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -43,8 +43,14 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
};
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers the feed op.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(feed);
REGISTER_OPERATOR_CPU(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -43,8 +43,14 @@ class FetchOp : public framework::OperatorBase<DeviceType> {
};
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers the fetch op.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fetch);
REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -25,7 +25,13 @@ template class FushionConvAddOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers the fused conv_add.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(conv_add);
REGISTER_OPERATOR_CPU(conv_add, ops::FushionConvAddOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -64,7 +64,13 @@ class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
// FushionFcParam param_;
};
// File-scope registrar: installs the conv+add fusion matcher so the program
// optimizer can fold matching subgraphs. CPU builds only for now.
// (Removed a stale duplicated commented-out registrar line left by the diff.)
#ifdef PADDLE_MOBILE_CPU
static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
......
......@@ -46,8 +46,14 @@ class ConvAddReluOp {
private:
};
// Fusion registrar for conv+add+relu — currently disabled (kept commented
// out until the fused kernel is ready). CPU builds only.
// (Removed a stale partial comment line left over from the pre-change text.)
#ifdef PADDLE_MOBILE_CPU
// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
//     new FushionConvAddReluOpMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
......
......@@ -54,7 +54,13 @@ template class FushionFcOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers the fused fc op.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fc);
REGISTER_OPERATOR_CPU(fc, ops::FushionFcOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -37,8 +37,6 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
// Collapses the matched mul+elementwise_add subgraph into the single fused
// fc node, remapping the elementwise_add inputs onto {"Y", "Z"} of the
// fused op; nodes absorbed by the fold are returned via `removed_nodes`.
// (Removed a stale unused `origin_descs` local left over from the diff;
// it used unqualified `vector` and would not compile.)
void FolderNodes(
    framework::Node *node,
    std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
  node->Folder(node_.Depth(), Type(),
               {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes);
}
......@@ -69,7 +67,14 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
FushionFcParam param_;
};
#ifdef PADDLE_MOBILE_CPU
// File-scope registrar: installs the fc fusion matcher for CPU builds.
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
// NOTE(review): reuses the same `fc_registrar` identifier as the CPU
// branch; this is only well-formed because the PADDLE_MOBILE_* macros are
// mutually exclusive (CMake elseif chain) — confirm both are never defined.
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
} // namespace operators
} // namespace paddle_mobile
......
......@@ -14,14 +14,15 @@ limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {

// FPGA conv kernel specialization. Compute is currently an empty stub: it
// exists so the FPGA build links, pending a real FPGA implementation.
// (Removed the stale commented-out duplicates of these two definitions that
// were left interleaved by the diff.)
template <>
void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {}

// Explicit instantiation so other translation units can link the kernel.
template class ConvKernel<FPGA, float>;

}  // namespace operators
}  // namespace paddle_mobile
......
......@@ -29,7 +29,13 @@ template class LrnOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers lrn for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(lrn);
REGISTER_OPERATOR_CPU(lrn, ops::LrnOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -55,7 +55,13 @@ template class MulOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers mul for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(mul);
REGISTER_OPERATOR_CPU(mul, ops::MulOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -39,7 +39,13 @@ template class MultiClassNMSOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers multiclass_nms.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(multiclass_nms);
REGISTER_OPERATOR_CPU(multiclass_nms, ops::MultiClassNMSOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -59,7 +59,13 @@ template class PoolOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers pool2d for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(pool2d);
REGISTER_OPERATOR_CPU(pool2d, ops::PoolOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -49,7 +49,13 @@ template class PriorBoxOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers prior_box for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(prior_box);
REGISTER_OPERATOR_CPU(prior_box, ops::PriorBoxOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -33,7 +33,13 @@ template class ReluOp<CPU, float>;
* 都是需要和model中类型对应起来的
* */
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers relu for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(relu);
REGISTER_OPERATOR_CPU(relu, ops::ReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -32,7 +32,13 @@ template class ReshapeOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers reshape for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(reshape);
REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -27,7 +27,13 @@ template class SigmoidOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers sigmoid for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(sigmoid);
REGISTER_OPERATOR_CPU(sigmoid, ops::SigmoidOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -27,7 +27,13 @@ template class SoftmaxOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers softmax for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(softmax);
REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -52,7 +52,14 @@ template class TransposeOp<CPU, float>;
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
// Per-device registration; only the CPU build registers transpose for now.
// (Removed stale `USE_OP`/`REGISTER_OPERATOR` calls to the deleted macros.)
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(transpose);
REGISTER_OPERATOR_CPU(transpose, ops::TransposeOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册