提交 afa26250 编写于 作者: Y yangfei

Add some functions for OpenCL

上级 3264ebc0
......@@ -85,7 +85,7 @@ class CLImage {
}
// Accessor for the tensor dimensions backing this CLImage.
// BUG FIX: the previous `return DDim();` returned a reference to a
// temporary (dangling) and made the real return unreachable.
const DDim &dims() const {
return tensorDims_;
}
std::vector<size_t> DefaultWorkSize() {
......
......@@ -30,6 +30,7 @@ class CLScope {
public:
// Constructs an OpenCL scope: grabs the process-wide CLEngine singleton,
// initializes it, and creates the context / command queue this scope owns.
CLScope() {
CLEngine *engin = CLEngine::Instance();
// Ensure platform/device setup has run before creating CL resources.
engin->Init();
context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue();
}
......
......@@ -164,4 +164,6 @@ template class PaddleMobile<FPGA, Precision::FP32>;
template class PaddleMobile<GPU_MALI, Precision::FP32>;
template class PaddleMobile<GPU_CL, Precision::FP32>;
} // namespace paddle_mobile
......@@ -38,4 +38,8 @@ REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp);
#endif
#endif
......@@ -53,4 +53,8 @@ USE_OP_MALI_GPU(elementwise_add);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(elementwise_add);
#endif
#endif
......@@ -27,3 +27,6 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(feed, ops::FeedOp);
#endif
......@@ -43,7 +43,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
#ifdef PADDLE_MOBILE_FPGA
void Init() {
void Init() {
Tensor *output = param_.Out();
fpga::format_fp16_ofm(output);
}
......@@ -73,11 +73,19 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
}
#else
void Init() {}
#ifdef PADDLE_MOBILE_CL
void Init() {}
void RunImpl() {
}
#else
void Init() {}
void RunImpl() {
param_.Out()->ShareDataWith(*param_.InputX());
param_.Out()->set_lod(param_.InputX()->lod());
}
#endif
#endif
protected:
......@@ -96,3 +104,6 @@ USE_OP_MALI_GPU(feed);
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(feed);
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(feed);
#endif
......@@ -58,4 +58,8 @@ REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
#endif
#endif
......@@ -96,5 +96,7 @@ USE_OP_MALI_GPU(fusion_conv_add);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(fusion_conv_add);
#endif
#endif
......@@ -57,5 +57,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#endif
......@@ -99,6 +99,14 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_CL
#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
new FusionConvAddBNReluMatcher());
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
} // namespace operators
......@@ -113,4 +121,8 @@ USE_OP_CPU(fusion_conv_add_bn_relu);
USE_OP_FPGA(fusion_conv_add_bn_relu);
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(fusion_conv_add_bn_relu);
#endif
#endif
// NOTE(review): despite its name, this kernel does NOT add two tensors —
// it maps each element as out[i] = in[i] * 0.1 + 102. This looks like a
// debug/placeholder transform (there is no second input buffer); confirm
// the intended semantics before relying on it.
__kernel void elementwise_add(__global float* in, __global float* out) {
// One work-item per element; global id indexes the flat buffers.
int num = get_global_id(0);
out[num] = in[num] * 0.1 + 102;
}
......@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
#include "operators/kernel/conv_add_bn_kernel.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#include "../central-arm-func/conv_add_arm_func.h"
namespace paddle_mobile {
namespace operators {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
// Registers the "elementwise_add" OpenCL kernel (built from
// elementwise_add_kernel.cl) with the CL helper so Compute() can launch it.
// Always reports success; compilation errors surface later at launch time.
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
return true;
}
// TODO(review): stub — the kernel is registered in Init() but never set up
// with arguments or enqueued here, so GPU_CL element-wise addition produces
// no output yet.
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(const ElementwiseAddParam<GPU_CL> &param) {
}
// Explicit instantiation for the GPU_CL / float specialization.
template class ElementwiseAddKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -901,7 +901,7 @@ class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope);
auto var = scope->Var("batch_size");
batch_size = var->GetValue<int>();
......@@ -911,7 +911,7 @@ class FeedParam : public OpParam {
const int BatchSize() const { return batch_size; }
private:
GType *input_x_;
LoDTensor *input_x_;
GType *out_;
int batch_size;
};
......
......@@ -253,6 +253,10 @@ else ()
ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
target_link_libraries(test-fssd paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h)
target_link_libraries(test-mobilenetgpu paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
// Benchmark driver: loads MobileNet on the OpenCL (GPU_CL) backend, runs one
// prediction to report the argmax, then times 10 predictions after a 10-run
// warm-up. Returns 0 unconditionally; failures are reported on stdout.
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
  // paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
  // std::string(g_mobilenet_detect) + "/params", true);
  auto isok = paddle_mobile.Load(g_mobilenet, false);
  if (isok) {
    auto time2 = time();
    // BUG FIX: was time_diff(time1, time1), which always printed 0 ms.
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
    auto vec_result = paddle_mobile.Predict(input, dims);
    // Report the top-1 class of the first (untimed) prediction.
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;
    // Warm up: run ten untimed predictions first.
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    // Logs the first prediction's result (the loop-local results are discarded).
    DLOG << vec_result;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
               "是否存在?"
            << std::endl;
  return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册