From 70966b6ff23bd4273c5bef62f0394291dccafbc1 Mon Sep 17 00:00:00 2001
From: yangfei
Date: Mon, 12 Nov 2018 16:09:53 +0800
Subject: [PATCH] add cpu and gpu predict function, optimize feed op kernel

---
 src/io/api_paddle_mobile.cc   |  4 ++++
 src/io/api_paddle_mobile.h    |  2 ++
 src/io/paddle_inference_api.h |  2 +-
 src/io/paddle_mobile.cpp      | 10 ++++++----
 src/io/paddle_mobile.h        |  3 +--
 test/net/test_yologpu.cpp     | 25 ++++++++++++++-----------
 6 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index 144cf127a4..c5da3993d1 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_->SetThreadNum(config.thread_num);
   return true;
 }
+template <typename Dtype, Precision P>
+double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
+  return paddle_mobile_->GetPredictTime();
+};
 
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Run(
diff --git a/src/io/api_paddle_mobile.h b/src/io/api_paddle_mobile.h
index bdeb7e1865..d8e5f856c6 100644
--- a/src/io/api_paddle_mobile.h
+++ b/src/io/api_paddle_mobile.h
@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
            std::vector<PaddleTensor>* output_data,
            int batch_size = -1) override;
 
+  double CaculatePredictTime() override;
+
   ~PaddleMobilePredictor() override;
 
  private:
diff --git a/src/io/paddle_inference_api.h b/src/io/paddle_inference_api.h
index 3c9ffa00c7..33a166f2c5 100644
--- a/src/io/paddle_inference_api.h
+++ b/src/io/paddle_inference_api.h
@@ -98,7 +98,7 @@ class PaddlePredictor {
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) = 0;
-
+  virtual double CaculatePredictTime() = 0;
   // Destroy the Predictor.
   virtual ~PaddlePredictor() = default;
 
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cd49532045..fca870860e 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "io/paddle_mobile.h"
+#ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
-#include "common/common.h"
 #include "framework/cl/cl_tensor.h"
+#endif
+#include "common/common.h"
 #include "operators/math/gemm.h"
 
 namespace paddle_mobile {
@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
 }
 
 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
+template <>
+double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 5f058ebf28..ab148e7361 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -65,7 +65,7 @@ class PaddleMobile {
   void SetThreadNum(int num);
   void Clear();
 
-  double GetCPUPredictTime();
+  double GetPredictTime();
 
   ~PaddleMobile();
 
@@ -81,7 +81,6 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
  public:
   void SetCLPath(std::string cl_path);
-  double GetGPUPredictTime();
   int readText(const char *kernelPath,
                char **pcode);  // Read the text file into pcode; returns the string length
 #endif
diff --git a/test/net/test_yologpu.cpp b/test/net/test_yologpu.cpp
index d0797de908..e77861caba 100644
--- a/test/net/test_yologpu.cpp
+++ b/test/net/test_yologpu.cpp
@@ -18,16 +18,17 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 void t1() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
   // paddle_mobile.SetThreadNum(4);
 #ifdef PADDLE_MOBILE_CL
-  paddle_mobile.SetCLPath("/data/local/tmp/bin");
+  paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
 #endif
-  printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
-  printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
+  printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
+  printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
   auto time1 = paddle_mobile::time();
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
-                                 std::string(g_yolo_mul) + "/params", true);
+  auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
+                                     std::string(g_yolo_mul) + "/params", true);
 
   // auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
   if (isok) {
@@ -45,7 +46,7 @@ void t1() {
     auto time3 = paddle_mobile::time();
     int max = 10;
     for (int i = 0; i < max; ++i) {
-      vec_result = paddle_mobile.Predict(input, dims);
+      vec_result = paddle_mobile_gpu.Predict(input, dims);
     }
     auto time4 = paddle_mobile::time();
 
@@ -173,10 +174,12 @@ void t3() {
 
 int main() {
   // std::thread th1(t1);
-  // std::thread th2(t2);
-  std::thread th1(t1);
+  // std::thread th2(t2);
+  std::thread th3(t3);
+  // std::thread th1(t1);
+  // th1.join();
+  // th2.join();
+  th3.join();
   // th1.join();
-  // th2.join();
-  th1.join();
   return 0;
 }
--
GitLab
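
After this change both backends share one timing entry point: the generic
PaddleMobile<Dtype, P>::GetPredictTime() times a small reference GEMM on the
CPU (m = 32, n = 224 * 224, k = 27, per the hunk in paddle_mobile.cpp), the
GPU_CL specialization times an OpenCL workload instead, and PaddlePredictor
exposes the same number through the new pure-virtual CaculatePredictTime().
Below is a minimal usage sketch mirroring the patched
test/net/test_yologpu.cpp; the device tags and the reconstructed template
arguments are assumptions read off the diff context, since the patch does not
show the full class definitions:

    #include <cstdio>

    #include "io/paddle_mobile.h"

    int main() {
      // CPU estimate: GetPredictTime() runs the small reference GEMM
      // and returns the measured duration as a double.
      paddle_mobile::PaddleMobile<paddle_mobile::CPU> cpu_runtime;
      printf("cpu time:%f\n", cpu_runtime.GetPredictTime());

    #ifdef PADDLE_MOBILE_CL
      // GPU estimate: the GPU_CL specialization sets up OpenCL first;
      // SetCLPath() points at the kernel directory, as in the test.
      paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> gpu_runtime;
      gpu_runtime.SetCLPath("/data/local/tmp/bin");
      printf("gpu time:%f\n", gpu_runtime.GetPredictTime());
    #endif
      return 0;
    }

Through the paddle_inference_api.h interface the same measurement is
available as predictor->CaculatePredictTime(), which simply forwards to
GetPredictTime() on the wrapped PaddleMobile instance.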