Add CPU and GPU predict functions, optimize feed op kernel

bd67efb4 · yangfei · 7109adbe · bd67efb4 · bd67efb4 · bd67efb4
6 changed files
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
  paddle_mobile_->SetThreadNum(config.thread_num);
  return true;
 }
+template <typename Dtype, Precision P>
+double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
+  return paddle_mobile_->GetPredictTime();  // forwards to paddle_mobile_
+}

 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Run(

--- a/src/io/api_paddle_mobile.h
+++ b/src/io/api_paddle_mobile.h
@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
           std::vector<PaddleTensor>* output_data,
           int batch_size = -1) override;

+  double CaculatePredictTime() override;
+
  ~PaddleMobilePredictor() override;

 private:

--- a/src/io/paddle_inference_api.h
+++ b/src/io/paddle_inference_api.h
@@ -98,7 +98,7 @@ class PaddlePredictor {
  virtual bool Run(const std::vector<PaddleTensor>& inputs,
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;
-
+  virtual double CaculatePredictTime() = 0;
  // Destroy the Predictor.
  virtual ~PaddlePredictor() = default;


--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "io/paddle_mobile.h"
+#ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
-#include "common/common.h"
 #include "framework/cl/cl_tensor.h"
+#endif
+#include "common/common.h"
 #include "operators/math/gemm.h"
 namespace paddle_mobile {

@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
 }

 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {
  int m = 32;
  int n = 224 * 224;
  int k = 27;
@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
    framework::CLEngine::Instance()->setClPath(path);
  }
 }
-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
+template <>
+double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
  cl_int status;
  cl_uint nPlatform;
  clGetPlatformIDs(0, NULL, &nPlatform);

--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -65,7 +65,7 @@ class PaddleMobile {

  void SetThreadNum(int num);
  void Clear();
-  double GetCPUPredictTime();
+  double GetPredictTime();

  ~PaddleMobile();

@@ -81,7 +81,6 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
 public:
  void SetCLPath(std::string cl_path);
-  double GetGPUPredictTime();
  int readText(const char *kernelPath,
               char **pcode);  // 读取文本文件放入 pcode，返回字符串长度
 #endif

--- a/test/net/test_yologpu.cpp
+++ b/test/net/test_yologpu.cpp
@@ -18,16 +18,17 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 void t1() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
  //    paddle_mobile.SetThreadNum(4);
 #ifdef PADDLE_MOBILE_CL
-  paddle_mobile.SetCLPath("/data/local/tmp/bin");
+  paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
 #endif
-  printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
-  printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
+  printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
+  printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
  auto time1 = paddle_mobile::time();
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
-                                 std::string(g_yolo_mul) + "/params", true);
+  auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
+                                     std::string(g_yolo_mul) + "/params", true);

  //  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
  if (isok) {
@@ -45,7 +46,7 @@ void t1() {
    auto time3 = paddle_mobile::time();
    int max = 10;
    for (int i = 0; i < max; ++i) {
-      vec_result = paddle_mobile.Predict(input, dims);
+      vec_result = paddle_mobile_gpu.Predict(input, dims);
    }
    auto time4 = paddle_mobile::time();

@@ -173,10 +174,12 @@ void t3() {

 int main() {
  //  std::thread th1(t1);
-  //    std::thread th2(t2);
-  std::thread th1(t1);
+  //      std::thread th2(t2);
+  std::thread th3(t3);
+  //  std::thread th1(t1);
+  //  th1.join();
+  //      th2.join();
+  th3.join();
  //  th1.join();
-  //    th2.join();
-  th1.join();
  return 0;
 }