Commit bd67efb4 authored by: Y yangfei

add cpu and gpu predict time function, optimize feed op kernel

Parent 7109adbe
@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_->SetThreadNum(config.thread_num);
   return true;
 }
+template <typename Dtype, Precision P>
+double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
+  return paddle_mobile_->GetPredictTime();
+};
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Run(
...
@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) override;
+  double CaculatePredictTime() override;
   ~PaddleMobilePredictor() override;

 private:
...
@@ -98,7 +98,7 @@ class PaddlePredictor {
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) = 0;
+  virtual double CaculatePredictTime() = 0;
   // Destroy the Predictor.
   virtual ~PaddlePredictor() = default;
...
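Note: below is a minimal caller-side sketch (not part of this commit) of how the new pure virtual is reached through the public API. The CreatePaddlePredictor factory and the header path reflect paddle-mobile's public API as far as I know; the model_dir value and any config field other than thread_num (which appears in the Init() hunk above) are placeholder assumptions.

#include <iostream>
#include "io/paddle_inference_api.h"  // assumed public API header

int main() {
  paddle_mobile::PaddleMobileConfig config;
  config.thread_num = 1;                       // field shown in Init() above
  config.model_dir = "/data/local/tmp/model";  // hypothetical model location

  auto predictor = paddle_mobile::CreatePaddlePredictor<
      paddle_mobile::PaddleMobileConfig>(config);

  // CaculatePredictTime() forwards to PaddleMobile::GetPredictTime(), which
  // times a fixed benchmark workload rather than the loaded model.
  std::cout << "predict time: " << predictor->CaculatePredictTime() << std::endl;
  return 0;
}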
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "io/paddle_mobile.h"
+#ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
-#include "common/common.h"
 #include "framework/cl/cl_tensor.h"
+#endif
+#include "common/common.h"
 #include "operators/math/gemm.h"
 namespace paddle_mobile {
@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
 }
 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
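The CPU implementation estimates device speed by timing one SGEMM whose shape resembles a network's first convolution (k = 27 = 3*3*3 weights per filter, m = 32 filters, n = 224*224 output positions). Below is a standalone sketch of that timing pattern, using std::chrono and a naive loop in place of the tuned Gemm from operators/math/gemm.h; the printed number is a speed proxy, not a real model latency.

#include <chrono>
#include <cstdio>
#include <vector>

// Naive row-major SGEMM: c[m x n] = a[m x k] * b[k x n].
static void naive_sgemm(int m, int n, int k, const float *a, const float *b,
                        float *c) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) acc += a[i * k + p] * b[p * n + j];
      c[i * n + j] = acc;
    }
}

int main() {
  const int m = 32, n = 224 * 224, k = 27;  // same shape as GetPredictTime()
  std::vector<float> a(m * k, 1.f), b(k * n, 1.f), c(m * n, 0.f);

  auto t1 = std::chrono::high_resolution_clock::now();
  naive_sgemm(m, n, k, a.data(), b.data(), c.data());
  auto t2 = std::chrono::high_resolution_clock::now();

  double ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
  printf("cpu gemm time: %f ms\n", ms);  // proxy for CPU predict speed
  return 0;
}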
@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
+template <>
+double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
...
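The GPU specialization opens with the standard OpenCL discovery handshake: call clGetPlatformIDs once with a null buffer to get the platform count, then again to fetch the IDs, before setting up a context and queue for the timing kernel. A minimal self-contained sketch of that pattern:

#include <CL/cl.h>
#include <cstdio>
#include <vector>

int main() {
  cl_uint nPlatform = 0;
  cl_int status = clGetPlatformIDs(0, NULL, &nPlatform);  // count query only
  if (status != CL_SUCCESS || nPlatform == 0) {
    printf("no OpenCL platform found\n");
    return -1;
  }
  std::vector<cl_platform_id> platforms(nPlatform);
  status = clGetPlatformIDs(nPlatform, platforms.data(), NULL);  // fetch IDs

  cl_uint nDevice = 0;
  clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &nDevice);
  printf("platforms: %u, GPU devices on platform 0: %u\n", nPlatform, nDevice);
  return 0;
}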
@@ -65,7 +65,7 @@ class PaddleMobile {
   void SetThreadNum(int num);
   void Clear();
-  double GetCPUPredictTime();
+  double GetPredictTime();
   ~PaddleMobile();
@@ -81,7 +81,6 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
  public:
   void SetCLPath(std::string cl_path);
-  double GetGPUPredictTime();
   int readText(const char *kernelPath,
                char **pcode);  // reads a text file into pcode; returns the string length
 #endif
...
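readText is declared here but its body is not part of this diff. As a hedged sketch only (the real implementation may differ), a helper with this signature conventionally loads a kernel source file into a freshly allocated, NUL-terminated buffer and returns its length, ready to hand to clCreateProgramWithSource:

#include <cstdio>
#include <cstdlib>

// Hypothetical implementation sketch of a readText-style helper.
int readText(const char *kernelPath, char **pcode) {
  FILE *fp = fopen(kernelPath, "rb");
  if (fp == NULL) return -1;
  fseek(fp, 0, SEEK_END);
  long size = ftell(fp);
  rewind(fp);
  *pcode = static_cast<char *>(malloc(size + 1));
  if (*pcode == NULL) {
    fclose(fp);
    return -1;
  }
  size_t nread = fread(*pcode, 1, size, fp);
  (*pcode)[nread] = '\0';  // NUL-terminate for clCreateProgramWithSource
  fclose(fp);
  return static_cast<int>(nread);
}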
@@ -18,15 +18,16 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 void t1() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
   // paddle_mobile.SetThreadNum(4);
 #ifdef PADDLE_MOBILE_CL
-  paddle_mobile.SetCLPath("/data/local/tmp/bin");
+  paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
 #endif
-  printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
-  printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
+  printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
+  printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
   auto time1 = paddle_mobile::time();
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
-                                 std::string(g_yolo_mul) + "/params", true);
+  auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
+                                     std::string(g_yolo_mul) + "/params", true);
   // auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
...
@@ -45,7 +46,7 @@ void t1() {
   auto time3 = paddle_mobile::time();
   int max = 10;
   for (int i = 0; i < max; ++i) {
-    vec_result = paddle_mobile.Predict(input, dims);
+    vec_result = paddle_mobile_gpu.Predict(input, dims);
   }
   auto time4 = paddle_mobile::time();
...
@@ -174,9 +175,11 @@ void t3() {
 int main() {
   // std::thread th1(t1);
   // std::thread th2(t2);
-  std::thread th1(t1);
+  std::thread th3(t3);
+  // std::thread th1(t1);
   // th1.join();
   // th2.join();
-  th1.join();
+  th3.join();
+  // th1.join();
   return 0;
 }
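With both backends exposing GetPredictTime(), a caller can benchmark each up front and pick the faster device for the real model. Below is a sketch of that selection pattern, mirroring the objects in t1(); treating a non-positive GPU result as "no usable device" is an assumption for illustration, not documented behavior.

#include <cstdio>
#include "io/paddle_mobile.h"

// Returns true when the GPU probe succeeds and beats the CPU probe.
bool PreferGPU() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> cpu;
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> gpu;
  double cpu_time = cpu.GetPredictTime();
  double gpu_time = gpu.GetPredictTime();
  printf("cpu time:%f gpu time:%f\n", cpu_time, gpu_time);
  // Assumption: fall back to CPU when the GPU probe fails or is slower.
  return gpu_time > 0 && gpu_time < cpu_time;
}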