From bd67efb49da2a62788dfc98494351a0f3dacffb7 Mon Sep 17 00:00:00 2001
From: yangfei
Date: Mon, 12 Nov 2018 16:09:53 +0800
Subject: [PATCH] add cpu and gpu predict function, optimize feed op kernel

---
 src/io/api_paddle_mobile.cc   |  4 ++++
 src/io/api_paddle_mobile.h    |  2 ++
 src/io/paddle_inference_api.h |  2 +-
 src/io/paddle_mobile.cpp      | 10 ++++++----
 src/io/paddle_mobile.h        |  3 +--
 test/net/test_yologpu.cpp     | 25 ++++++++++++++-----------
 6 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index 144cf127a4..c5da3993d1 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_->SetThreadNum(config.thread_num);
   return true;
 }
+template <typename Dtype, Precision P>
+double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
+  return paddle_mobile_->GetPredictTime();
+};
 
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Run(
diff --git a/src/io/api_paddle_mobile.h b/src/io/api_paddle_mobile.h
index bdeb7e1865..d8e5f856c6 100644
--- a/src/io/api_paddle_mobile.h
+++ b/src/io/api_paddle_mobile.h
@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
            std::vector<PaddleTensor>* output_data,
            int batch_size = -1) override;
 
+  double CaculatePredictTime() override;
+
   ~PaddleMobilePredictor() override;
 
  private:
diff --git a/src/io/paddle_inference_api.h b/src/io/paddle_inference_api.h
index 3c9ffa00c7..33a166f2c5 100644
--- a/src/io/paddle_inference_api.h
+++ b/src/io/paddle_inference_api.h
@@ -98,7 +98,7 @@ class PaddlePredictor {
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) = 0;
-
+  virtual double CaculatePredictTime() = 0;
   // Destroy the Predictor.
   virtual ~PaddlePredictor() = default;
 
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cd49532045..fca870860e 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "io/paddle_mobile.h"
+#ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
-#include "common/common.h"
 #include "framework/cl/cl_tensor.h"
+#endif
+#include "common/common.h"
 #include "operators/math/gemm.h"
 
 namespace paddle_mobile {
@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
 }
 
 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
+template <>
+double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 5f058ebf28..ab148e7361 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -65,7 +65,7 @@ class PaddleMobile {
   void SetThreadNum(int num);
   void Clear();
 
-  double GetCPUPredictTime();
+  double GetPredictTime();
 
   ~PaddleMobile();
 
@@ -81,7 +81,6 @@
 #ifdef PADDLE_MOBILE_CL
  public:
   void SetCLPath(std::string cl_path);
-  double GetGPUPredictTime();
   int readText(const char *kernelPath,
                char **pcode);  // reads the text file into pcode; returns the string length
 #endif
diff --git a/test/net/test_yologpu.cpp b/test/net/test_yologpu.cpp
index d0797de908..e77861caba 100644
--- a/test/net/test_yologpu.cpp
+++ b/test/net/test_yologpu.cpp
@@ -18,16 +18,17 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 void t1() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
   //  paddle_mobile.SetThreadNum(4);
 #ifdef PADDLE_MOBILE_CL
-  paddle_mobile.SetCLPath("/data/local/tmp/bin");
+  paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
 #endif
-  printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
-  printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
+  printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
+  printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
   auto time1 = paddle_mobile::time();
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
-                                 std::string(g_yolo_mul) + "/params", true);
+  auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
+                                     std::string(g_yolo_mul) + "/params", true);
 
   //  auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
   if (isok) {
@@ -45,7 +46,7 @@ void t1() {
   auto time3 = paddle_mobile::time();
   int max = 10;
   for (int i = 0; i < max; ++i) {
-    vec_result = paddle_mobile.Predict(input, dims);
+    vec_result = paddle_mobile_gpu.Predict(input, dims);
   }
   auto time4 = paddle_mobile::time();
 
@@ -173,10 +174,12 @@ void t3() {
 
 int main() {
   //  std::thread th1(t1);
-  //  std::thread th2(t2);
-  std::thread th1(t1);
+  //  std::thread th2(t2);
+  std::thread th3(t3);
+  //  std::thread th1(t1);
+  //  th1.join();
+  //  th2.join();
+  th3.join();
   //  th1.join();
-  //  th2.join();
-  th1.join();
   return 0;
 }
--
GitLab
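
A minimal caller sketch for the API this patch introduces, mirroring t1() in test/net/test_yologpu.cpp. The generic GetPredictTime() times a small GEMM on the CPU (per the m/n/k setup and the operators/math/gemm.h include in paddle_mobile.cpp), while a GPU_CL instantiation resolves to the new template specialization that drives OpenCL directly; the C-API wrapper CaculatePredictTime() simply forwards to GetPredictTime(). The single-argument template form (second parameter defaulting to Precision::FP32) and the include path are assumptions inferred from the diff, not confirmed by it; the /data/local/tmp/bin kernel path comes from the test itself.

// Sketch only: assumes paddle-mobile's src/ directory is on the include
// path, the library is linked, and PaddleMobile's second template
// parameter defaults to Precision::FP32 as reconstructed above.
#include <cstdio>
#include "io/paddle_mobile.h"

int main() {
  // CPU instantiation: uses the generic GetPredictTime(), which times a
  // representative GEMM (m=32, n=224*224, k=27 in paddle_mobile.cpp).
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> cpu_engine;
  printf("cpu time:%f\n", cpu_engine.GetPredictTime());

#ifdef PADDLE_MOBILE_CL
  // GPU_CL instantiation: resolves to the PaddleMobile<GPU_CL,
  // Precision::FP32> specialization added in paddle_mobile.cpp, which
  // sets up OpenCL itself (clGetPlatformIDs, ...) and times the device.
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> gpu_engine;
  gpu_engine.SetCLPath("/data/local/tmp/bin");  // CL kernel dir, from the test
  printf("gpu time:%f\n", gpu_engine.GetPredictTime());
#endif
  return 0;
}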