Commit bd67efb4 authored by: Y yangfei

add cpu and gpu predict time function, optimize feed op kernel

Parent 7109adbe
@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_->SetThreadNum(config.thread_num);
   return true;
 }
+template <typename Dtype, Precision P>
+double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
+  return paddle_mobile_->GetPredictTime();
+};
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Run(
...
@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) override;
+  double CaculatePredictTime() override;
   ~PaddleMobilePredictor() override;

 private:
...
@@ -98,7 +98,7 @@ class PaddlePredictor {
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data,
                    int batch_size = -1) = 0;
+  virtual double CaculatePredictTime() = 0;
   // Destroy the Predictor.
   virtual ~PaddlePredictor() = default;
...
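Note: below is a minimal caller-side sketch (not part of this commit) of how the new pure virtual is reached through the public API. The CreatePaddlePredictor factory and the header path reflect paddle-mobile's public API as far as I know; the model_dir value and any config field other than thread_num (which appears in the Init() hunk above) are placeholder assumptions.

#include <iostream>
#include "io/paddle_inference_api.h"  // assumed public API header

int main() {
  paddle_mobile::PaddleMobileConfig config;
  config.thread_num = 1;                       // field shown in Init() above
  config.model_dir = "/data/local/tmp/model";  // hypothetical model location

  auto predictor = paddle_mobile::CreatePaddlePredictor<
      paddle_mobile::PaddleMobileConfig>(config);

  // CaculatePredictTime() forwards to PaddleMobile::GetPredictTime(), which
  // times a fixed benchmark workload rather than the loaded model.
  std::cout << "predict time: " << predictor->CaculatePredictTime() << std::endl;
  return 0;
}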
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "io/paddle_mobile.h"
+#ifdef PADDLE_MOBILE_CL
 #include <CL/cl.h>
-#include "common/common.h"
 #include "framework/cl/cl_tensor.h"
+#endif
+#include "common/common.h"
 #include "operators/math/gemm.h"
 namespace paddle_mobile {
@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
 }
 template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
+double PaddleMobile<Dtype, P>::GetPredictTime() {
   int m = 32;
   int n = 224 * 224;
   int k = 27;
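The CPU implementation estimates device speed by timing one SGEMM whose shape resembles a network's first convolution (k = 27 = 3*3*3 weights per filter, m = 32 filters, n = 224*224 output positions). Below is a standalone sketch of that timing pattern, using std::chrono and a naive loop in place of the tuned Gemm from operators/math/gemm.h; the printed number is a speed proxy, not a real model latency.

#include <chrono>
#include <cstdio>
#include <vector>

// Naive row-major SGEMM: c[m x n] = a[m x k] * b[k x n].
static void naive_sgemm(int m, int n, int k, const float *a, const float *b,
                        float *c) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) acc += a[i * k + p] * b[p * n + j];
      c[i * n + j] = acc;
    }
}

int main() {
  const int m = 32, n = 224 * 224, k = 27;  // same shape as GetPredictTime()
  std::vector<float> a(m * k, 1.f), b(k * n, 1.f), c(m * n, 0.f);

  auto t1 = std::chrono::high_resolution_clock::now();
  naive_sgemm(m, n, k, a.data(), b.data(), c.data());
  auto t2 = std::chrono::high_resolution_clock::now();

  double ms = std::chrono::duration<double, std::milli>(t2 - t1).count();
  printf("cpu gemm time: %f ms\n", ms);  // proxy for CPU predict speed
  return 0;
}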
@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
-template <typename Dtype, Precision P>
-double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
+template <>
+double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
...
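The GPU specialization opens with the standard OpenCL discovery handshake: call clGetPlatformIDs once with a null buffer to get the platform count, then again to fetch the IDs, before setting up a context and queue for the timing kernel. A minimal self-contained sketch of that pattern:

#include <CL/cl.h>
#include <cstdio>
#include <vector>

int main() {
  cl_uint nPlatform = 0;
  cl_int status = clGetPlatformIDs(0, NULL, &nPlatform);  // count query only
  if (status != CL_SUCCESS || nPlatform == 0) {
    printf("no OpenCL platform found\n");
    return -1;
  }
  std::vector<cl_platform_id> platforms(nPlatform);
  status = clGetPlatformIDs(nPlatform, platforms.data(), NULL);  // fetch IDs

  cl_uint nDevice = 0;
  clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &nDevice);
  printf("platforms: %u, GPU devices on platform 0: %u\n", nPlatform, nDevice);
  return 0;
}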
@@ -65,7 +65,7 @@ class PaddleMobile {
   void SetThreadNum(int num);
   void Clear();
-  double GetCPUPredictTime();
+  double GetPredictTime();
   ~PaddleMobile();
@@ -81,7 +81,6 @@ class PaddleMobile {
 #ifdef PADDLE_MOBILE_CL
  public:
   void SetCLPath(std::string cl_path);
-  double GetGPUPredictTime();
   int readText(const char *kernelPath,
                char **pcode);  // reads a text file into pcode; returns the string length
 #endif
...
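readText is declared here but its body is not part of this diff. As a hedged sketch only (the real implementation may differ), a helper with this signature conventionally loads a kernel source file into a freshly allocated, NUL-terminated buffer and returns its length, ready to hand to clCreateProgramWithSource:

#include <cstdio>
#include <cstdlib>

// Hypothetical implementation sketch of a readText-style helper.
int readText(const char *kernelPath, char **pcode) {
  FILE *fp = fopen(kernelPath, "rb");
  if (fp == NULL) return -1;
  fseek(fp, 0, SEEK_END);
  long size = ftell(fp);
  rewind(fp);
  *pcode = static_cast<char *>(malloc(size + 1));
  if (*pcode == NULL) {
    fclose(fp);
    return -1;
  }
  size_t nread = fread(*pcode, 1, size, fp);
  (*pcode)[nread] = '\0';  // NUL-terminate for clCreateProgramWithSource
  fclose(fp);
  return static_cast<int>(nread);
}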
@@ -18,15 +18,16 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "../test_include.h"
 void t1() {
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
   // paddle_mobile.SetThreadNum(4);
 #ifdef PADDLE_MOBILE_CL
-  paddle_mobile.SetCLPath("/data/local/tmp/bin");
+  paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
 #endif
-  printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
-  printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
+  printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
+  printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
   auto time1 = paddle_mobile::time();
-  auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
-                                 std::string(g_yolo_mul) + "/params", true);
+  auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
+                                     std::string(g_yolo_mul) + "/params", true);
   // auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
...
@@ -45,7 +46,7 @@ void t1() {
   auto time3 = paddle_mobile::time();
   int max = 10;
   for (int i = 0; i < max; ++i) {
-    vec_result = paddle_mobile.Predict(input, dims);
+    vec_result = paddle_mobile_gpu.Predict(input, dims);
   }
   auto time4 = paddle_mobile::time();
...
@@ -174,9 +175,11 @@ void t3() {
 int main() {
   // std::thread th1(t1);
   // std::thread th2(t2);
-  std::thread th1(t1);
+  std::thread th3(t3);
+  // std::thread th1(t1);
   // th1.join();
   // th2.join();
-  th1.join();
+  th3.join();
+  // th1.join();
   return 0;
 }
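With both backends exposing GetPredictTime(), a caller can benchmark each up front and pick the faster device for the real model. Below is a sketch of that selection pattern, mirroring the objects in t1(); treating a non-positive GPU result as "no usable device" is an assumption for illustration, not documented behavior.

#include <cstdio>
#include "io/paddle_mobile.h"

// Returns true when the GPU probe succeeds and beats the CPU probe.
bool PreferGPU() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> cpu;
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> gpu;
  double cpu_time = cpu.GetPredictTime();
  double gpu_time = gpu.GetPredictTime();
  printf("cpu time:%f gpu time:%f\n", cpu_time, gpu_time);
  // Assumption: fall back to CPU when the GPU probe fails or is slower.
  return gpu_time > 0 && gpu_time < cpu_time;
}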