Commit 70966b6f authored by yangfei

add cpu and gpu predict function, optimize feed op kernel

Parent d89d0cfd
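
This commit unifies predict-time measurement behind a single `GetPredictTime()` entry point on `PaddleMobile` (with a full specialization for the GPU_CL backend) and exposes it through the predictor API as `CaculatePredictTime()`. A minimal usage sketch, mirroring the test changes further down in this diff (the CL path is the test's, not a requirement):

```cpp
#include <cstdio>
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> cpu_engine;
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> gpu_engine;
#ifdef PADDLE_MOBILE_CL
  // Directory where compiled OpenCL kernel binaries are cached.
  gpu_engine.SetCLPath("/data/local/tmp/bin");
#endif
  // Each backend now answers through the same method name.
  printf("cpu time: %f\n", cpu_engine.GetPredictTime());
  printf("gpu time: %f\n", gpu_engine.GetPredictTime());
  return 0;
}
```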
......@@ -52,6 +52,10 @@ bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
paddle_mobile_->SetThreadNum(config.thread_num);
return true;
}
template <typename Dtype, Precision P>
double PaddleMobilePredictor<Dtype, P>::CaculatePredictTime() {
return paddle_mobile_->GetPredictTime();
};
template <typename Dtype, Precision P>
bool PaddleMobilePredictor<Dtype, P>::Run(
......
......@@ -40,6 +40,8 @@ class PaddleMobilePredictor : public PaddlePredictor {
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override;
double CaculatePredictTime() override;
~PaddleMobilePredictor() override;
private:
......
......@@ -98,7 +98,7 @@ class PaddlePredictor {
virtual bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
int batch_size = -1) = 0;
virtual double CaculatePredictTime() = 0;
// Destroy the Predictor.
virtual ~PaddlePredictor() = default;
......
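With the pure virtual added to `PaddlePredictor`, every concrete predictor must now implement `CaculatePredictTime()` (the identifier's spelling is as committed). A hedged caller-side sketch, assuming the `CreatePaddlePredictor` factory declared alongside this interface:

```cpp
#include <memory>
#include "io/paddle_inference_api.h"

// Benchmark any predictor implementation through the base interface.
double BenchmarkPredictTime(const paddle_mobile::PaddleMobileConfig &config) {
  std::unique_ptr<paddle_mobile::PaddlePredictor> predictor =
      paddle_mobile::CreatePaddlePredictor(config);
  // For PaddleMobilePredictor this forwards to PaddleMobile::GetPredictTime().
  return predictor->CaculatePredictTime();
}
```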
......@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "io/paddle_mobile.h"
#ifdef PADDLE_MOBILE_CL
#include <CL/cl.h>
#include "common/common.h"
#include "framework/cl/cl_tensor.h"
#endif
#include "common/common.h"
#include "operators/math/gemm.h"
namespace paddle_mobile {
......@@ -123,7 +125,7 @@ void PaddleMobile<Dtype, P>::Clear() {
}
template <typename Dtype, Precision P>
double PaddleMobile<Dtype, P>::GetCPUPredictTime() {
double PaddleMobile<Dtype, P>::GetPredictTime() {
int m = 32;
int n = 224 * 224;
int k = 27;
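The CPU implementation times a fixed GEMM as a proxy workload: m = 32, n = 224 * 224, k = 27 matches a 3x3 convolution over 3 input channels producing 32 output maps on a 224x224 image, lowered to matrix multiply (the rest of the function is collapsed in this diff). A minimal sketch of the timing pattern, with a naive triple loop standing in for the real GEMM from operators/math/gemm.h, whose exact signature is not shown here:

```cpp
#include <chrono>
#include <vector>

// Time one (m x k) * (k x n) multiply in milliseconds. The inner loops are a
// placeholder; the committed code calls the library GEMM routine instead.
double TimeGemmMs(int m, int n, int k) {
  std::vector<float> a(m * k, 1.f), b(k * n, 1.f), c(m * n, 0.f);
  auto t0 = std::chrono::steady_clock::now();
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int p = 0; p < k; ++p) acc += a[i * k + p] * b[p * n + j];
      c[i * n + j] = acc;
    }
  }
  auto t1 = std::chrono::steady_clock::now();
  return std::chrono::duration<double, std::milli>(t1 - t0).count();
}
```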
......@@ -204,8 +206,8 @@ void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
framework::CLEngine::Instance()->setClPath(path);
}
}
template <typename Dtype, Precision P>
double PaddleMobile<Dtype, P>::GetGPUPredictTime() {
template <>
double PaddleMobile<GPU_CL, Precision::FP32>::GetPredictTime() {
cl_int status;
cl_uint nPlatform;
clGetPlatformIDs(0, NULL, &nPlatform);
......
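The GPU measurement is a full specialization for `<GPU_CL, Precision::FP32>` and begins with standard OpenCL discovery: count the platforms, then pick a GPU device. A self-contained sketch of that step using only standard OpenCL 1.x calls (the helper name is illustrative; the committed function continues past what the diff shows):

```cpp
#include <CL/cl.h>
#include <vector>

// Return the first GPU device found across all OpenCL platforms,
// or nullptr when none is available. Error handling trimmed.
cl_device_id PickFirstGpuDevice() {
  cl_uint num_platforms = 0;
  clGetPlatformIDs(0, nullptr, &num_platforms);
  std::vector<cl_platform_id> platforms(num_platforms);
  clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
  for (cl_platform_id platform : platforms) {
    cl_uint num_devices = 0;
    cl_int status =
        clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
    if (status == CL_SUCCESS && num_devices > 0) {
      cl_device_id device = nullptr;
      clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
      return device;
    }
  }
  return nullptr;
}
```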
......@@ -65,7 +65,7 @@ class PaddleMobile {
void SetThreadNum(int num);
void Clear();
double GetCPUPredictTime();
double GetPredictTime();
~PaddleMobile();
......@@ -81,7 +81,6 @@ class PaddleMobile {
#ifdef PADDLE_MOBILE_CL
public:
void SetCLPath(std::string cl_path);
double GetGPUPredictTime();
int readText(const char *kernelPath,
char **pcode);  // read a text file into pcode; returns the string length
#endif
......
......@@ -18,15 +18,16 @@ limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
void t1() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile_gpu;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile_cpu;
// paddle_mobile.SetThreadNum(4);
#ifdef PADDLE_MOBILE_CL
paddle_mobile.SetCLPath("/data/local/tmp/bin");
paddle_mobile_gpu.SetCLPath("/data/local/tmp/bin");
#endif
printf("cpu time:%f\n", paddle_mobile.GetCPUPredictTime());
printf("gpu time:%f\n", paddle_mobile.GetGPUPredictTime());
printf("cpu time:%f\n", paddle_mobile_cpu.GetPredictTime());
printf("gpu time:%f\n", paddle_mobile_gpu.GetPredictTime());
auto time1 = paddle_mobile::time();
auto isok = paddle_mobile.Load(std::string(g_yolo_mul) + "/model",
auto isok = paddle_mobile_gpu.Load(std::string(g_yolo_mul) + "/model",
std::string(g_yolo_mul) + "/params", true);
// auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
......@@ -45,7 +46,7 @@ void t1() {
auto time3 = paddle_mobile::time();
int max = 10;
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims);
vec_result = paddle_mobile_gpu.Predict(input, dims);
}
auto time4 = paddle_mobile::time();
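The timing loop repeats Predict `max` times between time3 and time4; assuming test_helper.h's `time_diff` returns milliseconds (its definition is outside this diff), the average per-run latency would be reported as:

```cpp
// Mean GPU predict latency over the max (= 10) iterations; time_diff's
// millisecond return value is an assumption, not shown in this diff.
std::cout << "average gpu predict cost: "
          << paddle_mobile::time_diff(time3, time4) / max << "ms" << std::endl;
```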
......@@ -174,9 +175,11 @@ void t3() {
int main() {
// std::thread th1(t1);
// std::thread th2(t2);
std::thread th1(t1);
std::thread th3(t3);
// std::thread th1(t1);
// th1.join();
// th2.join();
th1.join();
th3.join();
// th1.join();
return 0;
}