Commit 518aca15 authored by H hjchen2

Fix opencl compile

Parent d49d6147
...
@@ -456,9 +456,8 @@ void Executor<Device, T>::LoadMemory(const VarDesc var_desc, float *tensorInput,
                                      char **data) {}
 template <>
-void Executor<GPU_CL, Precision::FP32>::LoadMemory(const VarDesc var_desc,
-                                                   float *tensorInput,
-                                                   char **data) {
+void Executor<GPU_CL, float>::LoadMemory(const VarDesc var_desc,
+                                         float *tensorInput, char **data) {
   // 1. version
   uint32_t version = *reinterpret_cast<uint32_t *>(*data);
...
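Why the Executor hunk fixes the build: the class template declares its second parameter as a type (`typename T`), so an explicit specialization must supply a type such as `float`; the enum value `Precision::FP32` cannot bind to a type parameter. A minimal compilable sketch of the rule, using simplified stand-in types rather than the real paddle-mobile declarations:

#include <cstdint>

enum class Precision { FP32 };  // stand-in for the real enum
struct GPU_CL {};               // stand-in device tag
struct VarDesc {};              // stand-in descriptor

template <typename Device, typename T>  // second parameter is a *type*
class Executor {
 public:
  void LoadMemory(const VarDesc var_desc, float *tensorInput, char **data);
};

// OK: float is a type, so it can bind to `typename T`.
template <>
void Executor<GPU_CL, float>::LoadMemory(const VarDesc var_desc,
                                         float *tensorInput, char **data) {
  // 1. version (mirrors the first step of the real body)
  uint32_t version = *reinterpret_cast<uint32_t *>(*data);
  (void)version;
  (void)var_desc;
  (void)tensorInput;
}

// Does not compile: Precision::FP32 is an enum *value*, not a type.
// template <>
// void Executor<GPU_CL, Precision::FP32>::LoadMemory(const VarDesc, float *,
//                                                    char **) {}

int main() {
  uint32_t v = 1;
  char *raw = reinterpret_cast<char *>(&v);
  Executor<GPU_CL, float>().LoadMemory(VarDesc{}, nullptr, &raw);
}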
...
@@ -202,50 +202,50 @@ double PaddleMobile<CPU, float>::GetPredictTime() {
 #endif
 #ifdef PADDLE_MOBILE_FPGA
-template <typename Device, T P>
-void PaddleMobile<Device, P>::InjectVariable(const framework::Tensor &t,
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::InjectVariable(const framework::Tensor &t,
                                              std::string var_name) {
   executor_->InjectVariable(t, var_name);
 }
-template <typename Device, T P>
-void PaddleMobile<Device, P>::FeedData(const framework::Tensor &t) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) {
   executor_->FeedData(t);
 }
-template <typename Device, T P>
-std::shared_ptr<framework::Tensor> PaddleMobile<Device, P>::FetchResult(
+template <typename Device, typename T>
+std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
     int id) {
   return executor_->FetchResult(id);
 }
-template <typename Device, T P>
-void PaddleMobile<Device, P>::Predict_From_To(int start, int end) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Predict_From_To(int start, int end) {
   executor_->Predict_From_To(start, end);
 }
-template <typename Device, T P>
-void PaddleMobile<Device, P>::Predict_From(int start) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Predict_From(int start) {
   executor_->Predict_From(start);
 }
-template <typename Device, T P>
-void PaddleMobile<Device, P>::Predict_To(int end) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::Predict_To(int end) {
   executor_->Predict_To(end);
 }
 #endif
 #ifdef PADDLE_MOBILE_CL
 static std::mutex lc;
-template <typename Device, T P>
-void PaddleMobile<Device, P>::SetCLPath(std::string path) {
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::SetCLPath(std::string path) {
   std::lock_guard<std::mutex> lock(lc);
   if (framework::CLEngine::Instance()->GetCLPath() == "") {
     framework::CLEngine::Instance()->setClPath(path);
   }
 }
 template <>
-double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
+double PaddleMobile<GPU_CL, float>::GetPredictTime() {
   cl_int status;
   cl_uint nPlatform;
   clGetPlatformIDs(0, NULL, &nPlatform);
...
@@ -443,8 +443,8 @@ double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
     return -1;
   }
 }
-template <typename Device, T P>
-int PaddleMobile<Device, P>::readText(
+template <typename Device, typename T>
+int PaddleMobile<Device, T>::readText(
     const char *kernelPath,
    char **pcode) {  // reads a text file into pcode; returns the string length
   FILE *fp;
...
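Why the PaddleMobile hunks fix the build: `template <typename Device, T P>` tries to declare a non-type parameter `P` of type `T`, but no type `T` is in scope at that point, so every one of these out-of-class member definitions was rejected; the parameter list must mirror the class template's `<typename Device, typename T>`. The same type-versus-value rule turns the specialization argument `T::FP32` into `float`. A minimal sketch of both fixes, with a simplified hypothetical class in place of the real one:

struct GPU_CL {};  // stand-in device tag

template <typename Device, typename T>
class PaddleMobile {
 public:
  void Predict_From(int start);
  double GetPredictTime();
};

// Out-of-class definition: repeat the class template's parameter list.
// The old `template <typename Device, T P>` named an undeclared type T.
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_From(int start) {
  (void)start;  // the real code forwards to executor_->Predict_From(start)
}

// A full specialization takes a type argument: `float`, not `T::FP32`.
template <>
double PaddleMobile<GPU_CL, float>::GetPredictTime() {
  return -1;  // the real code measures an OpenCL run; -1 signals failure
}

int main() {
  PaddleMobile<GPU_CL, float> pm;
  pm.Predict_From(0);
  return pm.GetPredictTime() < 0 ? 0 : 1;
}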
...
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "operators/kernel/feed_kernel.h"
+#include "framework/cl/cl_tensor.h"
 namespace paddle_mobile {
 namespace operators {
...
@@ -43,8 +44,8 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   const int Stride2 = out_C * out_H * out_W;
   const int Stride1 = out_H * out_W;
   const int Stride0 = out_W;
-  CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
-                           this->cl_helper_.CLCommandQueue());
+  framework::CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
+                                      this->cl_helper_.CLCommandQueue());
   input_cl_tensor.Resize(input->dims());
   cl_mem inputBuffer = input_cl_tensor.mutable_with_data<float>(input_data);
...
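Why the feed_kernel hunks fix the build: `CLTensor` is declared in `paddle_mobile::framework`, but `FeedKernel<GPU_CL, float>::Compute` lives in `paddle_mobile::operators`, so the unqualified name is not visible there; the commit both includes `framework/cl/cl_tensor.h` and qualifies the name. A minimal sketch of the lookup rule with stand-in types:

namespace paddle_mobile {
namespace framework {
struct CLTensor {};  // stand-in for the real framework::CLTensor
}  // namespace framework

namespace operators {
void Compute() {
  // CLTensor t;            // error: no CLTensor in paddle_mobile::operators
  framework::CLTensor t;    // OK: lookup reaches the enclosing namespace and
                            // finds paddle_mobile::framework
  (void)t;
}
}  // namespace operators
}  // namespace paddle_mobile

int main() { paddle_mobile::operators::Compute(); }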
...
@@ -94,8 +94,9 @@ void FusionFcCompute(const FusionFcParam<GPU_CL> &param, cl_context context,
     memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes);
   }
-  math::MatMul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
-                      out, static_cast<float>(1), false);
+  math::MatMul<float, float>(x_matrix, false, y_matrix, false,
+                             static_cast<float>(1), out, static_cast<float>(1),
+                             false);
   out_image->InitEmptyImage(context, commandQueue, out->dims());
   framework::TensorToCLImage(out, out_image, context, commandQueue, kernel1);
...
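Why the fusion_fc hunk changes the call: the call site now spells out two template arguments, which matches a `math::MatMul` declared with two type parameters (presumably separate input and output element types) where the second cannot be deduced from the call. A minimal sketch of that failure mode, with a simplified hypothetical signature rather than the real MatMul:

namespace math {
// Assumed shape: two type parameters, the second neither defaulted nor
// deducible from the function arguments.
template <typename Itype, typename Otype>
void MatMul(float alpha, float beta, bool relu) {
  (void)alpha;
  (void)beta;
  (void)relu;
}
}  // namespace math

int main() {
  // math::MatMul<float>(1.0f, 1.0f, false);      // error: Otype not deduced
  math::MatMul<float, float>(1.0f, 1.0f, false);  // OK: both spelled out
}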