提交 e28aa4b0 · 作者: hjchen2

Fix OpenCL compile errors

上级 6666fea2
......@@ -456,9 +456,8 @@ void Executor<Device, T>::LoadMemory(const VarDesc var_desc, float *tensorInput,
char **data) {}
template <>
void Executor<GPU_CL, Precision::FP32>::LoadMemory(const VarDesc var_desc,
float *tensorInput,
char **data) {
void Executor<GPU_CL, float>::LoadMemory(const VarDesc var_desc,
float *tensorInput, char **data) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*data);
......
......@@ -202,50 +202,50 @@ double PaddleMobile<CPU, float>::GetPredictTime() {
#endif
#ifdef PADDLE_MOBILE_FPGA
template <typename Device, T P>
void PaddleMobile<Device, P>::InjectVariable(const framework::Tensor &t,
template <typename Device, typename T>
void PaddleMobile<Device, T>::InjectVariable(const framework::Tensor &t,
std::string var_name) {
executor_->InjectVariable(t, var_name);
}
template <typename Device, T P>
void PaddleMobile<Device, P>::FeedData(const framework::Tensor &t) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const framework::Tensor &t) {
executor_->FeedData(t);
}
template <typename Device, T P>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, P>::FetchResult(
template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
int id) {
return executor_->FetchResult(id);
}
template <typename Device, T P>
void PaddleMobile<Device, P>::Predict_From_To(int start, int end) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_From_To(int start, int end) {
executor_->Predict_From_To(start, end);
}
template <typename Device, T P>
void PaddleMobile<Device, P>::Predict_From(int start) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_From(int start) {
executor_->Predict_From(start);
}
template <typename Device, T P>
void PaddleMobile<Device, P>::Predict_To(int end) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::Predict_To(int end) {
executor_->Predict_To(end);
}
#endif
#ifdef PADDLE_MOBILE_CL
static std::mutex lc;
template <typename Device, T P>
void PaddleMobile<Device, P>::SetCLPath(std::string path) {
template <typename Device, typename T>
void PaddleMobile<Device, T>::SetCLPath(std::string path) {
std::lock_guard<std::mutex> lock(lc);
if (framework::CLEngine::Instance()->GetCLPath() == "") {
framework::CLEngine::Instance()->setClPath(path);
}
}
template <>
double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
double PaddleMobile<GPU_CL, float>::GetPredictTime() {
cl_int status;
cl_uint nPlatform;
clGetPlatformIDs(0, NULL, &nPlatform);
......@@ -443,8 +443,8 @@ double PaddleMobile<GPU_CL, T::FP32>::GetPredictTime() {
return -1;
}
}
template <typename Device, T P>
int PaddleMobile<Device, P>::readText(
template <typename Device, typename T>
int PaddleMobile<Device, T>::readText(
const char *kernelPath,
char **pcode) { // 读取文本文件放入 pcode,返回字符串长度
FILE *fp;
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "operators/kernel/feed_kernel.h"
#include "framework/cl/cl_tensor.h"
namespace paddle_mobile {
namespace operators {
......@@ -43,8 +44,8 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
const int Stride2 = out_C * out_H * out_W;
const int Stride1 = out_H * out_W;
const int Stride0 = out_W;
CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
framework::CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
input_cl_tensor.Resize(input->dims());
cl_mem inputBuffer = input_cl_tensor.mutable_with_data<float>(input_data);
......
......@@ -94,8 +94,9 @@ void FusionFcCompute(const FusionFcParam<GPU_CL> &param, cl_context context,
memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes);
}
math::MatMul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
out, static_cast<float>(1), false);
math::MatMul<float, float>(x_matrix, false, y_matrix, false,
static_cast<float>(1), out, static_cast<float>(1),
false);
out_image->InitEmptyImage(context, commandQueue, out->dims());
framework::TensorToCLImage(out, out_image, context, commandQueue, kernel1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册