提交 0816c284 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge pull request #1015 from codeWorm2015/opencl

add cl classes
......@@ -39,7 +39,7 @@ struct PrecisionTrait<Precision::FP16> {
};
//! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kGPU_CL = 3};
template <DeviceTypeEnum T>
struct DeviceType {};
......@@ -47,6 +47,8 @@ struct DeviceType {};
typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kGPU_CL> GPU_CL;
//! data type
enum DataType {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <CL/cl.h>
struct CLKernelDeleter {
template <class T>
void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj);
}
};
struct CLMemDeleter {
template <class T>
void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj);
}
};
struct CLCommQueueDeleter {
template <class T>
void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj);
}
};
struct CLContextDeleter {
template <class T>
void operator()(T *clContextObj) {
clReleaseContext(clContextObj);
}
};
struct CLProgramDeleter {
template <class T>
void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj);
}
};
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h"
#include <CL/cl.h>
......@@ -25,11 +26,11 @@ bool CLEngine::Init() {
cl_int status;
setPlatform();
setClDeviceId();
setClContext();
setClCommandQueue();
std::string filename = "./HelloWorld_Kernel.cl";
loadKernelFromFile(filename.c_str());
buildProgram();
// setClContext();
// setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str());
// buildProgram();
initialized_ = true;
}
......@@ -38,19 +39,19 @@ CLEngine *CLEngine::Instance() {
return &cl_engine_;
}
std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
const std::string &kernel_name) {
std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
return std::move(kernel);
}
bool CLEngine::SetClCommandQueue() {
cl_int status;
command_queue_.reset(
clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
return true;
}
//std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
//bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
bool CLEngine::SetPlatform() {
platform_ = NULL; // the chosen platform
......@@ -70,10 +71,10 @@ bool CLEngine::SetPlatform() {
}
}
bool CLEngine::SetClContext() {
context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
return true;
}
//bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
bool CLEngine::SetClDeviceId() {
cl_uint numDevices = 0;
......@@ -92,40 +93,35 @@ bool CLEngine::SetClDeviceId() {
return false;
}
bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
size_t size;
char *str;
std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
if (!f.is_open()) {
return false;
}
size_t fileSize;
f.seekg(0, std::fstream::end);
size = fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg);
str = new char[size + 1];
if (!str) {
f.close();
return 0;
}
//bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
// if (!f.is_open()) {
// return false;
// }
//
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
// str = new char[size + 1];
// if (!str) {
// f.close();
// return 0;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
// return true;
//}
f.read(str, fileSize);
f.close();
str[size] = '\0';
const char *source = str;
size_t sourceSize[] = {strlen(source)};
program_.reset(
clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
return true;
}
bool CLEngine::BuildProgram() {
cl_int status;
status = clBuildProgram(program_.get(), 0, 0, "-cl-fast-relaxed-math", 0, 0);
return true;
}
} // namespace framework
} // namespace paddle_mobile
......@@ -21,60 +21,63 @@ limitations under the License. */
#include <memory>
#include <string>
#include "common/enforce.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile {
namespace framework {
struct CLContext {};
struct CLKernelDeleter {
template <class T>
void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj);
}
};
struct CLMemDeleter {
template <class T>
void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj);
}
};
struct CLCommQueueDeleter {
template <class T>
void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj);
}
};
struct CLContextDeleter {
template <class T>
void operator()(T *clContextObj) {
clReleaseContext(clContextObj);
}
};
struct CLProgramDeleter {
template <class T>
void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj);
}
};
class CLEngine {
public:
static CLEngine *Instance();
bool Init();
std::unique_ptr<_cl_kernel, clKernel_deleter> GetKernel(
const std::string &kernel_name);
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, NULL);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
return std::move(context_ptr);
}
const cl_context GetContext() { return context_.get(); }
std::unique_ptr<_cl_command_queue, CLContextDeleter> CreateClCommandQueue() {
cl_int status;
cl_command_queue = clCreateCommandQueue(context_.get(), devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(cl_command_queue);
return std::move(command_queue_ptr);
}
const cl_program GetProgram() { return program_.get(); }
std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(cl_context context, std::string file_name) {
const char *kernel_file = file_name.c_str();
size_t size;
char *str;
std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
PADDLE_MOBILE_ENFORCE(f.is_open(), " file open failed")
size_t fileSize;
f.seekg(0, std::fstream::end);
size = fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg);
str = new char[size+1];
PADDLE_MOBILE_ENFORCE(str != NULL, " str null")
f.read(str, fileSize);
f.close();
str[size] = '\0';
const char *source = str;
size_t sourceSize[] = {strlen(source)};
cl_program p = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
return std::move(program_ptr);
}
const cl_command_queue GetCommandQueue() { return command_queue_.get(); }
bool CLEngine::BuildProgram(cl_program program) {
cl_int status;
status = clBuildProgram(program, 0, 0, "-cl-fast-relaxed-math", 0, 0);
CL_CHECK_ERRORS(status);
return true;
}
private:
CLEngine() { initialized_ = false; }
......@@ -83,20 +86,25 @@ class CLEngine {
bool SetClDeviceId();
bool SetClContext();
// bool SetClContext();
bool SetClCommandQueue();
// bool SetClCommandQueue();
bool LoadKernelFromFile(const char *kernel_file);
// bool LoadKernelFromFile(const char *kernel_file);
bool BuildProgram();
// bool BuildProgram();
bool initialized_;
cl_platform_id platform_;
cl_device_id *devices_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unique_ptr<_cl_program, clProgram_deleter> program_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
};
} // namespace framework
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile {
namespace framework {
class CLHelper {
public:
CLHelper(CLScope *scope): scope_(scope) {
}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
auto kernel = scope_->GetKernel(kernel_name, file_name);
kernels.emplace_back(kernel);
}
cl_kernel KernelAt(const int index) {
return kernels[index].get();
}
cl_command_queue CLCommandQueue() {
return scope_->CommandQueue();
}
cl_context CLContext() {
return scope_->Context();
}
private:
CLScope *scope_;
std::vector<std::unique_ptr<_cl_kernel, CLKernelDeleter>> kernels;
};
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/ddim.h"
#include "framework/tensor.h"
namespace paddle_mobile {
namespace framework {
class CLImage {
public:
CLImage(cl_context context, float *tensorInput, DDim ddim) : tensorDims_(ddim), context_(context) {
}
const DDim &TensorDim();
private:
cl_mem cl_image_;
DDim tensorDims_;
cl_context context_;
};
void TensorToCLImage(Tensor *tensor, CLImage *image) {
}
void CLImageToTensor(CLImage *image, Tensor *tensor) {
}
}
}
\ No newline at end of file
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile {
namespace framework {
class CLScope {
public:
CLScope() {
CLEngine *engin = CLEngine::Instance();
context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue();
}
cl_command_queue CommandQueue() {
return command_queue_.get();
}
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name);
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(clCreateKernel(program, kernel_name.c_str(), NULL));
return std::move(kernel);
}
cl_context Context() {
return context_.get();
}
cl_program Program(const std::string &file_name) {
auto it = programs_.find(file_name);
if (it != programs_.end()) {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(context_.get(), file_name);
programs_[file_name] = program;
status_ = clBuildProgram(program, 0, 0, 0, 0, 0);
CL_CHECK_ERRORS(status_);
return program;
}
private:
cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unordered_map<std::string, std::unique_ptr<_cl_program, CLProgramDeleter>> programs_;
};
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define CL_CHECK_ERRORS(ERR) \
if(ERR != CL_SUCCESS) \
{ \
printf( \
"OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__ \
); \
}
#endif
\ No newline at end of file
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "common/enforce.h"
#include "common/type_define.h"
#include "common/types.h"
......@@ -31,6 +32,8 @@ limitations under the License. */
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_helper.h"
namespace paddle_mobile {
namespace framework {
......@@ -112,7 +115,8 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {}
param_(inputs, outputs, attrs, *scope),
kernel_(scope->GetCLScpoe()) {}
virtual void RunImpl() const { this->kernel_.Compute(this->param_); }
......@@ -138,6 +142,11 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
template <typename Dtype, typename P>
class OpKernelBase {
public:
OpKernelBase() = default;
OpKernelBase(CLScope *clscope): cl_helper_(clscope) {
}
/*
* @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体,
......@@ -158,6 +167,10 @@ class OpKernelBase {
#ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_;
#endif
CLHelper cl_helper_;
};
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
......
......@@ -15,6 +15,11 @@ limitations under the License. */
#pragma once
#include <list>
//#ifdef PADDLE_MOBILE_OCL
#include "framework/cl/cl_scope.h"
//#endif
#include <unordered_map>
#include "variable.h"
......@@ -33,6 +38,11 @@ class Scope {
delete kid;
}
kids_.clear();
//#ifdef PADDLE_MOBILE_OCL
delete cl_scope_;
//#endif
}
Scope &NewScope() const;
......@@ -72,6 +82,10 @@ class Scope {
Variable *FindVarLocally(const std::string &name) const;
CLScope *GetCLScpoe() {
return cl_scope_;
}
private:
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const *parent) : parent_(parent) {}
......@@ -79,6 +93,9 @@ class Scope {
mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr};
//#ifdef PADDLE_MOBILE_OCL
CLScope *cl_scope_ = new CLScope();
//#endif
};
} // namespace framework
} // namespace paddle_mobile
......@@ -290,6 +290,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
delete origin_data;
LOG(kLOG_INFO) << " end init combine memory ";
}
template <typename Dtype, Precision P>
bool Executor<Dtype, P>::varInputMemory(
const std::shared_ptr<framework::VarDesc> &var_desc, Variable *var,
......
__kernel void conv_3x3(__global float* in, __global float* out) {
int num = get_global_id(0);
out[num] = in[num] * 0.1 + 102;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
return true;
}
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) const {
}
template class ConvKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册