提交 0816c284 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge pull request #1015 from codeWorm2015/opencl

add cl classes
...@@ -39,7 +39,7 @@ struct PrecisionTrait<Precision::FP16> { ...@@ -39,7 +39,7 @@ struct PrecisionTrait<Precision::FP16> {
}; };
//! device type //! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 }; enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kGPU_CL = 3};
template <DeviceTypeEnum T> template <DeviceTypeEnum T>
struct DeviceType {}; struct DeviceType {};
...@@ -47,6 +47,8 @@ struct DeviceType {}; ...@@ -47,6 +47,8 @@ struct DeviceType {};
typedef DeviceType<kCPU> CPU; typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA; typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI; typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kGPU_CL> GPU_CL;
//! data type //! data type
enum DataType { enum DataType {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <CL/cl.h>
struct CLKernelDeleter {
template <class T>
void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj);
}
};
struct CLMemDeleter {
template <class T>
void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj);
}
};
struct CLCommQueueDeleter {
template <class T>
void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj);
}
};
struct CLContextDeleter {
template <class T>
void operator()(T *clContextObj) {
clReleaseContext(clContextObj);
}
};
struct CLProgramDeleter {
template <class T>
void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj);
}
};
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h" #include "framework/cl/cl_engine.h"
#include <CL/cl.h> #include <CL/cl.h>
...@@ -25,11 +26,11 @@ bool CLEngine::Init() { ...@@ -25,11 +26,11 @@ bool CLEngine::Init() {
cl_int status; cl_int status;
setPlatform(); setPlatform();
setClDeviceId(); setClDeviceId();
setClContext(); // setClContext();
setClCommandQueue(); // setClCommandQueue();
std::string filename = "./HelloWorld_Kernel.cl"; // std::string filename = "./HelloWorld_Kernel.cl";
loadKernelFromFile(filename.c_str()); // loadKernelFromFile(filename.c_str());
buildProgram(); // buildProgram();
initialized_ = true; initialized_ = true;
} }
...@@ -38,19 +39,19 @@ CLEngine *CLEngine::Instance() { ...@@ -38,19 +39,19 @@ CLEngine *CLEngine::Instance() {
return &cl_engine_; return &cl_engine_;
} }
std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel( //std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
const std::string &kernel_name) { // const std::string &kernel_name) {
std::unique_ptr<_cl_kernel, clKernel_deleter> kernel( // std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
clCreateKernel(program_.get(), kernel_name.c_str(), NULL)); // clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
return std::move(kernel); // return std::move(kernel);
} //}
//
bool CLEngine::SetClCommandQueue() { //bool CLEngine::SetClCommandQueue() {
cl_int status; // cl_int status;
command_queue_.reset( // command_queue_.reset(
clCreateCommandQueue(context_.get(), devices_[0], 0, &status)); // clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
return true; // return true;
} //}
bool CLEngine::SetPlatform() { bool CLEngine::SetPlatform() {
platform_ = NULL; // the chosen platform platform_ = NULL; // the chosen platform
...@@ -70,10 +71,10 @@ bool CLEngine::SetPlatform() { ...@@ -70,10 +71,10 @@ bool CLEngine::SetPlatform() {
} }
} }
bool CLEngine::SetClContext() { //bool CLEngine::SetClContext() {
context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL)); // context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
return true; // return true;
} //}
bool CLEngine::SetClDeviceId() { bool CLEngine::SetClDeviceId() {
cl_uint numDevices = 0; cl_uint numDevices = 0;
...@@ -92,40 +93,35 @@ bool CLEngine::SetClDeviceId() { ...@@ -92,40 +93,35 @@ bool CLEngine::SetClDeviceId() {
return false; return false;
} }
bool CLEngine::LoadKernelFromFile(const char *kernel_file) { //bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
size_t size; // size_t size;
char *str; // char *str;
std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary)); // std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
if (!f.is_open()) { // if (!f.is_open()) {
return false; // return false;
} // }
//
size_t fileSize; // size_t fileSize;
f.seekg(0, std::fstream::end); // f.seekg(0, std::fstream::end);
size = fileSize = (size_t)f.tellg(); // size = fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg); // f.seekg(0, std::fstream::beg);
str = new char[size + 1]; // str = new char[size + 1];
if (!str) { // if (!str) {
f.close(); // f.close();
return 0; // return 0;
} // }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
// return true;
//}
f.read(str, fileSize);
f.close();
str[size] = '\0';
const char *source = str;
size_t sourceSize[] = {strlen(source)};
program_.reset(
clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
return true;
}
bool CLEngine::BuildProgram() {
cl_int status;
status = clBuildProgram(program_.get(), 0, 0, "-cl-fast-relaxed-math", 0, 0);
return true;
}
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -21,60 +21,63 @@ limitations under the License. */ ...@@ -21,60 +21,63 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include "common/enforce.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
struct CLContext {};
struct CLKernelDeleter {
template <class T>
void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj);
}
};
struct CLMemDeleter {
template <class T>
void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj);
}
};
struct CLCommQueueDeleter {
template <class T>
void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj);
}
};
struct CLContextDeleter {
template <class T>
void operator()(T *clContextObj) {
clReleaseContext(clContextObj);
}
};
struct CLProgramDeleter {
template <class T>
void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj);
}
};
class CLEngine { class CLEngine {
public: public:
static CLEngine *Instance(); static CLEngine *Instance();
bool Init(); bool Init();
std::unique_ptr<_cl_kernel, clKernel_deleter> GetKernel( std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
const std::string &kernel_name); cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, NULL);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
return std::move(context_ptr);
}
const cl_context GetContext() { return context_.get(); } std::unique_ptr<_cl_command_queue, CLContextDeleter> CreateClCommandQueue() {
cl_int status;
cl_command_queue = clCreateCommandQueue(context_.get(), devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(cl_command_queue);
return std::move(command_queue_ptr);
}
const cl_program GetProgram() { return program_.get(); } std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(cl_context context, std::string file_name) {
const char *kernel_file = file_name.c_str();
size_t size;
char *str;
std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
PADDLE_MOBILE_ENFORCE(f.is_open(), " file open failed")
size_t fileSize;
f.seekg(0, std::fstream::end);
size = fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg);
str = new char[size+1];
PADDLE_MOBILE_ENFORCE(str != NULL, " str null")
f.read(str, fileSize);
f.close();
str[size] = '\0';
const char *source = str;
size_t sourceSize[] = {strlen(source)};
cl_program p = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
return std::move(program_ptr);
}
const cl_command_queue GetCommandQueue() { return command_queue_.get(); } bool CLEngine::BuildProgram(cl_program program) {
cl_int status;
status = clBuildProgram(program, 0, 0, "-cl-fast-relaxed-math", 0, 0);
CL_CHECK_ERRORS(status);
return true;
}
private: private:
CLEngine() { initialized_ = false; } CLEngine() { initialized_ = false; }
...@@ -83,20 +86,25 @@ class CLEngine { ...@@ -83,20 +86,25 @@ class CLEngine {
bool SetClDeviceId(); bool SetClDeviceId();
bool SetClContext(); // bool SetClContext();
bool SetClCommandQueue(); // bool SetClCommandQueue();
bool LoadKernelFromFile(const char *kernel_file); // bool LoadKernelFromFile(const char *kernel_file);
bool BuildProgram(); // bool BuildProgram();
bool initialized_; bool initialized_;
cl_platform_id platform_; cl_platform_id platform_;
cl_device_id *devices_; cl_device_id *devices_;
std::unique_ptr<_cl_context, CLContextDeleter> context_; std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_; std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unique_ptr<_cl_program, clProgram_deleter> program_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
}; };
} // namespace framework } // namespace framework
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile {
namespace framework {
class CLHelper {
public:
CLHelper(CLScope *scope): scope_(scope) {
}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
auto kernel = scope_->GetKernel(kernel_name, file_name);
kernels.emplace_back(kernel);
}
cl_kernel KernelAt(const int index) {
return kernels[index].get();
}
cl_command_queue CLCommandQueue() {
return scope_->CommandQueue();
}
cl_context CLContext() {
return scope_->Context();
}
private:
CLScope *scope_;
std::vector<std::unique_ptr<_cl_kernel, CLKernelDeleter>> kernels;
};
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/ddim.h"
#include "framework/tensor.h"
namespace paddle_mobile {
namespace framework {
class CLImage {
public:
CLImage(cl_context context, float *tensorInput, DDim ddim) : tensorDims_(ddim), context_(context) {
}
const DDim &TensorDim();
private:
cl_mem cl_image_;
DDim tensorDims_;
cl_context context_;
};
void TensorToCLImage(Tensor *tensor, CLImage *image) {
}
void CLImageToTensor(CLImage *image, Tensor *tensor) {
}
}
}
\ No newline at end of file
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_deleter.h"
namespace paddle_mobile {
namespace framework {
class CLScope {
public:
CLScope() {
CLEngine *engin = CLEngine::Instance();
context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue();
}
cl_command_queue CommandQueue() {
return command_queue_.get();
}
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name);
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(clCreateKernel(program, kernel_name.c_str(), NULL));
return std::move(kernel);
}
cl_context Context() {
return context_.get();
}
cl_program Program(const std::string &file_name) {
auto it = programs_.find(file_name);
if (it != programs_.end()) {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(context_.get(), file_name);
programs_[file_name] = program;
status_ = clBuildProgram(program, 0, 0, 0, 0, 0);
CL_CHECK_ERRORS(status_);
return program;
}
private:
cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unordered_map<std::string, std::unique_ptr<_cl_program, CLProgramDeleter>> programs_;
};
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define CL_CHECK_ERRORS(ERR) \
if(ERR != CL_SUCCESS) \
{ \
printf( \
"OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__ \
); \
}
#endif
\ No newline at end of file
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
#include "common/type_define.h" #include "common/type_define.h"
#include "common/types.h" #include "common/types.h"
...@@ -31,6 +32,8 @@ limitations under the License. */ ...@@ -31,6 +32,8 @@ limitations under the License. */
#include "framework/scope.h" #include "framework/scope.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#include "framework/variable.h" #include "framework/variable.h"
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_helper.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -112,7 +115,8 @@ class OperatorWithKernel : public OperatorBase<Dtype> { ...@@ -112,7 +115,8 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope), : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {} param_(inputs, outputs, attrs, *scope),
kernel_(scope->GetCLScpoe()) {}
virtual void RunImpl() const { this->kernel_.Compute(this->param_); } virtual void RunImpl() const { this->kernel_.Compute(this->param_); }
...@@ -138,6 +142,11 @@ class OperatorWithKernel : public OperatorBase<Dtype> { ...@@ -138,6 +142,11 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
template <typename Dtype, typename P> template <typename Dtype, typename P>
class OpKernelBase { class OpKernelBase {
public: public:
OpKernelBase() = default;
OpKernelBase(CLScope *clscope): cl_helper_(clscope) {
}
/* /*
* @b 所有kernel 需实现 Compute 方法 * @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体, * @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体,
...@@ -158,6 +167,10 @@ class OpKernelBase { ...@@ -158,6 +167,10 @@ class OpKernelBase {
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_; void *acl_op_;
#endif #endif
CLHelper cl_helper_;
}; };
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ #define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
......
...@@ -15,6 +15,11 @@ limitations under the License. */ ...@@ -15,6 +15,11 @@ limitations under the License. */
#pragma once #pragma once
#include <list> #include <list>
//#ifdef PADDLE_MOBILE_OCL
#include "framework/cl/cl_scope.h"
//#endif
#include <unordered_map> #include <unordered_map>
#include "variable.h" #include "variable.h"
...@@ -33,6 +38,11 @@ class Scope { ...@@ -33,6 +38,11 @@ class Scope {
delete kid; delete kid;
} }
kids_.clear(); kids_.clear();
//#ifdef PADDLE_MOBILE_OCL
delete cl_scope_;
//#endif
} }
Scope &NewScope() const; Scope &NewScope() const;
...@@ -72,6 +82,10 @@ class Scope { ...@@ -72,6 +82,10 @@ class Scope {
Variable *FindVarLocally(const std::string &name) const; Variable *FindVarLocally(const std::string &name) const;
CLScope *GetCLScpoe() {
return cl_scope_;
}
private: private:
// Call Scope::NewScope for a sub-scope. // Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const *parent) : parent_(parent) {} explicit Scope(Scope const *parent) : parent_(parent) {}
...@@ -79,6 +93,9 @@ class Scope { ...@@ -79,6 +93,9 @@ class Scope {
mutable std::unordered_map<std::string, Variable *> vars_; mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_; mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr}; Scope const *parent_{nullptr};
//#ifdef PADDLE_MOBILE_OCL
CLScope *cl_scope_ = new CLScope();
//#endif
}; };
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -290,6 +290,7 @@ void Executor<Dtype, P>::InitCombineMemory() { ...@@ -290,6 +290,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
delete origin_data; delete origin_data;
LOG(kLOG_INFO) << " end init combine memory "; LOG(kLOG_INFO) << " end init combine memory ";
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
bool Executor<Dtype, P>::varInputMemory( bool Executor<Dtype, P>::varInputMemory(
const std::shared_ptr<framework::VarDesc> &var_desc, Variable *var, const std::shared_ptr<framework::VarDesc> &var_desc, Variable *var,
......
__kernel void conv_3x3(__global float* in, __global float* out) {
int num = get_global_id(0);
out[num] = in[num] * 0.1 + 102;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
return true;
}
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) const {
}
template class ConvKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册