提交 d5789dc6 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1088 from codeWorm2015/opencl

update cl scope
...@@ -23,12 +23,15 @@ namespace paddle_mobile { ...@@ -23,12 +23,15 @@ namespace paddle_mobile {
namespace framework { namespace framework {
bool CLEngine::Init() { bool CLEngine::Init() {
if (initialized_) {
return true;
}
cl_int status; cl_int status;
SetPlatform(); SetPlatform();
SetClDeviceId(); SetClDeviceId();
initialized_ = true;
// setClContext(); initialized_ = true;
return initialized_;
// setClCommandQueue(); // setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl"; // std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str()); // loadKernelFromFile(filename.c_str());
...@@ -37,6 +40,7 @@ bool CLEngine::Init() { ...@@ -37,6 +40,7 @@ bool CLEngine::Init() {
CLEngine *CLEngine::Instance() { CLEngine *CLEngine::Instance() {
static CLEngine cl_engine_; static CLEngine cl_engine_;
cl_engine_.Init();
return &cl_engine_; return &cl_engine_;
} }
......
...@@ -33,18 +33,21 @@ class CLEngine { ...@@ -33,18 +33,21 @@ class CLEngine {
bool Init(); bool Init();
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() { std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, NULL); cl_int status;
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c); std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
CL_CHECK_ERRORS(status);
return std::move(context_ptr); return std::move(context_ptr);
} }
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> std::unique_ptr<_cl_command_queue, CLCommQueueDeleter>
CreateClCommandQueue() { CreateClCommandQueue(cl_context context) {
cl_int status; cl_int status;
cl_command_queue queue = cl_command_queue queue =
clCreateCommandQueue(context_.get(), devices_[0], 0, &status); clCreateCommandQueue(context, devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr( std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(
queue); queue);
CL_CHECK_ERRORS(status);
return std::move(command_queue_ptr); return std::move(command_queue_ptr);
} }
...@@ -100,10 +103,6 @@ class CLEngine { ...@@ -100,10 +103,6 @@ class CLEngine {
cl_int status_; cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_; std::unique_ptr<_cl_program, CLProgramDeleter> program_;
// bool SetClContext(); // bool SetClContext();
......
...@@ -30,9 +30,8 @@ class CLScope { ...@@ -30,9 +30,8 @@ class CLScope {
public: public:
CLScope() { CLScope() {
CLEngine *engin = CLEngine::Instance(); CLEngine *engin = CLEngine::Instance();
engin->Init();
context_ = engin->CreateContext(); context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue(); command_queue_ = engin->CreateClCommandQueue(context_.get());
} }
cl_command_queue CommandQueue() { return command_queue_.get(); } cl_command_queue CommandQueue() { return command_queue_.get(); }
......
...@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
//#include "conv_kernel.inc.cl"
__kernel void conv_3x3() {} __kernel void conv_3x3() {
\ No newline at end of file
}
...@@ -93,32 +93,52 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -93,32 +93,52 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " begin set kernel arg "; DLOG << " begin set kernel arg ";
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block); // status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w); // CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh); //
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input); // status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter); // CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output); //
status = clSetKernelArg(kernel, 6, sizeof(int), &stride); // status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
status = clSetKernelArg(kernel, 7, sizeof(int), &offset); // CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(int), &input_c); //
status = clSetKernelArg(kernel, 9, sizeof(int), &dilation); // status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width); // CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height); //
// status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
// CL_CHECK_ERRORS(status);
DLOG << " end set kernel arg "; DLOG << " end set kernel arg ";
CL_CHECK_ERRORS(status);
DLOG << " begin enqueue "; DLOG << " begin enqueue ";
status = status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
DLOG << " end enqueue "; DLOG << " end enqueue ";
CL_CHECK_ERRORS(status);
} }
template class ConvKernel<GPU_CL, float>; template class ConvKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册