未验证 提交 af0be6e0 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1088 from codeWorm2015/opencl

update cl scope
......@@ -23,12 +23,15 @@ namespace paddle_mobile {
namespace framework {
bool CLEngine::Init() {
if (initialized_) {
return true;
}
cl_int status;
SetPlatform();
SetClDeviceId();
initialized_ = true;
// setClContext();
initialized_ = true;
return initialized_;
// setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str());
......@@ -37,6 +40,7 @@ bool CLEngine::Init() {
// Returns a pointer to the single function-local CLEngine object.
// Init() is invoked on every call; it returns early once the engine has
// already been initialized.
CLEngine *CLEngine::Instance() {
  static CLEngine engine_singleton;
  CLEngine *const engine = &engine_singleton;
  engine->Init();
  return engine;
}
......
......@@ -33,18 +33,21 @@ class CLEngine {
bool Init();
// Creates a new OpenCL context over devices_ (one device) and returns it
// wrapped in a unique_ptr that uses CLContextDeleter.
// The status code reported by clCreateContext is passed to CL_CHECK_ERRORS.
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
  cl_int status;
  // Pass &status so a failed creation is reported instead of silently
  // producing a null context.
  cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
  std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
  CL_CHECK_ERRORS(status);
  // Returning the local by name already moves it; std::move would only
  // inhibit copy elision.
  return context_ptr;
}
// Creates a command queue on devices_[0] for the caller-supplied context and
// returns it wrapped in a unique_ptr that uses CLCommQueueDeleter.
//
// context: the OpenCL context the queue is created in; the caller keeps
//          ownership of it.
// The queue is created with a properties value of 0, and the status code
// reported by clCreateCommandQueue is passed to CL_CHECK_ERRORS.
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter>
CreateClCommandQueue(cl_context context) {
  cl_int status;
  cl_command_queue queue =
      clCreateCommandQueue(context, devices_[0], 0, &status);
  std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(
      queue);
  CL_CHECK_ERRORS(status);
  // Returning the local by name already moves it; std::move would only
  // inhibit copy elision.
  return command_queue_ptr;
}
......@@ -100,10 +103,6 @@ class CLEngine {
cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
// bool SetClContext();
......
......@@ -30,9 +30,8 @@ class CLScope {
public:
// Builds this scope's own OpenCL context and a command queue created in
// that context, both obtained from the shared CLEngine.
CLScope() {
  // CLEngine::Instance() runs Init() itself, so no explicit Init() call is
  // needed here.
  CLEngine *engin = CLEngine::Instance();
  context_ = engin->CreateContext();
  // The queue must be created in the context just built above, so hand the
  // raw handle to the engine; context_ keeps ownership.
  command_queue_ = engin->CreateClCommandQueue(context_.get());
}
cl_command_queue CommandQueue() { return command_queue_.get(); }
......
......@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
//#include "conv_kernel.inc.cl"
// Placeholder 3x3 convolution kernel: it takes no arguments and its body is
// empty.
__kernel void conv_3x3() {
}
......@@ -93,32 +93,52 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " begin set kernel arg ";
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
// status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 1, sizeof(int), &w);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
// CL_CHECK_ERRORS(status);
//
// status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
// CL_CHECK_ERRORS(status);
DLOG << " end set kernel arg ";
CL_CHECK_ERRORS(status);
DLOG << " begin enqueue ";
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
DLOG << " end enqueue ";
CL_CHECK_ERRORS(status);
}
template class ConvKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册