Unverified commit cd08e4a9, authored by Ray Liu, committed by GitHub

Merge pull request #1165 from codeWorm2015/opencl

 fix crash error
@@ -308,7 +308,7 @@ class CLImage {
   size_t c_block_;
   DDim tensor_dims_;
   DDim image_dims_;
-  float *tensor_data_;
+  float *tensor_data_ = nullptr;
   cl_context context_;
   cl_command_queue command_queue_;
 };
......
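Review note: the one-line change above is a classic crash fix. `tensor_data_` was previously left indeterminate whenever a constructor path did not assign it, so any cleanup or null-check on it was undefined behavior. A default member initializer guarantees a well-defined `nullptr`. A minimal sketch of the hazard, with a hypothetical `Image` class rather than the paddle-mobile sources:

```cpp
#include <cstddef>

// Minimal sketch (class name hypothetical) of the bug this hunk removes.
class Image {
 public:
  ~Image() {
    // Safe only because tensor_data_ is guaranteed to start as nullptr;
    // deleting an indeterminate pointer is undefined behavior.
    delete[] tensor_data_;
  }

 private:
  // Before the fix, `float *tensor_data_;` held garbage until assigned.
  float *tensor_data_ = nullptr;
};

int main() { Image img; }  // destroys an Image that never allocated
```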
@@ -97,7 +97,7 @@ class CLTensor : TensorBase {
   inline cl_mem CLBuffer() {
     check_memory_size();
     return reinterpret_cast<cl_mem>(
-        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+        reinterpret_cast<uintptr_t>(holder_->ptr()));
   }
 
   template <typename T>
@@ -115,8 +115,14 @@ class CLTensor : TensorBase {
     return reinterpret_cast<T *>(host_ptr_);
   }
 
+  int memorySize() {
+    return holder_->size();
+  }
+
   ~CLTensor() {
+    DLOG << "~CLTensor";
     if (host_ptr_) {
+      DLOG << " delete host ptr ";
       delete (host_ptr_);
       host_ptr_ = nullptr;
     }
@@ -125,7 +131,7 @@ class CLTensor : TensorBase {
  private:
   cl_context context_;
   cl_command_queue command_queue_;
-  void *host_ptr_;
+  void *host_ptr_ = nullptr;
 
   struct PlaceholderImpl : public Placeholder {
     PlaceholderImpl(size_t size, void *input, std::type_index type,
......
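Review note: the `CLBuffer()` hunk above drops the `+ offset_`. A `cl_mem` is an opaque handle, not a raw device address, so integer arithmetic on it produces an invalid handle that typically crashes inside the OpenCL driver at enqueue time. If a view at an offset is genuinely needed, the portable route is `clCreateSubBuffer` (OpenCL 1.1+). A hedged sketch, not part of this PR:

```cpp
#include <CL/cl.h>

// Hedged sketch: carve a sub-range out of an existing buffer instead of
// offsetting the cl_mem handle. `offset` must respect the device's
// CL_DEVICE_MEM_BASE_ADDR_ALIGN alignment requirement.
cl_mem SubBufferAt(cl_mem buffer, size_t offset, size_t size, cl_int *err) {
  cl_buffer_region region = {offset, size};
  return clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
                           CL_BUFFER_CREATE_TYPE_REGION, &region, err);
}
```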
@@ -429,7 +429,6 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
   }
 #endif
 
-  DLOG << " predict return nullptr";
 
   auto last_op = ops.rbegin();
   auto output_map = (*last_op)->Outputs();
......
@@ -86,11 +86,10 @@ void BatchNormKernel<GPU_CL, float>::Compute(
   clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias);
   clSetKernelArg(kernel, 5, sizeof(cl_mem), &out);
 
-  cl_event out_event = param.OutputY()->GetClEvent();
-  cl_event wait_event = param.InputX()->GetClEvent();
+  //  cl_event out_event = param.OutputY()->GetClEvent();
+  //  cl_event wait_event = param.InputX()->GetClEvent();
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 1, &wait_event,
-                         &out_event);
+                         default_work_size.data(), NULL, 0, NULL, NULL);
 }
 
 template class BatchNormKernel<GPU_CL, float>;
......
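Review note: the hunk above is the pattern repeated in every kernel below (conv, conv-add, conv-add-bn-relu, depthwise conv, elementwise add, feed, fetch, pool, relu, reshape, softmax): the explicit event dependency is dropped from `clEnqueueNDRangeKernel`. The crash being fixed is consistent with handing the wait list a `cl_event` that was never signaled or already released, which is undefined behavior. Because these kernels share one in-order command queue, enqueue order alone already serializes them. A hedged before/after sketch (helper name hypothetical, not the paddle-mobile code):

```cpp
#include <CL/cl.h>

// Hedged sketch of the change applied across the GPU_CL kernels.
cl_int Launch(cl_command_queue queue, cl_kernel kernel, const size_t *gws) {
  // Before: wait on an explicit event. If `wait_event` holds a stale or
  // uninitialized handle, the call is undefined and drivers often crash:
  //   cl_event wait_event = ...;
  //   clEnqueueNDRangeKernel(queue, kernel, 3, NULL, gws, NULL,
  //                          1, &wait_event, NULL);

  // After: no wait list. An in-order queue executes commands in the order
  // they were enqueued, so the dependency is implicit.
  return clEnqueueNDRangeKernel(queue, kernel, 3, NULL, gws, NULL,
                                0, NULL, NULL);
}
```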
@@ -236,12 +236,12 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
   status = clSetKernelArg(kernel, 16, sizeof(int), &output_height);
   CL_CHECK_ERRORS(status);
 
-  cl_event out_event = param.Output()->GetClEvent();
-  cl_event wait_event = param.Input()->GetClEvent();
+  //  cl_event out_event = param.Output()->GetClEvent();
+  //  cl_event wait_event = param.Input()->GetClEvent();
 
   status =
       clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
-                             default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                             default_work_size.data(), NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
 }
......
@@ -117,12 +117,12 @@ void ConvAddKernel<GPU_CL, float>::Compute(
   status = clSetKernelArg(kernel, 14, sizeof(int), &output_height);
   CL_CHECK_ERRORS(status);
 
-  cl_event out_event = param.Output()->GetClEvent();
-  cl_event wait_event = param.Input()->GetClEvent();
+  //  cl_event out_event = param.Output()->GetClEvent();
+  //  cl_event wait_event = param.Input()->GetClEvent();
 
   status =
       clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
-                             default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                             default_work_size.data(), NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
 }
......
@@ -111,12 +111,12 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
   status = clSetKernelArg(kernel, 12, sizeof(int), &output_width);
   status = clSetKernelArg(kernel, 13, sizeof(int), &output_height);
 
-  cl_event out_event = param.Output()->GetClEvent();
-  cl_event wait_event = param.Input()->GetClEvent();
+  //  cl_event out_event = param.Output()->GetClEvent();
+  //  cl_event wait_event = param.Input()->GetClEvent();
 
   status =
       clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
-                             default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                             default_work_size.data(), NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
 }
......
@@ -76,12 +76,12 @@ void DepthwiseConvKernel<GPU_CL, float>::Compute(
   CL_CHECK_ERRORS(status);
 
-  cl_event out_event = param.Output()->GetClEvent();
-  cl_event wait_event = param.Input()->GetClEvent();
+  //  cl_event out_event = param.Output()->GetClEvent();
+  //  cl_event wait_event = param.Input()->GetClEvent();
 
   status =
       clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
-                             default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                             default_work_size.data(), NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
 }
......
@@ -82,7 +82,7 @@ void ElementwiseAddKernel<GPU_CL, float>::Compute(
     cl_event wait_event = param.InputX()->GetClEvent();
     status =
         clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
-                               NULL, global_work_size, NULL, 1, &wait_event, &out_event);
+                               NULL, global_work_size, NULL, 0, NULL, NULL);
     CL_CHECK_ERRORS(status);
   } else {
     DLOG << "error:bias dims is error";
......
@@ -30,7 +30,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   cl_int status;
   auto output = param.Out();
   const Tensor *input = param.InputX();
-  DLOG << *input;
+  //  DLOG << *input;
   const float *input_data = input->data<float>();
   int numel = input->numel();
   cl_mem cl_image = output->GetCLImage();
@@ -52,10 +52,10 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   size_t global_work_size[2] = {width, height};
 
-  cl_event out_event = param.Out()->GetClEvent();
+  //  cl_event out_event = param.Out()->GetClEvent();
 
   status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
-                                  NULL, global_work_size, NULL, 0, NULL, &out_event);
+                                  NULL, global_work_size, NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
 }
......
@@ -73,9 +73,14 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
     clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
   }
 
-  cl_event wait_event = param.InputX()->GetClEvent();
+  //  cl_event wait_event = param.InpdutX()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 1, &wait_event, NULL);
+                         default_work_size.data(), NULL, 0, NULL, NULL);
+
+  //  printf(" before finish \n");
+  //  clFlsh(this->cl_helper_.CLCommandQueue());
+  //  clFinish(this->cl_helper_.CLCommandQueue());
+  //  printf(" after finish \n");
 
   memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
 }
......
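Review note: with the events gone, the `memcpy` in `FetchKernel` relies on whatever synchronization happens when `out_cl_tensor.Data<float>()` copies the result back; the commented-out `clFinish` shows this was the open question. The usual guarantee is a blocking read, which returns only after the copy, and every earlier command in the in-order queue, has completed. A hedged sketch, not the paddle-mobile code:

```cpp
#include <CL/cl.h>

// Hedged sketch: read a device buffer back with a blocking call so the
// host buffer is safe to memcpy from immediately afterwards.
cl_int ReadBack(cl_command_queue queue, cl_mem device_buf, void *host_dst,
                size_t bytes) {
  return clEnqueueReadBuffer(queue, device_buf, CL_TRUE /* blocking */, 0,
                             bytes, host_dst, 0, NULL, NULL);
}
```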
@@ -63,10 +63,10 @@ void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) {
   clSetKernelArg(kernel, 10, sizeof(cl_mem), &input);
   clSetKernelArg(kernel, 11, sizeof(cl_mem), &out);
 
-  cl_event out_event = param.Output()->GetClEvent();
-  cl_event wait_event = param.Input()->GetClEvent();
+  //  cl_event out_event = param.Output()->GetClEvent();
+  //  cl_event wait_event = param.Input()->GetClEvent();
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                         default_work_size.data(), NULL, 0, NULL, NULL);
 }
 
 template class PoolKernel<GPU_CL, float>;
......
@@ -50,12 +50,12 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
   //  clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
   const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
 
-  cl_event out_event = param.Out()->GetClEvent();
-  cl_event wait_event = param.InputX()->GetClEvent();
+  //  cl_event out_event = param.Out()->GetClEvent();
+  //  cl_event wait_event = param.InputX()->GetClEvent();
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                          NULL,
-                         work_size, NULL, 1, &wait_event, &out_event);
+                         work_size, NULL, 0, NULL, NULL);
   //  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
   //                         NULL,
   //                         work_size, NULL, 0, NULL, NULL);
......
@@ -55,11 +55,11 @@ void ReshapeKernel<GPU_CL, float>::Compute(const ReshapeParam<GPU_CL> &param) {
   clSetKernelArg(kernel, 9, sizeof(cl_int), &odims[1]);
   const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()};
 
-  cl_event out_event = param.Out()->GetClEvent();
-  cl_event wait_event = param.InputX()->GetClEvent();
+  //  cl_event out_event = param.Out()->GetClEvent();
+  //  cl_event wait_event = param.InputX()->GetClEvent();
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
-                         work_size, NULL, 1, &wait_event, &out_event);
+                         work_size, NULL, 0, NULL, NULL);
 }
 
 template class ReshapeKernel<GPU_CL, float>;
......
@@ -55,11 +55,11 @@ void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {
   //  clSetKernelArg(kernel, 4, sizeof(int), &dims[2]);
   //  clSetKernelArg(kernel, 5, sizeof(int), &dims[3]);
 
-  cl_event out_event = param.Out()->GetClEvent();
-  cl_event wait_event = param.InputX()->GetClEvent();
+  //  cl_event out_event = param.Out()->GetClEvent();
+  //  cl_event wait_event = param.InputX()->GetClEvent();
 
   status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
-                                  default_work_size.data(), NULL, 1, &wait_event, &out_event);
+                                  default_work_size.data(), NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
......
@@ -26,32 +26,35 @@ int main() {
   auto isok = paddle_mobile.Load(g_mobilenet, true);
   if (isok) {
     auto time2 = paddle_mobile::time();
-    std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"
+    std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
               << std::endl;
 
     std::vector<float> input;
     std::vector<int64_t> dims{1, 3, 224, 224};
     GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
 
+    //    auto time3 = paddle_mobile::time();
     auto vec_result = paddle_mobile.Predict(input, dims);
-    //    std::vector<float>::iterator biggest =
-    //        std::max_element(std::begin(vec_result), std::end(vec_result));
-    //    std::cout << " Max element is " << *biggest << " at position "
-    //              << std::distance(std::begin(vec_result), biggest) <<
-    //              std::endl;
-    //    for (int i = 0; i < 10; ++i) {
-    //      auto vec_result = paddle_mobile.Predict(input, dims);
-    //    }
-    //    auto time3 = paddle_mobile::time();
-    //    for (int i = 0; i < 10; ++i) {
-    //      auto vec_result = paddle_mobile.Predict(input, dims);
-    //    }
-    //    DLOG << vec_result;
-    //    auto time4 = paddle_mobile::time();
-    //    std::cout << "predict cost :" << paddle_mobile::time_diff(time3,
-    //    time4) / 10 << "ms"
-    //              << std::endl;
+    //    auto time4 = paddle_mobile::time();
+
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time3 = paddle_mobile::time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = paddle_mobile::time();
+    std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10
+              << "ms" << std::endl;
+
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
   }
   std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
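Review note: besides fixing the `time_diff(time1, time1)` typo (which always reported a load cost of 0 ms), the test now runs ten warm-up predictions before the ten timed ones, keeping one-time OpenCL program compilation and allocation costs out of the reported average. The pattern as a standalone, hedged sketch (names hypothetical, not the test harness itself):

```cpp
#include <chrono>
#include <iostream>

// Hedged sketch of the test's timing pattern: warm up untimed, then
// report the mean over the timed iterations.
template <typename F>
double AverageMs(F &&run, int warmup = 10, int iters = 10) {
  for (int i = 0; i < warmup; ++i) run();  // absorb one-time setup cost
  auto t0 = std::chrono::steady_clock::now();
  for (int i = 0; i < iters; ++i) run();
  auto t1 = std::chrono::steady_clock::now();
  return std::chrono::duration<double, std::milli>(t1 - t0).count() / iters;
}

int main() {
  auto work = [] { /* e.g. paddle_mobile.Predict(input, dims) */ };
  std::cout << "predict cost :" << AverageMs(work) << "ms\n";
}
```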