Unverified · Commit cd08e4a9 · Authored by: Ray Liu · Committed by: GitHub

Merge pull request #1165 from codeWorm2015/opencl

 fix crash error
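This merge fixes a crash in the GPU_CL (OpenCL) execution path. The changes fall into three groups: raw pointers in CLImage and CLTensor are initialized to nullptr and the host pointer is only freed when it was actually allocated; CLTensor::CLBuffer() no longer adds offset_ to the cl_mem handle; and the clEnqueueNDRangeKernel calls in the GPU_CL kernels drop their cl_event wait-list and output event, relying on the in-order command queue for ordering instead (elementwise_add keeps its now-unused wait_event declaration). A minimal sketch of the recurring enqueue change; queue, kernel, and work_size are placeholders for the members used by the real kernels:

```cpp
#include <CL/cl.h>

// Sketch only: `queue`, `kernel`, and `work_size` stand in for
// this->cl_helper_.CLCommandQueue(), the compiled kernel, and
// default_work_size.data() from the real code.
void enqueue_kernel(cl_command_queue queue, cl_kernel kernel,
                    const size_t *work_size) {
  // Before: `..., 1, &wait_event, &out_event`; both events came from
  // GetClEvent() and are a plausible crash source if they were never
  // created or signaled.
  // After: no wait-list and no output event; an in-order command queue
  // already executes kernels in submission order.
  clEnqueueNDRangeKernel(queue, kernel, 3, NULL, work_size, NULL,
                         0, NULL, NULL);
}
```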
......@@ -308,7 +308,7 @@ class CLImage {
size_t c_block_;
DDim tensor_dims_;
DDim image_dims_;
- float *tensor_data_;
+ float *tensor_data_ = nullptr;
cl_context context_;
cl_command_queue command_queue_;
};
......
......@@ -97,7 +97,7 @@ class CLTensor : TensorBase {
inline cl_mem CLBuffer() {
check_memory_size();
return reinterpret_cast<cl_mem>(
- reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+ reinterpret_cast<uintptr_t>(holder_->ptr()));
}
template <typename T>
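A note on the CLBuffer() change above: a cl_mem is an opaque handle, not a device address, so adding offset_ to the reinterpreted handle manufactured an invalid cl_mem whenever offset_ was non-zero, a plausible source of the crash. If a byte-offset view of a buffer were ever needed again, OpenCL expresses that with a sub-buffer rather than handle arithmetic. A sketch, not part of this PR; the helper name is illustrative:

```cpp
#include <CL/cl.h>

// Hypothetical helper: a view of `parent` starting at `origin` bytes.
// `origin` must respect CL_DEVICE_MEM_BASE_ADDR_ALIGN on the device.
cl_mem make_view(cl_mem parent, size_t origin, size_t bytes, cl_int *err) {
  cl_buffer_region region = {origin, bytes};
  return clCreateSubBuffer(parent, CL_MEM_READ_WRITE,
                           CL_BUFFER_CREATE_TYPE_REGION, &region, err);
}
```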
......@@ -115,8 +115,14 @@ class CLTensor : TensorBase {
return reinterpret_cast<T *>(host_ptr_);
}
+ int memorySize() {
+ return holder_->size();
+ }
~CLTensor() {
DLOG << "~CLTensor";
+ if (host_ptr_) {
DLOG << " delete host ptr ";
delete (host_ptr_);
+ host_ptr_ = nullptr;
+ }
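The destructor change above guards the release, so a CLTensor whose host buffer was never allocated (host_ptr_ now starts as nullptr) no longer frees a wild pointer, and resetting host_ptr_ keeps a repeated release harmless. One hedged caveat, since the allocation site is not in this diff: `delete` through a `void *` never runs destructors and is formally undefined; if host_ptr_ is raw storage from ::operator new, the matching release would look like:

```cpp
if (host_ptr_) {
  ::operator delete(host_ptr_);  // assumes host_ptr_ came from ::operator new
  host_ptr_ = nullptr;           // a second release is then a no-op
}
```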
......@@ -125,7 +131,7 @@ class CLTensor : TensorBase {
private:
cl_context context_;
cl_command_queue command_queue_;
- void *host_ptr_;
+ void *host_ptr_ = nullptr;
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, void *input, std::type_index type,
......
......@@ -429,7 +429,6 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
}
#endif
DLOG << " predict return nullptr";
auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs();
......
......@@ -86,11 +86,10 @@ void BatchNormKernel<GPU_CL, float>::Compute(
clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &out);
- cl_event out_event = param.OutputY()->GetClEvent();
- cl_event wait_event = param.InputX()->GetClEvent();
+ // cl_event out_event = param.OutputY()->GetClEvent();
+ // cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
- default_work_size.data(), NULL, 1, &wait_event,
- &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
}
template class BatchNormKernel<GPU_CL, float>;
......
......@@ -236,12 +236,12 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
status = clSetKernelArg(kernel, 16, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
- cl_event out_event = param.Output()->GetClEvent();
- cl_event wait_event = param.Input()->GetClEvent();
+ // cl_event out_event = param.Output()->GetClEvent();
+ // cl_event wait_event = param.Input()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -117,12 +117,12 @@ void ConvAddKernel<GPU_CL, float>::Compute(
status = clSetKernelArg(kernel, 14, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
- cl_event out_event = param.Output()->GetClEvent();
- cl_event wait_event = param.Input()->GetClEvent();
+ // cl_event out_event = param.Output()->GetClEvent();
+ // cl_event wait_event = param.Input()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -111,12 +111,12 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
status = clSetKernelArg(kernel, 12, sizeof(int), &output_width);
status = clSetKernelArg(kernel, 13, sizeof(int), &output_height);
- cl_event out_event = param.Output()->GetClEvent();
- cl_event wait_event = param.Input()->GetClEvent();
+ // cl_event out_event = param.Output()->GetClEvent();
+ // cl_event wait_event = param.Input()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -76,12 +76,12 @@ void DepthwiseConvKernel<GPU_CL, float>::Compute(
CL_CHECK_ERRORS(status);
- cl_event out_event = param.Output()->GetClEvent();
- cl_event wait_event = param.Input()->GetClEvent();
+ // cl_event out_event = param.Output()->GetClEvent();
+ // cl_event wait_event = param.Input()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -82,7 +82,7 @@ void ElementwiseAddKernel<GPU_CL, float>::Compute(
cl_event wait_event = param.InputX()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
- NULL, global_work_size, NULL, 1,&wait_event, &out_event);
+ NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else {
DLOG << "error:bias dims is error";
......
......@@ -30,7 +30,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
cl_int status;
auto output = param.Out();
const Tensor *input = param.InputX();
- DLOG << *input;
+ // DLOG << *input;
const float *input_data = input->data<float>();
int numel = input->numel();
cl_mem cl_image = output->GetCLImage();
......@@ -52,10 +52,10 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
size_t global_work_size[2] = {width, height};
- cl_event out_event = param.Out()->GetClEvent();
+ // cl_event out_event = param.Out()->GetClEvent();
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
- NULL, global_work_size, NULL, 0, NULL, &out_event);
+ NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
......
......@@ -73,9 +73,14 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
}
- cl_event wait_event = param.InputX()->GetClEvent();
+ // cl_event wait_event = param.InpdutX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
- default_work_size.data(), NULL, 1, &wait_event, NULL);
+ default_work_size.data(), NULL, 0, NULL, NULL);
+ // printf(" before finish \n");
+ // clFlsh(this->cl_helper_.CLCommandQueue());
+ // clFinish(this->cl_helper_.CLCommandQueue());
+ // printf(" after finish \n");
memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
}
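FetchKernel copies the result to the host immediately after the enqueue, so with the wait-event removed, correctness rests on out_cl_tensor.Data<float>() doing a blocking read or map of the buffer. If it did not, the commented-out clFinish above marks where the conservative synchronization point would go; a sketch under that assumption:

```cpp
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
                       default_work_size.data(), NULL, 0, NULL, NULL);
// Block until the kernel has finished writing before reading on the host.
clFinish(this->cl_helper_.CLCommandQueue());
memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
```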
......
......@@ -63,10 +63,10 @@ void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) {
clSetKernelArg(kernel, 10, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 11, sizeof(cl_mem), &out);
- cl_event out_event = param.Output()->GetClEvent();
- cl_event wait_event = param.Input()->GetClEvent();
+ // cl_event out_event = param.Output()->GetClEvent();
+ // cl_event wait_event = param.Input()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
}
template class PoolKernel<GPU_CL, float>;
......
......@@ -50,12 +50,12 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
// clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
- cl_event out_event = param.Out()->GetClEvent();
- cl_event wait_event = param.InputX()->GetClEvent();
+ // cl_event out_event = param.Out()->GetClEvent();
+ // cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL,
- work_size, NULL, 1, &wait_event, &out_event);
+ work_size, NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
// NULL,
// work_size, NULL, 0, NULL, NULL);
......
......@@ -55,11 +55,11 @@ void ReshapeKernel<GPU_CL, float>::Compute(const ReshapeParam<GPU_CL> &param) {
clSetKernelArg(kernel, 9, sizeof(cl_int), &odims[1]);
const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()};
- cl_event out_event = param.Out()->GetClEvent();
- cl_event wait_event = param.InputX()->GetClEvent();
+ // cl_event out_event = param.Out()->GetClEvent();
+ // cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
- work_size, NULL, 1, &wait_event, &out_event);
+ work_size, NULL, 0, NULL, NULL);
}
template class ReshapeKernel<GPU_CL, float>;
......
......@@ -55,11 +55,11 @@ void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {
// clSetKernelArg(kernel, 4, sizeof(int), &dims[2]);
// clSetKernelArg(kernel, 5, sizeof(int), &dims[3]);
- cl_event out_event = param.Out()->GetClEvent();
- cl_event wait_event = param.InputX()->GetClEvent();
+ // cl_event out_event = param.Out()->GetClEvent();
+ // cl_event wait_event = param.InputX()->GetClEvent();
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
- default_work_size.data(), NULL, 1, &wait_event, &out_event);
+ default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
......
......@@ -26,32 +26,35 @@ int main() {
auto isok = paddle_mobile.Load(g_mobilenet, true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
auto vec_result = paddle_mobile.Predict(input, dims);
// std::vector<float>::iterator biggest =
// std::max_element(std::begin(vec_result), std::end(vec_result));
// std::cout << " Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result), biggest) <<
// std::endl;
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// auto time3 = paddle_mobile::time();
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// DLOG << vec_result;
auto vec_result = paddle_mobile.Predict(input, dims);
// auto time4 = paddle_mobile::time();
// std::cout << "predict cost :" << paddle_mobile::time_diff(time3,
// time4) / 10 << "ms"
// << std::endl;
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time3 = paddle_mobile::time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = paddle_mobile::time();
std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10
<< "ms" << std::endl;
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
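On the test change: the old line measured time_diff(time1, time1), which is always zero, so "load cost" always printed 0 ms; the fix measures against time2. The benchmark now also runs 10 untimed warm-up Predict calls before timing 10 more and averaging. Condensed, using the same paddle_mobile API as the test:

```cpp
for (int i = 0; i < 10; ++i) paddle_mobile.Predict(input, dims);  // warm-up
auto t3 = paddle_mobile::time();
for (int i = 0; i < 10; ++i) paddle_mobile.Predict(input, dims);  // timed
auto t4 = paddle_mobile::time();
std::cout << "predict cost :" << paddle_mobile::time_diff(t3, t4) / 10
          << "ms" << std::endl;
```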