diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index bed423441164420b5c1e50e0eaebc0a933f3d811..b88091e09b03940a97170941ea0c48b2874260f1 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -308,7 +308,7 @@ class CLImage { size_t c_block_; DDim tensor_dims_; DDim image_dims_; - float *tensor_data_; + float *tensor_data_ = nullptr; cl_context context_; cl_command_queue command_queue_; }; diff --git a/src/framework/cl/cl_tensor.h b/src/framework/cl/cl_tensor.h index 204c3e3ca185817bdf5b966fe04fac8574acd8a7..57155f65c4491c1f1f3c533bd86339ae3ebf2964 100644 --- a/src/framework/cl/cl_tensor.h +++ b/src/framework/cl/cl_tensor.h @@ -97,7 +97,7 @@ class CLTensor : TensorBase { inline cl_mem CLBuffer() { check_memory_size(); return reinterpret_cast( - reinterpret_cast(holder_->ptr()) + offset_); + reinterpret_cast(holder_->ptr())); } template @@ -115,8 +115,14 @@ class CLTensor : TensorBase { return reinterpret_cast(host_ptr_); } + int memorySize() { + return holder_->size(); + } + ~CLTensor() { + DLOG << "~CLTensor"; if (host_ptr_) { + DLOG << " delete host ptr "; delete (host_ptr_); host_ptr_ = nullptr; } @@ -125,7 +131,7 @@ class CLTensor : TensorBase { private: cl_context context_; cl_command_queue command_queue_; - void *host_ptr_; + void *host_ptr_ = nullptr; struct PlaceholderImpl : public Placeholder { PlaceholderImpl(size_t size, void *input, std::type_index type, diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 45f9726d1eb9f031e910ebebf6726807970fcb61..d2ba4d97362f51d7fe22be0b2a1ddd7cf7563c4b 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -429,7 +429,6 @@ std::shared_ptr Executor::Predict( } #endif - DLOG << " predict return nullptr"; auto last_op = ops.rbegin(); auto output_map = (*last_op)->Outputs(); diff --git a/src/operators/kernel/cl/batchnorm_kernel.cpp b/src/operators/kernel/cl/batchnorm_kernel.cpp index 
2da70637968b0da10fec301e454bf359a6f5bb56..10add5de5d5b271389671d28bfd3bcd2deaa3c8a 100644 --- a/src/operators/kernel/cl/batchnorm_kernel.cpp +++ b/src/operators/kernel/cl/batchnorm_kernel.cpp @@ -86,11 +86,10 @@ void BatchNormKernel::Compute( clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias); clSetKernelArg(kernel, 5, sizeof(cl_mem), &out); - cl_event out_event = param.OutputY()->GetClEvent(); - cl_event wait_event = param.InputX()->GetClEvent(); +// cl_event out_event = param.OutputY()->GetClEvent(); +// cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 1, &wait_event, - &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); } template class BatchNormKernel; diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp index 8a0417eaaa79763c38bd4d77d646f1dbb0d92d06..65f39a0ac6e18e8a206c47ffc1b7e3fba56c6085 100644 --- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp @@ -236,12 +236,12 @@ void ConvAddBNReluKernel::Compute( status = clSetKernelArg(kernel, 16, sizeof(int), &output_height); CL_CHECK_ERRORS(status); - cl_event out_event = param.Output()->GetClEvent(); - cl_event wait_event = param.Input()->GetClEvent(); +// cl_event out_event = param.Output()->GetClEvent(); +// cl_event wait_event = param.Input()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } diff --git a/src/operators/kernel/cl/conv_add_kernel.cpp b/src/operators/kernel/cl/conv_add_kernel.cpp index 390aebef8c9ee4cf440cb70226a3195ffc7b639c..cb093451a5acbe480028d8518f665127faa02f1c 100644 --- a/src/operators/kernel/cl/conv_add_kernel.cpp +++ 
b/src/operators/kernel/cl/conv_add_kernel.cpp @@ -117,12 +117,12 @@ void ConvAddKernel::Compute( status = clSetKernelArg(kernel, 14, sizeof(int), &output_height); CL_CHECK_ERRORS(status); - cl_event out_event = param.Output()->GetClEvent(); - cl_event wait_event = param.Input()->GetClEvent(); +// cl_event out_event = param.Output()->GetClEvent(); +// cl_event wait_event = param.Input()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp index 8cec6b9893ba934cdccba053f5ce335618c3e251..c6fe51e7ee865f3b882f3144837c7618ff8597d4 100644 --- a/src/operators/kernel/cl/conv_kernel.cpp +++ b/src/operators/kernel/cl/conv_kernel.cpp @@ -111,12 +111,12 @@ void ConvKernel::Compute(const ConvParam ¶m) { status = clSetKernelArg(kernel, 12, sizeof(int), &output_width); status = clSetKernelArg(kernel, 13, sizeof(int), &output_height); - cl_event out_event = param.Output()->GetClEvent(); - cl_event wait_event = param.Input()->GetClEvent(); +// cl_event out_event = param.Output()->GetClEvent(); +// cl_event wait_event = param.Input()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } diff --git a/src/operators/kernel/cl/depthwise_conv_kernel.cpp b/src/operators/kernel/cl/depthwise_conv_kernel.cpp index f24e6f0be8322e296bcf599f497425014d2ea8a5..d4a539ab0a75c7ec04141de4a8619613712a1e52 100644 --- a/src/operators/kernel/cl/depthwise_conv_kernel.cpp +++ b/src/operators/kernel/cl/depthwise_conv_kernel.cpp @@ -76,12 +76,12 @@ void DepthwiseConvKernel::Compute( CL_CHECK_ERRORS(status); 
- cl_event out_event = param.Output()->GetClEvent(); - cl_event wait_event = param.Input()->GetClEvent(); +// cl_event out_event = param.Output()->GetClEvent(); +// cl_event wait_event = param.Input()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } diff --git a/src/operators/kernel/cl/elementwise_add_kernel.cpp b/src/operators/kernel/cl/elementwise_add_kernel.cpp index 0bc3700350f5d54a16fff6b1fc297695d94b769b..8ac8a7034f44fd800d4174306c9ef7036bea797a 100644 --- a/src/operators/kernel/cl/elementwise_add_kernel.cpp +++ b/src/operators/kernel/cl/elementwise_add_kernel.cpp @@ -82,7 +82,7 @@ void ElementwiseAddKernel::Compute( cl_event wait_event = param.InputX()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, - NULL, global_work_size, NULL, 1,&wait_event, &out_event); + NULL, global_work_size, NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } else { DLOG << "error:bias dims is error"; diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp index e79e0ed15dfae00ee894ad6239ceadad704ba6bb..f1f13df96c2bd784d42b41c387b1d09b6cd5b74f 100644 --- a/src/operators/kernel/cl/feed_kernel.cpp +++ b/src/operators/kernel/cl/feed_kernel.cpp @@ -30,7 +30,7 @@ void FeedKernel::Compute(const FeedParam ¶m) { cl_int status; auto output = param.Out(); const Tensor *input = param.InputX(); - DLOG << *input; +// DLOG << *input; const float *input_data = input->data(); int numel = input->numel(); cl_mem cl_image = output->GetCLImage(); @@ -52,10 +52,10 @@ void FeedKernel::Compute(const FeedParam ¶m) { size_t global_work_size[2] = {width, height}; - cl_event out_event = param.Out()->GetClEvent(); +// cl_event out_event = param.Out()->GetClEvent(); status = 
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, - NULL, global_work_size, NULL, 0, NULL, &out_event); + NULL, global_work_size, NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); } diff --git a/src/operators/kernel/cl/fetch_kernel.cpp b/src/operators/kernel/cl/fetch_kernel.cpp index c6f8e78361842008fbb841b1751bdfea1ca2d18b..ca94ae475e46aac463338d554d73c1baddd9c269 100644 --- a/src/operators/kernel/cl/fetch_kernel.cpp +++ b/src/operators/kernel/cl/fetch_kernel.cpp @@ -73,9 +73,14 @@ void FetchKernel::Compute(const FetchParam &param) { clSetKernelArg(kernel, 6, sizeof(int), &size_batch); } - cl_event wait_event = param.InputX()->GetClEvent(); +// cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 1, &wait_event, NULL); + default_work_size.data(), NULL, 0, NULL, NULL); + +// printf(" before finish \n"); +// clFlush(this->cl_helper_.CLCommandQueue()); +// clFinish(this->cl_helper_.CLCommandQueue()); +// printf(" after finish \n"); memcpy(out->data(), out_cl_tensor.Data(), out->memory_size()); } diff --git a/src/operators/kernel/cl/pool_kernel.cpp b/src/operators/kernel/cl/pool_kernel.cpp index 904bb0336a4de506fe4ac1dd4b915b6a125e6715..3159152944fe6259e47528f30d86b4b1a1f373b0 100644 --- a/src/operators/kernel/cl/pool_kernel.cpp +++ b/src/operators/kernel/cl/pool_kernel.cpp @@ -63,10 +63,10 @@ void PoolKernel::Compute(const PoolParam &param) { clSetKernelArg(kernel, 10, sizeof(cl_mem), &input); clSetKernelArg(kernel, 11, sizeof(cl_mem), &out); - cl_event out_event = param.Output()->GetClEvent(); - cl_event wait_event = param.Input()->GetClEvent(); +// cl_event out_event = param.Output()->GetClEvent(); +// cl_event wait_event = param.Input()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); } template 
class PoolKernel; diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index 6ad5ba56da2609d444d5edf7647a1d2941776b79..70c939b86df1fcbd161249638771256f68a34024 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -50,12 +50,12 @@ void ReluKernel::Compute(const ReluParam& param) { // clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage); const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; - cl_event out_event = param.Out()->GetClEvent(); - cl_event wait_event = param.InputX()->GetClEvent(); +// cl_event out_event = param.Out()->GetClEvent(); +// cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 1, &wait_event, &out_event); + work_size, NULL, 0, NULL, NULL); // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, // NULL, // work_size, NULL, 0, NULL, NULL); diff --git a/src/operators/kernel/cl/reshape_kernel.cpp b/src/operators/kernel/cl/reshape_kernel.cpp index d82d810cc2e9bf9e94b67d948f87e090cc45e152..a1e6b57984b7e8a206d2f452edaf7100260e274c 100644 --- a/src/operators/kernel/cl/reshape_kernel.cpp +++ b/src/operators/kernel/cl/reshape_kernel.cpp @@ -55,11 +55,11 @@ void ReshapeKernel::Compute(const ReshapeParam ¶m) { clSetKernelArg(kernel, 9, sizeof(cl_int), &odims[1]); const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()}; - cl_event out_event = param.Out()->GetClEvent(); - cl_event wait_event = param.InputX()->GetClEvent(); +// cl_event out_event = param.Out()->GetClEvent(); +// cl_event wait_event = param.InputX()->GetClEvent(); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 1, &wait_event, &out_event); + work_size, NULL, 0, NULL, NULL); } template class ReshapeKernel; diff --git a/src/operators/kernel/cl/softmax_kernel.cpp b/src/operators/kernel/cl/softmax_kernel.cpp 
index 7d99ad9e1c979dfabeb85f6e5bfbb076dd896ac7..432ead67fc87523a3d33ed83eff6ffe8c4666f97 100644 --- a/src/operators/kernel/cl/softmax_kernel.cpp +++ b/src/operators/kernel/cl/softmax_kernel.cpp @@ -55,11 +55,11 @@ void SoftmaxKernel::Compute(const SoftmaxParam ¶m) { // clSetKernelArg(kernel, 4, sizeof(int), &dims[2]); // clSetKernelArg(kernel, 5, sizeof(int), &dims[3]); - cl_event out_event = param.Out()->GetClEvent(); - cl_event wait_event = param.InputX()->GetClEvent(); +// cl_event out_event = param.Out()->GetClEvent(); +// cl_event wait_event = param.InputX()->GetClEvent(); status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, - default_work_size.data(), NULL, 1, &wait_event, &out_event); + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); diff --git a/test/net/test_mobilenet_GPU.cpp b/test/net/test_mobilenet_GPU.cpp index a5a78f7f8f6fe93ca7412f0ec007c291b26417af..40edd0a8450496550a9d3747507b42945f925a09 100644 --- a/test/net/test_mobilenet_GPU.cpp +++ b/test/net/test_mobilenet_GPU.cpp @@ -26,32 +26,35 @@ int main() { auto isok = paddle_mobile.Load(g_mobilenet, true); if (isok) { auto time2 = paddle_mobile::time(); - std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms" + std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms" << std::endl; std::vector input; std::vector dims{1, 3, 224, 224}; GetInput(g_test_image_1x3x224x224_banana, &input, dims); - auto vec_result = paddle_mobile.Predict(input, dims); - // std::vector::iterator biggest = - // std::max_element(std::begin(vec_result), std::end(vec_result)); - // std::cout << " Max element is " << *biggest << " at position " - // << std::distance(std::begin(vec_result), biggest) << - // std::endl; - - // for (int i = 0; i < 10; ++i) { - // auto vec_result = paddle_mobile.Predict(input, dims); - // } // auto time3 = paddle_mobile::time(); - // for (int i = 0; i < 10; ++i) { - // auto vec_result 
= paddle_mobile.Predict(input, dims); - // } - // DLOG << vec_result; + auto vec_result = paddle_mobile.Predict(input, dims); // auto time4 = paddle_mobile::time(); - // std::cout << "predict cost :" << paddle_mobile::time_diff(time3, - // time4) / 10 << "ms" - // << std::endl; + + for (int i = 0; i < 10; ++i) { + auto vec_result = paddle_mobile.Predict(input, dims); + } + + + auto time3 = paddle_mobile::time(); + + for (int i = 0; i < 10; ++i) { + auto vec_result = paddle_mobile.Predict(input, dims); + } + + auto time4 = paddle_mobile::time(); + std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10 + << "ms" << std::endl; + std::vector::iterator biggest = + std::max_element(std::begin(vec_result), std::end(vec_result)); + std::cout << " Max element is " << *biggest << " at position " + << std::distance(std::begin(vec_result), biggest) << std::endl; } std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "