Add clFinish to the fetch kernel

0ac132f9 · liuruilong · cd08e4a9 · 0ac132f9
隐藏空白更改
内联并排

Showing 1 changed file with 11 additions and 1 deletion

src/operators/kernel/cl/fetch_kernel.cpp src/operators/kernel/cl/fetch_kernel.cpp +11 -1

未找到文件。
--- a/src/operators/kernel/cl/fetch_kernel.cpp
+++ b/src/operators/kernel/cl/fetch_kernel.cpp
@@ -14,6 +14,8 @@ limitations under the License. */

 #include "operators/kernel/fetch_kernel.h"
 #include "framework/cl/cl_tensor.h"
+//#include "common/common.h"
+//#include <iostream>

 namespace paddle_mobile {
 namespace operators {
@@ -77,11 +79,19 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
                         default_work_size.data(), NULL, 0, NULL, NULL);

+//  auto time1 = paddle_mobile::time();
+
 //  printf(" before finish \n");
 //  clFlsh(this->cl_helper_.CLCommandQueue());
-//  clFinish(this->cl_helper_.CLCommandQueue());
+  clFinish(this->cl_helper_.CLCommandQueue());
 //  printf(" after finish \n");

+//  auto time2 = paddle_mobile::time();
+//
+//
+//  std::cout << " finish  cost :" << paddle_mobile::time_diff(time1, time2)
+//            << "ms" << std::endl;
+
  memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
 }