diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h
index d9111e5c46c9217b181e5a3e5a8c7981f46250df..fbc4d8e5224d6b4113271d789ceb0b9e2e6a8863 100644
--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -21,7 +21,9 @@
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
-
+#ifdef LITE_WITH_OPENCL
+#include "lite/kernels/opencl/image_helper.h"
+#endif
 namespace paddle {
 namespace lite {
 namespace profile {
@@ -49,15 +51,19 @@ class PrecisionProfiler {
   ~PrecisionProfiler() {
     LOG(INFO) << ">> Running kernel: " << inst_->op()->op_info()->Repr()
               << " on Target " << TargetToStr(inst_->kernel()->target()) << " "
-              << PrecisionToStr(inst_->kernel()->precision());
+              << PrecisionToStr(inst_->kernel()->precision()) << " "
+              << DataLayoutToStr(inst_->kernel()->layout());
     auto tensor_mean = [](const Tensor* in,
                           PrecisionType ptype,
+                          std::string target_str = "host",
+                          std::string layout_str = "nchw",
                           std::string name = "inst") -> double {
       if (!in->data<int8_t>()) {
         return -99999;
       }
       double sum = 0.;
       switch (ptype) {
+#ifndef LITE_WITH_OPENCL
         case PRECISION(kFloat): {
           auto ptr = in->data<float>();
           // write_tensorfile<float>(in, name);
@@ -66,6 +72,93 @@ class PrecisionProfiler {
           }
           return sum / in->numel();
         }
+#else
+        case PRECISION(kFloat): {
+          // Copy device data to the host first, then average it as float.
+          if (layout_str == "ImageDefault") {
+            paddle::lite::CLImageConverterDefault converter;
+            auto image_shape = converter.InitImageDimInfoWith(in->dims());
+            size_t im_w = image_shape[0];
+            size_t im_h = image_shape[1];
+            LOG(INFO) << im_w << " " << im_h;
+            std::vector<float> in_data_v(im_w * im_h * 4);
+            std::vector<float> real_out_v(in->numel());
+            const size_t cl_image2d_row_pitch{0};
+            const size_t cl_image2d_slice_pitch{0};
+            TargetWrapperCL::ImgcpySync(in_data_v.data(),
+                                        in->data<float, cl::Image2D>(),
+                                        im_w,
+                                        im_h,
+                                        cl_image2d_row_pitch,
+                                        cl_image2d_slice_pitch,
+                                        IoDirection::DtoH);
+            converter.ImageToNCHW(
+                in_data_v.data(), real_out_v.data(), image_shape, in->dims());
+            // write_tensorfile<float>(in, name);
+            for (float v : real_out_v) {
+              sum += v;
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else if (target_str == "opencl") {
+            std::vector<float> in_data_v(in->numel(), 0);
+            TargetWrapperCL::MemcpySync(in_data_v.data(),
+                                        in->data<float>(),
+                                        in->numel() * sizeof(float),
+                                        IoDirection::DtoH);
+            for (float v : in_data_v) {
+              sum += v;
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else {
+            return -10000;  // sentinel: layout/target not handled here
+          }
+        }
+        case PRECISION(kAny): {
+          // kAny is read as float here; device data is copied to host first.
+          if (layout_str == "ImageDefault") {
+            paddle::lite::CLImageConverterDefault converter;
+            auto image_shape = converter.InitImageDimInfoWith(in->dims());
+            size_t im_w = image_shape[0];
+            size_t im_h = image_shape[1];
+            LOG(INFO) << im_w << " " << im_h;
+            std::vector<float> in_data_v(im_w * im_h * 4);
+            std::vector<float> real_out_v(in->numel());
+            const size_t cl_image2d_row_pitch{0};
+            const size_t cl_image2d_slice_pitch{0};
+            TargetWrapperCL::ImgcpySync(in_data_v.data(),
+                                        in->data<float, cl::Image2D>(),
+                                        im_w,
+                                        im_h,
+                                        cl_image2d_row_pitch,
+                                        cl_image2d_slice_pitch,
+                                        IoDirection::DtoH);
+            converter.ImageToNCHW(
+                in_data_v.data(), real_out_v.data(), image_shape, in->dims());
+            // write_tensorfile<float>(in, name);
+            for (float v : real_out_v) {
+              sum += v;
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else if (target_str == "opencl") {
+            std::vector<float> in_data_v(in->numel(), 0);
+            TargetWrapperCL::MemcpySync(in_data_v.data(),
+                                        in->data<float>(),
+                                        in->numel() * sizeof(float),
+                                        IoDirection::DtoH);
+            for (float v : in_data_v) {
+              sum += v;
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else {
+            return -10000;  // sentinel: layout/target not handled here
+          }
+        }
+#endif
+#ifndef LITE_WITH_OPENCL
         case PRECISION(kAny): {
           auto ptr = in->data<float>();
           // write_tensorfile<float>(in, name);
@@ -90,6 +183,7 @@ class PrecisionProfiler {
           }
           return sum / in->numel();
         }
+#endif
         default:
           LOG(INFO) << "unsupport data type: " << PrecisionToStr(ptype);
           return 0.;
@@ -107,15 +201,27 @@ class PrecisionProfiler {
 
         if (type->IsTensor()) {
           auto tout = op_scope->FindVar(out_name)->GetMutable<Tensor>();
-          double mean = tensor_mean(tout, type->precision(), out_name);
+          // Target and layout select how tensor_mean reads the tensor data.
+          double mean = tensor_mean(tout,
+                                    type->precision(),
+                                    TargetToStr(inst_->kernel()->target()),
+                                    DataLayoutToStr(inst_->kernel()->layout()),
+                                    out_name);
           LOG(INFO) << "output name: " << out_name << ", dims: " << tout->dims()
                     << ", precision: " << PrecisionToStr(type->precision())
+                    << " " << TargetToStr(inst_->kernel()->target()) << " "
+                    << DataLayoutToStr(inst_->kernel()->layout())
                     << ", mean value: " << mean << " shape:" << tout->dims();
         } else if (type->IsTensorList()) {
           auto tout =
               op_scope->FindVar(out_name)->GetMutable<std::vector<Tensor>>();
           for (auto& t : *tout) {
-            double mean = tensor_mean(&t, type->precision(), out_name);
+            double mean =
+                tensor_mean(&t,
+                            type->precision(),
+                            TargetToStr(inst_->kernel()->target()),
+                            DataLayoutToStr(inst_->kernel()->layout()),
+                            out_name);
             LOG(INFO) << "output name: " << out_name << ", dims: " << t.dims()
                       << ", precision: " << PrecisionToStr(type->precision())
                       << ", mean value: " << mean;