fix: add opencl precision profile

34b05e19 · MyPandaShaoxiang · b74c7ebd · 34b05e19
隐藏空白更改
内联并排

Showing 1 changed file with 110 additions and 4 deletions

lite/core/profile/precision_profiler.h lite/core/profile/precision_profiler.h +110 -4

未找到文件。
--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -21,7 +21,9 @@
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
-
+#ifdef LITE_WITH_OPENCL
+#include "lite/kernels/opencl/image_helper.h"
+#endif
 namespace paddle {
 namespace lite {
 namespace profile {
@@ -49,15 +51,19 @@ class PrecisionProfiler {
  ~PrecisionProfiler() {
    LOG(INFO) << ">> Running kernel: " << inst_->op()->op_info()->Repr()
              << " on Target " << TargetToStr(inst_->kernel()->target()) << " "
-              << PrecisionToStr(inst_->kernel()->precision());
+              << PrecisionToStr(inst_->kernel()->precision()) << " "
+              << DataLayoutToStr(inst_->kernel()->layout());
    auto tensor_mean = [](const Tensor* in,
                          PrecisionType ptype,
+                          std::string target_str = "host",
+                          std::string layout_str = "nchw",
                          std::string name = "inst") -> double {
      if (!in->data<int8_t>()) {
        return -99999;
      }
      double sum = 0.;
      switch (ptype) {
+#ifndef LITE_WITH_OPENCL
        case PRECISION(kFloat): {
          auto ptr = in->data<float>();
          // write_tensorfile<float>(in, name);
@@ -66,6 +72,93 @@ class PrecisionProfiler {
          }
          return sum / in->numel();
        }
+#else
+        case PRECISION(kFloat): {
+          if (layout_str == "ImageDefault") {
+            paddle::lite::CLImageConverterDefault default_convertor;
+            auto image_shape =
+                default_convertor.InitImageDimInfoWith(in->dims());
+            size_t im_w = image_shape[0];
+            size_t im_h = image_shape[1];
+            LOG(INFO) << im_w << " " << im_h;
+            std::vector<float> in_data_v(im_w * im_h * 4);
+            std::vector<float> real_out_v(in->numel());
+            const size_t cl_image2d_row_pitch{0};
+            const size_t cl_image2d_slice_pitch{0};
+            TargetWrapperCL::ImgcpySync(in_data_v.data(),
+                                        in->data<float, cl::Image2D>(),
+                                        im_w,
+                                        im_h,
+                                        cl_image2d_row_pitch,
+                                        cl_image2d_slice_pitch,
+                                        IoDirection::DtoH);
+            default_convertor.ImageToNCHW(
+                in_data_v.data(), real_out_v.data(), image_shape, in->dims());
+            // write_tensorfile<float>(in, name);
+            for (int i = 0; i < real_out_v.size(); ++i) {
+              sum += real_out_v[i];
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else if (target_str == "opencl") {
+            std::vector<float> in_data_v(in->numel(), 0);
+            TargetWrapperCL::MemcpySync(in_data_v.data(),
+                                        in->data<float>(),
+                                        in->numel() * sizeof(float),
+                                        IoDirection::DtoH);
+            for (int i = 0; i < in_data_v.size(); ++i) {
+              sum += in_data_v[i];
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else {
+            return -10000;
+          }
+        }
+        case PRECISION(kAny): {
+          if (layout_str == "ImageDefault") {
+            paddle::lite::CLImageConverterDefault default_convertor;
+            auto image_shape =
+                default_convertor.InitImageDimInfoWith(in->dims());
+            size_t im_w = image_shape[0];
+            size_t im_h = image_shape[1];
+            LOG(INFO) << im_w << " " << im_h;
+            std::vector<float> in_data_v(im_w * im_h * 4);
+            std::vector<float> real_out_v(in->numel());
+            const size_t cl_image2d_row_pitch{0};
+            const size_t cl_image2d_slice_pitch{0};
+            TargetWrapperCL::ImgcpySync(in_data_v.data(),
+                                        in->data<float, cl::Image2D>(),
+                                        im_w,
+                                        im_h,
+                                        cl_image2d_row_pitch,
+                                        cl_image2d_slice_pitch,
+                                        IoDirection::DtoH);
+            default_convertor.ImageToNCHW(
+                in_data_v.data(), real_out_v.data(), image_shape, in->dims());
+            // write_tensorfile<float>(in, name);
+            for (int i = 0; i < in->numel(); ++i) {
+              sum += real_out_v[i];
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else if (target_str == "opencl") {
+            std::vector<float> in_data_v(in->numel(), 0);
+            TargetWrapperCL::MemcpySync(in_data_v.data(),
+                                        in->data<float>(),
+                                        in->numel() * sizeof(float),
+                                        IoDirection::DtoH);
+            for (int i = 0; i < in_data_v.size(); ++i) {
+              sum += in_data_v[i];
+            }
+            LOG(INFO) << in->numel();
+            return sum / in->numel();
+          } else {
+            return -10000;
+          }
+        }
+#endif
+#ifndef LITE_WITH_OPENCL
        case PRECISION(kAny): {
          auto ptr = in->data<float>();
          // write_tensorfile<float>(in, name);
@@ -90,6 +183,7 @@ class PrecisionProfiler {
          }
          return sum / in->numel();
        }
+#endif
        default:
          LOG(INFO) << "unsupport data type: " << PrecisionToStr(ptype);
          return 0.;
@@ -107,15 +201,27 @@ class PrecisionProfiler {

        if (type->IsTensor()) {
          auto tout = op_scope->FindVar(out_name)->GetMutable<Tensor>();
-          double mean = tensor_mean(tout, type->precision(), out_name);
+          double mean = tensor_mean(tout,
+                                    type->precision(),
+                                    TargetToStr(inst_->kernel()->target()),
+                                    DataLayoutToStr(inst_->kernel()->layout()),
+                                    out_name);
+          LOG(INFO) << "go here";
          LOG(INFO) << "output name: " << out_name << ", dims: " << tout->dims()
                    << ", precision: " << PrecisionToStr(type->precision())
+                    << " " << TargetToStr(inst_->kernel()->target()) << " "
+                    << DataLayoutToStr(inst_->kernel()->layout())
                    << ", mean value: " << mean << " shape:" << tout->dims();
        } else if (type->IsTensorList()) {
          auto tout =
              op_scope->FindVar(out_name)->GetMutable<std::vector<Tensor>>();
          for (auto& t : *tout) {
-            double mean = tensor_mean(&t, type->precision(), out_name);
+            double mean =
+                tensor_mean(&t,
+                            type->precision(),
+                            TargetToStr(inst_->kernel()->target()),
+                            DataLayoutToStr(inst_->kernel()->layout()),
+                            out_name);
            LOG(INFO) << "output name: " << out_name << ", dims: " << t.dims()
                      << ", precision: " << PrecisionToStr(type->precision())
                      << ", mean value: " << mean;