Commit 598c2a5f authored by huzhiqiang, committed by GitHub

[opencl] add pre_process attribute into layout op (#3001)

Parent 4fbbdfc6
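Overview of the change: if the model directory passed to CxxConfig contains the marker substring "OPENCL_PRE_PRECESS", CxxPaddleApiImpl::Init() appends a new MIR pass, type_layout_cast_preprocess_pass, to the default optimization pipeline. That pass tags every layout op with a process_type attribute, which LayoutOp::AttachImpl() copies into LayoutParam so the OpenCL layout kernel can select its pre-processing path.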
@@ -35,7 +35,13 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
   Env<TARGET(kCUDA)>::Init();
 #endif
   auto places = config.valid_places();
-  raw_predictor_.Build(config, places);
+  std::vector<std::string> passes{};
+  auto use_layout_preprocess_pass =
+      config.model_dir().find("OPENCL_PRE_PRECESS");
+  if (use_layout_preprocess_pass != std::string::npos) {
+    passes = {"type_layout_cast_preprocess_pass"};
+  }
+  raw_predictor_.Build(config, places, passes);
   mode_ = config.power_mode();
   threads_ = config.threads();
...
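For context, a minimal sketch of how a caller would trigger this path. The CxxConfig / set_model_dir / set_valid_places / CreatePaddlePredictor calls are the public Paddle-Lite lite_api surface; the model path and the exact Place list are illustrative assumptions, and the only requirement this commit introduces is that the directory name contain the marker substring checked above:

    #include "paddle_api.h"  // Paddle-Lite public API header

    using namespace paddle::lite_api;  // NOLINT

    int main() {
      CxxConfig config;
      // "OPENCL_PRE_PRECESS" in the path is the marker that
      // CxxPaddleApiImpl::Init() searches for; any directory name
      // containing it enables the preprocess pass.
      config.set_model_dir("./mobilenet_v1_OPENCL_PRE_PRECESS");
      config.set_valid_places({
          // Layout macro assumed; adjust to your Paddle-Lite version.
          Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kImageDefault)},
          Place{TARGET(kARM), PRECISION(kFloat)},  // CPU fallback
      });
      // Build() receives {"type_layout_cast_preprocess_pass"} internally.
      auto predictor = CreatePaddlePredictor<CxxConfig>(config);
      return 0;
    }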
@@ -204,6 +204,28 @@ void TypeLayoutTransformPass::SetValidPlaces(
   valid_places_ = valid_places;
 }
 
+void OpenCLTypeLayoutTransformPass::Apply(
+    const std::unique_ptr<SSAGraph>& graph) {
+  // Tag every "layout" op in the graph with process_type = 1 so the
+  // OpenCL layout kernel can run its pre-/post-processing path.
+  VLOG(4) << "\n" << Visualize(graph.get());
+  std::list<Node*> nodes;
+  for (auto& node : graph->StmtTopologicalOrder()) {
+    nodes.push_back(node);
+  }
+
+  VLOG(4) << "nodes.size():" << nodes.size();
+  for (auto& node : nodes) {
+    VLOG(4) << "!node->IsStmt():" << !node->IsStmt();
+    if (!node->IsStmt() || node->AsStmt().op_type() == "while") continue;
+    if (node->AsStmt().op_type() == "layout") {
+      auto new_op = node->AsStmt().mutable_op_info();
+      int process_type = 1;
+      new_op->SetAttr("process_type", process_type);
+    }
+  }
+  VLOG(4) << "\n" << Visualize(graph.get());
+}
+
 }  // namespace mir
 }  // namespace lite
 }  // namespace paddle
@@ -213,3 +235,9 @@ REGISTER_MIR_PASS(type_layout_cast_pass,
     .BindTargets({TARGET(kAny)})
     .BindKernel("layout_once")
     .BindKernel("layout");
+
+REGISTER_MIR_PASS(type_layout_cast_preprocess_pass,
+                  paddle::lite::mir::OpenCLTypeLayoutTransformPass)
+    .BindTargets({TARGET(kAny)})
+    .BindKernel("layout_once")
+    .BindKernel("layout");
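The name registered here, type_layout_cast_preprocess_pass, is exactly the string pushed into the pass list in CxxPaddleApiImpl::Init() above; REGISTER_MIR_PASS is what makes the pass resolvable by that name when the Optimizer runs the pipeline.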
@@ -57,6 +57,15 @@ class TypeLayoutTransformPass : public ProgramPass {
   std::vector<Place> valid_places_;
 };
 
+// add preprocess and postprocess attribute for layout op
+class OpenCLTypeLayoutTransformPass : public ProgramPass {
+ public:
+  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
+
+ private:
+  std::vector<Place> valid_places_;
+};
+
 }  // namespace mir
 }  // namespace lite
 }  // namespace paddle
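Note that the new class derives from ProgramPass directly rather than extending TypeLayoutTransformPass; it carries its own valid_places_ member for symmetry with the class above, though the Apply() implementation in this commit never reads it.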
@@ -53,7 +53,7 @@ class Optimizer {
     SpecifyKernelPickTactic(kernel_pick_factor);
     InitTargetTypeTransformPass();
 
-    if (passes.empty()) {
+    if (passes.empty() || passes.size() == 1) {
       std::vector<std::string> passes_local{
           {"lite_quant_dequant_fuse_pass",         //
            "weight_quantization_preprocess_pass",  //
@@ -112,6 +112,9 @@ class Optimizer {
            "runtime_context_assign_pass",
            "argument_type_display_pass",
            "memory_optimize_pass"}};
+      if (passes.size() == 1) {
+        passes_local.push_back(passes[0]);
+      }
       RunPasses(passes_local);
     } else {
       RunPasses(passes);
...
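The net behavior in Optimizer: an empty passes list runs the stock pipeline as before; a single-element list now runs the stock pipeline with that one pass appended after memory_optimize_pass; a longer list still replaces the pipeline wholesale.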
@@ -35,6 +35,9 @@ bool LayoutOp::AttachImpl(const cpp::OpDesc &opdesc,
   auto out = opdesc.Output("Out").front();
   param_.x = GetTensor(scope, x);
   param_.y = GetMutableTensor(scope, out);
+  if (opdesc.HasAttr("process_type")) {
+    param_.process_type = opdesc.GetAttr<int>("process_type");
+  }
   return true;
 }
 
 std::string LayoutOp::DebugString() const { return "layout_op"; }
...
@@ -62,6 +62,7 @@ struct IoCopyParam {
 struct LayoutParam {
   const lite::Tensor* x{};
   lite::Tensor* y{};
+  int process_type{0};
 };
 
 struct CalibParam {
...
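To close the loop, a hedged sketch of how a kernel might consume the new field once AttachImpl has populated it. The function below is hypothetical and not part of this commit; only the LayoutParam field is real:

    #include "lite/operators/op_params.h"

    // Hypothetical consumer of LayoutParam::process_type (not in this commit).
    void RunLayoutKernel(const paddle::lite::operators::LayoutParam& param) {
      if (param.process_type == 1) {
        // Op was tagged by type_layout_cast_preprocess_pass: take the
        // OpenCL image pre-/post-processing path.
      } else {
        // Default layout conversion between param.x and param.y.
      }
    }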