cinn_launch_op.h 10.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/scope.h"
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"

namespace paddle {
namespace operators {

// Name of the operator input slot queried for input variable names/tensors.
constexpr char kX[] = "X";
// Name of the operator output slot queried for output variable names.
constexpr char kOutputs[] = "Out";
// Name of the string attribute used to look up the graph in CinnCompiler.
constexpr char kCompilationKey[] = "compilation_key";

// Tensor type used on the Paddle side of this operator.
using LoDTensor = framework::LoDTensor;
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
// Tensor and scope types used on the CINN side of this operator.
using CinnTensor = ::cinn::hlir::framework::Tensor;
using CinnScope = ::cinn::hlir::framework::Scope;
// Paddle-to-CINN compiler singleton and the compilation result it returns;
// the kernel reads `runtime_program`, `scope` and `paddle2cinn_varmap`
// from the result object.
using CinnCompiler = framework::paddle2cinn::CinnCompiler;
using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;

// Helper routines used by CinnLaunchOpKernel::Compute. Only the declarations
// are visible in this header; the definitions live elsewhere, so the notes
// below describe the contract as the kernel uses it.
namespace details {

// Transform Paddle place to CINN target.
// Returns a const reference; the referenced Target is presumably long-lived
// (e.g. a static object) -- confirm against the definition.
const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place);

// Print detailed compilation result of graph for debug
void DebugCinnCompiledResult(const CinnCompiledObject& result);

// Transform names of Paddle variables to CINN ones, looking them up in
// `paddle2cinn_varmap`.
// The kernel indexes the returned vector in parallel with `paddle_names`,
// so the result is expected to keep the same order and length.
std::vector<std::string> MapPaddleVariablesToCinn(
    const std::vector<std::string>& paddle_names,
    const std::unordered_map<std::string, std::string>& paddle2cinn_varmap);

// Get CinnTensor with variable name from CinnScope.
// The kernel indexes the returned vector in parallel with `cinn_names`,
// so the result is expected to keep the same order and length.
std::vector<CinnTensor> GetCinnTensorsFromCompiledScope(
    const std::vector<std::string>& cinn_names, const CinnScope& cinn_scope);

// Check whether tensors from Paddle and CINN respectively
// of the same variable are equivalent in type and dimension.
// `paddle_name` identifies the variable being checked. How a mismatch is
// reported is not visible here (presumably a PADDLE_ENFORCE failure --
// confirm against the definition).
void CheckTensorEquivalent(const std::string& paddle_name,
                           const LoDTensor* paddle_tensor,
                           const CinnTensor& cinn_tensor);

// Allocate buffer to a Paddle tensor with assignment information from CINN.
// `paddle_tensor` is the output parameter; `place` is the place the kernel
// is running on.
void TensorMutableDataWithCinnInfo(const platform::Place& place,
                                   const CinnTensor& cinn_tensor,
                                   LoDTensor* paddle_tensor);

// Extract temporary variable names from CinnScope by excluding
// input and output variables.
// The returned names are CINN names; the kernel uses them directly as
// variable names in its temporary Paddle scope.
std::vector<std::string> SeperateTempVar(
    const CinnScope& cinn_scope,
    const std::vector<std::string>& input_cinn_names,
    const std::vector<std::string>& output_cinn_names);

// Share the buffer of a Paddle tensor to CINN by packing memory address
// in a cinn_buffer_t object.
// The caller owns the returned cinn_buffer_t and must keep it alive for as
// long as CINN may use it; the kernel holds it until execution has finished.
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(LoDTensor* tensor);

// Check all execution arguments are carried, i.e. compare the variables of
// `cinn_scope` against the keys of `name2argument`.
// How a missing argument is reported is not visible here -- confirm against
// the definition.
void CheckArgumentsNotMissed(
    const CinnScope& cinn_scope,
    const std::map<std::string, cinn_pod_value_t>& name2argument);

}  // namespace details
86 87 88 89 90

// Kernel of CinnLaunchOp.
//
// Compute() looks up the graph registered under the `compilation_key`
// attribute, asks CinnCompiler to compile it for the current place, binds
// the buffers of all input, output and temporary variables to execution
// arguments, and finally executes the compiled CINN runtime program.
template <typename DeviceContext, typename T>
class CinnLaunchOpKernel : public framework::OpKernel<T> {
 public:
  // Runs the compiled CINN program of the graph referenced by `ctx`.
  //
  // ctx: execution context providing the scope, the place, the inputs in
  //      slot "X", the outputs in slot "Out" and the `compilation_key`
  //      attribute.
  // Fails through PADDLE_ENFORCE_EQ (NotFound) if the attribute is absent;
  // the details:: helpers perform further checks of their own.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto& scope = ctx.scope();
    const auto& place = ctx.GetPlace();

    // Step 1. Find graph object and prepare input
    PADDLE_ENFORCE_EQ(ctx.HasAttr(kCompilationKey), true,
                      platform::errors::NotFound(
                          "No Attribute(%s) found for CinnLaunchOp operator.",
                          kCompilationKey));
    const auto& compilation_key =
        ctx.template Attr<std::string>(kCompilationKey);
    VLOG(4) << "CinnLaunchOp attribute(" << kCompilationKey << ") "
            << "value:" << compilation_key;

    const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key);
    auto input_variable_names = ctx.InputNames(kX);
    const auto& input_tensors = ctx.MultiInput<LoDTensor>(kX);
    // Pair every input name with its tensor; both sequences come from the
    // same input slot and are walked in lockstep.
    std::map<std::string, const LoDTensor*> inputs_name2tensor;
    std::transform(input_variable_names.begin(), input_variable_names.end(),
                   input_tensors.begin(),
                   std::inserter(inputs_name2tensor, inputs_name2tensor.end()),
                   [](const std::string& name, const LoDTensor* tensor) {
                     return std::make_pair(name, tensor);
                   });

    // Step 2. Get compilation result of the graph
    // PlaceToCinnTarget returns a const reference, so bind to it instead of
    // copying the Target.
    const auto& target = details::PlaceToCinnTarget(place);
    const auto& cinn_compiled_object =
        CinnCompiler::GetInstance()->Compile(graph, inputs_name2tensor, target);
    details::DebugCinnCompiledResult(cinn_compiled_object);

    const auto& cinn_runtime_program = cinn_compiled_object.runtime_program;
    const auto& cinn_scope = *(cinn_compiled_object.scope);
    const auto& paddle2cinn_varmap = cinn_compiled_object.paddle2cinn_varmap;

    // Step 3. Initialize all variables needed for cinn compiled runtime
    //         program execution, and share buffers of their tensors into
    //         cinn buffers through execution arguments passed.
    VLOG(4) << "CinnLaunchOp initialize variables and prepare arguments";
    std::map<std::string, cinn_pod_value_t> name2argument;
    // because a cinn_pod_value_t does not own the cinn_buffer_t object,
    // an extra storage is necessary to keep the object and it can
    // not be released until runtime program finish execution.
    std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers;

    // Logs the argument, shares the buffer of `tensor` with CINN, registers
    // it under `cinn_name` and parks the cinn_buffer_t in `hold_buffers`.
    // `kind` only selects the wording of the log message.
    const auto add_argument = [&name2argument, &hold_buffers](
        const char* kind, std::size_t index, const std::string& var_name,
        const std::string& cinn_name, LoDTensor* tensor) {
      VLOG(4) << "Prepare " << kind << " argument-" << index << ":"
              << "name(" << var_name << "->" << cinn_name << "), "
              << "tensor(type:" << tensor->type() << ","
              << "dims:" << tensor->dims() << ").";
      auto buffer = details::ShareTensorWithCinnBuffer(tensor);
      name2argument.emplace(cinn_name, buffer.get());
      hold_buffers.emplace_back(std::move(buffer));
    };

    // 3.1 Prepare input variables: because tensors of input variables have
    //     been initialized before graph compiled, just check the
    //     equality between tensors of paddle and cinn.
    auto input_cinn_names = details::MapPaddleVariablesToCinn(
        input_variable_names, paddle2cinn_varmap);
    auto input_cinn_tensors =
        details::GetCinnTensorsFromCompiledScope(input_cinn_names, cinn_scope);
    for (std::size_t i = 0; i < input_variable_names.size(); ++i) {
      const auto& var_name = input_variable_names.at(i);
      auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
      details::CheckTensorEquivalent(var_name, tensor,
                                     input_cinn_tensors.at(i));
      add_argument("input", i, var_name, input_cinn_names.at(i), tensor);
    }

    // 3.2 Prepare output variables: all output variables should
    //     be initialized and allocated buffer in advance before
    //     the runtime program start execution, the compilation result
    //     includes details of their buffer assignment which used by
    //     Paddle tensor allocation. For those variables allocated yet,
    //     like persistable parameters, just check the equality between
    //     Paddle allocation and CINN buffer assignment.
    auto output_variable_names = ctx.OutputNames(kOutputs);
    auto output_cinn_names = details::MapPaddleVariablesToCinn(
        output_variable_names, paddle2cinn_varmap);
    auto output_cinn_tensors =
        details::GetCinnTensorsFromCompiledScope(output_cinn_names, cinn_scope);
    for (std::size_t i = 0; i < output_variable_names.size(); ++i) {
      const auto& var_name = output_variable_names.at(i);
      auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
      if (tensor->IsInitialized()) {
        details::CheckTensorEquivalent(var_name, tensor,
                                       output_cinn_tensors.at(i));
      } else {
        details::TensorMutableDataWithCinnInfo(place, output_cinn_tensors.at(i),
                                               tensor);
      }
      add_argument("output", i, var_name, output_cinn_names.at(i), tensor);
    }

    // 3.3 Prepare temporary variables: Create a temporary scope
    //     to keep temporary variables needed by compiled runtime program
    //     in addition, they directly use the names from CinnScope.
    auto temp_variable_names = details::SeperateTempVar(
        cinn_scope, input_cinn_names, output_cinn_names);
    // The temporary scope must stay alive until Execute() has returned,
    // because the arguments point into the tensors it holds.
    auto temp_scope = scope.NewTmpScope();
    if (!temp_variable_names.empty()) {
      auto temp_cinn_tensors = details::GetCinnTensorsFromCompiledScope(
          temp_variable_names, cinn_scope);
      // Iterate over the temporaries themselves; their number is unrelated
      // to the number of outputs.
      for (std::size_t i = 0; i < temp_variable_names.size(); ++i) {
        const auto& var_name = temp_variable_names.at(i);
        auto* tensor = temp_scope->Var(var_name)->GetMutable<LoDTensor>();
        details::TensorMutableDataWithCinnInfo(place, temp_cinn_tensors.at(i),
                                               tensor);
        // Temporaries have no Paddle counterpart, so the CINN name is used
        // on both sides.
        add_argument("temporary", i, var_name, var_name, tensor);
      }
    }

    // Step 4. Launch CINN to execute the compiled runtime program
    details::CheckArgumentsNotMissed(cinn_scope, name2argument);
    cinn_runtime_program->Execute(&name2argument);
    VLOG(4) << "CinnLaunchOp launch execution done.";
  }
};

}  // namespace operators
}  // namespace paddle