cinn_launch_op.cc 11.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/cinn_launch_op.h"
16
#include <vector>
17
#include "paddle/fluid/string/string_helper.h"
18

19 20
// Makes FLAGS_cudnn_deterministic visible in this translation unit; its value
// is forwarded to CINN by SetCinnRuntimeFlags() below.
DECLARE_bool(cudnn_deterministic);

21 22 23
namespace paddle {
namespace operators {

24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
namespace details {

// Maps a Paddle place onto a CINN target: CPU places yield the default host
// target and GPU places yield the default NVGPU target. Any other place is
// reported through PADDLE_THROW as an InvalidArgument error.
const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    return ::cinn::common::DefaultHostTarget();
  }
  if (platform::is_gpu_place(place)) {
    return ::cinn::common::DefaultNVGPUTarget();
  }

  // Neither CPU nor GPU: report the unsupported place.
  PADDLE_THROW(platform::errors::InvalidArgument(
      "CINN is not supported on current place:%s", place));
  // Presumably unreachable once PADDLE_THROW has raised; kept so that every
  // control path of this reference-returning function returns a value.
  return ::cinn::common::UnkTarget();
}

// Logs a summary of a compiled CINN object at VLOG level 4: the size of the
// runtime program, the variable names held by the compiled scope, and the
// paddle-name -> cinn-name mapping. Does nothing when VLOG(4) is disabled.
void DebugCinnCompiledResult(const CinnCompiledObject& result) {
  if (!VLOG_IS_ON(4)) {
    return;
  }
  const auto& cinn_runtime_program = result.runtime_program;
  const auto& cinn_scope = *(result.scope);
  const auto& paddle2cinn_varmap = result.paddle2cinn_varmap;

  VLOG(4) << "Compiled runtime_program instruction size:["
          << cinn_runtime_program->size() << "]";

  std::vector<std::string> infos;
  const auto cinn_var_names = cinn_scope.var_names();
  infos.reserve(cinn_var_names.size());
  // Build each string from data() AND size(): a string view is not guaranteed
  // to be NUL-terminated, so relying on data() alone could read past its end.
  std::transform(cinn_var_names.begin(), cinn_var_names.end(),
                 std::back_inserter(infos), [](const auto& name_view) {
                   return std::string(name_view.data(), name_view.size());
                 });
  VLOG(4) << "Compiled scope variable names:["
          << string::join_strings(infos, ',') << "]";

  // Reuse the same buffer for the name mapping, rendered as "paddle->cinn".
  infos.clear();
  infos.reserve(paddle2cinn_varmap.size());
  std::transform(paddle2cinn_varmap.begin(), paddle2cinn_varmap.end(),
                 std::back_inserter(infos), [](const auto& paddle2cinn) {
                   return paddle2cinn.first + "->" + paddle2cinn.second;
                 });
  VLOG(4) << "Compiled paddle2cinn_varmap:[" << string::join_strings(infos, ',')
          << "]";
}

68
void LaunchCinnExecution(const CinnCompiledObject& compiled_obj,
69 70
                         const CinnLaunchContext& context, void* stream) {
  compiled_obj.runtime_program->Execute(&context.FinalizeArguments(), stream);
71 72
}

73 74 75 76 77 78
// Forwards the value of FLAGS_cudnn_deterministic to the CINN runtime via
// SetCinnCudnnDeterministic, logging the value at VLOG level 4.
void SetCinnRuntimeFlags() {
  const bool deterministic = FLAGS_cudnn_deterministic;
  VLOG(4) << "Set FLAGS_cinn_cudnn_deterministic to " << deterministic;
  ::cinn::runtime::SetCinnCudnnDeterministic(deterministic);
}

79 80 81 82 83
// Builds a launch context from a compiled object: keeps the paddle->cinn name
// mapping and the compiled scope, and records the names of all variables in
// that scope in cinn_variable_names_.
CinnLaunchContext::CinnLaunchContext(const CinnCompiledObject& compiled_obj)
    : paddle2cinn_varmap_(compiled_obj.paddle2cinn_varmap),
      cinn_scope_(compiled_obj.scope) {
  const auto var_names = cinn_scope_->var_names();
  cinn_variable_names_.reserve(var_names.size());
  for (const auto& name_view : var_names) {
    // Construct from data() AND size(): a string view is not guaranteed to be
    // NUL-terminated, so data() alone must not be treated as a C string.
    cinn_variable_names_.emplace(name_view.data(), name_view.size());
  }
}

90 91 92 93 94 95 96 97 98 99 100 101
// Returns true only if `paddle_name` has an entry in paddle2cinn_varmap_ and
// the cinn name it maps to is one of the names in cinn_variable_names_.
bool CinnLaunchContext::IsVariableUsed(const std::string& paddle_name) {
  const auto mapped = paddle2cinn_varmap_.find(paddle_name);
  if (mapped == paddle2cinn_varmap_.end()) {
    return false;
  }
  return cinn_variable_names_.find(mapped->second) !=
         cinn_variable_names_.end();
}

// Returns the tensor stored under `var_name` in the compiled CINN scope.
// `var_name` is a cinn-side name; a NotFound error is raised when it is not
// one of the names recorded in cinn_variable_names_.
CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& var_name) {
  PADDLE_ENFORCE_GT(cinn_variable_names_.count(var_name), 0,
                    platform::errors::NotFound(
                        "Variable(%s) not found in cinn scope.", var_name));
  return cinn_scope_->GetTensor(var_name);
}

102
std::unordered_set<std::string> CinnLaunchContext::GetInternalVariableNames() {
103 104 105 106 107
  std::unordered_set<std::string> all_parameters(cinn_variable_names_);
  std::for_each(name2argument_.begin(), name2argument_.end(),
                [&all_parameters](const auto& name2arg) {
                  all_parameters.erase(name2arg.first);
                });
108
  return all_parameters;
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
}

// Calls mutable_data<float> on `paddle_tensor` for `place`, using the shape of
// the matching CINN tensor.
//
// var_name:        a cinn name when `is_internal_var` is true; otherwise a
//                  paddle name that is translated through paddle2cinn_varmap_.
// place:           place handed to mutable_data.
// paddle_tensor:   tensor whose data is (re)sized.
// is_internal_var: selects how `var_name` is interpreted (see above).
//
// Raises InvalidArgument when a paddle name is not used by CINN, and NotFound
// (from GetCinnTensor) when the resolved cinn name is not in the scope.
void CinnLaunchContext::MutableTensorData(const std::string& var_name,
                                          const platform::Place& place,
                                          LoDTensor* paddle_tensor,
                                          bool is_internal_var) {
  std::string cinn_name;
  if (is_internal_var) {
    // Internal variables are already addressed by their cinn name.
    cinn_name = var_name;
  } else {
    PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
                      platform::errors::InvalidArgument(
                          "Paddle variable(%s) not used by cinn", var_name));
    cinn_name = paddle2cinn_varmap_.at(var_name);
  }

  auto cinn_tensor = GetCinnTensor(cinn_name);
  // TODO(CtfGo): support mutable corresponding c++ type after CINN ready
  VLOG(4) << "Only support float in cinn_launch op now.";
  paddle_tensor->mutable_data<float>(
      framework::make_ddim(cinn_tensor->shape().data()), place);
}

130 131 132
void CinnLaunchContext::CheckTensorEquivalent(const std::string& paddle_name,
                                              const LoDTensor& paddle_tensor,
                                              const CinnTensor& cinn_tensor) {
133
  PADDLE_ENFORCE_EQ(
134
      paddle_tensor.IsInitialized(), true,
135
      platform::errors::InvalidArgument(
136
          "Tensor in variable(%s) is not initialized.", paddle_name));
137 138 139

  // check dimension
  auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
140 141 142 143 144
  PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
                    platform::errors::PreconditionNotMet(
                        "Tensors' shape in variable(%s) are not equivalent, "
                        "paddle's shape = [%s], but cinn's shape = [%s].",
                        paddle_name, paddle_tensor.dims(), cinn_dims));
145 146 147 148

  // TODO(CtfGo): check the underlying data type after CINN ready
}

149 150 151 152 153
void CinnLaunchContext::AssignExternalVariable(const std::string& paddle_name,
                                               LoDTensor* paddle_tensor) {
  PADDLE_ENFORCE_EQ(IsVariableUsed(paddle_name), true,
                    platform::errors::InvalidArgument(
                        "Paddle variable(%s) not used by cinn", paddle_name));
154

155 156 157 158
  const auto& cinn_name = paddle2cinn_varmap_.at(paddle_name);
  CheckTensorEquivalent(paddle_name, *paddle_tensor, GetCinnTensor(cinn_name));
  return SetArgument(cinn_name, paddle_tensor);
}
159

160 161 162 163 164 165 166
// Registers `paddle_tensor` as the execution argument of the CINN variable
// named `cinn_name` (no paddle->cinn name translation is applied).
// Raises InvalidArgument when `cinn_name` is not in the compiled scope; the
// tensor is also validated against the CINN tensor by CheckTensorEquivalent
// before it is registered.
void CinnLaunchContext::AssignInternalVariable(const std::string& cinn_name,
                                               LoDTensor* paddle_tensor) {
  PADDLE_ENFORCE_GT(cinn_variable_names_.count(cinn_name), 0,
                    platform::errors::InvalidArgument(
                        "Variable(%s) not found in cinn scope.", cinn_name));
  CheckTensorEquivalent(cinn_name, *paddle_tensor, GetCinnTensor(cinn_name));
  SetArgument(cinn_name, paddle_tensor);
}

169 170
// Creates a cinn_buffer_t describing `tensor`: the buffer is resized to the
// tensor's dimensions and its `memory` field is set to the tensor's float
// data pointer, so no tensor data is copied here.
// Returns the newly created buffer.
std::unique_ptr<cinn_buffer_t> CinnLaunchContext::ShareTensorWithCinnBuffer(
    LoDTensor* tensor) {
  // convert paddle dimensions array to cinn format
  const auto& paddle_dims = tensor->dims();
  std::vector<cinn_dimension_t> cinn_dims(paddle_dims.size());
  for (auto i = 0; i < paddle_dims.size(); ++i) {
    cinn_dims[i] = static_cast<cinn_dimension_t>(paddle_dims.at(i));
  }

  auto cinn_buffer = std::make_unique<cinn_buffer_t>();
  // assign size and memory
  cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
  cinn_buffer->memory = reinterpret_cast<uint8_t*>(tensor->data<float>());
  return cinn_buffer;
}

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
void CinnLaunchContext::SetArgument(const std::string& cinn_name,
                                    LoDTensor* paddle_tensor) {
  auto buffer = ShareTensorWithCinnBuffer(paddle_tensor);
  name2argument_.emplace(cinn_name, buffer.get());
  hold_buffers_.emplace_back(std::move(buffer));
  VLOG(4) << "SetArgument-" << name2argument_.size() << ": "
          << "name(" << cinn_name << "), "
          << "type(" << framework::DataTypeToString(paddle_tensor->type())
          << "), dims(" << paddle_tensor->dims() << ").";
}

const std::map<std::string, cinn_pod_value_t>&
CinnLaunchContext::FinalizeArguments() const {
  // Check all execution parameters are assigned valued.
  std::for_each(cinn_variable_names_.begin(), cinn_variable_names_.end(),
                [this](const auto& var_name) {
                  PADDLE_ENFORCE_GT(name2argument_.count(var_name), 0,
                                    platform::errors::InvalidArgument(
                                        "Variable(%s) is missed for launching "
                                        "compiled program execution",
                                        var_name));
205
                });
206
  return name2argument_;
207 208 209 210
}

}  // namespace details

211 212 213 214 215 216
// Operator class of cinn_launch. It checks that the inputs and outputs are
// present and pins the expected kernel type to FP32 on the execution place.
class CinnLaunchOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Only checks that the inputs (kX) and outputs (kOutputs) exist; no output
  // shape is set here.
  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInputs(kX), "Input", kX, "CinnLaunchOp");
    OP_INOUT_CHECK(ctx->HasOutputs(kOutputs), "Output", kOutputs,
                   "CinnLaunchOp");
  }

 protected:
  /* [Why use single type kernel]:
   *
   * This op is similar to a control flow op, it does not need
   * an op kernel, but in order to make it execute under dynamic
   * graph mode, implement it with op kernel.
   *
   * So whether the kernel data type is int, float or other type,
   * which has no effect on its execution logic, so directly
   * specified a data type here.
   *
   * Of course, the data type here is also not important.
   */

  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::FP32,
                                   ctx.GetPlace());
  }
};

// Proto maker of cinn_launch: declares the duplicable input kX, the
// duplicable output kOutputs, the string attribute kCompilationKey, and the
// operator's documentation text.
class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(kX,
             "(vector<LoDTensor>)"
             "which are the input of graph inside the CinnLaunchOp.")
        .AsDuplicable();
    AddOutput(kOutputs,
              "(vector<LoDTensor>)"
              "which are the output of graph inside the CinnLaunchOp.")
        .AsDuplicable();
    AddAttr<std::string>(
        kCompilationKey,
        "(string)"
        "a hash key used to get the graph object or its computation result.");
    AddComment(R"DOC(
CinnLaunch Operator.

This operator is used to launch CINN(https://github.com/PaddlePaddle/CINN/blob/develop/README.md)
to compile a graph and execute the compiled object.

Both input and output of this operator are a set of variables
which are input and output of the graph respectively that will be
compiled and executed in this operator.
In addition, there is an attribute named 'compilation_key' should be
set necessarily to get corresponding ir::Graph object of the graph
or its computation result.

It accomplishes the computation of graph following several steps:
  1. Fetch ir::Graph object from CinnCompiler using kCompilationKey
  2. Compile the graph to a compiled object, and insert it to the
     global cache so that we can directly query it from this cache next time
     when shape of input variables are not changed at all.
  3. Create and instantiate all variables used to execute compiled runtime program
     if necessary according to the info(type,shape) included in the return scope.
  4. Pack each tensor buffer of all above variables as execution arguments.
  5. Launch execution of the runtime program with above arguments, then
     the result would be output by writing value on underlying buffer address.

)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

// Short alias for the operator namespace, used by the registrations below.
namespace ops = paddle::operators;
// Registers the cinn_launch operator with its op class and proto maker. Both
// grad-op maker arguments are EmptyGradOpMaker instantiations (one for
// OpDesc, one for imperative::OpBase).
REGISTER_OPERATOR(
    cinn_launch, ops::CinnLaunchOp, ops::CinnLaunchOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
/* see [Why use single type kernel] */
// Registers the CPU kernel of cinn_launch, instantiated for float only.
REGISTER_OP_CPU_KERNEL(
    cinn_launch,
    ops::CinnLaunchOpKernel<paddle::platform::CPUDeviceContext, float>);