api_custom_impl.cc 8.9 KB
Newer Older
1
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

15
#include "paddle/phi/api/lib/api_custom_impl.h"
16

17
#include "paddle/phi/api/lib/api_gen_utils.h"
18 19 20
#include "paddle/phi/api/lib/data_transform.h"
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/api/lib/utils/storage.h"
21
#include "paddle/phi/core/compat/convert_utils.h"
22 23
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/meta_tensor.h"
24
#include "paddle/phi/infermeta/backward.h"
25 26 27
#include "paddle/phi/infermeta/binary.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/infermeta/nullary.h"
28
#include "paddle/phi/infermeta/unary.h"
29

30
#include "glog/logging.h"
31

32 33 34
namespace paddle {
namespace experimental {

35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
// TODO(chenweihang): The original sum grad op supports higher-order
// differentiation, but this implementation does not. Supporting it requires
// reusing the autograd API here, which is not yet implemented.
// TODO(chenweihang): We should support calling the generated API from custom
// API implementations.
std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x,
                                    const Tensor& out_grad) {
  auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();

  Backend kernel_backend = kernel_key.backend();
  DataLayout kernel_layout = kernel_key.layout();
  DataType kernel_data_type = kernel_key.dtype();

  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "scale", {kernel_backend, kernel_layout, kernel_data_type});
  VLOG(6) << "add_n_grad API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  VLOG(6) << "add_n_grad API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);

  auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {});

  size_t out_number = x.size();
  std::vector<Tensor> x_grad;
  auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);

  using kernel_signature = void (*)(const platform::DeviceContext&,
                                    const phi::DenseTensor&,
                                    const phi::Scalar&,
                                    float,
                                    bool,
                                    phi::DenseTensor*);
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();

  for (auto* dense_x_grad_t : dense_x_grad) {
    phi::MetaTensor meta_out(dense_x_grad_t);
    phi::UnchangedInferMeta(MakeMetaTensor(*dense_out_grad), &meta_out);
    (*kernel_fn)(
        *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t);
  }

  return x_grad;
}

81
// Runs the "copy" kernel on `x` and returns the result as a new Tensor.
//
// x:        source tensor; converted with TensorToDenseTensor before the call.
// place:    target place; its backend is merged into the candidate backend
//           set before the kernel key is chosen, and the place itself is
//           forwarded to the kernel.
// blocking: forwarded unchanged to the kernel.
// Returns the Tensor whose storage was set up by SetKernelOutput and filled by
// the kernel.
Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) {
  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
  // Let the destination place take part in backend selection, not only the
  // backend derived from x.
  kernel_key_set.backend_set =
      kernel_key_set.backend_set | BackendSet(phi::TransToPhiBackend(place));
  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "copy", kernel_key);

  VLOG(6) << "copy API kernel key: " << kernel_key;
  VLOG(6) << "copy API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());

  // NOTE(review): dense_x is dereferenced below without a null check --
  // confirm TensorToDenseTensor cannot yield a null pointer for the tensors
  // that reach this function.
  auto dense_x = TensorToDenseTensor(x);

  Tensor out;
  auto kernel_out = SetKernelOutput(kernel_key.backend(), &out);
  // The output takes its meta information from the input.
  phi::MetaTensor meta_out(kernel_out);
  phi::UnchangedInferMeta(*dense_x, &meta_out);

  using kernel_signature = void (*)(const platform::DeviceContext&,
                                    const phi::DenseTensor&,
                                    phi::Place,
                                    bool,
                                    phi::DenseTensor*);

  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
  (*kernel_fn)(*dev_ctx, *dense_x, place, blocking, kernel_out);

  return out;
}

113
std::vector<Tensor> split_impl(const Tensor& x,
114
                               const IntArray& num_or_sections,
115 116
                               const Scalar& axis) {
  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
117
  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
118 119 120 121

  Backend kernel_backend = kernel_key.backend();
  DataLayout kernel_layout = kernel_key.layout();
  DataType kernel_data_type = kernel_key.dtype();
C
chentianyu03 已提交
122

123
  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
C
chentianyu03 已提交
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
      "split", {kernel_backend, kernel_layout, kernel_data_type});
  VLOG(6) << "split API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  VLOG(6) << "split API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);

  auto dense_x = PrepareData(x, kernel.InputAt(0), {});

  // Calculate the number of out tensors
  size_t out_number;
  if (num_or_sections.GetData().size() == 1) {
    out_number = num_or_sections.GetData()[0];
  } else {
    out_number = num_or_sections.GetData().size();
  }

  std::vector<Tensor> out;
  auto dense_outs = SetKernelOutput(out_number, kernel_backend, &out);
143
  std::vector<phi::MetaTensor> meta_outs;
144 145 146
  meta_outs.reserve(out_number);
  std::vector<phi::MetaTensor*> meta_out_ptrs;
  meta_out_ptrs.reserve(out_number);
C
chentianyu03 已提交
147 148
  for (size_t i = 0; i < out_number; ++i) {
    meta_outs.push_back(dense_outs[i]);
149
    meta_out_ptrs.push_back(&meta_outs.back());
C
chentianyu03 已提交
150 151
  }

152
  phi::SplitInferMeta(
153
      MakeMetaTensor(*dense_x), num_or_sections, axis, meta_out_ptrs);
C
chentianyu03 已提交
154 155

  using kernel_signature = void (*)(const platform::DeviceContext&,
156
                                    const phi::DenseTensor&,
157
                                    const phi::IntArray&,
158 159
                                    const phi::Scalar&,
                                    std::vector<phi::DenseTensor*>&);
C
chentianyu03 已提交
160 161 162
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
  (*kernel_fn)(*dev_ctx,
               *dense_x,
163
               phi::IntArray(num_or_sections),
164
               phi::Scalar(axis),
C
chentianyu03 已提交
165 166 167 168
               dense_outs);

  return out;
}
169

170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x,
                                     const Tensor& out_grad,
                                     const Scalar& axis) {
  auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();

  Backend kernel_backend = kernel_key.backend();
  DataLayout kernel_layout = kernel_key.layout();
  DataType kernel_data_type = kernel_key.dtype();

  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "concat_grad", {kernel_backend, kernel_layout, kernel_data_type});
  VLOG(6) << "concat_grad API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  VLOG(6) << "concat_grad API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);

  // std::unique_ptr<std::vector<phi::DenseTensor>>
  auto dense_x = PrepareData(x, kernel.InputAt(0), {});
  auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});

  // Calculate the number of out tensors
  size_t out_number = x.size();
  std::vector<Tensor> x_grad;
  auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);

  std::vector<phi::MetaTensor> meta_x;
  meta_x.reserve(x.size());
  std::vector<phi::MetaTensor*> meta_x_ptrs;
  meta_x_ptrs.reserve(x.size());
  for (const auto& t : *dense_x) {
    meta_x.push_back(t);
    meta_x_ptrs.push_back(&meta_x.back());
  }

  std::vector<phi::MetaTensor> meta_x_grad;
  meta_x_grad.reserve(x.size());
  std::vector<phi::MetaTensor*> meta_x_grad_ptrs;
  meta_x_grad_ptrs.reserve(x.size());
  for (size_t i = 0; i < out_number; ++i) {
    meta_x_grad.push_back(*dense_x_grad[i]);
    meta_x_grad_ptrs.push_back(&meta_x_grad.back());
  }

  phi::UnchangedMultiInferMeta(meta_x_ptrs, meta_x_grad_ptrs);

  std::vector<const phi::DenseTensor*> dense_x_ptr;
  dense_x_ptr.reserve(x.size());
  for (const auto& t : *dense_x) {
    dense_x_ptr.push_back(&t);
  }

  using kernel_signature = void (*)(const platform::DeviceContext&,
                                    const std::vector<const phi::DenseTensor*>&,
                                    const phi::DenseTensor&,
                                    const phi::Scalar&,
                                    std::vector<phi::DenseTensor*>);
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
  (*kernel_fn)(
      *dev_ctx, dense_x_ptr, *dense_out_grad, phi::Scalar(axis), dense_x_grad);

  return x_grad;
}

235 236
}  // namespace experimental
}  // namespace paddle