// concat_kernel.cc
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/concat_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/lod_utils.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
#include "paddle/phi/kernels/funcs/concat_funcs.h"

namespace phi {

/**
 * @brief XPU concat kernel: joins the tensors in `x` along one axis and
 *        writes the result into `out`.
 *
 * Steps performed:
 *   1. Validate that `x` is non-empty, that x[0] is non-null, and that the
 *      resolved axis lies in [0, rank of x[0]).
 *   2. When concatenating along axis 0 and x[0] carries LoD, append the
 *      length-based LoD of the remaining inputs to the output LoD (only if
 *      every input carries LoD of the same level).
 *   3. Allocate `out` and pass every non-null input with numel() > 0 to
 *      xpu::concat.
 *
 * @tparam T        Element type of the tensors.
 * @tparam Context  Device context type; must provide Alloc<T>() and
 *                  x_context().
 * @param dev_ctx      Device context used for allocation and the XDNN call.
 * @param x            Input tensors. Must not be empty; x[0] must not be null.
 * @param axis_scalar  Requested concat axis; it is resolved through
 *                     phi::funcs::ComputeAxis using the rank of x[0].
 * @param out          Output tensor, allocated by this function.
 *
 * Enforce failures:
 *   - InvalidArgument: empty input list, null x[0], axis out of range, or no
 *     input with numel() > 0.
 *   - Unimplemented: inputs carry LoD of different levels (axis 0 only).
 *   - XDNN failure reported by PADDLE_ENFORCE_XDNN_SUCCESS.
 */
template <typename T, typename Context>
void ConcatKernel(const Context& dev_ctx,
                  const std::vector<const DenseTensor*>& x,
                  const Scalar& axis_scalar,
                  DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;

  // Guard the x[0] accesses below: indexing an empty vector is undefined.
  PADDLE_ENFORCE_GT(
      x.size(),
      0,
      phi::errors::InvalidArgument("The input of concat should not be empty."));
  PADDLE_ENFORCE_NE(
      x[0],
      nullptr,
      phi::errors::InvalidArgument("The input should not be null."));

  const int rank = x[0]->dims().size();
  int64_t axis = axis_scalar.to<int64_t>();
  axis = phi::funcs::ComputeAxis(axis, rank);
  PADDLE_ENFORCE_GE(
      axis,
      0,
      phi::errors::InvalidArgument("concat: axis should be larger than or "
                                   "equal to 0, but received axis is %d.",
                                   axis));
  PADDLE_ENFORCE_LT(axis,
                    rank,
                    phi::errors::InvalidArgument(
                        "concat: axis should be less than x[0]->dims()! "
                        "But received axis is %d, while x[0]->dims() "
                        "size is %d.",
                        axis,
                        rank));

  // If axis is 0, the lod of the output is not the same as inputs.
  if (axis == 0 && x[0]->lod().size() > 0) {
    const size_t lod_size_0 = x[0]->lod().size();
    // Stays true only while every remaining input also carries LoD.
    bool all_inputs_have_lod = true;
    for (size_t i = 1; i < x.size(); ++i) {
      if (x[i]->lod().size() == 0) {
        all_inputs_have_lod = false;
        break;
      }
      PADDLE_ENFORCE_EQ(
          x[i]->lod().size(),
          lod_size_0,
          phi::errors::Unimplemented(
              "The lod level of all input LoDTensors should be same. "
              "Maybe different lod level of input LoDTensors can concat, "
              "it is not supported currently. The lod level of %dth input "
              "is %d and first input is %d.",
              i,
              x[i]->lod().size(),
              lod_size_0));
    }
    if (all_inputs_have_lod) {
      auto* out_lod = out->mutable_lod();
      // NOTE(review): the loop starts at 1, presumably because `out` already
      // holds the LoD of x[0] before this kernel runs -- confirm.
      for (size_t i = 1; i < x.size(); ++i) {
        auto in_lod = phi::ConvertToLengthBasedLoD(x[i]->lod());
        phi::AppendLoD(out_lod, in_lod);
      }
    }
  }

  dev_ctx.template Alloc<T>(out);

  // Collect the data pointer and int shape of each input that actually holds
  // elements; null or zero-element inputs are skipped.
  std::vector<std::vector<int>> xdims_list;
  std::vector<const XPUType*> ptrs;
  xdims_list.reserve(x.size());
  ptrs.reserve(x.size());
  for (const DenseTensor* input : x) {
    if (input == nullptr || input->numel() <= 0) {
      continue;
    }
    ptrs.push_back(reinterpret_cast<const XPUType*>(input->data<T>()));
    const int input_rank = input->dims().size();
    xdims_list.emplace_back(input_rank);
    std::vector<int>& shape = xdims_list.back();
    for (int j = 0; j < input_rank; ++j) {
      // The shape list passed to xpu::concat holds int, so each extent is
      // narrowed here.
      shape[j] = static_cast<int>(input->dims()[j]);
    }
  }

  PADDLE_ENFORCE_GT(xdims_list.size(),
                    0,
                    phi::errors::InvalidArgument("No tensor need concat"));
  int r = xpu::concat<XPUType>(dev_ctx.x_context(),
                               ptrs,
                               reinterpret_cast<XPUType*>(out->data<T>()),
                               xdims_list,
                               axis);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "concat");
}

}  // namespace phi

// Register phi::ConcatKernel under the kernel name `concat` for the XPU
// backend with ALL_LAYOUT, instantiated for float and phi::dtype::float16.
// The trailing braces are the macro's body block, which is left empty here.
PD_REGISTER_KERNEL(
    concat, XPU, ALL_LAYOUT, phi::ConcatKernel, float, phi::dtype::float16) {}