/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/eigen/eigen_function.h"

namespace phi {
namespace funcs {

template <typename T, int Rank>
struct EigenBroadcast<Eigen::GpuDevice, T, Rank> {
  using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
  using InType = Eigen::TensorMap<
      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
  using InType32BitIndex =
      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
                       Eigen::Aligned>;
  using OutType = Eigen::TensorMap<
      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
  using OutType32BitIndex =
      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
                       Eigen::Aligned>;

34 35 36
  static void Eval(const Eigen::GpuDevice& dev,
                   OutType out,
                   InType in,
37 38 39 40
                   const Array& bcast) {
    out.device(dev) = in.broadcast(bcast);
  }

41 42 43 44
  static void Eval(const Eigen::GpuDevice& dev,
                   OutType32BitIndex out,
                   InType32BitIndex in,
                   const Array& bcast) {
45 46 47 48 49 50 51 52 53 54 55 56
    out.device(dev) = in.broadcast(bcast);
  }
};

template <typename T, int Rank>
struct EigenBroadcastGrad<Eigen::GpuDevice, T, Rank> {
  using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
  using Array2 = Eigen::DSizes<Eigen::DenseIndex, Rank * 2>;
  using InType = Eigen::TensorMap<
      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
  using OutType =
      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
57 58 59 60 61
  static void Eval(const Eigen::GpuDevice& dev,
                   OutType out,
                   InType in,
                   const Array& reduce_dims,
                   const Array2& reshape_dims) {
62 63 64 65 66 67 68 69 70 71 72 73 74
    out.device(dev) =
        in.reshape(reshape_dims).sum(reduce_dims).reshape(out.dimensions());
  }
};

// Explicitly instantiates FUNCTOR<Eigen::GpuDevice, T, Rank> for ranks 1
// through 6. The macro body has no trailing semicolon; each use supplies it.
#define INSTANTIATION(FUNCTOR, T)                  \
  template struct FUNCTOR<Eigen::GpuDevice, T, 1>; \
  template struct FUNCTOR<Eigen::GpuDevice, T, 2>; \
  template struct FUNCTOR<Eigen::GpuDevice, T, 3>; \
  template struct FUNCTOR<Eigen::GpuDevice, T, 4>; \
  template struct FUNCTOR<Eigen::GpuDevice, T, 5>; \
  template struct FUNCTOR<Eigen::GpuDevice, T, 6>
// Forward broadcast: ranks 1-6 for each supported element type.
INSTANTIATION(EigenBroadcast, bool);
INSTANTIATION(EigenBroadcast, dtype::float16);
INSTANTIATION(EigenBroadcast, float);
INSTANTIATION(EigenBroadcast, double);
INSTANTIATION(EigenBroadcast, int);
INSTANTIATION(EigenBroadcast, int64_t);
// Broadcast gradient: ranks 1-6 for each supported element type.
INSTANTIATION(EigenBroadcastGrad, bool);
INSTANTIATION(EigenBroadcastGrad, float);
INSTANTIATION(EigenBroadcastGrad, dtype::float16);
INSTANTIATION(EigenBroadcastGrad, double);
INSTANTIATION(EigenBroadcastGrad, int);
INSTANTIATION(EigenBroadcastGrad, int64_t);
// Broadcast gradient additionally gets rank-0 instantiations.
// NOTE(review): bool has no rank-0 instantiation here, unlike ranks 1-6;
// confirm that this is intentional.
template struct EigenBroadcastGrad<Eigen::GpuDevice, float, 0>;
template struct EigenBroadcastGrad<Eigen::GpuDevice, dtype::float16, 0>;
template struct EigenBroadcastGrad<Eigen::GpuDevice, double, 0>;
template struct EigenBroadcastGrad<Eigen::GpuDevice, int, 0>;
template struct EigenBroadcastGrad<Eigen::GpuDevice, int64_t, 0>;
#undef INSTANTIATION

}  // namespace funcs
}  // namespace phi