/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <algorithm>
#include <type_traits>

#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/hostdevice.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/execution_policy.h>
#include <thrust/transform.h>

#include "paddle/fluid/platform/details/cuda_transform_iterator_cast.h"
#endif

namespace paddle {
namespace platform {

// Transform applies a unary or a binary functor on each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::transform.
//
// NOTE: We need InputIter and OutputIter defined as different types,
//       because the InputIter points to op's inputs and the
//       OutputIter points to op's outputs.
//
// NOTE: We don't assume that InputIter to be const InputType* and
//       OutputIter to be OutputType*, because we might use an iterator
//       class, e.g., paddle::fluid::operators::RowwiseTransformIterator.
template <typename DeviceContext>
struct Transform {
  // The unary version: writes op(*it) to result for every it in
  // [first, last).
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const DeviceContext& context, InputIter first, InputIter last,
                  OutputIter result, UnaryOperation op);

  // The binary version: writes op(*it1, *it2) to result, pairing the
  // ranges [first1, last1) and [first2, first2 + (last1 - first1)).
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
  void operator()(const DeviceContext& context, InputIter1 first1,
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op);
};

// NOTE: After the phi kernel is migrated, it needs to be deleted.
template <>
Q
QI JUN 已提交
65
struct Transform<platform::CPUDeviceContext> {
66
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
Q
QI JUN 已提交
67 68
  void operator()(const platform::CPUDeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
Y
Yu Yang 已提交
69 70 71
    std::transform(first, last, result, op);
  }

72 73
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
Q
QI JUN 已提交
74
  void operator()(const platform::CPUDeviceContext& context, InputIter1 first1,
75 76
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
Y
Yu Yang 已提交
77
    std::transform(first1, last1, first2, result, op);
78 79 80
  }
};

template <>
82
struct Transform<phi::CPUContext> {
W
Wilber 已提交
83
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
84
  void operator()(const phi::CPUContext& context, InputIter first,
W
Wilber 已提交
85 86 87 88 89 90
                  InputIter last, OutputIter result, UnaryOperation op) {
    std::transform(first, last, result, op);
  }

  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
91
  void operator()(const phi::CPUContext& context, InputIter1 first1,
W
Wilber 已提交
92 93 94 95 96 97
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
    std::transform(first1, last1, first2, result, op);
  }
};

#if defined(__NVCC__) || defined(__HIPCC__)
template <>
Q
QI JUN 已提交
100
struct Transform<platform::CUDADeviceContext> {
101
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
Q
QI JUN 已提交
102 103
  void operator()(const platform::CUDADeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
104
    auto place = context.GetPlace();
G
GaoWei8 已提交
105 106 107
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
108 109 110 111 112 113
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
#else
Q
QI JUN 已提交
114
    thrust::transform(thrust::cuda::par.on(context.stream()),
115 116 117
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
118
#endif
119 120 121 122
  }

  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
Q
QI JUN 已提交
123
  void operator()(const platform::CUDADeviceContext& context, InputIter1 first1,
124 125
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    auto place = context.GetPlace();
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
#else
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
#endif
  }
};

template <>
147
struct Transform<phi::GPUContext> {
148
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
149
  void operator()(const phi::GPUContext& context, InputIter first,
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
                  InputIter last, OutputIter result, UnaryOperation op) {
    auto place = context.GetPlace();
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
#else
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
#endif
  }

  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
170
  void operator()(const phi::GPUContext& context, InputIter1 first1,
171 172
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
173
    auto place = context.GetPlace();
G
GaoWei8 已提交
174 175 176
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
177 178 179 180 181 182 183
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
#else
Q
QI JUN 已提交
184
    thrust::transform(thrust::cuda::par.on(context.stream()),
185 186 187 188
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
189
#endif
Y
Yu Yang 已提交
190 191
  }
};
#endif
}  // namespace platform
}  // namespace paddle