/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <algorithm>
#include <type_traits>

#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/hostdevice.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include <thrust/execution_policy.h>
#include <thrust/transform.h>

#include "paddle/fluid/platform/details/cuda_transform_iterator_cast.h"
#endif

namespace paddle {
namespace platform {
34

35 36 37 38 39 40 41 42 43 44 45 46 47
// Transform applys a unary or a binary functor on each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::tranform.
//
// NOTE: We need to define InputIter and OutputIter defined as
//       different types, because the InputIter points op's inputs and
//       OutputIter pints to op's outputs.
//
// NOTE: We don't assume that InputIter to be const InputType* and
//       OutputIter to be OutputType*, because we might use a iterator
//       class, paddle::fluid::operators::RowwiseTRansformIterator.
Q
QI JUN 已提交
48
template <typename DeviceContext>
49
struct Transform {
50
  // The unary version.
51
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
52 53 54 55 56
  void operator()(const DeviceContext& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op);
57

58
  // The binary version.
59 60 61
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
62
            typename BinaryOperation>
63 64 65 66 67
  void operator()(const DeviceContext& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
68 69 70
                  BinaryOperation op);
};

71
// NOTE: After the phi kernel is migrated, it needs to be deleted.
72
template <>
Q
QI JUN 已提交
73
struct Transform<platform::CPUDeviceContext> {
74
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
75 76 77 78 79
  void operator()(const platform::CPUDeviceContext& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op) {
Y
Yu Yang 已提交
80 81 82
    std::transform(first, last, result, op);
  }

83 84 85
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
86
            typename BinaryOperation>
87 88 89 90 91
  void operator()(const platform::CPUDeviceContext& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
92
                  BinaryOperation op) {
Y
Yu Yang 已提交
93
    std::transform(first1, last1, first2, result, op);
94 95 96
  }
};

W
Wilber 已提交
97
template <>
98
struct Transform<phi::CPUContext> {
W
Wilber 已提交
99
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
100 101 102 103 104
  void operator()(const phi::CPUContext& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op) {
W
Wilber 已提交
105 106 107
    std::transform(first, last, result, op);
  }

108 109 110
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
W
Wilber 已提交
111
            typename BinaryOperation>
112 113 114 115 116
  void operator()(const phi::CPUContext& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
W
Wilber 已提交
117 118 119 120 121
                  BinaryOperation op) {
    std::transform(first1, last1, first2, result, op);
  }
};

122
#if defined(__NVCC__) || defined(__HIPCC__)
123
template <>
Q
QI JUN 已提交
124
struct Transform<platform::CUDADeviceContext> {
125
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
126 127 128 129 130
  void operator()(const platform::CUDADeviceContext& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op) {
131
    auto place = context.GetPlace();
132 133
    PADDLE_ENFORCE_EQ(is_gpu_place(place),
                      true,
G
GaoWei8 已提交
134 135
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
136 137 138 139
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
140 141
                      details::CastToCUDATransformIterator(result),
                      op);
142
#else
Q
QI JUN 已提交
143
    thrust::transform(thrust::cuda::par.on(context.stream()),
144 145
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
146 147
                      details::CastToCUDATransformIterator(result),
                      op);
148
#endif
149 150
  }

151 152 153
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
154
            typename BinaryOperation>
155 156 157 158 159
  void operator()(const platform::CUDADeviceContext& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
160
                  BinaryOperation op) {
161
    auto place = context.GetPlace();
162 163
    PADDLE_ENFORCE_EQ(is_gpu_place(place),
                      true,
164 165 166 167 168 169 170
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
171 172
                      details::CastToCUDATransformIterator(result),
                      op);
173 174 175 176 177
#else
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
178 179
                      details::CastToCUDATransformIterator(result),
                      op);
180 181 182 183 184
#endif
  }
};

template <>
185
struct Transform<phi::GPUContext> {
186
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
187 188 189 190 191
  void operator()(const phi::GPUContext& context,
                  InputIter first,
                  InputIter last,
                  OutputIter result,
                  UnaryOperation op) {
192
    auto place = context.GetPlace();
193 194
    PADDLE_ENFORCE_EQ(is_gpu_place(place),
                      true,
195 196 197 198 199 200
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
201 202
                      details::CastToCUDATransformIterator(result),
                      op);
203 204 205 206
#else
    thrust::transform(thrust::cuda::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
207 208
                      details::CastToCUDATransformIterator(result),
                      op);
209 210 211
#endif
  }

212 213 214
  template <typename InputIter1,
            typename InputIter2,
            typename OutputIter,
215
            typename BinaryOperation>
216 217 218 219 220
  void operator()(const phi::GPUContext& context,
                  InputIter1 first1,
                  InputIter1 last1,
                  InputIter2 first2,
                  OutputIter result,
221
                  BinaryOperation op) {
222
    auto place = context.GetPlace();
223 224
    PADDLE_ENFORCE_EQ(is_gpu_place(place),
                      true,
G
GaoWei8 已提交
225 226
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
227 228 229 230 231
#ifdef __HIPCC__
    thrust::transform(thrust::hip::par.on(context.stream()),
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
232 233
                      details::CastToCUDATransformIterator(result),
                      op);
234
#else
Q
QI JUN 已提交
235
    thrust::transform(thrust::cuda::par.on(context.stream()),
236 237 238
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
239 240
                      details::CastToCUDATransformIterator(result),
                      op);
241
#endif
Y
Yu Yang 已提交
242 243
  }
};
244
#endif

}  // namespace platform
}  // namespace paddle