/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17 18 19
#include <algorithm>
#include <type_traits>

Y
Yi Wang 已提交
20 21 22 23
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/place.h"
Y
Yu Yang 已提交
24

25
#if defined(__NVCC__) || defined(__HIPCC__)
Y
Yu Yang 已提交
26
#include <thrust/execution_policy.h>
Y
Yu Yang 已提交
27
#include <thrust/transform.h>
28
#include "paddle/fluid/platform/details/cuda_transform_iterator_cast.h"
Y
Yu Yang 已提交
29 30 31 32
#endif

namespace paddle {
namespace platform {
33

34 35 36 37 38 39 40 41 42 43 44 45 46
// Transform applies a unary or a binary functor to each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::transform.
//
// NOTE: We need to define InputIter and OutputIter as different
//       types, because InputIter points to the op's inputs and
//       OutputIter points to the op's outputs.
//
// NOTE: We don't assume InputIter to be const InputType* and
//       OutputIter to be OutputType*, because we might use an iterator
//       class, paddle::fluid::operators::RowwiseTransformIterator.
Q
QI JUN 已提交
47
// Primary template of the device-dispatched Transform functor.
//
// Only the two call operators are declared here; this primary template
// provides no definitions for them, so usable behavior comes from the explicit
// specializations for a concrete DeviceContext.
//
// DeviceContext: the device context type used to select a specialization.
template <typename DeviceContext>
struct Transform {
  // The unary version.
  //
  // context: device context the transform is associated with.
  // first, last: the input range [first, last).
  // result: start of the output range.
  // op: unary functor applied to each input element.
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const DeviceContext& context, InputIter first, InputIter last,
                  OutputIter result, UnaryOperation op);

  // The binary version.
  //
  // context: device context the transform is associated with.
  // first1, last1: the first input range [first1, last1).
  // first2: start of the second input range.
  // result: start of the output range.
  // op: binary functor applied to each pair of input elements.
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
  void operator()(const DeviceContext& context, InputIter1 first1,
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op);
};

template <>
Q
QI JUN 已提交
63
struct Transform<platform::CPUDeviceContext> {
64
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
Q
QI JUN 已提交
65 66
  void operator()(const platform::CPUDeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
Y
Yu Yang 已提交
67 68 69
    std::transform(first, last, result, op);
  }

70 71
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
Q
QI JUN 已提交
72
  void operator()(const platform::CPUDeviceContext& context, InputIter1 first1,
73 74
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
Y
Yu Yang 已提交
75
    std::transform(first1, last1, first2, result, op);
76 77 78
  }
};

79
#if defined(__NVCC__) || defined(__HIPCC__)
// GPU specialization of Transform, compiled only under NVCC or HIPCC.
//
// Both overloads first enforce (via PADDLE_ENFORCE_EQ) that the context's
// place is a GPU place, then call thrust::transform with an execution policy
// bound to context.stream(). Every iterator is passed through
// details::CastToCUDATransformIterator before being handed to thrust.
template <>
struct Transform<platform::CUDADeviceContext> {
  // Unary version: applies `op` over [first, last), writing to `result`.
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const platform::CUDADeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
    auto place = context.GetPlace();
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
    // Only the execution policy differs between the HIP and CUDA backends, so
    // select it once and share the thrust::transform call.
#ifdef __HIPCC__
    auto policy = thrust::hip::par.on(context.stream());
#else
    auto policy = thrust::cuda::par.on(context.stream());
#endif
    thrust::transform(policy, details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
  }

  // Binary version: applies `op` over [first1, last1) paired with the range
  // starting at `first2`, writing to `result`.
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
  void operator()(const platform::CUDADeviceContext& context, InputIter1 first1,
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
    auto place = context.GetPlace();
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
    // Only the execution policy differs between the HIP and CUDA backends, so
    // select it once and share the thrust::transform call.
#ifdef __HIPCC__
    auto policy = thrust::hip::par.on(context.stream());
#else
    auto policy = thrust::cuda::par.on(context.stream());
#endif
    thrust::transform(policy, details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
  }
};
#endif
Y
Yu Yang 已提交
127 128 129

}  // namespace platform
}  // namespace paddle