transform.h 4.6 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Y
Yu Yang 已提交
14 15 16

#pragma once

17 18 19
#include <algorithm>
#include <type_traits>

Y
Yi Wang 已提交
20 21 22 23
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/place.h"
Y
Yu Yang 已提交
24 25

#ifdef __NVCC__
Y
Yu Yang 已提交
26
#include <thrust/execution_policy.h>
Y
Yu Yang 已提交
27
#include <thrust/transform.h>
28
#include "paddle/fluid/platform/details/cuda_transform_iterator_cast.h"
Y
Yu Yang 已提交
29 30 31 32
#endif

namespace paddle {
namespace platform {
33

34 35 36 37 38 39 40 41 42 43 44 45 46
// Transform applies a unary or a binary functor to each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::transform.
//
// NOTE: We need to define InputIter and OutputIter as
//       different types, because the InputIter points to the op's inputs
//       and the OutputIter points to the op's outputs.
//
// NOTE: We don't assume InputIter to be const InputType* and
//       OutputIter to be OutputType*, because we might use an iterator
//       class, paddle::fluid::operators::RowwiseTransformIterator.
Q
QI JUN 已提交
47
// Primary template of the Transform functor, parameterized on the device
// context type.
//
// Both call operators are only declared here, not defined; this file supplies
// the definitions through explicit specializations of the struct for concrete
// device context types.
template <typename DeviceContext>
struct Transform {
  // The unary version.
  //
  // context:       device context the transform is issued on.
  // [first, last): input range.
  // result:        beginning of the output range.
  // op:            unary functor applied to each input element.
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
  void operator()(const DeviceContext& context, InputIter first, InputIter last,
                  OutputIter result, UnaryOperation op);

  // The binary version.
  //
  // context:         device context the transform is issued on.
  // [first1, last1): first input range.
  // first2:          beginning of the second input range.
  // result:          beginning of the output range.
  // op:              binary functor applied to each pair of input elements.
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
  void operator()(const DeviceContext& context, InputIter1 first1,
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op);
};

template <>
Q
QI JUN 已提交
63
struct Transform<platform::CPUDeviceContext> {
64
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
Q
QI JUN 已提交
65 66
  void operator()(const platform::CPUDeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
Y
Yu Yang 已提交
67 68 69
    std::transform(first, last, result, op);
  }

70 71
  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
Q
QI JUN 已提交
72
  void operator()(const platform::CPUDeviceContext& context, InputIter1 first1,
73 74
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
Y
Yu Yang 已提交
75
    std::transform(first1, last1, first2, result, op);
76 77 78
  }
};

Y
Yu Yang 已提交
79
#ifdef __NVCC__
80
template <>
Q
QI JUN 已提交
81
struct Transform<platform::CUDADeviceContext> {
82
  template <typename InputIter, typename OutputIter, typename UnaryOperation>
Q
QI JUN 已提交
83 84
  void operator()(const platform::CUDADeviceContext& context, InputIter first,
                  InputIter last, OutputIter result, UnaryOperation op) {
85
    auto place = context.GetPlace();
G
GaoWei8 已提交
86 87 88
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
Q
QI JUN 已提交
89
    thrust::transform(thrust::cuda::par.on(context.stream()),
90 91 92
                      details::CastToCUDATransformIterator(first),
                      details::CastToCUDATransformIterator(last),
                      details::CastToCUDATransformIterator(result), op);
93 94 95 96
  }

  template <typename InputIter1, typename InputIter2, typename OutputIter,
            typename BinaryOperation>
Q
QI JUN 已提交
97
  void operator()(const platform::CUDADeviceContext& context, InputIter1 first1,
98 99 100
                  InputIter1 last1, InputIter2 first2, OutputIter result,
                  BinaryOperation op) {
    auto place = context.GetPlace();
G
GaoWei8 已提交
101 102 103
    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                      platform::errors::PreconditionNotMet(
                          "The CUDA Transform must be used in GPU place."));
Q
QI JUN 已提交
104
    thrust::transform(thrust::cuda::par.on(context.stream()),
105 106 107 108
                      details::CastToCUDATransformIterator(first1),
                      details::CastToCUDATransformIterator(last1),
                      details::CastToCUDATransformIterator(first2),
                      details::CastToCUDATransformIterator(result), op);
Y
Yu Yang 已提交
109 110
  }
};
111
#endif
Y
Yu Yang 已提交
112 113 114

}  // namespace platform
}  // namespace paddle