// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#ifdef PADDLE_WITH_CUDA
#include <thrust/random.h>
#endif

namespace paddle {
namespace operators {

// Maps a device context type to the random engine and uniform integer
// distribution types usable on that device.
template <typename DeviceContext>
struct Random;

template <>
struct Random<platform::CPUDeviceContext> {
  using Engine = std::minstd_rand;

  template <typename T>
  using UniformIntDist = std::uniform_int_distribution<T>;
};

#ifdef PADDLE_WITH_CUDA
template <>
struct Random<platform::CUDADeviceContext> {
  using Engine = thrust::minstd_rand;

  template <typename T>
  using UniformIntDist = thrust::uniform_int_distribution<T>;
};
#endif

// Recursively copies the cropped region of x (starting at the given offsets)
// into the contiguous output buffer, one dimension at a time.
template <typename T>
HOSTDEVICE inline void StridedMemcpy(const T* x, const size_t* x_dims, T* out,
                                     const size_t* out_dims, int i, int rank,
                                     size_t prod_x_remain,
                                     size_t prod_out_remain,
                                     const size_t* offsets) {
  size_t x_dim_i = x_dims[i];
  size_t out_dim_i = out_dims[i];
  size_t x_stride = prod_x_remain / x_dim_i;
  size_t out_stride = prod_out_remain / out_dim_i;
  size_t offset_i = offsets[i];

  if (i == rank - 1) {
    PADDLE_ASSERT(x_stride == 1 && out_stride == 1);
    x += offset_i;
    for (size_t j = 0; j < out_dim_i; ++j) {
      *out++ = *x++;
    }
  } else {
    x += offset_i * x_stride;
    for (size_t j = 0; j < out_dim_i; ++j) {
      StridedMemcpy<T>(x, x_dims, out, out_dims, i + 1, rank, x_stride,
                       out_stride, offsets);
      x += x_stride;
      out += out_stride;
    }
  }
}

// Crops one instance per invocation. The leading num_batchsize_dims_
// dimensions are treated as batch dimensions and must match between X and
// Out; random offsets are drawn only for the remaining (cropped) dimensions.
template <typename DeviceContext, typename T>
struct RandomCropFunctor {
  const T* x_;
  T* out_;
  size_t x_dims_[9];
  size_t out_dims_[9];
  int num_batchsize_dims_;
  int rank_;
  int64_t seed_;

  size_t prod_batchsize_dims_;
  size_t prod_x_ins_dims_;
  size_t prod_out_ins_dims_;

  RandomCropFunctor(const T* x, T* out, const framework::DDim& x_dims,
                    const framework::DDim& out_dims, int num_batchsize_dims,
                    int64_t seed)
      : x_(x),
        out_(out),
        num_batchsize_dims_(num_batchsize_dims),
        rank_(x_dims.size()),
        seed_(seed) {
    PADDLE_ENFORCE_EQ(x_dims.size(), out_dims.size());
    PADDLE_ENFORCE_GT(rank_, num_batchsize_dims_);
    prod_batchsize_dims_ = 1;
    prod_x_ins_dims_ = 1;
    prod_out_ins_dims_ = 1;
    for (size_t i = 0; i < static_cast<size_t>(rank_); ++i) {
      size_t x_dim_i = x_dims[i];
      size_t out_dim_i = out_dims[i];
      x_dims_[i] = x_dim_i;
      out_dims_[i] = out_dim_i;
      if (i < static_cast<size_t>(num_batchsize_dims_)) {
        PADDLE_ENFORCE_EQ(x_dim_i, out_dim_i);
        prod_batchsize_dims_ *= x_dim_i;
      } else {
        prod_x_ins_dims_ *= x_dim_i;
        prod_out_ins_dims_ *= out_dim_i;
      }
    }
  }

  HOSTDEVICE void operator()(size_t ins_idx) {
    // Each instance gets its own slice of the random sequence derived from
    // the shared seed.
    typename Random<DeviceContext>::Engine engine(seed_);
    engine.discard(ins_idx * (rank_ - num_batchsize_dims_));
    size_t offsets[9] = {};
    for (int i = num_batchsize_dims_; i < rank_; ++i) {
      typename Random<DeviceContext>::template UniformIntDist<size_t> dist(
          0, x_dims_[i] - out_dims_[i]);
      offsets[i - num_batchsize_dims_] = dist(engine);
    }

    const T* x = x_ + ins_idx * prod_x_ins_dims_;
    T* out = out_ + ins_idx * prod_out_ins_dims_;

    StridedMemcpy<T>(x, x_dims_ + num_batchsize_dims_, out,
                     out_dims_ + num_batchsize_dims_, 0,
                     rank_ - num_batchsize_dims_, prod_x_ins_dims_,
                     prod_out_ins_dims_, offsets);
  }
};
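// RandomCropKernel: reads the seed from the "Seed" input when it is
// initialized (falling back to the "startup_seed" attribute otherwise), runs
// RandomCropFunctor once per batch instance to copy a randomly offset crop of
// X into Out, and writes an advanced engine state to "SeedOut" so that
// subsequent runs continue the random sequence.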
template <typename DeviceContext, typename T>
class RandomCropKernel : public framework::OpKernel<T> {
 public:
  virtual void Compute(const framework::ExecutionContext& ctx) const {
    int64_t seed = 0;
    auto& seed_tensor = detail::Ref(ctx.Input<framework::LoDTensor>("Seed"));
    if (seed_tensor.IsInitialized()) {
      if (platform::is_cpu_place(seed_tensor.place())) {
        seed = *seed_tensor.data<int64_t>();
      } else {
        LOG(WARNING)
            << "It is slow to place seed in GPU memory. Please verify "
               "your program";
        framework::LoDTensor cpu_seed;
        framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed);
        seed = *cpu_seed.data<int64_t>();
      }
    } else {
      VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
                 "'startup_seed' instead.";
      seed = ctx.Attr<int>("startup_seed");
    }
    auto shape = ctx.Attr<std::vector<int>>("shape");
    auto& x = detail::Ref(ctx.Input<framework::LoDTensor>("X"));
    auto& out = detail::Ref(ctx.Output<framework::LoDTensor>("Out"));
    int num_batchsize_dims = x.dims().size() - shape.size();
    RandomCropFunctor<DeviceContext, T> functor(
        x.data<T>(), out.mutable_data<T>(ctx.GetPlace()), x.dims(), out.dims(),
        num_batchsize_dims, seed);
    platform::ForRange<DeviceContext> for_range(
        ctx.template device_context<DeviceContext>(),
        functor.prod_batchsize_dims_);

    for_range(functor);

    // Skip past all random numbers consumed by the functor and store the next
    // seed in "SeedOut" (on CPU) for the following iteration.
    Random<platform::CPUDeviceContext>::Engine engine(seed);
    engine.discard(functor.prod_batchsize_dims_ *
                   (functor.rank_ - functor.num_batchsize_dims_));
    *ctx.Output<framework::LoDTensor>("SeedOut")->mutable_data<int64_t>(
        framework::make_ddim({1}), platform::CPUPlace()) = engine();
  }
};

// TODO(fengjiayi): Backward of random crop op

}  // namespace operators
}  // namespace paddle