// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/fluid/platform/fast_divmod.h"
#endif

namespace phi {
namespace funcs {

// Cubic convolution kernel (Keys, 1981), piece for |x| <= 1.
template <typename T>
HOSTDEVICE inline T CubicConvolution1(T x, T A) {
  return ((A + static_cast<T>(2)) * x - (A + static_cast<T>(3))) * x * x +
         static_cast<T>(1);
}

// Cubic convolution kernel (Keys, 1981), piece for 1 < |x| < 2.
template <typename T>
HOSTDEVICE inline T CubicConvolution2(T x, T A) {
  return ((A * x - static_cast<T>(5) * A) * x + static_cast<T>(8) * A) * x -
         static_cast<T>(4) * A;
}

// Computes the four interpolation weights for a fractional offset t in
// [0, 1), using A = -0.75.
template <typename T>
HOSTDEVICE inline void get_cubic_upsample_coefficients(T coeffs[4], T t) {
  T A = static_cast<T>(-0.75);

  T x1 = t;
  coeffs[0] = CubicConvolution2<T>(x1 + static_cast<T>(1.0), A);
  coeffs[1] = CubicConvolution1<T>(x1, A);

  // opposite coefficients
  T x2 = static_cast<T>(1.0) - t;
  coeffs[2] = CubicConvolution1<T>(x2, A);
  coeffs[3] = CubicConvolution2<T>(x2 + static_cast<T>(1.0), A);
}

// Extracts N, C, D, H, W from a 3-D, 4-D, or 5-D shape according to the data
// layout; spatial dimensions absent from the shape are reported as 1.
inline void ExtractNCDWH(const DDim& dims,
                         const DataLayout& data_layout,
                         int* N,
                         int* C,
                         int* D,
                         int* H,
                         int* W) {
  *N = dims[0];

  if (dims.size() == 3) {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[2];
    *D = 1;
    *H = 1;
    *W = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
  } else if (dims.size() == 4) {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[3];
    *D = 1;
    *H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
    *W = data_layout == DataLayout::kNCHW ? dims[3] : dims[2];
  } else {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[4];
    *D = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
    *H = data_layout == DataLayout::kNCHW ? dims[3] : dims[2];
    *W = data_layout == DataLayout::kNCHW ? dims[4] : dims[3];
  }
}
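
// Illustrative sketch (not part of the original header; the shape values are
// made up for the example): ExtractNCDWH treats any layout other than kNCHW
// as channels-last, so for a 4-D NHWC input it fills the missing depth with 1:
//
//   int n, c, d, h, w;
//   ExtractNCDWH(phi::make_ddim({8, 224, 224, 3}),
//                DataLayout::kNHWC,
//                &n, &c, &d, &h, &w);
//   // n == 8, c == 3, d == 1, h == 224, w == 224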

// Reads each 1-element shape tensor into an int vector, staging device
// tensors through CPU first.
inline std::vector<int> get_new_shape(
    const std::vector<const DenseTensor*>& list_new_shape_tensor) {
  std::vector<int> vec_new_shape;
  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
    auto tensor = list_new_shape_tensor[i];
    PADDLE_ENFORCE_EQ(
        tensor->dims(),
        phi::make_ddim({1}),
        errors::InvalidArgument("The shape of dimension tensor should be [1], "
                                "but received %s.",
                                tensor->dims()));
#ifdef PADDLE_WITH_XPU
    if (tensor->place().GetType() == phi::AllocationType::XPU) {
      DenseTensor temp;
      paddle::framework::TensorCopySync(*tensor, phi::CPUPlace(), &temp);
      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
      continue;
    }
#endif
    if (paddle::platform::is_gpu_place(tensor->place())) {
      DenseTensor temp;
      paddle::framework::TensorCopySync(
          *tensor, paddle::platform::CPUPlace(), &temp);
      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
    } else {
      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
    }
  }
  return vec_new_shape;
}

// Copies the contents of a tensor into a std::vector<T>, staging through CPU
// when the tensor lives on GPU, NPU, or XPU.
template <typename T>
inline std::vector<T> get_new_data_from_tensor(
    const DenseTensor* new_data_tensor) {
  std::vector<T> vec_new_data;
  auto* new_data = new_data_tensor->data<T>();
  DenseTensor cpu_starts_tensor;
  if (paddle::platform::is_gpu_place(new_data_tensor->place())) {
    paddle::framework::TensorCopySync(
        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#ifdef PADDLE_WITH_ASCEND_CL
  if (paddle::platform::is_npu_place(new_data_tensor->place())) {
    paddle::framework::TensorCopySync(
        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#endif
#ifdef PADDLE_WITH_XPU
  if (paddle::platform::is_xpu_place(new_data_tensor->place())) {
    paddle::framework::TensorCopySync(
        *new_data_tensor, paddle::platform::CPUPlace(), &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#endif
  vec_new_data =
      std::vector<T>(new_data, new_data + new_data_tensor->numel());
  return vec_new_data;
}

#if defined(__NVCC__) || defined(__HIPCC__)
using paddle::platform::FastDivMod;

// Precomputed fast divide/modulo helpers for decomposing a flattened output
// index into coordinates inside GPU interpolation kernels.
struct FastDivModForInterpolate {
 public:
  FastDivMod channels_div;
  FastDivMod output_w_div;
  FastDivMod output_wc_div;

  explicit HOSTDEVICE FastDivModForInterpolate(const int channels,
                                               const int output_w,
                                               const int output_wc)
      : channels_div(FastDivMod(channels)),
        output_w_div(FastDivMod(output_w)),
        output_wc_div(FastDivMod(output_wc)) {}
};

#endif

}  // namespace funcs
}  // namespace phi
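
// Illustrative sketch (not part of the original header; assumes the
// flattened-NHWC indexing used by the GPU interpolate kernels, where the
// output is viewed as a 2-D [n, C*out_h*out_w] array): with divisors built as
//
//   FastDivModForInterpolate divmods(C, C * out_h * out_w, C * out_w);
//
// a flat output index tid = ((n*out_h + h)*out_w + w)*C + c decomposes as
//
//   auto nh = divmods.output_w_div.Divmod(tid);    // nh.val[0] == n
//   int  c  = divmods.channels_div.Divmod(tid).val[1];
//   auto hw = divmods.output_wc_div.Divmod(nh.val[1]);
//   int  h  = hw.val[0];
//   int  w  = divmods.channels_div.Divmod(hw.val[1]).val[0];
//
// FastDivMod::Divmod returns quotient/remainder in val[0]/val[1].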