// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>

#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/phi/kernels/primitive/datamover_primitives.h"
#endif

namespace phi {
namespace funcs {

// Keys cubic convolution kernel, branch for |x| <= 1.
template <typename T>
HOSTDEVICE inline T CubicConvolution1(T x, T A) {
  return ((A + static_cast<T>(2)) * x - (A + static_cast<T>(3))) * x * x +
         static_cast<T>(1);
}

// Keys cubic convolution kernel, branch for 1 < |x| < 2.
template <typename T>
HOSTDEVICE inline T CubicConvolution2(T x, T A) {
  return ((A * x - static_cast<T>(5) * A) * x + static_cast<T>(8) * A) * x -
         static_cast<T>(4) * A;
}

// Computes the four bicubic interpolation weights for a fractional offset t.
template <typename T>
HOSTDEVICE inline void get_cubic_upsample_coefficients(T coeffs[4], T t) {
  T A = static_cast<T>(-0.75);

  T x1 = t;
  coeffs[0] = CubicConvolution2<T>(x1 + static_cast<T>(1.0), A);
  coeffs[1] = CubicConvolution1<T>(x1, A);

  // opposite coefficients
  T x2 = static_cast<T>(1.0) - t;
  coeffs[2] = CubicConvolution1<T>(x2, A);
  coeffs[3] = CubicConvolution2<T>(x2 + static_cast<T>(1.0), A);
}

// Extracts N, C, D, H and W from a 3-D/4-D/5-D tensor shape according to the
// data layout; missing spatial dimensions default to 1.
inline void ExtractNCDWH(const DDim& dims,
                         const DataLayout& data_layout,
                         int* N,
                         int* C,
                         int* D,
                         int* H,
                         int* W) {
  *N = dims[0];

  if (dims.size() == 3) {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[2];
    *D = 1;
    *H = 1;
    *W = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
  } else if (dims.size() == 4) {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[3];
    *D = 1;
    *H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
    *W = data_layout == DataLayout::kNCHW ? dims[3] : dims[2];
  } else {
    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[4];
    *D = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
    *H = data_layout == DataLayout::kNCHW ? dims[3] : dims[2];
    *W = data_layout == DataLayout::kNCHW ? dims[4] : dims[3];
  }
}

// Reads the target output shape from a list of scalar shape tensors, copying
// device-resident tensors to CPU before dereferencing them.
inline std::vector<int> get_new_shape(
    const std::vector<const DenseTensor*>& list_new_shape_tensor) {
  // get the shape value from each scalar shape tensor
  std::vector<int> vec_new_shape;
  auto& pool = phi::DeviceContextPool::Instance();
  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
    auto tensor = list_new_shape_tensor[i];
    phi::DeviceContext* dev_ctx = pool.Get(tensor->place());
    PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) ||
                          tensor->dims() == phi::make_ddim({}),
                      true,
                      errors::InvalidArgument(
                          "The shape of dimension tensor should be [1] or [], "
                          "but received %s.",
                          tensor->dims()));
#ifdef PADDLE_WITH_XPU
    if (tensor->place().GetType() == phi::AllocationType::XPU) {
      DenseTensor temp;
      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
      continue;
    }
#endif
    if (tensor->place().GetType() == phi::AllocationType::GPU) {
      DenseTensor temp;
      phi::Copy(*dev_ctx, *tensor, phi::CPUPlace(), true, &temp);
      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
    } else {
      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
    }
  }

  return vec_new_shape;
}

// Copies the contents of a (possibly device-resident) tensor into a host
// std::vector<T>.
template <typename T>
inline std::vector<T> get_new_data_from_tensor(
    const DenseTensor* new_data_tensor) {
  std::vector<T> vec_new_data;
  auto* new_data = new_data_tensor->data<T>();
  DenseTensor cpu_starts_tensor;
  auto& pool = phi::DeviceContextPool::Instance();
  phi::DeviceContext* dev_ctx = pool.Get(new_data_tensor->place());
  if (paddle::platform::is_gpu_place(new_data_tensor->place())) {
    phi::Copy(
        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#ifdef PADDLE_WITH_ASCEND_CL
  if (paddle::platform::is_npu_place(new_data_tensor->place())) {
    phi::Copy(
        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#endif
#ifdef PADDLE_WITH_XPU
  if (paddle::platform::is_xpu_place(new_data_tensor->place())) {
    phi::Copy(
        *dev_ctx, *new_data_tensor, phi::CPUPlace(), true, &cpu_starts_tensor);
    new_data = cpu_starts_tensor.data<T>();
  }
#endif
  vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
  return vec_new_data;
}

#if defined(__NVCC__) || defined(__HIPCC__)
using phi::kps::details::FastDivMod;

// Pre-computed fast division helpers used to decompose flattened output
// indices in the GPU interpolate kernels.
struct FastDivModForInterpolate {
 public:
  FastDivMod channels_div;
  FastDivMod output_w_div;
  FastDivMod output_wc_div;

  explicit HOSTDEVICE FastDivModForInterpolate(const int channels,
                                               const int output_w,
                                               const int output_wc)
      : channels_div(FastDivMod(channels)),
        output_w_div(FastDivMod(output_w)),
        output_wc_div(FastDivMod(output_wc)) {}
};
#endif

}  // namespace funcs
}  // namespace phi
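
// Usage sketch (illustrative only, not part of this header): computing the
// four bicubic weights produced by get_cubic_upsample_coefficients for a
// fractional offset. Assumes T = float; with the Keys kernel (A = -0.75) the
// four weights sum to 1 for any offset in [0, 1).
//
//   float w[4];
//   phi::funcs::get_cubic_upsample_coefficients<float>(w, 0.3f);
//   // interpolated value = w[0]*p0 + w[1]*p1 + w[2]*p2 + w[3]*p3, where
//   // p0..p3 stand for the four neighboring samples along the resized axis.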