diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index dfd93f49e73404427dbeaedd87c8c094068cb76f..e5a7f0d2bef5403e49330afd539ee0702cb528c5 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -216,18 +216,36 @@ copy(inference_lib_dist DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) +# TODO(chenweihang, before 11.27) Here, the header file of pten is copied to +# the experimental directory, the include path needs to be changed, so the +# header file path needs to be processed here +# copy api headers for custom op copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include/* - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) + SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/* + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/ext/) +copy(inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/* + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/include/) +copy(inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/all.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/) +copy(inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/* + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/) copy(inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/) copy(inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/common/) copy(inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) +# In order to be compatible with the original behavior, the header file name needs to be changed +copy(inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h) + # CAPI inference library for only inference set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING diff --git a/paddle/extension.h b/paddle/extension.h index 98d4bfd0326c5c524fcac9129f58d0ae99fc8afe..8394f87c31d1bdeaad75e94ad00e5f8c8d3433dd 100644 --- a/paddle/extension.h +++ b/paddle/extension.h @@ -15,4 +15,4 @@ limitations under the License. */ #pragma once // All paddle apis in C++ frontend -#include "paddle/extension/include/ext_all.h" +#include "paddle/pten/api/all.h" diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index dcff02a662e2734bc66d4cf219fce527fd0961aa..369aa64016951ac4429c963ff35e9403dfa1f3ce 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -1,3 +1,8 @@ +# Adapt to custom op mechanism: Include the header files related to the data type +# to avoid exposing the path of the underlying file, remove it after moving +# float16.h/complex.h/bfloat16.h into pten +include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) + add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(distributed) diff --git a/paddle/fluid/extension/include/ext_dtype.h b/paddle/fluid/extension/include/ext_dtype.h deleted file mode 100644 index a0816b65a3d15c9cf1384d1b6f18fa79f9199a83..0000000000000000000000000000000000000000 --- a/paddle/fluid/extension/include/ext_dtype.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#pragma once - -#include -#include - -#include "complex.h" // NOLINT -#include "ext_exception.h" // NOLINT -#include "float16.h" // NOLINT - -namespace paddle { - -using complex64 = paddle::platform::complex; -using complex128 = paddle::platform::complex; -using float16 = paddle::platform::float16; - -enum class DataType { - BOOL, - INT8, - UINT8, - INT16, - INT32, - INT64, - FLOAT16, - FLOAT32, - FLOAT64, - COMPLEX64, - COMPLEX128, - // TODO(JiabinYang) support more data types if needed. -}; - -inline std::string ToString(DataType dtype) { - switch (dtype) { - case DataType::BOOL: - return "bool"; - case DataType::INT8: - return "int8_t"; - case DataType::UINT8: - return "uint8_t"; - case DataType::INT16: - return "int16_t"; - case DataType::INT32: - return "int32_t"; - case DataType::INT64: - return "int64_t"; - case DataType::FLOAT16: - return "float16"; - case DataType::FLOAT32: - return "float"; - case DataType::FLOAT64: - return "double"; - case DataType::COMPLEX64: - return "complex64"; - case DataType::COMPLEX128: - return "complex128"; - default: - PD_THROW("Unsupported paddle enum data type."); - } -} - -#define PD_FOR_EACH_DATA_TYPE(_) \ - _(bool, DataType::BOOL) \ - _(int8_t, DataType::INT8) \ - _(uint8_t, DataType::UINT8) \ - _(int16_t, DataType::INT16) \ - _(int, DataType::INT32) \ - _(int64_t, DataType::INT64) \ - _(float16, DataType::FLOAT16) \ - _(float, DataType::FLOAT32) \ - _(double, DataType::FLOAT64) \ - _(complex64, DataType::COMPLEX64) \ - _(complex128, DataType::COMPLEX128) - -template -struct DataTypeToCPPType; - -#define PD_SPECIALIZE_DataTypeToCPPType(cpp_type, data_type) \ - template <> \ - struct DataTypeToCPPType { \ - using type = cpp_type; \ - }; - -PD_FOR_EACH_DATA_TYPE(PD_SPECIALIZE_DataTypeToCPPType) - -#undef PD_SPECIALIZE_DataTypeToCPPType - -} // namespace paddle diff --git a/paddle/fluid/extension/include/ext_tensor.h b/paddle/fluid/extension/include/ext_tensor.h deleted file mode 100644 index 970be905cc2566f7de3bac1c593e237cf6c7dac0..0000000000000000000000000000000000000000 --- a/paddle/fluid/extension/include/ext_tensor.h +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include - -#ifdef PADDLE_WITH_CUDA -#include -using gpuStream_t = cudaStream_t; -#endif - -#ifdef PADDLE_WITH_HIP -#include -using gpuStream_t = hipStream_t; -#endif - -#include "ext_dll_decl.h" // NOLINT -#include "ext_dtype.h" // NOLINT -#include "ext_place.h" // NOLINT - -namespace paddle { -namespace framework { -class CustomTensorUtils; -} // namespace framework - -class StreamWrapper { - public: - StreamWrapper() : stream_(nullptr), is_stream_set_(false) {} - void SetStream(void* stream) { - stream_ = stream; - is_stream_set_ = true; - } - - void* GetStream() const { return stream_; } - - bool IsStreamSet() const { return is_stream_set_; } - - private: - // cudaStream_t stream_; - void* stream_; - bool is_stream_set_; -}; - -class PD_DLL_DECL Tensor { - public: - /// \brief Construct a Tensor on target Place for CustomOp. - /// Generally it's only used for user to create Tensor. - explicit Tensor(const PlaceType& place); - /// \brief Construct a Tensor on target Place with shape for CustomOp. - /// Generally it's only used for user to create Tensor. - Tensor(const PlaceType& place, const std::vector& shape); - /// \brief Reset the shape of the tensor. - /// Generally it's only used for the input tensor. - /// Reshape must be called before calling - /// mutable_data() or copy_to(const PlaceType& place) - /// \param shape The shape to set. - void reshape(const std::vector& shape); - - /// \brief Get the memory pointer in CPU or GPU with - /// specific data type. - /// Please Reshape the tensor first before call this. - /// It's usually used to get input data pointer. - /// \param place The place of the tensor this will - /// override the original place of current tensor. - template - T* mutable_data(const PlaceType& place); - - /// \brief Get the memory pointer in CPU or GPU with - /// specific data type. Please Reshape the tensor - /// first before call this.It's usually used to get - /// input data pointer. - template - T* mutable_data(); - - /// \brief Get the memory pointer directly. - /// It's usually used to get the output data pointer. - /// \return The tensor data buffer pointer. - template - T* data() const; - - /// \brief Copy the host memory to tensor data. - /// It's usually used to set the input tensor data. - /// \param PlaceType of target place, of which - /// the tensor will copy to. - template - Tensor copy_to(const PlaceType& place) const; - - /// \brief Return a sub-tensor of the given tensor. - /// It is usually used to extract a sub-tensor (which supports - /// modifying the data of the original tensor) to perform further - /// operations. - /// \param begin_idx The index of the start row (inclusive) to slice. - /// The index number begins from 0. - /// \param end_idx The index of the end row (exclusive) to slice. - /// The index number begins from begin_idx + 1. - /// \return The sliced tensor. - Tensor slice(const int64_t begin_idx, const int64_t end_idx) const; - - /// \brief Return the shape of the Tensor. - std::vector shape() const; - - /// \brief Return the data type of the tensor. - /// It's usually used to get the output tensor data type. - /// \return The data type of the tensor. - DataType type() const; - - /// \brief Get the size of current tensor. - /// Use this method to get the size of tensor - /// \return int64_t. - int64_t size() const; - - /// \brief Get the place of current tensor. - /// Use this method to get the place of tensor - /// \return Place. - const PlaceType& place() const; - - /// \brief Cast datatype from one to another - Tensor cast(const DataType& target_type) const; - - /// \brief Check Tensor is initialized - bool is_initialized() const; - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - /// \bref Get current stream of Tensor - gpuStream_t stream() const; -#endif - - private: - friend class framework::CustomTensorUtils; - mutable std::shared_ptr tensor_; - mutable PlaceType place_; - StreamWrapper stream_; -}; - -} // namespace paddle diff --git a/paddle/fluid/extension/src/ext_tensor.cc b/paddle/fluid/extension/src/ext_tensor.cc deleted file mode 100644 index b5cd9e0b5c0e1590f6455c317f7056317f46fab9..0000000000000000000000000000000000000000 --- a/paddle/fluid/extension/src/ext_tensor.cc +++ /dev/null @@ -1,456 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/extension/include/ext_tensor.h" - -#include - -#include "paddle/fluid/framework/custom_tensor_utils.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/transform.h" - -namespace paddle { - -template -struct CastDataTypeFunctor { - HOSTDEVICE inline OutType operator()(InType in) const { - return static_cast(in); - } -}; - -template -struct CastDataType { - CastDataType(const framework::Tensor &in, framework::Tensor *out, - const platform::DeviceContext *ctx) - : in_(in), out_(out), ctx_(ctx) {} - const framework::Tensor in_; - framework::Tensor *out_; - const platform::DeviceContext *ctx_; - - template - void apply() { - auto *in_begin = in_.data(); - auto *in_end = in_begin + in_.numel(); - auto *out_begin = out_->mutable_data(in_.place()); - - if (platform::is_cpu_place(in_.place())) { - platform::Transform trans; - auto *context = static_cast(ctx_); - trans(*context, in_begin, in_end, out_begin, - CastDataTypeFunctor()); -#if defined(__NVCC__) || defined(__HIPCC__) - } else if (platform::is_gpu_place(in_.place())) { - platform::Transform trans; - auto *context = static_cast(ctx_); - trans(*context, in_begin, in_end, out_begin, - CastDataTypeFunctor()); - context->Wait(); -#endif - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Place type is not supported when casting data type.")); - } - } -}; - -template -void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc, - int64_t ele_size) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - int device_num = paddle::platform::GetCurrentDeviceId(); - platform::CUDAPlace gpu_place(device_num); - auto *dev_ctx = - static_cast(pool.Get(gpu_place)); - if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kCPU)) { - memory::Copy(platform::CPUPlace(), static_cast(dst), gpu_place, src, - ele_size, dev_ctx->stream()); - } else if ((src_plc == PlaceType::kGPU) && (dst_plc == PlaceType::kGPU)) { - memory::Copy(gpu_place, static_cast(dst), gpu_place, src, ele_size, - dev_ctx->stream()); - } else if ((src_plc == PlaceType::kCPU) && (dst_plc == PlaceType::kGPU)) { - memory::Copy(gpu_place, static_cast(dst), platform::CPUPlace(), src, - ele_size, dev_ctx->stream()); - } else { - PADDLE_THROW(platform::errors::Unavailable( - "Only GPU related Copy can reach this func.")); - } -#ifdef PADDLE_WITH_HIP - hipStreamSynchronize(dev_ctx->stream()); -#else - cudaStreamSynchronize(dev_ctx->stream()); -#endif -#endif -} - -#define GET_CASTED_TENSOR \ - if (!tensor_) { \ - tensor_ = std::make_shared(); \ - } \ - auto *tensor = static_cast(tensor_.get()); - -#define GET_INNER_PLACE \ - platform::Place place; \ - switch (place_) { \ - case PlaceType::kCPU: \ - place = platform::CPUPlace(); \ - break; \ - case PlaceType::kGPU: \ - place = platform::CUDAPlace(); \ - break; \ - default: \ - PADDLE_THROW(platform::errors::Unavailable( \ - "Custom operator unsupported place id(%d)", \ - static_cast(place_))); \ - } - -void Tensor::reshape(const std::vector &shape) { - GET_CASTED_TENSOR - auto new_dim = framework::make_ddim(shape); - tensor->Resize(new_dim); -} - -Tensor::Tensor(const PlaceType &place) - : tensor_(std::make_shared()), - place_(place), - stream_(StreamWrapper()) {} - -Tensor::Tensor(const PlaceType &place, const std::vector &shape) - : tensor_(std::make_shared()), - place_(place), - stream_(StreamWrapper()) { - GET_CASTED_TENSOR - tensor->Resize(framework::make_ddim(shape)); -} - -template -T *Tensor::mutable_data(const PlaceType &place) { - place_ = place; - return mutable_data(); -} - -template -T *Tensor::mutable_data() { - GET_CASTED_TENSOR - PADDLE_ENFORCE_GT( - tensor->numel(), 0, - platform::errors::PreconditionNotMet( - "You should call Tensor::Reshape(const std::vector " - "&shape)" - "function before retrieving mutable_data from input tensor.")); - switch (static_cast(place_)) { - case static_cast(PlaceType::kCPU): { - return tensor->mutable_data(platform::CPUPlace()); - } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - case static_cast(PlaceType::kGPU): { - int device_num = platform::GetCurrentDeviceId(); - return tensor->mutable_data(platform::CUDAPlace(device_num)); - } -#endif - default: - PADDLE_THROW(platform::errors::Unavailable( - "Custom operator unsupported place id(%d)", - static_cast(place_))); - } -} - -template -T *Tensor::data() const { - GET_CASTED_TENSOR; - auto *res = tensor->data(); - return res; -} - -DataType Tensor::type() const { - GET_CASTED_TENSOR; - auto type = tensor->type(); - if (type == framework::proto::VarType::FP32) { - return DataType::FLOAT32; - } else if (type == framework::proto::VarType::INT64) { - return DataType::INT64; - } else if (type == framework::proto::VarType::INT32) { - return DataType::INT32; - } else if (type == framework::proto::VarType::INT16) { - return DataType::INT16; - } else if (type == framework::proto::VarType::INT8) { - return DataType::INT8; - } else if (type == framework::proto::VarType::UINT8) { - return DataType::UINT8; - } else if (type == framework::proto::VarType::FP64) { - return DataType::FLOAT64; - } else if (type == framework::proto::VarType::BOOL) { - return DataType::BOOL; - } else if (type == framework::proto::VarType::COMPLEX64) { - return DataType::COMPLEX64; - } else if (type == framework::proto::VarType::COMPLEX128) { - return DataType::COMPLEX128; - } else if (type == framework::proto::VarType::FP16) { - return DataType::FLOAT16; - } - // TODO(JiabinYang) Support more dtype here - return DataType::FLOAT32; -} - -template -Tensor Tensor::copy_to(const PlaceType &target_place) const { - GET_CASTED_TENSOR; - PADDLE_ENFORCE_GE(tensor->numel(), 0, - platform::errors::PreconditionNotMet( - "You should call Tensor::Reshape(const " - "std::vector &shape)" - "function before copying data from cpu.")); - size_t ele_size = tensor->numel() * sizeof(T); - auto *p_src_data = tensor->data(); - auto src_place = place(); - Tensor target = Tensor(target_place); - target.reshape(shape()); - auto *p_target_data = target.template mutable_data(); - - if ((src_place == PlaceType::kCPU) && (target_place == PlaceType::kCPU)) { - std::memcpy(static_cast(p_target_data), p_src_data, ele_size); - } else if ((src_place == PlaceType::kGPU) && - (target_place == PlaceType::kCPU)) { - GpuCopy(p_src_data, p_target_data, src_place, target_place, ele_size); - } else if ((src_place == PlaceType::kCPU) && - (target_place == PlaceType::kGPU)) { - GpuCopy(p_src_data, p_target_data, src_place, target_place, ele_size); - } else if ((src_place == PlaceType::kGPU) && - (target_place == PlaceType::kGPU)) { - GpuCopy(p_src_data, p_target_data, src_place, target_place, ele_size); - } else { - PADDLE_THROW(platform::errors::Unavailable( - "Not supported place transform of place: %d to place: %d", - static_cast(src_place), static_cast(target_place))); - } - return target; -} - -Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const { - GET_CASTED_TENSOR - GET_INNER_PLACE - framework::Tensor intermediate = tensor->Slice(begin_idx, end_idx); - Tensor target = Tensor(place_); - framework::CustomTensorUtils::ShareDataFrom( - static_cast(&intermediate), target); - return target; -} - -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; -template PD_DLL_DECL Tensor Tensor::copy_to>( - const PlaceType &target_place) const; -template PD_DLL_DECL Tensor Tensor::copy_to>( - const PlaceType &target_place) const; -template PD_DLL_DECL Tensor -Tensor::copy_to(const PlaceType &target_place) const; - -template PD_DLL_DECL float *Tensor::data() const; -template PD_DLL_DECL double *Tensor::data() const; -template PD_DLL_DECL int64_t *Tensor::data() const; -template PD_DLL_DECL int32_t *Tensor::data() const; -template PD_DLL_DECL uint8_t *Tensor::data() const; -template PD_DLL_DECL int8_t *Tensor::data() const; -template PD_DLL_DECL int16_t *Tensor::data() const; -template PD_DLL_DECL bool *Tensor::data() const; -template PD_DLL_DECL paddle::platform::complex - *Tensor::data>() const; -template PD_DLL_DECL paddle::platform::complex - *Tensor::data>() const; -template PD_DLL_DECL paddle::platform::float16 * -Tensor::data() const; - -template PD_DLL_DECL float *Tensor::mutable_data(); -template PD_DLL_DECL double *Tensor::mutable_data(); -template PD_DLL_DECL int64_t *Tensor::mutable_data(); -template PD_DLL_DECL int32_t *Tensor::mutable_data(); -template PD_DLL_DECL uint8_t *Tensor::mutable_data(); -template PD_DLL_DECL int8_t *Tensor::mutable_data(); -template PD_DLL_DECL int16_t *Tensor::mutable_data(); -template PD_DLL_DECL bool *Tensor::mutable_data(); -template PD_DLL_DECL paddle::platform::complex - *Tensor::mutable_data>(); -template PD_DLL_DECL paddle::platform::complex - *Tensor::mutable_data>(); -template PD_DLL_DECL paddle::platform::float16 * -Tensor::mutable_data(); - -template PD_DLL_DECL float *Tensor::mutable_data(const PlaceType &place); -template PD_DLL_DECL double *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL int64_t *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL int32_t *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL uint8_t *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL int8_t *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL int16_t *Tensor::mutable_data( - const PlaceType &place); -template PD_DLL_DECL bool *Tensor::mutable_data(const PlaceType &place); -template PD_DLL_DECL paddle::platform::complex * -Tensor::mutable_data>(const PlaceType &place); -template PD_DLL_DECL paddle::platform::complex * -Tensor::mutable_data>(const PlaceType &place); -template PD_DLL_DECL paddle::platform::float16 * -Tensor::mutable_data(const PlaceType &place); - -std::vector Tensor::shape() const { - GET_CASTED_TENSOR - return framework::vectorize(tensor->dims()); -} - -const PlaceType &Tensor::place() const { - GET_CASTED_TENSOR; - if (platform::is_cpu_place(tensor->place())) { - place_ = PlaceType::kCPU; -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - } else if (platform::is_gpu_place(tensor->place())) { - place_ = PlaceType::kGPU; -#endif - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Current Tensor hold unsupported Place Type, Please Init it" - "using Tensor::mutable_data(PaddlePlace) with T among:" - "Place::kCPU or Place::kGPU")); - } - return place_; -} - -Tensor Tensor::cast(const DataType &target_type) const { - GET_CASTED_TENSOR; - Tensor rlt = Tensor(place()); - rlt.reshape(this->shape()); - auto rlt_tensor_ = static_cast(rlt.tensor_.get()); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto ctx = pool.Get(tensor->place()); - auto src_type = tensor->type(); - auto dst_type = - framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType(target_type); - switch (src_type) { - case framework::proto::VarType::FP32: - framework::VisitDataType(dst_type, - CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::FP64: - framework::VisitDataType(dst_type, - CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::INT32: - framework::VisitDataType(dst_type, - CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::INT64: - framework::VisitDataType( - dst_type, CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::BOOL: - framework::VisitDataType(dst_type, - CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::INT16: - framework::VisitDataType( - dst_type, CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::UINT8: - framework::VisitDataType( - dst_type, CastDataType(*tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::COMPLEX64: - framework::VisitDataType(dst_type, - CastDataType>( - *tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::COMPLEX128: - framework::VisitDataType(dst_type, - CastDataType>( - *tensor, rlt_tensor_, ctx)); - break; - case framework::proto::VarType::FP16: - framework::VisitDataType( - dst_type, - CastDataType(*tensor, rlt_tensor_, ctx)); - break; - // TODO(JiabinYang) Support more dtype here - default: - PADDLE_THROW(platform::errors::Unimplemented( - "Data type (%s) is not supported when casting data type.", - framework::DataTypeToString(src_type))); - } - return rlt; -} - -int64_t Tensor::size() const { - GET_CASTED_TENSOR; - return tensor->numel(); -} - -bool Tensor::is_initialized() const { - GET_CASTED_TENSOR; - if (tensor->IsInitialized()) { - return true; - } else { - return false; - } -} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -gpuStream_t Tensor::stream() const { - if (!stream_.IsStreamSet()) { - PADDLE_THROW(platform::errors::PreconditionNotMet( - "Stream is not Set, only input tensor will have " - "stream which is set by framework ")); - } else { - return reinterpret_cast(stream_.GetStream()); - } -} -#endif - -namespace framework { - -void CustomTensorUtils::ShareDataTo(const paddle::Tensor &src, void *dst) { - static_cast(dst)->ShareDataWith( - *static_cast(src.tensor_.get())); -} - -void CustomTensorUtils::ShareDataFrom(const void *src, - const paddle::Tensor &dst) { - if (!dst.tensor_) { - dst.tensor_ = std::make_shared(); - } - auto *tensor = static_cast(dst.tensor_.get()); - tensor->ShareDataWith(*static_cast(src)); -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/extension/src/ext_tensor.cu b/paddle/fluid/extension/src/ext_tensor.cu deleted file mode 120000 index 23ae523331877f1ee4818dba4618259e0914aeb4..0000000000000000000000000000000000000000 --- a/paddle/fluid/extension/src/ext_tensor.cu +++ /dev/null @@ -1 +0,0 @@ -ext_tensor.cc \ No newline at end of file diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 1fcb9d2ca6607a2b1e826d8450f32f8ee61c0bd2..1b45193e3d0a3455d81e7056f3790a6ff26c2d25 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -431,34 +431,7 @@ message(STATUS "branch: ${PADDLE_BRANCH}") configure_file(commit.h.in commit.h) -# Adapt to custom op mechanism: Include the header files related to the data type -# to avoid exposing the path of the underlying file -include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) -include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/extension/include) -include_directories(${PADDLE_SOURCE_DIR}/paddle/utils) - -if (WITH_GPU) - if (WIN32) - windows_symbolic(ext_tensor_cu SRCS ext_tensor.cu PATH ../extension/src) - nv_library(custom_tensor SRCS ../extension/src/.ext_tensor.cu DEPS lod_tensor memory enforce) - add_dependencies(custom_tensor ext_tensor_cu) - else() - nv_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce) - endif(WIN32) -elseif (WITH_ROCM) - hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cu DEPS lod_tensor memory enforce) -else() - cc_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce) -endif() - -cc_library(op_meta_info SRCS ../extension/src/ext_op_meta_info.cc DEPS custom_tensor) -cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper custom_tensor op_meta_info) - -if(WITH_ROCM) - hip_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog) -else() - cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog) -endif() +cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper pten_tensor op_meta_info pten_api) #cc_binary(test_executor SRCS test_executor.cc DEPS executor op_registry ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ) #cc_binary(new_executor SRCS new_exec_test.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index bb8258dcd9228f53e106642de19ae8ad96ab7eec..88cfd74cdaf99228238457f9d2f16f57677c468a 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -25,15 +25,18 @@ limitations under the License. */ #include #include -#include "paddle/fluid/extension/include/ext_tensor.h" #include "paddle/fluid/framework/attribute.h" -#include "paddle/fluid/framework/custom_tensor_utils.h" #include "paddle/fluid/framework/op_meta_info_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/pten/api/all.h" +#include "paddle/pten/api/lib/ext_compat_utils.h" +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/core/convert_utils.h" #include "paddle/utils/any.h" namespace paddle { @@ -128,10 +131,8 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "The %d-th tensor in input vector (%s) " "is not initialized.", i, in_name)); - auto custom_t = paddle::Tensor( - CustomTensorUtils::ConvertInnerPlaceToEnumPlace(x->place())); - CustomTensorUtils::ShareDataFrom(static_cast(x), custom_t); - CustomTensorUtils::SetTensorCurrentStream(&custom_t, ctx.GetPlace()); + paddle::Tensor custom_t; + custom_t.set_impl(std::move(experimental::MakePtenDenseTensor(*x))); custom_vec_in.emplace_back(custom_t); } custom_vec_ins.emplace_back(custom_vec_in); @@ -142,10 +143,8 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, PADDLE_ENFORCE_EQ(x->IsInitialized(), true, platform::errors::InvalidArgument( "Input tensor (%s) is not initialized.", in_name)); - auto custom_in = paddle::Tensor( - CustomTensorUtils::ConvertInnerPlaceToEnumPlace(x->place())); - CustomTensorUtils::ShareDataFrom(static_cast(x), custom_in); - CustomTensorUtils::SetTensorCurrentStream(&custom_in, ctx.GetPlace()); + paddle::Tensor custom_in; + custom_in.set_impl(std::move(experimental::MakePtenDenseTensor(*x))); custom_ins.emplace_back(custom_in); } } @@ -207,11 +206,17 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "Tensors.", vec_true_outs.size(), outs.size())); for (size_t j = 0; j < vec_true_outs.size(); ++j) { - CustomTensorUtils::ShareDataTo(outs.at(j), vec_true_outs.at(j)); + experimental::MovesStorage( + std::dynamic_pointer_cast(outs.at(j).impl()) + .get(), + vec_true_outs.at(j)); } } else { auto* true_out = ctx.Output(out_name); - CustomTensorUtils::ShareDataTo(outs.at(i), true_out); + experimental::MovesStorage( + std::dynamic_pointer_cast(outs.at(i).impl()) + .get(), + true_out); } } } catch (platform::EnforceNotMet& exception) { @@ -479,8 +484,7 @@ void RegisterOperatorKernelWithPlace(const std::string& name, const std::vector& inputs, const std::vector& outputs, const std::vector& attrs) { - OpKernelType key(type, - CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place)); + OpKernelType key(type, experimental::ConvertExtPlaceToInnerPlace(place)); VLOG(1) << "Custom Operator: op kernel key: " << key; OperatorWithKernel::AllOpKernels()[name][key] = [kernel_func, inputs, outputs, @@ -717,14 +721,12 @@ void RegisterOperatorWithMetaInfo( std::vector vec_custom_dtype; for (size_t i = 0; i < ctx->InputSize(in_name); ++i) { auto dtype = ctx->GetInputDataType(in_name, i); - vec_custom_dtype.emplace_back( - CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype)); + vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype)); } vec_input_dtypes.emplace_back(vec_custom_dtype); } else { auto dtype = ctx->GetInputDataType(in_name); - input_dtypes.emplace_back( - CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype)); + input_dtypes.emplace_back(pten::TransToPtenDataType(dtype)); } } @@ -736,14 +738,12 @@ void RegisterOperatorWithMetaInfo( auto out_name = op_outputs[i]; if (detail::IsDuplicableVar(out_name)) { for (size_t j = 0; j < output_dtypes.size(); ++j) { - auto dtype = CustomTensorUtils::ConvertEnumDTypeToInnerDType( - output_dtypes[i]); + auto dtype = pten::TransToProtoVarType(output_dtypes[i]); ctx->SetOutputDataType(out_name, dtype, j); } } else { - ctx->SetOutputDataType( - out_name, CustomTensorUtils::ConvertEnumDTypeToInnerDType( - output_dtypes[i])); + ctx->SetOutputDataType(out_name, + pten::TransToProtoVarType(output_dtypes[i])); } } }; diff --git a/paddle/fluid/framework/custom_operator.h b/paddle/fluid/framework/custom_operator.h index 259901c09f3e00729876d7bea062237ad5bad94a..d8712e60d24ba5d440eaa35d8ea15d1d87df549d 100644 --- a/paddle/fluid/framework/custom_operator.h +++ b/paddle/fluid/framework/custom_operator.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/fluid/extension/include/ext_op_meta_info.h" +#include "paddle/pten/api/ext/op_meta_info.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/custom_tensor_utils.h b/paddle/fluid/framework/custom_tensor_utils.h deleted file mode 100644 index d7bde04b84b1615d2f7e2f56e4b1218741faf674..0000000000000000000000000000000000000000 --- a/paddle/fluid/framework/custom_tensor_utils.h +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "paddle/fluid/extension/include/ext_tensor.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/gpu_info.h" -#include "paddle/fluid/platform/place.h" - -namespace paddle { -namespace framework { - -class CustomTensorUtils { - public: - /// \brief Share data TO another tensor. - /// Use this to pass tensor from op to op - /// \return void. - static void ShareDataTo(const paddle::Tensor& src, void* dst); - - /// \brief Share data FROM another tensor. - /// Use this to pass tensor from op to op - /// \return void. - static void ShareDataFrom(const void* src, const paddle::Tensor& dst); - - static framework::proto::VarType::Type ConvertEnumDTypeToInnerDType( - const paddle::DataType& dtype) { - switch (dtype) { - case paddle::DataType::FLOAT64: - return framework::proto::VarType::FP64; - case paddle::DataType::FLOAT32: - return framework::proto::VarType::FP32; - case paddle::DataType::UINT8: - return framework::proto::VarType::UINT8; - case paddle::DataType::INT8: - return framework::proto::VarType::INT8; - case paddle::DataType::INT32: - return framework::proto::VarType::INT32; - case paddle::DataType::INT64: - return framework::proto::VarType::INT64; - case paddle::DataType::INT16: - return framework::proto::VarType::INT16; - case paddle::DataType::COMPLEX64: - return framework::proto::VarType::COMPLEX64; - case paddle::DataType::COMPLEX128: - return framework::proto::VarType::COMPLEX128; - case paddle::DataType::FLOAT16: - return framework::proto::VarType::FP16; - case paddle::DataType::BOOL: - return framework::proto::VarType::BOOL; - default: - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data type code(%d) when casting enum data type into " - "paddle data type.", - static_cast(dtype))); - } - } - - static paddle::DataType ConvertInnerDTypeToEnumDType( - const framework::proto::VarType::Type& dtype) { - switch (dtype) { - case framework::proto::VarType::FP64: - return paddle::DataType::FLOAT64; - case framework::proto::VarType::FP32: - return paddle::DataType::FLOAT32; - case framework::proto::VarType::INT64: - return paddle::DataType::INT64; - case framework::proto::VarType::INT32: - return paddle::DataType::INT32; - case framework::proto::VarType::INT8: - return paddle::DataType::INT8; - case framework::proto::VarType::UINT8: - return paddle::DataType::UINT8; - case framework::proto::VarType::INT16: - return paddle::DataType::INT16; - case framework::proto::VarType::COMPLEX64: - return paddle::DataType::COMPLEX64; - case framework::proto::VarType::COMPLEX128: - return paddle::DataType::COMPLEX128; - case framework::proto::VarType::FP16: - return paddle::DataType::FLOAT16; - case framework::proto::VarType::BOOL: - return paddle::DataType::BOOL; - default: - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data type `%s` when casting paddle data type into " - "enum data type.", - DataTypeToString(dtype))); - } - } - - // PaddlePlace <-> platform::Place - static platform::Place ConvertEnumPlaceToInnerPlace(const PlaceType& pc) { - if (pc == PlaceType::kCPU) { - return platform::Place(platform::CPUPlace()); - } else if (pc == PlaceType::kGPU) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return platform::Place( - platform::CUDAPlace(platform::GetCurrentDeviceId())); -#endif - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported place type code(%d) when " - "casting enum place to paddle place.", - static_cast(pc))); - } - return platform::Place(); - } - - static PlaceType ConvertInnerPlaceToEnumPlace(const platform::Place& pc) { - if (platform::is_cpu_place(pc)) { - return PlaceType::kCPU; - } else if (platform::is_gpu_place(pc)) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return PlaceType::kGPU; -#endif - } else { - PADDLE_THROW( - platform::errors::Unimplemented("Unsupported place type `%s` when " - "casting paddle place to enum place.", - pc)); - } - return PlaceType::kUNK; - } - - static void SetTensorCurrentStream(paddle::Tensor* src, - const platform::Place& pc) { - if (platform::is_gpu_place(pc)) { -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - auto* dev_ctx = static_cast( - platform::DeviceContextPool::Instance().Get(pc)); - src->stream_.SetStream(reinterpret_cast(dev_ctx->stream())); -#endif - } else { - return; - } - } -}; - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/op_meta_info_helper.h b/paddle/fluid/framework/op_meta_info_helper.h index c70fe2f38ab63d7ddb4581f55cb121fa514870fe..b5b84dc5768e4d5757a84538b9b12b2f65de0c38 100644 --- a/paddle/fluid/framework/op_meta_info_helper.h +++ b/paddle/fluid/framework/op_meta_info_helper.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include -#include "paddle/fluid/extension/include/ext_op_meta_info.h" +#include "paddle/pten/api/ext/op_meta_info.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 09c72cb13b8033bdbd7e81d23707eb8f243ee7a3..3c66e35abf1d59a43e86541614a56ad78f957416 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -37,11 +37,6 @@ endif() get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(pten_modules GLOBAL PROPERTY PTEN_MODULES) -# Adapt to custom op mechanism: Include the header files related to the data type -# to avoid exposing the path of the underlying file -include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) -include_directories(${PADDLE_SOURCE_DIR}/paddle/utils) - add_subdirectory(api) # Create static inference library if needed diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ad0647236acb969dd32b7564a943db14ea83ee65..b1408995fa157bbbf5c52eae928d3226138de2e7 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -24,7 +24,6 @@ #include #include -#include "paddle/fluid/extension/include/ext_op_meta_info.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" @@ -46,6 +45,7 @@ #include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/pten/api/ext/op_meta_info.h" #ifdef PADDLE_WITH_MKLML #include "paddle/fluid/platform/dynload/mklml.h" diff --git a/paddle/fluid/inference/api/helper.cc b/paddle/fluid/inference/api/helper.cc index d78560239de50eb224641583d62b55bac75be465..630e85778c3307c7f24719be8f8102b72278c91d 100644 --- a/paddle/fluid/inference/api/helper.cc +++ b/paddle/fluid/inference/api/helper.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/extension/include/ext_op_meta_info.h" #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/pten/api/ext/op_meta_info.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc index 28c209018d662d054c27fa68d6b87f3eb58a7e31..ad726f2bf19b6ab48e86ef9894f322695b6449ab 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc @@ -22,6 +22,7 @@ template class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel { using MPDType = typename details::MPTypeTrait::Type; using XPUTyp = typename XPUTypeTrait::Type; + using float16 = typename XPUTypeTrait::Type; public: void Compute(const framework::ExecutionContext& ctx) const { diff --git a/paddle/fluid/operators/cast_op_xpu.cc b/paddle/fluid/operators/cast_op_xpu.cc index c1a296f2b2788d9c1a32c36b2688b11b8751ded2..c3e23831e65286d3d65e8ae80933e0725ad85f27 100644 --- a/paddle/fluid/operators/cast_op_xpu.cc +++ b/paddle/fluid/operators/cast_op_xpu.cc @@ -29,6 +29,7 @@ namespace plat = paddle::platform; template class CastXPUKernel : public framework::OpKernel { using XPUInTDType = typename XPUTypeTrait::Type; + using float16 = typename XPUTypeTrait::Type; public: void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 595c833cbfa8a065bc7ee4d5c97fcf407186aec3..47027d69f82e00b741a67a6406ac01c0b268cf27 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,8 +1,3 @@ -# Adapt to custom op mechanism: Include the header files related to the data type -# to avoid exposing the path of the underlying file -include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) -include_directories(${PADDLE_SOURCE_DIR}/paddle/utils) - set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune feed_fetch_method pass generate_pass pass_builder parallel_executor profiler layer tracer engine scope_pool analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 8fc63e8317e9e57c71c6f39ea405bfada7974991..2c2004162452c5da06a1a413d33737b236ea4516 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -1,3 +1,8 @@ +# Adapt to custom op mechanism: Include the header files related to the data type +# to avoid exposing the path of the underlying file, remove it after moving +# float16.h/complex.h/bfloat16.h into pten +include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) + # pten (low level) api headers: include # pten (high level) api add_subdirectory(api) @@ -20,4 +25,5 @@ endif() if(WITH_XPU) set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu) endif() + cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS}) diff --git a/paddle/pten/all.h b/paddle/pten/all.h index e8e41a8c3eabc860799dfd63b33ea8dc2a4c232b..f9532eb33d897998e2521d04daf83d680630c4ce 100644 --- a/paddle/pten/all.h +++ b/paddle/pten/all.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -// develop apis +// developer apis #include "paddle/pten/include/core.h" #include "paddle/pten/include/creation.h" #include "paddle/pten/include/infershape.h" diff --git a/paddle/pten/api/all.h b/paddle/pten/api/all.h index 3a2c7b3fa98fe4adb813e7178bb6ebfad84fbec6..e4d079de9d41aa1c9a417a32a9855ec92f40f34c 100644 --- a/paddle/pten/api/all.h +++ b/paddle/pten/api/all.h @@ -14,9 +14,40 @@ limitations under the License. */ #pragma once -// user apis +#if !defined(_MSC_VER) && __cplusplus < 201402L +#error C++14 or later compatible compiler is required to use Paddle. +#endif + +#ifdef _WIN32 +#ifndef NOMINMAX +#define NOMINMAX // msvc max/min macro conflict with std::min/max +#endif +#endif + +// new pten apis #include "paddle/pten/api/include/creation.h" #include "paddle/pten/api/include/linalg.h" #include "paddle/pten/api/include/manipulation.h" #include "paddle/pten/api/include/math.h" #include "paddle/pten/api/include/tensor.h" + +// pten common headers +#include "paddle/pten/common/backend.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/common/layout.h" +#include "paddle/pten/common/scalar.h" + +// original custom op headers +#include "paddle/pten/api/ext/dispatch.h" +#include "paddle/pten/api/ext/dll_decl.h" +#include "paddle/pten/api/ext/exception.h" +#include "paddle/pten/api/ext/op_meta_info.h" +#include "paddle/pten/api/ext/place.h" + +// api symbols declare, remove in the future +#include "paddle/pten/api/include/registry.h" + +PT_DECLARE_API(Creation); +PT_DECLARE_API(Linalg); +PT_DECLARE_API(Manipulation); +PT_DECLARE_API(Math); diff --git a/paddle/fluid/extension/include/ext_dispatch.h b/paddle/pten/api/ext/dispatch.h similarity index 51% rename from paddle/fluid/extension/include/ext_dispatch.h rename to paddle/pten/api/ext/dispatch.h index 9b3e199708adc93356c214df3be217f67d2e8949..2b90bd77943f5d289d1057f388aa5f41c7d25fad 100644 --- a/paddle/fluid/extension/include/ext_dispatch.h +++ b/paddle/pten/api/ext/dispatch.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include "ext_dtype.h" // NOLINT -#include "ext_exception.h" // NOLINT +#include "paddle/pten/api/ext/exception.h" +#include "paddle/pten/common/data_type.h" namespace paddle { @@ -37,30 +37,32 @@ namespace paddle { [&] { \ const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \ - __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ default: \ PD_THROW("function " #NAME " is not implemented for data type `", \ - ::paddle::ToString(__dtype__), "`"); \ + __dtype__, \ + "`"); \ } \ }() -#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT16, paddle::float16, \ - __VA_ARGS__) \ - default: \ - PD_THROW("function " #NAME " is not implemented for data type `", \ - ::paddle::ToString(__dtype__), "`"); \ - } \ +#define PD_DISPATCH_FLOATING_AND_HALF_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT16, paddle::float16, __VA_ARGS__) \ + default: \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ + } \ }() ///////// Integral Dispatch Marco /////////// @@ -70,34 +72,40 @@ namespace paddle { const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \ - __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT8, int8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::UINT8, uint8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT16, int16_t, __VA_ARGS__) \ default: \ - PD_THROW("function " #NAME " is not implemented for data type `" + \ - ::paddle::ToString(__dtype__) + "`"); \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ } \ }() ///////// Complex Dispatch Marco /////////// -#define PD_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \ - ::paddle::complex64, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \ - ::paddle::complex128, __VA_ARGS__) \ - default: \ - PD_THROW("function " #NAME " is not implemented for data type `" + \ - ::paddle::ToString(__dtype__) + "`"); \ - } \ +#define PD_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX64, \ + ::paddle::complex64, \ + __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX128, \ + ::paddle::complex128, \ + __VA_ARGS__) \ + default: \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ + } \ }() ///////// Floating and Integral Dispatch Marco /////////// @@ -106,43 +114,49 @@ namespace paddle { [&] { \ const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \ - __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \ - __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT8, int8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::UINT8, uint8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT16, int16_t, __VA_ARGS__) \ default: \ - PD_THROW("function " #NAME " is not implemented for data type `" + \ - ::paddle::ToString(__dtype__) + "`"); \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ } \ }() ///////// Floating and Complex Dispatch Marco /////////// -#define PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \ - ::paddle::complex64, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \ - ::paddle::complex128, __VA_ARGS__) \ - default: \ - PD_THROW("function " #NAME " is not implemented for data type `" + \ - ::paddle::ToString(__dtype__) + "`"); \ - } \ +#define PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX64, \ + ::paddle::complex64, \ + __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX128, \ + ::paddle::complex128, \ + __VA_ARGS__) \ + default: \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ + } \ }() ///////// Floating, Integral and Complex Dispatch Marco /////////// @@ -151,26 +165,31 @@ namespace paddle { [&] { \ const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT32, float, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::FLOAT64, double, \ - __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT64, int64_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT8, int8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::UINT8, uint8_t, \ - __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT16, int16_t, \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT8, int8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::UINT8, uint8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT16, int16_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX64, \ + ::paddle::complex64, \ + __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, \ + ::paddle::DataType::COMPLEX128, \ + ::paddle::complex128, \ __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX64, \ - ::paddle::complex64, __VA_ARGS__) \ - PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::COMPLEX128, \ - ::paddle::complex128, __VA_ARGS__) \ default: \ - PD_THROW("function " #NAME " is not implemented for data type `" + \ - ::paddle::ToString(__dtype__) + "`"); \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ } \ }() diff --git a/paddle/fluid/extension/include/ext_dll_decl.h b/paddle/pten/api/ext/dll_decl.h similarity index 100% rename from paddle/fluid/extension/include/ext_dll_decl.h rename to paddle/pten/api/ext/dll_decl.h diff --git a/paddle/fluid/extension/include/ext_exception.h b/paddle/pten/api/ext/exception.h similarity index 81% rename from paddle/fluid/extension/include/ext_exception.h rename to paddle/pten/api/ext/exception.h index 632d91d5285c22ac9143f49cfae133d1d1e67bd3..92b17b4898d3f7f9af26a0ba7172fc1a1ad36fe1 100644 --- a/paddle/fluid/extension/include/ext_exception.h +++ b/paddle/pten/api/ext/exception.h @@ -32,7 +32,9 @@ namespace paddle { struct PD_Exception : public std::exception { public: template - explicit PD_Exception(const std::string& msg, const char* file, int line, + explicit PD_Exception(const std::string& msg, + const char* file, + int line, const char* default_msg) { std::ostringstream sout; if (msg.empty()) { @@ -75,24 +77,13 @@ class ErrorMessage { std::ostringstream oss; }; -#if defined _WIN32 -#define HANDLE_THE_ERROR try { -#define END_HANDLE_THE_ERROR \ - } \ - catch (const std::exception& e) { \ - std::cerr << e.what() << std::endl; \ - throw e; \ - } -#else -#define HANDLE_THE_ERROR -#define END_HANDLE_THE_ERROR -#endif - #define PD_CHECK(COND, ...) \ do { \ if (PD_UNLIKELY(!(COND))) { \ auto __message__ = ::paddle::ErrorMessage(__VA_ARGS__).to_string(); \ - throw ::paddle::PD_Exception(__message__, __FILE__, __LINE__, \ + throw ::paddle::PD_Exception(__message__, \ + __FILE__, \ + __LINE__, \ "Expected " #COND \ ", but it's not satisfied."); \ } \ @@ -101,8 +92,8 @@ class ErrorMessage { #define PD_THROW(...) \ do { \ auto __message__ = ::paddle::ErrorMessage(__VA_ARGS__).to_string(); \ - throw ::paddle::PD_Exception(__message__, __FILE__, __LINE__, \ - "An error occurred."); \ + throw ::paddle::PD_Exception( \ + __message__, __FILE__, __LINE__, "An error occurred."); \ } while (0) } // namespace paddle diff --git a/paddle/fluid/extension/include/ext_op_meta_info.h b/paddle/pten/api/ext/op_meta_info.h similarity index 83% rename from paddle/fluid/extension/include/ext_op_meta_info.h rename to paddle/pten/api/ext/op_meta_info.h index 6f2528030e603de37dbfb09c3e07539812f2c818..140874f93aed72ac1a4a56937e0b8a6fe908cdf4 100644 --- a/paddle/fluid/extension/include/ext_op_meta_info.h +++ b/paddle/pten/api/ext/op_meta_info.h @@ -19,10 +19,10 @@ limitations under the License. */ #include #include -#include "any.h" -#include "ext_dll_decl.h" // NOLINT -#include "ext_exception.h" // NOLINT -#include "ext_tensor.h" // NOLINT +#include "paddle/pten/api/ext/dll_decl.h" +#include "paddle/pten/api/ext/exception.h" +#include "paddle/pten/api/include/tensor.h" +#include "paddle/utils/any.h" /** * Op Meta Info Related Define. @@ -87,7 +87,9 @@ using KernelFunc = #define PD_SPECIALIZE_ComputeCallHelper(attr_type) \ template \ struct ComputeCallHelper { \ - template \ static Return Compute(const std::vector& inputs, \ const std::vector>& vec_inputs, \ @@ -95,9 +97,10 @@ using KernelFunc = const PreviousArgs&... pargs) { \ try { \ attr_type arg = paddle::any_cast(attrs[attr_idx]); \ - return ComputeCallHelper::template Compute< \ - in_idx, vec_in_idx, attr_idx + 1>(inputs, vec_inputs, attrs, \ - pargs..., arg); \ + return ComputeCallHelper::template Compute( \ + inputs, vec_inputs, attrs, pargs..., arg); \ } catch (paddle::bad_any_cast&) { \ PD_THROW( \ "Attribute cast error in custom operator. Expected " #attr_type \ @@ -127,7 +130,9 @@ struct KernelFuncImpl { template struct ComputeCallHelper { - template static Return Compute(const std::vector& inputs, const std::vector>& vec_inputs, @@ -135,23 +140,27 @@ struct KernelFuncImpl { const PreviousArgs&... pargs) { const Tensor& arg = inputs[in_idx]; return ComputeCallHelper::template Compute( + vec_in_idx, + attr_idx>( inputs, vec_inputs, attrs, pargs..., arg); } }; template struct ComputeCallHelper&, Tail...> { - template static Return Compute(const std::vector& inputs, const std::vector>& vec_inputs, const std::vector& attrs, const PreviousArgs&... pargs) { const std::vector& arg = vec_inputs[vec_in_idx]; - return ComputeCallHelper::template Compute< - in_idx, vec_in_idx + 1, attr_idx>(inputs, vec_inputs, attrs, pargs..., - arg); + return ComputeCallHelper::template Compute( + inputs, vec_inputs, attrs, pargs..., arg); } }; @@ -206,65 +215,75 @@ using InferShapeFunc = std::vector> (*)( const std::vector>>& vec_input_shapes, const std::vector& attrs); -#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type) \ - template \ - struct InferShapeCallHelper { \ - template \ - static Return InferShape( \ - const std::vector>& input_shapes, \ - const std::vector>>& \ - vec_input_shapes, \ - const std::vector& attrs, const PreviousArgs&... pargs) { \ - input_type arg = input_shapes[in_idx]; \ - return InferShapeCallHelper::template InferShape< \ - in_idx + 1, vec_in_idx, attr_idx>(input_shapes, vec_input_shapes, \ - attrs, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type) \ + template \ + struct InferShapeCallHelper { \ + template \ + static Return InferShape( \ + const std::vector>& input_shapes, \ + const std::vector>>& \ + vec_input_shapes, \ + const std::vector& attrs, \ + const PreviousArgs&... pargs) { \ + input_type arg = input_shapes[in_idx]; \ + return InferShapeCallHelper::template InferShape( \ + input_shapes, vec_input_shapes, attrs, pargs..., arg); \ + } \ } -#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(input_type) \ - template \ - struct InferShapeCallHelper { \ - template \ - static Return InferShape( \ - const std::vector>& input_shapes, \ - const std::vector>>& \ - vec_input_shapes, \ - const std::vector& attrs, const PreviousArgs&... pargs) { \ - input_type arg = vec_input_shapes[vec_in_idx]; \ - return InferShapeCallHelper::template InferShape< \ - in_idx, vec_in_idx + 1, attr_idx>(input_shapes, vec_input_shapes, \ - attrs, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(input_type) \ + template \ + struct InferShapeCallHelper { \ + template \ + static Return InferShape( \ + const std::vector>& input_shapes, \ + const std::vector>>& \ + vec_input_shapes, \ + const std::vector& attrs, \ + const PreviousArgs&... pargs) { \ + input_type arg = vec_input_shapes[vec_in_idx]; \ + return InferShapeCallHelper:: \ + template InferShape( \ + input_shapes, vec_input_shapes, attrs, pargs..., arg); \ + } \ } -#define PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(attr_type) \ - template \ - struct InferShapeCallHelper { \ - template \ - static Return InferShape( \ - const std::vector>& input_shapes, \ - const std::vector>>& \ - vec_input_shapes, \ - const std::vector& attrs, const PreviousArgs&... pargs) { \ - try { \ - attr_type arg = paddle::any_cast(attrs[attr_idx]); \ - return InferShapeCallHelper::template InferShape< \ - in_idx, vec_in_idx, attr_idx + 1>(input_shapes, vec_input_shapes, \ - attrs, pargs..., arg); \ - } catch (paddle::bad_any_cast&) { \ - PD_THROW( \ - "Attribute cast error in custom operator InferShapeFn. " \ - "Expected " #attr_type \ - " value. InferShapeFn's attribute list must be exactly same as " \ - "Forward " \ - "KernelFn's attribute list except std::vector " \ - "attribute."); \ - } \ - } \ +#define PD_SPECIALIZE_InferShapeCallHelper_FOR_ATTR(attr_type) \ + template \ + struct InferShapeCallHelper { \ + template \ + static Return InferShape( \ + const std::vector>& input_shapes, \ + const std::vector>>& \ + vec_input_shapes, \ + const std::vector& attrs, \ + const PreviousArgs&... pargs) { \ + try { \ + attr_type arg = paddle::any_cast(attrs[attr_idx]); \ + return InferShapeCallHelper:: \ + template InferShape( \ + input_shapes, vec_input_shapes, attrs, pargs..., arg); \ + } catch (paddle::bad_any_cast&) { \ + PD_THROW( \ + "Attribute cast error in custom operator InferShapeFn. " \ + "Expected " #attr_type \ + " value. InferShapeFn's attribute list must be exactly same as " \ + "Forward " \ + "KernelFn's attribute list except std::vector " \ + "attribute."); \ + } \ + } \ } template @@ -276,8 +295,10 @@ struct InferShapeFuncImpl { const std::vector>& input_shapes, const std::vector>>& vec_input_shapes, const std::vector& attrs) { - return InferShapeCallHelper>::template InferShape< - 0, 0, 0>(input_shapes, vec_input_shapes, attrs); + return InferShapeCallHelper>::template InferShape<0, + 0, + 0>( + input_shapes, vec_input_shapes, attrs); } private: @@ -313,7 +334,8 @@ struct InferShapeFuncImpl { static Return InferShape( const std::vector>& input_shapes, const std::vector>>& vec_input_shapes, - const std::vector& attrs, const Args&... args) { + const std::vector& attrs, + const Args&... args) { return impl_fn(args...); } }; @@ -344,19 +366,20 @@ using InferDtypeFunc = std::vector (*)( } \ } -#define PD_SPECIALIZE_InferDtypeCallHelper_FOR_DTYPES(input_type) \ - template \ - struct InferDtypeCallHelper { \ - template \ - static Return InferDtype( \ - const std::vector& input_dtypes, \ - const std::vector>& vec_input_dtypes, \ - const PreviousArgs&... pargs) { \ - input_type arg = vec_input_dtypes[vec_in_idx]; \ - return InferDtypeCallHelper::template InferDtype< \ - in_idx, vec_in_idx + 1>(input_dtypes, vec_input_dtypes, pargs..., \ - arg); \ - } \ +#define PD_SPECIALIZE_InferDtypeCallHelper_FOR_DTYPES(input_type) \ + template \ + struct InferDtypeCallHelper { \ + template \ + static Return InferDtype( \ + const std::vector& input_dtypes, \ + const std::vector>& vec_input_dtypes, \ + const PreviousArgs&... pargs) { \ + input_type arg = vec_input_dtypes[vec_in_idx]; \ + return InferDtypeCallHelper::template InferDtype( \ + input_dtypes, vec_input_dtypes, pargs..., arg); \ + } \ } template diff --git a/paddle/fluid/extension/include/ext_place.h b/paddle/pten/api/ext/place.h similarity index 100% rename from paddle/fluid/extension/include/ext_place.h rename to paddle/pten/api/ext/place.h diff --git a/paddle/pten/api/include/creation.h b/paddle/pten/api/include/creation.h index 688aa20780bba4bc6fc5229ce25bbcda85345a87..bcd3d4355cf6e9683599143969b136478aa90319 100644 --- a/paddle/pten/api/include/creation.h +++ b/paddle/pten/api/include/creation.h @@ -15,33 +15,34 @@ #pragma once #include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/common/backend.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/common/scalar.h" namespace paddle { namespace experimental { -Tensor full(const std::vector& shape, - const Scalar& value, - DataType dtype = DataType::FLOAT32, - Backend backend = Backend::CPU, - DataLayout layout = DataLayout::NCHW); - -Tensor full_like(const Tensor& x, - const Scalar& value, - DataType dtype = DataType::UNDEFINED, - Backend backend = Backend::UNDEFINED, - DataLayout layout = DataLayout::UNDEFINED); - -Tensor ones_like(const Tensor& x, - DataType dtype = DataType::UNDEFINED, - Backend backend = Backend::UNDEFINED, - DataLayout layout = DataLayout::UNDEFINED); - -Tensor zeros_like(const Tensor& x, - DataType dtype = DataType::UNDEFINED, - Backend backend = Backend::UNDEFINED, - DataLayout layout = DataLayout::UNDEFINED); +PD_DLL_DECL Tensor full(const std::vector& shape, + const Scalar& value, + DataType dtype = DataType::FLOAT32, + Backend backend = Backend::CPU, + DataLayout layout = DataLayout::NCHW); + +PD_DLL_DECL Tensor full_like(const Tensor& x, + const Scalar& value, + DataType dtype = DataType::UNDEFINED, + Backend backend = Backend::UNDEFINED, + DataLayout layout = DataLayout::UNDEFINED); + +PD_DLL_DECL Tensor ones_like(const Tensor& x, + DataType dtype = DataType::UNDEFINED, + Backend backend = Backend::UNDEFINED, + DataLayout layout = DataLayout::UNDEFINED); + +PD_DLL_DECL Tensor zeros_like(const Tensor& x, + DataType dtype = DataType::UNDEFINED, + Backend backend = Backend::UNDEFINED, + DataLayout layout = DataLayout::UNDEFINED); } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/include/linalg.h b/paddle/pten/api/include/linalg.h index c28c133018464b635cd9c6b9037d3b12b8e91a5a..259cf664932038d1abbf4ab1c1ec0ebdba5f00ae 100644 --- a/paddle/pten/api/include/linalg.h +++ b/paddle/pten/api/include/linalg.h @@ -19,12 +19,12 @@ namespace paddle { namespace experimental { -Tensor dot(const Tensor& x, const Tensor& y); +PD_DLL_DECL Tensor dot(const Tensor& x, const Tensor& y); -Tensor matmul(const Tensor& x, - const Tensor& y, - bool transpose_x, - bool transpose_y); +PD_DLL_DECL Tensor matmul(const Tensor& x, + const Tensor& y, + bool transpose_x = false, + bool transpose_y = false); } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/include/manipulation.h b/paddle/pten/api/include/manipulation.h index fe8c01cb74b95fe3c6159413df5e1f0e8b618eb1..8c3bf5ae94d95b8c5cfd7711533990bf1992a876 100644 --- a/paddle/pten/api/include/manipulation.h +++ b/paddle/pten/api/include/manipulation.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -Tensor flatten(const Tensor& x, int start_axis, int stop_axis); +PD_DLL_DECL Tensor flatten(const Tensor& x, int start_axis, int stop_axis); } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/include/math.h b/paddle/pten/api/include/math.h index 01d5850a2b286738827d5bcf800de9a3cf03fefb..41b4d2ffb33611347c3aac6bd7f8061742758cd7 100644 --- a/paddle/pten/api/include/math.h +++ b/paddle/pten/api/include/math.h @@ -21,9 +21,9 @@ namespace experimental { // TODO(chenweihang): add scale API // TODO(chenweihang): move mean API into stat.h/cc -Tensor mean(const Tensor& x); +PD_DLL_DECL Tensor mean(const Tensor& x); -Tensor add(const Tensor& x, const Tensor& y); +PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y); } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/include/registry.h b/paddle/pten/api/include/registry.h new file mode 100644 index 0000000000000000000000000000000000000000..abb31451d522ebc948ad156c2feb4f50a9dd6d4f --- /dev/null +++ b/paddle/pten/api/include/registry.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/api/ext/dll_decl.h" + +namespace paddle { +namespace experimental { + +#if defined(_WIN32) +#define UNUSED +#define __builtin_expect(EXP, C) (EXP) +#else +#define UNUSED __attribute__((unused)) +#endif + +/** + * Now there is no module to call pten's API. When compiling, the function + * implementation will be optimized. Therefore, the symbol will be exposed + * manually for the time being. + * + * After the dynamic graph calls the API in the future, the logic declared + * by these macro can be deleted. + */ + +// use to declare symbol +#define PT_REGISTER_API(name) \ + PD_DLL_DECL int RegisterSymbolsFor##name() { return 0; } + +#define PT_DECLARE_API(name) \ + extern PD_DLL_DECL int RegisterSymbolsFor##name(); \ + UNUSED static int use_pten_api_##name = RegisterSymbolsFor##name() + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/include/tensor.h b/paddle/pten/api/include/tensor.h index 66ea7853541bdbaa99e7f6899e2740c3bcbb3a3b..31e57b69447a22c1486e41bce7e613f616ba378c 100644 --- a/paddle/pten/api/include/tensor.h +++ b/paddle/pten/api/include/tensor.h @@ -17,35 +17,38 @@ limitations under the License. */ #include #include #include +#include -#include "paddle/pten/core/tensor_base.h" +#ifdef PADDLE_WITH_CUDA +#include +using gpuStream_t = cudaStream_t; +#endif -/** - * [ Why still include the fluid headers? ] - * - * We hope to organize the basic implementation of Tensor and the logic related - * to Tensor computation into an independent library, which we call - * [Tensor Operation Library, pten], so we extract or rewrite the original - * Kernels. - * - * In the future, the training library, inference library and custom operators - * will link to this Tensor Operation library. - * - * However, if we directly split the link relation, we need to make too many - * changes, which will affect the stability of the framework, so here we still - * rely on the implementation of the framework, which is a intermediate state. - * - * In the future, the necessary components will be moved to the this library, - * or the corresponding components will be re-implemented. - */ -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" +#ifdef PADDLE_WITH_HIP +#include +using gpuStream_t = hipStream_t; +#endif + +#include "paddle/pten/api/ext/dll_decl.h" +#include "paddle/pten/api/ext/place.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/common/layout.h" + +namespace pten { +class TensorBase; +} // namespace pten namespace paddle { +namespace framework { +class DDim; +} +namespace platform { +class Place; +} namespace experimental { class Tensor; +class CompatiblePTenTensorUtils; class AbstractAutogradMeta { public: @@ -80,138 +83,336 @@ class AbstractAutogradMeta { * another simple Tensor design may be required for inference. */ -class Tensor final { +class PD_DLL_DECL Tensor final { public: /* Part 1: Construction and destruction methods */ - Tensor() {} + + /** + * @brief Construct a new Tensor object + */ + Tensor() = default; + + /** + * @brief Construct a new Tensor object by copy + */ Tensor(const Tensor&) = default; + + /** + * @brief Construct a new Tensor object by move + */ Tensor(Tensor&&) = default; /** - * @description: Use a TensorImpl pointer to construct a Tensor - * @param {shared_ptr} tensor_impl - * @return {Tensor} + * @brief Construct a new Tensor object by a TensorBase pointer + * + * @param tensor_impl + */ + explicit Tensor(std::shared_ptr tensor_impl); + + /** + * @brief Construct a new Tensor object on the target place. + * This is a deprecated method and may be removed in the future! + * + * @param place */ - explicit Tensor(std::shared_ptr tensor_impl) - : impl_(std::move(tensor_impl)) { - PADDLE_ENFORCE_NOT_NULL(impl_, - platform::errors::InvalidArgument( - "TensorImpl with nullptr is not supported")); - } + explicit Tensor(const PlaceType& place); + + /** + * @brief Construct a new Tensor object on the target place + * with specified shape. + * This is a deprecated method and may be removed in the future! + * + * @param place + * @param shape + */ + Tensor(const PlaceType& place, const std::vector& shape); /* Part 2: Dimension, DataType and DataLayout methods */ + + /** + * @brief Return the number of elements of Tensor. + * + * @return int64_t + */ + int64_t numel() const; + + /** + * @brief Get the size of current tensor. + * The compatible method of `Tensor::numel()`. + * This is a deprecated method and may be removed in the future! + * + * @return int64_t + */ + int64_t size() const; + /** - * @description: Return the number of elements of current Tensor. - * @param None - * @return {int64_t} + * @brief Return the dimensions of Tensor. + * + * @return paddle::framework::DDim */ - int64_t numel() const { return impl_->numel(); } + paddle::framework::DDim dims() const; /** - * @description: Return the shape (dimensions) of current Tensor. - * @param None - * @return {DDim} + * @brief Return the shape (dimensions) of Tensor. + * The compatible method of `Tensor::dims()`. + * This is a deprecated method and may be removed in the future! + * + * @return std::vector + */ + std::vector shape() const; + + /** + * @brief Reset the shape of the tensor. + * Reshape must be called before calling mutable_data() or + * copy_to(const PlaceType& place). + * This is a deprecated method and may be removed in the future! + * + * @param shape + */ + void reshape(const std::vector& shape); + + /** + * @brief Return the data type of Tensor. + * + * @return DataType */ - paddle::framework::DDim shape() const { return impl_->dims(); } + DataType dtype() const; /** - * @description: Return the data type of current Tensor. - * @param None - * @return {DataType} + * @brief Return the data type of Tensor. + * The compatible method of `Tensor::dtype()`. + * This is a deprecated method and may be removed in the future! + * + * @return DataType */ - paddle::experimental::DataType type() const { return impl_->data_type(); } + DataType type() const; /** - * @description: Return the layout of current Tensor. - * @param None - * @return {DataLayout} + * @brief Return the layout of Tensor. + * + * @return DataLayout */ - paddle::experimental::DataLayout layout() const { return impl_->layout(); } + DataLayout layout() const; /* Part 3: Device and Backend methods */ + /** - * @description: Return the place (device) of current Tensor. - * @param None - * @return {Place} + * @brief Return the place (device) of Tensor. + * This is a deprecated method and may be removed in the future! + * + * @return PlaceType */ - paddle::platform::Place place() const { return impl_->place(); } + PlaceType place() const; /** - * Backend judgment APIs, shield the concept of Backend. + * @brief Return the place (device) of Tensor. + * Because the `place` method already exists, so we need to use a new name, + * here we temporarily use `inner_place`. + * + * @return paddle::platform::Place */ - bool is_cpu() const { return paddle::platform::is_cpu_place(place()); } - bool is_cuda() const { return paddle::platform::is_gpu_place(place()); } + paddle::platform::Place inner_place() const; /** - * Backend convert APIs. + * @brief Determine whether the tensor device is CPU + * + * @return true + * @return false */ - Tensor cpu() const; - Tensor cuda() const; + bool is_cpu() const; + + /** + * @brief Determine whether the tensor device is CUDA + * + * @return true + * @return false + */ + bool is_cuda() const; /* Part 4: Data Access methods */ + + /** + * @brief Get the memory pointer in CPU or GPU with specific data type. + * It's usually used to get the output data pointer. + * + * @tparam T + * @return T* + */ + template + T* mutable_data(); + /** - * @description: Return the implemention of current Tensor. - * @param None - * @return {std::shared_ptr} + * @brief Get the memory pointer in CPU or GPU with specific data type. + * It's usually used to get the output data pointer. + * This is a deprecated method and may be removed in the future! + * + * @tparam T + * @param place + * @return T* + */ + template + T* mutable_data(const PlaceType& place); + + /** + * @brief Get the const memory pointer directly. + * It's usually used to get the output data pointer. + * + * @tparam T + * @return T* + */ + template + const T* data() const; + + /** + * @brief Get the memory pointer directly. + * It's usually used to get the output data pointer. + * This is a deprecated method and may be removed in the future! + * + * @tparam T + * @return T* + */ + template + T* data(); + + /** + * @brief Return a sub-tensor of the given tensor. + * It is usually used to extract a sub-tensor (which supports + * modifying the data of the original tensor) to perform further + * operations. + * + * @param begin_idx The index of the start row (inclusive) to slice. + * The index number begins from 0. + * @param end_idx The index of the end row (exclusive) to slice. + * The index number begins from begin_idx + 1. + * @return Tensor + */ + Tensor slice(const int64_t begin_idx, const int64_t end_idx) const; + + /** + * @brief Return the implemention of current Tensor. + * + * @return std::shared_ptr + */ + std::shared_ptr impl() const; + + /** + * @brief Set the implemention of current Tensor. + * + * @param impl + */ + void set_impl(const std::shared_ptr& impl); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + /** + * @brief Get the stream where the tensor is currently located + * This is a deprecated method and may be removed in the future! + * + * @return gpuStream_t + */ + gpuStream_t stream() const; +#endif + + /* Part 5: Data Transform methods */ + + /** + * @brief Copy the current Tensor data to the specified device + * and return the new Tensor. + * It's usually used to set the input tensor data. + * This is a deprecated method and may be removed in the future! + * + * @tparam T + * @param target_place, the target place of which the tensor will copy to. + * @return Tensor */ - std::shared_ptr impl() const { return impl_; } + template + Tensor copy_to(const PlaceType& target_place) const; /** - * @description: Set the implemention of current Tensor. - * @param {std::shared_ptr} - * @return None + * @brief Transfer the current Tensor to the specified device and return. + * + * @param place, the target place of which the tensor will copy to. + * @return Tensor + */ + Tensor to(const PlaceType& place) const; + + /** + * @brief Cast datatype from one to another + * + * @param target_type + * @return Tensor */ - void set_impl(const std::shared_ptr& impl) { impl_ = impl; } + Tensor cast(const DataType& target_type) const; - // TODO(chenweihang): Whether API Tensor need `data` and `mutable_data`? + /* Part 6: Status utils methods */ - // TODO(chenweihang): slice and split methods use kernels? + /** + * @brief Determine whether it is a meaningful Tensor + * + * @return true + * @return false + */ + bool defined() const; + + /** + * @brief Determine whether Tensor is initialized. + * + * @return true + * @return false + */ + bool initialized() const; + + /** + * @brief Determine whether Tensor is initialized. + * This is a deprecated method and may be removed in the future! + * + * @return true + * @return false + */ + bool is_initialized() const; - /* Part 5: Status utils methods */ /** - * @description: Determine whether it is a meaningful Tensor - * @param None - * @return {bool} + * @brief Reset the Tensor implementation */ - bool defined() const { return impl_ != nullptr; } + void reset(); + + /* Part 7: Operator overloading */ /** - * @description: Determine whether Tensor is initialized - * @param None - * @return {bool} + * @brief Assignment operator + * + * @param x + * @return Tensor& */ - bool initialized() const { return impl_->initialized(); } + Tensor& operator=(const Tensor& x) &; /** - * @description: Reset the Tensor implementation - * @param None - * @return {void} + * @brief Move assignment operator + * + * @param x + * @return Tensor& */ - void reset() { impl_.reset(); } + Tensor& operator=(Tensor&& x) &; - /* Part 6: Operator overloading */ - Tensor& operator=(const Tensor& x) & { - impl_ = x.impl_; - autograd_meta_ = x.autograd_meta_; - return *this; - } - Tensor& operator=(Tensor&& x) & { - impl_ = std::move(x.impl_); - autograd_meta_ = std::move(x.autograd_meta_); - return *this; - } + /* Part 8: Autograd methods */ - /* Part 7: Autograd methods */ - AbstractAutogradMeta* get_autograd_meta() const { - return autograd_meta_.get(); - } + /** + * @brief Get the autograd meta object + * + * @return AbstractAutogradMeta* + */ + AbstractAutogradMeta* get_autograd_meta() const; - void set_autograd_meta(std::shared_ptr autograd_meta) { - autograd_meta_ = std::move(autograd_meta); - } + /** + * @brief Set the autograd meta object + * + * @param autograd_meta + */ + void set_autograd_meta(std::shared_ptr autograd_meta); - /* Part 8: Auto generated Tensor methods */ - // ... + /* Part 9: Auto generated Tensor methods */ + + private: + friend class CompatiblePTenTensorUtils; private: /** @@ -249,10 +450,15 @@ class Tensor final { /** * Tensor name: used for adapt original execution mechanism and debug analysis - * in the development of new dygraph. + * in the development of new dygraph. It may be removed in the future. */ std::string name_; }; } // namespace experimental } // namespace paddle + +namespace paddle { +// In order to be compatible with the original custom operator Tensor interface +using Tensor = paddle::experimental::Tensor; +} // namespace paddle diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index a4726b3d426f63bcbb2d2d8e07aa1f286a0bc0bb..2f605e491daa4f68c592ef291f53bc578361d18b 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -1,6 +1,20 @@ add_subdirectory(utils) -cc_library(math_api SRCS math.cc DEPS pten) -cc_library(linalg_api SRCS linalg.cc DEPS pten) -cc_library(creation_api SRCS creation.cc DEPS pten) -cc_library(manipulation_api SRCS manipulation.cc DEPS pten) +cc_library(ext_compat_utils SRCS ext_compat_utils.cc DEPS place) + +if (WITH_GPU) + nv_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils enforce) +elseif (WITH_ROCM) + hip_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils enforce) +else() + cc_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils enforce) +endif() + +cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor device_context kernel_factory) + +cc_library(op_meta_info SRCS op_meta_info.cc DEPS pten_tensor) + +cc_library(math_api SRCS math.cc DEPS pten_tensor pten kernel_dispatch) +cc_library(linalg_api SRCS linalg.cc DEPS pten_tensor pten kernel_dispatch) +cc_library(creation_api SRCS creation.cc DEPS pten_tensor pten kernel_dispatch) +cc_library(manipulation_api SRCS manipulation.cc DEPS pten_tensor pten kernel_dispatch) diff --git a/paddle/pten/api/include/backend_set.h b/paddle/pten/api/lib/backend_set.h similarity index 90% rename from paddle/pten/api/include/backend_set.h rename to paddle/pten/api/lib/backend_set.h index e01c195e955301326007b71339a785668e56837a..923048cdd2c84dea22d9adf41eb5ddc8f868d63d 100644 --- a/paddle/pten/api/include/backend_set.h +++ b/paddle/pten/api/lib/backend_set.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/api/ext/exception.h" #include "paddle/pten/common/backend.h" namespace paddle { namespace experimental { @@ -38,10 +38,7 @@ class BackendSet final { uint64_t bitset() const { return bitset_; } bool inline Has(Backend b) const { - PADDLE_ENFORCE_NE(b, - Backend::UNDEFINED, - platform::errors::InvalidArgument( - "Backend argument can't be UNDEFINED.")); + PD_CHECK(b != Backend::UNDEFINED, "Backend argument can't be UNDEFINED."); return static_cast(bitset_ & BackendSet(b).bitset()); } bool IsEmpty() const { return bitset_ == 0; } diff --git a/paddle/pten/api/lib/creation.cc b/paddle/pten/api/lib/creation.cc index 450b4d3de7eb3c7920be573291c6404efd2fb562..f39baa0207c73b1d5e9c429748826eba20129224 100644 --- a/paddle/pten/api/lib/creation.cc +++ b/paddle/pten/api/lib/creation.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/pten/api/include/registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/include/core.h" @@ -26,11 +27,11 @@ limitations under the License. */ namespace paddle { namespace experimental { -Tensor full(const std::vector& shape, - const Scalar& value, - DataType dtype, - Backend backend, - DataLayout layout) { +PD_DLL_DECL Tensor full(const std::vector& shape, + const Scalar& value, + DataType dtype, + Backend backend, + DataLayout layout) { // 1. Get kernel signature and kernel pten::KernelKey kernel_key{backend, layout, dtype}; auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( @@ -61,11 +62,11 @@ Tensor full(const std::vector& shape, return out; } -Tensor full_like(const Tensor& x, - const Scalar& value, - DataType dtype, - Backend backend, - DataLayout layout) { +PD_DLL_DECL Tensor full_like(const Tensor& x, + const Scalar& value, + DataType dtype, + Backend backend, + DataLayout layout) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -106,19 +107,21 @@ Tensor full_like(const Tensor& x, return out; } -Tensor ones_like(const Tensor& x, - DataType dtype, - Backend backend, - DataLayout layout) { +PD_DLL_DECL Tensor ones_like(const Tensor& x, + DataType dtype, + Backend backend, + DataLayout layout) { return full_like(x, 1, dtype, backend, layout); } -Tensor zeros_like(const Tensor& x, - DataType dtype, - Backend backend, - DataLayout layout) { +PD_DLL_DECL Tensor zeros_like(const Tensor& x, + DataType dtype, + Backend backend, + DataLayout layout) { return full_like(x, 0, dtype, backend, layout); } } // namespace experimental } // namespace paddle + +PT_REGISTER_API(Creation); diff --git a/paddle/pten/api/lib/ext_compat_utils.cc b/paddle/pten/api/lib/ext_compat_utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..3aef1257072910347ea517249a84c50c5ca4aec1 --- /dev/null +++ b/paddle/pten/api/lib/ext_compat_utils.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/api/lib/ext_compat_utils.h" +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace experimental { + +platform::Place ConvertExtPlaceToInnerPlace(const PlaceType& p) { + if (p == PlaceType::kCPU) { + return platform::Place(platform::CPUPlace()); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (p == PlaceType::kGPU) { + return platform::Place(platform::CUDAPlace(platform::GetCurrentDeviceId())); +#endif + } else { + PADDLE_THROW( + platform::errors::Unimplemented("Unsupported place type code(%d) when " + "casting enum place to paddle place.", + static_cast(p))); + } + return platform::Place(); +} + +PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p) { + if (platform::is_cpu_place(p)) { + return PlaceType::kCPU; + } else if (platform::is_gpu_place(p)) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + return PlaceType::kGPU; +#endif + } else { + PADDLE_THROW( + platform::errors::Unimplemented("Unsupported place type `%s` when " + "casting paddle place to enum place.", + p)); + } + return PlaceType::kUNK; +} + +} // namespace experimental +} // namespace paddle diff --git a/paddle/fluid/extension/include/ext_all.h b/paddle/pten/api/lib/ext_compat_utils.h similarity index 55% rename from paddle/fluid/extension/include/ext_all.h rename to paddle/pten/api/lib/ext_compat_utils.h index 6987b33012f64d6e4d473ffc7ae666c432c65967..3a4ea20ff88e09784126d1831ffde91628f67e58 100644 --- a/paddle/fluid/extension/include/ext_all.h +++ b/paddle/pten/api/lib/ext_compat_utils.h @@ -14,19 +14,15 @@ limitations under the License. */ #pragma once -#if !defined(_MSC_VER) && __cplusplus < 201402L -#error C++14 or later compatible compiler is required to use Paddle. -#endif - -#ifdef _WIN32 -#ifndef NOMINMAX -#define NOMINMAX // msvc max/min macro conflict with std::min/max -#endif -#endif - -#include "ext_dispatch.h" // NOLINT -#include "ext_dtype.h" // NOLINT -#include "ext_exception.h" // NOLINT -#include "ext_op_meta_info.h" // NOLINT -#include "ext_place.h" // NOLINT -#include "ext_tensor.h" // NOLINT +#include "paddle/fluid/platform/place.h" +#include "paddle/pten/api/ext/place.h" + +namespace paddle { +namespace experimental { + +platform::Place ConvertExtPlaceToInnerPlace(const PlaceType& p); + +PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p); + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/kernel_dispatch.cc b/paddle/pten/api/lib/kernel_dispatch.cc new file mode 100644 index 0000000000000000000000000000000000000000..0205a0d53c319fdc7c4a809fd0aecf9f3a11210e --- /dev/null +++ b/paddle/pten/api/lib/kernel_dispatch.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/api/lib/kernel_dispatch.h" + +#include "paddle/pten/core/convert_utils.h" + +namespace paddle { +namespace experimental { +namespace detail { + +BackendSet GetTensorBackendSet(const Tensor& t) { + BackendSet backend_set(pten::TransToPtenBackend(t.inner_place())); + switch (t.layout()) { + case DataLayout::MKLDNN: + backend_set = backend_set | BackendSet(Backend::MKLDNN); + break; + default: + // do nothing + break; + } + return backend_set; +} + +std::size_t CountLeadingZeros(uint64_t val) { + if (val == 0) { + return 64; + } + std::size_t zero_bits = 0; + for (std::size_t shift = 64 >> 1; shift; shift >>= 1) { + uint64_t tmp = val >> shift; + if (tmp) { + val = tmp; + } else { + zero_bits |= shift; + } + } + return zero_bits; +} + +} // namespace detail + +paddle::platform::DeviceContext* GetDeviceContextByBackend( + pten::Backend backend) { + auto& pool = paddle::platform::DeviceContextPool::Instance(); + return pool.Get(pten::TransToFluidPlace(backend)); +} + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/kernel_dispatch.h b/paddle/pten/api/lib/kernel_dispatch.h index 567c21eeee9e86757db8842d49afd6ad5cfa02f6..46aa9ce992939ac98b3718f693f5c737893768ce 100644 --- a/paddle/pten/api/lib/kernel_dispatch.h +++ b/paddle/pten/api/lib/kernel_dispatch.h @@ -18,13 +18,12 @@ limitations under the License. */ #include #include -#include "paddle/pten/api/include/backend_set.h" #include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/api/lib/backend_set.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/common/layout.h" // TODO(chenweihang): split KernelName, Key, Kernel, Factory into diff files -#include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/kernel_factory.h" // See Note [ Why still include the fluid headers? ] @@ -40,36 +39,13 @@ using CUDAContext = paddle::platform::CUDADeviceContext; #endif namespace detail { -BackendSet GetTensorBackendSet(const Tensor& t) { - BackendSet backend_set(pten::TransToPtenBackend(t.place())); - switch (t.layout()) { - case DataLayout::MKLDNN: - backend_set = backend_set | BackendSet(Backend::MKLDNN); - break; - default: - // do nothing - break; - } - return backend_set; -} - -std::size_t CountLeadingZeros(uint64_t val) { - if (val == 0) { - return 64; - } - std::size_t zero_bits = 0; - for (std::size_t shift = 64 >> 1; shift; shift >>= 1) { - uint64_t tmp = val >> shift; - if (tmp) { - val = tmp; - } else { - zero_bits |= shift; - } - } - return zero_bits; -} +BackendSet GetTensorBackendSet(const Tensor& t); +std::size_t CountLeadingZeros(uint64_t val); } // namespace detail +paddle::platform::DeviceContext* GetDeviceContextByBackend( + pten::Backend backend); + // TODO(chenweihang): support DataLayout and DataType selected struct KernelKeySet { BackendSet backend_set{Backend::UNDEFINED}; @@ -144,11 +120,5 @@ KernelKeySet ParseKernelKeyByInputArgs(const Args&... args) { return detail::KernelKeyParser().apply(args...).key_set; } -paddle::platform::DeviceContext* GetDeviceContextByBackend( - pten::Backend backend) { - auto& pool = paddle::platform::DeviceContextPool::Instance(); - return pool.Get(pten::TransToFluidPlace(backend)); -} - } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/linalg.cc b/paddle/pten/api/lib/linalg.cc index 0ede7b8a68b416c0101579321e4fd507a69dc897..66c7b570b38374360321e46ebd008d6bc9b79047 100644 --- a/paddle/pten/api/lib/linalg.cc +++ b/paddle/pten/api/lib/linalg.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/pten/api/include/registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/convert_utils.h" @@ -29,7 +30,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -Tensor dot(const Tensor& x, const Tensor& y) { +PD_DLL_DECL Tensor dot(const Tensor& x, const Tensor& y) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -64,10 +65,10 @@ Tensor dot(const Tensor& x, const Tensor& y) { return out; } -Tensor matmul(const Tensor& x, - const Tensor& y, - bool transpose_x, - bool transpose_y) { +PD_DLL_DECL Tensor matmul(const Tensor& x, + const Tensor& y, + bool transpose_x, + bool transpose_y) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x, y); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -108,3 +109,5 @@ Tensor matmul(const Tensor& x, } // namespace experimental } // namespace paddle + +PT_REGISTER_API(Linalg); diff --git a/paddle/pten/api/lib/manipulation.cc b/paddle/pten/api/lib/manipulation.cc index 43752120e37100af3d737e629623bd76c9b23d87..4b80b2010ccc07f5f0c333638aa1702cb683d064 100644 --- a/paddle/pten/api/lib/manipulation.cc +++ b/paddle/pten/api/lib/manipulation.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/pten/api/include/registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/include/core.h" @@ -25,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -Tensor flatten(const Tensor& x, int start_axis, int stop_axis) { +PD_DLL_DECL Tensor flatten(const Tensor& x, int start_axis, int stop_axis) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -60,3 +61,5 @@ Tensor flatten(const Tensor& x, int start_axis, int stop_axis) { } } // namespace experimental } // namespace paddle + +PT_REGISTER_API(Manipulation); diff --git a/paddle/pten/api/lib/math.cc b/paddle/pten/api/lib/math.cc index 3c6536af6afa9aa851c234330f36cb419a846700..be16e83219bc2c5a2a78747c7c6509df65fadee2 100644 --- a/paddle/pten/api/lib/math.cc +++ b/paddle/pten/api/lib/math.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/pten/api/include/registry.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/include/core.h" @@ -27,7 +28,7 @@ limitations under the License. */ namespace paddle { namespace experimental { -Tensor mean(const Tensor& x) { +PD_DLL_DECL Tensor mean(const Tensor& x) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -60,7 +61,7 @@ Tensor mean(const Tensor& x) { return out; } -Tensor add(const Tensor& x, const Tensor& y) { +PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y) { // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); @@ -97,3 +98,5 @@ Tensor add(const Tensor& x, const Tensor& y) { } // namespace experimental } // namespace paddle + +PT_REGISTER_API(Math); diff --git a/paddle/fluid/extension/src/ext_op_meta_info.cc b/paddle/pten/api/lib/op_meta_info.cc similarity index 97% rename from paddle/fluid/extension/src/ext_op_meta_info.cc rename to paddle/pten/api/lib/op_meta_info.cc index 40cad7a155226bfa0d97501ae93540449c86ad4a..586fa0cc05526fc15beff00fca214dbf84682e14 100644 --- a/paddle/fluid/extension/src/ext_op_meta_info.cc +++ b/paddle/pten/api/lib/op_meta_info.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/extension/include/ext_op_meta_info.h" +#include "paddle/pten/api/ext/op_meta_info.h" #include #include @@ -72,7 +72,8 @@ OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name, size_t index) { auto& info_vector = OpMetaInfoMap::Instance()[name_]; // index check PADDLE_ENFORCE_EQ( - info_vector.size(), index_, + info_vector.size(), + index_, platform::errors::PreconditionNotMet( "The operator %s's meta info register failed. " "Please make sure you call marcos as order `PD_BUILD_OP`, " @@ -122,7 +123,8 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) { OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) { PADDLE_ENFORCE_EQ( - index_, 0UL, + index_, + 0UL, platform::errors::Unimplemented( "Currently, the InferShapeFn setting of Grad Op is not supported, " "And backward Tensor `X@GRAD` will use the shape of forward Tensor " @@ -133,7 +135,8 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) { OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) { PADDLE_ENFORCE_EQ( - index_, 0UL, + index_, + 0UL, platform::errors::Unimplemented( "Currently, the InferDtypeFn setting of Grad Op is not supported, " "And backward Tensor `X@GRAD` will use the dtype of forward Tensor " diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..5d10c7209e2b866a8f3d4c33342b3b6e5bb03694 --- /dev/null +++ b/paddle/pten/api/lib/tensor.cc @@ -0,0 +1,343 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/api/include/tensor.h" + +#include +#include +#include + +#include "glog/logging.h" +#include "paddle/pten/api/lib/ext_compat_utils.h" +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_base.h" +#include "paddle/pten/core/tensor_meta.h" + +/** + * [ Why still include the fluid headers? ] + * + * We hope to organize the basic implementation of Tensor and the logic related + * to Tensor computation into an independent library, which we call + * [Tensor Operation Library, pten], so we extract or rewrite the original + * Kernels. + * + * In the future, the training library, inference library and custom operators + * will link to this Tensor Operation library. + * + * However, if we directly split the link relation, we need to make too many + * changes, which will affect the stability of the framework, so here we still + * rely on the implementation of the framework, which is a intermediate state. + * + * In the future, the necessary components will be moved to the this library, + * or the corresponding components will be re-implemented. + */ +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/platform/complex.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/stream/cuda_stream.h" + +namespace paddle { +namespace experimental { + +namespace detail { + +inline bool IsDenseTensor( + const std::shared_ptr &tensor_impl) { + return tensor_impl->type_info().name() == "DenseTensor"; +} + +} // namespace detail + +/////// Tensor Methods //////// + +/* Part 1: Construction and destruction methods */ + +Tensor::Tensor(std::shared_ptr tensor_impl) + : impl_(std::move(tensor_impl)) { + PADDLE_ENFORCE_NOT_NULL(impl_, + platform::errors::InvalidArgument( + "TensorImpl with nullptr is not supported")); +} + +Tensor::Tensor(const PlaceType &place) + : impl_(std::move(std::make_shared( + std::move(pten::make_intrusive( + ConvertExtPlaceToInnerPlace(place))), + std::move(pten::DenseTensorMeta(pten::DataType::UNDEFINED, + framework::make_ddim({}), + pten::DataLayout::NCHW))))) {} + +Tensor::Tensor(const PlaceType &place, const std::vector &shape) + : impl_(std::move(std::make_shared( + std::move(pten::make_intrusive( + ConvertExtPlaceToInnerPlace(place))), + std::move(pten::DenseTensorMeta(pten::DataType::UNDEFINED, + framework::make_ddim(shape), + pten::DataLayout::NCHW))))) {} + +/* Part 2: Dimension, DataType and DataLayout methods */ + +int64_t Tensor::numel() const { return impl_->numel(); } + +int64_t Tensor::size() const { return impl_->numel(); } + +paddle::framework::DDim Tensor::dims() const { return impl_->dims(); } + +std::vector Tensor::shape() const { + return paddle::framework::vectorize(impl_->dims()); +} + +void Tensor::reshape(const std::vector &shape) { + PADDLE_THROW(platform::errors::Unimplemented( + "The reshape operation is not supported now, " + "and it will be implemented by calling the reshape kernel later.")); +} + +DataType Tensor::dtype() const { return impl_->data_type(); } + +DataType Tensor::type() const { return impl_->data_type(); } + +DataLayout Tensor::layout() const { return impl_->layout(); } + +/* Part 3: Device and Backend methods */ + +PlaceType Tensor::place() const { + return ConvertInnerPlaceToExtPlace(impl_->place()); +} + +paddle::platform::Place Tensor::inner_place() const { return impl_->place(); } + +bool Tensor::is_cpu() const { + return paddle::platform::is_cpu_place(impl_->place()); +} + +bool Tensor::is_cuda() const { + return paddle::platform::is_gpu_place(impl_->place()); +} + +/* Part 4: Data Access methods */ + +template +T *Tensor::mutable_data() { + if (detail::IsDenseTensor(impl_)) { + return std::dynamic_pointer_cast(impl_) + ->mutable_data(); + } + return nullptr; +} + +template PD_DLL_DECL float *Tensor::mutable_data(); +template PD_DLL_DECL double *Tensor::mutable_data(); +template PD_DLL_DECL int64_t *Tensor::mutable_data(); +template PD_DLL_DECL int32_t *Tensor::mutable_data(); +template PD_DLL_DECL uint8_t *Tensor::mutable_data(); +template PD_DLL_DECL int8_t *Tensor::mutable_data(); +template PD_DLL_DECL int16_t *Tensor::mutable_data(); +template PD_DLL_DECL bool *Tensor::mutable_data(); +template PD_DLL_DECL paddle::platform::complex + *Tensor::mutable_data>(); +template PD_DLL_DECL paddle::platform::complex + *Tensor::mutable_data>(); +template PD_DLL_DECL paddle::platform::float16 * +Tensor::mutable_data(); + +template +T *Tensor::mutable_data(const PlaceType &place) { + auto inner_place = ConvertExtPlaceToInnerPlace(place); + PADDLE_ENFORCE_EQ( + platform::is_same_place(inner_place, impl_->place()), + true, + platform::errors::Unimplemented("Modification of tensor place through " + "mutable_data is not supported now")); + return mutable_data(); +} + +template PD_DLL_DECL float *Tensor::mutable_data(const PlaceType &place); +template PD_DLL_DECL double *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int64_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int32_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL uint8_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int8_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int16_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL bool *Tensor::mutable_data(const PlaceType &place); +template PD_DLL_DECL paddle::platform::complex * +Tensor::mutable_data>(const PlaceType &place); +template PD_DLL_DECL paddle::platform::complex * +Tensor::mutable_data>(const PlaceType &place); +template PD_DLL_DECL paddle::platform::float16 * +Tensor::mutable_data(const PlaceType &place); + +template +const T *Tensor::data() const { + if (detail::IsDenseTensor(impl_)) { + return std::dynamic_pointer_cast(impl_)->data(); + } + return nullptr; +} + +template PD_DLL_DECL const float *Tensor::data() const; +template PD_DLL_DECL const double *Tensor::data() const; +template PD_DLL_DECL const int64_t *Tensor::data() const; +template PD_DLL_DECL const int32_t *Tensor::data() const; +template PD_DLL_DECL const uint8_t *Tensor::data() const; +template PD_DLL_DECL const int8_t *Tensor::data() const; +template PD_DLL_DECL const int16_t *Tensor::data() const; +template PD_DLL_DECL const bool *Tensor::data() const; +template PD_DLL_DECL const paddle::platform::complex + *Tensor::data>() const; +template PD_DLL_DECL const paddle::platform::complex + *Tensor::data>() const; +template PD_DLL_DECL const paddle::platform::float16 * +Tensor::data() const; + +template +T *Tensor::data() { + PADDLE_THROW(platform::errors::Unimplemented( + "It is not currently supported to directly obtain the modifiable data " + "address through the tensor::data() method, please use the " + "tensor::mutable_data() method.")); + return nullptr; +} + +template PD_DLL_DECL float *Tensor::data(); +template PD_DLL_DECL double *Tensor::data(); +template PD_DLL_DECL int64_t *Tensor::data(); +template PD_DLL_DECL int32_t *Tensor::data(); +template PD_DLL_DECL uint8_t *Tensor::data(); +template PD_DLL_DECL int8_t *Tensor::data(); +template PD_DLL_DECL int16_t *Tensor::data(); +template PD_DLL_DECL bool *Tensor::data(); +template PD_DLL_DECL paddle::platform::complex + *Tensor::data>(); +template PD_DLL_DECL paddle::platform::complex + *Tensor::data>(); +template PD_DLL_DECL paddle::platform::float16 * +Tensor::data(); + +Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const { + PADDLE_THROW(platform::errors::Unimplemented( + "The slice operation is not supported now, " + "and it will be implemented by calling the slice kernel later.")); + return Tensor(); +} + +std::shared_ptr Tensor::impl() const { return impl_; } + +void Tensor::set_impl(const std::shared_ptr &impl) { + impl_ = impl; +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +gpuStream_t Tensor::stream() const { + return platform::stream::get_current_stream(-1)->raw_stream(); +} +#endif + +/* Part 5: Data Transform methods */ + +template +Tensor Tensor::copy_to(const PlaceType &target_place) const { + PADDLE_THROW(platform::errors::Unimplemented( + "The copy_to operation is not supported now, " + "and it will be implemented by calling the copy kernel later.")); + return Tensor(); +} + +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor Tensor::copy_to>( + const PlaceType &target_place) const; +template PD_DLL_DECL Tensor Tensor::copy_to>( + const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; + +Tensor Tensor::to(const PlaceType &target_place) const { + PADDLE_THROW(platform::errors::Unimplemented( + "The to operation is not supported now, " + "and it will be implemented by calling the copy kernel later.")); + return Tensor(); +} + +Tensor Tensor::cast(const DataType &target_type) const { + PADDLE_THROW(platform::errors::Unimplemented( + "The cast operation is not supported now, " + "and it will be implemented by calling the cast kernel later.")); + return Tensor(); +} + +/* Part 6: Status utils methods */ + +bool Tensor::defined() const { return impl_ != nullptr; } + +bool Tensor::initialized() const { + return impl_ != nullptr && impl_->initialized(); +} + +bool Tensor::is_initialized() const { + return impl_ != nullptr && impl_->initialized(); +} + +void Tensor::reset() { impl_.reset(); } + +/* Part 7: Operator overloading */ + +Tensor &Tensor::operator=(const Tensor &x) & { + impl_ = x.impl_; + autograd_meta_ = x.autograd_meta_; + return *this; +} + +Tensor &Tensor::operator=(Tensor &&x) & { + impl_ = std::move(x.impl_); + autograd_meta_ = std::move(x.autograd_meta_); + return *this; +} + +AbstractAutogradMeta *Tensor::get_autograd_meta() const { + return autograd_meta_.get(); +} + +void Tensor::set_autograd_meta( + std::shared_ptr autograd_meta) { + autograd_meta_ = std::move(autograd_meta); +} + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/utils/storage.h b/paddle/pten/api/lib/utils/storage.h index 2ba1e2e90eef214b8701bb8b32dff34481b9d52e..b05ae4cb08494adb930efe5c4ddb5dfab8f3d5f4 100644 --- a/paddle/pten/api/lib/utils/storage.h +++ b/paddle/pten/api/lib/utils/storage.h @@ -63,11 +63,24 @@ class SharedStorage : public pten::Storage { size_ = allocation->size(); } + // In order to be compatible with the original Tensor design and execution + // system, we need to allow the uninitialized SharedStorage to exist, + // and it can be removed after the compatibility phase is over in the future + explicit SharedStorage(const paddle::platform::Place& place) { + data_ = pten::Allocation(nullptr, place); + } + static const char* name() { return "SharedStorage"; } + // In order to be compatible with the original Tensor design and execution + // system, we need to allow the SharedStorage realloc, + // and it can be removed after the compatibility phase is over in the future void Realloc(size_t n) override { - PADDLE_THROW(paddle::platform::errors::Unavailable( - "The external shared storage cannot be reallocated.")); + if (data() != nullptr) { + PADDLE_THROW(paddle::platform::errors::Unavailable( + "The external shared storage cannot be reallocated.")); + } + ResetAllocation(paddle::memory::AllocShared(place(), n), 0); } void Clear() override { diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 52554bf7af0cadeff416546ec7c21cfe2988a189..2c362a11b11d9c112ab477bbee40c5edc4723725 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -113,19 +113,30 @@ std::unique_ptr MakePtenTensorBaseFromVar( } void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { - CHECK(src); - CHECK(dst); + PADDLE_ENFORCE_NOT_NULL( + src, + platform::errors::InvalidArgument( + "The source DenseTensor is nullptr when move storage.")); + PADDLE_ENFORCE_NOT_NULL( + dst, + platform::errors::InvalidArgument( + "The destination Tensor is nullptr when move storage.")); dst->Resize(src->dims()); auto storage = src->release(); - CHECK(storage->OwnsMemory()); std::shared_ptr holder( new TensorStorage(std::move(storage))); dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); } void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { - CHECK(src); - CHECK(dst); + PADDLE_ENFORCE_NOT_NULL( + src, + platform::errors::InvalidArgument( + "The source DenseTensor is nullptr when move storage.")); + PADDLE_ENFORCE_NOT_NULL( + dst, + platform::errors::InvalidArgument( + "The destination LoDTensor is nullptr when move storage.")); SetLoD(dst->mutable_lod(), src->lod()); MovesStorage(src, static_cast(dst)); } diff --git a/paddle/pten/common/backend.h b/paddle/pten/common/backend.h index e0bf746050a672ace718e33f56df1391218954f7..94080701e28166c20952ba09d8eccb1394508923 100644 --- a/paddle/pten/common/backend.h +++ b/paddle/pten/common/backend.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/api/ext/exception.h" namespace paddle { namespace experimental { @@ -80,8 +80,7 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) { os << "CUDNN"; break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid enum backend type `%d`.", static_cast(backend))); + PD_THROW("Invalid enum backend type `", static_cast(backend), "`."); } return os; } diff --git a/paddle/pten/common/data_type.h b/paddle/pten/common/data_type.h index 27ca28b2734859612fd9b2a4d1356097e39e5342..1ddee0746d4d16e8d31c52f2442b31955709f3a3 100644 --- a/paddle/pten/common/data_type.h +++ b/paddle/pten/common/data_type.h @@ -14,11 +14,11 @@ limitations under the License. */ #pragma once -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" +#include "bfloat16.h" // NOLINT +#include "complex.h" // NOLINT +#include "float16.h" // NOLINT + +#include "paddle/pten/api/ext/exception.h" namespace paddle { namespace experimental { @@ -72,9 +72,9 @@ inline size_t SizeOf(DataType data_type) { return 16; case DataType::UNDEFINED: case DataType::NUM_DATA_TYPES: - PADDLE_THROW(platform::errors::Unimplemented( - "Data type %d is not supported by tensor.", - static_cast(data_type))); + PD_THROW("Data type `", + static_cast(data_type), + "` is not supported by tensor."); } return 0; } @@ -173,8 +173,7 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { os << "complex128"; break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid enum data type `%d`.", static_cast(dtype))); + PD_THROW("Invalid enum data type `", static_cast(dtype), "`."); } return os; } @@ -184,4 +183,13 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { namespace pten { using DataType = paddle::experimental::DataType; -} +} // namespace pten + +namespace paddle { +// In order to be compatible with the original custom operator Tensor interface +using DataType = paddle::experimental::DataType; +using bfloat16 = paddle::experimental::bfloat16; +using complex64 = paddle::experimental::complex64; +using complex128 = paddle::experimental::complex128; +using float16 = paddle::experimental::float16; +} // namespace paddle diff --git a/paddle/pten/common/layout.h b/paddle/pten/common/layout.h index 0da10dff4335b9618fcec6dd19a324b15a519705..b93e2623ee7d0f8cbcfd3ecb410799d2a31cd2f1 100644 --- a/paddle/pten/common/layout.h +++ b/paddle/pten/common/layout.h @@ -14,8 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/platform/enforce.h" - +#include "paddle/pten/api/ext/exception.h" namespace paddle { namespace experimental { @@ -46,8 +45,8 @@ inline std::ostream& operator<<(std::ostream& os, DataLayout layout) { os << "MKLDNN"; break; default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid enum data layout type `%d`.", static_cast(layout))); + PD_THROW( + "Invalid enum data layout type `", static_cast(layout), "`."); } return os; } diff --git a/paddle/pten/common/scalar.h b/paddle/pten/common/scalar.h index ef648ba70f33684c9d81295c092a0aeec82a660c..bc2488024f13bf8baabc76fececc1c15a0da8cab 100644 --- a/paddle/pten/common/scalar.h +++ b/paddle/pten/common/scalar.h @@ -15,8 +15,9 @@ limitations under the License. */ #pragma once #include +#include -#include "paddle/fluid/platform/enforce.h" +#include "paddle/pten/api/ext/exception.h" namespace paddle { namespace experimental { @@ -60,8 +61,7 @@ class Scalar { case Tag::HAS_B: return static_cast(data_.b); default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Invalid enum scalar type tag `%d`.", static_cast(tag))); + PD_THROW("Invalid enum scalar type tag `", static_cast(tag), "`."); } } @@ -83,4 +83,4 @@ class Scalar { namespace pten { using Scalar = paddle::experimental::Scalar; -} +} // namespace pten diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index a7ccf314674384bc8f3e97a3cb3bfa461b715d50..ae09507193e9cc24f4ec341b58669eda8b651e42 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -1,9 +1,3 @@ -IF(WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) -ELSE() - set(MKLDNN_CTX_DEPS) -ENDIF() - if(WITH_GPU) cc_library(convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) elseif(WITH_ROCM) diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 2dfd523579d26e780358c5b8c546785a463855df..fbc2a24312941e27c444509f5985d0c8ac162938 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -14,6 +14,11 @@ limitations under the License. */ #include "paddle/pten/core/dense_tensor.h" +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/complex.h" +#include "paddle/fluid/platform/float16.h" + namespace pten { DenseTensor::DenseTensor(const std::shared_ptr& a, @@ -74,6 +79,13 @@ void* DenseTensor::mutable_data(size_t request_bytes) { template T* DenseTensor::mutable_data() { + // In order to be compatible with the original Tensor design and + // execution system, we have to reset the datatype in mutable_data. + // When the compatibility phase is over in the future, we can delete it + if (meta_.type == DataType::UNDEFINED) { + const_cast(meta_.type) = + paddle::experimental::CppTypeToDataType::Type(); + } PADDLE_ENFORCE( (data_type() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::InvalidArgument( diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 8c2b711015c9da2c5daac2005eae47fdfa2e403d..ca313ef9cc31f840e78977d70492118ad0f97fc9 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -76,11 +76,11 @@ class DenseTensor : public TensorBase, /// \brief Returns the number of elements contained in tensor. /// \return The number of elements contained in tensor. - int64_t numel() const; + int64_t numel() const override; /// \brief Returns the dims of the tensor. /// \return The dims of the tensor. - const DDim& dims() const noexcept { return meta_.dims; } + const DDim& dims() const noexcept override { return meta_.dims; } /// \brief Returns the lod of the tensor. /// \return The lod of the tensor. @@ -93,15 +93,15 @@ class DenseTensor : public TensorBase, /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - DataType data_type() const noexcept { return meta_.type; } + DataType data_type() const noexcept override { return meta_.type; } /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. - DataLayout layout() const noexcept { return meta_.layout; } + DataLayout layout() const noexcept override { return meta_.layout; } /// \brief Returns the data place of the tensor. /// \return The data place of the tensor. - const Place& place() const { return storage_->place(); } + const Place& place() const override { return storage_->place(); } /// \brief Returns the meta information of the tensor. /// \return The meta information of the tensor. @@ -109,11 +109,13 @@ class DenseTensor : public TensorBase, /// \brief Test whether the metadata is valid. /// \return Whether the metadata is valid. - bool valid() const noexcept { return meta_.valid(); } + bool valid() const noexcept override { return meta_.valid(); } /// \brief Test whether the storage is allocated. /// return Whether the storage is allocated. - bool initialized() const { return storage_->data(); } + bool initialized() const override { + return storage_ != nullptr && storage_->data() != nullptr; + } /// \brief Check if storage is shared with other objects. /// \return Whether the storage is shared with other objects. diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index 00a9496d84b7e30f8039f96034e5ead0c84482b3..0e6349ef085fa1d3248ba2f82888e1d0797f47d2 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -1,8 +1,16 @@ -cc_test(test_mean_api SRCS test_mean_api.cc DEPS pten_api pten_api_utils) -cc_test(test_dot_api SRCS test_dot_api.cc DEPS pten_api pten_api_utils) -cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS pten_api pten_api_utils) -cc_test(test_fill_api SRCS test_fill_api.cc DEPS pten_api pten_api_utils) -cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS pten_api pten_api_utils) +if(WITH_ROCM) + hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor glog) +else() + cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor glog) +endif() + +cc_test(test_pten_exception SRCS test_pten_exception.cc DEPS gtest) cc_test(test_framework_storage SRCS test_storage.cc DEPS pten_api_utils) cc_test(test_framework_tensor_utils SRCS test_tensor_utils.cc DEPS pten_api_utils) -cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS pten_api pten_api_utils) + +cc_test(test_mean_api SRCS test_mean_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_dot_api SRCS test_dot_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_fill_api SRCS test_fill_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS pten_tensor pten_api pten_api_utils) +cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS pten_tensor pten_api pten_api_utils) diff --git a/paddle/pten/tests/api/test_dot_api.cc b/paddle/pten/tests/api/test_dot_api.cc index 2c7b65f98135be8b464badcfa0114d9c98e0d52d..50363a3bf95244a1b3df2af56008a42aba91701a 100644 --- a/paddle/pten/tests/api/test_dot_api.cc +++ b/paddle/pten/tests/api/test_dot_api.cc @@ -65,8 +65,8 @@ TEST(API, dot) { auto out = paddle::experimental::dot(x, y); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 3); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); diff --git a/paddle/pten/tests/api/test_elementwise_api.cc b/paddle/pten/tests/api/test_elementwise_api.cc index 79f69343149c836deaef70adf540c07a796ca3e4..e9eda8d3b18860c5b3afbe261ceca8384a664011 100644 --- a/paddle/pten/tests/api/test_elementwise_api.cc +++ b/paddle/pten/tests/api/test_elementwise_api.cc @@ -66,7 +66,7 @@ TEST(API, add) { auto out = paddle::experimental::add(x, y); // 3. check result - ASSERT_EQ(out.shape().size(), 2); + ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape()[0], 3); ASSERT_EQ(out.numel(), 30); ASSERT_EQ(out.is_cpu(), true); diff --git a/paddle/pten/tests/api/test_fill_api.cc b/paddle/pten/tests/api/test_fill_api.cc index 897637942547e9435d800fc9561d6a7ce94ea747..0b8d0e2e98d84a4e8b3b17ae225e9ae8258f7490 100644 --- a/paddle/pten/tests/api/test_fill_api.cc +++ b/paddle/pten/tests/api/test_fill_api.cc @@ -51,8 +51,8 @@ TEST(API, full_like) { auto out = paddle::experimental::full_like(x, val, pten::DataType::FLOAT32); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); @@ -84,8 +84,8 @@ TEST(API, zeros_like) { auto out = paddle::experimental::zeros_like(x, pten::DataType::INT32); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::INT32); @@ -117,8 +117,8 @@ TEST(API, ones_like) { auto out = paddle::experimental::ones_like(x, pten::DataType::INT32); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::INT32); @@ -143,7 +143,7 @@ TEST(API, full) { auto out = paddle::experimental::full({3, 2}, val, pten::DataType::FLOAT32); // 3. check result - ASSERT_EQ(out.shape().size(), 2); + ASSERT_EQ(out.shape().size(), 2UL); ASSERT_EQ(out.shape()[0], 3); ASSERT_EQ(out.numel(), 6); ASSERT_EQ(out.is_cpu(), true); diff --git a/paddle/pten/tests/api/test_flatten_api.cc b/paddle/pten/tests/api/test_flatten_api.cc index 3701c358c667e1c96851f5b5d18ac72101411057..5bcf60febff9afb7d9781b969c3bd1f542080b0f 100644 --- a/paddle/pten/tests/api/test_flatten_api.cc +++ b/paddle/pten/tests/api/test_flatten_api.cc @@ -53,9 +53,9 @@ TEST(API, flatten) { // 3. check result std::vector expect_shape = {3, 4, 3}; - ASSERT_EQ(out.shape()[0], expect_shape[0]); - ASSERT_EQ(out.shape()[1], expect_shape[1]); - ASSERT_EQ(out.shape()[2], expect_shape[2]); + ASSERT_EQ(out.dims()[0], expect_shape[0]); + ASSERT_EQ(out.dims()[1], expect_shape[1]); + ASSERT_EQ(out.dims()[2], expect_shape[2]); ASSERT_EQ(out.numel(), 36); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); diff --git a/paddle/pten/tests/api/test_matmul_api.cc b/paddle/pten/tests/api/test_matmul_api.cc index b0b649c6908c98d8eea03deda5e3fefc1db2e890..377173370de7d9b1f68c389d9b47aba763a15c16 100644 --- a/paddle/pten/tests/api/test_matmul_api.cc +++ b/paddle/pten/tests/api/test_matmul_api.cc @@ -63,9 +63,9 @@ TEST(API, matmul_cpu) { auto out = paddle::experimental::matmul(x, y, false, false); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); - ASSERT_EQ(out.shape()[1], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.dims()[1], 3); ASSERT_EQ(out.numel(), 9); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); @@ -135,9 +135,9 @@ TEST(API, matmul_cuda) { auto out = paddle::experimental::matmul(x, y, false, false); // 3. check result - ASSERT_EQ(out.shape().size(), 2); - ASSERT_EQ(out.shape()[0], 3); - ASSERT_EQ(out.shape()[1], 3); + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.dims()[1], 3); ASSERT_EQ(out.numel(), 9); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); ASSERT_EQ(out.layout(), pten::DataLayout::NCHW); @@ -148,7 +148,7 @@ TEST(API, matmul_cuda) { auto ref_out = std::make_shared( alloc_cpu, pten::DenseTensorMeta( - pten::DataType::FLOAT32, out.shape(), pten::DataLayout::NCHW)); + pten::DataType::FLOAT32, out.dims(), pten::DataLayout::NCHW)); pten::Copy(*dev_ctx, *dense_out.get(), false, ref_out.get()); diff --git a/paddle/pten/tests/api/test_mean_api.cc b/paddle/pten/tests/api/test_mean_api.cc index d772e58659507a7d098fdba2b785a62fab019eac..ee38e9d49cb1e7a35f07a0550fc94f8053ba62e4 100644 --- a/paddle/pten/tests/api/test_mean_api.cc +++ b/paddle/pten/tests/api/test_mean_api.cc @@ -54,8 +54,8 @@ TEST(API, mean) { auto out = paddle::experimental::mean(x); // 3. check result - ASSERT_EQ(out.shape().size(), 1); - ASSERT_EQ(out.shape()[0], 1); + ASSERT_EQ(out.dims().size(), 1); + ASSERT_EQ(out.dims()[0], 1); ASSERT_EQ(out.numel(), 1); ASSERT_EQ(out.is_cpu(), true); ASSERT_EQ(out.type(), pten::DataType::FLOAT32); diff --git a/python/paddle/fluid/tests/custom_op/test_check_error.cc b/paddle/pten/tests/api/test_pten_exception.cc similarity index 74% rename from python/paddle/fluid/tests/custom_op/test_check_error.cc rename to paddle/pten/tests/api/test_pten_exception.cc index eda521a89662f5569db39ffa94dcd9a0f539e9bf..a987e9bc315455922f4fd06c0e61e62171294c33 100644 --- a/python/paddle/fluid/tests/custom_op/test_check_error.cc +++ b/paddle/pten/tests/api/test_pten_exception.cc @@ -12,7 +12,7 @@ limitations under the License. */ #include #include #include "gtest/gtest.h" -#include "paddle/fluid/extension/include/ext_exception.h" +#include "paddle/pten/api/ext/exception.h" TEST(PD_THROW, empty) { bool caught_exception = false; @@ -23,12 +23,11 @@ TEST(PD_THROW, empty) { std::string err_msg = e.what(); EXPECT_TRUE(err_msg.find("An error occurred.") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc:20") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc:20") != std::string::npos); #else EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc:20") != + err_msg.find("paddle/pten/tests/api/test_pten_exception.cc:20") != std::string::npos); #endif } @@ -52,13 +51,11 @@ TEST(PD_THROW, non_empty) { EXPECT_TRUE(err_msg.find("PD_THROW returns 0. DataType of 1 is INT. ") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc") != std::string::npos); #else - EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc") != - std::string::npos); + EXPECT_TRUE(err_msg.find("paddle/pten/tests/api/test_pten_exception.cc") != + std::string::npos); #endif } EXPECT_TRUE(caught_exception); @@ -84,13 +81,11 @@ TEST(PD_CHECK, FAILED) { EXPECT_TRUE(err_msg.find("Expected false, but it's not satisfied.") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc") != std::string::npos); #else - EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc") != - std::string::npos); + EXPECT_TRUE(err_msg.find("paddle/pten/tests/api/test_pten_exception.cc") != + std::string::npos); #endif } EXPECT_TRUE(caught_exception); @@ -112,13 +107,11 @@ TEST(PD_CHECK, FAILED) { EXPECT_TRUE(err_msg.find("PD_CHECK returns 0. DataType of 1 is INT. ") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc") != std::string::npos); #else - EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc") != - std::string::npos); + EXPECT_TRUE(err_msg.find("paddle/pten/tests/api/test_pten_exception.cc") != + std::string::npos); #endif } EXPECT_TRUE(caught_exception); @@ -134,13 +127,11 @@ TEST(PD_CHECK, FAILED) { EXPECT_TRUE(err_msg.find("Expected a > b, but it's not satisfied.") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc") != std::string::npos); #else - EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc") != - std::string::npos); + EXPECT_TRUE(err_msg.find("paddle/pten/tests/api/test_pten_exception.cc") != + std::string::npos); #endif } EXPECT_TRUE(caught_exception); @@ -156,13 +147,11 @@ TEST(PD_CHECK, FAILED) { EXPECT_TRUE(err_msg.find("PD_CHECK returns 0, because 123 > 0.345") != std::string::npos); #if _WIN32 - EXPECT_TRUE(err_msg.find("tests\\custom_op\\test_check_error.cc") != + EXPECT_TRUE(err_msg.find("tests\\api\\test_pten_exception.cc") != std::string::npos); #else - EXPECT_TRUE( - err_msg.find( - "python/paddle/fluid/tests/custom_op/test_check_error.cc") != - std::string::npos); + EXPECT_TRUE(err_msg.find("paddle/pten/tests/api/test_pten_exception.cc") != + std::string::npos); #endif } EXPECT_TRUE(caught_exception); diff --git a/paddle/fluid/framework/custom_tensor_test.cc b/paddle/pten/tests/api/test_pten_tensor.cc similarity index 60% rename from paddle/fluid/framework/custom_tensor_test.cc rename to paddle/pten/tests/api/test_pten_tensor.cc index 342be27c896ae9924f967005f38315f70abed6f2..9cadd2664bdf3c6f2ad6db84279752a3add9271d 100644 --- a/paddle/fluid/framework/custom_tensor_test.cc +++ b/paddle/pten/tests/api/test_pten_tensor.cc @@ -14,15 +14,16 @@ #include "glog/logging.h" #include "gtest/gtest.h" -#include "paddle/fluid/extension/include/ext_all.h" -#include "paddle/fluid/framework/custom_tensor_utils.h" -#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/api/lib/ext_compat_utils.h" + +namespace pten { +namespace tests { template paddle::Tensor InitCPUTensorForTest() { std::vector tensor_shape{5, 5}; - auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); - t1.reshape(tensor_shape); + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU, tensor_shape); auto* p_data_ptr = t1.mutable_data(paddle::PlaceType::kCPU); for (int64_t i = 0; i < t1.size(); i++) { p_data_ptr[i] = T(5); @@ -56,21 +57,18 @@ void TestCopyTensor() { void TestAPIPlace() { std::vector tensor_shape = {5, 5}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - auto t1 = paddle::Tensor(paddle::PlaceType::kGPU); - t1.reshape(tensor_shape); + auto t1 = paddle::Tensor(paddle::PlaceType::kGPU, tensor_shape); t1.mutable_data(); CHECK((paddle::PlaceType::kGPU == t1.place())); #endif - auto t2 = paddle::Tensor(paddle::PlaceType::kCPU); - t2.reshape(tensor_shape); + auto t2 = paddle::Tensor(paddle::PlaceType::kCPU, tensor_shape); t2.mutable_data(); CHECK((paddle::PlaceType::kCPU == t2.place())); } void TestAPISizeAndShape() { std::vector tensor_shape = {5, 5}; - auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); - t1.reshape(tensor_shape); + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU, tensor_shape); CHECK_EQ(t1.size(), 25); CHECK(t1.shape() == tensor_shape); } @@ -113,8 +111,7 @@ void TestAPISlice() { template paddle::DataType TestDtype() { std::vector tensor_shape = {5, 5}; - auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); - t1.reshape(tensor_shape); + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU, tensor_shape); t1.template mutable_data(); return t1.type(); } @@ -122,8 +119,7 @@ paddle::DataType TestDtype() { template void TestCast(paddle::DataType data_type) { std::vector tensor_shape = {5, 5}; - auto t1 = paddle::Tensor(paddle::PlaceType::kCPU); - t1.reshape(tensor_shape); + auto t1 = paddle::Tensor(paddle::PlaceType::kCPU, tensor_shape); t1.template mutable_data(); auto t2 = t1.cast(data_type); CHECK(t2.type() == data_type); @@ -195,80 +191,12 @@ void GroupTestDtype() { CHECK(TestDtype() == paddle::DataType::FLOAT16); } -void GroupTestDtypeConvert() { - // enum -> proto - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::FLOAT64) == - paddle::framework::proto::VarType::FP64); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::FLOAT32) == - paddle::framework::proto::VarType::FP32); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::UINT8) == - paddle::framework::proto::VarType::UINT8); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::INT8) == paddle::framework::proto::VarType::INT8); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::INT32) == - paddle::framework::proto::VarType::INT32); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::INT64) == - paddle::framework::proto::VarType::INT64); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::INT16) == - paddle::framework::proto::VarType::INT16); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::BOOL) == paddle::framework::proto::VarType::BOOL); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::COMPLEX64) == - paddle::framework::proto::VarType::COMPLEX64); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::COMPLEX128) == - paddle::framework::proto::VarType::COMPLEX128); - CHECK(paddle::framework::CustomTensorUtils::ConvertEnumDTypeToInnerDType( - paddle::DataType::FLOAT16) == - paddle::framework::proto::VarType::FP16); - // proto -> enum - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::FP64) == - paddle::DataType::FLOAT64); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::FP32) == - paddle::DataType::FLOAT32); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::INT64) == - paddle::DataType::INT64); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::INT32) == - paddle::DataType::INT32); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::INT8) == paddle::DataType::INT8); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::UINT8) == - paddle::DataType::UINT8); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::INT16) == - paddle::DataType::INT16); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::BOOL) == paddle::DataType::BOOL); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::COMPLEX64) == - paddle::DataType::COMPLEX64); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::COMPLEX128) == - paddle::DataType::COMPLEX128); - CHECK(paddle::framework::CustomTensorUtils::ConvertInnerDTypeToEnumDType( - paddle::framework::proto::VarType::FP16) == - paddle::DataType::FLOAT16); -} - void TestInitilized() { - paddle::Tensor test_tensor(paddle::PlaceType::kCPU); + paddle::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1}); CHECK(test_tensor.is_initialized() == false); - test_tensor.reshape({1, 1}); test_tensor.mutable_data(); CHECK(test_tensor.is_initialized() == true); - float* tensor_data = test_tensor.data(); + float* tensor_data = test_tensor.mutable_data(); for (int i = 0; i < test_tensor.size(); i++) { tensor_data[i] = 0.5; } @@ -277,21 +205,23 @@ void TestInitilized() { } } -TEST(CustomTensor, copyTest) { - VLOG(2) << "TestCopy"; - GroupTestCopy(); +TEST(PtenTensor, All) { + // TODO(chenweihang, before 2021.11.20) support copy, slice and cast methods + // VLOG(2) << "TestCopy"; + // GroupTestCopy(); VLOG(2) << "TestDtype"; GroupTestDtype(); VLOG(2) << "TestShape"; TestAPISizeAndShape(); VLOG(2) << "TestPlace"; TestAPIPlace(); - VLOG(2) << "TestSlice"; - TestAPISlice(); - VLOG(2) << "TestCast"; - GroupTestCast(); - VLOG(2) << "TestDtypeConvert"; - GroupTestDtypeConvert(); + // VLOG(2) << "TestSlice"; + // TestAPISlice(); + // VLOG(2) << "TestCast"; + // GroupTestCast(); VLOG(2) << "TestInitilized"; TestInitilized(); } + +} // namespace tests +} // namespace pten diff --git a/paddle/pten/tests/common/test_backend.cc b/paddle/pten/tests/common/test_backend.cc index 1c17c881ed24f23148525072301c5eba3b671125..0aabf22608f69bd4fd661f005e7796b65f51f0d3 100644 --- a/paddle/pten/tests/common/test_backend.cc +++ b/paddle/pten/tests/common/test_backend.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include +#include "paddle/pten/api/ext/exception.h" #include "paddle/pten/common/backend.h" TEST(Backend, OStream) { @@ -42,7 +43,7 @@ TEST(Backend, OStream) { oss.str(""); try { oss << pten::Backend::NUM_BACKENDS; - } catch (paddle::platform::EnforceNotMet &exception) { + } catch (const std::exception& exception) { std::string ex_msg = exception.what(); EXPECT_TRUE(ex_msg.find("Invalid enum backend type") != std::string::npos); } diff --git a/paddle/pten/tests/common/test_data_layout.cc b/paddle/pten/tests/common/test_data_layout.cc index 7fe1b6c2ffd2be88afa2e6978f6c3e9be10745f1..4de75a7bd71fb7d9f96d6ac94dc6f2abaafefa23 100644 --- a/paddle/pten/tests/common/test_data_layout.cc +++ b/paddle/pten/tests/common/test_data_layout.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/pten/api/ext/exception.h" #include "paddle/pten/common/layout.h" TEST(DataLayout, OStream) { @@ -37,7 +38,7 @@ TEST(DataLayout, OStream) { oss.str(""); try { oss << pten::DataLayout::NUM_DATA_LAYOUTS; - } catch (paddle::platform::EnforceNotMet &exception) { + } catch (const std::exception& exception) { std::string ex_msg = exception.what(); EXPECT_TRUE(ex_msg.find("Invalid enum data layout type") != std::string::npos); diff --git a/paddle/pten/tests/common/test_data_type.cc b/paddle/pten/tests/common/test_data_type.cc index 28d58858bb42c8a5d778fa5ce9fa24a0a171bce4..95957260de545733b2dbcde09ba344c52802b803 100644 --- a/paddle/pten/tests/common/test_data_type.cc +++ b/paddle/pten/tests/common/test_data_type.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/pten/api/ext/exception.h" #include "paddle/pten/common/data_type.h" TEST(DataType, OStream) { @@ -61,7 +62,7 @@ TEST(DataType, OStream) { oss.str(""); try { oss << pten::DataType::NUM_DATA_TYPES; - } catch (paddle::platform::EnforceNotMet &exception) { + } catch (const std::exception& exception) { std::string ex_msg = exception.what(); EXPECT_TRUE(ex_msg.find("Invalid enum data type") != std::string::npos); } diff --git a/paddle/pten/tests/core/CMakeLists.txt b/paddle/pten/tests/core/CMakeLists.txt index b25439cfe25279b3db97afe90643624fe014a62a..9a5cfecc2917b8e40d4b941a7339935400e8484a 100644 --- a/paddle/pten/tests/core/CMakeLists.txt +++ b/paddle/pten/tests/core/CMakeLists.txt @@ -3,4 +3,5 @@ cc_test(test_storage SRCS test_storage.cc DEPS tensor_base) cc_test(test_dense_tensor SRCS test_dense_tensor.cc DEPS dense_tensor) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) +cc_test(test_convert_utils SRCS test_convert_utils.cc DEPS convert_utils) cc_test(test_kernel_factory SRCS test_kernel_factory.cc DEPS kernel_factory) diff --git a/paddle/pten/tests/core/test_convert_utils.cc b/paddle/pten/tests/core/test_convert_utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..51fba7cbe0658bc761562e47e4f5c0988d6c9d56 --- /dev/null +++ b/paddle/pten/tests/core/test_convert_utils.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "gtest/gtest.h" +#include "paddle/pten/core/convert_utils.h" + +namespace pten { +namespace tests { + +TEST(ConvertUtils, DataType) { + // enum -> proto + CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT64) == + paddle::framework::proto::VarType::FP64); + CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT32) == + paddle::framework::proto::VarType::FP32); + CHECK(pten::TransToProtoVarType(paddle::DataType::UINT8) == + paddle::framework::proto::VarType::UINT8); + CHECK(pten::TransToProtoVarType(paddle::DataType::INT8) == + paddle::framework::proto::VarType::INT8); + CHECK(pten::TransToProtoVarType(paddle::DataType::INT32) == + paddle::framework::proto::VarType::INT32); + CHECK(pten::TransToProtoVarType(paddle::DataType::INT64) == + paddle::framework::proto::VarType::INT64); + CHECK(pten::TransToProtoVarType(paddle::DataType::INT16) == + paddle::framework::proto::VarType::INT16); + CHECK(pten::TransToProtoVarType(paddle::DataType::BOOL) == + paddle::framework::proto::VarType::BOOL); + CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX64) == + paddle::framework::proto::VarType::COMPLEX64); + CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX128) == + paddle::framework::proto::VarType::COMPLEX128); + CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT16) == + paddle::framework::proto::VarType::FP16); + // proto -> enum + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP64) == + paddle::DataType::FLOAT64); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP32) == + paddle::DataType::FLOAT32); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT64) == + paddle::DataType::INT64); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT32) == + paddle::DataType::INT32); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT8) == + paddle::DataType::INT8); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::UINT8) == + paddle::DataType::UINT8); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT16) == + paddle::DataType::INT16); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::BOOL) == + paddle::DataType::BOOL); + CHECK( + pten::TransToPtenDataType(paddle::framework::proto::VarType::COMPLEX64) == + paddle::DataType::COMPLEX64); + CHECK(pten::TransToPtenDataType( + paddle::framework::proto::VarType::COMPLEX128) == + paddle::DataType::COMPLEX128); + CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP16) == + paddle::DataType::FLOAT16); +} + +} // namespace tests +} // namespace pten diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt index 2092151b84f454de581601e2de630e9789bfe429..68b79041355fb3d0bf6eec2ec0c59d2ad19506ff 100644 --- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt @@ -16,8 +16,8 @@ py_test(test_multi_out_jit SRCS test_multi_out_jit.py) py_test(test_custom_attrs_jit SRCS test_custom_attrs_jit.py) py_test(test_custom_concat SRCS test_custom_concat.py) py_test(test_custom_conj SRCS test_custom_conj.py) +py_test(test_custom_linear SRCS test_custom_linear.py) # other tests py_test(test_sysconfig SRCS test_sysconfig.py) py_test(test_check_abi SRCS test_check_abi.py) -cc_test(test_check_error SRCS test_check_error.cc DEPS gtest) diff --git a/python/paddle/fluid/tests/custom_op/attr_test_op.cc b/python/paddle/fluid/tests/custom_op/attr_test_op.cc index 1edc10b8a8a981532cfe61875f3e792380487a76..297acd602086541cbdfc274a9b248b08b5c94758 100644 --- a/python/paddle/fluid/tests/custom_op/attr_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/attr_test_op.cc @@ -132,8 +132,7 @@ std::vector AttrTestForward( std::vector float_vec_attr, std::vector int64_vec_attr, std::vector str_vec_attr) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -161,8 +160,7 @@ std::vector AttrTestBackward( int int_attr, std::vector float_vec_attr, std::vector str_vec_attr) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU); - grad_x.reshape(grad_out.shape()); + auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { assign_cpu_kernel( @@ -187,8 +185,7 @@ std::vector ConstAttrTestForward( const std::vector& float_vec_attr, const std::vector& int64_vec_attr, const std::vector& str_vec_attr) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -216,8 +213,7 @@ std::vector ConstAttrTestBackward( const int& int_attr, const std::vector& float_vec_attr, const std::vector& str_vec_attr) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU); - grad_x.reshape(grad_out.shape()); + auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, grad_out.shape()); PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] { assign_cpu_kernel( diff --git a/python/paddle/fluid/tests/custom_op/custom_concat_op.cc b/python/paddle/fluid/tests/custom_op/custom_concat_op.cc index a01e01f2bc59239e5ce6aec4a1d9ea9a27bc00d1..66cc36c300e9d6f24652ab5907a71e3a2fbc03ff 100644 --- a/python/paddle/fluid/tests/custom_op/custom_concat_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_concat_op.cc @@ -75,8 +75,7 @@ std::vector ConcatForwardDynamicAxis( auto out_shape = ComputeOutShape(in_shapes, axis); // create output - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(out_shape); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, out_shape); // calc PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES( @@ -107,8 +106,7 @@ std::vector ConcatBackwardDynamicAxis( // create outputs std::vector grad_inputs; for (auto& t : inputs) { - auto grad = paddle::Tensor(paddle::PlaceType::kCPU); - grad.reshape(t.shape()); + auto grad = paddle::Tensor(paddle::PlaceType::kCPU, t.shape()); grad_inputs.emplace_back(grad); } @@ -163,8 +161,7 @@ std::vector ConcatForwardStaticAxis( auto out_shape = ComputeOutShape(in_shapes, final_axis); // create output - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(out_shape); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, out_shape); // calc PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES( @@ -193,8 +190,7 @@ std::vector ConcatBackwardStaticAxis( // create outputs std::vector grad_inputs; for (auto& t : inputs) { - auto grad = paddle::Tensor(paddle::PlaceType::kCPU); - grad.reshape(t.shape()); + auto grad = paddle::Tensor(paddle::PlaceType::kCPU, t.shape()); grad_inputs.emplace_back(grad); } diff --git a/python/paddle/fluid/tests/custom_op/custom_conj_op.cc b/python/paddle/fluid/tests/custom_op/custom_conj_op.cc index 4feb887ca036a1c911b3a2890162a839f836dacf..b9c10f479e0a39eb8e33ffceb30e8eb9cc8efa9e 100644 --- a/python/paddle/fluid/tests/custom_op/custom_conj_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_conj_op.cc @@ -71,8 +71,7 @@ void ConjCPUKernel(const data_t* x_data, int64_t numel, data_t* out_data) { std::vector ConjFunction(const paddle::Tensor& x) { CHECK_INPUT(x); - paddle::Tensor out(x.place()); - out.reshape(x.shape()); + paddle::Tensor out(x.place(), x.shape()); PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES( x.type(), "ConjCPUKernel", ([&] { diff --git a/python/paddle/fluid/tests/custom_op/custom_linear_op.cc b/python/paddle/fluid/tests/custom_op/custom_linear_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6e0b44b71f7f87447bd66a052f0a394ab38b2874 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/custom_linear_op.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/extension.h" + +// The linear implemented here must be passed in bias +std::vector PtenLinearForward(const paddle::Tensor& x, + const paddle::Tensor& weight, + const paddle::Tensor& bias) { + return { + paddle::experimental::add(paddle::experimental::matmul(x, weight), bias)}; +} + +std::vector> LinearInferShape( + const std::vector& x_shape, + const std::vector& weight_shape, + const std::vector& bias_shape) { + auto dims_x = x_shape; + auto dims_y = weight_shape; + auto ndims_x = x_shape.size(); + auto ndims_y = weight_shape.size(); + PD_CHECK(ndims_x > 0, + "The Input(x) dims size must be greater than 0," + " but reviced dims size is 0. "); + PD_CHECK(ndims_y > 0, + "The Input(y) dims size must be greater than 0," + " but reviced dims size is 0. "); + + bool x_broadcasted = false, y_broadcasted = false; + if (ndims_x == 1) { + dims_x.insert(dims_x.begin(), 1); + ndims_x = 2; + x_broadcasted = true; + } + + if (ndims_y == 1) { + dims_y.push_back(1); + ndims_y = 2; + y_broadcasted = true; + } + + size_t M, N; + M = dims_x[ndims_x - 2]; + N = dims_y[ndims_y - 1]; + + std::vector new_dims; + if (ndims_x > ndims_y) { + new_dims.assign(dims_x.begin(), dims_x.end() - 2); + } else if (ndims_x < ndims_y) { + new_dims.assign(dims_y.begin(), dims_y.end() - 2); + } else { + new_dims.reserve(ndims_x); + for (size_t i = 0; i < ndims_x - 2; ++i) { + new_dims.push_back(std::max(dims_x[i], dims_y[i])); + } + } + if (!x_broadcasted) { + new_dims.push_back(M); + } + if (!y_broadcasted) { + new_dims.push_back(N); + } + if (x_broadcasted && y_broadcasted) { + new_dims.push_back(1); + } + + return {new_dims}; +} + +std::vector LinearInferDtype( + const paddle::DataType& x_dtype, + const paddle::DataType& weight_dtype, + const paddle::DataType& bias_dtype) { + return {x_dtype}; +} + +PD_BUILD_OP(pten_linear) + .Inputs({"X", "Weight", "Bias"}) + .Outputs({"Out"}) + .SetKernelFn(PD_KERNEL(PtenLinearForward)) + .SetInferShapeFn(PD_INFER_SHAPE(LinearInferShape)) + .SetInferDtypeFn(PD_INFER_DTYPE(LinearInferDtype)); diff --git a/python/paddle/fluid/tests/custom_op/custom_relu_op.cc b/python/paddle/fluid/tests/custom_op/custom_relu_op.cc index c075d27f7b1763babf54cd9d378b28f29fd84566..b2ef90bf87a1af3ebe68a8f10950537978c0e390 100644 --- a/python/paddle/fluid/tests/custom_op/custom_relu_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_relu_op.cc @@ -21,6 +21,8 @@ template void relu_cpu_forward_kernel(const data_t* x_data, data_t* out_data, int64_t x_numel) { + PD_CHECK(x_data != nullptr, "x_data is nullptr."); + PD_CHECK(out_data != nullptr, "out_data is nullptr."); for (int i = 0; i < x_numel; ++i) { out_data[i] = std::max(static_cast(0.), x_data[i]); } @@ -52,8 +54,7 @@ std::vector relu_cpu_forward(const paddle::Tensor& x) { std::vector relu_cpu_backward(const paddle::Tensor& x, const paddle::Tensor& out, const paddle::Tensor& grad_out) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU); - grad_x.reshape(x.shape()); + auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] { relu_cpu_backward_kernel( diff --git a/python/paddle/fluid/tests/custom_op/custom_relu_op.cu b/python/paddle/fluid/tests/custom_op/custom_relu_op.cu index 38e8e71cf8129b0e3b8ea7d816e6389c52c83a9e..dda42a5c0598478aaf80ac25bdcdea2c2ab8b6e4 100644 --- a/python/paddle/fluid/tests/custom_op/custom_relu_op.cu +++ b/python/paddle/fluid/tests/custom_op/custom_relu_op.cu @@ -37,20 +37,15 @@ __global__ void relu_cuda_backward_kernel(const data_t* dy, } std::vector relu_cuda_forward(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kGPU); + auto out = paddle::Tensor(paddle::PlaceType::kGPU, x.shape()); - out.reshape(x.shape()); int numel = x.size(); int block = 512; int grid = (numel + block - 1) / block; PD_DISPATCH_FLOATING_AND_HALF_TYPES( x.type(), "relu_cuda_forward_kernel", ([&] { - auto cpu_input = x.copy_to(paddle::PlaceType::kCPU); - auto gpu_input = cpu_input.copy_to(paddle::PlaceType::kGPU); relu_cuda_forward_kernel<<>>( - gpu_input.data(), - out.mutable_data(x.place()), - numel); + x.data(), out.mutable_data(x.place()), numel); })); return {out}; @@ -59,8 +54,7 @@ std::vector relu_cuda_forward(const paddle::Tensor& x) { std::vector relu_cuda_backward(const paddle::Tensor& x, const paddle::Tensor& out, const paddle::Tensor& grad_out) { - auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU); - grad_x.reshape(x.shape()); + auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU, x.shape()); int numel = out.size(); int block = 512; diff --git a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc index 0435f50b7c701e997c9e73ba4bfd9a8c5a998471..0f7d323b5451efba5a503d9039a03531e1773efb 100644 --- a/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/dispatch_test_op.cc @@ -27,8 +27,7 @@ void assign_cpu_kernel(const data_t* x_data, } std::vector DispatchTestInterger(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_INTEGRAL_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -46,8 +45,7 @@ PD_BUILD_OP(dispatch_test_integer) std::vector DispatchTestFloatAndInteger( const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -64,8 +62,7 @@ PD_BUILD_OP(dispatch_test_float_and_integer) .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger)); std::vector DispatchTestComplex(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -83,8 +80,7 @@ PD_BUILD_OP(dispatch_test_complex) std::vector DispatchTestFloatAndComplex( const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -102,8 +98,7 @@ PD_BUILD_OP(dispatch_test_float_and_complex) std::vector DispatchTestFloatAndIntegerAndComplex( const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -120,8 +115,7 @@ PD_BUILD_OP(dispatch_test_float_and_integer_and_complex) .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex)); std::vector DispatchTestFloatAndHalf(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_AND_HALF_TYPES( x.type(), "assign_cpu_kernel", ([&] { diff --git a/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc b/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc index 17a36df2cde48598efb3945499516c6c70edecfd..18940f20e7652305d86942b5a67a51fa98638034 100644 --- a/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc +++ b/python/paddle/fluid/tests/custom_op/multi_out_test_op.cc @@ -34,8 +34,7 @@ void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) { } std::vector MultiOutCPU(const paddle::Tensor& x) { - auto out = paddle::Tensor(paddle::PlaceType::kCPU); - out.reshape(x.shape()); + auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); PD_DISPATCH_FLOATING_TYPES( x.type(), "assign_cpu_kernel", ([&] { @@ -44,15 +43,13 @@ std::vector MultiOutCPU(const paddle::Tensor& x) { })); // fake multi output: Fake_float64 with float64 dtype - auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU); - fake_float64.reshape(x.shape()); + auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); fill_constant_cpu_kernel( fake_float64.mutable_data(x.place()), x.size(), 0.); // fake multi output: ZFake_int32 with int32 dtype - auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU); - zfake_int32.reshape(x.shape()); + auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU, x.shape()); fill_constant_cpu_kernel( zfake_int32.mutable_data(x.place()), x.size(), 1); diff --git a/python/paddle/fluid/tests/custom_op/test_custom_concat.py b/python/paddle/fluid/tests/custom_op/test_custom_concat.py index d796c3b5fbd60b450d7dbb02cbd47905c57f4b98..9049b604c910c80a41afa2c509e2ec3fdb4ffbfc 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_concat.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_concat.py @@ -24,8 +24,7 @@ from utils import paddle_includes, extra_cc_args, extra_nvcc_args # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format( - get_build_directory()) +file = '{}\\custom_concat\\custom_concat.pyd'.format(get_build_directory()) if os.name == 'nt' and os.path.isfile(file): cmd = 'del {}'.format(file) run_cmd(cmd, True) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_conj.py b/python/paddle/fluid/tests/custom_op/test_custom_conj.py index a8e4019880332d46031ae3847a0a3f611ed37ad2..25c88ee6c6b01daf62553f5f7857bfe06fce25ca 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_conj.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_conj.py @@ -24,8 +24,7 @@ from utils import paddle_includes, extra_cc_args, extra_nvcc_args # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format( - get_build_directory()) +file = '{}\\custom_conj\\custom_conj.pyd'.format(get_build_directory()) if os.name == 'nt' and os.path.isfile(file): cmd = 'del {}'.format(file) run_cmd(cmd, True) diff --git a/python/paddle/fluid/tests/custom_op/test_custom_linear.py b/python/paddle/fluid/tests/custom_op/test_custom_linear.py new file mode 100644 index 0000000000000000000000000000000000000000..0ba70eaa3e06cec619b2a9175db4aa1c8bf75a8b --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/test_custom_linear.py @@ -0,0 +1,107 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import numpy as np + +import paddle +import paddle.static as static +import paddle.nn.functional as F +from paddle.utils.cpp_extension import load, get_build_directory +from paddle.utils.cpp_extension.extension_utils import run_cmd +from utils import paddle_includes, extra_cc_args, extra_nvcc_args + +# Because Windows don't use docker, the shared lib already exists in the +# cache dir, it will not be compiled again unless the shared lib is removed. +file = '{}\\custom_linear\\custom_linear.pyd'.format(get_build_directory()) +if os.name == 'nt' and os.path.isfile(file): + cmd = 'del {}'.format(file) + run_cmd(cmd, True) + +custom_ops = load( + name='custom_linear_jit', + sources=['custom_linear_op.cc'], + extra_include_paths=paddle_includes, # add for Coverage CI + extra_cxx_cflags=extra_cc_args, # test for cc flags + extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags + verbose=True) + + +def linear_dynamic(func, dtype, np_x, np_weight, np_bias): + paddle.set_device("cpu") + x = paddle.to_tensor(np_x, dtype=dtype) + weight = paddle.to_tensor(np_weight, dtype=dtype) + bias = paddle.to_tensor(np_bias, dtype=dtype) + out = func(x, weight, bias) + return out.numpy() + + +def linear_static(func, dtype, np_x, np_weight, np_bias): + paddle.enable_static() + paddle.set_device("cpu") + with static.scope_guard(static.Scope()): + with static.program_guard(static.Program()): + x = static.data(name="x", shape=np_x.shape, dtype=dtype) + weight = static.data( + name="weight", shape=np_weight.shape, dtype=dtype) + bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype) + out = func(x, weight, bias) + + exe = static.Executor() + exe.run(static.default_startup_program()) + + out_v, = exe.run(static.default_main_program(), + feed={ + "x": np_x.astype(dtype), + "weight": np_weight.astype(dtype), + "bias": np_bias.astype(dtype) + }, + fetch_list=[out.name]) + paddle.disable_static() + return out_v + + +class TestCustomLinearJit(unittest.TestCase): + def setUp(self): + self.dtypes = ['float32', 'float64'] + self.np_x = np.random.random((3, 2)).astype("float32") + self.np_weight = np.full([2, 4], fill_value=0.5, dtype="float32") + self.np_bias = np.ones([4], dtype="float32") + + def check_output(self, out, pd_out, name): + self.assertTrue( + np.array_equal(out, pd_out), + "custom op {}: {},\n paddle api {}: {}".format(name, out, name, + pd_out)) + + def test_static(self): + for dtype in self.dtypes: + pten_out = linear_static(custom_ops.pten_linear, dtype, self.np_x, + self.np_weight, self.np_bias) + pd_out = linear_static(F.linear, dtype, self.np_x, self.np_weight, + self.np_bias) + self.check_output(pten_out, pd_out, "pten_out") + + def test_dynamic(self): + for dtype in self.dtypes: + pten_out = linear_dynamic(custom_ops.pten_linear, dtype, self.np_x, + self.np_weight, self.np_bias) + pd_out = linear_dynamic(F.linear, dtype, self.np_x, self.np_weight, + self.np_bias) + self.check_output(pten_out, pd_out, "pten_out") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py index 052fe8b156a53cec04fc2a216768acc1d4528312..4f075066b9d931c978b40e4b9cafc21018c43710 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py @@ -101,7 +101,7 @@ class TestJITLoad(unittest.TestCase): except OSError as e: caught_exception = True self.assertTrue( - "function \"relu_cpu_forward\" is not implemented for data type `int32_t`" + "function \"relu_cpu_forward\" is not implemented for data type `int32`" in str(e)) if IS_WINDOWS: self.assertTrue( @@ -123,7 +123,7 @@ class TestJITLoad(unittest.TestCase): except OSError as e: caught_exception = True self.assertTrue( - "function \"relu_cuda_forward_kernel\" is not implemented for data type `int32_t`" + "function \"relu_cuda_forward_kernel\" is not implemented for data type `int32`" in str(e)) self.assertTrue( "python/paddle/fluid/tests/custom_op/custom_relu_op.cu" in diff --git a/python/setup.py.in b/python/setup.py.in index b1643e6a6d7d36c5e9a59d4daefb7e15300a549e..05418e1f46814a423dbbc77106ef2f08543376bd 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -558,9 +558,13 @@ def find_files(pattern, root, recursive=False): headers = ( list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/extension/include')) + # extension + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/pten/api')) + # pten unify api header + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/pten/api/ext')) + # custom op api + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/pten/api/include')) + # pten api + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/pten/common')) + # pten common headers # For paddle uew custom op, only copy data type headers from `paddle/fluid/platform` - # to `extension/incude`, + # to `paddle/pten/api/ext`, + ['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/bfloat16.h'] + ['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/complex.h'] + ['@PADDLE_SOURCE_DIR@/paddle/fluid/platform/float16.h'] + ['@PADDLE_SOURCE_DIR@/paddle/utils/any.h']) @@ -577,7 +581,6 @@ class InstallCommand(InstallCommandBase): ret = InstallCommandBase.finalize_options(self) self.install_lib = self.install_platlib self.install_headers = os.path.join(self.install_platlib, 'paddle', 'include') - return ret class InstallHeaders(Command): @@ -609,8 +612,8 @@ class InstallHeaders(Command): elif 'third_party' not in header: # paddle headers install_dir = re.sub('@PADDLE_SOURCE_DIR@/', '', header) - if 'fluid' in install_dir or 'utils' in install_dir: - install_dir = "paddle/extension/include/" + if 'fluid' in install_dir: + install_dir = "paddle/pten/common/" else: # third_party install_dir = re.sub('${THIRD_PARTY_PATH}', 'third_party', header) diff --git a/tools/get_single_test_cov.py b/tools/get_single_test_cov.py index 421962bb584974ee0eef5140c908af55f5d5fa8c..9232924ddb07d8473e951392fcba163bdb63c9b2 100644 --- a/tools/get_single_test_cov.py +++ b/tools/get_single_test_cov.py @@ -87,7 +87,7 @@ def getCovinfo(rootPath, test): 'cd %s && lcov --capture -d . -o coverage.info --rc lcov_branch_coverage=0 > /dev/null 2>&1' % ut_map_path) os.system( - "cd %s && lcov --extract coverage.info '/paddle/paddle/fluid/framework/*' '/paddle/paddle/fluid/imperative/*' '/paddle/paddle/fluid/inference/*' '/paddle/paddle/fluid/memory/*' '/paddle/paddle/fluid/operators/*' '/paddle/paddle/fluid/string/*' '/paddle/paddle/fluid/distributed/*' '/paddle/paddle/fluid/extension/*' '/paddle/paddle/fluid/platform/*' '/paddle/paddle/fluid/pybind/*' '/paddle/build/*' -o coverage.info.tmp --rc lcov_branch_coverage=0 > /dev/null 2>&1" + "cd %s && lcov --extract coverage.info '/paddle/paddle/fluid/framework/*' '/paddle/paddle/fluid/imperative/*' '/paddle/paddle/fluid/inference/*' '/paddle/paddle/fluid/memory/*' '/paddle/paddle/fluid/operators/*' '/paddle/paddle/fluid/string/*' '/paddle/paddle/fluid/distributed/*' '/paddle/paddle/fluid/platform/*' '/paddle/paddle/fluid/pybind/*' '/paddle/build/*' -o coverage.info.tmp --rc lcov_branch_coverage=0 > /dev/null 2>&1" % ut_map_path) os.system('rm -rf %s/paddle' % ut_map_path) os.system('rm -rf %s/coverage.info' % ut_map_path)