diff --git a/.gitignore b/.gitignore index 664c45b7202f6bf93712062ffa1d003b575afffd..4faaf162ba52b4ca36a5e9d5a286d3197a2fd043 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,12 @@ paddle/phi/api/backward/backward_api.h paddle/phi/api/backward/sparse_bw_api.h paddle/phi/api/include/api.h paddle/phi/api/include/sparse_api.h +paddle/phi/api/include/strings_api.h paddle/phi/api/lib/api.cc paddle/phi/api/lib/dygraph_api.* paddle/phi/api/lib/backward_api.cc paddle/phi/api/lib/sparse_api.cc +paddle/phi/api/lib/strings_api.cc paddle/phi/api/lib/sparse_bw_api.cc paddle/phi/extension.h paddle/phi/include/* diff --git a/paddle/fluid/framework/convert_utils.cc b/paddle/fluid/framework/convert_utils.cc index 1144bc5150906a60026bcd7e02ad70fb4db0125d..1ea278ea4f799ed1aa011272708efcbfe6952a49 100644 --- a/paddle/fluid/framework/convert_utils.cc +++ b/paddle/fluid/framework/convert_utils.cc @@ -47,6 +47,8 @@ paddle::experimental::DataType TransToPhiDataType( return DataType::BFLOAT16; case paddle::framework::proto::VarType::BOOL: return DataType::BOOL; + case paddle::framework::proto::VarType::PSTRING: + return DataType::PSTRING; default: return DataType::UNDEFINED; } @@ -81,6 +83,8 @@ paddle::framework::proto::VarType::Type TransToProtoVarType( return paddle::framework::proto::VarType::BF16; case DataType::BOOL: return paddle::framework::proto::VarType::BOOL; + case DataType::PSTRING: + return paddle::framework::proto::VarType::PSTRING; default: PADDLE_THROW(paddle::platform::errors::Unimplemented( "Unsupported data type `%s` when casting it into " @@ -117,6 +121,8 @@ size_t DataTypeSize(DataType dtype) { return sizeof(paddle::platform::complex); case DataType::COMPLEX128: return sizeof(paddle::platform::complex); + case DataType::PSTRING: + return sizeof(paddle::platform::pstring); default: return 0; } @@ -145,6 +151,8 @@ DataType String2DataType(const std::string& str) { return DataType::COMPLEX64; } else if (str == "complex128") { return DataType::COMPLEX128; + } else if (str == "pstring") { + return DataType::PSTRING; } else if (str == "bfloat16") { return DataType::BFLOAT16; } else { @@ -176,6 +184,8 @@ std::string DataType2String(DataType dtype) { return "complex64"; case DataType::COMPLEX128: return "complex128"; + case DataType::PSTRING: + return "pstring"; case DataType::BFLOAT16: return "bfloat16"; default: diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index de6239959316b411303823b46cfbad1dc1faeece..75ab747794f014ec1b9c8f84978426a6329b4cf4 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -18,9 +18,11 @@ #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/pstring.h" using float16 = paddle::platform::float16; using bfloat16 = paddle::platform::bfloat16; +using pstring = phi::dtype::pstring; namespace paddle { namespace framework { @@ -58,7 +60,8 @@ static DataTypeMap* InitDataTypeMap() { RegisterType(retv, proto_type, #cc_type) _ForEachDataType_(RegType); - + // Register pstring individually + RegType(pstring, proto::VarType::PSTRING); #undef RegType return retv; } diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 300d5f6e8fad107e9480c555b9ed122012e0e3b3..0d3e7c2741c17b2649c7e73a3c97d3f117c6027f 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -152,6 +152,8 @@ message VarType { STRINGS = 26; VOCAB = 27; FEED_LIST = 28; + // The data type of 
phi::StringTensor + PSTRING = 29; } required Type type = 1; diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 04e1bbcc9df423bc38e78822ec6ef8ee28c5b216..724b1ba556d4b999f8be501b8d212cbf0742e894 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -23,7 +23,7 @@ add_subdirectory(tools) add_subdirectory(tests) # make an unity target for compile deps -set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor) +set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor string_tensor) get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) diff --git a/paddle/phi/api/CMakeLists.txt b/paddle/phi/api/CMakeLists.txt index a1b0af609ca8d52f148b4ffa6016fdbc49862677..d575759db32eeb023510b39ecd39160785a8e8e2 100644 --- a/paddle/phi/api/CMakeLists.txt +++ b/paddle/phi/api/CMakeLists.txt @@ -1,2 +1,2 @@ add_subdirectory(lib) -cc_library(phi_api SRCS all.cc DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api) +cc_library(phi_api SRCS all.cc DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api strings_api) diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index 16341e58a9b796bbbd2599882dd2074be4ac2dd6..cd525368e5883f6fe8e42bad365f4066864cd28a 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -51,6 +51,14 @@ set(sparse_bw_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/sparse_bw_a set(sparse_bw_api_header_file_tmp ${sparse_bw_api_header_file}.tmp) set(sparse_bw_api_source_file_tmp ${sparse_bw_api_source_file}.tmp) +# strings api file +set(strings_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api_gen.py) +set(strings_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml) +set(strings_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/strings_api.h) +set(strings_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/strings_api.cc) +set(strings_api_header_file_tmp ${strings_api_header_file}.tmp) +set(strings_api_source_file_tmp ${strings_api_source_file}.tmp) + # wrapped infermeta file set(wrapped_infermeta_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/wrapped_infermeta_gen.py) set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml) @@ -114,6 +122,19 @@ add_custom_command( DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base} ${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file} VERBATIM) +# generate strings api +add_custom_command( + OUTPUT ${strings_api_header_file} ${strings_api_source_file} + COMMAND ${PYTHON_EXECUTABLE} ${strings_api_gen_file} + --api_yaml_path ${strings_api_yaml_file} + --api_header_path ${strings_api_header_file_tmp} + --api_source_path ${strings_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp} ${strings_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp} ${strings_api_source_file} + COMMENT "copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}" + DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base} ${api_gen_file} + VERBATIM) + # generate dygraph(intermediate) api add_custom_command( OUTPUT ${dygraph_api_header_file} 
${dygraph_api_source_file} @@ -152,5 +173,5 @@ cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw ph cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl) cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl) cc_library(phi_dygraph_api SRCS ${dygraph_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform phi_function_api sparse_api) - -cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta sparse_api) +cc_library(strings_api SRCS ${strings_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils) +cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta sparse_api strings_api) diff --git a/paddle/phi/api/lib/api_declare.h b/paddle/phi/api/lib/api_declare.h index 209b3f0c3c3e3bef49df19622b45135fc5284539..ab41cc8793682920547427befffb596e39ede878 100644 --- a/paddle/phi/api/lib/api_declare.h +++ b/paddle/phi/api/lib/api_declare.h @@ -19,3 +19,4 @@ limitations under the License. */ // PD_DECLARE_API(Math); // PD_DECLARE_API(SparseApi); +// PD_DECLARE_API(StringsApi); diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 0c11e2df65d0db23b4e080bf041c78d976714013..7cbb4344e81d7c38f0aeb28cb161f9325648628c 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -56,6 +56,10 @@ std::shared_ptr TensorToSelectedRows( return nullptr; } +std::shared_ptr TensorToStringTensor(const Tensor& tensor) { + return std::dynamic_pointer_cast(tensor.impl()); +} + /* ----------------- for infer_meta --------------------- */ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) { @@ -92,6 +96,10 @@ paddle::optional MakeMetaTensor( return {paddle::none}; } +phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) { + return phi::MetaTensor(tensor); +} + /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { @@ -148,5 +156,20 @@ phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type) { return out->impl().get(); } +phi::TensorBase* SetStringsKernelOutput(Backend backend, + Tensor* out, + TensorType type) { + if (!out->initialized()) { + if (type == TensorType::STRING_TENSOR) { + if (out->impl() == nullptr) { + auto strings_tensor = std::make_shared(); + out->set_impl(strings_tensor); + } + return out->impl().get(); + } + } + return out->impl().get(); +} + } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 01625f651c3bd1deaae43f735ac03fb2bc3f4e25..2a4c8417b5e6ded3b277d1b5751f1a8a4fc5f09a 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -22,11 +22,12 @@ limitations under the License. 
*/ #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#include "paddle/phi/core/string_tensor.h" namespace paddle { namespace experimental { -enum class TensorType { DENSE_TENSOR, SPARSE_CSR, SPARSE_COO }; +enum class TensorType { DENSE_TENSOR, SPARSE_CSR, SPARSE_COO, STRING_TENSOR }; /* ------------------ for input ----------------------- */ @@ -43,6 +44,8 @@ std::shared_ptr TensorToSelectedRows(const Tensor& tensor); std::shared_ptr TensorToSelectedRows( const paddle::optional& tensor); +std::shared_ptr TensorToStringTensor(const Tensor& tensor); + /* ----------------- for infer_meta --------------------- */ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor); @@ -58,6 +61,8 @@ phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor); paddle::optional MakeMetaTensor( const paddle::optional& tensor); +phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor); + /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out); @@ -70,5 +75,9 @@ phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out); phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type); +phi::TensorBase* SetStringsKernelOutput(Backend backend, + Tensor* out, + TensorType type); + } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/utils/CMakeLists.txt b/paddle/phi/api/lib/utils/CMakeLists.txt index 271a58222f0c0f6b60642482691bed635e4d5f3c..94a16da2b7720b5af08e72096d9d465a93ea3f9e 100644 --- a/paddle/phi/api/lib/utils/CMakeLists.txt +++ b/paddle/phi/api/lib/utils/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(phi_api_utils SRCS storage.cc tensor_utils.cc DEPS -tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits scalar) +tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits scalar string_tensor) diff --git a/paddle/phi/common/cpstring_impl.h b/paddle/phi/common/cpstring_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..99a04e7ce49244cb24672ed91c7b2cb408e3fef2 --- /dev/null +++ b/paddle/phi/common/cpstring_impl.h @@ -0,0 +1,547 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +This file is inspired by + + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/ctstring_internal.h + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include +#include + +#if (defined(__NVCC__) || defined(__HIPCC__)) +#define HOSTDEVICE __host__ __device__ +#define DEVICE __device__ +#define HOST __host__ +#else +#define HOSTDEVICE +#define DEVICE +#define HOST +#endif + +#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \ + defined(_WIN32) +#define PD_PSTRING_LITTLE_ENDIAN 1 +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define PD_PSTRING_LITTLE_ENDIAN 0 +#else +#error "Unable to detect endianness." +#endif + +#if defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return __builtin_bswap32(host_int); +} + +#elif defined(_MSC_VER) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return _byteswap_ulong(host_int); +} + +#elif defined(__APPLE__) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return OSSwapInt32(host_int); +} + +#else +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { +#if defined(__GLIBC__) + return bswap_32(host_int); +#else // defined(__GLIBC__) + return (((host_int & uint32_t{0xFF}) << 24) | + ((host_int & uint32_t{0xFF00}) << 8) | + ((host_int & uint32_t{0xFF0000}) >> 8) | + ((host_int & uint32_t{0xFF000000}) >> 24)); +#endif // defined(__GLIBC__) +} +#endif + +#if PD_PSTRING_LITTLE_ENDIAN || (defined(__NVCC__) || defined(__HIPCC__)) +#define PD_le32toh(x) x +#else // PD_PSTRING_LITTLE_ENDIAN +#define PD_le32toh(x) swap32(x) +#endif // PD_PSTRING_LARGE_ENDIAN + +HOSTDEVICE static inline size_t PD_align16(size_t i) { + return (i + 0xF) & ~0xF; +} + +HOSTDEVICE static inline size_t PD_max(size_t a, size_t b) { + return a > b ? a : b; +} +HOSTDEVICE static inline size_t PD_min(size_t a, size_t b) { + return a < b ? 
a : b; +} + +typedef enum PD_PString_Type { // NOLINT + PD_PSTR_SMALL = 0x00, + PD_PSTR_LARGE = 0x01, + PD_PSTR_OFFSET = 0x02, + PD_PSTR_VIEW = 0x03, + PD_PSTR_TYPE_MASK = 0x03 +} PD_PString_Type; + +typedef struct PD_PString_Large { // NOLINT + size_t size; + size_t cap; + char *ptr; +} PD_PString_Large; + +typedef struct PD_PString_Offset { // NOLINT + uint32_t size; + uint32_t offset; + uint32_t count; +} PD_PString_Offset; + +typedef struct PD_PString_View { // NOLINT + size_t size; + const char *ptr; +} PD_PString_View; + +typedef struct PD_PString_Raw { // NOLINT + uint8_t raw[24]; +} PD_PString_Raw; + +typedef union PD_PString_Union { // NOLINT + PD_PString_Large large; + PD_PString_Offset offset; + PD_PString_View view; + PD_PString_Raw raw; +} PD_PString_Union; + +enum { + PD_PString_SmallCapacity = + (sizeof(PD_PString_Union) - sizeof(/* null delim */ char) - + sizeof(/* uint8_t size */ uint8_t)), +}; + +typedef struct PD_PString_Small { // NOLINT + uint8_t size; + char str[PD_PString_SmallCapacity + sizeof(/* null delim */ char)]; +} PD_PString_Small; + +typedef struct PD_PString { // NOLINT + union { + PD_PString_Small smll; + PD_PString_Large large; + PD_PString_Offset offset; + PD_PString_View view; + PD_PString_Raw raw; + } u; +} PD_PString; + +HOSTDEVICE static inline PD_PString_Type PD_PString_GetType( + const PD_PString *str) { + return (PD_PString_Type)(str->u.raw.raw[0] & PD_PSTR_TYPE_MASK); // NOLINT +} + +HOSTDEVICE static inline size_t PD_PString_ToActualSizeT(size_t size) { +#if PD_PSTRING_LITTLE_ENDIAN + return size >> 2; +#else // PD_PSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); // NOLINT + + return (((mask << 2) & size) >> 2) | (~mask & size); +#endif // PD_PSTRING_LITTLE_ENDIAN +} + +HOSTDEVICE static inline size_t PD_PString_ToInternalSizeT( + size_t size, PD_PString_Type type) { +#if PD_PSTRING_LITTLE_ENDIAN + return (size << 2) | type; +#else // PD_PSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); // NOLINT + + return (mask & (size << 2)) | (~mask & size) | + ((size_t)type << ((sizeof(size_t) - 1) * 8)); // NOLINT +#endif // PD_PSTRING_LITTLE_ENDIAN +} + +/* + * Need to implement in other source file. 
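+ * (For now, the allocation/copy helpers below -- PD_Malloc, PD_Free,
+ * PD_Memcpy, PD_Memmove -- are simple inline fallbacks usable from both
+ * host and device code.)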
+ */ +HOSTDEVICE static inline void PD_Free(void *ptr, size_t size) { free(ptr); } + +HOSTDEVICE static inline void *PD_Memset(void *src, int ch, size_t size) { + char *dst = (char *)src; // NOLINT + for (size_t i = 0; i < size; ++i) { + dst[i] = ch; + } + return dst; +} + +HOSTDEVICE static inline void *PD_Memcpy(void *dst, + const void *src, + size_t size) { + for (size_t i = 0; i < size; ++i) { + ((char *)dst)[i] = ((const char *)src)[i]; // NOLINT + } + return dst; +} + +HOSTDEVICE static inline void *PD_Malloc(size_t size) { return malloc(size); } + +HOSTDEVICE static inline void *PD_Realloc(void *ptr, + size_t old_size, + size_t new_size) { +#if (defined(__NVCC__) || defined(__HIPCC__)) + if (old_size >= new_size) { + return ptr; + } + void *new_ptr = malloc(new_size); + PD_Memcpy(new_ptr, ptr, old_size); + free(ptr); + return new_ptr; +#else + return realloc(ptr, new_size); +#endif +} + +HOSTDEVICE static inline int PD_Memcmp(const void *s1, + const void *s2, + size_t size) { + const uint8_t *lstr = (const uint8_t *)(s1); // NOLINT + const uint8_t *rstr = (const uint8_t *)(s2); // NOLINT + for (size_t i = 0; i < size; ++i) { + if (lstr[i] != rstr[i]) { + return (lstr[i] - rstr[i]); + } + } + return 0; +} + +HOSTDEVICE static inline void *PD_Memmove(void *dest, + const void *src, + size_t size) { + const uint8_t *from = (const uint8_t *)(src); // NOLINT + uint8_t *to = (uint8_t *)(dest); // NOLINT + if (from == to || size == 0) { + return dest; + } + + if (to > from && (to - from < static_cast(size))) { + for (int i = size - 1; i >= 0; i--) { + to[i] = from[i]; + } + return dest; + } + if (from > to && (from - to < static_cast(size))) { + for (size_t i = 0; i < size; i++) { + to[i] = from[i]; + } + return dest; + } + dest = PD_Memcpy(dest, src, size); + return dest; +} + +HOSTDEVICE static inline void PD_PString_Init(PD_PString *str) { + PD_Memset(str->u.raw.raw, 0, sizeof(PD_PString_Raw)); +} + +HOSTDEVICE static inline void PD_PString_Dealloc(PD_PString *str) { + if (PD_PString_GetType(str) == PD_PSTR_LARGE && + str->u.large.ptr != NULL) { // NOLINT + PD_Free(str->u.large.ptr, str->u.large.cap + 1); + PD_PString_Init(str); + } +} + +HOSTDEVICE static inline size_t PD_PString_GetSize(const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.size >> 2; + case PD_PSTR_LARGE: + return PD_PString_ToActualSizeT(str->u.large.size); + case PD_PSTR_OFFSET: + return PD_le32toh(str->u.offset.size) >> 2; + case PD_PSTR_VIEW: + return PD_PString_ToActualSizeT(str->u.view.size); + default: + return 0; // Unreachable. + } +} + +HOSTDEVICE static inline size_t PD_PString_GetCapacity(const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return PD_PString_SmallCapacity; + case PD_PSTR_LARGE: + return str->u.large.cap; + case PD_PSTR_OFFSET: + case PD_PSTR_VIEW: + default: + return 0; + } +} + +HOSTDEVICE static inline const char *PD_PString_GetDataPointer( + const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.str; + case PD_PSTR_LARGE: + return str->u.large.ptr; + case PD_PSTR_OFFSET: + return (const char *)str + str->u.offset.offset; // NOLINT + case PD_PSTR_VIEW: + return str->u.view.ptr; + default: + // Unreachable. 
+ return NULL; // NOLINT + } +} + +HOSTDEVICE static inline char *PD_PString_ResizeUninitialized(PD_PString *str, + size_t new_size) { + size_t curr_size = PD_PString_GetSize(str); + size_t copy_size = PD_min(new_size, curr_size); + + PD_PString_Type curr_type = PD_PString_GetType(str); + const char *curr_ptr = PD_PString_GetDataPointer(str); + + // Case: SMALL/LARGE/VIEW/OFFSET -> SMALL + if (new_size <= PD_PString_SmallCapacity) { + str->u.smll.size = (uint8_t)((new_size << 2) | PD_PSTR_SMALL); // NOLINT + str->u.smll.str[new_size] = '\0'; + + if (curr_type != PD_PSTR_SMALL && copy_size) { + PD_Memcpy(str->u.smll.str, curr_ptr, copy_size); + } + + if (curr_type == PD_PSTR_LARGE) { + PD_Free((void *)curr_ptr, str->u.large.cap + 1); // NOLINT + } + + return str->u.smll.str; + } + + // Case: SMALL/LARGE/VIEW/OFFSET -> LARGE + size_t new_cap; + size_t curr_cap = PD_PString_GetCapacity(str); + + if (new_size < curr_size && new_size < curr_cap / 2) { + new_cap = PD_align16(curr_cap / 2 + 1) - 1; + } else if (new_size > curr_cap) { + new_cap = PD_align16(new_size + 1) - 1; + } else { + new_cap = curr_cap; + } + + char *new_ptr; + if (new_cap == curr_cap) { + new_ptr = str->u.large.ptr; + } else if (curr_type == PD_PSTR_LARGE) { + new_ptr = (char *)PD_Realloc( // NOLINT + str->u.large.ptr, + curr_cap + 1, + new_cap + 1); + } else { + new_ptr = (char *)PD_Malloc(new_cap + 1); // NOLINT + if (copy_size) { + PD_Memcpy(new_ptr, curr_ptr, copy_size); + } + } + + str->u.large.size = PD_PString_ToInternalSizeT(new_size, PD_PSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[new_size] = '\0'; + str->u.large.cap = new_cap; + + return str->u.large.ptr; +} + +HOSTDEVICE static inline char *PD_PString_GetMutableDataPointer( + PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.str; + case PD_PSTR_OFFSET: + case PD_PSTR_VIEW: + // Convert OFFSET/VIEW to SMALL/LARGE + PD_PString_ResizeUninitialized(str, PD_PString_GetSize(str)); + return (PD_PString_GetType(str) == PD_PSTR_SMALL) ? str->u.smll.str + : str->u.large.ptr; + case PD_PSTR_LARGE: + return str->u.large.ptr; + default: + // Unreachable. + return NULL; // NOLINT + } +} + +HOSTDEVICE static inline void PD_PString_Reserve(PD_PString *str, + size_t new_cap) { + PD_PString_Type curr_type = PD_PString_GetType(str); + + if (new_cap <= PD_PString_SmallCapacity) { + // We do nothing, we let Resize/GetMutableDataPointer handle the + // conversion to SMALL from VIEW/OFFSET when the need arises. + // In the degenerate case, where new_cap <= PD_PString_SmallCapacity, + // curr_size > PD_PString_SmallCapacity, and the type is VIEW/OFFSET, we + // defer the malloc to Resize/GetMutableDataPointer. + return; + } + + if (curr_type == PD_PSTR_LARGE && new_cap <= str->u.large.cap) { + // We handle reduced cap in resize. + return; + } + + // Case: VIEW/OFFSET -> LARGE or grow an existing LARGE type + size_t curr_size = PD_PString_GetSize(str); + const char *curr_ptr = PD_PString_GetDataPointer(str); + + // Since VIEW and OFFSET types are read-only, their capacity is effectively 0. + // So we make sure we have enough room in the VIEW and OFFSET cases. 
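+  // For example, reserving 100 bytes on a VIEW of size 10 gives
+  // new_cap = PD_align16(max(100, 10) + 1) - 1 = 111, i.e. a 112-byte
+  // allocation including the trailing '\0'.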
+ new_cap = PD_align16(PD_max(new_cap, curr_size) + 1) - 1; + size_t curr_cap = PD_PString_GetCapacity(str); + + if (curr_type == PD_PSTR_LARGE) { + str->u.large.ptr = (char *)PD_Realloc( // NOLINT + str->u.large.ptr, + curr_cap + 1, + new_cap + 1); + } else { + // Convert to Large + char *new_ptr = (char *)PD_Malloc(new_cap + 1); // NOLINT + PD_Memcpy(new_ptr, curr_ptr, curr_size); + + str->u.large.size = PD_PString_ToInternalSizeT(curr_size, PD_PSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[curr_size] = '\0'; + } + + str->u.large.cap = new_cap; +} + +HOSTDEVICE static inline void PD_PString_ReserveAmortized(PD_PString *str, + size_t new_cap) { + const size_t curr_cap = PD_PString_GetCapacity(str); + if (new_cap > curr_cap) { + PD_PString_Reserve(str, new_cap > 2 * curr_cap ? new_cap : 2 * curr_cap); + } +} + +HOSTDEVICE static inline char *PD_PString_Resize(PD_PString *str, + size_t new_size, + char c) { + size_t curr_size = PD_PString_GetSize(str); + char *cstr = PD_PString_ResizeUninitialized(str, new_size); + + if (new_size > curr_size) { + PD_Memset(cstr + curr_size, c, new_size - curr_size); + } + + return cstr; +} + +HOSTDEVICE static inline void PD_PString_AssignView(PD_PString *dst, + const char *src, + size_t size) { + PD_PString_Dealloc(dst); + + dst->u.view.size = PD_PString_ToInternalSizeT(size, PD_PSTR_VIEW); + dst->u.view.ptr = src; +} + +HOSTDEVICE static inline void PD_PString_AppendN(PD_PString *dst, + const char *src, + size_t src_size) { + if (!src_size) return; + + size_t dst_size = PD_PString_GetSize(dst); + + // For append use cases, we want to ensure amortized growth. + PD_PString_ReserveAmortized(dst, dst_size + src_size); + char *dst_c = PD_PString_ResizeUninitialized(dst, dst_size + src_size); + + PD_Memcpy(dst_c + dst_size, src, src_size); +} + +HOSTDEVICE static inline void PD_PString_Append(PD_PString *dst, + const PD_PString *src) { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_AppendN(dst, src_c, size); +} + +HOSTDEVICE static inline void PD_PString_Copy(PD_PString *dst, + const char *src, + size_t size) { + char *dst_c = PD_PString_ResizeUninitialized(dst, size); + + if (size) PD_Memcpy(dst_c, src, size); +} + +HOSTDEVICE static inline void PD_PString_Assign(PD_PString *dst, + const PD_PString *src) { + if (dst == src) return; + + PD_PString_Dealloc(dst); + + switch (PD_PString_GetType(src)) { + case PD_PSTR_SMALL: + case PD_PSTR_VIEW: + *dst = *src; + return; + case PD_PSTR_LARGE: { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_Copy(dst, src_c, size); + } + return; + default: + return; // Unreachable. + } +} + +HOSTDEVICE static inline void PD_PString_Move(PD_PString *dst, + PD_PString *src) { + if (dst == src) return; + + PD_PString_Dealloc(dst); + + switch (PD_PString_GetType(src)) { + case PD_PSTR_SMALL: + case PD_PSTR_VIEW: + *dst = *src; + return; + case PD_PSTR_LARGE: + *dst = *src; + PD_PString_Init(src); + return; + case PD_PSTR_OFFSET: { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_AssignView(dst, src_c, size); + } + return; + default: + return; // Unreachable. 
+ } +} diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h index 38239f0fa9dc1c2c2c5f67645745a1b0bf0281ac..1792cb9370673bdbcaeeae2d1fccb5cd871e7e19 100644 --- a/paddle/phi/common/data_type.h +++ b/paddle/phi/common/data_type.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/phi/common/float16.h" #include "paddle/phi/api/ext/exception.h" +#include "paddle/phi/common/pstring.h" namespace paddle { namespace experimental { @@ -27,6 +28,7 @@ using complex64 = ::phi::dtype::complex; using complex128 = ::phi::dtype::complex; using float16 = ::phi::dtype::float16; using bfloat16 = ::phi::dtype::bfloat16; +using pstring = ::phi::dtype::pstring; enum class DataType { UNDEFINED = 0, @@ -45,6 +47,11 @@ enum class DataType { FLOAT64, COMPLEX64, COMPLEX128, + // In Paddle 2.3, we add a new type of Tensor, StringTensor, which is designed + // for string data management. We design the dtype of StringTensor, pstring. + // In order to express a unique data dtype of StringTensor, we add + // DataType::PSTRING. + PSTRING, NUM_DATA_TYPES, // See Note [ Why we need ALL in baisc kernel key member? ] ALL_DTYPE = UNDEFINED, @@ -72,6 +79,8 @@ inline size_t SizeOf(DataType data_type) { return 8; case DataType::COMPLEX128: return 16; + case DataType::PSTRING: + return 24; case DataType::UNDEFINED: return 0; case DataType::NUM_DATA_TYPES: @@ -82,22 +91,23 @@ inline size_t SizeOf(DataType data_type) { return 0; } -#define PD_FOR_EACH_DATA_TYPE(_) \ - _(bool, DataType::BOOL) \ - _(int8_t, DataType::INT8) \ - _(uint8_t, DataType::UINT8) \ - _(int16_t, DataType::INT16) \ - _(uint16_t, DataType::UINT16) \ - _(int32_t, DataType::INT32) \ - _(uint32_t, DataType::UINT32) \ - _(int64_t, DataType::INT64) \ - _(uint64_t, DataType::UINT64) \ - _(bfloat16, DataType::BFLOAT16) \ - _(float16, DataType::FLOAT16) \ - _(float, DataType::FLOAT32) \ - _(double, DataType::FLOAT64) \ - _(complex64, DataType::COMPLEX64) \ - _(complex128, DataType::COMPLEX128) +#define PD_FOR_EACH_DATA_TYPE(_) \ + _(bool, DataType::BOOL) \ + _(int8_t, DataType::INT8) \ + _(uint8_t, DataType::UINT8) \ + _(int16_t, DataType::INT16) \ + _(uint16_t, DataType::UINT16) \ + _(int32_t, DataType::INT32) \ + _(uint32_t, DataType::UINT32) \ + _(int64_t, DataType::INT64) \ + _(uint64_t, DataType::UINT64) \ + _(bfloat16, DataType::BFLOAT16) \ + _(float16, DataType::FLOAT16) \ + _(float, DataType::FLOAT32) \ + _(double, DataType::FLOAT64) \ + _(complex64, DataType::COMPLEX64) \ + _(complex128, DataType::COMPLEX128) \ + _(pstring, DataType::PSTRING) template struct DataTypeToCppType; @@ -175,6 +185,9 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { case DataType::COMPLEX128: os << "complex128"; break; + case DataType::PSTRING: + os << "pstring"; + break; default: PD_THROW("Invalid enum data type `", static_cast(dtype), "`."); } @@ -195,4 +208,6 @@ using bfloat16 = paddle::experimental::bfloat16; using complex64 = paddle::experimental::complex64; using complex128 = paddle::experimental::complex128; using float16 = paddle::experimental::float16; +using pstring = paddle::experimental::pstring; + } // namespace paddle diff --git a/paddle/phi/common/layout.h b/paddle/phi/common/layout.h index 8146d5d399f2c93f12d7d30bba4abe56f875e9a7..a5e4871f3d56b269cf51545fb4aa0fd92883dbe3 100644 --- a/paddle/phi/common/layout.h +++ b/paddle/phi/common/layout.h @@ -29,6 +29,7 @@ enum class DataLayout { MKLDNN, SPARSE_COO, SPARSE_CSR, + PSTRING_UNION, NUM_DATA_LAYOUTS, NDHWC, NCDHW, @@ -76,6 +77,8 @@ inline DataLayout 
StringToDataLayout(const std::string& str) { return DataLayout::SPARSE_CSR; } else if (s == "NDHWC") { return DataLayout::kNDHWC; + } else if (s == "PSTRING_UNION") { + return DataLayout::PSTRING_UNION; } else if (s == "NCDHW") { return DataLayout::kNCDHW; } else { @@ -101,6 +104,8 @@ inline std::string DataLayoutToString(const DataLayout& layout) { return "NDHWC"; case DataLayout::kNCDHW: return "NCDHW"; + case DataLayout::PSTRING_UNION: + return "PSTRING_UNION"; default: PD_THROW("Unknown Data Layout type ", static_cast(layout), "."); } diff --git a/paddle/phi/common/pstring.h b/paddle/phi/common/pstring.h new file mode 100644 index 0000000000000000000000000000000000000000..4c89251a60c13a995c464ec8546a0e906d46dc9e --- /dev/null +++ b/paddle/phi/common/pstring.h @@ -0,0 +1,480 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +This file is inspired by + + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/tstring.h + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include +#include + +#include "paddle/phi/common/cpstring_impl.h" + +namespace phi { +namespace dtype { + +// Pstring is an only dtype of StringTensor, which is +// used to manage string data. It provides almost same +// interfaces compared to std::string, including data(), +// length() and so on. Besides, pstring data can be +// manipulated in GPU. 
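+//
+// Example (illustrative only):
+//
+//   phi::dtype::pstring s("hello");
+//   s.append(", world");                   // std::string-like mutation
+//   size_t n = s.size();                   // 12
+//   phi::dtype::pstring v;
+//   v.assign_as_view(s.data(), s.size());  // zero-copy VIEW into s
+//   std::string host_copy = s;             // implicit conversion to std::string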
+ +class pstring { + PD_PString pstr_; + + public: + enum Type { + SMALL = PD_PSTR_SMALL, + LARGE = PD_PSTR_LARGE, + OFFSET = PD_PSTR_OFFSET, + VIEW = PD_PSTR_VIEW, + }; + + typedef const char* const_iterator; + + // Ctor + HOSTDEVICE pstring(); + HOSTDEVICE pstring(const std::string& str); // NOLINT + HOSTDEVICE pstring(const char* str, size_t len); + HOSTDEVICE pstring(const char* str); // NOLINT + HOSTDEVICE pstring(size_t n, char c); + + // Copy + HOSTDEVICE pstring(const pstring& str); + + // Move + HOSTDEVICE pstring(pstring&& str) noexcept; + + // Dtor + HOSTDEVICE ~pstring(); + + // Copy Assignment + HOSTDEVICE pstring& operator=(const pstring& str); + HOSTDEVICE pstring& operator=(const std::string& str); + HOSTDEVICE pstring& operator=(const char* str); + HOSTDEVICE pstring& operator=(char ch); + + // Move Assignment + HOSTDEVICE pstring& operator=(pstring&& str); + + // Comparison + HOSTDEVICE int compare(const char* str, size_t len) const; + HOSTDEVICE bool operator<(const pstring& o) const; + HOSTDEVICE bool operator>(const pstring& o) const; + HOSTDEVICE bool operator==(const char* str) const; + HOSTDEVICE bool operator==(const pstring& o) const; + HOSTDEVICE bool operator!=(const char* str) const; + HOSTDEVICE bool operator!=(const pstring& o) const; + + // Conversion Operators + HOSTDEVICE operator std::string() const; // NOLINT + + // Attributes + HOSTDEVICE size_t size() const; + HOSTDEVICE size_t length() const; + HOSTDEVICE size_t capacity() const; + HOSTDEVICE bool empty() const; + HOSTDEVICE Type type() const; + + // Allocation + HOSTDEVICE void resize(size_t new_size, char c = 0); + // Similar to resize, but will leave the newly grown region uninitialized. + HOSTDEVICE void resize_uninitialized(size_t new_size); + HOSTDEVICE void clear() noexcept; + HOSTDEVICE void reserve(size_t n); + + // Iterators + HOSTDEVICE const_iterator begin() const; + HOSTDEVICE const_iterator end() const; + + // Const Element Access + HOSTDEVICE const char* c_str() const; + HOSTDEVICE const char* data() const; + HOSTDEVICE const char& operator[](size_t i) const; + HOSTDEVICE const char& back() const; + + // Mutable Element Access + HOSTDEVICE char* mdata(); + HOSTDEVICE char& operator[](size_t i); + + // Assignment + HOSTDEVICE pstring& assign(const char* str, size_t len); + HOSTDEVICE pstring& assign(const char* str); + + // View Assignment + HOSTDEVICE pstring& assign_as_view(const pstring& str); + HOSTDEVICE pstring& assign_as_view(const std::string& str); + HOSTDEVICE pstring& assign_as_view(const char* str, size_t len); + HOSTDEVICE pstring& assign_as_view(const char* str); + + // Modifiers + // NOTE: Invalid input will result in undefined behavior. 
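+  // For instance, erase(pos, len) or insert(pos, ...) with pos > size() is
+  // not bounds-checked.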
+ HOSTDEVICE pstring& append(const pstring& str); + HOSTDEVICE pstring& append(const char* str, size_t len); + HOSTDEVICE pstring& append(const char* str); + HOSTDEVICE pstring& append(size_t n, char c); + + HOSTDEVICE pstring& erase(size_t pos, size_t len); + + HOSTDEVICE pstring& insert(size_t pos, + const pstring& str, + size_t subpos, + size_t sublen); + HOSTDEVICE pstring& insert(size_t pos, size_t n, char c); + HOSTDEVICE void swap(pstring& str); + HOSTDEVICE void push_back(char ch); + + // Friends + HOSTDEVICE friend bool operator==(const char* a, const pstring& b); + HOSTDEVICE friend bool operator==(const std::string& a, const pstring& b); + HOSTDEVICE friend pstring operator+(const pstring& a, const pstring& b); + HOSTDEVICE friend std::ostream& operator<<(std::ostream& o, + const pstring& str); +}; + +// Non-member function overloads + +HOSTDEVICE bool operator==(const char* a, const pstring& b); +HOSTDEVICE bool operator==(const std::string& a, const pstring& b); +HOSTDEVICE pstring operator+(const pstring& a, const pstring& b); +HOSTDEVICE std::ostream& operator<<(std::ostream& o, const pstring& str); +HOSTDEVICE size_t strlen(const char* start); + +// Implementations + +// Ctor + +HOSTDEVICE inline pstring::pstring() { PD_PString_Init(&pstr_); } + +HOSTDEVICE inline pstring::pstring(const char* str, size_t len) { + PD_PString_Init(&pstr_); + PD_PString_Copy(&pstr_, str, len); +} + +HOSTDEVICE inline pstring::pstring(const char* str) + : pstring(str, strlen(str)) {} + +HOSTDEVICE inline pstring::pstring(size_t n, char c) { + PD_PString_Init(&pstr_); + PD_PString_Resize(&pstr_, n, c); +} + +HOSTDEVICE inline pstring::pstring(const std::string& str) + : pstring(str.data(), str.size()) {} + +HOSTDEVICE inline pstring::pstring(const pstring& str) { + PD_PString_Init(&pstr_); + PD_PString_Assign(&pstr_, &str.pstr_); +} + +// Move + +HOSTDEVICE inline pstring::pstring(pstring&& str) noexcept { + PD_PString_Init(&pstr_); + PD_PString_Move(&pstr_, &str.pstr_); +} + +// Dtor + +HOSTDEVICE inline pstring::~pstring() { PD_PString_Dealloc(&pstr_); } + +// Copy Assignment + +HOSTDEVICE inline pstring& pstring::operator=(const pstring& str) { + PD_PString_Assign(&pstr_, &str.pstr_); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(const std::string& str) { + PD_PString_Copy(&pstr_, str.data(), str.size()); + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(const char* str) { + PD_PString_Copy(&pstr_, str, strlen(str)); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(char c) { + resize_uninitialized(1); + (*this)[0] = c; + + return *this; +} + +// Move Assignment + +HOSTDEVICE inline pstring& pstring::operator=(pstring&& str) { + PD_PString_Move(&pstr_, &str.pstr_); + + return *this; +} + +// Comparison + +HOSTDEVICE inline int pstring::compare(const char* str, size_t len) const { + int ret = PD_Memcmp(data(), str, (len < size()) ? 
len : size()); + + if (ret < 0) return -1; + if (ret > 0) return +1; + + if (size() < len) return -1; + if (size() > len) return +1; + + return 0; +} + +HOSTDEVICE inline bool pstring::operator<(const pstring& o) const { + return compare(o.data(), o.size()) < 0; +} + +HOSTDEVICE inline bool pstring::operator>(const pstring& o) const { + return compare(o.data(), o.size()) > 0; +} + +HOSTDEVICE inline bool pstring::operator==(const char* str) const { + return strlen(str) == size() && PD_Memcmp(data(), str, size()) == 0; +} + +HOSTDEVICE inline bool pstring::operator==(const pstring& o) const { + return o.size() == size() && PD_Memcmp(data(), o.data(), size()) == 0; +} + +HOSTDEVICE inline bool pstring::operator!=(const char* str) const { + return !(*this == str); +} + +HOSTDEVICE inline bool pstring::operator!=(const pstring& o) const { + return !(*this == o); +} + +// Conversion Operators + +HOSTDEVICE inline pstring::operator std::string() const { + return std::string(data(), size()); +} + +// Attributes + +HOSTDEVICE inline size_t pstring::size() const { + return PD_PString_GetSize(&pstr_); +} + +HOSTDEVICE inline size_t pstring::length() const { return size(); } + +HOSTDEVICE inline size_t pstring::capacity() const { + return PD_PString_GetCapacity(&pstr_); +} + +HOSTDEVICE inline bool pstring::empty() const { return size() == 0; } + +HOSTDEVICE inline pstring::Type pstring::type() const { + return static_cast(PD_PString_GetType(&pstr_)); +} + +// Allocation + +HOSTDEVICE inline void pstring::resize(size_t new_size, char c) { + PD_PString_Resize(&pstr_, new_size, c); +} + +HOSTDEVICE inline void pstring::resize_uninitialized(size_t new_size) { + PD_PString_ResizeUninitialized(&pstr_, new_size); +} + +HOSTDEVICE inline void pstring::clear() noexcept { + PD_PString_ResizeUninitialized(&pstr_, 0); +} + +HOSTDEVICE inline void pstring::reserve(size_t n) { + PD_PString_Reserve(&pstr_, n); +} + +// Iterators + +HOSTDEVICE inline pstring::const_iterator pstring::begin() const { + return &(*this)[0]; +} +HOSTDEVICE inline pstring::const_iterator pstring::end() const { + return &(*this)[size()]; +} + +// Element Access + +HOSTDEVICE inline const char* pstring::c_str() const { return data(); } + +HOSTDEVICE inline const char* pstring::data() const { + return PD_PString_GetDataPointer(&pstr_); +} + +HOSTDEVICE inline const char& pstring::operator[](size_t i) const { + return data()[i]; +} + +HOSTDEVICE inline const char& pstring::back() const { + return (*this)[size() - 1]; +} + +HOSTDEVICE inline char* pstring::mdata() { + return PD_PString_GetMutableDataPointer(&pstr_); +} + +HOSTDEVICE inline char& pstring::operator[](size_t i) { return mdata()[i]; } + +// Assignment + +HOSTDEVICE inline pstring& pstring::assign(const char* str, size_t len) { + PD_PString_Copy(&pstr_, str, len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign(const char* str) { + assign(str, strlen(str)); + + return *this; +} + +// View Assignment + +HOSTDEVICE inline pstring& pstring::assign_as_view(const pstring& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const std::string& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const char* str, + size_t len) { + PD_PString_AssignView(&pstr_, str, len); + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const char* str) { + assign_as_view(str, strlen(str)); + + return *this; +} + +// 
Modifiers + +HOSTDEVICE inline pstring& pstring::append(const pstring& str) { + PD_PString_Append(&pstr_, &str.pstr_); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(const char* str, size_t len) { + PD_PString_AppendN(&pstr_, str, len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(const char* str) { + append(str, strlen(str)); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(size_t n, char c) { + // For append use cases, we want to ensure amortized growth. + const size_t new_size = size() + n; + PD_PString_ReserveAmortized(&pstr_, new_size); + resize(new_size, c); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::erase(size_t pos, size_t len) { + PD_Memmove(mdata() + pos, data() + pos + len, size() - len - pos); + + resize(size() - len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::insert(size_t pos, + const pstring& str, + size_t subpos, + size_t sublen) { + size_t orig_size = size(); + PD_PString_ResizeUninitialized(&pstr_, orig_size + sublen); + + PD_Memmove(mdata() + pos + sublen, data() + pos, orig_size - pos); + PD_Memmove(mdata() + pos, str.data() + subpos, sublen); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::insert(size_t pos, size_t n, char c) { + size_t size_ = size(); + PD_PString_ResizeUninitialized(&pstr_, size_ + n); + + PD_Memmove(mdata() + pos + n, data() + pos, size_ - pos); + PD_Memset(mdata() + pos, c, n); + + return *this; +} + +HOSTDEVICE inline void pstring::swap(pstring& str) { + std::swap(pstr_, str.pstr_); +} + +HOSTDEVICE inline void pstring::push_back(char ch) { append(1, ch); } + +// Friends + +HOSTDEVICE inline bool operator==(const char* a, const pstring& b) { + return strlen(a) == b.size() && PD_Memcmp(a, b.data(), b.size()) == 0; +} + +HOSTDEVICE inline bool operator==(const std::string& a, const pstring& b) { + return a.size() == b.size() && PD_Memcmp(a.data(), b.data(), b.size()) == 0; +} + +HOSTDEVICE inline pstring operator+(const pstring& a, const pstring& b) { + pstring r; + r.reserve(a.size() + b.size()); + r.append(a); + r.append(b); + + return r; +} + +HOSTDEVICE inline std::ostream& operator<<(std::ostream& o, + const pstring& str) { + return o.write(str.data(), str.size()); +} + +HOSTDEVICE inline size_t strlen(const char* start) { + const char* end = start; + for (; *end != '\0'; ++end) { + } + return end - start; +} + +} // namespace dtype +} // namespace phi diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index b4a6b54d0fe3a96eea831a439f1555e14c367fa7..b42b4388c2ce159a6aca61a27898d12d41f77d7a 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -19,6 +19,7 @@ cc_library(lod_utils SRCS lod_utils.cc DEPS phi_enforce) cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) +cc_library(string_tensor SRCS string_tensor.cc DEPS convert_utils tensor_meta tensor_base) cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor) diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index b139eb99dd4846adb3f7ef3a27507a2ca4478e6d..6b486196a4b8a0bd9785e13fa05b7010e4494243 100644 --- a/paddle/phi/core/device_context.cc +++ 
b/paddle/phi/core/device_context.cc @@ -16,6 +16,7 @@ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" +#include "paddle/phi/core/string_tensor.h" namespace phi { using DataType = paddle::experimental::DataType; @@ -167,6 +168,8 @@ struct DeviceContext::Impl { static_cast(tensor)->clear(); } else if (SelectedRows::classof(tensor)) { static_cast(tensor)->mutable_value()->clear(); + } else if (StringTensor::classof(tensor)) { + static_cast(tensor)->clear(); } else { PADDLE_THROW(errors::Unimplemented( "Only support DenseTensor and SelectedRows now.")); @@ -262,6 +265,7 @@ DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::bfloat16) DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::float16) DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64) DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128) +DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::pstring) #undef DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION diff --git a/paddle/phi/core/kernel_registry.h b/paddle/phi/core/kernel_registry.h index c3356eadcbd2156617a7a69324e7b440cc54b339..fac4b1e82792f559b31b182bd3072fe92db90d0b 100644 --- a/paddle/phi/core/kernel_registry.h +++ b/paddle/phi/core/kernel_registry.h @@ -211,6 +211,14 @@ struct KernelRegistrar { dtype == static_cast(DataType::UINT16)) { continue; } + // NOTE(zhoushunjie): Only the strings kernels can support pstring dtype + constexpr char strings_kernels_prefix[] = "strings_"; + if (dtype == static_cast(DataType::PSTRING) && + strncmp(kernel_name_cstr, + strings_kernels_prefix, + strlen(strings_kernels_prefix))) { + continue; + } ConstructKernel(reg_type, kernel_name_cstr, backend_cstr, diff --git a/paddle/phi/core/kernel_utils.h b/paddle/phi/core/kernel_utils.h index 2cc82772cf8aa09c2a67d5329dda5adfe01f21bb..642dc0b4c830e569232883f1bba2ef00f8acfb14 100644 --- a/paddle/phi/core/kernel_utils.h +++ b/paddle/phi/core/kernel_utils.h @@ -26,6 +26,7 @@ #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#include "paddle/phi/core/string_tensor.h" #include "paddle/phi/core/type_defs.h" namespace phi { @@ -232,6 +233,10 @@ struct KernelImpl { PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(SparseCsrTensor); PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(SparseCsrTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_INPUT(StringTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(StringTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(StringTensor); + /* Attribute Helpers */ PD_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(bool); @@ -266,6 +271,8 @@ struct KernelImpl { PD_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(SparseCsrTensor); PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(SparseCsrTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(StringTensor); + PD_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(StringTensor); /* End case */ template struct KernelCallHelper> { diff --git a/paddle/phi/core/meta_tensor.cc b/paddle/phi/core/meta_tensor.cc index bcbb1a4835b9d0397f6e85b7c44311bb9fe57209..04dfbf96031c2eb4fb021dbbd985f0df42a6fe34 100644 --- a/paddle/phi/core/meta_tensor.cc +++ b/paddle/phi/core/meta_tensor.cc @@ -17,6 +17,8 @@ limitations under the License. 
*/ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" +#include "paddle/phi/core/string_tensor.h" +#include "paddle/phi/core/string_tensor_utils.h" #include "paddle/phi/core/tensor_utils.h" namespace phi { @@ -33,6 +35,9 @@ void MetaTensor::set_dims(const DDim& dims) { if (phi::DenseTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_))->dims = dims; + } else if (phi::StringTensor::classof(tensor_)) { + StringTensorUtils::GetMutableMeta(static_cast(tensor_)) + ->dims = dims; } else if (phi::SelectedRows::classof(tensor_)) { DenseTensorUtils::GetMutableMeta( static_cast(tensor_)->mutable_value()) @@ -47,6 +52,8 @@ void MetaTensor::set_dtype(DataType dtype) { if (phi::DenseTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->dtype = dtype; + } else if (phi::StringTensor::classof(tensor_)) { + // No need to set dtype } else if (phi::SelectedRows::classof(tensor_)) { DenseTensorUtils::GetMutableMeta( static_cast(tensor_)->mutable_value()) @@ -61,6 +68,8 @@ void MetaTensor::set_layout(DataLayout layout) { if (phi::DenseTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->layout = layout; + } else if (phi::StringTensor::classof(tensor_)) { + // No need to set layout } else if (phi::SelectedRows::classof(tensor_)) { DenseTensorUtils::GetMutableMeta( static_cast(tensor_)->mutable_value()) diff --git a/paddle/phi/core/string_tensor.cc b/paddle/phi/core/string_tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..42f12b78204427d9103199f37500789c852cf5f6 --- /dev/null +++ b/paddle/phi/core/string_tensor.cc @@ -0,0 +1,164 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/core/string_tensor.h" + +namespace phi { + +StringTensor::StringTensor() { meta_.offset = 0; } + +StringTensor::StringTensor(Allocator* a, const StringTensorMeta& meta) + : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) { + init_holder(); +} + +StringTensor::StringTensor(Allocator* a, StringTensorMeta&& meta) + : meta_(std::move(meta)), holder_(a->Allocate(SizeOf(dtype()) * numel())) { + init_holder(); +} + +StringTensor::StringTensor(const std::shared_ptr& holder, + const StringTensorMeta& meta) + : meta_(meta), holder_(holder) {} + +StringTensor::StringTensor(const StringTensor& other) : meta_(other.meta()) { + holder_ = other.holder_; +} + +StringTensor& StringTensor::operator=(const StringTensor& other) { + meta_ = other.meta(); + holder_ = other.holder_; + return *this; +} + +StringTensor& StringTensor::operator=(StringTensor&& other) { + meta_ = std::move(other.meta_); + std::swap(holder_, other.holder_); + return *this; +} + +int64_t StringTensor::numel() const { + if (meta_.is_scalar) { + return 1; + } + return product(meta_.dims); +} + +bool StringTensor::IsSharedWith(const StringTensor& b) const { + return holder_ && holder_ == b.holder_; +} + +const Place& StringTensor::place() const { + PADDLE_ENFORCE_NOT_NULL( + holder_, + phi::errors::PreconditionNotMet( + "Tensor not initialized yet when DenseTensor::place() is called.")); + return holder_->place(); +} + +const dtype::pstring* StringTensor::data() const { + PADDLE_ENFORCE_NOT_NULL( + holder_, + phi::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + return reinterpret_cast( + reinterpret_cast(holder_->ptr()) + meta_.offset); +} + +dtype::pstring* StringTensor::data() { + PADDLE_ENFORCE_NOT_NULL( + holder_, + phi::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + return reinterpret_cast( + reinterpret_cast(holder_->ptr()) + meta_.offset); +} + +void StringTensor::set_meta(const StringTensorMeta& meta) { + PADDLE_ENFORCE( + meta.valid(), + phi::errors::InvalidArgument( + "Input meta is invalid, please check the meta attribute.")); + meta_.dims = meta.dims; + meta_.is_scalar = meta.is_scalar; + meta_.offset = meta.offset; +} + +StringTensor& StringTensor::Resize(const DDim& dims) { + meta_.dims = dims; + return *this; +} +// TODO(zhoushunjie): need to remove it for general space +void StringTensor::init_holder() { + void* ptr = holder_->ptr(); + auto& place = holder_->place(); + auto bytes_size = holder_->size(); + VLOG(6) << "Init StringTensor data with bytes:" << bytes_size; + if (place.GetType() == phi::AllocationType::CPU) { + std::memset(ptr, 0, bytes_size); + } else if (place.GetType() == phi::AllocationType::GPU) { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#ifdef PADDLE_WITH_HIP + hipMemset(ptr, 0, bytes_size); +#else + cudaMemset(ptr, 0, bytes_size); +#endif +#endif + } else { + // TODO(zhoushunjie): Need to support more places + PADDLE_THROW( + errors::Unimplemented("StringTensor can only be created in CPU or GPU " + "place. 
But now attemps to " + "create StringTensor on %s", + place.DebugString())); + } +} + +void* StringTensor::AllocateFrom(Allocator* allocator, + DataType dtype, + size_t requested_size) { + PADDLE_ENFORCE_NOT_NULL( + allocator, + errors::InvalidArgument( + "Required allocator shall not be nullptr, but received nullptr.")); + PADDLE_ENFORCE( + valid(), + errors::PreconditionNotMet( + "The meta data must be valid when call the mutable data function.")); + size_t bytes = numel() * SizeOf(this->dtype()); + if (requested_size) { + PADDLE_ENFORCE_GE(requested_size, + bytes, + errors::InvalidArgument( + "The reserved size %d should be enough to meet the " + "volume required by metadata %d.", + requested_size, + bytes)); + bytes = requested_size; + } + + if (!holder_ || holder_->size() < bytes + meta_.offset) { + meta_.offset = 0; + VLOG(10) << "Allocate string data with bytes: " << bytes; + holder_.reset(); + holder_ = allocator->Allocate(bytes); + // Initialize the allocated bytes + init_holder(); + meta_.offset = 0; + } + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + + meta_.offset); +} + +} // namespace phi diff --git a/paddle/phi/core/string_tensor.h b/paddle/phi/core/string_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..223ecaca58143bb3dce5c97d0d361e4182b24938 --- /dev/null +++ b/paddle/phi/core/string_tensor.h @@ -0,0 +1,135 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/core/allocator.h" +#include "paddle/phi/core/storage.h" +#include "paddle/phi/core/tensor_base.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace phi { + +/// \brief In Paddle 2.3, we add a new type of Tensor, StringTensor, +/// which is designed for string data management. +/// During the entire life cycle of a StringTensor, its device type and key +/// metadata are set unchanged. +class StringTensorUtils; + +class StringTensor : public TensorBase, + public TypeInfoTraits { + public: + /// \brief Construct a string tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of string tensor. + StringTensor(Allocator* a, const StringTensorMeta& meta); + + /// \brief Construct a string tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of string tensor. + StringTensor(Allocator* a, StringTensorMeta&& meta); + + StringTensor(const std::shared_ptr& holder, + const StringTensorMeta& meta); + + /// \brief Because string tensor is a resource handle, we provide a default + /// move constructor to support move semantics. + StringTensor(StringTensor&& other) = default; + + StringTensor(const StringTensor& other); + + StringTensor(); + /// \brief StringTensor shallow copy assignment. 
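+  /// The two tensors share the same data holder afterwards; the underlying
+  /// pstring buffer is not copied.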
+ StringTensor& operator=(const StringTensor& other); + + StringTensor& operator=(StringTensor&& other); + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~StringTensor() = default; + + public: + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. + static const char* name() { return "StringTensor"; } + + /// \brief Returns the number of elements contained in tensor. + /// \return The number of elements contained in tensor. + int64_t numel() const override; + + /// \brief Returns the dims of the tensor. + /// \return The dims of the tensor. + const DDim& dims() const noexcept override { return meta_.dims; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const override; + + /// \brief Returns the meta information of the tensor. + /// \return The meta information of the tensor. + const StringTensorMeta& meta() const noexcept { return meta_; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType dtype() const noexcept override { return DataType::PSTRING; } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const noexcept override { + return DataLayout::PSTRING_UNION; + } + + void set_meta(const StringTensorMeta& meta); + + /// \brief Test whether the metadata is valid. + /// \return Whether the metadata is valid. + bool valid() const noexcept override { return meta_.valid(); } + + /// \brief Test whether the storage is allocated. + /// return Whether the storage is allocated. + bool initialized() const override { return holder_ && holder_->ptr(); } + + /// \brief Check if storage is shared with other objects. + /// \return Whether the storage is shared with other objects. + bool IsSharedWith(const StringTensor& b) const; + + StringTensor& Resize(const DDim& dims); + + /// \brief Returns the actual storage size occupied by tensor, may be larger + /// than its shape dims. + /// \return The actual storage size occupied by tensor. + size_t capacity() const { return holder_->size(); } + + /// \brief Get the const data pointer value of pstring type. + /// \return The const data pointer value of pstring type. + const dtype::pstring* data() const; + dtype::pstring* data(); + + void clear() { + holder_.reset(); + meta_.offset = 0; + } + void* AllocateFrom(Allocator* allocator, + DataType dtype, + size_t requested_size = 0); + + private: + friend class StringTensorUtils; + + private: + StringTensorMeta meta_; + std::shared_ptr holder_; + void init_holder(); +}; + +} // namespace phi diff --git a/paddle/phi/core/string_tensor_utils.h b/paddle/phi/core/string_tensor_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..c1b0d09647d91c0529e0db952937d5585be9e9d9 --- /dev/null +++ b/paddle/phi/core/string_tensor_utils.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/string_tensor.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace phi { +class StringTensorUtils { + public: + static StringTensorMeta* GetMutableMeta(StringTensor* tensor) { + return &(tensor->meta_); + } +}; + +} // namespace phi diff --git a/paddle/phi/core/tensor_meta.cc b/paddle/phi/core/tensor_meta.cc index e9bb24600a0e8fc0880305c07d7873fe3f7f1d88..0140ec23937daeab1aeba5b25e4a70f25771fc52 100644 --- a/paddle/phi/core/tensor_meta.cc +++ b/paddle/phi/core/tensor_meta.cc @@ -40,4 +40,12 @@ bool DenseTensorMeta::valid() const noexcept { return valid; } +StringTensorMeta::StringTensorMeta(const DDim& dims) : dims(dims) {} + +bool StringTensorMeta::valid() const noexcept { + bool valid{true}; + valid = valid && (is_scalar || product(dims) >= 0); + return valid; +} + } // namespace phi diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index f4bd0be0b45b867b8ed98a5c50d2e3f58ea49780..4ad5abc77274cb1207b360c3dadf2de69ab2dbba 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -64,4 +64,24 @@ inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } +struct StringTensorMeta { + StringTensorMeta() = default; + explicit StringTensorMeta(const DDim& dims); + /// \brief Test whether the metadata is valid. Does not throw exceptions. + /// \return Whether the metadata is valid. + bool valid() const noexcept; + + /// During the entire life cycle of a DenseTensor, the following attributes + /// marked with `const` are expected to remain unchanged. + bool is_scalar{false}; + DDim dims; + size_t offset{0}; +}; + +inline bool operator==(const StringTensorMeta& lhs, + const StringTensorMeta& rhs) { + return (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && + (lhs.offset == rhs.offset); +} + } // namespace phi diff --git a/paddle/phi/infermeta/CMakeLists.txt b/paddle/phi/infermeta/CMakeLists.txt index f7102629d213c08ecb3da1dfdd974e3354105e61..1a19fd003222db651f36f05d41d8a21f661bc023 100644 --- a/paddle/phi/infermeta/CMakeLists.txt +++ b/paddle/phi/infermeta/CMakeLists.txt @@ -1,2 +1,3 @@ cc_library(infermeta SRCS nullary.cc unary.cc binary.cc ternary.cc multiary.cc DEPS convert_utils meta_tensor infermeta_utils) cc_library(backward_infermeta SRCS backward.cc DEPS meta_tensor convert_utils) +add_subdirectory(strings) diff --git a/paddle/phi/infermeta/strings/CMakeLists.txt b/paddle/phi/infermeta/strings/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e1a947728f51e5783f83254711a5f3b601fa971 --- /dev/null +++ b/paddle/phi/infermeta/strings/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(string_infermeta SRCS nullary.cc unary.cc DEPS convert_utils infermeta_utils) diff --git a/paddle/phi/infermeta/strings/nullary.cc b/paddle/phi/infermeta/strings/nullary.cc new file mode 100644 index 0000000000000000000000000000000000000000..807a5a9bf80a01a0831819f7e78b487ec9bfc541 --- /dev/null +++ b/paddle/phi/infermeta/strings/nullary.cc @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/phi/infermeta/strings/nullary.h" + +namespace phi { +namespace strings { + +void CreateInferMeta(const ScalarArray& shape, MetaTensor* out) { + const auto& out_dims = phi::make_ddim(shape.GetData()); + out->set_dims(out_dims); + out->set_dtype(DataType::PSTRING); + out->set_layout(DataLayout::PSTRING_UNION); +} + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/infermeta/strings/nullary.h b/paddle/phi/infermeta/strings/nullary.h new file mode 100644 index 0000000000000000000000000000000000000000..513792ffff37d8c166d8cf0d2eb822423e4e6711 --- /dev/null +++ b/paddle/phi/infermeta/strings/nullary.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace phi { +namespace strings { + +void CreateInferMeta(const std::vector& shape, MetaTensor* out); +void CreateInferMeta(const ScalarArray& shape, MetaTensor* out); + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/infermeta/strings/unary.cc b/paddle/phi/infermeta/strings/unary.cc new file mode 100644 index 0000000000000000000000000000000000000000..c4c1aa5c990eb5afd2a2ceea35944e2fe7d82c80 --- /dev/null +++ b/paddle/phi/infermeta/strings/unary.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/infermeta/strings/unary.h" + +#include "paddle/phi/core/infermeta_utils.h" + +namespace phi { +namespace strings { + +void UnchangedInferMeta(const StringTensorMeta& x_meta, MetaTensor* out) { + out->set_dims(x_meta.dims); + out->set_dtype(DataType::PSTRING); + out->set_layout(DataLayout::PSTRING_UNION); +} + +void CreateLikeInferMeta(const MetaTensor& x, MetaTensor* out) { + out->set_dims(x.dims()); + out->set_dtype(x.dtype()); + out->set_layout(x.layout()); +} + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/infermeta/strings/unary.h b/paddle/phi/infermeta/strings/unary.h new file mode 100644 index 0000000000000000000000000000000000000000..fe942db6c9f3adb29790e117bcce366be2292b59 --- /dev/null +++ b/paddle/phi/infermeta/strings/unary.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +// See Note [ Why still include the fluid headers? ] +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/core/infermeta_utils.h" +#include "paddle/phi/core/meta_tensor.h" +#include "paddle/phi/core/tensor_meta.h" + +namespace phi { +namespace strings { +// Common InferMeta Functions of StringTensor for unary operators: +void UnchangedInferMeta(const StringTensorMeta& x_meta, MetaTensor* out); + +void CreateLikeInferMeta(const MetaTensor& x, MetaTensor* out); + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index c752387ee35ba47192ef871398ed416164e1bc76..fc3529f1a44f157edbc12c1b27c1f0e6a4f632af 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -68,6 +68,8 @@ add_subdirectory(sparse) add_subdirectory(selected_rows) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) +# For strings kernels +add_subdirectory(strings) # 5. 
kernel autotune add_subdirectory(autotune) diff --git a/paddle/phi/kernels/strings/CMakeLists.txt b/paddle/phi/kernels/strings/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..54eeeb290e1bc5dd90fcb47601df47a9be903fc2 --- /dev/null +++ b/paddle/phi/kernels/strings/CMakeLists.txt @@ -0,0 +1,16 @@ +add_subdirectory(cpu) +if(WITH_GPU OR WITH_ROCM) + add_subdirectory(gpu) +endif() + +cc_library(unicode SRCS unicode.cc DEPS utf8proc) +set_property(GLOBAL PROPERTY STRING_KERNELS "") + +set(STRING_KERNEL_DEPS dense_tensor string_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} eigen_function blas math_function) +# remove this dep after removing fluid deps on tensor creation +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} phi_api_utils) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} string_infermeta) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} unicode) + +register_kernels(DEPS ${STRING_KERNEL_DEPS} SUB_DIR "strings") diff --git a/paddle/phi/kernels/strings/case_utils.h b/paddle/phi/kernels/strings/case_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..2c30102a5a60701b2d300ab2b98d018e603c0074 --- /dev/null +++ b/paddle/phi/kernels/strings/case_utils.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include + +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/kernels/strings/unicode.h" +#if defined(__NVCC__) || defined(__HIPCC__) +#include +#include +#include "paddle/phi/backends/gpu/gpu_context.h" +#endif + +namespace phi { +namespace strings { + +using pstring = dtype::pstring; +struct AsciiToLower { + HOSTDEVICE char operator()(char in) const { + return ('A' <= in && in <= 'Z') ? in - ('Z' - 'z') : in; + } +}; + +struct AsciiToUpper { + HOSTDEVICE char operator()(char in) const { + return ('a' <= in && in <= 'z') ? in ^ 0x20 : in; + } +}; + +template +struct UTF8ToLower { + HOSTDEVICE UTF8ToLower(const uint8_t* unicode_flag_map, + const uint16_t* cases_map) + : unicode_flag_map_(unicode_flag_map), cases_map_(cases_map) {} + + HOSTDEVICE uint32_t operator()(uint32_t in) const { + uint32_t flg = (in <= 0x00FFFF ? unicode_flag_map_[in] : 0); + return (strings::IsUpper(flg) ? cases_map_[in] : in); + } + + const uint8_t* unicode_flag_map_; + const uint16_t* cases_map_; +}; + +template +struct UTF8ToUpper { + HOSTDEVICE UTF8ToUpper(const uint8_t* unicode_flag_map, + const uint16_t* cases_map) + : unicode_flag_map_(unicode_flag_map), cases_map_(cases_map) {} + + HOSTDEVICE uint32_t operator()(uint32_t in) const { + uint32_t flg = (in <= 0x00FFFF ? unicode_flag_map_[in] : 0); + return (strings::IsLower(flg) ? 
cases_map_[in] : in); + } + + const uint8_t* unicode_flag_map_; + const uint16_t* cases_map_; +}; + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/kernels/strings/cpu/CMakeLists.txt b/paddle/phi/kernels/strings/cpu/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc b/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..156cea63f171cbc30127cccc3e16fc793d499b9a --- /dev/null +++ b/paddle/phi/kernels/strings/cpu/strings_copy_kernel.cc @@ -0,0 +1,57 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/strings/strings_copy_kernel.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { +namespace strings { + +template +void Copy(const Context& dev_ctx, + const StringTensor& src, + bool blocking, + StringTensor* dst) { + auto* src_ptr = src.data(); + const auto& src_place = src.place(); + + VLOG(3) << "StringTensorCopy " << src.dims() << " from " << src.place() + << " to " << src_place; + + dst->Resize(src.dims()); + dtype::pstring* dst_ptr = dev_ctx.template Alloc(dst); + + if (src_ptr == dst_ptr) { + VLOG(3) << "Skip copy the same string data async from " << src_place + << " to " << src_place; + return; + } + VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; + int64_t numel = src.numel(); + + if (src_place.GetType() == phi::AllocationType::CPU) { + for (int64_t i = 0; i < numel; ++i) { + dst_ptr[i] = src_ptr[i]; + } + } +} + +} // namespace strings +} // namespace phi + +PD_REGISTER_GENERAL_KERNEL(strings_copy, + CPU, + ALL_LAYOUT, + phi::strings::Copy, + pstring) {} diff --git a/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc b/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..9901496b2a6cd82c50ff387345aa65876ffb08f6 --- /dev/null +++ b/paddle/phi/kernels/strings/cpu/strings_lower_upper_kernel.cc @@ -0,0 +1,56 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/strings/strings_lower_upper_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/core/kernel_registry.h" + +using pstring = ::phi::dtype::pstring; + +namespace phi { +namespace strings { + +template +void StringLowerKernel(const ContextT& dev_ctx, + const StringTensor& x, + bool use_utf8_encoding, + StringTensor* out) { + StringCaseConvertKernel, + UTF8CaseConverter, + ContextT>()(dev_ctx, x, use_utf8_encoding, out); +} + +template +void StringUpperKernel(const ContextT& dev_ctx, + const StringTensor& x, + bool use_utf8_encoding, + StringTensor* out) { + StringCaseConvertKernel, + UTF8CaseConverter, + ContextT>()(dev_ctx, x, use_utf8_encoding, out); +} + +} // namespace strings +} // namespace phi + +PD_REGISTER_GENERAL_KERNEL(strings_lower, + CPU, + ALL_LAYOUT, + phi::strings::StringLowerKernel, + pstring) {} + +PD_REGISTER_GENERAL_KERNEL(strings_upper, + CPU, + ALL_LAYOUT, + phi::strings::StringUpperKernel, + pstring) {} diff --git a/paddle/phi/kernels/strings/gpu/CMakeLists.txt b/paddle/phi/kernels/strings/gpu/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/paddle/phi/kernels/strings/gpu/copy_utils.h b/paddle/phi/kernels/strings/gpu/copy_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..36cad026184242dd940df3ccbee4106af6c42d18 --- /dev/null +++ b/paddle/phi/kernels/strings/gpu/copy_utils.h @@ -0,0 +1,199 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/phi/backends/gpu/gpu_helper.h" +#include "paddle/phi/backends/gpu/gpu_info.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/string_tensor.h" + +namespace phi { +namespace strings { + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +__global__ void SerializeStringsData(const phi::dtype::pstring* src_str, + uint8_t* strings_data, + int32_t* strings_offset, + int64_t numel, + int32_t start_offset) { + if (threadIdx.x == 0 && blockIdx.x == 0) { + strings_offset[0] = start_offset; + for (int64_t i = 1; i <= numel; ++i) { + strings_offset[i] = strings_offset[i - 1] + src_str[i - 1].length() + 1; + } + } + __syncthreads(); + CUDA_KERNEL_LOOP(i, numel) { + memcpy(strings_data + strings_offset[i], + src_str[i].data(), + src_str[i].length() + 1); + } +} + +__global__ void SumStringsLen(const phi::dtype::pstring* src_ptr, + int64_t numel, + int* num) { + extern __shared__ int counter[]; + int thread_counter = 0; + CUDA_KERNEL_LOOP(i, numel) { thread_counter += src_ptr[i].length() + 1; } + counter[threadIdx.x] = thread_counter; + __syncthreads(); + if (threadIdx.x == 0) { + int block_counter = 0; + for (int i = 0; i < blockDim.x; ++i) { + block_counter += counter[i]; + } + atomicAdd(num, block_counter); + } +} + +template +int GetAllStringsSize(const Context& dev_ctx, + const phi::dtype::pstring* src_ptr, + size_t numel) { + auto nums_meta = + phi::DenseTensorMeta(DataType::INT32, {1}, phi::DataLayout::NCHW); + DenseTensor nums_tensor = phi::Empty(dev_ctx, std::move(nums_meta)); + + int* nums_ptr = dev_ctx.template Alloc(&nums_tensor); + phi::backends::gpu::GpuMemsetAsync( + nums_ptr, 0, sizeof(int), dev_ctx.stream()); + + dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); + dim3 grid_size = + dim3((numel + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); + SumStringsLen<<>>(src_ptr, numel, nums_ptr); + int num = -1; +#ifdef PADDLE_WITH_HIP + phi::backends::gpu::GpuMemcpyAsync( + &num, nums_ptr, sizeof(int), hipMemcpyDeviceToHost, dev_ctx.stream()); +#else + phi::backends::gpu::GpuMemcpyAsync( + &num, nums_ptr, sizeof(int), cudaMemcpyDeviceToHost, dev_ctx.stream()); +#endif + return num; +} + +__global__ void DeserializeCUDAKernel(const char* strings_data, + const int* strings_offset, + phi::dtype::pstring* dst_str, + int numel) { + CUDA_KERNEL_LOOP(i, numel) { + // -1 not include '\0' + auto len = strings_offset[i + 1] - strings_offset[i] - 1; + dst_str[i] = phi::dtype::pstring(strings_data + strings_offset[i], len); + } +} +#endif + +template +void SerializeOnCPU(const Context& dev_ctx, + const StringTensor& src, + DenseTensor* dst) { + int64_t numel = src.numel(); + int64_t num = sizeof(int) * (numel + 1); + auto* src_str = src.data(); + for (int64_t i = 0; i < numel; ++i) { + num += src_str[i].length() + 1; + } + dst->Resize(phi::make_ddim({num})); + uint8_t* strings_data = dev_ctx.template HostAlloc(dst); + auto* strings_offset = reinterpret_cast(strings_data); + int start_offset = sizeof(int) * (numel + 1); + for (int64_t i = 0; i <= numel; ++i) { + if (i == 0) { + strings_offset[i] = start_offset; + } else { + strings_offset[i] = strings_offset[i - 1] + src_str[i - 1].length() + 1; + } + } + for (int64_t i = 0; i < numel; ++i) { + memcpy(strings_data + strings_offset[i], + src_str[i].data(), + src_str[i].length() + 1); + } +} + +template +void DeserializeOnCPU(const Context& dev_ctx, + const DenseTensor& src, + 
StringTensor* dst) { + auto* strings_data = reinterpret_cast(src.data()); + auto* strings_offset = reinterpret_cast(strings_data); + int numel = strings_offset[0] / sizeof(int) - 1; + dst->Resize(phi::make_ddim({numel})); + dtype::pstring* dst_str = dev_ctx.template HostAlloc(dst); + for (int i = 0; i < numel; ++i) { + // -1 not include '\0' + auto len = strings_offset[i + 1] - strings_offset[i] - 1; + dst_str[i] = phi::dtype::pstring(strings_data + strings_offset[i], len); + } +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +void SerializeOnGPU(const phi::GPUContext& dev_ctx, + const StringTensor& src, + DenseTensor* dst) { + int64_t numel = src.numel(); + auto* src_str = src.data(); + // 1.get the number of bytes of all strings in string tensor + auto strings_size = GetAllStringsSize(dev_ctx, src_str, numel); + strings_size += sizeof(int32_t) * (numel + 1); + + dst->Resize(phi::make_ddim({strings_size})); + uint8_t* strings_data = dev_ctx.template Alloc(dst); + auto* strings_offset = reinterpret_cast(strings_data); + + int32_t start_offset = sizeof(int32_t) * (numel + 1); + // 2. serialize strings data to dense tensor + dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); + dim3 grid_size = + dim3((numel + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); + + SerializeStringsData<<>>( + src_str, strings_data, strings_offset, numel, start_offset); +} + +void DeserializeOnGPU(const phi::GPUContext& dev_ctx, + const DenseTensor& src, + StringTensor* dst) { + auto* strings_data = reinterpret_cast(src.data()); + auto* strings_offset = reinterpret_cast(strings_data); + int numel = 0; +#ifdef PADDLE_WITH_HIP + phi::backends::gpu::GpuMemcpySync( + &numel, strings_data, sizeof(numel), hipMemcpyDeviceToHost); +#else + phi::backends::gpu::GpuMemcpySync( + &numel, strings_data, sizeof(numel), cudaMemcpyDeviceToHost); +#endif + numel = numel / sizeof(int) - 1; + dst->Resize(phi::make_ddim({numel})); + dtype::pstring* dst_str = dev_ctx.template Alloc(dst); + + dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); + dim3 grid_size = + dim3((numel + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); + DeserializeCUDAKernel<<>>( + strings_data, strings_offset, dst_str, numel); +} +#endif + +} // namespace strings +} // namespace phi diff --git a/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu b/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..5cb4d21ec9906c253d053cbd5984f47681e2377c --- /dev/null +++ b/paddle/phi/kernels/strings/gpu/strings_copy_kernel.cu @@ -0,0 +1,126 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/strings/strings_copy_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_helper.h" +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/empty_kernel.h" + +#include "paddle/phi/backends/all_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/kernels/strings/gpu/copy_utils.h" + +using pstring = ::phi::dtype::pstring; + +namespace phi { +namespace strings { + +__global__ void CopyFromStringTensor(pstring* dst, + const pstring* src, + int64_t num) { + CUDA_KERNEL_LOOP(i, num) { dst[i] = src[i]; } +} + +template +void Copy(const Context& dev_ctx, + const StringTensor& src, + bool blocking, + StringTensor* dst) { + auto* src_ptr = src.data(); + const auto& src_place = src.place(); + auto dst_place = dst->place(); + + if (src_place == dst_place && + src_place.GetType() == phi::AllocationType::CPU) { + PADDLE_THROW( + phi::errors::InvalidArgument("The src and dst string tensor are all " + "CPU string tensor, you should call copy " + "function in CPU mode.")); + } + VLOG(3) << "StringTensorCopy " << src.dims() << " from " << src.place() + << " to " << dst_place; + + dst->Resize(src.dims()); + auto* dst_ptr = dev_ctx.template Alloc(dst); + + if (src_ptr == dst_ptr && src_place == dst_place) { + VLOG(3) << "Skip copy the same string data async from " << src_place + << " to " << dst_place; + return; + } + + VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; + + if (src_place.GetType() == phi::AllocationType::GPU && + dst_place.GetType() == phi::AllocationType::CPU) { + // Situation 1: gpu_place->cpu_place + DenseTensor gpu_serialized = phi::Empty(dev_ctx, {1}); + phi::strings::SerializeOnGPU(dev_ctx, src, &gpu_serialized); + + DenseTensor cpu_serialized; + cpu_serialized.Resize(gpu_serialized.dims()); + dev_ctx.template HostAlloc(&cpu_serialized); + + phi::Copy(dev_ctx, gpu_serialized, dst_place, false, &cpu_serialized); + + phi::strings::DeserializeOnCPU(dev_ctx, cpu_serialized, dst); + + } else if (src_place.GetType() == phi::AllocationType::CPU && + dst_place.GetType() == phi::AllocationType::GPU) { + // Situation 2: cpu_place->gpu_place + DenseTensor cpu_serialized; + cpu_serialized.Resize({1}); + dev_ctx.template HostAlloc(&cpu_serialized); + + phi::strings::SerializeOnCPU(dev_ctx, src, &cpu_serialized); + + DenseTensor gpu_serialized = + phi::EmptyLike(dev_ctx, cpu_serialized); + phi::Copy( + dev_ctx, cpu_serialized, dev_ctx.GetPlace(), false, &gpu_serialized); + + phi::strings::DeserializeOnGPU(dev_ctx, gpu_serialized, dst); + } else if (src_place.GetType() == phi::AllocationType::GPU && + dst_place.GetType() == phi::AllocationType::GPU) { + // Situation 3: gpu_place->gpu_place + auto src_gpu_place = src_place; + auto dst_gpu_place = dst_place; + auto ctx_place = dev_ctx.GetPlace(); + PADDLE_ENFORCE_EQ( + ctx_place.GetType(), + phi::AllocationType::GPU, + phi::errors::PreconditionNotMet( + "Context place error, excepted GPUPlace, but actually %s.", + ctx_place)); + int64_t numel = src.numel(); + dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); + dim3 grid_size = + dim3((numel + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); + // Copy + CopyFromStringTensor<<>>( + dst_ptr, src_ptr, numel); + } +} + +} // namespace strings +} // namespace phi + +PD_REGISTER_GENERAL_KERNEL(strings_copy, + GPU, + ALL_LAYOUT, + phi::strings::Copy, + pstring) {} diff --git 
a/paddle/phi/kernels/strings/gpu/strings_lower_upper_kernel.cu b/paddle/phi/kernels/strings/gpu/strings_lower_upper_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..53916def37bdad156482dbae3040a0a49aedac74 --- /dev/null +++ b/paddle/phi/kernels/strings/gpu/strings_lower_upper_kernel.cu @@ -0,0 +1,181 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/strings/strings_lower_upper_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/pstring.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/strings/unicode.h" + +using pstring = ::phi::dtype::pstring; +namespace phi { +namespace strings { + +template +__global__ void StringCaseConvertCUDAKernel(pstring* out, + const pstring* in, + size_t num) { + CUDA_KERNEL_LOOP(i, num) { + out[i] = pstring(in[i]); + thrust::transform(thrust::device, + in[i].begin(), + in[i].end(), + out[i].mdata(), + CharConverter()); + } +} + +template +struct AsciiCaseConverter { + void operator()(const phi::GPUContext& dev_ctx, + const pstring* in, + pstring* out, + size_t num) const { + dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); + dim3 grid_size = + dim3((num + PREDEFINED_BLOCK_SIZE - 1) / PREDEFINED_BLOCK_SIZE, 1); + StringCaseConvertCUDAKernel< + CharConverter><<>>( + out, in, num); + } +}; + +template