From 5d809ecef7eee46ca2363f6654162a48a542449e Mon Sep 17 00:00:00 2001 From: Wilfried Goesgens Date: Thu, 11 Oct 2018 23:22:42 -0700 Subject: [PATCH] Add compile time option to work with utf8 filename strings (#4469) Summary: The default behaviour of rocksdb is to use the `*A(` windows API functions. These accept filenames in the currently configured system encoding, be it Latin 1, utf8 or whatever. If the application intends to work entirely with UTF-8 strings internally, converting these to the system code page properly isn't always possible. Thus this patch adds a switch to use the `*W(` functions, which accept UTF-16 filenames, and uses C++11 features to translate the UTF-8 encoded std::string into a UTF-16 encoded std::wstring. This feature is a compile-time option that can be enabled by setting `WITH_WINDOWS_UTF8_FILENAMES` to true. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4469 Differential Revision: D10356011 Pulled By: yiwu-arbug fbshipit-source-id: 27b6ae9171f209085894cdf80069e8a896642044 --- CMakeLists.txt | 4 +++ port/win/env_win.cc | 85 +++++++++++++++++++++++--------------------- port/win/port_win.cc | 39 ++++++++++++++++---- port/win/port_win.h | 51 ++++++++++++++++++++++++++ 4 files changed, 131 insertions(+), 48 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b1fdf913d..1ab252095 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,10 @@ option(WITH_SNAPPY "build with SNAPPY" OFF) option(WITH_LZ4 "build with lz4" OFF) option(WITH_ZLIB "build with zlib" OFF) option(WITH_ZSTD "build with zstd" OFF) +option(WITH_WINDOWS_UTF8_FILENAMES "use UTF8 as characterset for opening files, regardles of the system code page" OFF) +if (WITH_WINDOWS_UTF8_FILENAMES) + add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES) +endif() if(MSVC) # Defaults currently different for GFLAGS.
# We will address find_package work a little later diff --git a/port/win/env_win.cc b/port/win/env_win.cc index 723a273f0..d30139067 100644 --- a/port/win/env_win.cc +++ b/port/win/env_win.cc @@ -102,7 +102,8 @@ WinEnvIO::~WinEnvIO() { Status WinEnvIO::DeleteFile(const std::string& fname) { Status result; - BOOL ret = DeleteFileA(fname.c_str()); + BOOL ret = RX_DeleteFile(RX_FN(fname).c_str()); + if(!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to delete: " + fname, @@ -114,7 +115,7 @@ Status WinEnvIO::DeleteFile(const std::string& fname) { Status WinEnvIO::Truncate(const std::string& fname, size_t size) { Status s; - int result = truncate(fname.c_str(), size); + int result = rocksdb::port::Truncate(fname, size); if (result != 0) { s = IOError("Failed to truncate: " + fname, errno); } @@ -151,8 +152,8 @@ Status WinEnvIO::NewSequentialFile(const std::string& fname, { IOSTATS_TIMER_GUARD(open_nanos); - hFile = CreateFileA( - fname.c_str(), GENERIC_READ, + hFile = RX_CreateFile( + RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Original fopen mode is "rb" fileFlags, NULL); @@ -190,7 +191,7 @@ Status WinEnvIO::NewRandomAccessFile(const std::string& fname, { IOSTATS_TIMER_GUARD(open_nanos); hFile = - CreateFileA(fname.c_str(), GENERIC_READ, + RX_CreateFile(RX_FN(fname).c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, fileFlags, NULL); } @@ -217,7 +218,7 @@ Status WinEnvIO::NewRandomAccessFile(const std::string& fname, "NewRandomAccessFile failed to map empty file: " + fname, EINVAL); } - HANDLE hMap = CreateFileMappingA(hFile, NULL, PAGE_READONLY, + HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, // Whole file at its present length 0, NULL); // Mapping name @@ -302,8 +303,8 @@ Status WinEnvIO::OpenWritableFile(const std::string& fname, HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); - hFile = 
CreateFileA( - fname.c_str(), + hFile = RX_CreateFile( + RX_FN(fname).c_str(), desired_access, // Access desired shared_mode, NULL, // Security attributes @@ -366,7 +367,7 @@ Status WinEnvIO::NewRandomRWFile(const std::string & fname, { IOSTATS_TIMER_GUARD(open_nanos); hFile = - CreateFileA(fname.c_str(), + RX_CreateFile(RX_FN(fname).c_str(), desired_access, shared_mode, NULL, // Security attributes @@ -399,8 +400,8 @@ Status WinEnvIO::NewMemoryMappedFileBuffer(const std::string & fname, HANDLE hFile = INVALID_HANDLE_VALUE; { IOSTATS_TIMER_GUARD(open_nanos); - hFile = CreateFileA( - fname.c_str(), GENERIC_READ | GENERIC_WRITE, + hFile = RX_CreateFile( + RX_FN(fname).c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, // Open only if it exists @@ -432,7 +433,7 @@ Status WinEnvIO::NewMemoryMappedFileBuffer(const std::string & fname, "The specified file size does not fit into 32-bit memory addressing: " + fname); } - HANDLE hMap = CreateFileMappingA(hFile, NULL, PAGE_READWRITE, + HANDLE hMap = RX_CreateFileMapping(hFile, NULL, PAGE_READWRITE, 0, // Whole file at its present length 0, NULL); // Mapping name @@ -483,7 +484,7 @@ Status WinEnvIO::NewDirectory(const std::string& name, // 0 - for access means read metadata { IOSTATS_TIMER_GUARD(open_nanos); - handle = ::CreateFileA(name.c_str(), 0, + handle = RX_CreateFile(RX_FN(name).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, @@ -509,8 +510,7 @@ Status WinEnvIO::FileExists(const std::string& fname) { // which is consistent with _access() impl on windows // but can be added WIN32_FILE_ATTRIBUTE_DATA attrs; - if (FALSE == GetFileAttributesExA(fname.c_str(), GetFileExInfoStandard, - &attrs)) { + if (FALSE == RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { auto lastError = GetLastError(); switch (lastError) { case ERROR_ACCESS_DENIED: @@ -535,11 +535,12 @@ Status 
WinEnvIO::GetChildren(const std::string& dir, result->clear(); std::vector output; - WIN32_FIND_DATA data; + RX_WIN32_FIND_DATA data; + memset(&data, 0, sizeof(data)); std::string pattern(dir); pattern.append("\\").append("*"); - HANDLE handle = ::FindFirstFileExA(pattern.c_str(), + HANDLE handle = RX_FindFirstFileEx(RX_FN(pattern).c_str(), FindExInfoBasic, // Do not want alternative name &data, FindExSearchNameMatch, @@ -572,8 +573,9 @@ Status WinEnvIO::GetChildren(const std::string& dir, data.cFileName[MAX_PATH - 1] = 0; while (true) { - output.emplace_back(data.cFileName); - BOOL ret =- ::FindNextFileA(handle, &data); + auto x = RX_FILESTRING(data.cFileName, RX_FNLEN(data.cFileName)); + output.emplace_back(FN_TO_RX(x)); + BOOL ret =- RX_FindNextFile(handle, &data); // If the function fails the return value is zero // and non-zero otherwise. Not TRUE or FALSE. if (ret == FALSE) { @@ -588,8 +590,7 @@ Status WinEnvIO::GetChildren(const std::string& dir, Status WinEnvIO::CreateDir(const std::string& name) { Status result; - - BOOL ret = CreateDirectoryA(name.c_str(), NULL); + BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError( @@ -606,7 +607,7 @@ Status WinEnvIO::CreateDirIfMissing(const std::string& name) { return result; } - BOOL ret = CreateDirectoryA(name.c_str(), NULL); + BOOL ret = RX_CreateDirectory(RX_FN(name).c_str(), NULL); if (!ret) { auto lastError = GetLastError(); if (lastError != ERROR_ALREADY_EXISTS) { @@ -622,7 +623,7 @@ Status WinEnvIO::CreateDirIfMissing(const std::string& name) { Status WinEnvIO::DeleteDir(const std::string& name) { Status result; - BOOL ret = RemoveDirectoryA(name.c_str()); + BOOL ret = RX_RemoveDirectory(RX_FN(name).c_str()); if (!ret) { auto lastError = GetLastError(); result = IOErrorFromWindowsError("Failed to remove dir: " + name, lastError); @@ -635,7 +636,7 @@ Status WinEnvIO::GetFileSize(const std::string& fname, Status s; 
WIN32_FILE_ATTRIBUTE_DATA attrs; - if (GetFileAttributesExA(fname.c_str(), GetFileExInfoStandard, &attrs)) { + if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { ULARGE_INTEGER file_size; file_size.HighPart = attrs.nFileSizeHigh; file_size.LowPart = attrs.nFileSizeLow; @@ -670,7 +671,7 @@ Status WinEnvIO::GetFileModificationTime(const std::string& fname, Status s; WIN32_FILE_ATTRIBUTE_DATA attrs; - if (GetFileAttributesExA(fname.c_str(), GetFileExInfoStandard, &attrs)) { + if (RX_GetFileAttributesEx(RX_FN(fname).c_str(), GetFileExInfoStandard, &attrs)) { *file_mtime = FileTimeToUnixTime(attrs.ftLastWriteTime); } else { auto lastError = GetLastError(); @@ -688,7 +689,7 @@ Status WinEnvIO::RenameFile(const std::string& src, // rename() is not capable of replacing the existing file as on Linux // so use OS API directly - if (!MoveFileExA(src.c_str(), target.c_str(), MOVEFILE_REPLACE_EXISTING)) { + if (!RX_MoveFileEx(RX_FN(src).c_str(), RX_FN(target).c_str(), MOVEFILE_REPLACE_EXISTING)) { DWORD lastError = GetLastError(); std::string text("Failed to rename: "); @@ -704,7 +705,7 @@ Status WinEnvIO::LinkFile(const std::string& src, const std::string& target) { Status result; - if (!CreateHardLinkA(target.c_str(), src.c_str(), NULL)) { + if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) { DWORD lastError = GetLastError(); if (lastError == ERROR_NOT_SAME_DEVICE) { return Status::NotSupported("No cross FS links allowed"); @@ -721,8 +722,9 @@ Status WinEnvIO::LinkFile(const std::string& src, Status WinEnvIO::NumFileLinks(const std::string& fname, uint64_t* count) { Status s; - HANDLE handle = ::CreateFileA( - fname.c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + HANDLE handle = RX_CreateFile( + RX_FN(fname).c_str(), 0, + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (INVALID_HANDLE_VALUE == handle) { @@ -758,7 +760,7 @@ Status 
WinEnvIO::AreFilesSame(const std::string& first, } // 0 - for access means read metadata - HANDLE file_1 = ::CreateFileA(first.c_str(), 0, + HANDLE file_1 = RX_CreateFile(RX_FN(first).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, @@ -773,7 +775,7 @@ Status WinEnvIO::AreFilesSame(const std::string& first, } UniqueCloseHandlePtr g_1(file_1, CloseHandleFunc); - HANDLE file_2 = ::CreateFileA(second.c_str(), 0, + HANDLE file_2 = RX_CreateFile(RX_FN(second).c_str(), 0, FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, // make opening folders possible @@ -835,7 +837,7 @@ Status WinEnvIO::LockFile(const std::string& lockFname, HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); - hFile = CreateFileA(lockFname.c_str(), (GENERIC_READ | GENERIC_WRITE), + hFile = RX_CreateFile(RX_FN(lockFname).c_str(), (GENERIC_READ | GENERIC_WRITE), ExclusiveAccessON, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); } @@ -898,8 +900,8 @@ Status WinEnvIO::NewLogger(const std::string& fname, HANDLE hFile = 0; { IOSTATS_TIMER_GUARD(open_nanos); - hFile = CreateFileA( - fname.c_str(), GENERIC_WRITE, + hFile = RX_CreateFile( + RX_FN(fname).c_str(), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_DELETE, // In RocksDb log files are // renamed and deleted before // they are closed. 
This enables @@ -992,17 +994,17 @@ Status WinEnvIO::GetAbsolutePath(const std::string& db_path, // For test compatibility we will consider starting slash as an // absolute path if ((!db_path.empty() && (db_path[0] == '\\' || db_path[0] == '/')) || - !PathIsRelativeA(db_path.c_str())) { + !RX_PathIsRelative(RX_FN(db_path).c_str())) { *output_path = db_path; return Status::OK(); } - std::string result; + RX_FILESTRING result; result.resize(MAX_PATH); // Hopefully no changes the current directory while we do this // however _getcwd also suffers from the same limitation - DWORD len = GetCurrentDirectoryA(MAX_PATH, &result[0]); + DWORD len = RX_GetCurrentDirectory(MAX_PATH, &result[0]); if (len == 0) { auto lastError = GetLastError(); return IOErrorFromWindowsError("Failed to get current working directory", @@ -1010,8 +1012,9 @@ Status WinEnvIO::GetAbsolutePath(const std::string& db_path, } result.resize(len); - - result.swap(*output_path); + std::string res = FN_TO_RX(result); + + res.swap(*output_path); return Status::OK(); } @@ -1076,7 +1079,7 @@ EnvOptions WinEnvIO::OptimizeForManifestRead( // Returns true iff the named directory exists and is a directory. 
bool WinEnvIO::DirExists(const std::string& dname) { WIN32_FILE_ATTRIBUTE_DATA attrs; - if (GetFileAttributesExA(dname.c_str(), GetFileExInfoStandard, &attrs)) { + if (RX_GetFileAttributesEx(RX_FN(dname).c_str(), GetFileExInfoStandard, &attrs)) { return 0 != (attrs.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); } return false; @@ -1085,7 +1088,7 @@ bool WinEnvIO::DirExists(const std::string& dname) { size_t WinEnvIO::GetSectorSize(const std::string& fname) { size_t sector_size = kSectorSize; - if (PathIsRelativeA(fname.c_str())) { + if (RX_PathIsRelative(RX_FN(fname).c_str())) { return sector_size; } diff --git a/port/win/port_win.cc b/port/win/port_win.cc index 75b4ec6de..6ca5bba3b 100644 --- a/port/win/port_win.cc +++ b/port/win/port_win.cc @@ -26,11 +26,30 @@ #include #include +#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES +// utf8 <-> utf16 +#include +#include +#include +#endif + #include "util/logging.h" namespace rocksdb { namespace port { +#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES +std::string utf16_to_utf8(const std::wstring& utf16) { + std::wstring_convert,wchar_t> convert; + return convert.to_bytes(utf16); +} + +std::wstring utf8_to_utf16(const std::string& utf8) { + std::wstring_convert> converter; + return converter.from_bytes(utf8); +} +#endif + void gettimeofday(struct timeval* tv, struct timezone* /* tz */) { using namespace std::chrono; @@ -110,7 +129,7 @@ void InitOnce(OnceType* once, void (*initializer)()) { struct DIR { HANDLE handle_; bool firstread_; - WIN32_FIND_DATA data_; + RX_WIN32_FIND_DATA data_; dirent entry_; DIR() : handle_(INVALID_HANDLE_VALUE), @@ -137,7 +156,7 @@ DIR* opendir(const char* name) { std::unique_ptr dir(new DIR); - dir->handle_ = ::FindFirstFileExA(pattern.c_str(), + dir->handle_ = RX_FindFirstFileEx(RX_FN(pattern).c_str(), FindExInfoBasic, // Do not want alternative name &dir->data_, FindExSearchNameMatch, @@ -148,8 +167,9 @@ DIR* opendir(const char* name) { return nullptr; } + RX_FILESTRING x(dir->data_.cFileName, 
RX_FNLEN(dir->data_.cFileName)); strcpy_s(dir->entry_.d_name, sizeof(dir->entry_.d_name), - dir->data_.cFileName); + FN_TO_RX(x).c_str()); return dir.release(); } @@ -165,14 +185,15 @@ struct dirent* readdir(DIR* dirp) { return &dirp->entry_; } - auto ret = ::FindNextFileA(dirp->handle_, &dirp->data_); + auto ret = RX_FindNextFile(dirp->handle_, &dirp->data_); if (ret == 0) { return nullptr; } + RX_FILESTRING x(dirp->data_.cFileName, RX_FNLEN(dirp->data_.cFileName)); strcpy_s(dirp->entry_.d_name, sizeof(dirp->entry_.d_name), - dirp->data_.cFileName); + FN_TO_RX(x).c_str()); return &dirp->entry_; } @@ -182,11 +203,15 @@ int closedir(DIR* dirp) { return 0; } -int truncate(const char* path, int64_t len) { +int truncate(const char* path, int64_t length) { if (path == nullptr) { errno = EFAULT; return -1; } + return rocksdb::port::Truncate(path, length); +} + +int Truncate(std::string path, int64_t len) { if (len < 0) { errno = EINVAL; @@ -194,7 +219,7 @@ int truncate(const char* path, int64_t len) { } HANDLE hFile = - CreateFile(path, GENERIC_READ | GENERIC_WRITE, + RX_CreateFile(RX_FN(path).c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, // Security attrs OPEN_EXISTING, // Truncate existing file only diff --git a/port/win/port_win.h b/port/win/port_win.h index 41ccea68d..9b8ba9ff8 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -327,11 +327,62 @@ inline void* pthread_getspecific(pthread_key_t key) { // using C-runtime to implement. Note, this does not // feel space with zeros in case the file is extended. 
int truncate(const char* path, int64_t length); +int Truncate(std::string path, int64_t length); void Crash(const std::string& srcfile, int srcline); extern int GetMaxOpenFiles(); +std::string utf16_to_utf8(const std::wstring& utf16); +std::wstring utf8_to_utf16(const std::string& utf8); } // namespace port + +#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES + +#define RX_FILESTRING std::wstring +#define RX_FN(a) rocksdb::port::utf8_to_utf16(a) +#define FN_TO_RX(a) rocksdb::port::utf16_to_utf8(a) +#define RX_FNLEN(a) ::wcslen(a) + +#define RX_DeleteFile DeleteFileW +#define RX_CreateFile CreateFileW +#define RX_CreateFileMapping CreateFileMappingW +#define RX_GetFileAttributesEx GetFileAttributesExW +#define RX_FindFirstFileEx FindFirstFileExW +#define RX_FindNextFile FindNextFileW +#define RX_WIN32_FIND_DATA WIN32_FIND_DATAW +#define RX_CreateDirectory CreateDirectoryW +#define RX_RemoveDirectory RemoveDirectoryW +#define RX_GetFileAttributesEx GetFileAttributesExW +#define RX_MoveFileEx MoveFileExW +#define RX_CreateHardLink CreateHardLinkW +#define RX_PathIsRelative PathIsRelativeW +#define RX_GetCurrentDirectory GetCurrentDirectoryW + +#else + +#define RX_FILESTRING std::string +#define RX_FN(a) a +#define FN_TO_RX(a) a +#define RX_FNLEN(a) strlen(a) + +#define RX_DeleteFile DeleteFileA +#define RX_CreateFile CreateFileA +#define RX_CreateFileMapping CreateFileMappingA +#define RX_GetFileAttributesEx GetFileAttributesExA +#define RX_FindFirstFileEx FindFirstFileExA +#define RX_CreateDirectory CreateDirectoryA +#define RX_FindNextFile FindNextFileA +#define RX_WIN32_FIND_DATA WIN32_FIND_DATA +#define RX_CreateDirectory CreateDirectoryA +#define RX_RemoveDirectory RemoveDirectoryA +#define RX_GetFileAttributesEx GetFileAttributesExA +#define RX_MoveFileEx MoveFileExA +#define RX_CreateHardLink CreateHardLinkA +#define RX_PathIsRelative PathIsRelativeA +#define RX_GetCurrentDirectory GetCurrentDirectoryA + +#endif + using port::pthread_key_t; using port::pthread_key_create; 
using port::pthread_key_delete; -- GitLab