diff --git a/HISTORY.md b/HISTORY.md
index b19eec2011043b3e4205af62aaa6e820401db43e..403f06ea9ecd984160a44064dd83c78400ad0e14 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,10 @@
 # Rocksdb Change Log
 
+## Unreleased
+
+### New Features
+* Added an experimental API for handling flashcache devices (blacklists background threads from caching their reads) -- NewFlashcacheAwareEnv
+
 ## 3.10.0 (3/24/2015)
 ### New Features
 * GetThreadStatus() is now able to report detailed thread status, including:
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 802d96467a0c53d42a7a2a6e6aeba2555c06f721..b39cc40dea5880c89f1f14e8396b398c00108edb 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -42,6 +42,7 @@ int main() {
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/perf_context.h"
+#include "rocksdb/utilities/flashcache.h"
 #include "port/port.h"
 #include "port/stack_trace.h"
 #include "util/crc32c.h"
@@ -531,6 +532,11 @@ DEFINE_string(compaction_fadvice, "NORMAL",
 static auto FLAGS_compaction_fadvice_e =
   rocksdb::Options().access_hint_on_compaction_start;
 
+DEFINE_bool(disable_flashcache_for_background_threads, false,
+            "Disable flashcache for background threads");
+
+DEFINE_string(flashcache_dev, "", "Path to flashcache device");
+
 DEFINE_bool(use_tailing_iterator, false,
             "Use tailing iterator to access a series of keys instead of get");
 DEFINE_int64(iter_refresh_interval_us, -1,
@@ -1680,6 +1686,10 @@ class Benchmark {
   }
 
  private:
+  // Set only when --disable_flashcache_for_background_threads is given;
+  // options.env then points at the Env owned here.
+  std::unique_ptr<Env> flashcache_aware_env_;
+
   struct ThreadArg {
     Benchmark* bm;
     SharedState* shared;
@@ -1992,7 +2002,18 @@ class Benchmark {
       FLAGS_env->LowerThreadPoolIOPriority(Env::LOW);
       FLAGS_env->LowerThreadPoolIOPriority(Env::HIGH);
     }
-    options.env = FLAGS_env;
+    if (FLAGS_disable_flashcache_for_background_threads) {
+      flashcache_aware_env_ =
+          NewFlashcacheAwareEnv(FLAGS_env, FLAGS_flashcache_dev);
+      if (flashcache_aware_env_.get() == nullptr) {
+        fprintf(stderr, "Failed to open flashcache device at %s\n",
+                FLAGS_flashcache_dev.c_str());
+        std::abort();
+      }
+      options.env = flashcache_aware_env_.get();
+    } else {
+      options.env = FLAGS_env;
+    }
     options.disableDataSync = FLAGS_disable_data_sync;
     options.use_fsync = FLAGS_use_fsync;
     options.wal_dir = FLAGS_wal_dir;
diff --git a/include/rocksdb/utilities/flashcache.h b/include/rocksdb/utilities/flashcache.h
new file mode 100644
index 0000000000000000000000000000000000000000..c9bd4fd269b3037a7f97e7ae64e796f52d821c70
--- /dev/null
+++ b/include/rocksdb/utilities/flashcache.h
@@ -0,0 +1,24 @@
+// Copyright (c) 2014, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#pragma once
+
+#include <string>
+#include "rocksdb/env.h"
+
+namespace rocksdb {
+
+// This API is experimental. We will mark it stable once we run it in production
+// for a while.
+// NewFlashcacheAwareEnv() creates an Env that blacklists all background
+// threads (used for flush and compaction) from using flashcache to cache their
+// reads. Reads from compaction thread don't need to be cached because they are
+// going to be soon made obsolete (due to nature of compaction)
+// Usually you would pass Env::Default() as base.
+// flashcache_dev is the flashcache device path. Returns nullptr on failure.
+extern std::unique_ptr<Env> NewFlashcacheAwareEnv(
+    Env* base, const std::string& flashcache_dev);
+
+}  // namespace rocksdb
diff --git a/src.mk b/src.mk
index 138a6d7256b4a2f5543cf81adec8fafcf4eb762e..bdfb79da7ba0732e781a31557f2a94e43106181e 100644
--- a/src.mk
+++ b/src.mk
@@ -97,6 +97,7 @@ LIB_SOURCES = \
   utilities/document/document_db.cc \
   utilities/document/json_document_builder.cc \
   utilities/document/json_document.cc \
+  utilities/flashcache/flashcache.cc \
   utilities/geodb/geodb_impl.cc \
   utilities/leveldb_options/leveldb_options.cc \
   utilities/merge_operators/put.cc \
diff --git a/third-party/flashcache/flashcache_ioctl.h b/third-party/flashcache/flashcache_ioctl.h
new file mode 100644
index 0000000000000000000000000000000000000000..af111ab4d475c71aaef70298ea2bfe1aa9218644
--- /dev/null
+++ b/third-party/flashcache/flashcache_ioctl.h
@@ -0,0 +1,55 @@
+/****************************************************************************
+ * flashcache_ioctl.h
+ * FlashCache: Device mapper target for block-level disk caching
+ *
+ * Copyright 2010 Facebook, Inc.
+ * Author: Mohan Srinivasan (mohan@facebook.com)
+ *
+ * Based on DM-Cache:
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Ming Zhao (mingzhao@ufl.edu)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+#ifdef OS_LINUX
+#ifndef FLASHCACHE_IOCTL_H
+#define FLASHCACHE_IOCTL_H
+
+#include <linux/types.h>
+
+#define FLASHCACHE_IOCTL 0xfe
+
+enum {
+  FLASHCACHEADDNCPID_CMD=200,
+  FLASHCACHEDELNCPID_CMD,
+  FLASHCACHEDELNCALL_CMD,
+  FLASHCACHEADDWHITELIST_CMD,
+  FLASHCACHEDELWHITELIST_CMD,
+  FLASHCACHEDELWHITELISTALL_CMD,
+};
+
+#define FLASHCACHEADDNCPID _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDNCPID_CMD, pid_t)
+#define FLASHCACHEDELNCPID _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCPID_CMD, pid_t)
+#define FLASHCACHEDELNCALL _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCALL_CMD, pid_t)
+
+#define FLASHCACHEADDBLACKLIST FLASHCACHEADDNCPID
+#define FLASHCACHEDELBLACKLIST FLASHCACHEDELNCPID
+#define FLASHCACHEDELALLBLACKLIST FLASHCACHEDELNCALL
+
+#define FLASHCACHEADDWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDWHITELIST_CMD, pid_t)
+#define FLASHCACHEDELWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELIST_CMD, pid_t)
+#define FLASHCACHEDELALLWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELISTALL_CMD, pid_t)
+
+#endif /* FLASHCACHE_IOCTL_H */
+#endif /* OS_LINUX */
diff --git a/utilities/flashcache/flashcache.cc b/utilities/flashcache/flashcache.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c31d127a9f7abace88cb12fdf36aa0cfee77f163
--- /dev/null
+++ b/utilities/flashcache/flashcache.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2014, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#include "rocksdb/utilities/flashcache.h"
+
+#include "utilities/flashcache/flashcache.h"
+
+#ifdef OS_LINUX
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "third-party/flashcache/flashcache_ioctl.h"
+#endif
+
+namespace rocksdb {
+
+#if !defined(ROCKSDB_LITE) && defined(OS_LINUX)
+// Most of the code that handles flashcache is copied from websql's branch of
+// mysql-5.6
+//
+// Env wrapper that whitelists the current process with flashcache and
+// blacklists each thread for the duration of a job run through Schedule(),
+// so reads issued by those jobs are not cached.
+class FlashcacheAwareEnv : public EnvWrapper {
+ public:
+  // Takes ownership of cachedev_fd (closed in the destructor). If the initial
+  // del-all-whitelist ioctl fails, the descriptor is closed right away and
+  // cachedev_fd_ becomes -1, which disables all later flashcache ioctls.
+  FlashcacheAwareEnv(Env* base, int cachedev_fd)
+      : EnvWrapper(base), cachedev_fd_(cachedev_fd) {
+    pid_t pid = getpid();
+    /* cleanup previous whitelistings */
+    if (ioctl(cachedev_fd_, FLASHCACHEDELALLWHITELIST, &pid) < 0) {
+      close(cachedev_fd_);
+      cachedev_fd_ = -1;
+      fprintf(stderr, "ioctl del-all-whitelist for flashcache failed\n");
+      return;
+    }
+    if (ioctl(cachedev_fd_, FLASHCACHEADDWHITELIST, &pid) < 0) {
+      fprintf(stderr, "ioctl add-whitelist for flashcache failed\n");
+    }
+  }
+
+  ~FlashcacheAwareEnv() {
+    // cachedev_fd_ is -1 if it's uninitialized
+    if (cachedev_fd_ != -1) {
+      pid_t pid = getpid();
+      if (ioctl(cachedev_fd_, FLASHCACHEDELWHITELIST, &pid) < 0) {
+        fprintf(stderr, "ioctl del-whitelist for flashcache failed\n");
+      }
+      close(cachedev_fd_);
+    }
+  }
+
+  // Blacklists the calling thread (by tid) via the FLASHCACHEADDNCPID ioctl.
+  // Returns the ioctl() result, which is negative on failure.
+  static int BlacklistCurrentThread(int cachedev_fd) {
+    pid_t pid = syscall(SYS_gettid);
+    return ioctl(cachedev_fd, FLASHCACHEADDNCPID, &pid);
+  }
+
+  // Undoes BlacklistCurrentThread() via the FLASHCACHEDELNCPID ioctl.
+  // Returns the ioctl() result, which is negative on failure.
+  static int WhitelistCurrentThread(int cachedev_fd) {
+    pid_t pid = syscall(SYS_gettid);
+    return ioctl(cachedev_fd, FLASHCACHEDELNCPID, &pid);
+  }
+
+  int GetFlashCacheFileDescriptor() const { return cachedev_fd_; }
+
+  // Bundles a scheduled job with the flashcache descriptor for
+  // BgThreadWrapper.
+  struct Arg {
+    Arg(void (*f)(void* arg), void* a, int _cachedev_fd)
+        : original_function_(f), original_arg_(a), cachedev_fd(_cachedev_fd) {}
+
+    void (*original_function_)(void* arg);
+    void* original_arg_;
+    int cachedev_fd;
+  };
+
+  // Trampoline passed to the base Env: blacklists the current thread, runs the
+  // original job, whitelists the thread again and frees the Arg.
+  static void BgThreadWrapper(void* a) {
+    // Owning pointer: releases the Arg allocated in Schedule() on return.
+    std::unique_ptr<Arg> arg(static_cast<Arg*>(a));
+    if (arg->cachedev_fd != -1) {
+      if (BlacklistCurrentThread(arg->cachedev_fd) < 0) {
+        fprintf(stderr, "ioctl add-nc-pid for flashcache failed\n");
+      }
+    }
+    arg->original_function_(arg->original_arg_);
+    if (arg->cachedev_fd != -1) {
+      if (WhitelistCurrentThread(arg->cachedev_fd) < 0) {
+        fprintf(stderr, "ioctl del-nc-pid for flashcache failed\n");
+      }
+    }
+  }
+
+  // Intentionally a no-op: always returns 0.
+  int UnSchedule(void* arg, Priority pri) override {
+    // no unschedule for you
+    return 0;
+  }
+
+  // Wraps the job in a heap-allocated Arg and forwards it to the base Env;
+  // BgThreadWrapper frees the Arg once the job has run.
+  void Schedule(void (*f)(void* arg), void* a, Priority pri,
+                void* tag = nullptr) override {
+    EnvWrapper::Schedule(&BgThreadWrapper, new Arg(f, a, cachedev_fd_), pri,
+                         tag);
+  }
+
+ private:
+  int cachedev_fd_;
+};
+
+// Opens flashcache_dev and wraps base in a FlashcacheAwareEnv.
+// Returns nullptr if the device cannot be opened.
+std::unique_ptr<Env> NewFlashcacheAwareEnv(Env* base,
+                                           const std::string& flashcache_dev) {
+  // Cachedev should remain open or ioctl will be lost
+  int cachedev_fd = open(flashcache_dev.c_str(), O_RDONLY);
+  if (cachedev_fd < 0) {
+    fprintf(stderr, "Open flash device failed\n");
+    return nullptr;
+  }
+
+  // The Env owns cachedev_fd from here on and closes it in its destructor.
+  return std::unique_ptr<Env>(new FlashcacheAwareEnv(base, cachedev_fd));
+}
+
+namespace {
+// Returns the descriptor held by env, or -1 if env is not a
+// FlashcacheAwareEnv (nullptr included) or holds no open descriptor.
+int GetFlashcacheFd(Env* env) {
+  auto* flashcache_env = dynamic_cast<FlashcacheAwareEnv*>(env);
+  if (flashcache_env == nullptr) {
+    return -1;
+  }
+  return flashcache_env->GetFlashCacheFileDescriptor();
+}
+}  // namespace
+
+// Blacklists the calling thread; returns -1 without issuing an ioctl when
+// flashcache_aware_env provides no usable descriptor.
+int FlashcacheBlacklistCurrentThread(Env* flashcache_aware_env) {
+  int fd = GetFlashcacheFd(flashcache_aware_env);
+  if (fd == -1) {
+    return -1;
+  }
+  return FlashcacheAwareEnv::BlacklistCurrentThread(fd);
+}
+
+// Whitelists the calling thread; returns -1 without issuing an ioctl when
+// flashcache_aware_env provides no usable descriptor.
+int FlashcacheWhitelistCurrentThread(Env* flashcache_aware_env) {
+  int fd = GetFlashcacheFd(flashcache_aware_env);
+  if (fd == -1) {
+    return -1;
+  }
+  return FlashcacheAwareEnv::WhitelistCurrentThread(fd);
+}
+
+#else  // !defined(ROCKSDB_LITE) && defined(OS_LINUX)
+// Flashcache is unsupported in this configuration: every entry point fails.
+std::unique_ptr<Env> NewFlashcacheAwareEnv(Env* base,
+                                           const std::string& flashcache_dev) {
+  return nullptr;
+}
+int FlashcacheBlacklistCurrentThread(Env* flashcache_aware_env) { return -1; }
+int FlashcacheWhitelistCurrentThread(Env* flashcache_aware_env) { return -1; }
+
+#endif  // !defined(ROCKSDB_LITE) && defined(OS_LINUX)
+
+}  // namespace rocksdb
diff --git a/utilities/flashcache/flashcache.h b/utilities/flashcache/flashcache.h
new file mode 100644
index 0000000000000000000000000000000000000000..a8a3d7d13379043e7e83df18299ef999900c98d3
--- /dev/null
+++ b/utilities/flashcache/flashcache.h
@@ -0,0 +1,20 @@
+// Copyright (c) 2014, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#pragma once
+
+#include <string>
+#include "rocksdb/env.h"
+
+namespace rocksdb {
+
+// This is internal API that will make hacking on flashcache easier. Not sure if
+// we need to expose this to public users, probably not
+// Both act on the calling thread and return -1 when flashcache_aware_env has
+// no usable flashcache descriptor, otherwise the underlying ioctl() result.
+extern int FlashcacheBlacklistCurrentThread(Env* flashcache_aware_env);
+extern int FlashcacheWhitelistCurrentThread(Env* flashcache_aware_env);
+
+}  // namespace rocksdb