From 4b97812da8a9fd7f5d84b6b4b21ee701a5e9a873 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Sat, 14 Dec 2019 15:17:05 -0800 Subject: [PATCH] Add long-running snapshots to stress tests (#6171) Summary: Current implementation holds on to 10% of snapshots for 10x longer, and 1% of snapshots 100x longer. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6171 Test Plan: ``` make -j32 crash_test ``` Differential Revision: D19038399 Pulled By: maysamyabandeh fbshipit-source-id: 75da2dbb5c47a0b3f37d299b8719e392b73b42c0 --- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 3 +++ db_stress_tool/db_stress_test_base.cc | 18 +++++++++++++++--- tools/db_crashtest.py | 1 + 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index c02105a18..cb6c36049 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -175,6 +175,7 @@ DECLARE_int32(compact_range_width); DECLARE_int32(acquire_snapshot_one_in); DECLARE_bool(compare_full_db_state_snapshot); DECLARE_uint64(snapshot_hold_ops); +DECLARE_bool(long_running_snapshots); DECLARE_bool(use_multiget); DECLARE_int32(readpercent); DECLARE_int32(prefixpercent); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index a6aff93c6..b56e0fe1b 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -430,6 +430,9 @@ DEFINE_uint64(snapshot_hold_ops, 0, "If non-zero, then releases snapshots N operations after they're " "acquired."); +DEFINE_bool(long_running_snapshots, false, + "If set, hold on some some snapshots for much longer time."); + DEFINE_bool(use_multiget, false, "If set, use the batched MultiGet API for reads"); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 767097ee0..03bb25537 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ 
b/db_stress_tool/db_stress_test_base.cc @@ -682,9 +682,21 @@ void StressTest::OperateDb(ThreadState* thread) { snapshot, rand_column_family, column_family->GetName(), keystr, status_at, value_at, key_vec}; - thread->snapshot_queue.emplace( - std::min(FLAGS_ops_per_thread - 1, i + FLAGS_snapshot_hold_ops), - snap_state); + uint64_t hold_for = FLAGS_snapshot_hold_ops; + if (FLAGS_long_running_snapshots) { + // Hold 10% of snapshots for 10x more + if (thread->rand.OneIn(10)) { + assert(hold_for < port::kMaxInt64 / 10); + hold_for *= 10; + // Hold 1% of snapshots for 100x more + if (thread->rand.OneIn(10)) { + assert(hold_for < port::kMaxInt64 / 10); + hold_for *= 10; + } + } + } + uint64_t release_at = std::min(FLAGS_ops_per_thread - 1, i + hold_for); + thread->snapshot_queue.emplace(release_at, snap_state); } while (!thread->snapshot_queue.empty() && i >= thread->snapshot_queue.front().first) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index ed50b5e80..2508b1036 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -61,6 +61,7 @@ default_params = { "recycle_log_file_num": lambda: random.randint(0, 1), "reopen": 20, "snapshot_hold_ops": 100000, + "long_running_snapshots": lambda: random.randint(0, 1), "subcompactions": lambda: random.randint(1, 4), "target_file_size_base": 2097152, "target_file_size_multiplier": 2, -- GitLab