corruption_test.cc 12.5 KB
Newer Older
1 2 3 4 5
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
J
jorlow@chromium.org 已提交
6 7 8 9
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

10
#include "rocksdb/db.h"
J
jorlow@chromium.org 已提交
11 12 13 14 15

#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
16 17
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
S
Siying Dong 已提交
18
#include "rocksdb/table.h"
19
#include "rocksdb/write_batch.h"
J
jorlow@chromium.org 已提交
20 21
#include "db/db_impl.h"
#include "db/filename.h"
J
jorlow@chromium.org 已提交
22
#include "db/log_format.h"
J
jorlow@chromium.org 已提交
23 24 25 26 27
#include "db/version_set.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"

28
namespace rocksdb {
J
jorlow@chromium.org 已提交
29 30 31 32 33 34 35

// Length passed to test::RandomString for every value produced by
// CorruptionTest::Value(); also used to size the write loop in
// NewFileErrorDuringWrite.
static const int kValueSize = 1000;

class CorruptionTest {
 public:
  test::ErrorEnv env_;
  std::string dbname_;
36
  shared_ptr<Cache> tiny_cache_;
J
jorlow@chromium.org 已提交
37 38 39
  Options options_;
  DB* db_;

40
  CorruptionTest() {
41
    tiny_cache_ = NewLRUCache(100);
J
jorlow@chromium.org 已提交
42
    options_.env = &env_;
43
    dbname_ = test::TmpDir() + "/corruption_test";
J
jorlow@chromium.org 已提交
44 45
    DestroyDB(dbname_, options_);

A
Abhishek Kona 已提交
46
    db_ = nullptr;
J
jorlow@chromium.org 已提交
47
    options_.create_if_missing = true;
48 49 50
    BlockBasedTableOptions table_options;
    table_options.block_size_deviation = 0;  // make unit test pass for now
    options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
J
jorlow@chromium.org 已提交
51 52 53 54 55 56 57 58 59
    Reopen();
    options_.create_if_missing = false;
  }

  // Deletes the open handle (if any) and calls DestroyDB on dbname_ with
  // default-constructed Options.
  ~CorruptionTest() {
    delete db_;
    Options default_options;
    DestroyDB(dbname_, default_options);
  }

A
Abhishek Kona 已提交
60
  // Closes the current handle and opens dbname_ again.
  //
  // Starts from *options when it is non-null, otherwise from options_.
  // In both cases env, arena_block_size and table_factory are overridden
  // below, so the table factory always uses tiny_cache_ as block cache.
  // Returns whatever DB::Open returns; db_ is set by DB::Open.
  Status TryReopen(Options* options = nullptr) {
    delete db_;
    db_ = nullptr;

    Options effective(options != nullptr ? *options : options_);

    BlockBasedTableOptions bbt_opts;
    bbt_opts.block_cache = tiny_cache_;
    bbt_opts.block_size_deviation = 0;

    effective.env = &env_;
    effective.arena_block_size = 4096;
    effective.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));

    return DB::Open(effective, dbname_, &db_);
  }

A
Abhishek Kona 已提交
73
  // Same as TryReopen(), but asserts that the open succeeded.
  void Reopen(Options* options = nullptr) {
    const Status open_status = TryReopen(options);
    ASSERT_OK(open_status);
  }

  void RepairDB() {
    delete db_;
A
Abhishek Kona 已提交
79
    db_ = nullptr;
80
    ASSERT_OK(::rocksdb::RepairDB(dbname_, options_));
J
jorlow@chromium.org 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
  }

  void Build(int n) {
    std::string key_space, value_space;
    WriteBatch batch;
    for (int i = 0; i < n; i++) {
      //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
      Slice key = Key(i, &key_space);
      batch.Clear();
      batch.Put(key, Value(i, &value_space));
      ASSERT_OK(db_->Write(WriteOptions(), &batch));
    }
  }

  void Check(int min_expected, int max_expected) {
96
    unsigned int next_expected = 0;
J
jorlow@chromium.org 已提交
97 98 99 100 101
    int missed = 0;
    int bad_keys = 0;
    int bad_values = 0;
    int correct = 0;
    std::string value_space;
102 103 104 105 106 107
    // Do not verify checksums. If we verify checksums then the
    // db itself will raise errors because data is corrupted.
    // Instead, we want the reads to be successful and this test
    // will detect whether the appropriate corruptions have
    // occured.
    Iterator* iter = db_->NewIterator(ReadOptions(false, true));
J
jorlow@chromium.org 已提交
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      uint64_t key;
      Slice in(iter->key());
      if (!ConsumeDecimalNumber(&in, &key) ||
          !in.empty() ||
          key < next_expected) {
        bad_keys++;
        continue;
      }
      missed += (key - next_expected);
      next_expected = key + 1;
      if (iter->value() != Value(key, &value_space)) {
        bad_values++;
      } else {
        correct++;
      }
    }
    delete iter;

    fprintf(stderr,
            "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
            min_expected, max_expected, correct, bad_keys, bad_values, missed);
    ASSERT_LE(min_expected, correct);
    ASSERT_GE(max_expected, correct);
  }

134
  // Flips the high bit (XOR 0x80) of `bytes_to_corrupt` consecutive bytes
  // of file `fname`, starting at `offset`, and writes the file back.
  //
  // offset           >= 0: absolute position from the start of the file;
  //                  <  0: position relative to the end of the file.
  // bytes_to_corrupt number of bytes to flip.
  //
  // Both values are clamped so the corrupted range stays inside the file.
  // The clamping is done in 64-bit arithmetic so that the file size
  // reported by stat() is never narrowed into an int. The file is read and
  // written through Env::Default(), not through env_.
  void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) {
    struct stat sbuf;
    if (stat(fname.c_str(), &sbuf) != 0) {
      const char* msg = strerror(errno);
      ASSERT_TRUE(false) << fname << ": " << msg;
    }
    const int64_t file_size = static_cast<int64_t>(sbuf.st_size);

    int64_t start = offset;
    if (start < 0) {
      // Relative to end of file; make it absolute
      start = (-start > file_size) ? 0 : file_size + start;
    }
    if (start > file_size) {
      start = file_size;
    }
    int64_t count = bytes_to_corrupt;
    if (start + count > file_size) {
      count = file_size - start;
    }

    // Do it
    std::string contents;
    Status s = ReadFileToString(Env::Default(), fname, &contents);
    ASSERT_TRUE(s.ok()) << s.ToString();
    for (int64_t i = 0; i < count; i++) {
      contents[static_cast<size_t>(start + i)] ^= 0x80;
    }
    s = WriteStringToFile(Env::Default(), contents, fname);
    ASSERT_TRUE(s.ok()) << s.ToString();
  }

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
  // Corrupts the file of type `filetype` that has the highest file number
  // among the children of dbname_. Asserts that such a file exists, then
  // forwards `offset` and `bytes_to_corrupt` to CorruptFile().
  void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
    std::vector<std::string> children;
    ASSERT_OK(env_.GetChildren(dbname_, &children));

    std::string target_path;
    int newest_number = -1;
    for (const std::string& child : children) {
      uint64_t file_number;
      FileType child_type;
      if (!ParseFileName(child, &file_number, &child_type)) {
        continue;
      }
      if (child_type != filetype) {
        continue;
      }
      // Keep only the latest (highest-numbered) file.
      if (static_cast<int>(file_number) <= newest_number) {
        continue;
      }
      target_path = dbname_ + "/" + child;
      newest_number = static_cast<int>(file_number);
    }
    ASSERT_TRUE(!target_path.empty()) << filetype;

    CorruptFile(target_path, offset, bytes_to_corrupt);
  }

  // corrupts exactly one file at level `level`. if no file found at level,
  // asserts
  void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) {
    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    for (const auto& m : metadata) {
      if (m.level == level) {
        CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt);
        return;
      }
    }
    ASSERT_TRUE(false) << "no file found at level";
  }


203 204 205 206 207 208 209 210
  // Reads database property `name` and parses it as a decimal int.
  // Returns -1 when the property is unavailable or does not start with
  // an integer.
  int Property(const std::string& name) {
    std::string text;
    if (!db_->GetProperty(name, &text)) {
      return -1;
    }
    int parsed;
    if (sscanf(text.c_str(), "%d", &parsed) != 1) {
      return -1;
    }
    return parsed;
  }

  // Returns the key for index i: i printed as a decimal number,
  // zero-padded to a width of 16. The bytes live in *storage, and the
  // returned Slice refers to *storage.
  Slice Key(int i, std::string* storage) {
    char formatted[100];
    snprintf(formatted, sizeof(formatted), "%016d", i);
    *storage = formatted;
    return Slice(*storage);
  }

  // Returns the value paired with key k: kValueSize is passed as the
  // length to test::RandomString, with a Random generator seeded by k,
  // and *storage receives the result.
  Slice Value(int k, std::string* storage) {
    Random rnd(k);
    return test::RandomString(&rnd, kValueSize, storage);
  }
};

// Corrupts two spots in the newest log file and checks how many of the
// 100 written records are still readable after reopening.
TEST(CorruptionTest, Recovery) {
  const int kWritten = 100;
  Build(kWritten);
  Check(kWritten, kWritten);

  Corrupt(kLogFile, 19, 1);  // WriteBatch tag for first record
  Corrupt(kLogFile, log::kBlockSize + 1000, 1);  // Somewhere in second block
  Reopen();

  // The 64 records in the first two log blocks are completely lost.
  const int kSurviving = 36;
  Check(kSurviving, kSurviving);
}

// With env_.writable_file_error_ set, reopening the database must fail.
TEST(CorruptionTest, RecoverWriteError) {
  env_.writable_file_error_ = true;
  const Status reopen_status = TryReopen();
  ASSERT_TRUE(!reopen_status.ok());
}

// With env_.writable_file_error_ set, writes must start failing at some
// point and keep failing; once the error flag is cleared the database
// can be reopened.
TEST(CorruptionTest, NewFileErrorDuringWrite) {
  // Do enough writing to force minor compaction
  env_.writable_file_error_ = true;
  const int write_count = 3 + (Options().write_buffer_size / kValueSize);
  std::string value_buf;
  Status last_status;
  bool seen_failure = false;
  int written = 0;
  while (written < write_count) {
    WriteBatch batch;
    batch.Put("a", Value(100, &value_buf));
    last_status = db_->Write(WriteOptions(), &batch);
    seen_failure = seen_failure || !last_status.ok();
    // After the first failed write, every later write must fail too.
    ASSERT_TRUE(!seen_failure || !last_status.ok());
    ++written;
  }
  ASSERT_TRUE(!last_status.ok());
  ASSERT_GE(env_.num_writable_file_errors_, 1);
  env_.writable_file_error_ = false;
  Reopen();
}

// Flushes and compacts 100 entries into table files, corrupts one byte of
// the newest table file, and expects exactly one entry to be lost.
TEST(CorruptionTest, TableFile) {
  Build(100);
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);
  dbi->TEST_FlushMemTable();
  dbi->TEST_CompactRange(0, nullptr, nullptr);
  dbi->TEST_CompactRange(1, nullptr, nullptr);

  Corrupt(kTableFile, 100, 1);
  Check(99, 99);
}

// Corrupts 500 bytes located 2000 bytes before the end of the newest table
// file (CorruptFile treats a negative offset as relative to end of file)
// and expects between 5000 and 9999 of the 10000 entries to survive.
TEST(CorruptionTest, TableFileIndexData) {
  Build(10000);  // Enough to build multiple Tables
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);
  dbi->TEST_FlushMemTable();

  Corrupt(kTableFile, -2000, 500);
  Reopen();
  Check(5000, 9999);
}

// After RepairDB() and a reopen, all 1000 written entries must still be
// readable.
TEST(CorruptionTest, MissingDescriptor) {
  const int kEntries = 1000;
  Build(kEntries);
  RepairDB();
  Reopen();
  Check(kEntries, kEntries);
}

// Overwrites one key several times, repairs the database, and verifies
// that the latest value wins both before and after a further write and
// reopen.
TEST(CorruptionTest, SequenceNumberRecovery) {
  const char* const kVersions[] = {"v1", "v2", "v3", "v4", "v5"};
  for (const char* version : kVersions) {
    ASSERT_OK(db_->Put(WriteOptions(), "foo", version));
  }
  RepairDB();
  Reopen();

  std::string found;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &found));
  ASSERT_EQ("v5", found);

  // Write something.  If sequence number was not recovered properly,
  // it will be hidden by an earlier write.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &found));
  ASSERT_EQ("v6", found);

  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &found));
  ASSERT_EQ("v6", found);
}

// Corrupts the first 1000 bytes of the newest descriptor file: reopening
// must then fail, and RepairDB() must make the stored value readable
// again.
TEST(CorruptionTest, CorruptedDescriptor) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);
  dbi->TEST_FlushMemTable();
  dbi->TEST_CompactRange(0, nullptr, nullptr);

  Corrupt(kDescriptorFile, 0, 1000);
  Status s = TryReopen();
  ASSERT_TRUE(!s.ok());

  RepairDB();
  Reopen();
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
  ASSERT_EQ("hello", v);
}

// Corrupts one byte of the single flushed table file, then writes enough
// data to force compactions and checks that all 10000 entries written
// afterwards are readable.
TEST(CorruptionTest, CompactionInputError) {
  Build(10);
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);
  dbi->TEST_FlushMemTable();
  const int last = dbi->MaxMemCompactionLevel();
  ASSERT_EQ(1, Property("rocksdb.num-files-at-level" + NumberToString(last)));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Force compactions by writing lots of values
  Build(10000);
  Check(10000, 10000);
}

// With paranoid_checks enabled, a corrupted level-0 table file must
// eventually make writes fail, and once one write has failed every later
// write must fail as well.
TEST(CorruptionTest, CompactionInputErrorParanoid) {
  Options options;
  options.paranoid_checks = true;
  options.write_buffer_size = 131072;
  options.max_write_buffer_number = 2;
  Reopen(&options);
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);

  // Fill levels >= 1 so memtable flush outputs to level 0
  for (int level = 1; level < dbi->NumberLevels(); level++) {
    // Setup writes happen before any corruption, so they must succeed.
    ASSERT_OK(dbi->Put(WriteOptions(), "", "begin"));
    ASSERT_OK(dbi->Put(WriteOptions(), "~", "end"));
    dbi->TEST_FlushMemTable();
  }

  options.max_mem_compaction_level = 0;
  Reopen(&options);

  // Reopen() replaced db_, so the old dbi pointer is stale.
  dbi = static_cast<DBImpl*>(db_);
  Build(10);
  dbi->TEST_FlushMemTable();
  dbi->TEST_WaitForCompact();
  ASSERT_EQ(1, Property("rocksdb.num-files-at-level0"));

  CorruptTableFileAtLevel(0, 100, 1);
  Check(9, 9);

  // Write must eventually fail because of corrupted table
  Status s;
  std::string tmp1, tmp2;
  bool failed = false;
  for (int i = 0; i < 10000; i++) {
    s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
    if (!s.ok()) {
      failed = true;
    }
    // if one write failed, every subsequent write must fail, too
    ASSERT_TRUE(!failed || !s.ok()) << "write did not fail in a corrupted db";
  }
  ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}

// After corrupting the table file that holds keys 0..9, a newly written
// key (1000) must be readable both before and after the memtable holding
// it is flushed.
TEST(CorruptionTest, UnrelatedKeys) {
  Build(10);
  // db_ is a DB*; a hierarchy downcast is a static_cast, not a
  // reinterpret_cast.
  DBImpl* dbi = static_cast<DBImpl*>(db_);
  dbi->TEST_FlushMemTable();
  Corrupt(kTableFile, 100, 1);

  std::string tmp1, tmp2;
  ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2)));
  std::string v;
  ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
  ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
  dbi->TEST_FlushMemTable();
  ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v));
  ASSERT_EQ(Value(1000, &tmp2).ToString(), v);
}

I
Igor Canadi 已提交
407 408 409 410 411 412 413 414 415 416 417
// Damages a live table file behind the database's back and expects the
// next open (with paranoid_checks) to report Corruption.
//   iter 0: the file is replaced by one with different contents/size;
//   iter 1: the file is deleted.
// After each round the database is destroyed and recreated.
TEST(CorruptionTest, FileSystemStateCorrupted) {
  for (int iter = 0; iter < 2; ++iter) {
    Options options;
    options.paranoid_checks = true;
    options.create_if_missing = true;
    Reopen(&options);
    Build(10);
    ASSERT_OK(db_->Flush(FlushOptions()));
    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    ASSERT_GT(metadata.size(), size_t(0));
    std::string filename = dbname_ + metadata[0].name;

    delete db_;
    db_ = nullptr;

    if (iter == 0) {  // corrupt file size
      unique_ptr<WritableFile> file;
      // Check the status before dereferencing `file`.
      ASSERT_OK(env_.NewWritableFile(filename, &file, EnvOptions()));
      ASSERT_OK(file->Append(Slice("corrupted sst")));
      file.reset();
    } else {  // delete the file
      ASSERT_OK(env_.DeleteFile(filename));
    }

    Status x = TryReopen(&options);
    ASSERT_TRUE(x.IsCorruption());
    DestroyDB(dbname_, options_);
    Reopen(&options);
  }
}

440
}  // namespace rocksdb
J
jorlow@chromium.org 已提交
441 442

int main(int argc, char** argv) {
443
  return rocksdb::test::RunAllTests();
J
jorlow@chromium.org 已提交
444
}