corruption_test.cc 12.2 KB
Newer Older
1 2 3 4 5
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
J
jorlow@chromium.org 已提交
6 7 8 9
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

10
#include "rocksdb/db.h"
J
jorlow@chromium.org 已提交
11 12 13 14 15

#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
16 17
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
S
Siying Dong 已提交
18
#include "rocksdb/table.h"
19
#include "rocksdb/write_batch.h"
J
jorlow@chromium.org 已提交
20 21
#include "db/db_impl.h"
#include "db/filename.h"
J
jorlow@chromium.org 已提交
22
#include "db/log_format.h"
J
jorlow@chromium.org 已提交
23 24 25 26 27
#include "db/version_set.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"

28
namespace rocksdb {
J
jorlow@chromium.org 已提交
29 30 31 32 33 34 35

// Length passed to test::RandomString() for every value produced by
// CorruptionTest::Value(); also used by NewFileErrorDuringWrite to work out
// how many writes to issue.
static const int kValueSize = 1000;

class CorruptionTest {
 public:
  test::ErrorEnv env_;
  std::string dbname_;
36
  shared_ptr<Cache> tiny_cache_;
J
jorlow@chromium.org 已提交
37 38 39
  Options options_;
  DB* db_;

40
  CorruptionTest() {
41
    tiny_cache_ = NewLRUCache(100);
J
jorlow@chromium.org 已提交
42
    options_.env = &env_;
43
    dbname_ = test::TmpDir() + "/corruption_test";
J
jorlow@chromium.org 已提交
44 45
    DestroyDB(dbname_, options_);

A
Abhishek Kona 已提交
46
    db_ = nullptr;
J
jorlow@chromium.org 已提交
47
    options_.create_if_missing = true;
H
Haobo Xu 已提交
48
    options_.block_size_deviation = 0; // make unit test pass for now
J
jorlow@chromium.org 已提交
49 50 51 52 53 54 55 56 57
    Reopen();
    options_.create_if_missing = false;
  }

  ~CorruptionTest() {
     delete db_;
     DestroyDB(dbname_, Options());
  }

A
Abhishek Kona 已提交
58
  Status TryReopen(Options* options = nullptr) {
J
jorlow@chromium.org 已提交
59
    delete db_;
A
Abhishek Kona 已提交
60
    db_ = nullptr;
J
jorlow@chromium.org 已提交
61 62
    Options opt = (options ? *options : options_);
    opt.env = &env_;
63
    opt.block_cache = tiny_cache_;
H
Haobo Xu 已提交
64
    opt.block_size_deviation = 0;
X
Xing Jin 已提交
65
    opt.arena_block_size = 4096;
J
jorlow@chromium.org 已提交
66 67 68
    return DB::Open(opt, dbname_, &db_);
  }

A
Abhishek Kona 已提交
69
  void Reopen(Options* options = nullptr) {
J
jorlow@chromium.org 已提交
70 71 72 73 74
    ASSERT_OK(TryReopen(options));
  }

  void RepairDB() {
    delete db_;
A
Abhishek Kona 已提交
75
    db_ = nullptr;
76
    ASSERT_OK(::rocksdb::RepairDB(dbname_, options_));
J
jorlow@chromium.org 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
  }

  void Build(int n) {
    std::string key_space, value_space;
    WriteBatch batch;
    for (int i = 0; i < n; i++) {
      //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n);
      Slice key = Key(i, &key_space);
      batch.Clear();
      batch.Put(key, Value(i, &value_space));
      ASSERT_OK(db_->Write(WriteOptions(), &batch));
    }
  }

  void Check(int min_expected, int max_expected) {
92
    unsigned int next_expected = 0;
J
jorlow@chromium.org 已提交
93 94 95 96 97
    int missed = 0;
    int bad_keys = 0;
    int bad_values = 0;
    int correct = 0;
    std::string value_space;
98 99 100 101 102 103
    // Do not verify checksums. If we verify checksums then the
    // db itself will raise errors because data is corrupted.
    // Instead, we want the reads to be successful and this test
    // will detect whether the appropriate corruptions have
    // occured.
    Iterator* iter = db_->NewIterator(ReadOptions(false, true));
J
jorlow@chromium.org 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      uint64_t key;
      Slice in(iter->key());
      if (!ConsumeDecimalNumber(&in, &key) ||
          !in.empty() ||
          key < next_expected) {
        bad_keys++;
        continue;
      }
      missed += (key - next_expected);
      next_expected = key + 1;
      if (iter->value() != Value(key, &value_space)) {
        bad_values++;
      } else {
        correct++;
      }
    }
    delete iter;

    fprintf(stderr,
            "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n",
            min_expected, max_expected, correct, bad_keys, bad_values, missed);
    ASSERT_LE(min_expected, correct);
    ASSERT_GE(max_expected, correct);
  }

130
  void CorruptFile(const std::string fname, int offset, int bytes_to_corrupt) {
J
jorlow@chromium.org 已提交
131
    struct stat sbuf;
J
jorlow@chromium.org 已提交
132 133 134 135 136 137 138 139 140 141 142
    if (stat(fname.c_str(), &sbuf) != 0) {
      const char* msg = strerror(errno);
      ASSERT_TRUE(false) << fname << ": " << msg;
    }

    if (offset < 0) {
      // Relative to end of file; make it absolute
      if (-offset > sbuf.st_size) {
        offset = 0;
      } else {
        offset = sbuf.st_size + offset;
J
jorlow@chromium.org 已提交
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
      }
    }
    if (offset > sbuf.st_size) {
      offset = sbuf.st_size;
    }
    if (offset + bytes_to_corrupt > sbuf.st_size) {
      bytes_to_corrupt = sbuf.st_size - offset;
    }

    // Do it
    std::string contents;
    Status s = ReadFileToString(Env::Default(), fname, &contents);
    ASSERT_TRUE(s.ok()) << s.ToString();
    for (int i = 0; i < bytes_to_corrupt; i++) {
      contents[i + offset] ^= 0x80;
    }
    s = WriteStringToFile(Env::Default(), contents, fname);
    ASSERT_TRUE(s.ok()) << s.ToString();
  }

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
  void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
    // Pick file to corrupt
    std::vector<std::string> filenames;
    ASSERT_OK(env_.GetChildren(dbname_, &filenames));
    uint64_t number;
    FileType type;
    std::string fname;
    int picked_number = -1;
    for (unsigned int i = 0; i < filenames.size(); i++) {
      if (ParseFileName(filenames[i], &number, &type) &&
          type == filetype &&
          static_cast<int>(number) > picked_number) {  // Pick latest file
        fname = dbname_ + "/" + filenames[i];
        picked_number = number;
      }
    }
    ASSERT_TRUE(!fname.empty()) << filetype;

    CorruptFile(fname, offset, bytes_to_corrupt);
  }

  // corrupts exactly one file at level `level`. if no file found at level,
  // asserts
  void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) {
    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    for (const auto& m : metadata) {
      if (m.level == level) {
        CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt);
        return;
      }
    }
    ASSERT_TRUE(false) << "no file found at level";
  }


199 200 201 202 203 204 205 206
  int Property(const std::string& name) {
    std::string property;
    int result;
    if (db_->GetProperty(name, &property) &&
        sscanf(property.c_str(), "%d", &result) == 1) {
      return result;
    } else {
      return -1;
J
jorlow@chromium.org 已提交
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
    }
  }

  // Return the ith key
  Slice Key(int i, std::string* storage) {
    char buf[100];
    snprintf(buf, sizeof(buf), "%016d", i);
    storage->assign(buf, strlen(buf));
    return Slice(*storage);
  }

  // Return the value to associate with the specified key
  Slice Value(int k, std::string* storage) {
    Random r(k);
    return test::RandomString(&r, kValueSize, storage);
  }
};

// Damages two spots in the newest log file and checks how many of the
// written entries survive the reopen.
TEST(CorruptionTest, Recovery) {
  const int kWritten = 100;
  // The 64 records in the first two log blocks are completely lost.
  const int kLost = 64;

  Build(kWritten);
  Check(kWritten, kWritten);

  // WriteBatch tag for first record
  Corrupt(kLogFile, 19, 1);
  // Somewhere in second block
  Corrupt(kLogFile, log::kBlockSize + 1000, 1);
  Reopen();

  Check(kWritten - kLost, kWritten - kLost);
}

// With writable-file errors switched on in the test Env, reopening the
// database is expected to fail.
TEST(CorruptionTest, RecoverWriteError) {
  env_.writable_file_error_ = true;
  const Status reopen_status = TryReopen();
  ASSERT_TRUE(!reopen_status.ok());
}

// Keeps writing while the test Env fails writable-file creation.  Once one
// write has failed, every later write must fail as well; the last write must
// have failed, and the database must reopen after errors are switched off.
TEST(CorruptionTest, NewFileErrorDuringWrite) {
  // Do enough writing to force minor compaction
  env_.writable_file_error_ = true;
  const int num = 3 + (Options().write_buffer_size / kValueSize);
  std::string value_storage;
  Status last_status;
  bool seen_failure = false;
  for (int round = 0; round < num; round++) {
    WriteBatch batch;
    batch.Put("a", Value(100, &value_storage));
    last_status = db_->Write(WriteOptions(), &batch);
    seen_failure = seen_failure || !last_status.ok();
    // A success after an earlier failure is not allowed.
    ASSERT_TRUE(!(seen_failure && last_status.ok()));
  }
  ASSERT_TRUE(!last_status.ok());
  ASSERT_GE(env_.num_writable_file_errors_, 1);
  env_.writable_file_error_ = false;
  Reopen();
}

// Flushes and compacts levels 0 and 1, flips one byte at offset 100 of the
// newest table file, and expects exactly one entry to stop being counted as
// correct.
TEST(CorruptionTest, TableFile) {
  const int kEntries = 100;
  Build(kEntries);

  DBImpl* const impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_FlushMemTable();
  for (int level = 0; level <= 1; level++) {
    impl->TEST_CompactRange(level, nullptr, nullptr);
  }

  Corrupt(kTableFile, 100, 1);
  Check(kEntries - 1, kEntries - 1);
}

// Corrupts 500 bytes starting 2000 bytes before the end of the newest table
// file, reopens, and expects between 5000 and 9999 of the entries to survive.
TEST(CorruptionTest, TableFileIndexData) {
  // Enough to build multiple Tables
  Build(10000);
  DBImpl* const impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_FlushMemTable();

  // A negative offset is measured from the end of the file.
  const int kOffsetFromEnd = -2000;
  const int kDamagedBytes = 500;
  Corrupt(kTableFile, kOffsetFromEnd, kDamagedBytes);
  Reopen();
  Check(5000, 9999);
}

// Runs RepairDB() on a populated database and checks that every entry is
// still readable after reopening.
TEST(CorruptionTest, MissingDescriptor) {
  const int kEntries = 1000;
  Build(kEntries);
  RepairDB();
  Reopen();
  Check(kEntries, kEntries);
}

// Overwrites one key several times, repairs the database, and verifies that
// both the latest value and any later write stay visible.
TEST(CorruptionTest, SequenceNumberRecovery) {
  const char* const kVersions[] = {"v1", "v2", "v3", "v4", "v5"};
  for (const char* version : kVersions) {
    ASSERT_OK(db_->Put(WriteOptions(), "foo", version));
  }
  RepairDB();
  Reopen();

  std::string stored;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &stored));
  ASSERT_EQ("v5", stored);

  // Write something.  If sequence number was not recovered properly,
  // it will be hidden by an earlier write.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6"));
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &stored));
  ASSERT_EQ("v6", stored);

  // The new value must also survive another reopen.
  Reopen();
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &stored));
  ASSERT_EQ("v6", stored);
}

// Corrupts the start of the newest descriptor file: a plain reopen must
// fail, but after RepairDB() the stored value must be readable again.
TEST(CorruptionTest, CorruptedDescriptor) {
  ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
  DBImpl* const impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_FlushMemTable();
  impl->TEST_CompactRange(0, nullptr, nullptr);

  Corrupt(kDescriptorFile, 0, 1000);
  const Status reopen_status = TryReopen();
  ASSERT_TRUE(!reopen_status.ok());

  RepairDB();
  Reopen();
  std::string stored;
  ASSERT_OK(db_->Get(ReadOptions(), "foo", &stored));
  ASSERT_EQ("hello", stored);
}

// Corrupts the single flushed table file, then writes enough data to force
// compactions and checks that all newly written entries are readable.
TEST(CorruptionTest, CompactionInputError) {
  Build(10);
  DBImpl* const impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_FlushMemTable();

  // The flush is expected to have produced exactly one file at this level.
  const int last = impl->MaxMemCompactionLevel();
  const std::string files_at_last =
      "rocksdb.num-files-at-level" + NumberToString(last);
  ASSERT_EQ(1, Property(files_at_last));

  Corrupt(kTableFile, 100, 1);
  Check(9, 9);

  // Force compactions by writing lots of values
  Build(10000);
  Check(10000, 10000);
}

// With paranoid_checks enabled, corrupts the only level-0 table file and
// verifies that writes eventually start failing, and that once one write has
// failed every later write fails too.
TEST(CorruptionTest, CompactionInputErrorParanoid) {
  Options options;
  options.paranoid_checks = true;
  options.write_buffer_size = 131072;
  options.max_write_buffer_number = 2;
  Reopen(&options);
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);

  // Fill levels >= 1 so memtable flush outputs to level 0
  for (int level = 1; level < dbi->NumberLevels(); level++) {
    dbi->Put(WriteOptions(), "", "begin");
    dbi->Put(WriteOptions(), "~", "end");
    dbi->TEST_FlushMemTable();
  }

  options.max_mem_compaction_level = 0;
  Reopen(&options);
  // Reopen() deletes the previous DB object and opens a new one, so the
  // pointer taken above is dangling; fetch the new one before using it.
  dbi = reinterpret_cast<DBImpl*>(db_);

  Build(10);
  dbi->TEST_FlushMemTable();
  dbi->TEST_WaitForCompact();
  ASSERT_EQ(1, Property("rocksdb.num-files-at-level0"));

  CorruptTableFileAtLevel(0, 100, 1);
  Check(9, 9);

  // Write must eventually fail because of corrupted table
  Status s;
  std::string tmp1, tmp2;
  bool failed = false;
  for (int i = 0; i < 10000; i++) {
    s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
    if (!s.ok()) {
      failed = true;
    }
    // if one write failed, every subsequent write must fail, too
    ASSERT_TRUE(!failed || !s.ok()) << "write did not fail in a corrupted db";
  }
  ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
}

// After corrupting the flushed table file, a key written afterwards must be
// readable both before and after the next memtable flush.
TEST(CorruptionTest, UnrelatedKeys) {
  Build(10);
  DBImpl* const impl = reinterpret_cast<DBImpl*>(db_);
  impl->TEST_FlushMemTable();
  Corrupt(kTableFile, 100, 1);

  const int kNewKey = 1000;
  std::string key_buf, value_buf;
  ASSERT_OK(db_->Put(WriteOptions(), Key(kNewKey, &key_buf),
                     Value(kNewKey, &value_buf)));

  // First read: before the flush.
  std::string stored;
  ASSERT_OK(db_->Get(ReadOptions(), Key(kNewKey, &key_buf), &stored));
  ASSERT_EQ(Value(kNewKey, &value_buf).ToString(), stored);

  // Second read: after the flush.
  impl->TEST_FlushMemTable();
  ASSERT_OK(db_->Get(ReadOptions(), Key(kNewKey, &key_buf), &stored));
  ASSERT_EQ(Value(kNewKey, &value_buf).ToString(), stored);
}

I
Igor Canadi 已提交
402 403 404 405 406 407 408 409 410 411 412
// Damages the on-disk state of a live table file behind the database's back
// and expects the next open (with paranoid_checks) to report corruption.
// Two variants are exercised:
//   iter 0 - the file is overwritten with a short bogus payload
//   iter 1 - the file is deleted
TEST(CorruptionTest, FileSystemStateCorrupted) {
  for (int iter = 0; iter < 2; ++iter) {
    Options options;
    options.paranoid_checks = true;
    options.create_if_missing = true;
    Reopen(&options);
    Build(10);
    ASSERT_OK(db_->Flush(FlushOptions()));
    DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
    std::vector<LiveFileMetaData> metadata;
    dbi->GetLiveFilesMetaData(&metadata);
    ASSERT_GT(metadata.size(), size_t(0));
    // NOTE(review): the name is appended without a separator here, while
    // CorruptTableFileAtLevel() inserts "/"; presumably the name already
    // starts with one -- confirm.
    std::string filename = dbname_ + metadata[0].name;

    delete db_;
    db_ = nullptr;

    if (iter == 0) {  // corrupt file size
      unique_ptr<WritableFile> file;
      // Check each step: a failed NewWritableFile would leave `file` null,
      // and a silently failed write would leave the file undamaged.
      ASSERT_OK(env_.NewWritableFile(filename, &file, EnvOptions()));
      ASSERT_OK(file->Append(Slice("corrupted sst")));
      file.reset();
    } else {  // delete the file
      ASSERT_OK(env_.DeleteFile(filename));
    }

    Status x = TryReopen(&options);
    ASSERT_TRUE(x.IsCorruption());
    // Start the next round (and leave the fixture) with a clean database.
    DestroyDB(dbname_, options_);
    Reopen(&options);
  }
}

435
}  // namespace rocksdb
J
jorlow@chromium.org 已提交
436 437

int main(int argc, char** argv) {
438
  return rocksdb::test::RunAllTests();
J
jorlow@chromium.org 已提交
439
}