// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#ifndef ROCKSDB_LITE

M
Manuel Ung 已提交
10
#include <algorithm>
A
agiardullo 已提交
11
#include <atomic>
12
#include <mutex>
A
agiardullo 已提交
13 14 15 16 17 18 19 20
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

#include "db/write_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
21
#include "rocksdb/snapshot.h"
A
agiardullo 已提交
22 23 24 25 26
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/utilities/write_batch_with_index.h"
M
Manuel Ung 已提交
27
#include "util/autovector.h"
A
agiardullo 已提交
28
#include "utilities/transactions/transaction_base.h"
A
agiardullo 已提交
29 30 31 32
#include "utilities/transactions/transaction_util.h"

namespace rocksdb {

M
Maysam Yabandeh 已提交
33
class PessimisticTransactionDB;
A
agiardullo 已提交
34

M
Maysam Yabandeh 已提交
35 36 37
// A transaction under pessimistic concurrency control. This class implements
// the locking API and interfaces with the lock manager as well as the
// pessimistic transactional db.
38
class PessimisticTransaction : public TransactionBaseImpl {
A
agiardullo 已提交
39
 public:
40
  PessimisticTransaction(TransactionDB* db, const WriteOptions& write_options,
41
                         const TransactionOptions& txn_options);
A
agiardullo 已提交
42

43
  virtual ~PessimisticTransaction();
A
agiardullo 已提交
44

45
  void Reinitialize(TransactionDB* txn_db, const WriteOptions& write_options,
46 47
                    const TransactionOptions& txn_options);

48
  Status Prepare() override;
R
Reid Horuff 已提交
49

50
  Status Commit() override;
A
agiardullo 已提交
51

M
Maysam Yabandeh 已提交
52 53 54
  // It is basically Commit without going through Prepare phase. The write batch
  // is also directly provided instead of expecting txn to gradually batch the
  // transactions writes to an internal write batch.
55
  Status CommitBatch(WriteBatch* batch);
A
agiardullo 已提交
56

M
Maysam Yabandeh 已提交
57
  Status Rollback() override;
A
agiardullo 已提交
58

59
  Status RollbackToSavePoint() override;
A
agiardullo 已提交
60

R
Reid Horuff 已提交
61 62
  Status SetName(const TransactionName& name) override;

A
agiardullo 已提交
63 64 65
  // Generate a new unique transaction identifier
  static TransactionID GenTxnID();

66 67
  TransactionID GetID() const override { return txn_id_; }

M
Manuel Ung 已提交
68 69
  std::vector<TransactionID> GetWaitingTxns(uint32_t* column_family_id,
                                            std::string* key) const override {
70
    std::lock_guard<std::mutex> lock(wait_mutex_);
M
Manuel Ung 已提交
71 72
    std::vector<TransactionID> ids(waiting_txn_ids_.size());
    if (key) *key = waiting_key_ ? *waiting_key_ : "";
73
    if (column_family_id) *column_family_id = waiting_cf_id_;
M
Manuel Ung 已提交
74 75
    std::copy(waiting_txn_ids_.begin(), waiting_txn_ids_.end(), ids.begin());
    return ids;
76 77
  }

M
Manuel Ung 已提交
78
  void SetWaitingTxn(autovector<TransactionID> ids, uint32_t column_family_id,
79 80
                     const std::string* key) {
    std::lock_guard<std::mutex> lock(wait_mutex_);
M
Manuel Ung 已提交
81
    waiting_txn_ids_ = ids;
82 83 84
    waiting_cf_id_ = column_family_id;
    waiting_key_ = key;
  }
A
agiardullo 已提交
85

M
Manuel Ung 已提交
86 87 88 89 90 91 92
  void ClearWaitingTxn() {
    std::lock_guard<std::mutex> lock(wait_mutex_);
    waiting_txn_ids_.clear();
    waiting_cf_id_ = 0;
    waiting_key_ = nullptr;
  }

A
agiardullo 已提交
93
  // Returns the time (in microseconds according to Env->GetMicros())
A
agiardullo 已提交
94 95 96 97 98 99 100
  // that this transaction will be expired.  Returns 0 if this transaction does
  // not expire.
  uint64_t GetExpirationTime() const { return expiration_time_; }

  // returns true if this transaction has an expiration_time and has expired.
  bool IsExpired() const;

A
agiardullo 已提交
101
  // Returns the number of microseconds a transaction can wait on acquiring a
A
agiardullo 已提交
102 103
  // lock or -1 if there is no timeout.
  int64_t GetLockTimeout() const { return lock_timeout_; }
A
agiardullo 已提交
104 105 106
  void SetLockTimeout(int64_t timeout) override {
    lock_timeout_ = timeout * 1000;
  }
A
agiardullo 已提交
107

108 109 110
  // Returns true if locks were stolen successfully, false otherwise.
  bool TryStealingLocks();

111
  bool IsDeadlockDetect() const override { return deadlock_detect_; }
M
Manuel Ung 已提交
112 113 114

  int64_t GetDeadlockDetectDepth() const { return deadlock_detect_depth_; }

A
agiardullo 已提交
115
 protected:
116 117 118 119
  // Refer to
  // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery
  bool use_only_the_last_commit_time_batch_for_recovery_ = false;

120 121 122 123
  virtual Status PrepareInternal() = 0;

  virtual Status CommitWithoutPrepareInternal() = 0;

124 125 126 127
  // batch_cnt if non-zero is the number of sub-batches. A sub-batch is a batch
  // with no duplicate keys. If zero, then the number of sub-batches is unknown.
  virtual Status CommitBatchInternal(WriteBatch* batch,
                                     size_t batch_cnt = 0) = 0;
M
Maysam Yabandeh 已提交
128

129 130
  virtual Status CommitInternal() = 0;

M
Maysam Yabandeh 已提交
131 132
  virtual Status RollbackInternal() = 0;

133
  virtual void Initialize(const TransactionOptions& txn_options);
M
Maysam Yabandeh 已提交
134 135 136

  Status LockBatch(WriteBatch* batch, TransactionKeyMap* keys_to_unlock);

A
agiardullo 已提交
137
  Status TryLock(ColumnFamilyHandle* column_family, const Slice& key,
M
Manuel Ung 已提交
138
                 bool read_only, bool exclusive,
139
                 bool skip_validate = false) override;
A
agiardullo 已提交
140

M
Maysam Yabandeh 已提交
141 142
  void Clear() override;

M
Maysam Yabandeh 已提交
143
  PessimisticTransactionDB* txn_db_impl_;
R
Reid Horuff 已提交
144
  DBImpl* db_impl_;
A
agiardullo 已提交
145

M
Maysam Yabandeh 已提交
146 147 148 149 150
  // If non-zero, this transaction should not be committed after this time (in
  // microseconds according to Env->NowMicros())
  uint64_t expiration_time_;

 private:
151
  friend class TransactionTest_ValidateSnapshotTest_Test;
A
agiardullo 已提交
152 153 154 155
  // Used to create unique ids for transactions.
  static std::atomic<TransactionID> txn_id_counter_;

  // Unique ID for this transaction
156
  TransactionID txn_id_;
A
agiardullo 已提交
157

M
Manuel Ung 已提交
158
  // IDs for the transactions that are blocking the current transaction.
159
  //
M
Manuel Ung 已提交
160 161
  // empty if current transaction is not waiting.
  autovector<TransactionID> waiting_txn_ids_;
162 163 164 165 166 167 168 169 170 171 172 173

  // The following two represents the (cf, key) that a transaction is waiting
  // on.
  //
  // If waiting_key_ is not null, then the pointer should always point to
  // a valid string object. The reason is that it is only non-null when the
  // transaction is blocked in the TransactionLockMgr::AcquireWithTimeout
  // function. At that point, the key string object is one of the function
  // parameters.
  uint32_t waiting_cf_id_;
  const std::string* waiting_key_;

M
Manuel Ung 已提交
174
  // Mutex protecting waiting_txn_ids_, waiting_cf_id_ and waiting_key_.
175 176
  mutable std::mutex wait_mutex_;

A
agiardullo 已提交
177 178 179
  // Timeout in microseconds when locking a key or -1 if there is no timeout.
  int64_t lock_timeout_;

M
Manuel Ung 已提交
180 181 182 183 184 185
  // Whether to perform deadlock detection or not.
  bool deadlock_detect_;

  // Whether to perform deadlock detection or not.
  int64_t deadlock_detect_depth_;

186
  virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
187 188
                                  const Slice& key,
                                  SequenceNumber* tracked_at_seq);
A
agiardullo 已提交
189

M
Maysam Yabandeh 已提交
190 191
  void UnlockGetForUpdate(ColumnFamilyHandle* column_family,
                          const Slice& key) override;
A
agiardullo 已提交
192

M
Maysam Yabandeh 已提交
193
  // No copying allowed
194 195
  PessimisticTransaction(const PessimisticTransaction&);
  void operator=(const PessimisticTransaction&);
M
Maysam Yabandeh 已提交
196
};
A
agiardullo 已提交
197

198
class WriteCommittedTxn : public PessimisticTransaction {
M
Maysam Yabandeh 已提交
199
 public:
200
  WriteCommittedTxn(TransactionDB* db, const WriteOptions& write_options,
201
                    const TransactionOptions& txn_options);
A
agiardullo 已提交
202

203
  virtual ~WriteCommittedTxn() {}
M
Maysam Yabandeh 已提交
204 205

 private:
206 207 208 209
  Status PrepareInternal() override;

  Status CommitWithoutPrepareInternal() override;

210
  Status CommitBatchInternal(WriteBatch* batch, size_t batch_cnt) override;
M
Maysam Yabandeh 已提交
211

212 213
  Status CommitInternal() override;

M
Maysam Yabandeh 已提交
214 215
  Status RollbackInternal() override;

A
agiardullo 已提交
216
  // No copying allowed
217 218
  WriteCommittedTxn(const WriteCommittedTxn&);
  void operator=(const WriteCommittedTxn&);
A
agiardullo 已提交
219 220 221 222 223
};

}  // namespace rocksdb

#endif  // ROCKSDB_LITE