tdbInt.h 10.4 KB
Newer Older
H
more  
Hongze Cheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
Hongze Cheng 已提交
16 17
#ifndef _TD_TDB_INTERNAL_H_
#define _TD_TDB_INTERNAL_H_
H
Hongze Cheng 已提交
18

H
Hongze Cheng 已提交
19
#include "tdb.h"
H
refact  
Hongze Cheng 已提交
20

H
more  
Hongze Cheng 已提交
21 22 23 24
#ifdef __cplusplus
extern "C" {
#endif

H
more  
Hongze Cheng 已提交
25 26 27 28 29 30 31 32 33
// Short aliases for the <stdint.h> fixed-width integer types.
// iN = signed N-bit integer, uN = unsigned N-bit integer.
typedef int8_t   i8;
typedef int16_t  i16;
typedef int32_t  i32;
typedef int64_t  i64;
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

H
Hongze Cheng 已提交
34 35 36 37 38 39
// SPgno: page number type (32-bit unsigned).
typedef u32 SPgno;

#include "tdbOs.h"
#include "tdbUtil.h"

H
Hongze Cheng 已提交
40 41 42 43 44 45 46 47 48 49
// 24-bit unsigned integer codec.
//
// `p` must be a `u8 *` pointing at 3 accessible bytes. The value is stored
// big-endian: p[0] holds bits 23..16, p[1] bits 15..8 and p[2] bits 7..0.
//
// TDB_GET_U24 assembles the value one byte at a time so that it
//   - always round-trips with TDB_PUT_U24, whatever the host byte order is;
//   - never performs an unaligned or type-punned 16-bit load.
// The result has type int and lies in [0, 0xFFFFFF].
// Note: `p` is evaluated more than once, so it must not have side effects.
#define TDB_GET_U24(p) (((p)[0] << 16) | ((p)[1] << 8) | (p)[2])
// TDB_PUT_U24 stores the low 24 bits of `v` at `p`; `v` is evaluated once.
#define TDB_PUT_U24(p, v)       \
  do {                          \
    int tv = (v);               \
    (p)[2] = tv & 0xff;         \
    (p)[1] = (tv >> 8) & 0xff;  \
    (p)[0] = (tv >> 16) & 0xff; \
  } while (0)

H
Hongze Cheng 已提交
50
// fileid
H
Hongze Cheng 已提交
51
#define TDB_FILE_ID_LEN 24
H
Hongze Cheng 已提交
52

H
Hongze Cheng 已提交
53
// SPgid
H
Hongze Cheng 已提交
54
typedef struct {
H
Hongze Cheng 已提交
55
  uint8_t fileid[TDB_FILE_ID_LEN];
H
more  
Hongze Cheng 已提交
56
  SPgno   pgno;
H
Hongze Cheng 已提交
57
} SPgid;
H
Hongze Cheng 已提交
58

H
Hongze Cheng 已提交
59
// pgsz_t
// Page size limits, in bytes, and the range check used to validate a page size.
#define TDB_MIN_PGSIZE       512       // 512B
#define TDB_MAX_PGSIZE       16777216  // 16M
#define TDB_DEFAULT_PGSIZE   4096
// Non-zero when `s` lies in [TDB_MIN_PGSIZE, TDB_MAX_PGSIZE]; `s` is evaluated twice.
#define TDB_IS_PGSIZE_VLD(s) (((s) >= TDB_MIN_PGSIZE) && ((s) <= TDB_MAX_PGSIZE))

// dbname
#define TDB_MAX_DBNAME_LEN 24

// Sentinel length value (-1).
// NOTE(review): presumably marks a variable-length key/value; confirm against callers.
#define TDB_VARIANT_LEN ((int)-1)

// Journal file name.
#define TDB_JOURNAL_NAME "tdb.journal"

// NOTE(review): presumably the buffer size for file names; confirm against callers.
#define TDB_FILENAME_LEN 128

// Upper bound on B-tree depth; sizes the per-cursor page/index stacks in struct SBTC.
#define BTREE_MAX_DEPTH 20

H
Hongze Cheng 已提交
76
// Bit-flag helpers. All arguments are integer expressions; none of the macros
// modifies its arguments (ADD/REMOVE yield the new value).
//   TDB_FLAG_IS     - `flags` is exactly `flag`
//   TDB_FLAG_HAS    - at least one bit of `flag` is set in `flags`
//   TDB_FLAG_NO     - no bit of `flag` is set in `flags`
//   TDB_FLAG_ADD    - `flags` with the bits of `flag` set
//   TDB_FLAG_REMOVE - `flags` with the bits of `flag` cleared
#define TDB_FLAG_IS(flags, flag)     ((flags) == (flag))
#define TDB_FLAG_HAS(flags, flag)    (((flags) & (flag)) != 0)
// `==` binds tighter than `&`, so the mask must be parenthesized before comparing.
#define TDB_FLAG_NO(flags, flag)     (((flags) & (flag)) == 0)
#define TDB_FLAG_ADD(flags, flag)    ((flags) | (flag))
#define TDB_FLAG_REMOVE(flags, flag) ((flags) & (~(flag)))
H
Hongze Cheng 已提交
81

H
refact  
Hongze Cheng 已提交
82 83
// Forward declarations; the structs themselves are defined further down
// (SPage, SPager) or outside this header (SPCache).
typedef struct SPager  SPager;
typedef struct SPCache SPCache;
typedef struct SPage   SPage;
H
refact  
Hongze Cheng 已提交
85

H
Hongze Cheng 已提交
86
// transaction
H
Hongze Cheng 已提交
87

H
Hongze Cheng 已提交
88 89 90 91
// Transaction-mode predicates. PTXN is a pointer to an object with a `flags`
// member; TDB_TXN_WRITE and TDB_TXN_READ_UNCOMMITTED are flag bits defined
// outside this header. IS_WRITE and IS_READ_UNCOMMITTED yield the masked bits
// (non-zero when set, not necessarily 1); IS_READ is the logical negation of IS_WRITE.
#define TDB_TXN_IS_WRITE(PTXN)            ((PTXN)->flags & TDB_TXN_WRITE)
#define TDB_TXN_IS_READ(PTXN)             (!TDB_TXN_IS_WRITE(PTXN))
#define TDB_TXN_IS_READ_UNCOMMITTED(PTXN) ((PTXN)->flags & TDB_TXN_READ_UNCOMMITTED)

H
Hongze Cheng 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105
// tdbEnv.c ====================================
// Pager registry of an environment: add, remove, and look up by file name.
// NOTE(review): presumably tdbEnvGetPager returns NULL when no pager matches — confirm in tdbEnv.c.
void    tdbEnvAddPager(TENV *pEnv, SPager *pPager);
void    tdbEnvRemovePager(TENV *pEnv, SPager *pPager);
SPager *tdbEnvGetPager(TENV *pEnv, const char *fname);

// tdbBtree.c ====================================
// SBTree is opaque here; SBTC (the B-tree cursor) is defined below.
typedef struct SBTree SBTree;
typedef struct SBTC   SBTC;
// Summary of a B-tree: root page number plus two counters.
// NOTE(review): nLevel / nData look like tree depth and entry count — confirm in tdbBtree.c.
typedef struct SBtInfo {
  SPgno root;
  int   nLevel;
  int   nData;
} SBtInfo;

H
Hongze Cheng 已提交
106 107 108 109 110 111 112 113 114
// Decoded view of a cell: key and value as (length, pointer) pairs, a page
// number, and a scratch buffer pointer.
// NOTE(review): ownership of pBuf and the meaning of pgno (child page?) are not
// visible in this header — confirm in tdbBtree.c.
typedef struct {
  int       kLen;
  const u8 *pKey;
  int       vLen;
  const u8 *pVal;
  SPgno     pgno;
  u8       *pBuf;
} SCellDecoder;

H
Hongze Cheng 已提交
115
struct SBTC {
H
Hongze Cheng 已提交
116 117 118 119 120 121 122 123 124
  SBTree      *pBt;
  i8           iPage;
  SPage       *pPage;
  int          idx;
  int          idxStack[BTREE_MAX_DEPTH + 1];
  SPage       *pgStack[BTREE_MAX_DEPTH + 1];
  SCellDecoder coder;
  TXN         *pTxn;
  TXN          txn;
H
Hongze Cheng 已提交
125 126 127 128 129 130
};

// SBTree
// B-tree handle API. Keys and values are passed as (pointer, length) pairs.
// NOTE(review): return-value conventions and ownership of the buffers returned
// through ppKey / ppVal are not visible in this header — confirm in tdbBtree.c.
int tdbBtreeOpen(int keyLen, int valLen, SPager *pFile, tdb_cmpr_fn_t kcmpr, SBTree **ppBt);
int tdbBtreeClose(SBTree *pBt);
int tdbBtreeInsert(SBTree *pBt, const void *pKey, int kLen, const void *pVal, int vLen, TXN *pTxn);
int tdbBtreeDelete(SBTree *pBt, const void *pKey, int kLen, TXN *pTxn);
int tdbBtreeUpsert(SBTree *pBt, const void *pKey, int nKey, const void *pData, int nData, TXN *pTxn);
int tdbBtreeGet(SBTree *pBt, const void *pKey, int kLen, void **ppVal, int *vLen);
int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkLen, void **ppVal, int *vLen);

// SBTC
// Cursor API: open/close, positioning, reading and modifying at the cursor.
int tdbBtcOpen(SBTC *pBtc, SBTree *pBt, TXN *pTxn);
int tdbBtcClose(SBTC *pBtc);
int tdbBtcIsValid(SBTC *pBtc);
int tdbBtcMoveTo(SBTC *pBtc, const void *pKey, int kLen, int *pCRst);
int tdbBtcMoveToFirst(SBTC *pBtc);
int tdbBtcMoveToLast(SBTC *pBtc);
int tdbBtcMoveToNext(SBTC *pBtc);
int tdbBtcMoveToPrev(SBTC *pBtc);
int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen);
int tdbBtcGet(SBTC *pBtc, const void **ppKey, int *kLen, const void **ppVal, int *vLen);
int tdbBtcDelete(SBTC *pBtc);
int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int nData, int insert);
H
Hongze Cheng 已提交
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264

// tdbPager.c ====================================
// Pager API: open/close a pager on a file, begin/commit a transaction, and
// fetch/return/allocate pages. tdbPagerFetchPage takes an optional-looking
// initPage callback plus an opaque `arg` forwarded to it.
// NOTE(review): return-value conventions and when initPage is invoked are not
// visible in this header — confirm in tdbPager.c.

int  tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager);
int  tdbPagerClose(SPager *pPager);
int  tdbPagerOpenDB(SPager *pPager, SPgno *ppgno, bool toCreate);
int  tdbPagerWrite(SPager *pPager, SPage *pPage);
int  tdbPagerBegin(SPager *pPager, TXN *pTxn);
int  tdbPagerCommit(SPager *pPager, TXN *pTxn);
int  tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPage)(SPage *, void *, int), void *arg,
                       TXN *pTxn);
void tdbPagerReturnPage(SPager *pPager, SPage *pPage, TXN *pTxn);
int  tdbPagerAllocPage(SPager *pPager, SPgno *ppgno);

// tdbPCache.c ====================================
// Field list spliced into struct SPage for the page cache: state bytes, a
// reference count (nRef, accessed through the TDB_*_PAGE_REF macros), several
// SPage link pointers, the owning pager and the page identifier.
// NOTE(review): the exact list each link pointer belongs to is inferred from
// its name only — confirm in tdbPCache.c.
#define TDB_PCACHE_PAGE \
  u8      isAnchor;     \
  u8      isLocal;      \
  u8      isDirty;      \
  i32     nRef;         \
  SPage  *pCacheNext;   \
  SPage  *pFreeNext;    \
  SPage  *pHashNext;    \
  SPage  *pLruNext;     \
  SPage  *pLruPrev;     \
  SPage  *pDirtyNext;   \
  SPager *pPager;       \
  SPgid   pgid;

// For page ref
// Reference counting on SPage::nRef. INIT is a plain store of 0; REF, UNREF and
// GET go through the atomic_*_32 helpers (declared outside this header).
// NOTE(review): REF/UNREF presumably evaluate to the updated count, per the
// add_fetch/sub_fetch naming — confirm against the helper definitions.
#define TDB_INIT_PAGE_REF(pPage) ((pPage)->nRef = 0)
#define TDB_REF_PAGE(pPage)      atomic_add_fetch_32(&((pPage)->nRef), 1)
#define TDB_UNREF_PAGE(pPage)    atomic_sub_fetch_32(&((pPage)->nRef), 1)
#define TDB_GET_PAGE_REF(pPage)  atomic_load_32(&((pPage)->nRef))

// Page cache API: open/close a cache, fetch a page by its SPgid, release a
// fetched page, and query the cache's page size.
// NOTE(review): the unit of cacheSize (pages vs. bytes) and the failure result
// of tdbPCacheFetch are not visible here — confirm in tdbPCache.c.
int    tdbPCacheOpen(int pageSize, int cacheSize, SPCache **ppCache);
int    tdbPCacheClose(SPCache *pCache);
SPage *tdbPCacheFetch(SPCache *pCache, const SPgid *pPgid, TXN *pTxn);
void   tdbPCacheRelease(SPCache *pCache, SPage *pPage, TXN *pTxn);
int    tdbPCacheGetPageSize(SPCache *pCache);

// tdbPage.c ====================================
// A cell is addressed as a pointer to its first byte.
typedef u8 SCell;

// PAGE APIS implemented
// Table of sizes and accessor callbacks used to read and write a page's header
// fields and cell-offset entries. A page reaches its table through
// SPage::pPageMethods.
// NOTE(review): szOffset / szPageHdr / szFreeCell are presumably byte sizes of
// an offset entry, the page header and a free-cell record — confirm in tdbPage.c.
typedef struct {
  int szOffset;
  int szPageHdr;
  int szFreeCell;
  // cell number
  int (*getCellNum)(SPage *);
  void (*setCellNum)(SPage *, int);
  // cell content offset
  int (*getCellBody)(SPage *);
  void (*setCellBody)(SPage *, int);
  // first free cell offset (0 means no free cells)
  int (*getCellFree)(SPage *);
  void (*setCellFree)(SPage *, int);
  // total free bytes
  int (*getFreeBytes)(SPage *);
  void (*setFreeBytes)(SPage *, int);
  // cell offset at idx
  int (*getCellOffset)(SPage *, int);
  void (*setCellOffset)(SPage *, int, int);
  // free cell info
  void (*getFreeCellInfo)(SCell *pCell, int *szCell, int *nxOffset);
  void (*setFreeCellInfo)(SCell *pCell, int szCell, int nxOffset);
} SPageMethods;

// Byte-packed (no padding) so the struct matches its 4-byte layout exactly.
#pragma pack(push, 1)

// Page footer
// Holds a 4-byte checksum field.
typedef struct {
  u8 cksm[4];
} SPageFtr;
#pragma pack(pop)

// In-memory page object: a lock, the page size, the data buffer, and the
// accessor table, followed by layout pointers and overflow-cell bookkeeping
// used by the pager and access methods, and finally the page-cache fields
// supplied by TDB_PCACHE_PAGE.
struct SPage {
  tdb_spinlock_t lock;
  int            pageSize;
  u8            *pData;
  SPageMethods  *pPageMethods;
  // Fields below used by pager and am
  u8       *pPageHdr;
  u8       *pCellIdx;
  u8       *pFreeStart;
  u8       *pFreeEnd;
  SPageFtr *pPageFtr;
  // Overflow cells: nOverflow entries are valid; apOvfl[i] is the cell and
  // aiOvfl[i] its cell index (see tdbPageGetCell).
  int       nOverflow;
  SCell    *apOvfl[4];
  int       aiOvfl[4];
  int       kLen;  // key length of the page, -1 for unknown
  int       vLen;  // value length of the page, -1 for unknown
  int       maxLocal;
  int       minLocal;
  // Callback that returns the size of a cell on this page.
  int (*xCellSize)(const SPage *, SCell *);
  // Fields used by SPCache
  TDB_PCACHE_PAGE
};

// For page lock
// Result codes of tdbTryLockPage() / TDB_TRY_LOCK_PAGE().
#define P_LOCK_SUCC 0     // lock acquired
#define P_LOCK_BUSY 1     // lock currently held elsewhere
#define P_LOCK_FAIL (-1)  // any other failure

// Try to acquire `pLock` without blocking.
//
// Returns P_LOCK_SUCC when the lock was taken, P_LOCK_BUSY when it is already
// held, and P_LOCK_FAIL otherwise.
//
// The return code of tdbSpinlockTrylock is checked against EBUSY as well as
// errno: pthread-style lock primitives report failure through their return
// value and do not set errno, so looking at errno alone can misreport a busy
// lock as a failure (or act on a stale errno). The errno check is kept for
// backends that follow the "-1 and set errno" convention.
static inline int tdbTryLockPage(tdb_spinlock_t *pLock) {
  int rc = tdbSpinlockTrylock(pLock);

  if (rc == 0) {
    return P_LOCK_SUCC;
  }
  if (rc == EBUSY || errno == EBUSY) {
    return P_LOCK_BUSY;
  }
  return P_LOCK_FAIL;
}
H
Hongze Cheng 已提交
265

H
Hongze Cheng 已提交
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
// Page lock helpers operating on SPage::lock. TDB_TRY_LOCK_PAGE yields one of
// the P_LOCK_* codes; the others forward to the tdbSpinlock* primitives.
#define TDB_INIT_PAGE_LOCK(pPage)    tdbSpinlockInit(&((pPage)->lock), 0)
#define TDB_DESTROY_PAGE_LOCK(pPage) tdbSpinlockDestroy(&((pPage)->lock))
#define TDB_LOCK_PAGE(pPage)         tdbSpinlockLock(&((pPage)->lock))
#define TDB_UNLOCK_PAGE(pPage)       tdbSpinlockUnlock(&((pPage)->lock))
#define TDB_TRY_LOCK_PAGE(pPage)     tdbTryLockPage(&((pPage)->lock))

// APIs
// Page accessors. `pPage` is evaluated more than once by most of these macros,
// so pass an expression without side effects.
//   TDB_PAGE_TOTAL_CELLS - overflow cells plus the cell count reported by getCellNum
//   TDB_PAGE_USABLE_SIZE - bytes between pCellIdx and the page footer
//   TDB_PAGE_FREE_SIZE   - value reported by getFreeBytes
//   TDB_PAGE_PGNO        - page number from the page's pgid
//   TDB_BYTES_CELL_TAKEN - xCellSize of the cell plus one offset entry (szOffset)
//   TDB_PAGE_OFFSET_SIZE - szOffset of the page's method table
#define TDB_PAGE_TOTAL_CELLS(pPage)        ((pPage)->nOverflow + (pPage)->pPageMethods->getCellNum(pPage))
#define TDB_PAGE_USABLE_SIZE(pPage)        ((u8 *)(pPage)->pPageFtr - (pPage)->pCellIdx)
#define TDB_PAGE_FREE_SIZE(pPage)          (*(pPage)->pPageMethods->getFreeBytes)(pPage)
#define TDB_PAGE_PGNO(pPage)               ((pPage)->pgid.pgno)
#define TDB_BYTES_CELL_TAKEN(pPage, pCell) ((*(pPage)->xCellSize)(pPage, pCell) + (pPage)->pPageMethods->szOffset)
#define TDB_PAGE_OFFSET_SIZE(pPage)        ((pPage)->pPageMethods->szOffset)

// Page object API: create/destroy (with caller-supplied allocator callbacks and
// an opaque `arg` forwarded to them), zero/init, cell insert/drop/update, copy,
// and capacity query.
// NOTE(review): return-value conventions and the meaning of `asOvfl` are not
// visible in this header — confirm in tdbPage.c.
int  tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t), void *arg);
int  tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg);
void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *));
void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *));
int  tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl);
int  tdbPageDropCell(SPage *pPage, int idx);
int  tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell);
void tdbPageCopy(SPage *pFromPage, SPage *pToPage);
int  tdbPageCapacity(int pageSize, int amHdrSize);

// Return a pointer to the cell with index `idx` on `pPage`.
//
// `idx` counts both overflow cells (apOvfl/aiOvfl) and cells stored in the page
// data; it must satisfy 0 <= idx < TDB_PAGE_TOTAL_CELLS(pPage) (asserted).
//
// If `idx` names an overflow cell, that cell's pointer is returned. Otherwise
// the index is converted to an in-page index by subtracting the number of
// overflow entries that precede it, and the cell is located through the page's
// cell-offset table. The scan stops at the first overflow index greater than
// `idx`, so it relies on aiOvfl[] being in ascending order.
static inline SCell *tdbPageGetCell(SPage *pPage, int idx) {
  int iOvfl;
  int lidx;

  ASSERT(idx >= 0 && idx < TDB_PAGE_TOTAL_CELLS(pPage));

  for (iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) {
    if (pPage->aiOvfl[iOvfl] == idx) {
      return pPage->apOvfl[iOvfl];
    }
    if (pPage->aiOvfl[iOvfl] > idx) {
      break;
    }
  }

  // iOvfl now equals the number of overflow cells positioned before idx.
  lidx = idx - iOvfl;
  ASSERT(lidx >= 0 && lidx < pPage->pPageMethods->getCellNum(pPage));

  return pPage->pData + pPage->pPageMethods->getCellOffset(pPage, lidx);
}
H
Hongze Cheng 已提交
313

H
Hongze Cheng 已提交
314 315 316 317 318 319 320 321 322 323
// Environment object: directory and journal names, a journal descriptor, the
// page cache, and the registered pagers kept both as a list (pgrList, nPager)
// and as a hash table (pgrHash with nPgrHash slots).
// NOTE(review): list/hash roles are inferred from the field names and the
// SPager::pNext / pHashNext comments — confirm in tdbEnv.c.
struct STEnv {
  char    *rootDir;
  char    *jfname;
  int      jfd;
  SPCache *pCache;
  SPager  *pgrList;
  int      nPager;
  int      nPgrHash;
  SPager **pgrHash;
};
H
Hongze Cheng 已提交
324

H
Hongze Cheng 已提交
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
// Pager object: names and descriptors of the database file and its journal
// file, the page size, a TDB_FILE_ID_LEN-byte file id, the page cache in use,
// two SPgno sizes, a dirty-page pointer, a transaction flag, and the links
// used by the environment to chain pagers.
// NOTE(review): dbFileSize / dbOrigSize are presumably the current and
// transaction-start sizes in pages — confirm in tdbPager.c.
struct SPager {
  char    *dbFileName;
  char    *jFileName;
  int      pageSize;
  uint8_t  fid[TDB_FILE_ID_LEN];
  tdb_fd_t fd;
  tdb_fd_t jfd;
  SPCache *pCache;
  SPgno    dbFileSize;
  SPgno    dbOrigSize;
  SPage   *pDirty;
  u8       inTran;
  SPager  *pNext;      // used by TENV
  SPager  *pHashNext;  // used by TENV
};

H
more  
Hongze Cheng 已提交
341 342 343 344
#ifdef __cplusplus
}
#endif

H
Hongze Cheng 已提交
345
#endif /*_TD_TDB_INTERNAL_H_*/