/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "tdbInt.h"

// Method tables for the two on-page encodings; both are defined at the bottom of this file.
extern SPageMethods pageMethods;
extern SPageMethods pageLargeMethods;

// Sizes taken from the page's method table.
#define TDB_PAGE_HDR_SIZE(pPage)       ((pPage)->pPageMethods->szPageHdr)
#define TDB_PAGE_FREE_CELL_SIZE(pPage) ((pPage)->pPageMethods->szFreeCell)

// Page-header getters, dispatched through the page's method table.
#define TDB_PAGE_NCELLS(pPage)              ((pPage)->pPageMethods->getCellNum(pPage))
#define TDB_PAGE_CCELLS(pPage)              ((pPage)->pPageMethods->getCellBody(pPage))
#define TDB_PAGE_FCELL(pPage)               ((pPage)->pPageMethods->getCellFree(pPage))
#define TDB_PAGE_NFREE(pPage)               ((pPage)->pPageMethods->getFreeBytes(pPage))
#define TDB_PAGE_CELL_OFFSET_AT(pPage, idx) ((pPage)->pPageMethods->getCellOffset(pPage, idx))

// Page-header setters, dispatched through the page's method table.
#define TDB_PAGE_NCELLS_SET(pPage, NCELLS)              ((pPage)->pPageMethods->setCellNum(pPage, NCELLS))
#define TDB_PAGE_CCELLS_SET(pPage, CCELLS)              ((pPage)->pPageMethods->setCellBody(pPage, CCELLS))
#define TDB_PAGE_FCELL_SET(pPage, FCELL)                ((pPage)->pPageMethods->setCellFree(pPage, FCELL))
#define TDB_PAGE_NFREE_SET(pPage, NFREE)                ((pPage)->pPageMethods->setFreeBytes(pPage, NFREE))
#define TDB_PAGE_CELL_OFFSET_AT_SET(pPage, idx, OFFSET) ((pPage)->pPageMethods->setCellOffset(pPage, idx, OFFSET))

// Address of the idx-th on-page cell: page data base plus the offset stored in the cell index.
#define TDB_PAGE_CELL_AT(pPage, idx) ((pPage)->pData + TDB_PAGE_CELL_OFFSET_AT(pPage, idx))

// Bytes left in an empty page once the access-method header (szAmHdr bytes), the page header
// and the page footer have been subtracted from the page size.
#define TDB_PAGE_MAX_FREE_BLOCK(pPage, szAmHdr) \
  ((pPage)->pageSize - (szAmHdr)-TDB_PAGE_HDR_SIZE(pPage) - sizeof(SPageFtr))
// File-local helpers, defined further down in this file.
// Each returns 0 on success and -1 on failure.
// Parameter names match the definitions (the allocate prototype used to say `size`).
static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell);
static int tdbPageDefragment(SPage *pPage);
static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell);
/*
 * Allocate a page object together with its data buffer.
 *
 * One buffer of pageSize + sizeof(SPage) bytes is requested from xMalloc and zeroed. The first
 * pageSize bytes are the page data; the SPage control structure is placed right behind them.
 * The method table is chosen by page size: pageMethods below 65536 bytes, pageLargeMethods
 * otherwise.
 *
 * pageSize  size of the page data area; must satisfy TDB_IS_PGSIZE_VLD
 * ppPage    out: the new page on success; reset to NULL before the allocation is attempted
 * xMalloc   allocator callback, invoked as xMalloc(arg, nBytes)
 * arg       opaque argument forwarded to xMalloc
 *
 * Returns 0 on success, -1 on a NULL allocator, an invalid page size or allocation failure.
 */
int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t), void *arg) {
  if (xMalloc == NULL) {
    tdbError("tdb/page-create: null xMalloc.");
    return -1;
  }

  if (!TDB_IS_PGSIZE_VLD(pageSize)) {
    tdbError("tdb/page-create: invalid pageSize: %d.", pageSize);
    return -1;
  }

  *ppPage = NULL;

  int nBytes = pageSize + sizeof(SPage);
  u8 *pBuf = (u8 *)(xMalloc(arg, nBytes));
  if (pBuf == NULL) {
    return -1;
  }
  memset(pBuf, 0, nBytes);

  // The control structure lives directly after the page data.
  SPage *pNew = (SPage *)(pBuf + pageSize);

  TDB_INIT_PAGE_LOCK(pNew);
  pNew->pageSize = pageSize;
  pNew->pData = pBuf;
  pNew->pPageMethods = (pageSize < 65536) ? &pageMethods : &pageLargeMethods;

  *ppPage = pNew;

  tdbTrace("tdb/page-create: %p/%d %p", pNew, pNew->id, xMalloc);
  return 0;
}

/*
 * Release a page created by tdbPageCreate.
 *
 * Overflow cells still attached to the page are released with tdbOsFree, then the page buffer
 * (pPage->pData, which also holds the SPage structure itself) is handed to xFree.
 *
 * pPage  page to destroy; must not be dirty
 * xFree  deallocator callback, invoked as xFree(arg, pPage->pData)
 * arg    opaque argument forwarded to xFree
 *
 * Returns 0 on success, -1 if the page is dirty or xFree is NULL (nothing is freed then).
 */
int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) {
  tdbTrace("tdb/page-destroy: %p/%d %p", pPage, pPage->id, xFree);

  if (pPage->isDirty) {
    tdbError("tdb/page-destroy: dirty page: %" PRIu8 ".", pPage->isDirty);
    return -1;
  }

  if (xFree == NULL) {
    tdbError("tdb/page-destroy: null xFree.");
    return -1;
  }

  int iCell = 0;
  while (iCell < pPage->nOverflow) {
    tdbTrace("tdbPage/destroy/free ovfl cell: %p/%p", pPage->apOvfl[iCell], pPage);
    tdbOsFree(pPage->apOvfl[iCell]);
    iCell++;
  }

  // pData is the start of the single allocation made in tdbPageCreate.
  xFree(arg, pPage->pData);

  return 0;
}

/*
 * Format a page as empty.
 *
 * The page header is placed szAmHdr bytes into the page data. The header is reset to zero
 * cells, an empty free list, a cell body that starts at the page footer and the maximum free
 * byte count. The in-memory cursors (pCellIdx, pFreeStart, pFreeEnd, pPageFtr) are derived
 * from that.
 *
 * pPage      page to format
 * szAmHdr    size of the access-method header that precedes the page header
 * xCellSize  cell-size callback stored on the page
 */
void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)) {
  tdbTrace("page/zero: %p %" PRIu8 " %p", pPage, szAmHdr, xCellSize);

  // In-memory bookkeeping that does not depend on the header contents.
  pPage->pPageHdr = pPage->pData + szAmHdr;
  pPage->pCellIdx = pPage->pPageHdr + TDB_PAGE_HDR_SIZE(pPage);
  pPage->pFreeStart = pPage->pCellIdx;
  pPage->pPageFtr = (SPageFtr *)(pPage->pData + pPage->pageSize - sizeof(SPageFtr));
  pPage->nOverflow = 0;
  pPage->xCellSize = xCellSize;

  // Header fields of an empty page.
  TDB_PAGE_NCELLS_SET(pPage, 0);
  TDB_PAGE_CCELLS_SET(pPage, pPage->pageSize - sizeof(SPageFtr));
  TDB_PAGE_FCELL_SET(pPage, 0);
  TDB_PAGE_NFREE_SET(pPage, TDB_PAGE_MAX_FREE_BLOCK(pPage, szAmHdr));

  // Read the cell-body offset back through the getter so pFreeEnd reflects what was stored.
  pPage->pFreeEnd = pPage->pData + TDB_PAGE_CCELLS(pPage);

  // Sanity check only: the free area of an empty page must end exactly at the footer.
  if (pPage->pFreeEnd != (u8 *)pPage->pPageFtr) {
    tdbError("tdb/page-zero: invalid page, pFreeEnd: %p, pPageFtr: %p", pPage->pFreeEnd, pPage->pPageFtr);
  }
}

/*
 * Attach the in-memory cursors of a page to data that is already in its buffer.
 *
 * If the header reports zero cells the page is formatted as empty via tdbPageZero. Otherwise
 * pCellIdx, pFreeStart, pFreeEnd and pPageFtr are derived from the stored header. The two
 * trailing checks only log an error when the derived free range is inconsistent with the
 * header; the function has no way to report the failure to its caller.
 *
 * pPage      page whose pData already holds the page image
 * szAmHdr    size of the access-method header that precedes the page header
 * xCellSize  cell-size callback stored on the page
 */
void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)) {
  tdbTrace("page/init: %p %" PRIu8 " %p", pPage, szAmHdr, xCellSize);
  pPage->pPageHdr = pPage->pData + szAmHdr;
  if (TDB_PAGE_NCELLS(pPage) == 0) {
    // ISO C forbids `return expr;` in a void function, so call first and return separately.
    tdbPageZero(pPage, szAmHdr, xCellSize);
    return;
  }
  pPage->pCellIdx = pPage->pPageHdr + TDB_PAGE_HDR_SIZE(pPage);
  pPage->pFreeStart = pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * TDB_PAGE_NCELLS(pPage);
  pPage->pFreeEnd = pPage->pData + TDB_PAGE_CCELLS(pPage);
  pPage->pPageFtr = (SPageFtr *)(pPage->pData + pPage->pageSize - sizeof(SPageFtr));
  pPage->nOverflow = 0;
  pPage->xCellSize = xCellSize;

  // The cell index must not run into the cell body.
  if (pPage->pFreeEnd < pPage->pFreeStart) {
    tdbError("tdb/page-init: invalid page, pFreeEnd: %p, pFreeStart: %p", pPage->pFreeEnd, pPage->pFreeStart);
    return;
  }
  // The contiguous gap can never be larger than the total free byte count in the header.
  if (pPage->pFreeEnd - pPage->pFreeStart > TDB_PAGE_NFREE(pPage)) {
    tdbError("tdb/page-init: invalid page, pFreeEnd: %p, pFreeStart: %p, NFREE: %d", pPage->pFreeEnd, pPage->pFreeStart,
             TDB_PAGE_NFREE(pPage));
    return;
  }
}

/*
 * Insert a copy of a cell at logical position idx.
 *
 * idx counts on-page cells and overflow cells together; the on-page ("local") position is
 * idx minus the number of overflow cells that sit before it. The cell is stored on the page
 * when asOvfl is 0 and the page has room for the cell plus one index slot. Otherwise a heap
 * copy is kept in the page's overflow arrays.
 *
 * pPage   target page
 * idx     logical insert position
 * pCell   cell bytes to copy
 * szCell  number of bytes in pCell
 * asOvfl  non-zero forces the cell to be stored as an overflow cell
 *
 * Returns 0 on success, -1 if the cell can never fit in a page, if memory for an overflow
 * cell cannot be obtained, or if the on-page allocation fails.
 */
int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl) {
  int    nFree;
  int    nCells;
  int    iOvfl;
  int    lidx;  // local idx
  SCell *pNewCell = NULL;

  if (szCell > TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)) {
    tdbError("tdb/page-insert-cell: invalid page, szCell: %d, max free: %lu", szCell,
             TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData));
    return -1;
  }

  nFree = TDB_PAGE_NFREE(pPage);
  nCells = TDB_PAGE_NCELLS(pPage);

  // Find the first overflow cell at or after idx; everything before it shifts idx locally.
  for (iOvfl = 0; iOvfl < pPage->nOverflow; ++iOvfl) {
    if (pPage->aiOvfl[iOvfl] >= idx) {
      break;
    }
  }

  lidx = idx - iOvfl;

  if (asOvfl || nFree < szCell + TDB_PAGE_OFFSET_SIZE(pPage)) {
    // TODO: make it extensible
    // NOTE(review): no capacity check on apOvfl/aiOvfl is visible here — confirm the caller
    // bounds nOverflow.
    // add the cell as an overflow cell

    // Allocate before touching the overflow arrays so a failure leaves the page unchanged.
    // TODO: here has memory leak
    pNewCell = (SCell *)tdbOsMalloc(szCell);
    if (pNewCell == NULL) {
      tdbError("tdb/page-insert-cell: failed to allocate overflow cell, szCell: %d.", szCell);
      return -1;
    }
    memcpy(pNewCell, pCell, szCell);

    tdbTrace("tdbPage/insert/new ovfl cell: %p/%p", pNewCell, pPage);

    // Open a slot at iOvfl.
    for (int i = pPage->nOverflow; i > iOvfl; i--) {
      pPage->apOvfl[i] = pPage->apOvfl[i - 1];
      pPage->aiOvfl[i] = pPage->aiOvfl[i - 1];
    }

    pPage->apOvfl[iOvfl] = pNewCell;
    pPage->aiOvfl[iOvfl] = idx;
    pPage->nOverflow++;
    iOvfl++;
  } else {
    // page must has enough space to hold the cell locally
    if (tdbPageAllocate(pPage, szCell, &pNewCell) != 0) {
      tdbError("tdb/page-insert-cell: failed to allocate cell in page, szCell: %d.", szCell);
      return -1;
    }

    memcpy(pNewCell, pCell, szCell);

    // Shift the index entries at and after lidx up by one slot. tdbPageAllocate has already
    // advanced pFreeStart past the new slot.
    u8 *src = pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * lidx;
    u8 *dest = src + TDB_PAGE_OFFSET_SIZE(pPage);
    memmove(dest, src, pPage->pFreeStart - dest);
    TDB_PAGE_CELL_OFFSET_AT_SET(pPage, lidx, pNewCell - pPage->pData);
    TDB_PAGE_NCELLS_SET(pPage, nCells + 1);

    if (pPage->pFreeStart != pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * (nCells + 1)) {
      tdbError("tdb/page-insert-cell: invalid page, pFreeStart: %p, pCellIdx: %p, nCells: %d", pPage->pFreeStart,
               pPage->pCellIdx, nCells);
      return -1;
    }
  }

  // Every overflow cell after the insert position moves one logical slot to the right.
  for (; iOvfl < pPage->nOverflow; iOvfl++) {
    pPage->aiOvfl[iOvfl]++;
  }

  return 0;
}

/*
 * Replace the cell at logical position idx: drop the old cell, then insert the new one at the
 * same position (never forced to overflow).
 *
 * Returns 0 on success. A failure of the drop step is returned without attempting the insert;
 * otherwise the result of tdbPageInsertCell is returned.
 */
int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell, TXN *pTxn, SBTree *pBt) {
  int ret = tdbPageDropCell(pPage, idx, pTxn, pBt);
  if (ret < 0) {
    // Do not insert on top of a cell that could not be removed.
    tdbError("tdb/page-update-cell: failed to drop cell, idx: %d.", idx);
    return ret;
  }

  return tdbPageInsertCell(pPage, idx, pCell, szCell, 0);
}

/*
 * Remove the cell at logical position idx.
 *
 * idx counts on-page cells and overflow cells together. An overflow cell is released with
 * tdbOsFree and removed from the overflow arrays. An on-page cell is sized through xCellSize,
 * returned to the page with tdbPageFree, and the header cell count is decremented.
 *
 * pPage  page to modify
 * idx    logical position of the cell to remove
 * pTxn   forwarded to the xCellSize callback
 * pBt    forwarded to the xCellSize callback
 *
 * Returns 0 on success, -1 if idx is out of range, if the on-page cell cannot be freed, or if
 * an overflow index becomes non-positive while being shifted.
 */
int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) {
  int    lidx;
  SCell *pCell;
  int    szCell;
  int    nCells;
  int    iOvfl;

  nCells = TDB_PAGE_NCELLS(pPage);

  if (idx < 0 || idx >= nCells + pPage->nOverflow) {
    tdbError("tdb/page-drop-cell: idx: %d out of range, nCells: %d, nOvfl: %d.", idx, nCells, pPage->nOverflow);
    return -1;
  }

  iOvfl = 0;
  for (; iOvfl < pPage->nOverflow; iOvfl++) {
    if (pPage->aiOvfl[iOvfl] == idx) {
      // remove the over flow cell
      // Trace before freeing so the pointer is still valid when it is printed.
      tdbTrace("tdbPage/drop/free ovfl cell: %p", pPage->apOvfl[iOvfl]);
      tdbOsFree(pPage->apOvfl[iOvfl]);
      // Close the gap; later overflow cells also move one logical slot to the left.
      for (; (++iOvfl) < pPage->nOverflow;) {
        pPage->aiOvfl[iOvfl - 1] = pPage->aiOvfl[iOvfl] - 1;
        pPage->apOvfl[iOvfl - 1] = pPage->apOvfl[iOvfl];
      }

      pPage->nOverflow--;
      return 0;
    } else if (pPage->aiOvfl[iOvfl] > idx) {
      break;
    }
  }

  // iOvfl is now the number of overflow cells that precede idx.
  lidx = idx - iOvfl;
  pCell = TDB_PAGE_CELL_AT(pPage, lidx);
  // Third argument is 1 here (0 is used elsewhere in this file).
  // NOTE(review): presumably asks the callback to also release off-page data — confirm.
  szCell = (*pPage->xCellSize)(pPage, pCell, 1, pTxn, pBt);
  if (tdbPageFree(pPage, lidx, pCell, szCell) != 0) {
    // Leave the cell count untouched: the index slot was not removed.
    tdbError("tdb/page-drop-cell: failed to free cell, idx: %d, szCell: %d.", idx, szCell);
    return -1;
  }
  TDB_PAGE_NCELLS_SET(pPage, nCells - 1);

  for (; iOvfl < pPage->nOverflow; iOvfl++) {
    pPage->aiOvfl[iOvfl]--;
    if (pPage->aiOvfl[iOvfl] <= 0) {
      tdbError("tdb/page-drop-cell: invalid ai idx: %d", pPage->aiOvfl[iOvfl]);
      return -1;
    }
  }

  return 0;
}

/*
 * Copy the contents of pFromPage into pToPage.
 *
 * Two regions are copied: page header plus cell index (pPageHdr .. pFreeStart) and the cell
 * body (pFreeEnd .. pPageFtr). The destination cursors are positioned so both regions keep
 * their distance from the destination's header and footer. If the two pages place their page
 * header at different offsets, the destination free byte count is adjusted by that difference.
 *
 * pFromPage     source page
 * pToPage       destination page; its pPageHdr and pPageFtr must already be set
 * deepCopyOvfl  non-zero: overflow cells are duplicated with tdbOsMalloc;
 *               zero: the destination shares the source's overflow cell pointers
 *
 * The function returns nothing. Inconsistencies are logged and the copy stops at that point.
 * If an overflow cell cannot be duplicated, pToPage->nOverflow is set to the number of cells
 * copied so far.
 */
void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) {
  int delta, nFree;

  pToPage->pFreeStart = pToPage->pPageHdr + (pFromPage->pFreeStart - pFromPage->pPageHdr);
  pToPage->pFreeEnd = (u8 *)(pToPage->pPageFtr) - ((u8 *)pFromPage->pPageFtr - pFromPage->pFreeEnd);

  if (pToPage->pFreeEnd < pToPage->pFreeStart) {
    tdbError("tdb/page-copy: invalid to page, pFreeStart: %p, pFreeEnd: %p", pToPage->pFreeStart, pToPage->pFreeEnd);
    return;
  }

  memcpy(pToPage->pPageHdr, pFromPage->pPageHdr, pFromPage->pFreeStart - pFromPage->pPageHdr);
  memcpy(pToPage->pFreeEnd, pFromPage->pFreeEnd, (u8 *)pFromPage->pPageFtr - pFromPage->pFreeEnd);

  // The header was copied verbatim, so its cell-body offset must match the new pFreeEnd.
  if (TDB_PAGE_CCELLS(pToPage) != pToPage->pFreeEnd - pToPage->pData) {
    tdbError("tdb/page-copy: invalid to page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pToPage),
             pToPage->pFreeEnd - pToPage->pData);
    return;
  }

  delta = (pToPage->pPageHdr - pToPage->pData) - (pFromPage->pPageHdr - pFromPage->pData);
  if (delta != 0) {
    nFree = TDB_PAGE_NFREE(pFromPage);
    TDB_PAGE_NFREE_SET(pToPage, nFree - delta);
  }

  // Copy the overflow cells
  for (int iOvfl = 0; iOvfl < pFromPage->nOverflow; iOvfl++) {
    SCell *pNewCell = pFromPage->apOvfl[iOvfl];
    if (deepCopyOvfl) {
      int szCell = (*pFromPage->xCellSize)(pFromPage, pFromPage->apOvfl[iOvfl], 0, NULL, NULL);
      pNewCell = (SCell *)tdbOsMalloc(szCell);
      if (pNewCell == NULL) {
        // Keep the destination consistent with what was actually copied.
        tdbError("tdb/page-copy: failed to allocate overflow cell, szCell: %d.", szCell);
        pToPage->nOverflow = iOvfl;
        return;
      }
      memcpy(pNewCell, pFromPage->apOvfl[iOvfl], szCell);
      tdbTrace("tdbPage/copy/new ovfl cell: %p/%p/%p", pNewCell, pToPage, pFromPage);
    }

    pToPage->apOvfl[iOvfl] = pNewCell;
    pToPage->aiOvfl[iOvfl] = pFromPage->aiOvfl[iOvfl];
  }
  pToPage->nOverflow = pFromPage->nOverflow;
}

int tdbPageCapacity(int pageSize, int amHdrSize) {
  int szPageHdr;
319
  int minCellIndexSize;  // at least one cell in cell index
H
Hongze Cheng 已提交
320 321 322

  if (pageSize < 65536) {
    szPageHdr = pageMethods.szPageHdr;
M
Minglei Jin 已提交
323
    minCellIndexSize = pageMethods.szOffset;
H
Hongze Cheng 已提交
324 325
  } else {
    szPageHdr = pageLargeMethods.szPageHdr;
M
Minglei Jin 已提交
326
    minCellIndexSize = pageLargeMethods.szOffset;
H
Hongze Cheng 已提交
327 328
  }

M
Minglei Jin 已提交
329
  return pageSize - szPageHdr - amHdrSize - sizeof(SPageFtr) - minCellIndexSize;
H
Hongze Cheng 已提交
330 331
}

/*
 * Reserve szCell bytes of cell content plus room for one cell-index slot.
 *
 * Three strategies are tried in order:
 *   1. carve the cell from the end of the contiguous gap between the cell index and the body;
 *   2. take the first free-list cell that is large enough (first fit);
 *   3. defragment the page and carve from the (now maximal) contiguous gap.
 *
 * On success pFreeStart is advanced by one index slot and the header free byte count is
 * reduced by szCell plus one slot. Writing the slot itself is left to the caller.
 *
 * pPage   page to allocate from
 * szCell  number of content bytes required
 * ppCell  out: start of the reserved bytes; NULL on failure
 *
 * Returns 0 on success, -1 if the page lacks space or an inconsistency is detected.
 */
static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) {
  SCell *pFreeCell;
  int    nFree;
  int    cellFree;
  SCell *pCell = NULL;

  *ppCell = NULL;
  nFree = TDB_PAGE_NFREE(pPage);

  if (nFree < szCell + TDB_PAGE_OFFSET_SIZE(pPage)) {
    tdbError("tdb/page-allocate: invalid cell size, nFree: %d, szCell: %d, szOffset: %d", nFree, szCell,
             TDB_PAGE_OFFSET_SIZE(pPage));
    return -1;
  }
  if (TDB_PAGE_CCELLS(pPage) != pPage->pFreeEnd - pPage->pData) {
    tdbError("tdb/page-allocate: invalid page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pPage),
             pPage->pFreeEnd - pPage->pData);
    return -1;
  }

  // 1. Try to allocate from the free space block area
  if (pPage->pFreeEnd - pPage->pFreeStart >= szCell + TDB_PAGE_OFFSET_SIZE(pPage)) {
    pPage->pFreeEnd -= szCell;
    pCell = pPage->pFreeEnd;
    TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData);
    goto _alloc_finish;
  }

  // 2. Try to allocate from the page free list
  cellFree = TDB_PAGE_FCELL(pPage);
  // A free cell can only live inside the cell body, i.e. at or after pFreeEnd.
  if (cellFree != 0 && cellFree < pPage->pFreeEnd - pPage->pData) {
    tdbError("tdb/page-allocate: cellFree: %d, pFreeEnd: %p, pData: %p.", cellFree, pPage->pFreeEnd, pPage->pData);
    return -1;
  }
  // The contiguous gap must still hold the new index slot.
  if (cellFree && pPage->pFreeEnd - pPage->pFreeStart >= TDB_PAGE_OFFSET_SIZE(pPage)) {
    SCell *pPrevFreeCell = NULL;
    int    szPrevFreeCell;
    int    szFreeCell;
    int    nxFreeCell;
    int    newSize;

    for (;;) {
      if (cellFree == 0) break;

      pFreeCell = pPage->pData + cellFree;
      pPage->pPageMethods->getFreeCellInfo(pFreeCell, &szFreeCell, &nxFreeCell);

      if (szFreeCell >= szCell) {
        pCell = pFreeCell;

        newSize = szFreeCell - szCell;
        pFreeCell += szCell;
        if (newSize >= TDB_PAGE_FREE_CELL_SIZE(pPage)) {
          // The remainder is big enough to stay on the free list in place of the old cell.
          pPage->pPageMethods->setFreeCellInfo(pFreeCell, newSize, nxFreeCell);
          if (pPrevFreeCell) {
            pPage->pPageMethods->setFreeCellInfo(pPrevFreeCell, szPrevFreeCell, pFreeCell - pPage->pData);
          } else {
            TDB_PAGE_FCELL_SET(pPage, pFreeCell - pPage->pData);
          }
        } else {
          // The remainder cannot hold a free-cell record: unlink the whole cell.
          if (pPrevFreeCell) {
            pPage->pPageMethods->setFreeCellInfo(pPrevFreeCell, szPrevFreeCell, nxFreeCell);
          } else {
            TDB_PAGE_FCELL_SET(pPage, nxFreeCell);
          }
        }

        goto _alloc_finish;
      } else {
        pPrevFreeCell = pFreeCell;
        szPrevFreeCell = szFreeCell;
        cellFree = nxFreeCell;
      }
    }
  }

  // 3. Try to defragment and allocate again
  if (tdbPageDefragment(pPage) != 0) {
    tdbError("tdb/page-allocate: failed to defragment page, nFree: %d.", nFree);
    return -1;
  }
  // After a successful defragment all free bytes form the single contiguous gap.
  if (pPage->pFreeEnd - pPage->pFreeStart != nFree) {
    tdbError("tdb/page-allocate: nFree: %d, pFreeStart: %p, pFreeEnd: %p.", nFree, pPage->pFreeStart, pPage->pFreeEnd);
    return -1;
  }
  if (TDB_PAGE_NFREE(pPage) != nFree) {
    tdbError("tdb/page-allocate: nFree: %d, page free: %d.", nFree, TDB_PAGE_NFREE(pPage));
    return -1;
  }
  if (pPage->pFreeEnd - pPage->pData != TDB_PAGE_CCELLS(pPage)) {
    tdbError("tdb/page-allocate: ccells: %d, pFreeStart: %p, pData: %p.", TDB_PAGE_CCELLS(pPage), pPage->pFreeStart,
             pPage->pData);
    return -1;
  }

  pPage->pFreeEnd -= szCell;
  pCell = pPage->pFreeEnd;
  TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData);

_alloc_finish:
  if (NULL == pCell) {
    tdbError("tdb/page-allocate: null ptr pCell.");
    return -1;
  }

  // Account for the index slot the caller is about to fill.
  pPage->pFreeStart += TDB_PAGE_OFFSET_SIZE(pPage);
  TDB_PAGE_NFREE_SET(pPage, nFree - szCell - TDB_PAGE_OFFSET_SIZE(pPage));
  *ppCell = pCell;
  return 0;
}

/*
 * Give the bytes of an on-page cell back to the page and remove its index slot.
 *
 * A cell that starts exactly at pFreeEnd is merged into the contiguous gap. Any other cell is
 * pushed onto the head of the page free list, which requires it to be at least
 * TDB_PAGE_FREE_CELL_SIZE bytes. The header cell count is NOT changed here.
 *
 * pPage   page that owns the cell
 * idx     on-page (local) index of the cell
 * pCell   address of the cell; must equal TDB_PAGE_CELL_AT(pPage, idx)
 * szCell  size of the cell in bytes
 *
 * Returns 0 on success, -1 if the cell lies outside the cell body, does not match the index
 * entry, or is too small to be linked into the free list.
 */
static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell) {
  int nFree;
  int cellFree;
  u8 *dest;
  u8 *src;

  if (pCell < pPage->pFreeEnd) {
    tdbError("tdb/page-free: invalid cell, cell: %p, free end: %p", pCell, pPage->pFreeEnd);
    return -1;
  }
  if (pCell + szCell > (u8 *)(pPage->pPageFtr)) {
    // Report the footer address the check compares against (this used to log pFreeEnd).
    tdbError("tdb/page-free: cell crosses page footer, cell: %p, size: %d footer: %p", pCell, szCell, pPage->pPageFtr);
    return -1;
  }
  if (pCell != TDB_PAGE_CELL_AT(pPage, idx)) {
    tdbError("tdb/page-free: cell pos incorrect, cell: %p, pos: %p", pCell, TDB_PAGE_CELL_AT(pPage, idx));
    return -1;
  }

  nFree = TDB_PAGE_NFREE(pPage);

  if (pCell == pPage->pFreeEnd) {
    // The cell borders the gap: simply grow the gap.
    pPage->pFreeEnd += szCell;
    TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData);
  } else {
    if (szCell >= TDB_PAGE_FREE_CELL_SIZE(pPage)) {
      // Link the cell in as the new head of the free list.
      cellFree = TDB_PAGE_FCELL(pPage);
      pPage->pPageMethods->setFreeCellInfo(pCell, szCell, cellFree);
      TDB_PAGE_FCELL_SET(pPage, pCell - pPage->pData);
    } else {
      tdbError("tdb/page-free: invalid cell size: %d", szCell);
      return -1;
    }
  }

  // Close the hole in the cell index left by slot idx.
  dest = pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * idx;
  src = dest + TDB_PAGE_OFFSET_SIZE(pPage);
  memmove(dest, src, pPage->pFreeStart - src);

  pPage->pFreeStart -= TDB_PAGE_OFFSET_SIZE(pPage);
  nFree = nFree + szCell + TDB_PAGE_OFFSET_SIZE(pPage);
  TDB_PAGE_NFREE_SET(pPage, nFree);
  return 0;
}

/*
 * Compact all on-page cells against the page footer so that every free byte ends up in the
 * single gap between the cell index and the cell body.
 *
 * Each pass picks the not-yet-moved cell with the highest address and slides it up against
 * the cells already packed, updating its index entry. Afterwards the free list is cleared
 * and the cell-body offset in the header is rewritten.
 *
 * Returns 0 on success, -1 if the page is not fragmented to begin with or an inconsistency is
 * found while compacting.
 */
static int tdbPageDefragment(SPage *pPage) {
  int nFree = TDB_PAGE_NFREE(pPage);
  int nCells = TDB_PAGE_NCELLS(pPage);

  if (pPage->pFreeEnd - pPage->pFreeStart >= nFree) {
    tdbError("tdb/page-defragment: invalid free range, nFree: %d.", nFree);
    return -1;
  }

  // Loop to compact the page content
  // Here we use an O(n^2) algorithm to do the job since
  // this is a low frequency job.
  SCell *pPacked = (u8 *)pPage->pPageFtr;  // lowest address of the already packed region
  for (int nDone = 0; nDone < nCells; nDone++) {
    SCell *pPick = NULL;
    int    pickIdx = 0;

    // Among the cells still below the packed region, pick the one with the highest address.
    for (int i = 0; i < nCells; i++) {
      if (TDB_PAGE_CELL_OFFSET_AT(pPage, i) >= pPacked - pPage->pData) {
        continue;
      }

      SCell *pCand = TDB_PAGE_CELL_AT(pPage, i);
      if (pPick == NULL || pPick < pCand) {
        pPick = pCand;
        pickIdx = i;
      }
    }

    if (NULL == pPick) {
      tdbError("tdb/page-defragment: null ptr pCell.");
      return -1;
    }

    int szCell = (*pPage->xCellSize)(pPage, pPick, 0, NULL, NULL);

    if (pPick + szCell > pPacked) {
      tdbError("tdb/page-defragment: invalid cell range, pCell: %p, szCell: %d, pNextCell: %p.", pPick, szCell,
               pPacked);
      return -1;
    }

    // Only move the cell when there is a hole between it and the packed region.
    if (pPick + szCell < pPacked) {
      memmove(pPacked - szCell, pPick, szCell);
    }

    pPacked -= szCell;
    TDB_PAGE_CELL_OFFSET_AT_SET(pPage, pickIdx, pPacked - pPage->pData);
  }

  // compact over
  pPage->pFreeEnd = pPacked;

  if (pPage->pFreeEnd - pPage->pFreeStart != nFree) {
    tdbError("tdb/page-defragment: invalid free range, nFree: %d.", nFree);
    return -1;
  }

  TDB_PAGE_CCELLS_SET(pPage, pPage->pFreeEnd - pPage->pData);
  TDB_PAGE_FCELL_SET(pPage, 0);

  return 0;
}

/* ---------------------------------------------------------------------------------------------------------- */
#pragma pack(push, 1)
wafwerar's avatar
wafwerar 已提交
565
typedef struct {
H
Hongze Cheng 已提交
566 567 568 569 570 571
  u16 cellNum;
  u16 cellBody;
  u16 cellFree;
  u16 nFree;
} SPageHdr;

wafwerar's avatar
wafwerar 已提交
572
typedef struct {
H
Hongze Cheng 已提交
573 574 575
  u16 szCell;
  u16 nxOffset;
} SFreeCell;
wafwerar's avatar
wafwerar 已提交
576
#pragma pack(pop)
H
Hongze Cheng 已提交
577

H
Hongze Cheng 已提交
578 579 580
// cellNum
static inline int  getPageCellNum(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellNum; }
static inline void setPageCellNum(SPage *pPage, int cellNum) {
581 582 583 584
  if (cellNum >= 65536) {
    tdbError("tdb/page-set-cell-num: invalid cellNum: %d.", cellNum);
    return;
  }
H
Hongze Cheng 已提交
585 586 587 588 589 590
  ((SPageHdr *)(pPage->pPageHdr))[0].cellNum = (u16)cellNum;
}

// cellBody
static inline int  getPageCellBody(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellBody; }
static inline void setPageCellBody(SPage *pPage, int cellBody) {
591 592 593 594
  if (cellBody >= 65536) {
    tdbError("tdb/page-set-cell-body: invalid cellBody: %d.", cellBody);
    return;
  }
H
Hongze Cheng 已提交
595 596 597 598 599 600
  ((SPageHdr *)(pPage->pPageHdr))[0].cellBody = (u16)cellBody;
}

// cellFree
static inline int  getPageCellFree(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].cellFree; }
static inline void setPageCellFree(SPage *pPage, int cellFree) {
601 602 603 604
  if (cellFree >= 65536) {
    tdbError("tdb/page-set-cell-free: invalid cellFree: %d.", cellFree);
    return;
  }
H
Hongze Cheng 已提交
605 606 607 608 609 610
  ((SPageHdr *)(pPage->pPageHdr))[0].cellFree = (u16)cellFree;
}

// nFree
static inline int  getPageNFree(SPage *pPage) { return ((SPageHdr *)(pPage->pPageHdr))[0].nFree; }
static inline void setPageNFree(SPage *pPage, int nFree) {
611 612 613 614
  if (nFree >= 65536) {
    tdbError("tdb/page-set-nfree: invalid nFree: %d.", nFree);
    return;
  }
H
Hongze Cheng 已提交
615 616 617 618 619
  ((SPageHdr *)(pPage->pPageHdr))[0].nFree = (u16)nFree;
}

// cell offset
static inline int getPageCellOffset(SPage *pPage, int idx) {
620 621 622 623 624 625
  int cellNum = getPageCellNum(pPage);
  if (idx < 0 || idx >= cellNum) {
    tdbError("tdb/page-cell-offset: idx: %d out of range[%d, %d).", idx, 0, cellNum);
    return -1;
  }

H
Hongze Cheng 已提交
626 627 628 629
  return ((u16 *)pPage->pCellIdx)[idx];
}

static inline void setPageCellOffset(SPage *pPage, int idx, int offset) {
630 631 632 633
  if (offset >= 65536) {
    tdbError("tdb/page-set-cell-offset: invalid offset: %d.", offset);
    return;
  }
H
Hongze Cheng 已提交
634 635 636
  ((u16 *)pPage->pCellIdx)[idx] = (u16)offset;
}

H
Hongze Cheng 已提交
637 638 639 640 641 642 643 644 645 646 647 648 649
// free cell info
static inline void getPageFreeCellInfo(SCell *pCell, int *szCell, int *nxOffset) {
  SFreeCell *pFreeCell = (SFreeCell *)pCell;
  *szCell = pFreeCell->szCell;
  *nxOffset = pFreeCell->nxOffset;
}

static inline void setPageFreeCellInfo(SCell *pCell, int szCell, int nxOffset) {
  SFreeCell *pFreeCell = (SFreeCell *)pCell;
  pFreeCell->szCell = szCell;
  pFreeCell->nxOffset = nxOffset;
}

SPageMethods pageMethods = {
H
Hongze Cheng 已提交
651 652 653
    2,                    // szOffset
    sizeof(SPageHdr),     // szPageHdr
    sizeof(SFreeCell),    // szFreeCell
H
Hongze Cheng 已提交
654 655 656 657 658 659 660 661 662 663 664 665
    getPageCellNum,       // getCellNum
    setPageCellNum,       // setCellNum
    getPageCellBody,      // getCellBody
    setPageCellBody,      // setCellBody
    getPageCellFree,      // getCellFree
    setPageCellFree,      // setCellFree
    getPageNFree,         // getFreeBytes
    setPageNFree,         // setFreeBytes
    getPageCellOffset,    // getCellOffset
    setPageCellOffset,    // setCellOffset
    getPageFreeCellInfo,  // getFreeCellInfo
    setPageFreeCellInfo   // setFreeCellInfo
H
Hongze Cheng 已提交
666 667
};

#pragma pack(push, 1)
wafwerar's avatar
wafwerar 已提交
669
typedef struct {
H
Hongze Cheng 已提交
670 671 672 673 674 675
  u8 cellNum[3];
  u8 cellBody[3];
  u8 cellFree[3];
  u8 nFree[3];
} SPageHdrL;

wafwerar's avatar
wafwerar 已提交
676
typedef struct {
H
Hongze Cheng 已提交
677 678 679
  u8 szCell[3];
  u8 nxOffset[3];
} SFreeCellL;
wafwerar's avatar
wafwerar 已提交
680
#pragma pack(pop)
H
Hongze Cheng 已提交
681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707

// cellNum
static inline int  getLPageCellNum(SPage *pPage) { return TDB_GET_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellNum); }
static inline void setLPageCellNum(SPage *pPage, int cellNum) {
  TDB_PUT_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellNum, cellNum);
}

// cellBody
static inline int  getLPageCellBody(SPage *pPage) { return TDB_GET_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellBody); }
static inline void setLPageCellBody(SPage *pPage, int cellBody) {
  TDB_PUT_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellBody, cellBody);
}

// cellFree
static inline int  getLPageCellFree(SPage *pPage) { return TDB_GET_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellFree); }
static inline void setLPageCellFree(SPage *pPage, int cellFree) {
  TDB_PUT_U24(((SPageHdrL *)(pPage->pPageHdr))[0].cellFree, cellFree);
}

// nFree
static inline int  getLPageNFree(SPage *pPage) { return TDB_GET_U24(((SPageHdrL *)(pPage->pPageHdr))[0].nFree); }
static inline void setLPageNFree(SPage *pPage, int nFree) {
  TDB_PUT_U24(((SPageHdrL *)(pPage->pPageHdr))[0].nFree, nFree);
}

// cell offset
static inline int getLPageCellOffset(SPage *pPage, int idx) {
708 709 710 711 712 713
  int cellNum = getLPageCellNum(pPage);
  if (idx < 0 || idx >= cellNum) {
    tdbError("tdb/lpage-cell-offset: idx: %d out of range[%d, %d).", idx, 0, cellNum);
    return -1;
  }

H
Hongze Cheng 已提交
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749
  return TDB_GET_U24(pPage->pCellIdx + 3 * idx);
}

static inline void setLPageCellOffset(SPage *pPage, int idx, int offset) {
  TDB_PUT_U24(pPage->pCellIdx + 3 * idx, offset);
}

// free cell info
static inline void getLPageFreeCellInfo(SCell *pCell, int *szCell, int *nxOffset) {
  SFreeCellL *pFreeCell = (SFreeCellL *)pCell;
  *szCell = TDB_GET_U24(pFreeCell->szCell);
  *nxOffset = TDB_GET_U24(pFreeCell->nxOffset);
}

static inline void setLPageFreeCellInfo(SCell *pCell, int szCell, int nxOffset) {
  SFreeCellL *pFreeCell = (SFreeCellL *)pCell;
  TDB_PUT_U24(pFreeCell->szCell, szCell);
  TDB_PUT_U24(pFreeCell->nxOffset, nxOffset);
}

SPageMethods pageLargeMethods = {
    3,                     // szOffset
    sizeof(SPageHdrL),     // szPageHdr
    sizeof(SFreeCellL),    // szFreeCell
    getLPageCellNum,       // getCellNum
    setLPageCellNum,       // setCellNum
    getLPageCellBody,      // getCellBody
    setLPageCellBody,      // setCellBody
    getLPageCellFree,      // getCellFree
    setLPageCellFree,      // setCellFree
    getLPageNFree,         // getFreeBytes
    setLPageNFree,         // setFreeBytes
    getLPageCellOffset,    // getCellOffset
    setLPageCellOffset,    // setCellOffset
    getLPageFreeCellInfo,  // getFreeCellInfo
    setLPageFreeCellInfo   // setFreeCellInfo
M
Minglei Jin 已提交
750
};