提交 c440e553 编写于 作者: H Haojun Liao

[td-13039] add page compression.

上级 d3228cf9
...@@ -136,6 +136,11 @@ int32_t getPageId(const SPageInfo* pPgInfo); ...@@ -136,6 +136,11 @@ int32_t getPageId(const SPageInfo* pPgInfo);
*/ */
int32_t getBufPageSize(const SDiskbasedBuf* pBuf); int32_t getBufPageSize(const SDiskbasedBuf* pBuf);
/**
*
* @param pBuf
* @return
*/
int32_t getNumOfInMemBufPages(const SDiskbasedBuf* pBuf); int32_t getNumOfInMemBufPages(const SDiskbasedBuf* pBuf);
/** /**
...@@ -147,10 +152,10 @@ bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf); ...@@ -147,10 +152,10 @@ bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf);
/** /**
* Set the buffer page is dirty, and needs to be flushed to disk when swap out. * Set the buffer page is dirty, and needs to be flushed to disk when swap out.
* @param pPageInfo * @param pPage
* @param dirty * @param dirty
*/ */
void setBufPageDirty(void* pPageInfo, bool dirty); void setBufPageDirty(void* pPage, bool dirty);
/** /**
* Set the compress/ no-compress flag for paged buffer, when flushing data in disk. * Set the compress/ no-compress flag for paged buffer, when flushing data in disk.
...@@ -158,6 +163,13 @@ void setBufPageDirty(void* pPageInfo, bool dirty); ...@@ -158,6 +163,13 @@ void setBufPageDirty(void* pPageInfo, bool dirty);
*/ */
void setBufPageCompressOnDisk(SDiskbasedBuf* pBuf, bool comp); void setBufPageCompressOnDisk(SDiskbasedBuf* pBuf, bool comp);
/**
 * Mark a buffer page as no longer needed so that its resources can be reused.
 * The page's "used" and "dirty" flags are cleared; if the page already has a
 * position in the backing file, that region is appended to the buffer's free list.
 * @param pBuf   the paged buffer that owns the page
 * @param pPage  the page payload pointer (not a page id)
 */
void dBufSetBufPageRecycled(SDiskbasedBuf *pBuf, void* pPage);
/** /**
* Print the statistics when closing this buffer * Print the statistics when closing this buffer
* @param pBuf * @param pBuf
......
...@@ -49,7 +49,7 @@ typedef struct SLHashNode { ...@@ -49,7 +49,7 @@ typedef struct SLHashNode {
} SLHashNode; } SLHashNode;
#define GET_LHASH_NODE_KEY(_n) (((char*)(_n)) + sizeof(SLHashNode)) #define GET_LHASH_NODE_KEY(_n) (((char*)(_n)) + sizeof(SLHashNode))
#define GET_LHASH_NODE_DATA(_n) ((char*)(_n) + sizeof(SLHashNode) + (_n)->keyLen) #define GET_LHASH_NODE_DATA(_n) ((char*)(_n) + sizeof(SLHashNode) + ((SLHashNode*)(_n))->keyLen)
#define GET_LHASH_NODE_LEN(_n) (sizeof(SLHashNode) + ((SLHashNode*)(_n))->keyLen + ((SLHashNode*)(_n))->dataLen) #define GET_LHASH_NODE_LEN(_n) (sizeof(SLHashNode) + ((SLHashNode*)(_n))->keyLen + ((SLHashNode*)(_n))->dataLen)
static int32_t doAddNewBucket(SLHashObj* pHashObj); static int32_t doAddNewBucket(SLHashObj* pHashObj);
...@@ -60,58 +60,13 @@ static int32_t doGetBucketIdFromHashVal(int32_t hashv, int32_t bits) { ...@@ -60,58 +60,13 @@ static int32_t doGetBucketIdFromHashVal(int32_t hashv, int32_t bits) {
static int32_t doGetAlternativeBucketId(int32_t bucketId, int32_t bits, int32_t numOfBuckets) { static int32_t doGetAlternativeBucketId(int32_t bucketId, int32_t bits, int32_t numOfBuckets) {
int32_t v = bucketId - (1ul << (bits - 1)); int32_t v = bucketId - (1ul << (bits - 1));
ASSERT(v < numOfBuckets); ASSERT(v < numOfBuckets);
return v; return v;
} }
SLHashObj* tHashInit(int32_t inMemPages, int32_t pageSize, _hash_fn_t fn, int32_t numOfTuplePerPage) { static int32_t doGetRelatedSplitBucketId(int32_t bucketId, int32_t bits) {
SLHashObj* pHashObj = calloc(1, sizeof(SLHashObj)); int32_t splitBucketId = (1ul << (bits - 1)) ^ bucketId;
if (pHashObj == NULL) { return splitBucketId;
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
int32_t code = createDiskbasedBuf(&pHashObj->pBuf, pageSize, inMemPages * pageSize, 0, "/tmp");
if (code != 0) {
terrno = code;
return NULL;
}
setBufPageCompressOnDisk(pHashObj->pBuf, false);
/**
* The number of bits in the hash value, which is used to decide the exact bucket where the object should be located in.
* The initial value is 0.
*/
pHashObj->bits = 0;
pHashObj->hashFn = fn;
pHashObj->tuplesPerPage = numOfTuplePerPage;
pHashObj->numOfAlloc = 4; // initial allocated array list
pHashObj->pBucket = calloc(pHashObj->numOfAlloc, POINTER_BYTES);
code = doAddNewBucket(pHashObj);
if (code != TSDB_CODE_SUCCESS) {
destroyDiskbasedBuf(pHashObj->pBuf);
tfree(pHashObj);
terrno = code;
return NULL;
}
return pHashObj;
}
void* tHashCleanup(SLHashObj* pHashObj) {
destroyDiskbasedBuf(pHashObj->pBuf);
for(int32_t i = 0; i < pHashObj->numOfBuckets; ++i) {
taosArrayDestroy(pHashObj->pBucket[i]->pPageIdList);
tfree(pHashObj->pBucket[i]);
}
tfree(pHashObj->pBucket);
tfree(pHashObj);
return NULL;
} }
static void doCopyObject(char* p, const void* key, int32_t keyLen, const void* data, int32_t size) { static void doCopyObject(char* p, const void* key, int32_t keyLen, const void* data, int32_t size) {
...@@ -135,7 +90,7 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t ...@@ -135,7 +90,7 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t
// put to current buf page // put to current buf page
size_t nodeSize = sizeof(SLHashNode) + keyLen + size; size_t nodeSize = sizeof(SLHashNode) + keyLen + size;
ASSERT(nodeSize <= getBufPageSize(pHashObj->pBuf)); ASSERT(nodeSize + sizeof(SFilePage) <= getBufPageSize(pHashObj->pBuf));
if (pPage->num + nodeSize > getBufPageSize(pHashObj->pBuf)) { if (pPage->num + nodeSize > getBufPageSize(pHashObj->pBuf)) {
releaseBufPage(pHashObj->pBuf, pPage); releaseBufPage(pHashObj->pBuf, pPage);
...@@ -143,19 +98,19 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t ...@@ -143,19 +98,19 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t
// allocate the overflow buffer page to hold this k/v. // allocate the overflow buffer page to hold this k/v.
int32_t newPageId = -1; int32_t newPageId = -1;
SFilePage* pNewPage = getNewBufPage(pHashObj->pBuf, 0, &newPageId); SFilePage* pNewPage = getNewBufPage(pHashObj->pBuf, 0, &newPageId);
if (pNewPage == 0) { if (pNewPage == NULL) {
// TODO handle error return TSDB_CODE_OUT_OF_MEMORY;
} }
taosArrayPush(pBucket->pPageIdList, &newPageId); taosArrayPush(pBucket->pPageIdList, &newPageId);
doCopyObject(pNewPage->data, key, keyLen, data, size); doCopyObject(pNewPage->data, key, keyLen, data, size);
pNewPage->num = nodeSize; pNewPage->num = sizeof(SFilePage) + nodeSize;
setBufPageDirty(pNewPage, true); setBufPageDirty(pNewPage, true);
releaseBufPage(pHashObj->pBuf, pNewPage); releaseBufPage(pHashObj->pBuf, pNewPage);
} else { } else {
char* p = pPage->data + pPage->num; char* p = (char*) pPage + pPage->num;
doCopyObject(p, key, keyLen, data, size); doCopyObject(p, key, keyLen, data, size);
pPage->num += nodeSize; pPage->num += nodeSize;
setBufPageDirty(pPage, true); setBufPageDirty(pPage, true);
...@@ -163,17 +118,18 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t ...@@ -163,17 +118,18 @@ static int32_t doAddToBucket(SLHashObj* pHashObj, SLHashBucket* pBucket, int32_t
} }
pBucket->size += 1; pBucket->size += 1;
// printf("===> add to bucket:0x%x, num:%d, key:%d\n", index, pBucket->size, *(int*) key); printf("===> add to bucket:0x%x, num:%d, key:%d\n", index, pBucket->size, *(int*) key);
return TSDB_CODE_SUCCESS;
} }
// TODO merge the fragments on multiple pages to recycle the empty disk page ASAP
static void doRemoveFromBucket(SFilePage* pPage, SLHashNode* pNode, SLHashBucket* pBucket) { static void doRemoveFromBucket(SFilePage* pPage, SLHashNode* pNode, SLHashBucket* pBucket) {
ASSERT(pPage != NULL && pNode != NULL); ASSERT(pPage != NULL && pNode != NULL && pBucket->size >= 1);
int32_t len = GET_LHASH_NODE_LEN(pNode); int32_t len = GET_LHASH_NODE_LEN(pNode);
char* p = (char*) pNode + len; char* p = (char*) pNode + len;
char* pEnd = pPage->data + pPage->num; char* pEnd = (char*)pPage + pPage->num;
memmove(pNode, p, (pEnd - p)); memmove(pNode, p, (pEnd - p));
pPage->num -= len; pPage->num -= len;
...@@ -182,9 +138,50 @@ static void doRemoveFromBucket(SFilePage* pPage, SLHashNode* pNode, SLHashBucket ...@@ -182,9 +138,50 @@ static void doRemoveFromBucket(SFilePage* pPage, SLHashNode* pNode, SLHashBucket
} }
setBufPageDirty(pPage, true); setBufPageDirty(pPage, true);
pBucket->size -= 1; pBucket->size -= 1;
} }
/**
 * Compact a bucket by moving entries from its last page into the free space of
 * its first page.
 *
 * Entries are moved from the front of the last page, one at a time, for as long
 * as the next entry fits into the first page. If the last page becomes (or
 * already is) empty it is recycled and removed from the bucket's page id list;
 * otherwise the entries left on it are shifted to the start of its data area.
 *
 * SFilePage::num is treated as the number of bytes in use INCLUDING the
 * SFilePage header, so an empty page has num == sizeof(SFilePage).
 *
 * Both pages are obtained with getBufPage() and are handed back before
 * returning: the first page via releaseBufPage(), the last page via either
 * releaseBufPage() or dBufSetBufPageRecycled().
 *
 * @param pHashObj  the linear hash object that owns the paged buffer
 * @param pBucket   the bucket whose pages are compacted
 */
static void doCompressBucketPages(SLHashObj *pHashObj, SLHashBucket* pBucket) {
  size_t numOfPages = taosArrayGetSize(pBucket->pPageIdList);
  if (numOfPages <= 1) {
    return;
  }

  int32_t*   firstPage = taosArrayGet(pBucket->pPageIdList, 0);
  SFilePage* pFirst    = getBufPage(pHashObj->pBuf, *firstPage);

  int32_t*   pageId = taosArrayGetLast(pBucket->pPageIdList);
  SFilePage* pLast  = getBufPage(pHashObj->pBuf, *pageId);

  const int32_t headerSize = (int32_t)sizeof(SFilePage);
  const int32_t pageSize   = getBufPageSize(pHashObj->pBuf);

  char* pStart   = pLast->data;  // first entry of the last page that has not been moved yet
  bool  moved    = false;        // at least one entry has been copied into the first page
  bool  recycled = false;        // the last page has been handed back to the buffer

  while (1) {
    // Nothing (left) on the last page: recycle it. Checking this before reading the
    // node header avoids interpreting stale bytes of an empty page as an entry.
    if (pLast->num <= headerSize) {
      dBufSetBufPageRecycled(pHashObj->pBuf, pLast);
      taosArrayRemove(pBucket->pPageIdList, numOfPages - 1);
      recycled = true;
      break;
    }

    int32_t nodeSize = GET_LHASH_NODE_LEN(pStart);
    if (pFirst->num + nodeSize >= pageSize) {
      break;  // the next entry does not fit into the first page
    }

    char*       p     = ((char*)pFirst) + pFirst->num;
    SLHashNode* pNode = (SLHashNode*)pStart;
    doCopyObject(p, GET_LHASH_NODE_KEY(pStart), pNode->keyLen, GET_LHASH_NODE_DATA(pStart), pNode->dataLen);

    pFirst->num += nodeSize;
    pLast->num  -= nodeSize;
    pStart      += nodeSize;
    moved = true;
  }

  if (moved) {
    setBufPageDirty(pFirst, true);
  }

  if (!recycled) {
    if (moved) {
      // pLast->num has already been reduced by the moved bytes, so the bytes that
      // remain are exactly (num - header); they start at pStart.
      memmove(pLast->data, pStart, (size_t)(pLast->num - headerSize));
      setBufPageDirty(pLast, true);
    }
    releaseBufPage(pHashObj->pBuf, pLast);
  }

  releaseBufPage(pHashObj->pBuf, pFirst);
}
static int32_t doAddNewBucket(SLHashObj* pHashObj) { static int32_t doAddNewBucket(SLHashObj* pHashObj) {
if (pHashObj->numOfBuckets + 1 > pHashObj->numOfAlloc) { if (pHashObj->numOfBuckets + 1 > pHashObj->numOfAlloc) {
int32_t newLen = pHashObj->numOfAlloc * 1.25; int32_t newLen = pHashObj->numOfAlloc * 1.25;
...@@ -212,15 +209,66 @@ static int32_t doAddNewBucket(SLHashObj* pHashObj) { ...@@ -212,15 +209,66 @@ static int32_t doAddNewBucket(SLHashObj* pHashObj) {
int32_t pageId = -1; int32_t pageId = -1;
SFilePage* p = getNewBufPage(pHashObj->pBuf, 0, &pageId); SFilePage* p = getNewBufPage(pHashObj->pBuf, 0, &pageId);
releaseBufPage(pHashObj->pBuf, p); p->num = sizeof(SFilePage);
setBufPageDirty(p, true);
releaseBufPage(pHashObj->pBuf, p);
taosArrayPush(pBucket->pPageIdList, &pageId); taosArrayPush(pBucket->pPageIdList, &pageId);
pHashObj->numOfBuckets += 1; pHashObj->numOfBuckets += 1;
// printf("---------------add new bucket, id:0x%x, total:%d\n", pHashObj->numOfBuckets - 1, pHashObj->numOfBuckets); printf("---------------add new bucket, id:0x%x, total:%d\n", pHashObj->numOfBuckets - 1, pHashObj->numOfBuckets);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
/**
 * Create a linear hash object backed by a disk-based paged buffer.
 *
 * @param inMemPages         number of pages the paged buffer may keep in memory
 * @param pageSize           size of one buffer page in bytes
 * @param fn                 hash function applied to the keys
 * @param numOfTuplePerPage  stored as the object's tuplesPerPage setting
 * @return the new hash object, or NULL on failure with terrno set to the error code.
 *         Every resource acquired before the failure is released.
 */
SLHashObj* tHashInit(int32_t inMemPages, int32_t pageSize, _hash_fn_t fn, int32_t numOfTuplePerPage) {
  SLHashObj* pHashObj = calloc(1, sizeof(SLHashObj));
  if (pHashObj == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  int32_t code = createDiskbasedBuf(&pHashObj->pBuf, pageSize, inMemPages * pageSize, 0, "/tmp");
  if (code != 0) {
    tfree(pHashObj);  // do not leak the object when the paged buffer cannot be created
    terrno = code;
    return NULL;
  }

  setBufPageCompressOnDisk(pHashObj->pBuf, false);

  /**
   * The number of bits in the hash value, which is used to decide the exact bucket where the object should be located in.
   * The initial value is 0.
   */
  pHashObj->bits = 0;
  pHashObj->hashFn = fn;
  pHashObj->tuplesPerPage = numOfTuplePerPage;

  pHashObj->numOfAlloc = 4;  // initial allocated array list
  pHashObj->pBucket = calloc(pHashObj->numOfAlloc, POINTER_BYTES);
  if (pHashObj->pBucket == NULL) {
    destroyDiskbasedBuf(pHashObj->pBuf);
    tfree(pHashObj);
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return NULL;
  }

  code = doAddNewBucket(pHashObj);
  if (code != TSDB_CODE_SUCCESS) {
    destroyDiskbasedBuf(pHashObj->pBuf);
    tfree(pHashObj->pBucket);
    tfree(pHashObj);
    terrno = code;
    return NULL;
  }

  return pHashObj;
}
/**
 * Destroy a linear hash object: the paged buffer, every bucket together with
 * its page id list, the bucket array and finally the object itself.
 *
 * @param pHashObj  the object to destroy; NULL is accepted and ignored, so the
 *                  result of a failed tHashInit() can be passed in directly
 * @return always NULL
 */
void* tHashCleanup(SLHashObj* pHashObj) {
  if (pHashObj == NULL) {
    return NULL;
  }

  destroyDiskbasedBuf(pHashObj->pBuf);

  for (int32_t i = 0; i < pHashObj->numOfBuckets; ++i) {
    taosArrayDestroy(pHashObj->pBucket[i]->pPageIdList);
    tfree(pHashObj->pBucket[i]);
  }

  tfree(pHashObj->pBucket);
  tfree(pHashObj);
  return NULL;
}
int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data, size_t size) { int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data, size_t size) {
ASSERT(pHashObj != NULL && key != NULL); ASSERT(pHashObj != NULL && key != NULL);
...@@ -231,17 +279,16 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data ...@@ -231,17 +279,16 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data
int32_t hashVal = pHashObj->hashFn(key, keyLen); int32_t hashVal = pHashObj->hashFn(key, keyLen);
int32_t v = doGetBucketIdFromHashVal(hashVal, pHashObj->bits); int32_t v = doGetBucketIdFromHashVal(hashVal, pHashObj->bits);
if (pHashObj->numOfBuckets > v) { if (v >= pHashObj->numOfBuckets) {
SLHashBucket* pBucket = pHashObj->pBucket[v]; int32_t newBucketId = doGetAlternativeBucketId(v, pHashObj->bits, pHashObj->numOfBuckets);
printf("bucketId: 0x%x not exists, put it into 0x%x instead\n", v, newBucketId);
// TODO check return code v = newBucketId;
doAddToBucket(pHashObj, pBucket, v, key, keyLen, data, size); }
} else { // no matched bucket exists, find the candidate bucket
int32_t bucketId = doGetAlternativeBucketId(v, pHashObj->bits, pHashObj->numOfBuckets);
// printf("bucketId: 0x%x not exists, put it into 0x%x instead\n", v, bucketId);
SLHashBucket* pBucket = pHashObj->pBucket[bucketId]; SLHashBucket* pBucket = pHashObj->pBucket[v];
doAddToBucket(pHashObj, pBucket, bucketId, key, keyLen, data, size); int32_t code = doAddToBucket(pHashObj, pBucket, v, key, keyLen, data, size);
if (code != TSDB_CODE_SUCCESS) {
return code;
} }
} }
...@@ -252,42 +299,46 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data ...@@ -252,42 +299,46 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data
int32_t newBucketId = pHashObj->numOfBuckets; int32_t newBucketId = pHashObj->numOfBuckets;
int32_t code = doAddNewBucket(pHashObj); int32_t code = doAddNewBucket(pHashObj);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
int32_t numOfBits = ceil(log(pHashObj->numOfBuckets) / log(2)); int32_t numOfBits = ceil(log(pHashObj->numOfBuckets) / log(2));
if (numOfBits > pHashObj->bits) { if (numOfBits > pHashObj->bits) {
// printf("extend the bits from %d to %d, new bucket:%d\n", pHashObj->bits, numOfBits, newBucketId); printf("extend the bits from %d to %d, new bucket:%d\n", pHashObj->bits, numOfBits, newBucketId);
ASSERT(numOfBits == pHashObj->bits + 1); ASSERT(numOfBits == pHashObj->bits + 1);
pHashObj->bits = numOfBits; pHashObj->bits = numOfBits;
} }
int32_t splitBucketId = (1ul << (pHashObj->bits - 1)) ^ newBucketId; int32_t splitBucketId = doGetRelatedSplitBucketId(newBucketId, pHashObj->bits);
// load all data in this bucket and check if the data needs to relocated into the new bucket // load all data in this bucket and check if the data needs to relocated into the new bucket
SLHashBucket* pBucket = pHashObj->pBucket[splitBucketId]; SLHashBucket* pBucket = pHashObj->pBucket[splitBucketId];
// printf("split %d items' bucket:0x%x to new bucket:0x%x\n", pBucket->size, splitBucketId, newBucketId); printf("split %d items' bucket:0x%x to new bucket:0x%x\n", pBucket->size, splitBucketId, newBucketId);
for (int32_t i = 0; i < taosArrayGetSize(pBucket->pPageIdList); ++i) { for (int32_t i = 0; i < taosArrayGetSize(pBucket->pPageIdList); ++i) {
int32_t pageId = *(int32_t*)taosArrayGet(pBucket->pPageIdList, i); int32_t pageId = *(int32_t*)taosArrayGet(pBucket->pPageIdList, i);
SFilePage* p = getBufPage(pHashObj->pBuf, pageId); SFilePage* p = getBufPage(pHashObj->pBuf, pageId);
char* pStart = p->data; char* pStart = p->data;
while (pStart - p->data < p->num) { while (pStart - ((char*) p) < p->num) {
SLHashNode* pNode = (SLHashNode*)pStart; SLHashNode* pNode = (SLHashNode*)pStart;
ASSERT(pNode->keyLen > 0 && pNode->dataLen >= 0);
char* k = GET_LHASH_NODE_KEY(pNode); char* k = GET_LHASH_NODE_KEY(pNode);
int32_t hashv = pHashObj->hashFn(k, pNode->keyLen); int32_t hashv = pHashObj->hashFn(k, pNode->keyLen);
int32_t v1 = doGetBucketIdFromHashVal(hashv, pHashObj->bits);
int32_t v1 = hashv & ((1ul << (pHashObj->bits)) - 1);
if (v1 != splitBucketId) { // place it into the new bucket if (v1 != splitBucketId) { // place it into the new bucket
ASSERT(v1 == newBucketId); ASSERT(v1 == newBucketId);
// printf("move key:%d to 0x%x bucket, remain items:%d\n", *(int32_t*)k, v1, pBucket->size - 1); printf("move key:%d to 0x%x bucket, remain items:%d\n", *(int32_t*)k, v1, pBucket->size - 1);
SLHashBucket* pNewBucket = pHashObj->pBucket[newBucketId]; SLHashBucket* pNewBucket = pHashObj->pBucket[newBucketId];
doAddToBucket(pHashObj, pNewBucket, newBucketId, (void*)GET_LHASH_NODE_KEY(pNode), pNode->keyLen, doAddToBucket(pHashObj, pNewBucket, newBucketId, (void*)GET_LHASH_NODE_KEY(pNode), pNode->keyLen,
GET_LHASH_NODE_KEY(pNode), pNode->dataLen); GET_LHASH_NODE_KEY(pNode), pNode->dataLen);
doRemoveFromBucket(p, pNode, pBucket); doRemoveFromBucket(p, pNode, pBucket);
} else { } else {
// printf("check key:%d, located into: %d, skip it\n", *(int*) k, v1); printf("check key:%d, located into: %d, skip it\n", *(int*) k, v1);
int32_t nodeSize = GET_LHASH_NODE_LEN(pStart); int32_t nodeSize = GET_LHASH_NODE_LEN(pStart);
pStart += nodeSize; pStart += nodeSize;
...@@ -295,7 +346,11 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data ...@@ -295,7 +346,11 @@ int32_t tHashPut(SLHashObj* pHashObj, const void *key, size_t keyLen, void *data
} }
releaseBufPage(pHashObj->pBuf, p); releaseBufPage(pHashObj->pBuf, p);
} }
doCompressBucketPages(pHashObj, pBucket);
} }
return TSDB_CODE_SUCCESS;
} }
char* tHashGet(SLHashObj* pHashObj, const void *key, size_t keyLen) { char* tHashGet(SLHashObj* pHashObj, const void *key, size_t keyLen) {
...@@ -332,7 +387,7 @@ char* tHashGet(SLHashObj* pHashObj, const void *key, size_t keyLen) { ...@@ -332,7 +387,7 @@ char* tHashGet(SLHashObj* pHashObj, const void *key, size_t keyLen) {
} }
int32_t tHashRemove(SLHashObj* pHashObj, const void *key, size_t keyLen) { int32_t tHashRemove(SLHashObj* pHashObj, const void *key, size_t keyLen) {
// todo
} }
void tHashPrint(const SLHashObj* pHashObj, int32_t type) { void tHashPrint(const SLHashObj* pHashObj, int32_t type) {
...@@ -343,8 +398,8 @@ void tHashPrint(const SLHashObj* pHashObj, int32_t type) { ...@@ -343,8 +398,8 @@ void tHashPrint(const SLHashObj* pHashObj, int32_t type) {
if (type == LINEAR_HASH_DATA) { if (type == LINEAR_HASH_DATA) {
for (int32_t i = 0; i < pHashObj->numOfBuckets; ++i) { for (int32_t i = 0; i < pHashObj->numOfBuckets; ++i) {
// printf("bucket: 0x%x, obj:%d, page:%d\n", i, pHashObj->pBucket[i]->size, printf("bucket: 0x%x, obj:%d, page:%d\n", i, pHashObj->pBucket[i]->size,
// (int)taosArrayGetSize(pHashObj->pBucket[i]->pPageIdList)); (int)taosArrayGetSize(pHashObj->pBucket[i]->pPageIdList));
} }
} else { } else {
dBufPrintStatis(pHashObj->pBuf); dBufPrintStatis(pHashObj->pBuf);
......
...@@ -23,28 +23,28 @@ ...@@ -23,28 +23,28 @@
#pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable" #pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wsign-compare" #pragma GCC diagnostic ignored "-Wsign-compare"
#include "os.h"
TEST(testCase, linear_hash_Tests) { TEST(testCase, linear_hash_Tests) {
srand(time(NULL)); srand(time(NULL));
_hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT); _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT);
#if 1 #if 1
SLHashObj* pHashObj = tHashInit(220000, 64 + 8, fn, 4); SLHashObj* pHashObj = tHashInit(10, 128 + 8, fn, 8);
for(int32_t i = 0; i < 500000; ++i) { for(int32_t i = 0; i < 100; ++i) {
tHashPut(pHashObj, &i, sizeof(i), &i, sizeof(i)); int32_t code = tHashPut(pHashObj, &i, sizeof(i), &i, sizeof(i));
assert(code == 0);
} }
tHashPrint(pHashObj, LINEAR_HASH_STATIS); // tHashPrint(pHashObj, LINEAR_HASH_STATIS);
for(int32_t i = 0; i < 10000; ++i) { // for(int32_t i = 0; i < 10000; ++i) {
char* v = tHashGet(pHashObj, &i, sizeof(i)); // char* v = tHashGet(pHashObj, &i, sizeof(i));
if (v != NULL) { // if (v != NULL) {
// printf("find value: %d, key:%d\n", *(int32_t*) v, i); //// printf("find value: %d, key:%d\n", *(int32_t*) v, i);
} else { // } else {
printf("failed to found key:%d in hash\n", i); // printf("failed to found key:%d in hash\n", i);
} // }
} // }
tHashPrint(pHashObj, LINEAR_HASH_DATA); tHashPrint(pHashObj, LINEAR_HASH_DATA);
tHashCleanup(pHashObj); tHashCleanup(pHashObj);
......
...@@ -5,25 +5,26 @@ ...@@ -5,25 +5,26 @@
#include "tcompression.h" #include "tcompression.h"
#include "thash.h" #include "thash.h"
//enum {
// true = 0x1,
// BUF_PAGE_RELEASED = 0x2,
// true = 0x3,
//};
#define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES) #define GET_DATA_PAYLOAD(_p) ((char *)(_p)->pData + POINTER_BYTES)
#define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) #define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages)
typedef struct SFreeListItem {
int32_t offset;
int32_t len;
} SFreeListItem;
typedef struct SPageDiskInfo { typedef struct SPageDiskInfo {
int64_t offset; int64_t offset;
int32_t length; int32_t length;
} SPageDiskInfo; } SPageDiskInfo, SFreeListItem;
struct SPageInfo { struct SPageInfo {
SListNode* pn; // point to list node SListNode* pn; // point to list node
void* pData; void* pData;
int64_t offset; int64_t offset;
int32_t pageId; int32_t pageId;
int32_t length:30; int32_t length:29;
bool used:1; // set current page is in used bool used:1; // set current page is in used
bool dirty:1; // set current buffer page is dirty or not bool dirty:1; // set current buffer page is dirty or not
}; };
...@@ -51,46 +52,6 @@ struct SDiskbasedBuf { ...@@ -51,46 +52,6 @@ struct SDiskbasedBuf {
SDiskbasedBufStatis statis; SDiskbasedBufStatis statis;
}; };
int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir) {
*pBuf = calloc(1, sizeof(SDiskbasedBuf));
SDiskbasedBuf* pResBuf = *pBuf;
if (pResBuf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pResBuf->pageSize = pagesize;
pResBuf->numOfPages = 0; // all pages are in buffer in the first place
pResBuf->totalBufSize = 0;
pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit.
pResBuf->allocateId = -1;
pResBuf->comp = true;
pResBuf->file = NULL;
pResBuf->qId = qId;
pResBuf->fileSize = 0;
// at least more than 2 pages must be in memory
assert(inMemBufSize >= pagesize * 2);
pResBuf->lruList = tdListNew(POINTER_BYTES);
// init id hash table
pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES
pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, false);
char path[PATH_MAX] = {0};
taosGetTmpfilePath(dir, "qbuf", path);
pResBuf->path = strdup(path);
pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t));
// qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, pResBuf->pageSize,
// pResBuf->inMemPages, pResBuf->path);
return TSDB_CODE_SUCCESS;
}
static int32_t createDiskFile(SDiskbasedBuf* pBuf) { static int32_t createDiskFile(SDiskbasedBuf* pBuf) {
pBuf->file = fopen(pBuf->path, "wb+"); pBuf->file = fopen(pBuf->path, "wb+");
if (pBuf->file == NULL) { if (pBuf->file == NULL) {
...@@ -135,10 +96,10 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { ...@@ -135,10 +96,10 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) {
size_t num = taosArrayGetSize(pBuf->pFree); size_t num = taosArrayGetSize(pBuf->pFree);
for(int32_t i = 0; i < num; ++i) { for(int32_t i = 0; i < num; ++i) {
SFreeListItem* pi = taosArrayGet(pBuf->pFree, i); SFreeListItem* pi = taosArrayGet(pBuf->pFree, i);
if (pi->len >= size) { if (pi->length >= size) {
offset = pi->offset; offset = pi->offset;
pi->offset += (int32_t)size; pi->offset += (int32_t)size;
pi->len -= (int32_t)size; pi->length -= (int32_t)size;
return offset; return offset;
} }
...@@ -160,7 +121,7 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { ...@@ -160,7 +121,7 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) {
static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) {
assert(!pg->used && pg->pData != NULL); assert(!pg->used && pg->pData != NULL);
int32_t size = -1; int32_t size = pBuf->pageSize;
char* t = NULL; char* t = NULL;
if (pg->offset == -1 || pg->dirty) { if (pg->offset == -1 || pg->dirty) {
void* payload = GET_DATA_PAYLOAD(pg); void* payload = GET_DATA_PAYLOAD(pg);
...@@ -169,66 +130,68 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { ...@@ -169,66 +130,68 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) {
} }
// this page is flushed to disk for the first time // this page is flushed to disk for the first time
if (pg->offset == -1) { if (pg->dirty) {
assert(pg->dirty == true); if (pg->offset == -1) {
assert(pg->dirty == true);
pg->offset = allocatePositionInFile(pBuf, size); pg->offset = allocatePositionInFile(pBuf, size);
pBuf->nextPos += size; pBuf->nextPos += size;
int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET); int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET);
if (ret != 0) { if (ret != 0) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return NULL; return NULL;
} }
ret = (int32_t) fwrite(t, 1, size, pBuf->file); ret = (int32_t)fwrite(t, 1, size, pBuf->file);
if (ret != size) { if (ret != size) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
return NULL; return NULL;
} }
if (pBuf->fileSize < pg->offset + size) { if (pBuf->fileSize < pg->offset + size) {
pBuf->fileSize = pg->offset + size; pBuf->fileSize = pg->offset + size;
} }
pBuf->statis.flushBytes += size; pBuf->statis.flushBytes += size;
pBuf->statis.flushPages += 1; pBuf->statis.flushPages += 1;
} else if (pg->dirty) { } else {
// length becomes greater, current space is not enough, allocate new place, otherwise, do nothing // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing
if (pg->length < size) { if (pg->length < size) {
// 1. add current space to free list // 1. add current space to free list
SPageDiskInfo dinfo = {.length = pg->length, .offset = pg->offset}; SPageDiskInfo dinfo = {.length = pg->length, .offset = pg->offset};
taosArrayPush(pBuf->pFree, &dinfo); taosArrayPush(pBuf->pFree, &dinfo);
// 2. allocate new position, and update the info
pg->offset = allocatePositionInFile(pBuf, size);
pBuf->nextPos += size;
}
// 2. allocate new position, and update the info // 3. write to disk.
pg->offset = allocatePositionInFile(pBuf, size); int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET);
pBuf->nextPos += size; if (ret != 0) {
} terrno = TAOS_SYSTEM_ERROR(errno);
return NULL;
}
// 3. write to disk. ret = (int32_t)fwrite(t, 1, size, pBuf->file);
int32_t ret = fseek(pBuf->file, pg->offset, SEEK_SET); if (ret != size) {
if (ret != 0) { terrno = TAOS_SYSTEM_ERROR(errno);
terrno = TAOS_SYSTEM_ERROR(errno); return NULL;
return NULL; }
}
ret = (int32_t)fwrite(t, 1, size, pBuf->file); if (pBuf->fileSize < pg->offset + size) {
if (ret != size) { pBuf->fileSize = pg->offset + size;
terrno = TAOS_SYSTEM_ERROR(errno); }
return NULL;
}
if (pBuf->fileSize < pg->offset + size) { pBuf->statis.flushBytes += size;
pBuf->fileSize = pg->offset + size; pBuf->statis.flushPages += 1;
} }
} else {// NOTE: the size may be -1, the this recycle page has not been flushed to disk yet.
pBuf->statis.flushBytes += size;
pBuf->statis.flushPages += 1;
} else {
size = pg->length; size = pg->length;
} }
assert(size >= 0); ASSERT(size > 0 || (pg->offset == -1 && pg->length == -1));
char* pDataBuf = pg->pData; char* pDataBuf = pg->pData;
memset(pDataBuf, 0, pBuf->pageSize); memset(pDataBuf, 0, pBuf->pageSize);
...@@ -313,13 +276,10 @@ static SPageInfo* registerPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t pag ...@@ -313,13 +276,10 @@ static SPageInfo* registerPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t pag
static SListNode* getEldestUnrefedPage(SDiskbasedBuf* pBuf) { static SListNode* getEldestUnrefedPage(SDiskbasedBuf* pBuf) {
SListIter iter = {0}; SListIter iter = {0};
tdListInitIter(pBuf->lruList, &iter, TD_LIST_BACKWARD); tdListInitIter(pBuf->lruList, &iter, TD_LIST_BACKWARD);
SListNode* pn = NULL; SListNode* pn = NULL;
while((pn = tdListNext(&iter)) != NULL) { while((pn = tdListNext(&iter)) != NULL) {
assert(pn != NULL);
SPageInfo* pageInfo = *(SPageInfo**) pn->data; SPageInfo* pageInfo = *(SPageInfo**) pn->data;
assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); assert(pageInfo->pageId >= 0 && pageInfo->pn == pn);
...@@ -377,6 +337,56 @@ static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { ...@@ -377,6 +337,56 @@ static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) {
return pageSize + POINTER_BYTES + 2; return pageSize + POINTER_BYTES + 2;
} }
/**
 * Recover the SPageInfo that belongs to a page payload pointer.
 *
 * An SPageInfo pointer is read from the slot located offsetof(SPageInfo, pData)
 * bytes in front of the payload.
 *
 * NOTE(review): GET_DATA_PAYLOAD places the payload POINTER_BYTES past pData, so
 * this relies on offsetof(SPageInfo, pData) == POINTER_BYTES - confirm.
 *
 * @param page  payload pointer of a buffer page
 * @return the page info read from the slot preceding the payload
 */
static SPageInfo* getPageInfoFromPayload(void* page) {
  int32_t offset = offsetof(SPageInfo, pData);

  // Arithmetic on void* is a GNU extension; step back through a char* instead.
  char* p = (char*)page - offset;

  SPageInfo* ppi = ((SPageInfo**) p)[0];
  return ppi;
}
/**
 * Create a disk-based paged buffer.
 *
 * @param pBuf          receives the new buffer; set to NULL when creation fails
 * @param pagesize      size of one page in bytes, must be positive
 * @param inMemBufSize  in-memory budget in bytes; must hold at least two pages
 * @param qId           identifier stored in the buffer
 * @param dir           directory in which the temporary file path is generated
 * @return TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY when any allocation
 *         fails (everything allocated so far is released in that case)
 */
int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMemBufSize, uint64_t qId, const char* dir) {
  // Validate the sizes first: pagesize is used as a divisor below, and at least
  // two pages must fit in memory.
  assert(pagesize > 0 && inMemBufSize >= pagesize * 2);

  *pBuf = calloc(1, sizeof(SDiskbasedBuf));

  SDiskbasedBuf* pResBuf = *pBuf;
  if (pResBuf == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  pResBuf->pageSize     = pagesize;
  pResBuf->numOfPages   = 0;                        // all pages are in buffer in the first place
  pResBuf->totalBufSize = 0;
  pResBuf->inMemPages   = inMemBufSize / pagesize;  // maximum allowed pages, it is a soft limit.
  pResBuf->allocateId   = -1;
  pResBuf->comp         = true;
  pResBuf->file         = NULL;
  pResBuf->qId          = qId;
  pResBuf->fileSize     = 0;

  pResBuf->pFree   = taosArrayInit(4, sizeof(SFreeListItem));
  pResBuf->lruList = tdListNew(POINTER_BYTES);

  // init id hash table
  _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT);
  pResBuf->groupSet  = taosHashInit(10, fn, true, false);
  pResBuf->assistBuf = malloc(pResBuf->pageSize + 2);  // EXTRA BYTES
  pResBuf->all       = taosHashInit(10, fn, true, false);

  char path[PATH_MAX] = {0};
  taosGetTmpfilePath(dir, "paged-buf", path);
  pResBuf->path = strdup(path);

  pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t));

  // Any failed allocation leaves the buffer unusable: undo everything and report it.
  if (pResBuf->pFree == NULL || pResBuf->lruList == NULL || pResBuf->groupSet == NULL ||
      pResBuf->assistBuf == NULL || pResBuf->all == NULL || pResBuf->path == NULL ||
      pResBuf->emptyDummyIdList == NULL) {
    if (pResBuf->pFree != NULL) {
      taosArrayDestroy(pResBuf->pFree);
    }
    if (pResBuf->lruList != NULL) {
      tdListFree(pResBuf->lruList);
    }
    if (pResBuf->groupSet != NULL) {
      taosHashCleanup(pResBuf->groupSet);
    }
    if (pResBuf->all != NULL) {
      taosHashCleanup(pResBuf->all);
    }
    if (pResBuf->emptyDummyIdList != NULL) {
      taosArrayDestroy(pResBuf->emptyDummyIdList);
    }

    free(pResBuf->assistBuf);
    free(pResBuf->path);
    free(pResBuf);

    *pBuf = NULL;
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  return TSDB_CODE_SUCCESS;
}
void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) { void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) {
pBuf->statis.getPages += 1; pBuf->statis.getPages += 1;
...@@ -386,6 +396,7 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) { ...@@ -386,6 +396,7 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) {
// Failed to allocate a new buffer page, and there is an error occurs. // Failed to allocate a new buffer page, and there is an error occurs.
if (availablePage == NULL) { if (availablePage == NULL) {
assert(0);
return NULL; return NULL;
} }
} }
...@@ -393,10 +404,6 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) { ...@@ -393,10 +404,6 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t groupId, int32_t* pageId) {
// register new id in this group // register new id in this group
*pageId = (++pBuf->allocateId); *pageId = (++pBuf->allocateId);
if (*pageId == 11) {
printf("page is allocated, id:%d\n", *pageId);
}
// register page id info // register page id info
SPageInfo* pi = registerPage(pBuf, groupId, *pageId); SPageInfo* pi = registerPage(pBuf, groupId, *pageId);
...@@ -443,7 +450,6 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { ...@@ -443,7 +450,6 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) {
(*pi)->used = true; (*pi)->used = true;
return (void *)(GET_DATA_PAYLOAD(*pi)); return (void *)(GET_DATA_PAYLOAD(*pi));
} else { // not in memory } else { // not in memory
assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->length >= 0 && (*pi)->offset >= 0); assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->length >= 0 && (*pi)->offset >= 0);
...@@ -477,15 +483,12 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { ...@@ -477,15 +483,12 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) {
void releaseBufPage(SDiskbasedBuf* pBuf, void* page) { void releaseBufPage(SDiskbasedBuf* pBuf, void* page) {
assert(pBuf != NULL && page != NULL); assert(pBuf != NULL && page != NULL);
int32_t offset = offsetof(SPageInfo, pData); SPageInfo* ppi = getPageInfoFromPayload(page);
char* p = page - offset;
SPageInfo* ppi = ((SPageInfo**) p)[0];
releaseBufPageInfo(pBuf, ppi); releaseBufPageInfo(pBuf, ppi);
} }
void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) { void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) {
assert(pi->pData != NULL && pi->used); assert(pi->pData != NULL && pi->used == true);
pi->used = false; pi->used = false;
pBuf->statis.releasePages += 1; pBuf->statis.releasePages += 1;
...@@ -549,6 +552,8 @@ void destroyDiskbasedBuf(SDiskbasedBuf* pBuf) { ...@@ -549,6 +552,8 @@ void destroyDiskbasedBuf(SDiskbasedBuf* pBuf) {
tdListFree(pBuf->lruList); tdListFree(pBuf->lruList);
taosArrayDestroy(pBuf->emptyDummyIdList); taosArrayDestroy(pBuf->emptyDummyIdList);
taosArrayDestroy(pBuf->pFree);
taosHashCleanup(pBuf->groupSet); taosHashCleanup(pBuf->groupSet);
taosHashCleanup(pBuf->all); taosHashCleanup(pBuf->all);
...@@ -580,10 +585,7 @@ bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf) { ...@@ -580,10 +585,7 @@ bool isAllDataInMemBuf(const SDiskbasedBuf* pBuf) {
} }
void setBufPageDirty(void* pPage, bool dirty) { void setBufPageDirty(void* pPage, bool dirty) {
int32_t offset = offsetof(SPageInfo, pData); SPageInfo* ppi = getPageInfoFromPayload(pPage);
char* p = (char*)pPage - offset;
SPageInfo* ppi = ((SPageInfo**) p)[0];
ppi->dirty = dirty; ppi->dirty = dirty;
} }
...@@ -591,6 +593,18 @@ void setBufPageCompressOnDisk(SDiskbasedBuf* pBuf, bool comp) { ...@@ -591,6 +593,18 @@ void setBufPageCompressOnDisk(SDiskbasedBuf* pBuf, bool comp) {
pBuf->comp = comp; pBuf->comp = comp;
} }
/**
 * Flag a page as recycled: clear its "used" and "dirty" state and, when the
 * page already occupies a region of the backing file, add that region to the
 * buffer's free list.
 *
 * @param pBuf   the paged buffer that owns the page
 * @param pPage  payload pointer of the page to recycle
 */
void dBufSetBufPageRecycled(SDiskbasedBuf *pBuf, void* pPage) {
  SPageInfo* pInfo = getPageInfoFromPayload(pPage);

  pInfo->used  = false;
  pInfo->dirty = false;

  // A length or offset of -1 means there is no disk region to give back.
  if (pInfo->length == -1 || pInfo->offset == -1) {
    return;
  }

  SFreeListItem freed = {.length = pInfo->length, .offset = pInfo->offset};
  taosArrayPush(pBuf->pFree, &freed);
}
void dBufSetPrintInfo(SDiskbasedBuf* pBuf) { void dBufSetPrintInfo(SDiskbasedBuf* pBuf) {
pBuf->printStatis = true; pBuf->printStatis = true;
...@@ -618,3 +632,4 @@ void dBufPrintStatis(const SDiskbasedBuf* pBuf) { ...@@ -618,3 +632,4 @@ void dBufPrintStatis(const SDiskbasedBuf* pBuf) {
ps->getPages, ps->releasePages, ps->flushBytes / 1024.0f, ps->flushPages, ps->loadBytes / 1024.0f, ps->loadPages, ps->getPages, ps->releasePages, ps->flushBytes / 1024.0f, ps->flushPages, ps->loadBytes / 1024.0f, ps->loadPages,
ps->loadBytes / (1024.0 * ps->loadPages)); ps->loadBytes / (1024.0 * ps->loadPages));
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册