提交 c012704a 编写于 作者: H Hongze Cheng

Submit the first version of merge import

上级 d3941fe9
...@@ -372,13 +372,60 @@ void vnodeCancelCommit(SVnodeObj *pVnode) { ...@@ -372,13 +372,60 @@ void vnodeCancelCommit(SVnodeObj *pVnode) {
taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer);
} }
/* Pick a cache block out of the vnode's cache pool and mark it as in use.
 *
 * Locking: the vnode cache lock must be held before calling this interface.
 * NOTE(review): the failure path below unlocks pPool->vmutex, so that is
 * presumably the lock meant here -- confirm against the callers.
 *   - On success the function does not touch the lock.
 *   - On failure (NULL return) the function has ALREADY unlocked
 *     pPool->vmutex; the caller must not unlock it again.
 *
 * The scan starts at pPool->freeSlot and wraps around modulo
 * pCfg->cacheNumOfBlocks.totalBlocks.
 *
 * @pVnode: vnode whose cache pool (pVnode->pCachePool) is scanned
 *
 * @rtype: the selected block, with notFree set to 1 and index set to its
 *         slot in the pool; pMeterObj is NOT assigned here.
 *         NULL if more than pPool->threshold slots were skipped; in that
 *         case vnodeCreateCommitThread() has been called and an error
 *         has been logged.
 */
SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode) {
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
SVnodeCfg *pCfg = &(pVnode->cfg);
SCacheBlock *pCacheBlock = NULL;
// number of slots passed over so far in this call
int skipped = 0;
while (1) {
pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]);
// blockId == 0: take this slot as-is (presumably a block that has never been used -- TODO confirm)
if (pCacheBlock->blockId == 0) break;
if (pCacheBlock->notFree) {
// slot is still in use: step to the next slot, wrapping around the pool
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
if (skipped > pPool->threshold) {
// too many busy slots: trigger a commit and give up.
// The pool mutex is released here, on the failure path only.
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
// note: notFreeSlots is read for the log message after the mutex has been released
dError("vid:%d committing process is too slow, notFreeSlots:%d....", pVnode->vnode, pPool->notFreeSlots);
return NULL;
}
} else {
// slot is not flagged notFree but has a non-zero blockId: look at the meter that owns it
SMeterObj * pRelObj = pCacheBlock->pMeterObj;
SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache;
// ring index (numOfBlocks - 1) positions behind currentSlot, i.e. presumably that meter's oldest block
int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks;
pCacheBlock = pRelInfo->cacheBlocks[firstSlot];
if (pCacheBlock) {
// release that block and reuse its pool slot; it may be a different slot than the one being scanned
pPool->freeSlot = pCacheBlock->index;
vnodeFreeCacheBlock(pCacheBlock);
break;
} else {
// nothing to release for this meter: move on to the next slot
// NOTE(review): skipped is incremented here but not compared against pPool->threshold in this branch
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
}
}
}
// pPool->freeSlot now designates the chosen slot; re-read the block from the pool
pCacheBlock = (SCacheBlock *)(pPool->pMem[pPool->freeSlot]);
pCacheBlock->index = pPool->freeSlot;
pCacheBlock->notFree = 1;
// advance the cursor so the next call starts scanning after this slot
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
pPool->notFreeSlots++;
return pCacheBlock;
}
int vnodeAllocateCacheBlock(SMeterObj *pObj) { int vnodeAllocateCacheBlock(SMeterObj *pObj) {
int index; int index;
SCachePool * pPool; SCachePool * pPool;
SCacheBlock *pCacheBlock; SCacheBlock *pCacheBlock;
SCacheInfo * pInfo; SCacheInfo * pInfo;
SVnodeObj * pVnode; SVnodeObj * pVnode;
int skipped = 0, commit = 0; int commit = 0;
pVnode = vnodeList + pObj->vnode; pVnode = vnodeList + pObj->vnode;
pPool = (SCachePool *)pVnode->pCachePool; pPool = (SCachePool *)pVnode->pCachePool;
...@@ -406,45 +453,10 @@ int vnodeAllocateCacheBlock(SMeterObj *pObj) { ...@@ -406,45 +453,10 @@ int vnodeAllocateCacheBlock(SMeterObj *pObj) {
return -1; return -1;
} }
while (1) { if ((pCacheBlock = vnodeGetFreeCacheBlock(pVnode)) == NULL) return -1;
pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]);
if (pCacheBlock->blockId == 0) break;
if (pCacheBlock->notFree) {
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
if (skipped > pPool->threshold) {
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
dError("vid:%d sid:%d id:%s, committing process is too slow, notFreeSlots:%d....",
pObj->vnode, pObj->sid, pObj->meterId, pPool->notFreeSlots);
return -1;
}
} else {
SMeterObj *pRelObj = pCacheBlock->pMeterObj;
SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache;
int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks;
pCacheBlock = pRelInfo->cacheBlocks[firstSlot];
if (pCacheBlock) {
pPool->freeSlot = pCacheBlock->index;
vnodeFreeCacheBlock(pCacheBlock);
break;
} else {
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
}
}
}
index = pPool->freeSlot;
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
pPool->notFreeSlots++;
index = pCacheBlock->index;
pCacheBlock->pMeterObj = pObj; pCacheBlock->pMeterObj = pObj;
pCacheBlock->notFree = 1;
pCacheBlock->index = index;
pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *); pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *);
for (int col = 1; col < pObj->numOfColumns; ++col) for (int col = 1; col < pObj->numOfColumns; ++col)
......
...@@ -103,8 +103,8 @@ void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead, ...@@ -103,8 +103,8 @@ void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead,
} }
void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) { void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) {
sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId); if (nHeadName != NULL) sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId);
sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId); if (nLastName != NULL) sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId);
} }
void vnodeCreateDataDirIfNeeded(int vnode, char *path) { void vnodeCreateDataDirIfNeeded(int vnode, char *path) {
......
...@@ -15,31 +15,24 @@ ...@@ -15,31 +15,24 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include <arpa/inet.h> #include <arpa/inet.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include "trpc.h"
#include "ttimer.h"
#include "vnode.h" #include "vnode.h"
#include "vnodeMgmt.h"
#include "vnodeShell.h"
#include "vnodeShell.h"
#include "vnodeUtil.h" #include "vnodeUtil.h"
#pragma GCC diagnostic ignored "-Wpointer-sign"
#pragma GCC diagnostic ignored "-Wint-conversion"
typedef struct { extern void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId);
SCompHeader *headList; extern int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize,
SCompInfo compInfo; char *temp, char *buffer, int bufferSize);
int last; // 0:last block in data file, 1:not the last block extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints);
int newBlocks; extern void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId);
int oldNumOfBlocks; extern int vnodeCreateEmptyCompFile(int vnode, int fileId);
int64_t compInfoOffset; // offset for compInfo in head file extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode);
int64_t leftOffset; // copy from this offset to end of head file extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode);
int64_t hfdSize; // old head file size
} SHeadInfo;
#define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx)))
typedef struct { typedef struct {
void * signature; void * signature;
SShellObj *pShell; SShellObj *pShell;
...@@ -56,226 +49,113 @@ typedef struct { ...@@ -56,226 +49,113 @@ typedef struct {
// only for file // only for file
int numOfPoints; int numOfPoints;
int fileId;
int64_t offset; // offset in data file int64_t offset; // offset in data file
SData *sdata[TSDB_MAX_COLUMNS]; char * payload;
char *buffer; char * opayload; // allocated space for payload from client
char *payload;
char *opayload;
int rows; int rows;
} SImportInfo; } SImportInfo;
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport); typedef struct {
// in .head file
int vnodeGetImportStartPart(SMeterObj *pObj, char *payload, int rows, TSKEY key1) { SCompHeader *pHeader;
int i; size_t pHeaderSize;
for (i = 0; i < rows; ++i) {
TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint));
if (key >= key1) break;
}
return i;
}
int vnodeGetImportEndPart(SMeterObj *pObj, char *payload, int rows, char **pStart, TSKEY key0) {
int i;
for (i = 0; i < rows; ++i) {
TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint));
if (key > key0) break;
}
*pStart = payload + i * pObj->bytesPerPoint;
return rows - i;
}
int vnodeCloseFileForImport(SMeterObj *pObj, SHeadInfo *pHinfo) {
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
TSCKSUM chksum = 0;
if (pHinfo->newBlocks == 0 || pHinfo->compInfoOffset == 0) return 0;
if (pHinfo->oldNumOfBlocks == 0) twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM));
int leftSize = pHinfo->hfdSize - pHinfo->leftOffset;
if (leftSize > 0) {
lseek(pVnode->hfd, pHinfo->leftOffset, SEEK_SET);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, leftSize);
}
pHinfo->compInfo.numOfBlocks += pHinfo->newBlocks; SCompInfo compInfo;
int offset = (pHinfo->compInfo.numOfBlocks - pHinfo->oldNumOfBlocks) * sizeof(SCompBlock); SCompBlock *pBlocks;
if (pHinfo->oldNumOfBlocks == 0) offset += sizeof(SCompInfo) + sizeof(TSCKSUM); // in .data file
int blockId;
uint8_t blockLoadState;
pHinfo->headList[pObj->sid].compInfoOffset = pHinfo->compInfoOffset; SField *pField;
for (int sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid) { size_t pFieldSize;
if (pHinfo->headList[sid].compInfoOffset) pHinfo->headList[sid].compInfoOffset += offset;
}
lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); SData *data[TSDB_MAX_COLUMNS];
int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM); char * buffer;
taosCalcChecksumAppend(0, (uint8_t *)pHinfo->headList, tmsize);
twrite(pVnode->nfd, pHinfo->headList, tmsize);
int size = pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock); char *temp;
char *buffer = malloc(size);
lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET);
read(pVnode->nfd, buffer, size);
SCompBlock *pBlock = (SCompBlock *)(buffer + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock));
pHinfo->compInfo.uid = pObj->uid; char * tempBuffer;
pHinfo->compInfo.delimiter = TSDB_VNODE_DELIMITER; size_t tempBufferSize;
pHinfo->compInfo.last = pBlock->last; // Variables for sendfile
int64_t compInfoOffset;
int64_t nextNo0Offset; // next sid whose compInfoOffset > 0
int64_t hfSize;
int64_t driftOffset;
taosCalcChecksumAppend(0, (uint8_t *)(&pHinfo->compInfo), sizeof(SCompInfo)); int oldNumOfBlocks;
lseek(pVnode->nfd, pHinfo->compInfoOffset, SEEK_SET); int newNumOfBlocks;
twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo)); int last;
} SImportHandle;
chksum = taosCalcChecksum(0, (uint8_t *)buffer, size); typedef struct {
lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo) + size, SEEK_SET); int slot;
twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM)); int pos;
free(buffer); int oslot; // old slot
TSKEY nextKey;
} SBlockIter;
vnodeCloseCommitFiles(pVnode); typedef struct {
int64_t spos;
int64_t epos;
int64_t totalRows;
char * offset[];
} SMergeBuffer;
return 0; int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport);
}
int vnodeProcessLastBlock(SImportInfo *pImport, SHeadInfo *pHinfo, SData *data[]) { int vnodeFindKeyInCache(SImportInfo *pImport, int order) {
SMeterObj *pObj = pImport->pObj; SMeterObj * pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SCompBlock lastBlock;
int code = 0; int code = 0;
SQuery query;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
if (pHinfo->compInfo.last == 0) return 0; TSKEY key = order ? pImport->firstKey : pImport->lastKey;
memset(&query, 0, sizeof(query));
// read into memory query.order.order = order;
uint64_t offset = query.skey = key;
pHinfo->compInfoOffset + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock) + sizeof(SCompInfo); query.ekey = order ? pImport->lastKey : pImport->firstKey;
lseek(pVnode->hfd, offset, SEEK_SET); vnodeSearchPointInCache(pObj, &query);
read(pVnode->hfd, &lastBlock, sizeof(SCompBlock));
assert(lastBlock.last);
if (lastBlock.sversion != pObj->sversion) {
lseek(pVnode->lfd, lastBlock.offset, SEEK_SET);
lastBlock.offset = lseek(pVnode->dfd, 0, SEEK_END);
tsendfile(pVnode->dfd, pVnode->lfd, NULL, lastBlock.len);
lastBlock.last = 0; if (query.slot < 0) {
lseek(pVnode->hfd, offset, SEEK_SET); pImport->slot = pInfo->commitSlot;
twrite(pVnode->hfd, &lastBlock, sizeof(SCompBlock)); if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
pImport->key = 0;
dTrace("vid:%d sid:%d id:%s, key:%ld, import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key);
code = 0;
} else { } else {
vnodeReadLastBlockToMem(pObj, &lastBlock, data); pImport->slot = query.slot;
pHinfo->compInfo.numOfBlocks--; pImport->pos = query.pos;
code = lastBlock.numOfPoints; pImport->key = query.key;
}
return code;
}
int vnodeOpenFileForImport(SImportInfo *pImport, char *payload, SHeadInfo *pHinfo, SData *data[]) {
SMeterObj *pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
TSKEY firstKey = *((TSKEY *)payload);
struct stat filestat;
int sid, rowsBefore = 0;
if (pVnode->nfd <= 0 || firstKey > pVnode->commitLastKey) {
if (pVnode->nfd > 0) vnodeCloseFileForImport(pObj, pHinfo);
pVnode->commitFirstKey = firstKey;
if (vnodeOpenCommitFiles(pVnode, pObj->sid) < 0) return -1;
fstat(pVnode->hfd, &filestat);
pHinfo->hfdSize = filestat.st_size;
pHinfo->newBlocks = 0;
pHinfo->last = 1; // by default, new blockes are at the end of block list
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
read(pVnode->hfd, pHinfo->headList, sizeof(SCompHeader) * pCfg->maxSessions);
if (pHinfo->headList[pObj->sid].compInfoOffset > 0) { if (key != query.key) {
lseek(pVnode->hfd, pHinfo->headList[pObj->sid].compInfoOffset, SEEK_SET); if (order == 0) {
if (read(pVnode->hfd, &pHinfo->compInfo, sizeof(SCompInfo)) != sizeof(SCompInfo)) { // since pos is the position which has smaller key, data shall be imported after it
dError("vid:%d sid:%d, failed to read compInfo from file:%s", pObj->vnode, pObj->sid, pVnode->cfn); pImport->pos++;
return -1; if (pImport->pos >= pObj->pointsPerBlock) {
pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
} }
if (pHinfo->compInfo.uid == pObj->uid) {
pHinfo->compInfoOffset = pHinfo->headList[pObj->sid].compInfoOffset;
pHinfo->leftOffset = pHinfo->headList[pObj->sid].compInfoOffset + sizeof(SCompInfo);
} else { } else {
pHinfo->headList[pObj->sid].compInfoOffset = 0; if (pImport->pos < 0) pImport->pos = 0;
}
}
if ( pHinfo->headList[pObj->sid].compInfoOffset == 0 ) {
memset(&pHinfo->compInfo, 0, sizeof(SCompInfo));
pHinfo->compInfo.uid = pObj->uid;
for (sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid)
if (pHinfo->headList[sid].compInfoOffset > 0) break;
pHinfo->compInfoOffset = (sid == pCfg->maxSessions) ? pHinfo->hfdSize : pHinfo->headList[sid].compInfoOffset;
pHinfo->leftOffset = pHinfo->compInfoOffset;
} }
pHinfo->oldNumOfBlocks = pHinfo->compInfo.numOfBlocks;
lseek(pVnode->hfd, 0, SEEK_SET);
lseek(pVnode->nfd, 0, SEEK_SET);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfoOffset);
twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo));
if (pHinfo->headList[pObj->sid].compInfoOffset > 0) lseek(pVnode->hfd, sizeof(SCompInfo), SEEK_CUR);
if (pVnode->commitFileId < pImport->fileId) {
if (pHinfo->compInfo.numOfBlocks > 0)
pHinfo->leftOffset += pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock);
rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data);
// copy all existing compBlockInfo
lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET);
if (pHinfo->compInfo.numOfBlocks > 0)
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock));
} else if (pVnode->commitFileId == pImport->fileId) {
int slots = pImport->pos ? pImport->slot + 1 : pImport->slot;
pHinfo->leftOffset += slots * sizeof(SCompBlock);
// check if last block is at last file, if it is, read into memory
if (pImport->pos == 0 && pHinfo->compInfo.numOfBlocks > 0 && pImport->slot == pHinfo->compInfo.numOfBlocks &&
pHinfo->compInfo.last) {
rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data);
if ( rowsBefore > 0 ) pImport->slot--;
} }
code = 0;
// this block will be replaced by new blocks
if (pImport->pos > 0) pHinfo->compInfo.numOfBlocks--;
if (pImport->slot > 0) {
lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pImport->slot * sizeof(SCompBlock));
} }
if (pImport->slot < pHinfo->compInfo.numOfBlocks) return code;
pHinfo->last = 0; // new blocks are not at the end of block list }
} else {
// nothing
pHinfo->last = 0; // new blocks are not at the end of block list void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) {
} SVnodeObj *pVnode = vnodeList + vnode;
}
return rowsBefore; int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
int fid = now / delta;
*minKey = (fid - pVnode->maxFiles + 1) * delta;
*maxKey = (fid + 2) * delta - 1;
return;
} }
extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints);
int vnodeImportToFile(SImportInfo *pImport);
void vnodeProcessImportTimer(void *param, void *tmrId) { void vnodeProcessImportTimer(void *param, void *tmrId) {
SImportInfo *pImport = (SImportInfo *)param; SImportInfo *pImport = (SImportInfo *)param;
if (pImport == NULL || pImport->signature != param) { if (pImport == NULL || pImport->signature != param) {
...@@ -283,18 +163,18 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { ...@@ -283,18 +163,18 @@ void vnodeProcessImportTimer(void *param, void *tmrId) {
return; return;
} }
SMeterObj *pObj = pImport->pObj; SMeterObj * pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; SVnodeObj * pVnode = &vnodeList[pObj->vnode];
SCachePool *pPool = (SCachePool *)pVnode->pCachePool; SCachePool *pPool = (SCachePool *)pVnode->pCachePool;
SShellObj *pShell = pImport->pShell; SShellObj * pShell = pImport->pShell;
pImport->retry++; pImport->retry++;
//slow query will block the import operation // slow query will block the import operation
int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_IMPORTING); int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_IMPORTING);
if (state >= TSDB_METER_STATE_DELETING) { if (state >= TSDB_METER_STATE_DELETING) {
dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->vnode, pObj->sid, pObj->meterId, state); state);
return; return;
} }
...@@ -303,7 +183,7 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { ...@@ -303,7 +183,7 @@ void vnodeProcessImportTimer(void *param, void *tmrId) {
num = pObj->numOfQueries; num = pObj->numOfQueries;
pthread_mutex_unlock(&pVnode->vmutex); pthread_mutex_unlock(&pVnode->vmutex);
//if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY // if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY
int32_t commitInProcess = 0; int32_t commitInProcess = 0;
pthread_mutex_lock(&pPool->vmutex); pthread_mutex_lock(&pPool->vmutex);
if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) { if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) {
...@@ -311,9 +191,10 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { ...@@ -311,9 +191,10 @@ void vnodeProcessImportTimer(void *param, void *tmrId) {
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
if (pImport->retry < 1000) { if (pImport->retry < 1000) {
dTrace("vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready." dTrace(
"commitInProcess:%d, numOfQueries:%d, state:%d", pObj->vnode, pObj->sid, pObj->meterId, "vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready."
commitInProcess, num, state); "commitInProcess:%d, numOfQueries:%d, state:%d",
pObj->vnode, pObj->sid, pObj->meterId, commitInProcess, num, state);
taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl); taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl);
return; return;
...@@ -345,646 +226,1430 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { ...@@ -345,646 +226,1430 @@ void vnodeProcessImportTimer(void *param, void *tmrId) {
free(pImport); free(pImport);
} }
int vnodeImportToFile(SImportInfo *pImport) { int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion,
SMeterObj *pObj = pImport->pObj; int *pNumOfPoints, TSKEY now) {
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; SSubmitMsg *pSubmit = (SSubmitMsg *)cont;
SVnodeCfg *pCfg = &pVnode->cfg; SVnodeObj * pVnode = vnodeList + pObj->vnode;
SHeadInfo headInfo; int rows;
int code = 0, col; char * payload;
SCompBlock compBlock; int code = TSDB_CODE_ACTION_IN_PROGRESS;
char * payload = pImport->payload; SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
int rows = pImport->rows; SShellObj * pShell = (SShellObj *)param;
SCachePool *pPool = (SCachePool *)pVnode->pCachePool; int pointsImported = 0;
TSKEY minKey, maxKey;
TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1)));
TSKEY firstKey = *((TSKEY *)payload);
memset(&headInfo, 0, sizeof(headInfo));
headInfo.headList = malloc(sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM));
SData *cdata[TSDB_MAX_COLUMNS]; rows = htons(pSubmit->numOfRows);
char *buffer1 = int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows);
malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns); if (expectedLen != contLen) {
cdata[0] = (SData *)buffer1; dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId,
expectedLen, contLen);
return TSDB_CODE_WRONG_MSG_SIZE;
}
SData *data[TSDB_MAX_COLUMNS]; // FIXME: check sversion here should not be here (Take import convert to insert case into consideration)
char *buffer2 = if (sversion != pObj->sversion) {
malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns); dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId,
data[0] = (SData *)buffer2; pObj->sversion, sversion);
return TSDB_CODE_OTHERS;
}
for (col = 1; col < pObj->numOfColumns; ++col) { // Check timestamp context.
cdata[col] = (SData *)(((char *)cdata[col - 1]) + sizeof(SData) + EXTRA_BYTES + payload = pSubmit->payLoad;
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
data[col] = (SData *)(((char *)data[col - 1]) + sizeof(SData) + EXTRA_BYTES + TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); assert(firstKey <= lastKey);
vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey);
if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) {
dError(
"vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld "
"maxAllowedKey:%ld",
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey);
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
} }
int rowsBefore = 0; // FIXME: Commit log here is invalid (Take retry into consideration)
int rowsRead = 0; if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) {
int rowsUnread = 0; if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;
int leftRows = rows; // left number of rows of imported data code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion);
int row, rowsToWrite; if (code != 0) return code;
int64_t offset[TSDB_MAX_COLUMNS]; }
if (pImport->pos > 0) { if (firstKey > pObj->lastKey) { // Just call insert
for (col = 0; col < pObj->numOfColumns; ++col) vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
memcpy(data[col]->data, pImport->sdata[col]->data, pImport->pos * pObj->schema[col].bytes); vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT);
code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported, now);
rowsBefore = pImport->pos; if (pShell) {
rowsRead = pImport->pos; pShell->code = code;
rowsUnread = pImport->numOfPoints - pImport->pos; pShell->numOfTotalPoints += pointsImported;
} }
dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to file, firstKey:%ld lastKey:%ld", vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT);
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey); } else { // trigger import
do { SImportInfo *pNew, import;
if (leftRows > 0) {
code = vnodeOpenFileForImport(pImport, payload, &headInfo, data);
if (code < 0) goto _exit;
if (code > 0) {
rowsBefore = code;
code = 0;
};
} else {
// if payload is already imported, rows unread shall still be processed
rowsBefore = 0;
}
int rowsToProcess = pObj->pointsPerFileBlock - rowsBefore; dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld",
if (rowsToProcess > leftRows) rowsToProcess = leftRows; pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey);
memset(&import, 0, sizeof(import));
import.firstKey = firstKey;
import.lastKey = lastKey;
import.pObj = pObj;
import.pShell = pShell;
import.payload = payload;
import.rows = rows;
for (col = 0; col < pObj->numOfColumns; ++col) { // FIXME: mutex here seems meaningless and num here still can
offset[col] = data[col]->data + rowsBefore * pObj->schema[col].bytes; // be changed
} int32_t num = 0;
pthread_mutex_lock(&pVnode->vmutex);
num = pObj->numOfQueries;
pthread_mutex_unlock(&pVnode->vmutex);
row = 0; int32_t commitInProcess = 0;
if (leftRows > 0) {
for (row = 0; row < rowsToProcess; ++row) {
if (*((TSKEY *)payload) > pVnode->commitLastKey) break;
for (col = 0; col < pObj->numOfColumns; ++col) { pthread_mutex_lock(&pPool->vmutex);
memcpy((void *)offset[col], payload, pObj->schema[col].bytes); if (((commitInProcess = pPool->commitInProcess) == 1) ||
payload += pObj->schema[col].bytes; num > 0) { // mutual exclusion with read (need to change here)
offset[col] += pObj->schema[col].bytes; pthread_mutex_unlock(&pPool->vmutex);
}
}
}
leftRows -= row; pNew = (SImportInfo *)malloc(sizeof(SImportInfo));
rowsToWrite = rowsBefore + row; memcpy(pNew, &import, sizeof(SImportInfo));
rowsBefore = 0; pNew->signature = pNew;
int payloadLen = contLen - sizeof(SSubmitMsg);
pNew->payload = malloc(payloadLen);
pNew->opayload = pNew->payload;
memcpy(pNew->payload, payload, payloadLen);
if (leftRows == 0 && rowsUnread > 0) { dTrace("vid:%d sid:%d id:%s, import later, commit in process:%d, numOfQueries:%d", pObj->vnode, pObj->sid,
// copy the unread pObj->meterId, commitInProcess, pObj->numOfQueries);
int rowsToCopy = pObj->pointsPerFileBlock - rowsToWrite;
if (rowsToCopy > rowsUnread) rowsToCopy = rowsUnread;
for (col = 0; col < pObj->numOfColumns; ++col) { taosTmrStart(vnodeProcessImportTimer, 10, pNew, vnodeTmrCtrl);
int bytes = pObj->schema[col].bytes; return 0;
memcpy(data[col]->data + rowsToWrite * bytes, pImport->sdata[col]->data + rowsRead * bytes, rowsToCopy * bytes); } else {
pPool->commitInProcess = 1;
pthread_mutex_unlock(&pPool->vmutex);
int code = vnodeImportData(pObj, &import);
if (pShell) {
pShell->code = code;
pShell->numOfTotalPoints += import.importedRows;
} }
rowsRead += rowsToCopy;
rowsUnread -= rowsToCopy;
rowsToWrite += rowsToCopy;
} }
for (col = 0; col < pObj->numOfColumns; ++col) {
data[col]->len = rowsToWrite * pObj->schema[col].bytes;
} }
compBlock.last = headInfo.last; // How about the retry? Will this also cause vnode version++?
vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite); pVnode->version++;
twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock));
rowsToWrite = 0;
headInfo.newBlocks++;
} while (leftRows > 0 || rowsUnread > 0);
if (compBlock.keyLast > pObj->lastKeyOnFile) if (pShell) {
pObj->lastKeyOnFile = compBlock.keyLast; pShell->count--;
if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pShell, pShell->code, pShell->numOfTotalPoints);
}
vnodeCloseFileForImport(pObj, &headInfo); return 0;
dTrace("vid:%d sid:%d id:%s, %d rows data are imported to file", pObj->vnode, pObj->sid, pObj->meterId, rows); }
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; /* Function to search keys in a range
pthread_mutex_lock(&pPool->vmutex); *
* Assumption: keys in payload are in ascending order
*
* @payload: data records, key in ascending order
* @step: bytes each record takes
* @rows: number of data records
* @skey: range start (included)
* @ekey: range end (included)
* @srows: rtype, start index of records
* @nrows: rtype, number of records in range
*
* @rtype: 0 means find data in the range
* -1 means find no data in the range
*/
static int vnodeSearchKeyInRange(char *payload, int step, int rows, TSKEY skey, TSKEY ekey, int *srow, int *nrows) {
if (rows <= 0 || KEY_AT_INDEX(payload, step, 0) > ekey || KEY_AT_INDEX(payload, step, rows - 1) < skey || skey > ekey)
return -1;
if (pInfo->numOfBlocks > 0) { int left = 0;
int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; int right = rows - 1;
TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0])); int mid;
// data may be in commited cache, cache shall be released // Binary search the first key in payload >= skey
if (lastKey > firstKeyInCache) { do {
while (slot != pInfo->commitSlot) { mid = (left + right) / 2;
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; if (skey < KEY_AT_INDEX(payload, step, mid)) {
vnodeFreeCacheBlock(pCacheBlock); right = mid;
slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; } else if (skey > KEY_AT_INDEX(payload, step, mid)) {
left = mid + 1;
} else {
break;
} }
} while (left < right);
// last slot, the uncommitted slots shall be shifted if (skey <= KEY_AT_INDEX(payload, step, mid)) {
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; *srow = mid;
int points = pCacheBlock->numOfPoints - pInfo->commitPoint; } else {
if (points > 0) { if (mid + 1 >= rows) {
for (int col = 0; col < pObj->numOfColumns; ++col) { return -1;
int size = points * pObj->schema[col].bytes; } else {
memmove(pCacheBlock->offset[col], pCacheBlock->offset[col] + pObj->schema[col].bytes * pInfo->commitPoint, size); *srow = mid + 1;
} }
} }
if (pInfo->commitPoint != pObj->pointsPerBlock) { assert(skey <= KEY_AT_INDEX(payload, step, *srow));
// commit point shall be set to 0 if last block is not full
pInfo->commitPoint = 0; *nrows = 0;
pCacheBlock->numOfPoints = points; for (int i = *srow; i < rows; i++) {
if (slot == pInfo->currentSlot) { if (KEY_AT_INDEX(payload, step, i) <= ekey) {
__sync_fetch_and_add(&pObj->freePoints, pInfo->commitPoint); (*nrows)++;
}
} else { } else {
// if last block is full and committed break;
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot];
if (pCacheBlock->pMeterObj == pObj) {
vnodeFreeCacheBlock(pCacheBlock);
} }
} }
}
}
if (lastKey > pObj->lastKeyOnFile) pObj->lastKeyOnFile = lastKey;
pthread_mutex_unlock(&pPool->vmutex);
_exit: if (*nrows == 0) return -1;
tfree(headInfo.headList);
tfree(buffer1);
tfree(buffer2);
tfree(pImport->buffer);
return code; return 0;
} }
int vnodeImportToCache(SImportInfo *pImport, char *payload, int rows) { int vnodeOpenMinFilesForImport(int vnode, int fid) {
SMeterObj *pObj = pImport->pObj; char dname[TSDB_FILENAME_LEN] = "\0";
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; SVnodeObj * pVnode = vnodeList + vnode;
SVnodeCfg *pCfg = &pVnode->cfg; struct stat filestat;
int code = -1; int minFileSize;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
int slot, pos, row, col, points, tpoints;
char *data[TSDB_MAX_COLUMNS], *current[TSDB_MAX_COLUMNS];
int slots = pInfo->unCommittedBlocks + 1;
int trows = slots * pObj->pointsPerBlock + rows; // max rows in buffer
int tsize = (trows / pObj->pointsPerBlock + 1) * pCfg->cacheBlockSize;
TSKEY firstKey = *((TSKEY *)payload);
TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1)));
if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) {
dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints);
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
}
dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to cache, firstKey:%ld lastKey:%ld", minFileSize = TSDB_FILE_HEADER_LEN + sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey);
pthread_mutex_lock(&(pVnode->vmutex)); vnodeGetHeadDataLname(pVnode->cfn, dname, pVnode->lfn, vnode, fid);
if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey;
pthread_mutex_unlock(&(pVnode->vmutex));
char *buffer = malloc(tsize); // buffer to hold unCommitted data plus import data // Open .head file
data[0] = buffer; pVnode->hfd = open(pVnode->cfn, O_RDONLY);
current[0] = data[0]; if (pVnode->hfd < 0) {
for (col = 1; col < pObj->numOfColumns; ++col) { dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
data[col] = data[col - 1] + trows * pObj->schema[col - 1].bytes; taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
current[col] = data[col]; goto _error_open;
} }
// write import data into buffer first fstat(pVnode->hfd, &filestat);
for (row = 0; row < rows; ++row) { if (filestat.st_size < minFileSize) {
for (col = 0; col < pObj->numOfColumns; ++col) { dError("vid:%d, head file:%s is corrupted", vnode, pVnode->cfn);
memcpy(current[col], payload, pObj->schema[col].bytes); taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn);
payload += pObj->schema[col].bytes; goto _error_open;
current[col] += pObj->schema[col].bytes;
} }
// Open .data file
pVnode->dfd = open(dname, O_RDWR);
if (pVnode->dfd < 0) {
dError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
goto _error_open;
} }
// copy the overwritten data into buffer fstat(pVnode->dfd, &filestat);
tpoints = rows; if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
pos = pImport->pos; dError("vid:%d, data file:%s corrupted", vnode, dname);
slot = pImport->slot; taosLogError("vid:%d, data file:%s corrupted", vnode, dname);
while (1) { goto _error_open;
points = pInfo->cacheBlocks[slot]->numOfPoints - pos;
for (col = 0; col < pObj->numOfColumns; ++col) {
int size = points * pObj->schema[col].bytes;
memcpy(current[col], pInfo->cacheBlocks[slot]->offset[col] + pos * pObj->schema[col].bytes, size);
current[col] += size;
} }
pos = 0;
tpoints += points;
if (slot == pInfo->currentSlot) break; // Open .last file
slot = (slot + 1) % pInfo->maxBlocks; pVnode->lfd = open(pVnode->lfn, O_RDWR);
if (pVnode->lfd < 0) {
dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
goto _error_open;
} }
for (col = 0; col < pObj->numOfColumns; ++col) current[col] = data[col]; fstat(pVnode->lfd, &filestat);
pos = pImport->pos; if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
goto _error_open;
}
// write back to existing slots first return 0;
slot = pImport->slot;
while (1) {
points = (tpoints > pObj->pointsPerBlock - pos) ? pObj->pointsPerBlock - pos : tpoints;
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot];
for (col = 0; col < pObj->numOfColumns; ++col) {
int size = points * pObj->schema[col].bytes;
memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size);
current[col] += size;
}
pCacheBlock->numOfPoints = points + pos;
pos = 0;
tpoints -= points;
if (slot == pInfo->currentSlot) break; _error_open:
slot = (slot + 1) % pInfo->maxBlocks; if (pVnode->hfd > 0) close(pVnode->hfd);
} pVnode->hfd = 0;
// allocate new cache block if there are still data left if (pVnode->dfd > 0) close(pVnode->dfd);
while (tpoints > 0) { pVnode->dfd = 0;
pImport->commit = vnodeAllocateCacheBlock(pObj);
if (pImport->commit < 0) goto _exit;
points = (tpoints > pObj->pointsPerBlock) ? pObj->pointsPerBlock : tpoints;
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot];
for (col = 0; col < pObj->numOfColumns; ++col) {
int size = points * pObj->schema[col].bytes;
memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size);
current[col] += size;
}
tpoints -= points;
pCacheBlock->numOfPoints = points;
}
code = 0; if (pVnode->lfd > 0) close(pVnode->lfd);
__sync_fetch_and_sub(&pObj->freePoints, rows); pVnode->lfd = 0;
dTrace("vid:%d sid:%d id:%s, %d rows data are imported to cache", pObj->vnode, pObj->sid, pObj->meterId, rows);
_exit: return -1;
free(buffer);
return code;
} }
int vnodeFindKeyInFile(SImportInfo *pImport, int order) { /* Function to open .t file and sendfile the first part
SMeterObj *pObj = pImport->pObj; */
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid) {
int code = -1; char dHeadName[TSDB_FILENAME_LEN] = "\0";
SQuery query; SVnodeObj * pVnode = vnodeList + pObj->vnode;
SColumnInfoEx colList[TSDB_MAX_COLUMNS] = {0}; struct stat filestat;
int sid;
// cfn: .head
if (readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN) < 0) return -1;
size_t len = strlen(dHeadName);
// switch head name
switch (dHeadName[len - 1]) {
case '0':
dHeadName[len - 1] = '1';
break;
case '1':
dHeadName[len - 1] = '0';
break;
default:
dError("vid: %d, fid: %d, head target filename not end with 0 or 1", pVnode->vnode, fid);
return -1;
}
TSKEY key = order ? pImport->firstKey : pImport->lastKey; vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid);
memset(&query, 0, sizeof(query)); symlink(dHeadName, pVnode->nfn);
query.order.order = order;
query.skey = key; pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
query.ekey = order ? INT64_MAX : 0; if (pVnode->nfd < 0) {
query.colList = colList; dError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
query.numOfCols = pObj->numOfColumns; taosLogError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
return -1;
}
for (int16_t i = 0; i < pObj->numOfColumns; ++i) { fstat(pVnode->hfd, &filestat);
colList[i].data.colId = pObj->schema[i].colId; pHandle->hfSize = filestat.st_size;
colList[i].data.bytes = pObj->schema[i].bytes;
colList[i].data.type = pObj->schema[i].type;
colList[i].colIdx = i; // Find the next sid whose compInfoOffset > 0
colList[i].colIdxInBuf = i; for (sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; sid++) {
if (pHandle->pHeader[sid].compInfoOffset > 0) break;
} }
int ret = vnodeSearchPointInFile(pObj, &query); pHandle->nextNo0Offset = (sid == pVnode->cfg.maxSessions) ? pHandle->hfSize : pHandle->pHeader[sid].compInfoOffset;
if (ret >= 0) { // FIXME: sendfile the original part
if (query.slot < 0) { // TODO: Here, we need to take the deleted table case in consideration, this function
pImport->slot = 0; // just assume the case is handled before calling this function
pImport->pos = 0; if (pHandle->pHeader[pObj->sid].compInfoOffset > 0) {
pImport->key = 0; pHandle->compInfoOffset = pHandle->pHeader[pObj->sid].compInfoOffset;
pImport->fileId = pVnode->fileId - pVnode->numOfFiles + 1; } else {
dTrace("vid:%d sid:%d id:%s, import to head of file", pObj->vnode, pObj->sid, pObj->meterId); pHandle->compInfoOffset = pHandle->nextNo0Offset;
code = 0; }
} else if (query.slot >= 0) {
code = 0;
pImport->slot = query.slot;
pImport->pos = query.pos;
pImport->key = query.key;
pImport->fileId = query.fileId;
SCompBlock *pBlock = &query.pBlock[query.slot];
pImport->numOfPoints = pBlock->numOfPoints;
if (pImport->key != key) { assert(pHandle->compInfoOffset <= pHandle->hfSize);
if (order == 0) {
pImport->pos++;
if (pImport->pos >= pBlock->numOfPoints) { lseek(pVnode->hfd, 0, SEEK_SET);
pImport->slot++; lseek(pVnode->nfd, 0, SEEK_SET);
pImport->pos = 0; if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) {
// TODO : deal with ERROR here
} }
} else {
if (pImport->pos < 0) pImport->pos = 0; // Leave a SCompInfo space here
lseek(pVnode->nfd, sizeof(SCompInfo), SEEK_CUR);
return 0;
}
/* Bit flags selecting which parts of a file block vnodeLoadNeededBlockData()
 * loads; the same bits record what is already loaded in blockLoadState.
 */
typedef enum {
  DATA_LOAD_TIMESTAMP = 0x1,  // the primary-key timestamp column
  DATA_LOAD_OTHER_DATA = 0x2  // every column after the timestamp column
} DataLoadMod;
/*
 * Load the parts of file block `blockId` requested by `loadMod` into the
 * import handle.
 *
 * pObj     meter the block belongs to; its sversion must equal the block's.
 * pHandle  import handle. pHandle->pBlocks must hold the block descriptors.
 *          pHandle->pField, ->buffer, ->data[], ->temp and ->tempBuffer are
 *          allocated on first use and kept for later calls.
 * blockId  index into pHandle->pBlocks.
 * loadMod  bitwise OR of DATA_LOAD_TIMESTAMP and/or DATA_LOAD_OTHER_DATA.
 *
 * pHandle->blockId / pHandle->blockLoadState remember which block is loaded
 * and which parts of it, so a part that is already in memory is not read
 * again. Switching to another block resets the state.
 *
 * The block is read from the .last file when pBlock->last is set, otherwise
 * from the .data file.
 *
 * Returns 0 on success, -1 on allocation, read, checksum or decode failure.
 */
static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod) {
  size_t      size;
  int         code = 0;
  SCompBlock *pBlock = pHandle->pBlocks + blockId;

  assert(pBlock->sversion == pObj->sversion);

  SVnodeObj *pVnode = vnodeList + pObj->vnode;

  int dfd = pBlock->last ? pVnode->lfd : pVnode->dfd;

  if (pHandle->blockId != blockId) {
    pHandle->blockId = blockId;
    pHandle->blockLoadState = 0;
  }

  if (pHandle->blockLoadState == 0) {  // Reload pField
    size = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM);
    if (pHandle->pFieldSize < size) {
      // Keep the old buffer reachable if realloc fails
      void *pNewField = realloc((void *)(pHandle->pField), size);
      if (pNewField == NULL) {
        dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %lu", pObj->vnode, pObj->sid,
               pObj->meterId, (unsigned long)size);
        return -1;
      }
      pHandle->pField = (SField *)pNewField;
      pHandle->pFieldSize = size;
    }

    // Read and verify exactly this block's field table. The buffer may be larger than
    // `size` when an earlier block had more columns, so its capacity must not be used here.
    lseek(dfd, pBlock->offset, SEEK_SET);
    ssize_t nread = read(dfd, (void *)(pHandle->pField), size);
    if (nread < 0 || (size_t)nread < size) {
      dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%ld reason:%s", pVnode->vnode, pObj->sid,
             pObj->meterId, (long)size, strerror(errno));
      return -1;
    }

    if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), size)) {
      // NOTE(review): the .last file name is logged even when the block was read from the .data file
      dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid,
             pObj->meterId, pVnode->lfn);
      return -1;
    }
  }

  {  // Allocate necessary buffer
    size = pObj->bytesPerPoint * pObj->pointsPerFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns;
    if (pHandle->buffer == NULL) {
      pHandle->buffer = malloc(size);
      if (pHandle->buffer == NULL) {
        dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %lu", pObj->vnode, pObj->sid,
               pObj->meterId, (unsigned long)size);
        return -1;
      }

      // TODO: Init data
      // Carve one SData slot per column out of the single buffer
      pHandle->data[0] = (SData *)(pHandle->buffer);
      for (int col = 1; col < pObj->numOfColumns; col++) {
        pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES +
                                       pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
      }
    }

    if (pHandle->temp == NULL) {
      pHandle->temp = malloc(size);
      if (pHandle->temp == NULL) {
        dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %lu", pObj->vnode, pObj->sid,
               pObj->meterId, (unsigned long)size);
        return -1;
      }
    }

    if (pHandle->tempBuffer == NULL) {
      pHandle->tempBufferSize = pObj->maxBytes + EXTRA_BYTES;
      pHandle->tempBuffer = malloc(pHandle->tempBufferSize);
      if (pHandle->tempBuffer == NULL) {
        dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %lu", pObj->vnode, pObj->sid,
               pObj->meterId, (unsigned long)pHandle->tempBufferSize);
        return -1;
      }
    }
  }

  // `!` rather than `~`: ~(state & FLAG) is never zero, so the part would be reloaded on every call
  if ((loadMod & DATA_LOAD_TIMESTAMP) &&
      !(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP)) {  // load only timestamp part
    code =
        vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX,
                             pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints,
                             pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize);

    if (code != 0) return -1;
    pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP;
  }

  if ((loadMod & DATA_LOAD_OTHER_DATA) && !(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA)) {  // load other columns
    for (int col = 1; col < pBlock->numOfCols; col++) {
      code = vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data,
                                  pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer,
                                  pHandle->tempBufferSize);
      if (code != 0) return -1;
    }

    pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA;
  }

  return 0;
}
if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
pImport->key = 0;
dTrace("vid:%d sid:%d id:%s, key:%ld, import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key);
code = 0;
} else {
pImport->slot = query.slot;
pImport->pos = query.pos;
pImport->key = query.key;
if (key != query.key) { static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) {
if (order == 0) { SVnodeObj *pVnode = vnodeList + pObj->vnode;
// since pos is the position which has smaller key, data shall be imported after it char dpath[TSDB_FILENAME_LEN] = "\0";
pImport->pos++; SCompInfo compInfo;
if (pImport->pos >= pObj->pointsPerBlock) { __off_t offset = 0;
pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0; if (pVnode->nfd > 0) {
offset = lseek(pVnode->nfd, 0, SEEK_CUR);
assert(offset == pHandle->nextNo0Offset + pHandle->driftOffset);
{ // Write the SCompInfo part
compInfo.uid = pObj->uid;
compInfo.last = pHandle->last;
compInfo.numOfBlocks = pHandle->newNumOfBlocks + pHandle->oldNumOfBlocks;
compInfo.delimiter = TSDB_VNODE_DELIMITER;
taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo));
lseek(pVnode->nfd, pHandle->compInfoOffset, SEEK_SET);
if (twrite(pVnode->nfd, (void *)(&compInfo), sizeof(SCompInfo)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompInfo, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
strerror(errno));
return -1;
} }
} else {
if (pImport->pos < 0) pImport->pos = 0;
} }
// Write the rest of the SCompBlock part
if (pHandle->hfSize > pHandle->nextNo0Offset) {
lseek(pVnode->nfd, 0, SEEK_END);
lseek(pVnode->hfd, pHandle->nextNo0Offset, SEEK_SET);
if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->hfSize - pHandle->nextNo0Offset) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to sendfile, size:%ld, reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->hfSize - pHandle->nextNo0Offset, strerror(errno));
return -1;
} }
code = 0;
} }
return code; // Write SCompHeader part
} pHandle->pHeader[pObj->sid].compInfoOffset = pHandle->compInfoOffset;
for (int sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; ++sid) {
if (pHandle->pHeader[sid].compInfoOffset > 0) {
pHandle->pHeader[sid].compInfoOffset += pHandle->driftOffset;
}
}
int vnodeImportStartToCache(SImportInfo *pImport, char *payload, int rows) { taosCalcChecksumAppend(0, (uint8_t *)(pHandle->pHeader), pHandle->pHeaderSize);
int code = 0; lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
SMeterObj *pObj = pImport->pObj; if (twrite(pVnode->nfd, (void *)(pHandle->pHeader), pHandle->pHeaderSize) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompHeader part, size:%ld, reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->pHeaderSize, strerror(errno));
return -1;
}
}
code = vnodeFindKeyInCache(pImport, 1); // Close opened files
if (code != 0) return code; close(pVnode->dfd);
pVnode->dfd = 0;
if (pImport->key != pImport->firstKey) { close(pVnode->hfd);
rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key); pVnode->hfd = 0;
pImport->importedRows = rows;
code = vnodeImportToCache(pImport, payload, rows); close(pVnode->lfd);
} else { pVnode->lfd = 0;
dTrace("vid:%d sid:%d id:%s, data is already imported to cache", pObj->vnode, pObj->sid, pObj->meterId);
if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN);
rename(pVnode->nfn, pVnode->cfn);
remove(dpath);
} }
return code; return 0;
} }
int vnodeImportStartToFile(SImportInfo *pImport, char *payload, int rows) { void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) {
int code = 0; int sdataRow;
SMeterObj *pObj = pImport->pObj; int offset;
code = vnodeFindKeyInFile(pImport, 1); for (int row = 0; row < rows; ++row) {
if (code != 0) return code; sdataRow = row + rowOffset;
offset = 0;
for (int col = 0; col < pObj->numOfColumns; ++col) {
memcpy(data[col]->data + sdataRow * pObj->schema[col].bytes, payload + pObj->bytesPerPoint * row + offset,
pObj->schema[col].bytes);
if (pImport->key != pImport->firstKey) { offset += pObj->schema[col].bytes;
pImport->payload = payload;
pImport->rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key);
pImport->importedRows = pImport->rows;
code = vnodeImportToFile(pImport);
} else {
dTrace("vid:%d sid:%d id:%s, data is already imported to file", pObj->vnode, pObj->sid, pObj->meterId);
} }
}
}
return code; // TODO : Check the correctness
/*
 * Make sure the file that covers pVnode->commitFirstKey exists, and set the
 * commit window of the vnode to that file.
 *
 * - If the vnode has no file yet (lastKeyOnFile == 0), the first file is
 *   derived from pVnode->firstKey and created.
 * - numOfFiles is the distance, in files, from the newest file back to the
 *   file holding commitFirstKey. If it reaches beyond the files the vnode
 *   already has, the missing empty files are created backward.
 * - If commitFirstKey lies after lastKeyOnFile, one new file is created
 *   forward and becomes the newest file.
 *
 * On success commitFirstKey, commitLastKey, commitFileId and numOfFiles of the
 * vnode are updated and 0 is returned. Returns -1 when an empty file cannot
 * be created.
 */
int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) {
  int        numOfFiles = 0, fileId, filesAdded = 0;
  int        vnode = pVnode->vnode;
  SVnodeCfg *pCfg = &(pVnode->cfg);

  // daysPerFile is a divisor on every path below, so apply the default before any use
  if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10;

  if (pVnode->lastKeyOnFile == 0) {
    pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile;
    // 64-bit arithmetic: the key is too large for a 32-bit long
    pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1;
    pVnode->numOfFiles = 1;
    if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1;
  }

  numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile;
  if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1;

  dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode,
         pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles);

  if (numOfFiles >= pVnode->numOfFiles) {
    // create empty header files backward
    filesAdded = numOfFiles - pVnode->numOfFiles + 1;
    for (int i = 0; i < filesAdded; ++i) {
      fileId = pVnode->fileId - pVnode->numOfFiles - i;
      if (vnodeCreateEmptyCompFile(vnode, fileId) < 0) return -1;
    }
  } else if (numOfFiles < 0) {
    // create empty header files forward
    pVnode->fileId++;
    if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1;
    pVnode->lastKeyOnFile += (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile;
    filesAdded = 1;
    numOfFiles = 0;  // hacker way
  }

  fileId = pVnode->fileId - numOfFiles;
  pVnode->commitLastKey =
      pVnode->lastKeyOnFile - (int64_t)numOfFiles * tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile;
  pVnode->commitFirstKey = pVnode->commitLastKey - (int64_t)tsMsPerDay[pVnode->cfg.precision] * pCfg->daysPerFile + 1;
  pVnode->commitFileId = fileId;
  pVnode->numOfFiles = pVnode->numOfFiles + filesAdded;

  return 0;
}
int vnodeImportWholeToFile(SImportInfo *pImport, char *payload, int rows) { static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) {
int code = 0; SMeterObj * pObj = (SMeterObj *)(pImport->pObj);
SMeterObj *pObj = pImport->pObj; SVnodeObj * pVnode = vnodeList + pObj->vnode;
SImportHandle importHandle;
size_t size = 0;
SData * data[TSDB_MAX_COLUMNS];
char * buffer = NULL;
SData * cdata[TSDB_MAX_COLUMNS];
char * cbuffer = NULL;
SCompBlock compBlock;
TSCKSUM checksum = 0;
int pointsImported = 0;
code = vnodeFindKeyInFile(pImport, 0); TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
if (code != 0) return code; TSKEY minFileKey = fid * delta;
TSKEY maxFileKey = minFileKey + delta - 1;
TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey >= minFileKey && firstKey <= maxFileKey && lastKey >= minFileKey && lastKey <= maxFileKey);
// create neccessary files
pVnode->commitFirstKey = firstKey;
if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1;
assert(pVnode->commitFileId == fid);
// Open least files to import .head(hfd) .data(dfd) .last(lfd)
if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return -1;
memset(&importHandle, 0, sizeof(SImportHandle));
{ // Load SCompHeader part from .head file
importHandle.pHeaderSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
importHandle.pHeader = (SCompHeader *)malloc(importHandle.pHeaderSize);
if (importHandle.pHeader == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, importHandle.pHeaderSize);
goto _error_merge;
}
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode,
pObj->sid, pObj->meterId, fid, strerror(errno));
goto _error_merge;
}
if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId,
fid);
goto _error_merge;
}
}
{ // Initialize data[] and cdata[], which is used to hold data to write to data file
size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES) * pObj->numOfColumns;
buffer = (char *)malloc(size);
if (buffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
goto _error_merge;
}
cbuffer = (char *)malloc(size);
if (cbuffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
goto _error_merge;
}
data[0] = (SData *)buffer;
cdata[0] = (SData *)cbuffer;
for (int col = 1; col < pObj->numOfColumns; col++) {
data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
}
}
if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it
_write_empty_point:
if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
goto _error_merge;
}
importHandle.oldNumOfBlocks = 0;
importHandle.driftOffset += sizeof(SCompInfo);
for (int rowsWritten = 0; rowsWritten < rows;) {
int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */);
vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0);
pointsImported += rowsToWrite;
// TODO : Write the block to the file
compBlock.last = 1;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) {
// TODO: deal with ERROR here
}
importHandle.last = compBlock.last;
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock));
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowsWritten += rowsToWrite;
}
twrite(pVnode->nfd, &checksum, sizeof(TSCKSUM));
importHandle.driftOffset += sizeof(TSCKSUM);
} else { // Else if there are old data in this file.
{ // load SCompInfo and SCompBlock part
lseek(pVnode->hfd, importHandle.pHeader[pObj->sid].compInfoOffset, SEEK_SET);
if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) {
dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, strerror(errno));
goto _error_merge;
}
if ((importHandle.compInfo.delimiter != TSDB_VNODE_DELIMITER) ||
(!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) {
dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter);
goto _error_merge;
}
{ // Check the context of SCompInfo part
if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter
goto _write_empty_point;
}
}
importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks;
importHandle.last = importHandle.compInfo.last;
size = sizeof(SCompBlock) * importHandle.compInfo.numOfBlocks + sizeof(TSCKSUM);
importHandle.pBlocks = (SCompBlock *)malloc(size);
if (importHandle.pBlocks == NULL) {
dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid,
pObj->meterId, size);
goto _error_merge;
}
if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) {
dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, strerror(errno));
goto _error_merge;
}
if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) {
dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId,
pVnode->cfn);
goto _error_merge;
}
}
/* Now we have _payload_, we have _importHandle.pBlocks_, just merge payload into the importHandle.pBlocks
*
* Input: payload, pObj->bytesPerBlock, rows, importHandle.pBlocks
*/
{
int payloadIter = 0;
SBlockIter blockIter = {0, 0, 0, 0};
while (1) {
if (payloadIter >= rows) { // payload end, break
// write the remaining blocks to the file
if (pVnode->nfd > 0) {
int blocksLeft = importHandle.compInfo.numOfBlocks - blockIter.oslot;
if (blocksLeft > 0) {
checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft);
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno));
goto _error_merge;
}
}
if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno));
goto _error_merge;
}
}
break;
}
if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break
assert(false);
// Should never come here
int rowsLeft = rows - payloadIter;
if (pVnode->nfd > 0 && rowsLeft > 0) {
// TODO : Convert into while here
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, rowsLeft, data, 0);
pointsImported++;
assert(importHandle.last == 0);
compBlock.last = 1;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rows - payloadIter) < 0) {
// TODO :
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
importHandle.last = compBlock.last;
twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock));
twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM));
}
break;
}
TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
{ // Binary search the (slot, pos) which is >= key as well as nextKey
int left = blockIter.slot;
int right = importHandle.compInfo.numOfBlocks - 1;
TSKEY minKey = importHandle.pBlocks[left].keyFirst;
TSKEY maxKey = importHandle.pBlocks[right].keyLast;
assert(minKey <= maxKey);
if (key < minKey) { // Case 1. write just ahead the blockIter.slot
blockIter.slot = left;
blockIter.pos = 0;
blockIter.nextKey = minKey;
} else if (key > maxKey) { // Case 2. write to the end
if (importHandle.pBlocks[right].last) { // Case 2.1 last block in .last file, need to merge
assert(importHandle.last != 0);
importHandle.last = 0;
blockIter.slot = right;
blockIter.pos = importHandle.pBlocks[right].numOfPoints;
} else { // Case 2.2 just write after the last block
blockIter.slot = right + 1;
blockIter.pos = 0;
}
blockIter.nextKey = maxFileKey + 1;
} else { // Case 3. need to search the block for slot and pos
if (key == minKey || key == maxKey) {
payloadIter++;
continue;
}
// Here: minKey < key < maxKey
int mid;
TSKEY blockMinKey;
TSKEY blockMaxKey;
// Binary search the slot
do {
mid = (left + right) / 2;
blockMinKey = importHandle.pBlocks[mid].keyFirst;
blockMaxKey = importHandle.pBlocks[mid].keyLast;
assert(blockMinKey <= blockMaxKey);
if (key < blockMinKey) {
right = mid;
} else if (key > blockMaxKey) {
left = mid + 1;
} else { /* blockMinKey <= key <= blockMaxKey */
break;
}
} while (left < right);
if (key == blockMinKey || key == blockMaxKey) { // duplicate key
payloadIter++;
continue;
}
// Get the slot
if (key > blockMaxKey) { /* pos = 0 or pos = ? */
blockIter.slot = mid + 1;
} else { /* key < blockMinKey (pos = 0) || (key > blockMinKey && key < blockMaxKey) (pos=?) */
blockIter.slot = mid;
}
if (pImport->key != pImport->lastKey) { // Get the pos
pImport->payload = payload; assert(blockIter.slot < importHandle.compInfo.numOfBlocks);
pImport->rows = vnodeGetImportEndPart(pObj, payload, rows, &pImport->payload, pImport->key);
pImport->importedRows = pImport->rows; if (key == importHandle.pBlocks[blockIter.slot].keyFirst ||
code = vnodeImportToFile(pImport); key == importHandle.pBlocks[blockIter.slot].keyLast) {
payloadIter++;
continue;
}
assert(key < importHandle.pBlocks[blockIter.slot].keyLast);
/* */
if (key < importHandle.pBlocks[blockIter.slot].keyFirst) {
blockIter.pos = 0;
blockIter.nextKey = importHandle.pBlocks[blockIter.slot].keyFirst;
} else { } else {
code = vnodeImportStartToFile(pImport, payload, rows); SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot;
if (pBlock->sversion != pObj->sversion) { /*TODO*/
}
if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP) < 0) {
}
int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC);
assert(pos != 0);
if (KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), pos) == key) {
payloadIter++;
continue;
} }
return code; blockIter.pos = pos;
} blockIter.nextKey = (blockIter.slot + 1 < importHandle.compInfo.numOfBlocks)
? importHandle.pBlocks[blockIter.slot + 1].keyFirst
: maxFileKey + 1;
// Need to merge with this block
if (importHandle.pBlocks[blockIter.slot].last) { // this is to merge with the last block
assert((blockIter.slot == (importHandle.compInfo.numOfBlocks - 1)));
importHandle.last = 0;
}
}
}
}
int vnodeImportWholeToCache(SImportInfo *pImport, char *payload, int rows) { // Open the new .t file if not opened yet.
int code = 0; if (pVnode->nfd <= 0) {
SMeterObj *pObj = pImport->pObj; if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
goto _error_merge;
}
}
code = vnodeFindKeyInCache(pImport, 0); if (blockIter.slot > blockIter.oslot) { // write blocks in range [blockIter.oslot, blockIter.slot) to .t file
if (code != 0) return code; checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot));
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot),
strerror(errno));
goto _error_merge;
}
blockIter.oslot = blockIter.slot;
}
if (blockIter.pos == 0) { // No need to merge
// copy payload part to data
int rowOffset = 0;
for (; payloadIter < rows; rowOffset++) {
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) break;
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
payloadIter++;
}
// write directly to .data file
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO: Deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)) < 0) {
// TODO : deal with the ERROR here
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
} else { // Merge block and payload from payloadIter
if (pImport->key != pImport->lastKey) { if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot,
char *pStart; DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA) < 0) { // Load neccessary blocks
if ( pImport->key < pObj->lastKeyOnFile ) pImport->key = pObj->lastKeyOnFile; goto _error_merge;
rows = vnodeGetImportEndPart(pObj, payload, rows, &pStart, pImport->key); }
pImport->importedRows = rows;
code = vnodeImportToCache(pImport, pStart, rows); importHandle.oldNumOfBlocks--;
importHandle.driftOffset -= sizeof(SCompBlock);
int rowOffset = blockIter.pos; // counter for data
// Copy the front part
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy((void *)(data[col]->data), (void *)(importHandle.data[col]->data),
pObj->schema[col].bytes * blockIter.pos);
}
// Merge part
while (1) {
if (rowOffset >= pVnode->cfg.rowsInFileBlock) { // data full in a block to commit
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
}
if ((payloadIter >= rows || KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) &&
blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints)
break;
if (payloadIter >= rows ||
KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) { // payload end
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
} else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
payloadIter++;
rowOffset++;
} else { } else {
if (pImport->firstKey > pObj->lastKeyOnFile) { if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) ==
code = vnodeImportStartToCache(pImport, payload, rows); KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
} else if (pImport->firstKey < pObj->lastKeyOnFile) { blockIter.pos)) { // duplicate key
code = vnodeImportStartToFile(pImport, payload, rows); payloadIter++;
} else { // firstKey == pObj->lastKeyOnFile continue;
dTrace("vid:%d sid:%d id:%s, data is already there", pObj->vnode, pObj->sid, pObj->meterId); } else if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) <
KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
blockIter.pos)) {
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
payloadIter++;
rowOffset++;
} else {
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos,
pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
}
} }
} }
if (rowOffset > 0) { // data full in a block to commit
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
return code; checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
}
blockIter.slot++;
blockIter.oslot = blockIter.slot;
}
}
}
}
// Write the SCompInfo part
if (vnodeCloseImportFiles(pObj, &importHandle) < 0) {
goto _error_merge;
}
pImport->importedRows += pointsImported;
// TODO: free the allocated memory
tfree(buffer);
tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
return 0;
_error_merge:
tfree(buffer);
tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
close(pVnode->dfd);
pVnode->dfd = 0;
close(pVnode->hfd);
pVnode->hfd = 0;
close(pVnode->lfd);
pVnode->lfd = 0;
if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
remove(pVnode->nfn);
}
return -1;
} }
/*
 * Advance a block iterator by `step` rows.
 *
 * iter      - an lvalue with integer members `pos` and `slot`
 * step      - number of rows to move forward
 * slotLimit - number of slots; `slot` wraps around modulo this value
 * posLimit  - number of rows per slot
 *
 * If pos + step stays below posLimit only `pos` is advanced; otherwise `pos`
 * is reset to 0 and `slot` moves to the next slot (wrapping at slotLimit).
 *
 * The body is wrapped in do { } while (0) so that the macro followed by ';'
 * forms exactly one statement and is safe inside un-braced if/else.
 * Note that `iter` is evaluated more than once: pass a plain variable.
 */
#define FORWARD_ITER(iter, step, slotLimit, posLimit) \
  do {                                                \
    if ((iter).pos + (step) < (posLimit)) {           \
      (iter).pos = (iter).pos + (step);               \
    } else {                                          \
      (iter).pos = 0;                                 \
      (iter).slot = ((iter).slot + 1) % (slotLimit);  \
    }                                                 \
  } while (0)
int pointsImported = 0;
/*
 * Tell whether a cache iterator points just past the last cached row of a meter.
 *
 * iter   - iterator (slot, pos) to test
 * pMeter - meter whose cache (pMeter->pCache) is inspected
 *
 * The end position is derived from the current cache block:
 *   - if that block holds pointsPerBlock rows, the end is row 0 of the next
 *     slot (wrapping at maxBlocks);
 *   - otherwise it is the first unused row of the current slot.
 *
 * Returns non-zero when iter is exactly at that position, 0 otherwise.
 */
int isCacheEnd(SBlockIter iter, SMeterObj *pMeter) {
  SCacheInfo *pInfo = (SCacheInfo *)(pMeter->pCache);
  int         slot = 0;
  int         pos = 0;

  if (pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints == pMeter->pointsPerBlock) {
    // current block is full: the end lies at the start of the following slot
    slot = (pInfo->currentSlot + 1) % (pInfo->maxBlocks);
    pos = 0;
  } else {
    slot = pInfo->currentSlot;
    pos = pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
  }

  return ((iter.slot == slot) && (iter.pos == pos));
}
if (sversion != pObj->sversion) { int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) {
dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, SMeterObj * pObj = pImport->pObj;
pObj->sversion, sversion); SVnodeObj * pVnode = vnodeList + pObj->vnode;
return TSDB_CODE_OTHERS; int code = -1;
SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache);
int payloadIter;
SCachePool * pPool = pVnode->pCachePool;
int isCacheIterEnd = 0;
int spayloadIter = 0;
int isAppendData = 0;
int rowsImported = 0;
int totalRows = 0;
size_t size = 0;
SMergeBuffer *pBuffer = NULL;
TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey <= lastKey && firstKey > pObj->lastKeyOnFile);
// TODO: make this condition less strict
if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { // No free room to hold the data
dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints);
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
} }
payload = pSubmit->payLoad; if (pInfo->numOfBlocks == 0) {
TSKEY firstKey = *(TSKEY *)payload; if (vnodeAllocateCacheBlock(pObj) < 0) {
TSKEY lastKey = *(TSKEY *)(payload + pObj->bytesPerPoint*(rows-1)); // TODO: deal with the ERROR here
int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision]; }
TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision];
TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 1;
if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) {
dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%lld, data is out of range, rows:%d firstKey:%lld lastKey:%lld minAllowedKey:%lld maxAllowedKey:%lld",
pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, rows, firstKey, lastKey, minAllowedKey, maxAllowedKey);
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
} }
// forward to peers // Find the first importable record from payload
if (pShell && pVnode->cfg.replications > 1) { pImport->lastKey = lastKey;
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion); for (payloadIter = 0; payloadIter < rows; payloadIter++) {
if (code != 0) return code; TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
if (key == pObj->lastKey) continue;
if (key > pObj->lastKey) { // Just as insert
pImport->slot = pInfo->currentSlot;
pImport->pos = pInfo->cacheBlocks[pImport->slot]->numOfPoints;
isCacheIterEnd = 1;
break;
} else {
pImport->firstKey = key;
if (vnodeFindKeyInCache(pImport, 1) < 0) {
goto _exit;
} }
if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (pImport->firstKey != pImport->key) break;
if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; }
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion);
if (code != 0) return code;
} }
if (*((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)) > pObj->lastKey) { if (payloadIter == rows) {
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); pImport->importedRows = 0;
vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERT); code = 0;
code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported, now); goto _exit;
}
if (pShell) { spayloadIter = payloadIter;
pShell->code = code; if (pImport->pos == pObj->pointsPerBlock) assert(isCacheIterEnd);
pShell->numOfTotalPoints += pointsImported;
// Allocate a new merge buffer work as buffer
totalRows = pObj->pointsPerBlock + rows - payloadIter + 1;
size = sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns + pObj->bytesPerPoint * totalRows;
pBuffer = (SMergeBuffer *)malloc(size);
if (pBuffer == NULL) {
dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size);
return code;
}
pBuffer->spos = 0;
pBuffer->epos = 0;
pBuffer->totalRows = totalRows;
pBuffer->offset[0] = (char *)pBuffer + sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns;
for (int col = 1; col < pObj->numOfColumns; col++) {
pBuffer->offset[col] = pBuffer->offset[col - 1] + pObj->schema[col - 1].bytes * totalRows;
} }
vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); // TODO: take pImport->pos = pObj->pointsPerBlock into consideration
{ // Do the merge staff
SBlockIter cacheIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to traverse old cache data
SBlockIter writeIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to write data to cache
int availPoints = pObj->pointsPerBlock - pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
assert(availPoints >= 0);
while (1) {
if ((payloadIter >= rows) && isCacheIterEnd) break;
if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush
if (writeIter.pos == pObj->pointsPerBlock) {
writeIter.pos = 0;
writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks;
}
while (pBuffer->spos != pBuffer->epos) {
if (writeIter.slot == cacheIter.slot && writeIter.pos == cacheIter.pos) break;
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos,
pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes);
}
if (writeIter.pos + 1 < pObj->pointsPerBlock) {
writeIter.pos++;
} else { } else {
SImportInfo *pNew, import; pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1;
writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks;
writeIter.pos = 0;
}
pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows;
}
}
if ((payloadIter >= rows) ||
((!isCacheIterEnd) &&
(KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >
KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY),
cacheIter.pos)))) { // if (payload end || (cacheIter not end && payloadKey > blockKey))
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes);
}
FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock);
isCacheIterEnd = isCacheEnd(cacheIter, pObj);
} else if ((isCacheIterEnd) ||
((payloadIter < rows) &&
(KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) <
KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY),
cacheIter.pos)))) { // cacheIter end || (payloadIter not end && payloadKey < blockKey)
if (availPoints == 0) { // Need to allocate a new cache block
pthread_mutex_lock(&(pPool->vmutex));
SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode);
if (pNewBlock == NULL) { // Failed to allocate a new cache block
pthread_mutex_unlock(&(pPool->vmutex));
payloadIter = rows;
code = TSDB_CODE_ACTION_IN_PROGRESS;
pImport->commit = 1;
continue;
}
dTrace("vid:%d sid:%d id:%s, import %d rows data", pObj->vnode, pObj->sid, pObj->meterId, rows); pNewBlock->pMeterObj = pObj;
memset(&import, 0, sizeof(import)); pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns;
import.firstKey = *((TSKEY *)(payload)); for (int col = 1; col < pObj->numOfColumns; col++)
import.lastKey = *((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)); pNewBlock->offset[col] = pNewBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock;
import.pObj = pObj;
import.pShell = pShell;
import.payload = payload;
import.rows = rows;
int32_t num = 0; int newSlot = (writeIter.slot + 1) % pInfo->maxBlocks;
pthread_mutex_lock(&pVnode->vmutex); pInfo->blocks++;
num = pObj->numOfQueries; int tblockId = pInfo->blocks;
pthread_mutex_unlock(&pVnode->vmutex);
int32_t commitInProcess = 0; if (writeIter.slot != pInfo->currentSlot) {
for (int tslot = pInfo->currentSlot; tslot != writeIter.slot;) {
int nextSlot = (tslot + 1) % pInfo->maxBlocks;
pInfo->cacheBlocks[nextSlot] = pInfo->cacheBlocks[tslot];
pInfo->cacheBlocks[nextSlot]->slot = nextSlot;
pInfo->cacheBlocks[nextSlot]->blockId = tblockId--;
tslot = (tslot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
}
pthread_mutex_lock(&pPool->vmutex); int index = pNewBlock->index;
if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { if (cacheIter.slot == writeIter.slot) {
pthread_mutex_unlock(&pPool->vmutex); pNewBlock->numOfPoints = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints;
int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos;
if (pointsLeft > 0) {
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy((void *)(pNewBlock->offset[col]),
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes * pointsLeft);
}
}
}
pNewBlock->blockId = tblockId;
pNewBlock->slot = newSlot;
pNewBlock->index = index;
pInfo->cacheBlocks[newSlot] = pNewBlock;
pInfo->numOfBlocks++;
pInfo->unCommittedBlocks++;
pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks;
pthread_mutex_unlock(&(pPool->vmutex));
cacheIter.slot = (cacheIter.slot + 1) % pInfo->maxBlocks;
// move a cache of data forward
availPoints = pObj->pointsPerBlock;
}
pNew = (SImportInfo *)malloc(sizeof(SImportInfo)); int offset = 0;
memcpy(pNew, &import, sizeof(SImportInfo)); for (int col = 0; col < pObj->numOfColumns; col++) {
pNew->signature = pNew; memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
int payloadLen = contLen - sizeof(SSubmitMsg); payload + pObj->bytesPerPoint * payloadIter + offset, pObj->schema[col].bytes);
pNew->payload = malloc(payloadLen); offset += pObj->schema[col].bytes;
pNew->opayload = pNew->payload; }
memcpy(pNew->payload, payload, payloadLen); if (spayloadIter == payloadIter) {// update pVnode->firstKey
pthread_mutex_lock(&(pVnode->vmutex));
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < pVnode->firstKey) pVnode->firstKey = firstKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
if (isCacheIterEnd) {
pObj->lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
if (!isAppendData) isAppendData = 1;
}
dTrace("vid:%d sid:%d id:%s, import later, commit in process:%d, numOfQueries:%d", pObj->vnode, pObj->sid, rowsImported++;
pObj->meterId, commitInProcess, pObj->numOfQueries); availPoints--;
payloadIter++;
taosTmrStart(vnodeProcessImportTimer, 10, pNew, vnodeTmrCtrl);
return 0;
} else { } else {
pPool->commitInProcess = 1; payloadIter++;
pthread_mutex_unlock(&pPool->vmutex); continue;
int code = vnodeImportData(pObj, &import);
if (pShell) {
pShell->code = code;
pShell->numOfTotalPoints += import.importedRows;
} }
pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows;
} }
if (pBuffer->spos != pBuffer->epos) {
if (writeIter.pos == pObj->pointsPerBlock) {
writeIter.pos = 0;
writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks;
}
while (pBuffer->spos != pBuffer->epos) {
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pInfo->cacheBlocks[writeIter.slot]->offset[col] + pObj->schema[col].bytes * writeIter.pos,
pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes);
} }
pVnode->version++; if (writeIter.pos + 1 < pObj->pointsPerBlock) {
writeIter.pos++;
} else {
pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos + 1;
writeIter.slot = (writeIter.slot + 1) % pInfo->maxBlocks;
writeIter.pos = 0;
}
if (pShell) { pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows;
pShell->count--;
if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pShell, pShell->code, pShell->numOfTotalPoints);
} }
return 0; if (writeIter.pos != 0) pInfo->cacheBlocks[writeIter.slot]->numOfPoints = writeIter.pos;
}
if (isAppendData) {
pthread_mutex_lock(&(pVnode->vmutex));
if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
}
pImport->importedRows += rowsImported;
code = 0;
_exit:
tfree(pBuffer);
return code;
} }
/*
 * Import payload rows into the data files they belong to.
 *
 * pImport - import context; pImport->pObj is the target meter
 * payload - row-oriented data, bytesPerPoint bytes per row, timestamp first
 * rows    - number of rows in payload
 *
 * The time axis is cut into files of
 * cfg.daysPerFile * tsMsPerDay[cfg.precision] time units each. For every file
 * id between the one of the first row and the one of the last row, the rows
 * falling into that file's key range are located with vnodeSearchKeyInRange()
 * and handed to vnodeMergeDataIntoFile(). File ids with no matching rows are
 * skipped.
 *
 * Returns 0 when every file was processed (or rows <= 0), otherwise the first
 * non-zero code returned by vnodeMergeDataIntoFile().
 */
int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) {
  int code = 0;
  // TODO : Check the correctness of pObj and pVnode
  SMeterObj *pObj = (SMeterObj *)(pImport->pObj);
  SVnodeObj *pVnode = vnodeList + pObj->vnode;

  if (rows <= 0) return code;  // nothing to import; also keeps the rows - 1 index below valid

  // widen before multiplying so the product cannot overflow a narrower type
  int64_t delta = (int64_t)pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
  int     sfid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0) / delta;
  int     efid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1) / delta;

  for (int fid = sfid; fid <= efid; fid++) {
    TSKEY skey = fid * delta;        // first key covered by file `fid`
    TSKEY ekey = skey + delta - 1;   // last key covered by file `fid`
    int   srow = 0, nrows = 0;

    if (vnodeSearchKeyInRange(payload, pObj->bytesPerPoint, rows, skey, ekey, &srow, &nrows) < 0) continue;

    assert(nrows > 0);

    // keys are 64-bit: print them as long long, as the other log lines in this file do
    dTrace("vid:%d sid:%d meterId:%s, %d rows of data will be imported to file %d, srow:%d firstKey:%lld lastKey:%lld",
           pObj->vnode, pObj->sid, pObj->meterId, nrows, fid, srow,
           (long long)KEY_AT_INDEX(payload, pObj->bytesPerPoint, srow),
           (long long)KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1)));

    code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid);
    if (code != 0) break;
  }

  return code;
}
// TODO : add an offset in pShell so that messages are not processed repeatedly
/*
 * Import the rows described by pImport into a meter.
 *
 * pObj    - target meter
 * pImport - import context (payload, rows; `commit` and importedRows are
 *           updated by the callees)
 *
 * The payload is split around pObj->lastKeyOnFile:
 *   1. rows with key >  lastKeyOnFile go to the cache
 *      (vnodeImportDataToCache);
 *   2. rows with key <  lastKeyOnFile go to the data files
 *      (vnodeImportDataToFiles).
 * Rows whose key equals lastKeyOnFile fall into neither range and are not
 * imported.
 *
 * If the cache step asks for a commit (pImport->commit), the commit timer
 * routine is run immediately and the function returns without touching files.
 *
 * pPool->commitInProcess is cleared on every exit path (presumably it was set
 * by the caller before entering -- verify against the call sites).
 *
 * Returns 0 on success or the non-zero code of the step that failed.
 */
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) {
  int         code = 0;
  int         srow = 0, nrows = 0;
  SVnodeObj * pVnode = vnodeList + pObj->vnode;
  SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);

  // 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache
  if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX,
                            &srow, &nrows) >= 0) {
    code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
    if (pImport->commit) {  // Need to commit now
      pPool->commitInProcess = 0;
      vnodeProcessCommitTimer(pVnode, NULL);
      return code;
    }
  }

  // 2. import data (0, pObj->lastKeyOnFile) into files
  // Skipped when step 1 failed; the flag below is still cleared in that case,
  // otherwise a failed cache import would leave commitInProcess set.
  if (code == 0 && vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0,
                                         pObj->lastKeyOnFile - 1, &srow, &nrows) >= 0) {
    code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
  }

  pPool->commitInProcess = 0;

  return code;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册