提交 0d963c5a 编写于 作者: H hjxilinx

[jira none]

...@@ -56,6 +56,7 @@ static int32_t tscToInteger(SSQLToken *pToken, int64_t *value, char **endPtr) { ...@@ -56,6 +56,7 @@ static int32_t tscToInteger(SSQLToken *pToken, int64_t *value, char **endPtr) {
radix = 2; radix = 2;
} }
errno = 0;
*value = strtoll(pToken->z, endPtr, radix); *value = strtoll(pToken->z, endPtr, radix);
return numType; return numType;
...@@ -66,6 +67,8 @@ static int32_t tscToDouble(SSQLToken *pToken, double *value, char **endPtr) { ...@@ -66,6 +67,8 @@ static int32_t tscToDouble(SSQLToken *pToken, double *value, char **endPtr) {
if (TK_ILLEGAL == numType) { if (TK_ILLEGAL == numType) {
return numType; return numType;
} }
errno = 0;
*value = strtod(pToken->z, endPtr); *value = strtod(pToken->z, endPtr);
return numType; return numType;
} }
......
...@@ -2546,6 +2546,10 @@ int32_t setShowInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { ...@@ -2546,6 +2546,10 @@ int32_t setShowInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) {
} }
} }
}else if (type == SHOW_VNODES) { }else if (type == SHOW_VNODES) {
if (NULL == pInfo->pDCLInfo) {
return invalidSqlErrMsg(pCmd, "No specified ip of dnode");
}
// show vnodes may be ip addr of dnode in payload // show vnodes may be ip addr of dnode in payload
if (pInfo->pDCLInfo->nTokens > 0) { if (pInfo->pDCLInfo->nTokens > 0) {
SSQLToken* pDnodeIp = &pInfo->pDCLInfo->a[0]; SSQLToken* pDnodeIp = &pInfo->pDCLInfo->a[0];
......
...@@ -64,8 +64,8 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const ...@@ -64,8 +64,8 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const
#ifdef CLUSTER #ifdef CLUSTER
if (ip && ip[0]) { if (ip && ip[0]) {
strcpy(tscMgmtIpList.ipstr[0], ip); strcpy(tscMgmtIpList.ipstr[1], ip);
tscMgmtIpList.ip[0] = inet_addr(ip); tscMgmtIpList.ip[1] = inet_addr(ip);
} }
#else #else
if (ip && ip[0]) { if (ip && ip[0]) {
......
...@@ -68,6 +68,8 @@ ...@@ -68,6 +68,8 @@
#define HTTP_COMPRESS_IDENTITY 0 #define HTTP_COMPRESS_IDENTITY 0
#define HTTP_COMPRESS_GZIP 2 #define HTTP_COMPRESS_GZIP 2
#define HTTP_SESSION_ID_LEN (TSDB_USER_LEN * 2 + 1)
typedef enum { typedef enum {
HTTP_CONTEXT_STATE_READY, HTTP_CONTEXT_STATE_READY,
HTTP_CONTEXT_STATE_HANDLING, HTTP_CONTEXT_STATE_HANDLING,
...@@ -83,7 +85,7 @@ typedef struct { ...@@ -83,7 +85,7 @@ typedef struct {
int expire; int expire;
int access; int access;
void *taos; void *taos;
char id[TSDB_USER_LEN]; char id[HTTP_SESSION_ID_LEN + 1];
} HttpSession; } HttpSession;
typedef enum { typedef enum {
......
...@@ -50,6 +50,7 @@ bool httpParseBasicAuthToken(HttpContext *pContext, char *token, int len) { ...@@ -50,6 +50,7 @@ bool httpParseBasicAuthToken(HttpContext *pContext, char *token, int len) {
return false; return false;
} }
strncpy(pContext->user, base64, (size_t)user_len); strncpy(pContext->user, base64, (size_t)user_len);
pContext->user[user_len] = 0;
char *password = user + 1; char *password = user + 1;
int pass_len = (int)((base64 + outlen) - password); int pass_len = (int)((base64 + outlen) - password);
...@@ -60,6 +61,7 @@ bool httpParseBasicAuthToken(HttpContext *pContext, char *token, int len) { ...@@ -60,6 +61,7 @@ bool httpParseBasicAuthToken(HttpContext *pContext, char *token, int len) {
return false; return false;
} }
strncpy(pContext->pass, password, (size_t)pass_len); strncpy(pContext->pass, password, (size_t)pass_len);
pContext->pass[pass_len] = 0;
free(base64); free(base64);
httpTrace("context:%p, fd:%d, ip:%s, basic token parsed success, user:%s", pContext, pContext->fd, pContext->ipstr, httpTrace("context:%p, fd:%d, ip:%s, basic token parsed success, user:%s", pContext, pContext->fd, pContext->ipstr,
......
...@@ -69,7 +69,7 @@ char* httpMsg[] = { ...@@ -69,7 +69,7 @@ char* httpMsg[] = {
"field value type should be number or string", "field value type should be number or string",
"field value is null", // 51 "field value is null", // 51
"parse basic auth token error", "parse basic auth token error",
"parse taosd auth token error", "parse http auth token error",
"host type should be string", "host type should be string",
// grafana // grafana
......
...@@ -41,8 +41,8 @@ void httpCreateSession(HttpContext *pContext, void *taos) { ...@@ -41,8 +41,8 @@ void httpCreateSession(HttpContext *pContext, void *taos) {
pthread_mutex_lock(&server->serverMutex); pthread_mutex_lock(&server->serverMutex);
if (pContext->session != NULL && pContext->session == pContext->session->signature) { if (pContext->session != NULL && pContext->session == pContext->session->signature) {
httpTrace("context:%p, fd:%d, ip:%s, user:%s, set exist session:%p:%s:%p expired", pContext, pContext->fd, httpTrace("context:%p, fd:%d, ip:%s, user:%s, set exist session:%p:%p expired", pContext, pContext->fd,
pContext->ipstr, pContext->user, pContext->session, pContext->session->id, pContext->session->taos); pContext->ipstr, pContext->user, pContext->session, pContext->session->taos);
pContext->session->expire = 0; pContext->session->expire = 0;
pContext->session->access--; pContext->session->access--;
} }
...@@ -51,7 +51,7 @@ void httpCreateSession(HttpContext *pContext, void *taos) { ...@@ -51,7 +51,7 @@ void httpCreateSession(HttpContext *pContext, void *taos) {
session.taos = taos; session.taos = taos;
session.expire = (int)taosGetTimestampSec() + server->sessionExpire; session.expire = (int)taosGetTimestampSec() + server->sessionExpire;
session.access = 1; session.access = 1;
strcpy(session.id, pContext->user); snprintf(session.id, HTTP_SESSION_ID_LEN, "%s.%s", pContext->user, pContext->pass);
pContext->session = (HttpSession *)taosAddStrHash(server->pSessionHash, session.id, (char *)(&session)); pContext->session = (HttpSession *)taosAddStrHash(server->pSessionHash, session.id, (char *)(&session));
if (pContext->session == NULL) { if (pContext->session == NULL) {
httpError("context:%p, fd:%d, ip:%s, user:%s, error:%s", pContext, pContext->fd, pContext->ipstr, pContext->user, httpError("context:%p, fd:%d, ip:%s, user:%s, error:%s", pContext, pContext->fd, pContext->ipstr, pContext->user,
...@@ -62,20 +62,23 @@ void httpCreateSession(HttpContext *pContext, void *taos) { ...@@ -62,20 +62,23 @@ void httpCreateSession(HttpContext *pContext, void *taos) {
} }
pContext->session->signature = pContext->session; pContext->session->signature = pContext->session;
httpTrace("context:%p, fd:%d, ip:%s, user:%s, create a new session:%p:%s:%p", pContext, pContext->fd, pContext->ipstr, httpTrace("context:%p, fd:%d, ip:%s, user:%s, create a new session:%p:%p", pContext, pContext->fd, pContext->ipstr,
pContext->user, pContext->session, pContext->session->id, pContext->session->taos); pContext->user, pContext->session, pContext->session->taos);
pthread_mutex_unlock(&server->serverMutex); pthread_mutex_unlock(&server->serverMutex);
} }
void httpFetchSession(HttpContext *pContext) { void httpFetchSessionImp(HttpContext *pContext) {
HttpServer *server = pContext->pThread->pServer; HttpServer *server = pContext->pThread->pServer;
pthread_mutex_lock(&server->serverMutex); pthread_mutex_lock(&server->serverMutex);
pContext->session = (HttpSession *)taosGetStrHashData(server->pSessionHash, pContext->user); char sessionId[HTTP_SESSION_ID_LEN];
snprintf(sessionId, HTTP_SESSION_ID_LEN, "%s.%s", pContext->user, pContext->pass);
pContext->session = (HttpSession *)taosGetStrHashData(server->pSessionHash, sessionId);
if (pContext->session != NULL && pContext->session == pContext->session->signature) { if (pContext->session != NULL && pContext->session == pContext->session->signature) {
pContext->session->access++; pContext->session->access++;
httpTrace("context:%p, fd:%d, ip:%s, user:%s, find an exist session:%p:%s:%p, access:%d, expire:%d", httpTrace("context:%p, fd:%d, ip:%s, user:%s, find an exist session:%p:%p, access:%d, expire:%d",
pContext, pContext->fd, pContext->ipstr, pContext->user, pContext->session, pContext->session->id, pContext, pContext->fd, pContext->ipstr, pContext->user, pContext->session,
pContext->session->taos, pContext->session->access, pContext->session->expire); pContext->session->taos, pContext->session->access, pContext->session->expire);
pContext->session->expire = (int)taosGetTimestampSec() + server->sessionExpire; pContext->session->expire = (int)taosGetTimestampSec() + server->sessionExpire;
} else { } else {
...@@ -86,6 +89,20 @@ void httpFetchSession(HttpContext *pContext) { ...@@ -86,6 +89,20 @@ void httpFetchSession(HttpContext *pContext) {
pthread_mutex_unlock(&server->serverMutex); pthread_mutex_unlock(&server->serverMutex);
} }
/*
 * Make sure pContext->session refers to the session for the context's
 * current credentials.
 *
 * Session ids have the form "<user>.<pass>". When the context has no session
 * yet, one is looked up via httpFetchSessionImp(). When it already has one
 * whose id still equals the id built from pContext->user / pContext->pass,
 * nothing is done. Otherwise the credentials no longer match the attached
 * session: the context is detached from it with httpRestoreSession() and a
 * fresh lookup is performed.
 */
void httpFetchSession(HttpContext *pContext) {
  /* No session attached yet: plain lookup. */
  if (pContext->session == NULL) {
    httpFetchSessionImp(pContext);
    return;
  }

  /* Rebuild the id the attached session is expected to carry. */
  char expectedId[HTTP_SESSION_ID_LEN];
  snprintf(expectedId, HTTP_SESSION_ID_LEN, "%s.%s", pContext->user, pContext->pass);

  /* Same user and password as before: keep using the attached session. */
  if (strcmp(pContext->session->id, expectedId) == 0) {
    return;
  }

  /* Credentials differ from the attached session: drop it and look up again. */
  httpError("context:%p, fd:%d, ip:%s, user:%s, password may be changed", pContext, pContext->fd, pContext->ipstr, pContext->user);
  httpRestoreSession(pContext);
  httpFetchSessionImp(pContext);
}
void httpRestoreSession(HttpContext *pContext) { void httpRestoreSession(HttpContext *pContext) {
HttpServer * server = pContext->pThread->pServer; HttpServer * server = pContext->pThread->pServer;
...@@ -97,15 +114,16 @@ void httpRestoreSession(HttpContext *pContext) { ...@@ -97,15 +114,16 @@ void httpRestoreSession(HttpContext *pContext) {
return; return;
} }
session->access--; session->access--;
httpTrace("context:%p, ip:%s, user:%s, restore session:%p:%s:%p, access:%d, expire:%d", httpTrace("context:%p, ip:%s, user:%s, restore session:%p:%p, access:%d, expire:%d",
pContext, pContext->ipstr, pContext->user, session, session->id, session->taos, pContext, pContext->ipstr, pContext->user, session, session->taos,
session->access, pContext->session->expire); session->access, pContext->session->expire);
pContext->session = NULL;
pthread_mutex_unlock(&server->serverMutex); pthread_mutex_unlock(&server->serverMutex);
} }
void httpResetSession(char *session) { void httpResetSession(char *session) {
HttpSession *pSession = (HttpSession *)session; HttpSession *pSession = (HttpSession *)session;
httpTrace("close session:%p:%s:%p", pSession, pSession->id, pSession->taos); httpTrace("close session:%p:%p", pSession, pSession->taos);
if (pSession->taos != NULL) { if (pSession->taos != NULL) {
taos_close(pSession->taos); taos_close(pSession->taos);
pSession->taos = NULL; pSession->taos = NULL;
...@@ -144,12 +162,12 @@ int httpSessionExpired(char *session) { ...@@ -144,12 +162,12 @@ int httpSessionExpired(char *session) {
return 0; // un-expired, so return false return 0; // un-expired, so return false
} }
if (pSession->access > 0) { if (pSession->access > 0) {
httpTrace("session:%p:%s:%p is expired, but still access:%d", pSession, pSession->id, pSession->taos, httpTrace("session:%p:%p is expired, but still access:%d", pSession, pSession->taos,
pSession->access); pSession->access);
return 0; // still used, so return false return 0; // still used, so return false
} }
httpTrace("need close session:%p:%s:%p for it expired, cur:%d, expire:%d, invertal:%d", httpTrace("need close session:%p:%p for it expired, cur:%d, expire:%d, invertal:%d",
pSession, pSession->id, pSession->taos, cur, pSession->expire, cur - pSession->expire); pSession, pSession->taos, cur, pSession->expire, cur - pSession->expire);
} }
return 1; return 1;
......
...@@ -378,9 +378,7 @@ void httpProcessRequestCb(void *param, TAOS_RES *result, int code) { ...@@ -378,9 +378,7 @@ void httpProcessRequestCb(void *param, TAOS_RES *result, int code) {
} }
void httpProcessRequest(HttpContext *pContext) { void httpProcessRequest(HttpContext *pContext) {
if (pContext->session == NULL) {
httpFetchSession(pContext); httpFetchSession(pContext);
}
if (pContext->session == NULL || pContext->session != pContext->session->signature || if (pContext->session == NULL || pContext->session != pContext->session->signature ||
pContext->reqType == HTTP_REQTYPE_LOGIN) { pContext->reqType == HTTP_REQTYPE_LOGIN) {
......
...@@ -476,6 +476,8 @@ int mgmtRetrieveVnodes(SShowObj *pShow, char *data, int rows, SConnObj *pConn) { ...@@ -476,6 +476,8 @@ int mgmtRetrieveVnodes(SShowObj *pShow, char *data, int rows, SConnObj *pConn) {
continue; continue;
} }
cols = 0;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
*(uint32_t *)pWrite = pVnode->vnode; *(uint32_t *)pWrite = pVnode->vnode;
cols++; cols++;
......
...@@ -660,7 +660,7 @@ int mgmtCreateMeter(SDbObj *pDb, SCreateTableMsg *pCreate) { ...@@ -660,7 +660,7 @@ int mgmtCreateMeter(SDbObj *pDb, SCreateTableMsg *pCreate) {
pMeter->uid = (((uint64_t)pMeter->gid.vgId) << 40) + ((((uint64_t)pMeter->gid.sid) & ((1ul << 24) - 1ul)) << 16) + pMeter->uid = (((uint64_t)pMeter->gid.vgId) << 40) + ((((uint64_t)pMeter->gid.sid) & ((1ul << 24) - 1ul)) << 16) +
((uint64_t)sdbVersion & ((1ul << 16) - 1ul)); ((uint64_t)sdbVersion & ((1ul << 16) - 1ul));
mTrace("table:%s, create table in vgroup, vgId:%d sid:%d vnode:%d uid:%ld db:%s", mTrace("table:%s, create table in vgroup, vgId:%d sid:%d vnode:%d uid:%llu db:%s",
pMeter->meterId, pVgroup->vgId, sid, pVgroup->vnodeGid[0].vnode, pMeter->uid, pDb->name); pMeter->meterId, pVgroup->vgId, sid, pVgroup->vnodeGid[0].vnode, pMeter->uid, pDb->name);
} else { } else {
pMeter->uid = (((uint64_t)pMeter->createdTime) << 16) + ((uint64_t)sdbVersion & ((1ul << 16) - 1ul)); pMeter->uid = (((uint64_t)pMeter->createdTime) << 16) + ((uint64_t)sdbVersion & ((1ul << 16) - 1ul));
......
...@@ -372,13 +372,60 @@ void vnodeCancelCommit(SVnodeObj *pVnode) { ...@@ -372,13 +372,60 @@ void vnodeCancelCommit(SVnodeObj *pVnode) {
taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer);
} }
/*
 * Obtain a cache block from the vnode's cache pool and mark it as in use.
 *
 * The vnode cache lock must be held before calling this interface.
 * NOTE: on the failure path this function itself calls
 * pthread_mutex_unlock(&pPool->vmutex) before returning NULL (presumably the
 * same lock the caller holds -- confirm against the caller), so the caller
 * must not unlock again after a NULL return.
 *
 * The scan starts at pPool->freeSlot and handles each slot as follows:
 *   - blockId == 0: the slot is used directly;
 *   - notFree set: the slot is skipped; once more than pPool->threshold
 *     slots have been skipped, vnodeCreateCommitThread() is called and the
 *     function gives up;
 *   - otherwise: the block at the first cache slot of the meter owning this
 *     block is released with vnodeFreeCacheBlock() and its pool slot is
 *     reused; if that meter slot is empty the scan moves on.
 *
 * Returns the selected block with index set, notFree = 1 and
 * pPool->notFreeSlots incremented, or NULL when the scan gave up.
 */
SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode) {
  SCachePool *pool = (SCachePool *)(pVnode->pCachePool);
  SVnodeCfg * cfg = &(pVnode->cfg);
  int         numSkipped = 0;

  for (;;) {
    SCacheBlock *candidate = (SCacheBlock *)(pool->pMem[((int64_t)pool->freeSlot)]);

    /* blockId of 0: take the slot at freeSlot as it is */
    if (candidate->blockId == 0) break;

    if (!candidate->notFree) {
      /* Block is releasable: free the first block in its owner's cache. */
      SMeterObj *  owner = candidate->pMeterObj;
      SCacheInfo * ownerCache = (SCacheInfo *)owner->pCache;
      int          firstSlot = (ownerCache->currentSlot - ownerCache->numOfBlocks + 1 + ownerCache->maxBlocks) % ownerCache->maxBlocks;
      SCacheBlock *victim = ownerCache->cacheBlocks[firstSlot];

      if (victim != NULL) {
        pool->freeSlot = victim->index;
        vnodeFreeCacheBlock(victim);
        break;
      }

      /* Owner has nothing at its first slot: advance to the next pool slot. */
      pool->freeSlot = (pool->freeSlot + 1) % cfg->cacheNumOfBlocks.totalBlocks;
      numSkipped++;
      continue;
    }

    /* Block is still in use: step over it. */
    pool->freeSlot++;
    pool->freeSlot = pool->freeSlot % cfg->cacheNumOfBlocks.totalBlocks;
    numSkipped++;

    if (numSkipped > pool->threshold) {
      /* Too many busy slots: request a commit, release the lock and give up. */
      vnodeCreateCommitThread(pVnode);
      pthread_mutex_unlock(&pool->vmutex);
      dError("vid:%d committing process is too slow, notFreeSlots:%d....", pVnode->vnode, pool->notFreeSlots);
      return NULL;
    }
  }

  /* Claim the slot at freeSlot and move freeSlot past it. */
  SCacheBlock *claimed = (SCacheBlock *)(pool->pMem[pool->freeSlot]);
  claimed->index = pool->freeSlot;
  claimed->notFree = 1;

  pool->freeSlot = (pool->freeSlot + 1) % cfg->cacheNumOfBlocks.totalBlocks;
  pool->notFreeSlots++;

  return claimed;
}
int vnodeAllocateCacheBlock(SMeterObj *pObj) { int vnodeAllocateCacheBlock(SMeterObj *pObj) {
int index; int index;
SCachePool * pPool; SCachePool * pPool;
SCacheBlock *pCacheBlock; SCacheBlock *pCacheBlock;
SCacheInfo * pInfo; SCacheInfo * pInfo;
SVnodeObj * pVnode; SVnodeObj * pVnode;
int skipped = 0, commit = 0; int commit = 0;
pVnode = vnodeList + pObj->vnode; pVnode = vnodeList + pObj->vnode;
pPool = (SCachePool *)pVnode->pCachePool; pPool = (SCachePool *)pVnode->pCachePool;
...@@ -406,45 +453,10 @@ int vnodeAllocateCacheBlock(SMeterObj *pObj) { ...@@ -406,45 +453,10 @@ int vnodeAllocateCacheBlock(SMeterObj *pObj) {
return -1; return -1;
} }
while (1) { if ((pCacheBlock = vnodeGetFreeCacheBlock(pVnode)) == NULL) return -1;
pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]);
if (pCacheBlock->blockId == 0) break;
if (pCacheBlock->notFree) {
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
if (skipped > pPool->threshold) {
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
dError("vid:%d sid:%d id:%s, committing process is too slow, notFreeSlots:%d....",
pObj->vnode, pObj->sid, pObj->meterId, pPool->notFreeSlots);
return -1;
}
} else {
SMeterObj *pRelObj = pCacheBlock->pMeterObj;
SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache;
int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks;
pCacheBlock = pRelInfo->cacheBlocks[firstSlot];
if (pCacheBlock) {
pPool->freeSlot = pCacheBlock->index;
vnodeFreeCacheBlock(pCacheBlock);
break;
} else {
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
}
}
}
index = pPool->freeSlot;
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
pPool->notFreeSlots++;
index = pCacheBlock->index;
pCacheBlock->pMeterObj = pObj; pCacheBlock->pMeterObj = pObj;
pCacheBlock->notFree = 1;
pCacheBlock->index = index;
pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *); pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *);
for (int col = 1; col < pObj->numOfColumns; ++col) for (int col = 1; col < pObj->numOfColumns; ++col)
......
...@@ -95,8 +95,8 @@ void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead, ...@@ -95,8 +95,8 @@ void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead,
} }
void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) { void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) {
sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId); if (nHeadName != NULL) sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId);
sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId); if (nLastName != NULL) sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId);
} }
void vnodeCreateDataDirIfNeeded(int vnode, char *path) { void vnodeCreateDataDirIfNeeded(int vnode, char *path) {
...@@ -180,29 +180,24 @@ int vnodeCreateEmptyCompFile(int vnode, int fileId) { ...@@ -180,29 +180,24 @@ int vnodeCreateEmptyCompFile(int vnode, int fileId) {
return 0; return 0;
} }
int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) {
char name[TSDB_FILENAME_LEN]; int numOfFiles = 0, fileId, filesAdded = 0;
char dHeadName[TSDB_FILENAME_LEN] = "\0";
char dLastName[TSDB_FILENAME_LEN] = "\0";
int len = 0;
struct stat filestat;
int vnode = pVnode->vnode; int vnode = pVnode->vnode;
int fileId, numOfFiles, filesAdded = 0; SVnodeCfg *pCfg = &(pVnode->cfg);
SVnodeCfg * pCfg = &pVnode->cfg;
if (pVnode->lastKeyOnFile == 0) { if (pVnode->lastKeyOnFile == 0) {
if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10; if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10;
pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; pVnode->fileId = pVnode->firstKey / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile;
pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1; pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[pVnode->cfg.precision] - 1;
pVnode->numOfFiles = 1; pVnode->numOfFiles = 1;
vnodeCreateEmptyCompFile(vnode, pVnode->fileId); if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1;
} }
numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile; numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[pVnode->cfg.precision] / pCfg->daysPerFile;
if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1; if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1;
dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", dTrace("vid:%d, commitFirstKey:%ld lastKeyOnFile:%ld numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode,
vnode, pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles); pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles);
if (numOfFiles >= pVnode->numOfFiles) { if (numOfFiles >= pVnode->numOfFiles) {
// create empty header files backward // create empty header files backward
...@@ -238,6 +233,24 @@ int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) { ...@@ -238,6 +233,24 @@ int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) {
pVnode->commitFileId = fileId; pVnode->commitFileId = fileId;
pVnode->numOfFiles = pVnode->numOfFiles + filesAdded; pVnode->numOfFiles = pVnode->numOfFiles + filesAdded;
return 0;
}
int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) {
char name[TSDB_FILENAME_LEN];
char dHeadName[TSDB_FILENAME_LEN] = "\0";
char dLastName[TSDB_FILENAME_LEN] = "\0";
int len = 0;
struct stat filestat;
int vnode = pVnode->vnode;
int fileId, numOfFiles, filesAdded = 0;
SVnodeCfg * pCfg = &pVnode->cfg;
if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1;
fileId = pVnode->commitFileId;
dTrace("vid:%d, commit fileId:%d, commitLastKey:%ld, vnodeLastKey:%ld, lastKeyOnFile:%ld numOfFiles:%d", dTrace("vid:%d, commit fileId:%d, commitLastKey:%ld, vnodeLastKey:%ld, lastKeyOnFile:%ld numOfFiles:%d",
vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles); vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles);
...@@ -1817,7 +1830,15 @@ int vnodeInitFile(int vnode) { ...@@ -1817,7 +1830,15 @@ int vnodeInitFile(int vnode) {
pVnode->fmagic = (uint64_t *)calloc(pVnode->maxFiles + 1, sizeof(uint64_t)); pVnode->fmagic = (uint64_t *)calloc(pVnode->maxFiles + 1, sizeof(uint64_t));
int fileId = pVnode->fileId; int fileId = pVnode->fileId;
for (int i = 0; i < pVnode->numOfFiles; ++i) { /*
 * The recorded number of files may exceed maxFiles; report it and cap the count used below
*/
if (pVnode->numOfFiles > pVnode->maxFiles) {
dError("vid:%d numOfFiles:%d should not larger than maxFiles:%d", vnode, pVnode->numOfFiles, pVnode->maxFiles);
}
int numOfFiles = MIN(pVnode->numOfFiles, pVnode->maxFiles);
for (int i = 0; i < numOfFiles; ++i) {
if (vnodeUpdateFileMagic(vnode, fileId) < 0) { if (vnodeUpdateFileMagic(vnode, fileId) < 0) {
if (pVnode->cfg.replications > 1) { if (pVnode->cfg.replications > 1) {
pVnode->badFileId = fileId; pVnode->badFileId = fileId;
......
...@@ -16,27 +16,20 @@ ...@@ -16,27 +16,20 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "os.h" #include "os.h"
#include "trpc.h"
#include "ttimer.h"
#include "vnode.h" #include "vnode.h"
#include "vnodeMgmt.h"
#include "vnodeShell.h"
#include "vnodeShell.h"
#include "vnodeUtil.h" #include "vnodeUtil.h"
#pragma GCC diagnostic ignored "-Wpointer-sign"
#pragma GCC diagnostic ignored "-Wint-conversion"
typedef struct { extern void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId);
SCompHeader *headList; extern int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize,
SCompInfo compInfo; char *temp, char *buffer, int bufferSize);
int last; // 0:last block in data file, 1:not the last block extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints);
int newBlocks; extern void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId);
int oldNumOfBlocks; extern int vnodeCreateEmptyCompFile(int vnode, int fileId);
int64_t compInfoOffset; // offset for compInfo in head file extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode);
int64_t leftOffset; // copy from this offset to end of head file extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode);
int64_t hfdSize; // old head file size extern int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode);
} SHeadInfo;
#define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx)))
typedef struct { typedef struct {
void * signature; void * signature;
SShellObj *pShell; SShellObj *pShell;
...@@ -53,952 +46,1507 @@ typedef struct { ...@@ -53,952 +46,1507 @@ typedef struct {
// only for file // only for file
int numOfPoints; int numOfPoints;
int fileId;
int64_t offset; // offset in data file int64_t offset; // offset in data file
SData *sdata[TSDB_MAX_COLUMNS]; char * payload;
char *buffer; char * opayload; // allocated space for payload from client
char *payload;
char *opayload;
int rows; int rows;
} SImportInfo; } SImportInfo;
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport); typedef struct {
// in .head file
int vnodeGetImportStartPart(SMeterObj *pObj, char *payload, int rows, TSKEY key1) { SCompHeader *pHeader;
int i; size_t pHeaderSize;
for (i = 0; i < rows; ++i) {
TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint));
if (key >= key1) break;
}
return i;
}
int vnodeGetImportEndPart(SMeterObj *pObj, char *payload, int rows, char **pStart, TSKEY key0) {
int i;
for (i = 0; i < rows; ++i) {
TSKEY key = *((TSKEY *)(payload + i * pObj->bytesPerPoint));
if (key > key0) break;
}
*pStart = payload + i * pObj->bytesPerPoint;
return rows - i;
}
int vnodeCloseFileForImport(SMeterObj *pObj, SHeadInfo *pHinfo) {
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
TSCKSUM chksum = 0;
if (pHinfo->newBlocks == 0 || pHinfo->compInfoOffset == 0) return 0;
if (pHinfo->oldNumOfBlocks == 0) twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM));
int leftSize = pHinfo->hfdSize - pHinfo->leftOffset;
if (leftSize > 0) {
lseek(pVnode->hfd, pHinfo->leftOffset, SEEK_SET);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, leftSize);
}
pHinfo->compInfo.numOfBlocks += pHinfo->newBlocks; SCompInfo compInfo;
int offset = (pHinfo->compInfo.numOfBlocks - pHinfo->oldNumOfBlocks) * sizeof(SCompBlock); SCompBlock *pBlocks;
if (pHinfo->oldNumOfBlocks == 0) offset += sizeof(SCompInfo) + sizeof(TSCKSUM); // in .data file
int blockId;
uint8_t blockLoadState;
pHinfo->headList[pObj->sid].compInfoOffset = pHinfo->compInfoOffset; SField *pField;
for (int sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid) { size_t pFieldSize;
if (pHinfo->headList[sid].compInfoOffset) pHinfo->headList[sid].compInfoOffset += offset;
}
lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET); SData *data[TSDB_MAX_COLUMNS];
int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM); char * buffer;
taosCalcChecksumAppend(0, (uint8_t *)pHinfo->headList, tmsize);
twrite(pVnode->nfd, pHinfo->headList, tmsize);
int size = pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock); char *temp;
char *buffer = malloc(size);
lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET);
read(pVnode->nfd, buffer, size);
SCompBlock *pBlock = (SCompBlock *)(buffer + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock));
pHinfo->compInfo.uid = pObj->uid; char * tempBuffer;
pHinfo->compInfo.delimiter = TSDB_VNODE_DELIMITER; size_t tempBufferSize;
pHinfo->compInfo.last = pBlock->last; // Variables for sendfile
int64_t compInfoOffset;
int64_t nextNo0Offset; // next sid whose compInfoOffset > 0
int64_t hfSize;
int64_t driftOffset;
taosCalcChecksumAppend(0, (uint8_t *)(&pHinfo->compInfo), sizeof(SCompInfo)); int oldNumOfBlocks;
lseek(pVnode->nfd, pHinfo->compInfoOffset, SEEK_SET); int newNumOfBlocks;
twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo)); int last;
} SImportHandle;
chksum = taosCalcChecksum(0, (uint8_t *)buffer, size); typedef struct {
lseek(pVnode->nfd, pHinfo->compInfoOffset + sizeof(SCompInfo) + size, SEEK_SET); int slot;
twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM)); int pos;
free(buffer); int oslot; // old slot
TSKEY nextKey;
} SBlockIter;
vnodeCloseCommitFiles(pVnode); typedef struct {
int64_t spos;
int64_t epos;
int64_t totalRows;
char * offset[];
} SMergeBuffer;
return 0; int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport);
}
int vnodeProcessLastBlock(SImportInfo *pImport, SHeadInfo *pHinfo, SData *data[]) { int vnodeFindKeyInCache(SImportInfo *pImport, int order) {
SMeterObj *pObj = pImport->pObj; SMeterObj * pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SCompBlock lastBlock;
int code = 0; int code = 0;
SQuery query;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
if (pHinfo->compInfo.last == 0) return 0; TSKEY key = order ? pImport->firstKey : pImport->lastKey;
memset(&query, 0, sizeof(query));
// read into memory query.order.order = order;
uint64_t offset = query.skey = key;
pHinfo->compInfoOffset + (pHinfo->compInfo.numOfBlocks - 1) * sizeof(SCompBlock) + sizeof(SCompInfo); query.ekey = order ? pImport->lastKey : pImport->firstKey;
lseek(pVnode->hfd, offset, SEEK_SET); vnodeSearchPointInCache(pObj, &query);
read(pVnode->hfd, &lastBlock, sizeof(SCompBlock));
assert(lastBlock.last);
if (lastBlock.sversion != pObj->sversion) {
lseek(pVnode->lfd, lastBlock.offset, SEEK_SET);
lastBlock.offset = lseek(pVnode->dfd, 0, SEEK_END);
tsendfile(pVnode->dfd, pVnode->lfd, NULL, lastBlock.len);
lastBlock.last = 0; if (query.slot < 0) {
lseek(pVnode->hfd, offset, SEEK_SET); pImport->slot = pInfo->commitSlot;
twrite(pVnode->hfd, &lastBlock, sizeof(SCompBlock)); if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
pImport->key = 0;
dTrace("vid:%d sid:%d id:%s, key:%ld, import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key);
code = 0;
} else { } else {
vnodeReadLastBlockToMem(pObj, &lastBlock, data); pImport->slot = query.slot;
pHinfo->compInfo.numOfBlocks--; pImport->pos = query.pos;
code = lastBlock.numOfPoints; pImport->key = query.key;
}
return code;
}
int vnodeOpenFileForImport(SImportInfo *pImport, char *payload, SHeadInfo *pHinfo, SData *data[]) {
SMeterObj *pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
TSKEY firstKey = *((TSKEY *)payload);
struct stat filestat;
int sid, rowsBefore = 0;
if (pVnode->nfd <= 0 || firstKey > pVnode->commitLastKey) {
if (pVnode->nfd > 0) vnodeCloseFileForImport(pObj, pHinfo);
pVnode->commitFirstKey = firstKey;
if (vnodeOpenCommitFiles(pVnode, pObj->sid) < 0) return -1;
fstat(pVnode->hfd, &filestat);
pHinfo->hfdSize = filestat.st_size;
pHinfo->newBlocks = 0;
pHinfo->last = 1; // by default, new blockes are at the end of block list
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
read(pVnode->hfd, pHinfo->headList, sizeof(SCompHeader) * pCfg->maxSessions);
if (pHinfo->headList[pObj->sid].compInfoOffset > 0) { if (key != query.key) {
lseek(pVnode->hfd, pHinfo->headList[pObj->sid].compInfoOffset, SEEK_SET); if (order == 0) {
if (read(pVnode->hfd, &pHinfo->compInfo, sizeof(SCompInfo)) != sizeof(SCompInfo)) { // since pos is the position which has smaller key, data shall be imported after it
dError("vid:%d sid:%d, failed to read compInfo from file:%s", pObj->vnode, pObj->sid, pVnode->cfn); pImport->pos++;
return -1; if (pImport->pos >= pObj->pointsPerBlock) {
pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
} }
if (pHinfo->compInfo.uid == pObj->uid) {
pHinfo->compInfoOffset = pHinfo->headList[pObj->sid].compInfoOffset;
pHinfo->leftOffset = pHinfo->headList[pObj->sid].compInfoOffset + sizeof(SCompInfo);
} else { } else {
pHinfo->headList[pObj->sid].compInfoOffset = 0; if (pImport->pos < 0) pImport->pos = 0;
} }
} }
code = 0;
if ( pHinfo->headList[pObj->sid].compInfoOffset == 0 ) {
memset(&pHinfo->compInfo, 0, sizeof(SCompInfo));
pHinfo->compInfo.uid = pObj->uid;
for (sid = pObj->sid + 1; sid < pCfg->maxSessions; ++sid)
if (pHinfo->headList[sid].compInfoOffset > 0) break;
pHinfo->compInfoOffset = (sid == pCfg->maxSessions) ? pHinfo->hfdSize : pHinfo->headList[sid].compInfoOffset;
pHinfo->leftOffset = pHinfo->compInfoOffset;
} }
pHinfo->oldNumOfBlocks = pHinfo->compInfo.numOfBlocks; return code;
lseek(pVnode->hfd, 0, SEEK_SET); }
lseek(pVnode->nfd, 0, SEEK_SET);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfoOffset);
twrite(pVnode->nfd, &pHinfo->compInfo, sizeof(SCompInfo));
if (pHinfo->headList[pObj->sid].compInfoOffset > 0) lseek(pVnode->hfd, sizeof(SCompInfo), SEEK_CUR);
if (pVnode->commitFileId < pImport->fileId) {
if (pHinfo->compInfo.numOfBlocks > 0)
pHinfo->leftOffset += pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock);
rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data);
// copy all existing compBlockInfo void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) {
lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET); SVnodeObj *pVnode = vnodeList + vnode;
if (pHinfo->compInfo.numOfBlocks > 0)
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHinfo->compInfo.numOfBlocks * sizeof(SCompBlock));
} else if (pVnode->commitFileId == pImport->fileId) { int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
int slots = pImport->pos ? pImport->slot + 1 : pImport->slot; int fid = now / delta;
pHinfo->leftOffset += slots * sizeof(SCompBlock); *minKey = (fid - pVnode->maxFiles + 1) * delta;
*maxKey = (fid + 2) * delta - 1;
return;
}
// check if last block is at last file, if it is, read into memory int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion,
if (pImport->pos == 0 && pHinfo->compInfo.numOfBlocks > 0 && pImport->slot == pHinfo->compInfo.numOfBlocks && int *pNumOfPoints, TSKEY now) {
pHinfo->compInfo.last) { SSubmitMsg *pSubmit = (SSubmitMsg *)cont;
rowsBefore = vnodeProcessLastBlock(pImport, pHinfo, data); SVnodeObj * pVnode = vnodeList + pObj->vnode;
if ( rowsBefore > 0 ) pImport->slot--; int rows = 0;
} char * payload = NULL;
int code = TSDB_CODE_SUCCESS;
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
SShellObj * pShell = (SShellObj *)param;
TSKEY firstKey, lastKey;
// this block will be replaced by new blocks payload = pSubmit->payLoad;
if (pImport->pos > 0) pHinfo->compInfo.numOfBlocks--;
if (pImport->slot > 0) { rows = htons(pSubmit->numOfRows);
lseek(pVnode->hfd, pHinfo->compInfoOffset + sizeof(SCompInfo), SEEK_SET); assert(rows > 0);
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pImport->slot * sizeof(SCompBlock)); int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows);
if (expectedLen != contLen) {
dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId,
expectedLen, contLen);
return TSDB_CODE_WRONG_MSG_SIZE;
} }
if (pImport->slot < pHinfo->compInfo.numOfBlocks) // Check timestamp context.
pHinfo->last = 0; // new blocks are not at the end of block list TSKEY minKey = 0, maxKey = 0;
firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
} else { lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
// nothing assert(firstKey <= lastKey);
vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey);
pHinfo->last = 0; // new blocks are not at the end of block list if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) {
dError(
"vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %ld lastKey: %ld minAllowedKey:%ld "
"maxAllowedKey:%ld",
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey);
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
} }
// forward to peers
if (pShell && pVnode->cfg.replications > 1) {
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion);
if (code != 0) return code;
} }
return rowsBefore; if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) {
} if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion);
if (code != 0) return code;
}
extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints); /*
int vnodeImportToFile(SImportInfo *pImport); * The timestamp of all records in a submit payload are always in ascending order, guaranteed by client, so here only
* the first key.
*/
if (firstKey > pObj->lastKey) { // Just call insert
code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, pNumOfPoints, now);
} else { // trigger import
if (sversion != pObj->sversion) {
dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->sversion, sversion);
return TSDB_CODE_OTHERS;
}
void vnodeProcessImportTimer(void *param, void *tmrId) { // check the table status for perform import historical data
SImportInfo *pImport = (SImportInfo *)param; if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING)) != TSDB_CODE_SUCCESS) {
if (pImport == NULL || pImport->signature != param) { return code;
dError("import timer is messed up, signature:%p", pImport);
return;
} }
SMeterObj *pObj = pImport->pObj; SImportInfo import = {0};
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SCachePool *pPool = (SCachePool *)pVnode->pCachePool;
SShellObj *pShell = pImport->pShell;
pImport->retry++; dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%ld, lastKey:%ld, object lastKey:%ld",
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey);
int32_t code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING); import.firstKey = firstKey;
if (code == TSDB_CODE_NOT_ACTIVE_TABLE) { import.lastKey = lastKey;
return; import.pObj = pObj;
} import.pShell = pShell;
import.payload = payload;
import.rows = rows;
// FIXME: mutex here seems meaningless and num here still can be changed
int32_t num = 0; int32_t num = 0;
pthread_mutex_lock(&pVnode->vmutex); pthread_mutex_lock(&pVnode->vmutex);
num = pObj->numOfQueries; num = pObj->numOfQueries;
pthread_mutex_unlock(&pVnode->vmutex); pthread_mutex_unlock(&pVnode->vmutex);
//if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY
int32_t commitInProcess = 0; int32_t commitInProcess = 0;
pthread_mutex_lock(&pPool->vmutex); pthread_mutex_lock(&pPool->vmutex);
if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || code == TSDB_CODE_ACTION_IN_PROGRESS) { if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) {
// mutual exclusion with read (need to change here)
pthread_mutex_unlock(&pPool->vmutex); pthread_mutex_unlock(&pPool->vmutex);
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
return TSDB_CODE_ACTION_IN_PROGRESS;
if (pImport->retry < 1000) {
dTrace("vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready."
"commitInProcess:%d, numOfQueries:%d, state:%d", pObj->vnode, pObj->sid, pObj->meterId,
commitInProcess, num, pObj->state);
taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl);
return;
} else {
pShell->code = TSDB_CODE_TOO_SLOW;
}
} else { } else {
pPool->commitInProcess = 1; pPool->commitInProcess = 1;
pthread_mutex_unlock(&pPool->vmutex); pthread_mutex_unlock(&pPool->vmutex);
int32_t ret = vnodeImportData(pObj, pImport); code = vnodeImportData(pObj, &import);
if (pShell) { *pNumOfPoints = import.importedRows;
pShell->code = ret;
pShell->numOfTotalPoints += pImport->importedRows;
}
} }
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
pVnode->version++; pVnode->version++;
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
// send response back to shell
if (pShell) {
pShell->count--;
if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pImport->pShell, pShell->code, pShell->numOfTotalPoints);
} }
pImport->signature = NULL; return code;
free(pImport->opayload);
free(pImport);
} }
int vnodeImportToFile(SImportInfo *pImport) { /* Function to search keys in a range
SMeterObj *pObj = pImport->pObj; *
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; * Assumption: keys in payload are in ascending order
SVnodeCfg *pCfg = &pVnode->cfg; *
SHeadInfo headInfo; * @payload: data records, key in ascending order
int code = 0, col; * @step: bytes each record takes
SCompBlock compBlock; * @rows: number of data records
char * payload = pImport->payload; * @skey: range start (included)
int rows = pImport->rows; * @ekey: range end (included)
SCachePool *pPool = (SCachePool *)pVnode->pCachePool; * @srows: rtype, start index of records
* @nrows: rtype, number of records in range
TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1))); *
TSKEY firstKey = *((TSKEY *)payload); * @rtype: 0 means find data in the range
memset(&headInfo, 0, sizeof(headInfo)); * -1 means find no data in the range
headInfo.headList = malloc(sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM)); */
static int vnodeSearchKeyInRange(char *payload, int step, int rows, TSKEY skey, TSKEY ekey, int *srow, int *nrows) {
SData *cdata[TSDB_MAX_COLUMNS]; if (rows <= 0 || KEY_AT_INDEX(payload, step, 0) > ekey || KEY_AT_INDEX(payload, step, rows - 1) < skey || skey > ekey)
char *buffer1 = return -1;
malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns);
cdata[0] = (SData *)buffer1;
SData *data[TSDB_MAX_COLUMNS]; int left = 0;
char *buffer2 = int right = rows - 1;
malloc(pObj->bytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns); int mid;
data[0] = (SData *)buffer2;
for (col = 1; col < pObj->numOfColumns; ++col) { // Binary search the first key in payload >= skey
cdata[col] = (SData *)(((char *)cdata[col - 1]) + sizeof(SData) + EXTRA_BYTES + do {
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + sizeof(TSCKSUM)); mid = (left + right) / 2;
data[col] = (SData *)(((char *)data[col - 1]) + sizeof(SData) + EXTRA_BYTES + if (skey < KEY_AT_INDEX(payload, step, mid)) {
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + sizeof(TSCKSUM)); right = mid;
} else if (skey > KEY_AT_INDEX(payload, step, mid)) {
left = mid + 1;
} else {
break;
} }
} while (left < right);
int rowsBefore = 0; if (skey <= KEY_AT_INDEX(payload, step, mid)) {
int rowsRead = 0; *srow = mid;
int rowsUnread = 0; } else {
int leftRows = rows; // left number of rows of imported data if (mid + 1 >= rows) {
int row, rowsToWrite; return -1;
int64_t offset[TSDB_MAX_COLUMNS]; } else {
*srow = mid + 1;
if (pImport->pos > 0) { }
for (col = 0; col < pObj->numOfColumns; ++col)
memcpy(data[col]->data, pImport->sdata[col]->data, pImport->pos * pObj->schema[col].bytes);
rowsBefore = pImport->pos;
rowsRead = pImport->pos;
rowsUnread = pImport->numOfPoints - pImport->pos;
} }
dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to file, firstKey:%ld lastKey:%ld", assert(skey <= KEY_AT_INDEX(payload, step, *srow));
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey);
do { *nrows = 0;
if (leftRows > 0) { for (int i = *srow; i < rows; i++) {
code = vnodeOpenFileForImport(pImport, payload, &headInfo, data); if (KEY_AT_INDEX(payload, step, i) <= ekey) {
if (code < 0) goto _exit; (*nrows)++;
if (code > 0) {
rowsBefore = code;
code = 0;
};
} else { } else {
// if payload is already imported, rows unread shall still be processed break;
rowsBefore = 0; }
} }
int rowsToProcess = pObj->pointsPerFileBlock - rowsBefore; if (*nrows == 0) return -1;
if (rowsToProcess > leftRows) rowsToProcess = leftRows;
for (col = 0; col < pObj->numOfColumns; ++col) { return 0;
offset[col] = data[col]->data + rowsBefore * pObj->schema[col].bytes; }
}
row = 0; int vnodeOpenMinFilesForImport(int vnode, int fid) {
if (leftRows > 0) { char dname[TSDB_FILENAME_LEN] = "\0";
for (row = 0; row < rowsToProcess; ++row) { SVnodeObj * pVnode = vnodeList + vnode;
if (*((TSKEY *)payload) > pVnode->commitLastKey) break; struct stat filestat;
int minFileSize;
for (col = 0; col < pObj->numOfColumns; ++col) { minFileSize = TSDB_FILE_HEADER_LEN + sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
memcpy((void *)offset[col], payload, pObj->schema[col].bytes);
payload += pObj->schema[col].bytes;
offset[col] += pObj->schema[col].bytes;
}
}
}
leftRows -= row; vnodeGetHeadDataLname(pVnode->cfn, dname, pVnode->lfn, vnode, fid);
rowsToWrite = rowsBefore + row;
rowsBefore = 0;
if (leftRows == 0 && rowsUnread > 0) { // Open .head file
// copy the unread pVnode->hfd = open(pVnode->cfn, O_RDONLY);
int rowsToCopy = pObj->pointsPerFileBlock - rowsToWrite; if (pVnode->hfd < 0) {
if (rowsToCopy > rowsUnread) rowsToCopy = rowsUnread; dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
goto _error_open;
}
for (col = 0; col < pObj->numOfColumns; ++col) { fstat(pVnode->hfd, &filestat);
int bytes = pObj->schema[col].bytes; if (filestat.st_size < minFileSize) {
memcpy(data[col]->data + rowsToWrite * bytes, pImport->sdata[col]->data + rowsRead * bytes, rowsToCopy * bytes); dError("vid:%d, head file:%s is corrupted", vnode, pVnode->cfn);
taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn);
goto _error_open;
} }
rowsRead += rowsToCopy; // Open .data file
rowsUnread -= rowsToCopy; pVnode->dfd = open(dname, O_RDWR);
rowsToWrite += rowsToCopy; if (pVnode->dfd < 0) {
dError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
goto _error_open;
} }
for (col = 0; col < pObj->numOfColumns; ++col) { fstat(pVnode->dfd, &filestat);
data[col]->len = rowsToWrite * pObj->schema[col].bytes; if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, data file:%s corrupted", vnode, dname);
taosLogError("vid:%d, data file:%s corrupted", vnode, dname);
goto _error_open;
} }
compBlock.last = headInfo.last; // Open .last file
vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite); pVnode->lfd = open(pVnode->lfn, O_RDWR);
twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)); if (pVnode->lfd < 0) {
dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
goto _error_open;
}
rowsToWrite = 0; fstat(pVnode->lfd, &filestat);
headInfo.newBlocks++; if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
goto _error_open;
}
} while (leftRows > 0 || rowsUnread > 0); return 0;
if (compBlock.keyLast > pObj->lastKeyOnFile) _error_open:
pObj->lastKeyOnFile = compBlock.keyLast; if (pVnode->hfd > 0) close(pVnode->hfd);
pVnode->hfd = 0;
vnodeCloseFileForImport(pObj, &headInfo); if (pVnode->dfd > 0) close(pVnode->dfd);
dTrace("vid:%d sid:%d id:%s, %d rows data are imported to file", pObj->vnode, pObj->sid, pObj->meterId, rows); pVnode->dfd = 0;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; if (pVnode->lfd > 0) close(pVnode->lfd);
pthread_mutex_lock(&pPool->vmutex); pVnode->lfd = 0;
if (pInfo->numOfBlocks > 0) { return -1;
int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; }
TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0]));
// data may be in commited cache, cache shall be released /* Function to open .t file and sendfile the first part
if (lastKey > firstKeyInCache) { */
while (slot != pInfo->commitSlot) { int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid) {
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; char dHeadName[TSDB_FILENAME_LEN] = "\0";
vnodeFreeCacheBlock(pCacheBlock); SVnodeObj * pVnode = vnodeList + pObj->vnode;
slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks; struct stat filestat;
int sid;
// cfn: .head
if (readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN) < 0) return -1;
size_t len = strlen(dHeadName);
// switch head name
switch (dHeadName[len - 1]) {
case '0':
dHeadName[len - 1] = '1';
break;
case '1':
dHeadName[len - 1] = '0';
break;
default:
dError("vid: %d, fid: %d, head target filename not end with 0 or 1", pVnode->vnode, fid);
return -1;
} }
// last slot, the uncommitted slots shall be shifted, a cache block may have empty rows vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid);
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; if (symlink(dHeadName, pVnode->nfn) < 0) return -1;
int points = pCacheBlock->numOfPoints - pInfo->commitPoint;
if (points > 0) { pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
for (int col = 0; col < pObj->numOfColumns; ++col) { if (pVnode->nfd < 0) {
int size = points * pObj->schema[col].bytes; dError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
memmove(pCacheBlock->offset[col], pCacheBlock->offset[col] + pObj->schema[col].bytes * pInfo->commitPoint, size); taosLogError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
} return -1;
} }
if (pInfo->commitPoint != pObj->pointsPerBlock) { fstat(pVnode->hfd, &filestat);
// commit point shall be set to 0 if last block is not full pHandle->hfSize = filestat.st_size;
pInfo->commitPoint = 0;
pCacheBlock->numOfPoints = points; // Find the next sid whose compInfoOffset > 0
if (slot == pInfo->currentSlot) { for (sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; sid++) {
atomic_fetch_add_32(&pObj->freePoints, pInfo->commitPoint); if (pHandle->pHeader[sid].compInfoOffset > 0) break;
} }
pHandle->nextNo0Offset = (sid == pVnode->cfg.maxSessions) ? pHandle->hfSize : pHandle->pHeader[sid].compInfoOffset;
// FIXME: sendfile the original part
// TODO: Here, we need to take the deleted table case in consideration, this function
// just assume the case is handled before calling this function
if (pHandle->pHeader[pObj->sid].compInfoOffset > 0) {
pHandle->compInfoOffset = pHandle->pHeader[pObj->sid].compInfoOffset;
} else { } else {
// if last block is full and committed pHandle->compInfoOffset = pHandle->nextNo0Offset;
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot];
if (pCacheBlock->pMeterObj == pObj) {
vnodeFreeCacheBlock(pCacheBlock);
}
}
}
} }
if (lastKey > pObj->lastKeyOnFile) pObj->lastKeyOnFile = lastKey; assert(pHandle->compInfoOffset <= pHandle->hfSize);
pthread_mutex_unlock(&pPool->vmutex); lseek(pVnode->hfd, 0, SEEK_SET);
lseek(pVnode->nfd, 0, SEEK_SET);
if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) {
return -1;
}
_exit: // Leave a SCompInfo space here
tfree(headInfo.headList); lseek(pVnode->nfd, sizeof(SCompInfo), SEEK_CUR);
tfree(buffer1);
tfree(buffer2);
tfree(pImport->buffer);
return code; return 0;
} }
int vnodeImportToCache(SImportInfo *pImport, char *payload, int rows) { typedef enum { DATA_LOAD_TIMESTAMP = 0x1, DATA_LOAD_OTHER_DATA = 0x2 } DataLoadMod;
SMeterObj *pObj = pImport->pObj;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
int code = -1;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
int slot, pos, row, col, points, tpoints;
char *data[TSDB_MAX_COLUMNS], *current[TSDB_MAX_COLUMNS]; /* Function to load a block data at the requirement of mod
int slots = pInfo->unCommittedBlocks + 1; */
int trows = slots * pObj->pointsPerBlock + rows; // max rows in buffer static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod, int *code) {
int tsize = (trows / pObj->pointsPerBlock + 1) * pCfg->cacheBlockSize; size_t size;
TSKEY firstKey = *((TSKEY *)payload); SCompBlock *pBlock = pHandle->pBlocks + blockId;
TSKEY lastKey = *((TSKEY *)(payload + pObj->bytesPerPoint * (rows - 1))); *code = TSDB_CODE_SUCCESS;
if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { assert(pBlock->sversion == pObj->sversion);
dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints);
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
}
assert(rows); SVnodeObj *pVnode = vnodeList + pObj->vnode;
dTrace("vid:%d sid:%d id:%s, %d rows data will be imported to cache, firstKey:%ld lastKey:%ld",
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey);
pthread_mutex_lock(&(pVnode->vmutex)); int dfd = pBlock->last ? pVnode->lfd : pVnode->dfd;
if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey;
pthread_mutex_unlock(&(pVnode->vmutex));
char *buffer = malloc(tsize); // buffer to hold unCommitted data plus import data if (pHandle->blockId != blockId) {
data[0] = buffer; pHandle->blockId = blockId;
current[0] = data[0]; pHandle->blockLoadState = 0;
for (col = 1; col < pObj->numOfColumns; ++col) {
data[col] = data[col - 1] + trows * pObj->schema[col - 1].bytes;
current[col] = data[col];
} }
// write import data into buffer first if (pHandle->blockLoadState == 0){ // Reload pField
for (row = 0; row < rows; ++row) { size = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM);
for (col = 0; col < pObj->numOfColumns; ++col) { if (pHandle->pFieldSize < size) {
memcpy(current[col], payload, pObj->schema[col].bytes); pHandle->pField = (SField *)realloc((void *)(pHandle->pField), size);
payload += pObj->schema[col].bytes; if (pHandle->pField == NULL) {
current[col] += pObj->schema[col].bytes; dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
} }
pHandle->pFieldSize = size;
} }
// copy the overwritten data into buffer, merge cache blocks lseek(dfd, pBlock->offset, SEEK_SET);
tpoints = rows; if (read(dfd, (void *)(pHandle->pField), pHandle->pFieldSize) < 0) {
pos = pImport->pos; dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%ld reason:%s", pVnode->vnode, pObj->sid,
slot = pImport->slot; pObj->meterId, pHandle->pFieldSize, strerror(errno));
while (1) { *code = TSDB_CODE_FILE_CORRUPTED;
points = pInfo->cacheBlocks[slot]->numOfPoints - pos; return -1;
for (col = 0; col < pObj->numOfColumns; ++col) { }
int size = points * pObj->schema[col].bytes;
memcpy(current[col], pInfo->cacheBlocks[slot]->offset[col] + pos * pObj->schema[col].bytes, size); if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), pHandle->pFieldSize)) {
current[col] += size; dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->lfn);
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
}
} }
pos = 0;
tpoints += points;
if (slot == pInfo->currentSlot) break; { // Allocate necessary buffer
slot = (slot + 1) % pInfo->maxBlocks; size = pObj->bytesPerPoint * pObj->pointsPerFileBlock +
(sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns;
if (pHandle->buffer == NULL) {
pHandle->buffer = malloc(size);
if (pHandle->buffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
} }
for (col = 0; col < pObj->numOfColumns; ++col) current[col] = data[col]; // TODO: Init data
pos = pImport->pos; pHandle->data[0] = (SData *)(pHandle->buffer);
for (int col = 1; col < pObj->numOfColumns; col++) {
pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES +
sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
}
}
// write back to existing slots first if (pHandle->temp == NULL) {
slot = pImport->slot; pHandle->temp = malloc(size);
while (1) { if (pHandle->temp == NULL) {
points = (tpoints > pObj->pointsPerBlock - pos) ? pObj->pointsPerBlock - pos : tpoints; dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot]; pObj->meterId, size);
for (col = 0; col < pObj->numOfColumns; ++col) { *code = TSDB_CODE_SERV_OUT_OF_MEMORY;
int size = points * pObj->schema[col].bytes; return -1;
memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size);
current[col] += size;
} }
pCacheBlock->numOfPoints = points + pos;
pos = 0;
tpoints -= points;
if (tpoints == 0) {
// free the rest of cache blocks, since cache blocks are merged
int currentSlot = slot;
while (slot != pInfo->currentSlot) {
slot = (slot + 1) % pInfo->maxBlocks;
pCacheBlock = pInfo->cacheBlocks[slot];
vnodeFreeCacheBlock(pCacheBlock);
} }
pInfo->currentSlot = currentSlot; if (pHandle->tempBuffer == NULL) {
slot = currentSlot; // make sure to exit from the while loop pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES + sizeof(TSCKSUM);
pHandle->tempBuffer = malloc(pHandle->tempBufferSize);
if (pHandle->tempBuffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->tempBufferSize);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
}
}
}
if ((loadMod & DATA_LOAD_TIMESTAMP) &&
(~(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP))) { // load only timestamp part
if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX,
pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints,
pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize) < 0) {
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
} }
if (slot == pInfo->currentSlot) break; pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP;
slot = (slot + 1) % pInfo->maxBlocks;
} }
// allocate new cache block if there are still data left if ((loadMod & DATA_LOAD_OTHER_DATA) && (~(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA))) { // load other columns
while (tpoints > 0) { for (int col = 1; col < pBlock->numOfCols; col++) {
pImport->commit = vnodeAllocateCacheBlock(pObj); if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data,
if (pImport->commit < 0) goto _exit; pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer,
points = (tpoints > pObj->pointsPerBlock) ? pObj->pointsPerBlock : tpoints; pHandle->tempBufferSize) < 0) {
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot]; *code = TSDB_CODE_FILE_CORRUPTED;
for (col = 0; col < pObj->numOfColumns; ++col) { return -1;
int size = points * pObj->schema[col].bytes;
memcpy(pCacheBlock->offset[col] + pos * pObj->schema[col].bytes, current[col], size);
current[col] += size;
} }
tpoints -= points;
pCacheBlock->numOfPoints = points;
} }
code = 0; pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA;
atomic_fetch_sub_32(&pObj->freePoints, rows); }
dTrace("vid:%d sid:%d id:%s, %d rows data are imported to cache", pObj->vnode, pObj->sid, pObj->meterId, rows);
_exit: return 0;
free(buffer);
return code;
} }
int vnodeFindKeyInFile(SImportInfo *pImport, int order) { static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) {
SMeterObj *pObj = pImport->pObj; SVnodeObj *pVnode = vnodeList + pObj->vnode;
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; char dpath[TSDB_FILENAME_LEN] = "\0";
int code = -1; SCompInfo compInfo;
SQuery query; __off_t offset = 0;
SColumnInfoEx colList[TSDB_MAX_COLUMNS] = {0};
if (pVnode->nfd > 0) {
TSKEY key = order ? pImport->firstKey : pImport->lastKey; offset = lseek(pVnode->nfd, 0, SEEK_CUR);
memset(&query, 0, sizeof(query)); assert(offset == pHandle->nextNo0Offset + pHandle->driftOffset);
query.order.order = order;
query.skey = key; { // Write the SCompInfo part
query.ekey = order ? INT64_MAX : 0; compInfo.uid = pObj->uid;
query.colList = colList; compInfo.last = pHandle->last;
query.numOfCols = pObj->numOfColumns; compInfo.numOfBlocks = pHandle->newNumOfBlocks + pHandle->oldNumOfBlocks;
compInfo.delimiter = TSDB_VNODE_DELIMITER;
taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo));
lseek(pVnode->nfd, pHandle->compInfoOffset, SEEK_SET);
if (twrite(pVnode->nfd, (void *)(&compInfo), sizeof(SCompInfo)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompInfo, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
strerror(errno));
return -1;
}
}
for (int16_t i = 0; i < pObj->numOfColumns; ++i) { // Write the rest of the SCompBlock part
colList[i].data.colId = pObj->schema[i].colId; if (pHandle->hfSize > pHandle->nextNo0Offset) {
colList[i].data.bytes = pObj->schema[i].bytes; lseek(pVnode->nfd, 0, SEEK_END);
colList[i].data.type = pObj->schema[i].type; lseek(pVnode->hfd, pHandle->nextNo0Offset, SEEK_SET);
if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->hfSize - pHandle->nextNo0Offset) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to sendfile, size:%ld, reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->hfSize - pHandle->nextNo0Offset, strerror(errno));
return -1;
}
}
colList[i].colIdx = i; // Write SCompHeader part
colList[i].colIdxInBuf = i; pHandle->pHeader[pObj->sid].compInfoOffset = pHandle->compInfoOffset;
for (int sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; ++sid) {
if (pHandle->pHeader[sid].compInfoOffset > 0) {
pHandle->pHeader[sid].compInfoOffset += pHandle->driftOffset;
}
} }
int ret = vnodeSearchPointInFile(pObj, &query); taosCalcChecksumAppend(0, (uint8_t *)(pHandle->pHeader), pHandle->pHeaderSize);
lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (twrite(pVnode->nfd, (void *)(pHandle->pHeader), pHandle->pHeaderSize) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompHeader part, size:%ld, reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->pHeaderSize, strerror(errno));
return -1;
}
}
if (ret >= 0) { // Close opened files
if (query.slot < 0) { close(pVnode->dfd);
pImport->slot = 0; pVnode->dfd = 0;
pImport->pos = 0;
pImport->key = 0;
pImport->fileId = pVnode->fileId - pVnode->numOfFiles + 1;
dTrace("vid:%d sid:%d id:%s, import to head of file", pObj->vnode, pObj->sid, pObj->meterId);
code = 0;
} else if (query.slot >= 0) {
code = 0;
pImport->slot = query.slot;
pImport->pos = query.pos;
pImport->key = query.key;
pImport->fileId = query.fileId;
SCompBlock *pBlock = &query.pBlock[query.slot];
pImport->numOfPoints = pBlock->numOfPoints;
if (pImport->key != key) { close(pVnode->hfd);
if (order == 0) { pVnode->hfd = 0;
pImport->pos++;
if (pImport->pos >= pBlock->numOfPoints) { close(pVnode->lfd);
pImport->slot++; pVnode->lfd = 0;
pImport->pos = 0;
if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN);
rename(pVnode->nfn, pVnode->cfn);
remove(dpath);
} }
} else {
if (pImport->pos < 0) pImport->pos = 0; return 0;
}
static void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) {
int sdataRow;
int offset;
for (int row = 0; row < rows; ++row) {
sdataRow = row + rowOffset;
offset = 0;
for (int col = 0; col < pObj->numOfColumns; ++col) {
memcpy(data[col]->data + sdataRow * pObj->schema[col].bytes, payload + pObj->bytesPerPoint * row + offset,
pObj->schema[col].bytes);
offset += pObj->schema[col].bytes;
} }
} }
}
if (pImport->key != key && pImport->pos > 0) { static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) {
if ( pObj->sversion != pBlock->sversion ) { SMeterObj * pObj = (SMeterObj *)(pImport->pObj);
dError("vid:%d sid:%d id:%s, import sversion not matched, expected:%d received:%d", pObj->vnode, pObj->sid, SVnodeObj * pVnode = vnodeList + pObj->vnode;
pObj->meterId, pBlock->sversion, pObj->sversion); SImportHandle importHandle;
code = TSDB_CODE_OTHERS; size_t size = 0;
} else { SData * data[TSDB_MAX_COLUMNS];
pImport->offset = pBlock->offset; char * buffer = NULL;
SData * cdata[TSDB_MAX_COLUMNS];
char * cbuffer = NULL;
SCompBlock compBlock;
TSCKSUM checksum = 0;
int pointsImported = 0;
int code = TSDB_CODE_SUCCESS;
SCachePool * pPool = (SCachePool *)pVnode->pCachePool;
SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache);
TSKEY lastKeyImported = 0;
TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
TSKEY minFileKey = fid * delta;
TSKEY maxFileKey = minFileKey + delta - 1;
TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
pImport->buffer = assert(firstKey >= minFileKey && firstKey <= maxFileKey && lastKey >= minFileKey && lastKey <= maxFileKey);
malloc(pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + sizeof(SData) * pObj->numOfColumns);
pImport->sdata[0] = (SData *)pImport->buffer; // create neccessary files
for (int col = 1; col < pObj->numOfColumns; ++col) pVnode->commitFirstKey = firstKey;
pImport->sdata[col] = (SData *)(((char *)pImport->sdata[col - 1]) + sizeof(SData) + if (vnodeCreateNeccessaryFiles(pVnode) < 0) return TSDB_CODE_OTHERS;
assert(pVnode->commitFileId == fid);
// Open least files to import .head(hfd) .data(dfd) .last(lfd)
if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_FILE_CORRUPTED;
memset(&importHandle, 0, sizeof(SImportHandle));
{ // Load SCompHeader part from .head file
importHandle.pHeaderSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
importHandle.pHeader = (SCompHeader *)malloc(importHandle.pHeaderSize);
if (importHandle.pHeader == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, importHandle.pHeaderSize);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode,
pObj->sid, pObj->meterId, fid, strerror(errno));
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId,
fid);
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
}
{ // Initialize data[] and cdata[], which is used to hold data to write to data file
size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns;
buffer = (char *)malloc(size);
if (buffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
cbuffer = (char *)malloc(size);
if (cbuffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
data[0] = (SData *)buffer;
cdata[0] = (SData *)cbuffer;
for (int col = 1; col < pObj->numOfColumns; col++) {
data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes); pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
}
}
code = vnodeReadCompBlockToMem(pObj, &query, pImport->sdata); if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it
if (code < 0) { _write_empty_point:
code = -code; if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
tfree(pImport->buffer); code = TSDB_CODE_OTHERS;
goto _error_merge;
} }
importHandle.oldNumOfBlocks = 0;
importHandle.driftOffset += sizeof(SCompInfo);
lastKeyImported = lastKey;
for (int rowsWritten = 0; rowsWritten < rows;) {
int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */);
vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0);
pointsImported += rowsToWrite;
compBlock.last = 1;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) {
// TODO: deal with ERROR here
} }
importHandle.last = compBlock.last;
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock));
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowsWritten += rowsToWrite;
} }
twrite(pVnode->nfd, &checksum, sizeof(TSCKSUM));
importHandle.driftOffset += sizeof(TSCKSUM);
} else { // Else if there are old data in this file.
{ // load SCompInfo and SCompBlock part
lseek(pVnode->hfd, importHandle.pHeader[pObj->sid].compInfoOffset, SEEK_SET);
if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) {
dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, strerror(errno));
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
} }
} else {
dError("vid:%d sid:%d id:%s, file is corrupted, import failed", pObj->vnode, pObj->sid, pObj->meterId); if ((importHandle.compInfo.delimiter != TSDB_VNODE_DELIMITER) ||
code = -ret; (!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) {
dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter);
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
} }
tclose(query.hfd); // Check the context of SCompInfo part
tclose(query.dfd); if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter
tclose(query.lfd); goto _write_empty_point;
vnodeFreeFields(&query); }
tfree(query.pBlock);
return code; importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks;
} importHandle.last = importHandle.compInfo.last;
int vnodeFindKeyInCache(SImportInfo *pImport, int order) { size = sizeof(SCompBlock) * importHandle.compInfo.numOfBlocks + sizeof(TSCKSUM);
SMeterObj *pObj = pImport->pObj; importHandle.pBlocks = (SCompBlock *)malloc(size);
int code = 0; if (importHandle.pBlocks == NULL) {
SQuery query; dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid,
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache; pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
TSKEY key = order ? pImport->firstKey : pImport->lastKey; if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) {
memset(&query, 0, sizeof(query)); dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid,
query.order.order = order; pObj->meterId, strerror(errno));
query.skey = key; code = TSDB_CODE_FILE_CORRUPTED;
query.ekey = order ? pImport->lastKey : pImport->firstKey; goto _error_merge;
vnodeSearchPointInCache(pObj, &query); }
if (query.slot < 0) { if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) {
pImport->slot = pInfo->commitSlot; dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId,
if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks; pVnode->cfn);
pImport->pos = 0; code = TSDB_CODE_FILE_CORRUPTED;
pImport->key = 0; goto _error_merge;
dTrace("vid:%d sid:%d id:%s, key:%ld, import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key); }
code = 0; }
} else {
pImport->slot = query.slot;
pImport->pos = query.pos;
pImport->key = query.key;
if (key != query.key) { /* Now we have _payload_, we have _importHandle.pBlocks_, just merge payload into the importHandle.pBlocks
if (order == 0) { *
// since pos is the position which has smaller key, data shall be imported after it * Input: payload, pObj->bytesPerBlock, rows, importHandle.pBlocks
pImport->pos++; */
if (pImport->pos >= pObj->pointsPerBlock) { {
pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks; int payloadIter = 0;
pImport->pos = 0; SBlockIter blockIter = {0, 0, 0, 0};
while (1) {
if (payloadIter >= rows) { // payload end, break
// write the remaining blocks to the file
if (pVnode->nfd > 0) {
int blocksLeft = importHandle.compInfo.numOfBlocks - blockIter.oslot;
if (blocksLeft > 0) {
checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft);
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
break;
}
if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break
// Should never come here
assert(false);
}
TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
{ // Binary search the (slot, pos) which is >= key as well as nextKey
int left = blockIter.slot;
int right = importHandle.compInfo.numOfBlocks - 1;
TSKEY minKey = importHandle.pBlocks[left].keyFirst;
TSKEY maxKey = importHandle.pBlocks[right].keyLast;
assert(minKey <= maxKey);
if (key < minKey) { // Case 1. write just ahead the blockIter.slot
blockIter.slot = left;
blockIter.pos = 0;
blockIter.nextKey = minKey;
} else if (key > maxKey) { // Case 2. write to the end
if (importHandle.pBlocks[right].last) { // Case 2.1 last block in .last file, need to merge
assert(importHandle.last != 0);
importHandle.last = 0;
blockIter.slot = right;
blockIter.pos = importHandle.pBlocks[right].numOfPoints;
} else { // Case 2.2 just write after the last block
blockIter.slot = right + 1;
blockIter.pos = 0;
}
blockIter.nextKey = maxFileKey + 1;
} else { // Case 3. need to search the block for slot and pos
if (key == minKey || key == maxKey) {
payloadIter++;
continue;
}
// Here: minKey < key < maxKey
int mid;
TSKEY blockMinKey;
TSKEY blockMaxKey;
// Binary search the slot
do {
mid = (left + right) / 2;
blockMinKey = importHandle.pBlocks[mid].keyFirst;
blockMaxKey = importHandle.pBlocks[mid].keyLast;
assert(blockMinKey <= blockMaxKey);
if (key < blockMinKey) {
right = mid;
} else if (key > blockMaxKey) {
left = mid + 1;
} else { /* blockMinKey <= key <= blockMaxKey */
break;
}
} while (left < right);
if (key == blockMinKey || key == blockMaxKey) { // duplicate key
payloadIter++;
continue;
} }
// Get the slot
if (key > blockMaxKey) { /* pos = 0 or pos = ? */
blockIter.slot = mid + 1;
} else { /* key < blockMinKey (pos = 0) || (key > blockMinKey && key < blockMaxKey) (pos=?) */
blockIter.slot = mid;
}
// Get the pos
assert(blockIter.slot < importHandle.compInfo.numOfBlocks);
if (key == importHandle.pBlocks[blockIter.slot].keyFirst ||
key == importHandle.pBlocks[blockIter.slot].keyLast) {
payloadIter++;
continue;
}
assert(key < importHandle.pBlocks[blockIter.slot].keyLast);
/* */
if (key < importHandle.pBlocks[blockIter.slot].keyFirst) {
blockIter.pos = 0;
blockIter.nextKey = importHandle.pBlocks[blockIter.slot].keyFirst;
} else { } else {
if (pImport->pos < 0) pImport->pos = 0; SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot;
if (pBlock->sversion != pObj->sversion) { /*TODO*/
} }
if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP, &code) < 0) {
goto _error_merge;
} }
code = 0; int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC);
assert(pos != 0);
if (KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), pos) == key) {
payloadIter++;
continue;
} }
return code; blockIter.pos = pos;
} blockIter.nextKey = (blockIter.slot + 1 < importHandle.compInfo.numOfBlocks)
? importHandle.pBlocks[blockIter.slot + 1].keyFirst
: maxFileKey + 1;
// Need to merge with this block
if (importHandle.pBlocks[blockIter.slot].last) { // this is to merge with the last block
assert((blockIter.slot == (importHandle.compInfo.numOfBlocks - 1)));
importHandle.last = 0;
}
}
}
}
int vnodeImportStartToCache(SImportInfo *pImport, char *payload, int rows) { // Open the new .t file if not opened yet.
int code = 0; if (pVnode->nfd <= 0) {
SMeterObj *pObj = pImport->pObj; if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
code = vnodeFindKeyInCache(pImport, 1); if (blockIter.slot > blockIter.oslot) { // write blocks in range [blockIter.oslot, blockIter.slot) to .t file
if (code != 0) return code; checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot));
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot),
strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
blockIter.oslot = blockIter.slot;
}
if (blockIter.pos == 0) { // No need to merge
// copy payload part to data
int rowOffset = 0;
for (; payloadIter < rows; rowOffset++) {
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) break;
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
}
// write directly to .data file
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO: Deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)) < 0) {
// TODO : deal with the ERROR here
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
} else { // Merge block and payload from payloadIter
if (pImport->key != pImport->firstKey) { if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot,
rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key); DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA, &code) < 0) { // Load neccessary blocks
pImport->importedRows = rows; goto _error_merge;
code = vnodeImportToCache(pImport, payload, rows); }
importHandle.oldNumOfBlocks--;
importHandle.driftOffset -= sizeof(SCompBlock);
int rowOffset = blockIter.pos; // counter for data
// Copy the front part
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy((void *)(data[col]->data), (void *)(importHandle.data[col]->data),
pObj->schema[col].bytes * blockIter.pos);
}
// Merge part
while (1) {
if (rowOffset >= pVnode->cfg.rowsInFileBlock) { // data full in a block to commit
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
}
if ((payloadIter >= rows || KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) &&
blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints)
break;
if (payloadIter >= rows ||
KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) { // payload end
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
} else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
rowOffset++;
} else {
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) ==
KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
blockIter.pos)) { // duplicate key
payloadIter++;
continue;
} else if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) <
KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
blockIter.pos)) {
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
rowOffset++;
} else { } else {
dTrace("vid:%d sid:%d id:%s, data is already imported to cache, firstKey:%lld", pObj->vnode, pObj->sid, pObj->meterId, pImport->firstKey); for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos,
pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
}
}
}
if (rowOffset > 0) { // data full in a block to commit
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
} }
blockIter.slot++;
blockIter.oslot = blockIter.slot;
}
}
}
}
// Write the SCompInfo part
if (vnodeCloseImportFiles(pObj, &importHandle) < 0) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
pImport->importedRows += pointsImported;
pthread_mutex_lock(&(pPool->vmutex));
if (pInfo->numOfBlocks > 0) {
int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0]));
// data may be in commited cache, cache shall be released
if (lastKeyImported > firstKeyInCache) {
while (slot != pInfo->commitSlot) {
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot];
vnodeFreeCacheBlock(pCacheBlock);
slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
if (pInfo->commitPoint == pObj->pointsPerBlock) {
if (pInfo->cacheBlocks[pInfo->commitSlot]->pMeterObj == pObj) {
vnodeFreeCacheBlock(pInfo->cacheBlocks[pInfo->commitSlot]);
}
}
}
}
pthread_mutex_unlock(&(pPool->vmutex));
// TODO: free the allocated memory
tfree(buffer);
tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
return code; return code;
}
int vnodeImportStartToFile(SImportInfo *pImport, char *payload, int rows) { _error_merge:
int code = 0; tfree(buffer);
SMeterObj *pObj = pImport->pObj; tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
code = vnodeFindKeyInFile(pImport, 1); close(pVnode->dfd);
if (code != 0) return code; pVnode->dfd = 0;
if (pImport->key != pImport->firstKey) { close(pVnode->hfd);
pImport->payload = payload; pVnode->hfd = 0;
pImport->rows = vnodeGetImportStartPart(pObj, payload, rows, pImport->key);
pImport->importedRows = pImport->rows; close(pVnode->lfd);
code = vnodeImportToFile(pImport); pVnode->lfd = 0;
} else {
dTrace("vid:%d sid:%d id:%s, data is already imported to file", pObj->vnode, pObj->sid, pObj->meterId); if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
remove(pVnode->nfn);
} }
return code; return code;
} }
int vnodeImportWholeToFile(SImportInfo *pImport, char *payload, int rows) { #define FORWARD_ITER(iter, step, slotLimit, posLimit) \
int code = 0; { \
SMeterObj *pObj = pImport->pObj; if ((iter.pos) + (step) < (posLimit)) { \
(iter.pos) = (iter.pos) + (step); \
} else { \
(iter.pos) = 0; \
(iter.slot) = ((iter.slot) + 1) % (slotLimit); \
} \
}
code = vnodeFindKeyInFile(pImport, 0); int isCacheEnd(SBlockIter iter, SMeterObj *pMeter) {
if (code != 0) return code; SCacheInfo *pInfo = (SCacheInfo *)(pMeter->pCache);
int slot = 0;
int pos = 0;
if (pImport->key != pImport->lastKey) { if (pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints == pMeter->pointsPerBlock) {
pImport->payload = payload; slot = (pInfo->currentSlot + 1) % (pInfo->maxBlocks);
pImport->rows = vnodeGetImportEndPart(pObj, payload, rows, &pImport->payload, pImport->key); pos = 0;
pImport->importedRows = pImport->rows;
code = vnodeImportToFile(pImport);
} else { } else {
code = vnodeImportStartToFile(pImport, payload, rows); slot = pInfo->currentSlot;
pos = pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
} }
return ((iter.slot == slot) && (iter.pos == pos));
return code;
} }
int vnodeImportWholeToCache(SImportInfo *pImport, char *payload, int rows) { static void vnodeFlushMergeBuffer(SMergeBuffer *pBuffer, SBlockIter *pWriteIter, SBlockIter *pCacheIter,
int code = 0; SMeterObj *pObj, SCacheInfo *pInfo, int checkBound) {
SMeterObj *pObj = pImport->pObj; // Function to flush the merge buffer data to cache
if (pWriteIter->pos == pObj->pointsPerBlock) {
pWriteIter->pos = 0;
pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks;
}
code = vnodeFindKeyInCache(pImport, 0); while (pBuffer->spos != pBuffer->epos) {
if (code != 0) return code; if (checkBound && pWriteIter->slot == pCacheIter->slot && pWriteIter->pos == pCacheIter->pos) break;
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pInfo->cacheBlocks[pWriteIter->slot]->offset[col] + pObj->schema[col].bytes * pWriteIter->pos,
pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes);
}
if (pImport->key != pImport->lastKey) { if (pWriteIter->pos + 1 < pObj->pointsPerBlock) {
char *pStart; (pWriteIter->pos)++;
if ( pImport->key < pObj->lastKeyOnFile ) pImport->key = pObj->lastKeyOnFile;
rows = vnodeGetImportEndPart(pObj, payload, rows, &pStart, pImport->key);
pImport->importedRows = rows;
code = vnodeImportToCache(pImport, pStart, rows);
} else { } else {
if (pImport->firstKey > pObj->lastKeyOnFile) { pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos + 1;
code = vnodeImportStartToCache(pImport, payload, rows); pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks;
} else if (pImport->firstKey < pObj->lastKeyOnFile) { pWriteIter->pos = 0;
code = vnodeImportStartToFile(pImport, payload, rows);
} else { // firstKey == pObj->lastKeyOnFile
dTrace("vid:%d sid:%d id:%s, data is already there", pObj->vnode, pObj->sid, pObj->meterId);
} }
pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows;
} }
return code; if ((!checkBound) && pWriteIter->pos != 0) {
pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos;
}
} }
int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion, int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) {
int *pNumOfPoints, TSKEY now) { SMeterObj * pObj = pImport->pObj;
SSubmitMsg *pSubmit = (SSubmitMsg *)cont; SVnodeObj * pVnode = vnodeList + pObj->vnode;
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; int code = -1;
int rows; SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache);
char *payload; int payloadIter;
int code = TSDB_CODE_ACTION_IN_PROGRESS; SCachePool * pPool = (SCachePool *)(pVnode->pCachePool);
SCachePool *pPool = (SCachePool *)pVnode->pCachePool; int isCacheIterEnd = 0;
SShellObj *pShell = (SShellObj *)param; int spayloadIter = 0;
int pointsImported = 0; int isAppendData = 0;
int rowsImported = 0;
rows = htons(pSubmit->numOfRows); int totalRows = 0;
int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows); size_t size = 0;
if (expectedLen != contLen) { SMergeBuffer *pBuffer = NULL;
dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId,
expectedLen, contLen); TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
return TSDB_CODE_WRONG_MSG_SIZE; TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey <= lastKey && firstKey > pObj->lastKeyOnFile);
// TODO: make this condition less strict
if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { // No free room to hold the data
dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints);
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
} }
if (sversion != pObj->sversion) { if (pInfo->numOfBlocks == 0) {
dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId, if (vnodeAllocateCacheBlock(pObj) < 0) {
pObj->sversion, sversion); pImport->importedRows = 0;
return TSDB_CODE_OTHERS; pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
}
} }
payload = pSubmit->payLoad; // Find the first importable record from payload
TSKEY firstKey = *(TSKEY *)payload; pImport->lastKey = lastKey;
TSKEY lastKey = *(TSKEY *)(payload + pObj->bytesPerPoint*(rows-1)); for (payloadIter = 0; payloadIter < rows; payloadIter++) {
int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision]; TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision]; if (key == pObj->lastKey) continue;
TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 1; if (key > pObj->lastKey) { // Just as insert
if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) { pImport->slot = pInfo->currentSlot;
dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%lld, data is out of range, rows:%d firstKey:%lld lastKey:%lld minAllowedKey:%lld maxAllowedKey:%lld", pImport->pos = pInfo->cacheBlocks[pImport->slot]->numOfPoints;
pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, rows, firstKey, lastKey, minAllowedKey, maxAllowedKey); isCacheIterEnd = 1;
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; break;
} else {
pImport->firstKey = key;
if (vnodeFindKeyInCache(pImport, 1) < 0) {
goto _exit;
} }
// forward to peers if (pImport->firstKey != pImport->key) break;
if (pShell && pVnode->cfg.replications > 1) { }
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion);
if (code != 0) return code;
} }
if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (payloadIter == rows) {
if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; pImport->importedRows = 0;
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion); code = 0;
if (code != 0) return code; goto _exit;
} }
if (*((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)) > pObj->lastKey) { spayloadIter = payloadIter;
code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported, now); if (pImport->pos == pObj->pointsPerBlock) assert(isCacheIterEnd);
if (pShell) { // Allocate a new merge buffer work as buffer
pShell->code = code; totalRows = pObj->pointsPerBlock + rows - payloadIter + 1;
pShell->numOfTotalPoints += pointsImported; size = sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns + pObj->bytesPerPoint * totalRows;
pBuffer = (SMergeBuffer *)malloc(size);
if (pBuffer == NULL) {
dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
pBuffer->spos = 0;
pBuffer->epos = 0;
pBuffer->totalRows = totalRows;
pBuffer->offset[0] = (char *)pBuffer + sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns;
for (int col = 1; col < pObj->numOfColumns; col++) {
pBuffer->offset[col] = pBuffer->offset[col - 1] + pObj->schema[col - 1].bytes * totalRows;
} }
} else {
SImportInfo *pNew, import;
dTrace("vid:%d sid:%d id:%s, import %d rows data", pObj->vnode, pObj->sid, pObj->meterId, rows); // TODO: take pImport->pos = pObj->pointsPerBlock into consideration
memset(&import, 0, sizeof(import)); { // Do the merge staff
import.firstKey = *((TSKEY *)(payload)); SBlockIter cacheIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to traverse old cache data
import.lastKey = *((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)); SBlockIter writeIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to write data to cache
import.pObj = pObj; int availPoints = pObj->pointsPerBlock - pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
import.pShell = pShell;
import.payload = payload;
import.rows = rows;
if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING)) != TSDB_CODE_SUCCESS) { assert(availPoints >= 0);
return code;
while (1) {
if ((payloadIter >= rows) && isCacheIterEnd) break;
if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush
vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 1);
}
TSKEY payloadKey = (payloadIter < rows) ? KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) : INT64_MAX;
TSKEY cacheKey = (isCacheIterEnd) ? INT64_MAX : KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), cacheIter.pos);
if (cacheKey < payloadKey) { // if (payload end || (cacheIter not end && payloadKey > blockKey)), consume cache
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes);
}
FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock);
isCacheIterEnd = isCacheEnd(cacheIter, pObj);
} else if (cacheKey > payloadKey) { // cacheIter end || (payloadIter not end && payloadKey < blockKey), consume payload
if (availPoints == 0) { // Need to allocate a new cache block
pthread_mutex_lock(&(pPool->vmutex));
// TODO: Need to check if there are enough slots to hold a new one
SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode);
if (pNewBlock == NULL) { // Failed to allocate a new cache block, need to commit and loop over the remaining cache records
pthread_mutex_unlock(&(pPool->vmutex));
payloadIter = rows;
code = TSDB_CODE_ACTION_IN_PROGRESS;
pImport->commit = 1;
continue;
} }
int32_t num = 0; assert(pInfo->numOfBlocks <= pInfo->maxBlocks);
pthread_mutex_lock(&pVnode->vmutex); if (pInfo->numOfBlocks == pInfo->maxBlocks) {
num = pObj->numOfQueries; vnodeFreeCacheBlock(pInfo->cacheBlocks[(pInfo->currentSlot + 1) % pInfo->maxBlocks]);
pthread_mutex_unlock(&pVnode->vmutex); }
int32_t commitInProcess = 0; pNewBlock->pMeterObj = pObj;
pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns;
for (int col = 1; col < pObj->numOfColumns; col++)
pNewBlock->offset[col] = pNewBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock;
pthread_mutex_lock(&pPool->vmutex); int newSlot = (writeIter.slot + 1) % pInfo->maxBlocks;
if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { pInfo->blocks++;
pthread_mutex_unlock(&pPool->vmutex); int tblockId = pInfo->blocks;
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
if (writeIter.slot != pInfo->currentSlot) {
for (int tslot = pInfo->currentSlot; tslot != writeIter.slot;) {
int nextSlot = (tslot + 1) % pInfo->maxBlocks;
pInfo->cacheBlocks[nextSlot] = pInfo->cacheBlocks[tslot];
pInfo->cacheBlocks[nextSlot]->slot = nextSlot;
pInfo->cacheBlocks[nextSlot]->blockId = tblockId--;
tslot = (tslot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
}
pNew = (SImportInfo *)malloc(sizeof(SImportInfo)); int index = pNewBlock->index;
memcpy(pNew, &import, sizeof(SImportInfo)); if (cacheIter.slot == writeIter.slot) {
pNew->signature = pNew; pNewBlock->numOfPoints = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints;
int payloadLen = contLen - sizeof(SSubmitMsg); int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos;
pNew->payload = malloc(payloadLen); if (pointsLeft > 0) {
pNew->opayload = pNew->payload; for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pNew->payload, payload, payloadLen); memcpy((void *)(pNewBlock->offset[col] + pObj->schema[col].bytes*cacheIter.pos),
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes * pointsLeft);
}
}
}
pNewBlock->blockId = tblockId;
pNewBlock->slot = newSlot;
pNewBlock->index = index;
pInfo->cacheBlocks[newSlot] = pNewBlock;
pInfo->numOfBlocks++;
pInfo->unCommittedBlocks++;
pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks;
pthread_mutex_unlock(&(pPool->vmutex));
cacheIter.slot = (cacheIter.slot + 1) % pInfo->maxBlocks;
// move a cache of data forward
availPoints = pObj->pointsPerBlock;
}
dTrace("vid:%d sid:%d id:%s, import later, commit in process:%d, numOfQueries:%d", pObj->vnode, pObj->sid, int offset = 0;
pObj->meterId, commitInProcess, pObj->numOfQueries); for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
payload + pObj->bytesPerPoint * payloadIter + offset, pObj->schema[col].bytes);
offset += pObj->schema[col].bytes;
}
if (spayloadIter == payloadIter) {// update pVnode->firstKey
pthread_mutex_lock(&(pVnode->vmutex));
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < pVnode->firstKey) pVnode->firstKey = firstKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
if (isCacheIterEnd) {
pObj->lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
if (!isAppendData) isAppendData = 1;
}
rowsImported++;
availPoints--;
payloadIter++;
/*
* vnodeProcessImportTimer will set the import status for this table, so need to
* set the import flag here
*/
taosTmrStart(vnodeProcessImportTimer, 10, pNew, vnodeTmrCtrl);
return 0;
} else { } else {
pPool->commitInProcess = 1; payloadIter++;
pthread_mutex_unlock(&pPool->vmutex); continue;
}
pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows;
}
int ret = vnodeImportData(pObj, &import); if (pBuffer->spos != pBuffer->epos) { // Flush the remaining data in the merge buffer
if (pShell) { vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 0);
pShell->code = ret; } else {
pShell->numOfTotalPoints += import.importedRows; // Should never come here
assert(false);
} }
if (isAppendData) {
pthread_mutex_lock(&(pVnode->vmutex));
if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey;
pthread_mutex_unlock(&(pVnode->vmutex));
} }
} }
pImport->importedRows += rowsImported;
atomic_fetch_sub_32(&(pObj->freePoints), rowsImported);
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); code = TSDB_CODE_SUCCESS;
pVnode->version++;
_exit:
tfree(pBuffer);
return code;
}
int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) {
int code = 0;
// TODO : Check the correctness of pObj and pVnode
SMeterObj *pObj = (SMeterObj *)(pImport->pObj);
SVnodeObj *pVnode = vnodeList + pObj->vnode;
int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[pVnode->cfg.precision];
int sfid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0) / delta;
int efid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1) / delta;
for (int fid = sfid; fid <= efid; fid++) {
TSKEY skey = fid * delta;
TSKEY ekey = skey + delta - 1;
int srow = 0, nrows = 0;
if (vnodeSearchKeyInRange(payload, pObj->bytesPerPoint, rows, skey, ekey, &srow, &nrows) < 0) continue;
if (pShell) { assert(nrows > 0);
pShell->count--;
if (pShell->count <= 0) vnodeSendShellSubmitRspMsg(pShell, pShell->code, pShell->numOfTotalPoints); dTrace("vid:%d sid:%d meterId:%s, %d rows of data will be imported to file %d, srow:%d firstKey:%ld lastKey:%ld",
pObj->vnode, pObj->sid, pObj->meterId, nrows, fid, srow, KEY_AT_INDEX(payload, pObj->bytesPerPoint, srow),
KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1)));
code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid);
if (code != TSDB_CODE_SUCCESS) break;
} }
return 0; return code;
} }
//todo abort from the procedure if the meter is going to be dropped // TODO : add offset in pShell to make it avoid repeatedly deal with messages
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) {
int code = 0; int code = 0;
int srow = 0, nrows = 0;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
// 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache
if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX,
&srow, &nrows) >= 0) {
assert(nrows > 0);
code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
if (pImport->commit) { // Need to commit now
pPool->commitInProcess = 0;
vnodeProcessCommitTimer(pVnode, NULL);
return code;
}
if (pImport->lastKey > pObj->lastKeyOnFile) { if (code != TSDB_CODE_SUCCESS) return code;
code = vnodeImportWholeToCache(pImport, pImport->payload, pImport->rows);
} else if (pImport->lastKey < pObj->lastKeyOnFile) {
code = vnodeImportWholeToFile(pImport, pImport->payload, pImport->rows);
} else { // lastKey == pObj->lastkeyOnFile
code = vnodeImportStartToFile(pImport, pImport->payload, pImport->rows);
} }
SVnodeObj *pVnode = &vnodeList[pObj->vnode]; // 2. import data (0, pObj->lastKeyOnFile) into files
SCachePool *pPool = (SCachePool *)pVnode->pCachePool; if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0, pObj->lastKeyOnFile - 1, &srow,
pPool->commitInProcess = 0; &nrows) >= 0) {
assert(nrows > 0);
code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
}
if (pImport->commit) vnodeProcessCommitTimer(pVnode, NULL); pPool->commitInProcess = 0;
return code; return code;
} }
...@@ -584,12 +584,12 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi ...@@ -584,12 +584,12 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi
if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) { if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) {
if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG; if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_INSERT, cont, contLen, sversion); code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_INSERT, cont, contLen, sversion);
if (code != 0) return code; if (code != TSDB_CODE_SUCCESS) return code;
} }
if (source == TSDB_DATA_SOURCE_SHELL && pVnode->cfg.replications > 1) { if (source == TSDB_DATA_SOURCE_SHELL && pVnode->cfg.replications > 1) {
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_INSERT, sversion); code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_INSERT, sversion);
if (code != 0) return code; if (code != TSDB_CODE_SUCCESS) return code;
} }
if (pObj->sversion < sversion) { if (pObj->sversion < sversion) {
...@@ -601,11 +601,11 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi ...@@ -601,11 +601,11 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi
} }
pData = pSubmit->payLoad; pData = pSubmit->payLoad;
code = TSDB_CODE_SUCCESS;
TSKEY firstKey = *((TSKEY *)pData); TSKEY firstKey = *((TSKEY *)pData);
TSKEY lastKey = *((TSKEY *)(pData + pObj->bytesPerPoint * (numOfPoints - 1))); TSKEY lastKey = *((TSKEY *)(pData + pObj->bytesPerPoint * (numOfPoints - 1)));
int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision]; int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[pVnode->cfg.precision];
TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision]; TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision];
TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 2; TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[pVnode->cfg.precision] - 2;
if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) { if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) {
...@@ -619,7 +619,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi ...@@ -619,7 +619,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi
} }
for (i = 0; i < numOfPoints; ++i) { // meter will be dropped, abort current insertion for (i = 0; i < numOfPoints; ++i) { // meter will be dropped, abort current insertion
if (pObj->state >= TSDB_METER_STATE_DELETING) { if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) {
dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId, dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->state); pObj->state);
...@@ -648,6 +648,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi ...@@ -648,6 +648,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi
pData += pObj->bytesPerPoint; pData += pObj->bytesPerPoint;
points++; points++;
} }
atomic_fetch_add_64(&(pVnode->vnodeStatistic.pointsWritten), points * (pObj->numOfColumns - 1)); atomic_fetch_add_64(&(pVnode->vnodeStatistic.pointsWritten), points * (pObj->numOfColumns - 1));
atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), points * pObj->bytesPerPoint); atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), points * pObj->bytesPerPoint);
...@@ -660,6 +661,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi ...@@ -660,6 +661,7 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi
pVnode->version++; pVnode->version++;
pthread_mutex_unlock(&(pVnode->vmutex)); pthread_mutex_unlock(&(pVnode->vmutex));
vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT);
_over: _over:
......
...@@ -39,10 +39,21 @@ SShellObj **shellList = NULL; ...@@ -39,10 +39,21 @@ SShellObj **shellList = NULL;
int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj);
int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj);
int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj); int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj);
static void vnodeProcessBatchSubmitTimer(void *param, void *tmrId);
int vnodeSelectReqNum = 0; int vnodeSelectReqNum = 0;
int vnodeInsertReqNum = 0; int vnodeInsertReqNum = 0;
/*
 * State carried across timer callbacks when a batched submit request cannot be
 * finished in one pass (the submit job returned TSDB_CODE_ACTION_IN_PROGRESS).
 * The record is heap-allocated with room for a copy of the submit blocks in
 * the trailing flexible array member, and is released by the timer callback
 * once the job ends with any other code.
 */
typedef struct {
// import flag handed to the submit job; set to 1 when the record is created
int32_t import;
// vnode id taken from the submit message; used to index vnodeList
int32_t vnode;
// number of submit blocks in the request (end index for the submit job)
int32_t numOfSid;
// index of the next submit block to process when the job is resumed
int32_t ssid; // Start sid
// shell object that receives the submit response and accumulates numOfTotalPoints
SShellObj *pObj;
// byte offset, from the start of blks, of the next block to process
int64_t offset; // offset relative the blks
// copy of the submit message body that follows the SShellSubmitMsg header
char blks[];
} SBatchSubmitInfo;
void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) { void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) {
int sid, vnode; int sid, vnode;
SShellObj *pObj = (SShellObj *)ahandle; SShellObj *pObj = (SShellObj *)ahandle;
...@@ -249,6 +260,7 @@ int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints) { ...@@ -249,6 +260,7 @@ int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints) {
char *pMsg, *pStart; char *pMsg, *pStart;
int msgLen; int msgLen;
dTrace("code:%d numOfTotalPoints:%d", code, numOfPoints);
pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_SUBMIT_RSP, 128); pStart = taosBuildRspMsgWithSize(pObj->thandle, TSDB_MSG_TYPE_SUBMIT_RSP, 128);
if (pStart == NULL) return -1; if (pStart == NULL) return -1;
pMsg = pStart; pMsg = pStart;
...@@ -280,6 +292,7 @@ int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj) { ...@@ -280,6 +292,7 @@ int vnodeProcessQueryRequest(char *pMsg, int msgLen, SShellObj *pObj) {
} }
if (pQueryMsg->numOfSids <= 0) { if (pQueryMsg->numOfSids <= 0) {
dError("Invalid number of meters to query, numOfSids:%d", pQueryMsg->numOfSids);
code = TSDB_CODE_INVALID_QUERY_MSG; code = TSDB_CODE_INVALID_QUERY_MSG;
goto _query_over; goto _query_over;
} }
...@@ -485,10 +498,83 @@ int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj) { ...@@ -485,10 +498,83 @@ int vnodeProcessRetrieveRequest(char *pMsg, int msgLen, SShellObj *pObj) {
return msgLen; return msgLen;
} }
/*
 * Validate the table addressed by one submit block against the given vnode.
 *
 * The sid and uid fields of the block are byte-swapped into local variables;
 * the block itself is only read, never modified.
 *
 * @param pBlocks  submit block whose sid/uid are checked
 * @param pVnode   vnode that should own the table
 * @return TSDB_CODE_INVALID_TABLE_ID    sid <= 0 or sid >= pVnode->cfg.maxSessions
 *         TSDB_CODE_NOT_ACTIVE_TABLE    pVnode->meterList[sid] is NULL; in this case
 *                                       vnodeSendMeterCfgMsg() is called for the vnode/sid
 *         TSDB_CODE_INVALID_SUBMIT_MSG  uid in the block differs from the meter's uid
 *         TSDB_CODE_SUCCESS             all checks passed
 */
static int vnodeCheckSubmitBlockContext(SShellSubmitBlock *pBlocks, SVnodeObj *pVnode) {
  int32_t  sid = htonl(pBlocks->sid);
  uint64_t uid = htobe64(pBlocks->uid);

  if (sid >= pVnode->cfg.maxSessions || sid <= 0) {
    // the format string has two %d conversions, so the vnode id must be supplied ahead of sid
    dError("vid:%d sid:%d, sid is out of range", pVnode->vnode, sid);
    return TSDB_CODE_INVALID_TABLE_ID;
  }

  SMeterObj *pMeterObj = pVnode->meterList[sid];
  if (pMeterObj == NULL) {
    dError("vid:%d sid:%d, not active table", pVnode->vnode, sid);
    vnodeSendMeterCfgMsg(pVnode->vnode, sid);
    return TSDB_CODE_NOT_ACTIVE_TABLE;
  }

  if (pMeterObj->uid != uid) {
    dError("vid:%d sid:%d id:%s, uid:%lld, uid in msg:%lld, uid mismatch", pVnode->vnode, sid, pMeterObj->meterId,
           pMeterObj->uid, uid);
    return TSDB_CODE_INVALID_SUBMIT_MSG;
  }

  return TSDB_CODE_SUCCESS;
}
/*
 * Walk the submit blocks with indices [*ssid, esid) and write each one to its
 * table, through vnodeImportPoints() when `import` is non-zero and through
 * vnodeInsertPoints() otherwise.
 *
 * Processing stops at the first block for which the context check or the
 * write returns anything other than TSDB_CODE_SUCCESS.
 *
 * @param pVnode    vnode owning the tables
 * @param import    non-zero for import, zero for plain insert
 * @param ssid      in: index of the first block to process;
 *                  out: index of the block at which processing stopped (esid if all were done)
 * @param esid      end index (exclusive)
 * @param ppBlocks  in: first block to process; out: block at which processing stopped
 * @param now       current timestamp handed to the write routines
 * @param pObj      shell object; numOfTotalPoints is increased by the points reported for
 *                  every written block, and count is decreased per successfully imported block
 * @return the code of the last check/write performed, or TSDB_CODE_SUCCESS if none ran
 */
static int vnodeDoSubmitJob(SVnodeObj *pVnode, int import, int32_t *ssid, int32_t esid, SShellSubmitBlock **ppBlocks,
                            TSKEY now, SShellObj *pObj) {
  SShellSubmitBlock *pCur = *ppBlocks;
  int                status = TSDB_CODE_SUCCESS;
  int32_t            idx = *ssid;

  while (idx < esid) {
    int32_t written = 0;

    status = vnodeCheckSubmitBlockContext(pCur, pVnode);
    if (status != TSDB_CODE_SUCCESS) break;

    SMeterObj *pMeter = (SMeterObj *)(pVnode->meterList[htonl(pCur->sid)]);

    // the sub-message starts at numOfRows, i.e. it does not include sid, vid
    char *  cont = (char *)&(pCur->numOfRows);
    int32_t contLen = sizeof(pCur->numOfRows) + htons(pCur->numOfRows) * pMeter->bytesPerPoint;
    int32_t sversion = htonl(pCur->sversion);

    if (import) {
      status = vnodeImportPoints(pMeter, cont, contLen, TSDB_DATA_SOURCE_SHELL, pObj, sversion, &written, now);
    } else {
      status = vnodeInsertPoints(pMeter, cont, contLen, TSDB_DATA_SOURCE_SHELL, NULL, sversion, &written, now);
    }

    // points are accounted for even when the write reports a non-success code
    pObj->numOfTotalPoints += written;
    if (status != TSDB_CODE_SUCCESS) break;

    // records for one table should be located consecutively in the payload buffer, which is
    // guaranteed by the client, so a finished import block completes one table
    if (import) {
      pObj->count--;
    }

    // step over the block header and its rows to reach the next block
    pCur = (SShellSubmitBlock *)((char *)pCur + sizeof(SShellSubmitBlock) +
                                 htons(pCur->numOfRows) * pMeter->bytesPerPoint);
    idx++;
  }

  *ssid = idx;
  *ppBlocks = pCur;

  return status;
}
int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) {
int code = 0, ret = 0; int code = 0, ret = 0;
int32_t i = 0;
SShellSubmitMsg shellSubmit = *(SShellSubmitMsg *)pMsg; SShellSubmitMsg shellSubmit = *(SShellSubmitMsg *)pMsg;
SShellSubmitMsg *pSubmit = &shellSubmit; SShellSubmitMsg *pSubmit = &shellSubmit;
SShellSubmitBlock *pBlocks = NULL;
pSubmit->vnode = htons(pSubmit->vnode); pSubmit->vnode = htons(pSubmit->vnode);
pSubmit->numOfSid = htonl(pSubmit->numOfSid); pSubmit->numOfSid = htonl(pSubmit->numOfSid);
...@@ -526,67 +612,69 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { ...@@ -526,67 +612,69 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) {
pObj->count = pSubmit->numOfSid; // for import pObj->count = pSubmit->numOfSid; // for import
pObj->code = 0; // for import pObj->code = 0; // for import
pObj->numOfTotalPoints = 0; // for import pObj->numOfTotalPoints = 0;
SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pMsg + sizeof(SShellSubmitMsg));
int32_t numOfPoints = 0;
int32_t numOfTotalPoints = 0;
// We take current time here to avoid it in the for loop.
TSKEY now = taosGetTimestamp(pVnode->cfg.precision); TSKEY now = taosGetTimestamp(pVnode->cfg.precision);
for (int32_t i = 0; i < pSubmit->numOfSid; ++i) { pBlocks = (SShellSubmitBlock *)(pMsg + sizeof(SShellSubmitMsg));
numOfPoints = 0; i = 0;
code = vnodeDoSubmitJob(pVnode, pSubmit->import, &i, pSubmit->numOfSid, &pBlocks, now, pObj);
pBlocks->sid = htonl(pBlocks->sid); _submit_over:
pBlocks->uid = htobe64(pBlocks->uid); ret = 0;
if (pSubmit->import) { // Import case
if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
if (pBlocks->sid >= pVnode->cfg.maxSessions || pBlocks->sid <= 0) { SBatchSubmitInfo *pSubmitInfo =
dTrace("sid:%d is out of range", pBlocks->sid); (SBatchSubmitInfo *)calloc(1, sizeof(SBatchSubmitInfo) + msgLen - sizeof(SShellSubmitMsg));
code = TSDB_CODE_INVALID_TABLE_ID; if (pSubmitInfo == NULL) {
goto _submit_over; code = TSDB_CODE_SERV_OUT_OF_MEMORY;
ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints);
} else { // Start a timer to process the next part of request
pSubmitInfo->import = 1;
pSubmitInfo->vnode = pSubmit->vnode;
pSubmitInfo->numOfSid = pSubmit->numOfSid;
pSubmitInfo->ssid = i; // start from this position, not the initial position
pSubmitInfo->pObj = pObj;
pSubmitInfo->offset = ((char *)pBlocks) - (pMsg + sizeof(SShellSubmitMsg));
assert(pSubmitInfo->offset >= 0);
memcpy((void *)(pSubmitInfo->blks), (void *)(pMsg + sizeof(SShellSubmitMsg)), msgLen - sizeof(SShellSubmitMsg));
taosTmrStart(vnodeProcessBatchSubmitTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl);
} }
} else {
int vnode = pSubmit->vnode; if (code == TSDB_CODE_SUCCESS) assert(pObj->count == 0);
int sid = pBlocks->sid; ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints);
SMeterObj *pMeterObj = vnodeList[vnode].meterList[sid];
if (pMeterObj == NULL) {
dError("vid:%d sid:%d, not active table", vnode, sid);
vnodeSendMeterCfgMsg(vnode, sid);
code = TSDB_CODE_NOT_ACTIVE_TABLE;
goto _submit_over;
} }
} else { // Insert case
if (pMeterObj->uid != pBlocks->uid) { ret = vnodeSendShellSubmitRspMsg(pObj, code, pObj->numOfTotalPoints);
dError("vid:%d sid:%d, meterId:%s, uid:%lld, uid in msg:%lld, uid mismatch", vnode, sid, pMeterObj->meterId,
pMeterObj->uid, pBlocks->uid);
code = TSDB_CODE_INVALID_SUBMIT_MSG;
goto _submit_over;
} }
// dont include sid, vid atomic_fetch_add_32(&vnodeInsertReqNum, 1);
int subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; return ret;
int sversion = htonl(pBlocks->sversion); }
if (pSubmit->import) { static void vnodeProcessBatchSubmitTimer(void *param, void *tmrId) {
code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, SBatchSubmitInfo *pSubmitInfo = (SBatchSubmitInfo *)param;
sversion, &numOfPoints, now); assert(pSubmitInfo != NULL && pSubmitInfo->import);
} else {
code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL,
sversion, &numOfPoints, now);
}
if (code != TSDB_CODE_SUCCESS) {break;} int32_t i = 0;
int32_t code = TSDB_CODE_SUCCESS;
numOfTotalPoints += numOfPoints; SShellObj * pShell = pSubmitInfo->pObj;
pBlocks = (SShellSubmitBlock *)((char *)pBlocks + sizeof(SShellSubmitBlock) + SVnodeObj * pVnode = &vnodeList[pSubmitInfo->vnode];
htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint); SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pSubmitInfo->blks + pSubmitInfo->offset);
} TSKEY now = taosGetTimestamp(pVnode->cfg.precision);
i = pSubmitInfo->ssid;
_submit_over: code = vnodeDoSubmitJob(pVnode, pSubmitInfo->import, &i, pSubmitInfo->numOfSid, &pBlocks, now, pShell);
// for import, send the submit response only when return code is not zero
if (pSubmit->import == 0 || code != 0) ret = vnodeSendShellSubmitRspMsg(pObj, code, numOfTotalPoints);
atomic_fetch_add_32(&vnodeInsertReqNum, 1); if (code == TSDB_CODE_ACTION_IN_PROGRESS) {
return ret; pSubmitInfo->ssid = i;
pSubmitInfo->offset = ((char *)pBlocks) - pSubmitInfo->blks;
taosTmrStart(vnodeProcessBatchSubmitTimer, 10, (void *)pSubmitInfo, vnodeTmrCtrl);
} else {
if (code == TSDB_CODE_SUCCESS) assert(pShell->count == 0);
tfree(param);
vnodeSendShellSubmitRspMsg(pShell, code, pShell->numOfTotalPoints);
}
} }
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "tsqlfunction.h" #include "tsqlfunction.h"
#include "ttime.h" #include "ttime.h"
#include "ttypes.h" #include "ttypes.h"
#include "tutil.h"
#pragma GCC diagnostic ignored "-Wformat" #pragma GCC diagnostic ignored "-Wformat"
...@@ -46,8 +47,7 @@ void getTmpfilePath(const char *fileNamePrefix, char *dstPath) { ...@@ -46,8 +47,7 @@ void getTmpfilePath(const char *fileNamePrefix, char *dstPath) {
strcpy(tmpPath, tmpDir); strcpy(tmpPath, tmpDir);
strcat(tmpPath, tdengineTmpFileNamePrefix); strcat(tmpPath, tdengineTmpFileNamePrefix);
strcat(tmpPath, fileNamePrefix); strcat(tmpPath, fileNamePrefix);
strcat(tmpPath, "-%u-%u"); strcat(tmpPath, "-%llu-%u");
snprintf(dstPath, MAX_TMPFILE_PATH_LENGTH, tmpPath, taosGetPthreadId(), atomic_add_fetch_32(&tmpFileSerialNum, 1)); snprintf(dstPath, MAX_TMPFILE_PATH_LENGTH, tmpPath, taosGetPthreadId(), atomic_add_fetch_32(&tmpFileSerialNum, 1));
} }
...@@ -431,7 +431,8 @@ void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t * ...@@ -431,7 +431,8 @@ void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *
} }
void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) { void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx) {
double v = *(double *)value; //double v = *(double *)value;
double v = GET_DOUBLE_VAL(value);
if (pBucket->nRange.dMinVal == DBL_MAX) { if (pBucket->nRange.dMinVal == DBL_MAX) {
/* /*
...@@ -675,7 +676,8 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { ...@@ -675,7 +676,8 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) {
break; break;
}; };
case TSDB_DATA_TYPE_DOUBLE: { case TSDB_DATA_TYPE_DOUBLE: {
double val = *(double *)data; //double val = *(double *)data;
double val = GET_DOUBLE_VAL(data);
if (r->dMinVal > val) { if (r->dMinVal > val) {
r->dMinVal = val; r->dMinVal = val;
} }
...@@ -686,7 +688,8 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) { ...@@ -686,7 +688,8 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) {
break; break;
}; };
case TSDB_DATA_TYPE_FLOAT: { case TSDB_DATA_TYPE_FLOAT: {
double val = *(float *)data; //double val = *(float *)data;
double val = GET_FLOAT_VAL(data);
if (r->dMinVal > val) { if (r->dMinVal > val) {
r->dMinVal = val; r->dMinVal = val;
...@@ -734,12 +737,14 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { ...@@ -734,12 +737,14 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) {
break; break;
} }
case TSDB_DATA_TYPE_DOUBLE: { case TSDB_DATA_TYPE_DOUBLE: {
double val = *(double *)d; //double val = *(double *)d;
double val = GET_DOUBLE_VAL(d);
(pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx);
break; break;
} }
case TSDB_DATA_TYPE_FLOAT: { case TSDB_DATA_TYPE_FLOAT: {
double val = *(float *)d; //double val = *(float *)d;
double val = GET_FLOAT_VAL(d);
(pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx); (pBucket->HashFunc)(pBucket, &val, &segIdx, &slotIdx);
break; break;
} }
...@@ -840,16 +845,20 @@ static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, i ...@@ -840,16 +845,20 @@ static FORCE_INLINE int32_t columnValueAscendingComparator(char *f1, char *f2, i
return (first < second) ? -1 : 1; return (first < second) ? -1 : 1;
}; };
case TSDB_DATA_TYPE_DOUBLE: { case TSDB_DATA_TYPE_DOUBLE: {
double first = *(double *)f1; //double first = *(double *)f1;
double second = *(double *)f2; double first = GET_DOUBLE_VAL(f1);
//double second = *(double *)f2;
double second = GET_DOUBLE_VAL(f2);
if (first == second) { if (first == second) {
return 0; return 0;
} }
return (first < second) ? -1 : 1; return (first < second) ? -1 : 1;
}; };
case TSDB_DATA_TYPE_FLOAT: { case TSDB_DATA_TYPE_FLOAT: {
float first = *(float *)f1; //float first = *(float *)f1;
float second = *(float *)f2; //float second = *(float *)f2;
float first = GET_FLOAT_VAL(f1);
float second = GET_FLOAT_VAL(f2);
if (first == second) { if (first == second) {
return 0; return 0;
} }
...@@ -1298,10 +1307,16 @@ double findOnlyResult(tMemBucket *pMemBucket) { ...@@ -1298,10 +1307,16 @@ double findOnlyResult(tMemBucket *pMemBucket) {
return *(int8_t *)pPage->data; return *(int8_t *)pPage->data;
case TSDB_DATA_TYPE_BIGINT: case TSDB_DATA_TYPE_BIGINT:
return (double)(*(int64_t *)pPage->data); return (double)(*(int64_t *)pPage->data);
case TSDB_DATA_TYPE_DOUBLE: case TSDB_DATA_TYPE_DOUBLE: {
return *(double *)pPage->data; double dv = GET_DOUBLE_VAL(pPage->data);
case TSDB_DATA_TYPE_FLOAT: //return *(double *)pPage->data;
return *(float *)pPage->data; return dv;
}
case TSDB_DATA_TYPE_FLOAT: {
float fv = GET_FLOAT_VAL(pPage->data);
//return *(float *)pPage->data;
return fv;
}
default: default:
return 0; return 0;
} }
...@@ -1788,13 +1803,17 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) ...@@ -1788,13 +1803,17 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
break; break;
}; };
case TSDB_DATA_TYPE_FLOAT: { case TSDB_DATA_TYPE_FLOAT: {
td = *(float *)thisVal; //td = *(float *)thisVal;
nd = *(float *)nextVal; //nd = *(float *)nextVal;
td = GET_FLOAT_VAL(thisVal);
nd = GET_FLOAT_VAL(nextVal);
break; break;
} }
case TSDB_DATA_TYPE_DOUBLE: { case TSDB_DATA_TYPE_DOUBLE: {
td = *(double *)thisVal; //td = *(double *)thisVal;
nd = *(double *)nextVal; td = GET_DOUBLE_VAL(thisVal);
//nd = *(double *)nextVal;
nd = GET_DOUBLE_VAL(nextVal);
break; break;
} }
case TSDB_DATA_TYPE_BIGINT: { case TSDB_DATA_TYPE_BIGINT: {
...@@ -1831,15 +1850,17 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction) ...@@ -1831,15 +1850,17 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
break; break;
}; };
case TSDB_DATA_TYPE_FLOAT: { case TSDB_DATA_TYPE_FLOAT: {
finalResult = *(float *)thisVal; //finalResult = *(float *)thisVal;
finalResult = GET_FLOAT_VAL(thisVal);
break; break;
} }
case TSDB_DATA_TYPE_DOUBLE: { case TSDB_DATA_TYPE_DOUBLE: {
finalResult = *(double *)thisVal; //finalResult = *(double *)thisVal;
finalResult = GET_DOUBLE_VAL(thisVal);
break; break;
} }
case TSDB_DATA_TYPE_BIGINT: { case TSDB_DATA_TYPE_BIGINT: {
finalResult = (double)*(int64_t *)thisVal; finalResult = (double)(*(int64_t *)thisVal);
break; break;
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册