未验证 提交 e95b430e 编写于 作者: W wade zhang 提交者: GitHub

Merge pull request #21786 from taosdata/FIX/TD-24817-3.0

enh: make alter-hashrange atomic and idempotent
...@@ -46,6 +46,7 @@ typedef struct { ...@@ -46,6 +46,7 @@ typedef struct {
int32_t vgId; int32_t vgId;
int32_t vgVersion; int32_t vgVersion;
int8_t dropped; int8_t dropped;
int32_t toVgId;
char path[PATH_MAX + 20]; char path[PATH_MAX + 20];
} SWrapperCfg; } SWrapperCfg;
...@@ -55,6 +56,7 @@ typedef struct { ...@@ -55,6 +56,7 @@ typedef struct {
int32_t refCount; int32_t refCount;
int8_t dropped; int8_t dropped;
int8_t disable; int8_t disable;
int32_t toVgId;
char *path; char *path;
SVnode *pImpl; SVnode *pImpl;
SMultiWorker pWriteW; SMultiWorker pWriteW;
...@@ -70,6 +72,7 @@ typedef struct { ...@@ -70,6 +72,7 @@ typedef struct {
int32_t vnodeNum; int32_t vnodeNum;
int32_t opened; int32_t opened;
int32_t failed; int32_t failed;
bool updateVnodesList;
int32_t threadIndex; int32_t threadIndex;
TdThread thread; TdThread thread;
SVnodeMgmt *pMgmt; SVnodeMgmt *pMgmt;
......
...@@ -71,6 +71,8 @@ static int32_t vmDecodeVnodeList(SJson *pJson, SVnodeMgmt *pMgmt, SWrapperCfg ** ...@@ -71,6 +71,8 @@ static int32_t vmDecodeVnodeList(SJson *pJson, SVnodeMgmt *pMgmt, SWrapperCfg **
if (code < 0) goto _OVER; if (code < 0) goto _OVER;
tjsonGetInt32ValueFromDouble(vnode, "vgVersion", pCfg->vgVersion, code); tjsonGetInt32ValueFromDouble(vnode, "vgVersion", pCfg->vgVersion, code);
if (code < 0) goto _OVER; if (code < 0) goto _OVER;
tjsonGetInt32ValueFromDouble(vnode, "toVgId", pCfg->toVgId, code);
if (code < 0) goto _OVER;
snprintf(pCfg->path, sizeof(pCfg->path), "%s%svnode%d", pMgmt->path, TD_DIRSEP, pCfg->vgId); snprintf(pCfg->path, sizeof(pCfg->path), "%s%svnode%d", pMgmt->path, TD_DIRSEP, pCfg->vgId);
} }
...@@ -165,6 +167,7 @@ static int32_t vmEncodeVnodeList(SJson *pJson, SVnodeObj **ppVnodes, int32_t num ...@@ -165,6 +167,7 @@ static int32_t vmEncodeVnodeList(SJson *pJson, SVnodeObj **ppVnodes, int32_t num
if (tjsonAddDoubleToObject(vnode, "vgId", pVnode->vgId) < 0) return -1; if (tjsonAddDoubleToObject(vnode, "vgId", pVnode->vgId) < 0) return -1;
if (tjsonAddDoubleToObject(vnode, "dropped", pVnode->dropped) < 0) return -1; if (tjsonAddDoubleToObject(vnode, "dropped", pVnode->dropped) < 0) return -1;
if (tjsonAddDoubleToObject(vnode, "vgVersion", pVnode->vgVersion) < 0) return -1; if (tjsonAddDoubleToObject(vnode, "vgVersion", pVnode->vgVersion) < 0) return -1;
if (pVnode->toVgId && tjsonAddDoubleToObject(vnode, "toVgId", pVnode->toVgId) < 0) return -1;
if (tjsonAddItemToArray(vnodes, vnode) < 0) return -1; if (tjsonAddItemToArray(vnodes, vnode) < 0) return -1;
} }
...@@ -179,7 +182,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { ...@@ -179,7 +182,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) {
SVnodeObj **ppVnodes = NULL; SVnodeObj **ppVnodes = NULL;
char file[PATH_MAX] = {0}; char file[PATH_MAX] = {0};
char realfile[PATH_MAX] = {0}; char realfile[PATH_MAX] = {0};
snprintf(file, sizeof(file), "%s%svnodes.json.bak", pMgmt->path, TD_DIRSEP); snprintf(file, sizeof(file), "%s%svnodes_tmp.json", pMgmt->path, TD_DIRSEP);
snprintf(realfile, sizeof(realfile), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%svnodes.json", pMgmt->path, TD_DIRSEP);
int32_t numOfVnodes = 0; int32_t numOfVnodes = 0;
...@@ -226,4 +229,4 @@ _OVER: ...@@ -226,4 +229,4 @@ _OVER:
dError("failed to write vnodes file:%s since %s, vnodes:%d", realfile, terrstr(), numOfVnodes); dError("failed to write vnodes file:%s since %s, vnodes:%d", realfile, terrstr(), numOfVnodes);
} }
return code; return code;
} }
\ No newline at end of file
...@@ -484,10 +484,18 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -484,10 +484,18 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
int32_t srcVgId = req.srcVgId; int32_t srcVgId = req.srcVgId;
int32_t dstVgId = req.dstVgId; int32_t dstVgId = req.dstVgId;
SVnodeObj *pVnode = vmAcquireVnode(pMgmt, dstVgId);
if (pVnode != NULL) {
dError("vgId:%d, vnode already exist", dstVgId);
vmReleaseVnode(pMgmt, pVnode);
terrno = TSDB_CODE_VND_ALREADY_EXIST;
return -1;
}
dInfo("vgId:%d, start to alter vnode hashrange:[%u, %u], dstVgId:%d", req.srcVgId, req.hashBegin, req.hashEnd, dInfo("vgId:%d, start to alter vnode hashrange:[%u, %u], dstVgId:%d", req.srcVgId, req.hashBegin, req.hashEnd,
req.dstVgId); req.dstVgId);
pVnode = vmAcquireVnode(pMgmt, srcVgId);
SVnodeObj *pVnode = vmAcquireVnode(pMgmt, srcVgId);
if (pVnode == NULL) { if (pVnode == NULL) {
dError("vgId:%d, failed to alter hashrange since %s", srcVgId, terrstr()); dError("vgId:%d, failed to alter hashrange since %s", srcVgId, terrstr());
terrno = TSDB_CODE_VND_NOT_EXIST; terrno = TSDB_CODE_VND_NOT_EXIST;
...@@ -501,6 +509,13 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -501,6 +509,13 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
}; };
tstrncpy(wrapperCfg.path, pVnode->path, sizeof(wrapperCfg.path)); tstrncpy(wrapperCfg.path, pVnode->path, sizeof(wrapperCfg.path));
// prepare alter
pVnode->toVgId = dstVgId;
if (vmWriteVnodeListToFile(pMgmt) != 0) {
dError("vgId:%d, failed to write vnode list since %s", dstVgId, terrstr());
return -1;
}
dInfo("vgId:%d, close vnode", srcVgId); dInfo("vgId:%d, close vnode", srcVgId);
vmCloseVnode(pMgmt, pVnode, true); vmCloseVnode(pMgmt, pVnode, true);
...@@ -532,6 +547,7 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { ...@@ -532,6 +547,7 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
return -1; return -1;
} }
// complete alter
if (vmWriteVnodeListToFile(pMgmt) != 0) { if (vmWriteVnodeListToFile(pMgmt) != 0) {
dError("vgId:%d, failed to write vnode list since %s", dstVgId, terrstr()); dError("vgId:%d, failed to write vnode list since %s", dstVgId, terrstr());
return -1; return -1;
......
...@@ -158,6 +158,28 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal) ...@@ -158,6 +158,28 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
taosMemoryFree(pVnode); taosMemoryFree(pVnode);
} }
static int32_t vmRestoreVgroupId(SWrapperCfg *pCfg, STfs *pTfs) {
int32_t srcVgId = pCfg->vgId;
int32_t dstVgId = pCfg->toVgId;
if (dstVgId == 0) return 0;
char srcPath[TSDB_FILENAME_LEN];
char dstPath[TSDB_FILENAME_LEN];
snprintf(srcPath, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, srcVgId);
snprintf(dstPath, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, dstVgId);
int32_t vgId = vnodeRestoreVgroupId(srcPath, dstPath, srcVgId, dstVgId, pTfs);
if (vgId <= 0) {
dError("vgId:%d, failed to restore vgroup id. srcPath: %s", pCfg->vgId, srcPath);
return -1;
}
pCfg->vgId = vgId;
pCfg->toVgId = 0;
return 0;
}
static void *vmOpenVnodeInThread(void *param) { static void *vmOpenVnodeInThread(void *param) {
SVnodeThread *pThread = param; SVnodeThread *pThread = param;
SVnodeMgmt *pMgmt = pThread->pMgmt; SVnodeMgmt *pMgmt = pThread->pMgmt;
...@@ -174,17 +196,33 @@ static void *vmOpenVnodeInThread(void *param) { ...@@ -174,17 +196,33 @@ static void *vmOpenVnodeInThread(void *param) {
pMgmt->state.openVnodes, pMgmt->state.totalVnodes); pMgmt->state.openVnodes, pMgmt->state.totalVnodes);
tmsgReportStartup("vnode-open", stepDesc); tmsgReportStartup("vnode-open", stepDesc);
if (pCfg->toVgId) {
if (vmRestoreVgroupId(pCfg, pMgmt->pTfs) != 0) {
dError("vgId:%d, failed to restore vgroup id by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->failed++;
continue;
}
pThread->updateVnodesList = true;
}
snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pCfg->vgId); snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pCfg->vgId);
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb); SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
if (pImpl == NULL) { if (pImpl == NULL) {
dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex); dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->failed++; pThread->failed++;
} else { continue;
vmOpenVnode(pMgmt, pCfg, pImpl);
dInfo("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->opened++;
atomic_add_fetch_32(&pMgmt->state.openVnodes, 1);
} }
if (vmOpenVnode(pMgmt, pCfg, pImpl) != 0) {
dError("vgId:%d, failed to open vnode by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->failed++;
continue;
}
dInfo("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex);
pThread->opened++;
atomic_add_fetch_32(&pMgmt->state.openVnodes, 1);
} }
dInfo("thread:%d, numOfVnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, dInfo("thread:%d, numOfVnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened,
...@@ -242,6 +280,8 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) { ...@@ -242,6 +280,8 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) {
taosThreadAttrDestroy(&thAttr); taosThreadAttrDestroy(&thAttr);
} }
bool updateVnodesList = false;
for (int32_t t = 0; t < threadNum; ++t) { for (int32_t t = 0; t < threadNum; ++t) {
SVnodeThread *pThread = &threads[t]; SVnodeThread *pThread = &threads[t];
if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) { if (pThread->vnodeNum > 0 && taosCheckPthreadValid(pThread->thread)) {
...@@ -249,6 +289,7 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) { ...@@ -249,6 +289,7 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) {
taosThreadClear(&pThread->thread); taosThreadClear(&pThread->thread);
} }
taosMemoryFree(pThread->pCfgs); taosMemoryFree(pThread->pCfgs);
if (pThread->updateVnodesList) updateVnodesList = true;
} }
taosMemoryFree(threads); taosMemoryFree(threads);
taosMemoryFree(pCfgs); taosMemoryFree(pCfgs);
...@@ -256,10 +297,15 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) { ...@@ -256,10 +297,15 @@ static int32_t vmOpenVnodes(SVnodeMgmt *pMgmt) {
if (pMgmt->state.openVnodes != pMgmt->state.totalVnodes) { if (pMgmt->state.openVnodes != pMgmt->state.totalVnodes) {
dError("there are total vnodes:%d, opened:%d", pMgmt->state.totalVnodes, pMgmt->state.openVnodes); dError("there are total vnodes:%d, opened:%d", pMgmt->state.totalVnodes, pMgmt->state.openVnodes);
return -1; return -1;
} else {
dInfo("successfully opened %d vnodes", pMgmt->state.totalVnodes);
return 0;
} }
if (updateVnodesList && vmWriteVnodeListToFile(pMgmt) != 0) {
dError("failed to write vnode list since %s", terrstr());
return -1;
}
dInfo("successfully opened %d vnodes", pMgmt->state.totalVnodes);
return 0;
} }
static void *vmCloseVnodeInThread(void *param) { static void *vmCloseVnodeInThread(void *param) {
......
...@@ -1217,6 +1217,7 @@ static int32_t mndAddAlterVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, i ...@@ -1217,6 +1217,7 @@ static int32_t mndAddAlterVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, i
action.pCont = pReq; action.pCont = pReq;
action.contLen = contLen; action.contLen = contLen;
action.msgType = TDMT_VND_ALTER_HASHRANGE; action.msgType = TDMT_VND_ALTER_HASHRANGE;
action.acceptableCode = TSDB_CODE_VND_ALREADY_EXIST;
if (mndTransAppendRedoAction(pTrans, &action) != 0) { if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(pReq); taosMemoryFree(pReq);
......
...@@ -54,6 +54,7 @@ void vnodeCleanup(); ...@@ -54,6 +54,7 @@ void vnodeCleanup();
int32_t vnodeCreate(const char *path, SVnodeCfg *pCfg, STfs *pTfs); int32_t vnodeCreate(const char *path, SVnodeCfg *pCfg, STfs *pTfs);
int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *pTfs); int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *pTfs);
int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnodeHashRangeReq *pReq, STfs *pTfs); int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnodeHashRangeReq *pReq, STfs *pTfs);
int32_t vnodeRestoreVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, STfs *pTfs);
void vnodeDestroy(const char *path, STfs *pTfs); void vnodeDestroy(const char *path, STfs *pTfs);
SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb); SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb);
void vnodePreClose(SVnode *pVnode); void vnodePreClose(SVnode *pVnode);
......
...@@ -136,14 +136,13 @@ static int32_t vnodeVgroupIdLen(int32_t vgId) { ...@@ -136,14 +136,13 @@ static int32_t vnodeVgroupIdLen(int32_t vgId) {
} }
int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, STfs *pTfs) { int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, STfs *pTfs) {
int32_t ret = tfsRename(pTfs, srcPath, dstPath); int32_t ret = 0;
if (ret != 0) return ret;
char oldRname[TSDB_FILENAME_LEN] = {0}; char oldRname[TSDB_FILENAME_LEN] = {0};
char newRname[TSDB_FILENAME_LEN] = {0}; char newRname[TSDB_FILENAME_LEN] = {0};
char tsdbPath[TSDB_FILENAME_LEN] = {0}; char tsdbPath[TSDB_FILENAME_LEN] = {0};
char tsdbFilePrefix[TSDB_FILENAME_LEN] = {0}; char tsdbFilePrefix[TSDB_FILENAME_LEN] = {0};
snprintf(tsdbPath, TSDB_FILENAME_LEN, "%s%stsdb", dstPath, TD_DIRSEP); snprintf(tsdbPath, TSDB_FILENAME_LEN, "%s%stsdb", srcPath, TD_DIRSEP);
snprintf(tsdbFilePrefix, TSDB_FILENAME_LEN, "tsdb%sv", TD_DIRSEP); snprintf(tsdbFilePrefix, TSDB_FILENAME_LEN, "tsdb%sv", TD_DIRSEP);
STfsDir *tsdbDir = tfsOpendir(pTfs, tsdbPath); STfsDir *tsdbDir = tfsOpendir(pTfs, tsdbPath);
...@@ -168,7 +167,7 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr ...@@ -168,7 +167,7 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr
ret = tfsRename(pTfs, tsdbFile->rname, newRname); ret = tfsRename(pTfs, tsdbFile->rname, newRname);
if (ret != 0) { if (ret != 0) {
vInfo("vgId:%d, failed to rename file from %s to %s since %s", dstVgId, tsdbFile->rname, newRname, terrstr()); vError("vgId:%d, failed to rename file from %s to %s since %s", dstVgId, tsdbFile->rname, newRname, terrstr());
tfsClosedir(tsdbDir); tfsClosedir(tsdbDir);
return ret; return ret;
} }
...@@ -176,6 +175,21 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr ...@@ -176,6 +175,21 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr
} }
tfsClosedir(tsdbDir); tfsClosedir(tsdbDir);
vInfo("vgId:%d, rename dir from %s to %s", dstVgId, srcPath, dstPath);
ret = tfsRename(pTfs, srcPath, dstPath);
if (ret != 0) {
vError("vgId:%d, failed to rename dir from %s to %s since %s", dstVgId, srcPath, dstPath, terrstr());
}
return ret;
}
int32_t vnodeGetAbsDir(const char *relPath, STfs *pTfs, char *buf, size_t bufLen) {
if (pTfs) {
snprintf(buf, bufLen, "%s%s%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, relPath);
} else {
snprintf(buf, bufLen, "%s", relPath);
}
return 0; return 0;
} }
...@@ -184,13 +198,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod ...@@ -184,13 +198,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod
char dir[TSDB_FILENAME_LEN] = {0}; char dir[TSDB_FILENAME_LEN] = {0};
int32_t ret = 0; int32_t ret = 0;
if (pTfs) { vnodeGetAbsDir(srcPath, pTfs, dir, TSDB_FILENAME_LEN);
snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pTfs), TD_DIRSEP, srcPath);
} else {
snprintf(dir, TSDB_FILENAME_LEN, "%s", srcPath);
}
// todo add stat file to handle exception while vnode open
ret = vnodeLoadInfo(dir, &info); ret = vnodeLoadInfo(dir, &info);
if (ret < 0) { if (ret < 0) {
...@@ -245,6 +253,42 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod ...@@ -245,6 +253,42 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod
return 0; return 0;
} }
int32_t vnodeRestoreVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, STfs *pTfs) {
SVnodeInfo info = {0};
char dir[TSDB_FILENAME_LEN] = {0};
vnodeGetAbsDir(dstPath, pTfs, dir, TSDB_FILENAME_LEN);
if (vnodeLoadInfo(dir, &info) == 0) {
if (info.config.vgId != dstVgId) {
vError("vgId:%d, unexpected vnode config.vgId:%d", dstVgId, info.config.vgId);
return -1;
}
return dstVgId;
}
vnodeGetAbsDir(srcPath, pTfs, dir, TSDB_FILENAME_LEN);
if (vnodeLoadInfo(dir, &info) < 0) {
vError("vgId:%d, failed to read vnode config from %s since %s", srcVgId, srcPath, tstrerror(terrno));
return -1;
}
if (info.config.vgId == srcVgId) {
vInfo("vgId:%d, rollback alter hashrange", srcVgId);
return srcVgId;
} else if (info.config.vgId != dstVgId) {
vError("vgId:%d, unexpected vnode config.vgId:%d", dstVgId, info.config.vgId);
return -1;
}
vInfo("vgId:%d, rename %s to %s", dstVgId, srcPath, dstPath);
if (vnodeRenameVgroupId(srcPath, dstPath, srcVgId, dstVgId, pTfs) < 0) {
vError("vgId:%d, failed to rename vnode from %s to %s since %s", dstVgId, srcPath, dstPath, tstrerror(terrno));
return -1;
}
return dstVgId;
}
void vnodeDestroy(const char *path, STfs *pTfs) { void vnodeDestroy(const char *path, STfs *pTfs) {
vInfo("path:%s is removed while destroy vnode", path); vInfo("path:%s is removed while destroy vnode", path);
tfsRmdir(pTfs, path); tfsRmdir(pTfs, path);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册