/* * Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "meta.h" #define META_CACHE_BASE_BUCKET 1024 #define META_CACHE_STATS_BUCKET 16 // (uid , suid) : child table // (uid, 0) : normal table // (suid, suid) : super table typedef struct SMetaCacheEntry SMetaCacheEntry; struct SMetaCacheEntry { SMetaCacheEntry* next; SMetaInfo info; }; typedef struct SMetaStbStatsEntry { struct SMetaStbStatsEntry* next; SMetaStbStats info; } SMetaStbStatsEntry; typedef struct STagFilterResEntry { uint64_t suid; // uid for super table SList list; // the linked list of md5 digest, extracted from the serialized tag query condition uint32_t qTimes;// queried times for current super table } STagFilterResEntry; struct SMetaCache { // child, normal, super, table entry cache struct SEntryCache { int32_t nEntry; int32_t nBucket; SMetaCacheEntry** aBucket; } sEntryCache; // stable stats cache struct SStbStatsCache { int32_t nEntry; int32_t nBucket; SMetaStbStatsEntry** aBucket; } sStbStatsCache; // query cache struct STagFilterResCache { SHashObj* pTableEntry; SLRUCache* pUidResCache; uint64_t keyBuf[3]; } sTagFilterResCache; }; static void entryCacheClose(SMeta* pMeta) { if (pMeta->pCache) { // close entry cache for (int32_t iBucket = 0; iBucket < pMeta->pCache->sEntryCache.nBucket; iBucket++) { SMetaCacheEntry* pEntry = pMeta->pCache->sEntryCache.aBucket[iBucket]; while (pEntry) { SMetaCacheEntry* tEntry = pEntry->next; taosMemoryFree(pEntry); pEntry = tEntry; } } taosMemoryFree(pMeta->pCache->sEntryCache.aBucket); } } static void statsCacheClose(SMeta* pMeta) { if (pMeta->pCache) { // close entry cache for (int32_t iBucket = 0; iBucket < pMeta->pCache->sStbStatsCache.nBucket; iBucket++) { SMetaStbStatsEntry* pEntry = pMeta->pCache->sStbStatsCache.aBucket[iBucket]; while (pEntry) { SMetaStbStatsEntry* tEntry = pEntry->next; taosMemoryFree(pEntry); pEntry = tEntry; } } taosMemoryFree(pMeta->pCache->sStbStatsCache.aBucket); } } int32_t metaCacheOpen(SMeta* pMeta) { int32_t code = 0; SMetaCache* pCache = NULL; pCache = (SMetaCache*)taosMemoryMalloc(sizeof(SMetaCache)); if (pCache == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } // open entry cache pCache->sEntryCache.nEntry = 0; pCache->sEntryCache.nBucket = META_CACHE_BASE_BUCKET; pCache->sEntryCache.aBucket = (SMetaCacheEntry**)taosMemoryCalloc(pCache->sEntryCache.nBucket, sizeof(SMetaCacheEntry*)); if (pCache->sEntryCache.aBucket == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } // open stats cache pCache->sStbStatsCache.nEntry = 0; pCache->sStbStatsCache.nBucket = META_CACHE_STATS_BUCKET; pCache->sStbStatsCache.aBucket = (SMetaStbStatsEntry**)taosMemoryCalloc(pCache->sStbStatsCache.nBucket, sizeof(SMetaStbStatsEntry*)); if (pCache->sStbStatsCache.aBucket == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err2; } pCache->sTagFilterResCache.pUidResCache = taosLRUCacheInit(5*1024*1024, -1, 0.5); if (pCache->sTagFilterResCache.pUidResCache == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err2; } pCache->sTagFilterResCache.pTableEntry = taosHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), false, HASH_NO_LOCK); if (pCache->sTagFilterResCache.pTableEntry == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err2; } pMeta->pCache = pCache; return code; _err2: entryCacheClose(pMeta); _err: taosMemoryFree(pCache); metaError("vgId:%d, meta open cache failed since %s", TD_VID(pMeta->pVnode), tstrerror(code)); return code; } void metaCacheClose(SMeta* pMeta) { if (pMeta->pCache) { entryCacheClose(pMeta); statsCacheClose(pMeta); taosHashCleanup(pMeta->pCache->sTagFilterResCache.pTableEntry); taosLRUCacheCleanup(pMeta->pCache->sTagFilterResCache.pUidResCache); taosMemoryFree(pMeta->pCache->sTagFilterResCache.keyBuf); taosMemoryFree(pMeta->pCache); pMeta->pCache = NULL; } } static int32_t metaRehashCache(SMetaCache* pCache, int8_t expand) { int32_t code = 0; int32_t nBucket; if (expand) { nBucket = pCache->sEntryCache.nBucket * 2; } else { nBucket = pCache->sEntryCache.nBucket / 2; } SMetaCacheEntry** aBucket = (SMetaCacheEntry**)taosMemoryCalloc(nBucket, sizeof(SMetaCacheEntry*)); if (aBucket == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } // rehash for (int32_t iBucket = 0; iBucket < pCache->sEntryCache.nBucket; iBucket++) { SMetaCacheEntry* pEntry = pCache->sEntryCache.aBucket[iBucket]; while (pEntry) { SMetaCacheEntry* pTEntry = pEntry->next; pEntry->next = aBucket[TABS(pEntry->info.uid) % nBucket]; aBucket[TABS(pEntry->info.uid) % nBucket] = pEntry; pEntry = pTEntry; } } // final set taosMemoryFree(pCache->sEntryCache.aBucket); pCache->sEntryCache.nBucket = nBucket; pCache->sEntryCache.aBucket = aBucket; _exit: return code; } int32_t metaCacheUpsert(SMeta* pMeta, SMetaInfo* pInfo) { int32_t code = 0; // ASSERT(metaIsWLocked(pMeta)); // search SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(pInfo->uid) % pCache->sEntryCache.nBucket; SMetaCacheEntry** ppEntry = &pCache->sEntryCache.aBucket[iBucket]; while (*ppEntry && (*ppEntry)->info.uid != pInfo->uid) { ppEntry = &(*ppEntry)->next; } if (*ppEntry) { // update ASSERT(pInfo->suid == (*ppEntry)->info.suid); if (pInfo->version > (*ppEntry)->info.version) { (*ppEntry)->info.version = pInfo->version; (*ppEntry)->info.skmVer = pInfo->skmVer; } } else { // insert if (pCache->sEntryCache.nEntry >= pCache->sEntryCache.nBucket) { code = metaRehashCache(pCache, 1); if (code) goto _exit; iBucket = TABS(pInfo->uid) % pCache->sEntryCache.nBucket; } SMetaCacheEntry* pEntryNew = (SMetaCacheEntry*)taosMemoryMalloc(sizeof(*pEntryNew)); if (pEntryNew == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } pEntryNew->info = *pInfo; pEntryNew->next = pCache->sEntryCache.aBucket[iBucket]; pCache->sEntryCache.aBucket[iBucket] = pEntryNew; pCache->sEntryCache.nEntry++; } _exit: return code; } int32_t metaCacheDrop(SMeta* pMeta, int64_t uid) { int32_t code = 0; SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(uid) % pCache->sEntryCache.nBucket; SMetaCacheEntry** ppEntry = &pCache->sEntryCache.aBucket[iBucket]; while (*ppEntry && (*ppEntry)->info.uid != uid) { ppEntry = &(*ppEntry)->next; } SMetaCacheEntry* pEntry = *ppEntry; if (pEntry) { *ppEntry = pEntry->next; taosMemoryFree(pEntry); pCache->sEntryCache.nEntry--; if (pCache->sEntryCache.nEntry < pCache->sEntryCache.nBucket / 4 && pCache->sEntryCache.nBucket > META_CACHE_BASE_BUCKET) { code = metaRehashCache(pCache, 0); if (code) goto _exit; } } else { code = TSDB_CODE_NOT_FOUND; } _exit: return code; } int32_t metaCacheGet(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo) { int32_t code = 0; SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(uid) % pCache->sEntryCache.nBucket; SMetaCacheEntry* pEntry = pCache->sEntryCache.aBucket[iBucket]; while (pEntry && pEntry->info.uid != uid) { pEntry = pEntry->next; } if (pEntry) { *pInfo = pEntry->info; } else { code = TSDB_CODE_NOT_FOUND; } return code; } static int32_t metaRehashStatsCache(SMetaCache* pCache, int8_t expand) { int32_t code = 0; int32_t nBucket; if (expand) { nBucket = pCache->sStbStatsCache.nBucket * 2; } else { nBucket = pCache->sStbStatsCache.nBucket / 2; } SMetaStbStatsEntry** aBucket = (SMetaStbStatsEntry**)taosMemoryCalloc(nBucket, sizeof(SMetaStbStatsEntry*)); if (aBucket == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } // rehash for (int32_t iBucket = 0; iBucket < pCache->sStbStatsCache.nBucket; iBucket++) { SMetaStbStatsEntry* pEntry = pCache->sStbStatsCache.aBucket[iBucket]; while (pEntry) { SMetaStbStatsEntry* pTEntry = pEntry->next; pEntry->next = aBucket[TABS(pEntry->info.uid) % nBucket]; aBucket[TABS(pEntry->info.uid) % nBucket] = pEntry; pEntry = pTEntry; } } // final set taosMemoryFree(pCache->sStbStatsCache.aBucket); pCache->sStbStatsCache.nBucket = nBucket; pCache->sStbStatsCache.aBucket = aBucket; _exit: return code; } int32_t metaStatsCacheUpsert(SMeta* pMeta, SMetaStbStats* pInfo) { int32_t code = 0; // ASSERT(metaIsWLocked(pMeta)); // search SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(pInfo->uid) % pCache->sStbStatsCache.nBucket; SMetaStbStatsEntry** ppEntry = &pCache->sStbStatsCache.aBucket[iBucket]; while (*ppEntry && (*ppEntry)->info.uid != pInfo->uid) { ppEntry = &(*ppEntry)->next; } if (*ppEntry) { // update (*ppEntry)->info.ctbNum = pInfo->ctbNum; } else { // insert if (pCache->sStbStatsCache.nEntry >= pCache->sStbStatsCache.nBucket) { code = metaRehashStatsCache(pCache, 1); if (code) goto _exit; iBucket = TABS(pInfo->uid) % pCache->sStbStatsCache.nBucket; } SMetaStbStatsEntry* pEntryNew = (SMetaStbStatsEntry*)taosMemoryMalloc(sizeof(*pEntryNew)); if (pEntryNew == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } pEntryNew->info = *pInfo; pEntryNew->next = pCache->sStbStatsCache.aBucket[iBucket]; pCache->sStbStatsCache.aBucket[iBucket] = pEntryNew; pCache->sStbStatsCache.nEntry++; } _exit: return code; } int32_t metaStatsCacheDrop(SMeta* pMeta, int64_t uid) { int32_t code = 0; SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(uid) % pCache->sStbStatsCache.nBucket; SMetaStbStatsEntry** ppEntry = &pCache->sStbStatsCache.aBucket[iBucket]; while (*ppEntry && (*ppEntry)->info.uid != uid) { ppEntry = &(*ppEntry)->next; } SMetaStbStatsEntry* pEntry = *ppEntry; if (pEntry) { *ppEntry = pEntry->next; taosMemoryFree(pEntry); pCache->sStbStatsCache.nEntry--; if (pCache->sStbStatsCache.nEntry < pCache->sStbStatsCache.nBucket / 4 && pCache->sStbStatsCache.nBucket > META_CACHE_STATS_BUCKET) { code = metaRehashStatsCache(pCache, 0); if (code) goto _exit; } } else { code = TSDB_CODE_NOT_FOUND; } _exit: return code; } int32_t metaStatsCacheGet(SMeta* pMeta, int64_t uid, SMetaStbStats* pInfo) { int32_t code = TSDB_CODE_SUCCESS; SMetaCache* pCache = pMeta->pCache; int32_t iBucket = TABS(uid) % pCache->sStbStatsCache.nBucket; SMetaStbStatsEntry* pEntry = pCache->sStbStatsCache.aBucket[iBucket]; while (pEntry && pEntry->info.uid != uid) { pEntry = pEntry->next; } if (pEntry) { *pInfo = pEntry->info; } else { code = TSDB_CODE_NOT_FOUND; } return code; } int32_t metaGetCachedTableUidList(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray* pList1, bool* acquireRes) { uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; // generate the composed key for LRU cache SLRUCache* pCache = pMeta->pCache->sTagFilterResCache.pUidResCache; pBuf[0] = suid; memcpy(&pBuf[1], pKey, keyLen); int32_t len = keyLen + sizeof(uint64_t); LRUHandle *pHandle = taosLRUCacheLookup(pCache, pBuf, len); if (pHandle == NULL) { *acquireRes = 0; return TSDB_CODE_SUCCESS; } else { // do some book mark work after acquiring the filter result from cache STagFilterResEntry* pEntry = taosHashGet(pMeta->pCache->sTagFilterResCache.pTableEntry, &suid, sizeof(uint64_t)); ASSERT(pEntry != NULL); *acquireRes = 1; const char* p = taosLRUCacheValue(pMeta->pCache->sTagFilterResCache.pUidResCache, pHandle); int32_t size = *(int32_t*) p; taosArrayAddBatch(pList1, p + sizeof(int32_t), size); pEntry->qTimes += 1; // check if scanning all items are necessary or not if (pEntry->qTimes >= 5000 && TD_DLIST_NELES(&pEntry->list) > 10) { SArray* pList = taosArrayInit(64, POINTER_BYTES); SListIter iter = {0}; tdListInitIter(&pEntry->list, &iter, TD_LIST_FORWARD); SListNode* pNode = NULL; while ((pNode = tdListNext(&iter)) != NULL) { memcpy(pBuf + sizeof(suid), pNode->data, keyLen); // check whether it is existed in LRU cache, and remove it from linked list if not. LRUHandle* pRes = taosLRUCacheLookup(pCache, pBuf, len); if (pRes == NULL) { // remove the item in the linked list taosArrayPush(pList, &pNode); } else { taosLRUCacheRelease(pCache, pRes, false); } } // remove the keys, of which query uid lists have been replaced already. size_t s = taosArrayGetSize(pList); for(int32_t i = 0; i < s; ++i) { SListNode** p1 = taosArrayGet(pList, i); tdListPopNode(&pEntry->list, *p1); } pEntry->qTimes = 0; // reset the query times } } return TSDB_CODE_SUCCESS; } // check both the payload size and selectivity ratio int32_t metaUidFilterCachePut(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t payloadLen, double selectivityRatio) { if (selectivityRatio > tsSelectivityRatio) { return TSDB_CODE_SUCCESS; } if (payloadLen > tsTagFilterResCacheSize) { return TSDB_CODE_SUCCESS; } SLRUCache* pCache = pMeta->pCache->sTagFilterResCache.pUidResCache; SHashObj* pTableEntry = pMeta->pCache->sTagFilterResCache.pTableEntry; void* pEntry = taosHashGet(pMeta->pCache->sTagFilterResCache.pTableEntry, &suid, sizeof(uint64_t)); if (pEntry == NULL) { STagFilterResEntry* p = taosMemoryMalloc(sizeof(STagFilterResEntry)); p->qTimes = 0; tdListInit(&p->list, keyLen); taosHashPut(pTableEntry, &suid, sizeof(uint64_t), pEntry, POINTER_BYTES); pEntry = &p; } tdListAppend(&(*(STagFilterResEntry**)pEntry)->list, pKey); uint64_t* pBuf = pMeta->pCache->sTagFilterResCache.keyBuf; pBuf[0] = suid; memcpy(&pBuf[1], pKey, keyLen); ASSERT(sizeof(uint64_t) + keyLen == 24); // add to cache. taosLRUCacheInsert(pCache, pBuf, sizeof(uint64_t) + keyLen, pPayload, payloadLen, NULL, NULL, TAOS_LRU_PRIORITY_LOW); return TSDB_CODE_SUCCESS; } // remove the lru cache that are expired due to the tags value update, or creating, or dropping, of child tables int32_t metaUidCacheClear(SMeta* pMeta, uint64_t suid) { STagFilterResEntry* pEntry = taosHashGet(pMeta->pCache->sTagFilterResCache.pTableEntry, &suid, sizeof(uint64_t)); if (pEntry == NULL) { return TSDB_CODE_SUCCESS; } int32_t keyLen = sizeof(uint64_t) + 128; char* p = taosMemoryMalloc(keyLen); *(uint64_t*)p = pEntry->suid; SListIter iter = {0}; tdListInitIter(&pEntry->list, &iter, TD_LIST_FORWARD); SListNode* pNode = NULL; while ((pNode = tdListNext(&iter)) != NULL) { memcpy(p + sizeof(suid), pNode->data, 128); taosLRUCacheErase(pMeta->pCache->sTagFilterResCache.pUidResCache, p, keyLen); } pEntry->qTimes = 0; tdListEmpty(&pEntry->list); return TSDB_CODE_SUCCESS; }