/*-------------------------------------------------------------------------
 *
 * gddfuncs.c
 *    Global DeadLock Detector - Helper Functions
 *
 *
 * Copyright (c) 2018-Present Pivotal Software, Inc.
 *
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "cdb/cdbdisp_query.h"
#include "cdb/cdbdispatchresult.h"
#include "cdb/cdbgang.h"
#include "cdb/cdbvars.h"
#include "funcapi.h"
#include "libpq-fe.h"
#include "storage/lock.h"
#include "storage/proc.h"
#include "utils/builtins.h"

typedef struct GddWaitStatusCtx GddWaitStatusCtx;

struct GddWaitStatusCtx
{
    LockData       *lockData;
    CdbPgResults    cdb_pgresults;

    int             waiter;
    int             holder;

    int             seg;
    int             row;
};

static bool isGranted(LockInstanceData *lock);
static bool lockEqual(LockInstanceData *lock1, LockInstanceData *lock2);
static bool lockIsHoldTillEndXact(LockInstanceData *lock);

static bool
isGranted(LockInstanceData *lock)
{
    return lock->holdMask != 0;
}

static bool
lockEqual(LockInstanceData *lock1, LockInstanceData *lock2)
{
    LOCKTAG    *tag1 = &lock1->locktag;
    LOCKTAG    *tag2 = &lock2->locktag;

    return memcmp(tag1, tag2, sizeof(LOCKTAG)) == 0;
}

static bool
lockIsHoldTillEndXact(LockInstanceData *lock)
{
    LOCKTAG    *tag = &lock->locktag;

    if (lock->holdTillEndXact)
        return true;

    if (tag->locktag_type == LOCKTAG_TRANSACTION)
        return true;

    /* FIXME: other types */

    return false;
}
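
/*
 * Example usage (an illustrative sketch; the actual rows depend on the
 * lock waits currently present in the cluster):
 *
 *     SELECT * FROM pg_catalog.pg_dist_wait_status();
 *
 * Each returned row describes one waiting relation as
 * (segid, waiter_dxid, holder_dxid, holdTillEndXact), where segid is the
 * GpIdentity.segindex of the reporting node.
 */
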
/*
 * pg_dist_wait_status - produce a view with one row per waiting relation
 */
Datum
pg_dist_wait_status(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    HeapTuple   tuple;
    Datum       result;
    Datum       values[4];
    bool        nulls[4];
    GddWaitStatusCtx *ctx;

    MemSet(values, 0, sizeof(values));
    MemSet(nulls, false, sizeof(nulls));

    if (SRF_IS_FIRSTCALL())
    {
        TupleDesc   tupdesc;
        MemoryContext oldcontext;

        /* create a function context for cross-call persistence */
        funcctx = SRF_FIRSTCALL_INIT();

        /*
         * switch to memory context appropriate for multiple function calls
         */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* build tupdesc for result tuples */
        tupdesc = CreateTemplateTupleDesc(4, false);
        TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 2, "waiter_dxid",
                           XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 3, "holder_dxid",
                           XIDOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 4, "holdTillEndXact",
                           BOOLOID, -1, 0);

        funcctx->tuple_desc = BlessTupleDesc(tupdesc);

        ctx = palloc(sizeof(*ctx));
        funcctx->user_fctx = ctx;

        ctx->cdb_pgresults.pg_results = NULL;
        ctx->cdb_pgresults.numResults = 0;
        ctx->waiter = 0;
        ctx->holder = 0;
        ctx->seg = 0;
        ctx->row = 0;

        /*
         * We need to collect the wait status from all the segments,
         * so a dispatch is needed on the QD.
         */
        if (Gp_role == GP_ROLE_DISPATCH)
        {
            int         i;

            CdbDispatchCommand("SELECT * FROM pg_catalog.pg_dist_wait_status()",
                               DF_NONE, &ctx->cdb_pgresults);

            if (ctx->cdb_pgresults.numResults == 0)
                elog(ERROR, "pg_dist_wait_status() didn't get back any data from the segDBs");

            for (i = 0; i < ctx->cdb_pgresults.numResults; i++)
            {
                /*
                 * Any error here should have propagated into errbuf,
                 * so we shouldn't ever see anything other than tuples_ok
                 * here.  But check to be sure.
                 */
                if (PQresultStatus(ctx->cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK)
                {
                    cdbdisp_clearCdbPgResults(&ctx->cdb_pgresults);
                    elog(ERROR, "pg_dist_wait_status(): resultStatus not tuples_Ok");
                }

                /*
                 * The result set had better match the tupledesc we just
                 * built above.
                 */
                if (PQnfields(ctx->cdb_pgresults.pg_results[i]) != tupdesc->natts)
                    elog(ERROR, "unexpected number of columns returned from pg_dist_wait_status() on segment (%d, expected %d)",
                         PQnfields(ctx->cdb_pgresults.pg_results[i]),
                         tupdesc->natts);
            }
        }

        /*
         * QD information must be collected after the dispatch.  Thus the
         * wait relations on the QD returned by this function are
         * guaranteed to be collected later than the wait relations on the
         * QEs, and this ordering helps when reducing edges on the QD.
         *
         * Rule: when we are about to reduce a dotted edge on the QD we
         * have to be careful: if the lock-holding vertex (transaction) of
         * this QD edge is blocked on any QE, we should not reduce the
         * edge, at least not yet.
         *
         * If any QE's wait relations were collected later than the QD's,
         * we could not rely on the above rule.
         */
        ctx->lockData = GetLockStatusData();

        MemoryContextSwitchTo(oldcontext);
    }

    funcctx = SRF_PERCALL_SETUP();
    ctx = funcctx->user_fctx;

    /*
     * Find out all the waiting relations.
     *
     * A waiting relation satisfies:
     *
     *     (waiter.granted == false &&
     *      holder.granted == true &&
     *      waiter.waitLockMode conflicts with holder.holdMask &&
     *      waiter.pid != holder.pid &&
     *      waiter.locktype == holder.locktype &&
     *      waiter.locktags == holder.locktags)
     *
     * The actual logic here is to perform a nested loop like this:
     *
     *     for (waiter = 0; waiter < nelements; waiter++)
     *         for (holder = 0; holder < nelements; holder++)
     *             if (is_a_relation(waiter, holder))
     *                 record the relation;
     *
     * But as we can only return one relation per call, we simulate the
     * loops manually with the waiter/holder cursors kept in ctx.
     *
     * One transaction can wait for more than one transaction on the same
     * segment.  For example, if Tx A, B and C all open a relation in a
     * shared lock mode and Tx D tries to open the same relation in
     * exclusive mode, D will be blocked, producing three waiting edges
     * (D --> A, D --> B, D --> C).
     */
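    /*
     * A concrete sketch of one detected edge (assuming the standard
     * heavyweight lock modes): if the waiter is blocked requesting
     * ExclusiveLock on a relation while the holder's holdMask already
     * contains the conflicting ShareLock on the same locktag, the checks
     * below report an edge waiter --> holder.
     */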
    while (ctx->waiter < ctx->lockData->nelements)
    {
        int         waiter = ctx->waiter;
        LockInstanceData *w_lock = &ctx->lockData->locks[waiter];

        /* A waiter should have granted == false */
        if (isGranted(w_lock))
        {
            ctx->waiter++;
            ctx->holder = 0;
            continue;
        }

        while (ctx->holder < ctx->lockData->nelements)
        {
            TransactionId w_dxid;
            TransactionId h_dxid;
            int         holder = ctx->holder++;
            LockInstanceData *h_lock = &ctx->lockData->locks[holder];

            if (holder == waiter)
                continue;

            /* A holder should have granted == true */
            if (!isGranted(h_lock))
                continue;

            if (w_lock->pid == h_lock->pid)
                continue;

            /*
             * If the waiter and holder have different lock methods, they
             * cannot conflict.
             */
            if (w_lock->locktag.locktag_lockmethodid !=
                h_lock->locktag.locktag_lockmethodid)
                continue;

            /* If the waiter and holder do not conflict, skip this edge */
            if (!CheckWaitLockModeConflictHoldMask(w_lock->locktag,
                                                   w_lock->waitLockMode,
                                                   h_lock->holdMask))
                continue;

            if (!lockEqual(w_lock, h_lock))
                continue;

            /* A valid waiting relation is found */

            /* Find out dxid differently on QD and QE */
            w_dxid = w_lock->distribXid;
            h_dxid = h_lock->distribXid;

            values[0] = Int32GetDatum(GpIdentity.segindex);
            values[1] = TransactionIdGetDatum(w_dxid);
            values[2] = TransactionIdGetDatum(h_dxid);
            values[3] = BoolGetDatum(lockIsHoldTillEndXact(w_lock));

            tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
            result = HeapTupleGetDatum(tuple);
            SRF_RETURN_NEXT(funcctx, result);
        }

        ctx->waiter++;
        ctx->holder = 0;
    }

    if (Gp_role == GP_ROLE_DISPATCH)
    {
        while (ctx->seg < ctx->cdb_pgresults.numResults)
        {
            int         seg = ctx->seg;
            struct pg_result *pg_result = ctx->cdb_pgresults.pg_results[seg];

            while (ctx->row < PQntuples(pg_result))
            {
                int         row = ctx->row++;

                values[0] = Int32GetDatum(atoi(PQgetvalue(pg_result, row, 0)));
                values[1] = TransactionIdGetDatum(atoi(PQgetvalue(pg_result, row, 1)));
                values[2] = TransactionIdGetDatum(atoi(PQgetvalue(pg_result, row, 2)));
                values[3] = BoolGetDatum(strncmp(PQgetvalue(pg_result, row, 3), "t", 1) == 0);

                tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
                result = HeapTupleGetDatum(tuple);
                SRF_RETURN_NEXT(funcctx, result);
            }

            ctx->seg++;
            ctx->row = 0;
        }
    }

    cdbdisp_clearCdbPgResults(&ctx->cdb_pgresults);

    if (Gp_role == GP_ROLE_DISPATCH)
        DisconnectAndDestroyAllGangs(false);

    SRF_RETURN_DONE(funcctx);
}