Enable Sort operator to print sort method on explain analyze. (#1753)

This commit also resolves #1646.
It ports only the function `tuplesort_get_stats` in tuplesort.c from the
Postgres commit
https://github.com/postgres/postgres/blob/9bd27b7c9e998087f390774bd0f43916813a2847/src/backend/utils/sort/tuplesort.c
Link to gpdb-dev discussion - https://groups.google.com/a/greenplum.org/forum/#!topic/gpdb-dev/V-zIshnNzyE
上级 e4217b8d
......@@ -28,6 +28,53 @@
#include "utils/vmem_tracker.h"
#include "parser/parsetree.h"
/*
 * Number of sort methods that have a printable name below; this is the
 * size of the second dimension of the sortSpaceUsed arrays.
 */
#define NUM_SORT_METHOD 5
/*
 * Printable sort method names. String2ExplainSortMethod() compares the
 * name reported by tuplesort against these, so they must match the strings
 * set by tuplesort_get_stats / tuplesort_get_stats_mk.
 *
 * NOTE(review): the default case of tuplesort_get_stats(_mk) appears to
 * report "still in progress", which does not equal IN_PROGRESS_SORT_STR --
 * confirm which spelling is intended.
 */
#define TOP_N_HEAP_SORT_STR "top-N heapsort"
#define QUICK_SORT_STR "quicksort"
#define EXTERNAL_SORT_STR "external sort"
#define EXTERNAL_MERGE_STR "external merge"
#define IN_PROGRESS_SORT_STR "sort still in progress"
/*
 * Number of sort space types; this is the size of the first dimension of
 * the sortSpaceUsed arrays.
 */
#define NUM_SORT_SPACE_TYPE 2
/* Printable sort space type names. */
#define MEMORY_STR_SORT_SPACE_TYPE "Memory"
#define DISK_STR_SORT_SPACE_TYPE "Disk"
/*
* Different sort methods in GPDB.
*
* Make sure to update NUM_SORT_METHOD when this enum changes.
* The enum value minus 1 is used as an index into the second dimension of
* the array sortSpaceUsed in struct CdbExplain_NodeSummary, and into
* sort_method_enum_str. UNINITALIZED_SORT (0) therefore has no slot in
* either array and must be filtered out before indexing.
*/
typedef enum
{
UNINITALIZED_SORT = 0,
TOP_N_HEAP_SORT = 1,
QUICK_SORT = 2,
EXTERNAL_SORT = 3,
EXTERNAL_MERGE = 4,
IN_PROGRESS_SORT = 5
} ExplainSortMethod;
/*
 * Kind of space a sort used (memory or disk).
 *
 * Make sure to update NUM_SORT_SPACE_TYPE when this enum changes.
 * The enum value minus 1 is used as an index into the first dimension of
 * the sortSpaceUsed arrays; UNINITIALIZED_SORT_SPACE_TYPE (0) has no slot.
 */
typedef enum
{
UNINITIALIZED_SORT_SPACE_TYPE = 0,
MEMORY_SORT_SPACE_TYPE = 1,
DISK_SORT_SPACE_TYPE = 2
} ExplainSortSpaceType;
/*
 * Printable names for the values of enum `ExplainSortMethod`, used when
 * formatting Explain Analyze output.
 *
 * Note : `UNINITALIZED_SORT` has no entry here, so callers must index this
 * array with the enum value minus 1.
 *
 * E.g. sort_method_enum_str[TOP_N_HEAP_SORT-1]
 */
const char *sort_method_enum_str[] = {
	TOP_N_HEAP_SORT_STR,		/* [TOP_N_HEAP_SORT - 1] */
	QUICK_SORT_STR,				/* [QUICK_SORT - 1] */
	EXTERNAL_SORT_STR,			/* [EXTERNAL_SORT - 1] */
	EXTERNAL_MERGE_STR,			/* [EXTERNAL_MERGE - 1] */
	IN_PROGRESS_SORT_STR		/* [IN_PROGRESS_SORT - 1] */
};
/* EXPLAIN ANALYZE statistics for one plan node of a slice */
typedef struct CdbExplain_StatInst
{
......@@ -47,6 +94,9 @@ typedef struct CdbExplain_StatInst
instr_time firststart; /* Start time of first iteration of node */
double peakMemBalance; /* Max mem account balance */
int numPartScanned; /* Number of part tables scanned */
ExplainSortMethod sortMethod; /* Type of sort */
ExplainSortSpaceType sortSpaceType; /* Sort space type */
long sortSpaceUsed; /* Memory / Disk used by sort(KBytes) */
int bnotes; /* Offset to beginning of node's extra text */
int enotes; /* Offset to end of node's extra text */
} CdbExplain_StatInst;
......@@ -114,6 +164,8 @@ typedef struct CdbExplain_NodeSummary
CdbExplain_Agg peakMemBalance;
/* Used for DynamicTableScan, DynamicIndexScan and DynamicBitmapTableScan */
CdbExplain_Agg totalPartTableScanned;
/* Summary of space used by sort */
CdbExplain_Agg sortSpaceUsed[NUM_SORT_SPACE_TYPE][NUM_SORT_METHOD];
/* insts array info */
int segindex0; /* segment id of insts[0] */
......@@ -276,6 +328,35 @@ static int
static int
cdbexplain_countLeafPartTables(PlanState *planstate);
/*
 * String2ExplainSortMethod
 *	  Map a sort method name to the corresponding enum ExplainSortMethod.
 *
 * Returns UNINITALIZED_SORT when sortMethod is NULL or does not exactly
 * match one of the known sort method names.
 *
 * If you change this, please also update tuplesort_get_stats /
 * tuplesort_get_stats_mk in tuplesort.c / tuplesort_mk.c, which produce
 * the names compared here.
 */
static ExplainSortMethod
String2ExplainSortMethod(const char* sortMethod)
{
	/* Known names paired with their enum values, in enum order. */
	static const struct
	{
		const char *name;
		ExplainSortMethod method;
	}			known_methods[] =
	{
		{TOP_N_HEAP_SORT_STR, TOP_N_HEAP_SORT},
		{QUICK_SORT_STR, QUICK_SORT},
		{EXTERNAL_SORT_STR, EXTERNAL_SORT},
		{EXTERNAL_MERGE_STR, EXTERNAL_MERGE},
		{IN_PROGRESS_SORT_STR, IN_PROGRESS_SORT}
	};
	const int	nmethods = (int) (sizeof(known_methods) / sizeof(known_methods[0]));
	int			i;

	/* No name reported at all. */
	if (sortMethod == NULL)
		return UNINITALIZED_SORT;

	for (i = 0; i < nmethods; i++)
	{
		if (strcmp(known_methods[i].name, sortMethod) == 0)
			return known_methods[i].method;
	}

	/* Unrecognized name. */
	return UNINITALIZED_SORT;
}
/*
* cdbexplain_localExecStats
......@@ -843,6 +924,17 @@ cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ct
si->peakMemBalance = MemoryAccounting_GetAccountPeakBalance(planstate->plan->memoryAccountId);
si->firststart = instr->firststart;
si->numPartScanned = instr->numPartScanned;
si->sortMethod = String2ExplainSortMethod(instr->sortMethod);
if (MEMORY_STR_SORT_SPACE_TYPE == instr->sortSpaceType)
{
si->sortSpaceType = MEMORY_SORT_SPACE_TYPE;
}
else
{
AssertImply(si->sortMethod != UNINITALIZED_SORT, strcmp(DISK_STR_SORT_SPACE_TYPE, instr->sortSpaceType) == 0);
si->sortSpaceType = DISK_SORT_SPACE_TYPE;
}
si->sortSpaceUsed = instr->sortSpaceUsed;
} /* cdbexplain_collectStatsFromNode */
......@@ -969,6 +1061,7 @@ cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx)
CdbExplain_DepStatAcc memory_accounting_global_peak;
CdbExplain_DepStatAcc peakMemBalance;
CdbExplain_DepStatAcc totalPartTableScanned;
CdbExplain_DepStatAcc sortSpaceUsed[NUM_SORT_SPACE_TYPE][NUM_SORT_METHOD];
int imsgptr;
int nInst;
......@@ -993,6 +1086,10 @@ cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx)
cdbexplain_depStatAcc_init0(&totalWorkfileCreated);
cdbexplain_depStatAcc_init0(&peakMemBalance);
cdbexplain_depStatAcc_init0(&totalPartTableScanned);
for (int idx = 0; idx < NUM_SORT_METHOD; ++idx) {
cdbexplain_depStatAcc_init0(&sortSpaceUsed[MEMORY_SORT_SPACE_TYPE-1][idx]);
cdbexplain_depStatAcc_init0(&sortSpaceUsed[DISK_SORT_SPACE_TYPE-1][idx]);
}
/* Initialize per-slice accumulators. */
cdbexplain_depStatAcc_init0(&peakmemused);
......@@ -1039,6 +1136,10 @@ cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx)
cdbexplain_depStatAcc_upd(&totalWorkfileCreated, (rsi->workfileCreated ? 1 : 0), rsh, rsi, nsi);
cdbexplain_depStatAcc_upd(&peakMemBalance, rsi->peakMemBalance, rsh, rsi, nsi);
cdbexplain_depStatAcc_upd(&totalPartTableScanned, rsi->numPartScanned, rsh, rsi, nsi);
if (rsi->sortMethod < NUM_SORT_METHOD && rsi->sortMethod != UNINITALIZED_SORT && rsi->sortSpaceType != UNINITIALIZED_SORT_SPACE_TYPE) {
Assert(rsi->sortSpaceType <= NUM_SORT_SPACE_TYPE);
cdbexplain_depStatAcc_upd(&sortSpaceUsed[rsi->sortSpaceType-1][rsi->sortMethod - 1], (double)rsi->sortSpaceUsed, rsh, rsi, nsi);
}
/* Update per-slice accumulators. */
cdbexplain_depStatAcc_upd(&peakmemused, rsh->worker.peakmemused, rsh, rsi, nsi);
......@@ -1054,6 +1155,10 @@ cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx)
ns->totalWorkfileCreated = totalWorkfileCreated.agg;
ns->peakMemBalance = peakMemBalance.agg;
ns->totalPartTableScanned = totalPartTableScanned.agg;
for (int idx = 0; idx < NUM_SORT_METHOD; ++idx) {
ns->sortSpaceUsed[MEMORY_SORT_SPACE_TYPE-1][idx] = sortSpaceUsed[MEMORY_SORT_SPACE_TYPE-1][idx].agg;
ns->sortSpaceUsed[DISK_SORT_SPACE_TYPE-1][idx] = sortSpaceUsed[DISK_SORT_SPACE_TYPE-1][idx].agg;
}
/* Roll up summary over all nodes of slice into RecvStatCtx. */
ctx->workmemused_max = Max(ctx->workmemused_max, workmemused.agg.vmax);
......@@ -1342,6 +1447,32 @@ nodeSupportWorkfileCaching(PlanState *planstate)
IsA(planstate, MaterialState));
}
/*
 * show_cumulative_sort_info
 *	  Append one "Sort Method" line for a sort method / space type pair.
 *
 * str				buffer the text is appended to
 * indent			indentation level; 2 * indent spaces are appended after
 *					the line
 * sort_method		printable sort method name
 * sort_space_type	printable space type name
 * agg				aggregated space usage for this pair
 *
 * Appends nothing when agg has no samples. With a single sample the summed
 * value is printed; with several, the max and the average are printed
 * together with the sample count, labelled as segments.
 */
static void
show_cumulative_sort_info(struct StringInfoData *str,
						  int indent,
						  const char *sort_method,
						  const char* sort_space_type,
						  CdbExplain_Agg *agg)
{
	/* Nothing recorded for this combination: emit nothing. */
	if (agg->vcnt <= 0)
		return;

	if (agg->vcnt > 1)
	{
		long		max_kb = (long) (agg->vmax);
		long		avg_kb = (long) (agg->vsum / agg->vcnt);

		appendStringInfo(str, "Sort Method: %s Max %s: %ldKB Avg %s: %ldKB (%d segments)\n",
						 sort_method, sort_space_type, max_kb,
						 sort_space_type, avg_kb, agg->vcnt);
	}
	else
	{
		long		used_kb = (long) (agg->vsum);

		appendStringInfo(str, "Sort Method: %s %s: %ldKB\n",
						 sort_method, sort_space_type, used_kb);
	}

	/* Re-indent so that whatever is appended next lines up with the node. */
	appendStringInfoFill(str, 2 * indent, ' ');
}
/*
* cdbexplain_showExecStats
* Called by qDisp process to format a node's EXPLAIN ANALYZE statistics.
......@@ -1443,6 +1574,13 @@ cdbexplain_showExecStats(struct PlanState *planstate,
ns->ntuples.vmax,
segbuf);
break;
case T_SortState:
for (int idx = 0; idx < NUM_SORT_METHOD; ++idx)
{
show_cumulative_sort_info(str, indent, sort_method_enum_str[idx], MEMORY_STR_SORT_SPACE_TYPE, &ns->sortSpaceUsed[MEMORY_SORT_SPACE_TYPE-1][idx]);
show_cumulative_sort_info(str, indent, sort_method_enum_str[idx], DISK_STR_SORT_SPACE_TYPE, &ns->sortSpaceUsed[DISK_SORT_SPACE_TYPE-1][idx]);
}
/* no break */
default:
if (ns->ntuples.vcnt > 1)
appendStringInfo(str,
......
......@@ -1642,8 +1642,6 @@ explain_outNode(StringInfo str,
((Sort *) plan)->sortColIdx,
SortKeystr,
str, indent, es);
show_sort_info((SortState *) planstate,
str, indent, es);
}
break;
case T_Result:
......
......@@ -425,6 +425,8 @@ static bool is_sortstate_rwfile(Tuplesortstate *state)
#define REVERSEDIRECTION(state) ((*(state)->reversedirection) (state))
#define LACKMEM(state) ((state)->availMem < 0)
static void tuplesort_get_stats(Tuplesortstate *state, const char **sortMethod, const char **spaceType, long *spaceUsed);
static inline void USEMEM(Tuplesortstate *state, int amt)
{
state->availMem -= amt;
......@@ -959,6 +961,11 @@ tuplesort_end(Tuplesortstate *state)
else
spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
/*
* Call before state->tapeset is closed.
*/
tuplesort_finalize_stats(state);
/*
* Delete temporary "tape" files, if any.
*
......@@ -975,8 +982,6 @@ tuplesort_end(Tuplesortstate *state)
}
}
tuplesort_finalize_stats(state);
if (trace_sort)
{
if (state->tapeset)
......@@ -1019,6 +1024,10 @@ tuplesort_finalize_stats(Tuplesortstate *state)
(double)MemoryContextGetPeakSpace(state->sortcontext);
state->statsFinalized = true;
tuplesort_get_stats(state,
&state->instrument->sortMethod,
&state->instrument->sortSpaceType,
&state->instrument->sortSpaceUsed);
}
}
......@@ -2558,21 +2567,20 @@ tuplesort_restorepos(Tuplesortstate *state)
}
/*
* tuplesort_explain - produce a line of information for EXPLAIN ANALYZE
* tuplesort_get_stats - extract summary statistics
*
* This can be called after tuplesort_performsort() finishes to obtain
* printable summary information about how the sort was performed.
*
* The result is a palloc'd string.
* spaceUsed is measured in kilobytes.
*/
char *
tuplesort_explain(Tuplesortstate *state)
static void
tuplesort_get_stats(Tuplesortstate *state,
const char **sortMethod,
const char **spaceType,
long *spaceUsed)
{
char *result = (char *) palloc(100);
long spaceUsed;
/*
* Note: it might seem we should print both memory and disk usage for a
* Note: it might seem we should provide both memory and disk usage for a
* disk-based sort. However, the current code doesn't track memory space
* accurately once we have begun to return tuples to the caller (since we
* don't account for pfree's the caller is expected to do), so we cannot
......@@ -2581,38 +2589,34 @@ tuplesort_explain(Tuplesortstate *state)
* tell us how much is actually used in sortcontext?
*/
if (state->tapeset)
spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
{
*spaceType = "Disk";
*spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
}
else
spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
{
*spaceType = "Memory";
*spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
}
switch (state->status)
{
case TSS_SORTEDINMEM:
if (state->boundUsed)
snprintf(result, 100,
"Sort Method: top-N heapsort Memory: %ldkB",
spaceUsed);
*sortMethod = "top-N heapsort";
else
snprintf(result, 100,
"Sort Method: quicksort Memory: %ldkB",
spaceUsed);
*sortMethod = "quicksort";
break;
case TSS_SORTEDONTAPE:
snprintf(result, 100,
"Sort Method: external sort Disk: %ldkB",
spaceUsed);
*sortMethod = "external sort";
break;
case TSS_FINALMERGE:
snprintf(result, 100,
"Sort Method: external merge Disk: %ldkB",
spaceUsed);
*sortMethod = "external merge";
break;
default:
snprintf(result, 100, "sort still in progress");
*sortMethod = "still in progress";
break;
}
return result;
}
......
......@@ -377,6 +377,8 @@ struct Tuplesortstate_mk
int *gpmon_sort_tick;
};
static void tuplesort_get_stats_mk(Tuplesortstate_mk* state, const char **sortMethod, const char **spaceType, long *spaceUsed);
static bool
is_sortstate_rwfile(Tuplesortstate_mk *state)
{
......@@ -1009,10 +1011,15 @@ tuplesort_end_mk(Tuplesortstate_mk *state)
long spaceUsed;
if (state->tapeset)
spaceUsed = LogicalTapeSetBlocks(state->tapeset);
spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
else
spaceUsed = (MemoryContextGetCurrentSpace(state->sortcontext) + 1024) / 1024;
/*
* Call before state->tapeset is closed.
*/
tuplesort_finalize_stats_mk(state);
/*
* Delete temporary "tape" files, if any.
*
......@@ -1030,13 +1037,11 @@ tuplesort_end_mk(Tuplesortstate_mk *state)
}
}
if (state->work_set)
{
workfile_mgr_close_set(state->work_set);
}
tuplesort_finalize_stats_mk(state);
if (trace_sort)
PG_TRACE2(tuplesort__end, state->tapeset ? 1 : 0, spaceUsed);
......@@ -1093,6 +1098,7 @@ tuplesort_finalize_stats_mk(Tuplesortstate_mk *state)
}
state->statsFinalized = true;
tuplesort_get_stats_mk(state, &state->instrument->sortMethod, &state->instrument->sortSpaceType, &state->instrument->sortSpaceUsed);
}
}
......@@ -2625,21 +2631,20 @@ tuplesort_restorepos_mk(Tuplesortstate_mk *state)
/*
* tuplesort_explain - produce a line of information for EXPLAIN ANALYZE
* tuplesort_get_stats_mk - extract summary statistics
*
* This can be called after tuplesort_performsort() finishes to obtain
* This can be called after tuplesort_performsort_mk() finishes to obtain
* printable summary information about how the sort was performed.
*
* The result is a palloc'd string.
* spaceUsed is measured in kilobytes.
*/
char *
tuplesort_explain_mk(Tuplesortstate_mk *state)
static void
tuplesort_get_stats_mk(Tuplesortstate_mk* state,
const char **sortMethod,
const char **spaceType,
long *spaceUsed)
{
char *result = (char *) palloc(100);
long spaceUsed;
/*
* Note: it might seem we should print both memory and disk usage for a
* Note: it might seem we should provide both memory and disk usage for a
* disk-based sort. However, the current code doesn't track memory space
* accurately once we have begun to return tuples to the caller (since we
* don't account for pfree's the caller is expected to do), so we cannot
......@@ -2648,38 +2653,34 @@ tuplesort_explain_mk(Tuplesortstate_mk *state)
* tell us how much is actually used in sortcontext?
*/
if (state->tapeset)
spaceUsed = LogicalTapeSetBlocks(state->tapeset);
{
*spaceType = "Disk";
*spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
}
else
spaceUsed = (MemoryContextGetCurrentSpace(state->sortcontext) + 1024) / 1024;
switch (state->status)
{
case TSS_SORTEDINMEM:
if (state->mkctxt.boundUsed)
snprintf(result, 100,
"Sort Method: top-N heapsort Memory: %ldkB",
spaceUsed);
else
snprintf(result, 100,
"Sort Method: quicksort Memory: %ldkB",
spaceUsed);
break;
case TSS_SORTEDONTAPE:
snprintf(result, 100,
"Sort Method: external sort Disk: %ldkB",
spaceUsed);
break;
case TSS_FINALMERGE:
snprintf(result, 100,
"Sort Method: external merge Disk: %ldkB",
spaceUsed);
break;
default:
snprintf(result, 100, "sort still in progress");
break;
*spaceType = "Memory";
*spaceUsed = (MemoryContextGetCurrentSpace(state->sortcontext) + 1024) / 1024;
}
return result;
switch (state->status)
{
case TSS_SORTEDINMEM:
if (state->mkctxt.boundUsed)
*sortMethod = "top-N heapsort";
else
*sortMethod = "quicksort";
break;
case TSS_SORTEDONTAPE:
*sortMethod = "external sort";
break;
case TSS_FINALMERGE:
*sortMethod = "external merge";
break;
default:
*sortMethod = "still in progress";
}
return;
}
/*
......
......@@ -38,6 +38,9 @@ typedef struct Instrumentation
instr_time firststart; /* CDB: Start time of first iteration of node */
bool workfileCreated;/* TRUE if workfiles are created in this node */
int numPartScanned; /* Number of part tables scanned */
const char* sortMethod; /* CDB: Type of sort */
const char* sortSpaceType; /*CDB: Sort space type (Memory / Disk) */
long sortSpaceUsed; /* CDB: Memory / Disk used by sort(KBytes) */
struct CdbExplain_NodeSummary *cdbNodeSummary; /* stats from all qExecs */
} Instrumentation;
......
......@@ -138,9 +138,6 @@ extern void tuplesort_flush_mk(Tuplesortstate_mk *state);
extern void tuplesort_finalize_stats(Tuplesortstate *state);
extern void tuplesort_finalize_stats_mk(Tuplesortstate_mk *state);
extern char *tuplesort_explain(Tuplesortstate *state);
extern char *tuplesort_explain_mk(Tuplesortstate_mk *state);
extern int tuplesort_merge_order(long allowedMem);
/*
......
create schema sort_schema;
set search_path to sort_schema;
-- start_ignore
create language plpythonu;
-- end_ignore
-- Check if analyze output has Sort Method
create or replace function sort_schema.has_sortmethod(explain_analyze_query text)
returns setof int as
$$
rv = plpy.execute(explain_analyze_query)
search_text = 'Sort Method'
result = []
for i in range(len(rv)):
cur_line = rv[i]['QUERY PLAN']
if search_text.lower() in cur_line.lower():
result.append(1)
return result
$$
language plpythonu;
set gp_enable_mk_sort = on;
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g limit 100;');
has_sortmethod
----------------
1
(1 row)
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g;');
has_sortmethod
----------------
1
(1 row)
set gp_enable_mk_sort = off;
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g limit 100;');
has_sortmethod
----------------
1
(1 row)
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g;');
has_sortmethod
----------------
1
(1 row)
-- start_ignore
create table sort_a(i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into sort_a values(1, 2);
-- end_ignore
set gp_enable_mk_sort = on;
select sort_schema.has_sortmethod('explain analyze select i from sort_a order by i;');
has_sortmethod
----------------
1
(1 row)
set gp_enable_mk_sort = off;
select sort_schema.has_sortmethod('explain analyze select i from sort_a order by i;');
has_sortmethod
----------------
1
(1 row)
-- start_ignore
drop schema sort_schema cascade;
NOTICE: drop cascades to table sort_a
NOTICE: drop cascades to function has_sortmethod(text)
-- end_ignore
......@@ -106,7 +106,7 @@ test: bfv_catalog bfv_index bfv_olap bfv_aggregate bfv_partition DML_over_joins
test: aggregate_with_groupingsets
test: nested_case_null
test: nested_case_null sort
test: bfv_cte bfv_joins bfv_subquery bfv_planner bfv_legacy
......
create schema sort_schema;
set search_path to sort_schema;
-- start_ignore
create language plpythonu;
-- end_ignore
-- Check if analyze output has Sort Method
create or replace function sort_schema.has_sortmethod(explain_analyze_query text)
returns setof int as
$$
rv = plpy.execute(explain_analyze_query)
search_text = 'Sort Method'
result = []
for i in range(len(rv)):
cur_line = rv[i]['QUERY PLAN']
if search_text.lower() in cur_line.lower():
result.append(1)
return result
$$
language plpythonu;
set gp_enable_mk_sort = on;
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g limit 100;');
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g;');
set gp_enable_mk_sort = off;
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g limit 100;');
select sort_schema.has_sortmethod('explain analyze select * from generate_series(1, 100) g order by g;');
-- start_ignore
create table sort_a(i int, j int);
insert into sort_a values(1, 2);
-- end_ignore
set gp_enable_mk_sort = on;
select sort_schema.has_sortmethod('explain analyze select i from sort_a order by i;');
set gp_enable_mk_sort = off;
select sort_schema.has_sortmethod('explain analyze select i from sort_a order by i;');
-- start_ignore
drop schema sort_schema cascade;
-- end_ignore
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册