diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index b27ee8ea2d946763d20c6e6b47f43f7463761f6e..648866dfa77803d26744e177a728f6c241f8cb9d 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -676,7 +676,19 @@ BitmapAppendOnlyNext(BitmapHeapScanState *node) if (QueryFinishPending) return NULL; - if (!node->baos_gotpage) + /* + * When ExecReScanBitmapHeapScan is executed, the bitmap state (tbmiterator and + * tbmres) is freed in freeBitmapState, so tbmres is NULL and we need + * to re-initialize the bitmap state to start the scan from the beginning and reset the AO/AOCS bitmap + * page flags (baos_gotpage, baos_lossy, baos_cindex and baos_ntuples). + * + * This matters especially when ExecReScan happens on a bitmap append-only scan before all the + * matched tuples in the bitmap have been consumed, for example, a Bitmap Heap Scan as the inner plan + * of a Nested Loop Semi Join. If tbmres is not re-initialized and not all tuples in the + * last bitmap were read, BitmapAppendOnlyNext would assume the current bitmap page still + * has data to return, even though the bitmap state has already been freed. + */ + if (!node->baos_gotpage || tbmres == NULL) { /* * Obtain the next psuedo-heap-page-info with item bit-map. 
Later, we'll @@ -733,6 +745,9 @@ BitmapAppendOnlyNext(BitmapHeapScanState *node) continue; } + /* Make sure the bitmap state has been initialized */ + Assert(tbmres); + if (node->baos_lossy || tbmres->recheck) need_recheck = true; diff --git a/src/test/regress/expected/co_nestloop_idxscan.out b/src/test/regress/expected/co_nestloop_idxscan.out index 7369ee9f2946be740a8f04efafcf3024622013ec..fe7bf333b7c81ed134b6ba7706ad58ff59bdbd1a 100644 --- a/src/test/regress/expected/co_nestloop_idxscan.out +++ b/src/test/regress/expected/co_nestloop_idxscan.out @@ -1,6 +1,18 @@ -- -- Nested loop join with index scan on CO table, test for MPP-17658 -- +-- The test should also make sure the AO/AOCO table's bitmap state +-- gets re-initialized in BitmapHeapScanState if the current scan on the AO/AOCO table +-- is not finished when ExecReScanBitmapHeapScan gets called, which frees +-- the current bitmap state. +-- If the scan reads everything from the AO/AOCO table, the bitmap state in BitmapHeapScanState +-- always gets re-initialized, so that case is not considered. +-- This is tested through a Nested Loop Semi Join, since it guarantees that once +-- a match is found, a new outer slot is requested, so the inner plan may not +-- read all tuples. The inner plan of the Nested Loop Semi Join is a Bitmap +-- Heap Scan. So for a new outer slot, the inner plan needs to rescan from +-- the beginning. +-- create schema co_nestloop_idxscan; create table co_nestloop_idxscan.foo (id bigint, data text) with (appendonly=true, orientation=column) distributed by (id); @@ -76,6 +88,42 @@ select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id (1 row) set optimizer_enable_hashjoin = on; +-- Test with a Nested Loop Semi Join that the freed AO/AOCS bitmap state gets re-initialized. +-- Make sure each bitmap index scan matches more than 1 tuple, +-- so that the rescan frees the bitmap state in BitmapHeapScanState. If only +-- 1 tuple matched, the bitmap state in BitmapHeapScanState would always get re-initialized +-- once all matched tuples were read. 
+insert into co_nestloop_idxscan.foo select i%10, repeat('xxxxxxxxxx', 100000) from generate_series(1,20) i; +-- Fill enough tuples on the same segment for the outer relation of the nested loop join +-- to make sure rescan gets called for the inner plan. +insert into co_nestloop_idxscan.bar values (1); +-- Turn off the optimizer since we cannot make ORCA generate the same plan as the planner. +set optimizer = off; +-- The outer plan of the Nested Loop Semi Join should be a Seq Scan on bar b. +-- The inner plan should be a Bitmap Heap Scan on foo f. +-- So the Bitmap Heap Scan will call ExecReScanBitmapHeapScan for each new outer slot. +explain select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000048.90..10000000054.01 rows=4 width=8) + -> Nested Loop Semi Join (cost=10000000048.90..10000000053.95 rows=2 width=8) + -> Seq Scan on bar b (cost=10000000000.00..10000000001.02 rows=1 width=8) + Filter: (id = ANY ('{1,2,3,4,5,6}'::bigint[])) + -> Bitmap Heap Scan on foo f (cost=48.90..52.92 rows=1 width=8) + Recheck Cond: ((id = b.id) AND (id = ANY ('{1,2,3,4,5,6}'::bigint[]))) + -> Bitmap Index Scan on foo_id_idx (cost=0.00..48.90 rows=1 width=0) + Index Cond: ((id = b.id) AND (id = ANY ('{1,2,3,4,5,6}'::bigint[]))) + Optimizer: Postgres query optimizer +(9 rows) + +select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); + id +---- + 1 + 1 +(2 rows) + +reset optimizer; drop schema co_nestloop_idxscan cascade; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to append only columnar table co_nestloop_idxscan.foo diff --git a/src/test/regress/expected/co_nestloop_idxscan_optimizer.out b/src/test/regress/expected/co_nestloop_idxscan_optimizer.out index 
9af84751f4e37b9489e8a543e6927418e8eb08fc..1d1d338faf02c3a12de1efaf2ef96bc52b3fda08 100644 --- a/src/test/regress/expected/co_nestloop_idxscan_optimizer.out +++ b/src/test/regress/expected/co_nestloop_idxscan_optimizer.out @@ -1,6 +1,18 @@ -- -- Nested loop join with index scan on CO table, test for MPP-17658 -- +-- The test should also make sure the AO/AOCO table's bitmap state +-- gets re-initialized in BitmapHeapScanState if the current scan on the AO/AOCO table +-- is not finished when ExecReScanBitmapHeapScan gets called, which frees +-- the current bitmap state. +-- If the scan reads everything from the AO/AOCO table, the bitmap state in BitmapHeapScanState +-- always gets re-initialized, so that case is not considered. +-- This is tested through a Nested Loop Semi Join, since it guarantees that once +-- a match is found, a new outer slot is requested, so the inner plan may not +-- read all tuples. The inner plan of the Nested Loop Semi Join is a Bitmap +-- Heap Scan. So for a new outer slot, the inner plan needs to rescan from +-- the beginning. +-- create schema co_nestloop_idxscan; create table co_nestloop_idxscan.foo (id bigint, data text) with (appendonly=true, orientation=column) distributed by (id); @@ -80,6 +92,42 @@ select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id (1 row) set optimizer_enable_hashjoin = on; +-- Test with a Nested Loop Semi Join that the freed AO/AOCS bitmap state gets re-initialized. +-- Make sure each bitmap index scan matches more than 1 tuple, +-- so that the rescan frees the bitmap state in BitmapHeapScanState. If only +-- 1 tuple matched, the bitmap state in BitmapHeapScanState would always get re-initialized +-- once all matched tuples were read. +insert into co_nestloop_idxscan.foo select i%10, repeat('xxxxxxxxxx', 100000) from generate_series(1,20) i; +-- Fill enough tuples on the same segment for the outer relation of the nested loop join +-- to make sure rescan gets called for the inner plan. 
+insert into co_nestloop_idxscan.bar values (1); +-- Turn off the optimizer since we cannot make ORCA generate the same plan as the planner. +set optimizer = off; +-- The outer plan of the Nested Loop Semi Join should be a Seq Scan on bar b. +-- The inner plan should be a Bitmap Heap Scan on foo f. +-- So the Bitmap Heap Scan will call ExecReScanBitmapHeapScan for each new outer slot. +explain select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=10000000048.90..10000000054.01 rows=4 width=8) + -> Nested Loop Semi Join (cost=10000000048.90..10000000053.95 rows=2 width=8) + -> Seq Scan on bar b (cost=10000000000.00..10000000001.02 rows=1 width=8) + Filter: (id = ANY ('{1,2,3,4,5,6}'::bigint[])) + -> Bitmap Heap Scan on foo f (cost=48.90..52.92 rows=1 width=8) + Recheck Cond: ((id = b.id) AND (id = ANY ('{1,2,3,4,5,6}'::bigint[]))) + -> Bitmap Index Scan on foo_id_idx (cost=0.00..48.90 rows=1 width=0) + Index Cond: ((id = b.id) AND (id = ANY ('{1,2,3,4,5,6}'::bigint[]))) + Optimizer: Postgres query optimizer +(9 rows) + +select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); + id +---- + 1 + 1 +(2 rows) + +reset optimizer; drop schema co_nestloop_idxscan cascade; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to append only columnar table co_nestloop_idxscan.foo diff --git a/src/test/regress/sql/co_nestloop_idxscan.sql b/src/test/regress/sql/co_nestloop_idxscan.sql index a1ce900e559db16f041dd0c125b288a307607318..b9a0af61921913c613f3edb7d2762711820f17f1 100644 --- a/src/test/regress/sql/co_nestloop_idxscan.sql +++ b/src/test/regress/sql/co_nestloop_idxscan.sql @@ -1,7 +1,18 @@ -- -- Nested loop join with index scan on CO table, test for 
MPP-17658 -- - +-- The test should also make sure the AO/AOCO table's bitmap state +-- gets re-initialized in BitmapHeapScanState if the current scan on the AO/AOCO table +-- is not finished when ExecReScanBitmapHeapScan gets called, which frees +-- the current bitmap state. +-- If the scan reads everything from the AO/AOCO table, the bitmap state in BitmapHeapScanState +-- always gets re-initialized, so that case is not considered. +-- This is tested through a Nested Loop Semi Join, since it guarantees that once +-- a match is found, a new outer slot is requested, so the inner plan may not +-- read all tuples. The inner plan of the Nested Loop Semi Join is a Bitmap +-- Heap Scan. So for a new outer slot, the inner plan needs to rescan from +-- the beginning. +-- create schema co_nestloop_idxscan; create table co_nestloop_idxscan.foo (id bigint, data text) with (appendonly=true, orientation=column) @@ -34,4 +45,24 @@ explain select f.id from co_nestloop_idxscan.bar b, co_nestloop_idxscan.foo f wh select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id; set optimizer_enable_hashjoin = on; +-- Test with a Nested Loop Semi Join that the freed AO/AOCS bitmap state gets re-initialized. +-- Make sure each bitmap index scan matches more than 1 tuple, +-- so that the rescan frees the bitmap state in BitmapHeapScanState. If only +-- 1 tuple matched, the bitmap state in BitmapHeapScanState would always get re-initialized +-- once all matched tuples were read. +insert into co_nestloop_idxscan.foo select i%10, repeat('xxxxxxxxxx', 100000) from generate_series(1,20) i; + +-- Fill enough tuples on the same segment for the outer relation of the nested loop join +-- to make sure rescan gets called for the inner plan. +insert into co_nestloop_idxscan.bar values (1); + +-- Turn off the optimizer since we cannot make ORCA generate the same plan as the planner. +set optimizer = off; +-- The outer plan of the Nested Loop Semi Join should be a Seq Scan on bar b. +-- The inner plan should be a Bitmap Heap Scan on foo f. 
+-- So the Bitmap Heap Scan will call ExecReScanBitmapHeapScan for each new outer slot. +explain select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); +select b.id from co_nestloop_idxscan.bar b where b.id in (select f.id from co_nestloop_idxscan.foo f where f.id in (1, 2, 3, 4, 5, 6)); + +reset optimizer; drop schema co_nestloop_idxscan cascade;