diff --git a/src/test/regress/expected/bitmapscan.out b/src/test/regress/expected/bitmapscan.out index 3cd6eae8d725e8dff71307d4b7c2dd2693b469c9..9829af268421032c8a7cfd9e39f033ff44c84081 100644 --- a/src/test/regress/expected/bitmapscan.out +++ b/src/test/regress/expected/bitmapscan.out @@ -741,3 +741,265 @@ select * from bm_test where a in (1,3,5); 5 | 5 (3 rows) +-- Create a heap table. +CREATE TABLE card_heap_table_w_bitmap (id INTEGER, v VARCHAR) DISTRIBUTED BY (id); +-- Insert a few rows. +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (1, + 'Water molecules cling because their electrostatic charge is polarized.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (2, + 'The first law of thermodynamics is that matter and energy can neither be created nor destroyed.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (3, + 'The second law of thermodynamics essentially says that you cannot recycle energy.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (4, + 'The mass of the universe is finite and static.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (5, + 'Population is growing exponentially.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (6, + 'What happens next?'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (7, + 'Fusion works by fusing 4 hydrogen atoms into 1 helium atom, or by fusing 2 deuterium atoms (deuterium is an isotope of hydrogen in which the nucleus contains a neutron).'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (8, + 'Give a man a fish, and he will eat for a day. Teach a man to fission, and he will blow up the planet for all eternity.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (9, + 'Mercury, lead, and cadmium -- a day ago I had me some. Now I feel really dense.'); +-- Now force the values in the "v" column to use up more space. 
+UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +-- Create a bitmap index. +CREATE INDEX card_heap_bitmap_idx1 ON card_heap_table_w_bitmap USING BITMAP (v); +-- Insert some more rows. +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (10, + 'Rare earth metals are not the only rare metals in the earth.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (11, + 'Who needs cinnabar when you have tuna?'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (12, + 'This drunk walk cinnabar...'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (13, + 'Hydrogen, helium, lithium.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (14, + 'Hydrosphere, heliopause, lithosphere.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (15, + 'Spelunking is not for the claustrophobic.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (16, + 'Beam me up Spock. There is NO intelligent life on this planet.'); +-- Add column to the table. +-- The new column will have a low cardinality but a large domain. There +-- will be only a few distinct values in this column, but the values will +-- cover a wide range (from -2147483648 to +2147483647). Note that the 16 +-- existing rows will get a value of NULL for this column. 
+ALTER TABLE card_heap_table_w_bitmap ADD COLUMN lowCardinalityHighDomain INTEGER DEFAULT NULL; +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 17, 'Can we stop malaria by breeding a mosquito that cannot host malaria?', + -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 18, 'Andes, Butte, Cascades, Denali, Everest', + 0); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 19, 'Sawtooth, Sierras, Sangre de Cristos', + 2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 20, 'Ganges, Brahmaputra, Indus', + -2147483648); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 21, NULL, -2147483648); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 22, 'Amazon, Mad, Mississipi, Ohio, Sacramento, Merced', -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 23, 'Yellow, Red, Green, Blue Nile, White Nile, denial', + -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (24, + 'Earthquake supplies: water, sleeping bag, hand sanitizer', + 0); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (25, + 'radio, batteries, flashlight, camp stove', 2147483646); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (26, + 'books, first aid equipment, axe, water purifier', 2147483647); +-- Insert enough rows to get us up to 10,000 rows. +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) + SELECT i, CAST(i AS VARCHAR), i % 100 FROM generate_series(27, 10000) i; +-- The following CREATE INDEX statement helps us test all of the following +-- conditions: +-- a multi-column index. +-- an index that contains columns that are also in another index. 
+-- a bitmap index on a column with a large domain but a small cardinality. +CREATE INDEX index2 ON card_heap_table_w_bitmap USING BITMAP (lowCardinalityHighDomain, v); +-- analyze the table +ANALYZE card_heap_table_w_bitmap; +-- Although we have 10,000 rows or more, the lowCardinalityHighDomain column +-- has only about 104 distinct values: 0-99, -2147483648, -2147483647, +-- 2147483647 and 2147483646. +SELECT COUNT(DISTINCT lowCardinalityHighDomain) FROM card_heap_table_w_bitmap; + count +------- + 104 +(1 row) + +-- There should be 99 rows with this value. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 5; + count +------- + 99 +(1 row) + +-- Each of these tests a "single-sided range". +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain < 0; + count +------- + 5 +(1 row) + +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 100; + count +------- + 3 +(1 row) + +-- Select an individual row. +SELECT * FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 2147483646; + id | v | lowcardinalityhighdomain +----+------------------------------------------+-------------------------- + 25 | radio, batteries, flashlight, camp stove | 2147483646 +(1 row) + +UPDATE card_heap_table_w_bitmap SET lowCardinalityHighDomain = NULL WHERE lowCardinalityHighDomain = 4; +SELECT COUNT(DISTINCT lowCardinalityHighDomain) FROM card_heap_table_w_bitmap; + count +------- + 103 +(1 row) + +-- There should be approximately 115 NULL values (99 that we just updated, +-- and 16 original rows that got NULL when we added the column). +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain IS NULL; + count +------- + 115 +(1 row) + +-- There should no longer be any rows with value 4. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 4; + count +------- + 0 +(1 row) + +-- We should have 10,000 rows now. 
+SELECT COUNT(*) FROM card_heap_table_w_bitmap; + count +------- + 10000 +(1 row) + +-- This should delete 99 rows. +DELETE FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 3; +-- There should be 99 records like this. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 22; + count +------- + 99 +(1 row) + +-- Now reduce the cardinality +DELETE FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 30 + AND lowCardinalityHighDomain <= 50 AND lowCardinalityHighDomain % 2 = 0; +SELECT COUNT(*) FROM card_heap_table_w_bitmap; + count +------- + 8901 +(1 row) + +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain < 10; + count +------- + 800 +(1 row) + +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 100; + count +------- + 3 +(1 row) + +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain IS NULL; + count +------- + 115 +(1 row) + +-- The number of rows updated here should be equal to the total number of rows +-- minus the number that have lowCardinalityHighDomain less than 10 +-- minus the number that have lowCardinalityHighDomain greater than 100 +-- minus the number that are NULL (if any). +UPDATE card_heap_table_w_bitmap SET lowCardinalityHighDomain = 200 WHERE lowCardinalityHighDomain >= 10 and lowCardinalityHighDomain <= 100; +-- Should be around 14 rows. +SELECT DISTINCT lowCardinalityHighDomain FROM card_heap_table_w_bitmap; + lowcardinalityhighdomain +-------------------------- + + 0 + 1 + 2 + 5 + 6 + 7 + 8 + 9 + 200 + 2147483646 + 2147483647 + -2147483647 + -2147483648 +(14 rows) + +REINDEX INDEX card_heap_bitmap_idx1; +-- Should still be around 14 rows. +SELECT DISTINCT lowCardinalityHighDomain FROM card_heap_table_w_bitmap; + lowcardinalityhighdomain +-------------------------- + + 0 + 1 + 2 + 5 + 6 + 7 + 8 + 9 + 200 + 2147483646 + 2147483647 + -2147483647 + -2147483648 +(14 rows) + +-- There should be 99 records like this. 
+SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 7; + count +------- + 99 +(1 row) + +-- There should be 7983 rows like this. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 200; + count +------- + 7983 +(1 row) + +ALTER TABLE card_heap_table_w_bitmap RENAME COLUMN lowCardinalityHighDomain TO highCardinalityHighDomain; +-- Now add a lot more rows with few repeated values so that the +-- cardinality becomes quite high (around 50,000 distinct values) +INSERT INTO card_heap_table_w_bitmap (id, v, highCardinalityHighDomain) + SELECT i, CAST(i AS VARCHAR), i % 50000 FROM generate_series(1000001, 1050000) i; +SELECT COUNT(DISTINCT(highCardinalityHighDomain)) AS distinct_hchd FROM card_heap_table_w_bitmap ORDER BY distinct_hchd; + distinct_hchd +--------------- + 50004 +(1 row) + diff --git a/src/test/regress/input/aocs.source b/src/test/regress/input/aocs.source index 32a8ed8b4897db68001c47e11e8df16d577bfeb6..362375a0e55de53d848fb89b50b99de0f9e2d95e 100644 --- a/src/test/regress/input/aocs.source +++ b/src/test/regress/input/aocs.source @@ -358,6 +358,12 @@ select * from co where j = 2; select i from co where j = 2; select j,i from co where k = 'aaa' or k = 'bbb'; +-- small test on a performance bug in bitmap indexes due to large tid gaps +insert into co select i, 0, 'aaaaaaa' from generate_series(1, 20) i; +insert into co select i, 1, 'aaa' from generate_series(1, 20) i; +insert into co select i, 2, 'a' from generate_series(1, 20) i; +select distinct j from co where j > -1 and j < 3 order by j; + -- Test clustering errors out cluster co_j_cluster on co_j; @@ -488,7 +494,8 @@ insert into aocs_compress_results values (pg_relation_size('aocs_compress_table' insert into aocs_compress_table (id, v) values (1, 'ifyouwantto99knowwhatist8329histhenkeepreadingit;;untilyou]findoutyoureyeshurtandyoustil0ldontknow103kwhatitisdoyouunderstandmeyetandifyoustillwanttoknowthenyoupleasekeepreading'); insert into 
aocs_compress_results values (pg_relation_size('aocs_compress_table'), pg_relation_size('aocs_compress_id_index'), pg_relation_size('aocs_compress_v_index')); -select get_ao_compression_ratio('aocs_compress_table'); +-- compression ratio should be between 1.2 and 1.3 +select get_ao_compression_ratio('aocs_compress_table') > 1.2 and get_ao_compression_ratio('aocs_compress_table') < 1.3; select get_ao_distribution('aocs_compress_table'); truncate table aocs_compress_table; -- after truncate, reclaim space from the table and index diff --git a/src/test/regress/input/appendonly.source b/src/test/regress/input/appendonly.source index 21c5a2fa9bc854cb9839891f8d62f86bc3c95c88..401ec1cad65386a030348112a80a90aa183ea3dc 100644 --- a/src/test/regress/input/appendonly.source +++ b/src/test/regress/input/appendonly.source @@ -319,6 +319,12 @@ insert into ao values (9,2,'b'), (10,2,'bb'), (11,2,'bbb'), (12,2,'bbbb'), (13,5,'aaaaa'), (14,6,'aaaaaa'), (15,7,'aaaaaaa'), (16,8,'aaaaaaaa'); select * from ao where j = 2; +-- small test on a performance bug in bitmap indexes due to large tid gaps +insert into ao select i, 0, 'aaaaaaa' from generate_series(1, 20) i; +insert into ao select i, 1, 'aaa' from generate_series(1, 20) i; +insert into ao select i, 2, 'a' from generate_series(1, 20) i; +select distinct j from ao where j > -1 and j < 3 order by j; + -- Test clustering errors out cluster ao_j_cluster on ao_j; @@ -446,7 +452,8 @@ insert into ao_compress_results values (pg_relation_size('ao_compress_table'), p insert into ao_compress_table (id, v) values (1, 'ifyouwantto99knowwhatist8329histhenkeepreadingit;;untilyou]findoutyoureyeshurtandyoustil0ldontknow103kwhatitisdoyouunderstandmeyetandifyoustillwanttoknowthenyoupleasekeepreading'); insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index')); -select get_ao_compression_ratio('ao_compress_table'); +-- compression ratio should be 
between 1.2 and 1.3 +select get_ao_compression_ratio('ao_compress_table') > 1.2 and get_ao_compression_ratio('ao_compress_table') < 1.3; select get_ao_distribution('ao_compress_table'); truncate table ao_compress_table; -- after truncate, reclaim space from the table and index diff --git a/src/test/regress/output/aocs.source b/src/test/regress/output/aocs.source index b967369c80582507b809a00c2b79fdb292775597..f92b7bc2e14dd171457a327e044a2031b197a6e8 100644 --- a/src/test/regress/output/aocs.source +++ b/src/test/regress/output/aocs.source @@ -744,6 +744,18 @@ select j,i from co where k = 'aaa' or k = 'bbb'; 2 | 11 (3 rows) +-- small test on a performance bug in bitmap indexes due to large tid gaps +insert into co select i, 0, 'aaaaaaa' from generate_series(1, 20) i; +insert into co select i, 1, 'aaa' from generate_series(1, 20) i; +insert into co select i, 2, 'a' from generate_series(1, 20) i; +select distinct j from co where j > -1 and j < 3 order by j; + j +--- + 0 + 1 + 2 +(3 rows) + -- Test clustering errors out cluster co_j_cluster on co_j; ERROR: "co_j" is an index @@ -974,10 +986,11 @@ create index aocs_compress_v_index on aocs_compress_table (v); insert into aocs_compress_results values (pg_relation_size('aocs_compress_table'), pg_relation_size('aocs_compress_id_index'), pg_relation_size('aocs_compress_v_index')); insert into aocs_compress_table (id, v) values (1, 'ifyouwantto99knowwhatist8329histhenkeepreadingit;;untilyou]findoutyoureyeshurtandyoustil0ldontknow103kwhatitisdoyouunderstandmeyetandifyoustillwanttoknowthenyoupleasekeepreading'); insert into aocs_compress_results values (pg_relation_size('aocs_compress_table'), pg_relation_size('aocs_compress_id_index'), pg_relation_size('aocs_compress_v_index')); -select get_ao_compression_ratio('aocs_compress_table'); - get_ao_compression_ratio --------------------------- - 1.26 +-- compression ratio should be between 1.2 and 1.3 +select get_ao_compression_ratio('aocs_compress_table') > 1.2 and 
get_ao_compression_ratio('aocs_compress_table') < 1.3; + ?column? +---------- + t (1 row) select get_ao_distribution('aocs_compress_table'); diff --git a/src/test/regress/output/appendonly.source b/src/test/regress/output/appendonly.source index 66c4c5e890cdefe41060a826e2dbac481ccf48b1..374adfa4c6dda07feb154b06965f2cdee9f82ca7 100644 --- a/src/test/regress/output/appendonly.source +++ b/src/test/regress/output/appendonly.source @@ -691,6 +691,18 @@ select * from ao where j = 2; 10 | 2 | bb (6 rows) +-- small test on a performance bug in bitmap indexes due to large tid gaps +insert into ao select i, 0, 'aaaaaaa' from generate_series(1, 20) i; +insert into ao select i, 1, 'aaa' from generate_series(1, 20) i; +insert into ao select i, 2, 'a' from generate_series(1, 20) i; +select distinct j from ao where j > -1 and j < 3 order by j; + j +--- + 0 + 1 + 2 +(3 rows) + -- Test clustering errors out cluster ao_j_cluster on ao_j; ERROR: "ao_j" is an index @@ -954,10 +966,11 @@ create index ao_compress_v_index on ao_compress_table (v); insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index')); insert into ao_compress_table (id, v) values (1, 'ifyouwantto99knowwhatist8329histhenkeepreadingit;;untilyou]findoutyoureyeshurtandyoustil0ldontknow103kwhatitisdoyouunderstandmeyetandifyoustillwanttoknowthenyoupleasekeepreading'); insert into ao_compress_results values (pg_relation_size('ao_compress_table'), pg_relation_size('ao_compress_id_index'), pg_relation_size('ao_compress_v_index')); -select get_ao_compression_ratio('ao_compress_table'); - get_ao_compression_ratio --------------------------- - 1.27 +-- compression ratio should be between 1.2 and 1.3 +select get_ao_compression_ratio('ao_compress_table') > 1.2 and get_ao_compression_ratio('ao_compress_table') < 1.3; + ?column? 
+---------- + t (1 row) select get_ao_distribution('ao_compress_table'); diff --git a/src/test/regress/sql/bitmapscan.sql b/src/test/regress/sql/bitmapscan.sql index 5804c942176a5c1ea1b60531683806e7282da65a..bcc98493a4c5dcb259e4deb78fe4054fa03d1e68 100644 --- a/src/test/regress/sql/bitmapscan.sql +++ b/src/test/regress/sql/bitmapscan.sql @@ -109,3 +109,171 @@ set enable_indexscan=off; set enable_bitmapscan=on; select * from bm_test where a in (1,3,5); + +-- Create a heap table. +CREATE TABLE card_heap_table_w_bitmap (id INTEGER, v VARCHAR) DISTRIBUTED BY (id); + +-- Insert a few rows. +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (1, + 'Water molecules cling because their electrostatic charge is polarized.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (2, + 'The first law of thermodynamics is that matter and energy can neither be created nor destroyed.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (3, + 'The second law of thermodynamics essentially says that you cannot recycle energy.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (4, + 'The mass of the universe is finite and static.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (5, + 'Population is growing exponentially.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (6, + 'What happens next?'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (7, + 'Fusion works by fusing 4 hydrogen atoms into 1 helium atom, or by fusing 2 deuterium atoms (deuterium is an isotope of hydrogen in which the nucleus contains a neutron).'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (8, + 'Give a man a fish, and he will eat for a day. Teach a man to fission, and he will blow up the planet for all eternity.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (9, + 'Mercury, lead, and cadmium -- a day ago I had me some. Now I feel really dense.'); + +-- Now force the values in the "v" column to use up more space. 
+UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; +UPDATE card_heap_table_w_bitmap SET v = v || v; + +-- Create a bitmap index. +CREATE INDEX card_heap_bitmap_idx1 ON card_heap_table_w_bitmap USING BITMAP (v); + +-- Insert some more rows. +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (10, + 'Rare earth metals are not the only rare metals in the earth.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (11, + 'Who needs cinnabar when you have tuna?'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (12, + 'This drunk walk cinnabar...'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (13, + 'Hydrogen, helium, lithium.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (14, + 'Hydrosphere, heliopause, lithosphere.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (15, + 'Spelunking is not for the claustrophobic.'); +INSERT INTO card_heap_table_w_bitmap (id, v) VALUES (16, + 'Beam me up Spock. There is NO intelligent life on this planet.'); + +-- Add column to the table. +-- The new column will have a low cardinality but a large domain. There +-- will be only a few distinct values in this column, but the values will +-- cover a wide range (from -2147483648 to +2147483647). Note that the 16 +-- existing rows will get a value of NULL for this column. 
+ALTER TABLE card_heap_table_w_bitmap ADD COLUMN lowCardinalityHighDomain INTEGER DEFAULT NULL; +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 17, 'Can we stop malaria by breeding a mosquito that cannot host malaria?', + -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 18, 'Andes, Butte, Cascades, Denali, Everest', + 0); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 19, 'Sawtooth, Sierras, Sangre de Cristos', + 2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 20, 'Ganges, Brahmaputra, Indus', + -2147483648); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 21, NULL, -2147483648); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 22, 'Amazon, Mad, Mississipi, Ohio, Sacramento, Merced', -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES ( + 23, 'Yellow, Red, Green, Blue Nile, White Nile, denial', + -2147483647); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (24, + 'Earthquake supplies: water, sleeping bag, hand sanitizer', + 0); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (25, + 'radio, batteries, flashlight, camp stove', 2147483646); +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) VALUES (26, + 'books, first aid equipment, axe, water purifier', 2147483647); + +-- Insert enough rows to get us up to 10,000 rows. +INSERT INTO card_heap_table_w_bitmap (id, v, lowCardinalityHighDomain) + SELECT i, CAST(i AS VARCHAR), i % 100 FROM generate_series(27, 10000) i; + +-- The following CREATE INDEX statement helps us test all of the following +-- conditions: +-- a multi-column index. +-- an index that contains columns that are also in another index. 
+-- a bitmap index on a column with a large domain but a small cardinality. +CREATE INDEX index2 ON card_heap_table_w_bitmap USING BITMAP (lowCardinalityHighDomain, v); + +-- analyze the table +ANALYZE card_heap_table_w_bitmap; + +-- Although we have 10,000 rows or more, the lowCardinalityHighDomain column +-- has only about 104 distinct values: 0-99, -2147483648, -2147483647, +-- 2147483647 and 2147483646. +SELECT COUNT(DISTINCT lowCardinalityHighDomain) FROM card_heap_table_w_bitmap; + +-- There should be 99 rows with this value. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 5; + +-- Each of these tests a "single-sided range". +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain < 0; +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 100; + +-- Select an individual row. +SELECT * FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 2147483646; + +UPDATE card_heap_table_w_bitmap SET lowCardinalityHighDomain = NULL WHERE lowCardinalityHighDomain = 4; +SELECT COUNT(DISTINCT lowCardinalityHighDomain) FROM card_heap_table_w_bitmap; +-- There should be approximately 115 NULL values (99 that we just updated, +-- and 16 original rows that got NULL when we added the column). +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain IS NULL; +-- There should no longer be any rows with value 4. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 4; + +-- We should have 10,000 rows now. +SELECT COUNT(*) FROM card_heap_table_w_bitmap; + +-- This should delete 99 rows. +DELETE FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 3; + +-- There should be 99 records like this. 
+SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 22; + +-- Now reduce the cardinality +DELETE FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 30 + AND lowCardinalityHighDomain <= 50 AND lowCardinalityHighDomain % 2 = 0; + +SELECT COUNT(*) FROM card_heap_table_w_bitmap; +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain < 10; +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain > 100; +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain IS NULL; +-- The number of rows updated here should be equal to the total number of rows +-- minus the number that have lowCardinalityHighDomain less than 10 +-- minus the number that have lowCardinalityHighDomain greater than 100 +-- minus the number that are NULL (if any). +UPDATE card_heap_table_w_bitmap SET lowCardinalityHighDomain = 200 WHERE lowCardinalityHighDomain >= 10 and lowCardinalityHighDomain <= 100; + +-- Should be around 14 rows. +SELECT DISTINCT lowCardinalityHighDomain FROM card_heap_table_w_bitmap; + +REINDEX INDEX card_heap_bitmap_idx1; + +-- Should still be around 14 rows. +SELECT DISTINCT lowCardinalityHighDomain FROM card_heap_table_w_bitmap; + +-- There should be 99 records like this. +SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 7; + +-- There should be 7983 rows like this. 
+SELECT COUNT(*) FROM card_heap_table_w_bitmap WHERE lowCardinalityHighDomain = 200; + +ALTER TABLE card_heap_table_w_bitmap RENAME COLUMN lowCardinalityHighDomain TO highCardinalityHighDomain; + +-- Now add a lot more rows with few repeated values so that the +-- cardinality becomes quite high (around 50,000 distinct values) +INSERT INTO card_heap_table_w_bitmap (id, v, highCardinalityHighDomain) + SELECT i, CAST(i AS VARCHAR), i % 50000 FROM generate_series(1000001, 1050000) i; + +SELECT COUNT(DISTINCT(highCardinalityHighDomain)) AS distinct_hchd FROM card_heap_table_w_bitmap ORDER BY distinct_hchd;