From a680cd44393e32ab5aaa1100cd4eb04b66e4e00c Mon Sep 17 00:00:00 2001 From: Adam Berlin Date: Fri, 22 Mar 2019 18:26:12 +0000 Subject: [PATCH] Allow large Append-only table to have GIN index. Postgres optimizes the storage of the ItemPointer in the Gin Posting List. It only stores 11 bits for the offset number because heap tables only have enough tuples per block to fit in 11 bits. However, Greenplum append-only tables store 16 bits worth of offset numbers. Initially we thought we'd need to modify decode_varbyte() but it turns out that it is OK. It handles 48 bits already. Co-authored-by: Alexandra Wang Co-authored-by: Ashwin Agrawal --- src/backend/access/gin/ginpostinglist.c | 9 +- src/backend/access/gin/test/Makefile | 33 +++++ src/backend/access/gin/test/debugger.c | 1 + src/backend/access/gin/test/debugger.h | 1 + .../access/gin/test/ginpostinglist_fakes.c | 55 +++++++ .../access/gin/test/ginpostinglist_test.c | 135 ++++++++++++++++++ .../expected/appendonly_with_gin_index.out | 21 +++ .../regress/sql/appendonly_with_gin_index.sql | 19 +++ 8 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 src/backend/access/gin/test/Makefile create mode 100644 src/backend/access/gin/test/debugger.c create mode 100644 src/backend/access/gin/test/debugger.h create mode 100644 src/backend/access/gin/test/ginpostinglist_fakes.c create mode 100644 src/backend/access/gin/test/ginpostinglist_test.c create mode 100644 src/test/regress/expected/appendonly_with_gin_index.out create mode 100644 src/test/regress/sql/appendonly_with_gin_index.sql diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c index 606a824f12..c6c26e0f4d 100644 --- a/src/backend/access/gin/ginpostinglist.c +++ b/src/backend/access/gin/ginpostinglist.c @@ -70,8 +70,15 @@ * than enough. 
It's tempting to derive this from MaxHeapTuplesPerPage, and * use the minimum number of bits, but that would require changing the on-disk * format if MaxHeapTuplesPerPage changes. Better to leave some slack. + * + * + * Greenplum modification: + * + * Greenplum's append-only tables use the full 16 bit offset number range, so + * Greenplum removes the storage optimization made by Postgres for heap tables. + * */ -#define MaxHeapTuplesPerPageBits 11 +#define MaxHeapTuplesPerPageBits 16 static inline uint64 itemptr_to_uint64(const ItemPointer iptr) diff --git a/src/backend/access/gin/test/Makefile b/src/backend/access/gin/test/Makefile new file mode 100644 index 0000000000..e2df8a3fe2 --- /dev/null +++ b/src/backend/access/gin/test/Makefile @@ -0,0 +1,33 @@ +root_dir=../../../../.. +cmockery_dir=$(root_dir)/src/test/unit/cmockery +gpdb_include_dir=$(root_dir)/src/include/ +gin_dir=$(root_dir)/src/backend/access/gin + + +clean: + rm -f ginpostinglist_test.o + + +install: + make -C $(gin_dir); + gcc -g ginpostinglist_test.c \ + debugger.c \ + ginpostinglist_fakes.c \ + $(cmockery_dir)/cmockery.c \ + -Wall \ + -I $(cmockery_dir) \ + -I $(gpdb_include_dir) \ + -I $(gin_dir)/test/ \ + $(gin_dir)/ginpostinglist.o \ + -o ginpostinglist_test.o + + +unittest-check: install + ./ginpostinglist_test.o + + +check: unittest-check + + +debug: install + gdb --eval-command='run' ./ginpostinglist_test.o diff --git a/src/backend/access/gin/test/debugger.c b/src/backend/access/gin/test/debugger.c new file mode 100644 index 0000000000..5fd558d077 --- /dev/null +++ b/src/backend/access/gin/test/debugger.c @@ -0,0 +1 @@ +void debugger(void) {}; diff --git a/src/backend/access/gin/test/debugger.h b/src/backend/access/gin/test/debugger.h new file mode 100644 index 0000000000..456e18df1f --- /dev/null +++ b/src/backend/access/gin/test/debugger.h @@ -0,0 +1 @@ +void debugger(void); diff --git a/src/backend/access/gin/test/ginpostinglist_fakes.c 
b/src/backend/access/gin/test/ginpostinglist_fakes.c new file mode 100644 index 0000000000..32c52b5b04 --- /dev/null +++ b/src/backend/access/gin/test/ginpostinglist_fakes.c @@ -0,0 +1,55 @@ +#include "postgres.h" +#include "debugger.h" + +#include "access/gin_private.h" +#include + +/* + * + * ginpostlist Fakes + * + */ + +/* + * Ensure that assertions will trigger + */ +bool assert_enabled = true; + + +void ExceptionalCondition(const char *conditionName, const char *errorType, const char *fileName, int lineNumber) { + fprintf(stderr, + "\n\nassertion failed: %s, %s, %s, line number: %d\n\n", + conditionName, + errorType, + fileName, + lineNumber); + exit(1); +} + + +/* + * Fake memory allocation methods + */ +void *palloc(Size size) { + return calloc(1, size); +}; + + +void pfree(void *pointer) { + free(pointer); +}; + + +/* + * not used. + */ +void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids, bool recheck) { + fprintf(stderr, "tbm_add_tuples: actually used and is not expected."); +}; + + +void *repalloc(void *pointer, Size size) { + fprintf(stderr, "repalloc: actually used and is not expected."); + return NULL; +}; + diff --git a/src/backend/access/gin/test/ginpostinglist_test.c b/src/backend/access/gin/test/ginpostinglist_test.c new file mode 100644 index 0000000000..5a833944a3 --- /dev/null +++ b/src/backend/access/gin/test/ginpostinglist_test.c @@ -0,0 +1,135 @@ +#include +#include +#include +#include + + +#include "cmockery.h" +#include "debugger.h" + + +#include "postgres.h" +#include "access/gin_private.h" + + +BlockId make_block_id(uint16 high, uint16 low) { + BlockId block_id; + block_id = (BlockId) calloc(1, sizeof(BlockIdData)); + block_id->bi_hi = high; + block_id->bi_lo = low; + return block_id; +} + + +ItemPointer make_item_pointer(BlockId block_id, OffsetNumber offset_number) { + ItemPointer item_pointer; + item_pointer = (ItemPointer) calloc(1, sizeof(ItemPointerData)); + item_pointer->ip_blkid = *block_id; + 
item_pointer->ip_posid = offset_number; + return item_pointer; +} + + +/* + * Postgres expects item pointers' offsets to fit in 11 bits. + * Greenplum append-only tables allow for the full 16 bits of OffsetNumber + */ +void test_compress_gin_posting_list_with_item_pointer_with_offset_larger_than_eleven_bits() { + OffsetNumber offset_number_larger_than_11_bits = 3000; + int number_of_item_pointers = 1; + int max_size = 100 * sizeof(ItemPointerData); + + ItemPointer item_pointer = make_item_pointer( + make_block_id(0, 0), + offset_number_larger_than_11_bits); + + int *number_written = calloc(1, sizeof(int)); + + GinPostingList *gin_posting_list = ginCompressPostingList( + item_pointer, + number_of_item_pointers, + max_size, + number_written); + + assert_int_equal(*gin_posting_list->bytes, 0); + assert_int_equal(gin_posting_list->nbytes, 0); + assert_int_equal(*number_written, 1); + assert_int_equal(gin_posting_list->first.ip_posid, item_pointer->ip_posid); +} + +void test_compress_gin_posting_list_with_multiple_item_pointers() { + OffsetNumber offset_number_with_all_bits_on = 65535; + OffsetNumber offset_number_larger_than_11_bits = 5000; + OffsetNumber other_offset_number_larger_than_11_bits = 5000; + + ItemPointer first_item_pointer = make_item_pointer( + make_block_id(0, 0), + offset_number_larger_than_11_bits); + + ItemPointer second_item_pointer = make_item_pointer( + make_block_id(65534, 65535), + offset_number_with_all_bits_on); + + /* + * Last itempointer in the list does not get encoded or decoded + */ + ItemPointer third_item_pointer = make_item_pointer( + make_block_id(65535, 65535), + other_offset_number_larger_than_11_bits); + + ItemPointerData item_pointer_datas[3]; + ItemPointerCopy(first_item_pointer, &item_pointer_datas[0]); + ItemPointerCopy(second_item_pointer, &item_pointer_datas[1]); + ItemPointerCopy(third_item_pointer, &item_pointer_datas[2]); + + int number_of_item_pointers = 3; + int max_size = 100 * sizeof(ItemPointerData); + int
*number_written = calloc(1, sizeof(int)); + + GinPostingList *gin_posting_list = ginCompressPostingList( + &item_pointer_datas[0], + number_of_item_pointers, + max_size, + number_written); + + int number_of_decoded_item_pointers = 0; + ItemPointer decoded_item_pointers = ginPostingListDecode( + gin_posting_list, + &number_of_decoded_item_pointers); + + // Number of items compressed successfully + assert_int_equal(*number_written, 3); + + // Number of items decoded successfully + assert_int_equal(number_of_decoded_item_pointers, 3); + + // Block ids can be decoded successfully + assert_int_equal(decoded_item_pointers[0].ip_blkid.bi_hi, 0); + assert_int_equal(decoded_item_pointers[0].ip_blkid.bi_lo, 0); + + assert_int_equal(decoded_item_pointers[1].ip_blkid.bi_hi, 65534); + assert_int_equal(decoded_item_pointers[1].ip_blkid.bi_lo, 65535); + + // Offsets are decoded successfully + assert_int_equal( + decoded_item_pointers[0].ip_posid, + 5000); + + assert_int_equal( + decoded_item_pointers[1].ip_posid, + 65535); +} + + +int +main(int argc, char *argv[]) +{ + cmockery_parse_arguments(argc, argv); + + const UnitTest tests[] = { + unit_test(test_compress_gin_posting_list_with_item_pointer_with_offset_larger_than_eleven_bits), + unit_test(test_compress_gin_posting_list_with_multiple_item_pointers) + }; + + return run_tests(tests); +} diff --git a/src/test/regress/expected/appendonly_with_gin_index.out b/src/test/regress/expected/appendonly_with_gin_index.out new file mode 100644 index 0000000000..e093ccf181 --- /dev/null +++ b/src/test/regress/expected/appendonly_with_gin_index.out @@ -0,0 +1,21 @@ +-- Given I have an append-only table +create table users( + first_name tsvector +) with (appendonly=true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
+-- And I have a large amount of data in the table +insert into users + select to_tsvector(md5(random()::text)) + from generate_series(1, 6000) i; +insert into users values ('John'); +-- When I create a GIN index on users +set gp_debug_linger=1; +CREATE INDEX users_search_idx ON users USING gin (first_name); +reset gp_debug_linger; +-- Then I should be able to query the table +select * from users where first_name = 'John'; + first_name +------------ + 'John' +(1 row) + diff --git a/src/test/regress/sql/appendonly_with_gin_index.sql b/src/test/regress/sql/appendonly_with_gin_index.sql new file mode 100644 index 0000000000..ca8a4196ab --- /dev/null +++ b/src/test/regress/sql/appendonly_with_gin_index.sql @@ -0,0 +1,19 @@ +-- Given I have an append-only table +create table users( + first_name tsvector +) with (appendonly=true); + +-- And I have a large amount of data in the table +insert into users + select to_tsvector(md5(random()::text)) + from generate_series(1, 6000) i; + +insert into users values ('John'); + +-- When I create a GIN index on users +set gp_debug_linger=1; +CREATE INDEX users_search_idx ON users USING gin (first_name); +reset gp_debug_linger; + +-- Then I should be able to query the table +select * from users where first_name = 'John'; -- GitLab