diff --git a/Makefile b/Makefile index 15d24d0581..7b3e9dc21c 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,17 @@ MODULE_big = rum EXTENSION = rum -EXTVERSION = 1.3 +EXTVERSION = 1.4 PGFILEDESC = "RUM index access method" OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ src/rumbtree.o src/rumbulk.o src/rumdatapage.o \ src/rumentrypage.o src/rumget.o src/ruminsert.o \ src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \ - src/btree_rum.o src/rum_arr_utils.o $(WIN32RES) + src/btree_rum.o src/rum_arr_utils.o src/rum_debug_funcs.o $(WIN32RES) DATA = rum--1.0--1.1.sql rum--1.1--1.2.sql \ - rum--1.2--1.3.sql + rum--1.2--1.3.sql rum--1.3--1.4.sql DATA_built = $(EXTENSION)--$(EXTVERSION).sql diff --git a/README.md b/README.md index 28b967ea2b..1351af7e87 100644 --- a/README.md +++ b/README.md @@ -306,6 +306,134 @@ For type: `anyarray` This operator class stores `anyarray` elements with any supported by module field. +## Functions for low-level inspect of the RUM index pages + +The RUM index provides several functions for low-level inspect of all types of its pages: + +### `rum_metapage_info(rel_name text, blk_num int4) returns record` + +`rum_metapage_info` returns information about a RUM index metapage. For example: + +```SQL +SELECT * FROM rum_metapage_info('rum_index', 0); +-[ RECORD 1 ]----+----------- +pending_head | 4294967295 +pending_tail | 4294967295 +tail_free_size | 0 +n_pending_pages | 0 +n_pending_tuples | 0 +n_total_pages | 87 +n_entry_pages | 80 +n_data_pages | 6 +n_entries | 1650 +version | 0xC0DE0002 +``` + +### `rum_page_opaque_info(rel_name text, blk_num int4) returns record` + +`rum_page_opaque_info` returns information about a RUM index opaque area: `left` and `right` links, `maxoff` -- the number of elements that are stored on the page (this parameter is used differently for different types of pages), `freespace` -- free space on the page. 
+ +For example: + +```SQL +SELECT * FROM rum_page_opaque_info('rum_index', 10); + leftlink | rightlink | maxoff | freespace | flags +----------+-----------+--------+-----------+-------- + 6 | 11 | 0 | 0 | {leaf} +``` + +### `rum_internal_entry_page_items(rel_name text, blk_num int4) returns set of record` + +`rum_internal_entry_page_items` returns information that is stored on the internal pages of the entry tree (it is extracted from `IndexTuples`). For example: + +```SQL +SELECT * FROM rum_internal_entry_page_items('rum_index', 1); + key | attrnum | category | down_link +---------------------------------+---------+------------------+----------- + 3d | 1 | RUM_CAT_NORM_KEY | 3 + 6k | 1 | RUM_CAT_NORM_KEY | 2 + a8 | 1 | RUM_CAT_NORM_KEY | 4 +... + Tue May 10 21:21:22.326724 2016 | 2 | RUM_CAT_NORM_KEY | 83 + Sat May 14 19:21:22.326724 2016 | 2 | RUM_CAT_NORM_KEY | 84 + Wed May 18 17:21:22.326724 2016 | 2 | RUM_CAT_NORM_KEY | 85 + +inf | | | 86 +(79 rows) +``` + +RUM (like GIN) on the internal pages of the entry tree packs the downward link and the key in pairs of the following type: `(P_n, K_{n+1})`. It turns out that there is no key for `P_0` (it is assumed to be equal to `-inf`), and for the last key `K_{n+1}` there is no downward link (it is assumed that it is the largest key (or high key) in the subtree to which the `P_n` link leads). For this reason (the key is `+inf` because it is the rightmost page at the current level of the tree), in the example above, the last line contains the key `+inf` (this key does not have a downward link). + +### `rum_leaf_entry_page_items(rel_name text, blk_num int4) returns set of record` + +`rum_leaf_entry_page_items` returns information that is stored on the entry tree leaf pages (it is extracted from compressed posting lists). 
For example: + +```SQL +SELECT * FROM rum_leaf_entry_page_items('rum_index', 10); + key | attrnum | category | tuple_id | add_info_is_null | add_info | is_posting_tree | posting_tree_root +-----+---------+------------------+----------+------------------+----------+------------------+-------------------- + ay | 1 | RUM_CAT_NORM_KEY | (0,16) | t | | f | + ay | 1 | RUM_CAT_NORM_KEY | (0,23) | t | | f | + ay | 1 | RUM_CAT_NORM_KEY | (2,1) | t | | f | +... + az | 1 | RUM_CAT_NORM_KEY | (0,15) | t | | f | + az | 1 | RUM_CAT_NORM_KEY | (0,22) | t | | f | + az | 1 | RUM_CAT_NORM_KEY | (1,4) | t | | f | +... + b9 | 1 | RUM_CAT_NORM_KEY | | | | t | 7 +... +(1602 rows) +``` + +Each posting list is an `IndexTuple` that stores the key value and a compressed list of `tids`. In the function `rum_leaf_entry_page_items()`, the key value is attached to each `tid` for convenience, but on the page it is stored in a single instance. + +If the number of `tids` is too large, then instead of a posting list, a posting tree will be used for storage. In the example above, a posting tree was created (the key in the posting tree is the `tid`) for the key with the value `b9`. In this case, instead of the posting list, the magic number and the page number, which is the root of the posting tree, are stored inside the `IndexTuple`. + +### `rum_internal_data_page_items(rel_name text, blk_num int4) returns set of record` + +`rum_internal_data_page_items` returns information that is stored on the internal pages of the posting tree (it is extracted from arrays of `RumPostingItem` structures). 
For example: + +```SQL +SELECT * FROM rum_internal_data_page_items('rum_index', 7); + is_high_key | block_number | tuple_id | add_info_is_null | add_info +-------------+--------------+----------+------------------+---------- + t | | (0,0) | t | + f | 9 | (138,79) | t | + f | 8 | (0,0) | t | +(3 rows) +``` + +Each element on the internal pages of the posting tree contains the high key (`tid`) value for the child page and a link to this child page (as well as additional information if it was added when creating the index). + +At the beginning of the internal pages of the posting tree, the high key of this page is always stored (if it has the value `(0,0)`, this is equivalent to `+inf`; this is always the case when the page is the rightmost). + +At the moment, RUM does not support storing (as additional information) data types that are passed by reference on the internal pages of the posting tree. Therefore, this output is possible: + +```SQL + is_high_key | block_number | tuple_id | add_info_is_null | add_info +-------------+--------------+----------+------------------+------------------------------------------------ +... + f | 23 | (39,43) | f | varlena types in posting tree is not supported + f | 22 | (74,9) | f | varlena types in posting tree is not supported +... +``` + +### `rum_leaf_data_page_items(rel_name text, blk_num int4) returns set of record` + +`rum_leaf_data_page_items` returns information that is stored on the leaf pages of the posting tree (it is extracted from compressed posting lists). For example: + +```SQL +SELECT * FROM rum_leaf_data_page_items('rum_idx', 9); + is_high_key | tuple_id | add_info_is_null | add_info +-------------+-----------+------------------+---------- + t | (138,79) | t | + f | (0,9) | t | + f | (1,23) | t | + f | (3,5) | t | + f | (3,22) | t | +``` + +Unlike entry tree leaf pages, on posting tree leaf pages, compressed posting lists are not stored in an `IndexTuple`. The high key is the largest key on the page. 
+ ## Todo - Allow multiple additional information (lexemes positions + timestamp). diff --git a/expected/security_1.out b/expected/security_1.out index 03f86e3cc1..c7fac1b12b 100644 --- a/expected/security_1.out +++ b/expected/security_1.out @@ -18,5 +18,5 @@ CONTEXT: SQL statement "CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS '$libdir/rum' LANGUAGE C STRICT STABLE" -extension script file "rum--1.3.sql", near line 1530 +extension script file "rum--1.4.sql", near line 1530 DROP FUNCTION rum_anyarray_similar(anyarray,anyarray); diff --git a/meson.build b/meson.build index b4336f0668..97f5c8b6df 100644 --- a/meson.build +++ b/meson.build @@ -4,7 +4,7 @@ # of the contrib source tree. extension = 'rum' -extversion = '1.3' +extversion = '1.4' rum_sources = files( 'src/btree_rum.c', @@ -49,6 +49,7 @@ install_data( 'rum--1.0--1.1.sql', 'rum--1.1--1.2.sql', 'rum--1.2--1.3.sql', + 'rum--1.3--1.4.sql', kwargs: contrib_data_args, ) diff --git a/rum--1.3--1.4.sql b/rum--1.3--1.4.sql new file mode 100644 index 0000000000..37764bc95e --- /dev/null +++ b/rum--1.3--1.4.sql @@ -0,0 +1,131 @@ +/* + * RUM version 1.4 + */ + +/*--------------------RUM debug functions-----------------------*/ + +CREATE FUNCTION rum_metapage_info( + IN rel_name text, + IN blk_num int4, + OUT pending_head bigint, + OUT pending_tail bigint, + OUT tail_free_size int4, + OUT n_pending_pages bigint, + OUT n_pending_tuples bigint, + OUT n_total_pages bigint, + OUT n_entry_pages bigint, + OUT n_data_pages bigint, + OUT n_entries bigint, + OUT version varchar) +AS 'MODULE_PATHNAME', 'rum_metapage_info' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION rum_page_opaque_info( + IN rel_name text, + IN blk_num int4, + OUT leftlink bigint, + OUT rightlink bigint, + OUT maxoff int4, + OUT freespace int4, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'rum_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE OR REPLACE FUNCTION +rum_page_items_info(rel_name text, blk_num int4, 
page_type int4)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'rum_page_items_info'
+LANGUAGE C STRICT;
+
+-- Items of a posting tree leaf page ({data, leaf}); page_type 0.
+CREATE FUNCTION rum_leaf_data_page_items(
+	rel_name text,
+	blk_num int4
+)
+RETURNS TABLE(
+	is_high_key bool,
+	tuple_id tid,
+	add_info_is_null bool,
+	add_info varchar
+)
+AS $$
+	SELECT *
+	FROM rum_page_items_info(rel_name, blk_num, 0)
+		AS rum_page_items_info(
+			is_high_key bool,
+			tuple_id tid,
+			add_info_is_null bool,
+			add_info varchar
+		);
+$$ LANGUAGE sql;
+
+-- Items of a posting tree internal page ({data}); page_type 1.
+CREATE FUNCTION rum_internal_data_page_items(
+	rel_name text,
+	blk_num int4
+)
+RETURNS TABLE(
+	is_high_key bool,
+	block_number int4,
+	tuple_id tid,
+	add_info_is_null bool,
+	add_info varchar
+)
+AS $$
+	SELECT *
+	FROM rum_page_items_info(rel_name, blk_num, 1)
+		AS rum_page_items_info(
+			is_high_key bool,
+			block_number int4,
+			tuple_id tid,
+			add_info_is_null bool,
+			add_info varchar
+		);
+$$ LANGUAGE sql;
+
+-- Items of an entry tree leaf page ({leaf}); page_type 2.
+-- Column names must match the definition in rum_init.sql so that an
+-- upgraded extension exposes the same interface as a fresh install.
+CREATE FUNCTION rum_leaf_entry_page_items(
+	rel_name text,
+	blk_num int4
+)
+RETURNS TABLE(
+	key varchar,
+	attrnum int4,
+	category varchar,
+	tuple_id tid,
+	add_info_is_null bool,
+	add_info varchar,
+	is_posting_tree bool,
+	posting_tree_root int4
+)
+AS $$
+	SELECT *
+	FROM rum_page_items_info(rel_name, blk_num, 2)
+		AS rum_page_items_info(
+			key varchar,
+			attrnum int4,
+			category varchar,
+			tuple_id tid,
+			add_info_is_null bool,
+			add_info varchar,
+			is_posting_tree bool,
+			posting_tree_root int4
+		);
+$$ LANGUAGE sql;
+
+-- Items of an entry tree internal page ({}); page_type 3.
+CREATE FUNCTION rum_internal_entry_page_items(
+	rel_name text,
+	blk_num int4
+)
+RETURNS TABLE(
+	key varchar,
+	attrnum int4,
+	category varchar,
+	down_link int4)
+AS $$
+	SELECT *
+	FROM rum_page_items_info(rel_name, blk_num, 3)
+		AS rum_page_items_info(
+			key varchar,
+			attrnum int4,
+			category varchar,
+			down_link int4
+		);
+$$ LANGUAGE sql;
diff --git a/rum.control b/rum.control
index 30a00ccf67..d9e56582b7 100644
--- a/rum.control
+++ b/rum.control
@@ -1,5 +1,5 @@
 # RUM extension
 comment = 'RUM index access method'
-default_version = '1.3'
+default_version = '1.4' module_pathname = '$libdir/rum' relocatable = true diff --git a/rum_init.sql b/rum_init.sql index 621c4d2b9f..e5fd924a6b 100644 --- a/rum_init.sql +++ b/rum_init.sql @@ -412,7 +412,7 @@ AS /* * RUM version 1.1 */ - + CREATE FUNCTION rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal) RETURNS bool AS 'MODULE_PATHNAME' @@ -1724,3 +1724,134 @@ RETURNS float4 AS 'MODULE_PATHNAME', 'rum_ts_score_td' LANGUAGE C IMMUTABLE STRICT; +/* + * RUM version 1.4 + */ + +/*--------------------RUM debug functions-----------------------*/ + +CREATE FUNCTION rum_metapage_info( + IN rel_name text, + IN blk_num int4, + OUT pending_head bigint, + OUT pending_tail bigint, + OUT tail_free_size int4, + OUT n_pending_pages bigint, + OUT n_pending_tuples bigint, + OUT n_total_pages bigint, + OUT n_entry_pages bigint, + OUT n_data_pages bigint, + OUT n_entries bigint, + OUT version varchar) +AS 'MODULE_PATHNAME', 'rum_metapage_info' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION rum_page_opaque_info( + IN rel_name text, + IN blk_num int4, + OUT leftlink bigint, + OUT rightlink bigint, + OUT maxoff int4, + OUT freespace int4, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'rum_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE OR REPLACE FUNCTION +rum_page_items_info(rel_name text, blk_num int4, page_type int4) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'rum_page_items_info' +LANGUAGE C STRICT; + +CREATE FUNCTION rum_leaf_data_page_items( + rel_name text, + blk_num int4 +) +RETURNS TABLE( + is_high_key bool, + tuple_id tid, + add_info_is_null bool, + add_info varchar +) +AS $$ + SELECT * + FROM rum_page_items_info(rel_name, blk_num, 0) + AS rum_page_items_info( + is_high_key bool, + tuple_id tid, + add_info_is_null bool, + add_info varchar + ); +$$ LANGUAGE sql; + +CREATE FUNCTION rum_internal_data_page_items( + rel_name text, + blk_num int4 +) +RETURNS TABLE( + is_high_key bool, + block_number int4, + tuple_id tid, + 
add_info_is_null bool, + add_info varchar +) +AS $$ + SELECT * + FROM rum_page_items_info(rel_name, blk_num, 1) + AS rum_page_items_info( + is_high_key bool, + block_number int4, + tuple_id tid, + add_info_is_null bool, + add_info varchar + ); +$$ LANGUAGE sql; + +CREATE FUNCTION rum_leaf_entry_page_items( + rel_name text, + blk_num int4 +) +RETURNS TABLE( + key varchar, + attrnum int4, + category varchar, + tuple_id tid, + add_info_is_null bool, + add_info varchar, + is_posting_tree bool, + posting_tree_root int4 +) +AS $$ + SELECT * + FROM rum_page_items_info(rel_name, blk_num, 2) + AS rum_page_items_info( + key varchar, + attrnum int4, + category varchar, + tuple_id tid, + add_info_is_null bool, + add_info varchar, + is_posting_tree bool, + posting_tree_root int4 + ); +$$ LANGUAGE sql; + +CREATE FUNCTION rum_internal_entry_page_items( + rel_name text, + blk_num int4 +) +RETURNS TABLE( + key varchar, + attrnum int4, + category varchar, + down_link int4) +AS $$ + SELECT * + FROM rum_page_items_info(rel_name, blk_num, 3) + AS rum_page_items_info( + key varchar, + attrnum int4, + category varchar, + down_link int4 + ); +$$ LANGUAGE sql; diff --git a/src/rum.h b/src/rum.h index 48589144cc..d628754e76 100644 --- a/src/rum.h +++ b/src/rum.h @@ -21,6 +21,7 @@ #include "storage/bufmgr.h" #include "utils/datum.h" #include "utils/memutils.h" +#include "tsearch/ts_type.h" #include "rumsort.h" @@ -836,6 +837,8 @@ extern RumItem *rumGetBAEntry(BuildAccumulator *accum, #define RUM_ADDINFO_JOIN 10 #define RUMNProcs 10 +#define LOWERMASK 0x1F + extern PGDLLEXPORT Datum rum_extract_tsvector(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum rum_extract_tsquery(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum rum_tsvector_config(PG_FUNCTION_ARGS); @@ -847,6 +850,9 @@ extern PGDLLEXPORT Datum rum_ts_distance_td(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum tsquery_to_distance_query(PG_FUNCTION_ARGS); +extern char* decompress_pos(char *ptr, WordEntryPos *pos); +extern unsigned int count_pos(char 
*ptr, int len); + /* rum_arr_utils.c */ typedef enum SimilarityType { diff --git a/src/rum_debug_funcs.c b/src/rum_debug_funcs.c new file mode 100644 index 0000000000..b138ef00de --- /dev/null +++ b/src/rum_debug_funcs.c @@ -0,0 +1,1650 @@ +/*------------------------------------------------------------------------- + * + * rum_debug_funcs.c + * Functions to investigate the content of RUM indexes + * + * Copyright (c) 2025, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/itup.h" +#include "access/relation.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/lockdefs.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/varlena.h" +#include "tsearch/ts_type.h" + +#include "rum.h" + +PG_FUNCTION_INFO_V1(rum_metapage_info); +PG_FUNCTION_INFO_V1(rum_page_opaque_info); +PG_FUNCTION_INFO_V1(rum_page_items_info); + +/* + * Below are declarations of enums, structures, and macros + * for the rum_page_items_info() function, which is used + * to extract data from different types of pages in the RUM + * index. Their use is implied only inside rum_debug_funcs.c. 
+ */
+
+/*
+ * True when the current item carries non-null additional information
+ * and the type of that additional information is known.
+ */
+#define RumCurPitemAddInfoIsNormal(piState) \
+	(!((piState)->curPitem.item.addInfoIsNull) && \
+	(piState)->curKeyAddInfoOid != InvalidOid)
+
+/* True when the additional information is of type bytea */
+#define RumAddInfoIsPositions(piState) \
+	((piState)->curKeyAddInfoOid == BYTEAOID)
+
+/* True when the current tuple is the last one on a rightmost page */
+#define RumIsEntryInternalHighKey(piState) \
+	(RumPageRightMost((piState)->page) && \
+	(piState)->curTupleNum == (piState)->maxoff)
+
+/* Page-kind predicates based on the pageType recorded in the scan state */
+#define RumIsDataPage(piState) \
+	((piState)->pageType == LEAF_DATA_PAGE || \
+	(piState)->pageType == INTERNAL_DATA_PAGE)
+
+#define RumIsEntryPage(piState) \
+	((piState)->pageType == LEAF_ENTRY_PAGE || \
+	(piState)->pageType == INTERNAL_ENTRY_PAGE)
+
+/* Add-info attribute of the current key attribute; may be NULL */
+#define RumGetAddInfoAttr(piState) \
+	((piState)->rumState->addAttrs[(piState)->curKeyAttnum - 1])
+
+/* Load the IndexTuple at offset curTupleNum into curItup */
+#define RumGetNewIndexTuple(piState) \
+do { \
+	(piState)->curItup = \
+		(IndexTuple) PageGetItem((piState)->page, \
+								 PageGetItemId((piState)->page, \
+											   (piState)->curTupleNum)); \
+} while(0)
+
+/*
+ * Decode the next item of a compressed posting list into curPitem.item
+ * and advance itemPtr past it.
+ *
+ * Like every other statement macro here, this one must NOT end with a
+ * semicolon: the call site supplies it, which keeps the macro usable as
+ * the body of an if/else.
+ */
+#define RumGetNewItemPostingList(piState) \
+do { \
+	(piState)->itemPtr = \
+		rumDataPageLeafRead((piState)->itemPtr, \
+							(piState)->curKeyAttnum, \
+							&((piState)->curPitem.item), \
+							false, (piState)->rumState); \
+} while(0)
+
+/* Writers that store one field of the current item into values[counter] */
+#define RumWriteResAddInfoIsNullToValues(piState, counter) \
+do { \
+	(piState)->values[(counter)] = \
+		BoolGetDatum((piState)->curPitem.item.addInfoIsNull); \
+} while(0)
+
+#if PG_VERSION_NUM >= 160000
+#define RumWriteResIptrToValues(piState, counter) \
+do { \
+	(piState)->values[(counter)] = \
+		ItemPointerGetDatum(&((piState)->curPitem.item.iptr)); \
+} while(0)
+#else
+#define RumWriteResIptrToValues(piState, counter) \
+do { \
+	(piState)->values[(counter)] = \
+		PointerGetDatum(&((piState)->curPitem.item.iptr)); \
+} while(0)
+#endif
+
+#define RumWriteResBlckNumToValues(piState, counter) \
+do { \
+	(piState)->values[(counter)] = \
+		UInt32GetDatum(RumPostingItemGetBlockNumber(&((piState)->curPitem))); \
+} while(0)
+
+#define RumWriteResAddInfoToValues(piState, counter) \
+do { \
+
(piState)->values[(counter)] = \ + get_datum_text_by_oid((piState)->curPitem.item.addInfo, \ + (piState)->curKeyAddInfoOid); \ +} while(0) + +#define RumWriteResAddInfoPosToValues(piState, counter) \ +do { \ + (piState)->values[(counter)] = \ + get_positions_to_text_datum((piState)->curPitem.item.addInfo); \ +} while(0) + +#define RumReadHighKeyDataPage(piState) \ +do { \ + memcpy(&((piState)->curPitem.item), \ + RumDataPageGetRightBound((piState)->page), \ + sizeof(RumItem)); \ +} while(0) + +#define RumReadKeyDataPage(piState) \ +do { \ + memcpy(&((piState)->curPitem), \ + RumDataPageGetItem((piState)->page, \ + (piState)->srfFctx->call_cntr), sizeof(RumPostingItem)); \ +} while(0) + +#define RumPrepareResultTuple(piState) \ +do { \ + (piState)->resultTuple = \ + heap_form_tuple((piState)->srfFctx->tuple_desc, \ + (piState)->values, (piState)->nulls); \ + (piState)->result = \ + HeapTupleGetDatum((piState)->resultTuple); \ +} while(0) + +#define RumPrepareCurPitemToPostingList(piState) \ + memset(&((piState)->curPitem), 0, sizeof(RumPostingItem)) + +/* + * This is necessary in order for the prepare_scan() + * function to determine the type of the scanned page. + */ +typedef enum pageTypeFlags +{ + LEAF_DATA_PAGE = 0, + INTERNAL_DATA_PAGE = 1, + LEAF_ENTRY_PAGE = 2, + INTERNAL_ENTRY_PAGE = 3 +} pageTypeFlags; + +/* + * The size of the result arrays (values + * and nulls, see RumPageItemsStateData + * structure) depends on the type of page. + */ +typedef enum pageTypeResSize +{ + LEAF_DATA_PAGE_RES_SIZE = 4, + INTERNAL_DATA_PAGE_RES_SIZE = 5, + LEAF_ENTRY_PAGE_RES_SIZE = 8, + INTERNAL_ENTRY_PAGE_RES_SIZE = 5, +} pageTypeResSize; + +/* + * A structure that stores information between + * calls to the rum_page_items_info() function. + * This information is necessary to scan the page. + */ +typedef struct RumPageItemsStateData +{ + /* + * A pointer to the RumState structure + * that describes the scanned index. 
+ */ + RumState *rumState; + + /* Scanned page info */ + Page page; + uint32 pageNum; + + /* + * The type of the scanned page, can be: + * {} -- INTERNAL_ENTRY_PAGE + * {leaf} -- LEAF_ENTRY_PAGE + * {data} -- INTERNAL_DATA_PAGE + * {data, leaf} -- LEAF_DATA_PAGE + */ + pageTypeFlags pageType; + + /* + * The number of scanned items per page. + * + * On the {leaf, data} page, this is the number of + * RumItem structures that are in the compressed posting list. + * + * On the {data} page, this is the number of RumPostingItem structures. + * + * On the {leaf} page, this is the number of IndexTuple, each of + * which contains a compressed posting list. In this case, the size + * of the Posting list is determined using RumGetNPosting(itup). + * + * On the {} page, this is the number of IndexTuple. + */ + int maxoff; + + /* Pointer to the current scanning item */ + Pointer itemPtr; + + /* + * It is used where posting lists are scanned. + * Sometimes only the RumItem it contains is used. + */ + RumPostingItem curPitem; + + /* Current IndexTuple on the page */ + IndexTuple curItup; + + /* The number of the current IndexTuple on the page */ + OffsetNumber curTupleNum; + + /* The number of the current element in the current IndexTuple */ + int curTupleItemNum; + + /* + * The number of the child page that + * is stored in the current IndexTuple + */ + BlockNumber curTupleDownLink; + + /* + * If the current IndexTuple is scanned, then + * you need to move on to the next one. + */ + bool needNewTuple; + + /* + * Parameters of the current key in the IndexTuple + * or the key for which the posting tree was built. 
+ */ + OffsetNumber curKeyAttnum; + Datum curKey; + RumNullCategory curKeyCategory; + Oid curKeyOid; + + /* Information about the type of additional information */ + bool curKeyAddInfoIsNull; + Oid curKeyAddInfoOid; + bool curKeyAddInfoByval; + + /* + * To generate the results of each + * function call rum_page_items_info() + */ + Datum result; + HeapTuple resultTuple; + Datum *values; + bool *nulls; + FuncCallContext *srfFctx; +} RumPageItemsStateData; + +typedef RumPageItemsStateData *RumPageItemsState; + +/* + * This function and get_rel_raw_page() are derived + * from the separation of the get_raw_page_internal() + * function, which was copied from the pageinspect code. + * It is needed in order to call the initRumState() + * function if necessary. + */ +static Relation +get_rel_from_name(text *relName) +{ + RangeVar *relrv; + Relation rel; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relName)); + rel = relation_openrv(relrv, AccessShareLock); + +#if PG_VERSION_NUM >= 150000 + if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from relation \"%s\"", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); +#else + /* Check that this relation has storage */ + if (rel->rd_rel->relkind == RELKIND_VIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from view \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from composite type \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from foreign table \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + 
(errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from partitioned table \"%s\"", + RelationGetRelationName(rel)))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get raw page from partitioned index \"%s\"", + RelationGetRelationName(rel)))); +#endif + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + return rel; +} + +/* + * Get a copy of the relation page. + */ +static Page +get_rel_page(Relation rel, BlockNumber blkNo) +{ + Buffer buf; + Page page; + + if (blkNo >= RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block number %u is out of range for relation \"%s\"", + blkNo, RelationGetRelationName(rel)))); + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkNo, RBM_NORMAL, NULL); + if (!BufferIsValid(buf)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not read block %u of relation \"%s\"", + blkNo, RelationGetRelationName(rel)))); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = (Page) palloc(BLCKSZ); + memcpy(page, BufferGetPage(buf), BLCKSZ); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buf); + + return page; +} + +/* + * Functions for checks. 
+ */ +static void +check_superuser(void) +{ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use this function"))); +} + +static void +check_page_opaque_data_size(Page page) +{ + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(RumPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a valid RUM metapage"), + errdetail("Expected special size %d, got %d.", + (int) MAXALIGN(sizeof(RumPageOpaqueData)), + (int) PageGetSpecialSize(page)))); +} + +static void +check_page_is_meta_page(RumPageOpaque opaq) +{ + if (opaq->flags != RUM_META) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a RUM metapage"), + errdetail("Flags %04X, expected %04X", + opaq->flags, RUM_META))); +} + +static void +check_page_is_leaf_data_page(RumPageOpaque opaq) +{ + if (opaq->flags != (RUM_DATA | RUM_LEAF)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a RUM {data, leaf} page"), + errdetail("Flags %04X, expected %04X", + opaq->flags, (RUM_DATA | RUM_LEAF)))); +} + +static void +check_page_is_internal_data_page(RumPageOpaque opaq) +{ + if (opaq->flags != (RUM_DATA & ~RUM_LEAF)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a RUM {data} page"), + errdetail("Flags %04X, expected %04X", + opaq->flags, (RUM_DATA & ~RUM_LEAF)))); +} + +static void +check_page_is_leaf_entry_page(RumPageOpaque opaq) +{ + if (opaq->flags != RUM_LEAF) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a RUM {leaf} page"), + errdetail("Flags %04X, expected %04X", + opaq->flags, RUM_LEAF))); +} + +static void +check_page_is_internal_entry_page(RumPageOpaque opaq) +{ + if (opaq->flags != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page is not a RUM {} page"), + errdetail("Flags %04X, expected %04X", + 
opaq->flags, 0))); +} + +/* + * The function is used to output + * information stored in Datum as text. + */ +static Datum +get_datum_text_by_oid(Datum info, Oid infoId) +{ + Oid funcId; + bool isVarlena = false; + char *infoStr; + + Assert(OidIsValid(infoId)); + + getTypeOutputInfo(infoId, &funcId, &isVarlena); + + infoStr = OidOutputFunctionCall(funcId, info); + + return CStringGetTextDatum(infoStr); +} + +/* + * This function returns the key category as text. + */ +static Datum +category_get_datum_text(RumNullCategory category) +{ + char categoryArr[][20] = {"RUM_CAT_NORM_KEY", + "RUM_CAT_NULL_KEY", + "RUM_CAT_EMPTY_ITEM", + "RUM_CAT_NULL_ITEM", + "RUM_CAT_EMPTY_QUERY"}; + + switch (category) + { + case RUM_CAT_NORM_KEY: + return CStringGetTextDatum(categoryArr[0]); + + case RUM_CAT_NULL_KEY: + return CStringGetTextDatum(categoryArr[1]); + + case RUM_CAT_EMPTY_ITEM: + return CStringGetTextDatum(categoryArr[2]); + + case RUM_CAT_NULL_ITEM: + return CStringGetTextDatum(categoryArr[3]); + + case RUM_CAT_EMPTY_QUERY: + return CStringGetTextDatum(categoryArr[4]); + } + + /* In case of an error, return 0 */ + return (Datum) 0; +} + +/* + * The function extracts the weight and + * returns the corresponding letter. + */ +static char +pos_get_weight(WordEntryPos position) +{ + char res = 'D'; + + switch (WEP_GETWEIGHT(position)) + { + case 3: + return 'A'; + case 2: + return 'B'; + case 1: + return 'C'; + } + + return res; +} + +/* + * A function for extracting the positions of lexemes + * from additional information. Returns a string in + * which the positions of the lexemes are recorded. 
+ */
+
+/* Step by which the output buffer grows, in bytes */
+#define POS_STR_BUF_LENGTH 1024
+/* Free space that must be available before one more position is written */
+#define POS_MAX_VAL_LENGTH 16
+
+/*
+ * Render the lexeme positions packed in addInfo (a bytea) as text.
+ *
+ * Each position is printed as "<pos>" for weight D and "<pos><weight>"
+ * for weights A, B and C; positions are separated by commas.  An empty
+ * list yields an empty string.  Returns a text Datum.
+ */
+static Datum
+get_positions_to_text_datum(Datum addInfo)
+{
+	bytea	   *positions = DatumGetByteaP(addInfo);
+	char	   *ptrt = (char *) VARDATA_ANY(positions);
+	int32		npos = count_pos(ptrt, VARSIZE_ANY_EXHDR(positions));
+	WordEntryPos position = 0;
+
+	/* Output buffer: 'capacity' bytes allocated, 'used' bytes written */
+	int			capacity = POS_STR_BUF_LENGTH;
+	int			used = 0;
+	char	   *buf = (char *) palloc(capacity);
+	Datum		res;
+
+	buf[0] = '\0';
+
+	for (int i = 0; i < npos; i++)
+	{
+		char		weight;
+
+		/* Grow first, so the write below always has room to spare */
+		if (capacity - used <= POS_MAX_VAL_LENGTH)
+		{
+			capacity += POS_STR_BUF_LENGTH;
+			buf = (char *) repalloc(buf, capacity);
+		}
+
+		/* Decode the next position and advance the read pointer */
+		ptrt = decompress_pos(ptrt, &position);
+		weight = pos_get_weight(position);
+
+		/* The default weight D is not printed */
+		if (weight == 'D')
+			used += snprintf(buf + used, capacity - used, "%d,",
+							 WEP_GETPOS(position));
+		else
+			used += snprintf(buf + used, capacity - used, "%d%c,",
+							 WEP_GETPOS(position), weight);
+	}
+
+	/* Drop the trailing comma, if anything was written at all */
+	if (used > 0)
+		buf[used - 1] = '\0';
+
+	res = CStringGetTextDatum(buf);
+	pfree(buf);
+	return res;
+}
+
+/*
+ * This function returns the type OID of the current
+ * tuple key, looked up by its attribute number in the
+ * original tuple descriptor of the RumState structure.
+ */
+static Oid
+get_cur_tuple_key_oid(RumPageItemsState piState)
+{
+	TupleDesc	origTupleDesc;
+	OffsetNumber attnum;
+
+	attnum = piState->curKeyAttnum;
+	origTupleDesc = piState->rumState->origTupdesc;
+
+	/* curKeyAttnum is 1-based, tuple descriptor attributes are 0-based */
+	return TupleDescAttr(origTupleDesc, attnum - 1)->atttypid;
+}
+
+/*
+ * The function is used to extract values
+ * from a previously read IndexTuple.
+ *
+ * It fills in the key attribute number, the key itself with its
+ * category, and the key type OID.  On internal entry pages it also
+ * stores the down link; on leaf entry pages it stores the type of the
+ * additional information, or marks it as absent.
+ */
+static void
+get_entry_index_tuple_values(RumPageItemsState piState)
+{
+	RumState   *rumState = piState->rumState;
+
+	/* Scanning the IndexTuple */
+	piState->curKeyAttnum = rumtuple_get_attrnum(rumState, piState->curItup);
+
+	piState->curKey = rumtuple_get_key(rumState,
+									   piState->curItup,
+									   &(piState->curKeyCategory));
+
+	piState->curKeyOid = get_cur_tuple_key_oid(piState);
+
+	if (piState->pageType == INTERNAL_ENTRY_PAGE)
+		piState->curTupleDownLink = RumGetDownlink(piState->curItup);
+
+	if (piState->pageType == LEAF_ENTRY_PAGE)
+	{
+		if (RumGetAddInfoAttr(piState))
+		{
+			piState->curKeyAddInfoOid = RumGetAddInfoAttr(piState)->atttypid;
+			piState->curKeyAddInfoByval = RumGetAddInfoAttr(piState)->attbyval;
+		}
+		else
+		{
+			/*
+			 * This attribute has no additional information: do not keep
+			 * the type left over from a previously scanned tuple.
+			 */
+			piState->curKeyAddInfoOid = InvalidOid;
+			piState->curKeyAddInfoByval = false;
+		}
+	}
+}
+
+/*
+ * The leaf and internal pages of the Entry Tree contain IndexTuples.
+ * On leaf pages, IndexTuples contain the key and the posting list
+ * (or a link to the posting tree) that relates to it. On internal
+ * pages, IndexTuples contain a key and a link to a child page.
+ *
+ * This function is used to put the key values in the result arrays
+ * values and nulls.
+ */
+static void
+write_res_cur_tuple_key_to_values(RumPageItemsState piState)
+{
+	int			counter = 0;
+
+	/* Column 0: the key as text, or NULL for any non-normal key category */
+	if (piState->curKeyCategory == RUM_CAT_NORM_KEY)
+		piState->values[counter++] = get_datum_text_by_oid(piState->curKey,
+														   piState->curKeyOid);
+	else
+		piState->nulls[counter++] = true;
+
+	/* Column 1: attribute number of the key */
+	piState->values[counter++] = UInt16GetDatum(piState->curKeyAttnum);
+
+	/* Column 2: key category, converted by category_get_datum_text() */
+	piState->values[counter++] =
+		category_get_datum_text(piState->curKeyCategory);
+
+	/* Column 3 exists only on internal entry pages: link to the child page */
+	if (piState->pageType == INTERNAL_ENTRY_PAGE)
+		piState->values[counter] = UInt32GetDatum(piState->curTupleDownLink);
+}
+
+/*
+ * This function is designed to search for a link to a target page on
+ * the internal pages of a posting tree. Starting from curPageNum (the
+ * root), it scans every level from left to right and returns true as
+ * soon as a RumPostingItem pointing to targetPageNum is found. It
+ * returns false when the leaf level (or a new page) is reached without
+ * finding such a link.
+ *
+ * The link used to descend to the next level is taken from the first
+ * item of the LEFTMOST page of the current level. Taking it from any
+ * other page would start the next level in the middle, and the pages to
+ * the left of that point would never be examined.
+ */
+static bool
+find_page_in_posting_tree(BlockNumber targetPageNum,
+						  BlockNumber curPageNum,
+						  RumState * rumState)
+{
+	Page		curPage;
+	RumPageOpaque curOpaq;
+	RumPostingItem curPitem;
+
+	/* Down link remembered from the leftmost page of the current level */
+	BlockNumber levelDownLink = InvalidBlockNumber;
+
+	/* True while curPageNum is the first (leftmost) page of a level */
+	bool		atLevelStart = true;
+
+	/* Page loop */
+	for (;;)
+	{
+		curPage = get_rel_page(rumState->index, curPageNum);
+
+		/* The page cannot be new */
+		if (PageIsNew(curPage))
+		{
+			pfree(curPage);
+			return false;
+		}
+
+		/* Getting a page description from an opaque area */
+		curOpaq = RumPageGetOpaque(curPage);
+
+		/* If this is a leaf page, we stop the loop */
+		if (curOpaq->flags == (RUM_DATA | RUM_LEAF))
+		{
+			pfree(curPage);
+			return false;
+		}
+
+		/*
+		 * Remember the link down only once per level, from the first
+		 * RumPostingItem of the leftmost page.
+		 */
+		if (atLevelStart)
+		{
+			memcpy(&curPitem,
+				   RumDataPageGetItem(curPage, 1), sizeof(RumPostingItem));
+			levelDownLink = RumPostingItemGetBlockNumber(&curPitem);
+			atLevelStart = false;
+		}
+
+		/* The loop that scans the page */
+		for (int i = 1; i <= curOpaq->maxoff; i++)
+		{
+			/* Reading the RumPostingItem from the current page */
+			memcpy(&curPitem,
+				   RumDataPageGetItem(curPage, i), sizeof(RumPostingItem));
+
+			if (targetPageNum == RumPostingItemGetBlockNumber(&curPitem))
+			{
+				pfree(curPage);
+				return true;
+			}
+		}
+
+		/* Go to the next page */
+
+		/* If a step to the right is impossible, step down one level */
+		if (curOpaq->rightlink == InvalidBlockNumber)
+		{
+			curPageNum = levelDownLink;
+			atLevelStart = true;
+		}
+
+		/* Step to the right */
+		else
+			curPageNum = curOpaq->rightlink;
+
+		pfree(curPage);
+	}
+}
+
+/*
+ * This function is used to sequentially find
+ * the roots of the posting tree. In the first
+ * call, *curPageNum should be the leftmost
+ * leaf page of the entry tree, and *curTupleNum
+ * should be equal to FirstOffsetNumber. Then, for
+ * each call, the function will return the root
+ * number of the posting tree until they exhaust.
+ */
+static bool
+find_posting_tree_root(BlockNumber *curPageNum,
+					   OffsetNumber *curTupleNum,
+					   OffsetNumber *curKeyAttnum,
+					   BlockNumber *postingRootNum,
+					   RumState * rumState)
+{
+	Page		curPage;
+	RumPageOpaque curOpaq;
+	IndexTuple	curItup;
+
+	for (;;)
+	{
+		/* Getting the page by number */
+		curPage = get_rel_page(rumState->index, *curPageNum);
+
+		/* The page cannot be new */
+		if (PageIsNew(curPage))
+			break;
+
+		/* Getting a page description from an opaque area */
+		curOpaq = RumPageGetOpaque(curPage);
+
+		Assert(curOpaq->flags == RUM_LEAF);
+
+		/* Scanning current page */
+		while (*curTupleNum <= PageGetMaxOffsetNumber(curPage))
+		{
+			curItup = (IndexTuple)
+				PageGetItem(curPage, PageGetItemId(curPage, *curTupleNum));
+
+			/* Advance first, so the next call resumes after this tuple */
+			(*curTupleNum)++;
+
+			*curKeyAttnum = rumtuple_get_attrnum(rumState, curItup);
+
+			if (RumIsPostingTree(curItup))
+			{
+				*postingRootNum = RumGetPostingTree(curItup);
+				pfree(curPage);
+				return true;
+			}
+		}
+
+		/*
+		 * If haven't found anything, need to move on or terminate the
+		 * function if the pages are over.
+		 */
+		if (curOpaq->rightlink == InvalidBlockNumber)
+			break;
+		else
+		{
+			*curPageNum = curOpaq->rightlink;
+			*curTupleNum = FirstOffsetNumber;
+			pfree(curPage);
+		}
+	}
+
+	/* Error case: nothing (more) to return, invalidate all outputs */
+	*curPageNum = InvalidBlockNumber;
+	*postingRootNum = InvalidBlockNumber;
+	*curTupleNum = InvalidOffsetNumber;
+	*curKeyAttnum = InvalidOffsetNumber;
+	pfree(curPage);
+	return false;
+}
+
+/*
+ * The function is used to find the number
+ * of the leftmost leaf page of the entry tree.
+ *
+ * Returns InvalidBlockNumber if a new page or a page with unexpected
+ * flags is met on the way down.
+ */
+static BlockNumber
+find_min_entry_leaf_page(RumPageItemsState piState)
+{
+	RumState   *rumState = piState->rumState;
+
+	/*
+	 * The page search starts from the first internal page of the entry tree.
+	 */
+	BlockNumber curPageNum = 1;
+	Page		curPage;
+	RumPageOpaque curOpaq;
+	IndexTuple	curItup;
+
+	for (;;)
+	{
+		/* Getting page by number */
+		curPage = get_rel_page(rumState->index, curPageNum);
+
+		/*
+		 * The page cannot be new. The result is a block number, so the
+		 * failure value is InvalidBlockNumber; InvalidOffsetNumber is 0,
+		 * which would be mistaken for block 0.
+		 */
+		if (PageIsNew(curPage))
+		{
+			pfree(curPage);
+			return InvalidBlockNumber;
+		}
+
+		/* Getting a page description from an opaque area */
+		curOpaq = RumPageGetOpaque(curPage);
+
+		/* If the required page is found */
+		if (curOpaq->flags == RUM_LEAF && RumPageLeftMost(curPage))
+		{
+			pfree(curPage);
+			return curPageNum;
+		}
+
+		/* If curPage is still an internal page */
+		else if (curOpaq->flags == 0)
+		{
+			/* Read the first IndexTuple */
+			curItup = (IndexTuple) PageGetItem(curPage,
+											   PageGetItemId(curPage, 1));
+
+			/* Step onto the child page */
+			curPageNum = RumGetDownlink(curItup);
+			pfree(curPage);
+		}
+
+		else					/* Error case */
+		{
+			pfree(curPage);
+			return InvalidBlockNumber;
+		}
+	}
+}
+
+/*
+ * When scanning a posting tree page, the key used to build the posting tree and
+ * the corresponding attribute number are not known. This function determines
+ * the attribute number of the key for which the posting tree was built.
+ *
+ * First, the function descends to the leftmost leaf page of the entry tree,
+ * then searches for links to the posting tree there. In each posting tree,
+ * it searches for the page number being scanned. If the desired page is found,
+ * it returns the key attribute number contained in the IndexTuple, along with
+ * a reference to the posting tree. If nothing is found, the function returns
+ * InvalidOffsetNumber.
+ */
+static OffsetNumber
+find_attnum_posting_tree_key(RumPageItemsState piState)
+{
+	BlockNumber targetPageNum = piState->pageNum;
+	RumState   *rumState = piState->rumState;
+
+	/* Returned result */
+	OffsetNumber keyAttnum = InvalidOffsetNumber;
+
+	BlockNumber curPageNum;
+	OffsetNumber curTupleNum = FirstOffsetNumber;
+	BlockNumber postingRootNum = InvalidBlockNumber;
+
+	/* Search for the leftmost leaf page of the entry tree */
+	curPageNum = find_min_entry_leaf_page(piState);
+
+	/* Without a starting leaf page there is nothing to scan */
+	if (curPageNum == InvalidBlockNumber)
+		return InvalidOffsetNumber;
+
+	/*
+	 * At each iteration of the loop, we find the root of the posting tree,
+	 * then we search for the desired page in this posting tree. The loop ends
+	 * when a page is found, or when there is no longer a posting tree (in
+	 * that case find_posting_tree_root() resets keyAttnum to
+	 * InvalidOffsetNumber).
+	 */
+	while (find_posting_tree_root(&curPageNum, &curTupleNum,
+								  &keyAttnum, &postingRootNum, rumState))
+	{
+		if (postingRootNum == targetPageNum ||
+			find_page_in_posting_tree(targetPageNum,
+									  postingRootNum, rumState))
+			break;
+	}
+
+	return keyAttnum;
+}
+
+/*
+ * An auxiliary function for preparing the scan.
+ * Depending on the type of page, it fills in
+ * piState and makes the necessary checks.
+ *
+ * Returns false when the page is new or when the key attribute of a
+ * posting tree page cannot be determined; true otherwise. The relation
+ * opened here is closed again on every return path.
+ */
+static bool
+prepare_scan(text *relName, uint32 blkNo,
+			 RumPageItemsState * piState,
+			 FuncCallContext *srfFctx,
+			 pageTypeFlags pageType)
+{
+	Relation	rel;			/* needed to initialize the RumState structure */
+
+	Page		page;			/* the page to be scanned */
+	RumPageOpaque opaq;			/* data from the opaque area of the page */
+
+	int			resSize;
+
+	/* Getting rel by name and page by number */
+	rel = get_rel_from_name(relName);
+	page = get_rel_page(rel, blkNo);
+
+	/* The page cannot be new */
+	if (PageIsNew(page))
+	{
+		pfree(page);
+		relation_close(rel, AccessShareLock);
+		return false;
+	}
+
+	/* Checking the size of the opaque area of the page */
+	check_page_opaque_data_size(page);
+
+	/* Getting a page description from an opaque area */
+	opaq = RumPageGetOpaque(page);
+
+	/* Allocating memory for a long-lived structure */
+	*piState = palloc(sizeof(RumPageItemsStateData));
+
+	/* Initializing the RumState structure */
+	(*piState)->rumState = palloc(sizeof(RumState));
+	initRumState((*piState)->rumState, rel);
+
+	/*
+	 * The relation stays open until the end of this function, because
+	 * find_attnum_posting_tree_key() below reads index pages through
+	 * rumState->index.
+	 *
+	 * NOTE(review): rumState presumably keeps pointing at this relation
+	 * after it is closed; confirm that the per-call code never reads pages
+	 * through rumState->index.
+	 */
+
+	/* Writing the page and page type into a long-lived structure */
+	(*piState)->srfFctx = srfFctx;
+	(*piState)->page = page;
+	(*piState)->pageNum = blkNo;
+	(*piState)->pageType = pageType;
+
+	/* The number of results returned depends on the type of page */
+	if ((*piState)->pageType == LEAF_DATA_PAGE)
+		resSize = LEAF_DATA_PAGE_RES_SIZE;
+
+	else if ((*piState)->pageType == INTERNAL_DATA_PAGE)
+		resSize = INTERNAL_DATA_PAGE_RES_SIZE;
+
+	else if ((*piState)->pageType == LEAF_ENTRY_PAGE)
+		resSize = LEAF_ENTRY_PAGE_RES_SIZE;
+
+	else
+		resSize = INTERNAL_ENTRY_PAGE_RES_SIZE;
+
+	/* Allocating memory for arrays of results */
+	(*piState)->values = (Datum *) palloc(resSize * sizeof(Datum));
+	(*piState)->nulls = (bool *) palloc(resSize * sizeof(bool));
+
+	/*
+	 * Depending on the type of page, it performs the necessary checks and
+	 * writes the necessary data into a long-lived structure.
+	 */
+	if (RumIsDataPage(*piState))
+	{
+		if ((*piState)->pageType == LEAF_DATA_PAGE)
+			check_page_is_leaf_data_page(opaq);
+
+		else
+			check_page_is_internal_data_page(opaq);
+
+		(*piState)->maxoff = opaq->maxoff;
+		(*piState)->itemPtr = RumDataPageGetData(page);
+
+		/*
+		 * If the scanned page belongs to a posting tree, we do not know which
+		 * key this posting tree was built for. However, we need to know the
+		 * attribute number of the key in order to correctly determine the
+		 * type of additional information that can be associated with it.
+		 *
+		 * The find_attnum_posting_tree_key() function is used to find the key
+		 * attribute number. The function scans the index and searches for the
+		 * page we are scanning in the posting tree, while remembering which
+		 * key this posting tree was built for.
+		 */
+		(*piState)->curKeyAttnum = find_attnum_posting_tree_key(*piState);
+
+		/* Error handling find_attnum_posting_tree_key() */
+		if ((*piState)->curKeyAttnum == InvalidOffsetNumber)
+		{
+			relation_close(rel, AccessShareLock);
+			return false;
+		}
+
+		if (RumGetAddInfoAttr(*piState))
+		{
+			(*piState)->curKeyAddInfoOid =
+				RumGetAddInfoAttr(*piState)->atttypid;
+			(*piState)->curKeyAddInfoByval =
+				RumGetAddInfoAttr(*piState)->attbyval;
+		}
+	}
+
+	else						/* The entry tree page case */
+	{
+		if ((*piState)->pageType == LEAF_ENTRY_PAGE)
+		{
+			check_page_is_leaf_entry_page(opaq);
+
+			(*piState)->needNewTuple = true;
+		}
+
+		else
+			check_page_is_internal_entry_page(opaq);
+
+		(*piState)->maxoff = PageGetMaxOffsetNumber(page);
+		(*piState)->curTupleNum = FirstOffsetNumber;
+	}
+
+	relation_close(rel, AccessShareLock);
+
+	return true;
+}
+
+/*
+ * An auxiliary function for reading information from leaf
+ * and internal pages of the Posting Tree. For each call,
+ * it returns the next result to be returned from the
+ * rum_page_items_info() function.
+ */
+
+#define VARLENA_MSG "varlena types in posting tree is " \
+	"not supported"
+
+static void
+data_page_get_next_result(RumPageItemsState piState)
+{
+	int			counter = 0;
+
+	/* Before returning the result, need to reset the nulls array */
+	if (piState->pageType == LEAF_DATA_PAGE)
+		memset(piState->nulls, 0,
+			   LEAF_DATA_PAGE_RES_SIZE * sizeof(bool));
+	else
+		memset(piState->nulls, 0,
+			   INTERNAL_DATA_PAGE_RES_SIZE * sizeof(bool));
+
+	Assert(RumIsDataPage(piState));
+
+	/* Reading high key */
+	if (piState->srfFctx->call_cntr == 0)
+		RumReadHighKeyDataPage(piState);
+
+	/* Reading information from Posting List */
+	else if (piState->pageType == LEAF_DATA_PAGE)
+	{
+		/*
+		 * it is necessary for the correct reading of the tid (see the
+		 * function rumdatapageleafread())
+		 */
+		if (piState->srfFctx->call_cntr == 1)
+			RumPrepareCurPitemToPostingList(piState);
+
+		/* Read new item */
+		RumGetNewItemPostingList(piState);
+	}
+
+	/* Reading information from the internal data page */
+	else
+		RumReadKeyDataPage(piState);
+
+	/* Write the read information into arrays of results */
+
+	/*
+	 * This means whether the result tuple is the high key or not.
+	 */
+	if (piState->srfFctx->call_cntr == 0)
+	{
+		piState->values[counter++] = BoolGetDatum(true);
+
+		/* The high key has no block number on internal data pages */
+		if (piState->pageType == INTERNAL_DATA_PAGE)
+			piState->nulls[counter++] = true;
+	}
+
+	else						/* If the result is not the high key */
+	{
+		piState->values[counter++] = BoolGetDatum(false);
+
+		if (piState->pageType == INTERNAL_DATA_PAGE)
+			RumWriteResBlckNumToValues(piState, counter++);
+	}
+
+	RumWriteResIptrToValues(piState, counter++);
+	RumWriteResAddInfoIsNullToValues(piState, counter++);
+
+	/*
+	 * Return of additional information depends on the type of page and the
+	 * type of additional information.
+	 */
+	if (RumCurPitemAddInfoIsNormal(piState))
+	{
+		if (piState->pageType == LEAF_DATA_PAGE &&
+			piState->srfFctx->call_cntr != 0)
+		{
+			if (RumAddInfoIsPositions(piState))
+				RumWriteResAddInfoPosToValues(piState, counter);
+
+			else
+				RumWriteResAddInfoToValues(piState, counter);
+		}
+
+		else					/* If the page is internal or result is high
+								 * key */
+		{
+			if (piState->curKeyAddInfoByval == false)
+				piState->values[counter] = CStringGetTextDatum(VARLENA_MSG);
+			else
+				RumWriteResAddInfoToValues(piState, counter);
+		}
+	}
+
+	/* If no additional information is available */
+	else
+		piState->nulls[counter] = true;
+
+	/* Forming the returned tuple */
+	RumPrepareResultTuple(piState);
+}
+
+/*
+ * IndexTuples are located on the internal pages of the Entry Tree.
+ * Each IndexTuple contains a key and a link to a child page. This
+ * function reads these values and generates the result tuple.
+ */
+static void
+entry_internal_page_get_next_result(RumPageItemsState piState)
+{
+	Datum	   *outValues = piState->values;
+	bool	   *outNulls = piState->nulls;
+
+	Assert(piState->pageType == INTERNAL_ENTRY_PAGE);
+
+	/* Every column starts out as not-NULL for the new result row */
+	memset(outNulls, 0, INTERNAL_ENTRY_PAGE_RES_SIZE * sizeof(bool));
+
+	/* Fetch the next IndexTuple and decode its key, attnum and down link */
+	RumGetNewIndexTuple(piState);
+	get_entry_index_tuple_values(piState);
+
+	if (RumIsEntryInternalHighKey(piState))
+	{
+		/*
+		 * The last IndexTuple of the rightmost page is the high key. It is
+		 * reported as "+inf" with no attribute number and no category; only
+		 * the down link is a real value.
+		 */
+		outNulls[1] = true;
+		outNulls[2] = true;
+		outValues[0] = CStringGetTextDatum("+inf");
+		outValues[3] = UInt32GetDatum(piState->curTupleDownLink);
+	}
+	else
+	{
+		/* An ordinary tuple: key, attnum, category and down link */
+		write_res_cur_tuple_key_to_values(piState);
+	}
+
+	/* Build the result tuple from the values/nulls arrays */
+	RumPrepareResultTuple(piState);
+
+	/* The next SRF call continues with the following IndexTuple */
+	piState->curTupleNum++;
+}
+
+/*
+ * The Entry Tree leaf pages contain IndexTuples containing
+ * the key and either a compressed posting list or a link to
+ * the root page of the Posting Tree. This function reads all
+ * values from posting list and generates the result tuple.
+ */
+static void
+get_entry_leaf_posting_list_result(RumPageItemsState piState)
+{
+	/*
+	 * Start writing from 3, because the previous ones are occupied by a
+	 * cur_tuple_key
+	 */
+	int			counter = 3;
+
+	Assert(piState->pageType == LEAF_ENTRY_PAGE);
+
+	/* Reading the RumItem structures from the IndexTuple */
+	RumGetNewItemPostingList(piState);
+
+	/* Write the read information into arrays of results */
+	write_res_cur_tuple_key_to_values(piState);
+	RumWriteResIptrToValues(piState, counter++);
+	RumWriteResAddInfoIsNullToValues(piState, counter++);
+
+	if (RumCurPitemAddInfoIsNormal(piState))
+	{
+		if (RumAddInfoIsPositions(piState))
+			RumWriteResAddInfoPosToValues(piState, counter++);
+
+		else
+			RumWriteResAddInfoToValues(piState, counter++);
+	}
+
+	else
+		piState->nulls[counter++] = true;
+
+	/* The current IndexTuple does not contain a posting tree */
+	piState->values[counter++] = BoolGetDatum(false);
+	piState->nulls[counter] = true;
+
+	/*
+	 * If the current IndexTuple has ended, i.e. we have scanned all its
+	 * RumItems, then we need to enable the need_new_tuple flag so that the
+	 * next time the function is called, we can read a new IndexTuple from the
+	 * page.
+	 */
+	piState->curTupleItemNum++;
+	if (piState->curTupleItemNum >
+		RumGetNPosting(piState->curItup))
+		piState->needNewTuple = true;
+
+	/* Forming the returned tuple */
+	RumPrepareResultTuple(piState);
+}
+
+/*
+ * This function is used to prepare for scanning
+ * the posting list on Entry Tree leaf pages.
+ */
+static void
+prepare_new_entry_leaf_posting_list(RumPageItemsState piState)
+{
+	Assert(piState->pageType == LEAF_ENTRY_PAGE);
+
+	/* Getting the posting list */
+	piState->itemPtr = RumGetPosting(piState->curItup);
+	piState->curTupleItemNum = 1;
+	piState->needNewTuple = false;
+	piState->curTupleNum++;
+
+	/*
+	 * Every time you read a new IndexTuple, you need to reset the tid for the
+	 * rumDataPageLeafRead() function to work correctly.
+	 */
+	RumPrepareCurPitemToPostingList(piState);
+}
+
+/*
+ * The Entry Tree leaf pages contain IndexTuples containing
+ * the key and either a compressed posting list or a link to
+ * the root page of the Posting Tree. This function handles the
+ * second case: it returns the key together with the root of the
+ * Posting Tree and generates the result tuple.
+ */
+static void
+get_entry_leaf_posting_tree_result(RumPageItemsState piState)
+{
+	/*
+	 * Start writing from 3, because the previous ones are occupied by a
+	 * cur_tuple_key
+	 */
+	int			counter = 3;
+
+	Assert(piState->pageType == LEAF_ENTRY_PAGE);
+
+	/* Returning the key value */
+	write_res_cur_tuple_key_to_values(piState);
+
+	/* Everything stored in the RumItem structure has a NULL value */
+	piState->nulls[counter++] = true;
+	piState->nulls[counter++] = true;
+	piState->nulls[counter++] = true;
+
+	/*
+	 * Returning the root of the posting tree. The flag goes through
+	 * BoolGetDatum(), like every other boolean stored in values[].
+	 */
+	piState->values[counter++] = BoolGetDatum(true);
+	piState->values[counter++] =
+		UInt32GetDatum(RumGetPostingTree(piState->curItup));
+
+	/* Forming the returned tuple */
+	RumPrepareResultTuple(piState);
+
+	/* The next call will require a new IndexTuple */
+	piState->needNewTuple = true;
+}
+
+/*
+ * The function reads information from compressed Posting lists on
+ * Entry Tree leaf pages, each of which is located in the
+ * corresponding IndexTuple. Therefore, first, if the previous
+ * IndexTuple has ended, the new one is read. After that, the
+ * current IndexTuple is scanned until it runs out. The IndexTuple
+ * themselves are read until they end on the page.
+ */
+static void
+entry_leaf_page_get_next_result(RumPageItemsState piState)
+{
+	Assert(piState->pageType == LEAF_ENTRY_PAGE);
+
+	/* Before returning the result, need to reset the nulls array */
+	memset(piState->nulls, 0, LEAF_ENTRY_PAGE_RES_SIZE * sizeof(bool));
+
+	/* needNewTuple is set once the previous IndexTuple has been exhausted */
+	if (piState->needNewTuple)
+	{
+		/* Read the new IndexTuple */
+		RumGetNewIndexTuple(piState);
+
+		/* Getting key and key attribute number */
+		get_entry_index_tuple_values(piState);
+
+		/*
+		 * Getting the posting list. This also clears needNewTuple and
+		 * advances curTupleNum.
+		 */
+		prepare_new_entry_leaf_posting_list(piState);
+
+		/*
+		 * The case when there is a posting tree instead of a compressed
+		 * posting list: a single row with the tree root is produced.
+		 */
+		if (RumIsPostingTree(piState->curItup))
+		{
+			get_entry_leaf_posting_tree_result(piState);
+			return;
+		}
+	}
+
+	/* Produce one row for the next item of the current posting list */
+	get_entry_leaf_posting_list_result(piState);
+}
+
+/*
+ * The rum_metapage_info() function is used to retrieve
+ * information stored on the meta page of the rum index.
+ * To scan, need the index name and the page number.
+ * (for the meta page blkNo = 0).
+ *
+ * Returns a single composite value with ten columns, or NULL
+ * if the requested page is new.
+ */
+Datum
+rum_metapage_info(PG_FUNCTION_ARGS)
+{
+	/* Reading input arguments */
+	text	   *relName = PG_GETARG_TEXT_PP(0);
+	uint32		blkNo = PG_GETARG_UINT32(1);
+
+	Relation	rel;			/* the index the page is read from */
+
+	RumPageOpaque opaq;			/* data from the opaque area of the page */
+	RumMetaPageData *metaData;	/* data stored on the meta page */
+	Page		page;			/* the page to be scanned */
+
+	TupleDesc	tupDesc;		/* description of the result tuple */
+	HeapTuple	resultTuple;	/* for the results */
+	Datum		values[10];		/* return values */
+	bool		nulls[10];		/* true if the corresponding value is NULL */
+
+	/*
+	 * To output the index version. If you change the index version, you may
+	 * need to increase the buffer size.
+	 */
+	char		versionBuf[20];
+
+	/* Only the superuser can use this */
+	check_superuser();
+
+	/*
+	 * Getting rel by name and page by number. The relation is closed right
+	 * away; from here on only the page obtained from get_rel_page() is used.
+	 */
+	rel = get_rel_from_name(relName);
+	page = get_rel_page(rel, blkNo);
+	relation_close(rel, AccessShareLock);
+
+	/* If the page is new, the function should return NULL */
+	if (PageIsNew(page))
+		PG_RETURN_NULL();
+
+	/* Checking the size of the opaque area of the page */
+	check_page_opaque_data_size(page);
+
+	/* Getting a page description from an opaque area */
+	opaq = RumPageGetOpaque(page);
+
+	/* Checking the flags */
+	check_page_is_meta_page(opaq);
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/* Getting information from the meta page */
+	metaData = RumPageGetMeta(page);
+
+	/* None of the returned columns is ever NULL */
+	memset(nulls, 0, sizeof(nulls));
+
+	/*
+	 * Writing data from metaData to values.
+	 *
+	 * The first five values are obsolete because the pending list was removed
+	 * from the rum index.
+	 */
+	values[0] = Int64GetDatum(metaData->head);
+	values[1] = Int64GetDatum(metaData->tail);
+	values[2] = Int32GetDatum(metaData->tailFreeSize);
+	values[3] = Int64GetDatum(metaData->nPendingPages);
+	values[4] = Int64GetDatum(metaData->nPendingHeapTuples);
+	values[5] = Int64GetDatum(metaData->nTotalPages);
+	values[6] = Int64GetDatum(metaData->nEntryPages);
+	values[7] = Int64GetDatum(metaData->nDataPages);
+	values[8] = Int64GetDatum(metaData->nEntries);
+
+	/* The version is reported as text, in hexadecimal with a 0x prefix */
+	snprintf(versionBuf, sizeof(versionBuf), "0x%X", metaData->rumVersion);
+	values[9] = CStringGetTextDatum(versionBuf);
+
+	/* All needed data has been copied into values[] */
+	pfree(page);
+
+	/* Build and return the result tuple */
+	resultTuple = heap_form_tuple(tupDesc, values, nulls);
+
+	/* Returning the result */
+	return HeapTupleGetDatum(resultTuple);
+}
+
+/*
+ * The rum_page_opaque_info() function is used to retrieve
+ * information stored in the opaque area of the index rum
+ * page.
To scan, need the index name and the page number.
+ */
+Datum
+rum_page_opaque_info(PG_FUNCTION_ARGS)
+{
+	/* Reading input arguments */
+	text	   *relName = PG_GETARG_TEXT_PP(0);
+	uint32		blkNo = PG_GETARG_UINT32(1);
+
+	Relation	rel;			/* the index the page is read from */
+
+	RumPageOpaque opaq;			/* data from the opaque area of the page */
+	Page		page;			/* the page to be scanned */
+
+	HeapTuple	resultTuple;	/* for the results */
+	TupleDesc	tupDesc;		/* description of the result tuple */
+
+	Datum		values[5];		/* return values */
+	bool		nulls[5];		/* true if the corresponding value is NULL */
+	Datum		flags[16];		/* array with flags in text format */
+	int			nFlags = 0;		/* index in the array of flags */
+	uint16		flagBits;		/* flags in the opaque area of the page */
+
+	/* Only the superuser can use this */
+	check_superuser();
+
+	/* Getting rel by name and raw page by number */
+	rel = get_rel_from_name(relName);
+	page = get_rel_page(rel, blkNo);
+	relation_close(rel, AccessShareLock);
+
+	/* If the page is new, the function should return NULL */
+	if (PageIsNew(page))
+		PG_RETURN_NULL();
+
+	/* Checking the size of the opaque area of the page */
+	check_page_opaque_data_size(page);
+
+	/* Getting a page description from an opaque area */
+	opaq = RumPageGetOpaque(page);
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/* Convert the flags bitmask to an array of human-readable names */
+	flagBits = opaq->flags;
+	if (flagBits & RUM_DATA)
+		flags[nFlags++] = CStringGetTextDatum("data");
+	if (flagBits & RUM_LEAF)
+		flags[nFlags++] = CStringGetTextDatum("leaf");
+	if (flagBits & RUM_DELETED)
+		flags[nFlags++] = CStringGetTextDatum("deleted");
+	if (flagBits & RUM_META)
+		flags[nFlags++] = CStringGetTextDatum("meta");
+	if (flagBits & RUM_LIST)
+		flags[nFlags++] = CStringGetTextDatum("list");
+	if (flagBits & RUM_LIST_FULLROW)
+		flags[nFlags++] = CStringGetTextDatum("list_fullrow");
+
+	/* Whatever remains after masking out the known bits is unrecognized */
+	flagBits &= ~(RUM_DATA | RUM_LEAF | RUM_DELETED | RUM_META | RUM_LIST |
+				  RUM_LIST_FULLROW);
+	if (flagBits)
+	{
+		/* any flags we don't recognize are printed in hex */
+		flags[nFlags++] = DirectFunctionCall1(to_hex32, Int32GetDatum(flagBits));
+	}
+
+	memset(nulls, 0, sizeof(nulls));
+
+	/*
+	 * Writing data from the opaque area to values.
+	 */
+	values[0] = Int64GetDatum(opaq->leftlink);
+	values[1] = Int64GetDatum(opaq->rightlink);
+	values[2] = Int32GetDatum(opaq->maxoff);
+	values[3] = Int32GetDatum(opaq->freespace);
+
+	/* The way a text[] array is built differs between server versions */
+#if PG_VERSION_NUM >= 160000
+	values[4] = PointerGetDatum(construct_array_builtin(flags, nFlags, TEXTOID));
+#elif PG_VERSION_NUM >= 130000
+	values[4] = PointerGetDatum(construct_array(flags, nFlags,
+												TEXTOID, -1, false, TYPALIGN_INT));
+#else
+	values[4] = PointerGetDatum(construct_array(flags, nFlags,
+												TEXTOID, -1, false, 'i'));
+#endif
+
+	/* Everything needed has been copied out of the page */
+	pfree(page);
+
+	/* Build and return the result tuple. */
+	resultTuple = heap_form_tuple(tupDesc, values, nulls);
+
+	/* Returning the result */
+	return HeapTupleGetDatum(resultTuple);
+}
+
+/*
+ * The main universal function used to scan all
+ * page types (except for the meta page). There
+ * are four SQL wrappers around this function,
+ * each of which scans a specific page type. The
+ * page_type argument is used to select the type
+ * of page to scan.
+ *
+ * It is a set-returning function: every call produces one
+ * result row until the page has been read completely.
+ */
+Datum
+rum_page_items_info(PG_FUNCTION_ARGS)
+{
+	/* Reading input arguments */
+	text	   *relName = PG_GETARG_TEXT_PP(0);
+	uint32		blkNo = PG_GETARG_UINT32(1);
+	pageTypeFlags pageType = PG_GETARG_UINT32(2);
+
+	/* Position on the page, compared with maxoff to detect the end */
+	int			counter;
+
+	/*
+	 * The context of the function calls and the pointer to the long-lived
+	 * piState structure.
+	 */
+	FuncCallContext *fctx;
+	RumPageItemsStateData *piState;
+
+	/* Only the superuser can use this */
+	check_superuser();
+
+	/*
+	 * In the case of the first function call, it is necessary to get the page
+	 * by its number and create a RumState structure for scanning the page.
+	 */
+	if (SRF_IS_FIRSTCALL())
+	{
+		TupleDesc	tupDesc;	/* description of the result tuple */
+		MemoryContext oldMctx;	/* the old function memory context */
+
+		/*
+		 * Initializing the FuncCallContext structure and switching the memory
+		 * context to the one needed for structures that must be saved during
+		 * multiple calls.
+		 */
+		fctx = SRF_FIRSTCALL_INIT();
+		oldMctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+		/*
+		 * Before scanning the page, you need to prepare piState.
+		 *
+		 * NOTE(review): on failure this returns NULL without going through
+		 * SRF_RETURN_DONE(); confirm this is the intended way to end the
+		 * set for a new or unresolvable page.
+		 */
+		if (!prepare_scan(relName, blkNo, &piState, fctx, pageType))
+		{
+			MemoryContextSwitchTo(oldMctx);
+			PG_RETURN_NULL();
+		}
+
+		Assert(RumIsDataPage(piState) || RumIsEntryPage(piState));
+
+		/* Build a tuple descriptor for our result type */
+		if (get_call_result_type(fcinfo, NULL, &tupDesc) != TYPEFUNC_COMPOSITE)
+			elog(ERROR, "return type must be a row type");
+
+		/* Needed for subsequent recording of the tupledesc in fctx */
+		BlessTupleDesc(tupDesc);
+
+		/*
+		 * Save a pointer to a long-lived structure and tuple descriptor for
+		 * our result type in fctx.
+		 */
+		fctx->user_fctx = piState;
+		fctx->tuple_desc = tupDesc;
+
+		/* Switching to the old memory context */
+		MemoryContextSwitchTo(oldMctx);
+	}
+
+	/* Preparing to use the FuncCallContext */
+	fctx = SRF_PERCALL_SETUP();
+
+	/* In the current call, we are reading data from the previous one */
+	piState = fctx->user_fctx;
+
+	/* The counter is defined differently on different pages */
+	if (RumIsDataPage(piState))
+		counter = fctx->call_cntr;
+	else
+		counter = piState->curTupleNum;
+
+	/*
+	 * Go through the page.
+	 *
+	 * When scanning a Posting Tree page, the counter is fctx->call_cntr,
+	 * which is 0 on the first call. The first call is special because it
+	 * returns the high key from the pages of the Posting Tree (the high key
+	 * is not counted in maxoff).
+	 *
+	 * On Entry tree pages, the high key is stored in the IndexTuple.
+	 */
+	if (counter <= piState->maxoff)
+	{
+		if (RumIsDataPage(piState))
+			data_page_get_next_result(piState);
+
+		else if (piState->pageType == LEAF_ENTRY_PAGE)
+			entry_leaf_page_get_next_result(piState);
+
+		else
+			entry_internal_page_get_next_result(piState);
+
+		/* Returning the result of the current call */
+		SRF_RETURN_NEXT(fctx, piState->result);
+	}
+
+	/* Completing the function */
+	SRF_RETURN_DONE(fctx);
+}
diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 83529faa69..5294ff3c10 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -80,8 +80,6 @@ PG_FUNCTION_INFO_V1(rum_ts_join_pos); PG_FUNCTION_INFO_V1(tsquery_to_distance_query); -static unsigned int count_pos(char *ptr, int len); -static char *decompress_pos(char *ptr, WordEntryPos *pos); static Datum build_tsvector_entry(TSVector vector, WordEntry *we); static Datum build_tsvector_hash_entry(TSVector vector, WordEntry *we); static Datum build_tsquery_entry(TSQuery query, QueryOperand *operand); @@ -964,7 +962,6 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) } #define SIXTHBIT 0x20 -#define LOWERMASK 0x1F static unsigned int compress_pos(char *target, WordEntryPos *pos, int npos) @@ -999,7 +996,7 @@ compress_pos(char *target, WordEntryPos *pos, int npos) return ptr - target; } -static char * +extern char * decompress_pos(char *ptr, WordEntryPos *pos) { int i; @@ -1027,7 +1024,7 @@ decompress_pos(char *ptr, WordEntryPos *pos) } } -static unsigned int +extern unsigned int count_pos(char *ptr, int len) { int count = 0, diff --git a/t/003_rum_debug_funcs.pl b/t/003_rum_debug_funcs.pl new file mode 100644 index 0000000000..2354dec94f --- /dev/null +++ b/t/003_rum_debug_funcs.pl @@ -0,0 +1,341 @@ +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use
PostgreSQL::Test::Utils;
+use Test::More;
+
+# See storage/block.h
+my $invalid_block_number = '4294967295';
+
+# Returns the number of the leftmost leaf page of the Entry Tree.
+#
+# The walk starts at page 1 and, as long as the current page is not a leaf,
+# follows the first down link reported for it.
+sub find_min_leaf_entry_page
+{
+	my ($idx_name, $node) = @_;
+	my $page_num = 1;
+
+	while (1)
+	{
+		my $flags = $node->safe_psql(
+			"postgres", qq{
+			SELECT flags FROM rum_page_opaque_info('$idx_name', $page_num);
+		});
+
+		# A page whose only flag is "leaf" is an Entry Tree leaf page.
+		return $page_num if $flags eq '{leaf}';
+
+		# Still an internal page: go down through its first link.
+		$page_num = $node->safe_psql(
+			"postgres", qq{
+			SELECT down_link
+				FROM rum_internal_entry_page_items('$idx_name', $page_num)
+				LIMIT 1;
+		});
+	}
+}
+
+# Walks the leaf pages of the Entry Tree from left to right and returns the
+# root of the first Posting Tree that is referenced from them.
+#
+# $cur_page_num must be the leftmost leaf page of the Entry Tree. If the
+# right links are exhausted without meeting any Posting Tree root, the
+# function returns -1.
+sub find_root_posting_tree
+{
+	my ($idx_name, $cur_page_num, $node) = @_;
+	my $page_num = $cur_page_num;
+
+	until ($page_num eq $invalid_block_number)
+	{
+		my $root = $node->safe_psql(
+			"postgres", qq{
+			SELECT posting_tree_root
+				FROM rum_leaf_entry_page_items('$idx_name', $page_num)
+				WHERE is_posting_tree = 't' LIMIT 1;
+		});
+
+		# An empty answer means this page has no link to a Posting Tree.
+		if ($root ne '')
+		{
+			chomp $root;
+			return $root;
+		}
+
+		# Move on to the right sibling.
+		$page_num = $node->safe_psql(
+			"postgres", qq{
+			SELECT rightlink FROM rum_page_opaque_info('$idx_name', $page_num);
+		});
+	}
+
+	return -1;
+}
+
+# The function finds the leftmost leaf page of the Posting Tree. It works the
+# same way as find_min_leaf_entry_page().
+#
+# In $cur_page_num, it expects to receive the root page of the Posting Tree.
+sub find_min_leaf_posting_tree
+{
+	my ($idx_name, $cur_page_num, $node) = @_;
+
+	my $cur_flags = $node->safe_psql(
+		"postgres", qq{
+		SELECT flags FROM rum_page_opaque_info('$idx_name', $cur_page_num);
+	});
+
+	# Go down through the first non-high-key item until a data leaf is reached.
+	while ($cur_flags ne '{data,leaf}')
+	{
+		$cur_page_num = $node->safe_psql(
+			"postgres", qq{
+			SELECT block_number
+				FROM rum_internal_data_page_items('$idx_name', $cur_page_num)
+				WHERE is_high_key = 'f' LIMIT 1;
+		});
+
+		$cur_flags = $node->safe_psql(
+			"postgres", qq{
+			SELECT flags FROM rum_page_opaque_info('$idx_name', $cur_page_num);
+		});
+	}
+
+	return $cur_page_num;
+}
+
+# A function for comparing TIDs written as "(block,offset)".
+#
+# Returns true if the first TID is less than or equal to the second one,
+# comparing block numbers first and offsets second. Dies if either argument
+# does not contain a TID.
+#
+# The arguments are deliberately not called $a and $b: those names belong to
+# Perl's sort(), and lexical copies of them break any sort block used in the
+# same scope.
+sub tid_less_or_equal
+{
+	my ($tid_a, $tid_b) = @_;
+
+	$tid_a =~ /\((\d+),(\d+)\)/ or die "Invalid TID: $tid_a";
+	my ($blk_a, $off_a) = ($1, $2);
+	$tid_b =~ /\((\d+),(\d+)\)/ or die "Invalid TID: $tid_b";
+	my ($blk_b, $off_b) = ($1, $2);
+
+	return ($blk_a < $blk_b) || ($blk_a == $blk_b && $off_a <= $off_b);
+}
+
+# A function to check that an array of TIDs is sorted in non-decreasing order.
+# Returns 1 if it is (an empty or one-element list is sorted), 0 otherwise.
+sub is_tid_list_sorted
+{
+	my (@tids) = @_;
+
+	for my $i (0 .. $#tids - 1)
+	{
+		return 0 unless tid_less_or_equal($tids[$i], $tids[ $i + 1 ]);
+	}
+
+	return 1;
+}
+
+# Cwd::getcwd() is called below; load the module explicitly so the test does
+# not depend on another module having loaded it.
+require Cwd;
+
+# tsts.data is the data file for the test table.
+my $data_file_path = Cwd::getcwd() . "/data/tsts.data";
+if (-e $data_file_path)
+{
+	plan tests => 11;
+}
+else
+{
+	plan skip_all => "tsts.data not found";
+}
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init;
+$node->start;
+$node->safe_psql("postgres", "CREATE EXTENSION rum;");
+
+# Create a test table, fill it with data, and create an index.
+
+$node->safe_psql(
+	"postgres", qq{
+	CREATE TABLE test_table (id int, t tsvector, d timestamp);
+});
+
+$node->safe_psql(
+	"postgres", qq{
+	DO \$\$
+	BEGIN
+		FOR i IN 1..5 LOOP
+			COPY test_table FROM '$data_file_path';
+		END LOOP;
+	END;
+	\$\$;
+});
+
+# It is necessary to create all types of pages in the Posting Tree.
# Insert 5000 rows that all carry the same lexeme and timestamp.
$node->safe_psql(
	"postgres", qq{
	DO \$\$
	BEGIN
		FOR i IN 1..5000 LOOP
			INSERT INTO test_table(id, t, d)
			VALUES(i, 'b9', '2016-05-02 00:21:22.326724');
		END LOOP;
	END;
	\$\$;
});

$node->safe_psql(
	"postgres", qq{
	CREATE INDEX test_rum_idx_false ON test_table
	USING rum (t rum_tsvector_addon_ops, d)
	WITH (attach = 'd', to = 't', order_by_attach='f');
});

# Testing the rum_metapage_info() function.
my $meta = $node->safe_psql(
	"postgres", qq{
	SELECT n_total_pages, n_entry_pages, n_data_pages, n_entries
	FROM rum_metapage_info('test_rum_idx_false', 0);
});

# The four selected columns come back on one line separated by '|'.
my ($n_total_pages, $n_entry_pages, $n_data_pages, $n_entries) = split /\|/,
  $meta;

# cmp_ok()/is() are used instead of ok() so that a failure reports the actual
# and the expected values.
cmp_ok($n_total_pages, '>', 0, "Total pages count is valid");
cmp_ok($n_entry_pages, '>', 0, "Entry pages count > 0");
cmp_ok($n_data_pages, '>', 0, "Data (posting tree) pages count > 0");
cmp_ok(
	$n_total_pages, '==',
	$n_entry_pages + $n_data_pages + 1,
	"Total pages = entry + data + metapage");
cmp_ok($n_entries, '==', 1650,
	"The number of entries is equal to the number copied from tsts.data");

# Testing the rum_page_opaque_info() function.
my $opaque_meta = $node->safe_psql(
	"postgres", qq{
	SELECT flags FROM rum_page_opaque_info('test_rum_idx_false', 0);
});
is($opaque_meta, '{meta}',
	qq{rum_page_opaque_info('test_rum_idx_false', 0) returns {meta} flag});

$opaque_meta = $node->safe_psql(
	"postgres", qq{
	SELECT rightlink FROM rum_page_opaque_info('test_rum_idx_false', 0);
});
is($opaque_meta, $invalid_block_number,
	qq{InvalidBlockNumber should be equal to '4294967295'});

# Testing the rum_internal_entry_page_items() function.
my $entry_internal_flags = $node->safe_psql(
	"postgres", qq{
	SELECT flags FROM rum_page_opaque_info('test_rum_idx_false', 1);
});
SKIP:
{
	skip 'Page 1 is not an internal entry page', 1
	  if $entry_internal_flags ne '{}';

	# Pick one attribute whose keys will be checked.  ORDER BY makes the
	# choice deterministic: LIMIT without ORDER BY returns an unspecified row.
	my $entry_internal_key_attnum = $node->safe_psql(
		"postgres", qq{
		SELECT attrnum
		FROM rum_internal_entry_page_items('test_rum_idx_false', 1)
		WHERE attrnum IS NOT NULL
		GROUP BY attrnum
		ORDER BY attrnum
		LIMIT 1;
	});

	my @entry_internal_keys = split(
		/\n/,
		$node->safe_psql(
			"postgres", qq{
			SELECT key
			FROM rum_internal_entry_page_items('test_rum_idx_false', 1)
			WHERE attrnum = $entry_internal_key_attnum;
		}));
	my @entry_internal_keys_sorted = sort @entry_internal_keys;

	# The keys must already be in the order a string sort would produce.
	is_deeply(
		\@entry_internal_keys,
		\@entry_internal_keys_sorted,
		"rum_internal_entry_page_items() returns sorted keys");
}

# Testing the rum_leaf_entry_page_items() function.
my $entry_leaf_min_num =
  find_min_leaf_entry_page('test_rum_idx_false', $node);

# Pick one attribute that has at least one entry without a posting tree on
# the leftmost leaf page.  ORDER BY makes the choice deterministic.
my $entry_leaf_key_attnum = $node->safe_psql(
	"postgres", qq{
	SELECT attrnum
	FROM rum_leaf_entry_page_items('test_rum_idx_false', $entry_leaf_min_num)
	WHERE attrnum IS NOT NULL AND is_posting_tree = 'f'
	GROUP BY attrnum
	ORDER BY attrnum
	LIMIT 1;
});
SKIP:
{
	# The number of skipped tests is given explicitly: with a fixed plan,
	# skip() warns when it is omitted.
	skip "The leftmost entry leaf page contains only the posting tree roots.",
	  1
	  if $entry_leaf_key_attnum eq '';

	my @entry_leaf_keys = split(
		/\n/,
		$node->safe_psql(
			"postgres", qq{
			SELECT key
			FROM rum_leaf_entry_page_items('test_rum_idx_false', $entry_leaf_min_num)
			WHERE is_posting_tree = 'f' AND attrnum = $entry_leaf_key_attnum;
		}));
	my @entry_leaf_keys_sorted = sort @entry_leaf_keys;
	is_deeply(\@entry_leaf_keys_sorted, \@entry_leaf_keys,
		"rum_leaf_entry_page_items() returns sorted keys");
}

# Testing the rum_internal_data_page_items() function.
my $posting_tree_root_num =
  find_root_posting_tree('test_rum_idx_false', $entry_leaf_min_num, $node);
my $posting_tree_root_flags;
if ($posting_tree_root_num != -1)
{
	$posting_tree_root_flags = $node->safe_psql(
		"postgres", qq{
		SELECT flags
		FROM rum_page_opaque_info('test_rum_idx_false', $posting_tree_root_num);
	});
}
SKIP:
{
	# The flags are only examined when a root was found, so they are never
	# read while still undefined.
	skip 'The root of the posting tree was not found', 1
	  if $posting_tree_root_num == -1 or $posting_tree_root_flags ne '{data}';

	my @posting_tree_root_tids = split(
		/\n/,
		$node->safe_psql(
			"postgres", qq{
			SELECT tuple_id
			FROM rum_internal_data_page_items('test_rum_idx_false', $posting_tree_root_num)
			WHERE is_high_key = 'f';
		}));

	# deleting the high key
	pop @posting_tree_root_tids;

	# TIDs have to be compared numerically: a plain string sort would order
	# "(10,1)" before "(9,1)".  The same helper is used for the leaf page
	# check below.
	# NOTE(review): assumes tuple_id is printed as "(block,offset)" here as
	# well; is_tid_list_sorted() dies on any other format.
	ok(is_tid_list_sorted(@posting_tree_root_tids),
		"rum_internal_data_page_items() returns sorted tids");
}

# Testing the rum_leaf_data_page_items() function.
my $posting_tree_leaf_num = -1;
if ($posting_tree_root_num != -1)
{
	$posting_tree_leaf_num = find_min_leaf_posting_tree('test_rum_idx_false',
		$posting_tree_root_num, $node);
}
SKIP:
{
	skip 'The leaf page of the posting tree was not found', 1
	  if $posting_tree_leaf_num == -1;

	my @posting_tree_leaf_tids = split(
		/\n/,
		$node->safe_psql(
			"postgres", qq{
			SELECT tuple_id
			FROM rum_leaf_data_page_items('test_rum_idx_false', $posting_tree_leaf_num)
			WHERE is_high_key = 'f';
		}));

	ok(is_tid_list_sorted(@posting_tree_leaf_tids),
		"rum_leaf_data_page_items() returns sorted tids");
}

$node->stop('fast');
done_testing();