1414 * 2) I/O functions were not available for all types
1515 *	  in the get_datum_text_by_oid() function. 
1616 * 
17-  * 3) SIGSEGV in case of bytea output as additional information. 
17+  * 3) The output of lexeme positions in the high keys of the posting  
18+  * 	  tree is not supported. 
1819 */ 
1920
2021#include  "postgres.h" 
22+ #include  "miscadmin.h" 
2123#include  "fmgr.h" 
2224#include  "funcapi.h" 
2325#include  "catalog/namespace.h" 
@@ -115,8 +117,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115117static  Datum  category_get_datum_text (RumNullCategory  category );
116118static  Oid  find_add_info_oid (RumState  * rum_state_ptr );
117119static  OffsetNumber  find_add_info_atrr_num (RumState  * rum_state_ptr );
118- 
119120static  Datum  get_positions_to_text_datum (Datum  add_info );
121+ static  char  pos_get_weight (WordEntryPos  position );
120122
121123/* 
122124 * The rum_metapage_info() function is used to retrieve  
@@ -472,7 +474,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472474	 */ 
473475	if (fctx -> call_cntr  <= inter_call_data -> maxoff )
474476	{
475- 		RumItem 					* high_key_ptr ;			 		
477+ 		RumItem 					* high_key_ptr ; 		 /* to read high key from a page */ 		
476478		RumItem  				* rum_item_ptr ;		/* to read data from a page */ 
477479		Datum  					values [4 ];			/* return values */ 
478480		bool  					nulls [4 ];			/* true if the corresponding value is NULL */ 
@@ -497,7 +499,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497499			values [2 ] =  BoolGetDatum (high_key_ptr -> addInfoIsNull );
498500
499501			/* Returning add info */ 
500- 			if (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0  
502+ 			if (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid  
501503				&&  inter_call_data -> add_info_oid  !=  BYTEAOID )
502504			{
503505				values [3 ] =  get_datum_text_by_oid (high_key_ptr -> addInfo , 
@@ -506,12 +508,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506508
507509			/*  
508510			 * In this case, we are dealing with the positions  
509- 			 * of tokens  and they need to be decoded.  
511+ 			 * of lexemes  and they need to be decoded.  
510512			 */ 
511- 			else  if  (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0  
513+ 			else  if  (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid  
512514					&&  inter_call_data -> add_info_oid  ==  BYTEAOID ) 
513515			{
514- 				/* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */ 
515516				values [3 ] =  CStringGetTextDatum ("high key positions in posting tree is not supported" );
516517			}
517518
@@ -525,26 +526,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525526			SRF_RETURN_NEXT (fctx , result );
526527		}
527528
528- 		/*  
529- 		 * Reading information from the page in rum_item.  
530- 		 * 
531- 		 * TODO: The fact is that being on the posting tree page, we don't know which  
532- 		 * index attribute this posting tree was built for, so we don't know the  
533- 		 * attribute number of the additional information. But the rumDataPageLeafRead()  
534- 		 * function requires it to read information from the page. Here we use the auxiliary  
535- 		 * function find_add_info_atr_num(), which simply iterates through the array with  
536- 		 * attributes that are additional information and selects the attribute number for  
537- 		 * which the additional information attribute is not NULL. This approach is incorrect  
538- 		 * because there may not be additional information for the attribute on the page,  
539- 		 * but we hope that in this case add_info_is_null will have the value true and the  
540- 		 * additional information will not be read. 
541- 		 * 
542- 		 * This problem can be solved by asking the user for the attribute number of  
543- 		 * additional information, because going through the index from top to bottom,  
544- 		 * he saw it next to the link to the posting tree root. 
545- 		 */ 
529+ 		/* Reading information from the page in rum_item */ 
546530		inter_call_data -> item_ptr  =  rumDataPageLeafRead (inter_call_data -> item_ptr , 
547- 									/* inter_call_data->cur_tuple_key_attnum, */ 
548531									find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549532									rum_item_ptr , false, inter_call_data -> rum_state_ptr );
550533
@@ -554,7 +537,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554537		values [2 ] =  BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555538
556539		/* Returning add info */ 
557- 		if (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0   
540+ 		if (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid 
558541			&&  inter_call_data -> add_info_oid  !=  BYTEAOID )
559542		{
560543			values [3 ] =  get_datum_text_by_oid (rum_item_ptr -> addInfo , 
@@ -563,9 +546,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563546
564547		/*  
565548		 * In this case, we are dealing with the positions  
566- 		 * of tokens  and they need to be decoded.  
549+ 		 * of lexemes  and they need to be decoded.  
567550		 */ 
568- 		else  if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0   
551+ 		else  if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid 
569552				&&  inter_call_data -> add_info_oid  ==  BYTEAOID ) 
570553		{
571554			values [3 ] =  get_positions_to_text_datum (rum_item_ptr -> addInfo ); 
@@ -729,7 +712,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729712	 */ 
730713	if (fctx -> call_cntr  <= inter_call_data -> maxoff )
731714	{
732- 		RumItem 					* high_key_ptr ;
715+ 		RumItem 					* high_key_ptr ;		 /* to read high key from a page */ 
733716		PostingItem  			* posting_item_ptr ;	/* to read data from a page */ 
734717		Datum  					values [5 ];			/* returned values */ 
735718		bool  					nulls [5 ];			/* true if the corresponding returned value is NULL */ 
@@ -754,7 +737,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754737			values [3 ] =  BoolGetDatum (high_key_ptr -> addInfoIsNull );
755738
756739			/* Returning add info */ 
757- 			if (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0 
740+ 			if (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid 
758741				&&  inter_call_data -> add_info_oid  !=  BYTEAOID )
759742			{
760743				values [4 ] =  get_datum_text_by_oid (high_key_ptr -> addInfo , 
@@ -763,12 +746,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763746
764747			/*  
765748			 * In this case, we are dealing with the positions  
766- 			 * of tokens  and they need to be decoded.  
749+ 			 * of lexemes  and they need to be decoded.  
767750			 */ 
768- 			else  if  (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0  
751+ 			else  if  (!(high_key_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid  
769752					&&  inter_call_data -> add_info_oid  ==  BYTEAOID ) 
770753			{
771- 				/* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */ 
772754				values [4 ] =  CStringGetTextDatum ("high key positions in posting tree is not supported" );
773755			}
774756
@@ -793,7 +775,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793775		values [3 ] =  BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794776
795777		/* Returning add info */ 
796- 		if (!posting_item_ptr -> item .addInfoIsNull  &&  inter_call_data -> add_info_oid  !=  0 
778+ 		if (!posting_item_ptr -> item .addInfoIsNull  &&  inter_call_data -> add_info_oid  !=  InvalidOid 
797779			&&  inter_call_data -> add_info_oid  !=  BYTEAOID )
798780		{
799781			values [4 ] =  get_datum_text_by_oid (posting_item_ptr -> item .addInfo , 
@@ -802,12 +784,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802784
803785		/*  
804786		 * In this case, we are dealing with the positions  
805- 		 * of tokens  and they need to be decoded.  
787+ 		 * of lexemes  and they need to be decoded.  
806788		 */ 
807- 		else  if  (!posting_item_ptr -> item .addInfoIsNull  &&  inter_call_data -> add_info_oid  !=  0   
789+ 		else  if  (!posting_item_ptr -> item .addInfoIsNull  &&  inter_call_data -> add_info_oid  !=  InvalidOid 
808790				&&  inter_call_data -> add_info_oid  ==  BYTEAOID ) 
809791		{
810- 			/* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */ 
811792			values [4 ] =  CStringGetTextDatum ("high key positions in posting tree is not supported" );
812793		}
813794
@@ -1072,17 +1053,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721053		values [4 ] =  BoolGetDatum (rum_item_ptr -> addInfoIsNull );
10731054
10741055		/* Returning add info */ 
1075- 		if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0  &&  
1056+ 		if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid  &&  
10761057			inter_call_data -> add_info_oid  !=  BYTEAOID )
10771058		{
10781059			values [5 ] =  get_datum_text_by_oid (rum_item_ptr -> addInfo , inter_call_data -> add_info_oid );
10791060		}
10801061
10811062		/*  
10821063		 * In this case, we are dealing with the positions  
1083- 		 * of tokens  and they need to be decoded.  
1064+ 		 * of lexemes  and they need to be decoded.  
10841065		 */ 
1085- 		else  if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  0  
1066+ 		else  if  (!(rum_item_ptr -> addInfoIsNull ) &&  inter_call_data -> add_info_oid  !=  InvalidOid  
10861067				&&  inter_call_data -> add_info_oid  ==  BYTEAOID ) 
10871068		{
10881069			values [5 ] =  get_positions_to_text_datum (rum_item_ptr -> addInfo ); 
@@ -1427,22 +1408,16 @@ get_page_from_raw(bytea *raw_page)
14271408 * int2, int4, int8, float4, float8, money, oid, timestamp,  
14281409 * timestamptz, time, timetz, date, interval, macaddr, inet,  
14291410 * cidr, text, varchar, char, bytea, bit, varbit, numeric. 
1430-  * 
1431-  * TODO: All types accepted by rum must be checked, but  
1432-  * perhaps some types are missing or some are superfluous. 
14331411 */ 
14341412static  Datum 
14351413get_datum_text_by_oid (Datum  info , Oid  info_oid )
14361414{
14371415	char  * str_info  =  NULL ;
14381416
1439- 	/* info cannot be NULL */ 
1440- 	Assert (DatumGetPointer (info ) !=  NULL );
1441- 
14421417	/* 
14431418	 * Form a string depending on the type of info. 
14441419	 * 
1445- 	 * FIXME : The macros used below are taken from the  
1420+ 	 * TODO : The macros used below are taken from the  
14461421	 * pg_type_d.h file, which says not to use them 
14471422	 * in the new code. 
14481423	 */ 
@@ -1528,18 +1503,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281503			str_info  =  OidOutputFunctionCall (F_CHAROUT , info );
15291504			break ;
15301505
1531- 		/*  
1532- 		 * TODO: For some reason, the rum index created for a single tsv  
1533- 		 * field contains additional information as bytea. In addition,  
1534- 		 * if additional information in this format is extracted from  
1535- 		 * posting tree pages, it cannot be displayed correctly as text.  
1536- 		 * If the additional information was extracted from the entry  
1537- 		 * tree pages, then it is displayed correctly. 
1538- 		 */ 
15391506		case  BYTEAOID :
1540- 			/* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */ 
1541- 			/* break; */ 
1542- 			return  CStringGetTextDatum ("BYTEAOID is not supported" );
1507+ 			str_info  =  OidOutputFunctionCall (F_BYTEAOUT , info );
1508+ 			break ;
15431509
15441510		case  BITOID :
15451511			str_info  =  OidOutputFunctionCall (F_BIT_OUT , info );
@@ -1634,14 +1600,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341600 * the Oid of additional information for an attribute for  
16351601 * which it is not NULL. 
16361602 * 
1637-  * TODO:  The logic of the function assumes that there cannot  
1603+  * The logic of the function assumes that there cannot  
16381604 * be several types of additional information in the index,  
16391605 * otherwise it will not work.  
16401606 */ 
16411607static  Oid 
16421608find_add_info_oid (RumState  * rum_state_ptr )
16431609{
1644- 	Oid  add_info_oid  =  0 ;
1610+ 	Oid  add_info_oid  =  InvalidOid ;
16451611
16461612	/* Number of index attributes */ 
16471613	int  num_attrs  =  rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1617,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511617	 * oid of additional information. 
16521618	 */ 
16531619	for  (int  i  =  0 ; i  <  num_attrs ; i ++ ) 
1620+ 	{
16541621		if  ((rum_state_ptr -> addAttrs )[i ] !=  NULL )
1622+ 		{
1623+ 			Assert (add_info_oid  ==  InvalidOid );
16551624			add_info_oid  =  ((rum_state_ptr -> addAttrs )[i ])-> atttypid ; 
1625+ 		}
1626+ 	}
16561627
16571628	return  add_info_oid ;
16581629}
@@ -1661,19 +1632,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611632 * This is an auxiliary function to get the attribute number  
16621633 * for additional information. It is used in the rum_leaf_data_page_items()  
16631634 * function to call the rumDataPageLeafRead() function. 
1635+  * 
1636+  * The logic of the function assumes that there cannot  
1637+  * be several types of additional information in the index,  
1638+  * otherwise it will not work.  
16641639 */ 
16651640static  OffsetNumber 
16661641find_add_info_atrr_num (RumState  * rum_state_ptr )
16671642{
1668- 	OffsetNumber  add_info_attr_num  =  0 ;
1643+ 	OffsetNumber  add_info_attr_num  =  InvalidOffsetNumber ;
16691644
16701645	/* Number of index attributes */ 
16711646	int  num_attrs  =  rum_state_ptr -> origTupdesc -> natts ;
16721647
16731648	/* Go through the addAttrs array */ 
1674- 	for  (int  i  =  0 ; i  <  num_attrs ; i ++ ) 
1649+ 	for  (int  i  =  0 ; i  <  num_attrs ; i ++ )
1650+ 	{
16751651		if  ((rum_state_ptr -> addAttrs )[i ] !=  NULL )
1652+ 		{
1653+ 			Assert (add_info_attr_num  ==  InvalidOffsetNumber );
16761654			add_info_attr_num  =  i ;
1655+ 		}
1656+ 	}
16771657
16781658	/* Need to add 1 because the attributes are numbered from 1 */ 
16791659	return  add_info_attr_num  +  1 ;
@@ -1683,8 +1663,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831663#define  POS_MAX_VAL_LENGHT  6
16841664
16851665/* 
1686-  * A function for extracting the positions of tokens  from additional  
1687-  * information. Returns a string in which the positions of the tokens   
1666+  * A function for extracting the positions of lexemes  from additional  
1667+  * information. Returns a string in which the positions of the lexemes   
16881668 * are recorded. The memory that the string occupies must be freed later. 
16891669 */ 
16901670static  Datum 
@@ -1711,14 +1691,17 @@ get_positions_to_text_datum(Datum add_info)
17111691	cur_max_str_lenght  =  POS_STR_BUF_LENGHT ;
17121692	positions_str_cur_ptr  =  positions_str ;
17131693
1714- 	/* Extract the positions of the tokens  and put them in the string */ 
1694+ 	/* Extract the positions of the lexemes  and put them in the string */ 
17151695	for  (int  i  =  0 ; i  <  npos ; i ++ )
17161696	{
17171697		/* At each iteration decode the position */ 
17181698		ptrt  =  decompress_pos (ptrt , & position );
17191699
1720- 		/* Write this position in the string */ 
1721- 		sprintf (positions_str_cur_ptr , "%d," , position );
1700+ 		/* Write this position and weight in the string */ 
1701+ 		if (pos_get_weight (position ) ==  'D' )
1702+ 			sprintf (positions_str_cur_ptr , "%d," , WEP_GETPOS (position ));
1703+ 		else 
1704+ 			sprintf (positions_str_cur_ptr , "%d%c," , WEP_GETPOS (position ), pos_get_weight (position ));
17221705
17231706		/* Moving the pointer forward */ 
17241707		positions_str_cur_ptr  +=  strlen (positions_str_cur_ptr );
@@ -1744,3 +1727,25 @@ get_positions_to_text_datum(Datum add_info)
17441727	pfree (positions_str );
17451728	return  res ;
17461729}
1730+ 
1731+ /* 
1732+  * The function extracts the weight and  
1733+  * returns the corresponding letter. 
1734+  */ 
1735+ static  char 
1736+ pos_get_weight (WordEntryPos  position )
1737+ {
1738+ 	char  res  =  'D' ;
1739+ 
1740+ 	switch (WEP_GETWEIGHT (position ))
1741+ 	{
1742+ 		case  3 :
1743+ 			return  'A' ;
1744+ 		case  2 :
1745+ 			return  'B' ;
1746+ 		case  1 :
1747+ 			return  'C' ;
1748+ 	}
1749+ 
1750+ 	return  res ;
1751+ }
0 commit comments