tsvector.h 2.4 KB
Newer Older
T
Teodor Sigaev 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#ifndef __TXTIDX_H__
#define __TXTIDX_H__

/*
#define TXTIDX_DEBUG
*/

#include "postgres.h"

#include "access/gist.h"
#include "access/itup.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"

B
Bruce Momjian 已提交
15 16
typedef struct
{
T
Teodor Sigaev 已提交
17
	uint32
B
Bruce Momjian 已提交
18 19 20
				haspos:1,
				len:11,			/* MAX 2Kb */
				pos:20;			/* MAX 1Mb */
T
Teodor Sigaev 已提交
21
}	WordEntry;
B
Bruce Momjian 已提交
22

T
Teodor Sigaev 已提交
23 24 25
/*
 * Limits matching the widths of WordEntry's bit-fields:
 * len is 11 bits wide, pos is 20 bits wide.
 */
#define MAXSTRLEN (1 << 11)
#define MAXSTRPOS (1 << 20)

T
Change  
Teodor Sigaev 已提交
26
/*
WordEntryPos is stored as a plain uint16 (weight in the two high bits,
position in the low 14 bits). It is equivalent to

typedef struct
{
	   uint16
								weight:2,
								pos:14;
}	WordEntryPos;

*/

typedef uint16 WordEntryPos;

/* Weight of a WordEntryPos: the two bits above the 14-bit position. */
#define  WEP_GETWEIGHT(x)	( (x) >> 14 )
/* Position of a WordEntryPos: the low 14 bits. */
#define  WEP_GETPOS(x)		( (x) & 0x3fff )
T
Change  
Teodor Sigaev 已提交
41

B
Bruce Momjian 已提交
42 43
/*
 * Store a new weight (WEP_SETWEIGHT) or position (WEP_SETPOS) into the
 * WordEntryPos lvalue x, leaving the other field untouched.
 *
 * The whole expansion is parenthesized so the macros stay a single
 * expression when used inside a larger one; x is evaluated twice, so it
 * must not have side effects.
 */
#define  WEP_SETWEIGHT(x,v)  ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) )
#define  WEP_SETPOS(x,v)	( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
T
Change  
Teodor Sigaev 已提交
44

B
Bruce Momjian 已提交
45 46

/* A position is kept in 14 bits, so valid values are 0 .. MAXENTRYPOS-1. */
#define MAXENTRYPOS (1<<14)
/* NOTE(review): presumably the cap on positions kept per lexeme - confirm with the users of this macro. */
#define MAXNUMPOS	256
/* Clamp x to the largest position MAXENTRYPOS allows (x is evaluated twice). */
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
T
Teodor Sigaev 已提交
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Structure of tsvector datatype:
 * 1) int4	len - varlena's length
 * 2) int4	size - number of lexemes, which is also the number of
 *				elements in the WordEntry array
 * 3) Array of WordEntry - sorted array; comparison is based on the word's
 *						length and strncmp(). WordEntry->pos holds the
 *						number of bytes from the end of the WordEntry
 *						array to the start of the corresponding lexeme.
 * 4) Lexeme storage:
 *	  SHORTALIGNED(lexeme) and position information, if it exists.
 *	  Position information: the first int2 is the number of positions,
 *	  and it is followed by an array of WordEntryPos.
 */

T
Teodor Sigaev 已提交
64 65 66 67 68 69 70
/*
 * Header of a tsvector value. data is declared with one element but is only
 * the start of the variable-length part: the WordEntry array (see ARRPTR)
 * followed by the lexeme storage (see STRPTR).
 */
typedef struct
{
	int4		len;			/* varlena's length */
	int4		size;			/* number of WordEntry elements (lexemes) */
	char		data[1];		/* WordEntry array, then lexeme storage */
}	tsvector;

71 72 73 74 75
/* Size of the fixed part of a tsvector: the two int4 fields len and size. */
#define DATAHDRSIZE (2 * sizeof(int4))
/* Bytes needed for x WordEntry items plus lenstr bytes of lexeme storage. */
#define CALCDATASIZE(x, lenstr) ( DATAHDRSIZE + (x) * sizeof(WordEntry) + (lenstr) )
/* Start of the WordEntry array of tsvector x. */
#define ARRPTR(x)	( (WordEntry *) ( (char *) (x) + DATAHDRSIZE ) )
/* Start of the lexeme storage of tsvector x, just past its WordEntry array. */
#define STRPTR(x)	( (char *) (x) + DATAHDRSIZE + ( ((tsvector *) (x))->size * sizeof(WordEntry) ) )
/* Bytes of lexeme storage in tsvector x: len minus the header and the WordEntry array. */
#define STRSIZE(x)	( ((tsvector *) (x))->len - DATAHDRSIZE - ( ((tsvector *) (x))->size * sizeof(WordEntry) ) )
T
Teodor Sigaev 已提交
76
/*
 * Position data of WordEntry e inside tsvector x.
 *
 * _POSDATAPTR: address reached by adding the entry's pos and its
 *				SHORTALIGN'ed len to the start of the lexeme storage.
 * POSDATALEN:	the uint16 stored at that address when haspos is set,
 *				otherwise 0.
 * POSDATAPTR:	the WordEntryPos array that follows that uint16.
 */
#define _POSDATAPTR(x,e)	(STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )
T
Teodor Sigaev 已提交
79 80


B
Bruce Momjian 已提交
81 82 83 84
/*
 * A WordEntry together with a pointer to WordEntryPos data.
 * NOTE(review): presumably the working form of an entry while a tsvector
 * is being built from input - confirm against the code that fills it.
 */
typedef struct
{
	WordEntry	entry;
	WordEntryPos *pos;
}	WordEntryIN;

typedef struct
{
	char	   *prsbuf;
	char	   *word;
	char	   *curpos;
	int4		len;
	int4		state;
	int4		alen;
B
Bruce Momjian 已提交
95
	WordEntryPos *pos;
T
Teodor Sigaev 已提交
96 97 98 99 100 101
	bool		oprisdelim;
}	TI_IN_STATE;

/*
 * Declared here, defined elsewhere; operates on a caller-supplied
 * TI_IN_STATE.
 * NOTE(review): presumably fetches the next token from the state's input;
 * the meaning of the int4 result is not visible in this header - confirm
 * against the implementation.
 */
int4 gettoken_tsvector(TI_IN_STATE *state);

#endif