varlena.c 41.3 KB
Newer Older
1 2
/*-------------------------------------------------------------------------
 *
 * varlena.c
 *	  Functions for the variable-length built-in types.
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.94 2002/12/06 05:20:17 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

17 18
#include <ctype.h>

M
 
Marc G. Fournier 已提交
19
#include "mb/pg_wchar.h"
20
#include "miscadmin.h"
B
Add:  
Bruce Momjian 已提交
21 22
#include "access/tuptoaster.h"
#include "lib/stringinfo.h"
B
Bruce Momjian 已提交
23
#include "utils/builtins.h"
24
#include "utils/pg_locale.h"
M
Marc G. Fournier 已提交
25

26
extern bool md5_hash(const void *buff, size_t len, char *hexsum);

typedef struct varlena unknown;

/* fmgr-style accessors for values of the local "unknown" varlena typedef */
#define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
#define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)

/* Convert between text values and C strings by calling textout/textin */
#define PG_TEXTARG_GET_STR(arg_) \
	DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
#define PG_TEXT_GET_STR(textp_) \
	DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
#define PG_STR_GET_TEXT(str_) \
	DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))

/* Shorthands that apply the static helpers below to text pointers */
#define TEXTLEN(textp) \
	text_length(PointerGetDatum(textp))
#define TEXTPOS(buf_text, from_sub_text) \
	text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
#define TEXTDUP(textp) \
	DatumGetTextPCopy(PointerGetDatum(textp))

/* LEFT: the part of buf_text that precedes the first match of from_sub_text */
#define LEFT(buf_text, from_sub_text) \
	text_substring(PointerGetDatum(buf_text), \
					1, \
					TEXTPOS(buf_text, from_sub_text) - 1, false)

/*
 * RIGHT: everything in buf_text from from_sub_text_len characters past the
 * first match position of from_sub_text to the end of the string.  The
 * length argument is parenthesized so that an expression passed by the
 * caller cannot re-associate with the addition.
 */
#define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
	text_substring(PointerGetDatum(buf_text), \
					TEXTPOS(buf_text, from_sub_text) + (from_sub_text_len), \
					-1, true)
53

54
/* Prototypes for the file-local helpers defined further down */
static int	text_cmp(text *arg1, text *arg2);
static int32 text_length(Datum str);
static int32 text_position(Datum str, Datum search_str, int matchnum);
static text *text_substring(Datum str, int32 start, int32 length,
			   bool length_not_specified);
61 62


63 64
/*****************************************************************************
 *	 USER I/O ROUTINES														 *
65 66 67
 *****************************************************************************/


68 69
/* Map an ASCII digit character to its numeric value, and back again */
#define VAL(ch)			((ch) - '0')
#define DIG(num)		((num) + '0')
70 71

/*
72
 *		byteain			- converts from printable representation of byte array
73
 *
74 75
 *		Non-printable characters must be passed as '\nnn' (octal) and are
 *		converted to internal form.  '\' must be passed as '\\'.
76
 *		elog(ERROR, ...) if bad form.
77
 *
78 79 80
 *		BUGS:
 *				The input is scaned twice.
 *				The error checking of input is minimal.
81
 */
82 83
Datum
byteain(PG_FUNCTION_ARGS)
84
{
85
	char	   *inputText = PG_GETARG_CSTRING(0);
T
Thomas G. Lockhart 已提交
86 87 88
	char	   *tp;
	char	   *rp;
	int			byte;
89
	bytea	   *result;
90 91

	for (byte = 0, tp = inputText; *tp != '\0'; byte++)
92
	{
93 94
		if (tp[0] != '\\')
			tp++;
95 96 97 98
		else if ((tp[0] == '\\') &&
				 (tp[1] >= '0' && tp[1] <= '3') &&
				 (tp[2] >= '0' && tp[2] <= '7') &&
				 (tp[3] >= '0' && tp[3] <= '7'))
99
			tp += 4;
100 101
		else if ((tp[0] == '\\') &&
				 (tp[1] == '\\'))
102 103 104 105 106 107 108
			tp += 2;
		else
		{
			/*
			 * one backslash, not followed by 0 or ### valid octal
			 */
			elog(ERROR, "Bad input string for type bytea");
109
		}
110 111
	}

B
Bruce Momjian 已提交
112
	byte += VARHDRSZ;
113
	result = (bytea *) palloc(byte);
114 115 116
	result->vl_len = byte;		/* set varlena length */

	tp = inputText;
117 118
	rp = result->vl_dat;
	while (*tp != '\0')
119
	{
120
		if (tp[0] != '\\')
121
			*rp++ = *tp++;
122 123 124 125
		else if ((tp[0] == '\\') &&
				 (tp[1] >= '0' && tp[1] <= '3') &&
				 (tp[2] >= '0' && tp[2] <= '7') &&
				 (tp[3] >= '0' && tp[3] <= '7'))
126
		{
127
			byte = VAL(tp[1]);
128
			byte <<= 3;
129
			byte += VAL(tp[2]);
130
			byte <<= 3;
131 132 133
			*rp++ = byte + VAL(tp[3]);
			tp += 4;
		}
134 135
		else if ((tp[0] == '\\') &&
				 (tp[1] == '\\'))
136 137 138 139 140 141 142
		{
			*rp++ = '\\';
			tp += 2;
		}
		else
		{
			/*
143 144
			 * We should never get here. The first pass should not allow
			 * it.
145 146
			 */
			elog(ERROR, "Bad input string for type bytea");
147
		}
148 149 150
	}

	PG_RETURN_BYTEA_P(result);
151 152 153
}

/*
154
 *		byteaout		- converts to printable representation of byte array
155
 *
156 157
 *		Non-printable characters are inserted as '\nnn' (octal) and '\' as
 *		'\\'.
158
 *
159
 *		NULL vlena should be an error--returning string with NULL for now.
160
 */
161 162
Datum
byteaout(PG_FUNCTION_ARGS)
163
{
164
	bytea	   *vlena = PG_GETARG_BYTEA_P(0);
T
Thomas G. Lockhart 已提交
165 166 167 168
	char	   *result;
	char	   *vp;
	char	   *rp;
	int			val;			/* holds unprintable chars */
169 170
	int			i;
	int			len;
171 172

	len = 1;					/* empty string has 1 char */
173
	vp = vlena->vl_dat;
B
Bruce Momjian 已提交
174
	for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
175
	{
176 177
		if (*vp == '\\')
			len += 2;
178
		else if (isprint((unsigned char) *vp))
179 180
			len++;
		else
181 182
			len += 4;
	}
183 184
	rp = result = (char *) palloc(len);
	vp = vlena->vl_dat;
185 186
	for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
	{
187 188 189 190 191
		if (*vp == '\\')
		{
			*rp++ = '\\';
			*rp++ = '\\';
		}
192
		else if (isprint((unsigned char) *vp))
193
			*rp++ = *vp;
194 195
		else
		{
196 197 198
			val = *vp;
			rp[0] = '\\';
			rp[3] = DIG(val & 07);
199
			val >>= 3;
200
			rp[2] = DIG(val & 07);
201
			val >>= 3;
202 203
			rp[1] = DIG(val & 03);
			rp += 4;
204
		}
205
	}
206
	*rp = '\0';
207
	PG_RETURN_CSTRING(result);
208 209 210 211
}


/*
212
 *		textin			- converts "..." to internal representation
213
 */
214 215
Datum
textin(PG_FUNCTION_ARGS)
216
{
217
	char	   *inputText = PG_GETARG_CSTRING(0);
T
Thomas G. Lockhart 已提交
218
	text	   *result;
219
	int			len;
220 221

	char	   *ermsg;
222 223

	len = strlen(inputText) + VARHDRSZ;
T
Tatsuo Ishii 已提交
224 225

	if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
226
		elog(ERROR, "%s", ermsg);
T
Tatsuo Ishii 已提交
227

T
Thomas G. Lockhart 已提交
228
	result = (text *) palloc(len);
J
TOAST  
Jan Wieck 已提交
229
	VARATT_SIZEP(result) = len;
T
Thomas G. Lockhart 已提交
230

231
	memcpy(VARDATA(result), inputText, len - VARHDRSZ);
232 233

#ifdef CYR_RECODE
234
	convertstr(VARDATA(result), len - VARHDRSZ, 0);
235 236
#endif

237
	PG_RETURN_TEXT_P(result);
238 239 240
}

/*
241
 *		textout			- converts internal representation to "..."
242
 */
243 244
Datum
textout(PG_FUNCTION_ARGS)
245
{
246
	text	   *t = PG_GETARG_TEXT_P(0);
247 248
	int			len;
	char	   *result;
249

250
	len = VARSIZE(t) - VARHDRSZ;
251
	result = (char *) palloc(len + 1);
252
	memcpy(result, VARDATA(t), len);
253
	result[len] = '\0';
254 255

#ifdef CYR_RECODE
256
	convertstr(result, len, 1);
257 258
#endif

259
	PG_RETURN_CSTRING(result);
260 261 262
}


263 264 265 266 267 268 269
/*
 *		unknownin			- converts "..." to internal representation
 */
Datum
unknownin(PG_FUNCTION_ARGS)
{
	char	   *inputStr = PG_GETARG_CSTRING(0);
B
Bruce Momjian 已提交
270
	unknown    *result;
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
	int			len;

	len = strlen(inputStr) + VARHDRSZ;

	result = (unknown *) palloc(len);
	VARATT_SIZEP(result) = len;

	memcpy(VARDATA(result), inputStr, len - VARHDRSZ);

	PG_RETURN_UNKNOWN_P(result);
}


/*
 *		unknownout			- converts internal representation to "..."
 */
Datum
unknownout(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
290
	unknown    *t = PG_GETARG_UNKNOWN_P(0);
291 292 293 294 295 296 297 298 299 300 301 302
	int			len;
	char	   *result;

	len = VARSIZE(t) - VARHDRSZ;
	result = (char *) palloc(len + 1);
	memcpy(result, VARDATA(t), len);
	result[len] = '\0';

	PG_RETURN_CSTRING(result);
}


303 304
/* ========== PUBLIC ROUTINES ========== */

305 306
/*
 * textlen -
M
Marc G. Fournier 已提交
307
 *	  returns the logical length of a text*
308
 *	   (which is less than the VARSIZE of the text*)
309
 */
310 311
Datum
textlen(PG_FUNCTION_ARGS)
312
{
B
Add:  
Bruce Momjian 已提交
313 314
	PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
}
B
Bruce Momjian 已提交
315

B
Add:  
Bruce Momjian 已提交
316 317 318 319 320 321 322 323 324 325 326 327
/*
 * text_length -
 *	Does the real work for textlen()
 *	This is broken out so it can be called directly by other string processing
 *	functions.
 *
 *	str is the text datum to measure.  Returns its length as int32.
 *
 *	This is an ordinary C helper, not an fmgr-callable function, so it
 *	returns with plain "return" rather than the PG_RETURN_xxx macros, which
 *	produce a Datum.
 */
static int32
text_length(Datum str)
{
	/* look the encoding width up once; both tests below use it */
	int			eml = pg_database_encoding_max_length();

	/*
	 * fastpath when max encoding length is one: the result is the size
	 * reported by toast_raw_datum_size() less the varlena header
	 */
	if (eml == 1)
		return toast_raw_datum_size(str) - VARHDRSZ;

	if (eml > 1)
	{
		text	   *t = DatumGetTextP(str);

		/* multibyte encoding: count characters in the payload */
		return pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ);
	}

	/* should never get here */
	elog(ERROR, "Invalid backend encoding; encoding max length "
		 "is less than one.");

	/* not reached: suppress compiler warning */
	return 0;
}
344

M
Marc G. Fournier 已提交
345 346 347 348 349
/*
 * textoctetlen -
 *	  returns the physical length of a text*
 *	   (which is less than the VARSIZE of the text*)
 */
350 351
Datum
textoctetlen(PG_FUNCTION_ARGS)
M
Marc G. Fournier 已提交
352
{
B
Add:  
Bruce Momjian 已提交
353
	PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
354
}
M
Marc G. Fournier 已提交
355

356 357
/*
 * textcat -
358
 *	  takes two text* and returns a text* that is the concatenation of
359
 *	  the two.
360 361 362 363 364
 *
 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 * Allocate space for output in all cases.
 * XXX - thomas 1997-07-10
365
 */
366 367
Datum
textcat(PG_FUNCTION_ARGS)
368
{
369 370
	text	   *t1 = PG_GETARG_TEXT_P(0);
	text	   *t2 = PG_GETARG_TEXT_P(1);
371 372 373 374
	int			len1,
				len2,
				len;
	text	   *result;
375
	char	   *ptr;
376

377
	len1 = (VARSIZE(t1) - VARHDRSZ);
378 379
	if (len1 < 0)
		len1 = 0;
380

381
	len2 = (VARSIZE(t2) - VARHDRSZ);
382 383
	if (len2 < 0)
		len2 = 0;
384

385
	len = len1 + len2 + VARHDRSZ;
386
	result = (text *) palloc(len);
387 388

	/* Set size of result string... */
J
TOAST  
Jan Wieck 已提交
389
	VARATT_SIZEP(result) = len;
390

391 392
	/* Fill data field of result string... */
	ptr = VARDATA(result);
393
	if (len1 > 0)
394
		memcpy(ptr, VARDATA(t1), len1);
395
	if (len2 > 0)
396
		memcpy(ptr + len1, VARDATA(t2), len2);
397

398 399
	PG_RETURN_TEXT_P(result);
}
T
Thomas G. Lockhart 已提交
400 401 402 403 404 405 406

/*
 * text_substr()
 * Return a substring starting at the specified position.
 * - thomas 1997-12-31
 *
 * Input:
407 408 409
 *	- string
 *	- starting position (is one-based)
 *	- string length
T
Thomas G. Lockhart 已提交
410
 *
411
 * If the starting position is zero or less, then return from the start of the string
T
Tom Lane 已提交
412
 *	adjusting the length to be consistent with the "negative start" per SQL92.
T
Thomas G. Lockhart 已提交
413 414 415
 * If the length is less than zero, return the remaining string.
 *
 * Note that the arguments operate on octet length,
416
 *	so not aware of multibyte character sets.
M
Marc G. Fournier 已提交
417
 *
418
 * Added multibyte support.
M
Marc G. Fournier 已提交
419
 * - Tatsuo Ishii 1998-4-21
420 421 422
 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 * Formerly returned the entire string; now returns a portion.
 * - Thomas Lockhart 1998-12-10
423 424
 * Now uses faster TOAST-slicing interface
 * - John Gray 2002-02-22
B
Add:  
Bruce Momjian 已提交
425 426 427 428
 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 * error; if E < 1, return '', not entire string). Fixed MB related bug when
 * S > LC and < LC + 4 sometimes garbage characters are returned.
B
Bruce Momjian 已提交
429
 * - Joe Conway 2002-08-10
T
Thomas G. Lockhart 已提交
430
 */
431 432
Datum
text_substr(PG_FUNCTION_ARGS)
T
Thomas G. Lockhart 已提交
433
{
B
Add:  
Bruce Momjian 已提交
434 435 436 437 438
	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
									PG_GETARG_INT32(1),
									PG_GETARG_INT32(2),
									false));
}
B
Bruce Momjian 已提交
439

B
Add:  
Bruce Momjian 已提交
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
/*
 * text_substr_no_len -
 *	  Wrapper to avoid opr_sanity failure due to
 *	  one function accepting a different number of args.
 */
Datum
text_substr_no_len(PG_FUNCTION_ARGS)
{
	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
									PG_GETARG_INT32(1),
									-1, true));
}

/*
 * text_substring -
 *	Does the real work for text_substr() and text_substr_no_len()
 *	This is broken out so it can be called directly by other string processing
 *	functions.
 */
B
Bruce Momjian 已提交
459
static text *
B
Add:  
Bruce Momjian 已提交
460 461 462
text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
{
	int32		eml = pg_database_encoding_max_length();
B
Bruce Momjian 已提交
463 464 465
	int32		S = start;		/* start position */
	int32		S1;				/* adjusted start position */
	int32		L1;				/* adjusted substring length */
B
Add:  
Bruce Momjian 已提交
466 467 468

	/* life is easy if the encoding max length is 1 */
	if (eml == 1)
T
Thomas G. Lockhart 已提交
469
	{
B
Add:  
Bruce Momjian 已提交
470
		S1 = Max(S, 1);
T
Thomas G. Lockhart 已提交
471

B
Bruce Momjian 已提交
472 473
		if (length_not_specified)		/* special case - get length to
										 * end of string */
B
Add:  
Bruce Momjian 已提交
474 475 476 477
			L1 = -1;
		else
		{
			/* end position */
B
Bruce Momjian 已提交
478
			int			E = S + length;
479

B
Add:  
Bruce Momjian 已提交
480 481 482 483 484 485 486
			/*
			 * A negative value for L is the only way for the end position
			 * to be before the start. SQL99 says to throw an error.
			 */
			if (E < S)
				elog(ERROR, "negative substring length not allowed");

B
Bruce Momjian 已提交
487 488 489 490
			/*
			 * A zero or negative value for the end position can happen if
			 * the start was negative or one. SQL99 says to return a
			 * zero-length string.
B
Add:  
Bruce Momjian 已提交
491 492 493
			 */
			if (E < 1)
				return PG_STR_GET_TEXT("");
494

B
Add:  
Bruce Momjian 已提交
495 496 497
			L1 = E - S1;
		}

B
Bruce Momjian 已提交
498 499 500 501
		/*
		 * If the start position is past the end of the string, SQL99 says
		 * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
		 * do that for us. Convert to zero-based starting position
B
Add:  
Bruce Momjian 已提交
502 503 504 505
		 */
		return DatumGetTextPSlice(str, S1 - 1, L1);
	}
	else if (eml > 1)
506
	{
B
Add:  
Bruce Momjian 已提交
507 508
		/*
		 * When encoding max length is > 1, we can't get LC without
B
Bruce Momjian 已提交
509 510
		 * detoasting, so we'll grab a conservatively large slice now and
		 * go back later to do the right thing
B
Add:  
Bruce Momjian 已提交
511 512 513 514
		 */
		int32		slice_start;
		int32		slice_size;
		int32		slice_strlen;
B
Bruce Momjian 已提交
515
		text	   *slice;
B
Add:  
Bruce Momjian 已提交
516 517 518 519 520 521 522
		int32		E1;
		int32		i;
		char	   *p;
		char	   *s;
		text	   *ret;

		/*
B
Bruce Momjian 已提交
523 524
		 * if S is past the end of the string, the tuple toaster will
		 * return a zero-length string to us
B
Add:  
Bruce Momjian 已提交
525 526 527 528
		 */
		S1 = Max(S, 1);

		/*
B
Bruce Momjian 已提交
529 530 531
		 * We need to start at position zero because there is no way to
		 * know in advance which byte offset corresponds to the supplied
		 * start position.
B
Add:  
Bruce Momjian 已提交
532 533 534
		 */
		slice_start = 0;

B
Bruce Momjian 已提交
535 536
		if (length_not_specified)		/* special case - get length to
										 * end of string */
B
Add:  
Bruce Momjian 已提交
537
			slice_size = L1 = -1;
538
		else
B
Add:  
Bruce Momjian 已提交
539
		{
B
Bruce Momjian 已提交
540
			int			E = S + length;
B
Add:  
Bruce Momjian 已提交
541 542 543 544 545 546 547

			/*
			 * A negative value for L is the only way for the end position
			 * to be before the start. SQL99 says to throw an error.
			 */
			if (E < S)
				elog(ERROR, "negative substring length not allowed");
548

B
Bruce Momjian 已提交
549 550 551 552
			/*
			 * A zero or negative value for the end position can happen if
			 * the start was negative or one. SQL99 says to return a
			 * zero-length string.
B
Add:  
Bruce Momjian 已提交
553 554 555
			 */
			if (E < 1)
				return PG_STR_GET_TEXT("");
556

B
Add:  
Bruce Momjian 已提交
557
			/*
B
Bruce Momjian 已提交
558 559
			 * if E is past the end of the string, the tuple toaster will
			 * truncate the length for us
B
Add:  
Bruce Momjian 已提交
560 561
			 */
			L1 = E - S1;
562

B
Add:  
Bruce Momjian 已提交
563
			/*
B
Bruce Momjian 已提交
564 565 566
			 * Total slice size in bytes can't be any longer than the
			 * start position plus substring length times the encoding max
			 * length.
B
Add:  
Bruce Momjian 已提交
567 568 569 570
			 */
			slice_size = (S1 + L1) * eml;
		}
		slice = DatumGetTextPSlice(str, slice_start, slice_size);
571

B
Add:  
Bruce Momjian 已提交
572 573 574
		/* see if we got back an empty string */
		if ((VARSIZE(slice) - VARHDRSZ) == 0)
			return PG_STR_GET_TEXT("");
575

B
Add:  
Bruce Momjian 已提交
576
		/* Now we can get the actual length of the slice in MB characters */
B
Bruce Momjian 已提交
577
		slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
T
Thomas G. Lockhart 已提交
578

B
Bruce Momjian 已提交
579 580
		/*
		 * Check that the start position wasn't > slice_strlen. If so,
B
Add:  
Bruce Momjian 已提交
581 582 583 584
		 * SQL99 says to return a zero-length string.
		 */
		if (S1 > slice_strlen)
			return PG_STR_GET_TEXT("");
T
Thomas G. Lockhart 已提交
585

B
Add:  
Bruce Momjian 已提交
586 587
		/*
		 * Adjust L1 and E1 now that we know the slice string length.
B
Bruce Momjian 已提交
588 589
		 * Again remember that S1 is one based, and slice_start is zero
		 * based.
B
Add:  
Bruce Momjian 已提交
590 591
		 */
		if (L1 > -1)
B
Bruce Momjian 已提交
592
			E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
B
Add:  
Bruce Momjian 已提交
593 594 595 596
		else
			E1 = slice_start + 1 + slice_strlen;

		/*
B
Bruce Momjian 已提交
597 598
		 * Find the start position in the slice; remember S1 is not zero
		 * based
B
Add:  
Bruce Momjian 已提交
599 600 601 602 603 604 605 606 607
		 */
		p = VARDATA(slice);
		for (i = 0; i < S1 - 1; i++)
			p += pg_mblen(p);

		/* hang onto a pointer to our start position */
		s = p;

		/*
B
Bruce Momjian 已提交
608 609
		 * Count the actual bytes used by the substring of the requested
		 * length.
B
Add:  
Bruce Momjian 已提交
610 611 612 613 614 615 616 617 618 619 620 621
		 */
		for (i = S1; i < E1; i++)
			p += pg_mblen(p);

		ret = (text *) palloc(VARHDRSZ + (p - s));
		VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
		memcpy(VARDATA(ret), s, (p - s));

		return ret;
	}
	else
		elog(ERROR, "Invalid backend encoding; encoding max length "
B
Bruce Momjian 已提交
622
			 "is less than one.");
B
Add:  
Bruce Momjian 已提交
623 624 625

	/* not reached: suppress compiler warning */
	return PG_STR_GET_TEXT("");
626
}
627 628 629

/*
 * textpos -
630 631 632
 *	  Return the position of the specified substring.
 *	  Implements the SQL92 POSITION() function.
 *	  Ref: A Guide To The SQL Standard, Date & Darwen, 1997
633 634
 * - thomas 1997-07-27
 */
635 636
Datum
textpos(PG_FUNCTION_ARGS)
637
{
B
Add:  
Bruce Momjian 已提交
638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
	PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
}

/*
 * text_position -
 *	Does the real work for textpos()
 *	This is broken out so it can be called directly by other string processing
 *	functions.
 *
 *	str			text datum to search in
 *	search_str	text datum to search for
 *	matchnum	which occurrence to report (1 = first)
 *
 *	Returns the one-based position of the matchnum'th occurrence, or 0 if
 *	there is no such occurrence or matchnum is 0.  An empty search string
 *	yields 1.  In a multibyte encoding the position is in characters.
 *
 *	This is an ordinary C helper, so it returns with plain "return" rather
 *	than the fmgr PG_RETURN_xxx macros.
 */
static int32
text_position(Datum str, Datum search_str, int matchnum)
{
	int			eml = pg_database_encoding_max_length();
	text	   *t1 = DatumGetTextP(str);
	text	   *t2 = DatumGetTextP(search_str);
	int			match = 0,
				pos = 0,
				p,
				px,
				len1,
				len2;

	if (matchnum == 0)
		return 0;				/* result for 0th match */

	if (VARSIZE(t2) <= VARHDRSZ)
		return 1;				/* result for empty pattern */

	len1 = (VARSIZE(t1) - VARHDRSZ);
	len2 = (VARSIZE(t2) - VARHDRSZ);

	if (eml == 1)				/* simple case - single byte encoding */
	{
		char	   *p1,
				   *p2;

		/* no use in searching str past point where search_str will fit */
		px = (len1 - len2);

		p1 = VARDATA(t1);
		p2 = VARDATA(t2);

		for (p = 0; p <= px; p++)
		{
			if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
			{
				if (++match == matchnum)
				{
					pos = p + 1;
					break;
				}
			}
			p1++;
		}
	}
	else if (eml > 1)			/* not as simple - multibyte encoding */
	{
		pg_wchar   *p1,
				   *p2,
				   *ps1,
				   *ps2;

		/* convert both strings to wide characters; lengths become counts */
		ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
		(void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
		len1 = pg_wchar_strlen(p1);
		ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
		(void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
		len2 = pg_wchar_strlen(p2);

		/*
		 * The search limit must be computed from the character counts, not
		 * the byte lengths: a byte-based limit can exceed the number of
		 * converted characters and walk the scan past the terminator into
		 * uninitialized buffer space.
		 */
		px = (len1 - len2);

		for (p = 0; p <= px; p++)
		{
			if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
			{
				if (++match == matchnum)
				{
					pos = p + 1;
					break;
				}
			}
			p1++;
		}

		pfree(ps1);
		pfree(ps2);
	}
	else
		elog(ERROR, "Invalid backend encoding; encoding max length "
			 "is less than one.");

	return pos;
}
729

B
Hello!  
Bruce Momjian 已提交
730 731
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
732
 * Includes locale support, but must copy strings to temporary memory
733
 *	to allow null-termination for inputs to strcoll().
B
Hello!  
Bruce Momjian 已提交
734
 * Returns -1, 0 or 1
735
 */
B
Hello!  
Bruce Momjian 已提交
736
int
737
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
738
{
739
	int			result;
740

741
	/*
B
Bruce Momjian 已提交
742 743 744
	 * Unfortunately, there is no strncoll(), so in the non-C locale case
	 * we have to do some memory copying.  This turns out to be
	 * significantly slower, so we optimize the case where LC_COLLATE is
745 746
	 * C.  We also try to optimize relatively-short strings by avoiding
	 * palloc/pfree overhead.
747
	 */
748 749
#define STACKBUFLEN		1024

750 751
	if (!lc_collate_is_c())
	{
752 753 754 755 756 757 758 759 760 761 762 763 764
		char	a1buf[STACKBUFLEN];
		char	a2buf[STACKBUFLEN];
		char   *a1p,
			   *a2p;

		if (len1 >= STACKBUFLEN)
			a1p = (char *) palloc(len1 + 1);
		else
			a1p = a1buf;
		if (len2 >= STACKBUFLEN)
			a2p = (char *) palloc(len2 + 1);
		else
			a2p = a2buf;
765

766
		memcpy(a1p, arg1, len1);
767
		a1p[len1] = '\0';
768
		memcpy(a2p, arg2, len2);
769
		a2p[len2] = '\0';
B
Hello!  
Bruce Momjian 已提交
770

771
		result = strcoll(a1p, a2p);
772

773 774 775 776
		if (len1 >= STACKBUFLEN)
			pfree(a1p);
		if (len2 >= STACKBUFLEN)
			pfree(a2p);
777 778 779
	}
	else
	{
780
		result = strncmp(arg1, arg2, Min(len1, len2));
781 782 783
		if ((result == 0) && (len1 != len2))
			result = (len1 < len2) ? -1 : 1;
	}
784

785
	return result;
786
}
787

788

B
Hello!  
Bruce Momjian 已提交
789
/* text_cmp()
790
 * Internal comparison function for text strings.
B
Hello!  
Bruce Momjian 已提交
791
 * Returns -1, 0 or 1
792
 */
793
static int
B
Hello!  
Bruce Momjian 已提交
794
text_cmp(text *arg1, text *arg2)
795
{
796 797 798 799
	char	   *a1p,
			   *a2p;
	int			len1,
				len2;
800

801 802
	a1p = VARDATA(arg1);
	a2p = VARDATA(arg2);
803

B
Hello!  
Bruce Momjian 已提交
804 805
	len1 = VARSIZE(arg1) - VARHDRSZ;
	len2 = VARSIZE(arg2) - VARHDRSZ;
806

B
Hello!  
Bruce Momjian 已提交
807
	return varstr_cmp(a1p, len1, a2p, len2);
808
}
809

810 811
/*
 * Comparison functions for text strings.
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 */
817

818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
/* texteq: true if the two text arguments compare equal under text_cmp() */
Datum
texteq(PG_FUNCTION_ARGS)
{
	text	   *lhs = PG_GETARG_TEXT_P(0);
	text	   *rhs = PG_GETARG_TEXT_P(1);
	bool		equal;

	/* values of different size can't be equal, so skip the comparison */
	equal = (VARSIZE(lhs) == VARSIZE(rhs)) && (text_cmp(lhs, rhs) == 0);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_BOOL(equal);
}

/* textne: true if the two text arguments do not compare equal */
Datum
textne(PG_FUNCTION_ARGS)
{
	text	   *lhs = PG_GETARG_TEXT_P(0);
	text	   *rhs = PG_GETARG_TEXT_P(1);
	bool		differ;

	/* values of different size must differ, so skip the comparison */
	differ = (VARSIZE(lhs) != VARSIZE(rhs)) || (text_cmp(lhs, rhs) != 0);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_BOOL(differ);
}

856 857
Datum
text_lt(PG_FUNCTION_ARGS)
B
Hello!  
Bruce Momjian 已提交
858
{
859 860
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
861
	bool		result;
B
Hello!  
Bruce Momjian 已提交
862

863 864 865 866 867 868
	result = (text_cmp(arg1, arg2) < 0);

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL(result);
869 870 871 872
}

Datum
text_le(PG_FUNCTION_ARGS)
B
Hello!  
Bruce Momjian 已提交
873
{
874 875
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
876 877 878 879 880 881
	bool		result;

	result = (text_cmp(arg1, arg2) <= 0);

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);
882

883
	PG_RETURN_BOOL(result);
884 885 886 887
}

Datum
text_gt(PG_FUNCTION_ARGS)
888
{
889 890
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
891
	bool		result;
892

893 894 895 896 897 898
	result = (text_cmp(arg1, arg2) > 0);

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL(result);
899 900
}

901 902
Datum
text_ge(PG_FUNCTION_ARGS)
903
{
904 905
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
906 907 908 909 910 911
	bool		result;

	result = (text_cmp(arg1, arg2) >= 0);

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);
912

913
	PG_RETURN_BOOL(result);
914 915
}

916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
/* bttextcmp: returns the text_cmp() result for the two arguments as int32 */
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
	text	   *lhs = PG_GETARG_TEXT_P(0);
	text	   *rhs = PG_GETARG_TEXT_P(1);
	int32		cmp = text_cmp(lhs, rhs);

	/* release any detoasted copies before returning */
	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(cmp);
}


932 933
Datum
text_larger(PG_FUNCTION_ARGS)
934
{
935 936
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
B
Bruce Momjian 已提交
937
	text	   *result;
938

939
	result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
940

941
	PG_RETURN_TEXT_P(result);
942 943
}

944 945
Datum
text_smaller(PG_FUNCTION_ARGS)
946
{
947 948
	text	   *arg1 = PG_GETARG_TEXT_P(0);
	text	   *arg2 = PG_GETARG_TEXT_P(1);
B
Bruce Momjian 已提交
949
	text	   *result;
950

951
	result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
952

953
	PG_RETURN_TEXT_P(result);
954 955
}

956
/*-------------------------------------------------------------
957
 * byteaoctetlen
958 959 960 961
 *
 * get the number of bytes contained in an instance of type 'bytea'
 *-------------------------------------------------------------
 */
962 963
Datum
byteaoctetlen(PG_FUNCTION_ARGS)
964
{
B
Add:  
Bruce Momjian 已提交
965
	PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
966 967
}

968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009
/*
 * byteacat -
 *	  takes two bytea* and returns a bytea* that is the concatenation of
 *	  the two.
 *
 * Cloned from textcat and modified as required.
 */
Datum
byteacat(PG_FUNCTION_ARGS)
{
	bytea	   *t1 = PG_GETARG_BYTEA_P(0);
	bytea	   *t2 = PG_GETARG_BYTEA_P(1);
	int			len1 = VARSIZE(t1) - VARHDRSZ;
	int			len2 = VARSIZE(t2) - VARHDRSZ;
	int			total;
	bytea	   *result;
	char	   *out;

	/* treat a nonsensical negative payload size as empty */
	if (len1 < 0)
		len1 = 0;
	if (len2 < 0)
		len2 = 0;

	/* a new result is always allocated, even if one input is empty */
	total = len1 + len2 + VARHDRSZ;
	result = (bytea *) palloc(total);
	VARATT_SIZEP(result) = total;

	/* copy the first payload, then append the second right after it */
	out = VARDATA(result);
	if (len1 > 0)
	{
		memcpy(out, VARDATA(t1), len1);
		out += len1;
	}
	if (len2 > 0)
		memcpy(out, VARDATA(t2), len2);

	PG_RETURN_BYTEA_P(result);
}

B
Add:  
Bruce Momjian 已提交
1010
#define PG_STR_GET_BYTEA(str_) \
B
Bruce Momjian 已提交
1011
	DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1012 1013 1014 1015 1016 1017 1018 1019
/*
 * bytea_substr()
 * Return a substring starting at the specified position.
 * Cloned from text_substr and modified as required.
 *
 * Input:
 *	- string
 *	- starting position (is one-based)
B
Add:  
Bruce Momjian 已提交
1020
 *	- string length (optional)
1021 1022
 *
 * If the starting position is zero or less, then return from the start of the string
T
Tom Lane 已提交
1023
 * adjusting the length to be consistent with the "negative start" per SQL92.
B
Add:  
Bruce Momjian 已提交
1024 1025
 * If the length is less than zero, an ERROR is thrown. If no third argument
 * (length) is provided, the length to the end of the string is assumed.
1026 1027 1028 1029
 */
Datum
bytea_substr(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
1030 1031 1032
	int			S = PG_GETARG_INT32(1); /* start position */
	int			S1;				/* adjusted start position */
	int			L1;				/* adjusted substring length */
1033

B
Add:  
Bruce Momjian 已提交
1034 1035 1036
	S1 = Max(S, 1);

	if (fcinfo->nargs == 2)
1037
	{
B
Add:  
Bruce Momjian 已提交
1038
		/*
B
Bruce Momjian 已提交
1039 1040 1041
		 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
		 * everything to the end of the string if we pass it a negative
		 * value for length.
B
Add:  
Bruce Momjian 已提交
1042 1043
		 */
		L1 = -1;
1044
	}
B
Add:  
Bruce Momjian 已提交
1045 1046 1047
	else
	{
		/* end position */
B
Bruce Momjian 已提交
1048
		int			E = S + PG_GETARG_INT32(2);
1049

B
Add:  
Bruce Momjian 已提交
1050
		/*
B
Bruce Momjian 已提交
1051 1052
		 * A negative value for L is the only way for the end position to
		 * be before the start. SQL99 says to throw an error.
B
Add:  
Bruce Momjian 已提交
1053 1054 1055
		 */
		if (E < S)
			elog(ERROR, "negative substring length not allowed");
1056

B
Bruce Momjian 已提交
1057 1058 1059 1060
		/*
		 * A zero or negative value for the end position can happen if the
		 * start was negative or one. SQL99 says to return a zero-length
		 * string.
B
Add:  
Bruce Momjian 已提交
1061 1062 1063 1064 1065 1066 1067
		 */
		if (E < 1)
			PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));

		L1 = E - S1;
	}

B
Bruce Momjian 已提交
1068 1069 1070 1071
	/*
	 * If the start position is past the end of the string, SQL99 says to
	 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
	 * that for us. Convert to zero-based starting position
B
Add:  
Bruce Momjian 已提交
1072
	 */
B
Bruce Momjian 已提交
1073
	PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
B
Add:  
Bruce Momjian 已提交
1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084
}

/*
 * bytea_substr_no_len -
 *	  Entry point that simply forwards to bytea_substr().  It exists to
 *	  avoid an opr_sanity failure caused by one function accepting a
 *	  different number of args.
 */
Datum
bytea_substr_no_len(PG_FUNCTION_ARGS)
{
	Datum		result = bytea_substr(fcinfo);

	return result;
}

/*
 * byteapos -
 *	  Return the one-based position of the first occurrence of the second
 *	  argument within the first, or 0 when it does not occur.  An empty
 *	  pattern yields 1.
 *	  Implements the SQL92 POSITION() function.
 * Cloned from textpos and modified as required.
 */
Datum
byteapos(PG_FUNCTION_ARGS)
{
	bytea	   *haystack = PG_GETARG_BYTEA_P(0);
	bytea	   *needle = PG_GETARG_BYTEA_P(1);
	char	   *scan;
	char	   *pattern;
	int			haylen;
	int			patlen;
	int			last;
	int			offset;

	if (VARSIZE(needle) <= VARHDRSZ)
		PG_RETURN_INT32(1);		/* result for empty pattern */

	haylen = VARSIZE(haystack) - VARHDRSZ;
	patlen = VARSIZE(needle) - VARHDRSZ;

	scan = VARDATA(haystack);
	pattern = VARDATA(needle);

	/* last zero-based offset at which the whole pattern still fits */
	last = haylen - patlen;

	for (offset = 0; offset <= last; offset++, scan++)
	{
		/* cheap first-byte test before the full comparison */
		if (*scan == *pattern && memcmp(scan, pattern, patlen) == 0)
			PG_RETURN_INT32(offset + 1);
	}

	PG_RETURN_INT32(0);
}

1130 1131 1132 1133
/*-------------------------------------------------------------
 * byteaGetByte
 *
 * this routine treats "bytea" as an array of bytes.
1134
 * It returns the Nth byte (a number between 0 and 255).
1135 1136
 *-------------------------------------------------------------
 */
1137 1138
Datum
byteaGetByte(PG_FUNCTION_ARGS)
1139
{
1140 1141
	bytea	   *v = PG_GETARG_BYTEA_P(0);
	int32		n = PG_GETARG_INT32(1);
1142 1143
	int			len;
	int			byte;
1144

1145 1146 1147 1148
	len = VARSIZE(v) - VARHDRSZ;

	if (n < 0 || n >= len)
		elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1149
			 n, len - 1);
1150 1151 1152

	byte = ((unsigned char *) VARDATA(v))[n];

1153
	PG_RETURN_INT32(byte);
1154 1155 1156 1157 1158 1159 1160 1161 1162 1163
}

/*-------------------------------------------------------------
 * byteaGetBit
 *
 * This routine treats a "bytea" type like an array of bits.
 * It returns the value of the Nth bit (0 or 1).
 *
 *-------------------------------------------------------------
 */
1164 1165
Datum
byteaGetBit(PG_FUNCTION_ARGS)
1166
{
1167 1168
	bytea	   *v = PG_GETARG_BYTEA_P(0);
	int32		n = PG_GETARG_INT32(1);
1169 1170
	int			byteNo,
				bitNo;
1171
	int			len;
1172
	int			byte;
1173

1174 1175
	len = VARSIZE(v) - VARHDRSZ;

1176
	if (n < 0 || n >= len * 8)
1177
		elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1178
			 n, len * 8 - 1);
1179

1180 1181 1182
	byteNo = n / 8;
	bitNo = n % 8;

1183
	byte = ((unsigned char *) VARDATA(v))[byteNo];
1184 1185

	if (byte & (1 << bitNo))
1186
		PG_RETURN_INT32(1);
1187
	else
1188
		PG_RETURN_INT32(0);
1189
}
1190

1191 1192 1193 1194 1195 1196 1197 1198
/*-------------------------------------------------------------
 * byteaSetByte
 *
 * Given an instance of type 'bytea' creates a new one with
 * the Nth byte set to the given value.
 *
 *-------------------------------------------------------------
 */
1199 1200
Datum
byteaSetByte(PG_FUNCTION_ARGS)
1201
{
1202 1203 1204
	bytea	   *v = PG_GETARG_BYTEA_P(0);
	int32		n = PG_GETARG_INT32(1);
	int32		newByte = PG_GETARG_INT32(2);
1205
	int			len;
1206
	bytea	   *res;
1207

1208 1209 1210 1211
	len = VARSIZE(v) - VARHDRSZ;

	if (n < 0 || n >= len)
		elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1212 1213 1214 1215 1216
			 n, len - 1);

	/*
	 * Make a copy of the original varlena.
	 */
1217 1218
	res = (bytea *) palloc(VARSIZE(v));
	memcpy((char *) res, (char *) v, VARSIZE(v));
1219 1220 1221 1222

	/*
	 * Now set the byte.
	 */
1223
	((unsigned char *) VARDATA(res))[n] = newByte;
1224

1225
	PG_RETURN_BYTEA_P(res);
1226 1227 1228 1229 1230 1231 1232 1233 1234 1235
}

/*-------------------------------------------------------------
 * byteaSetBit
 *
 * Given an instance of type 'bytea' creates a new one with
 * the Nth bit set to the given value.
 *
 *-------------------------------------------------------------
 */
1236 1237
Datum
byteaSetBit(PG_FUNCTION_ARGS)
1238
{
1239 1240 1241
	bytea	   *v = PG_GETARG_BYTEA_P(0);
	int32		n = PG_GETARG_INT32(1);
	int32		newBit = PG_GETARG_INT32(2);
1242 1243
	bytea	   *res;
	int			len;
1244 1245 1246 1247
	int			oldByte,
				newByte;
	int			byteNo,
				bitNo;
1248

1249 1250
	len = VARSIZE(v) - VARHDRSZ;

1251
	if (n < 0 || n >= len * 8)
1252
		elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1253
			 n, len * 8 - 1);
1254 1255 1256 1257

	byteNo = n / 8;
	bitNo = n % 8;

1258 1259 1260 1261
	/*
	 * sanity check!
	 */
	if (newBit != 0 && newBit != 1)
1262
		elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1263 1264

	/*
1265
	 * Make a copy of the original varlena.
1266
	 */
1267 1268
	res = (bytea *) palloc(VARSIZE(v));
	memcpy((char *) res, (char *) v, VARSIZE(v));
1269 1270

	/*
1271
	 * Update the byte.
1272
	 */
1273 1274
	oldByte = ((unsigned char *) VARDATA(res))[byteNo];

1275 1276 1277 1278 1279
	if (newBit == 0)
		newByte = oldByte & (~(1 << bitNo));
	else
		newByte = oldByte | (1 << bitNo);

1280
	((unsigned char *) VARDATA(res))[byteNo] = newByte;
1281

1282
	PG_RETURN_BYTEA_P(res);
1283
}
1284 1285 1286


/* text_name()
1287
 * Converts a text type to a Name type.
1288
 */
1289 1290
Datum
text_name(PG_FUNCTION_ARGS)
1291
{
1292 1293
	text	   *s = PG_GETARG_TEXT_P(0);
	Name		result;
1294 1295 1296
	int			len;

	len = VARSIZE(s) - VARHDRSZ;
1297 1298 1299

	/* Truncate oversize input */
	if (len >= NAMEDATALEN)
B
Bruce Momjian 已提交
1300
		len = NAMEDATALEN - 1;
1301 1302

#ifdef STRINGDEBUG
1303 1304
	printf("text- convert string length %d (%d) ->%d\n",
		   VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1305 1306
#endif

1307 1308
	result = (Name) palloc(NAMEDATALEN);
	memcpy(NameStr(*result), VARDATA(s), len);
1309 1310

	/* now null pad to full length... */
1311 1312
	while (len < NAMEDATALEN)
	{
1313
		*(NameStr(*result) + len) = '\0';
1314 1315 1316
		len++;
	}

1317 1318
	PG_RETURN_NAME(result);
}
1319 1320

/* name_text()
1321
 * Converts a Name type to a text type.
1322
 */
1323 1324
Datum
name_text(PG_FUNCTION_ARGS)
1325
{
1326
	Name		s = PG_GETARG_NAME(0);
1327 1328 1329
	text	   *result;
	int			len;

1330
	len = strlen(NameStr(*s));
1331 1332

#ifdef STRINGDEBUG
1333 1334
	printf("text- convert string length %d (%d) ->%d\n",
		   VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1335 1336 1337
#endif

	result = palloc(VARHDRSZ + len);
1338 1339
	VARATT_SIZEP(result) = VARHDRSZ + len;
	memcpy(VARDATA(result), NameStr(*s), len);
1340

1341 1342
	PG_RETURN_TEXT_P(result);
}
1343 1344


1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357
/*
 * textToQualifiedNameList - convert a text object to list of names
 *
 * This implements the input parsing needed by nextval() and other
 * functions that take a text parameter representing a qualified name.
 * The text is split at dots by SplitIdentifierString(); each resulting
 * name is copied into its own String node.  An unparsable or empty
 * name raises an ERROR prefixed with 'caller'.
 */
List *
textToQualifiedNameList(text *textval, const char *caller)
{
	char	   *rawname;
	List	   *parts;
	List	   *cell;
	List	   *result = NIL;

	/*
	 * Convert to C string (handles possible detoasting).  The string is
	 * modified in place by SplitIdentifierString().
	 */
	rawname = DatumGetCString(DirectFunctionCall1(textout,
											  PointerGetDatum(textval)));

	/* Both a syntax error and an empty name list are rejected */
	if (!SplitIdentifierString(rawname, '.', &parts) || parts == NIL)
		elog(ERROR, "%s: invalid name syntax", caller);

	/* The list cells point into rawname, so copy each name out */
	foreach(cell, parts)
		result = lappend(result, makeString(pstrdup((char *) lfirst(cell))));

	pfree(rawname);
	freeList(parts);

	return result;
}

/*
 * SplitIdentifierString --- parse a string containing identifiers
 *
 * This is the guts of textToQualifiedNameList, and is exported for use in
 * other situations such as parsing GUC variables.  In the GUC case, it's
 * important to avoid memory leaks, so the API is designed to minimize the
 * amount of stuff that needs to be allocated and freed.
 *
 * Inputs:
 *	rawstring: the input string; must be overwritable!  On return, it's
 *			   been modified to contain the separated identifiers.
 *	separator: the separator punctuation expected between identifiers
 *			   (typically '.' or ',').  Whitespace may also appear around
 *			   identifiers.
 * Outputs:
 *	namelist: filled with a palloc'd list of pointers to identifiers within
 *			  rawstring.  Caller should freeList() this even on error return.
 *
 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
 *
 * Note that an empty string is considered okay here, though not in
 * textToQualifiedNameList.
 */
bool
SplitIdentifierString(char *rawstring, char separator,
					  List **namelist)
{
	char	   *cursor = rawstring;
	bool		atEnd = false;

	*namelist = NIL;

	/* skip leading whitespace */
	while (isspace((unsigned char) *cursor))
		cursor++;

	/* allow empty string */
	if (*cursor == '\0')
		return true;

	/* Each pass begins with cursor on the first character of a name. */
	while (!atEnd)
	{
		char	   *ident;
		char	   *identEnd;
		int			identLen;

		if (*cursor == '\"')
		{
			/* Quoted name --- collapse quote-quote pairs, no downcasing */
			ident = cursor + 1;
			while ((identEnd = strchr(cursor + 1, '\"')) != NULL &&
				   identEnd[1] == '\"')
			{
				/*
				 * Doubled quote: slide the rest of the string (terminating
				 * null included) down one byte and resume the search after
				 * the quote that remains.
				 */
				memmove(identEnd, identEnd + 1, strlen(identEnd));
				cursor = identEnd;
			}
			if (identEnd == NULL)
				return false;	/* mismatched quotes */

			/* identEnd is the closing quote; continue just past it */
			cursor = identEnd + 1;
		}
		else
		{
			/* Unquoted name --- extends to separator or whitespace */
			ident = cursor;
			for (; *cursor != '\0'; cursor++)
			{
				if (*cursor == separator ||
					isspace((unsigned char) *cursor))
					break;

				/*
				 * It's important that this match the identifier
				 * downcasing code used by backend/parser/scan.l.
				 */
				if (isupper((unsigned char) *cursor))
					*cursor = tolower((unsigned char) *cursor);
			}
			identEnd = cursor;
			if (identEnd == ident)
				return false;	/* empty unquoted name not allowed */
		}

		/* skip trailing whitespace */
		while (isspace((unsigned char) *cursor))
			cursor++;

		if (*cursor == separator)
		{
			/* another name must follow; step over the gap before it */
			cursor++;
			while (isspace((unsigned char) *cursor))
				cursor++;
		}
		else if (*cursor != '\0')
			return false;		/* invalid syntax */
		else
			atEnd = true;

		/* Now safe to overwrite separator with a null */
		*identEnd = '\0';

		/* Truncate name if it's overlength; again, should match scan.l */
		identLen = strlen(ident);
		if (identLen >= NAMEDATALEN)
		{
			identLen = pg_mbcliplen(ident, identLen, NAMEDATALEN - 1);
			ident[identLen] = '\0';
		}

		/* Finished isolating current name --- add it to list */
		*namelist = lappend(*namelist, ident);
	}

	return true;
}


1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516
/*****************************************************************************
 *	Comparison Functions used for bytea
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 *****************************************************************************/

/*
 * byteaeq - true when both values have the same length and the same bytes.
 */
Datum
byteaeq(PG_FUNCTION_ARGS)
{
	bytea	   *lhs = PG_GETARG_BYTEA_P(0);
	bytea	   *rhs = PG_GETARG_BYTEA_P(1);
	int			lhslen = VARSIZE(lhs) - VARHDRSZ;
	int			rhslen = VARSIZE(rhs) - VARHDRSZ;
	bool		equal = false;

	/* values of different length cannot be equal, so skip the memcmp */
	if (lhslen == rhslen)
		equal = (memcmp(VARDATA(lhs), VARDATA(rhs), lhslen) == 0);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_BOOL(equal);
}

/*
 * byteane - true when the values differ in length or in any byte.
 */
Datum
byteane(PG_FUNCTION_ARGS)
{
	bytea	   *lhs = PG_GETARG_BYTEA_P(0);
	bytea	   *rhs = PG_GETARG_BYTEA_P(1);
	int			lhslen = VARSIZE(lhs) - VARHDRSZ;
	int			rhslen = VARSIZE(rhs) - VARHDRSZ;
	bool		differ = true;

	/* values of different length always differ, so skip the memcmp */
	if (lhslen == rhslen)
		differ = (memcmp(VARDATA(lhs), VARDATA(rhs), lhslen) != 0);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_BOOL(differ);
}

Datum
bytealt(PG_FUNCTION_ARGS)
{
1565 1566
	bytea	   *arg1 = PG_GETARG_BYTEA_P(0);
	bytea	   *arg2 = PG_GETARG_BYTEA_P(1);
1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584
	int			len1,
				len2;
	int			cmp;

	len1 = VARSIZE(arg1) - VARHDRSZ;
	len2 = VARSIZE(arg2) - VARHDRSZ;

	cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
}

Datum
byteale(PG_FUNCTION_ARGS)
{
1585 1586
	bytea	   *arg1 = PG_GETARG_BYTEA_P(0);
	bytea	   *arg2 = PG_GETARG_BYTEA_P(1);
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604
	int			len1,
				len2;
	int			cmp;

	len1 = VARSIZE(arg1) - VARHDRSZ;
	len2 = VARSIZE(arg2) - VARHDRSZ;

	cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
}

Datum
byteagt(PG_FUNCTION_ARGS)
{
1605 1606
	bytea	   *arg1 = PG_GETARG_BYTEA_P(0);
	bytea	   *arg2 = PG_GETARG_BYTEA_P(1);
1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624
	int			len1,
				len2;
	int			cmp;

	len1 = VARSIZE(arg1) - VARHDRSZ;
	len2 = VARSIZE(arg2) - VARHDRSZ;

	cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
}

Datum
byteage(PG_FUNCTION_ARGS)
{
1625 1626
	bytea	   *arg1 = PG_GETARG_BYTEA_P(0);
	bytea	   *arg2 = PG_GETARG_BYTEA_P(1);
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644
	int			len1,
				len2;
	int			cmp;

	len1 = VARSIZE(arg1) - VARHDRSZ;
	len2 = VARSIZE(arg2) - VARHDRSZ;

	cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));

	PG_FREE_IF_COPY(arg1, 0);
	PG_FREE_IF_COPY(arg2, 1);

	PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
}

/*
 * byteacmp - three-way bytewise comparison.  The memcmp() result over
 * the common prefix is returned as is when nonzero; otherwise the
 * result is -1, 0 or 1 according to how the lengths compare.
 */
Datum
byteacmp(PG_FUNCTION_ARGS)
{
	bytea	   *lhs = PG_GETARG_BYTEA_P(0);
	bytea	   *rhs = PG_GETARG_BYTEA_P(1);
	int			lhslen = VARSIZE(lhs) - VARHDRSZ;
	int			rhslen = VARSIZE(rhs) - VARHDRSZ;
	int			cmp;

	cmp = memcmp(VARDATA(lhs), VARDATA(rhs), Min(lhslen, rhslen));

	/* identical common prefix: the shorter value sorts first */
	if (cmp == 0)
	{
		if (lhslen < rhslen)
			cmp = -1;
		else if (lhslen > rhslen)
			cmp = 1;
	}

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(cmp);
}
B
Add:  
Bruce Momjian 已提交
1663 1664 1665 1666 1667

/*
 * replace_text
 * replace all occurences of 'old_sub_str' in 'orig_str'
 * with 'new_sub_str' to form 'new_str'
B
Bruce Momjian 已提交
1668
 *
B
Add:  
Bruce Momjian 已提交
1669
 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
B
Bruce Momjian 已提交
1670
 * otherwise returns 'new_str'
B
Add:  
Bruce Momjian 已提交
1671 1672 1673 1674
 */
Datum
replace_text(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
1675 1676 1677 1678
	text	   *left_text;
	text	   *right_text;
	text	   *buf_text;
	text	   *ret_text;
B
Add:  
Bruce Momjian 已提交
1679
	int			curr_posn;
B
Bruce Momjian 已提交
1680
	text	   *src_text = PG_GETARG_TEXT_P(0);
B
Add:  
Bruce Momjian 已提交
1681
	int			src_text_len = TEXTLEN(src_text);
B
Bruce Momjian 已提交
1682
	text	   *from_sub_text = PG_GETARG_TEXT_P(1);
B
Add:  
Bruce Momjian 已提交
1683
	int			from_sub_text_len = TEXTLEN(from_sub_text);
B
Bruce Momjian 已提交
1684 1685
	text	   *to_sub_text = PG_GETARG_TEXT_P(2);
	char	   *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
B
Add:  
Bruce Momjian 已提交
1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733
	StringInfo	str = makeStringInfo();

	if (src_text_len == 0 || from_sub_text_len == 0)
		PG_RETURN_TEXT_P(src_text);

	buf_text = TEXTDUP(src_text);
	curr_posn = TEXTPOS(buf_text, from_sub_text);

	while (curr_posn > 0)
	{
		left_text = LEFT(buf_text, from_sub_text);
		right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);

		appendStringInfo(str, PG_TEXT_GET_STR(left_text));
		appendStringInfo(str, to_sub_str);

		pfree(buf_text);
		pfree(left_text);
		buf_text = right_text;
		curr_posn = TEXTPOS(buf_text, from_sub_text);
	}

	appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
	pfree(buf_text);

	ret_text = PG_STR_GET_TEXT(str->data);
	pfree(str->data);
	pfree(str);

	PG_RETURN_TEXT_P(ret_text);
}

/*
 * split_text
 * parse input string
 * return ord item (1 based)
 * based on provided field separator
 */
Datum
split_text(PG_FUNCTION_ARGS)
{
	text	   *inputstring = PG_GETARG_TEXT_P(0);
	int			inputstring_len = TEXTLEN(inputstring);
	text	   *fldsep = PG_GETARG_TEXT_P(1);
	int			fldsep_len = TEXTLEN(fldsep);
	int			fldnum = PG_GETARG_INT32(2);
	int			start_posn = 0;
	int			end_posn = 0;
B
Bruce Momjian 已提交
1734
	text	   *result_text;
B
Add:  
Bruce Momjian 已提交
1735 1736 1737 1738 1739 1740 1741 1742

	/* return empty string for empty input string */
	if (inputstring_len < 1)
		PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));

	/* empty field separator */
	if (fldsep_len < 1)
	{
B
Bruce Momjian 已提交
1743 1744
		if (fldnum == 1)		/* first field - just return the input
								 * string */
B
Add:  
Bruce Momjian 已提交
1745
			PG_RETURN_TEXT_P(inputstring);
B
Bruce Momjian 已提交
1746 1747
		else
/* otherwise return an empty string */
B
Add:  
Bruce Momjian 已提交
1748 1749 1750 1751 1752 1753 1754 1755
			PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
	}

	/* field number is 1 based */
	if (fldnum < 1)
		elog(ERROR, "field position must be > 0");

	start_posn = text_position(PointerGetDatum(inputstring),
B
Bruce Momjian 已提交
1756 1757
							   PointerGetDatum(fldsep),
							   fldnum - 1);
B
Add:  
Bruce Momjian 已提交
1758
	end_posn = text_position(PointerGetDatum(inputstring),
B
Bruce Momjian 已提交
1759 1760
							 PointerGetDatum(fldsep),
							 fldnum);
B
Add:  
Bruce Momjian 已提交
1761 1762 1763

	if ((start_posn == 0) && (end_posn == 0))	/* fldsep not found */
	{
B
Bruce Momjian 已提交
1764 1765
		if (fldnum == 1)		/* first field - just return the input
								 * string */
B
Add:  
Bruce Momjian 已提交
1766
			PG_RETURN_TEXT_P(inputstring);
B
Bruce Momjian 已提交
1767 1768
		else
/* otherwise return an empty string */
B
Add:  
Bruce Momjian 已提交
1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798
			PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
	}
	else if ((start_posn != 0) && (end_posn == 0))
	{
		/* last field requested */
		result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
		PG_RETURN_TEXT_P(result_text);
	}
	else if ((start_posn == 0) && (end_posn != 0))
	{
		/* first field requested */
		result_text = LEFT(inputstring, fldsep);
		PG_RETURN_TEXT_P(result_text);
	}
	else
	{
		/* prior to last field requested */
		result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
		PG_RETURN_TEXT_P(result_text);
	}
}

#define HEXBASE 16
/*
 * Convert a int32 to a string containing a base 16 (hex) representation of
 * the number.
 */
Datum
to_hex32(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
1799 1800 1801 1802 1803
	static char digits[] = "0123456789abcdef";
	char		buf[32];		/* bigger than needed, but reasonable */
	char	   *ptr;
	text	   *result_text;
	int32		value = PG_GETARG_INT32(0);
B
Add:  
Bruce Momjian 已提交
1804

1805
	ptr = buf + sizeof(buf) - 1;
B
Add:  
Bruce Momjian 已提交
1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824
	*ptr = '\0';

	do
	{
		*--ptr = digits[value % HEXBASE];
		value /= HEXBASE;
	} while (ptr > buf && value);

	result_text = PG_STR_GET_TEXT(ptr);
	PG_RETURN_TEXT_P(result_text);
}

/*
 * Convert a int64 to a string containing a base 16 (hex) representation of
 * the number.
 */
Datum
to_hex64(PG_FUNCTION_ARGS)
{
B
Bruce Momjian 已提交
1825 1826 1827 1828 1829
	static char digits[] = "0123456789abcdef";
	char		buf[32];		/* bigger than needed, but reasonable */
	char	   *ptr;
	text	   *result_text;
	int64		value = PG_GETARG_INT64(0);
B
Add:  
Bruce Momjian 已提交
1830

1831
	ptr = buf + sizeof(buf) - 1;
B
Add:  
Bruce Momjian 已提交
1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842
	*ptr = '\0';

	do
	{
		*--ptr = digits[value % HEXBASE];
		value /= HEXBASE;
	} while (ptr > buf && value);

	result_text = PG_STR_GET_TEXT(ptr);
	PG_RETURN_TEXT_P(result_text);
}
1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868

/*
 * Create an md5 hash of a text string and return it as hex
 *
 * md5 produces a 16 byte (128 bit) hash; double it for hex
 */
#define MD5_HASH_LEN  32

Datum
md5_text(PG_FUNCTION_ARGS)
{
	char	   *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
	size_t		len = strlen(buff);
	char	   *hexsum;
	text	   *result_text;

	/* leave room for the terminating '\0' */
	hexsum = (char *) palloc(MD5_HASH_LEN + 1);

	/* get the hash result */
	md5_hash((void *) buff, len, hexsum);

	/* convert to text and return it */
	result_text = PG_STR_GET_TEXT(hexsum);
	PG_RETURN_TEXT_P(result_text);
}