convert.c 14.0 KB
Newer Older
L
Linus Torvalds 已提交
1
#include "cache.h"
J
Junio C Hamano 已提交
2
#include "attr.h"
J
Junio C Hamano 已提交
3
#include "run-command.h"
J
Junio C Hamano 已提交
4

L
Linus Torvalds 已提交
5 6 7 8 9 10 11 12 13
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide on whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */

14 15 16 17 18
/*
 * Per-path line-ending conversion modes, as derived from the "crlf"
 * attribute by git_path_check_crlf().
 */
#define CRLF_GUESS	(-1)	/* attribute unset or unrecognized: use heuristics */
#define CRLF_BINARY	0	/* attribute set to false: never convert */
#define CRLF_TEXT	1	/* attribute set to true: convert without guessing */
#define CRLF_INPUT	2	/* attribute is "input": CRLF->LF on the way in only */

L
Linus Torvalds 已提交
19
/* Byte-class tallies collected over a buffer by gather_stats(). */
struct text_stat {
	/* number of NUL bytes */
	unsigned nul;
	/* number of CR bytes, including those that start a CRLF pair */
	unsigned cr;
	/* number of LF bytes, including those that end a CRLF pair */
	unsigned lf;
	/* number of CR bytes immediately followed by an LF */
	unsigned crlf;

	/* These two are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};

/*
 * Classify every byte of buf[0..size) and record the tallies in *stats.
 * *stats is zeroed first, so the caller need not initialize it.
 */
static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
{
	const char *p = buf;
	const char *end = buf + size;

	memset(stats, 0, sizeof(*stats));

	while (p < end) {
		unsigned char ch = *p++;

		switch (ch) {
		case '\r':
			stats->cr++;
			/* p already points at the byte following the CR */
			if (p < end && *p == '\n')
				stats->crlf++;
			break;
		case '\n':
			stats->lf++;
			break;
		case '\b': case '\t': case '\033': case '\014':
			/* BS, HT, ESC and FF count as printable */
			stats->printable++;
			break;
		case 0:
			/* a NUL is counted both on its own and as nonprintable */
			stats->nul++;
			stats->nonprintable++;
			break;
		case 127:
			/* DEL */
			stats->nonprintable++;
			break;
		default:
			/* remaining control characters vs. everything else */
			if (ch < 32)
				stats->nonprintable++;
			else
				stats->printable++;
			break;
		}
	}
}

/*
 * The same heuristics as diff.c::mmfile_is_binary()
 */
static int is_binary(unsigned long size, struct text_stat *stats)
{

72 73
	if (stats->nul)
		return 1;
L
Linus Torvalds 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87
	if ((stats->printable >> 7) < stats->nonprintable)
		return 1;
	/*
	 * Other heuristics? Average line length might be relevant,
	 * as might LF vs CR vs CRLF counts..
	 *
	 * NOTE! It might be normal to have a low ratio of CRLF to LF
	 * (somebody starts with a LF-only file and edits it with an editor
	 * that adds CRLF only to lines that are added..). But do  we
	 * want to support CR-only? Probably not.
	 */
	return 0;
}

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
/*
 * Warn or die when converting this file would not round-trip, i.e. when
 * a later checkout would not give back the line endings seen in *stats.
 *
 * Nothing is checked when checksafe is zero.  SAFE_CRLF_WARN only
 * warns; any other non-zero value (i.e. SAFE_CRLF_FAIL) dies.
 */
static void check_safe_crlf(const char *path, int action,
                            struct text_stat *stats, enum safe_crlf checksafe)
{
	int warn_only;

	if (!checksafe)
		return;
	warn_only = (checksafe == SAFE_CRLF_WARN);

	if (action != CRLF_INPUT && auto_crlf > 0) {
		/*
		 * CRLFs would be added by checkout:
		 * check if we have "naked" LFs
		 */
		if (stats->lf == stats->crlf)
			return;
		if (warn_only)
			warning("LF will be replaced by CRLF in %s", path);
		else
			die("LF would be replaced by CRLF in %s", path);
		return;
	}

	/*
	 * CRLFs would not be restored by checkout:
	 * check if we'd remove CRLFs
	 */
	if (!stats->crlf)
		return;
	if (warn_only)
		warning("CRLF will be replaced by LF in %s.", path);
	else
		die("CRLF would be replaced by LF in %s.", path);
}

119
static int crlf_to_git(const char *path, const char *src, size_t len,
120
                       struct strbuf *buf, int action, enum safe_crlf checksafe)
L
Linus Torvalds 已提交
121 122
{
	struct text_stat stats;
123
	char *dst;
L
Linus Torvalds 已提交
124

125 126
	if ((action == CRLF_BINARY) || !auto_crlf || !len)
		return 0;
L
Linus Torvalds 已提交
127

128
	gather_stats(src, len, &stats);
L
Linus Torvalds 已提交
129

130
	if (action == CRLF_GUESS) {
J
Junio C Hamano 已提交
131 132 133 134 135 136
		/*
		 * We're currently not going to even try to convert stuff
		 * that has bare CR characters. Does anybody do that crazy
		 * stuff?
		 */
		if (stats.cr != stats.crlf)
137
			return 0;
J
Junio C Hamano 已提交
138 139 140 141

		/*
		 * And add some heuristics for binary vs text, of course...
		 */
142 143
		if (is_binary(len, &stats))
			return 0;
J
Junio C Hamano 已提交
144
	}
L
Linus Torvalds 已提交
145

146 147 148 149 150 151
	check_safe_crlf(path, action, &stats, checksafe);

	/* Optimization: No CR? Nothing to convert, regardless. */
	if (!stats.cr)
		return 0;

152 153 154
	/* only grow if not in place */
	if (strbuf_avail(buf) + buf->len < len)
		strbuf_grow(buf, len - buf->len);
155
	dst = buf->buf;
156 157 158 159 160 161
	if (action == CRLF_GUESS) {
		/*
		 * If we guessed, we already know we rejected a file with
		 * lone CR, and we can strip a CR without looking at what
		 * follow it.
		 */
J
Junio C Hamano 已提交
162
		do {
163
			unsigned char c = *src++;
J
Junio C Hamano 已提交
164
			if (c != '\r')
165
				*dst++ = c;
166
		} while (--len);
J
Junio C Hamano 已提交
167 168
	} else {
		do {
169
			unsigned char c = *src++;
170
			if (! (c == '\r' && (1 < len && *src == '\n')))
171
				*dst++ = c;
172
		} while (--len);
J
Junio C Hamano 已提交
173
	}
174 175
	strbuf_setlen(buf, dst - buf->buf);
	return 1;
L
Linus Torvalds 已提交
176 177
}

178 179
static int crlf_to_worktree(const char *path, const char *src, size_t len,
                            struct strbuf *buf, int action)
L
Linus Torvalds 已提交
180
{
181
	char *to_free = NULL;
L
Linus Torvalds 已提交
182 183
	struct text_stat stats;

184
	if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
185
	    auto_crlf <= 0)
186
		return 0;
L
Linus Torvalds 已提交
187

188 189
	if (!len)
		return 0;
L
Linus Torvalds 已提交
190

191
	gather_stats(src, len, &stats);
L
Linus Torvalds 已提交
192 193 194

	/* No LF? Nothing to convert, regardless. */
	if (!stats.lf)
195
		return 0;
L
Linus Torvalds 已提交
196 197 198

	/* Was it already in CRLF format? */
	if (stats.lf == stats.crlf)
199
		return 0;
L
Linus Torvalds 已提交
200

201
	if (action == CRLF_GUESS) {
J
Junio C Hamano 已提交
202 203
		/* If we have any bare CR characters, we're not going to touch it */
		if (stats.cr != stats.crlf)
204
			return 0;
L
Linus Torvalds 已提交
205

206 207
		if (is_binary(len, &stats))
			return 0;
J
Junio C Hamano 已提交
208
	}
L
Linus Torvalds 已提交
209

210 211
	/* are we "faking" in place editing ? */
	if (src == buf->buf)
212
		to_free = strbuf_detach(buf, NULL);
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231

	strbuf_grow(buf, len + stats.lf - stats.crlf);
	for (;;) {
		const char *nl = memchr(src, '\n', len);
		if (!nl)
			break;
		if (nl > src && nl[-1] == '\r') {
			strbuf_add(buf, src, nl + 1 - src);
		} else {
			strbuf_add(buf, src, nl - src);
			strbuf_addstr(buf, "\r\n");
		}
		len -= nl + 1 - src;
		src  = nl + 1;
	}
	strbuf_add(buf, src, len);

	free(to_free);
	return 1;
L
Linus Torvalds 已提交
232
}
J
Junio C Hamano 已提交
233

234 235 236 237 238 239 240
struct filter_params {
	const char *src;
	unsigned long size;
	const char *cmd;
};

static int filter_buffer(int fd, void *data)
241 242 243 244 245
{
	/*
	 * Spawn cmd and feed the buffer contents through its stdin.
	 */
	struct child_process child_process;
246
	struct filter_params *params = (struct filter_params *)data;
247
	int write_err, status;
248
	const char *argv[] = { "sh", "-c", params->cmd, NULL };
249 250

	memset(&child_process, 0, sizeof(child_process));
251 252
	child_process.argv = argv;
	child_process.in = -1;
253
	child_process.out = fd;
254

255
	if (start_command(&child_process))
256
		return error("cannot fork to run external filter %s", params->cmd);
257

258
	write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
259
	if (close(child_process.in))
260 261
		write_err = 1;
	if (write_err)
262
		error("cannot feed the input to external filter %s", params->cmd);
263 264 265

	status = finish_command(&child_process);
	if (status)
266
		error("external filter %s failed %d", params->cmd, -status);
267 268 269
	return (write_err || status);
}

270 271
static int apply_filter(const char *path, const char *src, size_t len,
                        struct strbuf *dst, const char *cmd)
272 273 274 275 276 277 278
{
	/*
	 * Create a pipeline to have the command filter the buffer's
	 * contents.
	 *
	 * (child --> cmd) --> us
	 */
279
	int ret = 1;
280
	struct strbuf nbuf;
281 282
	struct async async;
	struct filter_params params;
283 284

	if (!cmd)
285
		return 0;
286

287 288 289 290 291 292
	memset(&async, 0, sizeof(async));
	async.proc = filter_buffer;
	async.data = &params;
	params.src = src;
	params.size = len;
	params.cmd = cmd;
293 294

	fflush(NULL);
295 296
	if (start_async(&async))
		return 0;	/* error was already reported */
297

298
	strbuf_init(&nbuf, 0);
299
	if (strbuf_read(&nbuf, async.out, len) < 0) {
300 301
		error("read from external filter %s failed", cmd);
		ret = 0;
302
	}
303
	if (close(async.out)) {
304
		error("read from external filter %s failed", cmd);
305
		ret = 0;
306
	}
307 308
	if (finish_async(&async)) {
		error("external filter %s failed", cmd);
309
		ret = 0;
310 311
	}

312
	if (ret) {
313
		strbuf_swap(dst, &nbuf);
314
	}
315
	strbuf_release(&nbuf);
316
	return ret;
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
}

/*
 * One user-configured external conversion driver, filled in from
 * "filter.<name>.*" configuration by read_convert_config().
 *
 * user_convert is the head of the singly linked list of drivers;
 * user_convert_tail points at the "next" slot where a new driver is
 * appended.
 */
static struct convert_driver {
	const char *name;		/* the <name> part of filter.<name>.* */
	struct convert_driver *next;	/* next driver in the list, or NULL */
	char *smudge;			/* filter.<name>.smudge command, or NULL */
	char *clean;			/* filter.<name>.clean command, or NULL */
} *user_convert, **user_convert_tail;

/*
 * Configuration callback that collects external conversion drivers.
 *
 * Handles variables of the form "filter.<name>.<key>"; everything else
 * is ignored.  A driver entry for <name> is created on first sight and
 * appended to the user_convert list, whatever <key> is.
 *
 * Returns 0 on success or for variables that are ignored, and the
 * result of config_error_nonbool() when "smudge" or "clean" is given
 * without a value.
 */
static int read_convert_config(const char *var, const char *value)
{
	const char *ep, *name;
	int namelen;
	struct convert_driver *drv;
	char **slot;

	/*
	 * External conversion drivers are configured using
	 * "filter.<name>.variable".
	 */
	if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
		return 0;
	name = var + 7;
	namelen = ep - name;
	for (drv = user_convert; drv; drv = drv->next)
		if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
			break;
	if (!drv) {
		drv = xcalloc(1, sizeof(struct convert_driver));
		drv->name = xmemdupz(name, namelen);
		*user_convert_tail = drv;
		user_convert_tail = &(drv->next);
	}

	/* step over the last dot; ep now names the variable */
	ep++;

	/*
	 * filter.<name>.smudge and filter.<name>.clean specifies
	 * the command line:
	 *
	 *	command-line
	 *
	 * The command-line will not be interpolated in any way.
	 */
	if (!strcmp("smudge", ep))
		slot = &drv->smudge;
	else if (!strcmp("clean", ep))
		slot = &drv->clean;
	else
		return 0;

	if (!value)
		return config_error_nonbool(var);

	/*
	 * The same key may be seen more than once; release the earlier
	 * value instead of leaking it.  xstrdup() dies on allocation
	 * failure, so a failed copy can no longer silently leave the
	 * command unset.
	 */
	free(*slot);
	*slot = xstrdup(value);
	return 0;
}

377
static void setup_convert_check(struct git_attr_check *check)
J
Junio C Hamano 已提交
378 379
{
	static struct git_attr *attr_crlf;
J
Junio C Hamano 已提交
380
	static struct git_attr *attr_ident;
381
	static struct git_attr *attr_filter;
J
Junio C Hamano 已提交
382

J
Junio C Hamano 已提交
383
	if (!attr_crlf) {
J
Junio C Hamano 已提交
384
		attr_crlf = git_attr("crlf", 4);
J
Junio C Hamano 已提交
385
		attr_ident = git_attr("ident", 5);
386 387 388
		attr_filter = git_attr("filter", 6);
		user_convert_tail = &user_convert;
		git_config(read_convert_config);
J
Junio C Hamano 已提交
389 390 391
	}
	check[0].attr = attr_crlf;
	check[1].attr = attr_ident;
392
	check[2].attr = attr_filter;
J
Junio C Hamano 已提交
393 394 395 396 397
}

/*
 * Count the ident keywords in cp[0..size), i.e. both forms of
 *
 *   "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 *
 * An expanded keyword only counts when its closing '$' is present.
 */
static int count_ident(const char *cp, unsigned long size)
{
	const char *end = cp + size;
	int found = 0;

	while (cp < end) {
		char after;

		if (*cp++ != '$')
			continue;
		/* need at least "Id" plus one more byte after the '$' */
		if (end - cp < 3)
			break;
		if (cp[0] != 'I' || cp[1] != 'd')
			continue;

		/* the byte after "Id" decides; all three are consumed */
		after = cp[2];
		cp += 3;
		if (after == '$') {
			found++; /* $Id$ */
			continue;
		}
		if (after != ':')
			continue;

		/*
		 * "$Id: ... "; scan up to the closing dollar sign and discard.
		 */
		while (cp < end) {
			if (*cp++ == '$') {
				found++;
				break;
			}
		}
	}
	return found;
}

435 436
static int ident_to_git(const char *path, const char *src, size_t len,
                        struct strbuf *buf, int ident)
J
Junio C Hamano 已提交
437
{
438
	char *dst, *dollar;
J
Junio C Hamano 已提交
439

440 441 442
	if (!ident || !count_ident(src, len))
		return 0;

443 444 445
	/* only grow if not in place */
	if (strbuf_avail(buf) + buf->len < len)
		strbuf_grow(buf, len - buf->len);
446 447 448 449 450 451 452 453 454 455 456 457 458 459
	dst = buf->buf;
	for (;;) {
		dollar = memchr(src, '$', len);
		if (!dollar)
			break;
		memcpy(dst, src, dollar + 1 - src);
		dst += dollar + 1 - src;
		len -= dollar + 1 - src;
		src  = dollar + 1;

		if (len > 3 && !memcmp(src, "Id:", 3)) {
			dollar = memchr(src + 3, '$', len - 3);
			if (!dollar)
				break;
460 461
			memcpy(dst, "Id$", 3);
			dst += 3;
462 463
			len -= dollar + 1 - src;
			src  = dollar + 1;
J
Junio C Hamano 已提交
464 465
		}
	}
466 467 468
	memcpy(dst, src, len);
	strbuf_setlen(buf, dst + len - buf->buf);
	return 1;
J
Junio C Hamano 已提交
469 470
}

471 472
static int ident_to_worktree(const char *path, const char *src, size_t len,
                             struct strbuf *buf, int ident)
J
Junio C Hamano 已提交
473 474
{
	unsigned char sha1[20];
475 476
	char *to_free = NULL, *dollar;
	int cnt;
J
Junio C Hamano 已提交
477 478

	if (!ident)
479
		return 0;
J
Junio C Hamano 已提交
480

481
	cnt = count_ident(src, len);
J
Junio C Hamano 已提交
482
	if (!cnt)
483
		return 0;
J
Junio C Hamano 已提交
484

485 486
	/* are we "faking" in place editing ? */
	if (src == buf->buf)
487
		to_free = strbuf_detach(buf, NULL);
488
	hash_sha1_file(src, len, "blob", sha1);
J
Junio C Hamano 已提交
489

490 491 492 493 494 495 496 497 498
	strbuf_grow(buf, len + cnt * 43);
	for (;;) {
		/* step 1: run to the next '$' */
		dollar = memchr(src, '$', len);
		if (!dollar)
			break;
		strbuf_add(buf, src, dollar + 1 - src);
		len -= dollar + 1 - src;
		src  = dollar + 1;
499

500 501
		/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
		if (len < 3 || memcmp("Id", src, 2))
J
Junio C Hamano 已提交
502 503
			continue;

504 505 506 507 508 509 510 511 512 513 514 515 516 517
		/* step 3: skip over Id$ or Id:xxxxx$ */
		if (src[2] == '$') {
			src += 3;
			len -= 3;
		} else if (src[2] == ':') {
			/*
			 * It's possible that an expanded Id has crept its way into the
			 * repository, we cope with that by stripping the expansion out
			 */
			dollar = memchr(src + 3, '$', len - 3);
			if (!dollar) {
				/* incomplete keyword, no more '$', so just quit the loop */
				break;
			}
518

519 520 521 522 523 524
			len -= dollar + 1 - src;
			src  = dollar + 1;
		} else {
			/* it wasn't a "Id$" or "Id:xxxx$" */
			continue;
		}
525

526 527 528 529
		/* step 4: substitute */
		strbuf_addstr(buf, "Id: ");
		strbuf_add(buf, sha1_to_hex(sha1), 40);
		strbuf_addstr(buf, " $");
J
Junio C Hamano 已提交
530
	}
531
	strbuf_add(buf, src, len);
J
Junio C Hamano 已提交
532

533 534
	free(to_free);
	return 1;
J
Junio C Hamano 已提交
535 536
}

537
static int git_path_check_crlf(const char *path, struct git_attr_check *check)
J
Junio C Hamano 已提交
538
{
539 540 541 542 543 544 545 546 547 548
	const char *value = check->value;

	if (ATTR_TRUE(value))
		return CRLF_TEXT;
	else if (ATTR_FALSE(value))
		return CRLF_BINARY;
	else if (ATTR_UNSET(value))
		;
	else if (!strcmp(value, "input"))
		return CRLF_INPUT;
549
	return CRLF_GUESS;
J
Junio C Hamano 已提交
550 551
}

552 553 554 555 556 557 558 559 560 561 562 563 564 565
/*
 * Find the configured driver whose name equals the value of the
 * "filter" attribute held in check.  Returns NULL when the attribute
 * is true, false or unset, or when no driver has that name.
 */
static struct convert_driver *git_path_check_convert(const char *path,
					     struct git_attr_check *check)
{
	const char *value = check->value;
	struct convert_driver *drv = user_convert;

	/* only a string value can name a driver */
	if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
		return NULL;

	while (drv && strcmp(value, drv->name))
		drv = drv->next;
	return drv;
}

J
Junio C Hamano 已提交
566 567 568 569 570 571 572
/*
 * Returns 1 when the "ident" attribute held in check is set to true,
 * 0 for every other state.
 */
static int git_path_check_ident(const char *path, struct git_attr_check *check)
{
	return ATTR_TRUE(check->value) ? 1 : 0;
}

573 574
int convert_to_git(const char *path, const char *src, size_t len,
                   struct strbuf *dst, enum safe_crlf checksafe)
J
Junio C Hamano 已提交
575
{
576
	struct git_attr_check check[3];
577
	int crlf = CRLF_GUESS;
578
	int ident = 0, ret = 0;
579
	char *filter = NULL;
580 581

	setup_convert_check(check);
J
Junio C Hamano 已提交
582
	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
583
		struct convert_driver *drv;
J
Junio C Hamano 已提交
584 585
		crlf = git_path_check_crlf(path, check + 0);
		ident = git_path_check_ident(path, check + 1);
586 587 588
		drv = git_path_check_convert(path, check + 2);
		if (drv && drv->clean)
			filter = drv->clean;
J
Junio C Hamano 已提交
589 590
	}

591 592 593 594
	ret |= apply_filter(path, src, len, dst, filter);
	if (ret) {
		src = dst->buf;
		len = dst->len;
595
	}
596
	ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
597 598 599
	if (ret) {
		src = dst->buf;
		len = dst->len;
600
	}
601
	return ret | ident_to_git(path, src, len, dst, ident);
J
Junio C Hamano 已提交
602 603
}

604
int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
J
Junio C Hamano 已提交
605
{
606
	struct git_attr_check check[3];
607
	int crlf = CRLF_GUESS;
608
	int ident = 0, ret = 0;
609
	char *filter = NULL;
610 611

	setup_convert_check(check);
J
Junio C Hamano 已提交
612
	if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
613
		struct convert_driver *drv;
J
Junio C Hamano 已提交
614 615
		crlf = git_path_check_crlf(path, check + 0);
		ident = git_path_check_ident(path, check + 1);
616 617 618
		drv = git_path_check_convert(path, check + 2);
		if (drv && drv->smudge)
			filter = drv->smudge;
619
	}
J
Junio C Hamano 已提交
620

621 622 623 624
	ret |= ident_to_worktree(path, src, len, dst, ident);
	if (ret) {
		src = dst->buf;
		len = dst->len;
J
Junio C Hamano 已提交
625
	}
626 627 628 629
	ret |= crlf_to_worktree(path, src, len, dst, crlf);
	if (ret) {
		src = dst->buf;
		len = dst->len;
630
	}
631
	return ret | apply_filter(path, src, len, dst, filter);
J
Junio C Hamano 已提交
632
}