fast-import.c 46.9 KB
Newer Older
1 2 3 4 5 6
/*
Format of STDIN stream:

  stream ::= cmd*;

  cmd ::= new_blob
7
        | new_commit
8
        | new_tag
9
        | reset_branch
10 11
        ;

12 13 14 15
  new_blob ::= 'blob' lf
	mark?
    file_content;
  file_content ::= data;
16

17
  new_commit ::= 'commit' sp ref_str lf
18 19 20 21
    mark?
    ('author' sp name '<' email '>' ts tz lf)?
    'committer' sp name '<' email '>' ts tz lf
    commit_msg
22
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
23
    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
24 25 26
    file_change*
    lf;
  commit_msg ::= data;
27

28 29
  file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
                | 'D' sp path_str lf
30
                ;
31 32 33 34 35 36 37 38
  mode ::= '644' | '755';

  new_tag ::= 'tag' sp tag_str lf
    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
	'tagger' sp name '<' email '>' ts tz lf
    tag_msg;
  tag_msg ::= data;

39 40 41
  reset_branch ::= 'reset' sp ref_str lf
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    lf;
42

43 44 45
  checkpoint ::= 'checkpoint' lf
    lf;

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
     # note: the first idnum in a stream should be 1 and subsequent
     # idnums should not have gaps between values as this will cause
     # the stream parser to reserve space for the gapped values.  An
	 # idnum can be updated in the future to a new object by issuing
     # a new mark directive with the old idnum.
	 #
  mark ::= 'mark' sp idnum lf;

     # note: declen indicates the length of binary_data in bytes.
     # declen does not include the lf preceeding or trailing the
     # binary data.
     #
  data ::= 'data' sp declen lf
    binary_data
	lf;

     # note: quoted strings are C-style quoting supporting \c for
     # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
	 # is the signed byte value in octal.  Note that the only
     # characters which must actually be escaped to protect the
     # stream formatting is: \, " and LF.  Otherwise these values
	 # are UTF8.
     #
  ref_str     ::= ref     | '"' quoted(ref)     '"' ;
  sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
  tag_str     ::= tag     | '"' quoted(tag)     '"' ;
  path_str    ::= path    | '"' quoted(path)    '"' ;

  declen ::= # unsigned 32 bit value, ascii base10 notation;
75
  bigint ::= # unsigned integer value, ascii base10 notation;
76
  binary_data ::= # file content, not interpreted;
77

78 79
  sp ::= # ASCII space character;
  lf ::= # ASCII newline (LF) character;
80 81 82 83 84

     # note: a colon (':') must precede the numerical value assigned to
	 # an idnum.  This is to distinguish it from a ref or tag name as
     # GIT does not permit ':' in ref or tag strings.
	 #
85
  idnum   ::= ':' bigint;
86 87 88
  path    ::= # GIT style file path, e.g. "a/b/c";
  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
89 90
  sha1exp ::= # Any valid GIT SHA1 expression;
  hexsha1 ::= # SHA1 in hexadecimal format;
91 92 93 94 95 96

     # note: name and email are UTF8 strings, however name must not
	 # contain '<' or lf and email must not contain any of the
     # following: '<', '>', lf.
	 #
  name  ::= # valid GIT author/committer name;
97
  email ::= # valid GIT author/committer email;
98 99
  ts    ::= # time since the epoch in seconds, ascii base10 notation;
  tz    ::= # GIT style timezone;
100 101
*/

102 103 104 105
#include "builtin.h"
#include "cache.h"
#include "object.h"
#include "blob.h"
106
#include "tree.h"
107 108
#include "delta.h"
#include "pack.h"
109
#include "refs.h"
110
#include "csum-file.h"
111 112
#include "strbuf.h"
#include "quote.h"
113

114 115 116 117
struct object_entry
{
	struct object_entry *next;
	unsigned long offset;
118
	unsigned type : TYPE_BITS;
119
	unsigned pack_id : 16;
120 121 122
	unsigned char sha1[20];
};

123
struct object_entry_pool
124
{
125
	struct object_entry_pool *next_pool;
126 127
	struct object_entry *next_free;
	struct object_entry *end;
128
	struct object_entry entries[FLEX_ARRAY]; /* more */
129 130
};

131 132 133 134 135 136
struct mark_set
{
	union {
		struct object_entry *marked[1024];
		struct mark_set *sets[1024];
	} data;
137
	unsigned int shift;
138 139
};

140 141 142
struct last_object
{
	void *data;
143
	unsigned long len;
144
	unsigned long offset;
145
	unsigned int depth;
146
	unsigned no_free:1;
147 148
};

149 150 151 152 153 154 155 156 157 158 159
struct mem_pool
{
	struct mem_pool *next_pool;
	char *next_free;
	char *end;
	char space[FLEX_ARRAY]; /* more */
};

struct atom_str
{
	struct atom_str *next_atom;
160
	unsigned int str_len;
161 162 163 164
	char str_dat[FLEX_ARRAY]; /* more */
};

struct tree_content;
165 166
struct tree_entry
{
167 168
	struct tree_content *tree;
	struct atom_str* name;
169 170 171 172 173
	struct tree_entry_ms
	{
		unsigned int mode;
		unsigned char sha1[20];
	} versions[2];
174 175
};

176
struct tree_content
177
{
178 179
	unsigned int entry_capacity; /* must match avail_tree_content */
	unsigned int entry_count;
180
	unsigned int delta_depth;
181 182 183 184 185 186 187
	struct tree_entry *entries[FLEX_ARRAY]; /* more */
};

struct avail_tree_content
{
	unsigned int entry_capacity; /* must match tree_content */
	struct avail_tree_content *next_avail;
188 189 190 191
};

struct branch
{
192 193
	struct branch *table_next_branch;
	struct branch *active_next_branch;
194
	const char *name;
195
	struct tree_entry branch_tree;
196
	unsigned long last_commit;
197
	unsigned int pack_id;
198
	unsigned char sha1[20];
199 200
};

201 202 203 204
struct tag
{
	struct tag *next_tag;
	const char *name;
205
	unsigned int pack_id;
206 207 208
	unsigned char sha1[20];
};

209 210 211 212 213 214
struct dbuf
{
	void *buffer;
	size_t capacity;
};

215 216 217 218 219
struct hash_list
{
	struct hash_list *next;
	unsigned char sha1[20];
};
220

221
/* Configured limits on output */
222
static unsigned long max_depth = 10;
223
static unsigned long max_packsize = (1LL << 32) - 1;
224 225 226 227 228 229 230 231
static uintmax_t max_objects = -1;

/* Stats and misc. counters */
static uintmax_t alloc_count;
static uintmax_t marks_set_count;
static uintmax_t object_count_by_type[1 << TYPE_BITS];
static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
static uintmax_t delta_count_by_type[1 << TYPE_BITS];
232
static unsigned long object_count;
233
static unsigned long branch_count;
234
static unsigned long branch_load_count;
235

236 237 238 239 240
/* Memory pools */
static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static size_t total_allocd;
static struct mem_pool *mem_pool;

241
/* Atom management */
242 243 244 245 246
static unsigned int atom_table_sz = 4451;
static unsigned int atom_cnt;
static struct atom_str **atom_table;

/* The .pack file being generated */
247
static unsigned int pack_id;
248
static struct packed_git *pack_data;
249
static struct packed_git **all_packs;
250
static unsigned long pack_size;
251 252

/* Table of objects we've written. */
253
static unsigned int object_entry_alloc = 5000;
254 255
static struct object_entry_pool *blocks;
static struct object_entry *object_table[1 << 16];
256
static struct mark_set *marks;
257
static const char* mark_file;
258 259

/* Our last blob */
260 261 262 263 264 265 266
static struct last_object last_blob;

/* Tree management */
static unsigned int tree_entry_alloc = 1000;
static void *avail_tree_entry;
static unsigned int avail_tree_table_sz = 100;
static struct avail_tree_content **avail_tree_table;
267 268
static struct dbuf old_tree;
static struct dbuf new_tree;
269

270
/* Branch data */
271 272 273
static unsigned long max_active_branches = 5;
static unsigned long cur_active_branches;
static unsigned long branch_table_sz = 1039;
274 275 276
static struct branch **branch_table;
static struct branch *active_branches;

277 278 279 280
/* Tag data */
static struct tag *first_tag;
static struct tag *last_tag;

281 282
/* Input stream parsing */
static struct strbuf command_buf;
283
static uintmax_t next_mark;
284
static struct dbuf new_data;
285
static FILE* branch_log;
286

287

288
static void alloc_objects(unsigned int cnt)
289
{
290
	struct object_entry_pool *b;
291

292
	b = xmalloc(sizeof(struct object_entry_pool)
293
		+ cnt * sizeof(struct object_entry));
294
	b->next_pool = blocks;
295 296 297 298 299
	b->next_free = b->entries;
	b->end = b->entries + cnt;
	blocks = b;
	alloc_count += cnt;
}
300

301
static struct object_entry* new_object(unsigned char *sha1)
302
{
303
	struct object_entry *e;
304

305
	if (blocks->next_free == blocks->end)
306
		alloc_objects(object_entry_alloc);
307

308
	e = blocks->next_free++;
309
	hashcpy(e->sha1, sha1);
310
	return e;
311 312
}

313 314 315 316 317
static struct object_entry* find_object(unsigned char *sha1)
{
	unsigned int h = sha1[0] << 8 | sha1[1];
	struct object_entry *e;
	for (e = object_table[h]; e; e = e->next)
318
		if (!hashcmp(sha1, e->sha1))
319 320 321 322
			return e;
	return NULL;
}

323 324 325
static struct object_entry* insert_object(unsigned char *sha1)
{
	unsigned int h = sha1[0] << 8 | sha1[1];
326
	struct object_entry *e = object_table[h];
327
	struct object_entry *p = NULL;
328 329

	while (e) {
330
		if (!hashcmp(sha1, e->sha1))
331 332 333 334 335 336
			return e;
		p = e;
		e = e->next;
	}

	e = new_object(sha1);
337
	e->next = NULL;
338 339 340 341
	e->offset = 0;
	if (p)
		p->next = e;
	else
342
		object_table[h] = e;
343 344
	return e;
}
345

346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
static unsigned int hc_str(const char *s, size_t len)
{
	unsigned int r = 0;
	while (len-- > 0)
		r = r * 31 + *s++;
	return r;
}

static void* pool_alloc(size_t len)
{
	struct mem_pool *p;
	void *r;

	for (p = mem_pool; p; p = p->next_pool)
		if ((p->end - p->next_free >= len))
			break;

	if (!p) {
		if (len >= (mem_pool_alloc/2)) {
			total_allocd += len;
			return xmalloc(len);
		}
		total_allocd += sizeof(struct mem_pool) + mem_pool_alloc;
		p = xmalloc(sizeof(struct mem_pool) + mem_pool_alloc);
		p->next_pool = mem_pool;
		p->next_free = p->space;
		p->end = p->next_free + mem_pool_alloc;
		mem_pool = p;
	}

	r = p->next_free;
377 378 379
	/* round out to a pointer alignment */
	if (len & (sizeof(void*) - 1))
		len += sizeof(void*) - (len & (sizeof(void*) - 1));
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
	p->next_free += len;
	return r;
}

static void* pool_calloc(size_t count, size_t size)
{
	size_t len = count * size;
	void *r = pool_alloc(len);
	memset(r, 0, len);
	return r;
}

static char* pool_strdup(const char *s)
{
	char *r = pool_alloc(strlen(s) + 1);
	strcpy(r, s);
	return r;
}

399 400 401 402 403 404 405 406 407 408 409
static void size_dbuf(struct dbuf *b, size_t maxlen)
{
	if (b->buffer) {
		if (b->capacity >= maxlen)
			return;
		free(b->buffer);
	}
	b->capacity = ((maxlen / 1024) + 1) * 1024;
	b->buffer = xmalloc(b->capacity);
}

410
static void insert_mark(uintmax_t idnum, struct object_entry *oe)
411 412 413 414 415 416 417 418 419
{
	struct mark_set *s = marks;
	while ((idnum >> s->shift) >= 1024) {
		s = pool_calloc(1, sizeof(struct mark_set));
		s->shift = marks->shift + 10;
		s->data.sets[0] = marks;
		marks = s;
	}
	while (s->shift) {
420
		uintmax_t i = idnum >> s->shift;
421 422 423 424 425 426 427 428 429 430 431 432
		idnum -= i << s->shift;
		if (!s->data.sets[i]) {
			s->data.sets[i] = pool_calloc(1, sizeof(struct mark_set));
			s->data.sets[i]->shift = s->shift - 10;
		}
		s = s->data.sets[i];
	}
	if (!s->data.marked[idnum])
		marks_set_count++;
	s->data.marked[idnum] = oe;
}

433
static struct object_entry* find_mark(uintmax_t idnum)
434
{
435
	uintmax_t orig_idnum = idnum;
436 437 438 439
	struct mark_set *s = marks;
	struct object_entry *oe = NULL;
	if ((idnum >> s->shift) < 1024) {
		while (s && s->shift) {
440
			uintmax_t i = idnum >> s->shift;
441 442 443 444 445 446 447
			idnum -= i << s->shift;
			s = s->data.sets[i];
		}
		if (s)
			oe = s->data.marked[idnum];
	}
	if (!oe)
448
		die("mark :%ju not declared", orig_idnum);
449 450 451
	return oe;
}

452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
static struct atom_str* to_atom(const char *s, size_t len)
{
	unsigned int hc = hc_str(s, len) % atom_table_sz;
	struct atom_str *c;

	for (c = atom_table[hc]; c; c = c->next_atom)
		if (c->str_len == len && !strncmp(s, c->str_dat, len))
			return c;

	c = pool_alloc(sizeof(struct atom_str) + len + 1);
	c->str_len = len;
	strncpy(c->str_dat, s, len);
	c->str_dat[len] = 0;
	c->next_atom = atom_table[hc];
	atom_table[hc] = c;
	atom_cnt++;
	return c;
}

static struct branch* lookup_branch(const char *name)
{
	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
	struct branch *b;

	for (b = branch_table[hc]; b; b = b->table_next_branch)
		if (!strcmp(name, b->name))
			return b;
	return NULL;
}

static struct branch* new_branch(const char *name)
{
	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
	struct branch* b = lookup_branch(name);

	if (b)
		die("Invalid attempt to create duplicate branch: %s", name);
489 490
	if (check_ref_format(name))
		die("Branch name doesn't conform to GIT standards: %s", name);
491 492 493 494

	b = pool_calloc(1, sizeof(struct branch));
	b->name = pool_strdup(name);
	b->table_next_branch = branch_table[hc];
495 496
	b->branch_tree.versions[0].mode = S_IFDIR;
	b->branch_tree.versions[1].mode = S_IFDIR;
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
	branch_table[hc] = b;
	branch_count++;
	return b;
}

static unsigned int hc_entries(unsigned int cnt)
{
	cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8;
	return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1;
}

static struct tree_content* new_tree_content(unsigned int cnt)
{
	struct avail_tree_content *f, *l = NULL;
	struct tree_content *t;
	unsigned int hc = hc_entries(cnt);

	for (f = avail_tree_table[hc]; f; l = f, f = f->next_avail)
		if (f->entry_capacity >= cnt)
			break;

	if (f) {
		if (l)
			l->next_avail = f->next_avail;
		else
			avail_tree_table[hc] = f->next_avail;
	} else {
		cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt;
		f = pool_alloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
		f->entry_capacity = cnt;
	}

	t = (struct tree_content*)f;
	t->entry_count = 0;
531
	t->delta_depth = 0;
532 533 534 535 536 537 538 539
	return t;
}

static void release_tree_entry(struct tree_entry *e);
static void release_tree_content(struct tree_content *t)
{
	struct avail_tree_content *f = (struct avail_tree_content*)t;
	unsigned int hc = hc_entries(f->entry_capacity);
540 541 542 543 544 545
	f->next_avail = avail_tree_table[hc];
	avail_tree_table[hc] = f;
}

static void release_tree_content_recursive(struct tree_content *t)
{
546 547 548
	unsigned int i;
	for (i = 0; i < t->entry_count; i++)
		release_tree_entry(t->entries[i]);
549
	release_tree_content(t);
550 551 552 553 554 555 556 557
}

static struct tree_content* grow_tree_content(
	struct tree_content *t,
	int amt)
{
	struct tree_content *r = new_tree_content(t->entry_count + amt);
	r->entry_count = t->entry_count;
558
	r->delta_depth = t->delta_depth;
559 560 561 562 563 564 565 566 567 568 569
	memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
	release_tree_content(t);
	return r;
}

static struct tree_entry* new_tree_entry()
{
	struct tree_entry *e;

	if (!avail_tree_entry) {
		unsigned int n = tree_entry_alloc;
570
		total_allocd += n * sizeof(struct tree_entry);
571
		avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
572
		while (n-- > 1) {
573 574 575
			*((void**)e) = e + 1;
			e++;
		}
576
		*((void**)e) = NULL;
577 578 579 580 581 582 583 584 585 586
	}

	e = avail_tree_entry;
	avail_tree_entry = *((void**)e);
	return e;
}

static void release_tree_entry(struct tree_entry *e)
{
	if (e->tree)
587
		release_tree_content_recursive(e->tree);
588 589 590 591
	*((void**)e) = avail_tree_entry;
	avail_tree_entry = e;
}

592 593
static void start_packfile()
{
594
	static char tmpfile[PATH_MAX];
595
	struct packed_git *p;
596
	struct pack_header hdr;
597
	int pack_fd;
598

599 600 601
	snprintf(tmpfile, sizeof(tmpfile),
		"%s/pack_XXXXXX", get_object_directory());
	pack_fd = mkstemp(tmpfile);
602
	if (pack_fd < 0)
603 604 605
		die("Can't create %s: %s", tmpfile, strerror(errno));
	p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
	strcpy(p->pack_name, tmpfile);
606
	p->pack_fd = pack_fd;
607 608 609 610

	hdr.hdr_signature = htonl(PACK_SIGNATURE);
	hdr.hdr_version = htonl(2);
	hdr.hdr_entries = 0;
611
	write_or_die(p->pack_fd, &hdr, sizeof(hdr));
612 613

	pack_data = p;
614 615
	pack_size = sizeof(hdr);
	object_count = 0;
616 617 618

	all_packs = xrealloc(all_packs, sizeof(*all_packs) * (pack_id + 1));
	all_packs[pack_id] = p;
619 620 621 622
}

static void fixup_header_footer()
{
623
	int pack_fd = pack_data->pack_fd;
624 625 626 627 628 629 630 631 632
	SHA_CTX c;
	char hdr[8];
	unsigned long cnt;
	char *buf;

	if (lseek(pack_fd, 0, SEEK_SET) != 0)
		die("Failed seeking to start: %s", strerror(errno));

	SHA1_Init(&c);
633 634
	if (read_in_full(pack_fd, hdr, 8) != 8)
		die("Unable to reread header of %s", pack_data->pack_name);
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
	SHA1_Update(&c, hdr, 8);

	cnt = htonl(object_count);
	SHA1_Update(&c, &cnt, 4);
	write_or_die(pack_fd, &cnt, 4);

	buf = xmalloc(128 * 1024);
	for (;;) {
		size_t n = xread(pack_fd, buf, 128 * 1024);
		if (n <= 0)
			break;
		SHA1_Update(&c, buf, n);
	}
	free(buf);

650 651
	SHA1_Final(pack_data->sha1, &c);
	write_or_die(pack_fd, pack_data->sha1, sizeof(pack_data->sha1));
652
	close(pack_fd);
653 654 655 656 657 658 659 660 661
}

static int oecmp (const void *a_, const void *b_)
{
	struct object_entry *a = *((struct object_entry**)a_);
	struct object_entry *b = *((struct object_entry**)b_);
	return hashcmp(a->sha1, b->sha1);
}

662
static char* create_index()
663
{
664 665
	static char tmpfile[PATH_MAX];
	SHA_CTX ctx;
666 667 668 669
	struct sha1file *f;
	struct object_entry **idx, **c, **last, *e;
	struct object_entry_pool *o;
	unsigned int array[256];
670
	int i, idx_fd;
671 672 673 674 675

	/* Build the sorted table of object IDs. */
	idx = xmalloc(object_count * sizeof(struct object_entry*));
	c = idx;
	for (o = blocks; o; o = o->next_pool)
676 677 678
		for (e = o->next_free; e-- != o->entries;)
			if (pack_id == e->pack_id)
				*c++ = e;
679
	last = idx + object_count;
680 681
	if (c != last)
		die("internal consistency error creating the index");
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);

	/* Generate the fan-out array. */
	c = idx;
	for (i = 0; i < 256; i++) {
		struct object_entry **next = c;;
		while (next < last) {
			if ((*next)->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - idx);
		c = next;
	}

697 698 699 700 701 702
	snprintf(tmpfile, sizeof(tmpfile),
		"%s/index_XXXXXX", get_object_directory());
	idx_fd = mkstemp(tmpfile);
	if (idx_fd < 0)
		die("Can't create %s: %s", tmpfile, strerror(errno));
	f = sha1fd(idx_fd, tmpfile);
703
	sha1write(f, array, 256 * sizeof(int));
704
	SHA1_Init(&ctx);
705 706 707 708
	for (c = idx; c != last; c++) {
		unsigned int offset = htonl((*c)->offset);
		sha1write(f, &offset, 4);
		sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
709
		SHA1_Update(&ctx, (*c)->sha1, 20);
710
	}
711
	sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
712
	sha1close(f, NULL, 1);
713
	free(idx);
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
	SHA1_Final(pack_data->sha1, &ctx);
	return tmpfile;
}

static char* keep_pack(char *curr_index_name)
{
	static char name[PATH_MAX];
	static char *keep_msg = "fast-import";
	int keep_fd;

	chmod(pack_data->pack_name, 0444);
	chmod(curr_index_name, 0444);

	snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
		 get_object_directory(), sha1_to_hex(pack_data->sha1));
	keep_fd = open(name, O_RDWR|O_CREAT|O_EXCL, 0600);
	if (keep_fd < 0)
		die("cannot create keep file");
	write(keep_fd, keep_msg, strlen(keep_msg));
	close(keep_fd);

	snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
		 get_object_directory(), sha1_to_hex(pack_data->sha1));
	if (move_temp_to_file(pack_data->pack_name, name))
		die("cannot store pack file");

	snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
		 get_object_directory(), sha1_to_hex(pack_data->sha1));
	if (move_temp_to_file(curr_index_name, name))
		die("cannot store index file");
	return name;
}

static void unkeep_all_packs()
{
	static char name[PATH_MAX];
	int k;

	for (k = 0; k < pack_id; k++) {
		struct packed_git *p = all_packs[k];
		snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
			 get_object_directory(), sha1_to_hex(p->sha1));
		unlink(name);
	}
758 759 760 761
}

static void end_packfile()
{
762 763
	struct packed_git *old_p = pack_data, *new_p;

764
	if (object_count) {
765
		char *idx_name;
766 767 768
		int i;
		struct branch *b;
		struct tag *t;
769

770
		fixup_header_footer();
771
		idx_name = keep_pack(create_index());
772 773 774 775 776 777

		/* Register the packfile with core git's machinary. */
		new_p = add_packed_git(idx_name, strlen(idx_name), 1);
		if (!new_p)
			die("core git rejected index %s", idx_name);
		new_p->windows = old_p->windows;
778
		all_packs[pack_id] = new_p;
779
		install_packed_git(new_p);
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795

		/* Print the boundary */
		fprintf(stdout, "%s:", new_p->pack_name);
		for (i = 0; i < branch_table_sz; i++) {
			for (b = branch_table[i]; b; b = b->table_next_branch) {
				if (b->pack_id == pack_id)
					fprintf(stdout, " %s", sha1_to_hex(b->sha1));
			}
		}
		for (t = first_tag; t; t = t->next_tag) {
			if (t->pack_id == pack_id)
				fprintf(stdout, " %s", sha1_to_hex(t->sha1));
		}
		fputc('\n', stdout);

		pack_id++;
796
	}
797
	else
798
		unlink(old_p->pack_name);
799 800 801 802 803 804 805 806
	free(old_p);

	/* We can't carry a delta across packfiles. */
	free(last_blob.data);
	last_blob.data = NULL;
	last_blob.len = 0;
	last_blob.offset = 0;
	last_blob.depth = 0;
807 808
}

809 810 811 812 813 814
static void checkpoint()
{
	end_packfile();
	start_packfile();
}

815
static size_t encode_header(
816
	enum object_type type,
817
	size_t size,
818
	unsigned char *hdr)
819 820 821 822
{
	int n = 1;
	unsigned char c;

823
	if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
824 825 826 827 828 829 830 831 832 833 834 835 836 837
		die("bad type %d", type);

	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
		n++;
	}
	*hdr = c;
	return n;
}

838 839 840
static int store_object(
	enum object_type type,
	void *dat,
841
	size_t datlen,
842
	struct last_object *last,
843
	unsigned char *sha1out,
844
	uintmax_t mark)
845 846
{
	void *out, *delta;
847 848 849
	struct object_entry *e;
	unsigned char hdr[96];
	unsigned char sha1[20];
850
	unsigned long hdrlen, deltalen;
851 852 853 854 855 856 857 858
	SHA_CTX c;
	z_stream s;

	hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
	SHA1_Init(&c);
	SHA1_Update(&c, hdr, hdrlen);
	SHA1_Update(&c, dat, datlen);
	SHA1_Final(sha1, &c);
859
	if (sha1out)
860
		hashcpy(sha1out, sha1);
861 862

	e = insert_object(sha1);
863 864
	if (mark)
		insert_mark(mark, e);
865
	if (e->offset) {
866
		duplicate_count_by_type[type]++;
867
		return 1;
868
	}
869

870
	if (last && last->data && last->depth < max_depth) {
871
		delta = diff_delta(last->data, last->len,
872 873
			dat, datlen,
			&deltalen, 0);
874 875 876 877 878 879
		if (delta && deltalen >= datlen) {
			free(delta);
			delta = NULL;
		}
	} else
		delta = NULL;
880 881 882

	memset(&s, 0, sizeof(s));
	deflateInit(&s, zlib_compression_level);
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
	if (delta) {
		s.next_in = delta;
		s.avail_in = deltalen;
	} else {
		s.next_in = dat;
		s.avail_in = datlen;
	}
	s.avail_out = deflateBound(&s, s.avail_in);
	s.next_out = out = xmalloc(s.avail_out);
	while (deflate(&s, Z_FINISH) == Z_OK)
		/* nothing */;
	deflateEnd(&s);

	/* Determine if we should auto-checkpoint. */
	if ((object_count + 1) > max_objects
		|| (object_count + 1) < object_count
		|| (pack_size + 60 + s.total_out) > max_packsize
		|| (pack_size + 60 + s.total_out) < pack_size) {

		/* This new object needs to *not* have the current pack_id. */
		e->pack_id = pack_id + 1;
		checkpoint();

		/* We cannot carry a delta into the new pack. */
		if (delta) {
			free(delta);
			delta = NULL;
910 911 912 913 914 915 916 917 918 919

			memset(&s, 0, sizeof(s));
			deflateInit(&s, zlib_compression_level);
			s.next_in = dat;
			s.avail_in = datlen;
			s.avail_out = deflateBound(&s, s.avail_in);
			s.next_out = out = xrealloc(out, s.avail_out);
			while (deflate(&s, Z_FINISH) == Z_OK)
				/* nothing */;
			deflateEnd(&s);
920 921 922 923 924 925 926 927
		}
	}

	e->type = type;
	e->pack_id = pack_id;
	e->offset = pack_size;
	object_count++;
	object_count_by_type[type]++;
928 929

	if (delta) {
930 931 932
		unsigned long ofs = e->offset - last->offset;
		unsigned pos = sizeof(hdr) - 1;

933
		delta_count_by_type[type]++;
934
		last->depth++;
935 936

		hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
937
		write_or_die(pack_data->pack_fd, hdr, hdrlen);
938 939 940 941 942
		pack_size += hdrlen;

		hdr[pos] = ofs & 127;
		while (ofs >>= 7)
			hdr[--pos] = 128 | (--ofs & 127);
943
		write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
944
		pack_size += sizeof(hdr) - pos;
945
	} else {
946 947
		if (last)
			last->depth = 0;
948
		hdrlen = encode_header(type, datlen, hdr);
949
		write_or_die(pack_data->pack_fd, hdr, hdrlen);
950
		pack_size += hdrlen;
951 952
	}

953
	write_or_die(pack_data->pack_fd, out, s.total_out);
954
	pack_size += s.total_out;
955 956 957 958

	free(out);
	if (delta)
		free(delta);
959
	if (last) {
960
		if (last->data && !last->no_free)
961 962
			free(last->data);
		last->data = dat;
963
		last->offset = e->offset;
964 965 966 967 968
		last->len = datlen;
	}
	return 0;
}

969 970 971
static void *gfi_unpack_entry(
	struct object_entry *oe,
	unsigned long *sizep)
972
{
973 974 975 976 977
	static char type[20];
	struct packed_git *p = all_packs[oe->pack_id];
	if (p == pack_data)
		p->pack_size = pack_size + 20;
	return unpack_entry(p, oe->offset, type, sizep);
978 979
}

980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
static const char *get_mode(const char *str, unsigned int *modep)
{
	unsigned char c;
	unsigned int mode = 0;

	while ((c = *str++) != ' ') {
		if (c < '0' || c > '7')
			return NULL;
		mode = (mode << 3) + (c - '0');
	}
	*modep = mode;
	return str;
}

static void load_tree(struct tree_entry *root)
{
996
	unsigned char* sha1 = root->versions[1].sha1;
997 998 999 1000 1001 1002 1003
	struct object_entry *myoe;
	struct tree_content *t;
	unsigned long size;
	char *buf;
	const char *c;

	root->tree = t = new_tree_content(8);
1004
	if (is_null_sha1(sha1))
1005 1006
		return;

1007
	myoe = find_object(sha1);
1008
	if (myoe) {
1009
		if (myoe->type != OBJ_TREE)
1010
			die("Not a tree: %s", sha1_to_hex(sha1));
1011
		t->delta_depth = 0;
1012
		buf = gfi_unpack_entry(myoe, &size);
1013
	} else {
1014
		char type[20];
1015
		buf = read_sha1_file(sha1, type, &size);
1016
		if (!buf || strcmp(type, tree_type))
1017
			die("Can't load tree %s", sha1_to_hex(sha1));
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
	}

	c = buf;
	while (c != (buf + size)) {
		struct tree_entry *e = new_tree_entry();

		if (t->entry_count == t->entry_capacity)
			root->tree = t = grow_tree_content(t, 8);
		t->entries[t->entry_count++] = e;

		e->tree = NULL;
1029
		c = get_mode(c, &e->versions[1].mode);
1030
		if (!c)
1031 1032
			die("Corrupt mode in %s", sha1_to_hex(sha1));
		e->versions[0].mode = e->versions[1].mode;
1033 1034
		e->name = to_atom(c, strlen(c));
		c += e->name->str_len + 1;
1035 1036
		hashcpy(e->versions[0].sha1, (unsigned char*)c);
		hashcpy(e->versions[1].sha1, (unsigned char*)c);
1037 1038 1039 1040 1041
		c += 20;
	}
	free(buf);
}

1042
static int tecmp0 (const void *_a, const void *_b)
1043 1044 1045 1046
{
	struct tree_entry *a = *((struct tree_entry**)_a);
	struct tree_entry *b = *((struct tree_entry**)_b);
	return base_name_compare(
1047 1048
		a->name->str_dat, a->name->str_len, a->versions[0].mode,
		b->name->str_dat, b->name->str_len, b->versions[0].mode);
1049 1050
}

1051
static int tecmp1 (const void *_a, const void *_b)
1052
{
1053 1054 1055 1056 1057 1058 1059
	struct tree_entry *a = *((struct tree_entry**)_a);
	struct tree_entry *b = *((struct tree_entry**)_b);
	return base_name_compare(
		a->name->str_dat, a->name->str_len, a->versions[1].mode,
		b->name->str_dat, b->name->str_len, b->versions[1].mode);
}

1060 1061 1062 1063
static void mktree(struct tree_content *t,
	int v,
	unsigned long *szp,
	struct dbuf *b)
1064 1065
{
	size_t maxlen = 0;
1066
	unsigned int i;
1067
	char *c;
1068

1069 1070 1071 1072
	if (!v)
		qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp0);
	else
		qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp1);
1073 1074

	for (i = 0; i < t->entry_count; i++) {
1075 1076
		if (t->entries[i]->versions[v].mode)
			maxlen += t->entries[i]->name->str_len + 34;
1077 1078
	}

1079
	size_dbuf(b, maxlen);
1080
	c = b->buffer;
1081 1082
	for (i = 0; i < t->entry_count; i++) {
		struct tree_entry *e = t->entries[i];
1083 1084 1085
		if (!e->versions[v].mode)
			continue;
		c += sprintf(c, "%o", e->versions[v].mode);
1086 1087 1088
		*c++ = ' ';
		strcpy(c, e->name->str_dat);
		c += e->name->str_len + 1;
1089
		hashcpy((unsigned char*)c, e->versions[v].sha1);
1090 1091
		c += 20;
	}
1092
	*szp = c - (char*)b->buffer;
1093 1094 1095 1096 1097 1098
}

static void store_tree(struct tree_entry *root)
{
	struct tree_content *t = root->tree;
	unsigned int i, j, del;
1099
	unsigned long new_len;
1100
	struct last_object lo;
1101
	struct object_entry *le;
1102 1103 1104 1105 1106 1107 1108 1109 1110

	if (!is_null_sha1(root->versions[1].sha1))
		return;

	for (i = 0; i < t->entry_count; i++) {
		if (t->entries[i]->tree)
			store_tree(t->entries[i]);
	}

1111
	le = find_object(root->versions[0].sha1);
1112 1113 1114
	if (!S_ISDIR(root->versions[0].mode)
		|| !le
		|| le->pack_id != pack_id) {
1115 1116 1117
		lo.data = NULL;
		lo.depth = 0;
	} else {
1118 1119
		mktree(t, 0, &lo.len, &old_tree);
		lo.data = old_tree.buffer;
1120
		lo.offset = le->offset;
1121
		lo.depth = t->delta_depth;
1122
		lo.no_free = 1;
1123 1124
	}

1125
	mktree(t, 1, &new_len, &new_tree);
1126
	store_object(OBJ_TREE, new_tree.buffer, new_len,
1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
		&lo, root->versions[1].sha1, 0);

	t->delta_depth = lo.depth;
	for (i = 0, j = 0, del = 0; i < t->entry_count; i++) {
		struct tree_entry *e = t->entries[i];
		if (e->versions[1].mode) {
			e->versions[0].mode = e->versions[1].mode;
			hashcpy(e->versions[0].sha1, e->versions[1].sha1);
			t->entries[j++] = e;
		} else {
			release_tree_entry(e);
			del++;
		}
	}
	t->entry_count -= del;
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
}

static int tree_content_set(
	struct tree_entry *root,
	const char *p,
	const unsigned char *sha1,
	const unsigned int mode)
{
	struct tree_content *t = root->tree;
	const char *slash1;
	unsigned int i, n;
	struct tree_entry *e;

	slash1 = strchr(p, '/');
	if (slash1)
		n = slash1 - p;
	else
		n = strlen(p);

	for (i = 0; i < t->entry_count; i++) {
		e = t->entries[i];
		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
			if (!slash1) {
1165 1166
				if (e->versions[1].mode == mode
						&& !hashcmp(e->versions[1].sha1, sha1))
1167
					return 0;
1168 1169
				e->versions[1].mode = mode;
				hashcpy(e->versions[1].sha1, sha1);
1170
				if (e->tree) {
1171
					release_tree_content_recursive(e->tree);
1172 1173
					e->tree = NULL;
				}
1174
				hashclr(root->versions[1].sha1);
1175 1176
				return 1;
			}
1177
			if (!S_ISDIR(e->versions[1].mode)) {
1178
				e->tree = new_tree_content(8);
1179
				e->versions[1].mode = S_IFDIR;
1180 1181 1182 1183
			}
			if (!e->tree)
				load_tree(e);
			if (tree_content_set(e, slash1 + 1, sha1, mode)) {
1184
				hashclr(root->versions[1].sha1);
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
				return 1;
			}
			return 0;
		}
	}

	if (t->entry_count == t->entry_capacity)
		root->tree = t = grow_tree_content(t, 8);
	e = new_tree_entry();
	e->name = to_atom(p, n);
1195 1196
	e->versions[0].mode = 0;
	hashclr(e->versions[0].sha1);
1197 1198 1199
	t->entries[t->entry_count++] = e;
	if (slash1) {
		e->tree = new_tree_content(8);
1200
		e->versions[1].mode = S_IFDIR;
1201 1202 1203
		tree_content_set(e, slash1 + 1, sha1, mode);
	} else {
		e->tree = NULL;
1204 1205
		e->versions[1].mode = mode;
		hashcpy(e->versions[1].sha1, sha1);
1206
	}
1207
	hashclr(root->versions[1].sha1);
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226
	return 1;
}

static int tree_content_remove(struct tree_entry *root, const char *p)
{
	struct tree_content *t = root->tree;
	const char *slash1;
	unsigned int i, n;
	struct tree_entry *e;

	slash1 = strchr(p, '/');
	if (slash1)
		n = slash1 - p;
	else
		n = strlen(p);

	for (i = 0; i < t->entry_count; i++) {
		e = t->entries[i];
		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
1227
			if (!slash1 || !S_ISDIR(e->versions[1].mode))
1228 1229 1230 1231
				goto del_entry;
			if (!e->tree)
				load_tree(e);
			if (tree_content_remove(e, slash1 + 1)) {
1232 1233 1234 1235 1236 1237 1238
				for (n = 0; n < e->tree->entry_count; n++) {
					if (e->tree->entries[n]->versions[1].mode) {
						hashclr(root->versions[1].sha1);
						return 1;
					}
				}
				goto del_entry;
1239 1240 1241 1242 1243 1244 1245
			}
			return 0;
		}
	}
	return 0;

del_entry:
1246 1247 1248 1249 1250 1251 1252
	if (e->tree) {
		release_tree_content_recursive(e->tree);
		e->tree = NULL;
	}
	e->versions[1].mode = 0;
	hashclr(e->versions[1].sha1);
	hashclr(root->versions[1].sha1);
1253
	return 1;
1254 1255
}

1256 1257 1258 1259 1260 1261 1262 1263 1264
static void dump_branches()
{
	static const char *msg = "fast-import";
	unsigned int i;
	struct branch *b;
	struct ref_lock *lock;

	for (i = 0; i < branch_table_sz; i++) {
		for (b = branch_table[i]; b; b = b->table_next_branch) {
1265
			lock = lock_any_ref_for_update(b->name, NULL);
1266 1267 1268 1269 1270 1271
			if (!lock || write_ref_sha1(lock, b->sha1, msg) < 0)
				die("Can't write %s", b->name);
		}
	}
}

1272 1273 1274 1275 1276 1277 1278 1279 1280
static void dump_tags()
{
	static const char *msg = "fast-import";
	struct tag *t;
	struct ref_lock *lock;
	char path[PATH_MAX];

	for (t = first_tag; t; t = t->next_tag) {
		sprintf(path, "refs/tags/%s", t->name);
1281
		lock = lock_any_ref_for_update(path, NULL);
1282 1283 1284 1285 1286
		if (!lock || write_ref_sha1(lock, t->sha1, msg) < 0)
			die("Can't write %s", path);
	}
}

1287
static void dump_marks_helper(FILE *f,
1288
	uintmax_t base,
1289 1290
	struct mark_set *m)
{
1291
	uintmax_t k;
1292 1293 1294 1295 1296 1297 1298 1299 1300
	if (m->shift) {
		for (k = 0; k < 1024; k++) {
			if (m->data.sets[k])
				dump_marks_helper(f, (base + k) << m->shift,
					m->data.sets[k]);
		}
	} else {
		for (k = 0; k < 1024; k++) {
			if (m->data.marked[k])
1301
				fprintf(f, ":%ju %s\n", base + k,
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316
					sha1_to_hex(m->data.marked[k]->sha1));
		}
	}
}

static void dump_marks()
{
	if (mark_file)
	{
		FILE *f = fopen(mark_file, "w");
		dump_marks_helper(f, 0, marks);
		fclose(f);
	}
}

1317 1318 1319 1320 1321 1322 1323 1324
static void read_next_command()
{
	read_line(&command_buf, stdin, '\n');
}

static void cmd_mark()
{
	if (!strncmp("mark :", command_buf.buf, 6)) {
1325
		next_mark = strtoumax(command_buf.buf + 6, NULL, 10);
1326 1327 1328
		read_next_command();
	}
	else
1329
		next_mark = 0;
1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357
}

static void* cmd_data (size_t *size)
{
	size_t n = 0;
	void *buffer;
	size_t length;

	if (strncmp("data ", command_buf.buf, 5))
		die("Expected 'data n' command, found: %s", command_buf.buf);

	length = strtoul(command_buf.buf + 5, NULL, 10);
	buffer = xmalloc(length);

	while (n < length) {
		size_t s = fread((char*)buffer + n, 1, length - n, stdin);
		if (!s && feof(stdin))
			die("EOF in data (%lu bytes remaining)", length - n);
		n += s;
	}

	if (fgetc(stdin) != '\n')
		die("An lf did not trail the binary data as expected.");

	*size = length;
	return buffer;
}

1358
static void cmd_new_blob()
1359
{
1360 1361
	size_t l;
	void *d;
1362 1363 1364

	read_next_command();
	cmd_mark();
1365
	d = cmd_data(&l);
1366

1367 1368
	if (store_object(OBJ_BLOB, d, l, &last_blob, NULL, next_mark))
		free(d);
1369 1370
}

1371
static void unload_one_branch()
1372
{
1373 1374
	while (cur_active_branches
		&& cur_active_branches >= max_active_branches) {
1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394
		unsigned long min_commit = ULONG_MAX;
		struct branch *e, *l = NULL, *p = NULL;

		for (e = active_branches; e; e = e->active_next_branch) {
			if (e->last_commit < min_commit) {
				p = l;
				min_commit = e->last_commit;
			}
			l = e;
		}

		if (p) {
			e = p->active_next_branch;
			p->active_next_branch = e->active_next_branch;
		} else {
			e = active_branches;
			active_branches = e->active_next_branch;
		}
		e->active_next_branch = NULL;
		if (e->branch_tree.tree) {
1395
			release_tree_content_recursive(e->branch_tree.tree);
1396 1397 1398
			e->branch_tree.tree = NULL;
		}
		cur_active_branches--;
1399 1400 1401
	}
}

1402
static void load_branch(struct branch *b)
1403
{
1404 1405 1406 1407
	load_tree(&b->branch_tree);
	b->active_next_branch = active_branches;
	active_branches = b;
	cur_active_branches++;
1408
	branch_load_count++;
1409 1410
}

1411
static void file_change_m(struct branch *b)
1412
{
1413 1414 1415
	const char *p = command_buf.buf + 2;
	char *p_uq;
	const char *endp;
1416
	struct object_entry *oe;
1417
	unsigned char sha1[20];
1418
	unsigned int mode;
1419
	char type[20];
1420

1421 1422 1423 1424 1425 1426
	p = get_mode(p, &mode);
	if (!p)
		die("Corrupt mode: %s", command_buf.buf);
	switch (mode) {
	case S_IFREG | 0644:
	case S_IFREG | 0755:
1427
	case S_IFLNK:
1428 1429 1430 1431 1432 1433 1434 1435
	case 0644:
	case 0755:
		/* ok */
		break;
	default:
		die("Corrupt mode: %s", command_buf.buf);
	}

1436 1437
	if (*p == ':') {
		char *x;
1438
		oe = find_mark(strtoumax(p + 1, &x, 10));
1439
		hashcpy(sha1, oe->sha1);
1440 1441 1442 1443 1444 1445 1446
		p = x;
	} else {
		if (get_sha1_hex(p, sha1))
			die("Invalid SHA1: %s", command_buf.buf);
		oe = find_object(sha1);
		p += 40;
	}
1447 1448 1449 1450 1451 1452 1453 1454 1455
	if (*p++ != ' ')
		die("Missing space after SHA1: %s", command_buf.buf);

	p_uq = unquote_c_style(p, &endp);
	if (p_uq) {
		if (*endp)
			die("Garbage after path in: %s", command_buf.buf);
		p = p_uq;
	}
1456

1457 1458
	if (oe) {
		if (oe->type != OBJ_BLOB)
1459 1460
			die("Not a blob (actually a %s): %s",
				command_buf.buf, type_names[oe->type]);
1461 1462
	} else {
		if (sha1_object_info(sha1, type, NULL))
1463
			die("Blob not found: %s", command_buf.buf);
1464
		if (strcmp(blob_type, type))
1465 1466
			die("Not a blob (actually a %s): %s",
				command_buf.buf, type);
1467
	}
1468

1469 1470 1471 1472
	tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode);

	if (p_uq)
		free(p_uq);
1473
}
1474

1475 1476
static void file_change_d(struct branch *b)
{
1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489
	const char *p = command_buf.buf + 2;
	char *p_uq;
	const char *endp;

	p_uq = unquote_c_style(p, &endp);
	if (p_uq) {
		if (*endp)
			die("Garbage after path in: %s", command_buf.buf);
		p = p_uq;
	}
	tree_content_remove(&b->branch_tree, p);
	if (p_uq)
		free(p_uq);
1490 1491
}

1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
static void cmd_from(struct branch *b)
{
	const char *from, *endp;
	char *str_uq;
	struct branch *s;

	if (strncmp("from ", command_buf.buf, 5))
		return;

	if (b->last_commit)
		die("Can't reinitailize branch %s", b->name);

	from = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(from, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after string in: %s", command_buf.buf);
		from = str_uq;
	}

	s = lookup_branch(from);
	if (b == s)
		die("Can't create a branch from itself: %s", b->name);
	else if (s) {
1516
		unsigned char *t = s->branch_tree.versions[1].sha1;
1517
		hashcpy(b->sha1, s->sha1);
1518 1519
		hashcpy(b->branch_tree.versions[0].sha1, t);
		hashcpy(b->branch_tree.versions[1].sha1, t);
1520
	} else if (*from == ':') {
1521
		uintmax_t idnum = strtoumax(from + 1, NULL, 10);
1522 1523 1524 1525
		struct object_entry *oe = find_mark(idnum);
		unsigned long size;
		char *buf;
		if (oe->type != OBJ_COMMIT)
1526
			die("Mark :%ju not a commit", idnum);
1527
		hashcpy(b->sha1, oe->sha1);
1528
		buf = gfi_unpack_entry(oe, &size);
1529 1530 1531
		if (!buf || size < 46)
			die("Not a valid commit: %s", from);
		if (memcmp("tree ", buf, 5)
1532
			|| get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
1533 1534
			die("The commit %s is corrupt", sha1_to_hex(b->sha1));
		free(buf);
1535 1536
		hashcpy(b->branch_tree.versions[0].sha1,
			b->branch_tree.versions[1].sha1);
1537
	} else if (!get_sha1(from, b->sha1)) {
1538 1539 1540 1541
		if (is_null_sha1(b->sha1)) {
			hashclr(b->branch_tree.versions[0].sha1);
			hashclr(b->branch_tree.versions[1].sha1);
		} else {
1542 1543 1544 1545 1546 1547 1548 1549
			unsigned long size;
			char *buf;

			buf = read_object_with_reference(b->sha1,
				type_names[OBJ_COMMIT], &size, b->sha1);
			if (!buf || size < 46)
				die("Not a valid commit: %s", from);
			if (memcmp("tree ", buf, 5)
1550
				|| get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
1551 1552
				die("The commit %s is corrupt", sha1_to_hex(b->sha1));
			free(buf);
1553 1554
			hashcpy(b->branch_tree.versions[0].sha1,
				b->branch_tree.versions[1].sha1);
1555 1556 1557 1558 1559 1560 1561
		}
	} else
		die("Invalid ref name or SHA1 expression: %s", from);

	read_next_command();
}

1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583
static struct hash_list* cmd_merge(unsigned int *count)
{
	struct hash_list *list = NULL, *n, *e;
	const char *from, *endp;
	char *str_uq;
	struct branch *s;

	*count = 0;
	while (!strncmp("merge ", command_buf.buf, 6)) {
		from = strchr(command_buf.buf, ' ') + 1;
		str_uq = unquote_c_style(from, &endp);
		if (str_uq) {
			if (*endp)
				die("Garbage after string in: %s", command_buf.buf);
			from = str_uq;
		}

		n = xmalloc(sizeof(*n));
		s = lookup_branch(from);
		if (s)
			hashcpy(n->sha1, s->sha1);
		else if (*from == ':') {
1584
			uintmax_t idnum = strtoumax(from + 1, NULL, 10);
1585 1586
			struct object_entry *oe = find_mark(idnum);
			if (oe->type != OBJ_COMMIT)
1587
				die("Mark :%ju not a commit", idnum);
1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603
			hashcpy(n->sha1, oe->sha1);
		} else if (get_sha1(from, n->sha1))
			die("Invalid ref name or SHA1 expression: %s", from);

		n->next = NULL;
		if (list)
			e->next = n;
		else
			list = n;
		e = n;
		*count++;
		read_next_command();
	}
	return list;
}

1604
static void cmd_new_commit()
1605
{
1606 1607 1608 1609 1610 1611 1612 1613
	struct branch *b;
	void *msg;
	size_t msglen;
	char *str_uq;
	const char *endp;
	char *sp;
	char *author = NULL;
	char *committer = NULL;
1614 1615
	struct hash_list *merge_list = NULL;
	unsigned int merge_count;
1616 1617 1618 1619 1620 1621 1622 1623 1624 1625

	/* Obtain the branch name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after ref in: %s", command_buf.buf);
		sp = str_uq;
	}
	b = lookup_branch(sp);
1626
	if (!b)
1627
		b = new_branch(sp);
1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643
	if (str_uq)
		free(str_uq);

	read_next_command();
	cmd_mark();
	if (!strncmp("author ", command_buf.buf, 7)) {
		author = strdup(command_buf.buf);
		read_next_command();
	}
	if (!strncmp("committer ", command_buf.buf, 10)) {
		committer = strdup(command_buf.buf);
		read_next_command();
	}
	if (!committer)
		die("Expected committer but didn't get one");
	msg = cmd_data(&msglen);
1644 1645
	read_next_command();
	cmd_from(b);
1646
	merge_list = cmd_merge(&merge_count);
1647 1648

	/* ensure the branch is active/loaded */
1649
	if (!b->branch_tree.tree || !max_active_branches) {
1650 1651 1652
		unload_one_branch();
		load_branch(b);
	}
1653

1654 1655
	/* file_change* */
	for (;;) {
1656
		if (1 == command_buf.len)
1657
			break;
1658
		else if (!strncmp("M ", command_buf.buf, 2))
1659
			file_change_m(b);
1660
		else if (!strncmp("D ", command_buf.buf, 2))
1661 1662
			file_change_d(b);
		else
1663
			die("Unsupported file_change: %s", command_buf.buf);
1664
		read_next_command();
1665 1666
	}

1667
	/* build the tree and the commit */
1668
	store_tree(&b->branch_tree);
1669 1670
	hashcpy(b->branch_tree.versions[0].sha1,
		b->branch_tree.versions[1].sha1);
1671
	size_dbuf(&new_data, 97 + msglen
1672
		+ merge_count * 49
1673 1674 1675
		+ (author
			? strlen(author) + strlen(committer)
			: 2 * strlen(committer)));
1676
	sp = new_data.buffer;
1677 1678
	sp += sprintf(sp, "tree %s\n",
		sha1_to_hex(b->branch_tree.versions[1].sha1));
1679
	if (!is_null_sha1(b->sha1))
1680
		sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
1681 1682 1683 1684 1685 1686
	while (merge_list) {
		struct hash_list *next = merge_list->next;
		sp += sprintf(sp, "parent %s\n", sha1_to_hex(merge_list->sha1));
		free(merge_list);
		merge_list = next;
	}
1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698
	if (author)
		sp += sprintf(sp, "%s\n", author);
	else
		sp += sprintf(sp, "author %s\n", committer + 10);
	sp += sprintf(sp, "%s\n\n", committer);
	memcpy(sp, msg, msglen);
	sp += msglen;
	if (author)
		free(author);
	free(committer);
	free(msg);

1699 1700 1701
	store_object(OBJ_COMMIT,
		new_data.buffer, sp - (char*)new_data.buffer,
		NULL, b->sha1, next_mark);
1702
	b->last_commit = object_count_by_type[OBJ_COMMIT];
1703
	b->pack_id = pack_id;
1704 1705 1706 1707 1708 1709 1710 1711 1712 1713

	if (branch_log) {
		int need_dq = quote_c_style(b->name, NULL, NULL, 0);
		fprintf(branch_log, "commit ");
		if (need_dq) {
			fputc('"', branch_log);
			quote_c_style(b->name, NULL, branch_log, 0);
			fputc('"', branch_log);
		} else
			fprintf(branch_log, "%s", b->name);
1714
		fprintf(branch_log," :%ju %s\n",next_mark,sha1_to_hex(b->sha1));
1715
	}
1716 1717
}

1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728
static void cmd_new_tag()
{
	char *str_uq;
	const char *endp;
	char *sp;
	const char *from;
	char *tagger;
	struct branch *s;
	void *msg;
	size_t msglen;
	struct tag *t;
1729
	uintmax_t from_mark = 0;
1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765
	unsigned char sha1[20];

	/* Obtain the new tag name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after tag name in: %s", command_buf.buf);
		sp = str_uq;
	}
	t = pool_alloc(sizeof(struct tag));
	t->next_tag = NULL;
	t->name = pool_strdup(sp);
	if (last_tag)
		last_tag->next_tag = t;
	else
		first_tag = t;
	last_tag = t;
	if (str_uq)
		free(str_uq);
	read_next_command();

	/* from ... */
	if (strncmp("from ", command_buf.buf, 5))
		die("Expected from command, got %s", command_buf.buf);

	from = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(from, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after string in: %s", command_buf.buf);
		from = str_uq;
	}

	s = lookup_branch(from);
	if (s) {
1766
		hashcpy(sha1, s->sha1);
1767
	} else if (*from == ':') {
1768
		from_mark = strtoumax(from + 1, NULL, 10);
1769
		struct object_entry *oe = find_mark(from_mark);
1770
		if (oe->type != OBJ_COMMIT)
1771
			die("Mark :%ju not a commit", from_mark);
1772
		hashcpy(sha1, oe->sha1);
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798
	} else if (!get_sha1(from, sha1)) {
		unsigned long size;
		char *buf;

		buf = read_object_with_reference(sha1,
			type_names[OBJ_COMMIT], &size, sha1);
		if (!buf || size < 46)
			die("Not a valid commit: %s", from);
		free(buf);
	} else
		die("Invalid ref name or SHA1 expression: %s", from);

	if (str_uq)
		free(str_uq);
	read_next_command();

	/* tagger ... */
	if (strncmp("tagger ", command_buf.buf, 7))
		die("Expected tagger command, got %s", command_buf.buf);
	tagger = strdup(command_buf.buf);

	/* tag payload/message */
	read_next_command();
	msg = cmd_data(&msglen);

	/* build the tag object */
1799 1800
	size_dbuf(&new_data, 67+strlen(t->name)+strlen(tagger)+msglen);
	sp = new_data.buffer;
1801 1802 1803 1804 1805 1806 1807 1808 1809
	sp += sprintf(sp, "object %s\n", sha1_to_hex(sha1));
	sp += sprintf(sp, "type %s\n", type_names[OBJ_COMMIT]);
	sp += sprintf(sp, "tag %s\n", t->name);
	sp += sprintf(sp, "%s\n\n", tagger);
	memcpy(sp, msg, msglen);
	sp += msglen;
	free(tagger);
	free(msg);

1810 1811
	store_object(OBJ_TAG, new_data.buffer, sp - (char*)new_data.buffer,
		NULL, t->sha1, 0);
1812
	t->pack_id = pack_id;
1813 1814 1815 1816 1817 1818 1819 1820 1821 1822

	if (branch_log) {
		int need_dq = quote_c_style(t->name, NULL, NULL, 0);
		fprintf(branch_log, "tag ");
		if (need_dq) {
			fputc('"', branch_log);
			quote_c_style(t->name, NULL, branch_log, 0);
			fputc('"', branch_log);
		} else
			fprintf(branch_log, "%s", t->name);
1823
		fprintf(branch_log," :%ju %s\n",from_mark,sha1_to_hex(t->sha1));
1824
	}
1825 1826
}

1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849
static void cmd_reset_branch()
{
	struct branch *b;
	char *str_uq;
	const char *endp;
	char *sp;

	/* Obtain the branch name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after ref in: %s", command_buf.buf);
		sp = str_uq;
	}
	b = lookup_branch(sp);
	if (b) {
		b->last_commit = 0;
		if (b->branch_tree.tree) {
			release_tree_content_recursive(b->branch_tree.tree);
			b->branch_tree.tree = NULL;
		}
	}
1850 1851
	else
		b = new_branch(sp);
1852 1853
	if (str_uq)
		free(str_uq);
1854 1855
	read_next_command();
	cmd_from(b);
1856 1857
}

1858 1859
static void cmd_checkpoint()
{
1860 1861
	if (object_count)
		checkpoint();
1862 1863 1864
	read_next_command();
}

1865
static const char fast_import_usage[] =
1866
"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log]";
1867

1868 1869
int main(int argc, const char **argv)
{
1870
	int i;
1871
	uintmax_t est_obj_cnt = object_entry_alloc;
1872
	uintmax_t total_count, duplicate_count;
1873

1874 1875 1876
	setup_ident();
	git_config(git_default_config);

1877 1878 1879 1880 1881 1882
	for (i = 1; i < argc; i++) {
		const char *a = argv[i];

		if (*a != '-' || !strcmp(a, "--"))
			break;
		else if (!strncmp(a, "--objects=", 10))
1883
			est_obj_cnt = strtoumax(a + 10, NULL, 0);
1884
		else if (!strncmp(a, "--max-objects-per-pack=", 23))
1885
			max_objects = strtoumax(a + 23, NULL, 0);
1886
		else if (!strncmp(a, "--max-pack-size=", 16))
1887
			max_packsize = strtoumax(a + 16, NULL, 0) * 1024 * 1024;
1888 1889 1890 1891
		else if (!strncmp(a, "--depth=", 8))
			max_depth = strtoul(a + 8, NULL, 0);
		else if (!strncmp(a, "--active-branches=", 18))
			max_active_branches = strtoul(a + 18, NULL, 0);
1892 1893
		else if (!strncmp(a, "--export-marks=", 15))
			mark_file = a + 15;
1894 1895 1896 1897 1898
		else if (!strncmp(a, "--branch-log=", 13)) {
			branch_log = fopen(a + 13, "w");
			if (!branch_log)
				die("Can't create %s: %s", a + 13, strerror(errno));
		}
1899 1900 1901
		else
			die("unknown option %s", a);
	}
1902
	if (i != argc)
1903 1904
		usage(fast_import_usage);

1905
	alloc_objects(est_obj_cnt);
1906
	strbuf_init(&command_buf);
1907 1908 1909 1910

	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
1911
	marks = pool_calloc(1, sizeof(struct mark_set));
1912

1913
	start_packfile();
1914
	for (;;) {
1915 1916
		read_next_command();
		if (command_buf.eof)
1917
			break;
1918 1919 1920 1921
		else if (!strcmp("blob", command_buf.buf))
			cmd_new_blob();
		else if (!strncmp("commit ", command_buf.buf, 7))
			cmd_new_commit();
1922 1923
		else if (!strncmp("tag ", command_buf.buf, 4))
			cmd_new_tag();
1924 1925
		else if (!strncmp("reset ", command_buf.buf, 6))
			cmd_reset_branch();
1926 1927
		else if (!strcmp("checkpoint", command_buf.buf))
			cmd_checkpoint();
1928 1929
		else
			die("Unsupported command: %s", command_buf.buf);
1930
	}
1931
	end_packfile();
1932

1933
	dump_branches();
1934
	dump_tags();
1935
	unkeep_all_packs();
1936
	dump_marks();
1937 1938
	if (branch_log)
		fclose(branch_log);
1939

1940 1941 1942
	total_count = 0;
	for (i = 0; i < ARRAY_SIZE(object_count_by_type); i++)
		total_count += object_count_by_type[i];
1943
	duplicate_count = 0;
1944 1945 1946
	for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
		duplicate_count += duplicate_count_by_type[i];

1947
	fprintf(stderr, "%s statistics:\n", argv[0]);
1948
	fprintf(stderr, "---------------------------------------------------------------------\n");
1949
	fprintf(stderr, "Alloc'd objects: %10ju (%10ju overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
1950
	fprintf(stderr, "Total objects:   %10ju (%10ju duplicates                  )\n", total_count, duplicate_count);
1951 1952 1953 1954
	fprintf(stderr, "      blobs  :   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
	fprintf(stderr, "      trees  :   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
	fprintf(stderr, "      commits:   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
	fprintf(stderr, "      tags   :   %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
1955
	fprintf(stderr, "Total branches:  %10lu (%10lu loads     )\n", branch_count, branch_load_count);
1956
	fprintf(stderr, "      marks:     %10ju (%10ju unique    )\n", (((uintmax_t)1) << marks->shift) * 1024, marks_set_count);
1957
	fprintf(stderr, "      atoms:     %10u\n", atom_cnt);
1958
	fprintf(stderr, "Memory total:    %10ju KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
1959
	fprintf(stderr, "       pools:    %10lu KiB\n", total_allocd/1024);
1960
	fprintf(stderr, "     objects:    %10ju KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
1961
	fprintf(stderr, "---------------------------------------------------------------------\n");
1962 1963
	pack_report();
	fprintf(stderr, "---------------------------------------------------------------------\n");
1964
	fprintf(stderr, "\n");
1965 1966 1967

	return 0;
}