fast-import.c 43.5 KB
Newer Older
1 2 3 4 5 6
/*
Format of STDIN stream:

  stream ::= cmd*;

  cmd ::= new_blob
7
        | new_commit
8
        | new_tag
9
        | reset_branch
10 11
        ;

12 13 14 15
  new_blob ::= 'blob' lf
	mark?
    file_content;
  file_content ::= data;
16

17
  new_commit ::= 'commit' sp ref_str lf
18 19 20 21
    mark?
    ('author' sp name '<' email '>' ts tz lf)?
    'committer' sp name '<' email '>' ts tz lf
    commit_msg
22
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
23 24 25
    file_change*
    lf;
  commit_msg ::= data;
26

27 28
  file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
                | 'D' sp path_str lf
29
                ;
30 31 32 33 34 35 36 37
  mode ::= '644' | '755';

  new_tag ::= 'tag' sp tag_str lf
    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
	'tagger' sp name '<' email '>' ts tz lf
    tag_msg;
  tag_msg ::= data;

38 39
  reset_branch ::= 'reset' sp ref_str lf;

40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
     # note: the first idnum in a stream should be 1 and subsequent
     # idnums should not have gaps between values as this will cause
     # the stream parser to reserve space for the gapped values.  An
	 # idnum can be updated in the future to a new object by issuing
     # a new mark directive with the old idnum.
	 #
  mark ::= 'mark' sp idnum lf;

     # note: declen indicates the length of binary_data in bytes.
     # declen does not include the lf preceeding or trailing the
     # binary data.
     #
  data ::= 'data' sp declen lf
    binary_data
	lf;

     # note: quoted strings are C-style quoting supporting \c for
     # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
	 # is the signed byte value in octal.  Note that the only
     # characters which must actually be escaped to protect the
     # stream formatting is: \, " and LF.  Otherwise these values
	 # are UTF8.
     #
  ref_str     ::= ref     | '"' quoted(ref)     '"' ;
  sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
  tag_str     ::= tag     | '"' quoted(tag)     '"' ;
  path_str    ::= path    | '"' quoted(path)    '"' ;

  declen ::= # unsigned 32 bit value, ascii base10 notation;
69
  binary_data ::= # file content, not interpreted;
70

71 72
  sp ::= # ASCII space character;
  lf ::= # ASCII newline (LF) character;
73 74 75 76 77 78 79 80 81

     # note: a colon (':') must precede the numerical value assigned to
	 # an idnum.  This is to distinguish it from a ref or tag name as
     # GIT does not permit ':' in ref or tag strings.
	 #
  idnum   ::= ':' declen;
  path    ::= # GIT style file path, e.g. "a/b/c";
  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
82 83
  sha1exp ::= # Any valid GIT SHA1 expression;
  hexsha1 ::= # SHA1 in hexadecimal format;
84 85 86 87 88 89

     # note: name and email are UTF8 strings, however name must not
	 # contain '<' or lf and email must not contain any of the
     # following: '<', '>', lf.
	 #
  name  ::= # valid GIT author/committer name;
90
  email ::= # valid GIT author/committer email;
91 92
  ts    ::= # time since the epoch in seconds, ascii base10 notation;
  tz    ::= # GIT style timezone;
93 94
*/

95 96 97 98
#include "builtin.h"
#include "cache.h"
#include "object.h"
#include "blob.h"
99
#include "tree.h"
100 101
#include "delta.h"
#include "pack.h"
102
#include "refs.h"
103
#include "csum-file.h"
104 105
#include "strbuf.h"
#include "quote.h"
106

107 108 109
struct object_entry
{
	struct object_entry *next;
110
	enum object_type type;
111 112 113 114
	unsigned long offset;
	unsigned char sha1[20];
};

115
struct object_entry_pool
116
{
117
	struct object_entry_pool *next_pool;
118 119
	struct object_entry *next_free;
	struct object_entry *end;
120
	struct object_entry entries[FLEX_ARRAY]; /* more */
121 122
};

123 124 125 126 127 128 129 130 131
struct mark_set
{
	int shift;
	union {
		struct object_entry *marked[1024];
		struct mark_set *sets[1024];
	} data;
};

132 133 134
struct last_object
{
	void *data;
135
	unsigned long len;
136
	unsigned int depth;
137 138 139
	unsigned char sha1[20];
};

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
struct mem_pool
{
	struct mem_pool *next_pool;
	char *next_free;
	char *end;
	char space[FLEX_ARRAY]; /* more */
};

struct atom_str
{
	struct atom_str *next_atom;
	int str_len;
	char str_dat[FLEX_ARRAY]; /* more */
};

struct tree_content;
156 157
struct tree_entry
{
158 159
	struct tree_content *tree;
	struct atom_str* name;
160 161 162 163 164
	struct tree_entry_ms
	{
		unsigned int mode;
		unsigned char sha1[20];
	} versions[2];
165 166
};

167
struct tree_content
168
{
169 170
	unsigned int entry_capacity; /* must match avail_tree_content */
	unsigned int entry_count;
171
	unsigned int delta_depth;
172 173 174 175 176 177 178
	struct tree_entry *entries[FLEX_ARRAY]; /* more */
};

struct avail_tree_content
{
	unsigned int entry_capacity; /* must match tree_content */
	struct avail_tree_content *next_avail;
179 180 181 182
};

struct branch
{
183 184
	struct branch *table_next_branch;
	struct branch *active_next_branch;
185
	const char *name;
186 187 188
	unsigned long last_commit;
	struct tree_entry branch_tree;
	unsigned char sha1[20];
189 190
};

191 192 193 194 195 196 197
struct tag
{
	struct tag *next_tag;
	const char *name;
	unsigned char sha1[20];
};

198 199

/* Stats and misc. counters */
200
static unsigned long max_depth = 10;
201
static unsigned long alloc_count;
202
static unsigned long branch_count;
203
static unsigned long branch_load_count;
204
static unsigned long remap_count;
205
static unsigned long object_count;
206
static unsigned long duplicate_count;
207
static unsigned long marks_set_count;
208 209
static unsigned long object_count_by_type[9];
static unsigned long duplicate_count_by_type[9];
210
static unsigned long delta_count_by_type[9];
211

212 213 214 215 216
/* Memory pools */
static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static size_t total_allocd;
static struct mem_pool *mem_pool;

217
/* Atom management */
218 219 220 221 222
static unsigned int atom_table_sz = 4451;
static unsigned int atom_cnt;
static struct atom_str **atom_table;

/* The .pack file being generated */
223
static int pack_fd;
224
static unsigned long pack_size;
225
static unsigned char pack_sha1[20];
226 227 228 229
static unsigned char* pack_base;
static unsigned long pack_moff;
static unsigned long pack_mlen = 128*1024*1024;
static unsigned long page_size;
230 231

/* Table of objects we've written. */
232
static unsigned int object_entry_alloc = 5000;
233 234
static struct object_entry_pool *blocks;
static struct object_entry *object_table[1 << 16];
235
static struct mark_set *marks;
236
static const char* mark_file;
237 238

/* Our last blob */
239 240 241 242 243 244 245
static struct last_object last_blob;

/* Tree management */
static unsigned int tree_entry_alloc = 1000;
static void *avail_tree_entry;
static unsigned int avail_tree_table_sz = 100;
static struct avail_tree_content **avail_tree_table;
246

247
/* Branch data */
248 249 250
static unsigned long max_active_branches = 5;
static unsigned long cur_active_branches;
static unsigned long branch_table_sz = 1039;
251 252 253
static struct branch **branch_table;
static struct branch *active_branches;

254 255 256 257
/* Tag data */
static struct tag *first_tag;
static struct tag *last_tag;

258 259
/* Input stream parsing */
static struct strbuf command_buf;
260
static unsigned long next_mark;
261
static FILE* branch_log;
262

263

264
static void alloc_objects(int cnt)
265
{
266
	struct object_entry_pool *b;
267

268
	b = xmalloc(sizeof(struct object_entry_pool)
269
		+ cnt * sizeof(struct object_entry));
270
	b->next_pool = blocks;
271 272 273 274 275
	b->next_free = b->entries;
	b->end = b->entries + cnt;
	blocks = b;
	alloc_count += cnt;
}
276

277
static struct object_entry* new_object(unsigned char *sha1)
278
{
279
	struct object_entry *e;
280

281
	if (blocks->next_free == blocks->end)
282
		alloc_objects(object_entry_alloc);
283

284
	e = blocks->next_free++;
285
	hashcpy(e->sha1, sha1);
286
	return e;
287 288
}

289 290 291 292 293
static struct object_entry* find_object(unsigned char *sha1)
{
	unsigned int h = sha1[0] << 8 | sha1[1];
	struct object_entry *e;
	for (e = object_table[h]; e; e = e->next)
294
		if (!hashcmp(sha1, e->sha1))
295 296 297 298
			return e;
	return NULL;
}

299 300 301
static struct object_entry* insert_object(unsigned char *sha1)
{
	unsigned int h = sha1[0] << 8 | sha1[1];
302
	struct object_entry *e = object_table[h];
303
	struct object_entry *p = NULL;
304 305

	while (e) {
306
		if (!hashcmp(sha1, e->sha1))
307 308 309 310 311 312
			return e;
		p = e;
		e = e->next;
	}

	e = new_object(sha1);
313
	e->next = NULL;
314 315 316 317
	e->offset = 0;
	if (p)
		p->next = e;
	else
318
		object_table[h] = e;
319 320
	return e;
}
321

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
static unsigned int hc_str(const char *s, size_t len)
{
	unsigned int r = 0;
	while (len-- > 0)
		r = r * 31 + *s++;
	return r;
}

static void* pool_alloc(size_t len)
{
	struct mem_pool *p;
	void *r;

	for (p = mem_pool; p; p = p->next_pool)
		if ((p->end - p->next_free >= len))
			break;

	if (!p) {
		if (len >= (mem_pool_alloc/2)) {
			total_allocd += len;
			return xmalloc(len);
		}
		total_allocd += sizeof(struct mem_pool) + mem_pool_alloc;
		p = xmalloc(sizeof(struct mem_pool) + mem_pool_alloc);
		p->next_pool = mem_pool;
		p->next_free = p->space;
		p->end = p->next_free + mem_pool_alloc;
		mem_pool = p;
	}

	r = p->next_free;
353 354 355
	/* round out to a pointer alignment */
	if (len & (sizeof(void*) - 1))
		len += sizeof(void*) - (len & (sizeof(void*) - 1));
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
	p->next_free += len;
	return r;
}

static void* pool_calloc(size_t count, size_t size)
{
	size_t len = count * size;
	void *r = pool_alloc(len);
	memset(r, 0, len);
	return r;
}

static char* pool_strdup(const char *s)
{
	char *r = pool_alloc(strlen(s) + 1);
	strcpy(r, s);
	return r;
}

375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
static void insert_mark(unsigned long idnum, struct object_entry *oe)
{
	struct mark_set *s = marks;
	while ((idnum >> s->shift) >= 1024) {
		s = pool_calloc(1, sizeof(struct mark_set));
		s->shift = marks->shift + 10;
		s->data.sets[0] = marks;
		marks = s;
	}
	while (s->shift) {
		unsigned long i = idnum >> s->shift;
		idnum -= i << s->shift;
		if (!s->data.sets[i]) {
			s->data.sets[i] = pool_calloc(1, sizeof(struct mark_set));
			s->data.sets[i]->shift = s->shift - 10;
		}
		s = s->data.sets[i];
	}
	if (!s->data.marked[idnum])
		marks_set_count++;
	s->data.marked[idnum] = oe;
}

static struct object_entry* find_mark(unsigned long idnum)
{
	unsigned long orig_idnum = idnum;
	struct mark_set *s = marks;
	struct object_entry *oe = NULL;
	if ((idnum >> s->shift) < 1024) {
		while (s && s->shift) {
			unsigned long i = idnum >> s->shift;
			idnum -= i << s->shift;
			s = s->data.sets[i];
		}
		if (s)
			oe = s->data.marked[idnum];
	}
	if (!oe)
		die("mark :%lu not declared", orig_idnum);
	return oe;
}

417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
static struct atom_str* to_atom(const char *s, size_t len)
{
	unsigned int hc = hc_str(s, len) % atom_table_sz;
	struct atom_str *c;

	for (c = atom_table[hc]; c; c = c->next_atom)
		if (c->str_len == len && !strncmp(s, c->str_dat, len))
			return c;

	c = pool_alloc(sizeof(struct atom_str) + len + 1);
	c->str_len = len;
	strncpy(c->str_dat, s, len);
	c->str_dat[len] = 0;
	c->next_atom = atom_table[hc];
	atom_table[hc] = c;
	atom_cnt++;
	return c;
}

static struct branch* lookup_branch(const char *name)
{
	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
	struct branch *b;

	for (b = branch_table[hc]; b; b = b->table_next_branch)
		if (!strcmp(name, b->name))
			return b;
	return NULL;
}

static struct branch* new_branch(const char *name)
{
	unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
	struct branch* b = lookup_branch(name);

	if (b)
		die("Invalid attempt to create duplicate branch: %s", name);
454 455
	if (check_ref_format(name))
		die("Branch name doesn't conform to GIT standards: %s", name);
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493

	b = pool_calloc(1, sizeof(struct branch));
	b->name = pool_strdup(name);
	b->table_next_branch = branch_table[hc];
	branch_table[hc] = b;
	branch_count++;
	return b;
}

static unsigned int hc_entries(unsigned int cnt)
{
	cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8;
	return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1;
}

static struct tree_content* new_tree_content(unsigned int cnt)
{
	struct avail_tree_content *f, *l = NULL;
	struct tree_content *t;
	unsigned int hc = hc_entries(cnt);

	for (f = avail_tree_table[hc]; f; l = f, f = f->next_avail)
		if (f->entry_capacity >= cnt)
			break;

	if (f) {
		if (l)
			l->next_avail = f->next_avail;
		else
			avail_tree_table[hc] = f->next_avail;
	} else {
		cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt;
		f = pool_alloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
		f->entry_capacity = cnt;
	}

	t = (struct tree_content*)f;
	t->entry_count = 0;
494
	t->delta_depth = 0;
495 496 497 498 499 500 501 502
	return t;
}

static void release_tree_entry(struct tree_entry *e);
static void release_tree_content(struct tree_content *t)
{
	struct avail_tree_content *f = (struct avail_tree_content*)t;
	unsigned int hc = hc_entries(f->entry_capacity);
503 504 505 506 507 508
	f->next_avail = avail_tree_table[hc];
	avail_tree_table[hc] = f;
}

static void release_tree_content_recursive(struct tree_content *t)
{
509 510 511
	unsigned int i;
	for (i = 0; i < t->entry_count; i++)
		release_tree_entry(t->entries[i]);
512
	release_tree_content(t);
513 514 515 516 517 518 519 520
}

static struct tree_content* grow_tree_content(
	struct tree_content *t,
	int amt)
{
	struct tree_content *r = new_tree_content(t->entry_count + amt);
	r->entry_count = t->entry_count;
521
	r->delta_depth = t->delta_depth;
522 523 524 525 526 527 528 529 530 531 532
	memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
	release_tree_content(t);
	return r;
}

static struct tree_entry* new_tree_entry()
{
	struct tree_entry *e;

	if (!avail_tree_entry) {
		unsigned int n = tree_entry_alloc;
533
		total_allocd += n * sizeof(struct tree_entry);
534
		avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
535
		while (n-- > 1) {
536 537 538
			*((void**)e) = e + 1;
			e++;
		}
539
		*((void**)e) = NULL;
540 541 542 543 544 545 546 547 548 549
	}

	e = avail_tree_entry;
	avail_tree_entry = *((void**)e);
	return e;
}

static void release_tree_entry(struct tree_entry *e)
{
	if (e->tree)
550
		release_tree_content_recursive(e->tree);
551 552 553 554 555
	*((void**)e) = avail_tree_entry;
	avail_tree_entry = e;
}

static void yread(int fd, void *buffer, size_t length)
556 557 558 559
{
	ssize_t ret = 0;
	while (ret < length) {
		ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
560 561 562 563 564 565 566 567 568
		if (!size)
			die("Read from descriptor %i: end of stream", fd);
		if (size < 0)
			die("Read from descriptor %i: %s", fd, strerror(errno));
		ret += size;
	}
}

static void ywrite(int fd, void *buffer, size_t length)
569 570 571 572
{
	ssize_t ret = 0;
	while (ret < length) {
		ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
573 574 575 576
		if (!size)
			die("Write to descriptor %i: end of file", fd);
		if (size < 0)
			die("Write to descriptor %i: %s", fd, strerror(errno));
577 578 579 580
		ret += size;
	}
}

581
static size_t encode_header(
582
	enum object_type type,
583
	size_t size,
584
	unsigned char *hdr)
585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
{
	int n = 1;
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
		n++;
	}
	*hdr = c;
	return n;
}

604 605 606
static int store_object(
	enum object_type type,
	void *dat,
607
	size_t datlen,
608
	struct last_object *last,
609 610
	unsigned char *sha1out,
	unsigned long mark)
611 612
{
	void *out, *delta;
613 614 615
	struct object_entry *e;
	unsigned char hdr[96];
	unsigned char sha1[20];
616
	unsigned long hdrlen, deltalen;
617 618 619 620 621 622 623 624
	SHA_CTX c;
	z_stream s;

	hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
	SHA1_Init(&c);
	SHA1_Update(&c, hdr, hdrlen);
	SHA1_Update(&c, dat, datlen);
	SHA1_Final(sha1, &c);
625
	if (sha1out)
626
		hashcpy(sha1out, sha1);
627 628

	e = insert_object(sha1);
629 630
	if (mark)
		insert_mark(mark, e);
631 632
	if (e->offset) {
		duplicate_count++;
633
		duplicate_count_by_type[type]++;
634
		return 1;
635
	}
636
	e->type = type;
637
	e->offset = pack_size;
638
	object_count++;
639
	object_count_by_type[type]++;
640

641
	if (last && last->data && last->depth < max_depth)
642
		delta = diff_delta(last->data, last->len,
643 644
			dat, datlen,
			&deltalen, 0);
645
	else
646 647 648 649 650 651
		delta = 0;

	memset(&s, 0, sizeof(s));
	deflateInit(&s, zlib_compression_level);

	if (delta) {
652
		delta_count_by_type[type]++;
653
		last->depth++;
654 655 656
		s.next_in = delta;
		s.avail_in = deltalen;
		hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
657 658
		ywrite(pack_fd, hdr, hdrlen);
		ywrite(pack_fd, last->sha1, sizeof(sha1));
659
		pack_size += hdrlen + sizeof(sha1);
660
	} else {
661 662
		if (last)
			last->depth = 0;
663 664
		s.next_in = dat;
		s.avail_in = datlen;
665
		hdrlen = encode_header(type, datlen, hdr);
666
		ywrite(pack_fd, hdr, hdrlen);
667
		pack_size += hdrlen;
668 669 670 671 672 673 674 675
	}

	s.avail_out = deflateBound(&s, s.avail_in);
	s.next_out = out = xmalloc(s.avail_out);
	while (deflate(&s, Z_FINISH) == Z_OK)
		/* nothing */;
	deflateEnd(&s);

676
	ywrite(pack_fd, out, s.total_out);
677
	pack_size += s.total_out;
678 679 680 681

	free(out);
	if (delta)
		free(delta);
682 683 684 685 686
	if (last) {
		if (last->data)
			free(last->data);
		last->data = dat;
		last->len = datlen;
687
		hashcpy(last->sha1, sha1);
688 689 690 691
	}
	return 0;
}

692
static unsigned char* map_pack(unsigned long offset, unsigned int *left)
693 694 695
{
	if (offset >= pack_size)
		die("object offset outside of pack file");
696 697 698
	if (!pack_base
			|| offset < pack_moff
			|| (offset + 20) >= (pack_moff + pack_mlen)) {
699 700
		if (pack_base)
			munmap(pack_base, pack_mlen);
701 702 703
		pack_moff = (offset / page_size) * page_size;
		pack_base = mmap(NULL,pack_mlen,PROT_READ,MAP_SHARED,
			pack_fd,pack_moff);
704 705 706 707
		if (pack_base == MAP_FAILED)
			die("Failed to map generated pack: %s", strerror(errno));
		remap_count++;
	}
708 709 710 711
	offset -= pack_moff;
	if (left)
		*left = pack_mlen - offset;
	return pack_base + offset;
712 713 714 715 716 717 718 719 720 721
}

static unsigned long unpack_object_header(unsigned long offset,
	enum object_type *type,
	unsigned long *sizep)
{
	unsigned shift;
	unsigned char c;
	unsigned long size;

722
	c = *map_pack(offset++, NULL);
723 724 725 726
	*type = (c >> 4) & 7;
	size = c & 15;
	shift = 4;
	while (c & 0x80) {
727
		c = *map_pack(offset++, NULL);
728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743
		size += (c & 0x7f) << shift;
		shift += 7;
	}
	*sizep = size;
	return offset;
}

static void *unpack_non_delta_entry(unsigned long o, unsigned long sz)
{
	z_stream stream;
	unsigned char *result;

	result = xmalloc(sz + 1);
	result[sz] = 0;

	memset(&stream, 0, sizeof(stream));
744
	stream.next_in = map_pack(o, &stream.avail_in);
745 746 747 748 749 750 751 752
	stream.next_out = result;
	stream.avail_out = sz;

	inflateInit(&stream);
	for (;;) {
		int st = inflate(&stream, Z_FINISH);
		if (st == Z_STREAM_END)
			break;
753 754 755
		if (st == Z_OK || st == Z_BUF_ERROR) {
			o = stream.next_in - pack_base + pack_moff;
			stream.next_in = map_pack(o, &stream.avail_in);
756 757
			continue;
		}
758
		die("Error %i from zlib during inflate.", st);
759 760 761 762 763 764 765
	}
	inflateEnd(&stream);
	if (stream.total_out != sz)
		die("Error after inflate: sizes mismatch");
	return result;
}

766 767 768
static void *unpack_entry(unsigned long offset,
	unsigned long *sizep,
	unsigned int *delta_depth);
769 770 771

static void *unpack_delta_entry(unsigned long offset,
	unsigned long delta_size,
772 773
	unsigned long *sizep,
	unsigned int *delta_depth)
774 775 776 777 778 779
{
	struct object_entry *base_oe;
	unsigned char *base_sha1;
	void *delta_data, *base, *result;
	unsigned long base_size, result_size;

780
	base_sha1 = map_pack(offset, NULL);
781 782 783
	base_oe = find_object(base_sha1);
	if (!base_oe)
		die("I'm broken; I can't find a base I know must be here.");
784
	base = unpack_entry(base_oe->offset, &base_size, delta_depth);
785 786 787 788 789 790 791 792 793
	delta_data = unpack_non_delta_entry(offset + 20, delta_size);
	result = patch_delta(base, base_size,
			     delta_data, delta_size,
			     &result_size);
	if (!result)
		die("failed to apply delta");
	free(delta_data);
	free(base);
	*sizep = result_size;
794
	(*delta_depth)++;
795 796 797
	return result;
}

798 799 800
static void *unpack_entry(unsigned long offset,
	unsigned long *sizep,
	unsigned int *delta_depth)
801 802 803 804 805 806 807
{
	unsigned long size;
	enum object_type kind;

	offset = unpack_object_header(offset, &kind, &size);
	switch (kind) {
	case OBJ_DELTA:
808
		return unpack_delta_entry(offset, size, sizep, delta_depth);
809 810 811 812 813
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
		*sizep = size;
814
		*delta_depth = 0;
815 816 817 818 819 820
		return unpack_non_delta_entry(offset, size);
	default:
		die("I created an object I can't read!");
	}
}

821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
static const char *get_mode(const char *str, unsigned int *modep)
{
	unsigned char c;
	unsigned int mode = 0;

	while ((c = *str++) != ' ') {
		if (c < '0' || c > '7')
			return NULL;
		mode = (mode << 3) + (c - '0');
	}
	*modep = mode;
	return str;
}

static void load_tree(struct tree_entry *root)
{
837
	unsigned char* sha1 = root->versions[1].sha1;
838 839 840 841 842 843 844
	struct object_entry *myoe;
	struct tree_content *t;
	unsigned long size;
	char *buf;
	const char *c;

	root->tree = t = new_tree_content(8);
845
	if (is_null_sha1(sha1))
846 847
		return;

848
	myoe = find_object(sha1);
849
	if (myoe) {
850
		if (myoe->type != OBJ_TREE)
851 852
			die("Not a tree: %s", sha1_to_hex(sha1));
		buf = unpack_entry(myoe->offset, &size, &t->delta_depth);
853
	} else {
854
		char type[20];
855
		buf = read_sha1_file(sha1, type, &size);
856
		if (!buf || strcmp(type, tree_type))
857
			die("Can't load tree %s", sha1_to_hex(sha1));
858 859 860 861 862 863 864 865 866 867 868
	}

	c = buf;
	while (c != (buf + size)) {
		struct tree_entry *e = new_tree_entry();

		if (t->entry_count == t->entry_capacity)
			root->tree = t = grow_tree_content(t, 8);
		t->entries[t->entry_count++] = e;

		e->tree = NULL;
869
		c = get_mode(c, &e->versions[1].mode);
870
		if (!c)
871 872
			die("Corrupt mode in %s", sha1_to_hex(sha1));
		e->versions[0].mode = e->versions[1].mode;
873 874
		e->name = to_atom(c, strlen(c));
		c += e->name->str_len + 1;
875 876
		hashcpy(e->versions[0].sha1, (unsigned char*)c);
		hashcpy(e->versions[1].sha1, (unsigned char*)c);
877 878 879 880 881
		c += 20;
	}
	free(buf);
}

882
static int tecmp0 (const void *_a, const void *_b)
883 884 885 886
{
	struct tree_entry *a = *((struct tree_entry**)_a);
	struct tree_entry *b = *((struct tree_entry**)_b);
	return base_name_compare(
887 888
		a->name->str_dat, a->name->str_len, a->versions[0].mode,
		b->name->str_dat, b->name->str_len, b->versions[0].mode);
889 890
}

891
static int tecmp1 (const void *_a, const void *_b)
892
{
893 894 895 896 897 898 899 900 901 902
	struct tree_entry *a = *((struct tree_entry**)_a);
	struct tree_entry *b = *((struct tree_entry**)_b);
	return base_name_compare(
		a->name->str_dat, a->name->str_len, a->versions[1].mode,
		b->name->str_dat, b->name->str_len, b->versions[1].mode);
}

static void* mktree(struct tree_content *t, int v, unsigned long *szp)
{
	size_t maxlen = 0;
903 904 905
	unsigned int i;
	char *buf, *c;

906 907 908 909
	if (!v)
		qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp0);
	else
		qsort(t->entries,t->entry_count,sizeof(t->entries[0]),tecmp1);
910 911

	for (i = 0; i < t->entry_count; i++) {
912 913
		if (t->entries[i]->versions[v].mode)
			maxlen += t->entries[i]->name->str_len + 34;
914 915 916 917 918
	}

	buf = c = xmalloc(maxlen);
	for (i = 0; i < t->entry_count; i++) {
		struct tree_entry *e = t->entries[i];
919 920 921
		if (!e->versions[v].mode)
			continue;
		c += sprintf(c, "%o", e->versions[v].mode);
922 923 924
		*c++ = ' ';
		strcpy(c, e->name->str_dat);
		c += e->name->str_len + 1;
925
		hashcpy((unsigned char*)c, e->versions[v].sha1);
926 927
		c += 20;
	}
928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978

	*szp = c - buf;
	return buf;
}

static void store_tree(struct tree_entry *root)
{
	struct tree_content *t = root->tree;
	unsigned int i, j, del;
	unsigned long vers1len;
	void **vers1dat;
	struct last_object lo;

	if (!is_null_sha1(root->versions[1].sha1))
		return;

	for (i = 0; i < t->entry_count; i++) {
		if (t->entries[i]->tree)
			store_tree(t->entries[i]);
	}

	if (is_null_sha1(root->versions[0].sha1)
			|| !find_object(root->versions[0].sha1)) {
		lo.data = NULL;
		lo.depth = 0;
	} else {
		lo.data = mktree(t, 0, &lo.len);
		lo.depth = t->delta_depth;
		hashcpy(lo.sha1, root->versions[0].sha1);
	}
	vers1dat = mktree(t, 1, &vers1len);

	store_object(OBJ_TREE, vers1dat, vers1len,
		&lo, root->versions[1].sha1, 0);
	/* note: lo.dat (if created) was freed by store_object */
	free(vers1dat);

	t->delta_depth = lo.depth;
	hashcpy(root->versions[0].sha1, root->versions[1].sha1);
	for (i = 0, j = 0, del = 0; i < t->entry_count; i++) {
		struct tree_entry *e = t->entries[i];
		if (e->versions[1].mode) {
			e->versions[0].mode = e->versions[1].mode;
			hashcpy(e->versions[0].sha1, e->versions[1].sha1);
			t->entries[j++] = e;
		} else {
			release_tree_entry(e);
			del++;
		}
	}
	t->entry_count -= del;
979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001
}

static int tree_content_set(
	struct tree_entry *root,
	const char *p,
	const unsigned char *sha1,
	const unsigned int mode)
{
	struct tree_content *t = root->tree;
	const char *slash1;
	unsigned int i, n;
	struct tree_entry *e;

	slash1 = strchr(p, '/');
	if (slash1)
		n = slash1 - p;
	else
		n = strlen(p);

	for (i = 0; i < t->entry_count; i++) {
		e = t->entries[i];
		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
			if (!slash1) {
1002 1003
				if (e->versions[1].mode == mode
						&& !hashcmp(e->versions[1].sha1, sha1))
1004
					return 0;
1005 1006
				e->versions[1].mode = mode;
				hashcpy(e->versions[1].sha1, sha1);
1007
				if (e->tree) {
1008
					release_tree_content_recursive(e->tree);
1009 1010
					e->tree = NULL;
				}
1011
				hashclr(root->versions[1].sha1);
1012 1013
				return 1;
			}
1014
			if (!S_ISDIR(e->versions[1].mode)) {
1015
				e->tree = new_tree_content(8);
1016
				e->versions[1].mode = S_IFDIR;
1017 1018 1019 1020
			}
			if (!e->tree)
				load_tree(e);
			if (tree_content_set(e, slash1 + 1, sha1, mode)) {
1021
				hashclr(root->versions[1].sha1);
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031
				return 1;
			}
			return 0;
		}
	}

	if (t->entry_count == t->entry_capacity)
		root->tree = t = grow_tree_content(t, 8);
	e = new_tree_entry();
	e->name = to_atom(p, n);
1032 1033
	e->versions[0].mode = 0;
	hashclr(e->versions[0].sha1);
1034 1035 1036
	t->entries[t->entry_count++] = e;
	if (slash1) {
		e->tree = new_tree_content(8);
1037
		e->versions[1].mode = S_IFDIR;
1038 1039 1040
		tree_content_set(e, slash1 + 1, sha1, mode);
	} else {
		e->tree = NULL;
1041 1042
		e->versions[1].mode = mode;
		hashcpy(e->versions[1].sha1, sha1);
1043
	}
1044
	hashclr(root->versions[1].sha1);
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
	return 1;
}

static int tree_content_remove(struct tree_entry *root, const char *p)
{
	struct tree_content *t = root->tree;
	const char *slash1;
	unsigned int i, n;
	struct tree_entry *e;

	slash1 = strchr(p, '/');
	if (slash1)
		n = slash1 - p;
	else
		n = strlen(p);

	for (i = 0; i < t->entry_count; i++) {
		e = t->entries[i];
		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
1064
			if (!slash1 || !S_ISDIR(e->versions[1].mode))
1065 1066 1067 1068 1069 1070
				goto del_entry;
			if (!e->tree)
				load_tree(e);
			if (tree_content_remove(e, slash1 + 1)) {
				if (!e->tree->entry_count)
					goto del_entry;
1071
				hashclr(root->versions[1].sha1);
1072 1073 1074 1075 1076 1077 1078 1079
				return 1;
			}
			return 0;
		}
	}
	return 0;

del_entry:
1080 1081 1082 1083 1084 1085 1086
	if (e->tree) {
		release_tree_content_recursive(e->tree);
		e->tree = NULL;
	}
	e->versions[1].mode = 0;
	hashclr(e->versions[1].sha1);
	hashclr(root->versions[1].sha1);
1087
	return 1;
1088 1089
}

1090
static void init_pack_header()
1091
{
1092 1093 1094 1095 1096 1097 1098
	struct pack_header hdr;

	hdr.hdr_signature = htonl(PACK_SIGNATURE);
	hdr.hdr_version = htonl(2);
	hdr.hdr_entries = 0;

	ywrite(pack_fd, &hdr, sizeof(hdr));
1099
	pack_size = sizeof(hdr);
1100 1101
}

1102
static void fixup_header_footer()
1103 1104 1105 1106 1107 1108 1109
{
	SHA_CTX c;
	char hdr[8];
	unsigned long cnt;
	char *buf;
	size_t n;

1110
	if (lseek(pack_fd, 0, SEEK_SET) != 0)
1111 1112 1113
		die("Failed seeking to start: %s", strerror(errno));

	SHA1_Init(&c);
1114
	yread(pack_fd, hdr, 8);
1115 1116 1117 1118
	SHA1_Update(&c, hdr, 8);

	cnt = htonl(object_count);
	SHA1_Update(&c, &cnt, 4);
1119
	ywrite(pack_fd, &cnt, 4);
1120 1121 1122

	buf = xmalloc(128 * 1024);
	for (;;) {
1123
		n = xread(pack_fd, buf, 128 * 1024);
1124 1125 1126 1127 1128 1129
		if (n <= 0)
			break;
		SHA1_Update(&c, buf, n);
	}
	free(buf);

1130
	SHA1_Final(pack_sha1, &c);
1131
	ywrite(pack_fd, pack_sha1, sizeof(pack_sha1));
1132 1133
}

1134
static int oecmp (const void *_a, const void *_b)
1135
{
1136 1137
	struct object_entry *a = *((struct object_entry**)_a);
	struct object_entry *b = *((struct object_entry**)_b);
1138
	return hashcmp(a->sha1, b->sha1);
1139 1140 1141 1142 1143 1144 1145
}

static void write_index(const char *idx_name)
{
	struct sha1file *f;
	struct object_entry **idx, **c, **last;
	struct object_entry *e;
1146
	struct object_entry_pool *o;
1147 1148 1149 1150 1151 1152
	unsigned int array[256];
	int i;

	/* Build the sorted table of object IDs. */
	idx = xmalloc(object_count * sizeof(struct object_entry*));
	c = idx;
1153
	for (o = blocks; o; o = o->next_pool)
1154 1155
		for (e = o->entries; e != o->next_free; e++)
			*c++ = e;
1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
	last = idx + object_count;
	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);

	/* Generate the fan-out array. */
	c = idx;
	for (i = 0; i < 256; i++) {
		struct object_entry **next = c;;
		while (next < last) {
			if ((*next)->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - idx);
		c = next;
	}

	f = sha1create("%s", idx_name);
	sha1write(f, array, 256 * sizeof(int));
	for (c = idx; c != last; c++) {
		unsigned int offset = htonl((*c)->offset);
		sha1write(f, &offset, 4);
		sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
	}
1179
	sha1write(f, pack_sha1, sizeof(pack_sha1));
1180 1181 1182 1183
	sha1close(f, NULL, 1);
	free(idx);
}

1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
static void dump_branches()
{
	static const char *msg = "fast-import";
	unsigned int i;
	struct branch *b;
	struct ref_lock *lock;

	for (i = 0; i < branch_table_sz; i++) {
		for (b = branch_table[i]; b; b = b->table_next_branch) {
			lock = lock_any_ref_for_update(b->name, NULL, 0);
			if (!lock || write_ref_sha1(lock, b->sha1, msg) < 0)
				die("Can't write %s", b->name);
		}
	}
}

1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214
static void dump_tags()
{
	static const char *msg = "fast-import";
	struct tag *t;
	struct ref_lock *lock;
	char path[PATH_MAX];

	for (t = first_tag; t; t = t->next_tag) {
		sprintf(path, "refs/tags/%s", t->name);
		lock = lock_any_ref_for_update(path, NULL, 0);
		if (!lock || write_ref_sha1(lock, t->sha1, msg) < 0)
			die("Can't write %s", path);
	}
}

1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228
static void dump_marks_helper(FILE *f,
	unsigned long base,
	struct mark_set *m)
{
	int k;
	if (m->shift) {
		for (k = 0; k < 1024; k++) {
			if (m->data.sets[k])
				dump_marks_helper(f, (base + k) << m->shift,
					m->data.sets[k]);
		}
	} else {
		for (k = 0; k < 1024; k++) {
			if (m->data.marked[k])
1229
				fprintf(f, ":%lu %s\n", base + k,
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244
					sha1_to_hex(m->data.marked[k]->sha1));
		}
	}
}

static void dump_marks()
{
	if (mark_file)
	{
		FILE *f = fopen(mark_file, "w");
		dump_marks_helper(f, 0, marks);
		fclose(f);
	}
}

1245 1246 1247 1248 1249 1250 1251 1252
static void read_next_command()
{
	read_line(&command_buf, stdin, '\n');
}

static void cmd_mark()
{
	if (!strncmp("mark :", command_buf.buf, 6)) {
1253
		next_mark = strtoul(command_buf.buf + 6, NULL, 10);
1254 1255 1256
		read_next_command();
	}
	else
1257
		next_mark = 0;
1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
}

static void* cmd_data (size_t *size)
{
	size_t n = 0;
	void *buffer;
	size_t length;

	if (strncmp("data ", command_buf.buf, 5))
		die("Expected 'data n' command, found: %s", command_buf.buf);

	length = strtoul(command_buf.buf + 5, NULL, 10);
	buffer = xmalloc(length);

	while (n < length) {
		size_t s = fread((char*)buffer + n, 1, length - n, stdin);
		if (!s && feof(stdin))
			die("EOF in data (%lu bytes remaining)", length - n);
		n += s;
	}

	if (fgetc(stdin) != '\n')
		die("An lf did not trail the binary data as expected.");

	*size = length;
	return buffer;
}

1286
static void cmd_new_blob()
1287
{
1288 1289
	size_t l;
	void *d;
1290 1291 1292

	read_next_command();
	cmd_mark();
1293
	d = cmd_data(&l);
1294

1295 1296
	if (store_object(OBJ_BLOB, d, l, &last_blob, NULL, next_mark))
		free(d);
1297 1298
}

1299
static void unload_one_branch()
1300
{
1301 1302
	while (cur_active_branches
		&& cur_active_branches >= max_active_branches) {
1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
		unsigned long min_commit = ULONG_MAX;
		struct branch *e, *l = NULL, *p = NULL;

		for (e = active_branches; e; e = e->active_next_branch) {
			if (e->last_commit < min_commit) {
				p = l;
				min_commit = e->last_commit;
			}
			l = e;
		}

		if (p) {
			e = p->active_next_branch;
			p->active_next_branch = e->active_next_branch;
		} else {
			e = active_branches;
			active_branches = e->active_next_branch;
		}
		e->active_next_branch = NULL;
		if (e->branch_tree.tree) {
1323
			release_tree_content_recursive(e->branch_tree.tree);
1324 1325 1326
			e->branch_tree.tree = NULL;
		}
		cur_active_branches--;
1327 1328 1329
	}
}

1330
static void load_branch(struct branch *b)
1331
{
1332 1333 1334 1335
	load_tree(&b->branch_tree);
	b->active_next_branch = active_branches;
	active_branches = b;
	cur_active_branches++;
1336
	branch_load_count++;
1337 1338
}

1339
static void file_change_m(struct branch *b)
1340
{
1341 1342 1343
	const char *p = command_buf.buf + 2;
	char *p_uq;
	const char *endp;
1344
	struct object_entry *oe;
1345
	unsigned char sha1[20];
1346
	unsigned int mode;
1347
	char type[20];
1348

1349 1350 1351 1352 1353 1354
	p = get_mode(p, &mode);
	if (!p)
		die("Corrupt mode: %s", command_buf.buf);
	switch (mode) {
	case S_IFREG | 0644:
	case S_IFREG | 0755:
1355
	case S_IFLNK:
1356 1357 1358 1359 1360 1361 1362 1363
	case 0644:
	case 0755:
		/* ok */
		break;
	default:
		die("Corrupt mode: %s", command_buf.buf);
	}

1364 1365 1366 1367 1368 1369 1370 1371 1372 1373
	if (*p == ':') {
		char *x;
		oe = find_mark(strtoul(p + 1, &x, 10));
		p = x;
	} else {
		if (get_sha1_hex(p, sha1))
			die("Invalid SHA1: %s", command_buf.buf);
		oe = find_object(sha1);
		p += 40;
	}
1374 1375 1376 1377 1378 1379 1380 1381 1382
	if (*p++ != ' ')
		die("Missing space after SHA1: %s", command_buf.buf);

	p_uq = unquote_c_style(p, &endp);
	if (p_uq) {
		if (*endp)
			die("Garbage after path in: %s", command_buf.buf);
		p = p_uq;
	}
1383

1384 1385
	if (oe) {
		if (oe->type != OBJ_BLOB)
1386 1387
			die("Not a blob (actually a %s): %s",
				command_buf.buf, type_names[oe->type]);
1388 1389
	} else {
		if (sha1_object_info(sha1, type, NULL))
1390
			die("Blob not found: %s", command_buf.buf);
1391
		if (strcmp(blob_type, type))
1392 1393
			die("Not a blob (actually a %s): %s",
				command_buf.buf, type);
1394
	}
1395

1396 1397 1398 1399
	tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode);

	if (p_uq)
		free(p_uq);
1400
}
1401

1402 1403
static void file_change_d(struct branch *b)
{
1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416
	const char *p = command_buf.buf + 2;
	char *p_uq;
	const char *endp;

	p_uq = unquote_c_style(p, &endp);
	if (p_uq) {
		if (*endp)
			die("Garbage after path in: %s", command_buf.buf);
		p = p_uq;
	}
	tree_content_remove(&b->branch_tree, p);
	if (p_uq)
		free(p_uq);
1417 1418
}

1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442
static void cmd_from(struct branch *b)
{
	const char *from, *endp;
	char *str_uq;
	struct branch *s;

	if (strncmp("from ", command_buf.buf, 5))
		return;

	if (b->last_commit)
		die("Can't reinitailize branch %s", b->name);

	from = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(from, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after string in: %s", command_buf.buf);
		from = str_uq;
	}

	s = lookup_branch(from);
	if (b == s)
		die("Can't create a branch from itself: %s", b->name);
	else if (s) {
1443
		unsigned char *t = s->branch_tree.versions[1].sha1;
1444
		hashcpy(b->sha1, s->sha1);
1445 1446
		hashcpy(b->branch_tree.versions[0].sha1, t);
		hashcpy(b->branch_tree.versions[1].sha1, t);
1447 1448 1449 1450
	} else if (*from == ':') {
		unsigned long idnum = strtoul(from + 1, NULL, 10);
		struct object_entry *oe = find_mark(idnum);
		unsigned long size;
1451
		unsigned int depth;
1452 1453 1454
		char *buf;
		if (oe->type != OBJ_COMMIT)
			die("Mark :%lu not a commit", idnum);
1455
		hashcpy(b->sha1, oe->sha1);
1456
		buf = unpack_entry(oe->offset, &size, &depth);
1457 1458 1459
		if (!buf || size < 46)
			die("Not a valid commit: %s", from);
		if (memcmp("tree ", buf, 5)
1460
			|| get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
1461 1462
			die("The commit %s is corrupt", sha1_to_hex(b->sha1));
		free(buf);
1463 1464
		hashcpy(b->branch_tree.versions[0].sha1,
			b->branch_tree.versions[1].sha1);
1465
	} else if (!get_sha1(from, b->sha1)) {
1466 1467 1468 1469
		if (is_null_sha1(b->sha1)) {
			hashclr(b->branch_tree.versions[0].sha1);
			hashclr(b->branch_tree.versions[1].sha1);
		} else {
1470 1471 1472 1473 1474 1475 1476 1477
			unsigned long size;
			char *buf;

			buf = read_object_with_reference(b->sha1,
				type_names[OBJ_COMMIT], &size, b->sha1);
			if (!buf || size < 46)
				die("Not a valid commit: %s", from);
			if (memcmp("tree ", buf, 5)
1478
				|| get_sha1_hex(buf + 5, b->branch_tree.versions[1].sha1))
1479 1480
				die("The commit %s is corrupt", sha1_to_hex(b->sha1));
			free(buf);
1481 1482
			hashcpy(b->branch_tree.versions[0].sha1,
				b->branch_tree.versions[1].sha1);
1483 1484 1485 1486 1487 1488 1489
		}
	} else
		die("Invalid ref name or SHA1 expression: %s", from);

	read_next_command();
}

1490
static void cmd_new_commit()
1491
{
1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510
	struct branch *b;
	void *msg;
	size_t msglen;
	char *str_uq;
	const char *endp;
	char *sp;
	char *author = NULL;
	char *committer = NULL;
	char *body;

	/* Obtain the branch name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after ref in: %s", command_buf.buf);
		sp = str_uq;
	}
	b = lookup_branch(sp);
1511
	if (!b)
1512
		b = new_branch(sp);
1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528
	if (str_uq)
		free(str_uq);

	read_next_command();
	cmd_mark();
	if (!strncmp("author ", command_buf.buf, 7)) {
		author = strdup(command_buf.buf);
		read_next_command();
	}
	if (!strncmp("committer ", command_buf.buf, 10)) {
		committer = strdup(command_buf.buf);
		read_next_command();
	}
	if (!committer)
		die("Expected committer but didn't get one");
	msg = cmd_data(&msglen);
1529 1530
	read_next_command();
	cmd_from(b);
1531 1532

	/* ensure the branch is active/loaded */
1533
	if (!b->branch_tree.tree || !max_active_branches) {
1534 1535 1536
		unload_one_branch();
		load_branch(b);
	}
1537

1538 1539
	/* file_change* */
	for (;;) {
1540
		if (1 == command_buf.len)
1541
			break;
1542
		else if (!strncmp("M ", command_buf.buf, 2))
1543
			file_change_m(b);
1544
		else if (!strncmp("D ", command_buf.buf, 2))
1545 1546
			file_change_d(b);
		else
1547
			die("Unsupported file_change: %s", command_buf.buf);
1548
		read_next_command();
1549 1550
	}

1551
	/* build the tree and the commit */
1552
	store_tree(&b->branch_tree);
1553 1554 1555 1556 1557
	body = xmalloc(97 + msglen
		+ (author
			? strlen(author) + strlen(committer)
			: 2 * strlen(committer)));
	sp = body;
1558 1559
	sp += sprintf(sp, "tree %s\n",
		sha1_to_hex(b->branch_tree.versions[1].sha1));
1560
	if (!is_null_sha1(b->sha1))
1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573
		sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
	if (author)
		sp += sprintf(sp, "%s\n", author);
	else
		sp += sprintf(sp, "author %s\n", committer + 10);
	sp += sprintf(sp, "%s\n\n", committer);
	memcpy(sp, msg, msglen);
	sp += msglen;
	if (author)
		free(author);
	free(committer);
	free(msg);

1574
	store_object(OBJ_COMMIT, body, sp - body, NULL, b->sha1, next_mark);
1575 1576
	free(body);
	b->last_commit = object_count_by_type[OBJ_COMMIT];
1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588

	if (branch_log) {
		int need_dq = quote_c_style(b->name, NULL, NULL, 0);
		fprintf(branch_log, "commit ");
		if (need_dq) {
			fputc('"', branch_log);
			quote_c_style(b->name, NULL, branch_log, 0);
			fputc('"', branch_log);
		} else
			fprintf(branch_log, "%s", b->name);
		fprintf(branch_log," :%lu %s\n",next_mark,sha1_to_hex(b->sha1));
	}
1589 1590
}

1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
static void cmd_new_tag()
{
	char *str_uq;
	const char *endp;
	char *sp;
	const char *from;
	char *tagger;
	struct branch *s;
	void *msg;
	size_t msglen;
	char *body;
	struct tag *t;
1603
	unsigned long from_mark = 0;
1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639
	unsigned char sha1[20];

	/* Obtain the new tag name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after tag name in: %s", command_buf.buf);
		sp = str_uq;
	}
	t = pool_alloc(sizeof(struct tag));
	t->next_tag = NULL;
	t->name = pool_strdup(sp);
	if (last_tag)
		last_tag->next_tag = t;
	else
		first_tag = t;
	last_tag = t;
	if (str_uq)
		free(str_uq);
	read_next_command();

	/* from ... */
	if (strncmp("from ", command_buf.buf, 5))
		die("Expected from command, got %s", command_buf.buf);

	from = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(from, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after string in: %s", command_buf.buf);
		from = str_uq;
	}

	s = lookup_branch(from);
	if (s) {
1640
		hashcpy(sha1, s->sha1);
1641
	} else if (*from == ':') {
1642 1643
		from_mark = strtoul(from + 1, NULL, 10);
		struct object_entry *oe = find_mark(from_mark);
1644
		if (oe->type != OBJ_COMMIT)
1645
			die("Mark :%lu not a commit", from_mark);
1646
		hashcpy(sha1, oe->sha1);
1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685
	} else if (!get_sha1(from, sha1)) {
		unsigned long size;
		char *buf;

		buf = read_object_with_reference(sha1,
			type_names[OBJ_COMMIT], &size, sha1);
		if (!buf || size < 46)
			die("Not a valid commit: %s", from);
		free(buf);
	} else
		die("Invalid ref name or SHA1 expression: %s", from);

	if (str_uq)
		free(str_uq);
	read_next_command();

	/* tagger ... */
	if (strncmp("tagger ", command_buf.buf, 7))
		die("Expected tagger command, got %s", command_buf.buf);
	tagger = strdup(command_buf.buf);

	/* tag payload/message */
	read_next_command();
	msg = cmd_data(&msglen);

	/* build the tag object */
	body = xmalloc(67 + strlen(t->name) + strlen(tagger) + msglen);
	sp = body;
	sp += sprintf(sp, "object %s\n", sha1_to_hex(sha1));
	sp += sprintf(sp, "type %s\n", type_names[OBJ_COMMIT]);
	sp += sprintf(sp, "tag %s\n", t->name);
	sp += sprintf(sp, "%s\n\n", tagger);
	memcpy(sp, msg, msglen);
	sp += msglen;
	free(tagger);
	free(msg);

	store_object(OBJ_TAG, body, sp - body, NULL, t->sha1, 0);
	free(body);
1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697

	if (branch_log) {
		int need_dq = quote_c_style(t->name, NULL, NULL, 0);
		fprintf(branch_log, "tag ");
		if (need_dq) {
			fputc('"', branch_log);
			quote_c_style(t->name, NULL, branch_log, 0);
			fputc('"', branch_log);
		} else
			fprintf(branch_log, "%s", t->name);
		fprintf(branch_log," :%lu %s\n",from_mark,sha1_to_hex(t->sha1));
	}
1698 1699
}

1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
static void cmd_reset_branch()
{
	struct branch *b;
	char *str_uq;
	const char *endp;
	char *sp;

	/* Obtain the branch name from the rest of our command */
	sp = strchr(command_buf.buf, ' ') + 1;
	str_uq = unquote_c_style(sp, &endp);
	if (str_uq) {
		if (*endp)
			die("Garbage after ref in: %s", command_buf.buf);
		sp = str_uq;
	}
	b = lookup_branch(sp);
	if (b) {
		b->last_commit = 0;
		if (b->branch_tree.tree) {
			release_tree_content_recursive(b->branch_tree.tree);
			b->branch_tree.tree = NULL;
		}
	}
	if (str_uq)
		free(str_uq);
}

1727
static const char fast_import_usage[] =
1728
"git-fast-import [--objects=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file] [--branch-log=log] temp.pack";
1729

1730 1731
int main(int argc, const char **argv)
{
1732 1733 1734
	const char *base_name;
	int i;
	unsigned long est_obj_cnt = 1000;
1735 1736
	char *pack_name;
	char *idx_name;
1737
	struct stat sb;
1738

1739 1740
	setup_ident();
	git_config(git_default_config);
1741
	page_size = getpagesize();
1742

1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753
	for (i = 1; i < argc; i++) {
		const char *a = argv[i];

		if (*a != '-' || !strcmp(a, "--"))
			break;
		else if (!strncmp(a, "--objects=", 10))
			est_obj_cnt = strtoul(a + 10, NULL, 0);
		else if (!strncmp(a, "--depth=", 8))
			max_depth = strtoul(a + 8, NULL, 0);
		else if (!strncmp(a, "--active-branches=", 18))
			max_active_branches = strtoul(a + 18, NULL, 0);
1754 1755
		else if (!strncmp(a, "--export-marks=", 15))
			mark_file = a + 15;
1756 1757 1758 1759 1760
		else if (!strncmp(a, "--branch-log=", 13)) {
			branch_log = fopen(a + 13, "w");
			if (!branch_log)
				die("Can't create %s: %s", a + 13, strerror(errno));
		}
1761 1762 1763 1764 1765 1766 1767
		else
			die("unknown option %s", a);
	}
	if ((i+1) != argc)
		usage(fast_import_usage);
	base_name = argv[i];

1768 1769 1770 1771 1772
	pack_name = xmalloc(strlen(base_name) + 6);
	sprintf(pack_name, "%s.pack", base_name);
	idx_name = xmalloc(strlen(base_name) + 5);
	sprintf(idx_name, "%s.idx", base_name);

1773 1774
	pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
	if (pack_fd < 0)
1775
		die("Can't create %s: %s", pack_name, strerror(errno));
1776

1777
	init_pack_header();
1778
	alloc_objects(est_obj_cnt);
1779
	strbuf_init(&command_buf);
1780 1781 1782 1783

	atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
	branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
	avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
1784
	marks = pool_calloc(1, sizeof(struct mark_set));
1785

1786
	for (;;) {
1787 1788
		read_next_command();
		if (command_buf.eof)
1789
			break;
1790 1791 1792 1793
		else if (!strcmp("blob", command_buf.buf))
			cmd_new_blob();
		else if (!strncmp("commit ", command_buf.buf, 7))
			cmd_new_commit();
1794 1795
		else if (!strncmp("tag ", command_buf.buf, 4))
			cmd_new_tag();
1796 1797
		else if (!strncmp("reset ", command_buf.buf, 6))
			cmd_reset_branch();
1798 1799
		else
			die("Unsupported command: %s", command_buf.buf);
1800
	}
1801

1802
	fixup_header_footer();
1803
	close(pack_fd);
1804
	write_index(idx_name);
1805
	dump_branches();
1806
	dump_tags();
1807
	dump_marks();
1808 1809
	if (branch_log)
		fclose(branch_log);
1810

1811 1812 1813 1814
	fprintf(stderr, "%s statistics:\n", argv[0]);
	fprintf(stderr, "---------------------------------------------------\n");
	fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
	fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
1815 1816 1817 1818
	fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
	fprintf(stderr, "      trees  :   %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
	fprintf(stderr, "      commits:   %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
	fprintf(stderr, "      tags   :   %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
1819
	fprintf(stderr, "Total branches:  %10lu (%10lu loads     )\n", branch_count, branch_load_count);
1820
	fprintf(stderr, "      marks:     %10u (%10lu unique    )\n", (1 << marks->shift) * 1024, marks_set_count);
1821
	fprintf(stderr, "      atoms:     %10u\n", atom_cnt);
1822 1823 1824
	fprintf(stderr, "Memory total:    %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
	fprintf(stderr, "       pools:    %10lu KiB\n", total_allocd/1024);
	fprintf(stderr, "     objects:    %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
1825
	fprintf(stderr, "Pack remaps:     %10lu\n", remap_count);
1826 1827 1828 1829 1830 1831 1832 1833
	fprintf(stderr, "---------------------------------------------------\n");

	stat(pack_name, &sb);
	fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
	stat(idx_name, &sb);
	fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));

	fprintf(stderr, "\n");
1834 1835 1836

	return 0;
}