git-cvsimport.perl 28.9 KB
Newer Older
1
#!/usr/bin/perl
2

3 4 5 6 7 8 9 10 11 12 13 14 15
# This tool is copyright (c) 2005, Matthias Urlichs.
# It is released under the Gnu Public License, version 2.
#
# The basic idea is to aggregate CVS check-ins into related changes.
# Fortunately, "cvsps" does that for us; all we have to do is to parse
# its output.
#
# Checking out the files is done by a single long-running CVS connection
# / server process.
#
# The head revision is on branch "origin" by default.
# You can change that with the '-o' option.

16
use 5.008;
17 18
use strict;
use warnings;
19
use Getopt::Long;
20
use File::Spec;
21
use File::Temp qw(tempfile tmpnam);
22 23 24
use File::Path qw(mkpath);
use File::Basename qw(basename dirname);
use Time::Local;
M
Matthias Urlichs 已提交
25 26
use IO::Socket;
use IO::Pipe;
J
Jeff King 已提交
27
use POSIX qw(strftime dup2 ENOENT);
28
use IPC::Open2;
29 30 31 32

$SIG{'PIPE'}="IGNORE";
$ENV{'TZ'}="UTC";

33
our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34
my (%conv_author_name, %conv_author_email);
35

36 37 38
sub usage(;$) {
	my $msg = shift;
	print(STDERR "Error: $msg\n") if $msg;
39
	print STDERR <<END;
S
Stephan Beyer 已提交
40
Usage: git cvsimport     # fetch/update GIT from CVS
41
       [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 43
       [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
       [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44
       [-r remote] [-R] [CVS_module]
45 46 47 48
END
	exit(1);
}

49 50 51 52 53 54
sub read_author_info($) {
	my ($file) = @_;
	my $user;
	open my $f, '<', "$file" or die("Failed to open $file: $!\n");

	while (<$f>) {
55
		# Expected format is this:
56
		#   exon=Andreas Ericsson <ae@op5.se>
57
		if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58
			$user = $1;
59 60
			$conv_author_name{$user} = $2;
			$conv_author_email{$user} = $3;
61
		}
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
		# However, we also read from CVSROOT/users format
		# to ease migration.
		elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
			my $mapped;
			($user, $mapped) = ($1, $3);
			if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
				$conv_author_name{$user} = $1;
				$conv_author_email{$user} = $2;
			}
			elsif ($mapped =~ /^<?(.*)>?$/) {
				$conv_author_name{$user} = $user;
				$conv_author_email{$user} = $1;
			}
		}
		# NEEDSWORK: Maybe warn on unrecognized lines?
77 78 79 80 81 82 83 84 85 86
	}
	close ($f);
}

sub write_author_info($) {
	my ($file) = @_;
	open my $f, '>', $file or
	  die("Failed to open $file for writing: $!");

	foreach (keys %conv_author_name) {
87
		print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 89 90 91
	}
	close ($f);
}

92
# convert getopts specs for use by git config
93 94 95 96 97 98 99 100
my %longmap = (
	'A:' => 'authors-file',
	'M:' => 'merge-regex',
	'P:' => undef,
	'R' => 'track-revisions',
	'S:' => 'ignore-paths',
);

101
sub read_repo_config {
102 103
	# Split the string between characters, unless there is a ':'
	# So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 105 106 107
	my @opts = split(/ *(?!:)/, shift);
	foreach my $o (@opts) {
		my $key = $o;
		$key =~ s/://g;
108
		my $arg = 'git config';
109
		$arg .= ' --bool' if ($o !~ /:$/);
110
		my $ckey = $key;
111

112 113 114 115 116 117 118 119 120
		if (exists $longmap{$o}) {
			# An uppercase option like -R cannot be
			# expressed in the configuration, as the
			# variable names are downcased.
			$ckey = $longmap{$o};
			next if (! defined $ckey);
			$ckey =~ s/-//g;
		}
		chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121
		if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 123 124 125 126
			no strict 'refs';
			my $opt_name = "opt_" . $key;
			if (!$$opt_name) {
				$$opt_name = $tmp;
			}
127 128 129 130
		}
	}
}

131
my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132
read_repo_config($opts);
133 134 135 136 137 138
Getopt::Long::Configure( 'no_ignore_case', 'bundling' );

# turn the Getopt::Std specification in a Getopt::Long one,
# with support for multiple -M options
GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
    or usage();
139 140
usage if $opt_h;

141
if (@ARGV == 0) {
142
		chomp(my $module = `git config --get cvsimport.module`);
143 144
		push(@ARGV, $module) if $? == 0;
}
145
@ARGV <= 1 or usage("You can't specify more than one CVS module");
146

J
Junio C Hamano 已提交
147
if ($opt_d) {
M
Matthias Urlichs 已提交
148
	$ENV{"CVSROOT"} = $opt_d;
J
Junio C Hamano 已提交
149
} elsif (-f 'CVS/Root') {
150 151 152 153 154
	open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
	$opt_d = <$f>;
	chomp $opt_d;
	close $f;
	$ENV{"CVSROOT"} = $opt_d;
J
Junio C Hamano 已提交
155
} elsif ($ENV{"CVSROOT"}) {
M
Matthias Urlichs 已提交
156 157
	$opt_d = $ENV{"CVSROOT"};
} else {
158
	usage("CVSROOT needs to be set");
M
Matthias Urlichs 已提交
159
}
160
$opt_s ||= "-";
161 162
$opt_a ||= 0;

163
my $git_tree = $opt_C;
M
Matthias Urlichs 已提交
164 165
$git_tree ||= ".";

166 167 168 169 170 171 172 173 174
my $remote;
if (defined $opt_r) {
	$remote = 'refs/remotes/' . $opt_r;
	$opt_o ||= "master";
} else {
	$opt_o ||= "origin";
	$remote = 'refs/heads';
}

175 176 177 178
my $cvs_tree;
if ($#ARGV == 0) {
	$cvs_tree = $ARGV[0];
} elsif (-f 'CVS/Repository') {
J
Junio C Hamano 已提交
179
	open my $f, '<', 'CVS/Repository' or
180 181 182
	    die 'Failed to open CVS/Repository';
	$cvs_tree = <$f>;
	chomp $cvs_tree;
183
	close $f;
184
} else {
185
	usage("CVS module has to be specified");
186 187
}

188 189
our @mergerx = ();
if ($opt_m) {
190
	@mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
191
}
192 193
if (@opt_M) {
	push (@mergerx, map { qr/$_/ } @opt_M);
194 195
}

196 197 198 199 200
# Remember UTC of our starting time
# we'll want to avoid importing commits
# that are too recent
our $starttime = time();

201 202 203 204 205
select(STDERR); $|=1; select(STDOUT);


package CVSconn;
# Basic CVS dialog.
M
Matthias Urlichs 已提交
206
# We're only interested in connecting and downloading, so ...
207

208 209
use File::Spec;
use File::Temp qw(tempfile);
M
Matthias Urlichs 已提交
210 211
use POSIX qw(strftime dup2);

212
sub new {
J
Junio C Hamano 已提交
213
	my ($what,$repo,$subdir) = @_;
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
	$what=ref($what) if ref($what);

	my $self = {};
	$self->{'buffer'} = "";
	bless($self,$what);

	$repo =~ s#/+$##;
	$self->{'fullrep'} = $repo;
	$self->conn();

	$self->{'subdir'} = $subdir;
	$self->{'lines'} = undef;

	return $self;
}

sub conn {
	my $self = shift;
	my $repo = $self->{'fullrep'};
J
Junio C Hamano 已提交
233 234
	if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
		my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
235

J
Junio C Hamano 已提交
236 237
		my ($proxyhost,$proxyport);
		if ($param && ($param =~ m/proxy=([^;]+)/)) {
238 239 240
			$proxyhost = $1;
			# Default proxyport, if not specified, is 8080.
			$proxyport = 8080;
J
Junio C Hamano 已提交
241
			if ($ENV{"CVS_PROXY_PORT"}) {
242 243
				$proxyport = $ENV{"CVS_PROXY_PORT"};
			}
J
Junio C Hamano 已提交
244
			if ($param =~ m/proxyport=([^;]+)/) {
245 246 247
				$proxyport = $1;
			}
		}
248
		$repo ||= '/';
249

250 251
		# if username is not explicit in CVSROOT, then use current user, as cvs would
		$user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
M
Matthias Urlichs 已提交
252
		my $rr2 = "-";
J
Junio C Hamano 已提交
253
		unless ($port) {
254 255 256 257 258
			$rr2 = ":pserver:$user\@$serv:$repo";
			$port=2401;
		}
		my $rr = ":pserver:$user\@$serv:$port$repo";

259 260 261
		if ($pass) {
			$pass = $self->_scramble($pass);
		} else {
262 263
			open(H,$ENV{'HOME'}."/.cvspass") and do {
				# :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
J
Junio C Hamano 已提交
264
				while (<H>) {
265 266 267
					chomp;
					s/^\/\d+\s+//;
					my ($w,$p) = split(/\s/,$_,2);
J
Junio C Hamano 已提交
268
					if ($w eq $rr or $w eq $rr2) {
269 270 271 272 273
						$pass = $p;
						last;
					}
				}
			};
274
			$pass = "A" unless $pass;
275
		}
276

277
		my ($s, $rep);
J
Junio C Hamano 已提交
278
		if ($proxyhost) {
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293

			# Use a HTTP Proxy. Only works for HTTP proxies that
			# don't require user authentication
			#
			# See: http://www.ietf.org/rfc/rfc2817.txt

			$s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
			die "Socket to $proxyhost: $!\n" unless defined $s;
			$s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
	                        or die "Write to $proxyhost: $!\n";
	                $s->flush();

			$rep = <$s>;

			# The answer should look like 'HTTP/1.x 2yy ....'
J
Junio C Hamano 已提交
294
			if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
295 296 297 298 299 300 301 302 303 304 305 306 307 308
				die "Proxy connect: $rep\n";
			}
			# Skip up to the empty line of the proxy server output
			# including the response headers.
			while ($rep = <$s>) {
				last if (!defined $rep ||
					 $rep eq "\n" ||
					 $rep eq "\r\n");
			}
		} else {
			$s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
			die "Socket to $serv: $!\n" unless defined $s;
		}

309 310 311 312
		$s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
			or die "Write to $serv: $!\n";
		$s->flush();

313
		$rep = <$s>;
314

J
Junio C Hamano 已提交
315
		if ($rep ne "I LOVE YOU\n") {
316 317 318 319 320
			$rep="<unknown>" unless $rep;
			die "AuthReply: $rep\n";
		}
		$self->{'socketo'} = $s;
		$self->{'socketi'} = $s;
S
Sven Verdoolaege 已提交
321
	} else { # local or ext: Fork off our own cvs server.
322 323 324 325
		my $pr = IO::Pipe->new();
		my $pw = IO::Pipe->new();
		my $pid = fork();
		die "Fork: $!\n" unless defined $pid;
S
Sven Verdoolaege 已提交
326 327
		my $cvs = 'cvs';
		$cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
S
Sven Verdoolaege 已提交
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
		my $rsh = 'rsh';
		$rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};

		my @cvs = ($cvs, 'server');
		my ($local, $user, $host);
		$local = $repo =~ s/:local://;
		if (!$local) {
		    $repo =~ s/:ext://;
		    $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
		    ($user, $host) = ($1, $2);
		}
		if (!$local) {
		    if ($user) {
			unshift @cvs, $rsh, '-l', $user, $host;
		    } else {
			unshift @cvs, $rsh, $host;
		    }
		}

J
Junio C Hamano 已提交
347
		unless ($pid) {
348 349 350 351 352 353
			$pr->writer();
			$pw->reader();
			dup2($pw->fileno(),0);
			dup2($pr->fileno(),1);
			$pr->close();
			$pw->close();
S
Sven Verdoolaege 已提交
354
			exec(@cvs);
355 356 357 358 359 360 361 362 363
		}
		$pw->writer();
		$pr->reader();
		$self->{'socketo'} = $pw;
		$self->{'socketi'} = $pr;
	}
	$self->{'socketo'}->write("Root $repo\n");

	# Trial and error says that this probably is the minimum set
364
	$self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
365 366 367 368 369

	$self->{'socketo'}->write("valid-requests\n");
	$self->{'socketo'}->flush();

	chomp(my $rep=$self->readline());
J
Junio C Hamano 已提交
370
	if ($rep !~ s/^Valid-requests\s*//) {
371 372 373 374 375 376 377 378 379 380 381
		$rep="<unknown>" unless $rep;
		die "Expected Valid-requests from server, but got: $rep\n";
	}
	chomp(my $res=$self->readline());
	die "validReply: $res\n" if $res ne "ok";

	$self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
	$self->{'repo'} = $repo;
}

sub readline {
J
Junio C Hamano 已提交
382
	my ($self) = @_;
383 384 385 386 387 388
	return $self->{'socketi'}->getline();
}

sub _file {
	# Request a file with a given revision.
	# Trial and error says this is a good way to do it. :-/
J
Junio C Hamano 已提交
389
	my ($self,$fn,$rev) = @_;
390 391
	$self->{'socketo'}->write("Argument -N\n") or return undef;
	$self->{'socketo'}->write("Argument -P\n") or return undef;
392 393 394 395
	# -kk: Linus' version doesn't use it - defaults to off
	if ($opt_k) {
	    $self->{'socketo'}->write("Argument -kk\n") or return undef;
	}
396 397 398 399 400 401
	$self->{'socketo'}->write("Argument -r\n") or return undef;
	$self->{'socketo'}->write("Argument $rev\n") or return undef;
	$self->{'socketo'}->write("Argument --\n") or return undef;
	$self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
	$self->{'socketo'}->write("Directory .\n") or return undef;
	$self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
402
	# $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
403 404 405 406 407 408 409 410
	$self->{'socketo'}->write("co\n") or return undef;
	$self->{'socketo'}->flush() or return undef;
	$self->{'lines'} = 0;
	return 1;
}
sub _line {
	# Read a line from the server.
	# ... except that 'line' may be an entire file. ;-)
J
Junio C Hamano 已提交
411
	my ($self, $fh) = @_;
412 413 414
	die "Not in lines" unless defined $self->{'lines'};

	my $line;
415
	my $res=0;
J
Junio C Hamano 已提交
416
	while (defined($line = $self->readline())) {
417 418 419 420 421 422 423 424
		# M U gnupg-cvs-rep/AUTHORS
		# Updated gnupg-cvs-rep/
		# /daten/src/rsync/gnupg-cvs-rep/AUTHORS
		# /AUTHORS/1.1///T1.1
		# u=rw,g=rw,o=rw
		# 0
		# ok

J
Junio C Hamano 已提交
425
		if ($line =~ s/^(?:Created|Updated) //) {
426 427 428 429 430 431 432 433 434
			$line = $self->readline(); # path
			$line = $self->readline(); # Entries line
			my $mode = $self->readline(); chomp $mode;
			$self->{'mode'} = $mode;
			defined (my $cnt = $self->readline())
				or die "EOF from server after 'Changed'\n";
			chomp $cnt;
			die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
			$line="";
435
			$res = $self->_fetchfile($fh, $cnt);
J
Junio C Hamano 已提交
436
		} elsif ($line =~ s/^ //) {
437 438
			print $fh $line;
			$res += length($line);
J
Junio C Hamano 已提交
439
		} elsif ($line =~ /^M\b/) {
440
			# output, do nothing
J
Junio C Hamano 已提交
441
		} elsif ($line =~ /^Mbinary\b/) {
442 443 444 445 446
			my $cnt;
			die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
			chomp $cnt;
			die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
			$line="";
447
			$res += $self->_fetchfile($fh, $cnt);
448 449
		} else {
			chomp $line;
J
Junio C Hamano 已提交
450
			if ($line eq "ok") {
451 452
				# print STDERR "S: ok (".length($res).")\n";
				return $res;
J
Junio C Hamano 已提交
453
			} elsif ($line =~ s/^E //) {
454
				# print STDERR "S: $line\n";
J
Junio C Hamano 已提交
455
			} elsif ($line =~ /^(Remove-entry|Removed) /i) {
456 457 458 459 460
				$line = $self->readline(); # filename
				$line = $self->readline(); # OK
				chomp $line;
				die "Unknown: $line" if $line ne "ok";
				return -1;
461 462 463 464 465
			} else {
				die "Unknown: $line\n";
			}
		}
	}
M
Martin Mares 已提交
466
	return undef;
467 468
}
sub file {
J
Junio C Hamano 已提交
469
	my ($self,$fn,$rev) = @_;
470 471
	my $res;

J
Junio C Hamano 已提交
472
	my ($fh, $name) = tempfile('gitcvs.XXXXXX',
473 474 475 476 477
		    DIR => File::Spec->tmpdir(), UNLINK => 1);

	$self->_file($fn,$rev) and $res = $self->_line($fh);

	if (!defined $res) {
M
Martin Mares 已提交
478 479
	    print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
	    truncate $fh, 0;
480
	    $self->conn();
M
Martin Mares 已提交
481
	    $self->_file($fn,$rev) or die "No file command send";
482
	    $res = $self->_line($fh);
M
Martin Mares 已提交
483
	    die "Retry failed" unless defined $res;
484
	}
485
	close ($fh);
486

487
	return ($name, $res);
488
}
489 490
sub _fetchfile {
	my ($self, $fh, $cnt) = @_;
491
	my $res = 0;
492
	my $bufsize = 1024 * 1024;
J
Junio C Hamano 已提交
493
	while ($cnt) {
494 495 496 497 498 499 500 501 502 503 504 505
	    if ($bufsize > $cnt) {
		$bufsize = $cnt;
	    }
	    my $buf;
	    my $num = $self->{'socketi'}->read($buf,$bufsize);
	    die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
	    print $fh $buf;
	    $res += $num;
	    $cnt -= $num;
	}
	return $res;
}
506

507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
sub _scramble {
	my ($self, $pass) = @_;
	my $scrambled = "A";

	return $scrambled unless $pass;

	my $pass_len = length($pass);
	my @pass_arr = split("", $pass);
	my $i;

	# from cvs/src/scramble.c
	my @shifts = (
		  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
		 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
		114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
		111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
		 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
		125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
		 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
		 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
		225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
		199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
		174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
		207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
		192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
		227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
		182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
		243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
	);

	for ($i = 0; $i < $pass_len; $i++) {
		$scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
	}

	return $scrambled;
}
543 544 545

package main;

M
Matthias Urlichs 已提交
546
my $cvs = CVSconn->new($opt_d, $cvs_tree);
547 548 549


sub pdate($) {
J
Junio C Hamano 已提交
550
	my ($d) = @_;
551 552 553 554
	m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
		or die "Unparseable date: $d\n";
	my $y=$1; $y-=1900 if $y>1900;
	return timegm($6||0,$5,$4,$3,$2-1,$y);
555 556
}

557
sub pmode($) {
J
Junio C Hamano 已提交
558
	my ($mode) = @_;
559 560 561 562
	my $m = 0;
	my $mm = 0;
	my $um = 0;
	for my $x(split(//,$mode)) {
J
Junio C Hamano 已提交
563
		if ($x eq ",") {
564 565 566
			$m |= $mm&$um;
			$mm = 0;
			$um = 0;
J
Junio C Hamano 已提交
567 568 569 570 571 572 573
		} elsif ($x eq "u") { $um |= 0700;
		} elsif ($x eq "g") { $um |= 0070;
		} elsif ($x eq "o") { $um |= 0007;
		} elsif ($x eq "r") { $mm |= 0444;
		} elsif ($x eq "w") { $mm |= 0222;
		} elsif ($x eq "x") { $mm |= 0111;
		} elsif ($x eq "=") { # do nothing
574 575 576 577 578 579
		} else { die "Unknown mode: $mode\n";
		}
	}
	$m |= $mm&$um;
	return $m;
}
580

581 582 583 584
sub getwd() {
	my $pwd = `pwd`;
	chomp $pwd;
	return $pwd;
585 586
}

J
Jeff King 已提交
587 588 589 590
sub is_sha1 {
	my $s = shift;
	return $s =~ /^[a-f0-9]{40}$/;
}
591

592 593 594 595 596 597
sub get_headref ($) {
	my $name = shift;
	my $r = `git rev-parse --verify '$name' 2>/dev/null`;
	return undef unless $? == 0;
	chomp $r;
	return $r;
598 599
}

600 601 602 603 604 605 606 607
my $user_filename_prepend = '';
sub munge_user_filename {
	my $name = shift;
	return File::Spec->file_name_is_absolute($name) ?
		$name :
		$user_filename_prepend . $name;
}

608 609 610
-d $git_tree
	or mkdir($git_tree,0777)
	or die "Could not create $git_tree: $!";
611 612 613 614
if ($git_tree ne '.') {
	$user_filename_prepend = getwd() . '/';
	chdir($git_tree);
}
615

616
my $last_branch = "";
617
my $orig_branch = "";
618
my %branch_date;
619
my $tip_at_start = undef;
620 621 622 623

my $git_dir = $ENV{"GIT_DIR"} || ".git";
$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
$ENV{"GIT_DIR"} = $git_dir;
624 625
my $orig_git_index;
$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
626 627

my %index; # holds filenames of one index per branch
628

J
Junio C Hamano 已提交
629
unless (-d $git_dir) {
630
	system(qw(git init));
631
	die "Cannot init the GIT db at $git_tree: $?\n" if $?;
632
	system(qw(git read-tree --empty));
633 634 635
	die "Cannot init an empty tree: $?\n" if $?;

	$last_branch = $opt_o;
636
	$orig_branch = "";
637
} else {
638
	open(F, "-|", qw(git symbolic-ref HEAD)) or
639
		die "Cannot run git symbolic-ref: $!\n";
P
Pavel Roskin 已提交
640 641 642
	chomp ($last_branch = <F>);
	$last_branch = basename($last_branch);
	close(F);
J
Junio C Hamano 已提交
643
	unless ($last_branch) {
644 645 646 647
		warn "Cannot read the last branch name: $! -- assuming 'master'\n";
		$last_branch = "master";
	}
	$orig_branch = $last_branch;
648
	$tip_at_start = `git rev-parse --verify HEAD`;
649 650

	# Get the last import timestamps
651
	my $fmt = '($ref, $author) = (%(refname), %(author));';
652 653
	my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
	open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
J
Junio C Hamano 已提交
654
	while (defined(my $entry = <H>)) {
655 656
		my ($ref, $author);
		eval($entry) || die "cannot eval refs list: $@";
657
		my ($head) = ($ref =~ m|^$remote/(.*)|);
658 659
		$author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
		$branch_date{$head} = $1;
660
	}
661
	close(H);
662 663 664 665 666
        if (!exists $branch_date{$opt_o}) {
		die "Branch '$opt_o' does not exist.\n".
		       "Either use the correct '-o branch' option,\n".
		       "or import to a new repository.\n";
        }
667 668 669 670 671
}

-d $git_dir
	or die "Could not create git subdir ($git_dir).\n";

672 673 674 675
# now we read (and possibly save) author-info as well
-f "$git_dir/cvs-authors" and
  read_author_info("$git_dir/cvs-authors");
if ($opt_A) {
676
	read_author_info(munge_user_filename($opt_A));
677 678 679
	write_author_info("$git_dir/cvs-authors");
}

680 681 682 683 684
# open .git/cvs-revisions, if requested
open my $revision_map, '>>', "$git_dir/cvs-revisions"
    or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
	if defined $opt_R;

685 686 687 688 689

#
# run cvsps into a file unless we are getting
# it passed as a file via $opt_P
#
690
my $cvspsfile;
691 692 693
unless ($opt_P) {
	print "Running cvsps...\n" if $opt_v;
	my $pid = open(CVSPS,"-|");
694
	my $cvspsfh;
695
	die "Cannot fork: $!\n" unless defined $pid;
J
Junio C Hamano 已提交
696
	unless ($pid) {
697 698 699 700 701 702 703 704 705
		my @opt;
		@opt = split(/,/,$opt_p) if defined $opt_p;
		unshift @opt, '-z', $opt_z if defined $opt_z;
		unshift @opt, '-q'         unless defined $opt_v;
		unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
			push @opt, '--cvs-direct';
		}
		exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
		die "Could not start cvsps: $!\n";
706
	}
707 708
	($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
					  DIR => File::Spec->tmpdir());
709 710
	while (<CVSPS>) {
	    print $cvspsfh $_;
711
	}
712
	close CVSPS;
713
	$? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
714
	close $cvspsfh;
715
} else {
716
	$cvspsfile = munge_user_filename($opt_P);
717 718
}

719
open(CVS, "<$cvspsfile") or die $!;
720

721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
## cvsps output:
#---------------------
#PatchSet 314
#Date: 1999/09/18 13:03:59
#Author: wkoch
#Branch: STABLE-BRANCH-1-0
#Ancestor branch: HEAD
#Tag: (none)
#Log:
#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999  Werner Koch
#Members:
#	README:1.57->1.57.2.1
#	VERSION:1.96->1.96.2.1
#
#---------------------

my $state = 0;

J
Jeff King 已提交
739 740 741
sub update_index (\@\@) {
	my $old = shift;
	my $new = shift;
742 743
	open(my $fh, '|-', qw(git update-index -z --index-info))
		or die "unable to open git update-index: $!";
744 745
	print $fh
		(map { "0 0000000000000000000000000000000000000000\t$_\0" }
J
Jeff King 已提交
746
			@$old),
747
		(map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
J
Jeff King 已提交
748
			@$new)
749
		or die "unable to write to git update-index: $!";
750
	close $fh
751 752
		or die "unable to write to git update-index: $!";
	$? and die "git update-index reported error: $?";
J
Jeff King 已提交
753
}
754

J
Jeff King 已提交
755
sub write_tree () {
756
	open(my $fh, '-|', qw(git write-tree))
757
		or die "unable to open git write-tree: $!";
J
Jeff King 已提交
758 759 760 761
	chomp(my $tree = <$fh>);
	is_sha1($tree)
		or die "Cannot get tree id ($tree): $!";
	close($fh)
762
		or die "Error running git write-tree: $?\n";
763
	print "Tree ID $tree\n" if $opt_v;
J
Jeff King 已提交
764 765
	return $tree;
}
766

J
Junio C Hamano 已提交
767
my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
768
my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
769 770 771 772

# commits that cvsps cannot place anywhere...
$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;

J
Jeff King 已提交
773
sub commit {
774 775
	if ($branch eq $opt_o && !$index{branch} &&
		!get_headref("$remote/$branch")) {
776
	    # looks like an initial commit
777
	    # use the index primed by git init
778 779
	    $ENV{GIT_INDEX_FILE} = "$git_dir/index";
	    $index{$branch} = "$git_dir/index";
780 781 782 783 784 785 786
	} else {
	    # use an index per branch to speed up
	    # imports of projects with many branches
	    unless ($index{$branch}) {
		$index{$branch} = tmpnam();
		$ENV{GIT_INDEX_FILE} = $index{$branch};
		if ($ancestor) {
787
		    system("git", "read-tree", "$remote/$ancestor");
788
		} else {
789
		    system("git", "read-tree", "$remote/$branch");
790 791 792 793 794 795
		}
		die "read-tree failed: $?\n" if $?;
	    }
	}
        $ENV{GIT_INDEX_FILE} = $index{$branch};

J
Jeff King 已提交
796 797 798
	update_index(@old, @new);
	@old = @new = ();
	my $tree = write_tree();
799
	my $parent = get_headref("$remote/$last_branch");
J
Jeff King 已提交
800 801 802 803 804 805 806 807 808 809
	print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;

	my @commit_args;
	push @commit_args, ("-p", $parent) if $parent;

	# loose detection of merges
	# based on the commit msg
	foreach my $rx (@mergerx) {
		next unless $logmsg =~ $rx && $1;
		my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
810
		if (my $sha1 = get_headref("$remote/$mparent")) {
811
			push @commit_args, '-p', "$remote/$mparent";
J
Jeff King 已提交
812
			print "Merge parent branch: $mparent\n" if $opt_v;
813
		}
814
	}
J
Jeff King 已提交
815 816

	my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
817 818 819 820 821 822
	$ENV{GIT_AUTHOR_NAME} = $author_name;
	$ENV{GIT_AUTHOR_EMAIL} = $author_email;
	$ENV{GIT_AUTHOR_DATE} = $commit_date;
	$ENV{GIT_COMMITTER_NAME} = $author_name;
	$ENV{GIT_COMMITTER_EMAIL} = $author_email;
	$ENV{GIT_COMMITTER_DATE} = $commit_date;
J
Jeff King 已提交
823
	my $pid = open2(my $commit_read, my $commit_write,
824
		'git', 'commit-tree', $tree, @commit_args);
825 826 827 828 829

	# compatibility with git2cvs
	substr($logmsg,32767) = "" if length($logmsg) > 32767;
	$logmsg =~ s/[\s\n]+\z//;

830 831 832
	if (@skipped) {
	    $logmsg .= "\n\n\nSKIPPED:\n\t";
	    $logmsg .= join("\n\t", @skipped) . "\n";
M
Martin Langhoff 已提交
833
	    @skipped = ();
834 835
	}

J
Jeff King 已提交
836
	print($commit_write "$logmsg\n") && close($commit_write)
837
		or die "Error writing to git commit-tree: $!\n";
M
Matthias Urlichs 已提交
838

J
Jeff King 已提交
839 840 841
	print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
	chomp(my $cid = <$commit_read>);
	is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
842
	print "Commit ID $cid\n" if $opt_v;
J
Jeff King 已提交
843
	close($commit_read);
M
Matthias Urlichs 已提交
844 845

	waitpid($pid,0);
846
	die "Error running git commit-tree: $?\n" if $?;
847

848
	system('git' , 'update-ref', "$remote/$branch", $cid) == 0
849 850
		or die "Cannot write branch $branch for update: $!\n";

851 852 853 854 855
	if ($revision_map) {
		print $revision_map "@$_ $cid\n" for @commit_revisions;
	}
	@commit_revisions = ();

J
Junio C Hamano 已提交
856 857
	if ($tag) {
	        my ($xtag) = $tag;
858 859
		$xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
		$xtag =~ tr/_/\./ if ( $opt_u );
860
		$xtag =~ s/[\/]/$opt_s/g;
861
		$xtag =~ s/\[//g;
J
Junio C Hamano 已提交
862

863
		system('git' , 'tag', '-f', $xtag, $cid) == 0
864 865 866
			or die "Cannot create tag $xtag: $!\n";

		print "Created tag '$xtag' on '$branch'\n" if $opt_v;
867 868 869
	}
};

870
my $commitcount = 1;
J
Junio C Hamano 已提交
871
while (<CVS>) {
872
	chomp;
J
Junio C Hamano 已提交
873
	if ($state == 0 and /^-+$/) {
874
		$state = 1;
J
Junio C Hamano 已提交
875
	} elsif ($state == 0) {
876 877
		$state = 1;
		redo;
J
Junio C Hamano 已提交
878
	} elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
879 880
		$patchset = 0+$_;
		$state=2;
J
Junio C Hamano 已提交
881
	} elsif ($state == 2 and s/^Date:\s+//) {
882
		$date = pdate($_);
J
Junio C Hamano 已提交
883
		unless ($date) {
884 885 886 887 888
			print STDERR "Could not parse date: $_\n";
			$state=0;
			next;
		}
		$state=3;
J
Junio C Hamano 已提交
889
	} elsif ($state == 3 and s/^Author:\s+//) {
890
		s/\s+$//;
891 892
		if (/^(.*?)\s+<(.*)>/) {
		    ($author_name, $author_email) = ($1, $2);
893 894 895
		} elsif ($conv_author_name{$_}) {
			$author_name = $conv_author_name{$_};
			$author_email = $conv_author_email{$_};
896 897 898
		} else {
		    $author_name = $author_email = $_;
		}
899
		$state = 4;
J
Junio C Hamano 已提交
900
	} elsif ($state == 4 and s/^Branch:\s+//) {
901
		s/\s+$//;
902
		tr/_/\./ if ( $opt_u );
903
		s/[\/]/$opt_s/g;
904 905
		$branch = $_;
		$state = 5;
J
Junio C Hamano 已提交
906
	} elsif ($state == 5 and s/^Ancestor branch:\s+//) {
907 908
		s/\s+$//;
		$ancestor = $_;
909
		$ancestor = $opt_o if $ancestor eq "HEAD";
910
		$state = 6;
J
Junio C Hamano 已提交
911
	} elsif ($state == 5) {
912 913 914
		$ancestor = undef;
		$state = 6;
		redo;
J
Junio C Hamano 已提交
915
	} elsif ($state == 6 and s/^Tag:\s+//) {
916
		s/\s+$//;
J
Junio C Hamano 已提交
917
		if ($_ eq "(none)") {
918 919 920 921 922
			$tag = undef;
		} else {
			$tag = $_;
		}
		$state = 7;
J
Junio C Hamano 已提交
923
	} elsif ($state == 7 and /^Log:/) {
924 925
		$logmsg = "";
		$state = 8;
J
Junio C Hamano 已提交
926
	} elsif ($state == 8 and /^Members:/) {
927
		$branch = $opt_o if $branch eq "HEAD";
J
Junio C Hamano 已提交
928
		if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
929
			# skip
930
			print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
931 932 933
			$state = 11;
			next;
		}
934
		if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
935
			# skip if the commit is too recent
936
			# given that the cvsps default fuzz is 300s, we give ourselves another
937 938 939 940 941 942
			# 300s just in case -- this also prevents skipping commits
			# due to server clock drift
			print "skip patchset $patchset: $date too recent\n" if $opt_v;
			$state = 11;
			next;
		}
943 944 945 946 947
		if (exists $ignorebranch{$branch}) {
			print STDERR "Skipping $branch\n";
			$state = 11;
			next;
		}
J
Junio C Hamano 已提交
948 949
		if ($ancestor) {
			if ($ancestor eq $branch) {
950 951 952
				print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
				$ancestor = $opt_o;
			}
953
			if (defined get_headref("$remote/$branch")) {
954 955 956 957
				print STDERR "Branch $branch already exists!\n";
				$state=11;
				next;
			}
958 959
			my $id = get_headref("$remote/$ancestor");
			if (!$id) {
960
				print STDERR "Branch $ancestor does not exist!\n";
961
				$ignorebranch{$branch} = 1;
962 963 964
				$state=11;
				next;
			}
965 966 967 968 969

			system(qw(git update-ref -m cvsimport),
				"$remote/$branch", $id);
			if($? != 0) {
				print STDERR "Could not create branch $branch\n";
970
				$ignorebranch{$branch} = 1;
971 972 973 974
				$state=11;
				next;
			}
		}
975
		$last_branch = $branch if $branch ne $last_branch;
976
		$state = 9;
J
Junio C Hamano 已提交
977
	} elsif ($state == 8) {
978
		$logmsg .= "$_\n";
J
Junio C Hamano 已提交
979
	} elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
980
#	VERSION:1.96->1.96.2.1
M
Matthias Urlichs 已提交
981
		my $init = ($2 eq "INITIAL");
982
		my $fn = $1;
M
Matthias Urlichs 已提交
983 984
		my $rev = $3;
		$fn =~ s#^/+##;
985 986 987 988 989
		if ($opt_S && $fn =~ m/$opt_S/) {
		    print "SKIPPING $fn v $rev\n";
		    push(@skipped, $fn);
		    next;
		}
990
		push @commit_revisions, [$fn, $rev];
991
		print "Fetching $fn   v $rev\n" if $opt_v;
992
		my ($tmpname, $size) = $cvs->file($fn,$rev);
J
Junio C Hamano 已提交
993
		if ($size == -1) {
994 995 996 997
			push(@old,$fn);
			print "Drop $fn\n" if $opt_v;
		} else {
			print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
998 999 1000
			my $pid = open(my $F, '-|');
			die $! unless defined $pid;
			if (!$pid) {
1001
			    exec("git", "hash-object", "-w", $tmpname)
1002
				or die "Cannot create object: $!\n";
1003
			}
1004 1005 1006 1007 1008 1009
			my $sha = <$F>;
			chomp $sha;
			close $F;
			my $mode = pmode($cvs->{'mode'});
			push(@new,[$mode, $sha, $fn]); # may be resurrected!
		}
1010
		unlink($tmpname);
J
Junio C Hamano 已提交
1011
	} elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
M
Matthias Urlichs 已提交
1012
		my $fn = $1;
1013
		my $rev = $2;
M
Matthias Urlichs 已提交
1014
		$fn =~ s#^/+##;
1015
		push @commit_revisions, [$fn, $rev];
M
Matthias Urlichs 已提交
1016
		push(@old,$fn);
1017
		print "Delete $fn\n" if $opt_v;
J
Junio C Hamano 已提交
1018
	} elsif ($state == 9 and /^\s*$/) {
1019
		$state = 10;
J
Junio C Hamano 已提交
1020
	} elsif (($state == 9 or $state == 10) and /^-+$/) {
1021 1022
		$commitcount++;
		if ($opt_L && $commitcount > $opt_L) {
1023 1024
			last;
		}
1025
		commit();
1026
		if (($commitcount & 1023) == 0) {
1027
			system(qw(git repack -a -d));
1028
		}
1029
		$state = 1;
J
Junio C Hamano 已提交
1030
	} elsif ($state == 11 and /^-+$/) {
1031
		$state = 1;
J
Junio C Hamano 已提交
1032
	} elsif (/^-+$/) { # end of unknown-line processing
1033
		$state = 1;
J
Junio C Hamano 已提交
1034
	} elsif ($state != 11) { # ignore stuff when skipping
1035
		print STDERR "* UNKNOWN LINE * $_\n";
1036 1037
	}
}
1038
commit() if $branch and $state != 11;
1039

1040 1041 1042 1043
unless ($opt_P) {
	unlink($cvspsfile);
}

1044 1045 1046
# The heuristic of repacking every 1024 commits can leave a
# lot of unpacked data.  If there is more than 1MB worth of
# not-packed objects, repack once more.
1047
my $line = `git count-objects`;
1048 1049 1050
if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
  my ($n_objects, $kb) = ($1, $2);
  1024 < $kb
1051
    and system(qw(git repack -a -d));
1052 1053
}

1054
foreach my $git_index (values %index) {
1055
    if ($git_index ne "$git_dir/index") {
1056 1057
	unlink($git_index);
    }
1058
}
1059

1060 1061 1062 1063 1064 1065
if (defined $orig_git_index) {
	$ENV{GIT_INDEX_FILE} = $orig_git_index;
} else {
	delete $ENV{GIT_INDEX_FILE};
}

1066
# Now switch back to the branch we were in before all of this happened
J
Junio C Hamano 已提交
1067
if ($orig_branch) {
1068 1069 1070 1071
	print "DONE.\n" if $opt_v;
	if ($opt_i) {
		exit 0;
	}
1072
	my $tip_at_end = `git rev-parse --verify HEAD`;
1073
	if ($tip_at_start ne $tip_at_end) {
1074
		for ($tip_at_start, $tip_at_end) { chomp; }
1075
		print "Fetched into the current branch.\n" if $opt_v;
1076
		system(qw(git read-tree -u -m),
1077 1078 1079 1080
		       $tip_at_start, $tip_at_end);
		die "Fast-forward update failed: $?\n" if $?;
	}
	else {
1081
		system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1082 1083
		die "Could not merge $opt_o into the current branch.\n" if $?;
	}
1084 1085 1086
} else {
	$orig_branch = "master";
	print "DONE; creating $orig_branch branch\n" if $opt_v;
1087
	system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1088
		unless defined get_headref('refs/heads/master');
1089
	system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1090
		if ($opt_r && $opt_o ne 'HEAD');
1091
	system('git', 'update-ref', 'HEAD', "$orig_branch");
1092
	unless ($opt_i) {
1093
		system(qw(git checkout -f));
1094 1095
		die "checkout failed: $?\n" if $?;
	}
1096
}