From 1ebfaaad6545a130839ecfebcd586e41c49e9e5a Mon Sep 17 00:00:00 2001 From: Steve McIntyre Date: Tue, 12 Jun 2018 23:43:44 +0100 Subject: YA attempt to fix up our list of git hashes for the webwml repo The bugs I've been seeing were not consistent, which was annoying... Listing all merge commits left us with phantom commits showing up due to merge noise. Leaving out all the merge commits caused us to miss a load of the older commits. Eureka! The problem *seems* to be the way that the old CVS commits have been converted into git commits. Lots of them show up as merge commits due to the way cvs2git worked. So we *do* need to include merge commits there. However, we *don't* want to include merge commits for the stuff that's been committed directly into git after the transition. So, there's a hacky solution which actually seems to work well! Call "git log" twice: * Once on the newer commits, without the merge commits * Once on the older commits, including merge commits I've encoded the knowledge of the point where the conversion happened so we can do this. The results look good, and we seem to have a clean set of commit hashes all over our history. There are a few translated files that I'll need to fix after this change - places where the translators were misled into updating their translation-check headers due to the old broken behaviour. Sorry! :-/ --- Perl/Local/VCS_git.pm | 79 ++++++++++++++++++--------------------------------- 1 file changed, 27 insertions(+), 52 deletions(-) (limited to 'Perl') diff --git a/Perl/Local/VCS_git.pm b/Perl/Local/VCS_git.pm index fb4049630c2..9c76c0ba969 100644 --- a/Perl/Local/VCS_git.pm +++ b/Perl/Local/VCS_git.pm @@ -63,6 +63,15 @@ my $cache_db = ".git-revs-cache.db"; my $cache_lock = ".git-revs-cache.lock"; my $git_index = ".git/index"; +# The timestamp where we finished the CVS to git transition. Before +# this date, we need to include merge commits in our output (due to +# the way the transition code worked). After this date, we do *not* +# want to include merge commits. Ugh... :-/ +my $timestamp_cvs2git = 1527734365; +my $git_log_common = 'git log --no-renames --name-only --numstat --format=format:"%H %ct"'; +my $git_log_old = "$git_log_common --before $timestamp_cvs2git -m --first-parent"; +my $git_log_new = "$git_log_common --since $timestamp_cvs2git --no-merges"; + use strict; use warnings; @@ -189,28 +198,17 @@ sub cache_file { _safe_chdir($topdir, $startdir) or die "Can't chdir to $topdir: $!\n"; my (@commits); - # We want to see all the commits, but we don't want the noise from - # merges. Track what commits we've seen so we can ignore the - # "right-hand" commit files - my %commits_seen; - open (GITLOG, "git log --no-renames -m --name-only --numstat --format=format:\"%H %ct\" -- $file |") or die "Can't fork git log: $!\n"; + # Complication of the cvs2git transition. We want merges from before that, but not since. + open (GITLOG, "($git_log_new -- $file ; $git_log_old -- $file) |") or die "Can't fork git log: $!\n"; my ($cmt_date, $cmt_rev); while (my $line = ) { chomp $line; if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { $cmt_rev = $1; $cmt_date = $2; - if ($commits_seen{$cmt_rev}) { - $commits_seen{$cmt_rev}++; - $self->_debug("Seen $cmt_rev again, ignoring"); - } else { - $commits_seen{$cmt_rev} = 1; - } next; } elsif ($line =~ m{^$file$}) { - if ($commits_seen{$cmt_rev} and $commits_seen{$cmt_rev} == 1) { - $self->_add_cache_entry($file, $cmt_date, $cmt_rev); - } + $self->_add_cache_entry($file, $cmt_date, $cmt_rev); } } close GITLOG; @@ -333,31 +331,19 @@ sub cache_repo { # print __LINE__ . ": " . Dumper(%cache); my (@commits); - # We want to see all the commits, but we don't want the noise from - # merges. Track what commits we've seen so we can ignore the - # "right-hand" commit files - my %commits_seen; my $count = 0; - open (GITLOG, "git log --no-renames -m --name-only --numstat --format=format:\"%H %ct\" |") or die "Can't fork git log: $!\n"; + open (GITLOG, "($git_log_new ; $git_log_old) |") or die "Can't fork git log: $!\n"; my ($cmt_date, $cmt_rev); while (my $line = ) { chomp $line; if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { $cmt_rev = $1; $cmt_date = $2; - if ($commits_seen{$cmt_rev}) { - $commits_seen{$cmt_rev}++; - $self->_debug("Seen $cmt_rev again, ignoring"); - } else { - $commits_seen{$cmt_rev} = 1; - } next; } elsif ($line =~ m{^(\S+)$}) { my $file = $1; - if ($commits_seen{$cmt_rev} and $commits_seen{$cmt_rev} == 1) { - $self->_add_cache_entry($file, $cmt_date, $cmt_rev); - $count++; - } + $self->_add_cache_entry($file, $cmt_date, $cmt_rev); + $count++; } } close GITLOG; @@ -718,13 +704,9 @@ sub path_info } } } else { - # We want to see all the commits, but we don't want the noise from - # merges. Track what commits we've seen so we can ignore the - # "right-hand" commit files - my %commits_seen; # We don't, so we need to talk to git. (2a above) - open (GITLOG, "git log --no-renames -m --name-only --numstat --format=format:\"%H %ct\" $dir|") - or die "Failed to fork git log: $!\n"; + open (GITLOG, "($git_log_new $dir; $git_log_old $dir) |") + or die "Can't fork git log: $!\n"; my $cmt_date; my $cmt_rev; my $file; @@ -733,25 +715,17 @@ sub path_info if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { $cmt_rev = $1; $cmt_date = $2; - if ($commits_seen{$cmt_rev}) { - $commits_seen{$cmt_rev}++; - $self->_debug("Seen $cmt_rev again, ignoring"); - } else { - $commits_seen{$cmt_rev} = 1; - } next; } elsif ($line =~ m{^$dir/(\S+)$}) { - if ($commits_seen{$cmt_rev} and $commits_seen{$cmt_rev} == 1) { - $file = $1; - # Only store information if: - # We want this file, and - # We don't have data for it yet (i.e. only show - # the most recent version of a file) - if ($files_wanted{"$dir/$file"} and not defined $pathinfo{$file}) { - $pathinfo{$file}{'type'} = _typeoffile("$dir/$file"); - $pathinfo{$file}{'cmt_date'} = $cmt_date; - $pathinfo{$file}{'cmt_rev'} = $cmt_rev; - } + $file = $1; + # Only store information if: + # We want this file, and + # We don't have data for it yet (i.e. only show + # the most recent version of a file) + if ($files_wanted{"$dir/$file"} and not defined $pathinfo{$file}) { + $pathinfo{$file}{'type'} = _typeoffile("$dir/$file"); + $pathinfo{$file}{'cmt_date'} = $cmt_date; + $pathinfo{$file}{'cmt_rev'} = $cmt_rev; } } } @@ -1252,6 +1226,7 @@ sub next_revision my $reldir = abs2rel($indir, cwd); my $relfile = catdir($reldir, $basefile); $self->_debug( "next_revision(): looking for details of file $relfile, indir $indir"); + $self->_debug( "rev1 $rev1, move $move"); my @commits = $self->_grab_commits($relfile); # print Dumper(@commits); -- cgit v1.2.3