From afdb29732ca24242f1a2e94f98e90034f9925f46 Mon Sep 17 00:00:00 2001 From: Steve McIntyre <93sam> Date: Wed, 30 May 2018 02:26:19 +0000 Subject: Major updates to perl scripts Add new git backend in VCS_git.pm Switch from an old-style set of function calls to an OO API. This allows for initialisation and some state to be kept in the VCS_git.pm module - namely a per-file cache of commit hashes for a massive performance boost when doing lots of lookups. Extend the API with 2 new utility functions: * get_oldest_revision() * next_revision() Extended the vcs_cmp_rev() function to take a filename too. Add a test harness to validate the git and cvs backends. Add switch_to_git_translations.pl to walk the tree and switch from cvs revisions to git commit hashes in translation-check headers. Change all of our local scripts to use the new Local::VCS frontend *where it makes sense*. Some scripts will behave slightly differently, as the new world can't exactly match the old behaviour. CVS version numbers check_desc_trans.pl: 1.9 -> 1.10 check_trans.pl: 1.93 -> 1.94 copypage.pl: 1.42 -> 1.43 karma.pl: 1.6 -> 1.7 remove_stale.pl: 1.22 -> 1.23 smart_change.pl: 1.8 -> 1.9 stattrans.pl: 1.127 -> 1.128 switch_to_git_translations.pl: INITIAL -> 1.1 touch_translations.pl: 1.9 -> 1.10 vcs-test.pl: INITIAL -> 1.1 Perl/Local/Util.pm: 1.4 -> 1.5 Perl/Local/VCS.pm: 1.3 -> 1.4 Perl/Local/VCS_CVS.pm: 1.13 -> 1.14 Perl/Local/VCS_git.pm: 1.12 -> 1.13 Perl/Webwml/Langs.pm: 1.5 -> 1.6 Perl/Webwml/TransIgnore.pm: 1.3 -> 1.4 --- Perl/Local/Util.pm | 3 +- Perl/Local/VCS.pm | 15 + Perl/Local/VCS_CVS.pm | 439 +++++++++---------------- Perl/Local/VCS_git.pm | 780 ++++++++++++++++++++++++++++++++------------- Perl/Webwml/Langs.pm | 7 +- Perl/Webwml/TransIgnore.pm | 5 +- 6 files changed, 729 insertions(+), 520 deletions(-) (limited to 'Perl') diff --git a/Perl/Local/Util.pm b/Perl/Local/Util.pm index 8b11106e8d1..4277ef78964 100644 --- a/Perl/Local/Util.pm +++ b/Perl/Local/Util.pm @@ -29,8 +29,7 @@ use warnings; BEGIN { use base qw( Exporter ); - - our $VERSION = sprintf "%s", q$Revision$ =~ /([0-9.]+)/; + our $VERSION = "1.2.3"; our @EXPORT_OK = qw( uniq read_file ); our %EXPORT_TAGS = ( 'all' => [@EXPORT_OK] ); } diff --git a/Perl/Local/VCS.pm b/Perl/Local/VCS.pm index 59cd609bef1..802688525a4 100644 --- a/Perl/Local/VCS.pm +++ b/Perl/Local/VCS.pm @@ -39,4 +39,19 @@ sub import } } +sub new +{ + if ( -d 'CVS' ) + { + require Local::VCS_CVS; + return Local::VCS_CVS->new(@_); + } + # fall back to git + else + { + require Local::VCS_git; + return Local::VCS_git->new(@_); + } +} + 42; diff --git a/Perl/Local/VCS_CVS.pm b/Perl/Local/VCS_CVS.pm index ef93c482947..890f544c3a6 100644 --- a/Perl/Local/VCS_CVS.pm +++ b/Perl/Local/VCS_CVS.pm @@ -15,7 +15,8 @@ Local::VCS_CVS - generic wrapper around version control systems -- CVS version use Local::VCS_CVS; use Data::Dumper; - my %info = vcs_path_info( '.', recursive => 1, verbose => 1 ); + my $VCS = Local::VCS->new(); + my %info = $VCS->path_info( '.', recursive => 1, verbose => 1 ); print Dumper($info{'foo.wml'}); my %info2 = svn_file_info( 'foo.wml' ); @@ -52,18 +53,50 @@ use warnings; BEGIN { use base qw( Exporter ); - our $VERSION = sprintf "%s", q$Revision$ =~ /([0-9.]+)/; + our $VERSION = "1.14"; our @EXPORT_OK = qw( - &vcs_cmp_rev &vcs_count_changes - &vcs_get_topdir - &vcs_path_info &vcs_file_info - &vcs_get_log &vcs_get_diff - &vcs_get_file + &new ); our %EXPORT_TAGS = ( 'all' => [@EXPORT_OK] ); } +=item new + +This is the constructor. + + my $VCS = Local::VCS->new(); + +=cut + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = { + CACHE => {}, + }; + bless ($self, $class); + return $self; +} + +sub cache_file { + my $self = shift; + my $file = shift; + + if ($self->{CACHE}{"$file"}) { + print "$file is already cached...\n"; + return; + } + print "Adding $file to cache\n"; + $self->{CACHE}{"$file"} = 1; + return; +} + +sub cache_repo { + my $self = shift; + # Does nothing here... +} + # handling debugging my $DEBUG = 0; sub _debug @@ -78,17 +111,19 @@ sub _debug sub _typeoffile; -=item vcs_cmp_rev +=item cmp_rev -Compare two revision strings. +Compare two revision strings for a given file -Takes two revision strings as arguments, and -returns 1 if the first one is largest, --1 if the second one is largest, +Takes the file path and two revision strings as arguments, and +returns 1 if the first one is newer, +-1 if the second one is newer, 0 if they are equal =cut -sub vcs_cmp_rev +sub cmp_rev { + my $self = shift; + my $file = shift || ""; # unused here my $a = shift || '0'; my $b = shift || '0'; @@ -117,7 +152,7 @@ sub vcs_cmp_rev croak "Internal error: this should never be executed"; } -=item vcs_count_changes +=item count_changes Return the number of changes to particular file between two revisions @@ -131,8 +166,9 @@ Example use: =cut -sub vcs_count_changes +sub count_changes { + my $self = shift; my $file = shift or return undef; my $rev1 = shift || '1.1'; my $rev2 = shift || 'HEAD'; @@ -142,7 +178,7 @@ sub vcs_count_changes # find the version number of HEAD, if it was specified if ( $rev2 eq 'HEAD' ) { - my %info = vcs_file_info( $file ); + my %info = $self->file_info( $file ); return -1 if not %info; $rev2 = $info{'cmt_rev'}; } @@ -167,7 +203,7 @@ sub vcs_count_changes } -=item vcs_path_info +=item path_info Return CVS information and status about a tree of files. @@ -190,7 +226,7 @@ Optional remaining arguments are a hash array with options: Example uses: - my %info1 = $vcs_path_info( 'src' ); + my %info1 = $path_info( 'src' ); my %info2 = $readinfo( 'src', recursive => 1 ); my %info3 = $readinfo( 'src', recursive => 1, match_pat => '\.c$' ); @@ -198,8 +234,9 @@ Example uses: # todo: verbose -sub vcs_path_info +sub path_info { + my $self = shift; my ($dir,%options) = @_; croak("No file or directory specified") unless $dir; @@ -245,7 +282,7 @@ sub vcs_path_info return %data; } -=item vcs_file_info +=item file_info Return VCS information and status about a single file @@ -260,12 +297,13 @@ the specified file: Example use: - my %info = $vcs_file_info( 'foo.wml' ); + my %info = $file_info( 'foo.wml' ); =cut -sub vcs_file_info +sub file_info { + my $self = shift; my $file = shift or carp("No file specified"); my %options = @_; @@ -274,7 +312,7 @@ sub vcs_file_info my ($basename,$dirname) = fileparse( rel2abs $file ); # note: for some weird reason, the file is returned as '.' - my %info = vcs_path_info( $dirname, 'recursive' => 0 ); + my %info = $self->path_info( $dirname, 'recursive' => 0 ); if ( not ( exists $info{$basename} and $info{$basename} ) ) { @@ -285,7 +323,7 @@ sub vcs_file_info return %{ $info{$basename} }; } -=item vcs_get_log +=item get_log Return the log info about a specified file @@ -295,12 +333,13 @@ of the log entries Example use: - my @log_entries = vcs_get_log( 'foo.wml' ); + my @log_entries = get_log( 'foo.wml' ); =cut -sub vcs_get_log +sub get_log { + my $self = shift; my $file = shift or return; my $rev1 = shift || '0'; my $rev2 = shift || ''; @@ -353,7 +392,7 @@ sub vcs_get_log return reverse @logdata; } -=item vcs_get_diff +=item get_diff Returns a hash of (filename,diff) pairs containing the unified diff between two version of a (number of) files. @@ -365,14 +404,15 @@ modified) version is diffed against the latest checked in version. Example use: - my %diffs = vcs_get_diff( 'foo.wml', '1.4', '1.17' ); - my %diffs = vcs_get_diff( 'bla.wml', '1.8' ); - my %diffs = vcs_get_diff( 'bas.wml' ); + my %diffs = get_diff( 'foo.wml', '1.4', '1.17' ); + my %diffs = get_diff( 'bla.wml', '1.8' ); + my %diffs = get_diff( 'bas.wml' ); =cut -sub vcs_get_diff +sub get_diff { + my $self = shift; my $file = shift or return; my $rev1 = shift; my $rev2 = shift; @@ -388,6 +428,7 @@ sub vcs_get_diff # set the record separator for cvs diff output local $/ = "\n" . ('=' x 67) . "\n"; +# print "$command\n"; open( my $cvs, '-|', $command ) or croak("Couldn't run `$command': $!"); @@ -401,17 +442,17 @@ sub vcs_get_diff $record =~ s{ $/ \n? \Z }{}msx; # remove the "Index:" line from the end of the record - $record =~ s{ ^Index: [^\n]+ \n+ \Z }{}msx; +# $record =~ s{ ^Index: [^\n]+ \n+ \Z }{}msx; - # remove the first four lines - $record =~ s{ \A (?: .*? \n ){4} }{}msx; + # remove the first three lines + $record =~ s{ \A (?: .*? \n ){3} }{}msx; - # get the file name - if ( not $record =~ m{ \A --- \s+ ([^\t]+) \t }msx ) - { - croak("Parse error in output of `$command'"); - } - my $file = $1; +# # get the file name +# if ( not $record =~ m{ \A --- \s+ ([^\t]+) \t }msx ) +# { +# croak("Parse error in output of `$command'"); +# } +# my $file = $1; $data{$file} = $record; } @@ -433,7 +474,7 @@ sub _get_repository return $repo; } -=item vcs_get_file +=item get_file Return a particular revision of a file @@ -445,12 +486,13 @@ and returns it (without writing anything in the current checked-out tree) Example use: - my $text = vcs_get_file( 'foo.c', '1.12' ); + my $text = get_file( 'foo.c', '1.12' ); =cut -sub vcs_get_file +sub get_file { + my $self = shift; my $file = shift or croak("No file specified"); my $rev = shift or croak("No revision specified"); @@ -478,7 +520,67 @@ sub vcs_get_file return $text; } -=item vcs_get_topdir +=item get_oldest_revision + +Return the version of the oldest version of a file + +The first argument is a name of a file. + +This function finds the oldest revision of a file that is known in the +repository and returns it. + +Example use: + + my $rev = get_oldest_revision( 'foo.c'); + +=cut + +sub get_oldest_revision +{ + my $self = shift; + my $file = shift or croak("No file specified"); + + croak( "No such file: $file" ) unless -f $file; + + return '1.1'; # earliest possible revision of any file, easy! +} + +=item next_revision + +Given a file path and a current revision of that file, move backwards +or forwards through the commit history and return a related revision. + +Takes three arguments: the file path, the start revision and an +integer to specify which direction to move *and* how far. A negative +number specifies backwards in history, a positive number specifies +forwards. + +Example use: + + my $rev = next_revision( 'foo.c', '72f6700577c79e6d284fbeac44366f6aa357d56d', -1); + +Returns the requested revision if it exists, otherwise *undef* + +=cut +sub next_revision +{ + # For the file we're looking at, we can easily generate an + # array (list) of all the commit hashes. Then we can see where + # in that list the specified revision lies. + # + # FIXME: Only deals with simple moves backwards/forwards along the trunk + + my $self = shift; + my $file = shift or return undef; + my $rev1 = shift or return undef; + my $move = shift or return undef; + + my $newrev = $rev1; + $newrev =~ s/(\d+)$/($1 + $move)/e; + return $newrev; +} + +=item get_topdir Return the top dir of the webwml repository @@ -487,12 +589,13 @@ If it is not specified, by default the current directory is used. Example use: - my $dir = vcs_get_topdir( 'foo.c' ); + my $dir = get_topdir( 'foo.c' ); =cut -sub vcs_get_topdir +sub get_topdir { + my $self = shift; my $file = shift || '.'; my $cvs = Local::Cvsinfo->new(); @@ -552,247 +655,3 @@ Free Software Foundation. # ignore for now, I will get rid of it as the rewrite progresses # -- Bas. __END__ - - - -=item svn_diff - -Return diff for the specified file - -The first argument is a name of a file. -The second argument is the revision against which to take the diff (use undef for HEAD) -If a thrird argument is present, it signifies that hte use want a diff between -the version specified in the second agument and the one in the third argument - -Example use: - - # get diff of local changes against head - my $diff1 = $svn_diff( 'foo.c' ); - # get diff of local changes against revision 12 - my $diff2 = $svn_diff( 'foo.c', 'r12' ); - # get diff between version 11 and 12 of the file - my $diff1 = $svn_diff( 'foo.c', 'r11', 'r12' ); - -=cut - -sub svn_diff -{ - my $file = shift or carp("No file specified"); - my $rev1 = shift or carp("No orig revision specified"); - my $rev2 = shift or carp("No target revision specified"); - - carp( "No such file: $file" ) unless -e $file; - - # intitalize SVN client - my $ctx = SVN::Client->new(); - - # create twoo filehandles for output and error streams - - # TODO: this doesn't work (bug in SVN::Client?) - my ($out,$err); - #open ( my $fd_out, '>', \$out ) or croak("Couldn't open \\\$out"); - #open ( my $fd_err, '>', \$err ) or croak("Couldn't open \\\$err"); - - open ( my $fd_out, '+>', undef ) or croak("Couldn't open anonymous output"); - open ( my $fd_err, '+>', undef ) or croak("Couldn't open anonymous error"); - - $ctx->diff( - [], # options to diff (-u is default) - $file, $rev1, # first file - $file, $rev2, # second file - 1, # recursiveness - 1, # don't bother with ancestors - 0, # do diff deleted files - $fd_out, # output file - $fd_err, # error output file - ); - - # read the stuff from the files - seek( $fd_out, 0, SEEK_SET ); - seek( $fd_err, 0, SEEK_SET ); - { - local $/; - $out = <$fd_out>; - $err = <$fd_err>; - } - - # done with the files - close( $fd_out ); - close( $fd_err ); - - # croak on error - croak( $err ) if $err; - - # return the diff - return $out; -} - - -=item svn_log - -Return the log entries of a particular file - -The first argument is a name of a file. -The (optional) second and third argument specify the revision range - -This function retrieves the log entry/entries of the specified revision(s) for -the specified file. If only a file name is given, the entire history is -returned; if only 1 revision is specified, the log entrie of that particular -revision is returned; if two revisions are specified, all log entries in -between are returned. - -Example use: - - my @log = svn_log( 'foo.c' ); - my @log = svn_log( 'foo.c', 'r42' ); - my @log = svn_log( 'foo.c', 'r42', 'r112' ); - my @log = svn_log( 'foo.c', 'r42', 'HEAD' ); - -=cut - -my @_log_collection; - -sub _log_receiver -{ - my $paths = shift; # NOTE: not used - my $rev = shift; - my $author = shift; - my $date = str2time( shift ); - my $msg = shift; - - push @_log_collection, { - 'rev' => $rev, - 'author' => $author, - 'date' => $date, - 'message' => $msg, - }; - -} - -sub svn_log -{ - my $file = shift or carp("No file specified"); - my $rev1 = shift || '0'; - my $rev2 = shift || 'HEAD'; - - carp( "No such file: $file" ) unless -e $file; - - # clear log - @_log_collection = (); - - # intitialize SVN client - my $ctx = SVN::Client->new(); - - eval { - $ctx->log( - $file, - $rev1, - $rev2, - 0, # determine which files were changed on each revision? - 0, # don't traverse history of copies? - \&_log_receiver - ); - }; - carp($@) if $@; - - - # return a copy of the logs - return @_log_collection; -} - -=item svn_get_info - -Return info about the (local) Subversion repository - -The first argument is a name of a checked-out file or directory. - -Example use: - - my %info = svn_info( 'foo.c' ); - -=cut - -my %_info_data; - -sub _info_receiver -{ - my( $path, $info, $pool ) = @_; - - # return if the info is already known - return if %_info_data; - - %_info_data = ( - 'url' => $info->URL(), - 'rev' => $info->rev(), - 'kind' => $info->kind(), - 'root' => $info->repos_root_URL(), - 'uuid' => $info->repos_UUID(), - 'last_changed_rev' => $info->last_changed_rev(), - 'last_changed_date' => $info->last_changed_date(), - 'last_changed_author' => $info->last_changed_author(), - ); -}; - -sub svn_get_info -{ - my $file = shift or carp("No file specified"); - - $file = rel2abs( $file ); - - # intitialize SVN client - my $ctx = SVN::Client->new(); - - # reset the info hash - %_info_data = (); - - eval { - $ctx->info( - $file, - undef, # no revision info, so... - undef, # ...only use local info - \&_info_receiver, - 0, # no, don't recurse - ); - }; - carp($@) if $@; - - return %_info_data; -} - - -=item svn_get_depth - -Find how deep we are inside the repository - -The first argument is a name of a checked-out file or directory. - -Example use: - - my $depth = svn_get_topdir( 'foo.c' ); - -=cut - -sub svn_get_depth -{ - my $file = shift or carp("No file specified"); - - - my %info = svn_get_info( $file ); - my $top = $info{'url'}; - my $root = $info{'root'}; - - # if $file really is a file (not a dir), only look at the directory part of - # the filename - $top = dirname( $top ) if $info{'kind'} == 1; - - # remove the root from the start of url to get a top dir - $top =~ s{^\Q$root\E}{}; - $top =~ s{/+}{/}; - - # count the number of elements in the path - my $num = scalar splitdir( $top ); - - # minus 1, because $top starts with a '/', and thus splitdir adds an empty - # field at the beginning - return $num - 1; -} diff --git a/Perl/Local/VCS_git.pm b/Perl/Local/VCS_git.pm index cfa5affef03..410bf81c785 100644 --- a/Perl/Local/VCS_git.pm +++ b/Perl/Local/VCS_git.pm @@ -1,10 +1,10 @@ #!/usr/bin/perl -## Copyright (C) 2008 Bas Zoetekouw +## Copyright (C) 2018 Steve McIntyre <93sam@debian.org> ## -## This program is free software; you can redistribute it and/or modify it -## under the terms of version 2 of the GNU General Public License as published -## by the Free Software Foundation. +## This program is free software; you can redistribute it and/or modify it +## under the terms of version 2 of the GNU General Public License as published +## by the Free Software Foundation. =head1 NAME @@ -15,7 +15,8 @@ Local::VCS_git - generic wrapper around version control systems -- git version use Local::VCS_git; use Data::Dumper; - my %info = vcs_path_info( '.', recursive => 1, verbose => 1 ); + my $VCS = Local::VCS->new(); + my %info = $VCS->path_info( '.', recursive => 1, verbose => 1 ); print Dumper($info{'foo.wml'}); =head1 DESCRIPTION @@ -33,8 +34,8 @@ package Local::VCS_git; use 5.008; -use Local::Gitinfo; use File::Basename; +use File::Find; use File::Spec::Functions qw( rel2abs splitdir catfile rootdir catdir ); use File::stat; use Carp; @@ -42,6 +43,10 @@ use Fcntl qw/ SEEK_SET /; use Data::Dumper; use Date::Parse; use POSIX qw/ WIFEXITED /; +use List::MoreUtils 'first_index'; +use Cwd qw(cwd); +use Time::HiRes qw/gettimeofday/; +use Data::Dumper; use strict; use warnings; @@ -49,75 +54,226 @@ use warnings; BEGIN { use base qw( Exporter ); - our $VERSION = sprintf "%s", q$Revision$ =~ /([0-9.]+)/; + our $VERSION = "1.13"; our @EXPORT_OK = qw( - &vcs_cmp_rev &vcs_count_changes - &vcs_get_topdir - &vcs_path_info &vcs_file_info - &vcs_get_log &vcs_get_diff - &vcs_get_file + &new ); our %EXPORT_TAGS = ( 'all' => [@EXPORT_OK] ); } +=item new + +This is the constructor. + + my $VCS = Local::VCS->new(); + +=cut + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = { + CACHE => {}, + REPO_CACHED => 0, + }; + bless ($self, $class); + return $self; +} # handling debugging my $DEBUG = 0; + +sub _get_time() +{ + my @tm; + my $text; + my ($seconds, $microseconds) = gettimeofday; + + @tm = gmtime(); + $text = sprintf("%4d-%02d-%02d %02d:%02d:%02d.%6d UTC", + (1900 + $tm[5]),(1 + $tm[4]),$tm[3],$tm[2],$tm[1],$tm[0], $microseconds); + return $text; +} + sub _debug { my @text = @_; return unless $DEBUG; - print STDERR "=> ", @text, "\n"; + my $timestamp = _get_time(); + print STDERR "=> ", $timestamp, " ", @text, "\n"; + return; +} + +sub _add_cache_entry { + my $self = shift; + my $file = shift; + my %entry; + $entry{'cmt_date'} = shift; + $entry{'cmt_rev'} = shift; + my $tmp; + my @list; + if ($self->{CACHE}{"$file"}) { + $tmp = $self->{CACHE}{"$file"}; + @list = @$tmp; + } + push(@list, \%entry); + $self->{CACHE}{"$file"} = \@list; +} + +sub cache_file { + my $self = shift; + my $file = shift; + + _debug "cache_file($file)"; + if ($self->{CACHE}{"$file"}) { +# print "$file is already cached...\n"; + _debug "cache_file($file) returning early"; return; + } +# print "Adding $file to cache\n"; + my (@commits); + open (GITLOG, "git log -p -m --first-parent --name-only --numstat --format=format:\"%H %ct\" -- $file |") or die "Can't fork git log: $!\n"; + my ($cmt_date, $cmt_rev); + while (my $line = ) { + chomp $line; + if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { + $cmt_rev = $1; + $cmt_date = $2; + next; + } elsif ($line =~ m{^$file$}) { + $self->_add_cache_entry($file, $cmt_date, $cmt_rev); + } + } + close GITLOG; + _debug "cache_file($file) done"; + return; } -# return the type of the specified file -sub _typeoffile; +sub cache_repo { + my $self = shift; + _debug "cache_repo()"; + # If we've already read the commit history into our cache, return + # immediately + if ($self->{REPO_CACHED}) { + _debug "cache_repo() returning early"; + return; + } + + # If not, clear the cache and rebuild from scratch. We might havs + # some files cached individually, and that would confuse things + # now. + $self->{CACHE} = {}; + + # Store the current directory so we can return there + my $start_dir = cwd; + my $topdir = $self->get_topdir(); + chdir ($topdir) or die "Can't chdir to $topdir: $!\n"; + + my (@commits); + my $count = 0; + open (GITLOG, "git log -p -m --first-parent --name-only --numstat --format=format:\"%H %ct\" |") or die "Can't fork git log: $!\n"; + my ($cmt_date, $cmt_rev); + while (my $line = ) { + chomp $line; + if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { + $cmt_rev = $1; + $cmt_date = $2; + next; + } elsif ($line =~ m{^(\S+)$}) { + my $file = $1; + $self->_add_cache_entry($file, $cmt_date, $cmt_rev); + $count++; + } + } + close GITLOG; + chdir ($start_dir); + $self->{REPO_CACHED} = 1; +# print Dumper($self->{CACHE}); + my $tmp = $self->{CACHE}; + my $num_files = scalar(keys %$tmp); + _debug "cache_repo() done, $count file commits, $num_files files"; +} + +# Internal helper function - grab a list of all the commits to a given +# file +sub _grab_commits +{ + my $self = shift; + my $file = shift or return undef; + $self->cache_file($file); + my $tmp = $self->{CACHE}{"$file"}; + if (defined $tmp) { + my @commits = @$tmp; +# print Dumper(@commits); + return @commits; + } + return undef; +} -=item vcs_cmp_rev +=item cmp_rev -Compare two revision strings. +Compare two revision strings for a given file. -Takes two revision strings as arguments, and -returns 1 if the first one is largest, --1 if the second one is largest, +Takes the file path and two revision strings as arguments, and +returns 1 if the first one is newer, +-1 if the second one is newer, 0 if they are equal -FIXME: this needs to be "translated" into git and hashes - =cut -sub vcs_cmp_rev +sub cmp_rev { - my $a = shift || '0'; - my $b = shift || '0'; - - my @a = split( /\./, $a ); - my @b = split( /\./, $b ); - - # compare the two revision string term by term - my $i = 0; - while (1) - { - # first check if there are terms left in both revisions - if ( $i > $#a and $i>$#b ) { return 0 }; - if ( $i > $#a ) { return -1 }; - if ( $i > $#b ) { return 1 }; - - # then check this term - if ( $a[$i] < $b[$i] ) { return -1; } - if ( $a[$i] > $b[$i] ) { return 1; } - - # terms are equal, look at the next one - - $i++; + # For the file we're looking at, we can easily generate an + # array (list) of all the commit hashes. Then we can see where + # in that list the specified revisions lie (from newest to + # oldest) - that's what we're going to compare. + + my $self = shift; + my $file = shift or return undef; + my $rev1 = shift or return undef; + my $rev2 = shift or return undef; + + my @commits = $self->_grab_commits($file); +# print Dumper(@commits); + my $pos1 = -1; + my $pos2 = -1; + my $count = 0; + foreach my $tmp(@commits) { + my %entry = %$tmp; + if ($entry{'cmt_rev'} =~ /\Q$rev1\E/) { + $pos1 = $count; + } + if ($entry{'cmt_rev'} =~ /\Q$rev2\E/) { + $pos2 = $count; + } + if ($pos1 != -1 and $pos2 != -1) { + last; + } + $count++; + } + if ($pos1 == -1) { + # Not found + print "ERROR: commit $rev1 not found in revisions of $file\n"; + return undef; + } + if ($pos2 == -1) { + # Not found + print "ERROR: commit $rev2 not found in revisions of $file\n"; + return undef; + } + if ($pos1 == $pos2) { + return 0; + } elsif ($pos1 < $pos2) { + return 1; + } else { + return -1; } # should never be reached croak "Internal error: this should never be executed"; } -=item vcs_count_changes +=item count_changes Return the number of changes to particular file between two revisions @@ -126,48 +282,83 @@ The second and third argument specify the revision range Example use: - my $num1 = vcs_count_changes( 'foo.c', 'r42', 'r70' ); - my $num2 = vcs_count_changes( 'foo.c', 'r42', 'HEAD' ); + my $num1 = count_changes( 'foo.c', 'r42', 'r70' ); + my $num2 = count_changes( 'foo.c', 'r42', 'HEAD' ); -FIXME: converted into git and hashes, needs review and test - =cut -sub vcs_count_changes +sub count_changes { # FIXME: not sure if we need to handle the issue of wrong $rev1/$rev2 here # or the caller function will care. # Not sure if this function makes sense in a git context, either, - # but just in case + # but just in case + my $self = shift; my $file = shift or return undef; my $rev1 = shift || ''; - my $rev2 = shift || 'HEAD'; + my $rev2 = shift || ''; - $rev1 = '' if $rev1 eq 'n/a'; + if ($rev1 =~ m/^\Q$rev2\E$/) { # same revisions + return 0; + } - # for git, this is pretty easy: we simply compare the two version numbers - # note: we don't support branches (aren't used in the webwml repo anyway) + my @commits = $self->_grab_commits($file); - my $command = sprintf( 'git rev-list --count %s..%s %s', $rev1, $rev2, $file ); - - open ( my $git, '-|', $command ) - or croak("Error while executing `$command': $!"); - my $text; - while ( my $line = <$git> ) - { - $text .= $line; + # If unset, go for the last revision + if (length($rev2) == 0) { + $rev2 = $commits[$#commits]; } - close( $git ); - croak("Error while executing `$command': $!") unless WIFEXITED($?); - # return the number of changes (or error text) - - return $text; + my $pos1 = -1; + my $pos2 = -1; + my $count = 0; + foreach my $tmp(@commits) { + my %entry = %$tmp; + if ($entry{'cmt_rev'} =~ /\Q$rev1\E/) { + $pos1 = $count; + } + if ($entry{'cmt_rev'} =~ /\Q$rev2\E/) { + $pos2 = $count; + } + if ($pos1 != -1 and $pos2 != -1) { + last; + } + $count++; + } + if ($pos1 == -1) { + # Not found + print "ERROR: commit $rev1 not found in revisions of $file\n"; + return undef; + } + if ($pos2 == -1) { + # Not found + print "ERROR: commit $rev2 not found in revisions of $file\n"; + return undef; + } + return $pos2 - $pos1; } +# return the type of the input argument (file, dir, symlink, etc) +sub _typeoffile +{ + my $file = shift or return; -=item vcs_path_info + stat $file or return 'f'; # File is not here now; assume it + # was a file! + + return 'f' if ( -f _ ); + return 'd' if ( -d _ ); + return 'l' if ( -l _ ); + return 'S' if ( -S _ ); + return 'p' if ( -p _ ); + return 'b' if ( -b _ ); + return 'c' if ( -c _ ); + + return ''; +} + +=item path_info Return git information and status about a tree of files. @@ -190,20 +381,41 @@ Optional remaining arguments are a hash array with options: Example uses: - my %info1 = $vcs_path_info( 'src' ); - my %info2 = $readinfo( 'src', recursive => 1 ); - my %info3 = $readinfo( 'src', recursive => 1, match_pat => '\.c$' ); + my %info1 = $VCS->path_info( 'src' ); + my %info2 = $VCS->path_info( 'src', recursive => 1 ); + my %info3 = $VCS->path_info( 'src', recursive => 1, match_pat => '\.c$' ); =cut # todo: verbose -sub vcs_path_info +sub path_info { + # Two parts: + # + # (1) Build a hash of all the files we want to know about + # + # (2a) Ask git about all the files it has, and pull out + # information for ones in the hash from (1) + # + # OR + # + # (2b) Grab that information from our cache of all the commit data, + # if we have already been told to generate that cache. + # *Don't* generate that cache automatically - for just a few + # files it's quicker to just look up the data directly. If the + # user is going to do a lot of lookups, let them populate the + # cache up-front themselves if performance matters. + # + # This may not sound very quick, but it's much better than asking git + # about all the details individually! + + my $self = shift; my ($dir,%options) = @_; + my %files_wanted; croak("No file or directory specified") unless $dir; - _debug "Called with $dir"; + _debug "path_info ($dir)"; my $recurse = $options{recursive} || $options{recurse} || 0; my $match_pat = $options{match_pat} || undef; @@ -213,39 +425,74 @@ sub vcs_path_info _debug "Match pattern is '$match_pat'" if defined $match_pat; _debug "Skip pattern is '$skip_pat'" if defined $skip_pat; - # $git->readinfo expects a matchfile input; if nothing is specified, we - # pass a pattern that matches everything - $match_pat ||= '.'; - - $dir = rel2abs( $dir ); - - # use Local::Gitinfo to do the actual work in git - my $git = Local::Gitinfo->new(); - $git->readinfo( $dir, recursive => $recurse, matchfile => [$match_pat] ); - my $files = $git->files; - - # construct a nice hash from the data we received from Gitinfo - my %data; - for my $file (keys %{$git->{FILES}}) - { - # we return relative paths, so strip off the dir name - my $file_rel = $file; - $file_rel =~ s{^$dir/?}{}; - - # skip files that match the skip pattern - next if $skip_pat and $file_rel =~ m{$skip_pat}; - - $data{$file_rel} = { - 'cmt_rev' => $git->{FILES}->{$file}->{'REV'}, - 'cmt_date' => str2time( $git->{FILES}->{$file}->{'DATE'} ), - 'type' => _typeoffile $file, - }; + if ($recurse) { + find( sub { $files_wanted{$File::Find::name} = 1 if -f and + (!defined $match_pat or m/$match_pat/) and + (!defined $skip_pat or not ($File::Find::name =~ m/$skip_pat/))}, $dir ); + } else { + find( sub { $files_wanted{$File::Find::name} = 1 if -f and + ($File::Find::dir eq $dir) and + (!defined $match_pat or m/$match_pat/) and + (!defined $skip_pat or not ($File::Find::name =~ m/$skip_pat/))}, $dir ); } - return %data; + # Calling "git log" for each file individually is hatefully + # slow. Better option: call it once with appropriate options + # for the directory we're targeting, and parse the + # output. Gross, but it works. + + my %pathinfo; + + # Do we have the repo commit history cached? + if ($self->{REPO_CACHED}) { + # We do, use it! (2b above) + foreach my $file (keys %files_wanted) { + my @commits = $self->_grab_commits($file); + if (@commits) { + my $outfile = $file; + $outfile =~ s,^$dir/,,; + # Grab the data we want from the first entry in the + # commits list, i.e. the most recent commit. + $pathinfo{$outfile}{'type'} = _typeoffile("$file"); + $pathinfo{$outfile}{'cmt_date'} = $commits[0]{'cmt_date'}; + $pathinfo{$outfile}{'cmt_rev'} = $commits[0]{'cmt_rev'}; + } else { + _debug "Ignoring file $file"; + } + } + } else { + # We don't, so we need to talk to git. (2a above) + open (GITLOG, "git log -p -m --first-parent --name-only --numstat --format=format:\"%H %ct\" $dir|") + or die "Failed to fork git log: $!\n"; + my $cmt_date; + my $cmt_rev; + my $file; + while (my $line = ) { + chomp $line; + if ($line =~ m/^([[:xdigit:]]+) (\d+)$/) { + $cmt_rev = $1; + $cmt_date = $2; + next; + } elsif ($line =~ m{^$dir/(\S+)$}) { + $file = $1; + # Only store information if: + # We want this file, and + # We don't have data for it yet (i.e. only show + # the most recent version of a file) + if ($files_wanted{"$dir/$file"} and not defined $pathinfo{$file}) { + $pathinfo{$file}{'type'} = _typeoffile("$dir/$file"); + $pathinfo{$file}{'cmt_date'} = $cmt_date; + $pathinfo{$file}{'cmt_rev'} = $cmt_rev; + } + } + } + close GITLOG; + } + _debug "path_info ($dir) returning"; + return %pathinfo; } -=item vcs_file_info +=item file_info Return VCS information and status about a single file @@ -260,32 +507,30 @@ the specified file: Example use: - my %info = $vcs_file_info( 'foo.wml' ); + my %info = $VCS->file_info( 'foo.wml' ); =cut -sub vcs_file_info +sub file_info { + my $self = shift; my $file = shift or carp("No file specified"); my %options = @_; - my $quiet = $options{quiet} || undef; - - my ($basename,$dirname) = fileparse( rel2abs $file ); - - # note: for some weird reason, the file is returned as '.' - my %info = vcs_path_info( $dirname, 'recursive' => 0 ); - - if ( not ( exists $info{$basename} and $info{$basename} ) ) - { - carp("No info found about `$file' (does the file exist?)") if ( ! $quiet ); - return; + my %pathinfo; + my @commits = $self->_grab_commits($file); + if (@commits) { + # Grab the data we want from the first entry in the + # commits list, i.e. the most recent commit. + $pathinfo{'type'} = _typeoffile("$file"); + $pathinfo{'cmt_date'} = $commits[0]{'cmt_date'}; + $pathinfo{'cmt_rev'} = $commits[0]{'cmt_rev'}; } - return %{ $info{$basename} }; + return %pathinfo; } -=item vcs_get_log +=item get_log Return the log info about a specified file @@ -295,66 +540,91 @@ of the log entries Example use: - my @log_entries = vcs_get_log( 'foo.wml' ); + my @log_entries = get_log( 'foo.wml' ); -FIXME: converted into git and hashes, needs review and test - =cut -sub vcs_get_log +sub get_log { + my $self = shift; my $file = shift or return; my $rev1 = shift || ''; my $rev2 = shift || ''; - my @logdata; + my $command; + if ($rev1 eq '' and $rev2 eq '') { + $command = sprintf( 'git log --date=unix %s', $file ); + } elsif ($rev2 eq '') { + $command = sprintf( 'git log --date=unix %s^..%s %s', $rev1, $rev1, $file ); + } elsif ($rev1 eq '') { + $command = sprintf( 'git log --date=unix ..%s %s', $rev2, $file ); + } else { + $command = sprintf( 'git log --date=unix %s..%s %s', $rev1, $rev2, $file ); + } + + _debug "get_log: running $command"; - # set the record separator for git log output - local $/ = "\n----------------------------\n"; - - my $command = sprintf( 'git log %s..%s %s', $rev1, $rev2, $file ); open( my $git, '-|', $command ) or croak("Couldn't run `$command': $!"); - # read the consequetive records - while ( my $record = <$git> ) + my $revision; + my $author; + my $date; + my $message; + my $first_record_done = 0; + + # read and parse the log records + while ( my $line = <$git> ) { - #print "==> $record\n"; - - # the first 3 lines of a record contain metadata that looks like this: - # commit abcdefg435768938de - # Author: Jane Doe - # Date: Tue Nov 7 11:47:24 2017 +0100 - - # first split off the record - my ($metadata1,$metadata2,$metadata3,$logmessage) = split( /\n/, $record, 4 ); - - my ($revision) = $metadata1 =~ m{^commit (.+)}; - my ($author) = $metadata2 =~ m{^Author: (.+)}; - my ($date) = $metadata3 =~ m{^Date: (.+)}; - - croak( "Couldn't parse output of `$command'" ) - unless $revision and $date and $author; - - # convert date to unixtime - $date = str2time( $date ); - - # last line of the log message is still the record separator - $logmessage =~ s{\n[^\n]+\n$}{}ms; - - push @logdata, { + # the first 3 lines of a record contain metadata that looks like this: + # commit abcdefg435768938de + # Author: Jane Doe + # Date: 1504881409 + + if ($line =~ m{^commit (.+)}) { + # Second and subsequent record, push a result + if ($first_record_done) { + _debug "pushing a record (rev $revision)"; + push @logdata, { 'rev' => $revision, - 'author' => $author, - 'date' => $date, - 'message' => $logmessage, - }; + 'author' => $author, + 'date' => $date, + 'message' => $message, + }; + $message = ""; + } + $first_record_done = 1; + $revision = $1; + } elsif ($line =~ m{^Author: (.+)}) { + $author = $1; + } elsif ($line =~ m{^Date: (.+)}) { + $date = $1; + } else { + # Lose leading whitespace, but retain blank lines + if ($line =~ m{\S}) { + $line =~ s{^\s+}{}; + } + $message .= $line; + } + } close( $git ); - return reverse @logdata; + if ($first_record_done) { + # _debug "pushing last record (rev $revision)"; + # Last record, push a result + push @logdata, { + 'rev' => $revision, + 'author' => $author, + 'date' => $date, + 'message' => $message, + }; + return reverse @logdata; + } + return undef; } -=item vcs_get_diff +=item get_diff Returns a hash of (filename,diff) pairs containing the unified diff between two version of a (number of) files. @@ -366,17 +636,16 @@ modified) version is diffed against the latest checked in version. Example use: - my %diffs = vcs_get_diff( 'foo.wml', '1.4', '1.17' ); - my %diffs = vcs_get_diff( 'bla.wml', '1.8' ); - my %diffs = vcs_get_diff( 'bas.wml' ); + my %diffs = get_diff( 'foo.wml', '1.4', '1.17' ); + my %diffs = get_diff( 'bla.wml', '1.8' ); + my %diffs = get_diff( 'bas.wml' ); -FIXME: converted using git log format, probably does not work. Needs review and test - =cut -sub vcs_get_diff +sub get_diff { + my $self = shift; my $file = shift or return; my $rev1 = shift; my $rev2 = shift; @@ -389,36 +658,25 @@ sub vcs_get_diff defined $rev2 ? "$rev2" : '', $file ); - # set the record separator for git diff output - # not sure if the below expression is correct - local $/ = "\n" . 'diff --git' . (.+) . "\n"; +# print "$command\n"; open( my $git, '-|', $command ) or croak("Couldn't run `$command': $!"); - # the first "record" is bogus - <$git>; - # read the consecutive records while ( my $record = <$git> ) { # remove the record separator from the end of the record - $record =~ s{ $/ \n? \Z }{}msx; +# $record =~ s{ $/ \n? \Z }{}msx; # remove the "index" line from the beginning of the record - $record =~ s{ ^index [^\n] }{}msx; +# $record =~ s{ ^index [^\n] }{}msx; # remove the first 2 lines - $record =~ s{ \A (?: .*? \n ){2} }{}msx; + $record =~ s{^diff\s+--git.*}{}msx; + $record =~ s{^index\s+.*}{}msx; - # get the file name - if ( not $record =~ m{ \A --- \s+ ([^\t]+) \t }msx ) - { - croak("Parse error in output of `$command'"); - } - my $file = $1; - - $data{$file} = $record; + $data{$file} .= $record; } close( $git ); @@ -426,7 +684,7 @@ sub vcs_get_diff } -=item vcs_get_file +=item get_file Return a particular revision of a file @@ -438,14 +696,13 @@ and returns it (without writing anything in the current checked-out tree) Example use: - my $text = vcs_get_file( 'foo.c', '1.12' ); - -FIXME: converted to use git show :. Assumes we provide in $file the complete path. Needs review and test + my $text = get_file( 'foo.c', '1.12' ); =cut -sub vcs_get_file +sub get_file { + my $self = shift; my $file = shift or croak("No file specified"); my $rev = shift or croak("No revision specified"); @@ -456,8 +713,8 @@ sub vcs_get_file $rev, $file ); my $text; - open ( my $git, '-|', $command1 ) - or croak("Error while executing `$command1': $!"); + open ( my $git, '-|', $command ) + or croak("Error while executing `$command': $!"); while ( my $line = <$git> ) { $text .= $line; @@ -469,70 +726,147 @@ sub vcs_get_file return $text; } -=item vcs_get_topdir +=item get_oldest_revision -Return the top dir of the webwml repository +Return the version of the oldest version of a file -The first argument is a name of a checked-out file or directory. -If it is not specified, by default the current directory is used. +The first argument is a name of a file. + +This function finds the oldest revision of a file that is known in the repository and returns it. Example use: - my $dir = vcs_get_topdir( 'foo.c' ); + my $rev = get_oldest_revision( 'foo.c'); =cut -sub vcs_get_topdir +sub get_oldest_revision { - my $file = shift || '.'; - - my $git = Local::Gitinfo->new(); - $git->readinfo( $file ); - my $root = $git->topdir() - or croak ("Unable to determine top-level directory"); + my $self = shift; + my $file = shift or croak("No file specified"); - # TODO: add some check that this really is the top level dir + croak( "No such file: $file" ) unless -f $file; - return $root; + my @commits = $self->_grab_commits($file); + if (@commits) { + # Simply return the last revision in our list + return $commits[$#commits]{'cmt_rev'}; + } + # Should hopefully never get here! + croak(" Could not find any revisions for $file"); } +=item next_revision +Given a file path and a current revision of that file, move backwards +or forwards through the commit history and return a related revision. +Takes three arguments: the file path, the start revision and an +integer to specify which direction to move *and* how far. A negative +number specifies backwards in history, a positive number specifies +forwards. +Example use: -###################################### -## internal functions -###################################### + my $rev = next_revision( 'foo.c', '72f6700577c79e6d284fbeac44366f6aa357d56d', -1); +Returns the requested revision if it exists, otherwise *undef* -# return the type of the input argument (file, dir, symlink, etc) -sub _typeoffile +=cut +sub next_revision { - my $file = shift or return; + # For the file we're looking at, we can easily generate an + # array (list) of all the commit hashes. Then we can see where + # in that list the specified revision lies. + + my $self = shift; + my $file = shift or return undef; + my $rev1 = shift or return undef; + my $move = shift or return undef; + + my @commits = $self->_grab_commits($file); +# print Dumper(@commits); + my $pos1 = -1; + my $pos2 = -1; + my $count = 0; + my %entry; + foreach my $tmp(@commits) { + %entry = %$tmp; + if ($entry{'cmt_rev'} =~ /\Q$rev1\E/) { + $pos1 = $count; + } + if ($pos1 != -1) { + last; + } + $count++; + } - stat $file or croak("Couldn't stat file `$file'"); + if ($pos1 == -1) { + # Can't find the specified revision + return undef; + } - return 'f' if ( -f _ ); - return 'd' if ( -d _ ); - return 'l' if ( -l _ ); - return 'S' if ( -S _ ); - return 'p' if ( -p _ ); - return 'b' if ( -b _ ); - return 'c' if ( -c _ ); + # API specifies -ve as older, but out list is sorted the other + # way... + $pos2 = $pos1 - $move; - return ''; + if ($pos2 < 0 or $pos2 >= $#commits) { + # Out of range when looking for the new revision + return undef; + } + + return $commits[$pos2]{'cmt_rev'}; } +=item get_topdir + +Return the top dir of the webwml repository + +The first argument is a name of a checked-out file or directory. +If it is not specified, by default the current directory is used. + +Example use: + + my $dir = get_topdir( 'foo.c' ); + +=cut + +sub get_topdir +{ + my $self = shift; + my $file = shift || '.'; + + # Are we in topdir? Easy! + if (-d ".git") { + return "."; + } + + # Otherwise, walk up the tree until we find a .git or hit the + # root directory + my $dir = ".."; + my ($root_dev, $root_ino) = stat("/"); + + while (1 == 1) { + if (-d "$dir/.git") { + return "$dir"; + } + my ($dev, $ino) = stat("$dir"); + if ($root_dev == $dev and $root_ino == $ino) { + croak ("Unable to determine top-level directory"); + } + $dir = "../$dir"; + } +} =back =head1 AUTHOR -Copyright (C) 2008 Bas Zoetekouw +Copyright (C) 2018 Steve McIntyre <93sam@debian.org> -This program is free software; you can redistribute it and/or modify it under -the terms of version2 of the GNU General Public License as published by the -Free Software Foundation. +This program is free software; you can redistribute it and/or modify +it under the terms of version 2 of the GNU General Public License as +published by the Free Software Foundation. =cut diff --git a/Perl/Webwml/Langs.pm b/Perl/Webwml/Langs.pm index 4df3ecb6631..16857146ff2 100644 --- a/Perl/Webwml/Langs.pm +++ b/Perl/Webwml/Langs.pm @@ -34,7 +34,7 @@ the list of languages in which Debian web site is translated. package Webwml::Langs; use Carp; -use Local::VCS 'vcs_get_topdir'; +use Local::VCS; use strict; use warnings; @@ -52,13 +52,14 @@ determined from VCS meta-info. sub new { my $proto = shift; my $class = ref($proto) || $proto; + my $VCS = Local::VCS->new(); my $root; if (@_) { $root = shift; } else { - $root = vcs_get_topdir; + $root = $VCS->get_topdir(); } my $self = _read($root); bless ($self, $class); @@ -72,7 +73,7 @@ sub new { # TODO: or rather define the lnaguages in a module, and include those in # languages.wml sub _read { - my $root = shift || vcs_get_topdir; + my $root = shift || Local::VCS->get_topdir(); open( my $file, '<', "$root/english/template/debian/languages.wml") or croak ("Unable to read $root/english/template/debian/languages.wml"); diff --git a/Perl/Webwml/TransIgnore.pm b/Perl/Webwml/TransIgnore.pm index d3f6b7e9234..a06f987ff87 100644 --- a/Perl/Webwml/TransIgnore.pm +++ b/Perl/Webwml/TransIgnore.pm @@ -33,7 +33,7 @@ package Webwml::TransIgnore; use Carp; use File::Spec::Functions; -use Local::VCS qw{ vcs_get_topdir }; +use Local::VCS; use strict; use warnings; @@ -64,7 +64,8 @@ sub new bless ($self, $class); # determine the root dir - my $root = shift || vcs_get_topdir('.'); + my $VCS = Local::VCS->new(); + my $root = shift || $VCS->get_topdir('.'); # Read global .transignore file $self->_read($root); -- cgit v1.2.3