From 3eaadb192b4d55e9c85aae27b3a699587a10f6c4 Mon Sep 17 00:00:00 2001 From: Bas Zoetekouw Date: Thu, 22 Jul 2010 15:17:54 +0000 Subject: Rewritten remove_stale.pl: it now no longer depends on CVS CVS version numbers remove_stale.pl: 1.18 -> 1.19 --- remove_stale.pl | 469 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 260 insertions(+), 209 deletions(-) (limited to 'remove_stale.pl') diff --git a/remove_stale.pl b/remove_stale.pl index 7f50a9ac449..ce979f1e502 100755 --- a/remove_stale.pl +++ b/remove_stale.pl @@ -1,268 +1,319 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl # This script searches through all the translation directories for HTML # files not having a matching WML file, and removes those HTML files from # both the local directory and the install directory. This is needed so that -# a removing a WML file from CVS causes the corresponding HTML file to go -# away. +# a removing a WML file from the repository causes the corresponding HTML file +# to go away. # Originally written 2001-03-22 by Peter Krefting -# © Copyright 2001-2008 Software in the public interest, Inc. +# Revised in 2010 by Bas Zoetekouw +# © Copyright 2001-2010 Software in the public interest, Inc. # This program is released under the GNU General Public License, v2. -# $Id$ +## $Id$ use strict; -use vars qw($opt_d); +use warnings; + use Getopt::Std; +use Data::Dumper; +use File::Spec::Functions; +use File::Find; -unless (-d 'english' && getopts('d')) -{ - print "Usage: $0 [-d]\n\n"; - print "Run this script from the webwml directory to remove stale HTML files.\n\n"; - print " -d Remove files, just not report.\n"; - exit; -} +use FindBin; +FindBin::again(); +use lib "$FindBin::Bin/Perl"; -# Read the list of languages -my @languages = readlanguages('Makefile'); +use Webwml::Langs; +use Local::VCS 'vcs_file_info'; -# Recurse. -my $files = 0; -foreach my $language (@languages) -{ - $files += &recurse("./$language"); -} -$files ||= 'No'; -print "\n$files stale translations found.\n"; -print "Use -d option to remove files.\n" - if $files ne 'No' && !$opt_d; -# Done. -exit; +# directory where "make install" installs the website +use constant INSTALLDIR => '../www'; -# The function that does the heavy work, recursing down the directory tree. -sub recurse +############################################################### +# "main" { - # Get parameter. - my $directory = shift; - # Don't try to do anything in sub directories of l10n. - return 0 if $directory =~ m'l10n/[^/]+$'; + my %opts; + show_help("Unknown option\n") if not getopts('dh',\%opts); + show_help() if exists $opts{'h'}; + show_help("Not in webwml root\n") if not -d 'english'; + + my $reallyremove = exists( $opts{'d'} ); - # Load all entries for this directory. - opendir THISDIR, $directory - or die "Unable to open directory $directory: $!\n"; - my @entries = - map { $directory . '/' . $_ } grep { !/^\./ } readdir(THISDIR); - closedir THISDIR; + # Read the list of languages + my @languages = sort Webwml::Langs->new()->names(); + #@languages = ('spanish'); + @languages = ('dutch','spanish'); - # Read through the CVS/Entries file. - if (!open ENTRIES, "$directory/CVS/Entries") + # check all subdirs to find stale html files + my @files; + foreach my $language (@languages) { - warn "Not a CVS directory, ignoring $directory\n"; - return 0; + push @files, find_stale_files($language); } - my @wmlfiles = (); - my @htmlfiles = (); - while () + + # remove or report the files + foreach my $file (@files) { - if (m'^/([^/]+.wml)/[0-9\.]+/.*/.*/$') + if ( $reallyremove ) { - push @wmlfiles, $directory . '/' . $1; + remove_file( $file, \@languages ); } - elsif (m'^/([^/]+.html)/[0-9\.]+/.*/.*/$') + else { - push @htmlfiles, $directory . '/' . $1; + report_file( $file, \@languages ); } } + my $numfiles = @files; + print "\n$numfiles stale translations found.\n"; + print "Use -d option to remove files.\n" + if @files and not $reallyremove; + + # Done. + exit; +} + + +############################################################# +# show help text +sub show_help +{ + my $help = shift; + + print $help if $help; + + open(my $fd, '<:utf8', $0) or return; + while (my $line = <$fd>) + { + next if $line =~ m/^#!/; + next if $line =~ m/^##/; + last if $line =~ /^[^#\s]/; + + chomp $line; + $line =~ s/^#\s+//; + + print $line, "\n"; + } + close($fd); + + print "Run this script from the webwml directory to remove stale HTML files.\n\n"; + print "Usage: $0 [-d]\n\n"; + print " -d Remove files, just not report.\n"; + + exit defined($help) ? 1 : 0; +} + +############################################################# +# Find the stale html files in the specified directory +sub find_stale_files +{ + # Get parameter. + my $dir = shift or die('No directory specified'); + + print "Recursing into `$dir'\n"; + + # the language subdir possibly doesn;t exist yet for newly + # started translations + return 0 unless -d $dir; + + # Don't try to do anything in subdirectories of l10n. + return 0 if $dir =~ m'l10n/[^/]+$'; + + # create a list of *.html files and a hash of *.wml files in this translation + #my (%wmlfiles,@htmlfiles); + #find( sub { $wmlfiles{$File::Find::name}++ if -f and /\.wml$/ }, $dir ); + #find( sub { push @htmlfiles, $File::Find::name if -f and /\.html$/ }, $dir ); + + my @htmlfiles = find_files_ext( $dir, 'html' ); + my %wmlfiles = map { $_ => 1 } find_files_ext( $dir, 'wml' ); + # Locate all HTML files, and find out which ones do not correspond # to a WML file, and does not live in the CVS by itself. - my @subdirs = (); - my $count = 0; - my $direntry; - foreach $direntry (@entries) + my @toremove; + foreach my $htmlfile (sort @htmlfiles) { - # sitemap.??.html files should be ignored since they don't have a .wml - # file, except in the english dir - if (-f $direntry && $direntry =~ /\.html$/ && $direntry !~ /sitemap\..*\.html$/) - { - my ($haswml, $incvs) = (0, 0); - - # Check for WML file. - my $source = $direntry; - $source =~ s/\...(-..)?\.html$/.wml/; - if ($source =~ /wml$/) - { - my $wmlfile; - WMLS: foreach $wmlfile (@wmlfiles) - { - $haswml = 1, last WMLS - if $wmlfile eq $source; - } - - unless ($haswml) - { - # Check if WML file is in the directory, even if it is - # not in the CVS (for auto-generated WML files). - $haswml =1 if -f $source; - } - } - - unless ($haswml) - { - # Check if HTML file is in CVS by itself. - my $htmlfile; - HTMLS: foreach $htmlfile (@htmlfiles) - { - $incvs = 1, last HTMLS - if $htmlfile eq $direntry; - } - } - - unless ($haswml || $incvs) - { - # File has no reason for being here. - $count ++; - - # Name of file installed by make install. - my $installed = $direntry; - $installed =~ s(^\./[^/]*/)(../www/); - - # Name of corresponding ICS file for events. - my $icslocal = $direntry; - $icslocal =~ s/html$/ics/; - my $icsinstalled = $installed; - $icsinstalled =~ s/html$/ics/; - - # Extra symlinks for languages - my $extra = $installed; - $extra =~ s/\.no\.html$/.nb.html/; - - # Check for translations to other languages, they - # need to have their .wml file touched - my @translations = (); - @translations = &findtranslations($source); - - # Remove or report. - if ($opt_d) - { - if (-f $extra || -l $extra) - { - print "Removing $extra\n"; - unlink $extra - or die "Unable to remove $extra: $!\n"; - } - if (-f $installed) - { - print "Removing $installed\n"; - unlink $installed - or die "Unable to remove $installed: $!\n"; - } - if (-f $icsinstalled) - { - print "Removing $icsinstalled\n"; - unlink $icsinstalled - or die "Unable to remove $icsinstalled: $!\n"; - } - if (-f $icslocal) - { - print "Removing $icslocal\n"; - unlink $icslocal - or die "Unable to remove $icslocal: $!\n"; - } - - # Touch translation sources to update the list of - # translations on them - foreach my $translation (@translations) - { - print "Touching $translation\n"; - system('/usr/bin/touch', $translation) == 0 - or warn "touch: error code $?"; - } - - print "Removing $direntry\n"; - unlink $direntry - or die "Unable to remove $direntry: $!\n"; - } - else - { - print "$direntry is stale\n"; - print " installed file is $installed\n"; - print " (does not exist)\n" - unless -f $installed; - print " and $extra\n" - if (-f $extra || -l $extra) and $extra ne $installed; - print " installed ICS file: $icsinstalled\n" - if -f $icsinstalled; - print " local ICS file: $icslocal\n" - if -f $icslocal; - foreach my $translation (@translations) - { - print " translation in $translation\n"; - } - } - } - } - elsif (-d $direntry && !($direntry =~ /CVS$/)) - { - push @subdirs, $direntry; - } + # the name of the wml file that this html file is potentially + # generated from + my $source = $htmlfile; + $source =~ s/(?:\.[-\w]+)?\.html$/.wml/ + or die("Can't determine WML source file for `$htmlfile'"); + + # does the wml source file exist? + my $haswml = exists( $wmlfiles{$source} ) || -f $source || 0; + + # is the html file checked in the VCS? + my $checkedin = vcs_file_info($htmlfile) ? 1 : 0; + + #if ($checkedin) + #{ print "==> `$htmlfile' : `$source' : $haswml : $checkedin\n"; } + + # we're done if the file has a corresponding wml source, or if the html + # file itself is checked in + next if $haswml or $checkedin; + + # as a special exception, sitemaps don't have a wml source in the + # translation tree (they are generated from english/) + next if $htmlfile =~ m{/sitemap\.[-\w]+\.html$}; + + # File has no reason for being here. + push @toremove, $htmlfile; + } + + return @toremove; +} + +############################################################# +# returns a list of filenames with the given extension +sub find_files_ext +{ + my $dir = shift or die('Internal error: No dir specified'); + my $ext = shift or die('Internal error: No ext specified'); + + my @files; + find( sub { push @files, $File::Find::name if -f and m/\.$ext$/ }, $dir ); + return @files; +} + +############################################################# +# get the filenames of files related to the given htmlfile +sub gather_file_info +{ + my $htmlfile = shift or die('Internal error: No htmlfile specified'); + my $languages = shift or die('Internal error: No languages specified'); + + die("Not an html file: `$htmlfile'\n") unless $htmlfile =~ m/\.html$/; + + my $wmlsrc = $htmlfile; + $wmlsrc =~ s{\.[-\w]+\.html$}{.wml}; + die("No valid wml source for `$htmlfile' could be constructed\n") + if $wmlsrc eq $htmlfile; + + # Name of file installed by make install. + my $installed = $htmlfile; + $installed =~ s{^[^/]+/}{}; # remove "dutch/" at the beginning + $installed = catfile(INSTALLDIR,$installed); + + # Name of corresponding ICS file for events. + my $icslocal = $htmlfile; + $icslocal =~ s/html$/ics/; + my $icsinstalled = $installed; + $icsinstalled =~ s/html$/ics/; + + # Extra symlinks for languages + my $extra = $installed; + $extra =~ s/\.no\.html$/.nb.html/ or $extra = ''; + #what about en-us and en-gb ? + + # Check for translations to other languages, they + # need to have their .wml file touched + my @translations = findtranslations($wmlsrc,$languages); + + return ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations); +} + +############################################################# +# remove the given html file and all files related to it +sub remove_file +{ + my $htmlfile = shift or die('Internal error: No htmlfile specified'); + my $languages = shift or die('Internal error: No languages specified'); + + my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) + = gather_file_info( $htmlfile, $languages ); + + if (-f $extra or -l $extra) + { + print "Removing $extra\n"; + unlink $extra + or die "Unable to remove $extra: $!\n"; + } + if (-f $installed) + { + print "Removing $installed\n"; + unlink $installed + or die "Unable to remove $installed: $!\n"; + } + if (-f $icsinstalled) + { + print "Removing $icsinstalled\n"; + unlink $icsinstalled + or die "Unable to remove $icsinstalled: $!\n"; + } + if (-f $icslocal) + { + print "Removing $icslocal\n"; + unlink $icslocal + or die "Unable to remove $icslocal: $!\n"; } - # Recurse into subdirectories. - my $subdir; - foreach $subdir (@subdirs) + # Touch translation sources to update the list of + # translations on them + if (@translations) { - $count += recurse($subdir); + utime(undef,undef,@translations) + or warn "touch: error code $?"; } - return $count; + print "Removing $htmlfile\n"; + unlink $htmlfile + or die "Unable to remove $htmlfile: $!\n"; + } -# Read the list of active languages from the Makefile -sub readlanguages +############################################################# +# report files that would be removed +sub report_file { - my $source = shift; - my (@languages, $langsrc); - open MAKE, '<', $source or die "Cannot read $source: $!\n"; - LANGUAGES: while () + my $htmlfile = shift or die('Internal error: No htmlfile specified'); + my $languages = shift or die('Internal error: No languages specified'); + + my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) + = gather_file_info( $htmlfile, $languages ); + + print "$htmlfile is stale\n"; + print " installed file is $installed"; + print " (does not exist)" unless -f $installed; + print "\n"; + print " and $extra\n" + if (-f $extra || -l $extra) and $extra ne $installed; + print " installed ICS file: $icsinstalled\n" + if -f $icsinstalled; + print " local ICS file: $icslocal\n" + if -f $icslocal; + + foreach my $translation (@translations) { - if (/LANGUAGES := (.*)/) - { - $langsrc = $1; - while ($langsrc =~ /\\$/) - { - my $nextline = ; - chomp $nextline; - $langsrc =~ s/\\$/$nextline/; - } - } + print " translation in $translation\n"; } - close MAKE; - - return split /\s+/, $langsrc; } +############################################################# # Locate all translated copies of this wml file sub findtranslations { - my $wml = shift; + my $wml = shift or die('Internal error: no wml file specified'); + my $languages = shift or die('Internal error: no languages specified'); + my @files; # Remove the first component of the path (which contains the # current language) my $tail = $wml; - $tail =~ s(^\./[^/]+/)(); + $tail =~ s{^[^/]+/}{}; - # Locte all translated copies - foreach my $language (@languages) + # Locate all translated copies + foreach my $language (@$languages) { - my $translated = "./$language/$tail"; - push @files, $translated if -f $translated; + my $translated = "$language/$tail"; + push @files, $translated if -f $translated; } return @files; } + +__END__ -- cgit v1.2.3