aboutsummaryrefslogtreecommitdiffstats
path: root/remove_stale.pl
diff options
context:
space:
mode:
authorBas Zoetekouw <bas>2010-07-22 15:17:54 +0000
committerBas Zoetekouw <bas>2010-07-22 15:17:54 +0000
commit3eaadb192b4d55e9c85aae27b3a699587a10f6c4 (patch)
tree481d4891f8ab4271168b4f8f2ffbc7ac26413f1f /remove_stale.pl
parent0ff6906adf2b13e1c6ef82f8b62e40be721295d5 (diff)
Rewritten remove_stale.pl: it now no longer depends on CVS
CVS version numbers remove_stale.pl: 1.18 -> 1.19
Diffstat (limited to 'remove_stale.pl')
-rwxr-xr-xremove_stale.pl469
1 files changed, 260 insertions, 209 deletions
diff --git a/remove_stale.pl b/remove_stale.pl
index 7f50a9ac449..ce979f1e502 100755
--- a/remove_stale.pl
+++ b/remove_stale.pl
@@ -1,268 +1,319 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl
# This script searches through all the translation directories for HTML
# files not having a matching WML file, and removes those HTML files from
# both the local directory and the install directory. This is needed so that
-# a removing a WML file from CVS causes the corresponding HTML file to go
-# away.
+# a removing a WML file from the repository causes the corresponding HTML file
+# to go away.
# Originally written 2001-03-22 by Peter Krefting <peterk@debian.org>
-# © Copyright 2001-2008 Software in the public interest, Inc.
+# Revised in 2010 by Bas Zoetekouw <bas@debian.org>
+# © Copyright 2001-2010 Software in the public interest, Inc.
# This program is released under the GNU General Public License, v2.
-# $Id$
+## $Id$
use strict;
-use vars qw($opt_d);
+use warnings;
+
use Getopt::Std;
+use Data::Dumper;
+use File::Spec::Functions;
+use File::Find;
-unless (-d 'english' && getopts('d'))
-{
- print "Usage: $0 [-d]\n\n";
- print "Run this script from the webwml directory to remove stale HTML files.\n\n";
- print " -d Remove files, just not report.\n";
- exit;
-}
+use FindBin;
+FindBin::again();
+use lib "$FindBin::Bin/Perl";
-# Read the list of languages
-my @languages = readlanguages('Makefile');
+use Webwml::Langs;
+use Local::VCS 'vcs_file_info';
-# Recurse.
-my $files = 0;
-foreach my $language (@languages)
-{
- $files += &recurse("./$language");
-}
-$files ||= 'No';
-print "\n$files stale translations found.\n";
-print "Use -d option to remove files.\n"
- if $files ne 'No' && !$opt_d;
-# Done.
-exit;
+# directory where "make install" installs the website
+use constant INSTALLDIR => '../www';
-# The function that does the heavy work, recursing down the directory tree.
-sub recurse
+###############################################################
+# "main"
{
- # Get parameter.
- my $directory = shift;
- # Don't try to do anything in sub directories of l10n.
- return 0 if $directory =~ m'l10n/[^/]+$';
+ my %opts;
+ show_help("Unknown option\n") if not getopts('dh',\%opts);
+ show_help() if exists $opts{'h'};
+ show_help("Not in webwml root\n") if not -d 'english';
+
+ my $reallyremove = exists( $opts{'d'} );
- # Load all entries for this directory.
- opendir THISDIR, $directory
- or die "Unable to open directory $directory: $!\n";
- my @entries =
- map { $directory . '/' . $_ } grep { !/^\./ } readdir(THISDIR);
- closedir THISDIR;
+ # Read the list of languages
+ my @languages = sort Webwml::Langs->new()->names();
+ #@languages = ('spanish');
+ @languages = ('dutch','spanish');
- # Read through the CVS/Entries file.
- if (!open ENTRIES, "$directory/CVS/Entries")
+ # check all subdirs to find stale html files
+ my @files;
+ foreach my $language (@languages)
{
- warn "Not a CVS directory, ignoring $directory\n";
- return 0;
+ push @files, find_stale_files($language);
}
- my @wmlfiles = ();
- my @htmlfiles = ();
- while (<ENTRIES>)
+
+ # remove or report the files
+ foreach my $file (@files)
{
- if (m'^/([^/]+.wml)/[0-9\.]+/.*/.*/$')
+ if ( $reallyremove )
{
- push @wmlfiles, $directory . '/' . $1;
+ remove_file( $file, \@languages );
}
- elsif (m'^/([^/]+.html)/[0-9\.]+/.*/.*/$')
+ else
{
- push @htmlfiles, $directory . '/' . $1;
+ report_file( $file, \@languages );
}
}
+ my $numfiles = @files;
+ print "\n$numfiles stale translations found.\n";
+ print "Use -d option to remove files.\n"
+ if @files and not $reallyremove;
+
+ # Done.
+ exit;
+}
+
+
+#############################################################
+# show help text
+sub show_help
+{
+ my $help = shift;
+
+ print $help if $help;
+
+ open(my $fd, '<:utf8', $0) or return;
+ while (my $line = <$fd>)
+ {
+ next if $line =~ m/^#!/;
+ next if $line =~ m/^##/;
+ last if $line =~ /^[^#\s]/;
+
+ chomp $line;
+ $line =~ s/^#\s+//;
+
+ print $line, "\n";
+ }
+ close($fd);
+
+ print "Run this script from the webwml directory to remove stale HTML files.\n\n";
+ print "Usage: $0 [-d]\n\n";
+ print " -d Remove files, just not report.\n";
+
+ exit defined($help) ? 1 : 0;
+}
+
+#############################################################
+# Find the stale html files in the specified directory
+sub find_stale_files
+{
+ # Get parameter.
+ my $dir = shift or die('No directory specified');
+
+ print "Recursing into `$dir'\n";
+
+ # the language subdir possibly doesn;t exist yet for newly
+ # started translations
+ return 0 unless -d $dir;
+
+ # Don't try to do anything in subdirectories of l10n.
+ return 0 if $dir =~ m'l10n/[^/]+$';
+
+ # create a list of *.html files and a hash of *.wml files in this translation
+ #my (%wmlfiles,@htmlfiles);
+ #find( sub { $wmlfiles{$File::Find::name}++ if -f and /\.wml$/ }, $dir );
+ #find( sub { push @htmlfiles, $File::Find::name if -f and /\.html$/ }, $dir );
+
+ my @htmlfiles = find_files_ext( $dir, 'html' );
+ my %wmlfiles = map { $_ => 1 } find_files_ext( $dir, 'wml' );
+
# Locate all HTML files, and find out which ones do not correspond
# to a WML file, and does not live in the CVS by itself.
- my @subdirs = ();
- my $count = 0;
- my $direntry;
- foreach $direntry (@entries)
+ my @toremove;
+ foreach my $htmlfile (sort @htmlfiles)
{
- # sitemap.??.html files should be ignored since they don't have a .wml
- # file, except in the english dir
- if (-f $direntry && $direntry =~ /\.html$/ && $direntry !~ /sitemap\..*\.html$/)
- {
- my ($haswml, $incvs) = (0, 0);
-
- # Check for WML file.
- my $source = $direntry;
- $source =~ s/\...(-..)?\.html$/.wml/;
- if ($source =~ /wml$/)
- {
- my $wmlfile;
- WMLS: foreach $wmlfile (@wmlfiles)
- {
- $haswml = 1, last WMLS
- if $wmlfile eq $source;
- }
-
- unless ($haswml)
- {
- # Check if WML file is in the directory, even if it is
- # not in the CVS (for auto-generated WML files).
- $haswml =1 if -f $source;
- }
- }
-
- unless ($haswml)
- {
- # Check if HTML file is in CVS by itself.
- my $htmlfile;
- HTMLS: foreach $htmlfile (@htmlfiles)
- {
- $incvs = 1, last HTMLS
- if $htmlfile eq $direntry;
- }
- }
-
- unless ($haswml || $incvs)
- {
- # File has no reason for being here.
- $count ++;
-
- # Name of file installed by make install.
- my $installed = $direntry;
- $installed =~ s(^\./[^/]*/)(../www/);
-
- # Name of corresponding ICS file for events.
- my $icslocal = $direntry;
- $icslocal =~ s/html$/ics/;
- my $icsinstalled = $installed;
- $icsinstalled =~ s/html$/ics/;
-
- # Extra symlinks for languages
- my $extra = $installed;
- $extra =~ s/\.no\.html$/.nb.html/;
-
- # Check for translations to other languages, they
- # need to have their .wml file touched
- my @translations = ();
- @translations = &findtranslations($source);
-
- # Remove or report.
- if ($opt_d)
- {
- if (-f $extra || -l $extra)
- {
- print "Removing $extra\n";
- unlink $extra
- or die "Unable to remove $extra: $!\n";
- }
- if (-f $installed)
- {
- print "Removing $installed\n";
- unlink $installed
- or die "Unable to remove $installed: $!\n";
- }
- if (-f $icsinstalled)
- {
- print "Removing $icsinstalled\n";
- unlink $icsinstalled
- or die "Unable to remove $icsinstalled: $!\n";
- }
- if (-f $icslocal)
- {
- print "Removing $icslocal\n";
- unlink $icslocal
- or die "Unable to remove $icslocal: $!\n";
- }
-
- # Touch translation sources to update the list of
- # translations on them
- foreach my $translation (@translations)
- {
- print "Touching $translation\n";
- system('/usr/bin/touch', $translation) == 0
- or warn "touch: error code $?";
- }
-
- print "Removing $direntry\n";
- unlink $direntry
- or die "Unable to remove $direntry: $!\n";
- }
- else
- {
- print "$direntry is stale\n";
- print " installed file is $installed\n";
- print " (does not exist)\n"
- unless -f $installed;
- print " and $extra\n"
- if (-f $extra || -l $extra) and $extra ne $installed;
- print " installed ICS file: $icsinstalled\n"
- if -f $icsinstalled;
- print " local ICS file: $icslocal\n"
- if -f $icslocal;
- foreach my $translation (@translations)
- {
- print " translation in $translation\n";
- }
- }
- }
- }
- elsif (-d $direntry && !($direntry =~ /CVS$/))
- {
- push @subdirs, $direntry;
- }
+ # the name of the wml file that this html file is potentially
+ # generated from
+ my $source = $htmlfile;
+ $source =~ s/(?:\.[-\w]+)?\.html$/.wml/
+ or die("Can't determine WML source file for `$htmlfile'");
+
+ # does the wml source file exist?
+ my $haswml = exists( $wmlfiles{$source} ) || -f $source || 0;
+
+ # is the html file checked in the VCS?
+ my $checkedin = vcs_file_info($htmlfile) ? 1 : 0;
+
+ #if ($checkedin)
+ #{ print "==> `$htmlfile' : `$source' : $haswml : $checkedin\n"; }
+
+ # we're done if the file has a corresponding wml source, or if the html
+ # file itself is checked in
+ next if $haswml or $checkedin;
+
+ # as a special exception, sitemaps don't have a wml source in the
+ # translation tree (they are generated from english/)
+ next if $htmlfile =~ m{/sitemap\.[-\w]+\.html$};
+
+ # File has no reason for being here.
+ push @toremove, $htmlfile;
+ }
+
+ return @toremove;
+}
+
+#############################################################
+# returns a list of filenames with the given extension
+sub find_files_ext
+{
+ my $dir = shift or die('Internal error: No dir specified');
+ my $ext = shift or die('Internal error: No ext specified');
+
+ my @files;
+ find( sub { push @files, $File::Find::name if -f and m/\.$ext$/ }, $dir );
+ return @files;
+}
+
+#############################################################
+# get the filenames of files related to the given htmlfile
+sub gather_file_info
+{
+ my $htmlfile = shift or die('Internal error: No htmlfile specified');
+ my $languages = shift or die('Internal error: No languages specified');
+
+ die("Not an html file: `$htmlfile'\n") unless $htmlfile =~ m/\.html$/;
+
+ my $wmlsrc = $htmlfile;
+ $wmlsrc =~ s{\.[-\w]+\.html$}{.wml};
+ die("No valid wml source for `$htmlfile' could be constructed\n")
+ if $wmlsrc eq $htmlfile;
+
+ # Name of file installed by make install.
+ my $installed = $htmlfile;
+ $installed =~ s{^[^/]+/}{}; # remove "dutch/" at the beginning
+ $installed = catfile(INSTALLDIR,$installed);
+
+ # Name of corresponding ICS file for events.
+ my $icslocal = $htmlfile;
+ $icslocal =~ s/html$/ics/;
+ my $icsinstalled = $installed;
+ $icsinstalled =~ s/html$/ics/;
+
+ # Extra symlinks for languages
+ my $extra = $installed;
+ $extra =~ s/\.no\.html$/.nb.html/ or $extra = '';
+ #what about en-us and en-gb ?
+
+ # Check for translations to other languages, they
+ # need to have their .wml file touched
+ my @translations = findtranslations($wmlsrc,$languages);
+
+ return ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations);
+}
+
+#############################################################
+# remove the given html file and all files related to it
+sub remove_file
+{
+ my $htmlfile = shift or die('Internal error: No htmlfile specified');
+ my $languages = shift or die('Internal error: No languages specified');
+
+ my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations)
+ = gather_file_info( $htmlfile, $languages );
+
+ if (-f $extra or -l $extra)
+ {
+ print "Removing $extra\n";
+ unlink $extra
+ or die "Unable to remove $extra: $!\n";
+ }
+ if (-f $installed)
+ {
+ print "Removing $installed\n";
+ unlink $installed
+ or die "Unable to remove $installed: $!\n";
+ }
+ if (-f $icsinstalled)
+ {
+ print "Removing $icsinstalled\n";
+ unlink $icsinstalled
+ or die "Unable to remove $icsinstalled: $!\n";
+ }
+ if (-f $icslocal)
+ {
+ print "Removing $icslocal\n";
+ unlink $icslocal
+ or die "Unable to remove $icslocal: $!\n";
}
- # Recurse into subdirectories.
- my $subdir;
- foreach $subdir (@subdirs)
+ # Touch translation sources to update the list of
+ # translations on them
+ if (@translations)
{
- $count += recurse($subdir);
+ utime(undef,undef,@translations)
+ or warn "touch: error code $?";
}
- return $count;
+ print "Removing $htmlfile\n";
+ unlink $htmlfile
+ or die "Unable to remove $htmlfile: $!\n";
+
}
-# Read the list of active languages from the Makefile
-sub readlanguages
+#############################################################
+# report files that would be removed
+sub report_file
{
- my $source = shift;
- my (@languages, $langsrc);
- open MAKE, '<', $source or die "Cannot read $source: $!\n";
- LANGUAGES: while (<MAKE>)
+ my $htmlfile = shift or die('Internal error: No htmlfile specified');
+ my $languages = shift or die('Internal error: No languages specified');
+
+ my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations)
+ = gather_file_info( $htmlfile, $languages );
+
+ print "$htmlfile is stale\n";
+ print " installed file is $installed";
+ print " (does not exist)" unless -f $installed;
+ print "\n";
+ print " and $extra\n"
+ if (-f $extra || -l $extra) and $extra ne $installed;
+ print " installed ICS file: $icsinstalled\n"
+ if -f $icsinstalled;
+ print " local ICS file: $icslocal\n"
+ if -f $icslocal;
+
+ foreach my $translation (@translations)
{
- if (/LANGUAGES := (.*)/)
- {
- $langsrc = $1;
- while ($langsrc =~ /\\$/)
- {
- my $nextline = <MAKE>;
- chomp $nextline;
- $langsrc =~ s/\\$/$nextline/;
- }
- }
+ print " translation in $translation\n";
}
- close MAKE;
-
- return split /\s+/, $langsrc;
}
+#############################################################
# Locate all translated copies of this wml file
sub findtranslations
{
- my $wml = shift;
+ my $wml = shift or die('Internal error: no wml file specified');
+ my $languages = shift or die('Internal error: no languages specified');
+
my @files;
# Remove the first component of the path (which contains the
# current language)
my $tail = $wml;
- $tail =~ s(^\./[^/]+/)();
+ $tail =~ s{^[^/]+/}{};
- # Locte all translated copies
- foreach my $language (@languages)
+ # Locate all translated copies
+ foreach my $language (@$languages)
{
- my $translated = "./$language/$tail";
- push @files, $translated if -f $translated;
+ my $translated = "$language/$tail";
+ push @files, $translated if -f $translated;
}
return @files;
}
+
+__END__

© 2014-2024 Faster IT GmbH | imprint | privacy policy