aboutsummaryrefslogtreecommitdiffstats
path: root/remove_stale.pl
blob: c2488576e324832d2cc5c0aa56596168ebb3ee21 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#!/usr/bin/perl

# This script is still used in 2024

# This script searches through all the translation directories for HTML
# files not having a matching WML file, and removes those HTML files from
# both the local directory and the install directory. This is needed so that
# a removing a WML file from the repository causes the corresponding HTML file
# to go away.

# Originally written 2001-03-22 by Peter Krefting <peterk@debian.org>
# Revised in 2010 by Bas Zoetekouw <bas@debian.org>
# Updated in 2018 by Steve McIntyre <93sam@debian.org> to use the
# generic VCS infrastructure as part of the git migration.
# © Copyright 2001-2018 Software in the public interest, Inc.
# This program is released under the GNU General Public License, v2.

## $Id$

use strict;
use warnings;

use Getopt::Std;
use Data::Dumper;
use File::Spec::Functions;
use File::Find;

use FindBin;
FindBin::again();
use lib "$FindBin::Bin/Perl";

use Webwml::Langs;
use Local::VCS;


# directory where "make install" installs the website
use constant INSTALLDIR => '../www';

my $VCS = Local::VCS->new();

my $verbose = 0;
our $opts_v;

###############################################################
# "main"
{

	my %opts;
	show_help("Unknown option\n")     if not getopts('dhv:',\%opts);
	show_help()                       if exists $opts{'h'};
	show_help("Not in webwml root\n") if not -d 'english';

	my $reallyremove =  exists( $opts{'d'} );
	if (exists ($opts{'v'})) {
		$verbose = $opts{'v'};
	}

	# Read the list of languages
	my @languages = sort Webwml::Langs->new()->names();

	# Cache the repo history for performance
	print "Initialising VCS cache\n";
	$VCS->cache_repo();

	# check all subdirs to find stale html files
	my @files;
	foreach my $language (@languages)
	{
		push @files, find_stale_files($language);
	}


	# remove or report the files
	foreach my $file (@files)
	{
		if ( $reallyremove )
		{
			remove_file( $file, \@languages );
		}
		else
		{
			report_file( $file, \@languages );
		}
	}

	my $numfiles = @files;
	print "\n$numfiles stale translations found.\n";
	print "Use -d option to remove files.\n"
		if @files and not $reallyremove;

	# Done.
	exit;
}


# log very verbose messages
sub vvlog {
    if ($verbose >= 2) {
	print STDOUT $_[0] . "\n";
    }
}

# log verbose messages
sub vlog {
    if ($verbose >= 1) {
	print STDOUT $_[0] . "\n";
    }
}

#############################################################
# show help text
sub show_help
{
	my $help = shift;

	print $help if $help;

	open(my $fd, '<:utf8', $0) or return;
	while (my $line = <$fd>)
	{
		next if $line =~ m/^#!/;
		next if $line =~ m/^##/;
		last if $line =~ /^[^#\s]/;

		chomp $line;
		$line =~ s/^#\s+//;

		print $line, "\n";
	}
	close($fd);

	print "Run this script from the webwml directory to remove stale HTML files.\n\n";
	print "Usage: $0 [-d]\n\n";
	print "  -d  Remove files, just not report.\n";

	exit defined($help) ? 1 : 0;
}

#############################################################
# Find the stale html files in the specified directory
sub find_stale_files
{
	# Get parameter.
	my $dir       = shift or die('No directory specified');
	my $scanned_count = 0;
	my $remove_count = 0;

	print "Recursing into `$dir'\n";

	# the language subdir possibly doesn't exist yet for newly 
	# started translations
	return 0  unless  -d $dir;

	# create a list of *.html files and a hash of *.wml files in this translation
	#my (%wmlfiles,@htmlfiles);
	#find( sub { $wmlfiles{$File::Find::name}++     if -f and /\.wml$/  }, $dir );
	#find( sub { push @htmlfiles, $File::Find::name if -f and /\.html$/ }, $dir );

	my @htmlfiles =                 find_files_ext( $dir, 'html' );
	my %wmlfiles  = map { $_ => 1 } find_files_ext( $dir, 'wml' );

	# Locate all HTML files, and find out which ones do not correspond
	# to a WML file, and does not live in the VCS by itself.
	my @toremove;
	foreach my $htmlfile (sort @htmlfiles)
	{
		$scanned_count++;
		if (0 == ($scanned_count % 500)) {
			vlog("  scanned $scanned_count files, found $remove_count to remove");
		}
		vvlog("  Looking at $htmlfile");
		# the name of the wml file that this html file is potentially
		# generated from
		my $source = $htmlfile;
		$source =~ s/(?:\.[-\w]+)?\.html$/.wml/  
			or die("Can't determine WML source file for `$htmlfile'");

		# Don't try to do anything in subdirectories of l10n.
		next  if  $htmlfile =~ m{/international/l10n/po[-\w]*/[\w_\@]+\.[-\w]+\.html$};
		
		# Don't try to do anything in stats either.
		next  if  $htmlfile =~ m{/devel/website/stats/[-\w]+\.[-\w]+\.html$};

		# Don't try to remove yaboot-howto.
		next  if  $htmlfile =~ m{/ports/powerpc/inst/yaboot-howto.html};

		# as a special exception, sitemaps don't have a wml source in the
		# translation tree (they are generated from english/)
		next  if  $htmlfile =~ m{/sitemap\.[-\w]+\.html$};

		# does the wml source file exist?
		my $haswml = exists( $wmlfiles{$source} ) || -f $source || 0;
		next  if  $haswml;

		# is the html file checked in the VCS?
        my %file_info = $VCS->file_info( $htmlfile );
        my $checkedin = $file_info{'cmt_rev'} or 0;
        vvlog("is $htmlfile checked in the VCS? $checkedin");
		next  if  $checkedin;

		#if ($checkedin) 
		#{ print "==> `$htmlfile' : `$source' : $haswml : $checkedin\n"; }

		# File has no reason for being here.
		push @toremove, $htmlfile;
		$remove_count++;
		vlog("$htmlfile needs to be removed");
	}

	vlog("  scanned $scanned_count files, found $remove_count to remove");
	return @toremove;
}

#############################################################
# returns a list of filenames with the given extension
sub find_files_ext
{
	my $dir = shift or die('Internal error: No dir specified');
	my $ext = shift or die('Internal error: No ext specified');

	my @files;
	find( sub { push @files, $File::Find::name if -f and m/\.$ext$/ }, $dir );
	return @files;
}

#############################################################
# get the filenames of files related to the given htmlfile
sub gather_file_info 
{
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	die("Not an html file: `$htmlfile'\n") unless $htmlfile =~ m/\.html$/;

	my $wmlsrc = $htmlfile;
	$wmlsrc =~ s{\.[-\w]+\.html$}{.wml};
	die("No valid wml source for `$htmlfile' could be constructed\n")
		if $wmlsrc eq $htmlfile;

	# Name of file installed by make install.
	my $installed = $htmlfile;
	$installed =~ s{^[^/]+/}{}; # remove "dutch/" at the beginning
	$installed = catfile(INSTALLDIR,$installed);

	# Name of corresponding ICS file for events.
	my $icslocal = $htmlfile;
	$icslocal =~ s/html$/ics/;
	my $icsinstalled = $installed;
	$icsinstalled =~ s/html$/ics/;

	# Extra symlinks for languages
	my $extra = $installed;
	$extra =~ s/\.no\.html$/.nb.html/  or  $extra = '';
	#what about en-us and en-gb ?

	# Check for translations to other languages, they
	# need to have their .wml file touched
	my @translations = findtranslations($wmlsrc,$languages);

	return ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations);
}

#############################################################
# remove the given html file and all files related to it
sub remove_file
{
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) 
		= gather_file_info( $htmlfile, $languages );

	if (-f $extra or -l $extra)
	{
		print "Removing $extra\n";
		unlink $extra
			or die "Unable to remove $extra: $!\n";
	}
	if (-f $installed)
	{
		print "Removing $installed\n";
		unlink $installed
			or die "Unable to remove $installed: $!\n";
	}
	if (-f $icsinstalled)
	{
		print "Removing $icsinstalled\n";
		unlink $icsinstalled
			or die "Unable to remove $icsinstalled: $!\n";
	}
	if (-f $icslocal)
	{
		print "Removing $icslocal\n";
		unlink $icslocal
			or die "Unable to remove $icslocal: $!\n";
	}

	# Touch translation sources to update the list of
	# translations on them
	if (@translations)
	{
		utime(undef,undef,@translations)
			or warn "touch: error code $?";
	}

	print "Removing $htmlfile\n";
	unlink $htmlfile
		or die "Unable to remove $htmlfile: $!\n";

}

#############################################################
# report files that would be removed
sub report_file 
{
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) 
		= gather_file_info( $htmlfile, $languages );

	print "$htmlfile is stale\n";
	print "  installed file is $installed";
	print "  (does not exist)"  unless  -f $installed;
	print "\n";
	print "  and $extra\n"
		if (-f $extra || -l $extra) and $extra ne $installed;
	print "  installed ICS file: $icsinstalled\n"
		if -f $icsinstalled;
	print "  local ICS file: $icslocal\n"
		if -f $icslocal;

	foreach my $translation (@translations)
	{
		print "  translation in $translation\n";
	}
}

#############################################################
# Locate all translated copies of this wml file
sub findtranslations
{
	my $wml       = shift or die('Internal error: no wml file specified');
	my $languages = shift or die('Internal error: no languages specified');

	my @files;

	# Remove the first component of the path (which contains the
	# current language)
	my $tail = $wml;
	$tail =~ s{^[^/]+/}{};

	# Locate all translated copies
	foreach my $language (@$languages)
	{
		my $translated = "$language/$tail";
		push @files, $translated   if  -f $translated;
	}

	return @files;
}

__END__

© 2014-2024 Faster IT GmbH | imprint | privacy policy