-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathlinkcheck.pl
executable file
·86 lines (78 loc) · 1.69 KB
/
linkcheck.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env perl
# This script checks the HTML files for bad internal links and poor
# formatting, such as single quotes '' around href in a link.
use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use Getopt::Long;
# This requires installation using "cpanm File::Slurper".
use File::Slurper qw!read_text!;
my $ok = GetOptions (
verbose => \my $verbose,
);
if (! $ok) {
print <<EOF;
--verbose - print debugging messages
EOF
exit;
}
binmode STDOUT, ":encoding(utf8)";
msg ("Reading glossary");
my $glossary = read_text ("$Bin/glossary.html");
my %gloss_id;
while ($glossary =~ /id="(.*)"/g) {
$gloss_id{$1} = 1;
}
my @links;
msg ("Reading files");
my @files = <*.html>;
for my $file (@files) {
msg ("\tReading $file");
my $text = read_text ($file);
my %ids;
while ($text =~ /(href|id)='(.*)'/g) {
my $type = $1;
my $id = $2;
print "Single quotes around $type $id in $file\n";
}
while ($text =~ /id="(.*?)"/g) {
my $id = $1;
$ids{$id} = 1;
if ($id =~ /^#/) {
print "$file: bad id $id with #\n";
}
}
while ($text =~ /href="(.*?)"/g) {
my $link = $1;
if ($link =~ /glossary\.html#(.*)/) {
my $id = $1;
if (! $gloss_id{$id}) {
print "Bad link in $file to glossary id $id\n";
}
}
if ($link =~ /^#(.*)/) {
my $id = $1;
if (! $ids{$id}) {
print "Bad link in $file to self-id $id\n";
}
}
if ($link !~ /^#/ &&
$link !~ /^[a-z_-]+\.html(?:#[a-z_-]+)?$/ &&
$link !~ m!^https?://! &&
$link !~ /viewer/ &&
$link !~ /site\.group/) {
print "$file: Possible bad link $link\n";
}
push @links, $link;
}
}
msg ("Finished");
exit;
sub msg
{
if (! $verbose) {
return;
}
print "@_\n";
}