-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathchars.pl
executable file
·44 lines (44 loc) · 1.18 KB
/
chars.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env perl
# chars.pl - give a list of all characters for input files.
#
# Usage: chars.pl 'GLOB_PATTERN' [OUTDIR]
#
# For every file matching GLOB_PATTERN one tab-separated line is written to
# STDOUT:
#
#     <file name> TAB <total characters> TAB <char>:<count> <char>:<count> ...
#
# Files whose name ends in .xml have their spaces and markup tags removed and
# the five predefined XML entities decoded before counting; every other file
# is counted as plain text. A progress line per file goes to STDERR.
use strict;
use warnings;
use utf8;

binmode(STDOUT, ':utf8');
binmode(STDERR, ':utf8');

my $pattern = shift;
# The second positional argument is still consumed so existing command lines
# keep working, but nothing in this script reads or writes it.
my $OUTDIR = shift;

my @INFILES;
if (defined $pattern) {
    @INFILES = glob($pattern);
}
else {
    # Without a pattern there is nothing to do; say so instead of letting
    # glob(undef) emit an "uninitialized value" warning.
    print STDERR "Usage: $0 'GLOB_PATTERN' [OUTDIR]\n";
}

foreach my $file (@INFILES) {
    print STDERR "Processing $file\n";

    # Only the .xml suffix selects XML handling; everything else is text.
    my $format = ($file =~ m|\.xml$|) ? 'xml' : 'text';

    open(my $in, '<:utf8', $file)
        or die "FATAL ERROR: Cant find input file $file: $!\n";
    # Slurp the whole file; "local" keeps $/ intact for the rest of the run.
    my $txt = do { local $/; <$in> };
    close $in;
    $txt = '' unless defined $txt;

    my $count_of = count_chars($txt, $format);

    my $total = 0;
    my @chars;
    for my $key (sort keys %$count_of) {
        push @chars, "$key:$count_of->{$key}";
        $total += $count_of->{$key};
    }
    print "$file\t$total\t" . join(' ', @chars) . "\n";
}

# Reduce XML text to its character data: drop spaces, strip tags, and decode
# the predefined entities. Returns the resulting string.
sub xml_to_text {
    my ($txt) = @_;
    $txt =~ s| +||g;          # most spaces are fake spaces
    $txt =~ s|<[^>]+>||g;     # remove markup tags
    $txt =~ s|&lt;|<|g;
    $txt =~ s|&gt;|>|g;
    $txt =~ s|&apos;|'|g;
    $txt =~ s|&quot;|"|g;
    # Decoded last so that "&amp;lt;" becomes the text "&lt;", not "<".
    $txt =~ s|&amp;|&|g;
    return $txt;
}

# Count the characters of $txt, treating it as XML when $format is 'xml'.
# Returns a hash reference mapping each output key to its number of
# occurrences. Keys are the characters themselves, except:
#   - code points below 33 (controls and space) become "&#N;",
#   - "&" becomes "&amp;" so it cannot be mistaken for such an escape,
#   - ":" becomes "&#58;" because ":" separates key and count in the output.
sub count_chars {
    my ($txt, $format) = @_;
    $txt = xml_to_text($txt) if $format eq 'xml';

    my %count_of;
    for my $char (split //, $txt) {
        my $key = ord($char) < 33 ? '&#' . ord($char) . ';'
                : $char eq '&'    ? '&amp;'
                : $char eq ':'    ? '&#58;'
                :                   $char;
        $count_of{$key}++;
    }
    return \%count_of;
}