#! /usr/bin/perl
#
# Count how often each whitespace-separated word occurs in a text file and
# print every distinct word with its usage count (ordered by sort_values),
# followed by the total and distinct word counts.
#
# Usage: pass the name of the text file as the first command-line argument.

use strict;
use warnings;

my %word_list;          # this hash will have an entry for every distinct word
my $count       = 0;    # count of distinct words
my $total_words = 0;    # total # of words used

my $file = shift;       # name of text file to parse
die "Usage: $0 <text-file>\n" unless defined $file;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and a failure is reported instead of silently
# producing an empty report.
open my $fh, '<', $file
    or die "Cannot open '$file': $!\n";

while (my $line = <$fh>) {                # for each line in the file
    for my $word (split ' ', $line) {     # for each word on that line
        # Strip periods, double quotes, commas and apostrophes.
        $word =~ tr/.",'//d;

        # next unless $word =~ /ly/;      # remove the first # sign if you want to look for adverbs

        # A token made only of stripped punctuation is not a word.
        next if $word eq '';

        $word_list{$word}++;              # first sighting autovivifies to 1
        $total_words++;                   # increment counter for total # of words
    }
}

close $fh
    or warn "Cannot close '$file': $!\n";

# The report is printed once, after the whole file has been read, so the
# distinct-word counter is not inflated by repeated passes over the hash.
print "\nWord usage in $file\n\n\n";

# Get the keys, sort them by value, and print one line per distinct word.
foreach my $key (sort sort_values keys %word_list) {
    print "$word_list{$key} \t $key\n";
    $count++;                             # increment counter for distinct # of words
}

print "\n\nTotal of $total_words words, $count distinct words used\n";
# Comparator for sort(): orders the keys of %word_list by their usage count,
# lowest count first. Keys with equal counts are ordered alphabetically so
# the result is the same from run to run (hash key order is arbitrary).
#
# Takes no arguments; it reads the package variables $a and $b that sort()
# sets, so it must be used as "sort sort_values LIST".
# Returns a negative, zero or positive value as sort() expects.
sub sort_values {
    return $word_list{$a} <=> $word_list{$b}
        || $a cmp $b;
}