#!/usr/bin/perl

# urls.pl - extract the URLs from a file and report how often each occurs
#
# Reads the file named by the TWEETS constant line by line, takes the first
# URL found on each line, trims trailing punctuation from it, and prints a
# tab-delimited "count<TAB>url" list to STDOUT, most frequent URL first.

# Eric Lease Morgan
# September 13, 2011 - based on previous work at c4l11

# require
use strict;
use warnings;

# configure
use constant TWEETS => './urls.txt';

# initialize
my %urls = ();

# process the file; three-argument open with a lexical handle keeps the file
# name from ever being interpreted as part of the open mode
open( my $input, '<', TWEETS ) or die "Can't open " . TWEETS . ": $!\n";
while ( my $line = <$input> ) {

    # strip the line ending only (LF or CRLF); chop would also eat the last
    # character of a final line that has no trailing newline
    $line =~ s/\r?\n\z//;

    # a URL runs from its scheme up to the first space; lines without a URL
    # are skipped so a stale capture from an earlier line is never counted
    next unless $line =~ m{(https?://[^ ]*)};
    my $url = $1;

    # trim punctuation that commonly trails a URL inside a sentence; the
    # comma is stripped twice on purpose so a doubled ",," is removed too
    $url =~ s/\.$//;
    $url =~ s/\)$//;
    $url =~ s/,$//;
    $url =~ s/,$//;

    $urls{$url}++;

}

# clean up
close $input;

# output; sort by descending count, then by URL so ties print in a stable order
foreach my $url ( sort { $urls{$b} <=> $urls{$a} or $a cmp $b } keys %urls ) {

    print $urls{$url} . "\t$url\n";

}

# done
exit;