#!/usr/bin/perl

# dat2cloud.pl - create rudimentary tag cloud against text mined mbox file

# Eric Lease Morgan
# April 10, 2010 - first investigations

use strict;
use warnings;

# configure
# Query templates; ##TAG## is replaced with the tag read from each input line.
use constant AUTHOR  => 'author%3D%22##TAG##%22';
use constant SUBJECT => 'subject%3D%22##TAG##%22';
use constant WORD    => '%22##TAG##%22';

# SRU search URL templates, one per mailing list; ##QUERY## is replaced with
# the filled-in query template.
use constant CODE4LIB => 'http://serials.infomotions.com/code4lib/sru/?operation=searchRetrieve&version=1.1&stylesheet=%2Fcode4lib%2Fsru%2Fstyle.xsl&query=##QUERY##+AND+date%3D2009';
use constant NGC4LIB  => 'http://serials.infomotions.com/ngc4lib/sru/?operation=searchRetrieve&version=1.1&stylesheet=%2Fngc4lib%2Fsru%2Fstyle.xsl&query=##QUERY##+AND+date%3D2009';

# require
use HTML::TagCloud::Sortable;

# get input: <type> <list> <data>
my $type = $ARGV[ 0 ];
my $list = $ARGV[ 1 ];
my $data = $ARGV[ 2 ];
if ( ! $type or ! $list or ! $data ) { usage() }

# initialize
my $cloud = HTML::TagCloud::Sortable->new;

# initialize query; an unknown type prints the usage text and exits
my %query_for = (
    author  => AUTHOR,
    subject => SUBJECT,
    word    => WORD,
);
usage() unless exists $query_for{ $type };
my $query_skeleton = $query_for{ $type };

# initialize url; an unknown list prints the usage text and exits
my %url_for = (
    code4lib => CODE4LIB,
    ngc4lib  => NGC4LIB,
);
usage() unless exists $url_for{ $list };
my $url_skeleton = $url_for{ $list };

# process input, one "tag:score" record per line
# Three-argument open with a lexical handle: the file name can no longer be
# interpreted as an open mode, and Perl's special DATA handle is not shadowed.
open my $fh, '<', $data or die "Can't open $data: $!\n";
while ( my $line = <$fh> ) {

    # strip the line ending so the score does not carry a trailing newline
    $line =~ s/\r?\n\z//;
    next if $line =~ /^\s*$/;

    # parse data
    my ( $tag, $score ) = split /:/, $line;
    if ( ! defined $tag or $tag eq '' or ! defined $score ) {
        warn "Skipping malformed line $.: $line\n";
        next;
    }

    # build the query
    # NOTE(review): the tag is inserted into the URL as-is; if tags can
    # contain spaces or reserved characters they probably need URL-escaping
    # first -- confirm against the format of the data file.
    my $query = $query_skeleton;
    $query =~ s/##TAG##/$tag/;

    # build the url
    my $url = $url_skeleton;
    $url =~ s/##QUERY##/$query/;

    # update the cloud
    $cloud->add( { name => $tag, url => $url, count => $score } );

}
close $fh;

# output
print $cloud->html_and_css( { limit      => 100,
                              sort_field => 'count',
                              sort_type  => 'numeric',
                              sort_order => 'desc' } );

# done
exit;


# Print a summary of the command-line arguments to standard output and exit.
# Never returns to the caller.
sub usage {

    print "Usage: $0 <type> <list> <data>\n";
    print "where <type> is one of:\n";
    print "  * author\n";
    print "  * subject\n";
    print "  * word\n";
    print "and where <list> is one of:\n";
    print "  * code4lib\n";
    print "  * ngc4lib\n";
    print "and where <data> is a file of tag:score lines\n";
    exit;

}