#!/usr/bin/perl

# tweets2mysql.pl - ingest harvested tweets into a database
#
# Reads a tab-delimited file of tweets (tweet id, date, author id, text),
# reformats each date as 'YYYY-MM-DD HH:MM:SS' shifted back five hours,
# optionally echoes the record, and inserts it into the `tweets` table.

# Eric Lease Morgan <eric_morgan@infomotions.com>
# February 13, 2011 - based on work done at #c4l11
# March 19, 2011    - adjusted datetime by 5 hours to account for timezone

use strict;
use warnings;

# configure
use constant DSN      => 'dbi:mysql:c4l11';
use constant USERNAME => '---';
use constant PASSWORD => '---';
use constant TWEETS   => './tweets.txt';
use constant ECHO     => 1;

# Number of hours subtracted from each harvested timestamp. The sample
# timestamps carry a +0000 zone; presumably this converts them to the
# local (US Eastern) time of the event -- confirm before changing.
use constant TZ_SHIFT_HOURS => 5;

# requires
use DBI;
use DateTime;

# Map three-letter month abbreviations to month numbers (Jan => 1 ... Dec => 12).
# Built from an ordered list so that no month can be mis-numbered by hand.
my @MONTH_NAMES = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
my %MONTH_NUMBER_FOR;
@MONTH_NUMBER_FOR{@MONTH_NAMES} = ( 1 .. 12 );

# initialize; fail early if the database is unreachable
my $dbh = DBI->connect( DSN, USERNAME, PASSWORD )
    or die "Can't connect to " . DSN . ": $DBI::errstr\n";

# Prepare the insert once. Placeholders let the driver quote every value,
# so tweets containing quotes can neither break nor inject into the SQL.
my $insert = $dbh->prepare(
    'INSERT INTO `tweets` ( `tweet_id`, `datetime`, `author_id`, `tweet` ) '
  . 'VALUES ( ?, ?, ?, ? )'
) or die "Can't prepare insert statement: " . $dbh->errstr . "\n";

# open the tweets file and process every entry
open( my $tweets_fh, '<', TWEETS ) or die "Can't open " . TWEETS . ": $!\n";

LINE:
while ( my $line = <$tweets_fh> ) {

    # chomp (not chop) so a final line lacking a newline keeps its last character
    chomp $line;

    my ( $tweet_id, $stamp, $author_id, $tweet ) = split /\t/, $line;

    # date looks like: Tue, 08 Feb 2011 22:23:53 +0000
    # the leading weekday and the trailing zone are not needed
    my ( undef, $day, $month_name, $year, $time )
        = split / /, ( defined $stamp ? $stamp : '' );
    $month_name = '' unless defined $month_name;

    # map the month; report and skip records whose month is not recognized
    my $month = $MONTH_NUMBER_FOR{$month_name};
    if ( !defined $month ) {
        print "Unknown value for month: $month_name. \n($line) Call Eric.\n";
        next LINE;
    }

    # shift the time back TZ_SHIFT_HOURS hours to account for timezone
    my ( $hour, $minute, $second ) = split /:/, $time;
    my $dt = DateTime->new(
        year   => $year,
        month  => $month,
        day    => $day,
        hour   => $hour,
        minute => $minute,
        second => $second,
    );
    $dt->subtract( hours => TZ_SHIFT_HOURS );

    # datestamp, e.g. 2011-02-08 17:23:53
    my $datetime = $dt->strftime('%F %T');

    # echo
    if (ECHO) {
        print " tweet id: $tweet_id\n";
        print " date stamp: $datetime\n";
        print " author id: $author_id\n";
        print " tweet: $tweet\n";
        print "\n";
    }

    # update database; a failed insert is reported by DBI (PrintError is
    # on by default) and processing continues with the next record
    $insert->execute( $tweet_id, $datetime, $author_id, $tweet );
}

# done
close $tweets_fh;
$dbh->disconnect;
exit;