#!/usr/bin/perl

# tag.pl - produce a list of words (tokens), lemmas, and parts-of-speech from a text file
#
# Usage: tag.pl <file>
#
# Output: one line per token on STDOUT, with three tab-separated fields:
#   the lower-cased original token, its lemma, and its part-of-speech tag.
# Exits with status 1 and a usage message on STDERR when no file is given.

# February 4, 2011 - first investigations

# require
use strict;
use warnings;
use Lingua::TreeTagger;

# sanity check; test defined/length rather than truth so a file named "0" is accepted
my $file = $ARGV[ 0 ];
if ( ! defined $file || ! length $file ) {
    print STDERR "Usage: $0 <file>\n";
    exit 1;
}

# initialize, tag, and output
my $tagger      = Lingua::TreeTagger->new( 'language' => 'english' );
my $tagged_text = $tagger->tag_file( $file );
foreach my $token ( @{ $tagged_text->sequence() } ) {

    # NOTE(review): lemma/tag are presumably undefined for some tokens (e.g. markup
    # passed through by the tagger) -- confirm; mapping undef to '' keeps the printed
    # output the same as plain concatenation while staying quiet under "use warnings"
    my @fields = map { defined $_ ? $_ : '' } ( $token->original, $token->lemma, $token->tag );
    $fields[ 0 ] = lc $fields[ 0 ];

    print join( "\t", @fields ), "\n";

}

# done
exit;