#!/usr/bin/perl

# harvest.pl - given a set of URI's, cache MARCXML locally
#
# Reads a tab-delimited database file (named on the command line), and for
# each record that has a URI, downloads "<uri>.marcxml.xml" into "<id>.xml"
# in the current directory. Records whose output file already exists are
# skipped, so the script can be re-run to pick up where it left off.

# Eric Lease Morgan
# March 29, 2016 - first cut

# require
use strict;
use warnings;

# configure; the output file name and the URI are appended as two further
# arguments, and the whole list is run without a shell
use constant WGET => ( 'wget', '-q', '-O' );

# sanity check
my $db = $ARGV[ 0 ];
if ( ! $db ) {

    print "Usage: $0 <db>\n";
    exit;

}

# open the database file and process each record
open my $list, '<', $db or die "Can't open database file ($db): $!\n";
my $index = 0;
while ( my $record = <$list> ) {

    # parse; chomp (not chop) so a final line lacking a newline is not truncated
    chomp $record;
    my ( $id, $briefName, $date, $fullName, $tries, $total, $authorizedName, $uri, $lccn ) = split /\t/, $record;

    # only process found items
    next if ( ! $uri );

    # configure
    my $file = $id . '.xml';
    $uri     = $uri . '.marcxml.xml';

    # do the work only if the file does not exist
    next if ( -e $file );

    # create the command as an argument list, so characters in the
    # database-supplied id and URI are never interpreted by a shell
    my @cmd = ( WGET, $file, $uri );

    # increment
    $index++;

    # do the work; the trailing carriage return keeps progress on one line
    print "\t$index\t@cmd\r";
    if ( system( @cmd ) != 0 ) {

        # wget -O creates the output file even when the download fails;
        # remove it so the "-e $file" test above retries this record next run
        my $reason = ( $? == -1 ) ? "could not run wget: $!" : 'wget exit status ' . ( $? >> 8 );
        warn "\nCan't fetch $uri ($reason)\n";
        unlink $file if ( -e $file );

    }

}

# done
close $list;
exit;