#!/usr/bin/perl

# marc2compare.pl - given two MARC files, extract a short list of fields in
#                   order to compare & contrast
#
# Usage: marc2compare.pl <old MARC file> <new MARC file>
#
# Reads every record of the new file, remembering (keyed on the 001 field) the
# Koha URL, the name heading, the LCCN, and the LOC URL. Then reads the old
# file and, for every identifier also found in the new file, records the old
# name heading. Finally prints one tab-delimited line per identifier to STDOUT.

# Eric Lease Morgan
# April 4, 2016 - first cut


use strict;
use warnings;

# configure
use constant KOHA => 'http://library.aarome.org/cgi-bin/koha/opac-authoritiesdetail.pl?authid=';
use constant LOC  => 'http://id.loc.gov/authorities/';

# require
use MARC::Batch;

# sanity check; a wrong invocation is an error, so complain on STDERR and
# return a non-zero status
my $old = $ARGV[ 0 ];
my $new = $ARGV[ 1 ];
if ( ! $new or ! $old ) {

    print STDERR "Usage: $0 <old MARC file> <new MARC file>\n";
    exit 1;

}

# initialize; each value is [ old name, Koha URL, new name, LCCN, LOC URL ]
my %db = ();

# process each record in the new file storing the result in a "database"
my $new_batch = MARC::Batch->new( 'USMARC', $new );
while ( my $record = $new_batch->next ) {

    # local identifier
    my $id   = identifier_of( $record, $new );
    my $koha = KOHA . $id;

    # LOC identifier; only the first 010 subfield a is used. Ask for the
    # subfield itself -- field( '010', 'a' ) would treat 'a' as a second tag
    # and hand back the whole 010 field, other subfields included.
    my $lccn = $record->subfield( '010', 'a' );
    $lccn = '' unless defined $lccn;
    $lccn =~ s/ +//g;

    my $loc = LOC;
    if    ( $lccn =~ /^n/ ) { $loc .= "names/$lccn" }
    elsif ( $lccn =~ /^s/ ) { $loc .= "subjects/$lccn" }
    else { die "Unknown initial value of 010 subfield a: $lccn. Call Eric.\n" }

    # process each name-like field; when a record has more than one, the
    # last one wins
    foreach my $field ( $record->field( '1..' ) ) {

        # create a database record, for later use; the old name is filled
        # in while reading the old file
        $db{ $id } = [ '', $koha, heading_of( $field ), $lccn, $loc ];

    }

}

# process each record in the old file
my $old_batch = MARC::Batch->new( 'USMARC', $old );
while ( my $record = $old_batch->next ) {

    # get the key, and find it in the database
    my $id    = identifier_of( $record, $old );
    my $entry = $db{ $id } or next;

    # process each name-like field, updating the database
    foreach my $field ( $record->field( '1..' ) ) {

        $$entry[ 0 ] = heading_of( $field );

    }

}

# dump the database
print "id\told name\tKoha URL\tnew name\tLCCN\tLOC URL\n";
foreach my $key ( sort keys %db ) {

    print $key, "\t", ( join "\t", @{ $db{ $key } } ), "\n";

}

# done
exit;


# identifier_of( $record, $file ) - return the content of the record's 001
# field; dies, naming the file, when the record has none because such a
# record can not be matched against anything
sub identifier_of {

    my ( $record, $file ) = @_;

    my $field = $record->field( '001' )
        or die "Found a record without an 001 field in $file. Call Eric.\n";

    return $field->as_string;

}


# heading_of( $field ) - build a display name from the first a, b, c, and d
# subfields of the given field, separated by (and normalized to) single spaces
sub heading_of {

    my ( $field ) = @_;

    # scalar() keeps just the first occurrence of each subfield; test with
    # defined/length so a legitimate value of "0" is not thrown away
    my @parts = grep { defined( $_ ) and length( $_ ) }
                map  { scalar $field->subfield( $_ ) } qw( a b c d );

    # munge
    my $heading = join ' ', @parts;
    $heading =~ s/ +/ /g;

    return $heading;

}