#!/usr/bin/perl

# build.pl - create an epub file

# Eric Lease Morgan
# February 28, 2010 - first investigations (version 0.01)


# configure
use constant STYLE => '/home/eric/sandbox/epub/etc/tei2xhtml.xsl';
use constant LOGO  => '/home/eric/sandbox/epub/etc/logo.gif';

# require
use strict;
use warnings;
use Carp qw( croak );
use File::Copy qw( copy );
use POSIX qw( strftime );
use XML::XPath;
use XML::LibXML;
use XML::LibXSLT;

# sanity check; both the output directory and the TEI file are required
my $directory = $ARGV[ 0 ];
my $xml       = $ARGV[ 1 ];
if ( ! $xml or ! $directory ) {

    print "Usage: $0 <directory> <xml>\n";
    exit;

}

# extract metadata from the TEI header
my $xp            = XML::XPath->new( filename => $xml );
my $title         = $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/titleStmt/title' );
my $creator       = $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/titleStmt/author' );
my $publisher     = $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/publicationStmt/publisher' );
my $creation_date = $xp->getNodeText( '/TEI.2/teiHeader/profileDesc/creation/date' );
my $identifier    = $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/publicationStmt/idno/@type' )
                  . '-'
                  . $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/publicationStmt/idno' );
my $rights        = $xp->getNodeText( '/TEI.2/teiHeader/fileDesc/publicationStmt/availability/p' );
my $today         = today();
my @subjects      = ();
foreach my $item ( $xp->findnodes( '/TEI.2/teiHeader/profileDesc/textClass/keywords/list/item' )->get_nodelist ) {

    push @subjects, $item->string_value();

}

# echo
print "Metadata:\n";
print " title - $title\n";
print " creator - $creator\n";
print " publisher - $publisher\n";
print " creation date - $creation_date\n";
print " id - $identifier\n";
print " rights - $rights\n";
print " today - $today\n";
foreach my $subject ( @subjects ) { print " subject - $subject\n" }

# placeholder values; everything is XML-escaped because it is inserted into
# XML documents, either as element content or as an attribute value
# NOTE(review): the <dc:subject> wrapper is reconstructed -- the markup was
# missing from the reviewed copy of this script; confirm against the original
my %value_for = (
    TITLE        => escape_xml( $title ),
    CREATOR      => escape_xml( $creator ),
    PUBLISHER    => escape_xml( $publisher ),
    CREATIONDATE => escape_xml( $creation_date ),
    IDENTIFIER   => escape_xml( $identifier ),
    RIGHTS       => escape_xml( $rights ),
    TODAY        => escape_xml( $today ),
    SUBJECTS     => join( '', map { '    <dc:subject>' . escape_xml( $_ ) . "</dc:subject>\n" } @subjects ),
);

# create home directory
make_directory( $directory );

# create mime-type file; no trailing newline is written
print "Creating $directory/mimetype\n";
write_file( "$directory/mimetype", 'mimetype file', mimetype() );

# container.xml
print "Creating $directory/META-INF/container.xml\n";
make_directory( "$directory/META-INF" );
write_file( "$directory/META-INF/container.xml", 'container.xml', container() );

# content.opf
# NOTE(review): the UTF-8 layer assumes XML::XPath hands back decoded
# character strings -- confirm
print "Creating $directory/OPS/content.opf\n";
make_directory( "$directory/OPS" );
write_file( "$directory/OPS/content.opf", 'content.opf', fill_template( opf(), \%value_for ), ':encoding(UTF-8)' );

# content.ncx
print "Creating $directory/OPS/content.ncx\n";
write_file( "$directory/OPS/content.ncx", 'content.ncx', fill_template( ncx(), \%value_for ), ':encoding(UTF-8)' );

# content.xml; transform the TEI into XHTML with the configured stylesheet
print "Creating OPS/content.xml\n";
my $parser     = XML::LibXML->new;
my $xslt       = XML::LibXSLT->new;
my $source     = $parser->parse_file( $xml )      or croak "Can't parse $xml";
my $style      = $parser->parse_file( STYLE )     or croak "Can't parse " . STYLE;
my $stylesheet = $xslt->parse_stylesheet( $style ) or croak "Can't compile " . STYLE;
my $results    = $stylesheet->transform( $source ) or croak "Can't transform $xml";
write_file( "$directory/OPS/content.xml", 'content.xml', $stylesheet->output_string( $results ) );

# add logo; best effort -- hard link when possible, copy otherwise (a hard
# link cannot cross filesystems), and leave an existing logo alone on re-runs
print "Adding logo\n";
make_directory( "$directory/OPS/images" );
my $logo = "$directory/OPS/images/logo.gif";
if ( ! -e $logo ) {

    link( LOGO, $logo ) or copy( LOGO, $logo ) or warn "Can't add logo: $!\n";

}

# done
print "Done\n";
exit;


# create a directory unless it already exists; die when it can not be made
sub make_directory {

    my ( $path ) = @_;
    return if -d $path;
    mkdir $path or die "Can't create directory $path: $!\n";
    return;

}


# write $content to $path, replacing any existing file; $label names the file
# in error messages and $layer is an optional PerlIO layer, such as
# ':encoding(UTF-8)'; dies when the file can not be opened or closed
sub write_file {

    my ( $path, $label, $content, $layer ) = @_;
    my $mode = defined $layer ? ">$layer" : '>';
    open my $out, $mode, $path or die "Can't open $label: $!\n";
    print {$out} $content;
    close $out or die "Can't close $label: $!\n";
    return;

}


# escape the characters that are special in XML content and attribute values;
# an undefined value becomes the empty string
sub escape_xml {

    my ( $text ) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;

}


# replace every ##NAME## placeholder in $template with the matching value from
# the given hash reference; unknown placeholders are left untouched, and
# because the replacement is done in a single pass, inserted values are never
# re-scanned for placeholders
sub fill_template {

    my ( $template, $values ) = @_;
    $template =~ s/##([A-Z]+)##/ exists $values->{ $1 } ? $values->{ $1 } : "##$1##" /ge;
    return $template;

}


# return the contents of META-INF/container.xml, pointing at OPS/content.opf
# NOTE(review): the template body was missing from the reviewed copy of this
# script; it is reconstructed from the OCF container format -- confirm
sub container {

    return <<'EOF';
<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>
EOF

}


# return the contents of the mimetype file
sub mimetype { return "application/epub+zip" }


# return the template for OPS/content.opf, with ##NAME## placeholders
# NOTE(review): only the placeholders, their order, and the literal "en"
# survived in the reviewed copy; the surrounding markup (including manifest
# and spine) is reconstructed from the OPF 2.0 format -- confirm
sub opf {

    return <<'EOF';
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="bookid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>##TITLE##</dc:title>
    <dc:creator>##CREATOR##</dc:creator>
    <dc:date opf:event="creation">##CREATIONDATE##</dc:date>
    <dc:publisher>##PUBLISHER##</dc:publisher>
    <dc:date opf:event="publication">##TODAY##</dc:date>
##SUBJECTS##    <dc:rights>##RIGHTS##</dc:rights>
    <dc:identifier id="bookid">##IDENTIFIER##</dc:identifier>
    <dc:language>en</dc:language>
  </metadata>
  <manifest>
    <item id="ncx" href="content.ncx" media-type="application/x-dtbncx+xml"/>
    <item id="content" href="content.xml" media-type="application/xhtml+xml"/>
    <item id="logo" href="images/logo.gif" media-type="image/gif"/>
  </manifest>
  <spine toc="ncx">
    <itemref idref="content"/>
  </spine>
</package>
EOF

}


# return the template for OPS/content.ncx, with ##NAME## placeholders
# NOTE(review): only the placeholders and their order survived in the reviewed
# copy; the surrounding markup is reconstructed from the NCX 2005-1 format --
# confirm
sub ncx {

    return <<'EOF';
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
  <head>
    <meta name="dtb:uid" content="##IDENTIFIER##"/>
    <meta name="dtb:depth" content="1"/>
    <meta name="dtb:totalPageCount" content="0"/>
    <meta name="dtb:maxPageNumber" content="0"/>
  </head>
  <docTitle><text>##TITLE##</text></docTitle>
  <docAuthor><text>##CREATOR##</text></docAuthor>
  <navMap>
    <navPoint id="content" playOrder="1">
      <navLabel><text>##TITLE##</text></navLabel>
      <content src="content.xml"/>
    </navPoint>
  </navMap>
</ncx>
EOF

}


# return the current local date as YYYY-MM-DD, with month and day zero-padded
sub today {

    return strftime( '%Y-%m-%d', localtime( time ) );

}