diff --git a/MANIFEST b/MANIFEST index 2575a79a5e..16418b4736 100644 --- a/MANIFEST +++ b/MANIFEST @@ -156,6 +156,27 @@ lib/LaTeXML/Util/Transform.pm lib/LaTeXML/Util/Unicode.pm lib/LaTeXML/Util/WWW.pm +#================================================== +# BibTeX Module +#================================================== +lib/LaTeXML/BibTeX.pm +lib/LaTeXML/BibTeX/BibStyle.pm +lib/LaTeXML/BibTeX/BibStyle/Precompiled.pm +lib/LaTeXML/BibTeX/BibStyle/StyCommand.pm +lib/LaTeXML/BibTeX/BibStyle/StyString.pm +lib/LaTeXML/BibTeX/Bibliography.pm +lib/LaTeXML/BibTeX/Bibliography/BibEntry.pm +lib/LaTeXML/BibTeX/Bibliography/BibField.pm +lib/LaTeXML/BibTeX/Common/Object.pm +lib/LaTeXML/BibTeX/Common/StreamReader.pm +lib/LaTeXML/BibTeX/Runtime.pm +lib/LaTeXML/BibTeX/Runtime/Buffer.pm +lib/LaTeXML/BibTeX/Runtime/Builtins.pm +lib/LaTeXML/BibTeX/Runtime/Entry.pm +lib/LaTeXML/BibTeX/Runtime/Names.pm +lib/LaTeXML/BibTeX/Runtime/Strings.pm +lib/LaTeXML/BibTeX/Runtime/Utils.pm + #================================================== # Document Model #================================================== diff --git a/lib/LaTeXML/BibTeX.pm b/lib/LaTeXML/BibTeX.pm new file mode 100644 index 0000000000..e28e4b95ea --- /dev/null +++ b/lib/LaTeXML/BibTeX.pm @@ -0,0 +1,124 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX | # +# | Make an bibliography from cited entries | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::BibTeX; +use strict; +use warnings; +use LaTeXML::Util::Pathname; +use LaTeXML::Common::XML; +use LaTeXML::Common::Error; +use Encode; +use LaTeXML::BibTeX::BibStyle; +use LaTeXML::BibTeX::Bibliography; +use LaTeXML::BibTeX::Runtime; +use LaTeXML::BibTeX::Runtime::Buffer; +use LaTeXML::BibTeX::Runtime::Utils; + +use Module::Load; + +# options: searchpaths +sub new { + my ($class, %options) = @_; + return bless {%options}, $class; } + +sub loadStyle { + my ($self, $stylename) = @_; + my $stage = "Reading bst file"; + ProgressSpinup($stage); + $$self{style} = LaTeXML::BibTeX::BibStyle->new($stylename, $$self{searchpaths}); + ProgressSpindown($stage); + return $$self{style}; } + +sub loadBibliographies { + my ($self, @bibfiles) = @_; + my @bibs = (); + foreach my $bibfile (@bibfiles) { + my $stage = "Parsing $bibfile"; + ProgressSpinup($stage); + # find the bibfile, or error out and try the next one + my $bibpath = pathname_find($bibfile, paths => $$self{searchpaths}, types => ['bib']) + || pathname_kpsewhich("$bibfile.bib"); + if (!defined($bibpath)) { + Error('missing_file', $bibfile, undef, "Can't find Bibliography file $bibfile"); + next; } + # open the bibfile, or cause a fatal error + my $bib = LaTeXML::BibTeX::Bibliography->new($bibfile, $bibpath); + return unless $bib; + push(@bibs, $bib); + ProgressSpindown($stage); } + $$self{bibliographies} = [@bibs]; + return $$self{bibliographies}; } + +#====================================================================== +# given an appropriate context, emulate what BibTeX does and produce a bbl string +# returns a pair ($buffer, $runtime) +sub run { + my ($self, $cites) = @_; + # create a string to write things into + my $bblbuffer = ""; + open(my $ofh, '>', \$bblbuffer); +## binmode($ofh, ":utf8"); + #====================================================================== + # prepare the code to be run + my $macro = 'lxBibitemFrom'; # huh? + my $btbuffer = LaTeXML::BibTeX::Runtime::Buffer->new($ofh, 0, $macro); + # Create a configuration that optionally wraps things inside a macro + my $runtime = LaTeXML::BibTeX::Runtime->new(undef, $btbuffer, $$self{bibliographies}, [@$cites]); + #====================================================================== + # and run the code + my $stage = "Running BibTeX"; + ProgressSpinup($stage); + my $ok = 0; + eval { + $runtime->initContext; + $runtime->run($$self{style}->getProgram); + $ok = 1; }; + Error('bibtex', 'runtime', undef, $@) unless $ok; + $btbuffer->finalize; + ProgressSpindown($stage); + return unless $ok; + # prepend the bbl preamble + my $bblPreamble = $self->buildBBLPreamble($runtime); + $bblbuffer = $bblPreamble . $bblbuffer if defined($bblPreamble); + return $bblbuffer, $runtime; } + +# build the preamble +# NOTE: Hmmmmmm.... +sub buildBBLPreamble { + my ($self, $runtime) = @_; + my $buffer = '\makeatletter'; + my $entries = $runtime->getEntries; + my ($field); + foreach my $entry (@$entries) { + # begin the hook + $buffer .= '\expandafter\def\csname lx@bibentry@taghook@' . $entry->getKey . '\endcsname{'; + # key, bibtype + $buffer .= '\lx@tag@intags[key]{' . $entry->getKey . '}'; + $buffer .= '\lx@tag@intags[bibtype]{' . $entry->getType . '}'; + # author + $field = $entry->getPlainField('author'); + $buffer .= '\lx@tag@intags[authors]{' . $field . '}' if defined($field); + # editor + $field = $entry->getPlainField('editor'); + $buffer .= '\lx@tag@intags[editors]{' . $field . '}' if defined($field); + # title + $field = $entry->getPlainField('title'); + $buffer .= '\lx@tag@intags[title]{' . $field . '}' if defined($field); + # year + $field = $entry->getPlainField('year'); + $buffer .= '\lx@tag@intags[year]{' . $field . '}' if defined($field); + # close the hook + $buffer .= '}'; + } + $buffer .= '\makeatother'; + return $buffer; } +#====================================================================== +1; diff --git a/lib/LaTeXML/BibTeX/BibStyle.pm b/lib/LaTeXML/BibTeX/BibStyle.pm new file mode 100644 index 0000000000..19810f0f19 --- /dev/null +++ b/lib/LaTeXML/BibTeX/BibStyle.pm @@ -0,0 +1,248 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::BibStyle. | # +# | A Parser for .bst files | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef); + +package LaTeXML::BibTeX::BibStyle; +use strict; +use warnings; +use LaTeXML::Common::Error; +use LaTeXML::Util::Pathname; +use LaTeXML::BibTeX::Common::StreamReader; +use LaTeXML::BibTeX::BibStyle::StyString; +use LaTeXML::BibTeX::BibStyle::StyCommand; + +# compiles the bst file or errors out +## sub compileBst { +## my ($self, $doc, $style) = @_; +sub new { + my ($class, $style, $searchpaths) = @_; + # try to find the bst file, if not fallback to a precompiled one + my $bstfile = pathname_find($style, paths => $searchpaths, types => ['bst']) + || pathname_kpsewhich("$style.bst"); + my $program; + if (defined($bstfile)) { + # we found the file => open it + eval { + my $reader = LaTeXML::BibTeX::Common::StreamReader->newFromLTXML($style, $bstfile); + Fatal('missing_file', $style, undef, "Unable to open Bibliography Style File $bstfile") + unless (defined($reader)); + $program = readFile($reader); + $reader->finalize; }; } + if (!defined $program) { + # we did not find it => fallback to the default + Warn('missing_file', $style, undef, + "Can't find Bibliography Style '$style'; Using builtin default"); + require LaTeXML::BibTeX::BibStyle::Precompiled; + $program = $LaTeXML::BibTeX::BibStyle::Precompiled::DEFAULT; } + return bless { + name => $style, pathname => $bstfile, + program => $program }, $class; } + +sub getStyle { + my ($self) = @_; + return $$self{name}; } + +sub getProgram { + my ($self) = @_; + return $$self{program}; } + +# ======================================================================= # +# Parsing Commands +# ======================================================================= # + +# read a .bst file and return the list of entries +# is not smart, and returns the first error if it can not understand the file +sub readFile { + my ($reader) = @_; + my @commands = (); + # reads commands + while (my $command = readCommand($reader)) { + push(@commands, $command); } + return [@commands]; } + +# eats all spaces or comments +sub skipSpacesOrComments { + my ($reader) = @_; + my ($char); + while (1) { + # eat spaces and check if there is a % comment char + $reader->skipSpaces; + $char = $reader->peekChar; + # return if we are not '%' + last unless defined($char); + last unless $char eq '%'; + # skip until a new line happens + $reader->readCharWhile(sub { $_[0] ne "\n" }); } + return; } + +# commands and how many arguments +our %COMMAND_ARGS = ( + ENTRY => 3, + EXECUTE => 1, + FUNCTION => 2, + INTEGERS => 1, + ITERATE => 1, + MACRO => 2, + READ => 0, + REVERSE => 1, + SORT => 0, + STRINGS => 1); + +# read a single command from the input +# if it exists +sub readCommand { + my ($reader) = @_; + # skip spaces, and check that we have something left to read + skipSpacesOrComments($reader); + my $char = $reader->peekChar; + return unless defined($char); + # read the command name + my $name = readLiteral($reader); + return unless $name; + # figure out how many argumeents the command takes + my $command = $name->getValue; + return Error('bibtex', 'bstparse', $reader->getLocator, 'unknown command ' . $command) + unless exists($COMMAND_ARGS{$command}); + my $nargs = $COMMAND_ARGS{$command}; + # and read them + my @arguments = (); + my $argument; + if ($nargs > 0) { + foreach my $i (1 .. $nargs) { + skipSpacesOrComments($reader); + $argument = readBlock($reader); + return unless $argument; + push(@arguments, $argument); } } + # get the ending position of the last arguments + my $locator = $name->getLocator; + $locator = $locator->merge($argument->getLocator) if defined($argument); + return LaTeXML::BibTeX::BibStyle::StyCommand->new($command, [@arguments], $locator); } +# ======================================================================= # +# Parsing Blocks +# ======================================================================= # + +# read any valid code from the sty file +sub readAny { + my ($reader) = @_; + # peek at the next char + my $char = $reader->peekChar; + return Error('bibtex', 'bstparse', $reader->getLocator, 'unexpected end of input while reading') + unless defined($char); + # check what it is + if ($char eq '#') { return readNumber($reader); } + elsif ($char eq "'") { return readReference($reader); } + elsif ($char eq '"') { return readQuote($reader); } + elsif ($char eq '{') { return readBlock($reader); } + else { return readLiteral($reader); } } + +sub readBlock { + my ($reader) = @_; + # read the opening brace + my $char = $reader->readChar; + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected "{" while reading block') + unless defined($char) && $char eq '{'; + my $locator = $reader->getLocator; + my @values = (); + skipSpacesOrComments($reader); + # if the next char is '}', finish + $char = $reader->peekChar; + return Error('bibtex', 'bstparse', $reader->getLocator, + 'unexpected end of input while reading block') + unless defined($char); + # read until we find a closing brace + while ($char ne '}') { + my $value = readAny($reader); + return unless $value; + push(@values, $value); + + # skip all the spaces and read the next character + skipSpacesOrComments($reader); + $char = $reader->peekChar; + return Error('bibtex', 'bstparse', $reader->getLocator, + 'unexpected end of input while reading block') + unless defined($char); } + $reader->readChar; + # we can add +1, because we did not read a \n + return LaTeXML::BibTeX::BibStyle::StyString->new('BLOCK', [@values], + $locator->merge($reader->getLocator)); } + +# reads a number, consisting of numbers, from the input +sub readNumber { + my ($reader) = @_; + # read anything that's not a space + my $char = $reader->readChar; + return Error('bibtex', 'bstparse', $reader->getLocator, + 'expected "#" while reading number ') + unless defined($char) && $char eq '#'; + my $locator = $reader->getLocator; + my $sign = $reader->peekChar; + return Error('bibtex', 'bstparse', $reader->getLocator, + 'unexpected end of input while reading number') + unless defined($sign); + + if ($sign eq '-' or $sign eq '+') { + $reader->readChar; } + else { + $sign = ''; } + my $literal = $reader->readCharWhile(sub { $_[0] =~ /\d/; }); + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected a non-empty number') + if $literal eq ""; + return LaTeXML::BibTeX::BibStyle::StyString->new('NUMBER', ($sign . $literal) + 0, + $locator->merge($reader->getLocator)); } + +# Reads a reference, delimited by spaces, from the input +sub readReference { + my ($reader) = @_; + my $char = $reader->readChar; + return Error('bibtex', 'bstparse', $reader->getLocator, + 'expected "\'" while reading reference') + unless defined($char) && $char eq "'"; + my $locator = $reader->getLocator; + # read anything that's not a space and not the end of a block + my $reference = $reader->readCharWhile(sub { $_[0] =~ /[^%\s\}]/; }); + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected a non-empty argument') + if $reference eq ""; + return LaTeXML::BibTeX::BibStyle::StyString->new('REFERENCE', $reference, + $locator->merge($reader->getLocator)); } + +# Reads a literal, delimited by spaces, from the input +sub readLiteral { + my ($reader) = @_; + # read anything that's not a space or the boundary of a block + my $locator = $reader->getLocator; + my $literal = $reader->readCharWhile(sub { $_[0] =~ /[^%\s\{\}]/; }); + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected a non-empty literal') + unless $literal; + return LaTeXML::BibTeX::BibStyle::StyString->new('LITERAL', $literal, + $locator->merge($reader->getLocator)); } + +# read a quoted quote from reader +# does not skip any spaces +sub readQuote { + my ($reader) = @_; + # read the first quote, or die if we are at the end + my $char = $reader->readChar; + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected to find an \'"\'') + unless defined($char) && $char eq '"'; + my $locator = $reader->getLocator; + # record the starting position and read until the next quote + my $result = $reader->readCharWhile(sub { $_[0] =~ /[^"]/ }); + return Error('bibtex', 'bstparse', $reader->getLocator, 'unexpected end of input in quote') + if !defined $char; + # read the end quote, or die if we are at the end + $char = $reader->readChar; + return Error('bibtex', 'bstparse', $reader->getLocator, 'expected to find an \'"\'') + unless defined($char) && $char eq '"'; + # we can add a +1 here, because we did not read a \n + return LaTeXML::BibTeX::BibStyle::StyString->new('QUOTE', $result, + $locator->merge($reader->getLocator)); } + +#====================================================================== + +1; diff --git a/lib/LaTeXML/BibTeX/BibStyle/Precompiled.pm b/lib/LaTeXML/BibTeX/BibStyle/Precompiled.pm new file mode 100644 index 0000000000..1c1ac91199 --- /dev/null +++ b/lib/LaTeXML/BibTeX/BibStyle/Precompiled.pm @@ -0,0 +1,22 @@ +# /=====================================================================\ # +# | Precompiled $style.bst | # +# | for LaTeXML | # +# |=====================================================================| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +# THIS IS A GENERATED FILE! DO NOT EDIT +package LaTeXML::BibTeX::BibStyle::Precompiled; +use strict; +use warnings; +use LaTeXML::BibTeX::BibStyle::StyCommand; +use LaTeXML::BibTeX::BibStyle::StyString; +sub Cmd { return LaTeXML::BibTeX::BibStyle::StyCommand->new(@_); } +sub Nmb { return LaTeXML::BibTeX::BibStyle::StyString->new('NUMBER', @_); } +sub Quo { return LaTeXML::BibTeX::BibStyle::StyString->new('QUOTE', @_); } +sub Lit { return LaTeXML::BibTeX::BibStyle::StyString->new('LITERAL', @_); } +sub Ref { return LaTeXML::BibTeX::BibStyle::StyString->new('REFERENCE', @_); } +sub Blk { return LaTeXML::BibTeX::BibStyle::StyString->new('BLOCK', @_); } +our $DEFAULT; +$DEFAULT = [Cmd('ENTRY', [Blk([Lit('address'), Lit('author'), Lit('booktitle'), Lit('chapter'), Lit('edition'), Lit('editor'), Lit('howpublished'), Lit('institution'), Lit('journal'), Lit('key'), Lit('month'), Lit('note'), Lit('number'), Lit('organization'), Lit('pages'), Lit('publisher'), Lit('school'), Lit('series'), Lit('title'), Lit('type'), Lit('volume'), Lit('year')]), Blk([]), Blk([Lit('label')])]), Cmd('INTEGERS', [Blk([Lit('output.state'), Lit('before.all'), Lit('mid.sentence'), Lit('after.sentence'), Lit('after.block')])]), Cmd('FUNCTION', [Blk([Lit('init.state.consts')]), Blk([Nmb('0'), Ref('before.all'), Lit(':='), Nmb('1'), Ref('mid.sentence'), Lit(':='), Nmb('2'), Ref('after.sentence'), Lit(':='), Nmb('3'), Ref('after.block'), Lit(':=')])]), Cmd('STRINGS', [Blk([Lit('s'), Lit('t')])]), Cmd('FUNCTION', [Blk([Lit('output.nonnull')]), Blk([Ref('s'), Lit(':='), Lit('output.state'), Lit('mid.sentence'), Lit('='), Blk([Quo(', '), Lit('*'), Lit('write$')]), Blk([Lit('output.state'), Lit('after.block'), Lit('='), Blk([Lit('add.period$'), Lit('write$'), Lit('newline$'), Quo('\newblock '), Lit('write$')]), Blk([Lit('output.state'), Lit('before.all'), Lit('='), Ref('write$'), Blk([Lit('add.period$'), Quo(' '), Lit('*'), Lit('write$')]), Lit('if$')]), Lit('if$'), Lit('mid.sentence'), Ref('output.state'), Lit(':=')]), Lit('if$'), Lit('s')])]), Cmd('FUNCTION', [Blk([Lit('output')]), Blk([Lit('duplicate$'), Lit('empty$'), Ref('pop$'), Ref('output.nonnull'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('output.check')]), Blk([Ref('t'), Lit(':='), Lit('duplicate$'), Lit('empty$'), Blk([Lit('pop$'), Quo('empty '), Lit('t'), Lit('*'), Quo(' in '), Lit('*'), Lit('cite$'), Lit('*'), Lit('warning$')]), Ref('output.nonnull'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('output.bibitem')]), Blk([Lit('newline$'), Quo('\bibitem{'), Lit('write$'), Lit('cite$'), Lit('write$'), Quo('}'), Lit('write$'), Lit('newline$'), Quo(''), Lit('before.all'), Ref('output.state'), Lit(':=')])]), Cmd('FUNCTION', [Blk([Lit('fin.entry')]), Blk([Lit('add.period$'), Lit('write$'), Lit('newline$')])]), Cmd('FUNCTION', [Blk([Lit('new.block')]), Blk([Lit('output.state'), Lit('before.all'), Lit('='), Ref('skip$'), Blk([Lit('after.block'), Ref('output.state'), Lit(':=')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('new.sentence')]), Blk([Lit('output.state'), Lit('after.block'), Lit('='), Ref('skip$'), Blk([Lit('output.state'), Lit('before.all'), Lit('='), Ref('skip$'), Blk([Lit('after.sentence'), Ref('output.state'), Lit(':=')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('not')]), Blk([Blk([Nmb('0')]), Blk([Nmb('1')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('and')]), Blk([Ref('skip$'), Blk([Lit('pop$'), Nmb('0')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('or')]), Blk([Blk([Lit('pop$'), Nmb('1')]), Ref('skip$'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('new.block.checka')]), Blk([Lit('empty$'), Ref('skip$'), Ref('new.block'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('new.block.checkb')]), Blk([Lit('empty$'), Lit('swap$'), Lit('empty$'), Lit('and'), Ref('skip$'), Ref('new.block'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('new.sentence.checka')]), Blk([Lit('empty$'), Ref('skip$'), Ref('new.sentence'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('new.sentence.checkb')]), Blk([Lit('empty$'), Lit('swap$'), Lit('empty$'), Lit('and'), Ref('skip$'), Ref('new.sentence'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('field.or.null')]), Blk([Lit('duplicate$'), Lit('empty$'), Blk([Lit('pop$'), Quo('')]), Ref('skip$'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('emphasize')]), Blk([Lit('duplicate$'), Lit('empty$'), Blk([Lit('pop$'), Quo('')]), Blk([Quo('{\em '), Lit('swap$'), Lit('*'), Quo('}'), Lit('*')]), Lit('if$')])]), Cmd('INTEGERS', [Blk([Lit('nameptr'), Lit('namesleft'), Lit('numnames')])]), Cmd('FUNCTION', [Blk([Lit('format.names')]), Blk([Ref('s'), Lit(':='), Nmb('1'), Ref('nameptr'), Lit(':='), Lit('s'), Lit('num.names$'), Ref('numnames'), Lit(':='), Lit('numnames'), Ref('namesleft'), Lit(':='), Blk([Lit('namesleft'), Nmb('0'), Lit('>')]), Blk([Lit('s'), Lit('nameptr'), Quo('{ff~}{vv~}{ll}{, jj}'), Lit('format.name$'), Ref('t'), Lit(':='), Lit('nameptr'), Nmb('1'), Lit('>'), Blk([Lit('namesleft'), Nmb('1'), Lit('>'), Blk([Quo(', '), Lit('*'), Lit('t'), Lit('*')]), Blk([Lit('numnames'), Nmb('2'), Lit('>'), Blk([Quo(','), Lit('*')]), Ref('skip$'), Lit('if$'), Lit('t'), Quo('others'), Lit('='), Blk([Quo(' et~al.'), Lit('*')]), Blk([Quo(' and '), Lit('*'), Lit('t'), Lit('*')]), Lit('if$')]), Lit('if$')]), Ref('t'), Lit('if$'), Lit('nameptr'), Nmb('1'), Lit('+'), Ref('nameptr'), Lit(':='), Lit('namesleft'), Nmb('1'), Lit('-'), Ref('namesleft'), Lit(':=')]), Lit('while$')])]), Cmd('FUNCTION', [Blk([Lit('format.authors')]), Blk([Lit('author'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('author'), Lit('format.names')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.editors')]), Blk([Lit('editor'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('editor'), Lit('format.names'), Lit('editor'), Lit('num.names$'), Nmb('1'), Lit('>'), Blk([Quo(', editors'), Lit('*')]), Blk([Quo(', editor'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.title')]), Blk([Lit('title'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('title'), Quo('t'), Lit('change.case$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('n.dashify')]), Blk([Ref('t'), Lit(':='), Quo(''), Blk([Lit('t'), Lit('empty$'), Lit('not')]), Blk([Lit('t'), Nmb('1'), Nmb('1'), Lit('substring$'), Quo('-'), Lit('='), Blk([Lit('t'), Nmb('1'), Nmb('2'), Lit('substring$'), Quo('--'), Lit('='), Lit('not'), Blk([Quo('--'), Lit('*'), Lit('t'), Nmb('2'), Lit('global.max$'), Lit('substring$'), Ref('t'), Lit(':=')]), Blk([Blk([Lit('t'), Nmb('1'), Nmb('1'), Lit('substring$'), Quo('-'), Lit('=')]), Blk([Quo('-'), Lit('*'), Lit('t'), Nmb('2'), Lit('global.max$'), Lit('substring$'), Ref('t'), Lit(':=')]), Lit('while$')]), Lit('if$')]), Blk([Lit('t'), Nmb('1'), Nmb('1'), Lit('substring$'), Lit('*'), Lit('t'), Nmb('2'), Lit('global.max$'), Lit('substring$'), Ref('t'), Lit(':=')]), Lit('if$')]), Lit('while$')])]), Cmd('FUNCTION', [Blk([Lit('format.date')]), Blk([Lit('year'), Lit('empty$'), Blk([Lit('month'), Lit('empty$'), Blk([Quo('')]), Blk([Quo('there\'s a month but no year in '), Lit('cite$'), Lit('*'), Lit('warning$'), Lit('month')]), Lit('if$')]), Blk([Lit('month'), Lit('empty$'), Ref('year'), Blk([Lit('month'), Quo(' '), Lit('*'), Lit('year'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.btitle')]), Blk([Lit('title'), Lit('emphasize')])]), Cmd('FUNCTION', [Blk([Lit('tie.or.space.connect')]), Blk([Lit('duplicate$'), Lit('text.length$'), Nmb('3'), Lit('<'), Blk([Quo('~')]), Blk([Quo(' ')]), Lit('if$'), Lit('swap$'), Lit('*'), Lit('*')])]), Cmd('FUNCTION', [Blk([Lit('either.or.check')]), Blk([Lit('empty$'), Ref('pop$'), Blk([Quo('can\'t use both '), Lit('swap$'), Lit('*'), Quo(' fields in '), Lit('*'), Lit('cite$'), Lit('*'), Lit('warning$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.bvolume')]), Blk([Lit('volume'), Lit('empty$'), Blk([Quo('')]), Blk([Quo('volume'), Lit('volume'), Lit('tie.or.space.connect'), Lit('series'), Lit('empty$'), Ref('skip$'), Blk([Quo(' of '), Lit('*'), Lit('series'), Lit('emphasize'), Lit('*')]), Lit('if$'), Quo('volume and number'), Lit('number'), Lit('either.or.check')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.number.series')]), Blk([Lit('volume'), Lit('empty$'), Blk([Lit('number'), Lit('empty$'), Blk([Lit('series'), Lit('field.or.null')]), Blk([Lit('output.state'), Lit('mid.sentence'), Lit('='), Blk([Quo('number')]), Blk([Quo('Number')]), Lit('if$'), Lit('number'), Lit('tie.or.space.connect'), Lit('series'), Lit('empty$'), Blk([Quo('there\'s a number but no series in '), Lit('cite$'), Lit('*'), Lit('warning$')]), Blk([Quo(' in '), Lit('*'), Lit('series'), Lit('*')]), Lit('if$')]), Lit('if$')]), Blk([Quo('')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.edition')]), Blk([Lit('edition'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('output.state'), Lit('mid.sentence'), Lit('='), Blk([Lit('edition'), Quo('l'), Lit('change.case$'), Quo(' edition'), Lit('*')]), Blk([Lit('edition'), Quo('t'), Lit('change.case$'), Quo(' edition'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('INTEGERS', [Blk([Lit('multiresult')])]), Cmd('FUNCTION', [Blk([Lit('multi.page.check')]), Blk([Ref('t'), Lit(':='), Nmb('0'), Ref('multiresult'), Lit(':='), Blk([Lit('multiresult'), Lit('not'), Lit('t'), Lit('empty$'), Lit('not'), Lit('and')]), Blk([Lit('t'), Nmb('1'), Nmb('1'), Lit('substring$'), Lit('duplicate$'), Quo('-'), Lit('='), Lit('swap$'), Lit('duplicate$'), Quo(','), Lit('='), Lit('swap$'), Quo('+'), Lit('='), Lit('or'), Lit('or'), Blk([Nmb('1'), Ref('multiresult'), Lit(':=')]), Blk([Lit('t'), Nmb('2'), Lit('global.max$'), Lit('substring$'), Ref('t'), Lit(':=')]), Lit('if$')]), Lit('while$'), Lit('multiresult')])]), Cmd('FUNCTION', [Blk([Lit('format.pages')]), Blk([Lit('pages'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('pages'), Lit('multi.page.check'), Blk([Quo('pages'), Lit('pages'), Lit('n.dashify'), Lit('tie.or.space.connect')]), Blk([Quo('page'), Lit('pages'), Lit('tie.or.space.connect')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.vol.num.pages')]), Blk([Lit('volume'), Lit('field.or.null'), Lit('number'), Lit('empty$'), Ref('skip$'), Blk([Quo('('), Lit('number'), Lit('*'), Quo(')'), Lit('*'), Lit('*'), Lit('volume'), Lit('empty$'), Blk([Quo('there\'s a number but no volume in '), Lit('cite$'), Lit('*'), Lit('warning$')]), Ref('skip$'), Lit('if$')]), Lit('if$'), Lit('pages'), Lit('empty$'), Ref('skip$'), Blk([Lit('duplicate$'), Lit('empty$'), Blk([Lit('pop$'), Lit('format.pages')]), Blk([Quo(':'), Lit('*'), Lit('pages'), Lit('n.dashify'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.chapter.pages')]), Blk([Lit('chapter'), Lit('empty$'), Ref('format.pages'), Blk([Lit('type'), Lit('empty$'), Blk([Quo('chapter')]), Blk([Lit('type'), Quo('l'), Lit('change.case$')]), Lit('if$'), Lit('chapter'), Lit('tie.or.space.connect'), Lit('pages'), Lit('empty$'), Ref('skip$'), Blk([Quo(', '), Lit('*'), Lit('format.pages'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.in.ed.booktitle')]), Blk([Lit('booktitle'), Lit('empty$'), Blk([Quo('')]), Blk([Lit('editor'), Lit('empty$'), Blk([Quo('In '), Lit('booktitle'), Lit('emphasize'), Lit('*')]), Blk([Quo('In '), Lit('format.editors'), Lit('*'), Quo(', '), Lit('*'), Lit('booktitle'), Lit('emphasize'), Lit('*')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('empty.misc.check')]), Blk([Lit('author'), Lit('empty$'), Lit('title'), Lit('empty$'), Lit('howpublished'), Lit('empty$'), Lit('month'), Lit('empty$'), Lit('year'), Lit('empty$'), Lit('note'), Lit('empty$'), Lit('and'), Lit('and'), Lit('and'), Lit('and'), Lit('and'), Lit('key'), Lit('empty$'), Lit('not'), Lit('and'), Blk([Quo('all relevant fields are empty in '), Lit('cite$'), Lit('*'), Lit('warning$')]), Ref('skip$'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.thesis.type')]), Blk([Lit('type'), Lit('empty$'), Ref('skip$'), Blk([Lit('pop$'), Lit('type'), Quo('t'), Lit('change.case$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.tr.number')]), Blk([Lit('type'), Lit('empty$'), Blk([Quo('Technical Report')]), Ref('type'), Lit('if$'), Lit('number'), Lit('empty$'), Blk([Quo('t'), Lit('change.case$')]), Blk([Lit('number'), Lit('tie.or.space.connect')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.article.crossref')]), Blk([Lit('key'), Lit('empty$'), Blk([Lit('journal'), Lit('empty$'), Blk([Quo('need key or journal for '), Lit('cite$'), Lit('*'), Quo(' to crossref '), Lit('*'), Lit('crossref'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Quo('In {\em '), Lit('journal'), Lit('*'), Quo('\/}'), Lit('*')]), Lit('if$')]), Blk([Quo('In '), Lit('key'), Lit('*')]), Lit('if$'), Quo(' \cite{'), Lit('*'), Lit('crossref'), Lit('*'), Quo('}'), Lit('*')])]), Cmd('FUNCTION', [Blk([Lit('format.crossref.editor')]), Blk([Lit('editor'), Nmb('1'), Quo('{vv~}{ll}'), Lit('format.name$'), Lit('editor'), Lit('num.names$'), Lit('duplicate$'), Nmb('2'), Lit('>'), Blk([Lit('pop$'), Quo(' et~al.'), Lit('*')]), Blk([Nmb('2'), Lit('<'), Ref('skip$'), Blk([Lit('editor'), Nmb('2'), Quo('{ff }{vv }{ll}{ jj}'), Lit('format.name$'), Quo('others'), Lit('='), Blk([Quo(' et~al.'), Lit('*')]), Blk([Quo(' and '), Lit('*'), Lit('editor'), Nmb('2'), Quo('{vv~}{ll}'), Lit('format.name$'), Lit('*')]), Lit('if$')]), Lit('if$')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('format.book.crossref')]), Blk([Lit('volume'), Lit('empty$'), Blk([Quo('empty volume in '), Lit('cite$'), Lit('*'), Quo('\'s crossref of '), Lit('*'), Lit('crossref'), Lit('*'), Lit('warning$'), Quo('In ')]), Blk([Quo('Volume'), Lit('volume'), Lit('tie.or.space.connect'), Quo(' of '), Lit('*')]), Lit('if$'), Lit('editor'), Lit('empty$'), Lit('editor'), Lit('field.or.null'), Lit('author'), Lit('field.or.null'), Lit('='), Lit('or'), Blk([Lit('key'), Lit('empty$'), Blk([Lit('series'), Lit('empty$'), Blk([Quo('need editor, key, or series for '), Lit('cite$'), Lit('*'), Quo(' to crossref '), Lit('*'), Lit('crossref'), Lit('*'), Lit('warning$'), Quo(''), Lit('*')]), Blk([Quo('{\em '), Lit('*'), Lit('series'), Lit('*'), Quo('\/}'), Lit('*')]), Lit('if$')]), Blk([Lit('key'), Lit('*')]), Lit('if$')]), Blk([Lit('format.crossref.editor'), Lit('*')]), Lit('if$'), Quo(' \cite{'), Lit('*'), Lit('crossref'), Lit('*'), Quo('}'), Lit('*')])]), Cmd('FUNCTION', [Blk([Lit('format.incoll.inproc.crossref')]), Blk([Lit('editor'), Lit('empty$'), Lit('editor'), Lit('field.or.null'), Lit('author'), Lit('field.or.null'), Lit('='), Lit('or'), Blk([Lit('key'), Lit('empty$'), Blk([Lit('booktitle'), Lit('empty$'), Blk([Quo('need editor, key, or booktitle for '), Lit('cite$'), Lit('*'), Quo(' to crossref '), Lit('*'), Lit('crossref'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Quo('In {\em '), Lit('booktitle'), Lit('*'), Quo('\/}'), Lit('*')]), Lit('if$')]), Blk([Quo('In '), Lit('key'), Lit('*')]), Lit('if$')]), Blk([Quo('In '), Lit('format.crossref.editor'), Lit('*')]), Lit('if$'), Quo(' \cite{'), Lit('*'), Lit('crossref'), Lit('*'), Quo('}'), Lit('*')])]), Cmd('FUNCTION', [Blk([Lit('article')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Lit('crossref'), Lit('missing$'), Blk([Lit('journal'), Lit('emphasize'), Quo('journal'), Lit('output.check'), Lit('format.vol.num.pages'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check')]), Blk([Lit('format.article.crossref'), Lit('output.nonnull'), Lit('format.pages'), Lit('output')]), Lit('if$'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('book')]), Blk([Lit('output.bibitem'), Lit('author'), Lit('empty$'), Blk([Lit('format.editors'), Quo('author and editor'), Lit('output.check')]), Blk([Lit('format.authors'), Lit('output.nonnull'), Lit('crossref'), Lit('missing$'), Blk([Quo('author and editor'), Lit('editor'), Lit('either.or.check')]), Ref('skip$'), Lit('if$')]), Lit('if$'), Lit('new.block'), Lit('format.btitle'), Quo('title'), Lit('output.check'), Lit('crossref'), Lit('missing$'), Blk([Lit('format.bvolume'), Lit('output'), Lit('new.block'), Lit('format.number.series'), Lit('output'), Lit('new.sentence'), Lit('publisher'), Quo('publisher'), Lit('output.check'), Lit('address'), Lit('output')]), Blk([Lit('new.block'), Lit('format.book.crossref'), Lit('output.nonnull')]), Lit('if$'), Lit('format.edition'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('booklet')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Lit('output'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('howpublished'), Lit('address'), Lit('new.block.checkb'), Lit('howpublished'), Lit('output'), Lit('address'), Lit('output'), Lit('format.date'), Lit('output'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('inbook')]), Blk([Lit('output.bibitem'), Lit('author'), Lit('empty$'), Blk([Lit('format.editors'), Quo('author and editor'), Lit('output.check')]), Blk([Lit('format.authors'), Lit('output.nonnull'), Lit('crossref'), Lit('missing$'), Blk([Quo('author and editor'), Lit('editor'), Lit('either.or.check')]), Ref('skip$'), Lit('if$')]), Lit('if$'), Lit('new.block'), Lit('format.btitle'), Quo('title'), Lit('output.check'), Lit('crossref'), Lit('missing$'), Blk([Lit('format.bvolume'), Lit('output'), Lit('format.chapter.pages'), Quo('chapter and pages'), Lit('output.check'), Lit('new.block'), Lit('format.number.series'), Lit('output'), Lit('new.sentence'), Lit('publisher'), Quo('publisher'), Lit('output.check'), Lit('address'), Lit('output')]), Blk([Lit('format.chapter.pages'), Quo('chapter and pages'), Lit('output.check'), Lit('new.block'), Lit('format.book.crossref'), Lit('output.nonnull')]), Lit('if$'), Lit('format.edition'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('incollection')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Lit('crossref'), Lit('missing$'), Blk([Lit('format.in.ed.booktitle'), Quo('booktitle'), Lit('output.check'), Lit('format.bvolume'), Lit('output'), Lit('format.number.series'), Lit('output'), Lit('format.chapter.pages'), Lit('output'), Lit('new.sentence'), Lit('publisher'), Quo('publisher'), Lit('output.check'), Lit('address'), Lit('output'), Lit('format.edition'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check')]), Blk([Lit('format.incoll.inproc.crossref'), Lit('output.nonnull'), Lit('format.chapter.pages'), Lit('output')]), Lit('if$'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('inproceedings')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Lit('crossref'), Lit('missing$'), Blk([Lit('format.in.ed.booktitle'), Quo('booktitle'), Lit('output.check'), Lit('format.bvolume'), Lit('output'), Lit('format.number.series'), Lit('output'), Lit('format.pages'), Lit('output'), Lit('address'), Lit('empty$'), Blk([Lit('organization'), Lit('publisher'), Lit('new.sentence.checkb'), Lit('organization'), Lit('output'), Lit('publisher'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check')]), Blk([Lit('address'), Lit('output.nonnull'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.sentence'), Lit('organization'), Lit('output'), Lit('publisher'), Lit('output')]), Lit('if$')]), Blk([Lit('format.incoll.inproc.crossref'), Lit('output.nonnull'), Lit('format.pages'), Lit('output')]), Lit('if$'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('conference')]), Blk([Lit('inproceedings')])]), Cmd('FUNCTION', [Blk([Lit('manual')]), Blk([Lit('output.bibitem'), Lit('author'), Lit('empty$'), Blk([Lit('organization'), Lit('empty$'), Ref('skip$'), Blk([Lit('organization'), Lit('output.nonnull'), Lit('address'), Lit('output')]), Lit('if$')]), Blk([Lit('format.authors'), Lit('output.nonnull')]), Lit('if$'), Lit('new.block'), Lit('format.btitle'), Quo('title'), Lit('output.check'), Lit('author'), Lit('empty$'), Blk([Lit('organization'), Lit('empty$'), Blk([Lit('address'), Lit('new.block.checka'), Lit('address'), Lit('output')]), Ref('skip$'), Lit('if$')]), Blk([Lit('organization'), Lit('address'), Lit('new.block.checkb'), Lit('organization'), Lit('output'), Lit('address'), Lit('output')]), Lit('if$'), Lit('format.edition'), Lit('output'), Lit('format.date'), Lit('output'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('mastersthesis')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Quo('Master\'s thesis'), Lit('format.thesis.type'), Lit('output.nonnull'), Lit('school'), Quo('school'), Lit('output.check'), Lit('address'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('misc')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Lit('output'), Lit('title'), Lit('howpublished'), Lit('new.block.checkb'), Lit('format.title'), Lit('output'), Lit('howpublished'), Lit('new.block.checka'), Lit('howpublished'), Lit('output'), Lit('format.date'), Lit('output'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry'), Lit('empty.misc.check')])]), Cmd('FUNCTION', [Blk([Lit('phdthesis')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.btitle'), Quo('title'), Lit('output.check'), Lit('new.block'), Quo('PhD thesis'), Lit('format.thesis.type'), Lit('output.nonnull'), Lit('school'), Quo('school'), Lit('output.check'), Lit('address'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('proceedings')]), Blk([Lit('output.bibitem'), Lit('editor'), Lit('empty$'), Blk([Lit('organization'), Lit('output')]), Blk([Lit('format.editors'), Lit('output.nonnull')]), Lit('if$'), Lit('new.block'), Lit('format.btitle'), Quo('title'), Lit('output.check'), Lit('format.bvolume'), Lit('output'), Lit('format.number.series'), Lit('output'), Lit('address'), Lit('empty$'), Blk([Lit('editor'), Lit('empty$'), Blk([Lit('publisher'), Lit('new.sentence.checka')]), Blk([Lit('organization'), Lit('publisher'), Lit('new.sentence.checkb'), Lit('organization'), Lit('output')]), Lit('if$'), Lit('publisher'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check')]), Blk([Lit('address'), Lit('output.nonnull'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.sentence'), Lit('editor'), Lit('empty$'), Ref('skip$'), Blk([Lit('organization'), Lit('output')]), Lit('if$'), Lit('publisher'), Lit('output')]), Lit('if$'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('techreport')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Lit('format.tr.number'), Lit('output.nonnull'), Lit('institution'), Quo('institution'), Lit('output.check'), Lit('address'), Lit('output'), Lit('format.date'), Quo('year'), Lit('output.check'), Lit('new.block'), Lit('note'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('unpublished')]), Blk([Lit('output.bibitem'), Lit('format.authors'), Quo('author'), Lit('output.check'), Lit('new.block'), Lit('format.title'), Quo('title'), Lit('output.check'), Lit('new.block'), Lit('note'), Quo('note'), Lit('output.check'), Lit('format.date'), Lit('output'), Lit('fin.entry')])]), Cmd('FUNCTION', [Blk([Lit('default.type')]), Blk([Lit('misc')])]), Cmd('MACRO', [Blk([Lit('jan')]), Blk([Quo('January')])]), Cmd('MACRO', [Blk([Lit('feb')]), Blk([Quo('February')])]), Cmd('MACRO', [Blk([Lit('mar')]), Blk([Quo('March')])]), Cmd('MACRO', [Blk([Lit('apr')]), Blk([Quo('April')])]), Cmd('MACRO', [Blk([Lit('may')]), Blk([Quo('May')])]), Cmd('MACRO', [Blk([Lit('jun')]), Blk([Quo('June')])]), Cmd('MACRO', [Blk([Lit('jul')]), Blk([Quo('July')])]), Cmd('MACRO', [Blk([Lit('aug')]), Blk([Quo('August')])]), Cmd('MACRO', [Blk([Lit('sep')]), Blk([Quo('September')])]), Cmd('MACRO', [Blk([Lit('oct')]), Blk([Quo('October')])]), Cmd('MACRO', [Blk([Lit('nov')]), Blk([Quo('November')])]), Cmd('MACRO', [Blk([Lit('dec')]), Blk([Quo('December')])]), Cmd('MACRO', [Blk([Lit('acmcs')]), Blk([Quo('ACM Computing Surveys')])]), Cmd('MACRO', [Blk([Lit('acta')]), Blk([Quo('Acta Informatica')])]), Cmd('MACRO', [Blk([Lit('cacm')]), Blk([Quo('Communications of the ACM')])]), Cmd('MACRO', [Blk([Lit('ibmjrd')]), Blk([Quo('IBM Journal of Research and Development')])]), Cmd('MACRO', [Blk([Lit('ibmsj')]), Blk([Quo('IBM Systems Journal')])]), Cmd('MACRO', [Blk([Lit('ieeese')]), Blk([Quo('IEEE Transactions on Software Engineering')])]), Cmd('MACRO', [Blk([Lit('ieeetc')]), Blk([Quo('IEEE Transactions on Computers')])]), Cmd('MACRO', [Blk([Lit('ieeetcad')]), Blk([Quo('IEEE Transactions on Computer-Aided Design of Integrated Circuits')])]), Cmd('MACRO', [Blk([Lit('ipl')]), Blk([Quo('Information Processing Letters')])]), Cmd('MACRO', [Blk([Lit('jacm')]), Blk([Quo('Journal of the ACM')])]), Cmd('MACRO', [Blk([Lit('jcss')]), Blk([Quo('Journal of Computer and System Sciences')])]), Cmd('MACRO', [Blk([Lit('scp')]), Blk([Quo('Science of Computer Programming')])]), Cmd('MACRO', [Blk([Lit('sicomp')]), Blk([Quo('SIAM Journal on Computing')])]), Cmd('MACRO', [Blk([Lit('tocs')]), Blk([Quo('ACM Transactions on Computer Systems')])]), Cmd('MACRO', [Blk([Lit('tods')]), Blk([Quo('ACM Transactions on Database Systems')])]), Cmd('MACRO', [Blk([Lit('tog')]), Blk([Quo('ACM Transactions on Graphics')])]), Cmd('MACRO', [Blk([Lit('toms')]), Blk([Quo('ACM Transactions on Mathematical Software')])]), Cmd('MACRO', [Blk([Lit('toois')]), Blk([Quo('ACM Transactions on Office Information Systems')])]), Cmd('MACRO', [Blk([Lit('toplas')]), Blk([Quo('ACM Transactions on Programming Languages and Systems')])]), Cmd('MACRO', [Blk([Lit('tcs')]), Blk([Quo('Theoretical Computer Science')])]), Cmd('READ', []), Cmd('FUNCTION', [Blk([Lit('sortify')]), Blk([Lit('purify$'), Quo('l'), Lit('change.case$')])]), Cmd('INTEGERS', [Blk([Lit('len')])]), Cmd('FUNCTION', [Blk([Lit('chop.word')]), Blk([Ref('s'), Lit(':='), Ref('len'), Lit(':='), Lit('s'), Nmb('1'), Lit('len'), Lit('substring$'), Lit('='), Blk([Lit('s'), Lit('len'), Nmb('1'), Lit('+'), Lit('global.max$'), Lit('substring$')]), Ref('s'), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('sort.format.names')]), Blk([Ref('s'), Lit(':='), Nmb('1'), Ref('nameptr'), Lit(':='), Quo(''), Lit('s'), Lit('num.names$'), Ref('numnames'), Lit(':='), Lit('numnames'), Ref('namesleft'), Lit(':='), Blk([Lit('namesleft'), Nmb('0'), Lit('>')]), Blk([Lit('nameptr'), Nmb('1'), Lit('>'), Blk([Quo(' '), Lit('*')]), Ref('skip$'), Lit('if$'), Lit('s'), Lit('nameptr'), Quo('{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}'), Lit('format.name$'), Ref('t'), Lit(':='), Lit('nameptr'), Lit('numnames'), Lit('='), Lit('t'), Quo('others'), Lit('='), Lit('and'), Blk([Quo('et al'), Lit('*')]), Blk([Lit('t'), Lit('sortify'), Lit('*')]), Lit('if$'), Lit('nameptr'), Nmb('1'), Lit('+'), Ref('nameptr'), Lit(':='), Lit('namesleft'), Nmb('1'), Lit('-'), Ref('namesleft'), Lit(':=')]), Lit('while$')])]), Cmd('FUNCTION', [Blk([Lit('sort.format.title')]), Blk([Ref('t'), Lit(':='), Quo('A '), Nmb('2'), Quo('An '), Nmb('3'), Quo('The '), Nmb('4'), Lit('t'), Lit('chop.word'), Lit('chop.word'), Lit('chop.word'), Lit('sortify'), Nmb('1'), Lit('global.max$'), Lit('substring$')])]), Cmd('FUNCTION', [Blk([Lit('author.sort')]), Blk([Lit('author'), Lit('empty$'), Blk([Lit('key'), Lit('empty$'), Blk([Quo('to sort, need author or key in '), Lit('cite$'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Lit('key'), Lit('sortify')]), Lit('if$')]), Blk([Lit('author'), Lit('sort.format.names')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('author.editor.sort')]), Blk([Lit('author'), Lit('empty$'), Blk([Lit('editor'), Lit('empty$'), Blk([Lit('key'), Lit('empty$'), Blk([Quo('to sort, need author, editor, or key in '), Lit('cite$'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Lit('key'), Lit('sortify')]), Lit('if$')]), Blk([Lit('editor'), Lit('sort.format.names')]), Lit('if$')]), Blk([Lit('author'), Lit('sort.format.names')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('author.organization.sort')]), Blk([Lit('author'), Lit('empty$'), Blk([Lit('organization'), Lit('empty$'), Blk([Lit('key'), Lit('empty$'), Blk([Quo('to sort, need author, organization, or key in '), Lit('cite$'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Lit('key'), Lit('sortify')]), Lit('if$')]), Blk([Quo('The '), Nmb('4'), Lit('organization'), Lit('chop.word'), Lit('sortify')]), Lit('if$')]), Blk([Lit('author'), Lit('sort.format.names')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('editor.organization.sort')]), Blk([Lit('editor'), Lit('empty$'), Blk([Lit('organization'), Lit('empty$'), Blk([Lit('key'), Lit('empty$'), Blk([Quo('to sort, need editor, organization, or key in '), Lit('cite$'), Lit('*'), Lit('warning$'), Quo('')]), Blk([Lit('key'), Lit('sortify')]), Lit('if$')]), Blk([Quo('The '), Nmb('4'), Lit('organization'), Lit('chop.word'), Lit('sortify')]), Lit('if$')]), Blk([Lit('editor'), Lit('sort.format.names')]), Lit('if$')])]), Cmd('FUNCTION', [Blk([Lit('presort')]), Blk([Lit('type$'), Quo('book'), Lit('='), Lit('type$'), Quo('inbook'), Lit('='), Lit('or'), Ref('author.editor.sort'), Blk([Lit('type$'), Quo('proceedings'), Lit('='), Ref('editor.organization.sort'), Blk([Lit('type$'), Quo('manual'), Lit('='), Ref('author.organization.sort'), Ref('author.sort'), Lit('if$')]), Lit('if$')]), Lit('if$'), Quo(' '), Lit('*'), Lit('year'), Lit('field.or.null'), Lit('sortify'), Lit('*'), Quo(' '), Lit('*'), Lit('title'), Lit('field.or.null'), Lit('sort.format.title'), Lit('*'), Nmb('1'), Lit('entry.max$'), Lit('substring$'), Ref('sort.key$'), Lit(':=')])]), Cmd('ITERATE', [Blk([Lit('presort')])]), Cmd('SORT', []), Cmd('STRINGS', [Blk([Lit('longest.label')])]), Cmd('INTEGERS', [Blk([Lit('number.label'), Lit('longest.label.width')])]), Cmd('FUNCTION', [Blk([Lit('initialize.longest.label')]), Blk([Quo(''), Ref('longest.label'), Lit(':='), Nmb('1'), Ref('number.label'), Lit(':='), Nmb('0'), Ref('longest.label.width'), Lit(':=')])]), Cmd('FUNCTION', [Blk([Lit('longest.label.pass')]), Blk([Lit('number.label'), Lit('int.to.str$'), Ref('label'), Lit(':='), Lit('number.label'), Nmb('1'), Lit('+'), Ref('number.label'), Lit(':='), Lit('label'), Lit('width$'), Lit('longest.label.width'), Lit('>'), Blk([Lit('label'), Ref('longest.label'), Lit(':='), Lit('label'), Lit('width$'), Ref('longest.label.width'), Lit(':=')]), Ref('skip$'), Lit('if$')])]), Cmd('EXECUTE', [Blk([Lit('initialize.longest.label')])]), Cmd('ITERATE', [Blk([Lit('longest.label.pass')])]), Cmd('FUNCTION', [Blk([Lit('begin.bib')]), Blk([Lit('preamble$'), Lit('empty$'), Ref('skip$'), Blk([Lit('preamble$'), Lit('write$'), Lit('newline$')]), Lit('if$'), Quo('\begin{thebibliography}{'), Lit('longest.label'), Lit('*'), Quo('}'), Lit('*'), Lit('write$'), Lit('newline$')])]), Cmd('EXECUTE', [Blk([Lit('begin.bib')])]), Cmd('EXECUTE', [Blk([Lit('init.state.consts')])]), Cmd('ITERATE', [Blk([Lit('call.type$')])]), Cmd('FUNCTION', [Blk([Lit('end.bib')]), Blk([Lit('newline$'), Quo('\end{thebibliography}'), Lit('write$'), Lit('newline$')])]), Cmd('EXECUTE', [Blk([Lit('end.bib')])])]; +1; diff --git a/lib/LaTeXML/BibTeX/BibStyle/StyCommand.pm b/lib/LaTeXML/BibTeX/BibStyle/StyCommand.pm new file mode 100644 index 0000000000..e64951aeba --- /dev/null +++ b/lib/LaTeXML/BibTeX/BibStyle/StyCommand.pm @@ -0,0 +1,42 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::BibStyle::StyCommand | # +# | Representations for commands with source refs to a .bst file | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::BibStyle::StyCommand; +use strict; +use warnings; + +use base qw(LaTeXML::BibTeX::Common::Object); + +sub new { + my ($class, $name, $arguments, $locator) = @_; + return bless { + name => $name || '', # the name of the command (see getName) + arguments => $arguments, # the arguments to the command (see getArguments) + locator => $locator, # the locator position of the command (see getLocator) + }, $class; } + +sub getKind { + my ($self) = @_; + return 'COMMAND ' . $$self{name}; } + +# the name of the command. Should be a STYString of type Literal. +sub getName { + my ($self) = @_; + return $$self{name}; } + +# the arguments of this command. Should be StyStrings of type LITERAL. +sub getArguments { + my ($self) = @_; + return @{ $$self{arguments} }; } + +sub stringify { + my ($self) = @_; + return 'StyCommand(' . join(', ', $$self{name}, map { $_->stringify; } @{ $$self{arguments} }) . ")"; } + +1; diff --git a/lib/LaTeXML/BibTeX/BibStyle/StyString.pm b/lib/LaTeXML/BibTeX/BibStyle/StyString.pm new file mode 100644 index 0000000000..8fd7927de5 --- /dev/null +++ b/lib/LaTeXML/BibTeX/BibStyle/StyString.pm @@ -0,0 +1,53 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::BibStyle::StyString | # +# | Representations for strings with source refs to a .bst file | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::BibStyle::StyString; +use strict; +use warnings; + +use base qw(LaTeXML::BibTeX::Common::Object); + +sub new { + my ($class, $kind, $value, $locator) = @_; + return bless { + kind => $kind || '', # the kind of string we have (see getKind) + value => $value, # the value in this string (see getValue) + locator => $locator, # the locator position (see getLocator) + }, $class; } + +# get the kind this StyString represents. One of: +# '' (other) +# 'NUMBER' (a literal number) +# 'QUOTE' (a literal string) +# 'LITERAL' (any unquoted value) +# 'REFERENCE' (a reference to a function or variable) +# 'BLOCK' (a {} enclosed list of other StyStrings) +sub getKind { + my ($self) = @_; + return $$self{kind}; } + +# get the value of this StyString +sub getValue { + my ($self) = @_; + return $$self{value}; } + +sub stringify { + my ($self) = @_; + my ($kind) = $$self{kind}; + my $value; + if ($kind eq 'BLOCK') { + my @content = map { $_->stringify; } @{ $$self{value} }; + $value = '[' . join(', ', @content) . ']'; } + elsif ($kind eq 'NUMBER') { + $value = $$self{value}; } + else { + $value = $$self{value}; } + return 'StyString(' . $kind . ', ' . $value . ")"; } + +1; diff --git a/lib/LaTeXML/BibTeX/Bibliography.pm b/lib/LaTeXML/BibTeX/Bibliography.pm new file mode 100644 index 0000000000..eac30b0e49 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Bibliography.pm @@ -0,0 +1,251 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Bibliography | # +# | A Parser for .bib files | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef); + +package LaTeXML::BibTeX::Bibliography; +use strict; +use warnings; +use LaTeXML::Common::Error; +use LaTeXML::BibTeX::Common::StreamReader; +use LaTeXML::BibTeX::Bibliography::BibField; +use LaTeXML::BibTeX::Bibliography::BibEntry; + +# ======================================================================= # +sub new { + my ($class, $name, $pathname) = @_; + my $reader = LaTeXML::BibTeX::Common::StreamReader->newFromLTXML($name, $pathname); + if (!defined($reader)) { + Fatal('missing_file', $name, undef, "Unable to open Bibliography file $pathname"); + return undef; } + my $bibliography = bless { + name => $name, pathname => $pathname, + reader => $reader, + preamble => [], strings => {} }, $class; + return $bibliography; } + +sub getName { + my ($self) = @_; + return $$self{name}; } + +sub getPathname { + my ($self) = @_; + return $$self{pathname}; } + +sub getPreamble { + my ($self) = @_; + return @{ $$self{preamble} }; } + +# Defer reading the bibliography until requested by the BST. +# Thus we can make all macro substitutions (from bib's @STRING and bst's MACRO) ONCE! +# @STRINGs can change during reading, but should use the last/current value +# whereas bst's MACRO's can't change. So, we should be accurate. +sub getEntries { + my ($self, $macros) = @_; + if (!$$self{entries}) { + $self->readFile($macros); } + return @{ $$self{entries} }; } + +# ======================================================================= # +# Parsing a file +# ======================================================================= # +# Read the bib file +# makes substitutions of @STRING's within the bib file, but defers .bst macros +sub readFile { + my ($self, $macros) = @_; + my $reader = $$self{reader}; + # values to be returned + my @entries = (); + while (my $type = $self->readEntryType()) { + my $locator = $reader->getLocator; # WRONG! should be before type! + my $begin = $self->readDelimiter('{', '('); + if ($type eq 'comment') { + my $ignore = $self->readValue($macros); } + elsif ($type eq 'preamble') { # just a string (possibly interpolated) + my $pre = $self->readValue($macros); + push(@{ $$self{preamble} }, $pre); } + elsif ($type eq 'string') { + my $name = $self->readKeyword; + $self->readDelimiter('='); + my $value = $self->readValue($macros); + $$self{strings}{ lc $name } = $value; } + else { # Random bibliographic entry + my $key = $self->readKeyword; + my @fields = (); # !!!! key is 1st field!!!!! Bad! + while (defined(my $char = $reader->peekChar)) { + last if ($char ne ','); + $reader->readChar; + $reader->skipSpaces; + my $flocator = $reader->getLocator; + my $name = $self->readKeyword; + next unless $name; + last unless $self->readDelimiter('='); + my $value = $self->readValue($macros); + push(@fields, LaTeXML::BibTeX::Bibliography::BibField->new(lc($name), $value, + $flocator->merge($reader->getLocator))); } + push(@entries, LaTeXML::BibTeX::Bibliography::BibEntry->new(lc($type), $key, [@fields], + $locator->merge($reader->getLocator))); } + $self->readDelimiter(($begin eq '{' ? '}' : ')')); } # closing } or ) of entry + $reader->finalize; + $$self{entries} = [@entries]; + return $self; } + +sub readDelimiter { + my ($self, @expected) = @_; + my $reader = $$self{reader}; + $reader->skipSpaces; + my $char = $reader->peekChar; + if ((defined $char) && grep { $char eq $_; } @expected) { + $reader->readChar; + return $char; } + Warn('bibtex', 'bibparse', $reader->getLocator, + 'unexpected ' . (scalar(@expected) > 1 ? 'one of ' : '') . join(' ', map { "'$_'"; } @expected)); + return; } + +# ======================================================================= # +# Parsing an entry +# ======================================================================= # + +# reads the next @command name from the bib file +sub readEntryType { + my ($self) = @_; + my $reader = $$self{reader}; + my $char; + do { # Skip till '@' + $char = $reader->readChar; + } while (defined $char) && ($char ne '@'); + return unless defined $char; + my $type = $self->readKeyword(); + return Warn('bibtex', 'bibparse', $reader->getLocator, 'expected a non-empty name ') + unless $type; + return lc($type); } + +# ======================================================================= # +# Parsing a Value +# ======================================================================= # + +# reads a string value, incorporating substitutions from bib @STRINGS and bst MACROS +sub readValue { + my ($self, $macros) = @_; + my $reader = $$self{reader}; + # skip spaces and start reading a field + $reader->skipSpaces; + my $locator = $reader->getLocator; + # if we only have a closing brace + # we may have tried to read a closing brace + # so return undef and also no error. + my $char = $reader->peekChar; + return Warn('bibtex', 'bibparse', $reader->getLocator, + 'unexpected end of input while reading field',) + unless defined($char); + return if ($char eq '}' or $char eq ','); + # results and if we had an error + my @content = (); + # read until we encounter a , or a closing brace + while ($char ne ',' && $char ne '}') { + # Read some kind of value (quoted, braced, keyword) + if ($char eq '"') { + my $value = $self->readQuoted(); + return unless defined $value; + push(@content, $value); } + # if we had a brace, allow only a concat next + elsif ($char eq '{') { + my $value = $self->readBraced(); + return unless defined $value; + push(@content, $value); } + else { + my $value = $self->readKeyword(); + return unless defined $value; + if (my $name = lc($value)) { + if (my $repl = $$self{strings}{$name} // $$macros{$name}) { + $value = $repl; } } + push(@content, $value); } + # Now look next for possible concatenation + $reader->skipSpaces; + $char = $reader->peekChar; + last unless $char eq '#'; + $reader->readChar; + $reader->skipSpaces; + $char = $reader->peekChar; } + return join('', @content); } + +# ======================================================================= # +# Parsing Keywords, Quotes & Braces +# ======================================================================= # +our %keyword_specials = ('{' => 1, '}' => 1, '=' => 1, '#' => 1, ',' => 1); + +sub readKeyword { + my ($self) = @_; + my $reader = $$self{reader}; + # get the starting position + my @chars = (); + while (defined(my $char = $reader->peekChar)) { + last if $keyword_specials{$char}; + push(@chars, $char); + $reader->readChar; } + return unless @chars; + my $keyword = join('', @chars); + $keyword =~ s/^\s+//; # Trim + $keyword =~ s/\s+$//; + return $keyword; } + +# read a string of balanced braces from the input +# does not skip any spaces before or after +sub readBraced { + my ($self) = @_; + my $reader = $$self{reader}; + # read the first bracket, or die if we are at the end + my $char = $reader->readChar; + Warn('bibtex', 'bibparse', $reader->getLocator, 'expected to find an "{"') + unless defined($char) && $char eq '{'; + # record the starting position of the bracket + my $locator = $reader->getLocator; + # setup where we are + my $result = ''; + my $level = 1; + $char = ''; + while ($level) { + # add the previous character, and read the next one. + $result .= $char; + $char = $reader->readChar; + return Warn('bibtex', 'bibparse', $reader->getLocator, 'unexpected end of input in quote ') + if !defined $char; + # keep count of what level we are in + if ($char eq '{') { + $level++; } + elsif ($char eq '}') { + $level--; } } + return $result; } + +# read a quoted quote from reader +# does not skip any spaces +sub readQuoted { + my ($self) = @_; + my $reader = $$self{reader}; + # read the first quote, or die if we are at the end + my $char = $reader->readChar; + return Warn('bibtex', 'bibparse', $reader->getLocator, 'expected to find an \'"\'') + unless defined($char) && $char eq '"'; + my $locator = $reader->getLocator; + my $result = ''; + my $level = 0; + while (1) { + $char = $reader->readChar; + return Warn('bibtex', 'bibparse', $reader->getLocator, 'unexpected end of input in quote') + if !defined $char; + # if we find a {, or a }, keep track of levels, and don't do anything inside + if ($char eq '"') { + last unless $level; } + elsif ($char eq '{') { + $level++; } + elsif ($char eq '}') { + $level--; } + $result .= $char; } + return $result; } + +1; diff --git a/lib/LaTeXML/BibTeX/Bibliography/BibEntry.pm b/lib/LaTeXML/BibTeX/Bibliography/BibEntry.pm new file mode 100644 index 0000000000..2e763d5724 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Bibliography/BibEntry.pm @@ -0,0 +1,45 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Bibliography::BibEntry | # +# | Representation for .bib file entries | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::Bibliography::BibEntry; +use strict; +use warnings; + +use base qw(LaTeXML::BibTeX::Common::Object); + +sub new { + my ($class, $type, $key, $fields, $locator) = @_; + return bless { + type => $type, # the type of entry we have (see getType) + key => $key, # THe identifing key + fields => $fields, # a list of fields in this BiBFile + locator => $locator # a locator reference + }, $class; } + +# the type of this entry (lowercase string) +sub getType { + my ($self) = @_; + return $$self{type}; } + +# Get the identifying key (string, case-sensitive) +sub getKey { + my ($self) = @_; + return $$self{key}; } + +# a list of BiBFields s contained in this entry +sub getFields { + my ($self) = @_; + return @{ $$self{fields} }; } + +sub stringify { + my ($self) = @_; + return 'BibEntry(' . $$self{type} . ', ' . $$self{key} . ', [' . + join(',', map { $_->stringify; } $self->getFields) . "])"; } + +1; diff --git a/lib/LaTeXML/BibTeX/Bibliography/BibField.pm b/lib/LaTeXML/BibTeX/Bibliography/BibField.pm new file mode 100644 index 0000000000..7b74c76379 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Bibliography/BibField.pm @@ -0,0 +1,40 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Bibliography::BibField | # +# | Representation for tags inside .bib entries | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::Bibliography::BibField; +use strict; +use warnings; + +###use List::Util qw(reduce); + +use base qw(LaTeXML::BibTeX::Common::Object); + +sub new { + my ($class, $name, $content, $locator) = @_; + return bless { + name => $name, # name of this tag + content => $content, # content of this tag (see getContent) + locator => $locator, # the locator position (see getLocator) + }, $class; } + +# the name of this field (lowercase string) +sub getName { + my ($self) = @_; + return $$self{name}; } + +# gets the content of this BibField, a string +sub getContent { + my ($self) = @_; + return $$self{content}; } + +sub stringify { + my ($self) = @_; + return 'BibField(' . $$self{name} . ', ' . $$self{content} . ")"; } + +1; diff --git a/lib/LaTeXML/BibTeX/Common/Object.pm b/lib/LaTeXML/BibTeX/Common/Object.pm new file mode 100644 index 0000000000..dfe07a9917 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Common/Object.pm @@ -0,0 +1,23 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Common::Object | # +# | Common function for LaTeXML::BibTeX objects | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::Common::Object; +use strict; +use warnings; + +# gets the starting position of this object +# a five-tuple ($path, $startRow, $startColumn, $endRow, $endColumn) +# row-indexes are one-based, column-indexes zero-based +# the start position is inclusive, the end position is not +# never includes any whitespace in positioning +sub getLocator { + my ($self) = @_; + return $$self{locator}; } + +1; diff --git a/lib/LaTeXML/BibTeX/Common/StreamReader.pm b/lib/LaTeXML/BibTeX/Common/StreamReader.pm new file mode 100644 index 0000000000..e9946362be --- /dev/null +++ b/lib/LaTeXML/BibTeX/Common/StreamReader.pm @@ -0,0 +1,280 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Common::StreamReader | # +# | A primitive reader for input streams | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef); + +package LaTeXML::BibTeX::Common::StreamReader; +use strict; +use warnings; +use LaTeXML::Common::Error; +use LaTeXML::Common::Locator; + +use Encode; + +# 'new' creates a new StreamReader +sub new { + my ($class) = @_; + return bless { + # input and stuff + IN => undef, encoding => undef, buffer => undef, + # filename of this reader + filename => undef, + # current line information + line => '', + nchars => 0, + colno => 0, + lineno => 0, + eof => 0, + # pushback, contains ($char, $line, $col, $eof) + pushback => undef + }, $class; } + +# 'newFromFile' creates a new StreamReader from a file. +# Roughly equivalent to: +# my $reader = LaTeXML::BibTeX::Common::StreamReader->new(); +# $reader->openFile(@_); +sub newFromFile { + my ($class, $filename, $encoding) = @_; + my $reader = $class->new(); + return undef unless $reader->openFile($filename, $encoding); + return $reader; } + +# 'newFromLTXML' creates a new StreamReader from a latexml path. +# Roughly equivalent to: +# my $reader = LaTeXML::BibTeX::Common::StreamReader->new(); +# $reader->openLTXML(@_); +sub newFromLTXML { + my ($class, $name, $path) = @_; + my $reader = $class->new(); + return undef unless $reader->openLTXML($name, $path); + return $reader; } + +# ===================================================================== # +# Open / Close +# ===================================================================== # + +# 'openFile' opens a filename using either either a provided encoding, or an auto-detected one. +sub openFile { + my ($self, $pathname, $encoding) = @_; + # make sure that the filename exists + return 0 if (!-r $pathname); + return 0 if (!-z $pathname) && (-B $pathname); + # open filehandle and set encoding + open($$self{IN}, '<', $pathname) || return 0; + $$self{buffer} = []; + $$self{encoding} = find_encoding($encoding || 'utf-8'); + # reset the state + $$self{filename} = $pathname; + $$self{lineno} = 0; + $$self{colno} = 0; + $$self{line} = ''; + $$self{nchars} = 0; + return 1; } + +# 'openString' opens a raw string to be opened +sub openString { + my ($self, $string) = @_; + # in case of a string, we can buffer everythint at once + $$self{buffer} = [splitLines($string)]; + $$self{IN} = undef; + # reset all the counters + $$self{filename} = undef; + $$self{lineno} = 0; + $$self{colno} = 0; + $$self{line} = ''; + $$self{nchars} = 0; + return 1; } + +# openLTXML opens a file using a latexml path that could either be a file or a 'literal:' path +sub openLTXML { + my ($self, $name, $path) = @_; + # if it is a file, open it + if (-e $path) { + $self->openFile($path); } + elsif ($path =~ /^literal:(.*)$/) { + my ($literal) = ($path =~ m/^literal:(.*)$/); + $self->openString($literal); } + else { + return 0; } + $$self{filename} = $name if defined($name); + return 1; } + +# 'finalize' closes whatever is still left open by this reader and resets the state. +sub finalize { + my ($self) = @_; + # close the input if it exists + close(\*{ $$self{IN} }) if defined($$self{IN}); + $$self{IN} = undef; + # reset the state so we can reuse this instance + $$self{filename} = undef; + $$self{buffer} = []; + $$self{lineno} = 0; + $$self{colno} = 0; + $$self{line} = ''; + $$self{nchars} = 0; + return; } + +# 'getFilename' returns the filename used by this reader or undef. +sub getFilename { + my ($self) = @_; + return $$self{filename}; } + +# ===================================================================== # +# Reading Primitives +# ===================================================================== # + +# 'readChar' reads the next character from this reader and returns a 4-tuple ($char, $lineNo, $colNo, $eof). +# - $char contains the current character or undef +# - $lineNo contains the line number the character came from +# - $colNo contains the column number the character came from +# - $eof contains a boolean indicating if the end of file was reached +sub readChar { + my ($self) = @_; + # read our current state + my $lineNo = $$self{lineno}; + my $colNo = $$self{colno}; + my $eof = $$self{eof}; + # if we have some pushback, restore the state of it and return + my $pushback = $$self{pushback}; + if (defined($pushback)) { + my ($char, $lineno, $colno, $eofp) = @$pushback; + $$self{pushback} = undef; + $$self{lineno} = $lineno; + $$self{colno} = $colno; + $$self{eof} = $eofp; + return (wantarray ? ($char, $lineNo, $colNo, $eofp) : $char); } + # if we reached the end of the file in a previous run + # don't bother trying + return (wantarray ? (undef, $lineNo, $colNo, $eof) : undef) if $$self{eof}; + # if we still have characters left in the line, return those. + return (wantarray ? (substr($$self{line}, $$self{colno}++, 1), $lineNo, $colNo, $eof) : substr($$self{line}, $$self{colno}++, 1)) + if $colNo < $$self{nchars}; + my $line = $self->readNextLine; + # no more lines ... + unless (defined($line)) { + $$self{eof} = 1; + $$self{colno} = 0; + $$self{lineno}++; + return (wantarray ? (undef, $lineNo, $colNo, $eof) : undef); } + $$self{line} = $line; + $$self{nchars} = length $line; + $$self{lineno}++; + $$self{colno} = 1; + return (wantarray ? (substr($line, 0, 1), $lineNo, $colNo, $eof) : substr($line, 0, 1)); } + +# 'unreadChar' unreads a single read character from this reader so that the next call to readChar (and friends) returns it. +# At most a single unread character at the same time is supported. +sub unreadChar { + my ($self, $char, $lineNo, $colNo, $eof) = @_; + # if we did not change any lines, it is sufficient to revert the counter + # and we do not need to use (potentially expensive) pushback + my $nextLineNo = $$self{lineno}; + if ($nextLineNo eq $lineNo) { + $$self{colno} = $colNo; } + # else we need to revert the current state onto pushback + # because we can not undo the ->readLine + else { + $$self{pushback} = + [($char, $nextLineNo, $$self{colno}, $$self{eof})]; + $$self{lineno} = $lineNo; + $$self{colno} = $colNo; + $$self{eof} = $eof; } + return; } + +# 'peekChar' returns the next character that would be read using 'readChar', but does not actually read it. +# It returns a 4-tuple like 'readChar' would. +# This function is essentially equivalent to calling readChar, immediatly followed by an unreadChar. +sub peekChar { + my ($self) = @_; + # if we have some pushback, return that immediatly + # and do not call anything else + return (wantarray ? @{ $$self{pushback} } : $$self{pushback}[0]) if defined($$self{pushback}); + # read our current state + my $lineNo = $$self{lineno}; + my $colNo = $$self{colno}; + my $eof = $$self{eof}; + # if we have reached the end of the line, we can return now + # and don't even bother trying anything else + return (wantarray ? (undef, $lineNo, $colNo, 1) : undef) if $eof; + # if we still have enough characters on the current line + # then we can just return the current character + return (wantarray ? (substr($$self{line}, $colNo, 1), $lineNo, $colNo, $eof) : substr($$self{line}, $colNo, 1)) + if $colNo < $$self{nchars}; + # in all the other cases, we need to do a real readChar, unreadChar + my @read = $self->readChar; + $self->unreadChar(@read); + return (wantarray ? @read : $read[0]); } + +# 'readCharWhile' reads characters from the input as long as they match a given function and returns a 4-tuple ($chars, $lineNo, $colNo, $eof). +# - $chars contains the read characters +# - $lineNo contains the line number the last character came from +# - $colNo contains the column number the last character came from +# - $eof contains a boolean indicating if the end of file was reached +sub readCharWhile { + my ($self, $pred) = @_; + my ($char, $colno, $lineno, $eof) = $self->readChar; + my $chars = ''; + # read while we are not at the end of the input + # and are stil ok w.r.t the filter + while (defined($char) && &{$pred}($char)) { + $chars .= $char; + ($char, $colno, $lineno, $eof) = $self->readChar; } + # unread whatever is next and put it back on the stack + $self->unreadChar($char, $colno, $lineno, $eof); + # and return how many characters we skipped. + # return ($chars, $colno, $lineno, $eof); } + return $chars; } + +# 'skipSpaces' discards all spaces from the input. +sub skipSpaces { + my ($self) = @_; + # this code is an inline version of: + # $self->eatCharWhile( sub { $_[0] =~ /\s/; } ); + # read the first character + my ($char, $colno, $lineno, $eof) = $self->readChar; + # keep reading while the filter matches + ($char, $colno, $lineno, $eof) = $self->readChar + while (defined($char) && $char =~ /\s/); + # unread whatever is next and put it back on the stack + $self->unreadChar($char, $colno, $lineno, $eof); + return; } + +# ===================================================================== # +# Reading state +# ===================================================================== # + +sub getLocator { + my ($self) = @_; + return LaTeXML::Common::Locator->new($$self{filename}, $$self{lineno}, $$self{colno}); } + +# ===================================================================== # +# Reading lines +# ===================================================================== # + +# 'readNextLine' returns a line representing the next line read from the input. +# Returns either a string terminating with '\n' or undef (if no more lines exist). +sub readNextLine { + my ($self) = @_; + unless (@{ $$self{buffer} }) { + return + unless $$self{IN}; # if we did not have an open file, return undef + my $fh = \*{ $$self{IN} }; + my $line = <$fh>; + return unless defined $line; + $$self{buffer} = [splitLines($$self{encoding}->decode($line))]; } + # add the '\n' to the end of the line + return (shift(@{ $$self{buffer} }) || '') . "\n"; } + +# This is (hopefully) a platform independent way of splitting a string +# into "lines" ending with CRLF, CR or LF (DOS, Mac or Unix). +sub splitLines { + my ($string) = @_; + $string =~ s/(?:\015\012|\015|\012)/\n/sg; + return split("\n", $string); } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime.pm b/lib/LaTeXML/BibTeX/Runtime.pm new file mode 100644 index 0000000000..3b9122920b --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime.pm @@ -0,0 +1,711 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime | # +# | Runtime for LaTeXML::BibTeX-generated perl code | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # + +package LaTeXML::BibTeX::Runtime; +use strict; +use warnings; + +use LaTeXML::Common::Error; +#use LaTeXML::BibTeX::Bibliography; +use LaTeXML::BibTeX::Runtime::Entry; +use LaTeXML::BibTeX::Runtime::Builtins; +use LaTeXML::BibTeX::BibStyle::StyCommand; +use LaTeXML::BibTeX::BibStyle::StyString; +use LaTeXML::BibTeX::Runtime::Strings; +use Scalar::Util qw(blessed); + +sub new { + my ($class, $name, $buffer, $bibliographies, $cites) = @_; + return bless { + name => $name, + buffer => $buffer, + bibliographies => [@{$bibliographies}], + cites => [@${cites}], + stack => [], ### the stack + macros => {}, ### - a set of macros + ### - a set of global string variables (with three values each, as in the stack) + ### along with the types ('GLOBAL_STRING', 'ENTRY_STRING', 'GLOBAL_INTEGER', 'ENTRY_INTEGER', 'ENTRY_FIELD'); + variables => {}, + variableTypes => {}, + ### - a list of read entries, and the current entry (if any) + entries => undef, + entryHash => undef, + entry => undef, + preambleString => [], + preambleSource => [], + }, $class; } + +#====================================================================== +# gets the buffer of this config +sub getBuffer { + my ($self, @data) = @_; + return $$self{buffer}; } + +# gets the bibliographies associated with this configuration +sub getBibliographies { + my ($self) = @_; + return @{ $$self{bibliographies} }; } + +# gets the cites associated with this configuration +sub getCites { + my ($self) = @_; + return @{ $$self{cites} }; } + +# initialises this context and registers all built-in functions +sub initContext { + my ($self) = @_; + # define the crossref field and sort.key$ + $self->defineVariable('crossref', 'ENTRY_FIELD'); + $self->defineVariable('sort.key$', 'ENTRY_STRING'); + # set pre-defined variables to their defaults + # These can technically be re-defined, but one would have to re-compile BibTeX for that. + $self->assignVariable('global.max$', 'GLOBAL_INTEGER', ['INTEGER', 20000, undef]); + $self->assignVariable('entry.max$', 'GLOBAL_INTEGER', ['INTEGER', 250, undef]); + LaTeXML::BibTeX::Runtime::Builtins::install($self); + return; } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# High-level interpreter + +# Debugging aid +sub trim { + my ($string, $len) = @_; + my $l = length($string); + return ($l > $len ? substr($string, 0, $len) : $string . (' ' x ($len - $l))); } + +sub RTDebug { + my ($self, $op, $msg) = @_; + my $d = scalar(@{ $$self{stack} }); + my ($t, $v, $xx) = $self->peekStack(1); + my $s = trim(($t || '?') . ':' . ((defined $v) && (blessed $v) ? $v->stringify : ''), 20); + $op = trim((blessed $op ? $op->stringify : $op), 120); + $msg = trim($msg, 40); + Debug("[$d: $s] $msg : $op"); + return; } + +#====================================================================== +# Execute top-level Commands +sub run { + my ($self, $program) = @_; + RTDebug($self, $program, "PROGRAM") if $LaTeXML::DEBUG{bibtex_runtime}; + foreach my $command (@$program) { + my $name = $command->getName; + if ($name eq 'ENTRY') { do_ENTRY($self, $command); } + elsif ($name eq 'STRINGS') { do_STRINGS($self, $command); } + elsif ($name eq 'INTEGERS') { do_INTEGERS($self, $command); } + elsif ($name eq 'MACRO') { do_MACRO($self, $command); } + elsif ($name eq 'FUNCTION') { do_FUNCTION($self, $command); } + elsif ($name eq 'EXECUTE') { do_EXECUTE($self, $command); } + elsif ($name eq 'READ') { do_READ($self, $command); } + elsif ($name eq 'SORT') { do_SORT($self, $command); } + elsif ($name eq 'ITERATE') { do_ITERATE($self, $command); } + elsif ($name eq 'REVERSE') { do_REVERSE($self, $command); } + else { + Error('bibtex', 'runtime', $command->getLocator, "Unknown command $name"); } } + return; } + +sub do_ENTRY { + my ($self, $command) = @_; + my ($fields, $integers, $strings) = $command->getArguments; + RTDebug($self, $command, "Define ENTRY") if $LaTeXML::DEBUG{bibtex_runtime}; + # define entry fields + foreach my $field (@{ $fields->getValue }) { + my $name = lc($field->getValue); + if ($self->hasVariable($name)) { + Warn('bibtex', 'runtime', $field->getLocator, "Entry field $name already defined"); } + else { + $self->defineVariable($name, 'ENTRY_FIELD'); } } + # define entry fields + foreach my $int (@{ $integers->getValue }) { + my $name = $int->getValue; + if ($self->hasVariable($name)) { + Warn('bibtex', 'runtime', $int->getLocator, "Entry integer $name already defined"); } + else { + $self->defineVariable($name, 'ENTRY_INTEGER'); } } + # define entry strings + foreach my $string (@{ $strings->getValue }) { + my $name = $string->getValue; + if ($self->hasVariable($name)) { + Warn('bibtex', 'runtime', $string->getLocator, "Entry string $name already defined"); } + else { + $self->defineVariable($name, 'ENTRY_STRING'); } } + return; } + +sub do_INTEGERS { + my ($self, $command) = @_; + my ($args) = $command->getArguments; + RTDebug($self, $command, "Define INTEGERS") if $LaTeXML::DEBUG{bibtex_runtime}; + # define global integers + foreach my $int (@{ $args->getValue }) { + my $name = $int->getValue; + if ($self->hasVariable($name)) { + Warn('bibtex', 'runtime', $int->getLocator, "Global integer $name already defined"); } + else { + $self->defineVariable($name, 'GLOBAL_INTEGER'); } } + return; } + +sub do_STRINGS { + my ($self, $command) = @_; + my ($args) = $command->getArguments; + RTDebug($self, $command, "Define STRINGS") if $LaTeXML::DEBUG{bibtex_runtime}; + # define global strings + foreach my $string (@{ $args->getValue }) { + my $name = $string->getValue; + if ($self->hasVariable($name)) { + Warn('bibtex', 'runtime', $string->getLocator, "Global string $name already defined"); } + else { + $self->defineVariable($name, 'GLOBAL_STRING'); } } + return; } + +sub check_one { + my ($self, $thing, $desc) = @_; + my @things = @{ $thing->getValue }; + return Error('bibtex', 'runtime', $thing->getLocator, 'Expected exactly one ' . $desc) + if (scalar(@things) != 1); + return $things[0]; } + +sub do_MACRO { + my ($self, $command) = @_; + my ($name, $value) = $command->getArguments; + return unless $name = $self->check_one($name, 'macro name'); + return unless $value = $self->check_one($value, 'macro value'); + RTDebug($self, $command, "Define Macro " . $name) if $LaTeXML::DEBUG{bibtex_runtime}; + $$self{macros}{ lc $name->getValue } = $value->getValue; + return; } + +sub do_FUNCTION { + my ($self, $command) = @_; + my ($name, $body) = $command->getArguments; + return unless $name = $self->check_one($name, 'function name'); + $name = $name->getValue; + RTDebug($self, $command, "Define Function " . $name) if $LaTeXML::DEBUG{bibtex_runtime}; + $self->assignVariable($name, 'FUNCTION', ['FUNCTION', $body, undef]); + return; } + +sub do_EXECUTE { + my ($self, $command) = @_; + my ($name) = $command->getArguments; + return unless $name = $self->check_one($name, 'function name'); + RTDebug($self, $command, "EXECUTE " . $name->getValue) if $LaTeXML::DEBUG{bibtex_runtime}; + do_Instruction($self, $name); + return; } + +sub do_READ { + my ($self, $command) = @_; + RTDebug($self, $command, "READ bibliographies") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->readEntries([$self->getBibliographies], [$self->getCites]); + return; } + +sub do_SORT { + my ($self, $command) = @_; + my %keys = (); + # find all the entries + RTDebug($self, $command, "SORT bibliographies") if $LaTeXML::DEBUG{bibtex_runtime}; + my $entries = $self->getEntries; + return Error('bibtex', 'runtime', $command->getLocator, + 'Can not sort entries: No entries read yet. ') + unless defined($entries); + # determine their purified key + foreach my $entry (@$entries) { + # get the sort.key$ variable + my ($tp, $key) = $entry->getVariable('sort.key$'); + $key = [''] unless defined($key); # iff it is undefined + $key = join('', @$key); + # and purify it + $keys{ $entry->getKey } = textPurify($key); } + # sort entries using the purified sorting key + $self->sortEntries( + sub { + my ($entryA, $entryB) = @_; + return $keys{ $entryA->getKey } cmp $keys{ $entryB->getKey }; + }); + return; } + +sub do_ITERATE { + my ($self, $command) = @_; + my ($function) = $command->getArguments; + return unless $function = $self->check_one($function, 'function name'); + RTDebug($self, $command, "ITERATE $function") if $LaTeXML::DEBUG{bibtex_runtime}; + # Check that $name corresonds to some function??? + my $entries = $self->getEntries; + return Warn('bibtex', 'runtime', $command->getLocator, + 'Can not iterate entries: No entries have been read') + unless defined($entries); + my $n = 0; + foreach my $entry (@$entries) { + RTDebug($self, $command, "Entry $n") if $LaTeXML::DEBUG{bibtex_runtime}; $n++; + $self->setEntry($entry); + do_Instruction($self, $function); + Warn('bibtex', 'runtime', $command->getLocator, + "Stack is not empty for entry " . $entry->getKey) + unless $self->stackEmpty; } + $self->leaveEntry; + RTDebug($self, $command, "ITERATE done w/$n") if $LaTeXML::DEBUG{bibtex_runtime}; + return; } + +sub do_REVERSE { + my ($self, $command) = @_; + my ($function) = $command->getArguments; + return unless $function = $self->check_one($function, 'function name'); + RTDebug($self, $command, "REVERSE $function") if $LaTeXML::DEBUG{bibtex_runtime}; + # Check that $name corresonds to some function??? + my $entries = $self->getEntries; + return Warn('bibtex', 'runtime', $command->getLocator, + 'Can not iterate entries: No entries have been read') + unless defined($entries); + my $n = 0; + foreach my $entry (reverse(@$entries)) { + RTDebug($self, $command, "Entry $n") if $LaTeXML::DEBUG{bibtex_runtime}; $n++; + $self->setEntry($entry); + do_Instruction($self, $function); + Warn('bibtex', 'runtime', $command->getLocator, + "Stack is not empty for entry " . $entry->getKey) + unless $self->stackEmpty; } + $self->leaveEntry; + RTDebug($self, $command, "REVERSE done w/$n") if $LaTeXML::DEBUG{bibtex_runtime}; + return; } + +#====================================================================== +# Executing Instructions +sub do_Instruction { + my ($self, $instruction) = @_; + my $type = $instruction->getKind; + if ($type eq 'LITERAL') { do_Literal($self, $instruction); } + elsif ($type eq 'REFERENCE') { do_Reference($self, $instruction); } + elsif ($type eq 'BLOCK') { do_Block($self, $instruction); } + elsif ($type eq 'QUOTE') { do_Quote($self, $instruction); } + elsif ($type eq 'NUMBER') { do_Integer($self, $instruction); } + else { + Error('bibtex', 'runtime', $instruction->getLocator, + "Unknown instruction of type $type"); } + return; } + +# execute a literal +sub do_Literal { + my ($self, $variable) = @_; + my $name = $variable->getValue; + my ($type, $value, $source) = $self->getVariable(lc $name); + if (!$type) { + Error('bibtex', 'runtime', $variable->getLocator, "Unknown literal $name in literal"); } + elsif ($type eq 'FUNCTION') { + RTDebug($self, $variable, "INSTRUCTION Literal Function $name") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->executeFunction($variable, $value); } + elsif (defined($type)) { + RTDebug($self, $variable, "INSTRUCTION Literal $type $name") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->pushStack($type, $value, $source); } + else { + Warn('bibtex', 'runtime', $variable->getLocator, + "Can not push $name: Does not exist. "); } + return; } + +# NOTE: Shouldn't this just turn the REFERENCE into a LITERAL ? +# Ah, but how to put that on the stack? +sub do_Reference { + my ($self, $instruction) = @_; + my $name = lc $instruction->getValue; + my $type = $instruction->getKind; + RTDebug($self, $instruction, "INSTRUCTION Literal Reference $name => $type") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->pushStack('REFERENCE', [$type, $name], undef); + return; } + +# A block in the instruction stream gets pushed onto the stack +sub do_Block { + my ($self, $block) = @_; + RTDebug($self, $block, "Push block $block") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->pushStack('FUNCTION', $block, undef); + return; } + +# execute a function (from stack) may be CODE or a BLOCK +sub executeFunction { + my ($self, $instruction, $function) = @_; + if (ref $function eq 'CODE') { + RTDebug($self, $function, "Builtin $function") if $LaTeXML::DEBUG{bibtex_runtime}; + &{$function}($self, $instruction); } + else { + RTDebug($self, $function, "Run block") if $LaTeXML::DEBUG{bibtex_runtime}; + my @instructions = @{ $function->getValue }; + foreach my $instruction (@instructions) { + do_Instruction($self, $instruction); } } + return; } + +# Execute an item popped from the stack +# weirdly re-encoded +sub executeStacked { + my ($self, $instruction, $type, $value, $source) = @_; + # Shouldn't REFERENCE's already have been DE-referenced? + if ($type eq 'REFERENCE') { + my ($vtype, $vname) = @$value; + my ($t, $v, $s) = $self->getVariable($vname); + if ($t eq 'FUNCTION') { + $type = $t; $value = $v; } + else { + $self->pushStack($t, $v, $s); + return; } } + if ($type eq 'FUNCTION') { + $self->executeFunction($instruction, $value); } + else { + Error('bibtex', 'runtime', $instruction->getLocator, "Attempt to evaluate $type"); } + return; } + +# execute a single quote +sub do_Quote { + my ($self, $quote) = @_; + RTDebug($self, $quote, "INSTRUCTION Literal Quote '" . $quote->getValue . "'") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->pushStack('STRING', [$quote->getValue], undef); + return; } + +# executes a single number +sub do_Integer { + my ($self, $number) = @_; + RTDebug($self, $number, "INSTRUCTION Literal integer '" . $number->getValue . "'") if $LaTeXML::DEBUG{bibtex_runtime}; + $self->pushStack('INTEGER', $number->getValue, undef); + return; } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Low-level stack access + +### Each entry in the runtime stack internally consists of a triple (type, valuye, source): +### - 'type' contains types of objects +### - 'value' the actual objects +### - 'source' contains the source references of objects +### Entries on the stack are considered immutable (even though Perl provides no guarantees that it is indeed so). +### Any changes to the underlying values should be performed on a copy of the data. + +### The following types are defined: + +### 0. 'UNSET' - if a variable has not been set +### 1. 'MISSING' - a missing value of a field (TODO: perhaps also an uninititialized constant) +### 2. 'STRING' - a simple string +### 3. 'INTEGER' -- an integer +### 4. 'FUNCTION' -- a function +### 5. 'REFERENCE' -- a reference to a variable or function on the stack. Starts with 'GLOBAL_' or 'ENTRY_'. + +### These have the corresponding values: + +### 0. 'UNSET' -- undef +### 1. 'MISSING' -- undef +### 2. 'STRING' -- a tuple of strings +### 3. 'INTEGER' -- a single integer +### 4. 'FUNCTION' -- the function reference +### 5. 'REFERENCE' -- a pair (variable type, reference) of the type of variable being referenced and the actual value being referened + +### The corresponding source references are: +### 0. 'UNSET' -- undef +### 1. 'MISSING' -- a tuple(key, field) this value comes from +### 2. 'STRING' -- a tuple (key, field) or undef for each string +### 3. 'INTEGER' -- a tuple (key, field) or undef, when joining take the first one +### 4. 'FUNCTION' -- undef +### 5. 'REFERENCE' -- undef + +# TODO: Allow re-running a context without having to re-parse the bib files +# (There should probably be a reset function that clear entries, but keeps the read .bib files) + +#====================================================================== +# 'popStack' pops and returns a value from the stack, or returns undef, undef, undef +# The value returned from the stack is immutable, and should be copied if any changes are made +sub popStack { + my ($self) = @_; + if (my $top = pop(@{ $$self{stack} })) { + return @$top; } + return; } + +# 'peekStack' peeks at position $index from the top of the stac, or undef, undef, undef if it is not defined. +# Note that index is 1-based, i.e. peekStack(1) returns the top-most element on the stack +sub peekStack { + my ($self, $index) = @_; + if (my $top = $$self{stack}[-($index || 1)]) { + return @$top; } + return; } + +# 'pushStack' pushes a single value onto the stack +sub pushStack { + my ($self, $type, $value, $source) = @_; + ## NOTE: Some kind of weird programming error??? + Error("STACK", "nonsource", undef, "Stacked non-array source for $type, $value, $source: " + . (ref $source ? join(',', @$source) : '')) + if ($type eq 'STRING') && (defined $source) && + (!(ref $source) || grep { (defined $_) && ($_ ne '') && (!ref $_); } @$source); + push(@{ $$self{stack} }, [$type, $value, $source]); + return; } + +# pops a string of a particular type, or throws an error; +# returns object & source if in array context, else just the object +sub popType { + my ($self, $type, $instruction) = @_; + my ($tp, $value, $src) = $self->popStack; + unless (defined($tp)) { + Warn('bibtex', 'runtime', $instruction->getLocator, + $instruction->getKind . ' attempted to pop the empty stack'); + return; } + if ($tp ne $type) { + Warn('bibtex', 'runtime', $instruction->getLocator, + $instruction->getKind . " expected to pop type $type from stack, but got type $tp: " . $value); + return; } + return (wantarray ? ($value, $src) : $value); } + +# 'pushString' pushes an string without a source refence onto the stack. +sub pushString { + my ($self, $string) = @_; + push(@{ $$self{stack} }, ['STRING', [$string], [undef]]); + return; } + +# 'pushInteger' pushes an integer without a source refence onto the stack. +sub pushInteger { + my ($self, $integer) = @_; + push(@{ $$self{stack} }, ['INTEGER', $integer, undef]); + return; } + +# 'stackEmpty' returns a boolean indicating if the stack is empty. +sub stackEmpty { + my ($self) = @_; + return @{ $$self{stack} } == 0; } + +# 'duplicateStack' duplicates the top-most entry of the stack. +# returns a boolean indicating if duplication succeeded (i.e. if the stack was empty or not). +sub duplicateStack { + my ($self) = @_; + # grab and duplicate value (if needed) + push(@{ $$self{stack} }, $$self{stack}[-1] || return 0); + return 1; } + +# 'swapStack' swaps the two top-most entries of the stack. +# returns a boolean indicating if swapping succeeded (i.e. if the stack had at least two element or not). +sub swapStack { + my ($self) = @_; + return 0 if scalar(@{ $$self{stack} }) <= 1; + @{ $$self{stack} }[-1, -2] = @{ $$self{stack} }[-2, -1]; + return 1; } + +#====================================================================== +# VARIABLES + +# 'hasVariable' checks if a variable of the given name and type exists. +# When type is omitted, checks if any variable of the given type exists +sub hasVariable { + my ($self, $name, $type) = @_; + return ($$self{variableTypes}{$name} || return 0) eq + ($type || return 1); } + +# 'defineVariable' defines a new variable of the given type. +# returns 1 if the variable was defined, 0 if it already existed. +sub defineVariable { + my ($self, $name, $type) = @_; + return 0 if defined($$self{variableTypes}{$name}); + # store the type and set initial value if global + $$self{variableTypes}{$name} = $type; + # if we don't have an entry variable, initialize them to sensible defaults here + unless ($type =~ /^ENTRY_/) { + if ($type eq 'GLOBAL_INTEGER') { + $$self{variables}{$name} = [('INTEGER', 0, undef)]; } + elsif ($type eq 'GLOBAL_STRING') { + $$self{variables}{$name} = [('STRING', [""], [undef])]; } + else { + $$self{variables}{$name} = [('UNSET', undef, undef)]; } } + return 1; } + +# 'getVariable' gets a variable of the given name +# Returns a triple (type, value, source). +sub getVariable { + my ($self, $name) = @_; + # if the variable does not exist, return nothing + my $type = $$self{variableTypes}{$name}; + return unless $type; + # we need to look up inside the current entry + if ($type eq 'ENTRY_FIELD' + or $type eq 'ENTRY_STRING' + or $type eq 'ENTRY_INTEGER') + { + my $entry = $$self{entry} || return ('UNSET', undef, undef); + return $entry->getVariable($name); } + # 'global' variable => return from our own state + return (@{ $$self{variables}{$name} }); } + +# 'setVariable' sets a variable of the given name. +# A variable is represented by a reference to a triple (type, value, source). +# returns 0 if ok, 1 if it doesn't exist, 2 if an invalid context, 3 if read-only, 4 if unknown type +sub setVariable { + my ($self, $name, $value) = @_; + # if the variable does not exist, return nothing + my $type = $$self{variableTypes}{$name}; + return 1 unless defined($type); + # normalize name of variable + $name = lc $name; + # we need to look up inside the current entry + if ($type eq 'ENTRY_FIELD' + or $type eq 'ENTRY_STRING' + or $type eq 'ENTRY_INTEGER') { + my $entry = $$self{entry} || return 2; + return $entry->setVariable($name, $value); } + # we have a global variable, so take it from our stack + elsif ($type eq 'GLOBAL_STRING' + or $type eq 'GLOBAL_INTEGER' + or $type eq 'FUNCTION') { + # else assign the value + $$self{variables}{$name} = $value; + # and return + return 0; } + # I don't know the type + return 4; } + +# 'assignVariable' defines and sets a variable to the given value. +# A variable is represented by a reference to a triple (type, value, source). +# Returns 0 if ok, 1 if it already exists, 2 if an invalid context, 3 if read-only, 4 if unknown type +sub assignVariable { + my ($self, $name, $type, $value) = @_; + # define the variable + my $def = $self->defineVariable($name, $type); + return 1 unless $def == 1; + return $self->setVariable($name, $value); } + +#====================================================================== +# ENTRIES + +# 'getEntries' gets a list of all entries +sub getEntries { + my ($self) = @_; + return $$self{entries}; } + +# 'readEntries' reads in all entries and builds an entry list. +sub readEntries { + my ($self, $bibliographies, $citations) = @_; + my @bibliographies = @{$bibliographies}; + if (defined($$self{entries})) { # Already have entries? + Warn('bibtex', 'runtime', undef, 'Can not read entries: Already read entries'); + return; } + my @entries = (); + foreach my $bibliography (@bibliographies) { + my $path = $bibliography->getPathname; + # iterate over all the entries + foreach my $entry ($bibliography->getEntries($$self{macros})) { + push(@entries, LaTeXML::BibTeX::Runtime::Entry->new($path, $self, $entry)); } + push(@{ $$self{preambleString} }, $bibliography->getPreamble); + push(@{ $$self{preambleSource} }, [($path, '', 'preamble')]); + } + # build a map of entries + my (%entryHash) = (); + my ($key); + foreach my $entry (@entries) { + $key = $entry->getKey; + if (defined($entryHash{$key})) { + Warn('bibtex', 'runtime', undef, + "Skipping duplicate entry for key $key", $$entry{entry}->getSource); } + else { + $entryHash{$key} = $entry; } } + $$self{entryHash} = \%entryHash; + # TODO: Allow numcrossref customization + $$self{entries} = $self->buildEntryList([@entries], $citations, 2); + return; } + +# build a list of entries that should be cited. +sub buildEntryList { + my ($self, $entryList, $citeList, $numCrossRefs) = @_; + my %citedKeys = (); # same as citeList, but key => 1 mapping + my %related = (); # resolved reference entries + my @xrefed = (); + my @entries = (); + my %refmap = (); # [xrefed] => referencing entries + my $entryHash = $$self{entryHash}; # hash for resolving entries + my %entryMap = %$entryHash; + + while (my $citeKey = shift(@$citeList)) { + # If we already cited something it does not need to be cited again. + # This is *not* an error, it might regularly occur if things are cited multiple times. + next if exists($citedKeys{$citeKey}); + # When we receive a '*' key, we need to add all the entries that we know of + if ($citeKey eq '*') { + push(@$citeList, map { $_->getKey; } @$entryList); + next; } + # find the current entry + if (my $entry = $entryMap{$citeKey}) { + # push this entry into the list of cited 'stuff' + push(@entries, $entry); + $citedKeys{$citeKey} = 1; + # grab the cross-referenced entry and resolve it + my ($xref, $xrefentry) = $entry->resolveCrossReference($entryHash); + if (defined $xref) { + if (defined $xrefentry) { + # Add this item to the 'cited' + if (!defined($refmap{$xref})) { + push(@xrefed, $xref); + $refmap{$xref} = [()]; } + # and add the current entry to the xrefed entry + push(@{ $refmap{$xref} }, $entry); } + else { # if the cross-referenced entry doesn't exist + Warn('bibtex', 'runtime', undef, + "A bad cross reference---entry \"$citeKey\" refers to entry \"$xref\", which doesn't exist", + $entry->getKind, $$entry{entry}->getSource); # TODO: Better warning location + $entry->clearCrossReference(); } } } + else { + Warn('bibtex', 'runtime', undef, "I didn't find a database entry for \"$citeKey\""); } } + # iterate over everything that was cross-referenced + # and either inline or add it to the citation list + foreach my $value (@xrefed) { + my @references = @{ $refmap{$value} }; + my $related = $entryMap{$value}; + my $exists = exists($citedKeys{$value}); + # We always inline cross-referenced entries. + # When few references to a specific entry is small enough we remove the 'crossref' key. + my $hideCrossref = !$exists && scalar @references < $numCrossRefs; + foreach my $reference (@references) { + $reference->inlineCrossReference($related, $hideCrossref); } + # if there are more, it is included in the list of entries + push(@entries, $related) unless ($hideCrossref || $exists); } + return [@entries]; } + +sub getPreamble { + my ($self) = @_; + return $$self{preambleString}, $$self{preambleSource}; } + +# sort entries in-place using a comparison function +# return 1 iff entriues have been sorted +sub sortEntries { + my ($self, $comp) = @_; + $$self{entries} = [sort { &{$comp}($a, $b) } @{ $$self{entries} }]; + return 1; } + +# sets the current entry +sub setEntry { + my ($self, $entry) = @_; + $$self{entry} = $entry; + return $entry; } + +# 'findEntry' finds and activates the entry with the given key and returns it. +# If no such entry exists, returns undef. +sub findEntry { + my ($self, $key) = @_; + my $theEntry; + # if we have a hash for entries (i.e. we were initialized) + # we should just lookup the key + my $entryHash = $$self{entryHash}; + if (defined($entryHash)) { + my %hash = %{$entryHash}; + $theEntry = $hash{$key}; } + # if we weren't initalized, we need to iterate + else { + foreach my $entry (@{ $self->getEntries() }) { + if ($entry->getKey eq $key) { + $theEntry = $entry; + last; } } } + # set the active entry and return it + return $self->setEntry($theEntry) if defined($theEntry); + return; } + +# gets the current entry (if any) +sub getEntry { + my ($self) = @_; + return $$self{entry}; } + +# leave the current entry (if any) +sub leaveEntry { + my ($self) = @_; + $$self{entry} = undef; + return; } + +#====================================================================== +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Buffer.pm b/lib/LaTeXML/BibTeX/Runtime/Buffer.pm new file mode 100644 index 0000000000..4f8b611c00 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Buffer.pm @@ -0,0 +1,122 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Buffer | # +# | Emulates BibTeX's buffer implementation | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +package LaTeXML::BibTeX::Runtime::Buffer; +use strict; +use warnings; +use LaTeXML::Common::Error; + +# The Buffer class emulates the output buffering implemented by BibTeX +# In addition to the raw BibTeX behavior this class also implements +# wrapping source references around specific output strings. +# Forefficency the wrapping behavior of BibTeX is only enabled +# when $wrapEnabled is set. +sub new { + my ($class, $handle, $wrapEnabled, $sourceMacro) = @_; + return bless { + # handle to send output to + handle => $handle, + wrapEnabled => $wrapEnabled, + sourceMacro => $sourceMacro, + minLineLength => 3, + maxLineLength => 79, + # state for + buffer => "", # current internal buffer + skipSpaces => + 0, # flag to indicate if whitespace is currently being skipped + }, $class; } + +# Write writes a string from the buffer to the output handle +# and emulates BibTeX's hard-wrapping +sub write { + my ($self, $string, $source) = @_; + # add the string to the buffer + $$self{buffer} .= $self->wrapSource($string, $source); + return unless $$self{wrapEnabled}; + # while the buffer is long enough + my ($candidate, $index); + while (length($$self{buffer}) > $$self{maxLineLength}) { + # find whitespace at the beginning of the string (if applicable) + $candidate = reverse( + substr( + $$self{buffer}, + $$self{minLineLength}, + $$self{maxLineLength} - $$self{minLineLength} + 1 + ) + ); + if ($candidate =~ /\s/) { + $index = $$self{maxLineLength} - $-[0]; } + # if there isn't any, find whitespace afterwards or bail out + else { + return + unless substr($$self{buffer}, $$self{maxLineLength}) =~ /\s/; + $index = $$self{maxLineLength} + $-[0]; } + # split the buffer at the index + $candidate = substr($$self{buffer}, 0, $index); + $$self{buffer} = substr($$self{buffer}, $index); + $self->writeLineInternal($candidate); +# By default, we trim all the spaces from the next line. +# However, there is a bug in the BibTeX implementation, where this does not always work. +# When there are at least two spaces beginning at exactly the boundary between two lines, an additional space is left on the line. + unless ($$self{maxLineLength} == $index && $$self{buffer} =~ /^\s\s/) { + $$self{buffer} =~ s/^\s+//; } + else { + $$self{buffer} =~ s/^\s+/ /; } + # and add two spaces to the next line + $$self{buffer} = ' ' . $$self{buffer}; } + return; } + +# WriteLn writes whatever is currently in the buffer +sub writeLn { + my ($self) = @_; + $self->writeLineInternal($$self{buffer}); + $$self{buffer} = ''; + return; } + +# writeLineInternal internally writes a line to the output +sub writeLineInternal { + my ($self, $line) = @_; + # trim trailing whitespace and then print it + $line =~ s/\s+$//; + # print it + print { $$self{handle} } $line . "\n"; + return; } + +# wrapSource wraps a source-referenced string into the appropriate +# source macro for this buffer. If source or macro are undef, returns +# the original string +sub wrapSource { + my ($self, $string, $source) = @_; +##### return $string unless defined($source) && $$self{sourceMacro}; +### NOTE: Somehow, we're getting a $source, but it's not the expected array [entry,field] + return $string unless defined($source) && (ref $source) && $$self{sourceMacro}; +##Debug("WRAP '$string' from ".(ref $source ? '['.join(',',@$source).']' : $source)); +##return $string; + my ($fn, $entry, $field) = @{$source}; + return $string unless $field; + return + '\\' + . $$self{sourceMacro} . '{' + . $fn . '}{' + . $entry . '}{' + . $field . '}{' + . $string . '}'; } + +# finalize closes this buffer and flushes whatever is left in the buffer to STDOUT +sub finalize { + my ($self) = @_; + # print whatever is left in the handle to the buffer + print { $$self{handle} } $$self{buffer}; + # state reset (not really needed, buf whatever) + $$self{buffer} = ''; + $$self{counter} = 0; + $$self{skipSpaces} = 0; + close($$self{handle}); + return; } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Builtins.pm b/lib/LaTeXML/BibTeX/Runtime/Builtins.pm new file mode 100644 index 0000000000..9b53da64c1 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Builtins.pm @@ -0,0 +1,639 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Builtin | # +# | BibTeX builtin functions | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +package LaTeXML::BibTeX::Runtime::Builtins; +use strict; +use warnings; +use LaTeXML::Common::Error; + +use LaTeXML::BibTeX::Runtime::Utils; +use LaTeXML::BibTeX::Runtime::Strings; +use LaTeXML::BibTeX::Runtime::Names; + +sub install { + my ($runtime) = @_; + # define all the built-in functions + $runtime->assignVariable('>', 'FUNCTION', ['FUNCTION', \&builtinZg, undef]); + $runtime->assignVariable('<', 'FUNCTION', ['FUNCTION', \&builtinZl, undef]); + $runtime->assignVariable('=', 'FUNCTION', ['FUNCTION', \&builtinZe, undef]); + $runtime->assignVariable('+', 'FUNCTION', ['FUNCTION', \&builtinZp, undef]); + $runtime->assignVariable('-', 'FUNCTION', ['FUNCTION', \&builtinZm, undef]); + $runtime->assignVariable('*', 'FUNCTION', ['FUNCTION', \&builtinZa, undef]); + $runtime->assignVariable(':=', 'FUNCTION', ['FUNCTION', \&builtinZcZe, undef]); + $runtime->assignVariable('add.period$', 'FUNCTION', ['FUNCTION', \&builtinAddPeriod, undef]); + $runtime->assignVariable('call.type$', 'FUNCTION', ['FUNCTION', \&builtinCallType, undef]); + $runtime->assignVariable('change.case$', 'FUNCTION', ['FUNCTION', \&builtinChangeCase, undef]); + $runtime->assignVariable('chr.to.int$', 'FUNCTION', ['FUNCTION', \&builtinChrToInt, undef]); + $runtime->assignVariable('cite$', 'FUNCTION', ['FUNCTION', \&builtinCite, undef]); + $runtime->assignVariable('duplicate$', 'FUNCTION', ['FUNCTION', \&builtinDuplicate, undef]); + $runtime->assignVariable('empty$', 'FUNCTION', ['FUNCTION', \&builtinEmpty, undef]); + $runtime->assignVariable('format.name$', 'FUNCTION', ['FUNCTION', \&builtinFormatName, undef]); + $runtime->assignVariable('if$', 'FUNCTION', ['FUNCTION', \&builtinIf, undef]); + $runtime->assignVariable('int.to.chr$', 'FUNCTION', ['FUNCTION', \&builtinIntToChr, undef]); + $runtime->assignVariable('int.to.str$', 'FUNCTION', ['FUNCTION', \&builtinIntToStr, undef]); + $runtime->assignVariable('missing$', 'FUNCTION', ['FUNCTION', \&builtinMissing, undef]); + $runtime->assignVariable('newline$', 'FUNCTION', ['FUNCTION', \&builtinNewline, undef]); + $runtime->assignVariable('num.names$', 'FUNCTION', ['FUNCTION', \&builtinNumNames, undef]); + $runtime->assignVariable('pop$', 'FUNCTION', ['FUNCTION', \&builtinPop, undef]); + $runtime->assignVariable('preamble$', 'FUNCTION', ['FUNCTION', \&builtinPreamble, undef]); + $runtime->assignVariable('purify$', 'FUNCTION', ['FUNCTION', \&builtinPurify, undef]); + $runtime->assignVariable('quote$', 'FUNCTION', ['FUNCTION', \&builtinQuote, undef]); + $runtime->assignVariable('skip$', 'FUNCTION', ['FUNCTION', \&builtinSkip, undef]); + $runtime->assignVariable('stack$', 'FUNCTION', ['FUNCTION', \&builtinStack, undef]); + $runtime->assignVariable('substring$', 'FUNCTION', ['FUNCTION', \&builtinSubstring, undef]); + $runtime->assignVariable('swap$', 'FUNCTION', ['FUNCTION', \&builtinSwap, undef]); + $runtime->assignVariable('text.length$', 'FUNCTION', ['FUNCTION', \&builtinTextLength, undef]); + $runtime->assignVariable('text.prefix$', 'FUNCTION', ['FUNCTION', \&builtinTextPrefix, undef]); + $runtime->assignVariable('top$', 'FUNCTION', ['FUNCTION', \&builtinTop, undef]); + $runtime->assignVariable('type$', 'FUNCTION', ['FUNCTION', \&builtinType, undef]); + $runtime->assignVariable('warning$', 'FUNCTION', ['FUNCTION', \&builtinWarning, undef]); + $runtime->assignVariable('while$', 'FUNCTION', ['FUNCTION', \&builtinWhile, undef]); + $runtime->assignVariable('width$', 'FUNCTION', ['FUNCTION', \&builtinWidth, undef]); + $runtime->assignVariable('write$', 'FUNCTION', ['FUNCTION', \&builtinWrite, undef]); + return; } + +# builtin function > +# pops two integers from the stack, then pushes 1 if the the latter is bigger than +# the former, 0 otherwise. +# If either stack entry is not an integer literal, loudly pushes 0 on the stack. +sub builtinZg { + my ($runtime, $instruction) = @_; + my $i1 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i1; + my $i2 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i2; + + $runtime->pushInteger($i2 > $i1 ? 1 : 0); + return; } + +# builtin function < +# pops two integers from the stack, then pushes 1 if the the latter is smaller than +# the former, 0 otherwise. +# If either stack entry is not an integer literal, loudly pushes 0 on the stack. +sub builtinZl { + my ($runtime, $instruction) = @_; + my $i1 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i1; + my $i2 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i2; + + $runtime->pushInteger($i2 < $i1 ? 1 : 0); + return; } + +# builtin function = +# pops two strings or two integers from the stack. Then pushes 1 if they are equal, 0 if not. +# If either of the types don't match, loudly pushes 0 on the stack. +sub builtinZe { + my ($runtime, $instruction) = @_; + my ($tp, $value) = $runtime->popStack; + unless (defined($tp)) { + Warn('bibtex', 'runtime', $instruction->getLocator, "Unable to pop empty stack"); + $runtime->pushInteger(0); } + if ($tp eq 'INTEGER') { + my $i1 = $value; + my $i2 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i2; + $runtime->pushInteger($i1 == $i2 ? 1 : 0); } + elsif ($tp eq 'STRING') { + my ($s1) = simplifyString($value); + my $s2 = $runtime->popType('STRING', $instruction); + return $runtime->pushInteger(0) unless defined $s2; + ($s2) = simplifyString($s2); + $runtime->pushInteger($s1 eq $s2 ? 1 : 0); } + else { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Equals(=) expected to find a STRING or an INTEGER on the stack. '); + $runtime->pushInteger(0); } + return; } + +# builtin function + +# pops two integer literals from the stack, and then pushes their sum. +# If either isn't an integer, loudly pushes a 0. +sub builtinZp { + my ($runtime, $instruction) = @_; + my $i1 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i1; + my $i2 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i2; + + $runtime->pushInteger($i2 + $i1); + return; } + +# builtin function - +# pops two integer literals from the stack, and then pushes their difference. +# If either isn't an integer, loudly pushes a 0. +sub builtinZm { + my ($runtime, $instruction) = @_; + my $i1 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i1; + my $i2 = $runtime->popType('INTEGER', $instruction); + return $runtime->pushInteger(0) unless defined $i2; + + $runtime->pushInteger($i2 - $i1); + return; } + +# builtin function * +# pops two string literals from the stack and pushes their concatination +# If either isn't an string, loudly pushes the empty string. +sub builtinZa { + my ($runtime, $instruction) = @_; + my ($s1, $ss1) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $s1; + my ($s2, $ss2) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $s2; + + my ($ns, $nss) = concatString($s2, $ss2, $s1, $ss1); + $runtime->pushStack('STRING', $ns, $nss); + return; } + +# builtin function := +# pops a function literal from the stack, and then a value of the appropriate type. +# finally assigns the literal to that value. +# complains when there is a type mismatch +# 0 if ok, 1 if it doesn't exist, 2 if an invalid context, 3 if read-only, 4 if unknown type +sub builtinZcZe { + my ($runtime, $instruction) = @_; + # pop the variable type and name to be assigned + my $rv = $runtime->popType('REFERENCE', $instruction); + return unless defined($rv); + my ($rvt, $name) = @$rv; + # pop the value to assign + my ($t, $v, $s) = $runtime->popStack; + return Warn('bibtex', 'runtime', $instruction->getLocator, + 'Attempted to pop the empty stack') unless defined($t); + # and do it! + my $asr = $runtime->setVariable($name, [$t, $v, $s]); + if ($asr == 1) { + Warn('bibtex', 'runtime', $instruction->getLocator, + "Can not set $name: Does not exist."); } + elsif ($asr == 2) { + Warn('bibtex', 'runtime', $instruction->getLocator, + "Can not set $name: Not in an entry context. "); } + elsif ($asr == 4) { + Warn('bibtex', 'runtime', $instruction->getLocator, + "Can not set $name: Read-only. "); } + elsif ($asr == 4) { + Warn('bibtex', 'runtime', $instruction->getLocator, + "Can not set $name: Unknown type. "); } + return; } + +# builtin function add.period$ +# pops a string from the stack and adds a period to it when it does not already end with a +# '.', '!' or '?'. +# when there isn't a string literal, it loudly pushes the empty string +sub builtinAddPeriod { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + # NOTE: Hopefully, we don't run afoul of any peculiar accent at end of the string? + my $last = $$strings[-1]; # or maybe last non-empty ??? or what? + if ($last !~ /\.\}*$/) { # If doesn't end in "." + ($strings, $sources) = concatString($strings, $sources, ['.'], [undef]); } + $runtime->pushStack('STRING', $strings, $sources); + return; } + +# builtin function call.type$ +sub builtinCallType { + my ($runtime, $instruction) = @_; + my $entry = $runtime->getEntry; + unless ($entry) { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Can not call.type$: No active entry. '); + return; } + my $tp = $entry->getType; + my ($ftype, $value) = $runtime->getVariable($tp); + unless (defined($ftype) && $ftype eq 'FUNCTION') { + ($ftype, $value) = $runtime->getVariable("default.type"); + unless (defined($ftype) && $ftype eq 'FUNCTION') { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Can not call.type$: Unknown entrytype ' . $tp + . ' and no default handler has been defined. '); + return; } } + # call the type function + $runtime->executeFunction($instruction, $value); + return; } + +# builtin function change.case$ +# pops two string literals from the stack and formats the first according to the second. +# when either is not a string literal, loudly pushes the empty string. +# NOTE: This LOSES sources! +sub builtinChangeCase { + my ($runtime, $instruction) = @_; + # get the case string and simplify it to be a single character + my ($cstrings, $csources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $cstrings; + my ($spec) = simplifyString($cstrings, $csources); + + # pop the final string + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + # add the text prefix and push it to the stack + my ($newStrings, $newSources) = applyPatch( + $strings, $sources, + sub { + my $result = changeCase('' . $_[0], $spec); + unless (defined($result)) { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Can not change.case$: Unknown format string' . $spec); + return '' . $_[0]; } + return $result; }, + 'inplace' + ); + $runtime->pushStack('STRING', $newStrings, $newSources); + return; } + +# builtin function chr.to.int$ +# pops the top string literal, and push the integer corresponding to it's ascii value. +# if the top literal is not a string, or the string is not of length 1, loudly pushes a 0 on the stack. +sub builtinChrToInt { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + # if we have a string, that's ok. + if (defined($strings)) { + my ($str, $src) = simplifyString($strings, $sources); + if (length($str) == 1) { + $runtime->pushStack('INTEGER', ord($str), $src); } + else { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Expected a single character string on the stack, but got ' . length($str) . ' characters.'); + $runtime->pushInteger(0); } } + else { + $runtime->pushInteger(0); } + return; } + +# builtin function cite$ +# pushes the key of the current entry or complains if there is none +sub builtinCite { + my ($runtime, $instruction) = @_; + my $entry = $runtime->getEntry; + unless ($entry) { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Can not push the entry key: No active entry.'); + return; } + $runtime->pushStack( + 'STRING', + [$entry->getKey], + [[$entry->getName, $entry->getKey, '']] + ); + return; } + +# builtin function duplicate$ +# duplicates the topmost stack entry, or complains if there is none. +sub builtinDuplicate { + my ($runtime, $instruction) = @_; + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Attempted to duplicate the empty stack') + unless $runtime->duplicateStack; + return; } + +# builtin function empty$ +# pops the top literal from the stack. +# It then pushes a 0 if it is a whitespace-only string or a missing value. +# If pushes a 1 if it is a string that contains non-whitespace charaters. +# Otherwise, it complains and pushes the integer 0. +sub builtinEmpty { + my ($runtime, $instruction) = @_; + my ($tp, $value) = $runtime->popStack; + return Warn('bibtex', 'runtime', $instruction->getLocator, + 'Attempted to pop the empty stack') + unless defined($tp); + if ($tp eq 'MISSING') { + $runtime->pushInteger(1); } + elsif ($tp eq 'STRING') { + ($value) = simplifyString($value); + $runtime->pushInteger(($value =~ /^\s*$/) ? 1 : 0); } + else { + Warn('bibtex', 'runtime', $instruction->getLocator, + 'empty$ expects a string or missing field on the stack'); + $runtime->pushInteger(0); } + return; } + +# builtin function format.name$ +# Pops a string, an integer, and a string (in that order) from the stack +# It then formats the nth name of the first string according to the specification of the latter. +# If either type does not match, it pushes the empty string. +sub builtinFormatName { + my ($runtime, $instruction) = @_; + # get the format string + my $fstrings = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless $fstrings; + ($fstrings) = simplifyString($fstrings); + + # get the length + my $integer = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $integer; + + # pop the final name string + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + # add the text prefix and push it to the stack + my ($newStrings, $newSources) = applyPatch( + $strings, $sources, + sub { + my @names = splitNames($_[0] . ''); + my $name = $names[$integer - 1] || ''; # TODO: Warn if missing + my ($fname, $error) = formatName("$name", $fstrings); + Warn('bibtex', 'runtime', $instruction->getLocator, "Unable to format name: $error") + if defined($error); + return defined($fname) ? $fname : ''; + }, + 0 + ); + $runtime->pushStack('STRING', $newStrings, $newSources); + return; } + +# builtin function if$ +# pops two function literals and an integer literal from the stack. +# it then executes the first literal if the integer is > 0, otherwise the second. +# if either type mismatches, complains but does not attempt to recover. +sub builtinIf { + my ($runtime, $instruction) = @_; + my ($f1type, $f1, $f1src) = $runtime->popStack; + return unless defined($f1type); + my ($f2type, $f2, $f2src) = $runtime->popStack; + return unless defined($f2type); + my $integer = $runtime->popType('INTEGER', $instruction); + return unless defined $integer; + + if ($integer > 0) { + $runtime->executeStacked($instruction, $f2type, $f2, $f2src); } + else { + $runtime->executeStacked($instruction, $f1type, $f1, $f1src); } + return; } + +# builtin function int.to.chr$ +# pops an integer literal from the stack, and pushes the corresponding ASCII character. +# when the stack does not contain an integer, complains and pushes the null string. +sub builtinIntToChr { + my ($runtime, $instruction) = @_; + my ($integer, $isource) = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $integer; + + $runtime->pushStack('STRING', [chr($integer)], [$isource]); + return; } + +# builtin function int.to.str$ +# pops an integer literal from the stack, and pushes the corresponding string value. +# when the stack does not contain an integer, complains and pushes the null string. +sub builtinIntToStr { + my ($runtime, $instruction) = @_; + my ($integer, $isource) = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $integer; + + $runtime->pushStack('STRING', ["$integer"], [$isource]); + return; } + +# builtin function missing$ +# pops the top literal from the stack, and pushes the integer 1 if it is a missing field, 0 otherwise. +sub builtinMissing { + my ($runtime, $instruction) = @_; + my ($tp) = $runtime->popStack; + return Warn('bibtex', 'runtime', $instruction->getLocator, "Unable to pop empty stack") + unless defined $tp; + $runtime->pushInteger(($tp eq 'MISSING') ? 1 : 0); + return; } + +# builtin function newline$ +# sends the current content of the output buffer and a newline to the output. +sub builtinNewline { + my ($runtime, $instruction) = @_; + $runtime->getBuffer->writeLn; + return; } + +# builtin function num.names$ +# pops a string literal from the stack, and then counts the number of names in it +# when the top literal is not a string, loudly pushes the number 0. +sub builtinNumNames { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushInteger(0) unless defined $strings; + + # if we have a string, that's ok. + my ($str, $src) = simplifyString($strings, $sources); + $runtime->pushStack('INTEGER', numNames($str), [$src]); + return; } + +# builtin function pop$ +# pops the top literal from the stack and does nothing +sub builtinPop { + my ($runtime, $instruction) = @_; + my ($tp) = $runtime->popStack; + unless (defined($tp)) { + Warn('bibtex', 'runtime', $instruction->getLocator, "Unable to pop empty stack"); } + return; } + +# builtin function preamble$ +# pushes the concatination of all preambles onto the stack +sub builtinPreamble { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->getPreamble; + $runtime->pushStack('STRING', $strings, $sources); + return; } + +# builtin function purify$ +# pops a string from the stack, purifies it, and then pushes it. +# when the top literal is not a string, loudly pushes the empty string. +sub builtinPurify { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + my ($newStrings, $newSources) = applyPatch($strings, $sources, \&textPurify, 'inplace'); + $runtime->pushStack('STRING', $newStrings, $newSources); + return; } + +# builtin function quote$ +# push the string containing only a double quote onto the stack +sub builtinQuote { + my ($runtime, $instruction) = @_; + $runtime->pushString("\""); + return; } + +# builtin function skip$ +# does nothing +sub builtinSkip { return; } + +# builtin function stack$ +# pops and prints the contents of the stack for debugging purposes +sub builtinStack { + my ($runtime, $instruction) = @_; + my ($tp, $value, $src) = $runtime->popStack; + while (defined($tp)) { + Degug(fmtType($tp, $value, $src)); + ($tp, $value, $src) = $runtime->popStack; } + return; } + +# builtin function substring$ +# pops two integers and a string, then pushes the substring consisting of the appropriate position and length. +# Positions are 1-based, negative starts from end. +# This does NOT make any corrections based on braces, accents, etc! +sub builtinSubstring { + my ($runtime, $instruction) = @_; + my $length = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $length; + my $pos = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $pos; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + my @strings = @$strings; + my @sources = @$sources; + # To keep things simple, if $pos < 0 (starting from end), adjust it to start from begining! + if ($pos < 0) { + $pos -= $length - 1; map { $pos += length($_); } @strings; } + else { + $pos--; } + my (@newstrings, @newsources); + if ($pos >= 0) { + while (($length > 0) && @strings) { + my $str = shift(@strings); + my $src = shift(@sources); + my $len = length($str); + if ($pos > $len) { + $pos -= $len; } + else { + push(@newstrings, substr($str, $pos, $length)); + push(@newsources, $src); + $pos = 0; $length -= length($newstrings[-1]); } } } + else { + push(@newstrings, ""); + push(@newsources, undef); } + $runtime->pushStack('STRING', [@newstrings], [@newsources]); + return; } + +# builtin function swap$ +# pops two literals from the stack, and pushes them back swapped. +sub builtinSwap { + my ($runtime, $instruction) = @_; + Warn('bibtex', 'runtime', $instruction->getLocator, + 'Need at least two elements on the stack to swap.') + unless $runtime->swapStack; + return; } + +# builtin function text.length$ +# pops a string from the top of the stack, and then pushes it's length. +# When the top literal is not a string, loudly pushes the empty string. +sub builtinTextLength { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + my ($str, $src) = simplifyString($strings, $sources); + $runtime->pushStack('INTEGER', textLength($str), $src); + return; } + +# builtin function text.prefix$ +# pops an integer and a string from the stack, then pushes the prefix of the given length of that string +# if either of the types don't match, loudly pushes the empty string. +sub builtinTextPrefix { + my ($runtime, $instruction) = @_; + # pop the integer + my $integer = $runtime->popType('INTEGER', $instruction); + return $runtime->pushString("") unless defined $integer; + # pop and simplify the string + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushString("") unless defined $strings; + + # add the text prefix and push it to the stack + my ($newStrings, $newSources) = applyPatch( + $strings, $sources, + sub { return textPrefix($_[0] . '', $integer); }, + 'inplace' + ); + $runtime->pushStack('STRING', $newStrings, $newSources); + return; } + +# builtin function top$ +# pops the topmost entry of the stack and prints it for debugging purposes +sub builtinTop { + my ($runtime, $instruction) = @_; + my ($tp, $value, $src) = $runtime->popStack; + if (defined($tp)) { + Debug(fmtType($tp, $value, $src)); } + else { + Warn('bibtex', 'runtime', $instruction->getLocator, "Unable to pop empty stack"); } + return; } + +# builtin function type$ +# pushes the type of the current entry onto the stack. +# If the string is empty or undefined, pushes the empty string. +sub builtinType { + my ($runtime, $instruction) = @_; + my $entry = $runtime->getEntry; + if ($entry) { + my $tp = $entry->getType; + $tp = '' unless $runtime->hasVariable($tp, 'FUNCTION'); + $runtime->pushStack('STRING', [$tp], + [[$entry->getName, $entry->getKey]]); } + else { + $runtime->pushString(""); } + return; } + +# builtin function warning$ +# pops the top-most string from the stack and send a warning to the user +sub builtinWarning { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return unless defined $strings; + + my ($str, $src) = simplifyString($strings, $sources); + Warn('bibtex', 'runtime', $instruction->getLocator, $str); + return; } + +# builtin function while$ +# pops two function literals from the stack and keeps executing the second +# while the integer literal returned from the first is > 0. +# If any involved type is wrong, fails silently. +sub builtinWhile { + my ($runtime, $instruction) = @_; + my ($f1type, $f1, $f1src) = $runtime->popStack; + return unless defined $f1type; + my ($f2type, $f2, $f2src) = $runtime->popStack; + return unless defined $f2type; + + while (1) { + $runtime->executeStacked($instruction, $f2type, $f2, $f2src); + my $integer = $runtime->popType('INTEGER', $instruction); + return unless defined($integer); + return if $integer <= 0; + $runtime->executeStacked($instruction, $f1type, $f1, $f1src); } + return; } + +# builtin function width$ +# pops a string from the stack and computes it's width in units. +# when the stack does not contain a string, loudly pushes 0. +sub builtinWidth { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return $runtime->pushInteger(0) unless defined $strings; + + my ($str, $src) = simplifyString($strings, $sources); + $runtime->pushStack('INTEGER', textWidth($str), $src); + return; } + +# builtin function write$ +# writes a string to the output buffer (and potentially writes +# to the output iff it is long enugh) +sub builtinWrite { + my ($runtime, $instruction) = @_; + my ($strings, $sources) = $runtime->popType('STRING', $instruction); + return unless defined $strings; + # get the ouput buffer and array references to sources and strings + my $buffer = $runtime->getBuffer; + my @theStrings = @{$strings}; +## my @theSources = @{$sources}; + my @theSources = (defined $sources ? @{$sources} : ()); + # Debug("OUTPUT: " . showStrings($strings,$sources)); + # we iterate by index to not mutate strings and sources + foreach my $i (0 .. $#theStrings) { + $buffer->write($theStrings[$i], $theSources[$i]); } + return; } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Entry.pm b/lib/LaTeXML/BibTeX/Runtime/Entry.pm new file mode 100644 index 0000000000..87d28507a2 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Entry.pm @@ -0,0 +1,160 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Entry | # +# | A read BibTeX Entry | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef); + +package LaTeXML::BibTeX::Runtime::Entry; +use strict; +use warnings; +use LaTeXML::Common::Error; +use Scalar::Util qw(blessed); + +### +### Read entries +### + +### An entry consists of the following values: +# $entry should be a BibEntry! +sub new { + my ($class, $name, $runtime, $entry) = @_; + # read our type, skip 'string's and 'comment's + my $type = $entry->getType; + my $key = $entry->getKey; + my @fields = $entry->getFields; + # make sure that we have a key + return Warn('bibtex', 'runtime', $entry->getLocator, 'Expected non-empty key') + unless $key; + my %values = (); + foreach my $field (@fields) { + my $name = $field->getName; + my $value = $field->getContent; + # if we have a duplicate valye + if (defined($values{$name})) { + Warn('bibtex', 'runtime', $field->getLocator, + 'Duplicate value in entry ' . $key . ': Field ' . $name . ' already defined. '); + next; } + # BibTeX normalizes values specifically + $value =~ s/^\s+|\s+$//g; # remove space on both sides + $value =~ s/\s+/ /g; # concat multiple whitespace into one + $values{$name} = $value; } + my $self = bless { + name => $name, + runtime => $runtime, # the runtime corresponding to this entry + type => $type, # the type, key and values for the entry + key => $key, + values => {%values}, + variables => {}, # the variables stored in this entry + entry => $entry, # the original entry + }, $class; + return $self; } + +# inlines a cross-refed entry '$xref' into this entry +sub inlineCrossReference { + my ($self, $xref, $clearCrossRefValue) = @_; + # copy over all the related keys + my ($k, $v); + keys %{ $$xref{values} }; # reset the interal iterator for each + while (($k, $v) = each(%{ $$xref{values} })) { + $$self{values}{$k} = $v unless defined($$self{values}{$k}); } + # delete the 'crossref' key manually + delete $$self{values}{crossref} if $clearCrossRefValue; + return; } + +# clears the cross-reference (if any) by this entry +sub clearCrossReference { + my ($self) = @_; + delete $$self{values}{crossref}; + return; } + +# gets the cross-referenced entry +# and returns a pair ($key, $crossref) +sub resolveCrossReference { + my ($self, $entryHash) = @_; + # get the crossref key + my $crossref = $$self{values}{crossref}; + return undef, undef unless defined($crossref); + # if is exists case-senstive, return it! + my $xref = $$entryHash{$crossref}; + return $crossref, $xref if defined($xref); + # if resolution failed, try searching case-insensitivly + foreach my $key (keys %$entryHash) { + if (lc $key eq lc $crossref) { + $$self{values}{crossref} = $key; # update to the correct case! + return $key, $$entryHash{$key}; } } + # else we failed completly + return $crossref, undef; } + +sub getName { + my ($self) = @_; + return $$self{name}; } + +sub getKey { + my ($self) = @_; + return $$self{key}; } + +sub getType { + my ($self) = @_; + return $$self{type}; } + +# gets the value of a given variable +# get a variable (type, value, source), silently define it when it doesn't exist yet +sub getVariable { + my ($self, $name) = @_; + # lookup the type and return their value + my $type = $$self{runtime}{variableTypes}{$name}; + return undef unless defined($type) && $type =~ /^ENTRY_/; + # If we have an entry field + # we need to take special care of where it comes from + if ($type eq 'ENTRY_FIELD') { + my $field = $$self{values}{ lc $name }; + return 'STRING', [$field], + [[($$self{name}, $$self{key}, lc $name)]] + if defined($field); + return 'MISSING', undef, [($$self{name}, $$self{key}, lc $name)]; } + my $value = $$self{variables}{$name}; + # silently set default values + unless (defined($value)) { + return 'INTEGER', 0, undef if $type eq 'ENTRY_INTEGER'; + return 'STRING', [""], [undef] if $type eq 'ENTRY_STRING'; + # other types do not have a sensible default + return 'UNSET', undef, undef; } + # else we can just push from our own internal value stack + # we duplicate here, where needed + my ($t, $v, $s) = @{$value}; + $v = [@{$v}] if ref($v) && ref($v) eq 'ARRAY'; + $s = [@{$s}] if ref($s) && ref($s) eq 'ARRAY'; + return ($t, $v, $s); } + +# 'getPlainField' gets a string valued field from this entry or fails +sub getPlainField { + my ($self, $name) = @_; + # if it's not an entry field, bail out + my $type = $$self{runtime}{variableTypes}{$name}; + return undef unless defined($type) && $type eq 'ENTRY_FIELD'; + # else return the value + return $$self{values}{ lc $name }; } + +# set a variable (type, value, source) +# returns 0 if ok, 1 if it doesn't exist, 2 if an invalid runtime, 3 if read-only +sub setVariable { + my ($self, $name, $value) = @_; + # if the variable does not exist, return nothing + my $type = $$self{runtime}{variableTypes}{$name}; + return 1 unless defined($type); + # we can't assign anything global here + return 2 + if ($type eq 'GLOBAL_STRING' + or $type eq 'GLOBAL_INTEGER' + or $type eq 'FUNCTION'); + # we can't assign entry fields, they're read only + return 3 if $type eq 'ENTRY_FIELD'; + # and assign the value + $$self{variables}{$name} = $value; + return 0; } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Names.pm b/lib/LaTeXML/BibTeX/Runtime/Names.pm new file mode 100644 index 0000000000..d951f75a56 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Names.pm @@ -0,0 +1,365 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Names | # +# | Runtime name parsing / processing functions | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef Subroutines::RequireArgUnpacking); + +package LaTeXML::BibTeX::Runtime::Names; +use strict; +use warnings; + +use LaTeXML::BibTeX::Runtime::Strings; + +use base qw(Exporter); +our @EXPORT = qw( + &splitNames &numNames + &splitNameParts &splitNameWords + &abbrevName &formatNameSubpattern &formatName +); + +### +### Splitting a list of names +### + +# 'splitNames' splits a string into a list of names. +# Multiple names are seperated by 'and's at brace level 0. +sub splitNames { + my ($string) = @_; + # if there is an empty string, return the empty array! + return () if ($string eq ''); + my $level = 0; + my $buffer = ''; + my @result = (''); + my @cache; + my $character; + # accumalate entries inside of a buffer + # and then split the buffer, once we reach a non-zero level + my @characters = split(//, $string); + while (defined($character = shift(@characters))) { + if ($level == 0) { + $buffer .= $character; + if ($character eq '{') { + @cache = split(/\sand\s/, $buffer); + $result[-1] .= shift(@cache); + push(@result, @cache); + # clear the buffer + $buffer = ''; + $level++; } } + else { + $level++ if $character eq '{'; + $level-- if $character eq '}'; + # because we do not split + # do not add to the buffer but into the last character + $result[-1] .= $character; } } + # split the buffer and put it into result + if ($buffer) { + @cache = split(/\sand\s/, $buffer); + $result[-1] .= shift(@cache); + push(@result, @cache); } + # and return the results + return @result; } + +# 'numNames' counts the number of names in a given string and implements num.names$ +# This corresponds to the number of times the word 'and' surrounded by spaces occors at brace level 0 +sub numNames { + return scalar(splitNames(@_)) || 0; } + +### +### Splitting a single name +### + +# 'splitNameParts' splits a name into the first, von, jr and last parts. +sub splitNameParts { + my ($string) = @_; + # split the name into words + my ($pre, $mid, $post) = splitNameWords($string); + my @prec = @$pre; + my @midc = @$mid; + my @postc = @$post; + # prepare all the parts + my @first = (); + my @von = (); + my @jr = (); + my @last = (); + # start by splitting off everything except for 'von Last' + # which we will both store in @von for now (and split below) + my $word; + my $gotlower = 0; + # Style (i): "First von Last" + if (scalar(@midc) == 0 && scalar(@postc) == 0) { + # if we only have upper case letters, they are all last names + while (defined($word = shift(@prec))) { + # if we encounter a lower-case, everything before that is first name + # and everything including and after it is "von Last" + if (getCase($word) eq 'l') { + $gotlower = 1; + @first = @von; + @von = ($word, @prec); + last; } + push(@von, $word); } + # if we did not get any lower-case words + # then the last word is the last name + # and the rest the first name. + unless ($gotlower) { + @first = @von; + @von = pop(@first); } + # we did not get any words in the 'von Last' part + # so that the last of the first name + if (scalar(@von) == 0) { + push(@von, pop(@first)); } } + # Style (ii): "von Last, First" + elsif (scalar(@postc) == 0) { + @von = @prec; + @first = @midc; } + # Style (iii): "von Last, Jr, First" + else { + @von = @prec; + @jr = @midc; + @first = @postc; } + my $haslast = 0; + # we now split the "von Last" part + while ($word = pop(@von)) { + # find the last small word and push it into last + if ($haslast && getCase($word) eq 'l') { + push(@von, $word); + last; } + # push all the big words from 'von' into 'last' + else { + unshift(@last, $word); + $haslast = 1; } } + # If the Last part follows the '-' character + # then that part belongs to the last part too + if (scalar(@von) == 0 && scalar(@last) > 0) { + while (scalar(@first) && substr($first[-1], -1, 1) eq '-') { + $last[0] = pop(@first) . $last[0]; } } + return [@first], [@von], [@jr], [@last]; } + +# 'splitNameWords' splits a single name into three lists: +# one before all commas, one after the first one, one after the second one +sub splitNameWords { + my ($string) = @_; + # HACK HACK HACK we want to support things without a comma + # for now we forcibly add a comma between them. + # TODO: Do this later on fo + $string =~ s/,(?!\s)/, /g; + my $level = 0; + my $buffer = ''; + my @result = (''); + my @cache; + my $character; + my @characters = split(//, $string); + + while (defined($character = shift(@characters))) { + if ($level == 0) { + $buffer .= $character; + if ($character eq '{') { + @cache = split(/[\s~-]+\K/, $buffer) + ; # use '\K' to split right *after* the match + $result[-1] .= shift(@cache); + push(@result, @cache); + # clear the buffer + $buffer = ''; + $level++; } } + else { + $level++ if $character eq '{'; + $level-- if $character eq '}'; + # because we do not split + # do not add to the buffer but into the last character + $result[-1] .= $character; } } + # split the buffer and put it into result + if ($buffer) { + @cache = split(/[\s~-]+\K/, $buffer) + ; # use '\K' to split right *after* the match + $result[-1] .= shift(@cache); + push(@result, @cache); } + my @precomma = (); + my @midcomma = (); + my @postcomma = (); + my $pastcomma = 0; + # iterate over our result array + # and pop into the three appropriate lists + my $seperator; + while (defined($buffer = shift(@result))) { + # split off everything except for the first seperator + $buffer =~ s/([\s~-])[\s~-]*$/$1/; + # we did not yet have a comma + # so push everything into the first array + # until we encounter a comma + if ($pastcomma == 0) { + if ($buffer =~ /,\s+$/) { + $buffer =~ s/,\s+$//; + push(@precomma, $buffer) if length($buffer) > 0; + $pastcomma++; } + else { + push(@precomma, $buffer); } } + # we had one comma + + elsif ($pastcomma == 1) { + if ($buffer =~ /,\s+$/) { + $buffer =~ s/,\s+$//; + push(@midcomma, $buffer) if length($buffer) > 0; + $pastcomma++; } + else { + push(@midcomma, $buffer); } } + # we had a third comma + else { + push(@postcomma, $buffer); } } + # and return the results + return [@precomma], [@midcomma], [@postcomma]; } + +### +### Formatting a name +### + +# 'abbrevName' abbreviates a name and return's only it's first letter +sub abbrevName { + my ($string) = @_; + my ($letters, $levels) = splitLetters($string); + # we return the first character which either + # - is an accent + # - contains an alphabetical character + foreach my $letter (@$letters) { + return $letter if isSpecial($letter); + # else, return the first letter of it + if ($letter =~ /[a-z]/i) { + ($letter) = ($letter =~ m/([a-z])/i); + return $letter; } } + # we got no letter at all + # not sure what to return here + return undef; } + +# 'formatNameSubpattern' formats a single name subpattern +sub formatNameSubpattern { + my ($tokens, $abbrevName, $sep, $pre, $post) = @_; + my $result = $pre; + # If no explicit seperator was provided, we need to insert the default one. + unless (defined($sep)) { + my ($seperator, $isDefaultSeperator, $index) = ('', '', 0, 0); + my $lastIndex = scalar(@$tokens) - 1; + # iterate through all the names and fetch the seperators from the tokens themselves + foreach my $part (@$tokens) { + # cleanup this part of the name and seperator + ($seperator) = ($part =~ m/([\s~-])$/); + $part =~ s/([\s~-]+)$//; + # abbreviate the current name if needed + $part = abbrevName($part) if $abbrevName; + # if we are at the last index, bail out + if ($index == $lastIndex) { + $result .= $part; + last; } + $part .= '.' if $abbrevName; + # if we have a seperator character (which is '~' or '-') we want to use that + if (defined($seperator) && ($seperator eq '~' || $seperator eq '-')) { + $part .= $seperator; } + elsif (($index == $lastIndex - 1) || ($index == 0 && textLength($pre . $part) <= 2)) { + $part .= '~'; } + else { + $part .= ' '; } + $result .= $part; + $index++; } } + else { + # a token + my @names = map { $_ =~ s/([\s~-]+)$//r; } @$tokens; + @names = map { abbrevName($_) } @names if $abbrevName; + $result .= join($sep, @names); } + # append all the letters that are to be inserted after the actual tokens + $result .= $post; +# handle a discretionary tilde: +# - if we have a single trailing ~, we remove it. +# - if we have two trailing ~s, we either replace it with a space (if the result is long enough), or we leave it untouched +# In some cases we get spaces even though we should have ~s and it is unclear as to why + if ($result =~ /~$/) { + $result =~ s/~$//; + unless ($result =~ /~$/) { + if (textLength($result) < 3) { + $result .= '~'; } + else { + $result .= ' '; } } } + return $result; } + +# 'formatName' formats a single name according to a BibTeX specification. +# Together with the functions above, it implements the format.name$ builtin. +sub formatName { + my ($name, $spec) = @_; + # split the specification and the name into parts + my @characters = split(//, $spec); + my ($first, $von, $jr, $last) = splitNameParts($name); + # declare a lot of variables + my ($character, $letter, $level, $partresult, $post, $result, $seperator, $short); + my (@tokens); + while (defined($character = shift(@characters))) { + if ($character eq '{') { + # iterate through the subpattern + $partresult = ''; + while ($character = shift(@characters)) { + # we finally hit the alphabetic character + if ($character =~ /[a-z]/i) { + # use the tokens for the current characters + if ($character eq 'f') { @tokens = @$first; } + elsif ($character eq 'v') { @tokens = @$von; } + elsif ($character eq 'j') { @tokens = @$jr; } + elsif ($character eq 'l') { @tokens = @$last; } + else { return undef, 'Invalid name part: ' . $character; } + # read the next part + $letter = $character; + $character = shift(@characters); + return undef, 'Unexpected end of pattern' + unless defined($character); + # if we have the letter repeated, it is a long pattern + if ($character =~ /[a-z]/i) { + return undef, + "Unexpected letter $character, $letter should be repeated. " + unless $character eq $letter; + $short = 0; + $character = shift(@characters); + return 'Unexpected end of pattern' + unless defined($character); } + # else if must be a short pattern. + else { + $short = 1; } + # if we have a '{', read the seperator + $seperator = undef; + if ($character eq '{') { + $level = 1; + $seperator = ''; + while (defined($character = shift(@characters))) { + $level++ if $character eq '{'; + $level-- if $character eq '}'; + last if $level == 0; + $seperator .= $character; } } + else { + unshift(@characters, $character); } + # read whatever comes next until we are balaned again + # until the closing '}' brace + $post = ''; + $level = 1; + while (defined($character = shift(@characters))) { + $level++ if $character eq '{'; + $level-- if $character eq '}'; + last if $level == 0; + $post .= $character; } + # now format the current part according to what we read. + unless (scalar(@tokens) == 0) { + $partresult = formatNameSubpattern([@tokens], $short, $seperator, $partresult, $post); } + else { + $partresult = ''; } + last; } + elsif ($character eq '}') { + # If we closed the part without having anything alphabetic then something weird is going on. + # Fallback to inserting literally + $partresult = '{' . $partresult . '}'; + last; } + else { + $partresult .= $character; } } + $result .= $partresult; } + else { + # at the outer brace level, we insert characters unconditionally + $result .= $character; + } } + return $result; } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Strings.pm b/lib/LaTeXML/BibTeX/Runtime/Strings.pm new file mode 100644 index 0000000000..ceb1586f74 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Strings.pm @@ -0,0 +1,584 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Strings | # +# | Runtime string functions | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (ValuesAndExpressions::ProhibitLeadingZeros Subroutines::ProhibitExcessComplexity); + +package LaTeXML::BibTeX::Runtime::Strings; +use strict; +use warnings; + +use base qw(Exporter); +our @EXPORT = qw( + &addPeriod + &splitLetters &splitSpecial &isSpecial + &changeCase &getCase + &textSubstring + &textLength + &textPrefix + &textWidth + &textPurify +); + +### +### Adding periods +### + +# takes a string and adds a ‘.’ to it +# if the last non-'}' character isn’t a ‘.’, ‘?’, or ‘!’, and pushes this resulting string. +# implements the add.period$ built-in +sub addPeriod { + my ($string) = @_; + # do not add a period if the string is empty + return "" if $string eq ""; + # find the last character that is not a '}' + my ($match) = ($string =~ m/(.)(?:\})*$/); + # and add a '.' if it's not punctiation + unless ($match && ($match eq '!' or $match eq '.' or $match eq '?')) { + return $string . '.'; } + else { + return $string; } } + +### +### Splitting text into characters +### + +# splits text into an array of semantic characters (i.e. including accents). +# includes a second array, stating the level of each characters +sub splitLetters { + my ($string) = @_; + # split the string into characters + my @characters = split(//, $string); + # current letter and brace level + my ($buffer, $hadLetter, $level) = ('', 0, 0); + my @letters = (''); + my @levels = (0); + my $char; + while (defined($char = shift(@characters))) { + if ($char eq '{') { + $level++; + if ($level == 1) { + # if the next character is a \, then we need to go into accent handling + # and read up until the end of the accent. + $char = shift(@characters); + if (defined($char) && $char eq '\\') { + $buffer = '{\\'; + # read characters until we are balanced again + while (defined($char = shift(@characters))) { + $buffer .= $char; + $level++ if $char eq '{'; + $level-- if $char eq '}'; + last if $level == 0; } + # push the collected 'accent' and go back into normal mode + shift(@letters) unless $hadLetter; + shift(@levels) unless $hadLetter; + push(@letters, $buffer); + push(@levels, isSpecial($buffer) ? 0 : 1); + $hadLetter = 1; + next; } + unshift(@characters, $char) if defined($char); + $char = '{'; } + # for nested opening braces + # add to the previous one + if ($hadLetter && substr($letters[-1], -1) eq '{') { + $letters[-1] .= '{'; + $levels[-1] = $level; } + else { + # create a new opening statement + shift(@letters) unless $hadLetter; + shift(@levels) unless $hadLetter; + push(@letters, $char); + push(@levels, $level); + $hadLetter = 1; } } + elsif ($char eq '}') { + # if we have a closing brace, just add it to the previous one + # and decrease the level (but never go negative) + $letters[-1] .= '}'; + $hadLetter = 1; + $level-- unless $level == 0; } + elsif ($hadLetter && substr($letters[-1], -1) eq '{') { + # if we had an opening brace, append to it + $letters[-1] .= $char; } + else { + # else push a normal character + shift(@letters) unless $hadLetter; + shift(@levels) unless $hadLetter; + push(@letters, $char); + push(@levels, $level); + $hadLetter = 1; } } + my @theletters = (); + my @thelevels = (); + my $letter; + while (defined($letter = shift(@letters))) { + $level = shift(@levels); + # if we have a letter that is only braces + if ($letter =~ /^[\{\}]*$/) { + # then try and prepend to the next letter + if (scalar(@letters) != 0) { + $letters[0] = $letter . $letters[0]; } + # or the last letter in the output + elsif (scalar(@theletters) != 0) { + $theletters[-1] .= $letter; } + # if we don't have anything, then only push the letter + # so that scalar(@levels) still indiciates the string length + else { + push(@theletters, $letter); } } + else { + push(@theletters, $letter); + push(@thelevels, $level); } } + return [@theletters], [@thelevels]; } + +# 'splitSpecial' splits a string starting with a potentially special character and returns a 4-tuple ($isSpecial, $head, $tail, $command) where +# - $isSpecial: 1 when the string starts with something that looks like a command sequence, 0 when not +# - $head: Part of the first letter of the string that is not affected by case-sensitivity. +# - $tail: Part of the first letter which are affected by case-sensitivity and remaining letters. +# - $command: Either 'undef' or one of the special known accented command sequences. +# For any input, $head . $tail will always equal $string. +sub splitSpecial { + my ($string) = @_; + # if we do not have an accent, don't parse it + return 0, '', $string, undef unless $string =~ /^[\{\}]*\{\\/; + # split into head + tail, but keep the command + my ($head, $tail, $command) = $string =~ m/^([\{\}]*\{\\)(([^\{\}\s]+).*)$/; + $command =~ s/[^a-zA-Z]//g; + unless ($command eq "i" || + $command eq "j" || + $command eq "oe" || + $command eq "OE" || + $command eq "ae" || + $command eq "AE" || + $command eq "aa" || + $command eq "AA" || + $command eq "o" || + $command eq "O" || + $command eq "l" || + $command eq "L" || + $command eq "ss" + ) { + # We do not have a known command sequence, hence it should not change case and be a part of the 'head' + ($command, $tail) = $tail =~ m/^([a-zA-Z]*[\s\{\}]*)(.*)$/; # split off leading command sequence + $head .= $command; + $command = undef; } + return 1, $head, $tail, $command; } + +# isSpecial checks if a character is 'special' according to the 'splitSpecial function'. +sub isSpecial { + my ($string) = @_; + return $string =~ /^[\{\}]*\{\\/; } + +### +### Changing case of a string +### + +# known accent control sequences +my %ACCENT_SEQUENCES = ( + 'i' => 1, + 'j' => 1, + 'oe' => 1, + 'OE' => 1, + 'ae' => 1, + 'AE' => 1, + 'aa' => 1, + 'AA' => 1, + 'o' => 1, + 'O' => 1, + 'l' => 1, + 'L' => 1, + 'ss' => 1 +); + +# Changes the case of $string according to $spec +# - if $spec is 't', then upper-case the first character and lower-case the rest +# - if $spec is 'u' then upper-case everything +# - if $spec is 'l' then lower-case everything +# This implements the change.case$ built-in. + +sub changeCase { + my ($string, $conversion_type) = @_; + # This code has been roughly adapted from 'bibtex.web', and in principle works as follows: + # 1. split() the string into a character array + # 2. Iterate through the array of characters + # 3. Convert the values in the array in place + # 4. join() the array back into a single string + # check that we have one of the three known conversion types + $conversion_type = lc $conversion_type; + return unless ( + $conversion_type eq 'l' || + $conversion_type eq 'u' || + $conversion_type eq 't'); + + my @chars = split(//, $string); # array of characters (ex_buf in the original source) + my $char_ptr = 0; # current index into the character array (ex_buf_ptr in the original source) + my $chars_xptr = 0; # beginning of control sequence (not always set, ex_buf_xptr in the original source) + my $chars_length = scalar(@chars); # number of chars (ex_buf_len in the original source) + my $brace_level = 0; # the current brace_level + my $prev_colon = 0; # are we following a colon (only relevant for 't' case) + # Iterate over the character array + while ($char_ptr < $chars_length) { + + if ($chars[$char_ptr] eq '{') { + $brace_level++; + # When opening a new brace the brace level increases and we need to consider accents and commands. + # This large if statement checks that all the conditions for an 'accent' or 'command' are fullfilled + if ( + ($brace_level != 1) || # only on level 1! + (($char_ptr + 4 > $chars_length) || ($chars[$char_ptr + 1] ne '\\')) || # we don't have anything that could be a command + # in title case, we need to be at the beginning of the string or following a colon + ( + ($conversion_type eq 't') && + (($char_ptr == 0) || (($prev_colon) && ($chars[$char_ptr - 1] =~ /\s/))) + ) + ) { + # In the original source this is handled with a goto ok_pascal_i_give_up. + # To be slightly cleaner we inline the code. + $prev_colon = 0; + $char_ptr++; + next; } + $char_ptr++; + # All the conditions are fullfilled, we can now convert the control sequence or special character. + while (($char_ptr < $chars_length) && ($brace_level > 0)) { + # the control sequence starts here, but we can skip the '\' + $char_ptr++; + $chars_xptr = $char_ptr; + # scan the title of the control sequence (with alphabetical characters) + my $ctrl_sequence = ''; + while (($char_ptr < $chars_length) && ($chars[$char_ptr] =~ /[a-zA-Z]/)) { + $ctrl_sequence .= $chars[$char_ptr]; + $char_ptr++; } + # If the control sequence is a special 'accented' control sequence + # convert the accented or foreign character + if (defined($ACCENT_SEQUENCES{$ctrl_sequence})) { + # 'l' || 't' => convert the upper accents to lower ones + # leave the rest of them alone. + unless ($conversion_type eq 'u') { + if ( + $ctrl_sequence eq 'L' || + $ctrl_sequence eq 'O' || + $ctrl_sequence eq 'OE' || + $ctrl_sequence eq 'AE' || + $ctrl_sequence eq 'AA') { + foreach my $i ($chars_xptr .. $char_ptr - 1) { + $chars[$i] = lc($chars[$i]); } } } + # 'u' + else { + # these sequences have an uppercase equivalent + if ( + $ctrl_sequence eq 'l' || + $ctrl_sequence eq 'o' || + $ctrl_sequence eq 'oe' || + $ctrl_sequence eq 'ae' || + $ctrl_sequence eq 'aa') { + foreach my $i ($chars_xptr .. $char_ptr - 1) { + $chars[$i] = uc($chars[$i]); } + # these sequences do not have an uppercase equivalent + # hence convert, then remove the control sequence + } elsif ( + $ctrl_sequence eq 'i' || + $ctrl_sequence eq 'j' || + $ctrl_sequence eq 'ss') { + # convert it to uppercase + foreach my $i ($chars_xptr .. $char_ptr - 1) { + $chars[$i] = uc($chars[$i]); } + # remove the '\\' + $chars[$chars_xptr - 1] = ''; + $chars_xptr = $char_ptr - 1; + # remove any trailing spaces + while (($char_ptr < $chars_length) && ($chars[$char_ptr] =~ /\s/)) { + $chars[$char_ptr] = ''; + $char_ptr++; } + # and reset $char_ptr + $char_ptr = $chars_xptr; } } } + $chars_xptr = $char_ptr; + # scan until the next control sequence + while (($char_ptr < $chars_length) && ($brace_level > 0) && ($chars[$char_ptr] ne '\\')) { + if ($chars[$char_ptr] eq '}') { + $brace_level--; } + elsif ($chars[$char_ptr] eq '{') { + $brace_level++; } + $char_ptr++; } + # and convert it + unless ($conversion_type eq 'u') { + foreach my $i ($chars_xptr .. $char_ptr - 1) { + $chars[$i] = lc($chars[$i]); } } + else { + foreach my $i ($chars_xptr .. $char_ptr - 1) { + $chars[$i] = uc($chars[$i]); } } } + # unskip the right closing '}' + $char_ptr--; + $prev_colon = 0; } + # whenever we have a closing brace, decrease the level + elsif ($chars[$char_ptr] eq '}') { + $brace_level-- unless $brace_level == 0; + $prev_colon = 0; } + elsif ($brace_level == 0) { + # Now convert a brace_level = 0 character + if ($conversion_type eq 't') { + # for 't', we need to convert to lowercase + # if we are either at the first character, or we are following a colon + whitespace + unless ( + ($char_ptr == 0) || + ($prev_colon) && ($chars[$char_ptr - 1] =~ /\s/) + ) { + $chars[$char_ptr] = lc($chars[$char_ptr]); } + # for the next iteration, we need to know if there was a ':'. + if ($chars[$char_ptr] eq ':') { + $prev_colon = 1 } + # reset the flag only if we didn't have any whitespace + elsif (!($chars[$char_ptr] =~ /\s/)) { + $prev_colon = 0; } } + elsif ($conversion_type eq 'l') { + $chars[$char_ptr] = lc($chars[$char_ptr]); } + elsif ($conversion_type eq 'u') { + $chars[$char_ptr] = uc($chars[$char_ptr]); } } + $char_ptr++; } + return join('', @chars); } + +# 'getCase' gets the case of word, that it returns either 'l' for lowercase or 'u' for uppercase. +# If no character exists, returns 'l'. +sub getCase { + my ($string, $isSpecial, $head, $tail, $command) = @_; + # keep working on the first letter of the string + while (length($string) > 0) { + # if we have a letter in the front, work with it + return 'u' if ($string =~ /^[A-Z]/); + return 'l' if ($string =~ /^[a-z]/); + # if we have a special character, ignore the head + ($isSpecial, $head, $tail, $command) = splitSpecial($string); + if ($isSpecial) { + $string = $tail; + next; } + # parse a non-accent braced string + ($head, $tail) = $string =~ m/^{([^}]+)\}(.*)$/; + if (defined($head) || defined($tail)) { + return 'u' if (defined($head) && $head =~ /[a-zA-Z]/); # if it has some letters, we have an uppercase character + $string = $tail; # else skip it + next; } + # ignore this character, it's not a letter + $string = substr($string, 1); } + # there were no letters at all + return 'l'; } + +### +### Text Length, Width and substring +### + +# 'textLength' counts the text-length of a string and implements text.length$ +sub textLength { + # This code is a somewhat optimized inline version of: + # my ( $letters, $levels ) = splitLetters(@_); + # return scalar(@$levels); + # It saves on a second loop iteration and some parsing that is only needed for 'levels' + # split the string into characters + my ($string) = @_; + my @characters = split(//, $string); + # current letter and brace level + my ($buffer, $hadLetter, $level) = ('', 0, 0); + my @letters = (''); + my @levels = (0); + my $char; + while (defined($char = shift(@characters))) { + if ($char eq '{') { + $level++; + if ($level == 1) { + # if the next character is a \, then we need to go into accent handling + # and read up until the end of the accent. + $char = shift(@characters); + if (defined($char) && $char eq '\\') { + $buffer = '{\\'; + # read characters until we are balanced again + while (defined($char = shift(@characters))) { + $buffer .= $char; + $level++ if $char eq '{'; + $level-- if $char eq '}'; + last if $level == 0; } + # push the collected accent and go back into normal mode + shift(@letters) unless $hadLetter; + push(@letters, $buffer); + $hadLetter = 1; + next; } + unshift(@characters, $char) if defined($char); + $char = '{'; } + # for nested opening braces + # add to the previous one + if ($hadLetter && substr($letters[-1], -1) eq '{') { + $letters[-1] .= '{'; + $levels[-1] = $level; } + else { + # create a new opening statement + shift(@letters) unless $hadLetter; + push(@letters, $char); + $hadLetter = 1; } } + elsif ($char eq '}') { + # if we have a closing brace, just add it to the previous one + # and decrease the level (but never go negative) + $letters[-1] .= '}'; + $hadLetter = 1; + $level-- unless $level == 0; } + elsif ($hadLetter && substr($letters[-1], -1) eq '{') { + # if we had an opening brace, append to it + $letters[-1] .= $char; } + else { + # else push a normal character + shift(@letters) unless $hadLetter; + push(@letters, $char); + $hadLetter = 1; } } + # iterate and skip over non-leading brace-only letters + my $letter; + my $count = 0; + while (defined($letter = shift(@letters))) { + $count++ + unless ($letter =~ /^[\{\}]*$/ + && ($count == 0 || scalar(@letters) == 0)); } + return $count; } + +# returns the prefix of length $length of a string +# implements text.prefix$ +sub textPrefix { + my ($string, $length) = @_; + my ($letters, $levels) = splitLetters($string); + # read a prefix of the string + my $index = 0; + my $result = ''; + foreach my $letter (@$letters) { + $result .= $letter; + $index++; + last if $index eq $length; } + # balance brackets magically + my $level = () = ($result =~ /{/g); + $level -= () = ($result =~ /}/g); + $result .= ('}' x $level) if ($level >= 0); + return $result; } + +# table adpoted from +# https://metacpan.org/source/NODINE/Text-BibTeX-BibStyle-0.03/lib/Text/BibTeX/BibStyle.pm +# contains widths of accents and basic characters +our %WIDTHS = ( + 32 => 278, 33 => 278, 34 => 500, 35 => 833, 36 => 500, 37 => 833, 38 => 778, 39 => 278, + 40 => 389, 41 => 389, 42 => 500, 43 => 778, 44 => 278, 45 => 333, 46 => 278, 47 => 500, + 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, + 56 => 500, 57 => 500, 58 => 278, 59 => 278, 60 => 278, 61 => 778, 62 => 472, 63 => 472, + 64 => 778, + + # A-Z + 65 => 750, 66 => 708, 67 => 722, 68 => 764, 69 => 681, 70 => 653, 71 => 785, 72 => 750, + 73 => 361, 74 => 514, 75 => 778, 76 => 625, 77 => 917, 78 => 750, 79 => 778, 80 => 681, + 81 => 778, 82 => 736, 83 => 556, 84 => 722, 85 => 750, 86 => 750, 87 => 1028, 88 => 750, + 89 => 750, 90 => 611, 91 => 278, 92 => 500, 93 => 278, 94 => 500, 95 => 278, 96 => 278, + + # a-z + 97 => 500, 98 => 556, 99 => 444, 100 => 556, 101 => 444, 102 => 306, 103 => 500, 104 => 556, + 105 => 278, 106 => 306, 107 => 528, 108 => 278, 109 => 833, 110 => 556, 111 => 500, + 112 => 556, 113 => 528, 114 => 392, 115 => 394, 116 => 389, 117 => 556, 118 => 528, + 119 => 722, 120 => 528, 121 => 528, 122 => 444, 123 => 500, 124 => 1000, 125 => 500, + 126 => 500, + + aa => 500, AA => 750, o => 500, O => 778, l => 278, L => 625, ss => 500, + ae => 722, oe => 778, AE => 903, OE => 1014, '?`' => 472, '!`' => 278, +); + +# compute the width of text in hundredths of a point, as specified by the June 1987 version of the cmr10 font +# implements width$ +sub textWidth { + my ($string) = @_; + my ($letters, $levels) = splitLetters($string); + # iterate over each of the letters + my $width = 0; + my @characters; + my ( + $isSpecial, $head, $tail, $command, $level + ); + foreach my $letter (@$letters) { + $level = shift(@$levels); + # on level 0 we want to check for special characters + if (defined($level) && $level == 0) { + ( + $isSpecial, $head, $tail, $command + ) = splitSpecial($letter); + if (defined($command)) { + $width += $WIDTHS{$command} || 0; + next; } + if ($isSpecial) { + $tail =~ s/\}$//; + $letter = $tail; } } + # for all other cases, we add up the width of each character in the letters + $width += ($WIDTHS{ ord $_ } || 500) foreach (split(//, $letter)); } + return $width; } + +# returns the prefix of length $length of a string +# implements substring$ +sub textSubstring { + my ($string, $start, $length) = @_; + # if we have a non-negative start, the indexes are straightforward + return substrSafe($string, $start - 1, $length) if $start > 0; + # else we have a substring of length ending at index $start + $start = length($string) + $start - $length + 1; + if ($start < 0) { + $length += $start; + $start = 0; } + return substrSafe($string, $start, $length); } + +# a variant of substr which returns "" when start overruns the string +sub substrSafe { + my ($string, $start, $length) = @_; + return "" if $start >= length($string); + return substr($string, $start, $length); } + +### +### Purification +### + +# purifies text to be used for sorting +# implements purify$ +sub textPurify { + my ($string) = @_; + my ($letters, $levels) = splitLetters($string); + # iterate over each of the letters + my $purified = ''; + my @characters; + my ( + $isSpecial, $head, $tail, $command, $level + ); + foreach my $letter (@$letters) { + $level = shift(@$levels); + # on level 0, check for accents + if (defined($level) && $level == 0) { + # parse the accent + ( + $isSpecial, $head, $tail, $command + ) = splitSpecial($letter); + # if we have one of the known command, transfer those into the appropriate ones + if (defined($command)) { + # if it is one of the special commands, use their complete commands + if ( + $command eq 'oe' || + $command eq 'OE' || + $command eq 'ae' || + $command eq 'AE' || + $command eq 'ss' + ) { + $purified .= $command; } + # else just use the first one + else { + $purified .= substr($command, 0, 1); } } + # if we had a command, but it was not one of the ones we knew + # then just reproduce the argument + elsif ($isSpecial) { + $tail =~ s/[^a-zA-Z0-9 ]//g; # side-effect: lowercase everything + $purified .= $tail; } + # else replace as if we were on level 1 + else { + $letter =~ s/[\s\-~]/ /g; + $letter =~ s/[^a-zA-Z0-9 ]//g; + $purified .= $letter; } } + # on level 1+, we replace all the - and ~s with spaces, and apart from those keep only spaces + else { + $letter =~ s/[\s\-~]/ /g; + $letter =~ s/[^a-zA-Z0-9 ]//g; + $purified .= $letter; } } + return $purified; } + +1; diff --git a/lib/LaTeXML/BibTeX/Runtime/Utils.pm b/lib/LaTeXML/BibTeX/Runtime/Utils.pm new file mode 100644 index 0000000000..5c25115d64 --- /dev/null +++ b/lib/LaTeXML/BibTeX/Runtime/Utils.pm @@ -0,0 +1,140 @@ +# /=====================================================================\ # +# | LaTeXML::BibTeX::Runtime::Utils | # +# | Various runtime utility functions | # +# |=====================================================================| # +# | Part of LaTeXML | # +# |---------------------------------------------------------------------| # +# | Tom Wiesing | # +# \=====================================================================/ # +## no critic (Subroutines::ProhibitExplicitReturnUndef); + +package LaTeXML::BibTeX::Runtime::Utils; +use strict; +use warnings; +use LaTeXML::Common::Error; + +use base qw(Exporter); +our @EXPORT = qw( + &concatString &simplifyString &applyPatch + &fmtType + &showStrings +); + +sub showStrings { + my ($strings, $sources) = @_; + my @str = @$strings; + my @src = @$sources; + return join(', ', + map { "'" . $str[$_] . "'[" . (defined $src[$_] ? $src[$_] : '') . ']'; } + 0 .. $#str); } + +# given two runtime strings, join them and their sources together +# and return a new runtime string +sub concatString { + my ($stringA, $sourceA, $stringB, $sourceB) = @_; + # join the strings and sources + # Note: Both strings and sources are copies at this point and can be treated as mutable. + my @strings = (@$stringA, @$stringB); + # Make sure we have sources (even undefined ones) corresponding to EACH string entry + my @sources = ((defined $sourceA ? @$sourceA : map { (undef); } @$stringA), + (defined $sourceB ? @$sourceB : map { (undef); } @$stringB)); + ##Debug("CONCAT : " . showStrings([@strings],[@sources])); + my @newstrings = (); + my @newsources = (); + my ($string, $source); + while (defined($string = shift(@strings))) { + $source = shift(@sources); + next if $string eq ''; + # else push it + # if no previous entry, just push this one + if (!scalar(@newstrings)) { # No previous entry, so just push this one + push(@newstrings, $string); + push(@newsources, $source); } + # if previous is same source (even undef), join them + elsif ((defined $source + ? (defined $newsources[-1]) && ($source eq $newsources[-1]) + : !defined $newsources[-1])) { + $newstrings[-1] .= $string; } # Just combine them + # if previous source is undef, but before that source is same (defined) source, join all three + elsif ((defined $source) && (!defined $newsources[-1]) + && (defined $newsources[-2]) && ($source eq $newsources[-2])) { + $newstrings[-2] .= $newstrings[-1] . $string; + pop(@newstrings); pop(@newsources); } + # Else, append new entry + else { + push(@newstrings, $string); + push(@newsources, $source); } } + # and return the strings and sources + ## Debug(" ===> " . showStrings([@newstrings],[@newsources])); + return [@newstrings], [@newsources]; } + +# given a runtime string turn it into a single string and useful source +# NOTE: This LOSES sources! +sub simplifyString { + my ($string, $sources) = @_; + # return the first 'defined' source + # i.e. one that comes from a source file. + my ($source); + foreach my $asource (@$sources) { + $source = $asource; +### last if defined($source); } + last if defined($source) && ($source ne ''); } + return join('', @$string), $source; } + +# Given a runtime string (which might be a complex object +# consisting of several parts) and it's corresponding source +# references, apply a plain-text patch() function to it. +# applyPatch attempts to maintain reasonable source references +# where possible. It does this based on the 'semantics' parameter. +# - 'inplace': +# When the length of the patched string is longer than +# the length of the original string, split the resulting +# string in two parts, one with the original source +# reference, and one ontouched. +# - any other value: +# Return the patched string as having the source reference of the +# old string. +# Whenever the patch() function returns the original string, the original source +# references are maintained in their entirety. +# NOTE: This LOSES sources! +sub applyPatch { + my ($oldString, $oldSource, $patch, $semantics) = @_; + # simplify the old string + my ($theOldString, $theOldSource) = + simplifyString($oldString, $oldSource); + # apply the patch + my $theNewString = &{$patch}($theOldString); + # if nothing changed, return as is + if ($theOldString eq $theNewString) { + return $oldString, $oldSource; } + # when we the semantics state 'inplace' + elsif ($semantics eq 'inplace' + && length($oldString) != 0 + && length($theNewString) > length($theOldString)) + { + $theOldString = substr($theNewString, 0, length($theOldString)); + $theNewString = substr($theNewString, length($theOldString)); + # we had an in-place e + return [$theOldString, $theNewString], [$theOldSource, undef]; } + # else, return only the simplified source + else { + return [$theNewString], [$theOldSource]; } } + +sub fmtType { + my ($type, $value) = @_; + if ($type eq 'UNSET' or $type eq 'MISSING') { + return "($type)"; } + elsif ($type eq 'STRING') { + return "($type) " . join('', @$value); } + elsif ($type eq 'INTEGER') { + return "($type) $value"; } + elsif ($type eq 'FUNCTION') { + return "($type) "; } + elsif ($type eq 'REFERENCE') { + my ($rtype, $rname) = @$value; + return "($type) ($rtype) $rname"; } + else { + return '(unknown)'; } +} + +1; diff --git a/lib/LaTeXML/Common/Locator.pm b/lib/LaTeXML/Common/Locator.pm index 99ce4cfd95..a048cbd4a7 100644 --- a/lib/LaTeXML/Common/Locator.pm +++ b/lib/LaTeXML/Common/Locator.pm @@ -22,18 +22,18 @@ sub new { my ($class, $source, $fromLine, $fromCol, $toLine, $toCol) = @_; my $locator = bless { source => $source, fromLine => $fromLine, fromCol => $fromCol, - toLine => $toLine, toCol => $toCol + toLine => (defined $toLine ? $toLine : $fromLine), + toCol => (defined $toCol ? $toCol : $fromCol) }, $class; return $locator; } -# creates a new locator range from a given start and end -sub newRange { - my ($class, $from, $to) = @_; +# creates a new locator covering $self (from) to $to +sub merge { + my ($self, $to) = @_; # make sure that either parameters are defined - return $to unless defined($from); - return $from unless defined($to); + return $self unless defined($to); # bail if we have differnt sources - return unless ($$from{source} || '') eq ($$to{source} || ''); + return unless ($$self{source} || '') eq ($$to{source} || ''); # the end coordinates depend on my ($toLine, $toCol); if ($to->isRange) { @@ -42,7 +42,7 @@ sub newRange { else { $toLine = $$to{fromLine}; $toCol = $$to{fromCol}; } - return new($class, $$from{source}, $$from{fromLine}, $$from{fromCol}, $toLine, $toCol); } + return (ref $self)->new($$self{source}, $$self{fromLine}, $$self{fromCol}, $toLine, $toCol); } sub isRange { my ($self) = @_; diff --git a/lib/LaTeXML/Core/Whatsit.pm b/lib/LaTeXML/Core/Whatsit.pm index ccfd99b4d2..cb93cb1ddf 100644 --- a/lib/LaTeXML/Core/Whatsit.pm +++ b/lib/LaTeXML/Core/Whatsit.pm @@ -81,7 +81,8 @@ sub setBody { $$self{properties}{trailer} = $trailer; # And copy any otherwise undefined properties from the trailer if ($trailer) { - $$self{properties}{locator} = LaTeXML::Common::Locator->newRange($self->getLocator, $trailer->getLocator); +### $$self{properties}{locator} = LaTeXML::Common::Locator->newRange($self->getLocator, $trailer->getLocator); + $$self{properties}{locator} = ($self->getLocator ? $self->getLocator->merge($trailer->getLocator) : $trailer->getLocator); my %trailerhash = $trailer->getProperties; foreach my $prop (keys %trailerhash) { $$self{properties}{$prop} = $trailer->getProperty($prop) unless defined $$self{properties}{$prop}; } } diff --git a/lib/LaTeXML/Package/IEEEtran.cls.ltxml b/lib/LaTeXML/Package/IEEEtran.cls.ltxml index c9dfe1e40f..96ded54c48 100644 --- a/lib/LaTeXML/Package/IEEEtran.cls.ltxml +++ b/lib/LaTeXML/Package/IEEEtran.cls.ltxml @@ -329,7 +329,7 @@ DefMacro('\IEEEeqnarraynumspace', ''); Let(T_CS('\appendices'), T_CS('\appendix')); -$$LaTeXML::Package::Pool::BIBSTYLES{IEEEtran} = { citestyle => 'numbers', sort => 'true' }; +$$LaTeXML::Package::Pool::BIBSTYLES{IEEEtran} = { citestyle => 'numbers'}; DefMacro('\IEEEsetlabelwidth{}', '\settowidth{\labelwidth}{#1}'); DefMacro('\IEEEusemathlabelsep', ''); diff --git a/lib/LaTeXML/Package/LaTeX.pool.ltxml b/lib/LaTeXML/Package/LaTeX.pool.ltxml index d74cd7e8a1..321eaf82fc 100644 --- a/lib/LaTeXML/Package/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Package/LaTeX.pool.ltxml @@ -3983,22 +3983,21 @@ DefConstructor('\lx@bibliography [] Semiverbatim', # NOTE: This totally needs to be made extensible (parsing *.bst!?!? OMG!) our $BIBSTYLES = { - plain => { citestyle => 'numbers', sort => 'true' }, - unsrt => { citestyle => 'numbers', sort => 'false' }, - alpha => { citestyle => 'AY', sort => 'true' }, - abbrv => { citestyle => 'numbers', sort => 'true' }, - plainnat => { citestyle => 'numbers', sort => 'true' }, - unsrtnat => { citestyle => 'numbers', sort => 'false' }, - alphanat => { citestyle => 'AY', sort => 'true' }, - abbrvnat => { citestyle => 'numbers', sort => 'true' } }; + plain => { citestyle => 'numbers'}, + unsrt => { citestyle => 'numbers' }, + alpha => { citestyle => 'AY' }, + abbrv => { citestyle => 'numbers' }, + plainnat => { citestyle => 'numbers' }, + unsrtnat => { citestyle => 'numbers' }, + alphanat => { citestyle => 'AY' }, + abbrvnat => { citestyle => 'numbers' } }; sub setBibstyle { my ($style) = @_; $style = ToString($style); AssignValue(BIBSTYLE => $style); if (my $parms = $$BIBSTYLES{$style}) { - AssignValue(CITE_STYLE => $$parms{citestyle}); - AssignValue(CITE_SORT => $$parms{sort}); } + AssignValue(CITE_STYLE => $$parms{citestyle}); } return; } DefConstructor('\bibstyle{}', sub { @@ -4007,16 +4006,13 @@ DefConstructor('\bibstyle{}', sub { # Really ? if (my $bib = $document->findnode('//ltx:bibliography')) { $document->setAttribute($bib, bibstyle => LookupValue('BIBSTYLE')); - $document->setAttribute($bib, citestyle => LookupValue('CITE_STYLE')); - $document->setAttribute($bib, sort => LookupValue('CITE_SORT')); } + $document->setAttribute($bib, citestyle => LookupValue('CITE_STYLE')); } }, afterDigest => sub { my $style = ToString($_[1]->getArg(1)); AssignValue(BIBSTYLE => $style, 'global'); if (my $parms = $$BIBSTYLES{$style}) { AssignValue(CITE_STYLE => $$parms{citestyle}); } - else { - Info('unexpected', $style, $_[0], "Unknown bibstyle '$style', it will be ignored"); } return; }); DefMacro('\bibliographystyle Semiverbatim', '\bibstyle{#1}'); @@ -4105,7 +4101,21 @@ DefMacroI('\fnum@@bibitem', undef, '{\@biblabel{\the@bibitem}}'); # Hack for abused bibliographies; see below DefMacro('\bibitem', '\if@lx@inbibliography\else\expandafter\lx@mung@bibliography\expandafter{\@currenvir}\fi' - . '\lx@bibitem', locked => 1); +#### . '\lx@bibitem', locked => 1); + . '\lx@bibitem@withhook', locked => 1); + +DefMacro('\lx@bibitem@taghook Semiverbatim', '\expandafter\ifdefined\csname lx@bibentry@taghook@#1\endcsname\csname lx@bibentry@taghook@#1\endcsname\else\fi'); +# add the hooked tag content into the lx@tags +# NOTE: This needs a complete rethink. +DefMacro('\lx@bibitem@withhook[] Semiverbatim', + '\let\save@lx@tags\lx@tags' . + '\def\lx@tags ##1{\save@lx@tags{##1\lx@bibitem@taghook{#2}}}' . + '\ifx.#1.\lx@bibitem{#2}\else\lx@bibitem[#1]{#2}\fi' . + '\let\lx@tags\save@lx@tags' . + '\let\save@lx@tags\relax'); +DefConstructor('\lxBibitemFrom Semiverbatim Semiverbatim Semiverbatim {}', + "#4"); + DefConstructor('\lx@bibitem[] Semiverbatim', "" . "#tags" @@ -4114,8 +4124,10 @@ DefConstructor('\lx@bibitem[] Semiverbatim', my $tag = $_[1]->getArg(1); my $key = CleanBibKey($_[1]->getArg(2)); if ($tag) { + Debug("BIBITEM Tags: " . ToString($tag)); $_[1]->setProperties(key => $key, - RefStepID('@bibitem'), + #RefStepID('@bibitem'), + RefStepCounter('@bibitem'), # but replace tags! tags => Digest(T_BEGIN, T_CS('\def'), T_CS('\the@bibitem'), T_BEGIN, Revert($tag), T_END, Invocation(T_CS('\lx@make@tags'), T_OTHER('@bibitem')), diff --git a/lib/LaTeXML/Package/natbib.sty.ltxml b/lib/LaTeXML/Package/natbib.sty.ltxml index 79c4975213..6032d78a57 100644 --- a/lib/LaTeXML/Package/natbib.sty.ltxml +++ b/lib/LaTeXML/Package/natbib.sty.ltxml @@ -87,10 +87,10 @@ setCitationStyle(round => 1, semicolon => 1); # [FUTURE: Maybe this can be encoded as attributes on ltx:bibliography ??? # But then we need to put in stub elements to be filled in by CrossRef! Ugh] # In any case, \bibstyle may get redefined by options, so define it now! +Let('\lx@orig@bibstyle','\bibstyle'); DefMacro('\bibstyle{}', sub { my $style = T_CS('\bibstyle@' . ToString($_[1])); - (LookupDefinition($style) ? ($style) : (T_CS('\relax'))); }); -#AtBeginDocument{\global\let\bibstyle=\@gobble} + (LookupDefinition($style) ? ($style) : Invocation(T_CS('\lx@orig@bibstyle'),$_[1])); }); Let('\@citestyle', '\bibstyle'); # # They _say_ round & semicolon but ... @@ -485,14 +485,15 @@ DefMacro('\shortcites Semiverbatim', ''); # \bibitem[\protect\citename{Jones et al., }1990]{key}... # \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}... -DefMacro('\bibitem', -## '\reset@natbib@cites\refstepcounter{@bibitem}\@ifnextchar[{\@lbibitem}{\@lbibitem[\the@bibitem]}', +DefMacro('\lx@bibitem', '\reset@natbib@cites\refstepcounter{@bibitem}\@ifnextchar[{\@lbibitem}{\@lbibitem[]}', locked => 1); RawTeX(<<'EOTeX'); %%% +%%%\let\natbib@citeauthoryear\citeauthoryear \def\citeauthoryear#1#2#3(@)(@)\@nil#4{% +%%%\def\ltx@citeauthoryear#1#2#3(@)(@)\@nil#4{% \if\relax#3\relax \NAT@wrout{\the@bibitem}{#2}{#1}{}{#4}\else \NAT@wrout{\the@bibitem}{#3}{#2}{#1}{#4}\fi} diff --git a/lib/LaTeXML/Post/MakeBibliography.pm b/lib/LaTeXML/Post/MakeBibliography.pm index d2f37f1efc..7967db8cc9 100644 --- a/lib/LaTeXML/Post/MakeBibliography.pm +++ b/lib/LaTeXML/Post/MakeBibliography.pm @@ -14,803 +14,237 @@ use strict; use warnings; use LaTeXML::Util::Pathname; use LaTeXML::Common::XML; -use LaTeXML::Util::Radix; -use charnames qw(:full); +use LaTeXML::Common::Error; +use LaTeXML::Common::Locator; +use LaTeXML::BibTeX; +#use charnames qw(:full); use LaTeXML::Post; use base qw(LaTeXML::Post::Collector); -# These are really constant, but set at bottom, for readability -our %FMT_SPEC; # CONSTANT -our @META_BLOCK; # CONSTANT +our $BIBTEX_RERUN_DEFAULT = 5; # Options: -# bibliographies : list of xml file names containing bibliographies (from bibtex) +# bibliographies : list of bib files containing bibliographies (from bibtex, may be omitted) # split : whether the split into separate pages by initial. -# NOTE: -# Ultimately needs to respond to the desired bibligraphic style -# Currently set up primarily for author-year -# What about numerical citations? (how would we split the bib?) -# But we should presumably encode a number anyway... +# reruns : maximum number of re-runs of BibTeX if we get additional \cite{}s, negative value means run forever. sub new { my ($class, %options) = @_; my $self = $class->SUPER::new(%options); $$self{split} = $options{split}; $$self{bibliographies} = $options{bibliographies}; + $$self{reruns} = $options{reruns} || $BIBTEX_RERUN_DEFAULT; return $self; } +# toProcess: Find which items should be processed sub toProcess { my ($self, $doc) = @_; return $doc->findnodes('//ltx:bibliography'); } -# Whooo, this is turning into a confusing API! -# Potentially multiple biblist's in each document -# potentially each one split into multiple output documents (need that option!) -# Inevitably duplicates? +# process: populate 'ltx:bibliography' with the right kind of bibitems +# This is the main entry point for the MakeBibliography postprocessor. +# NOTE: Weirdly carrying around $config to get $context to get $entry's to get sortkey! +# (for use by split!!!) sub process { my ($self, $doc, @bibs) = @_; my @docs = ($doc); + my ($lstdoc, $lst, $config); foreach my $bib (@bibs) { - next if $doc->findnodes('.//ltx:bibitem', $bib); # Already populated? - - local $LaTeXML::Post::MakeBibliography::NUMBER = 0; - local %LaTeXML::Post::MakeBibliography::STYLE = - (map { ($_ => $bib->getAttribute($_)) } qw(bibstyle citestyle sort)); - - my $entries = $self->getBibEntries($doc, $bib); - # Remove any bibentry's (these should have been converted to bibitems) - $doc->removeNodes($doc->findnodes('//ltx:bibentry')); + next if $doc->findnodes('.//ltx:bibitem', $bib); # already populated + ($lstdoc, $lst, $config) = $self->getBibliographyList($doc, $bib); + next unless defined($lst); + # remove any previous biblist's from the document foreach my $biblist ($doc->findnodes('//ltx:biblist')) { $doc->removeNodes($biblist) unless element_nodes($biblist); } - + # insert either a single or a split bibliography if ($$self{split}) { - # Separate by initial. - my $split = {}; - foreach my $sortkey (keys %$entries) { - my $entry = $$entries{$sortkey}; - $$split{ $$entry{initial} }{$sortkey} = $entry; } - @docs = map { $self->rescan($_) } - $self->makeSubCollectionDocuments($doc, $bib, - map { ($_ => $self->makeBibliographyList($doc, $bib, $_, $$split{$_})) } - sort keys %$split); } + @docs = $self->insertSplit($doc, $bib, $lstdoc, $lst, $config); } else { - $doc->addNodes($bib, $self->makeBibliographyList($doc, $bib, undef, $entries)); - # @docs = ($self->rescan($doc)); } - $self->rescan($doc); } } + $self->insertSingle($doc, $bib, $lstdoc, $lst, $config); } } return @docs; } -# Try to preserve the original form & case of the provided Bibkeys -# HOWEVER, we downcase them before indexing & looking them up!!!! -sub normalizeBibKey { - my ($key) = @_; - return lc($key); } +# inserts a single into a document +# this is rather straightforward and doesn't really do anything special +sub insertSingle { + my ($self, $doc, $bib, $lstdoc, $lst, $config) = @_; + $doc->addNodes($bib, $lst); + $self->rescan($doc); + return; } -# ================================================================================ -# Bibliographies can be specified either -# within the document (on ltx:bibliography, due to \bibliography{foo} -# or from an option to new (typically coming from the command line). -# The commandline option OVERRIDES the internal list (to avoid clashes) -# -# In the fist case, we only get names, (no bib extension, let alone xml) -# but we should look for pre-compiled versions of the bib anyway (pref foo.bib.xml) -# In the latter case, we may get pathnames OR literals OR even an XML Document object. -# literals can be bibtex (but eventually serialized XML should be allowed). -# When XML is supplied, it is assumed to contain an ltx:bibliography containing a ltx:biblist. -# In principle, we ought to look for named files also in the file cache (eg. from {filecontents}) -sub getBibliographies { - my ($self, $doc) = @_; - my @bibnames = (); - my $fromBibliography = 0; # coming from an 'ltx:bibliography' - # use the commandline bibliographies, if explicitly given. - if ($$self{bibliographies} && scalar(@{ $$self{bibliographies} })) { - @bibnames = @{ $$self{bibliographies} }; } - else { - my $bibnode = $doc->findnode('//ltx:bibliography'); - if (my $files = $bibnode->getAttribute('files') - || $bibnode->parentNode->getAttribute('files') # !!!!! - ) { - $fromBibliography = 1; - @bibnames = split(',', $files); } } - my @paths = $doc->getSearchPaths; - my @bibs = (); - my @rawbibs = (); - # Collect the "ready" bibliographies, while accumulating all raw sources for a single conversion pass - foreach my $bib (@bibnames) { - my $ref = ref $bib; - my $bibdoc; - if ($ref && ($ref eq 'LaTeXML::Post::Document')) { # It's already a Post::Document (somehow). - $bibdoc = $bib; } - elsif ($ref && ($ref eq 'XML::LibXML::Document')) { # Or it's a raw XML document? - $bibdoc = $doc->new($bib, sourceDirectory => '.'); } - elsif ($ref) { - Error('unexpected', $ref, $self, - "Don't know how to convert '$bib' (a '$ref') into a Bibliography document"); } - elsif (pathname_is_literaldata($bib)) { - push(@rawbibs, $bib); +# split and create document for a single from a single original document +# This roughly works as follows: +# - find all s in the biblist +# - find the corresponding original .bib entries (from the context of the bbl run) +# - split them into groups based on initial of the first author +# - create a new for each group, with appropriate ids +sub insertSplit { + my ($self, $doc, $bib, $lstdoc, $lst, $config) = @_; + my $context = $config->getContext; + my $split = {}; + foreach my $node (@{ $lstdoc->findnodes('.//ltx:bibitem', $lst) }) { + # find the key of this entry + my $key = $node->getAttribute('key'); + unless (defined($key)) { + Error('bibtex', $self, undef, "Found an without a key. Can't insert it anywhere. "); next; } - elsif ($bib =~ /\.xml$/) { - $bibdoc = $doc->newFromFile($bib); } # doc will do the searching... - elsif ($bib =~ /\.bib(?:\.xml)?$/ || $fromBibliography) { - # NOTE: We should also use kpsewhich to get the effects of $BIBINPUTS? - # NOTE: When better integrated with Core, should also check for cached bib documents. - my $xmlbib = $bib; - $xmlbib .= '.bib' if $fromBibliography && !($xmlbib =~ /\.bib$/); - if (my $xmlpath = pathname_find($xmlbib, paths => [@paths], types => ['xml'])) { - $bibdoc = $doc->newFromFile($xmlpath); } # doc will do the searching... - elsif (my $bibpath = pathname_find($bib, paths => [@paths], types => ['bib']) - || pathname_kpsewhich($bib)) { - push(@rawbibs, $bibpath); - next; } - else { - Error('missing_file', $bib, $self, - "Couldn't find Bibliography '$bib'", - "Searchpaths were " . join(',', @paths)); } } - if ($bibdoc) { - push(@bibs, $bibdoc); } - else { - Info('expected', $bib, $self, - "Couldn't find usable Bibliography for '$bib'"); } } - # Lastly, If we found any raw .bib files/literaldata, convert and include them. - if (@rawbibs) { - my $raw; - if (scalar(@rawbibs) == 1) { # Single, just convert as-is - $raw = $rawbibs[0]; } - else { - $raw = 'literal:'; - for my $rawbib (@rawbibs) { # Multiple, arrange into a single conversion payload - if ($rawbib =~ s/literal\://) { - $raw .= $rawbib; } - else { - # TODO: Is this a memory concern for large bib files? - if (open(my $bibfh, '<', $rawbib)) { - $raw .= join("", <$bibfh>); - close $bibfh; } - else { - Info("open", $rawbib, $self, "Couldn't open file $rawbib"); } } - $raw .= "%\n"; } } - my $bibdoc = $self->convertBibliography($doc, $raw); - push(@bibs, $bibdoc) if $bibdoc; } - NoteLog("MakeBibliography: using bibliographies " - . join(',', map { (length($_) > 100 ? substr($_, 100) . '...' : $_) } @bibnames) - . "]"); - return @bibs; } - -# This converts a bib into the corresponding XML -# $bib is either a filename or literal -# Note that for multiple bibliographies it is inefficient to prepare a session for EACH. -# However, we really shouldn't be preparing a new session anyway: -# In general, it should use the same STATE information as the main document, -# so IF that state is still around, we should use it! -# That's for future enhancement!!! -# Also, it probably doesn't make sense for the session to capture the log output; -# it should just continue dribbling out whereever it usually would go. -sub convertBibliography { + # find the entry itself + my $entry = $context->findEntry($key); + unless (defined($entry)) { + Error('bibtex', $self, undef, "Found an without an underlying entry. Is the bbl broken? "); + next; } + # put this entry into the right split + my $initial = $self->getSortKey($entry) || '_'; + $$split{$initial} = [] unless defined($$split{$initial}); + push(@{ $$split{$initial} }, $node); } + # and make new biblists for all the entries + return map { $self->rescan($_) } + $self->makeSubCollectionDocuments($doc, $bib, + map { ($_ => $self->makeSplitBiblist($doc, $bib, $lstdoc, $_, $$split{$_})) } + sort keys %$split); +} + +# makeSplitBiblist creates a new for a given initial with a given list of nodes +sub makeSplitBiblist { + my ($self, $doc, $bib, $lstdoc, $initial, $nodes) = @_; + # find the old and new ids + my $oldlength = length($self->getBibliographyID($doc, $bib, undef)); + my $id = $self->getBibliographyID($doc, $bib, $initial); + # replace the old ids with the new ones + foreach my $node (@$nodes) { + foreach my $subnode ($lstdoc->findnodes('(.|.//*)[@xml:id]', $node)) { + $subnode->setAttribute('xml:id', $id . substr($subnode->getAttribute('xml:id'), $oldlength)); } } + # and return the new element + return ['ltx:biblist', { 'xml:id' => $id }, @$nodes]; +} + +# getSortKey gets the key that determines which group a particular .bib entry is placed in. +# The key is either an lowercase letter, a digit, or the empty string. +# TODO: This is based on the sort.key$ at the moment which may not be meaningful (or even empty) +sub getSortKey { + my ($real) = ((getSortKeyImpl(@_) || '') =~ m/([a-z0-9])/i); + return lc($real || ''); } + +sub getSortKeyImpl { + my ($self, $entry) = @_; + # get the entry variable 'sort.key$' + my ($tp, $value) = $entry->getVariable('sort.key$'); + return unless defined($value) && $tp eq 'STRING'; + # make sure it's a perl string and not an internal string represention + my ($simple) = simplifyString($value); + return $simple; } + +# given a document and a bibliography, create an appropriate element (and also return the run config) +# Regenerate the bbl while there are additional citations in ltxml +sub getBibliographyList { my ($self, $doc, $bib) = @_; + my @cites = $self->findCites($doc); + my $bibtex = LaTeXML::BibTeX->new(searchpaths => [$doc->getSearchPaths]); + return unless $bibtex->loadStyle($bib->getAttribute('bibstyle')); + my @files = split(',', $bib->getAttribute('files')); + return unless $bibtex->loadBibliographies(@files); + # iterate to check for cross-refs + my $runsLeft = $$self{reruns} == -1 ? -1 : $$self{reruns} + 1; + my ($lst, $lstdoc); + my ($bbl, $config); + while (1) { + $runsLeft--; + ($bbl, $config) = $bibtex->run([@cites]); + return unless defined $bbl; + # convert the bbl to an acutal biblist + ($lstdoc, $lst) = $self->convertBBL($doc, $bib, $bbl); + return unless defined $lst; # Something went wrong => we can't insert it! + last if $runsLeft == 0; # no runs left => that's it + my @newcites = $self->findCites($lstdoc, $lst); + last unless $self->hasNewCites([@cites], [@newcites]); + push(@cites, @newcites); # Likely has duplicates! + Info('bibtex', $self, undef, + "Found " . scalar(@newcites) . " new citations, re-running BibTeX ($runsLeft runs left)"); } + # return the last output we got + return $lstdoc, $lst, $config; } + +# findCites finds everything that is cited in a given node +sub findCites { + my ($self, $doc, $node) = @_; + $node = $doc->getDocumentElement unless defined($node); + my @cites = grep { defined($_) && $_ } map { $_->getAttribute('bibrefs') } $doc->findnodes('.//ltx:bibref', $node); + return split(',', join(',', @cites)); } + +# hasNewCites checks if we got a new citation that hasn't been cited before +sub hasNewCites { + my ($self, $cites, $newCites) = @_; + # if we have something that didn't exist before, return 1 + my %oldcites = map { $_ => 1 } @$cites; + foreach my $new (@$newCites) { + return 1 unless defined($oldcites{$new}); } + # else there is nothing new + return 0; } + +# This converts the bbl output into a new Document +sub convertBBL { + my ($self, $doc, $bib, $bbl) = @_; + # imports require LaTeXML; require LaTeXML::Common::Config; - my @preload = (); # custom macros often used in e.g. howpublished field - # need to preload all packages used by the main article + # load the documentclass and packages of the parent document + my @preload = (); my ($classdata, @packages) = $self->find_documentclass_and_packages($doc); my ($class, $classoptions) = @$classdata; if ($class) { - - if ($classoptions) { - push(@preload, "[$classoptions]$class.cls"); } - else { - push(@preload, "$class.cls"); } } + push(@preload, ($classoptions ? "[$classoptions]$class.cls" : "$class.cls")); } foreach my $po (@packages) { my ($pkg, $options) = @$po; - if ($options) { - push(@preload, "[$options]$pkg.sty"); } - else { - push(@preload, "$pkg.sty"); } } - my $bibname = pathname_is_literaldata($bib) ? 'Anonymous Bib String' : $bib; - my $stage = "Recursive MakeBibliography $bibname"; + push(@preload, ($options ? "[$options]$pkg.sty" : "$pkg.sty")); } + # convert the bibliography + my $stage = "Recursive LaTeXML on bbl"; ProgressSpinup($stage); - my $bib_config = LaTeXML::Common::Config->new( - recursive => 1, - cache_key => 'BibTeX', - type => "BibTeX", - post => 0, - format => 'dom', - whatsin => 'document', - whatsout => 'document', - includestyles => 1, + # NOTE: Using a specific cache_key will reload all styles into a new LaTeXML config + # But if we're in latexmlc, can we reuse the same config as the main document? + # [using NO cache_key will reload on *each* bbl loop!] + my $config = LaTeXML::Common::Config->new( + recursive => 1, + cache_key => 'BibTeX', + post => 0, + format => 'dom', + whatsin => 'tex', + whatsout => 'document', + # documentid => $self->getBibliographyID($doc, $bib, undef), +### documentid => $doc->getDocumentElement->getAttribute('xml:id'), bibliographies => [], - paths => $$doc{searchpaths}, (@preload ? (preload => [@preload]) : ())); - my $bib_converter = LaTeXML->get_converter($bib_config); + my $converter = LaTeXML->get_converter($config); # Tricky and HACKY, we need to release the log to capture the inner workings separately. # ->bind_log analog: my $biblog = ''; -### my $biblog_handle; -### open($biblog_handle, ">>", \$biblog) or Error("Can't redirect STDERR to log for inner bibliography converter!"); -### *BIB_STDERR_SAVED = *STDERR; -### *STDERR = *$biblog_handle; - # end ->bind_log - - $bib_converter->prepare_session($bib_config); - my $response = $bib_converter->convert($bib); - # ->flush_log analog: -### close $biblog_handle; -### *STDERR = *BIB_STDERR_SAVED; - # end ->flush_log + $converter->prepare_session($config); + my $response = $converter->convert("literal:\\begin{document}$bbl\\end{document}"); # Trim log to look internal and report. -### $biblog =~ s/^.+?\(Digesting/\n\(Digesting/s; -### $biblog =~ s/Conversion complete:.+$//s; -### print STDERR $biblog; - MergeStatus($$bib_converter{latexml}{state}); + $biblog =~ s/^.+?\(Digesting/\n\(Digesting/s; + $biblog =~ s/Conversion complete:.+$//s; + MergeStatus($$converter{latexml}{state}); ProgressSpindown($stage); - if (my $bibdoc = $$response{result}) { - return $doc->new($bibdoc, sourceDirectory => '.'); } - return; } - -# ================================================================================ -# Get all cited bibentries from the requested bibliography files. -# Sort (cited) bibentries on author+year+title, [NOT on the key, or even proper BibTeX choices!!!] -# and then check whether author+year is unique!!! -# Returns a list of hashes containing: -# bibkey : the bibliographic entry's key -# bibentry : the bibentry node -# citations : array of bib keys that are cited somewhere within this bibentry -# referrers : array of ID's of places that refer to this bibentry -# suffix : a,b... if adjacent author/year are identical. -# NOTE: biblist's now have @lists to restrict to include ONLY -# those bibitems which have been referenced by bibref's for one of those lists! -# [If a bibref gives several lists, and entries for those lists end up in several bibliographies, -# first 'list' determines where the idref points] -sub getBibEntries { - my ($self, $doc, $bib) = @_; - - # First, scan the bib files for all ltx:bibentry's, (hash key is bibkey) - # Also, record the citations from each bibentry to others. - my %entries = (); - foreach my $bibdoc ($self->getBibliographies($doc)) { - my @lists = split(/\s+/, $doc->findnode('//ltx:bibliography')->getAttribute('lists') || 'bibliography'); - foreach my $bibentry ($bibdoc->findnodes('//ltx:bibentry')) { - my $bibkey = normalizeBibKey($bibentry->getAttribute('key')); - my $bibid = $bibentry->getAttribute('xml:id'); - $entries{$bibkey}{bibkey} = $bibkey; - $entries{$bibkey}{bibentry} = $bibentry; - $entries{$bibkey}{citations} = [map { normalizeBibKey($_) } grep { $_ } map { split(',', $_->value) } - $bibdoc->findnodes('.//@bibrefs', $bibentry)]; } } - # Now, collect all bibkeys that were cited in other documents (NOT the bibliography) - # And note any referrers to them (also only those outside the bib) - my @lists = split(/\s+/, $bib->getAttribute('lists') || 'bibliography'); - my $citestar = grep { $$self{db}->lookup("BIBLABEL:$_:*"); } @lists; - - my @queue = (); - foreach my $dbkey ($$self{db}->getKeys) { - if ($dbkey =~ /^BIBLABEL:(.*?):(.*)$/) { - my ($list, $bibkey) = ($1, $2); - next unless grep { $_ eq $list; } @lists; - my $bibdbentry = $$self{db}->lookup($dbkey); - if (my $referrers = $bibdbentry->getValue('referrers')) { - foreach my $refr (keys %$referrers) { - my ($rid, $e, $t) = ($refr, undef, undef); - while ($rid && ($e = $$self{db}->lookup("ID:$rid")) && (($t = ($e->getValue('type') || '')) ne 'ltx:bibitem')) { - $rid = $e->getValue('parent'); } - if (!$e) { - Warn('expected', 'entry', undef, - "Didn't find an entry for reference id=$rid"); } - elsif ($t ne 'ltx:bibitem') { - $entries{$bibkey}{referrers}{$refr} = 1; } } - push(@queue, $bibkey) if keys %{ $entries{$bibkey}{referrers} }; } - elsif ($citestar) { # If \cite{*} include all of them. - push(@queue, $bibkey); } } } - - # For each bibkey in the queue, complete and include the entry - # And add any keys cited from within each include entry - my %seen_keys = (); - my %missing_keys = (); - my $included = {}; # included entries (hash key is sortkey) - while (my $bibkey = shift(@queue)) { - next if $seen_keys{$bibkey}; # Done already. - $seen_keys{$bibkey} = 1; - next if $bibkey eq '*'; - if (my $bibentry = $entries{$bibkey}{bibentry}) { - my $entry = $entries{$bibkey}; - # Extract names, year and title from bibentry. - my $names = ''; - my $sortnames = ''; - my @names = $doc->findnodes('ltx:bib-name[@role="author"]', $bibentry); - @names = $doc->findnodes('ltx:bib-name[@role="editor"]', $bibentry) unless @names; - if (my $n = $doc->findnode('ltx:bib-key', $bibentry)) { - $sortnames = $names = $n->textContent; } - elsif (scalar(@names)) { - $sortnames = join(' ', map { getNameText($doc, $_) } @names); - my @ns = map { $_ && $_->textContent } map { $doc->findnodes('ltx:surname', $_) } @names; - if (@ns > 2) { $names = $ns[0] . ' et al'; } - elsif (@ns > 1) { $names = $ns[0] . ' and ' . $ns[1]; } - else { $names = $ns[0]; } } - elsif (my $t = $doc->findnode('ltx:bib-title', $bibentry)) { - $sortnames = $names = $t->textContent; } - my $date = $doc->findnode('ltx:bib-date[@role="publication"] | ltx:bib-type', $bibentry); - my $title = $doc->findnode('ltx:bib-title', $bibentry); - $date = ($date ? $date->textContent : ''); - $date = $1 if $date && $date =~ /^(\d\d\d\d)/; - $title = ($title ? $title->textContent : ''); - $$entry{ay} = "$names.$date"; - $$entry{initial} = $doc->initial($names, 1); - # Include this entry keyed using a sortkey. - $$included{ lc(join('.', $sortnames, $date, $title, $bibkey)) } = $entry; - # And, since we're including this entry, we'll need to include any that it cites! - push(@queue, @{ $$entry{citations} }) if $$entry{citations}; } - else { - $missing_keys{$bibkey} = 1; } } - # Now that we know which entries will be included, note their citations as bibreferrers. - foreach my $sortkey (keys %$included) { - my $entry = $$included{$sortkey}; - my $bibkey = $$entry{bibkey}; - map { $entries{ normalizeBibKey($_) }{bibreferrers}{$bibkey} = 1 } @{ $$entry{citations} }; } - - NoteLog("MakeBibliography: " . (scalar keys %entries) . " bibentries, " . (scalar keys %$included) . " cited"); - Warn('expected', 'bibkeys', undef, - "Missing bibkeys " . join(', ', sort keys %missing_keys)) if keys %missing_keys; - - # Finally, sort the bibentries according to author+year+title+bibkey - # If any neighboring entries have same author+year, set a suffix: a,b,... - # Actually, it isn't so much if they are adjacent; if author+year isn't unique, need a suffix - my @sortkeys = $doc->unisort(keys %$included); - my %suffixes = (); - while (my $sortkey = shift(@sortkeys)) { - # my $i=0; - # while(@sortkeys && ($$included{$sortkey}{ay} eq $$included{$sortkeys[0]}{ay})){ - # $$included{$sortkey}{suffix}='a'; - # $$included{$sortkeys[0]}{suffix} = chr(ord('a')+(++$i)); - # shift(@sortkeys); } - my $entry = $$included{$sortkey}; - my $ay = $$entry{ay}; - if (defined $suffixes{$ay}) { - my $prev = $suffixes{$ay}; - if (!$$prev{suffix}) { - $$prev{suffix_counter} = 1; - $$prev{suffix} = radix_alpha($$prev{suffix_counter}); } - $$entry{suffix_counter} = $$prev{suffix_counter} + 1; - $$entry{suffix} = radix_alpha($$entry{suffix_counter}); } - $suffixes{$ay} = $entry; - # HACKERY: AFTER all the sorting have been done, remove nodes. - # These may have been inserted to alter sorting, eg \NOOP{a}... - foreach my $sortnode ($doc->findnodes('//ltx:ERROR[@class="sort"]', $$entry{bibentry})) { - $sortnode->parentNode->removeChild($sortnode); } - } - return $included; } - -sub getNameText { - my ($doc, $namenode) = @_; - my $surname = $doc->findnodes('ltx:surname', $namenode); - my $givenname = $doc->findnodes('ltx:givenname', $namenode); - return ($surname && $givenname ? $surname . ' ' . $givenname : $surname || $givenname); } - -# ================================================================================ -# Convert hash of bibentry(s) into biblist of bibitem(s) + if (my $xml = $$response{result}) { + # Do we really need a new Document? + my $bibdoc = $doc->new($xml, sourceDirectory => '.'); + # find the biblist + my $biblist = $bibdoc->findnode('//ltx:biblist'); + Fatal('bibtex', $self, undef, "BBL did not produce a biblist") unless (defined($biblist)); + # and return it! + return ($bibdoc, $biblist); } + else { + Debug("... Failed!)"); + return; } } -sub makeBibliographyList { - my ($self, $doc, $bib, $initial, $entries) = @_; +# gets the id of a bibliography element +sub getBibliographyID { + my ($self, $doc, $bib, $inital) = @_; my $id = $bib->getAttribute('xml:id') || $doc->getDocumentElement->getAttribute('xml:id') || 'bib'; - $id .= ".L1"; - $id .= ".$initial" if $initial; - return ['ltx:biblist', { 'xml:id' => $id }, - map { $self->formatBibEntry($doc, $bib, $$entries{$_}) } $doc->unisort(keys %$entries)]; } - -# ================================================================================ -# NOTE: With multiple bibliographies, the ID of the bibentry isn't necessarily -# the ID in the "local" bibliography! (ie. bibentry can be repeated in the document!) -sub formatBibEntry { - my ($self, $doc, $bib, $entry) = @_; - my $bibentry = $$entry{bibentry}; - my $id = $bibentry->getAttribute('xml:id'); - my $key = $bibentry->getAttribute('key'); - my $type = $bibentry->getAttribute('type'); - my @blockspecs = @{ $FMT_SPEC{$type} || [] }; - - # Patch the entry's id in case there are multiple bibs. - if (my $bibid = $bib->getAttribute('xml:id') - || $doc->getDocumentElement->getAttribute('xml:id') || 'bib') { - $id =~ s/^bib//; $id = $bibid . $id; } - - local $LaTeXML::Post::MakeBibliography::SUFFIX = $$entry{suffix}; - my $number = ++$LaTeXML::Post::MakeBibliography::NUMBER; - - Warn('unexpected', $type, undef, - "No formatting specification for bibentry of type '$type'") unless @blockspecs; - - #------------------------------ - # Format the bibtag's - my @tags = (); - push(@tags, ['ltx:tag', { role => 'number', class => 'ltx_bib_number' }, $number]); # number tag - - # Set up authors and fullauthors tags - my @names = $doc->findnodes('ltx:bib-name[@role="author"]/ltx:surname', $bibentry); - @names = $doc->findnodes('ltx:bib-name[@role="editor"]/ltx:surname', $bibentry) unless @names; - my $etal = 0; - if (@names && ($names[-1]->toString eq 'others')) { # Magic! - $etal = 1; } - if (@names > 2) { - push(@tags, ['ltx:tag', { role => 'authors', class => 'ltx_bib_author' }, - $doc->cloneNodes($names[0]->childNodes), - ['ltx:text', { class => 'ltx_bib_etal' }, ' et al.']]); - my @fnames = (); - foreach my $n (@names[0 .. $#names - 1]) { - push(@fnames, $n->childNodes, ', '); } - push(@tags, ['ltx:tag', { role => 'fullauthors', class => 'ltx_bib_author' }, - $doc->cloneNodes(@fnames), - ' and ', $doc->cloneNodes($names[-1]->childNodes)]); } - elsif (@names > 1) { - push(@tags, ['ltx:tag', { role => 'authors', class => 'ltx_bib_author' }, - $doc->cloneNodes($names[0]->childNodes), - ' and ', $doc->cloneNodes($names[1]->childNodes)]); } - elsif (@names) { - push(@tags, ['ltx:tag', { role => 'authors', class => 'ltx_bib_author' }, - $doc->cloneNodes($names[0]->childNodes)]); } - - # Put a key tag, to use in place of authors if needed (esp for software, websites, etc) - my $keytag; - if ($keytag = $doc->findnode('ltx:bib-key', $bibentry)) { - push(@tags, ['ltx:tag', { role => 'key', class => 'ltx_bib_key' }, - $doc->cloneNodes($keytag->childNodes)]); } - - my @year = (); - if (my $date = $doc->findnode('ltx:bib-date[@role="publication"]', $bibentry)) { - @year = $date->childNodes; - if (my $datetext = $date->textContent) { - if ($datetext =~ /^(\d\d\d\d)/) { # Extract 4 digit year, if any - @year = ($1); } } - push(@tags, ['ltx:tag', { role => 'year', class => 'ltx_bib_year' }, - $doc->cloneNodes(@year), ($$entry{suffix} || '')]); } - - # Store a type tag, to use in place of year, if needed (esp for software, ...) - my $typetag; - if ($typetag = $doc->findnode('ltx:bib-type', $bibentry)) { - push(@tags, ['ltx:tag', { role => 'bibtype', class => 'ltx_bib_type' }, - $doc->cloneNodes($typetag->childNodes)]); } - - # put in the title - if (my $title = $doc->findnode('ltx:bib-title', $bibentry)) { - push(@tags, ['ltx:tag', { role => 'title', class => 'ltx_bib_title' }, - $doc->cloneNodes($title->childNodes)]); } - - # And finally, the refnum; we need to know the desired citation style! - # This is screwy!!! - # AND OF COURSE, we need to know the key before we know the suffix!!! - my $style = $LaTeXML::Post::MakeBibliography::STYLE{citestyle} || 'numbers'; - $style = 'numbers' unless (@names || $keytag) && (@year || $typetag); - if ($style eq 'numbers') { - push(@tags, ['ltx:tag', { role => 'refnum', class => 'ltx_bib_key', open => '[', close => ']' }, $number]); } - elsif ($style eq 'AY') { - my @rfnames; - if (my @authors = $doc->findnodes('ltx:bib-name[@role="author"]/ltx:surname', $bibentry)) { - @rfnames = @authors; } - elsif (my @editors = $doc->findnodes('ltx:bib-name[@role="editor"]/ltx:surname', $bibentry)) { - @rfnames = @editors; } - else { - @rfnames = $keytag->childNodes; } - my $aa; - if (scalar(@rfnames) > 1) { - $aa = join('', map { substr($_->textContent, 0, 1); } @rfnames); - if (length($aa) > 3) { - $aa = substr($aa, 0, 3) . "+"; } } - else { - $aa = uc(substr($rfnames[0]->textContent, 0, 3)); } - my $yrtext = (@year ? join('', map { (ref $_ ? $_->textContent : $_); } @year) : ''); - my $yy = (length($yrtext) >= 2 ? substr($yrtext, 2, 2) : $yrtext); - push(@tags, ['ltx:tag', { role => 'refnum', class => 'ltx_bib_abbrv', open => '[', close => ']' }, - $aa . $yy . ($$entry{suffix} || '')]); } - - else { - shift(@blockspecs); # Skip redundant 1st block!! - my @rfnames; - if (my @authors = $doc->findnodes('ltx:bib-name[@role="author"]', $bibentry)) { - @rfnames = do_authors(@authors); } - elsif (my @editors = $doc->findnodes('ltx:bib-name[@role="editor"]', $bibentry)) { - @rfnames = do_editorsA(@editors); } - else { - @rfnames = $keytag->childNodes; } - my @rfyear = (@year ? (@year, ($$entry{suffix} || '')) - : ($typetag ? $typetag->childNodes : ())); - push(@tags, ['ltx:tag', { role => 'refnum', class => 'ltx_bib_author-year' }, - $doc->cloneNodes(@rfnames), ' (', $doc->cloneNodes(@rfyear), ')']); } - - #------------------------------ - # Format the data in blocks, with the first being bib-label, rest bibblock. - my @blocks = (); - foreach my $blockspec (@blockspecs) { - my @x = (); - foreach my $row (@$blockspec) { - my ($xpath, $punct, $pre, $class, $op, $post) = @$row; - my $negated = $xpath =~ s/^!\s*//; - my @nodes = ($xpath eq 'true' ? () : $doc->findnodes($xpath, $bibentry)); - next if ($xpath ne 'true') && ($negated ? @nodes : !@nodes); - push(@x, $punct) if $punct && @x; - push(@x, $pre) if $pre; - push(@x, ['ltx:text', { class => 'ltx_bib_' . $class }, &$op($doc->cloneNodes(@nodes))]) if $op; - push(@x, $post) if $post; } - push(@blocks, ['ltx:bibblock', { 'xml:space' => 'preserve' }, @x]) if @x; - } - # Add a Cited by block. - - my @citedby = map { ['ltx:ref', { idref => $_, show => 'typerefnum' }] } - sort keys %{ $$entry{referrers} }; - push(@citedby, ['ltx:bibref', { bibrefs => join(',', sort keys %{ $$entry{bibreferrers} }), show => 'refnum' }]) - if $$entry{bibreferrers}; - push(@blocks, ['ltx:bibblock', { class => 'ltx_bib_cited' }, - "Cited by: ", $doc->conjoin(",\n", @citedby), '.']) if @citedby; - - return ['ltx:bibitem', { 'xml:id' => $id, key => $key, type => $type, class => "ltx_bib_$type" }, - (@tags ? (['ltx:tags', {}, @tags]) : ()), - @blocks]; } - -# ================================================================================ -# Formatting aids. -sub do_any { - my (@stuff) = @_; - return @stuff; } - -# Stuff for Author(s) & Editor(s) -sub do_name { - my ($node) = @_; - # NOTE: This should be a formatting option; use initials or full first names. - my $first = $LaTeXML::Post::DOCUMENT->findnode('ltx:givenname', $node); - if ($first) { # && use initials - $first = join('', map { (/\.$/ ? "$_ " : (/^(.)/ ? "$1. " : '')) } - split(/\s/, $first->textContent)); } - else { - $first = (); } - my $sur = $LaTeXML::Post::DOCUMENT->findnode('ltx:surname', $node); - # Why, oh Why do we need the _extra_ cloneNode ??? - return ($first, $sur->cloneNode(1)->childNodes); } - -sub do_names { - my (@names) = @_; - my @stuff = (); - my $sep = (scalar(@names) > 2 ? ', ' : ' '); - my $etal = 0; - if (@names && ($names[-1]->textContent eq 'others')) { # Magic! - pop(@names); - $etal = 1; } - my $n = scalar(@names); - while (my $name = shift(@names)) { - if (@stuff) { - push(@stuff, $sep); - push(@stuff, 'and ') if !$etal && !@names; } - push(@stuff, do_name($name)); } - if ($etal) { - push(@stuff, $sep, ['ltx:text', { class => 'ltx_bib_etal' }, 'et al.']); } - return @stuff; } - -sub do_names_short { - my (@names) = @_; - if (@names > 2) { - return ($names[0]->childNodes, ' ', ['ltx:text', { class => 'ltx_bib_etal' }, 'et al.']); } - elsif (@names > 1) { - return ($names[0]->childNodes, ' and ', $names[1]->childNodes); } - elsif (@names) { - return ($names[0]->childNodes); } } - -sub do_authors { - my (@stuff) = @_; - return do_names(@stuff); } - -sub do_editorsA { # Should be used in citation tags? - my (@stuff) = @_; - my @n = do_names(@stuff); - if (scalar(@stuff) > 1) { push(@n, " (Eds.)"); } - elsif (scalar(@stuff)) { push(@n, " (Ed.)"); } - return @n; } - -sub do_editorsB { - my (@stuff) = @_; - my @x = do_names(@stuff); - if (scalar(@stuff) > 1) { push(@x, " Eds."); } - elsif (scalar(@stuff)) { push(@x, " Ed."); } - return (@x ? ("(", @x, ")") : ()); } - -sub do_year { - my (@stuff) = @_; - return (' (', @stuff, @LaTeXML::Post::MakeBibliography::SUFFIX, ')'); } - -sub do_type { - my (@stuff) = @_; - return ('(', @stuff, ')'); } - -# Other fields. -#### sub do_title { (['ltx:text',{font=>'italic'},@_]); } -sub do_title { - my (@stuff) = @_; - return (@stuff); } -###sub do_bold { (['ltx:text',{font=>'bold'},@_]); } -sub do_edition { - my (@stuff) = @_; - return (@stuff, " edition"); } # If a number, should convert to cardinal! - -sub do_thesis_type { - my (@stuff) = @_; - return @stuff; } - -sub do_pages { - my (@stuff) = @_; - return (" pp." . pack('U', 0xA0), @stuff); } # Non breaking space - -sub do_crossref { - my ($node, @stuff) = @_; - return (['ltx:cite', {}, - ['ltx:bibref', { bibrefs => $node->getAttribute('bibrefs'), show => 'title, author' }]]); } - -my $LINKS = # CONSTANT - "ltx:bib-links | ltx:bib-review | ltx:bib-identifier | ltx:bib-url"; - -sub do_links { - my (@nodes) = @_; - my @links = (); - my $doc = $LaTeXML::Post::DOCUMENT; - foreach my $node (@nodes) { - my $scheme = $node->getAttribute('scheme') || ''; - my $href = $node->getAttribute('href'); - my $tag = $doc->getQName($node); - if (($tag eq 'ltx:bib-identifier') || ($tag eq 'ltx:bib-review')) { - if ($href) { - push(@links, ['ltx:ref', { href => $href, class => "$scheme ltx_bib_external" }, - $doc->cloneNodes($node->childNodes)]); } - else { - push(@links, ['ltx:text', { class => "$scheme ltx_bib_external" }, - $doc->cloneNodes($node->childNodes)]); } } - elsif ($tag eq 'ltx:bib-links') { - push(@links, ['ltx:text', { class => "ltx_bib_external" }, - $doc->cloneNodes($node->childNodes)]); } - elsif ($tag eq 'ltx:bib-url') { - push(@links, ['ltx:ref', { href => $href, class => 'ltx_bib_external' }, - $doc->cloneNodes($node->childNodes)]); } } - - @links = map { (",\n", $_) } @links; # non-string join() - return @links[1 .. $#links]; } - -# ================================================================================ -# Formatting specifications. -# Adpated from amsrefs.sty -#BEGIN{ - -# For each bibliographic type, -# the specification is an array representing each bibblock. -# Each biblock is an array of field specifications. -# Each field specification is: -# [xpath, punct, prestring, operatorname, poststring] -# NOTE That the first block is only shown for numeric style, -# since otherwise athors will already be shown in the bibtag@refnum!!! -# Ugh... - -# [xpath punct pre class formatter post ] -@META_BLOCK = - ([['ltx:bib-note', '', "Note: ", 'note', \&do_any, '']], - [[$LINKS, '', 'External Links: ', 'links', \&do_links, '']]); - -%FMT_SPEC = - # [xpath punct pre class formatter post ] - (article => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, '']], - [['ltx:bib-title', '', '', 'title', \&do_title, '.']], - [['ltx:bib-part[@role="part"]', '', '', 'part', \&do_any, ''], - ['ltx:bib-related/ltx:bib-title', ', ', '', 'journal', \&do_any, ''], - ['ltx:bib-part[@role="volume"]', ' ', '', 'volume', \&do_any, ''], - ['ltx:bib-part[@role="number"]', ' ', '(', 'number', \&do_any, ')'], - ['ltx:bib-status', ', ', '(', 'status', \&do_any, ')'], - ['ltx:bib-part[@role="pages"]', ', ', '', 'pages', \&do_pages, ''], - ['ltx:bib-language', ' ', '(', 'language', \&do_any, ')'], - ['true', '.']], - @META_BLOCK], - book => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-name[@role="editor"]', '', '', 'editor', \&do_editorsA, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, '']], - [['ltx:bib-title', '', '', 'title', \&do_title, '.']], - [['ltx:bib-type', '', '', 'type', \&do_any, ''], - ['ltx:bib-edition', ', ', '', 'edition', \&do_edition, ''], - ['ltx:bib-part[@role="series"]', ', ', '', 'series', \&do_any, ''], - ['ltx:bib-part[@role="volume"]', ', ', 'Vol. ', 'volume', \&do_any, ''], - ['ltx:bib-part[@role="part"]', ', ', 'Part ', 'part', \&do_any, ''], - ['ltx:bib-publisher', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-organization', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', '', 'place', \&do_any, ''], - ['ltx:bib-status', ' ', '(', 'status', \&do_any, ')'], - ['ltx:bib-language', ' ', '(', 'language', \&do_any, ')'], - ['true', '.']], - @META_BLOCK], - 'incollection' => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, '']], - [['ltx:bib-title', '', '', 'title', \&do_title, '.']], - [['ltx:bib-type', '', '', 'type', \&do_any, ''], - # Show crossref if any - ['ltx:bib-related[@bibrefs]', ' ', 'See ', 'crossref', \&do_crossref, ','], - # if NO crossref, used embedded editors & booktitle. - # ['ltx:bib-related[@type="book"]/ltx:bib-title', ' ', 'in ', 'inbook', \&do_title, ','] - ['ltx:bib-related[@type][not(../ltx:bib-related[@bibrefs])]/ltx:bib-title', - ' ', 'In ', 'inbook', \&do_title, ','], - ['ltx:bib-related[@type][not(../ltx:bib-related[@bibrefs])]/ltx:bib-name[@role="editor"]', - ' ', ' ', 'editor', \&do_editorsA, ','], - ], - [['ltx:bib-edition', '', '', 'edition', \&do_edition, ''], - ['ltx:bib-name[@role="editor"]', ', ', '', 'editor', \&do_editorsB, ''], - ['ltx:bib-related/ltx:bib-part[@role="series"]', ', ', '', 'series', \&do_any, ''], - ['ltx:bib-related/ltx:bib-part[@role="volume"]', ', ', 'Vol. ', 'volume', \&do_any, ''], - ['ltx:bib-related/ltx:bib-part[@role="part"]', ', ', 'Part ', 'part', \&do_any, ''], - ['ltx:bib-publisher', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-organization', ', ', '', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', '', 'place', \&do_any, ''], - ['ltx:bib-part[@role="pages"]', ', ', '', 'pages', \&do_pages, ''], - ['ltx:bib-status', ' ', '(', 'status', \&do_any, ')'], - ['ltx:bib-language', ' ', '(', 'language', \&do_any, ')'], - ['true', '.']], - @META_BLOCK], - report => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-name[@role="editor"]', '', '', 'editor', \&do_editorsA, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, '']], - [['ltx:bib-title', '', '', 'title', \&do_title, '.']], - [['ltx:bib-type', '', '', 'type', \&do_any, '']], - [['ltx:bib-part[@role="number"]', '', 'Technical Report ', 'number', \&do_any, ''], - ['ltx:bib-part[@role="series"]', ', ', '', 'series', \&do_any, ''], - ['ltx:bib-part[@role="volume"]', ', ', 'Vol. ', 'volume', \&do_any, ''], - ['ltx:bib-part[@role="part"]', ', ', 'Part ', 'part', \&do_any, ''], - ['ltx:bib-publisher', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-organization', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', ' ', 'place', \&do_any, ''], - ['ltx:bib-status', ', ', '(', 'status', \&do_any, ')'], - ['ltx:bib-language', ' ', '(', 'language', \&do_any, ')'], - ['true', '.']], - @META_BLOCK], - thesis => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-name[@role="editor"]', '', '', 'editor', \&do_editorsA, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, '']], - [['ltx:bib-title', '', '', 'title', \&do_title, '.']], - [['ltx:bib-type', ' ', '', 'type', \&do_thesis_type, ''], - ['ltx:bib-part[@role="part"]', ', ', 'Part ', 'part', \&do_any, ''], - ['ltx:bib-publisher', ', ', '', 'publisher', \&do_any, ''], - ['ltx:bib-organization', ', ', '', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', '', 'place', \&do_any, ''], - ['ltx:bib-status', ', ', '(', 'status', \&do_any, ')'], - ['ltx:bib-language', ', ', '(', 'language', \&do_any, ')'], - ['true', '.']], - @META_BLOCK], - website => - [[['ltx:bib-name[@role="author"]', '', '', 'author', \&do_authors, ''], - ['ltx:bib-name[@role="editor"]', '', '', 'editor', \&do_editorsA, ''], - ['ltx:bib-date[@role="publication"]', '', '', 'year', \&do_year, ''], - ['ltx:title', '', '', 'title', \&do_any, ''], - ['ltx:bib-type', '', '', 'type', \&do_any, ''], - ['! ltx:bib-type', '', '', 'type', sub { ('(Website)'); }, '']], - [['ltx:bib-organization', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', '', 'place', \&do_any, ''], - ['true', '.']], - @META_BLOCK], - software => - [[['ltx:bib-key', '', '', 'key', \&do_any, ''], - ['ltx:bib-type', '', '', 'type', \&do_type, '']], - [['ltx:bib-title', '', '', 'title', \&do_any, '']], - [['ltx:bib-organization', ', ', ' ', 'publisher', \&do_any, ''], - ['ltx:bib-place', ', ', '', 'place', \&do_any, ''], - ['true', '.']], - @META_BLOCK], - ); - -$FMT_SPEC{periodical} = $FMT_SPEC{book}; -$FMT_SPEC{collection} = $FMT_SPEC{book}; -$FMT_SPEC{proceedings} = $FMT_SPEC{book}; -$FMT_SPEC{manual} = $FMT_SPEC{book}; -$FMT_SPEC{misc} = $FMT_SPEC{book}; -$FMT_SPEC{unpublished} = $FMT_SPEC{book}; -$FMT_SPEC{booklet} = $FMT_SPEC{book}; -$FMT_SPEC{'collection.article'} = $FMT_SPEC{incollection}; -$FMT_SPEC{'proceedings.article'} = $FMT_SPEC{incollection}; -$FMT_SPEC{inproceedings} = $FMT_SPEC{incollection}; -$FMT_SPEC{inbook} = $FMT_SPEC{incollection}; -$FMT_SPEC{techreport} = $FMT_SPEC{report}; - -#} +### $id .= ".L1"; +### $id .= ".$inital" if defined($inital); + #Debug("ID=>$id"); + return $id; } # ================================================================================ 1; diff --git a/lib/LaTeXML/resources/CSS/LaTeXML.css b/lib/LaTeXML/resources/CSS/LaTeXML.css index 119f090116..22a1ec22f9 100644 --- a/lib/LaTeXML/resources/CSS/LaTeXML.css +++ b/lib/LaTeXML/resources/CSS/LaTeXML.css @@ -253,14 +253,17 @@ dl.ltx_description dl.ltx_description dd { margin-left:3em; } .ltx_bibliography dt { margin-right:0.5em; float:left; } .ltx_bibliography dd { margin-left:3em; } /*.ltx_biblist { list-style-type:none; }*/ -.ltx_bibitem { list-style-type:none; } -.ltx_bibitem .ltx_tag { font-weight:bold; margin-left:-2em; width:3em; } +.ltx_bibitem { + list-style-type:none; + text-indent:-2em; } +/*.ltx_bibitem .ltx_tag { font-weight:bold; margin-left:-2em; width:3em; }*/ /*.bibitem-tag + div { display:inline; }*/ +/* .ltx_bib_title { font-style:italic; } .ltx_bib_article .bib-title { font-style:normal !important; } .ltx_bib_journal { font-style:italic; } .ltx_bib_volume { font-weight:bold; } - +*/ /* Indices */ .ltx_indexlist li { list-style-type:none; } .ltx_indexlist { margin-left:1em; padding-left:1em;} diff --git a/lib/LaTeXML/resources/XSLT/LaTeXML-bib-xhtml.xsl b/lib/LaTeXML/resources/XSLT/LaTeXML-bib-xhtml.xsl index 733173160c..55e981302f 100644 --- a/lib/LaTeXML/resources/XSLT/LaTeXML-bib-xhtml.xsl +++ b/lib/LaTeXML/resources/XSLT/LaTeXML-bib-xhtml.xsl @@ -98,11 +98,13 @@ + diff --git a/t/93_formats.t b/t/93_formats.t index f26d297931..9abf69ce45 100644 --- a/t/93_formats.t +++ b/t/93_formats.t @@ -11,8 +11,10 @@ use LaTeXML::Util::Test; latexml_tests('t/daemon/formats', requires => { - tei => 'amsart.cls', - jats => 'amsart.cls', + citation => 'alpha.bst', + citationraw => 'alpha.bst', + tei => ['amsart.cls', 'alpha.bst'], + jats => ['amsart.cls', 'alpha.bst'], }); #********************************************************************** diff --git a/t/daemon/formats/citation.xml b/t/daemon/formats/citation.xml index f4e7521544..8c9fd85b92 100644 --- a/t/daemon/formats/citation.xml +++ b/t/daemon/formats/citation.xml @@ -12,20 +12,22 @@
-

A sample citation [AS64], then point to bibliography:

+

A sample citation [AS64], then point to bibliography:

References

-
    -
  • [AS64] -M. Abramowitz and I. A. Stegun (1964) - -Handbook of mathematical functions with formulas, graphs, and mathematical tables. +
      +
    • [AS64] + +Milton Abramowitz and Irene A. Stegun. + -ninth Dover printing, tenth GPO printing edition, Dover, New York. +Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables. + -Cited by: p1. +Dover, New York, ninth dover printing, tenth gpo printing edition, 1964. +
diff --git a/t/daemon/formats/citationraw.xml b/t/daemon/formats/citationraw.xml index f4e7521544..8c9fd85b92 100644 --- a/t/daemon/formats/citationraw.xml +++ b/t/daemon/formats/citationraw.xml @@ -12,20 +12,22 @@
-

A sample citation [AS64], then point to bibliography:

+

A sample citation [AS64], then point to bibliography:

References

-
    -
  • [AS64] -M. Abramowitz and I. A. Stegun (1964) - -Handbook of mathematical functions with formulas, graphs, and mathematical tables. +
      +
    • [AS64] + +Milton Abramowitz and Irene A. Stegun. + -ninth Dover printing, tenth GPO printing edition, Dover, New York. +Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables. + -Cited by: p1. +Dover, New York, ninth dover printing, tenth gpo printing edition, 1964. +
diff --git a/t/daemon/formats/jats.xml b/t/daemon/formats/jats.xml index f007050bac..704dad6b8b 100644 --- a/t/daemon/formats/jats.xml +++ b/t/daemon/formats/jats.xml @@ -610,7 +610,11 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor References - StegunAbramowitz and 1964Handbook of mathematical functions with formulas, graphs, and mathematical tablesM. Abramowitz and I. A. Stegun (1964)Handbook of mathematical functions with formulas, graphs, and mathematical tables.ninth Dover printing, tenth GPO printing edition, Dover, New York.Cited by: §1. + StegunMilton Abramowitz and Irene A. Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables1964 +Milton Abramowitz and Irene A. Stegun. +Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables. +Dover, New York, ninth dover printing, tenth gpo printing edition, 1964. + diff --git a/t/daemon/formats/tei.xml b/t/daemon/formats/tei.xml index 8522b5966e..d3d6c4d386 100644 --- a/t/daemon/formats/tei.xml +++ b/t/daemon/formats/tei.xml @@ -682,7 +682,11 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor
References - Abramowitz and StegunHandbook of mathematical functions with formulas, graphs, and mathematical tablesM. Abramowitz and I. A. Stegun (1964)Handbook of mathematical functions with formulas, graphs, and mathematical tables.ninth Dover printing, tenth GPO printing edition, Dover, New York.Cited by: §1. + Milton Abramowitz and Irene A. StegunHandbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables +Milton Abramowitz and Irene A. Stegun. +Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables. +Dover, New York, ninth dover printing, tenth gpo printing edition, 1964. +
diff --git a/t/expansion/textcase.xml b/t/expansion/textcase.xml index 5af9948e8d..988f4689f4 100644 --- a/t/expansion/textcase.xml +++ b/t/expansion/textcase.xml @@ -163,7 +163,7 @@ MORE TEXT

Something numeric - + [David Carlisle 1997] David Carlisle 1997 diff --git a/t/structure/bibsect.xml b/t/structure/bibsect.xml index 3fa7772ab2..c4fcf81391 100644 --- a/t/structure/bibsect.xml +++ b/t/structure/bibsect.xml @@ -15,7 +15,7 @@

some text [] more text more citations

- + References diff --git a/tools/compilebibstyle b/tools/compilebibstyle new file mode 100755 index 0000000000..96d30a0dff --- /dev/null +++ b/tools/compilebibstyle @@ -0,0 +1,86 @@ +#!/usr/bin/perl -w +# /=====================================================================\ # +# | compilebibstyle | # +# | Convert BibTex style file to internal format | # +# |=====================================================================| # +# | support tools for LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # + +use strict; +use warnings; +use FindBin; +use lib "$FindBin::RealBin/../blib/lib"; +use LaTeXML; +use LaTeXML::BibTeX::BibStyle; +#====================================================================== +our $HEADER; +our $DEST = "$FindBin::RealBin/../lib/LaTeXML/BibTeX/BibStyle/Precompiled.pm"; +our $style = 'plain'; +our $bibstyle = LaTeXML::BibTeX::BibStyle->new($style, []); +writeParsed($style, $bibstyle->getProgram); +print STDERR "Wrote $style to $DEST\n"; + +# NOTE: This program should be condensed. +# too-long class names, unnecessary source pointers, .... +sub writeParsed { + my ($style, $program) = @_; + use Data::Dumper; + my $OUT; + open($OUT, '>', $DEST) or die "Couldn't open $DEST for writing: $!"; + print $OUT $HEADER; + print $OUT '$DEFAULT = ' . Dump($program) . ";\n"; + print $OUT "1;\n"; + close($OUT); + return; } + +sub Dump { + my ($object) = @_; + my $r = ref $object; + if (!$r) { + $object =~ s/'/\\'/g; + return "'" . $object . "'"; } + elsif ($r eq 'ARRAY') { + return '[' . join(',', map { Dump($_); } @$object) . ']'; } + elsif ($r eq 'LaTeXML::BibTeX::BibStyle::StyCommand') { + return 'Cmd(' . Dump($object->getName) + . ', [' . join(',', map { Dump($_); } $object->getArguments) . '])'; } + elsif ($r eq 'LaTeXML::BibTeX::BibStyle::StyString') { + my $kind = $object->getKind; + my $value = $object->getValue; + if ($kind eq 'NUMBER') { return 'Nmb(' . Dump($value) . ')'; } + elsif ($kind eq 'QUOTE') { return 'Quo(' . Dump($value) . ')'; } + elsif ($kind eq 'LITERAL') { return 'Lit(' . Dump($value) . ')'; } + elsif ($kind eq 'REFERENCE') { return 'Ref(' . Dump($value) . ')'; } + elsif ($kind eq 'BLOCK') { return 'Blk(' . Dump($value) . ')'; } } +### return 'Str('.join(',',map { Dump($_); } $object->getKind, $object->getValue).')'; } + die "Con't know how to dump $object"; } + +BEGIN { + $HEADER = << 'EoHeader'; +# /=====================================================================\ # +# | Precompiled $style.bst | # +# | for LaTeXML | # +# |=====================================================================| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +# THIS IS A GENERATED FILE! DO NOT EDIT +package LaTeXML::BibTeX::BibStyle::Precompiled; +use strict; +use warnings; +use LaTeXML::BibTeX::BibStyle::StyCommand; +use LaTeXML::BibTeX::BibStyle::StyString; +sub Cmd { return LaTeXML::BibTeX::BibStyle::StyCommand->new( @_ ); } +sub Nmb { return LaTeXML::BibTeX::BibStyle::StyString->new('NUMBER',@_ ); } +sub Quo { return LaTeXML::BibTeX::BibStyle::StyString->new('QUOTE',@_ ); } +sub Lit { return LaTeXML::BibTeX::BibStyle::StyString->new('LITERAL',@_ ); } +sub Ref { return LaTeXML::BibTeX::BibStyle::StyString->new('REFERENCE',@_ ); } +sub Blk { return LaTeXML::BibTeX::BibStyle::StyString->new('BLOCK', @_ ); } +our $DEFAULT; +EoHeader +}