#!/usr/bin/env texlua

-- texlogsieve - filter and summarize LaTeX log files
--
-- Copyright (C) 2021 Nelson Lago <lago@ime.usp.br>
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program.  If not, see <https://www.gnu.org/licenses/>.
--
-- Code etc: https://gitlab.com/lago/texlogsieve

--[[

----------------
THE TeX LOG FILE
----------------

TeX uses three low-level commands for logging:

 1. \message -> outputs to both the console and the log file (most
    low-level TeX messages, such as open/close files, shipouts etc.
    behave *more or less* as if they were generated by \message)

 2. \write with an unallocated stream identifier (typically, 0) ->
    also outputs to both the console and the log file.

 3. \write with a negative stream identifier (typically, -1) -> outputs
    to the log file only

\write and \message behave differently:

 * Normally, \write appends a line feed character (LF) to the text, so
   a sequence of \write commands results in multiple lines.

 * \message checks whether the last thing that was sent out was another
   \message; if so, it adds a space character and outputs the text (both
   the previous text and the new one are on the same line, separated by
   a space), otherwise it just outputs the text (in this case, the new
   text is at the beginning of a line). Note, however, that there are
   some \message's that are not separated by spaces, such as "))".
   Also, in most cases (I could not figure out when this fails, but it
   happens often), if \message realizes the new text will not fit in
   the current line (the line would exceed max_print_line characters),
   instead of wrapping the line as usual, it may output a LF and start
   the text on a new line.

 * \write also checks if the last thing that was sent out was a message;
   if so, it sends LF before the text, so that it starts at the
   beginning of a line (it also sends LF after the text too, as always)

Therefore, in the console, text output with \write always appears on a
new line. A sequence of calls to \message also always starts on a new
line, but the texts of all of them appear on the same line, separated
by spaces.

However, things get messy in the log file. Basically, \message and
\write0 modify the same filedescriptor, while \write-1 modifies a
different filedescriptor. This means \message and \write0 are unaware
of \write-1 and vice-versa. As a result, the spaces and LFs that are
added according to what is output to the console get mixed up with what
is written only to the log file with \write-1. Therefore, there may be
unexpected empty lines or lines that start with a space character in
the log file.

The LaTeX command \GenericInfo uses \write-1, while \GenericWarning
uses \write0. TeX and LaTeX also define \wlog, which is an alias for
\write-1; LaTeX defines \typeout, which is an alias for \write0. Some
packages define their own aliases; for example, pgfcore.code.tex does
\def\pgf@typeout{\immediate\write0}, graphics.sty does \let\Gin@log\wlog,
etc. Package infwarerr provides a compatibility layer for the LaTeX
standard logging commands, so that they can be used both in LaTeX and
in plain TeX, as \@PackageInfo, \@ClassWarning etc.

With that in mind, we will consider that there are five kinds of message
in the LaTeX log file:

 * Ordinary messages -> messages that start at the beginning of the line
   and end at the end of the line (they are created with \write), such as

       Document Class: book 2019/08/27 v1.4j Standard LaTeX document class

       For additional information on amsmath, use the `?' option.

       \openout4 = `somefile'.

 * Short messages -> messages that may begin/end anywhere on a line,
   because the line they are in may contain multiple messages of
   this type (they are created with \message), such as

       ASCII Hyphenation patterns for American English

       ) (/usr/share/texlive/texmf-dist/tex/generic/iftex/iftex.sty
       -> these mean 'close last file' and 'open file .../iftex.sty'

       (/usr/share/texlive/texmf-dist/tex/latex/etoolbox/etoolbox.sty)
       -> these mean 'open file .../etoolbox.sty' and 'close last file'
          (which is etoolbox.sty, obviously)

       ))
       -> these mean 'close the last two files'

       [1] [2] [3]
       -> these mean 'shipout pages 1, 2, and 3'

 * Multiline messages -> messages that start at the beginning of a line,
   continue for a few lines and end at the end of a line (the programmer
   explicitly used multiple \write commands, a single \write command
   with embedded newline characters, or single/multiple \message
   commands with leading/trailing newline characters), such as

       Package biblatex Info: Automatic encoding selection.
       (biblatex)             Assuming data encoding 'utf8'.

       *************************************
       * Using libertinus math *
       *************************************

       **********************
       * No checksum found! *
       **********************
       (from ydoc-doc.sty)

 * "Best-effort" multiline messages -> similar to the previous ones,
   but created using multiple \message commands or a single one with
   embedded newline characters but no leading and/or trailing newline
   characters. They *usually* begin at the beginning of the line and
   end at the end of the line because, as mentioned before, TeX usually
   inserts a line break if the line would not fit otherwise. However,
   that is not always true, especially if max_print_line is large.
   Therefore, sometimes the message may begin in the middle of a line
   and/or the various lines may be strung together or wrapped.
   Examples:

       *****************************************************************
         GFS-Solomos style file by A. Tsolomitis
       *****************************************************************
       (from gfssolomos.sty)

       =============== Entering putframe ====================
       \pagegoal =635.97621pt, \pagetotal =368.07768pt.
       (from exam.cls)

 * Error messages TODO describe them here

To complicate things, TeX by default wraps (breaks) any line longer
than max_print_line characters (by default, 79). Since some messages
may be output together on a single line, even short messages may be
broken across lines. At the same time, there are quite a few ordinary
lines that in practice happen to be max_print_line characters long,
which makes detecting line wrapping a real challenge. Also, for no
apparent reason, LuaTeX wraps some lines at max_print_line characters
and others at max_print_line +1 characters. More: I have seen at least
two cases in which TeX "forgot" to wrap a line, and sometimes there is
a blank line between the wrapped line and its continuation line. And
even more! LuaTeX does not break a line in the middle of a multibyte
UTF-8 character. That is obviously a good idea, but it means some lines
may be broken at lengths smaller than max_print_line. While this may
seem rare, it can happen when parts of the document text are included
in the log, as is the case with over/underfull box messages.

So, if at all possible, it is a very good idea to set max_print_line
to a really large value (such as 100,000), effectively disabling line
wrapping. It was useful in the 1980s, but not anymore (your terminal
or editor wraps automatically).


----------------------
HOW THIS PROGRAM WORKS
----------------------

To read this section, pretend for a moment that TeX does not wrap long
lines.

We have a set of handlers, i.e., objects that process specific kinds
of message. Each handler checks if a given line matches some patterns
to decide whether it is supposed to process it (return true) or not
(return false).

There is a loop that, at each iteration, reads a new line from the log
file with moreData() and uses chooseHandler() to call each handler in
turn until one of them processes that line. After the right handler
does its thing, it sends the message to the output, erases this input
line and returns true, which causes the loop to restart, skipping the
remaining handlers.

The loop may behave a little differently in two cases:

 1. If the handler processes just a part of the input line, which may
    happen if the line contains multiple short messages, it removes
    the processed data from the line and leaves the rest. At the next
    iteration, no new data is read: the loop processes the remaining
    material from the same line.

 2. If the handler processes the input and expects specific lines of
    content to follow, it can preset itself as the next handler,
    temporarily bypassing the choice step.

When the line (or part of it) is not recognized by any handler, it is
appended to unrecognizedBuffer. We may sometimes concatenate multiple
text chunks together here because, during processing, we may break the
line into chunks when in fact it comprises a single message. In any
case, we know for sure that the message (or messages) currently in
unrecognizedBuffer is complete when either (1) we identify the next
short message in the line or (2) we proceed to the next input line.
When either happens, we send the buffer to the output.

We know most messages, but not all, start at the beginning of a line.
Therefore, we keep two sets of handlers: beginningOfLineHandlers and
anywhereHandlers. We use the boolean atBeginningOfLine to know when
we may skip trying the beginningOfLineHandlers.

We mentioned that the handler "sends the message to the output". The
handler actually creates a Message object and calls the dispatch()
function to send that object to a coroutine that handles output. The
coroutine aggregates messages by page, generates summaries for some
kinds of message (for example, it may list all undefined references
together) and prints out the report in different formats.

Sometimes we need to be able to check the content of the next line
to decide what to do about the current line (for example, different
messages may begin with a line like "**********"). So, we do not only
read one line from the input at each iteration; instead, we have a
buffer with the next few lines. When the current line has been
completely processed, moreData() simply refills the buffer and calls
Lines:gotoNextLine(), which makes the next line (line 1) become the
current line (line 0), the following line (line 2) become the next
line (line 1) etc.

That would be all if TeX did not wrap long lines, but it does. To unwrap
lines when needed, we (1) check that the line is max_print_line long;
if so, we (2) check whether the next line is the beginning of a known
message; if it is not, we (3) check whether unwrapping lines makes us
recognize a message that was not identified without unwrapping. Because
of 3, we do this in each of the handlers and not beforehand. To unwrap
a line, we simply join the current line with the next one from the
buffer.

Note that, if you reconfigure the variable max_print_line to a value
larger than 9999 (which is a good thing to do), this program assumes
(quite reasonably) that there is no line wrapping.


------------
THE HANDLERS
------------

We want to be able to explicitly recognize as many messages as possible
(and leave the least amount possible for the unrecognizedBuffer) for at
least three reasons:

 1. A character such as "(", ")", "[", or "]" in an unrecognized message
    may confuse the program (these chars may indicate open/close file
    and begin/end shipout). If it is part of a known message, it will
    not be mistaken for an open/close file etc.

 2. By default, TeX wraps lines longer than max_print_line characters,
    and unwrapping them involves making sure that the following line
    is not the start of a new message. For this to be reliable,
    unknown messages should be kept to a minimum.

 3. We can assign severity levels to known messages; unknown messages
    must always be handled as high-severity, polluting the output.

At each iteration, chooseHandler() calls doit() for every handler. The
handler returns true to signal that chooseHandler() should proceed to
the next iteration. It does that if it did "something" to change the
status for the next iteration:

 * It processed a complete message in this iteration (i.e., the whole
   message was contained in the current line);

 * It processed the message partially and defined nextHandler;

 * It finalized the processing of a previous partial message (i.e., it
   realized the message has ended). This only happens if the handler
   was previously set as nextHandler. When this happens, it sometimes
   does nothing with the content of the current line, only outputs the
   complete message. Still, when the loop restarts, it will no longer
   be the nextHandler (and that is "something").

A handler must provide:

1. A doit() method that returns true or false to indicate whether it
   actually did something with the current line or not.

2. A canDoit(position) method that returns true or false to indicate
   whether the handler can/should process the line given by "position"
   (we use this to identify wrapped lines, as explained later).

3. An init() method to do any necessary setup after the command line
   has been read (it is ok to do nothing). The easiest way to do this
   is to "inherit" from HandlerPrototype.

4. For handlers that deal with messages that can appear in the middle
   of a line, a lookahead(position) method to indicate whether there is
   a message that the handler can/should process in the line indicated
   by "position", even if there is something else in the line before
   that message.

Besides true/false, canDoit() also returns a table with extra data. In
some cases, this table is empty; in others, the handler's doit() method
knows what to do with it. There are two places outside the handler
itself where this data is used:

1. In handleUnrecognizedMessage(), where we call lookahead() and use
   the value of "first" that should be embedded in this table.

2. In Lines:noHandlersForNextLine(), where we treat openParensHandler
   and openSquareBracketHandler specially and read file/page data from
   them.

A simple handler:
-----------------

  exampleHandler = {}
  exampleHandler.pattern = '^%s*L3 programming layer %b<> xparse %b<>'

  function exampleHandler:init()
  end

  function exampleHandler:canDoit(position)
      local line
      if position == nil then position = 0 end
      line = Lines:get(position)

      local first, last = string.find(line, self.pattern)
      if first == nil then return false end
      return true, {first = first, last = last}
  end

  function exampleHandler:doit()
      local myTurn, data = self:canDoit()
      if not myTurn then return false end

      flushUnrecognizedMessages()
      local msg = Message:new()
      msg.severity = DEBUG
      msg.content = string.sub(Lines.current, 1, data.last)
      dispatch(msg)
      Lines:handledChars(data.last)
      return true
  end

  function exampleHandler:lookahead(position)
      local tmp = self.pattern
      self.pattern = string.sub(self.pattern, 2) -- remove leading '^'
      local result, data = self:canDoit(position)
      self.pattern = tmp
      return result, data
  end

There are two special handlers, which we use as prototypes
(https://www.lua.org/pil/16.1.html ) and derive some other handlers
from:

 - stringsHandler - handles a list of predefined multiline strings
   that may or may not begin at the beginning of a line and may or may
   not end at the end of a line. We identify where the last line of
   the message ends and remove that from the input line, leaving the
   rest for the next handler. In general, the pattern we look for in
   each line should match the whole line and should not end with
   something like ".*", unless we are absolutely sure that (1) each line
   always ends at the end of the line and (2) the line is short enough
   that it is never wrapped (which also implies that the first line
   always starts at the start of the line). This handler is quite
   complex because it has to deal with many different scenarios. We
   derive other handlers from the basic prototype so that we can assign
   different severity levels to each one. Derived handlers differ from
   the prototype only by severity level and the set of patterns to
   search for.

 - genericLatexHandler -> handles the multiline messages generated by
   the \PackageInfo, \ClassWarning etc. LaTeX commands. The handler
   does not need to know in advance the text for all these messages;
   it looks for generic patterns instead and extracts from the message
   itself the name of the package and severity level. It is able to
   identify multiline messages by checking if the following lines are
   prefixed with a specific pattern, so it can also handle messages
   with an unknown number of lines. We derive other handlers from the
   basic prototype because, for each kind of message, we use a different
   set of pattern captures, and we need to treat these differently.
   Derived handlers differ from the prototype by the set of patterns
   to search for and by the unpackData() method, which deals with the
   specific pattern captures.


----------------
UNWRAPPING LINES
----------------

As mentioned, each handler has a canDoit(position) method, where
"position" is the line number in the Lines input buffer (0 is the
current line, 1 is the next line etc.). As expected, doit() calls
canDoit() to check whether it should proceed or not. However, that
is not all: if canDoit() fails to find a match, it checks whether the
line might be a wrapped line. To detect a wrapped line, canDoit()
uses Lines:seemsWrapped() to do three things:

1. Check that the line is the "right" size (in Lines:wrappingLength())

2. Check that the next line is not the beginning of a known message
   (in Lines:noHandlersForNextLine())

3. Check whether unwrapping the line actually gives us something, i.e.,
   the unwrapped line matches something that its two separate parts
   did not.

The problem is in step (2): this entails calling canDoit() from all
handlers on the following line, which means that it may be called many
times and may even call itself on a different line. Therefore, it is
essential that canDoit() have no side effects, i.e., it should not set
any state besides the return values. For the same reason, canDoit()
cannot alter the content of the Lines buffer when it tries to unwrap a
line; it should only use temporary variables instead (not only that:
if unwrapping does not yield a match, we do not want to do it either).

canDoit() may, however, return a "hint" about the line wrapping - either
the text or the pattern that finally matched. Some handlers do that and,
in doit(), take the string or pattern found by canDoit() and unwrap
lines until finding a match to that string or pattern. Others cannot do
this due to various reasons and need to repeat the work already done
by canDoit() (the code is different, however).

Note also that some handlers, such as underOverFullBoxHandler and
genericLatexHandler, cannot do (3), as they do not know in advance
what the end of a given message should look like. The bottom line is,
line unwrapping is done in many different ways according to context.

Finally, this all means that we may call canDoit() from all handlers on
a given line many times. This gets really bad with stringsHandler: for
a sequence of k consecutive lines that are max_print_line long, this
handler alone is O(n^{k+1}), where n is the number of patterns that it
checks for (around 40). A previous implementation proved this to be
impractical, so we work around this problem with memoization.


-----------------------------------------
DETAILS ABOUT UNDER/OVERFULL BOX MESSAGES
-----------------------------------------

These are actually several different messages:

Overfull \[hv]box (Npt too wide) SOMEWHERE
Underfull \[hv]box (badness N) SOMEWHERE

Possible SOMEWHEREs:

1. detected at line N
   -> this is something like a makebox (horizontal) or parbox (vertical)
      with an explicit size argument. For hboxes, this is followed by
      the offending text.

2. has occurred while \output is active
   -> If horizontal, this is probably in a header, footer or something
      similar; if vertical, the vertical glues got too stretched

3. in alignment at lines LINE NUMBERS
   -> the problematic box is part of a tabular or math alignment
      environment. The lines correspond to the whole align structure,
      not only the problematic box. This should only appear as a
      horizontal problem. This is followed by the offending text, but
      it is more often than not just a bunch of "[]" to indicate nested
      boxes

4. in paragraph at lines LINE NUMBERS
   -> "Normal" text. This also only appears as a horizontal problem.
      This is followed by the offending text, which may include a
      few "[]" for whatsits, glues etc. In particular, the text often
      begins with "[]", indicating the left margin glue.

In the log file, all under/overfull box messages are followed by a
description of the boxes involved. This is *not* normally included in
the console output (but it may be, depending on \tracingonline). The
level of detail of this description is controlled by \showboxdepth
and \showboxbreadth. The default for these in LaTeX is -1, which means
this description is omitted and replaced by "[]", so it looks like this:

Underfull \vbox (badness 10000) detected at line 128
 []   <-- this is the description

If the message includes the offending text, the description comes
after it:

Underfull \hbox (badness 3417) in paragraph at lines 128--128
 []\T1/LibertinusSerif-TLF/b/n/14.4 (+20) Some document text...
 []   <-- this is the description

If there is no offending text, the description may appear in the same
line as the under/overfull box message (both are \message's). The
offending text, if any, always starts at the beginning of a line and
ends at the end of a line.

About the description: https://tex.stackexchange.com/a/367589/217608

This all means that handling these messages from a pipe is different
than from the log file, because in the log file you know there will
be a "[]" after the message. What we do here is check whether that
string is there; if it is, we remove it.

under/overfull messages that do not include the offending text are
\message's and, therefore, there may be extra text (such as a shipout)
on the same line.

--]]


--[[ ##################################################################### ]]--
--[[ ################ INIT, MAIN LOOP, CHOOSING HANDLER ################## ]]--
--[[ ##################################################################### ]]--

   DEBUG = 0
    INFO = 1
 WARNING = 2
CRITICAL = 3
 UNKNOWN = 4

function main(arg)
  processCommandLine(arg)
  initializeGlobals()
  initializeKpse()
  registerHandlers()
  registerSummaries()
  convertFilterStringsToPatterns()

  while moreData() do
      if nextHandler == nil then
          chooseHandler()
      else
          handler = nextHandler
          nextHandler = nil
          handler:doit()
      end
  end

  -- dispatch remaining messages, if any
  epilogueHandler:flush()
  flushUnrecognizedMessages()
  dispatch(nil) -- end the output coroutine
end

function moreData()
  -- if there is remaining data from the previous iteration,
  -- we leave everything as-is for it to be processed now
  local tmp = Lines.current
  if tmp ~= nil and string.len(tmp) > 0 then return true end

  -- Refill the buffer. A simple experiment suggests 8 lines
  -- is enough, but why not use a higher value?
  while Lines:numLines() < 15 do
      tmp = logfile:read("*line")
      if tmp == nil then break end
      -- We *need* to remove blank lines here because
      -- sometimes a wrapped line is followed by a blank
      -- line, which messes our detection of wrapped lines.
      if string.len(tmp) > 0 then Lines:append(tmp) end
  end

  -- proceed to the next line
  flushUnrecognizedMessages()
  Lines:gotoNextLine()

  return Lines.current ~= nil
end

--[[
chooseHandler() never tries to process more than one message in a single
iteration for at least three reasons:

 * There may be no more data available on the current line, so we
   need to call moreData();

 * Maybe the next handler is one that we have already tried in this
   iteration; skipping it and trying others may fail;

 * Maybe the handler that last processed the data predefined the next
   handler, and we should not interfere with that.
--]]

function chooseHandler()
  -- Some messages can only appear at the beginning of a line
  if Lines.atBeginningOfLine then
      for _, candidateHandler in ipairs(beginningOfLineHandlers) do
          if candidateHandler:doit() then return end
      end
  end

  -- Others may appear anywhere
  for _, candidateHandler in ipairs(anywhereHandlers) do
      if candidateHandler:doit() then return end
  end

  -- No handler succeeded, which means this is an unrecognized message
  -- (or a fragment of one); Add to unrecognizedBuffer.
  handleUnrecognizedMessage()
end

function handleUnrecognizedMessage()
  -- Before sending this to the unrecognizedBuffer, check if
  -- there is another known message later on this same line.

  local last = string.len(Lines.current)

  for _, handler in ipairs(anywhereHandlers) do
      local match, data = handler:lookahead()
      if match and data.first -1 < last then last = data.first -1 end
  end

  unrecognizedBuffer = unrecognizedBuffer .. string.sub(Lines.current, 1, last)
  Lines:handledChars(last)
end

function flushUnrecognizedMessages()
  unrecognizedBuffer = trim(unrecognizedBuffer)
  if unrecognizedBuffer == "" then return end

  local msg = Message:new()
  msg.content = unrecognizedBuffer
  dispatch(msg)
  unrecognizedBuffer = ""
end

-- Setup initial status (lots of globals, sue me)
function initializeGlobals()

  -- Chunks of text that were not recognized by any handler
  unrecognizedBuffer = ""

  -- The user may choose to silence some files. When one of these is
  -- opened/closed, this is set to true or false accordingly. The value
  -- is then used by Message:new()
  mute = false

  -- List of files that TeX had open at a given time during processing
  openFiles = Stack:new()

  -- "List" of currently active shipouts. There is only ever one shipout
  -- active at any time, but we borrow the design of openFiles because
  -- there may be "[" and "]" characters that do not correspond to any
  -- shipout, so we use this to keep track of them.
  shipouts = Stack:new()

  -- Counter, so we know the physical page number
  numShipouts = 0

  -- map physicalPage (from numShipouts) to latexPage (LaTeX counter)
  latexPages = {}

  -- After printing each message, the output coroutine stores them
  -- in currentPageMessages. When it receives a shipout message,
  -- it traverses currentPageMessages adding the page number it
  -- just learned about to each of the messages, calls :toSummary()
  -- for each of them and and clears currentPageMessages (we do this
  -- so the data in the summaries may include the page numbers). The
  -- objects representing the summary for each kind of message are
  -- stored in summaries, so after all messages we can just traverse
  -- this list calling :toString() and get all the summaries. The
  -- summaries table is populated by registerSummaries().
  currentPageMessages = {}
  summaries = {}

  -- When the same message appears several times, we only output it
  -- once, thanks to this table. This is used by showMessage()
  alreadySeen = {}

  -- All handlers should be in either of these. They are populated by
  -- registerHandlers().
  beginningOfLineHandlers = {}
  anywhereHandlers = {}

  -- Does the log file have wrapped lines?
  -- This may be changed by initializeKpse().
  badLogFile = true
end

function initializeKpse()
  -- In texlua, the texconfig table (the table that records some TeX
  -- config variables) is not initialized automatically; we need to
  -- call this to initialize it so we can read "max_print_line". If
  -- I understand things correctly, the name used here affects the
  -- loaded configuration options: using a name such as "texlogsieve"
  -- would allow us to add custom options to texmf.cnf. But since
  -- all we want to do is search for files and read the value of
  -- "max_print_line", let's just pretend we are luatex.
  kpse.set_program_name("luatex")

  max_print_line = tonumber(kpse.var_value("max_print_line"))
  if max_print_line ~= nil and max_print_line > 9999 then
      badLogfile = false
  else
      badLogfile = true
  end
end

function registerHandlers()
  table.insert(beginningOfLineHandlers, citationHandler)
  table.insert(beginningOfLineHandlers, referenceHandler)
  table.insert(beginningOfLineHandlers, labelHandler)
  table.insert(beginningOfLineHandlers, genericLatexHandler)
  table.insert(beginningOfLineHandlers, latex23MessageHandler)
  table.insert(beginningOfLineHandlers, genericLatexVariantHandler)
  table.insert(beginningOfLineHandlers, providesHandler)
  table.insert(beginningOfLineHandlers, geometryDetailsHandler)
  table.insert(beginningOfLineHandlers, epilogueHandler)
  table.insert(beginningOfLineHandlers, underOverFullBoxHandler)
  table.insert(beginningOfLineHandlers, utf8FontMapHandler)
  table.insert(beginningOfLineHandlers, missingCharHandler)
  table.insert(beginningOfLineHandlers, beginningOfLineDebugStringsHandler)
  table.insert(beginningOfLineHandlers, beginningOfLineInfoStringsHandler)
  table.insert(beginningOfLineHandlers, beginningOfLineWarningStringsHandler)
  table.insert(anywhereHandlers, anywhereDebugStringsHandler)
  table.insert(anywhereHandlers, anywhereInfoStringsHandler)
  table.insert(anywhereHandlers, anywhereWarningStringsHandler)
  table.insert(anywhereHandlers, openParensHandler)
  table.insert(anywhereHandlers, closeParensHandler)
  table.insert(anywhereHandlers, openSquareBracketHandler)
  table.insert(anywhereHandlers, closeSquareBracketHandler)

  for _, handler in ipairs(beginningOfLineHandlers) do
      handler:init()
  end

  for _, handler in ipairs(anywhereHandlers) do
      handler:init()
  end
end

function registerSummaries()
  table.insert(summaries, underOverSummary)
  table.insert(summaries, missingCharSummary)
  table.insert(summaries, repetitionsSummary)
  table.insert(summaries, citationsSummary)
  table.insert(summaries, referencesSummary)
  table.insert(summaries, labelsSummary)
end

function convertFilterStringsToPatterns()
  local tmp = {}
  for _, pattern in ipairs(SEMISILENCE_FILES) do
      table.insert(tmp, globtopattern(pattern))
  end
  SEMISILENCE_FILES = tmp

  tmp = {}
  for _, pattern in ipairs(SILENCE_FILES_RECURSIVE) do
        table.insert(tmp, globtopattern(pattern))
  end
  SILENCE_FILES_RECURSIVE = tmp

  tmp = {}
  for _, str in ipairs(SILENCE_STRINGS) do
      local pat = stringToPattern(str)
      table.insert(tmp, pat)
  end
  SILENCE_STRINGS = tmp

  tmp = {}
  for _, str in ipairs(SILENCE_PKGS) do
      local pat = stringToPattern(str)
      table.insert(tmp, pat)
  end
  SILENCE_PKGS = tmp
end

-- Parses the command line (and, if requested, a configuration file) and
-- stores the result in the global option variables (HEARTBEAT, PAGE_DELAY,
-- ONLY_SUMMARY, SHOW_SUMMARY, SHOW_SHIPOUTS, RAW, SILENCE_REPETITIONS,
-- MINLEVEL, SILENCE_STRINGS, SILENCE_PKGS, SEMISILENCE_FILES,
-- SILENCE_FILES_RECURSIVE) and in the global "logfile" (the opened input
-- file, or io.stdin when no filename was given). User-supplied messages
-- (--add-*-message) are appended to the "patterns" list of the
-- corresponding "anywhere" strings handler.
--
-- "args" is handed as-is to simpleGetopt(). With --help or --version the
-- function prints the requested text and exits the program.
function processCommandLine(args)
  HEARTBEAT = true
  PAGE_DELAY = true
  ONLY_SUMMARY = false
  SHOW_SUMMARY = true
  SHOW_SHIPOUTS = false
  RAW = false
  SILENCE_REPETITIONS = true
  MINLEVEL = WARNING

  SILENCE_STRINGS = {}
  SILENCE_PKGS = {} -- just the package names
  SEMISILENCE_FILES = {} -- filenames (without leading path), file globs work
  SILENCE_FILES_RECURSIVE = {} -- same


  -- "-l level -c configFile"
  local optionsWithArgs = "lc"
  local vars = simpleGetopt(args, optionsWithArgs)

  --help
  -- "-h"
  if vars.help or vars.h then
      local msg = [[
Usage: texlogsieve [OPTION]... [INPUT FILE]
texlogsieve reads a LaTeX log file (or the standard input), filters
out less relevant messages, and displays a summary report.

Options:
  --page-delay, --no-page-delay        enable/disable grouping
                                       messages by page before display
  --summary, --no-summary              enable/disable final summary
  --only-summary                       no filtering, only final summary
  --shipouts, --no-shipouts            enable/disable reporting shipouts
  --repetitions, --no-repetitions      allow/prevent repeated messages
  --heartbeat, --no-heartbeat          enable/disable progress gauge
  -l LEVEL, --minlevel=LEVEL           filter out messages with severity
                                       level lower than [LEVEL]. Valid
                                       levels are DEBUG, INFO, WARNING,
                                       CRITICAL, and UNKNOWN
  -u, --unwrap-only                    no filtering or summary, only
                                       unwrap long, wrapped lines
  --silence-package=PKGNAME            suppress messages from package
                                       PKGNAME; can be used multiple times
  --silence-string=EXCERPT             suppress messages containing text
                                       EXCERPT; can be used multiple times
  --silence-file=FILENAME              suppress messages generated during
                                       processing of FILENAME; can be used
                                       multiple times
  --semisilence-file=FILENAME          similar to --silence-file, but not
                                       recursive
  --add-debug-message=MESSAGE          add new recognizable debug message
  --add-info-message=MESSAGE           add new recognizable info message
  --add-warning-message=MESSAGE        add new recognizable warning message
  -c cfgfile, --config-file=cfgfile    read options from config file
  -h, --help                           give this help list
  --version                            print program version]]

      for _, line in ipairs(linesToTable(msg)) do print(line) end
      os.exit(0)
  end

  --version
  if vars.version then
      print("texlogsieve 1.0.0-beta-1")
      print("Copyright (C) 2021 Nelson Lago <lago@ime.usp.br>")
      print("License GPLv3+: GNU GPL version 3 or later "
            .. "<https://gnu.org/licenses/gpl.html>.")
      print("This is free software: you are free to change "
            .. "and redistribute it.")
      print("There is NO WARRANTY, to the extent permitted by law.")
      os.exit(0)
  end

  --config-file=file
  -- "-c file"
  local configFileName
  if vars['config-file'] ~= nil then configFileName = vars['config-file'] end
  if vars.c ~= nil then configFileName = vars.c end
  if configFileName ~= nil then
      -- option values are stored as lists; use the first one
      configFileName = configFileName[1]
      local filevars = processConfigFile(configFileName)

      -- merge filevars with vars; vars has precedence
      for k, v in pairs(vars) do
          if type(v) == "boolean" then
              filevars[k] = v
          elseif filevars[k] == nil then
              filevars[k] = v
          else
              -- the value is a table, so append
              for _, item in ipairs(v) do
                  table.insert(filevars[k], item)
              end
          end
      end

      -- use the merged values
      vars = filevars
  end

  --unwrap-only
  -- "-u"
  if vars['unwrap-only'] or vars.u then
      -- these may be overridden below, so order matters
      RAW = true
      SHOW_SUMMARY = false
      PAGE_DELAY = false
      SHOW_SHIPOUTS = true
      SILENCE_REPETITIONS = false
      MINLEVEL = DEBUG
  end

  --page-delay
  --no-page-delay
  --page-delay=true/false
  if vars['no-page-delay']
          or vars['page-delay'] ~= nil and not vars['page-delay'] then

      PAGE_DELAY = false
      SHOW_SHIPOUTS = true -- this may be overridden below
  end
  if vars['page-delay'] then PAGE_DELAY = true end

  --only-summary
  if vars['only-summary'] then ONLY_SUMMARY = true end

  --no-summary
  --summary
  --summary=true/false
  if vars['no-summary'] or vars.summary ~= nil and not vars.summary then
      SHOW_SUMMARY = false
      SILENCE_REPETITIONS = false
  end
  if vars.summary then SHOW_SUMMARY = true end

  --no-shipouts
  --shipouts
  --shipouts=true/false
  if vars['no-shipouts'] or vars.shipouts ~= nil and not vars.shipouts then
      SHOW_SHIPOUTS = false
  end
  if vars.shipouts then SHOW_SHIPOUTS = true end

  --minlevel
  -- "-l"
  local level
  if vars.minlevel ~= nil then level = vars.minlevel end
  if vars.l ~= nil then level = vars.l end

  if level ~= nil then
      level = string.lower(level[1])
      if     level == "debug"    then MINLEVEL = DEBUG
      elseif level == "info"     then MINLEVEL = INFO
      elseif level == "warning"  then MINLEVEL = WARNING
      elseif level == "critical" then MINLEVEL = CRITICAL
      else                            MINLEVEL = UNKNOWN
      end
  end

  --no-repetitions
  --repetitions
  --repetitions=true/false
  if vars['no-repetitions']
                    or vars.repetitions ~= nil and not vars.repetitions then

      SILENCE_REPETITIONS = true
  end
  if vars.repetitions then SILENCE_REPETITIONS = false end

  --no-heartbeat
  --heartbeat
  --heartbeat=true/false
  if vars['no-heartbeat'] or vars.heartbeat ~= nil and not vars.heartbeat then
      HEARTBEAT = false
  end
  if vars.heartbeat then HEARTBEAT = true end

  if vars.filename == nil then
      logfile = io.stdin
  else
      logfile = assert(io.open(vars.filename, "r"))
  end

  if vars['silence-string'] then SILENCE_STRINGS = vars['silence-string'] end

  if vars['silence-package'] then SILENCE_PKGS = vars['silence-package'] end

  if vars['silence-file'] then SILENCE_FILES_RECURSIVE =
                                    vars['silence-file'] end

  if vars['semisilence-file'] then SEMISILENCE_FILES =
                                    vars['semisilence-file'] end

  -- Converts each message given with "option" to a pattern and appends it
  -- to handler.patterns. It is the converted pattern that must be stored,
  -- not the raw text typed by the user: otherwise the anchor and the
  -- newline conversion below would be thrown away.
  local function addMessagePatterns(option, handler)
      if not vars[option] then return end

      for _, msg in ipairs(vars[option]) do
          local pat = stringToPattern(msg)
          -- anchor the pattern unless it already contains a "^"
          if not string.find(pat, '^', 1, true) then pat = '^%s*' .. pat end
          -- a literal backslash + "n" in the text stands for a line break
          pat = string.gsub(pat, '\\n', '\n')
          table.insert(handler.patterns, pat)
      end
  end

  addMessagePatterns('add-debug-message', anywhereDebugStringsHandler)
  addMessagePatterns('add-info-message', anywhereInfoStringsHandler)
  addMessagePatterns('add-warning-message', anywhereWarningStringsHandler)
end

-- Reads the configuration file "filename" and returns a table with the
-- options found in it, stored with simpleGetoptStoreVal().
--
-- Each line is trimmed first; empty lines and lines starting with "#" are
-- ignored. A line of the form "name = value" yields the (trimmed) name and
-- value, split at the first "="; any other line is taken as an option name
-- whose value is true.
--
-- Raises an error (via assert) if the file cannot be opened.
function processConfigFile(filename)
    local configfile = assert(io.open(filename, "r"))
    local fileOptions = {}

    for line in configfile:lines() do
        line = trim(line)
        local isComment = string.find(line, '^#') ~= nil

        if not isComment and string.len(line) > 0 then
            local optname, optval

            -- plain search: "=" is looked up literally
            local equals = string.find(line, '=', 1, true)
            if equals ~= nil then
                optname = trim(string.sub(line, 1, equals - 1))
                optval = trim(string.sub(line, equals + 1))
            else
                optname = line
                optval = true
            end

            simpleGetoptStoreVal(fileOptions, optname, optval)
        end
    end

    configfile:close()

    return fileOptions
end


--[[ ##################################################################### ]]--
--[[ ################# OUTPUT COROUTINE AND FORMATTING ################### ]]--
--[[ ##################################################################### ]]--

-- Coroutine that consumes messages: every value it is resumed with is
-- handed to processMessage() until it receives nil, at which point it
-- calls finishProcessingMessages() and terminates.
outputCoroutine = coroutine.create(
  function(msg)
      while true do
          if msg == nil then break end
          processMessage(msg)
          msg = coroutine.yield()
      end

      finishProcessingMessages()
  end
)

-- Sends "msg" to outputCoroutine. The values returned by coroutine.resume()
-- (including its error status) are discarded and nothing is returned.
function dispatch(msg)
  coroutine.resume(outputCoroutine, msg)
end

-- Handles one message. Unless output is deferred (ONLY_SUMMARY or
-- PAGE_DELAY), the message is shown right away; otherwise only the
-- heartbeat is advanced. The message is always added to
-- currentPageMessages. When the message is a shipout, every message
-- collected for the page gets the physicalPage of the shipout message and
-- is sent to its summary, the page is shown if PAGE_DELAY is on (and
-- ONLY_SUMMARY is off), and the collection starts over.
function processMessage(msg)
  local deferred = ONLY_SUMMARY or PAGE_DELAY
  if not deferred then
      showMessage(msg)
  else
      heartbeat:tick()
  end

  -- aggregate until shipout
  table.insert(currentPageMessages, msg)

  if not msg.shipout then return end

  heartbeat:stop()

  for _, pending in ipairs(currentPageMessages) do
      pending.physicalPage = msg.physicalPage
      pending:toSummary()
  end

  if PAGE_DELAY and not ONLY_SUMMARY then showPageMessages() end

  currentPageMessages = {}
end

-- Called once there are no more messages: stops the heartbeat, shows the
-- messages collected after the last shipout (only when PAGE_DELAY is on
-- and ONLY_SUMMARY is off) and then, if SHOW_SUMMARY is set, the summaries.
function finishProcessingMessages()
  heartbeat:stop()

  local showTrailingMessages = PAGE_DELAY and not ONLY_SUMMARY
  if showTrailingMessages then
      -- heading for the messages after the last shipout
      for _, text in ipairs({"", "After last page:", ""}) do
          print(text)
      end
      showPageMessages()
  end

  -- now, the summaries
  if SHOW_SUMMARY then
      showSummary()
  end
end

-- Prints one message, unless its text is blank.
--
-- In RAW mode the lines of the text are printed unchanged. Otherwise the
-- first line is prefixed with "pg N: " when the message has a
-- physicalPage, and the following lines are indented by the same width.
-- When SILENCE_REPETITIONS is on, each distinct text is printed only once;
-- every message with that text is recorded in alreadySeen[text].
function showMessage(msg)
  local text = msg:toString()
  if string.len(trim(text)) == 0 then return end

  if RAW then
      for _, rawLine in ipairs(linesToTable(text)) do print(rawLine) end
      return
  end

  local prefix = ""
  if msg.physicalPage ~= nil then
      prefix = 'pg ' .. msg.physicalPage .. ': '
  end
  local indent = string.rep(" ", string.len(prefix))

  if SILENCE_REPETITIONS then
      local occurrences = alreadySeen[text]
      if occurrences ~= nil then
          -- repeated text: just take note of it
          table.insert(occurrences, msg)
          return
      end
      alreadySeen[text] = {msg}
  end

  for _, line in ipairs(linesToTable(text)) do
      print(prefix .. line)
      prefix = indent
  end
end

-- Shows, in order, every message currently held in currentPageMessages.
function showPageMessages()
  local pending = currentPageMessages
  for i = 1, #pending do
      showMessage(pending[i])
  end
end

-- Prints all registered summaries. Unless ONLY_SUMMARY is set, five empty
-- lines are printed first. Summaries whose text is blank are skipped; for
-- the others, every line after the first is indented by four spaces and an
-- empty line follows the summary.
function showSummary()
  if not ONLY_SUMMARY then
      for _ = 1, 5 do print("") end
  end

  for _, summary in ipairs(summaries) do
      local text = summary:toString()

      if string.len(trim(text)) > 0 then
          for n, line in ipairs(linesToTable(text)) do
              if n == 1 then
                  print(line)
              else
                  print('    ' .. line)
              end
          end
          print("")
      end
  end
end

-- Progress gauge written to stderr: a single character that cycles
-- through "chars" and is overwritten in place using backspaces.
heartbeat = {
  chars = {'/', '-', '\\', '|'},
  idx = 0,          -- how many times the gauge was drawn
  cnt = 0,          -- how many times tick() was called
  startline = true, -- true while no gauge character is on screen
}

-- Counts one more tick; on every fifth tick (starting with the first one),
-- draws the next gauge character, erasing the previous one if there is
-- one. Does nothing when HEARTBEAT is off.
function heartbeat:tick()
  if not HEARTBEAT then return end

  local timeToDraw = (self.cnt % 5 == 0)
  if timeToDraw then
      if not self.startline then
          io.stderr:write('\b')
      else
          self.startline = false
      end

      io.stderr:write(self.chars[self.idx % 4 + 1])
      self.idx = self.idx + 1
  end

  self.cnt = self.cnt + 1
end

-- Erases the gauge character, if one is on screen, so that regular output
-- can follow. Does nothing when HEARTBEAT is off.
function heartbeat:stop()
  if not HEARTBEAT then return end
  if self.startline then return end

  io.stderr:write('\b \b')
  self.startline = true
end


--[[ ##################################################################### ]]--
--[[ ########################### THE HANDLERS ############################ ]]--
--[[ ##################################################################### ]]--

-- datepat and filepat will come in handy later on.
--
-- Note that, in some cases, it may be useful to start filepat with '^[%.]?/'.
--
-- filepat will fail:
--
-- 1. If the path or filename includes weird characters, such as ":" or "|"
-- 2. If the file has no extension or the extension has only one character
-- 3. If the extension includes "-", "_", or spaces
-- 4. If filepat should match the end of the message and the matching line is
--    wrapped in the middle of the file extension, for example "myfile.pd\nf"
--    (but we have a hack in unwrapUntilPatternMatches() to work around that)
--
-- More importantly, filepat allows for spaces and multiple dots in filenames,
-- but this means it may match something that is not really a filename. Don't
-- blindly trust it! We do not use this to detect open/close file; for that,
-- check guessFilename().

-- A date written as four digits, a separator, two digits, a separator and
-- two digits, where each separator is "/", "-" or "." (e.g. "2021/03/15").
datepat = '%d%d%d%d[/%-%.]%d%d[/%-%.]%d%d'

-- The pieces of filepat, in order:
--   1. an optional letter and an optional colon (presumably a drive
--      prefix such as "C:" -- TODO confirm)
--   2. one or more characters that are not in the excluded punctuation set
--      (spaces, "/", "-", "_" and "." are all allowed here)
--   3. a literal dot
--   4. the extension: at least two characters from a stricter set that
--      additionally excludes "/", space, "-", "_" and "."
filepat = '%a?[:]?'
          .. '[^%%:;,%=%*%?%|%&%$%#%!%@"%`\'%<%>%[%]%{%}]+'
          .. '%.'
          .. '[^/ %-%_%.%%:;,%=%*%?%|%&%$%#%!%@"%`\'%<%>%[%]%{%}]'
          .. '[^/ %-%_%.%%:;,%=%*%?%|%&%$%#%!%@"%`\'%<%>%[%]%{%}]+'


-------------------------------------------------------------------------------
-- HandlerPrototype
-------------------------------------------------------------------------------

-- Base object for all handlers. Other handlers are created with :new()
-- and inherit (through __index) whatever they do not define themselves.
HandlerPrototype = {}

-- Returns a new, empty object that uses the receiver as its prototype.
function HandlerPrototype:new()
    local instance = setmetatable({}, self)
    self.__index = self
    return instance
end

-- Default, do-nothing init(). Since every handler inherits it, callers
-- can simply iterate over all handlers calling init(), even though only
-- some of them need it.
function HandlerPrototype:init()
end

-- Keeps unwrapping the current line for as long as it seems to be wrapped.
-- Only some of the handlers use this implementation.
function HandlerPrototype:unwrapLines()
  while true do
      if not Lines:seemsWrapped() then break end
      Lines:unwrapOneLine()
  end
end

-- Returns the message object this handler should fill in; by default,
-- a plain Message.
function HandlerPrototype:newMessage()
  local message = Message:new()
  return message
end


-------------------------------------------------------------------------------
-- epilogueHandler
--
-- This handles the generic messages at the end of each LaTeX run. We could
-- handle each one with stringsHandler, but this handler allows us to treat
-- the whole group of lines together, which means we do not need to have
-- dedicated rules for each line in them. Also, there are some lines here
-- that are not wrapped as the rest.
-------------------------------------------------------------------------------

epilogueHandler = HandlerPrototype:new()

-- Checks whether the line at "position" (default 0) starts the epilogue.
-- Returns true plus a table whose "last" field is the index of the last
-- character that belongs to the message, or false plus an empty table.
function epilogueHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)

  -- This appears in the logfile but not on stdout;
  -- when it matches, the whole line belongs to the message
  if string.find(line, "^Here is how much") ~= nil then
      return true, {last = string.len(line)}
  end

  -- This appears on stdout (and in the log, of course);
  -- here only the matched text belongs to the message
  local _, matchEnd = string.find(line,
          "^%(see the transcript file for additional information%)")

  if matchEnd ~= nil then
      return true, {last = matchEnd}
  end

  return false, {}
end

-- Handles the first line of the epilogue. Returns false when the current
-- line does not start it. Otherwise it flushes any pending unrecognized
-- text, starts a new DEBUG message with the matched part of the line,
-- marks those characters as handled and arranges for the following lines
-- to be processed by handleOtherLines() (by replacing self.doit and by
-- setting nextHandler), then returns true.
function epilogueHandler:doit()
  local accepted, match = self:canDoit()
  if not accepted then return false end

  flushUnrecognizedMessages()

  local message = self:newMessage()
  message.content = string.sub(Lines.current, 1, match.last)
  message.severity = DEBUG
  self.message = message

  Lines:handledChars(match.last)

  self.processingFilelist = false
  self.doit = self.handleOtherLines
  nextHandler = self
  return true
end

-- Handles every epilogue line after the first one, adding it to the
-- message under construction; always returns true and keeps itself as
-- nextHandler.
--
-- The file list must be unwrapped "by hand", because some of its lines
-- are wrapped at lengths different from max_print_line: once a line
-- starting with "<" or "{" is seen, the following lines are glued to the
-- content without a line break, until a line starting with "Output
-- written" shows up. All other lines are unwrapped the usual way and
-- added as new lines.
function epilogueHandler:handleOtherLines()
  local message = self.message

  if string.find(Lines.current, '^Output written') ~= nil then
      self.processingFilelist = false
      -- NOTE(review): the line break is added at the beginning of the
      -- accumulated content, not at its end -- confirm this is intended
      message.content = '\n' .. message.content
  end

  if not self.processingFilelist then
      self:unwrapLines()
      message.content = message.content .. '\n' .. Lines.current
  else
      message.content = message.content .. Lines.current
  end

  -- the file list starts here; it affects the *next* lines
  if string.find(Lines.current, '^[%<%{]') ~= nil then
      self.processingFilelist = true
  end

  Lines:handledChars()
  nextHandler = self
  return true
end

-- Dispatches the epilogue message, if there is one. There is no way to
-- tell where the epilogue ends, so main() calls this after the last line
-- is read.
function epilogueHandler:flush()
    local pending = self.message
    if pending == nil then return end

    dispatch(pending)
end


-------------------------------------------------------------------------------
-- underOverFullBoxHandler
--
-- Handles under/overfull multiline messages. These are usually important,
-- so we want to be able to filter them specifically and also to present
-- a summary about them. Besides that, they may contain spurious "(", ")",
-- "[", and "]" characters that might confuse the program if left as
-- "unrecognized".
-------------------------------------------------------------------------------

underOverFullBoxHandler = HandlerPrototype:new()

-- Checks whether the line at "position" (default 0) starts with an
-- under/overfull box message. On success, returns true plus a table with
-- the captured pieces: "underover" (the text before "full"), "verthoriz"
-- (the character between the backslash and "box"), "amount" (the balanced
-- parenthesized text) and "last" (index where the match ends). Otherwise,
-- returns false plus an empty table.
function underOverFullBoxHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)

  local first, last, underover, verthoriz, amount =
        string.find(line, "^([UO][nv][de][e]?r)full \\(.)box (%b())")

  if first ~= nil then
      return true, {underover = underover, verthoriz = verthoriz,
                    amount = amount, last = last}
  end

  return false, {}
end

-- Handles the first line of an under/overfull box message. Returns false
-- when the current line is not such a message, true otherwise.
--
-- The message is created with the data captured by canDoit(); its severity
-- is WARNING, or CRITICAL when the amount mentions "badness 10000". The
-- remainder of the line tells what comes next:
--
--  * "has occurred while \output is active": only the closing line follows
--  * "in ... at lines N--M": the offending text follows
--  * "detected at line N": the offending text follows only for \hbox
--
-- If none of these is found, a parsing error is reported on stderr and the
-- message is dispatched as it is.
function underOverFullBoxHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  flushUnrecognizedMessages()
  self.message = underOverMessage:new()
  self.message.content = string.sub(Lines.current, 1, data.last)
  self.message.underover = data.underover
  self.message.verthoriz = data.verthoriz
  self.message.amount = data.amount
  self.message.severity = WARNING
  local first = string.find(data.amount, 'badness 10000')
  if first ~= nil then self.message.severity = CRITICAL end
  Lines:handledChars(data.last)

  self.doit = self.handleClosing

  -- These must be local: without the declaration, "_" and "last"
  -- would be created (or overwritten) as global variables.
  local _, last = string.find(Lines.current,
                              "has occurred while \\output is active")
  if last == nil then
      _, last = string.find(Lines.current, "in %S+ at lines %d+%-%-%d+")
      if last == nil then
          _, last = string.find(Lines.current, "detected at line %d+")
          if last ~= nil and data.verthoriz == 'h' then
              self.doit = self.handleOffendingText
          end
      else
          self.doit = self.handleOffendingText
      end
  end

  if last == nil then
      io.stderr:write("    texlogsieve: parsing error\n")
      self.doit = self.handleFirstLine
      dispatch(self.message)
      return true
  end

  self.message.content = self.message.content
                         .. string.sub(Lines.current, 1, last)

  Lines:handledChars(last)
  nextHandler = self

  return true
end

underOverFullBoxHandler.doit = underOverFullBoxHandler.handleFirstLine

-- Handles the line that follows the first one when it holds the offending
-- text: the (unwrapped) line is stored whole in message.failedText, and
-- the next line is left for handleClosing(). Always returns true.
function underOverFullBoxHandler:handleOffendingText()
  self:unwrapLines()

  local message = self.message
  message.failedText = Lines.current
  Lines:handledChars()

  nextHandler = self
  self.doit = self.handleClosing
  return true
end

-- Finishes the message. If the current line consists only of a balanced
-- "[...]" group (optionally surrounded by spaces), the group is stored in
-- message.closing and the line is marked as handled; otherwise the line is
-- left untouched. Either way, the message is dispatched, the handler goes
-- back to its initial state and true is returned.
function underOverFullBoxHandler:handleClosing()
  local _, matchEnd, bracketed = string.find(Lines.current,
                                             '^%s*(%b[])%s*$')

  if matchEnd ~= nil then
      self.message.closing = bracketed
      Lines:handledChars(matchEnd)
  end

  dispatch(self.message)
  self.doit = self.handleFirstLine
  return true
end


-------------------------------------------------------------------------------
-- stringsHandler
--
-- This is the most complex handler. It deals with predefined strings that
-- may span multiple lines. The message may start anywhere on the line (this
-- handler can be in anywhereHandlers) and may end before the end of the
-- line. In fact, depending on max_print_line, it is even possible that the
-- many lines of the message are concatenated in a single line in the log
-- or, conversely, that some lines are wrapped. So, for example, the
-- developer may have envisioned a message like
--
-- ********************
-- *    Hi there!     *
-- ********************
--
-- but this might turn to
--
-- ******************** *    Hi there!     * ********************
--
-- or
--
-- ******************
-- **
-- *    Hi there!    
--  *
-- ******************
-- **
--
-- or even
--
-- ******************
-- ** *    Hi there! 
--     * ************
-- ********
--
-- So, what we do here is to consider every line in the search pattern as a
-- segment of the message that may start in the middle of a line and/or may
-- be wrapped at the end of the line. We match each of these segments in
-- turn. Because of that, the patterns should completely match all lines in
-- the message, not just the beginning of the line followed by ".*" or ".+".
--
-- Still, if you know that a specific message or line (1) always starts
-- at the beginning of the line, (2) never gets wrapped, and (3) is never
-- followed by another message in the same line, then you can match just
-- the beginning of the line and use ".*" for the rest. There are a few
-- messages below defined like this.
--
-- Note that, when the handler is executed, the pattern *is* at the beginning
-- of the current line (in some cases, with leading spaces) because, if there
-- was any leading text originally in the line, some previous handler has
-- already removed it. This is why we can (and should!) anchor the pattern
-- with "^%s*". As before, if you do know that the line always starts at the
-- beginning of a line, you may instead anchor the pattern with "^". Either
-- way, we assume we cannot know the true length of the line, so it may be
-- wrapped at any point. Also, do not forget about lookahead(), which removes
-- the "^" anchor.
-------------------------------------------------------------------------------

stringsHandler = HandlerPrototype:new()

-- Tries each pattern of self.patterns, in order, against the input at
-- "position". For the first one that matches, returns true plus the data
-- from canDoitRecursive() with the matching pattern added as
-- data.pattern. If none matches, returns false plus an empty table.
function stringsHandler:canDoit(position)
  local candidates = self.patterns

  for i = 1, #candidates do
      local pattern = candidates[i]
      local matched, data = self:canDoitRecursive(pattern, position, 0, 0)
      if matched then
          data.pattern = pattern
          return true, data
      end
  end

  return false, {}
end

-- Starts handling a message. Returns false if no pattern matches at the
-- current line. Otherwise it flushes any pending unrecognized text,
-- creates the message with this handler's severity, discards leading
-- spaces from the current line, records which pattern matched and lets
-- handleLines() process the first line; returns true.
function stringsHandler:handleFirstLine()
  local matched, data = self:canDoit()
  if not matched then return false end

  flushUnrecognizedMessages()

  local message = self:newMessage()
  message.severity = self.severity
  self.message = message

  local _, spacesEnd = string.find(Lines.current, '^%s+')
  if spacesEnd ~= nil then
      Lines:handledChars(spacesEnd)
  end

  self.patternLines = data.pattern -- the table with the pattern lines
  self.patternLineNumber = 1
  self.captures = {} -- just in case we want to use captures
  self.doit = self.handleLines -- for the next lines, if any

  -- with the setup done, the first line is handled like the others
  self:handleLines()

  return true
end

stringsHandler.doit = stringsHandler.handleFirstLine

-- Handles one line (segment) of the message: matches the current pattern
-- line against the input, adds the matched text to the message content
-- and either moves on to the next pattern line or, after the last one,
-- processes the captures and dispatches the message. If the pattern line
-- cannot be matched, a parsing error is reported on stderr and the message
-- is dispatched as it is. Always returns true.
function stringsHandler:handleLines()
  local lineNumber = self.patternLineNumber
  local pat = self.patternLines[lineNumber]

  -- A word of caution about unwrapUntilPatternMatches(): if the pattern
  -- does not describe the whole line, it will probably not unwrap the
  -- line and, even if it does, it cannot find the correct end of the
  -- line. On the other hand, if the pattern ends with something like
  -- ".+", it may try to unwrap needlessly. Such patterns should therefore
  -- only be used for lines that can never be wrapped.
  local last, found = unwrapUntilPatternMatches(pat)
  for _, capture in ipairs(found) do
      table.insert(self.captures, capture)
  end

  if last == nil then
      io.stderr:write("    texlogsieve: parsing error\n")
      dispatch(self.message)
      self.doit = self.handleFirstLine
      return true
  end

  local matchedText = string.sub(Lines.current, 1, last)
  if lineNumber == 1 then
      self.message.content = matchedText
  else
      self.message.content = self.message.content .. '\n' .. matchedText
  end

  Lines:handledChars(last)

  if lineNumber < #self.patternLines then
      -- there are more pattern lines to match
      self.patternLineNumber = lineNumber + 1
      nextHandler = self
  else
      self:processCaptures()
      dispatch(self.message)
      self.doit = self.handleFirstLine
  end

  return true
end

-- Patterns that span more than one line are handled one line at a time,
-- so each multiline pattern string is converted here to a table with one
-- item per line. Two lists are built: strictPatterns, with the patterns
-- as given, and loosePatterns, with the first character of each pattern
-- (the leading '^') removed. self.patterns ends up pointing to the
-- strict list.
function stringsHandler:init()
  local strict, loose = {}, {}
  self.strictPatterns = strict
  self.loosePatterns = loose

  for _, pat in ipairs(self.patterns) do
      local anchoredLines = linesToTable(pat)
      strict[#strict + 1] = anchoredLines

      -- same pattern, minus the leading '^'
      local unanchoredLines = linesToTable(string.sub(pat, 2))
      loose[#loose + 1] = unanchoredLines
  end

  self.patterns = strict
end

-- The pattern we want to check may stretch over several lines. This
-- function recursively checks each line of the pattern against the
-- corresponding input line, but only up to three lines, as that is
-- enough to make sure the pattern really matches.
--
-- Parameters:
--   patternLines: table with the lines of the pattern still to be checked;
--                 only the first item is matched in each call
--   position:     which input line to start at, as understood by
--                 Lines:get() (nil means 0)
--   offset:       how many characters at the beginning of that line to
--                 skip (only applied when positive)
--   depth:        how many pattern lines have already been matched
--
-- Returns true plus a table whose "first" field is the index where this
-- call's match starts (relative to the text examined here, i.e., after
-- skipping "offset" and possibly after concatenating wrapped lines), or
-- false plus an empty table.
function stringsHandler:canDoitRecursive(patternLines,
                                             position, offset, depth)

  local line
  if position == nil then position = 0 end
  line = Lines:get(position)

  -- skip what was processed in a previous iteration/recursion
  if offset > 0 then line = string.sub(line, offset +1) end

  -- the most recently appended line when unwrapping; empty at first
  local nextline = ""

  local patternLine = patternLines[1]

  -- each iteration tries to match and, on failure, appends
  -- one more input line (if the line seems to be wrapped)
  while true do
      local first, last = string.find(line, patternLine)
      local tmp = string.find(nextline, patternLine) -- see comment below

      if first ~= nil and tmp == nil then
          -- Found it!
          if depth > 2 -- 3 lines matched, that is enough
                       or #patternLines == 1 -- no more pattern lines
                       or Lines:get(position +1) == nil -- no more input lines

          then
              return true, {first = first}
          else
              -- this line matches; check the next ones too just to make sure
              if last < string.len(line) then
                  -- continue on the same line, skip what we already processed
                  -- (after unwrapping, offset is negative here, so the sum
                  -- discounts the length of the text that came before the
                  -- line now at "position")
                  offset = last + offset
              else
                  position = position +1 -- proceed to next line...
                  offset = 0 -- ...starting at the beginning of the line
              end

              depth = depth +1
              -- drop the pattern line that was just matched
              patternLines = {table.unpack(patternLines, 2)}

              local success = self:canDoitRecursive(patternLines,
                                                    position, offset, depth)

              -- the data from the recursive call is discarded;
              -- we report where *this* match starts
              return success, {first = first}
          end
      end

      -- no success, but can we unwrap this line?
      if not Lines:seemsWrapped(position) then return false, {} end
      -- yep! However, we need to be careful: if we match
      -- on the next line by itself, then the match is not
      -- the result of unwrapping, so we should return false.
      -- We only return true if the match occurs only when
      -- the lines are concatenated. Do not trust that the
      -- pattern is anchored to the beginning of the line,
      -- lookahead() changes this.
      offset = -1 * string.len(line)
      nextline = Lines:get(position +1)
      line = line .. nextline
      position = position +1
  end
end

-- Same as :canDoit(), except that the patterns are not anchored to the
-- beginning of the line: the loose patterns are put in place just for the
-- duration of the check and the strict ones are restored afterwards.
-- Used by handleUnrecognizedMessage; note the similarity to
-- openCloseHandlerPrototype:lookahead().
function stringsHandler:lookahead()
  self.patterns = self.loosePatterns
  local found, details = self:canDoit()
  self.patterns = self.strictPatterns

  return found, details
end

-- No-op in the prototype; takes no arguments and returns nothing.
-- NOTE(review): presumably a hook that derived handlers override to
-- post-process pattern captures - confirm against the callers.
function stringsHandler:processCaptures()
    -- by default, do nothing
end


-------------------------------------------------------------------------------
-- beginningOfLineDebugStringsHandler
-- beginningOfLineInfoStringsHandler
-- beginningOfLineWarningStringsHandler
-- anywhereDebugStringsHandler
-- anywhereInfoStringsHandler
-- anywhereWarningStringsHandler
-- (from stringsHandler)
--
-- they differ from the prototype only by severity level
-- and the set of patterns to search for.
-------------------------------------------------------------------------------

-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Messages matched by any of the patterns below get severity DEBUG.
-- The entries are Lua patterns (not regular expressions): "%" is the
-- escape character and ".-" is the lazy version of ".*".
beginningOfLineDebugStringsHandler = stringsHandler:new()
beginningOfLineDebugStringsHandler.severity = DEBUG
beginningOfLineDebugStringsHandler.patterns = {
  '^This is .*TeX, Version.*',
  '^%s*restricted system commands enabled%.',
  '^%s*entering extended mode',
  '^%s*restricted \\write18 enabled%.',
  '^%s*%%%&%-line parsing enabled%.',
  '^%*%*[%w%.]+', -- "**jobname"
  '^file:line:error style messages enabled%.',
  '^\\[^%s=]+=[^%s=]+', -- "\c@chapter=\count174"
  "^\\openout%d+%s*=%s*`?[^']+'?%.?",

  '^LaTeX2e <' .. datepat .. '>.*',

  '^Lua module: lualibs%-extended ' .. datepat
                   .. ' %S+ ConTeXt Lua libraries %-%- extended collection%.',

  '^Lua module: lualibs ' .. datepat
                   .. ' %S+ ConTeXt Lua standard libraries%.',

  '^Lua module: fontspec ' .. datepat
                   .. ' %S+ Font selection for XeLaTeX and LuaLaTeX',

  '^Lua module: lualatex%-math ' .. datepat
                   .. ' %S+ Patches for mathematics typesetting with LuaLaTeX',

  '^Lua module: luaotfload ' .. datepat
                   .. ' %S+ Lua based OpenType font support',

  -- "|" is not a magic character in Lua patterns, so it is matched literally
  '^luaotfload | init : Context OpenType loader version.*',

  '^luaotfload | init : Loading fontloader '
                   .. '["“][^"]+["”] from .-["“][^"]+["”]%.',

  -- there may be dots in the path, so we need to
  -- anchor the final dot to the end of the line
  '^luaotfload | conf : Root cache directory is "?[^"]-"?%.$',
  '^luaotfload | db : Font names database loaded from .-%.luc',
  '^luaotfload | cache : Lookup cache loaded from .-%.luc%.',
  '^luaotfload | main : initialization completed in [%d%.]+ seconds',

  '^Lua%-only attribute.-=%s*%S+',
  "^Inserting %b`' at position .- in %b`'%.",

  "^For additional information on amsmath, use the `%?' option%.",

  "^Loading configuration file `" .. filepat .. "'%.",
  "^contour: Using driver file `" .. filepat .. "'%.",

  '^ABD: EverySelectfont initializing macros',
  '^ABD: EveryShipout initializing macros',

  '^%[Loading MPS to PDF converter %(version ' .. datepat .. '%)%.%]',


  '^See the caption package documentation for explanation%.',

  -- One entry per luaotfload submodule banner. Several of them
  -- (letterspace, embolden, notdef) expect the description "color".
  -- NOTE(review): presumably this mirrors what luaotfload itself
  -- prints - confirm before "fixing" the descriptions.
  '^Lua module: luaotfload%-main ' .. datepat
                   .. ' [%d%.]+ luaotfload entry point',

  '^Lua module: luaotfload%-init ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / initialization',

  '^Lua module: luaotfload%-log ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / logging',

  '^Lua module: luaotfload%-parsers ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / filelist',

  '^Lua module: luaotfload%-configuration ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / config file reader',

  '^Lua module: luaotfload%-loaders ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / callback handling',

  '^Lua module: luaotfload%-database ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / database',

  '^Lua module: luaotfload%-unicode ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / Unicode helpers',

  '^Lua module: luaotfload%-colors ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-resolvers ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / resolvers',

  '^Lua module: luaotfload%-features ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / features',

  '^Lua module: luaotfload%-letterspace ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-embolden ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-notdef ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-auxiliary ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / auxiliary functions',

  '^Lua module: luaotfload%-multiscript ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / multiscript',

  -- The remaining patterns contain "\n": they describe messages
  -- that span more than one line of the log file.
  '^' .. string.rep('%*', 37) .. '\n%* Using libertinus math %*\n'
                   .. string.rep('%*', 37),

  '^`inconsolata%-zi4\' v%S-, ' .. datepat
                   .. ' Text macros for Inconsolata %(msharpe%)',

  '^Requested font ".-" at [%d%.]+pt\n %-> ' .. filepat,
  '^Requested font ".-" scaled %d+\n %-> ' .. filepat,
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Messages matched by any of the patterns below get severity DEBUG.
anywhereDebugStringsHandler = stringsHandler:new()
anywhereDebugStringsHandler.severity = DEBUG
anywhereDebugStringsHandler.patterns = {
  '^%s*L3 programming layer %b<> xparse %b<>',
  '^%s*%{.*pdftex%.map%}',

  -- The three "<...>" patterns go from most to least specific,
  -- because the first match wins.
  -- <blah.jpg, id=555, [...,] 722.7pt x 722.7pt>
  '^%s*%<' .. filepat .. ', id=.- [%d%.]+pt x [%d%.]+pt%>',
  '^%s*%<use ' .. filepat .. '%>', -- <use blah.jpg>
  '^%s*%<' .. filepat .. '%>', -- <blah.jpg>
}


-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Messages matched by any of the patterns below get severity INFO.
beginningOfLineInfoStringsHandler = stringsHandler:new()
beginningOfLineInfoStringsHandler.severity = INFO
beginningOfLineInfoStringsHandler.patterns = {
  "^Writing index file.*%.idx",
  "^%*geometry%* driver:.*",
  "^%*geometry%* detected driver:.*",
  "^Driver file for pgf:.*%.def",
  "^%s*file:line:error style messages enabled",
  "^Applying: %b[] float order in 2%-column on input line .-%.",
  "^Already applied: %b[] float order in 2%-column on input line .-%.",
  "^\\%S+ = a dialect from .*",
  -- TODO: we should capture the jobname and use it here
  "^No file .-%.aux%.",
  "^No file .-%.ind%.",
  "^No file .-%.bbl%.",
  -- a single four-line message; each "\n" marks a line
  -- break that is expected in the log file
  "^reledmac reminder:%s*\n"
    .. "%s*The number of the footnotes in this section "
    .. "has changed since the last run.\n"
    .. "%s*You will need to run LaTeX two more times "
    .. "before the footnote placement\n"
    .. "%s*and line numbering in this section are correct%.",
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Messages matched by any of the patterns below get severity INFO.
anywhereInfoStringsHandler = stringsHandler:new()
anywhereInfoStringsHandler.severity = INFO
anywhereInfoStringsHandler.patterns = {
  -- TODO: there are other "... patterns for blah blah"
  --       in texmf-dist/tex/generic/hyph-utf8/loadhyph

  -- The long form (with the `dehypht-x' tag and date) must come before
  -- the short "German Hyphenation Patterns" form: the first match wins.
  "^%s*German Hyphenation Patterns %(Traditional Orthography%) "
                            .. "`dehyphts?%-x' " .. datepat .. " %(WL%)",

  '^%s*UTF%-8 German hyphenation patterns %(traditional orthography%)',
  '^%s*EC German hyphenation patterns %(traditional orthography%)',
  '^%s*German Hyphenation Patterns %(Traditional Orthography%)',

  "^%s*Swiss%-German Hyphenation Patterns %(Traditional Orthography%) "
                            .. "`dehyphts?%-x' " .. datepat .. " %(WL%)",

  '^%s*UTF%-8 Swiss%-German hyphenation patterns %(traditional orthography%)',
  '^%s*EC Swiss%-German hyphenation patterns %(traditional orthography%)',
  '^%s*Swiss%-German Hyphenation Patterns %(Traditional Orthography%)',

  -- generic fallbacks, deliberately placed after the specific ones above
  '^%s*ASCII Hyphenation patterns for American English',
  '^%s*UTF%-8 %S+ hyphenation patterns',
  '^%s*EC %S+ hyphenation patterns',
}


-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Messages matched by any of the patterns below get severity WARNING.
beginningOfLineWarningStringsHandler = stringsHandler:new()
beginningOfLineWarningStringsHandler.severity = WARNING
beginningOfLineWarningStringsHandler.patterns = {
  '^luaotfload | aux : font no .- does not define feature '
                            .. '.- for script .- with language %S+',

  '^luaotfload | aux : font no .- %b() defines no feature for script %S+',
  '^luaotfload | aux : no font with id %d+',

  -- NOTE(review): '%-%.' only matches the literal name "-."; if any
  -- destination name should be accepted, this was probably meant to
  -- be '.-' - confirm against real log files.
  "^warning  %(pdf backend%): ignoring duplicate destination "
                            .. "with the name '%-%.'",

  "^Couldn't patch \\%S+",
  "^Invalid UTF%-8 byte or sequence at line %d+ replaced by U%+FFFD%.",

  -- three-line message; "\n" marks the expected line breaks
  '^Requested font ".-" at [%d%.]+pt\n'
                            .. "Unknown feature %b`' in font %b`'%.\n"
                            .. ' %-> ' .. filepat,
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- Severity WARNING; the pattern list is currently empty.
anywhereWarningStringsHandler = stringsHandler:new()
anywhereWarningStringsHandler.severity = WARNING
anywhereWarningStringsHandler.patterns = {
}


-------------------------------------------------------------------------------
-- missingCharHandler
-- (from stringsHandler)
--
-- this differs from the prototype by severity level, the set of
-- patterns to search for, and because it uses a different kind of
-- Message object (we want to summarize missing characters specially)
-------------------------------------------------------------------------------

missingCharHandler = stringsHandler:new()
missingCharHandler.severity = CRITICAL

missingCharHandler.patterns = {
  -- greedy ".*!" : the match runs up to the last "!" on the line
  '^Missing character: There is no .- in font .*!',
}

-- Returns a fresh missingCharMessage (instead of a plain message
-- object) so that missing characters can be summarized specially.
function missingCharHandler:newMessage()
  return missingCharMessage:new()
end


-------------------------------------------------------------------------------
-- genericLatexHandler
--
-- Messages generated by GenericInfo, PackageWarning etc., such as
--
--   Package babel Info: \l@canadian = using hyphenrules for english
--   (babel)             (\language0) on input line 102.
--
-- or
--
--   LaTeX Info: Redefining \setlength on input line 80.
--
-- These messages always start at the beginning of a line and end at the
-- end of a line. There may be more than one line; subsequent lines have
-- a specific prefix derived from the package/class name (which can be
-- obtained from the first line). We look for this prefix to detect such
-- subsequent lines.
-------------------------------------------------------------------------------

genericLatexHandler = HandlerPrototype:new()

-- Each pattern captures, in this order, what unpackData() expects:
-- (what) (name) (severity), e.g. "Package" "babel" "Info".
genericLatexHandler.patterns = {
  "^(Package)%s+(%S+)%s+(%S+): ",
  "^(Class)%s+(%S+)%s+(%S+): ",
  "^(LaTeX)%s+(%S+)%s+(%S+): ",
  "^(Module)%s+(%S+)%s+(%S+): ", -- Only ever saw "Module microtype Info"
  "^(Package)%s+(%S+)%s+(notification) ", -- pgfplots does this
}

-- Checks whether the line at "position" (default: 0, the current
-- line) matches one of self.patterns; the first matching pattern wins.
--
-- Returns true plus a table {last, capture1, capture2, ...} on
-- success, or false plus an empty table otherwise.
function genericLatexHandler:canDoit(position)
  if position == nil then position = 0 end
  local text = Lines:get(position)

  for _, pat in ipairs(self.patterns) do
      -- This prototype is reused with other pattern sets, so the
      -- number of captures is unknown: collect everything that
      -- string.find() returns and drop the leading "first" index.
      local found = {string.find(text, pat)}
      table.remove(found, 1)

      -- found[1] is now "last"; it is nil when there was no match
      if found[1] ~= nil then return true, found end
  end

  return false, {}
end

-- Copies what canDoit() found into self.message and computes the
-- prefix that identifies continuation lines.
--
-- data: {last, what, name, severity}, as returned by canDoit()
function genericLatexHandler:unpackData(data)
  local last, what, name, severity = data[1], data[2], data[3], data[4]

  local msg = self.message
  msg.what = what
  msg.name = name
  msg.severity = self:parseSeverity(severity)
  msg.content = Lines.current

  -- findPrefix() stores its result in self.prefix
  self:findPrefix(last, name, what)
  msg.prefix = self.prefix
end

-- Handles the first line of a message. Returns false (and does
-- nothing) if the current line does not match any of self.patterns;
-- otherwise creates the message, consumes the line, installs
-- handleOtherLines as self.doit, registers this handler as
-- nextHandler and returns true.
function genericLatexHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  flushUnrecognizedMessages()
  -- NOTE(review): unwrapLines() consults self.prefix, which at this
  -- point still holds whatever the previous message left there (it
  -- is only reset below) - confirm this is intended.
  self:unwrapLines()

  -- erase any previous values; nil is not a good idea! If one of these
  -- is nil in a derived object, the object may grab the value of the
  -- parent object from some previous message.
  self.linenum = ""
  self.prefix = ""

  self.message = self:newMessage()
  self.message.content = ""
  self.message.severity = self.severity
  self:unpackData(data) -- process whatever canDoit() gave us

  self:extractLinenum()
  Lines:handledChars()

  -- the following lines are ours until one of them lacks the prefix
  self.doit = self.handleOtherLines
  nextHandler = self
  return true
end

-- initial state: wait for the first line of a message
genericLatexHandler.doit = genericLatexHandler.handleFirstLine

-- Handles the lines that follow the first line of a message. If the
-- current line starts with the continuation prefix, it is appended
-- to the message and we stay in charge of the next line; if not, the
-- message is complete and gets dispatched (the current line is left
-- untouched). Always returns true.
function genericLatexHandler:handleOtherLines()
  local _, prefixEnd = string.find(Lines.current, '^' .. self.prefix)

  if prefixEnd == nil then
      -- not a continuation line: finish the message and go
      -- back to waiting for the first line of a new one
      self.doit = self.handleFirstLine
      if self.linenum ~= "" then
          self.message.linenum = self.linenum
      end
      self:adjustSeverity()
      dispatch(self.message)
      return true
  end

  self:unwrapLines()
  Lines:handledChars(prefixEnd) -- drop the prefix
  self:extractLinenum()

  self.message.content = self.message.content .. '\n' .. Lines.current

  Lines:handledChars()
  nextHandler = self
  return true
end

-- Builds the pattern that matches the beginning of continuation
-- lines and stores it in self.prefix. Such lines look like
--
--   ^(pkgname) [padding spaces] blah blah
--
-- lastcol: column where the first-line match ended; the prefix has
--          exactly this width
-- name:    package/class name, or nil if the prefix is only spaces
-- what:    not used here
function genericLatexHandler:findPrefix(lastcol, name, what)
  local label, padding

  if name == nil then
      label = ""
      padding = lastcol
  else
      local tag = "(" .. name .. ")"
      -- the padding is computed from the raw text, before escaping
      padding = lastcol - string.len(tag)
      label = protect_metachars(tag)
  end

  self.prefix = label .. string.rep(" ", padding)
end

-- Looks for an input line number in the current line and, if one is
-- found, stores it (as a string of digits) in self.linenum. Does
-- nothing if a line number was already found for this message.
--
-- When nothing is found, self.linenum is left untouched (i.e., "")
-- instead of being set to nil: a nil here would (1) make the "not
-- found yet" test below fail for all subsequent lines, (2) prevent
-- the LaTeX3-style fallback from ever running and (3) allow a derived
-- object to pick up the value of its parent object.
function genericLatexHandler:extractLinenum()
  if self.linenum ~= nil and self.linenum ~= "" then return end

  local linenum = string.match(Lines.current, "on input line (%d+)%.")

  if linenum == nil then
      -- LaTeX3-style messages (with \msg_something)
      linenum = string.match(Lines.current, "on line (%d+)$")
  end

  if linenum ~= nil then self.linenum = linenum end
end

-- Translates the severity text of a message ("Info", "Warning" etc.,
-- case-insensitive) to INFO or WARNING. A nil or empty text yields
-- the default severity of the handler.
function genericLatexHandler:parseSeverity(severity)
  if severity == nil or severity == "" then return self.severity end

  -- tocbibind uses "Note"
  local informational = {info = true, notification = true, note = true}

  if informational[string.lower(severity)] then return INFO end

  -- anything else is treated as a warning
  return WARNING
end

-- Like HandlerPrototype:unwrapLines(), but more conservative: even
-- if the current line seems to be wrapped, we do not join it with
-- the next one when there is evidence that the message line is
-- actually complete.
function genericLatexHandler:unwrapLines()
  while Lines:seemsWrapped() do
      -- The length is right and no other handler wants the next
      -- line, but it may simply be a "normal" continuation line
      -- (one that starts with our prefix)
      if string.find(Lines:get(1), '^' .. self.prefix) ~= nil then
          break
      end

      -- Almost certainly wrapped by now; still, a line that ends
      -- with the line number is taken to be complete
      if string.find(Lines.current, 'on input line %d+%.$') ~= nil then
          break
      end

      Lines:unwrapOneLine()
  end
end

-- LaTeX itself only knows about "INFO" and "WARNING", which is
-- very limited. Demote to INFO the messages that match one of
-- self.downgradePatterns and promote to CRITICAL the ones that
-- match one of self.upgradePatterns. The upgrade check comes
-- last, so it prevails if both lists match.
function genericLatexHandler:adjustSeverity()
  local content = self.message.content

  -- true if at least one of the patterns occurs in the message
  local function matchesAny(patterns)
      for _, pat in ipairs(patterns) do
          if string.find(content, pat) ~= nil then return true end
      end
      return false
  end

  if matchesAny(self.downgradePatterns) then
      self.message.severity = INFO
  end

  if matchesAny(self.upgradePatterns) then
      self.message.severity = CRITICAL
  end
end

-- Messages whose content matches one of these are demoted to INFO by
-- adjustSeverity(). The patterns are unanchored and searched in the
-- whole (possibly multi-line) message; "%s*" / "%s+" also match the
-- line breaks between the lines of the message.
genericLatexHandler.downgradePatterns = {
    -- No need to include the full message
    "File %b`' already exists on the system%."
            .. "%s*Not generating it from",
    "You have requested package %b`',"
            .. "%s*but the package provides",
    "Writing file %b`'",
    "Form Feed has been converted to Blank",
    "Tab has been converted to Blank",
    "The morewrites package is unnecessary",
    'Unused \\captionsetup%b[]',
}

-- Messages whose content matches one of these are promoted to
-- CRITICAL by adjustSeverity().
genericLatexHandler.upgradePatterns = {
    "Label %b`' multiply defined",
    "Command .- invalid in math mode",
    "Optional argument of \\twocolumn too tall on page",
    "Marginpar on page %S- moved",
    "Some font shapes were not available, defaults substituted%.",
    "Font shape %b`' in size %b<> not available"
            .. "%s+Font shape %b`' tried instead",
    "Font shape %b`' in size %S+ not available"
            .. "%s+external font %b`' used",
    "Font shape %b`' undefined"
            .. "%s+using %b`' instead",
}


-------------------------------------------------------------------------------
-- latex23MessageHandler
-- genericLatexVariantHandler
-- (from genericLatexHandler)
--
-- They differ from the prototype by the set of patterns to search for and by
-- the unpackData() method, which deals with the specific pattern captures
-- defined in the search patterns.
-------------------------------------------------------------------------------

latex23MessageHandler = genericLatexHandler:new()

-- captures: (what) (severity); there is no package name
latex23MessageHandler.patterns = {
  "^(LaTeX3?)%s+(%S+): "
}

-- data: {last, what, severity}, as returned by canDoit()
function latex23MessageHandler:unpackData(data)
  local last, what, severity = data[1], data[2], data[3]

  local msg = self.message
  msg.severity = self:parseSeverity(severity)
  msg.what = what
  msg.content = Lines.current

  -- Messages that start with "LaTeX..." carry no package name; for
  -- the ones that start with "LaTeX3...", "LaTeX3" itself plays the
  -- role of the name (and therefore shows up in the prefix)
  local name = (what == 'LaTeX3') and 'LaTeX3' or nil

  self:findPrefix(last, name, what)
  msg.prefix = self.prefix
end

genericLatexVariantHandler = genericLatexHandler:new()

-- captures: (what) (name) (severity) (line number)
genericLatexVariantHandler.patterns = {
  "^(Package) (%S+) (%S+) on input line (%S+): ",
}

-- data: {last, what, name, severity, linenum}, as returned by canDoit()
function genericLatexVariantHandler:unpackData(data)
  local last, what, name, severity = data[1], data[2], data[3], data[4]

  -- here the line number is part of the message header,
  -- so there is no need to search for it later on
  self.linenum = data[5]

  local msg = self.message
  msg.what = what
  msg.name = name
  msg.severity = self:parseSeverity(severity)
  msg.content = Lines.current

  self:findPrefix(last, name, what)
  msg.prefix = self.prefix
end


-------------------------------------------------------------------------------
-- citationHandler
-- referenceHandler
-- labelHandler
-- (from genericLatexHandler)
--
-- They differ from the prototype by the set of patterns to search for, by
-- the unpackData() method, and by the message type generated, because we
-- want to generate dedicated summaries for them
-------------------------------------------------------------------------------
citationHandler = genericLatexHandler:new()

-- captures: (what) (severity) (name) (quoted key) [(page)]
-- The variant with the page number comes first: the first match wins.
citationHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Citation) (%b`') on page (.-) undefined",
  "^(LaTeX)%s+(Warning): (Citation) (%b`') undefined",
}

-- data: {last, what, severity, name, quoted key, page}, as returned by
-- canDoit(); page is nil if the pattern that matched does not capture it
function citationHandler:unpackData(data)
  local last, what, severity, name = data[1], data[2], data[3], data[4]
  local quotedKey, page = data[5], data[6]

  local msg = self.message
  msg.what = what
  msg.name = name
  msg.severity = self:parseSeverity(severity)
  msg.key = string.sub(quotedKey, 2, -2) -- strip the `' quotes
  msg.page = page
  msg.content = Lines.current

  -- no name in the prefix: continuation lines start with spaces only
  self:findPrefix(last, nil, what)
  msg.prefix = self.prefix
end

-- a dedicated message type, so that citations get their own summary
function citationHandler:newMessage()
  return citationMessage:new()
end

-- Derived from citationHandler: unpackData() is inherited, only the
-- pattern and the kind of message created are different.
referenceHandler = citationHandler:new()

-- captures: (what) (severity) (name) (quoted key) (page)
referenceHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Reference) (%b`') on page (.-) undefined",
}

-- a dedicated message type, so that references get their own summary
function referenceHandler:newMessage()
  return referenceMessage:new()
end

-- Derived from citationHandler: unpackData() is inherited, only the
-- pattern and the kind of message created are different.
labelHandler = citationHandler:new()

-- captures: (what) (severity) (name) (quoted key); there is no page
-- capture, so the inherited unpackData() leaves message.page as nil
labelHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Label) (%b`') multiply defined",
}

-- a dedicated message type, so that labels get their own summary
function labelHandler:newMessage()
  return labelMessage:new()
end


-------------------------------------------------------------------------------
-- providesHandler
-- (from genericLatexHandler)
--
-- Handles the lines generated by the \Provides* LaTeX commands, such as:
--
--   Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)
--
-- or
--
--   File: size11.clo 2020/04/10 v1.4m Standard LaTeX file (size option)
--
-- There is code to specifically handle a hardcoded line break
-- in a few packages (atbegshi-ltx, atveryend-ltx etc.)
-------------------------------------------------------------------------------

providesHandler = genericLatexHandler:new()
providesHandler.severity = INFO

-- captures: (what) (name)
providesHandler.patterns = {
  "^(Document Class):%s+(%S+)%s+",
  "^(Package):%s+(%S+)%s+",
  "^(File):%s+(%S+)%s+",
  "^(Language):%s+(%S+)%s+", -- this and the next come from babel
  "^(Dictionary):%s+(%S+)%s+"
}

-- data: {last, what, name}, as returned by canDoit() ("last" is not used)
function providesHandler:unpackData(data)
  local what, name = data[2], data[3]

  local msg = self.message
  msg.what = what
  msg.name = name
  msg.content = Lines.current

  -- These messages have no continuation lines marked by a
  -- prefix, but the generic code checks for one anyway;
  -- give it something that can never match.
  self.prefix = '[^%s%S]+' -- nothing matches

  -- The hardcoded line break of some packages: if the next line has
  -- the text "with kernel methods", it belongs to this message too
  if not Lines:empty()
          and string.find(Lines:get(1), 'with kernel methods') ~= nil
  then
      msg.content = msg.content .. ' ' .. Lines:get(1)
      Lines:gotoNextLine()
  end

  -- exactly one space before "with kernel methods", whatever the input
  msg.content = string.gsub(msg.content,
                        '%s*with kernel methods', ' with kernel methods')
end


-------------------------------------------------------------------------------
-- geometryDetailsHandler
-- (from genericLatexHandler)
--
-- Handles the output from the geometry package with the "verbose" option.
-- Inheriting from genericLatexHandler is a little hackish, since
-- the message does not really fit the "standard" message format (we define
-- the prefix statically instead of computing it from the message itself),
-- but works well and is simple.
-------------------------------------------------------------------------------

geometryDetailsHandler = genericLatexHandler:new()
geometryDetailsHandler.severity = INFO

geometryDetailsHandler.patterns = {
    '^%*geometry%* verbose mode',
}

-- Nothing to extract from the captures ("data" is ignored): the
-- name and the continuation prefix are always the same.
function geometryDetailsHandler:unpackData(data)
  -- continuation lines start with an asterisk followed by a space
  local fixedPrefix = '* '

  self.prefix = fixedPrefix
  self.message.prefix = fixedPrefix
  self.message.name = 'geometry'
  self.message.content = Lines.current
end

-------------------------------------------------------------------------------
-- openParensHandler
-- closeParensHandler
-- openSquareBracketHandler
-- closeSquareBracketHandler
--
-- These determine whether the chars "(", ")", "[", and "]" are ordinary
-- characters or if they indicate open/close file or begin/end shipout.
-- Detecting files allows us to filter the output according to which file
-- is being processed; detecting shipouts allows us to indicate the page
-- that a message originated from.
--
-- Although openParensHandler is very similar to openSquareBracketHandler
-- and closeParensHandler is very similar to closeSquareBracketHandler,
-- we decided not to use inheritance to reuse the code (except for the
-- lookahead() method). We would avoid a little repetition, but the code
-- size would probably be the same and it would be more complex.
--
-- The fact that the code is similar may seem a little odd at first: While
-- the openFiles stack may contain many entries (because an open file may
-- request to open another file etc.), the same is obviously not true for
-- the shipouts stack. Still, we use a stack too because we need to handle
-- "[" and "]" characters that do not correspond to real shipouts, just
-- like we do with "(" and ")".
-------------------------------------------------------------------------------

openCloseHandlerPrototype = HandlerPrototype:new()

-- Just like :canDoit(), but does not anchor patterns to the
-- beginning of the line (used by handleUnrecognizedMessage).
-- Notice the similarity to stringsHandler:lookahead().
--
-- Returns whatever :canDoit() returns (match flag and data table).
function openCloseHandlerPrototype:lookahead()
    -- temporarily use the unanchored pattern
    self.pattern = self.loosePattern
    -- these must be local, otherwise we create/overwrite
    -- the global variables "match" and "data"
    local match, data = self:canDoit()
    self.pattern = self.strictPattern

    return match, data
end

openParensHandler = openCloseHandlerPrototype:new()

-- strictPattern is for normal operation (the capture holds the spaces
-- before the parens); loosePattern is used by lookahead() only
openParensHandler.strictPattern = "^(%s*)%("
openParensHandler.loosePattern = "%s*%("
openParensHandler.pattern = openParensHandler.strictPattern

-- Checks whether the line at "position" (default: 0, the current
-- line) matches self.pattern, i.e., has an open parens character.
--
-- Returns false and an empty table if it does not; otherwise, returns
-- true and a table with "first" (where the match starts) and
-- "filename" (the result of guessFilename(), which is nil when the
-- parens does not seem to indicate that a file is being opened).
function openParensHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)

  local first = string.find(line, self.pattern)
  if first == nil then return false, {} end

  -- guessFilename() works from the position, not from a substring
  local filename = guessFilename(position)

  return true, {first = first, filename = filename} -- might be nil
end

-- Processes an open parens character at the beginning of the current
-- line. If it is followed by a filename, registers the file as open
-- and dispatches an openFileMessage; if not, the character is just
-- ordinary text, which goes to the unrecognized buffer (a "DUMMY"
-- entry on the stack allows us to pair it with its close parens).
-- Returns false if the line does not start with an open parens.
function openParensHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  local _, parenPos, leading = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. leading

  -- skip the spaces and the open parens character
  Lines:handledChars(parenPos)

  local filename = data.filename

  if filename == nil then
      openFiles:push("DUMMY")
      unrecognizedBuffer = unrecognizedBuffer .. "("
      return true
  end

  flushUnrecognizedMessages()

  -- the filename may be wrapped; consume all of it
  local nameEnd = unwrapUntilStringMatches(filename)
  if nameEnd ~= nil then
      Lines:handledChars(nameEnd)
  else
      io.stderr:write("    texlogsieve: parsing error\n")
  end

  openFiles:push(filename)
  mute = currentFileIsSilenced()

  local msg = openFileMessage:new()
  msg.severity = DEBUG
  msg.filename = filename
  msg.content = "Open file " .. filename
  dispatch(msg)

  return true
end

closeParensHandler = openCloseHandlerPrototype:new()

-- strictPattern is for normal operation (the capture holds the spaces
-- before the parens); loosePattern is used by lookahead() only
closeParensHandler.strictPattern = "^(%s*)%)"
closeParensHandler.loosePattern = "%s*%)"
closeParensHandler.pattern = closeParensHandler.strictPattern

-- Checks whether the line at "position" (default: 0, the current
-- line) matches self.pattern. Returns true and {first = <start of
-- the match>} if so, false and an empty table otherwise.
function closeParensHandler:canDoit(position)
  if position == nil then position = 0 end

  local first = string.find(Lines:get(position), self.pattern)
  if first ~= nil then return true, {first = first} end

  return false, {}
end

-- Processes a close parens character at the beginning of the current
-- line. If the top of the openFiles stack is a real file, dispatches
-- a closeFileMessage; otherwise (empty stack or "DUMMY" entry), the
-- character is just ordinary text and goes to the unrecognized buffer.
-- Returns false if the line does not start with a close parens.
function closeParensHandler:doit()
  if not self:canDoit() then return false end

  local _, parenPos, leading = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. leading

  -- skip the spaces and the close parens character
  Lines:handledChars(parenPos)

  local closedFile = openFiles:pop()
  local isRealFile = closedFile ~= nil and closedFile ~= "DUMMY"

  if isRealFile then
      flushUnrecognizedMessages()
      local msg = closeFileMessage:new()
      msg.severity = DEBUG
      msg.content = "Close file " .. closedFile
      dispatch(msg)
      -- the file at the top of the stack changed
      mute = currentFileIsSilenced()
  else
      unrecognizedBuffer = unrecognizedBuffer .. ")"
  end

  return true
end

openSquareBracketHandler = openCloseHandlerPrototype:new()

-- strictPattern is for normal operation (the capture holds the spaces
-- before the bracket); loosePattern is used by lookahead() only
openSquareBracketHandler.strictPattern = "^(%s*)%["
openSquareBracketHandler.loosePattern = "%s*%["
openSquareBracketHandler.pattern = openSquareBracketHandler.strictPattern

-- Checks whether the line at "position" (default: 0, the current
-- line) matches self.pattern, i.e., has an open square bracket.
--
-- Returns false and an empty table if it does not; otherwise, returns
-- true and a table with "first" (where the match starts) and
-- "latexPage" (the result of guessShipoutPage(), which is nil when
-- the bracket does not seem to indicate the beginning of a shipout).
function openSquareBracketHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)

  local first = string.find(line, self.pattern)
  if first == nil then return false, {} end

  -- guessShipoutPage() works from the position, not from a substring
  local latexPage = guessShipoutPage(position)

  return true, {first = first, latexPage = latexPage} -- may be nil
end

-- Processes an open square bracket at the beginning of the current
-- line. If it is followed by a page number, registers the beginning
-- of a shipout and dispatches a beginShipoutMessage; if not, the
-- character is just ordinary text, which goes to the unrecognized
-- buffer (a "DUMMY" entry on the stack allows us to pair it with its
-- close bracket). Returns false if the line does not start with "[".
function openSquareBracketHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  local _, bracketPos, leading = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. leading

  -- skip the spaces and the open square bracket character
  Lines:handledChars(bracketPos)

  local latexPage = data.latexPage

  if latexPage == nil then
      shipouts:push("DUMMY")
      unrecognizedBuffer = unrecognizedBuffer .. "["
      return true
  end

  flushUnrecognizedMessages()

  -- the page number may be wrapped; consume all of it
  local pageEnd = unwrapUntilStringMatches(latexPage)
  if pageEnd ~= nil then
      Lines:handledChars(pageEnd)
  else
      io.stderr:write("    texlogsieve: parsing error\n")
  end

  shipouts:push(latexPage)

  -- map the physical page (shipout count) to the LaTeX page number
  numShipouts = numShipouts +1
  table.insert(latexPages, numShipouts, latexPage)

  local msg = beginShipoutMessage:new()
  msg.physicalPage = numShipouts
  dispatch(msg)

  return true
end

closeSquareBracketHandler = openCloseHandlerPrototype:new()

-- strictPattern is for normal operation (the capture holds the spaces
-- before the bracket); loosePattern is used by lookahead() only
closeSquareBracketHandler.strictPattern = "^(%s*)%]"
closeSquareBracketHandler.loosePattern = "%s*%]"
closeSquareBracketHandler.pattern = closeSquareBracketHandler.strictPattern

-- Checks whether the line at "position" (default: 0, the current
-- line) matches self.pattern. Returns true and {first = <start of
-- the match>} if so, false and an empty table otherwise.
function closeSquareBracketHandler:canDoit(position)
  if position == nil then position = 0 end

  local first = string.find(Lines:get(position), self.pattern)
  if first ~= nil then return true, {first = first} end

  return false, {}
end

-- Processes a close square bracket at the beginning of the current
-- line. If the top of the shipouts stack is a real page, dispatches
-- an endShipoutMessage; otherwise (empty stack or "DUMMY" entry), the
-- character is just ordinary text and goes to the unrecognized buffer.
--
-- Returns false if the line does not start with a close square
-- bracket and true if the character was processed, just like the
-- doit() methods of the other open/close handlers.
function closeSquareBracketHandler:doit()
  local myTurn = self:canDoit()
  if not myTurn then return false end

  local _, last, spaces = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. spaces

  -- skip the spaces and the close square bracket character
  Lines:handledChars(last)

  local latexPage = shipouts:pop()
  if latexPage == nil or latexPage == "DUMMY" then
      unrecognizedBuffer = unrecognizedBuffer .. "]"
  else
      flushUnrecognizedMessages()
      local msg = endShipoutMessage:new()
      msg.physicalPage = numShipouts
      dispatch(msg)
  end

  -- we did consume input, so we must say so; falling off the end of
  -- the function would return nil, which the caller reads as "false"
  return true
end


-------------------------------------------------------------------------------
-- utf8FontMapHandler
--
-- This handles the encoding-related multi-line messages generated by
-- inputenc/fontenc with pdftex and utf8, similar to:
--
--   Now handling font encoding LS1 ...
--   ... no UTF-8 mapping file for font encoding LS1
--
-- or
--
--   Now handling font encoding LY1 ...
--   ... processing UTF-8 mapping file for font encoding LY1
--      defining Unicode char U+00A0 (decimal 160)
--
-- We could handle these by inheriting from genericLatexHandler, which
-- might be a good idea as this handler is quite complex. Still, the
-- advantage of this handler is that it groups the first lines (that
-- indicate the encoding) and the continuation lines as a unit, even
-- though there are other messages in between that are handled by other
-- handlers. We use two strategies to deal with continuation lines to
-- accomplish this:
--
--  1. After the first line, we change the self.doit method and define
--     ourselves as nextHandler, because we know the next line "belongs"
--     to us (nothing new here, genericLatexHandler does this too)
--
--  2. After the second line, we do not know whether the next line "belongs"
--     to us (actually, it most likely does not). So, we just change the
--     self.doit method and wait to be called by chooseHandler() again a
--     few lines down the road. When one of the patterns we are looking for
--     matches, we are back in business. This is why handleOtherLines must
--     check for the base "Now handling font encoding" pattern too: there
--     might not be continuation lines and we need to handle a new message
--     instead at some other point in the input file.
-------------------------------------------------------------------------------

utf8FontMapHandler = HandlerPrototype:new()

-- we repeat here the tests we make on the other methods of this
-- object, which is somewhat dirty.
--
-- Checks whether the line at "position" (default: 0, the current
-- line) is something this handler expects in its current state
-- (the state is given by which method self.doit points to).
--
-- Returns true and {encoding = <name>} on success (encoding is only
-- available when we are waiting for the first line, nil otherwise);
-- returns false and an empty table on failure.
function utf8FontMapHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)

  local first, encoding
  if self.doit == self.handleFirstLine then
      local _ -- keep the throwaway value out of the global namespace
      first, _, encoding = string.find(line,
              "^Now handling font encoding (%S+) %.%.%.")
  elseif self.doit == self.handleSecondLine then
      first = string.find(line,
              "^%.%.%. no UTF%-8 mapping file for font encoding")

      if first == nil then
          -- the subject string ("line") must come before the pattern;
          -- without it, string.find() raises a "bad argument" error
          first = string.find(line,
                  "^%.%.%. processing UTF%-8 mapping file for font encoding")
      end
  else
      first = string.find(line, "^%s*defining Unicode char")
  end

  if first == nil then
      return false, {}
  else
      return true, {encoding = encoding}
  end
end

-- Handles the "Now handling font encoding XXX ..." line: starts a new
-- DEBUG message with the current line as its content, remembers the
-- encoding name and arranges for handleSecondLine to process the next
-- line (we register ourselves as nextHandler because that line
-- "belongs" to us). Returns false, touching nothing, if the current
-- line is not ours.
function utf8FontMapHandler:handleFirstLine()
  local accepted, info = self:canDoit()
  if accepted then
      flushUnrecognizedMessages()

      local msg = Message:new()
      msg.severity = DEBUG
      msg.content = Lines.current
      msg.encoding = info.encoding
      self.message = msg

      -- the whole line was consumed
      Lines:handledChars()

      self.encoding = info.encoding
      self.doit = self.handleSecondLine
      nextHandler = self
      return true
  end

  return false
end

-- initial state: we are waiting for the first line of a message
utf8FontMapHandler.doit = utf8FontMapHandler.handleFirstLine

-- Handles the line right after "Now handling font encoding". It should
-- be one of:
--
--   ... no UTF-8 mapping file for font encoding XXX
--       (the message is complete; dispatch it and start over)
--
--   ... processing UTF-8 mapping file for font encoding XXX
--       (continuation lines may follow; switch to handleOtherLines)
--
-- In both cases the line is appended to the pending message. Always
-- returns true, because the current line is consumed even when it
-- matches neither form.
function utf8FontMapHandler:handleSecondLine()
  local line = Lines.current

  local noMapping = string.find(line,
                "^%.%.%. no UTF%-8 mapping file for font encoding") ~= nil

  local hasMapping = not noMapping and string.find(line,
          "^%.%.%. processing UTF%-8 mapping file for font encoding") ~= nil

  if noMapping or hasMapping then
      self.message.content = self.message.content .. '\n' .. line
      Lines:handledChars()

      if noMapping then
          dispatch(self.message)
          self.doit = self.handleFirstLine
      else
          self.doit = self.handleOtherLines
          self.numTries = 0
          self.foundOtherLines = false
      end

      return true
  end

  -- The second line was neither "no UTF-8 mapping..." nor
  -- "processing UTF-8 mapping" - this should never happen
  dispatch(self.message)
  io.stderr:write("    texlogsieve: parsing error\n")
  Lines:handledChars()
  self.doit = self.handleFirstLine
  return true
end

-- Between handleSecondLine and handleOtherLines there usually are two
-- messages: "open file ENCenc.dfu" and "ProvidesFile ENCenc.dfu".
-- Therefore, we try to find the first "...defining Unicode char"
-- message for the following 4 lines before giving up.
-- Collects the "   defining Unicode char ..." continuation lines of a
-- "processing UTF-8 mapping file" message. Returns true when the line
-- was dealt with here (or the pending message was wrapped up), and
-- false plus an empty table when we want chooseHandler() to find some
-- other handler for this line while we wait for ours to show up.
function utf8FontMapHandler:handleOtherLines()
  local line = Lines.current

  -- The expected case: one more continuation line
  if string.find(line, "^%s*defining Unicode char") ~= nil then
      flushUnrecognizedMessages()
      self.foundOtherLines = true
      self.message.content = self.message.content .. '\n' .. Lines.current
      Lines:handledChars()
      nextHandler = self
      return true
  end

  -- Not a continuation line. First possibility: there are no
  -- "...defining Unicode char" lines to be found; instead, another
  -- encoding is being defined. This obviously should not happen, so
  -- give up on the pending message and start processing the new one.
  if string.find(line, "^Now handling font encoding (%S+) %.%.%.") ~= nil then
      dispatch(self.message)
      flushUnrecognizedMessages()
      io.stderr:write("    texlogsieve: parsing error\n")

      self.numTries = 0
      self.foundOtherLines = false
      self.doit = self.handleFirstLine
      return self:handleFirstLine()
  end

  -- Second possibility: we have not yet reached the first "...defining
  -- Unicode char" line and have not exhausted our attempts, so let
  -- chooseHandler() deal with this line and try again on the next one
  local keepWaiting = not self.foundOtherLines and self.numTries < 4
  if keepWaiting then
      self.numTries = self.numTries +1
      return false, {}
  end

  -- Third possibility: we never reached any "...defining Unicode char"
  -- line but already tried for too many lines, so we give up (we might
  -- output a "parsing error" message, but that is probably unnecessary).
  --
  -- Fourth possibility: we already collected all the continuation
  -- lines, so the message is complete.
  --
  -- Either way, send out what we have and go back to the initial state
  dispatch(self.message)
  self.doit = self.handleFirstLine
  self.numTries = 0
  self.foundOtherLines = false
  return true
end


--[[ ##################################################################### ]]--
--[[ ############################# MESSAGES ############################## ]]--
--[[ ##################################################################### ]]--

-- Base prototype for everything we extract from the log file. Derived
-- message types are created with Message:new() and override
-- realToString() and/or toSummary() as needed.
Message = {}

-- Creates a new object that inherits from the receiver. The new message
-- starts with empty content and prefix, copies the current value of the
-- global "mute" and, if there is a file on top of the openFiles stack,
-- records it as the file the message belongs to.
function Message:new()
  local o = {}
  setmetatable(o, self)
  self.__index = self
  o.mute = mute
  o.content = ""
  o.prefix = ""
  if openFiles ~= nil and not openFiles:empty() then
      o.filename = openFiles:peek()
  end
  return o
end

Message.severity = UNKNOWN

-- Returns the text that should be output for this message, or "" if
-- the message is to be suppressed: it is muted, it is blank, it (or its
-- raw content) matches one of the SILENCE_STRINGS patterns, its name is
-- listed in SILENCE_PKGS, or its severity is below MINLEVEL. The result
-- is cached in self.formatted, so filtering happens only once.
function Message:toString()
    if self.mute then return "" end

    -- If we've already been here, just output the previous result
    if self.formatted ~= nil then return self.formatted end

    self.formatted = self:realToString()
    if trim(self.formatted) == "" then self.formatted = "" return "" end

    for _, val in ipairs(SILENCE_STRINGS) do
        local first = string.find(self.formatted, val)
        local other = string.find(self.content, val)
        if first ~= nil or other ~= nil then self.formatted = "" return "" end
    end

    if self.name ~= nil then
        for _, val in ipairs(SILENCE_PKGS) do
            if self.name == val then self.formatted = "" return "" end
        end
    end

    if self.severity < MINLEVEL then self.formatted = "" return "" end

    return self.formatted
end

-- Does the actual formatting, with no filtering. Without a prefix, this
-- is just the content; otherwise, the (unescaped) prefix is inserted at
-- the start of every line of the content except the first one.
function Message:realToString()
  if self.prefix == "" then return self.content end

  local lines = linesToTable(self.content)

  -- Nothing to split: return the content untouched instead of nil
  -- (which would make toString() fail when it calls trim())
  if #lines == 0 then return self.content end

  local prefix = unprotect_metachars(self.prefix) -- same for every line
  local msg = table.remove(lines, 1)
  for _, line in ipairs(lines) do
      msg = msg .. '\n' .. prefix .. line
  end
  return msg
end

-- Registers the message in the summary of repeated messages, unless
-- it is filtered out or blank.
function Message:toSummary()
  local formatted = self:toString()
  if string.len(trim(formatted)) == 0 then return end

  repetitionsSummary:add(self)
end


-- Tells whether messages coming from the file currently being
-- processed should be dropped: either the basename of the file on top
-- of the openFiles stack matches one of the SEMISILENCE_FILES patterns
-- or the basename of any file on the stack matches one of the
-- SILENCE_FILES_RECURSIVE patterns.
function currentFileIsSilenced()
  if openFiles:empty() then return false end

  -- strips the directory part, if there is one
  local function basenameOf(path)
      local _, lastSlash = string.find(path, '^.*/')
      if lastSlash == nil then return path end
      return string.sub(path, lastSlash +1)
  end

  local function matchesAny(name, patterns)
      for _, pattern in ipairs(patterns) do
          if string.find(name, pattern) ~= nil then return true end
      end
      return false
  end

  if matchesAny(basenameOf(openFiles:peek()), SEMISILENCE_FILES) then
      return true
  end

  -- This is O(n*m) and gets executed for every message,
  -- but "n" and "m" are unlikely to grow much beyond 10.
  for _, path in ipairs(openFiles) do
      if matchesAny(basenameOf(path), SILENCE_FILES_RECURSIVE) then
          return true
      end
  end

  return false
end


-- The start of a shipout. It is only visible in raw mode with
-- SHOW_SHIPOUTS set, where it is rendered as "[" followed by the
-- latexPages entry for the physical page.
beginShipoutMessage = Message:new()
function beginShipoutMessage:realToString()
  if SHOW_SHIPOUTS and RAW then
      return '[' .. latexPages[self.physicalPage]
  end

  return ""
end


-- The end of a shipout. Empty unless SHOW_SHIPOUTS is set; in raw mode
-- it is just the closing "]", otherwise a line describing the page.
endShipoutMessage = Message:new()
endShipoutMessage.shipout = true
function endShipoutMessage:realToString()
  if not SHOW_SHIPOUTS then
      return ""
  elseif RAW then
      return ']'
  end

  -- print counter as [cnt], just as LaTeX does. With that, if a program is
  -- monitoring output, it can show some sort of progress bar to the user
  local physical = self.physicalPage
  return 'shipout - physical page ' .. physical
         .. ', LaTeX page counter ['
         .. latexPages[physical] .. ']'
end


-- A file being opened. In raw mode this is "(" followed by the
-- filename; otherwise, the default formatting applies.
openFileMessage = Message:new()
function openFileMessage:realToString()
  if not RAW then
      return Message.realToString(self)
  end

  return "(" .. self.filename
end


-- A file being closed. In raw mode this is just ")"; otherwise,
-- the default formatting applies.
closeFileMessage = Message:new()
function closeFileMessage:realToString()
  if not RAW then
      return Message.realToString(self)
  end

  return ")"
end


-- Under/overfull box warnings. Besides the content, they may carry the
-- text that caused the problem (failedText) and a closing token that
-- is only output in raw mode.
underOverMessage = Message:new()
underOverMessage.severity = WARNING
function underOverMessage:realToString()
    local text = self.content

    local offending = self.failedText
    if offending ~= nil then
        -- the explanatory heading is omitted in raw mode
        local heading = RAW and '' or '\nOffending text:'
        text = text .. heading .. '\n' .. offending .. '\n'
    end

    -- in raw mode, add the final "[]"
    local closing = self.closing
    if RAW and closing ~= nil then
        text = text .. ' ' .. closing .. '\n'
    end

    return text
end

-- These messages go to their own summary (with no filtering)
function underOverMessage.toSummary(self)
    underOverSummary:add(self)
end


-- "Missing character" messages; the character and font involved are
-- extracted from the content so that the summary can report them.
missingCharMessage = Message:new()

-- This is a hack: it would be too painful to define
-- pattern captures in the handler, so we do this here.
-- The first call stores the captures in self.char and self.font (both
-- remain nil if the content does not match); the content itself is
-- returned unchanged.
function missingCharMessage:realToString()
  if self.char == nil then
      -- string.match() yields only the captures, so no throwaway
      -- variable (which used to be the global "_") is needed
      self.char, self.font = string.match(self.content,
                        '^Missing character: There is no (.-) in font (.*)!')
  end

  return self.content
end

-- These messages go to their own summary
function missingCharMessage:toSummary()
  missingCharSummary:add(self)
end


-- Messages of this type are collected by citationsSummary
-- instead of the default summary of repeated messages.
citationMessage = Message:new()
function citationMessage.toSummary(self)
  citationsSummary:add(self)
end

-- Messages of this type are collected by referencesSummary
-- instead of the default summary of repeated messages.
referenceMessage = Message:new()
function referenceMessage.toSummary(self)
  referencesSummary:add(self)
end

-- Messages of this type are collected by labelsSummary
-- instead of the default summary of repeated messages.
labelMessage = Message:new()
function labelMessage.toSummary(self)
  labelsSummary:add(self)
end


--[[ ##################################################################### ]]--
--[[ ############################ SUMMARIES ############################## ]]--
--[[ ##################################################################### ]]--

-- A Summary handles a class of messages (for example, undefined citations).
-- Some of these messages may be repeated (such as one specific undefined
-- citation that appears multiple times). We want to mention each repeated
-- group as a single item ("citation blah undefined in pages X, Y, and Z").
-- Therefore, we make self.messages a list of lists: Each "sub-list" holds
-- the messages that correspond to a specific message content (such as a
-- specific undefined citation).
SummaryPrototype = {}

-- Creates a new summary that inherits from the receiver, with no
-- messages and an empty header.
function SummaryPrototype:new()
  self.__index = self
  local summary = setmetatable({}, self)
  summary.messages = {}
  summary.header = ""
  return summary
end

-- Stores a message in the sublist that corresponds to its formatted
-- text, creating the sublist if this is the first such message.
function SummaryPrototype:add(msg)
  local key = msg:toString()

  local group = self.messages[key]
  if group == nil then
      group = {}
      self.messages[key] = group
  end

  group[#group +1] = msg
end

-- Returns the complete report (header followed by the text for all
-- the sublists) or "" if there is nothing to report.
function SummaryPrototype:toString()
  -- check if the table is empty - https://stackoverflow.com/a/1252776
  if next(self.messages) == nil then return "" end

  local body = self:processAllMessages()

  -- an empty body happens with repetitionsSummary
  if body ~= "" then return self.header .. body end

  return ""
end

-- self.messages holds a list of lists, such as all the "undefined
-- reference blah" messages in one sublist and all the "undefined
-- reference bleh" messages in another. Here we process each sublist
-- separately and join the non-empty results with a blank line in
-- between. We go over the sublists in sorted key order because
-- otherwise the order of the reports changes with each execution,
-- which makes comparing outputs harder.
function SummaryPrototype:processAllMessages()
  local reports = {}
  for _, sublist in pairsSortedByKeys(self.messages) do
      local report = self:processSingleMessageList(sublist)
      if report ~= "" then
          reports[#reports +1] = report
      end
  end

  return table.concat(reports, '\n\n')
end

-- Generates the text for one individual sublist, something like
-- "undefined reference blah in pages X, Y, Z". This obviously depends
-- on the type of summary; the default is to generate nothing.
function SummaryPrototype:processSingleMessageList(messages)
  return ""
end

-- Receives a list of (equal) messages and returns two strings: the
-- sorted, comma-separated pages and the sorted, comma-separated files
-- where they were found, with no repetitions.
function SummaryPrototype:pageAndFileList(messages)

    -- Use the pages and filenames as table keys to eliminate repetitions
    local seenPages, seenFiles = {}, {}
    for _, msg in ipairs(messages) do
        seenPages[msg.physicalPage] = true
        if msg.filename ~= nil then
            seenFiles[msg.filename] = true
        end
    end

    -- Turns such a set into a sorted list and the list into a string
    local function sortedKeyList(set)
        local keys = {}
        for key in pairs(set) do -- not ipairs!
            keys[#keys +1] = key
        end
        table.sort(keys)
        return table.concat(keys, ", ")
    end

    return sortedKeyList(seenPages), sortedKeyList(seenFiles)
end


-- Summary of generic messages that appeared more than once
repetitionsSummary = SummaryPrototype:new()
repetitionsSummary.header = 'Repeated messages:\n'

-- This summary is only generated if SILENCE_REPETITIONS is set
function repetitionsSummary:toString()
  if SILENCE_REPETITIONS then
      return SummaryPrototype.toString(self)
  end

  return ""
end

-- Reports one group of equal messages; groups with a single
-- message are not repetitions, so they yield "".
function repetitionsSummary:processSingleMessageList(messages)
  local count = #messages
  if count <= 1 then return "" end

  local pages, files = self:pageAndFileList(messages)
  local content = messages[1]:toString()

  return content .. '\n'
         .. 'in pages ' .. pages
         .. " (files " .. files .. ") - "
         .. count .. ' repetitions'
end


-- Summary of the characters that were not found in the fonts used
missingCharSummary = SummaryPrototype:new()
missingCharSummary.header = 'Missing characters:\n'

-- Reports one character/font pair (taken from the first message
-- of the group) and the pages and files where it was missing.
function missingCharSummary:processSingleMessageList(messages)
  local pages, files = self:pageAndFileList(messages)
  local sample = messages[1]

  return 'char ' .. sample.char .. ', font ' .. sample.font .. '\n'
         .. 'in pages ' .. pages .. " (files " .. files .. ")"
end


-- Summary of undefined citations; it also serves as the prototype for
-- the summaries of undefined references and multiply defined labels,
-- which differ only in the header.
citationsSummary = SummaryPrototype:new()
citationsSummary.header = 'Undefined citations:\n'

-- Groups messages by problem key. We do not use msg:toString()
-- here because some messages may include the page number, making
-- messages that are otherwise the same appear to be different.
function citationsSummary:add(msg)
  local key = msg.key

  local group = self.messages[key]
  if group == nil then
      group = {}
      self.messages[key] = group
  end

  group[#group +1] = msg
end

-- Reports one key and the pages and files where it appeared
function citationsSummary:processSingleMessageList(messages)
  local pages, files = self:pageAndFileList(messages)

  return messages[1].key .. '\n'
         .. 'in pages ' .. pages .. " (files " .. files .. ")"
end


referencesSummary = citationsSummary:new()
referencesSummary.header = 'Undefined references:\n'


labelsSummary = citationsSummary:new()
labelsSummary.header = 'Multiply defined labels:\n'


-- This is a little different from the others; we do not want to
-- treat different messages differently, only report that there were
-- under/overfull boxes in pages X, Y, and Z. So we store messages
-- directly in self.messages instead of using sublists.
underOverSummary = SummaryPrototype:new()

-- All messages go into a single flat list
function underOverSummary:add(msg)
  local all = self.messages
  all[#all +1] = msg
end

-- A single line listing the pages and files with under/overfull
-- boxes, or "" if there were none.
function underOverSummary:toString()
  local all = self.messages
  if #all == 0 then return "" end

  local pages, files = self:pageAndFileList(all)

  return "Under/overfull boxes in pages "
             .. pages .. " (files " .. files .. ")"
end


--[[ ##################################################################### ]]--
--[[ ########################## AUXILIARY STUFF ########################## ]]--
--[[ ##################################################################### ]]--

-- Escapes the characters that are special in lua patterns
-- ( % ( ) . + - * ? [ ^ $ ) by putting a "%" in front of each of
-- them, so that the result matches the original text literally.
function protect_metachars(s)
  -- "%0" in the replacement stands for the character that was matched;
  -- the extra parentheses discard the substitution count
  return (string.gsub(s, "[%%%(%)%.%+%-%*%?%[%^%$]", "%%%0"))
end

-- Reverts protect_metachars(): removes the "%" that precedes each of
-- the characters $ ^ [ ? * - + . ) ( and turns "%%" back into "%".
function unprotect_metachars(s)
  -- The substitutions are made one character at a time, in this
  -- order, with the escaped "%" itself coming last
  local order = { "$", "^", "[", "?", "*", "-", "+", ".", ")", "(", "%" }

  for _, char in ipairs(order) do
      -- "%" is also special in the replacement string
      local replacement = char
      if char == "%" then replacement = "%%" end

      s = string.gsub(s, "%%%" .. char, replacement)
  end

  return s
end

-- Returns s without leading and trailing whitespace
function trim(s)
  -- the lazy capture leaves the trailing whitespace to the final "%s*"
  local core = string.match(s, '^%s*(.-)%s*$')
  return core
end

-- Converts a user-supplied string to a lua pattern. A string that
-- starts with "////" is already a pattern, so we only drop that
-- marker. Anything else is literal text: special characters are
-- escaped and each run of whitespace matches any run of whitespace.
function stringToPattern(s)
  if string.sub(s, 1, 4) == '////' then
      return string.sub(s, 5)
  end

  local escaped = protect_metachars(s)

  -- the parentheses discard the substitution count
  return (string.gsub(escaped, "%s+", "%%s+"))
end

-- Given a string with multiple lines, returns
-- a table in which each line is an element
-- Splits s at the line breaks (LF or CR LF) and returns a table with
-- one element for each line, not including the line breaks. A line
-- break at the very end of s does not start a new (empty) line, and
-- the empty string yields an empty table.
function linesToTable(s)
  local size = string.len(s)
  local i = 1
  local lines = {}

  -- "<=" and not "<": otherwise, a last line that is
  -- exactly one character long would be silently dropped
  while i <= size do
      -- check \r in case the user added to this file a pattern
      -- with an embedded dos-style "CR LF" sequence.
      local first, last, line = string.find(s, '(.-)[\r]?\n', i)

      if first == nil then
          -- no more line breaks; what is left is the last line
          table.insert(lines, string.sub(s, i))
          i = size +1
      else
          table.insert(lines, line)
          i = last +1
      end
  end
  return lines
end

-- Iterates over the key/value pairs of t in key order; f, if given, is
-- the comparison function used to sort the keys. Use it like this:
--
--   for key, value in pairsSortedByKeys(t) do ... end
--
-- adapted from https://www.lua.org/pil/19.3.html
function pairsSortedByKeys (t, f)
  local keys = {}
  for key in pairs(t) do keys[#keys +1] = key end
  table.sort(keys, f)

  local pos = 0 -- index of the last key that was returned
  return function ()
    pos = pos + 1
    local key = keys[pos]
    if key == nil then return nil end
    return key, t[key]
  end
end


--[[ ##### STACK ##### ]]--

-- A minimal stack; the elements are kept in the array
-- part of the object itself, with the top at the end.
Stack = {}

-- Creates an empty stack that inherits from the receiver
function Stack:new()
  self.__index = self
  return setmetatable({}, self)
end

-- Puts val on top of the stack
function Stack:push(val)
  self[#self +1] = val
end

-- Removes and returns the element on top, or nil if there is none
function Stack:pop()
  if #self == 0 then return nil end

  return table.remove(self)
end

-- Returns the element on top (nil if there is none) without removing it
function Stack:peek()
  local top = #self
  return self[top]
end

-- Number of elements on the stack
function Stack:size()
  local count = #self
  return count
end

function Stack:empty()
    return not (#self > 0)
end


--[[ ##### GLOBTOPATTERN ##### ]]--

-- convert a file glob to a lua pattern

-- globtopattern (c) 2008-2011 David Manura.  Licensed under the same terms as Lua (MIT).
-- copied verbatim from https://github.com/davidm/lua-glob-pattern

-- Converts the file glob g into a lua pattern anchored at both ends
-- ("^" ... "$"): "?" becomes ".", "*" becomes ".*", a backslash makes
-- the next character literal, "[...]" becomes a character set ("[^...]"
-- and "[!...]" are negated sets) and any other non-alphanumeric
-- character is escaped with "%".
--
-- When the glob is malformed (unterminated or empty set, backslash at
-- the end of a set), the result is the string '[^]' instead.
-- NOTE(review): '[^]' is presumably meant as an "invalid glob" marker;
-- confirm how callers are expected to treat it.
function globtopattern(g)
  -- Some useful references:
  -- - apr_fnmatch in Apache APR.  For example,
  --   http://apr.apache.org/docs/apr/1.3/group__apr__fnmatch.html
  --   which cites POSIX 1003.2-1992, section B.6.

  -- These three variables are shared by (and modified
  -- from within) all the local functions below
  local p = "^"  -- pattern being built
  local i = 0    -- index in g
  local c        -- char at index i in g.

  -- unescape glob char: if the current char is a backslash, advance to
  -- the char after it. Returns false (after setting p to '[^]') if the
  -- backslash was the last char in g, true otherwise
  local function unescape()
    if c == '\\' then
      i = i + 1; c = g:sub(i,i)
      if c == '' then
        p = '[^]'
        return false
      end
    end
    return true
  end

  -- escape pattern char: alphanumeric characters are
  -- returned as they are, all others are prefixed with "%"
  local function escape(c)
    return c:match("^%w$") and c or '%' .. c
  end

  -- Convert tokens at end of charset: processes the set members (single
  -- chars and "a-z" ranges) starting at the current char, up to and
  -- including the closing "]". Returns false (after setting p to '[^]')
  -- if g ends before the set is closed.
  local function charset_end()
    while 1 do
      if c == '' then
        p = '[^]'
        return false
      elseif c == ']' then
        p = p .. ']'
        break
      else
        if not unescape() then break end
        local c1 = c -- candidate for the start of a range
        i = i + 1; c = g:sub(i,i)
        if c == '' then
          p = '[^]'
          return false
        elseif c == '-' then
          i = i + 1; c = g:sub(i,i)
          if c == '' then
            p = '[^]'
            return false
          elseif c == ']' then
            -- "-" right before the closing "]" is a literal "-"
            p = p .. escape(c1) .. '%-]'
            break
          else
            -- a real range: c1 "-" c
            if not unescape() then break end
            p = p .. escape(c1) .. '-' .. escape(c)
          end
        elseif c == ']' then
          p = p .. escape(c1) .. ']'
          break
        else
          -- c1 is a single char; c will be examined
          -- again in the next iteration
          p = p .. escape(c1)
          i = i - 1 -- put back
        end
      end
      i = i + 1; c = g:sub(i,i)
    end
    return true
  end

  -- Convert tokens in charset: called when the current char is "[";
  -- handles the beginning of the set (including negation with "^" or
  -- "!") and leaves the rest to charset_end(). Returns false if the
  -- set is malformed.
  local function charset()
    i = i + 1; c = g:sub(i,i)
    if c == '' or c == ']' then
      p = '[^]'
      return false
    elseif c == '^' or c == '!' then
      i = i + 1; c = g:sub(i,i)
      if c == ']' then
        -- ignored
      else
        p = p .. '[^'
        if not charset_end() then return false end
      end
    else
      p = p .. '['
      if not charset_end() then return false end
    end
    return true
  end

  -- Convert tokens: main loop, one glob token per iteration
  while 1 do
    i = i + 1; c = g:sub(i,i)
    if c == '' then
      -- end of the glob: anchor the pattern and finish
      p = p .. '$'
      break
    elseif c == '?' then
      p = p .. '.'
    elseif c == '*' then
      p = p .. '.*'
    elseif c == '[' then
      if not charset() then break end
    elseif c == '\\' then
      i = i + 1; c = g:sub(i,i)
      if c == '' then
        -- backslash as the last char of the glob
        p = p .. '\\$'
        break
      end
      p = p .. escape(c)
    else
      p = p .. escape(c)
    end
  end
  return p
end


--[[ ##### PARSING THE COMMAND LINE ##### ]]--

-- loosely inspired by http://lua-users.org/wiki/AlternativeGetOpt

-- Parses the command line.
--
-- args is the list of command-line arguments and optionsWithArgs is a
-- string with the single-letter options that take a parameter (it may
-- be nil, meaning none of them do).
--
-- Returns a table indexed by option name. Values are stored by
-- simpleGetoptStoreVal(); options with no parameter are stored as
-- true. The (last) argument that does not start with "-" is stored
-- under the key "filename".
--
-- NOTE: if an option that takes a parameter is the last argument, its
-- value is nil and is passed on as such to simpleGetoptStoreVal().
function simpleGetopt(args, optionsWithArgs)
  local userOptions = {} -- results will be stored here

  if optionsWithArgs == nil then optionsWithArgs = "" end

  -- this must be local, otherwise we create (or
  -- overwrite) a global variable named "i"
  local i = 1
  while i <= #args do

    -- lua does not have "continue", so we put the loop body
    -- in a "repeat/until true" block and use break instead.
    repeat
        local optname, optval

        -- this handles "--option=blah", "--option = blah",
        --              "--option= blah" and "--option =blah"
        if string.sub(args[i], 1, 2) == "--" then
            optname = string.sub(args[i], 3)

            -- check for "--option=..."
            local equals = string.find(optname, "=", 1, true)
            if equals ~= nil then
                optval = string.sub(optname, equals +1)
                optname = string.sub(optname, 1, equals -1)

            -- check for "--option =..."
            elseif i +1 <= #args and string.sub(args[i +1], 1, 1) == '=' then
                optval = string.sub(args[i +1], 2)
                i = i +1  -- do not process this again later on
            end

            if optval ~= nil then
                if optval == "" then -- check for "...= blah"
                    optval = args[i +1]
                    i = i +1 -- do not process this again later on
                end
            else
                -- check for "--option" without "="
                optval = true
            end

            simpleGetoptStoreVal(userOptions, optname, optval)

            break
        end

        -- this handles "-a -b", "-ab", "-cVAL", "-c VAL", "-abcVAL",
        -- and "-abc VAL". Obviously, "-cVALab" does not work (where
        -- does "VAL" end?).
        --
        -- To decide whether "-cVAL" means "c with param VAL" or
        -- "options c, V, A, and L", we check optionsWithArgs.
        if string.sub(args[i], 1, 1) == "-" then
            local j = 2
            local length = string.len(args[i])

            while (j <= length) do
                local optname = string.sub(args[i], j, j) -- a single letter
                if string.find(optionsWithArgs, optname, 1, true) then
                    if j < length then
                        -- the rest of this argument is the parameter
                        optval = string.sub(args[i], j +1)
                        j = length
                    else
                        -- the parameter is the next argument
                        optval = args[i +1]
                        i = i +1 -- do not process this again later on
                    end
                else
                    optval = true
                end

                simpleGetoptStoreVal(userOptions, optname, optval)

                j = j + 1 -- next letter
            end

            break
        end

        -- the filename is the only argument that does not start with "-"
        userOptions['filename'] = args[i]
    until true

    i = i +1 -- next arg

  end -- while i <= #args

  return userOptions
end

-- Stores the value of one command-line option in userOptions.
-- Booleans are stored as they are; the strings "y"/"true" and
-- "n"/"false" (in any letter case) are stored as the corresponding
-- booleans; any other string is appended to a list, so that an option
-- may be used more than once (a previous non-list value is discarded).
function simpleGetoptStoreVal(userOptions, optname, optval)
  if type(optval) == 'boolean' then
      userOptions[optname] = optval
      return
  end

  local booleanWords = {
      ["y"] = true, ["true"] = true,
      ["n"] = false, ["false"] = false,
  }

  local asBoolean = booleanWords[string.lower(optval)]
  if asBoolean ~= nil then
      userOptions[optname] = asBoolean
      return
  end

  local values = userOptions[optname]
  if type(values) ~= 'table' then
      values = {}
      userOptions[optname] = values
  end
  values[#values +1] = optval
end

--[[
-- TESTING THE COMMAND LINE PARSER --

optionsWithArgs = 'abdg' -- these are followed by a parameter: "-b5", "-a true"

args = {
  '--should-be-true',
  '-a', 'param-for-a',
  '-bparam-for-b',
  '-cdparam-for-d', -- c should be true
  '-e', -- e should be true
  '-fg', 'param-for-g', -- f should be true
  '--opt1', '=', 'param-for-opt1',
  '--opt2=', 'param-for-opt2',
  '--opt3', '=param-for-opt3',
  '--opt4=param-for-opt4',
  '--also-should-be-true'
}

for k, v in pairs(simpleGetopt(args, optionsWithArgs)) do
    print("Option " .. k .. ': ', v)
end
--]]


--[[ ##################################################################### ]]--
--[[ ######################### INPUT LINES BUFFER ######################## ]]--
--[[ ##################################################################### ]]--

--[[
This is a buffer of lines from the logfile. The first line in the
buffer (self.current) is treated specially: we may alter its content
during processing and, therefore, we record its original size so
later we can detect if it may be a wrapped line (i.e., if its
original length was max_print_line characters).
--]]

-- The buffered lines live in the array part of Lines; "wrapped" holds,
-- for each of them, the memoized answer to "is this line wrapped?"
Lines = { wrapped = {} }

-- Makes the first line in the buffer the current line, removing it
-- (and its memo) from the buffer. If the buffer is empty, the current
-- line becomes nil.
function Lines:gotoNextLine()
  local line = table.remove(self, 1)
  local memo = table.remove(self.wrapped, 1)

  self.current = line
  self.atBeginningOfLine = true

  -- When unwrapping lines, we need to check whether a line is of
  -- the "right" size. However, we modify the content of the current
  -- line during processing, so we capture its initial length here
  if line == nil then
      self.currentLineInitialLength = 0
      self.currentWrapped = false
  else
      self.currentLineInitialLength = string.len(line)
      self.currentWrapped = memo
  end
end

-- After a handler processes the current line, it calls this to
-- indicate how much of the line it used up. With a number, the first n
-- characters are removed, leaving the trailing text for the next
-- handler; with no argument, the current line becomes nil, indicating
-- that it is time to get more data and call gotoNextLine().
function Lines:handledChars(n)
  if n ~= nil then
      self.current = string.sub(self.current, n +1)
      self.atBeginningOfLine = false
      return
  end

  self.current = nil
end

-- Length of the n-th line in the buffer. For the current line (n is 0
-- or omitted), this is the length it had when it became the current
-- line, regardless of what was removed from it since then.
function Lines:len(n)
  if not n or n == 0 then
      return self.currentLineInitialLength
  end

  return string.len(self[n])
end

-- Adds a line to the end of the buffer; we
-- do not know yet whether it is wrapped
function Lines:append(x)
    self[#self +1] = x

    local memos = self.wrapped
    memos[#memos +1] = "unknown" -- cannot use "nil"
end

-- Returns the n-th line in the buffer, or the
-- current line if n is 0 or omitted
function Lines:get(n)
    if not n or n == 0 then
        return self.current
    end

    return self[n]
end

-- Tells whether there are no lines left in the
-- buffer (the current line is not taken into account)
function Lines:empty()
    return not (#self > 0)
end

-- Number of lines in the buffer (the current
-- line is not taken into account)
function Lines:numLines()
    local count = #self
    return count
end


--[[ ##### UNWRAPPING LINES ##### ]]--

-- Returns the memoized "is it wrapped?" answer for line n;
-- the memo for the current line (n == 0) is stored separately
function Lines:seemsWrappedMemo(n)
    if n ~= 0 then
        return self.wrapped[n]
    end

    return self.currentWrapped
end

-- Memoizes the "is it wrapped?" answer for line n;
-- the memo for the current line (n == 0) is stored separately
function Lines:setWrappedMemo(n, val)
    if n ~= 0 then
        self.wrapped[n] = val
        return
    end

    self.currentWrapped = val
end

-- Joins the current line and the next one from the buffer; the result
-- becomes the current line. We trust the caller to check
-- :seemsWrapped() before calling this!
function Lines:unwrapOneLine()
  local firstPart = self.current

  -- this also updates currentLineInitialLength, which
  -- now refers to the line we just took from the buffer
  self:gotoNextLine()

  self.current = firstPart .. self.current
end

-- Tells whether the line at "position" (0, the default, is the current
-- line) seems to continue on the following line. Always false unless
-- the global badLogfile is set. Definitive answers are memoized.
function Lines:seemsWrapped(position)
  if not badLogfile then return false end

  if position == nil then position = 0 end

  local memo = self:seemsWrappedMemo(position)
  if memo ~= 'unknown' then return memo end

  -- No next line, so this line cannot be a wrapped line. The buffer
  -- should be large enough to guarantee this will only happen when we
  -- are actually at the end of the input BUT, just in case the buffer
  -- does get empty before EOF, we do not memoize this answer.
  if self:get(position +1) == nil then return false end

  -- If the line length suggests this line continues on the next, we
  -- still need to be sure; we check whether there are no handlers
  -- that can manage the next line
  local verdict = false
  if self:wrappingLength(position) then
      verdict = self:noHandlersForNextLine(position)
  end

  self:setWrappedMemo(position, verdict)
  return verdict
end

-- Tells whether the length of the line at "position" is compatible
-- with the line having been wrapped by TeX, i.e., whether it is
-- max_print_line (or max_print_line +1) bytes long, or a little
-- shorter than that because the next line starts with a multibyte
-- UTF-8 character that would not fit.
function Lines:wrappingLength(position)
  local n = self:len(position)

  -- I have seen at least two cases where TeX "forgot" to
  -- wrap a line. If this happens, the line is not wrapped.
  if n > max_print_line +1 then return false end

  -- Is the line the "right" length?
  -- (max_print_line or max_print_line +1)
  if n >= max_print_line then return true end

  -- Ok, n < max_print_line, so it looks like this is not
  -- a wrapped line. BUT! LuaTeX does not break a multibyte
  -- UTF-8 character, which means some lines may be broken
  -- at lengths < max_print_line; let's check for this.

  -- With no next line, there is nothing to continue on
  local nextLine = self:get(position +1)
  if nextLine == nil then return false end

  -- Get the length of the first UTF-8 character on the next line:
  -- https://www.lua.org/manual/5.3/manual.html#pdf-utf8.charpattern
  local _, nextcharsize = string.find(nextLine, '^' .. utf8.charpattern)

  -- If the next line is empty or does not start with something that
  -- looks like an UTF-8 character, there is no match (and we cannot
  -- do arithmetic with nil below); if the character is not multibyte,
  -- its size is 1. Either way, the line really is not wrapped.
  if nextcharsize == nil or nextcharsize == 1 then return false end

  -- Some *very* simplistic experiments tell me that if a
  -- multibyte UTF-8 character in the last position of a line
  -- would make that line exactly max_print_line long, LuaTeX
  -- still breaks the line, even though the character would fit.
  -- This is why we do "+1" here.

  -- If the multibyte char would fit, the line really is not wrapped
  if n + nextcharsize +1 <= max_print_line then return false end

  -- The line is shorter because of an UTF-8 multibyte
  -- char, so it seems like a wrapped line
  return true
end

-- Decide whether the line after "position" should be treated as the
-- continuation of the line at "position" (which the caller already
-- found to have a length compatible with wrapping).
--
-- Returns true if no handler claims the next line, or if the handler
-- that claims it is most likely being fooled by the continuation of a
-- wrapped message (see the special cases below); returns false otherwise.
function Lines:noHandlersForNextLine(position)
  local line = self:get(position)
  local nextPosition = position +1

  -- Tell whether "text" still has at least one "opener" character left
  -- after pairing every "closer" with the nearest preceding unpaired
  -- "opener". A dangling closer (one with no opener before it) is
  -- ignored. Both delimiters are single characters.
  local function hasUnpairedOpener(text, opener, closer)
      local unpaired = 0
      for i = 1, string.len(text) do
          -- look at the character itself: what matters is whether
          -- *this* delimiter opens or closes, not whether some other
          -- opener exists further along the line
          local char = string.sub(text, i, i)
          if char == opener then
              unpaired = unpaired +1
          elseif char == closer and unpaired > 0 then
              unpaired = unpaired -1
          end
      end
      return unpaired > 0
  end

  local wrapped = true
  for _, candidateHandler in ipairs(beginningOfLineHandlers) do
      if candidateHandler:canDoit(nextPosition) then
          wrapped = false
          break
      end
  end

  for _, candidateHandler in ipairs(anywhereHandlers) do
      if candidateHandler:canDoit(nextPosition) then
          wrapped = false
          break
      end
  end

  if wrapped then return true end

  -- TODO: the way we deal with these special cases is lame...

  -- a close parens at the beginning of the next line might not be
  -- a "close file" message, but instead refer to an open parens in
  -- the current line; let's check for this exceptional case. We
  -- traverse the current line looking for open/close parens and pair
  -- every close parens to the corresponding open parens. A dangling
  -- close parens is probably a continuation too, so we ignore it.
  -- If an open parens is left over, we revise the previous decision
  -- and consider the line to be wrapped.
  if closeParensHandler:canDoit(nextPosition)
                and hasUnpairedOpener(line, '(', ')') then
      return true
  end

  -- Same thing for close square bracket
  if closeSquareBracketHandler:canDoit(nextPosition)
                and hasUnpairedOpener(line, '[', ']') then
      return true
  end

  local parens, parensData = openParensHandler:canDoit(nextPosition)
  local filename = nil
  -- be tolerant if the handler gives us no data table at all
  if type(parensData) == 'table' then filename = parensData[1] end
  if parens and filename == nil then
      -- Next line could be handled as an "open parens" message, but
      -- there is no filename, so the message is only "(". There is
      -- a good chance that is just the continuation of a previous
      -- wrapped message. If there is a close parens character on the
      -- line too, we'll assume that is true and ignore the handler
      local first = string.find(self:get(nextPosition), "%)")
      if first ~= nil then return true end
  end

  local bracket, bracketData = openSquareBracketHandler:canDoit(nextPosition)
  local page = nil
  if type(bracketData) == 'table' then page = bracketData[1] end
  if bracket and page == nil then
      -- Next line could be handled as an "open square bracket" message,
      -- but there is no page number, so the message is only "[". There
      -- is a good chance that is just the continuation of a previous
      -- wrapped message. If there is a close square bracket character on
      -- the line too, we'll assume that is true and ignore the handler
      local first = string.find(self:get(nextPosition), "%]")
      if first ~= nil then return true end
  end

  return false
end

-- Keep unwrapping the current line (Lines.current), i.e., joining it
-- with the following lines, until it matches the Lua pattern "pat".
--
-- Returns two values:
--   last    - index in Lines.current where the match ends, or nil if
--             the pattern could not be matched (a "parsing error"
--             message is written to stderr in that case)
--   matches - table with the captures of the match (empty if the
--             pattern has no captures or if there was no match)
function unwrapUntilPatternMatches(pat)
  local last, matches

  while true do
      matches = {string.find(Lines.current, pat)}
      table.remove(matches, 1) -- remove "first"
      last = table.remove(matches, 1) -- remove "last"

      if last ~= nil then break end

      if not Lines:seemsWrapped() then
          -- There is nothing else to unwrap, so trying again would
          -- just repeat the same failed match forever; give up.
          io.stderr:write("    texlogsieve: parsing error\n")
          break
      end

      Lines:unwrapOneLine()
  end

  -- We matched the given pattern. However, if the pattern ends with
  -- something like ".+", it is possible that there is still some more
  -- relevant material in the next line (this is why we should not use
  -- such patterns). There is not much we can do about that in general,
  -- but if the next line is really short, this is probably why, so we
  -- might as well try to unwrap once more. This is obviously a hack,
  -- but it saves us from problems with a couple of messages that use
  -- "filepat" at the end of the pattern.

  local nextline = Lines:get(1)

  -- ")", "[" and "]" are real messages that might fit in 2 chars, so do
  -- not consider lines with them (nor a missing next line, obviously)
  if nextline ~= nil and string.len(nextline) < 3
                     and not string.find(nextline, '[%)%[%]]') then

      local together = Lines.current .. nextline
      local candidateMatches = {string.find(together, pat)}
      table.remove(candidateMatches, 1)
      local candidateLast = table.remove(candidateMatches, 1)
      if candidateLast ~= nil then
          Lines:unwrapOneLine()
          last = candidateLast
          matches = candidateMatches
      end
  end

  return last, matches
end

-- Same as unwrapUntilPatternMatches(), but "s" is meant to be matched
-- as given: it is first passed through protect_metachars() and the
-- result is used as the pattern.
function unwrapUntilStringMatches(s)
  local pattern = protect_metachars(s)
  return unwrapUntilPatternMatches(pattern)
end


--[[ ##################################################################### ]]--
--[[ ################## GUESSING FILENAMES AND SHIPOUTS ################## ]]--
--[[ ##################################################################### ]]--

--[[
To find the full path of a file in the TeX search path, we may use

thepath = kpse.find_file("article")
thepath = kpse.find_file("/usr/share/texlive/texmf-dist/tex/latex/base/article.cls")
thepath = kpse.find_file("latex/base/article.cls")
thepath = kpse.find_file("texmf.cnf", "cnf")

The filename may or may not include the extension; kpathsea adds the
extension if necessary according to the specified format (default is
"tex", which means extensions .tex, .sty, .cls and some others). This
is problematic, because (1) it does not recognize filenames with an
unknown extension and (2) if there is a file named "what.ever.tex" and
another one called "what.ever", it may find the wrong file.

We want to check whether a string from the log file corresponds to the
complete path of an existing filename, which means we already know the
extension. Therefore, we will use the type "other text files". This
prevents kpathsea from adding the extension automatically (which is what
we want) and also prevents kpathsea from searching the standard TeX path,
which is also nice because we are checking a complete path.
--]]

-- Try to extract a filename from the line at "position" (default: 0).
-- An optional run of whitespace followed by "(" at the beginning of the
-- line is skipped first. The quoted form is tried before the unquoted
-- one. Returns whatever those helpers found, or nil.
function guessFilename(position)
  if position == nil then position = 0 end

  -- drop the leading "(" (and any whitespace before it), if present
  local text = (string.gsub(Lines:get(position), '^%s*%(', '', 1))

  local hasQuotes, quotedName = guessQuotedFilename(text, position)

  -- Only fall back to the unquoted search when there are no quotes
  -- at all; with quotes, the answer (filename or nil) is final.
  if quotedName == nil and not hasQuotes then
      return guessUnquotedFilename(text, position)
  end

  return quotedName
end

-- Try to extract the page number of a shipout from the line at
-- "position" (default: 0), unwrapping the following lines as needed.
-- An optional run of whitespace followed by "[" at the beginning of
-- the line is skipped first.
--
-- Returns the page as a string (digits and dots, such as "3" or
-- "12.0.1"), or nil if the text does not look like a shipout.
function guessShipoutPage(position)
  -- In general, a shipout is identified by "[\count0.\count1.\count2..."
  -- followed by a space or "]". However, there are complications:
  --
  -- 1. Dates, such as [2020/10/03] or [2020-10-03]
  --
  -- 2. Files added to the shipout, such as
  --    "[3{somefile.tex}" or "[3<somefile.pdf>"
  --
  -- We can either:
  --
  -- 1. Specify what is allowed: "^(%d+[%d%.]*)[ %]<{]" or "^(%d+[%d%.]*)$"
  --
  -- 2. Exclude what is forbidden: "^(%d+[%d%.]*)[^/-]"
  --
  -- Let's go with option 1.

  -- the number is followed by something that ends it for good
  local delimitedPattern = "^(%d+[%d%.]*)[ %]<{]"
  -- the number goes up to the end of what we have read so far
  local openEndedPattern = "^(%d+[%d%.]*)$"

  if position == nil then position = 0 end
  local line = Lines:get(position)
  local _, last = string.find(line, '^%s*%[')
  if last ~= nil then line = string.sub(line, last +1) end

  local page
  while true do
      -- If we can see the delimiter, the number is complete and
      -- appending more text cannot change it: we are done.
      page = string.match(line, delimitedPattern)
      if page ~= nil then return page end

      -- Otherwise, what we have may be the whole number or just its
      -- beginning (if the line is wrapped), so keep it as a candidate.
      page = string.match(line, openEndedPattern)

      if not Lines:seemsWrapped(position) then break end

      -- continue with the line that follows the one we just examined
      local nextline = Lines:get(position +1)
      if nextline == nil then break end

      line = line .. nextline
      position = position +1
  end

  return page -- may be nil
end

-- Look for a filename enclosed in double quotes at the beginning of
-- "line" (luatex puts quotes around filenames with spaces, which makes
-- things easier), unwrapping the lines after "position" when needed.
--
-- Returns:
--   false                  - the text does not start with a quote
--   true, nil              - there are quotes, but no existing filename
--   true, '"filename"'     - the quoted text is an existing file
function guessQuotedFilename(line, position)
  repeat
      local size = string.len(line)

      -- no quotes
      if size > 0 and string.sub(line, 1, 1) ~= '"' then return false end

      -- relative path or unix-style path, or else windows-style path
      local pathStart = string.find(line, '^"[%.]?/')
                        or string.find(line, '^"%a:/')

      -- there are quotes, but what follows is not a filename
      if size >= 4 and pathStart == nil then return true, nil end

      local _, closing = string.find(line, '^"%a?[:]?[%.]?/[^"%(%)%[%]]+"')
      if closing ~= nil then
          local filename = string.sub(line, 2, closing -1) -- remove quotes
          if kpse.find_file(filename, 'other text files') ~= nil then
              return true, '"' .. filename .. '"'
          end
          return true, nil -- there are quotes, but no filename
      end

      -- no closing quote or the line is too short; if we
      -- cannot unwrap this line, there is nothing else to try
      if not Lines:seemsWrapped(position) then
          io.stderr:write("    texlogsieve: parsing error\n")
          return true, nil
      end

      -- we can: append the next line and try again
      line = line .. Lines:get(position +1)
      position = position +1
  until false
end

-- no quotes; let's exhaustively extract the prefixes of the line
-- (from longest to shortest) and use kpse.find_file() to check if
-- any of them is an existing filename.
-- Look for an unquoted filename at the beginning of "line" by testing
-- its prefixes with kpse.find_file(), unwrapping the lines after
-- "position" when needed. Returns the filename, or nil if none is found.
function guessUnquotedFilename(line, position)
  -- Prefixes up to this length were already tested in a previous
  -- iteration, so we do not test them again after unwrapping.
  local checkedUpTo = 0

  repeat
      local size = string.len(line)

      -- relative or unix-style path, or else windows-style path
      local pathStart = string.find(line, '^[%.]?/')
                        or string.find(line, '^%a:/')

      -- this does not look like a filename
      if size >= 3 and pathStart == nil then return nil end

      -- if there is a ")", "(", "[", or "]" char, stop before that
      local delimiter = string.find(line, "[%)%(%[%]]")
      local limit = size
      if delimiter ~= nil then limit = delimiter -1 end

      -- From longest to shortest, to avoid problems if there is a
      -- substring in the filename that matches some other filename.
      local i = limit
      while i > checkedUpTo do
          local candidate = string.sub(line, 1, i)
          if kpse.find_file(candidate, 'other text files') ~= nil then
              return candidate
          end
          i = i -1
      end

      -- we did not find the filename; give up unless the line can be
      -- unwrapped and the filename was not cut short by a delimiter
      if not Lines:seemsWrapped(position) then return nil end
      if delimiter ~= nil then return nil end

      -- append the next line and try the new, longer prefixes
      checkedUpTo = limit
      line = line .. Lines:get(position +1)
      position = position +1
  until false
end

--[[ ###################################################################### ]]--
--[[ ######################## BEGINNING OF SCRIPT ######################### ]]--
--[[ ###################################################################### ]]--


-- Script entry point: call main() (defined elsewhere in this file),
-- handing it the global "arg" table, in which the standalone
-- interpreter stores the script's command-line arguments.
main(arg)
