# Source code for exegis.analysis

"""Module which contains the function to analyse aphorism and commentaries line

There are two functions which are treating the references ``[W1 W2]``
and the footnotes *XXX*.

The ``references`` function has to be used before the ``footnotes``.

:Authors: Jonathan Boyle, Nicolas Gruel <nicolas.gruel@manchester.ac.uk>

:Copyright: IT Services, The University of Manchester
"""
try:
    from .baseclass import logger, XML_OSS, XML_N_OFFSET
except ImportError:
    from baseclass import logger, XML_OSS, XML_N_OFFSET


# Define an Exception
class AnalysisException(Exception):
    """Error raised when a line of text cannot be analysed.

    Raised by ``references`` when a ``[W1 W2]`` reference is malformed and
    by ``footnotes`` when the text attached to a footnote symbol cannot be
    determined.
    """


def references(line):
    """
    Replace the witness references found in a line of text with XML.

    Every reference with the form ``[WW LL]`` is replaced with XML with the
    form ``<locus target="WW">LL</locus>``; the surrounding text is kept
    unchanged.

    ``\\n`` characters are added at the start and end of each XML insertion
    so each instance of XML is on its own line (no ``\\n`` is added at the
    very start or the very end of the returned string).

    This function is meant to be applied to a line before the footnotes
    of that line are processed with the ``footnotes`` function.

    Parameters
    ----------

    line : str
        contains the line with the aphorism or the commentary to analyse.

    Returns
    -------
    str or None
        The text with each reference replaced by its ``<locus>`` XML.
        ``None`` is returned when ``line`` is empty (or ``None``).

    Raises
    ------
    AnalysisException
        if references does not follow the convention ``[W1 W2]``.
        e.g. will raise an exception if:

        - ``[W1W2]`` : missing space between the two witnesses

        - ``[W1 W2`` : missing ``]``
    """
    # Nothing to analyse. ``None`` (and not '') is returned on purpose:
    # this is the historical behaviour and callers may depend on it.
    if not line:
        return None

    # Pieces of the result, joined once at the end
    pieces = []
    remaining = line

    while True:
        # Try to partition the remaining text at the first '[' character
        text_before, sep, text_after = remaining.partition('[')

        # Keep the plain text found before the reference (if any)
        if text_before:
            pieces.append(text_before)
            # If there is a witness to add start a new line
            if sep:
                pieces.append('\n')

        # No '[' found: there are no more witness references
        if not sep:
            break

        # Try to split text_after at the first ']' character
        reference, sep, remaining = text_after.partition(']')

        # No closing ']': the reference is malformed.
        # text_after is reported because a failed partition leaves
        # ``remaining`` empty, which would hide the faulty text.
        if not sep:
            error = 'Unable to partition string {} at "]" ' \
                    'when looking for a reference'.format(text_after)
            logger.error(error)
            raise AnalysisException(error)

        # Partition the reference into witness and location (these are
        # separated by the ' ' character)
        witness, sep, page = reference.partition(' ')

        # No space inside the brackets: the reference is malformed
        if not sep:
            error = ('Unable to partition reference [{}] '
                     'because missing space probably'.format(reference))
            logger.error(error)
            raise AnalysisException(error)

        # Add the witness and location XML to the result
        pieces.append('<locus target="{}">{}</locus>'.format(witness.strip(),
                                                             page.strip()))

        # If there is no text left we can stop
        if not remaining:
            break

        # There is more text to process so start a new line
        pieces.append('\n')

    return ''.join(pieces)


def footnotes(string_to_process, next_footnote):
    """
    Split a line of text at its footnote symbols and generate the XML
    for the main text.

    Footnote symbols have the form ``*n*`` where ``n`` is the footnote
    number. They are searched in increasing order starting from
    ``next_footnote``; the processing stops at the first number which is
    not found in the remaining text.

    The text a footnote applies to is either:

    - the text located after the first ``#`` character found in the text
      preceding the footnote symbol, or
    - when there is no ``#``, the text located after the last space
      preceding the footnote symbol (i.e. a single word).

    This text is enclosed between two anchors,
    ``<anchor xml:id="begin_fnN"/>`` and ``<anchor xml:id="end_fnN"/>``.

    Parameters
    ----------

    string_to_process: str

        This string contains the text to be processed. It may already
        contain the XML generated by the ``references`` function, i.e.
        ``<locus>`` elements each on their own line.

    next_footnote: int
        number of the first footnote to look for.

    Returns
    -------

    tuple
        1. A Python list containing the XML lines for the main text.
        2. The number of the next footnote to be processed when this
           function completes.

        No XML for the critical apparatus is produced by this function.

    Raises
    ------
    AnalysisException
        if the text a footnote applies to can not be determined (neither
        a ``#`` nor a space before the footnote symbol), or if
        ``string_to_process`` is not a string (e.g. ``None``).
    """
    # Create list to contain the XML
    xml_main = []

    # string_to_process is consumed while looping; keep the full input so
    # the error message can show the whole line and not only its remainder
    original_text = string_to_process

    try:
        while True:
            # Use string partition to try to split this text at
            # the next footnote symbol
            footnote_symbol = '*' + str(next_footnote) + '*'
            text_before_symbol, sep, string_to_process = \
                string_to_process.partition(footnote_symbol)

            # If the partition failed the next footnote is not in this
            # line: add the remaining text to the XML and stop processing
            if sep == '':
                xml_main.extend(XML_OSS * XML_N_OFFSET + next_line.strip()
                                for next_line
                                in text_before_symbol.splitlines())
                break

            # We are dealing with a footnote.
            # Try to split text_before_symbol at the first '#' character,
            # which marks the start of the text the footnote applies to.
            next_text_for_xml, sep, base_text = \
                text_before_symbol.partition('#')

            # Without '#' the footnote refers to a single word: partition
            # at the LAST space before the footnote symbol
            if sep == '':
                next_text_for_xml, sep, base_text = \
                    text_before_symbol.rpartition(' ')

            # Neither '#' nor ' ' was found: there is an error
            if sep == '':
                error = ('Unable to partition text before footnote symbol '
                         '{}'.format(footnote_symbol))
                logger.error(error)
                error = ('Probably missing a space or the "#" character '
                         'to determine the word(s) to apply the footnote')
                logger.error(error)
                raise AnalysisException(error)

            # Add the text located before the footnoted text to xml_main
            xml_main.extend(XML_OSS * XML_N_OFFSET + next_line.strip()
                            for next_line in next_text_for_xml.splitlines())

            # Create an anchor for the app (as advised)
            xml_main.append(XML_OSS * XML_N_OFFSET +
                            '<anchor xml:id="begin_fn' +
                            str(next_footnote) + '"/>')

            # Add the footnoted text (and any XML from a witness reference)
            # with a deeper indentation; these lines are not stripped.
            xml_main.extend(XML_OSS * (XML_N_OFFSET + 2) + next_line
                            for next_line in base_text.splitlines())

            # End the anchor reference
            xml_main.append(XML_OSS * XML_N_OFFSET +
                            '<anchor xml:id="end_fn' +
                            str(next_footnote) + '"/>')

            # Increment the footnote number
            next_footnote += 1

            # Test to see if there is any more text to process
            if string_to_process == '':
                break
    except (AttributeError, AnalysisException):
        # AttributeError is raised when string_to_process is not a string
        # (e.g. None); both cases are reported as an analysis failure.
        error = 'Cannot analyse aphorism or commentary ' \
                '{}'.format(original_text)
        logger.error(error)
        raise AnalysisException(error)

    return xml_main, next_footnote