Source code for capitains_nautilus.inventory.local

# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import io
from six import text_type as str

from MyCapytain.resources.inventory import TextInventory, TextGroup, Work, Citation
from MyCapytain.resources.texts.local import Text
from MyCapytain.common.reference import URN
from lxml.objectify import makeparser, parse as objectify
from capitains_nautilus.errors import *
from glob import glob
import os.path
from capitains_nautilus.inventory.proto import InventoryResolver
from capitains_nautilus import _cache_key
from capitains_nautilus.cache import BaseCache
import logging
import pickle


class XMLFolderResolver(InventoryResolver):
    """ XML Folder Based resolver.

    :param resource: Resource should be a list of folders retaining data as Capitains Guidelines Repositories
    :type resource: [str]
    :param name: Key used to differentiate Repository and thus enabling different repo to be used
    :type name: str
    :param inventories: Inventories to be used (not supported by this resolver, see warning below)
    :type inventories: list
    :param cache: Cache object to be used for the inventory
    :type cache: BaseCache
    :param logger: Logging object
    :type logger: logging

    :cvar TEXT_CLASS: Text Class [not instantiated] to be used to parse Texts. Can be changed to support Cache for example
    :type TEXT_CLASS: class
    :ivar inventory_cache_key: Werkzeug Cache key to get or set cache for the TextInventory
    :ivar texts_metadata_cache_key: Werkzeug Cache key to get or set cache for lists of metadata texts objects
    :ivar texts_parsed_cache_key: Werkzeug Cache key to get or set cache for lists of parsed texts objects
    :ivar texts: List of Text Metadata objects
    :ivar source: Original resource parameter

    .. warning :: This resolver does not support inventories
    """
    TEXT_CLASS = Text

    def __init__(self, resource, inventories=None, cache=None, name=None, logger=None, auto_parse=True):
        """ Initiate the XMLFolderResolver
        """
        super(XMLFolderResolver, self).__init__(resource=resource)
        if not isinstance(cache, BaseCache):
            cache = BaseCache()

        self.__inventories__ = inventories
        self.__parser__ = makeparser()
        self.__cache = cache
        self.name = name

        self.logger = logger
        if not logger:
            self.logger = logging.getLogger(name)

        if not name:
            self.name = "repository"

        self.TEXT_CLASS = XMLFolderResolver.TEXT_CLASS
        self.works = []

        self.inventory_cache_key = _cache_key("Nautilus", "Inventory", "Resources", self.name)
        self.texts_metadata_cache_key = _cache_key("Nautilus", "Inventory", "TextsMetadata", self.name)
        self.texts_parsed_cache_key = _cache_key("Nautilus", "Inventory", "TextsParsed", self.name)

        __inventory__ = self.__cache.get(self.inventory_cache_key)
        __texts__ = self.__cache.get(self.texts_metadata_cache_key)

        if __inventory__ and __texts__:
            self.inventory, self.__texts__ = __inventory__, __texts__
        elif auto_parse:
            self.parse(resource)
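    # A minimal instantiation sketch (hypothetical folder path and repository
    # name; any BaseCache-compatible backend can be passed as `cache`):
    #
    #   from capitains_nautilus.cache import BaseCache
    #   resolver = XMLFolderResolver(
    #       resource=["./corpora/repo1"],  # list of CapiTainS repository folders
    #       name="my_repository",          # namespaces the cache keys
    #       cache=BaseCache()
    #   )
    #   # With auto_parse=True (the default), the inventory is restored from the
    #   # cache when available, otherwise built immediately by parse().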
    def xmlparse(self, file):
        """ Parse an XML file

        :param file: Opened file
        :return: Parsed tree
        """
        return objectify(file, parser=self.__parser__)
    def cache(self, inventory, texts):
        """ Cache the main objects of the resolver: the TextInventory and the Text metadata objects

        :param inventory: Inventory resource
        :type inventory: TextInventory
        :param texts: List of Text Metadata Objects
        :type texts: [MyCapytain.resources.inventory.Text]
        """
        self.inventory, self.__texts__ = inventory, texts
        self.__cache.set(self.inventory_cache_key, inventory)
        self.__cache.set(self.texts_metadata_cache_key, texts)
    def text_to_cache(self, text):
        """ Cache a parsed text

        :param text: Text to be cached
        """
        self.__cache.set(
            _cache_key(self.texts_parsed_cache_key, str(text.urn)),
            text
        )
    def cache_to_text(self, urn):
        """ Retrieve a text from the cache

        :param urn: URN of the text to retrieve
        :return: Text object
        :rtype: Text
        """
        return self.__cache.get(
            _cache_key(self.texts_parsed_cache_key, str(urn))
        )
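    # Round-trip sketch for the two helpers above (assumes `resolver` is an
    # XMLFolderResolver instance and `parsed_text` a Text carrying a urn):
    #
    #   resolver.text_to_cache(parsed_text)
    #   same_text = resolver.cache_to_text(parsed_text.urn)
    #   # `same_text` is None when the backend has no entry for that URN.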
    def flush(self):
        """ Flush the current resolver objects and their cache entries
        """
        self.inventory = TextInventory()
        for text in self.__texts__:
            self.__cache.delete(_cache_key(self.texts_parsed_cache_key, str(text.urn)))
        self.__texts__ = []
        self.__cache.delete(self.inventory_cache_key)
        self.__cache.delete(self.texts_metadata_cache_key)
    def parse(self, resource, cache=True):
        """ Parse a list of directories and build the inventory

        :param resource: List of folders
        :param cache: Automatically cache the results
        :return: An inventory resource and a list of Text metadata objects
        """
        for folder in resource:
            textgroups = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
            for __cts__ in textgroups:
                try:
                    with io.open(__cts__) as __xml__:
                        textgroup = TextGroup(resource=__xml__)
                    str_urn = str(textgroup.urn)

                    if str_urn in self.inventory.textgroups:
                        self.inventory.textgroups[str_urn].update(textgroup)
                    else:
                        self.inventory.textgroups[str_urn] = textgroup

                    for __subcts__ in glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(__cts__))):
                        with io.open(__subcts__) as __xml__:
                            work = Work(
                                resource=__xml__,
                                parents=[self.inventory.textgroups[str_urn]]
                            )
                        work_urn = str(work.urn)

                        if work_urn in self.inventory.textgroups[str_urn].works:
                            self.inventory.textgroups[str_urn].works[work_urn].update(work)
                        else:
                            self.inventory.textgroups[str_urn].works[work_urn] = work

                        for __textkey__ in work.texts:
                            __text__ = self.inventory.textgroups[str_urn].works[work_urn].texts[__textkey__]
                            __text__.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
                                directory=os.path.dirname(__subcts__),
                                textgroup=__text__.urn.textgroup,
                                work=__text__.urn.work,
                                version=__text__.urn.version
                            )

                            if os.path.isfile(__text__.path):
                                try:
                                    with io.open(__text__.path) as f:
                                        t = Text(resource=self.xmlparse(f))
                                    cites = list()
                                    # Rebuild the citation scheme from the deepest level
                                    # upwards, linking each Citation to its child
                                    for cite in [c for c in t.citation][::-1]:
                                        if len(cites) >= 1:
                                            cites.append(Citation(
                                                xpath=cite.xpath.replace("'", '"'),
                                                scope=cite.scope.replace("'", '"'),
                                                name=cite.name,
                                                child=cites[-1]
                                            ))
                                        else:
                                            cites.append(Citation(
                                                xpath=cite.xpath.replace("'", '"'),
                                                scope=cite.scope.replace("'", '"'),
                                                name=cite.name
                                            ))
                                    __text__.citation = cites[-1]
                                    self.logger.info("%s has been parsed", __text__.path)
                                    if __text__.citation:
                                        self.__texts__.append(__text__)
                                    else:
                                        self.logger.error("%s has no passages", __text__.path)
                                except Exception:
                                    self.logger.error(
                                        "%s does not accept parsing at some level (most probably citation)",
                                        __text__.path
                                    )
                            else:
                                self.logger.error("%s is not present", __text__.path)
                except Exception as E:
                    self.logger.error("Error parsing %s", __cts__)

        if cache:
            self.cache(self.inventory, self.__texts__)

        return self.inventory, self.__texts__
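    # Layout sketch of what parse() walks, derived from the glob patterns above
    # (angle-bracket names are placeholders):
    #
    #   <folder>/data/<textgroup>/__cts__.xml
    #   <folder>/data/<textgroup>/<work>/__cts__.xml
    #   <folder>/data/<textgroup>/<work>/<textgroup>.<work>.<version>.xml
    #
    #   inventory, texts = resolver.parse(["./corpora/repo1"], cache=True)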
    def getText(self, urn):
        """ Return a Text object

        :param urn: URN of a text to retrieve
        :type urn: str, URN
        :return: Textual resource and metadata
        :rtype: (text.Text, inventory.Text)
        """
        if not isinstance(urn, URN):
            urn = URN(urn)
        if len(urn) != 5:
            raise InvalidURN

        text = self.inventory[str(urn)]

        with io.open(text.path) as __xml__:
            resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(__xml__))

        return resource, text
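    # Retrieval sketch with a hypothetical version-level URN (getText requires
    # all five URN components, otherwise InvalidURN is raised):
    #
    #   resource, metadata = resolver.getText(
    #       "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
    #   )
    #   # `resource` is a TEXT_CLASS instance, `metadata` the inventory record.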
    def getCapabilities(self, urn=None, page=None, limit=None, inventory=None, lang=None, category=None,
                        pagination=True):
        """ Retrieve a slice of the inventory filtered by the given arguments

        :param urn: Partial URN to use to filter out resources
        :type urn: str
        :param page: Page to show
        :type page: int
        :param limit: Items per page
        :type limit: int
        :param inventory: Inventory name
        :type inventory: str
        :param lang: Language to filter on
        :type lang: str
        :param category: Type of elements to show
        :type category: str
        :param pagination: Activate pagination
        :type pagination: bool
        :return: ([Matches], Page, Count)
        :rtype: ([Text], int, int)
        """
        __PART = None
        if urn is not None:
            _urn = URN(urn)
            __PART = [None, None, URN.NAMESPACE, URN.TEXTGROUP, URN.WORK, URN.VERSION, URN.COMPLETE][len(_urn)]

        matches = [
            text
            for text in self.__texts__
            if
            (lang is None or (lang is not None and lang == text.lang)) and
            (urn is None or (urn is not None and text.urn.upTo(__PART) == urn)) and
            (text.citation is not None) and
            (
                category not in ["edition", "translation"] or
                (category in ["edition", "translation"] and category.lower() == text.subtype.lower())
            )
        ]

        if pagination:
            start_index, end_index, page, count = XMLFolderResolver.pagination(page, limit, len(matches))
        else:
            start_index, end_index, page, count = None, None, 0, len(matches)

        return matches[start_index:end_index], page, count
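    # Filtering sketch with hypothetical values; arguments left as None are not
    # applied as filters:
    #
    #   texts, page, count = resolver.getCapabilities(
    #       urn="urn:cts:latinLit:phi1294",  # partial URN, textgroup level
    #       category="edition",              # keep editions only
    #       lang="lat",
    #       page=1, limit=10
    #   )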