# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import io
from six import text_type as str
from MyCapytain.resources.inventory import TextInventory, TextGroup, Work, Citation
from MyCapytain.resources.texts.local import Text
from MyCapytain.common.reference import URN
from lxml.objectify import makeparser, parse as objectify
from capitains_nautilus.errors import *
from glob import glob
import os.path
from capitains_nautilus.inventory.proto import InventoryResolver
from capitains_nautilus import _cache_key
from capitains_nautilus.cache import BaseCache
import logging
import pickle
class XMLFolderResolver(InventoryResolver):
    """ XML Folder Based resolver.

    :param resource: Resource should be a list of folders retaining data as Capitains Guidelines Repositories
    :type resource: [str]
    :param name: Key used to differentiate Repository and thus enabling different repo to be used
    :type name: str
    :param inventories: Inventories to use (kept for interface compatibility)
    :type inventories: list
    :param cache: Cache object to be used for the inventory
    :type cache: BaseCache
    :param logger: Logging object
    :type logger: logging.Logger
    :param auto_parse: Parse the resource folders on instantiation when no cached inventory is found
    :type auto_parse: bool

    :cvar TEXT_CLASS: Text Class [not instantiated] to be used to parse Texts. Can be changed to support Cache for example
    :type TEXT_CLASS: class
    :ivar inventory_cache_key: Werkzeug Cache key to get or set cache for the TextInventory
    :ivar texts_metadata_cache_key: Werkzeug Cache key to get or set cache for lists of metadata texts objects
    :ivar texts_parsed_cache_key: Werkzeug Cache key prefix to get or set cache for parsed texts objects
    :ivar source: Original resource parameter

    .. warning :: This resolver does not support inventories
    """
    #: Class used to instantiate parsed texts; subclass override point (see getText / parse).
    TEXT_CLASS = Text

    def __init__(self, resource, inventories=None, cache=None, name=None, logger=None, auto_parse=True):
        """ Initiate the XMLResolver """
        super(XMLFolderResolver, self).__init__(resource=resource)
        if not isinstance(cache, BaseCache):
            # Fall back to an in-memory cache when no valid backend is supplied.
            cache = BaseCache()
        self.__inventories__ = inventories
        self.__parser__ = makeparser()
        self.__cache = cache

        # Default the repository name BEFORE creating the logger, so a default
        # logger is named after the repository rather than being the root logger.
        self.name = name
        if not name:
            self.name = "repository"
        self.logger = logger
        if not logger:
            self.logger = logging.getLogger(self.name)

        self.TEXT_CLASS = XMLFolderResolver.TEXT_CLASS
        self.works = []

        self.inventory_cache_key = _cache_key("Nautilus", "Inventory", "Resources", self.name)
        self.texts_metadata_cache_key = _cache_key("Nautilus", "Inventory", "TextsMetadata", self.name)
        self.texts_parsed_cache_key = _cache_key("Nautilus", "Inventory", "TextsParsed", self.name)

        # Reuse a cached inventory when both pieces are present; otherwise
        # optionally parse the resource folders from disk.
        __inventory__ = self.__cache.get(self.inventory_cache_key)
        __texts__ = self.__cache.get(self.texts_metadata_cache_key)
        if __inventory__ and __texts__:
            self.inventory, self.__texts__ = __inventory__, __texts__
        elif auto_parse:
            self.parse(resource)
[docs] def xmlparse(self, file):
""" Parse a XML file
:param file: Opened File
:return: Tree
"""
return objectify(file, parser=self.__parser__)
[docs] def cache(self, inventory, texts):
""" Cache main objects of the resolver : TextInventory and Texts Metadata objects
:param inventory: Inventory resource
:type inventory: TextInventory
:param texts: List of Text Metadata Objects
:type texts: [MyCapytain.resources.inventory.Text]
"""
self.inventory, self.__texts__ = inventory, texts
self.__cache.set(self.inventory_cache_key, inventory)
self.__cache.set(self.texts_metadata_cache_key, texts)
[docs] def text_to_cache(self, text):
""" Cache a text
:param text: Text to be cached
"""
self.__cache.set(
_cache_key(self.texts_parsed_cache_key, str(text.urn)),
text
)
[docs] def cache_to_text(self, urn):
""" Get a text from Cache
:param text: Text to be cached
:return: Text object
:rtype: Text
"""
return self.__cache.get(
_cache_key(self.texts_parsed_cache_key, str(urn)),
)
[docs] def flush(self):
""" Flush current resolver objects and cache
"""
self.inventory = TextInventory()
for text in self.__texts__:
self.__cache.delete(_cache_key(self.texts_parsed_cache_key, str(text.urn)))
self.__texts__ = []
self.__cache.delete(self.inventory_cache_key)
self.__cache.delete(self.texts_metadata_cache_key)
[docs] def parse(self, resource, cache=True):
""" Parse a list of directories ans
:param resource: List of folders
:param cache: Auto cache the results
:return: An inventory resource and a list of Text metadata-objects
"""
for folder in resource:
textgroups = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
for __cts__ in textgroups:
try:
with io.open(__cts__) as __xml__:
textgroup = TextGroup(
resource=__xml__
)
str_urn = str(textgroup.urn)
if str_urn in self.inventory.textgroups:
self.inventory.textgroups[str_urn].update(textgroup)
else:
self.inventory.textgroups[str_urn] = textgroup
for __subcts__ in glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(__cts__))):
with io.open(__subcts__) as __xml__:
work = Work(
resource=__xml__,
parents=[self.inventory.textgroups[str_urn]]
)
work_urn = str(work.urn)
if work_urn in self.inventory.textgroups[str_urn].works:
self.inventory.textgroups[str_urn].works[work_urn].update(work)
else:
self.inventory.textgroups[str_urn].works[work_urn] = work
for __textkey__ in work.texts:
__text__ = self.inventory.textgroups[str_urn].works[work_urn].texts[__textkey__]
__text__.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
directory=os.path.dirname(__subcts__),
textgroup=__text__.urn.textgroup,
work=__text__.urn.work,
version=__text__.urn.version
)
if os.path.isfile(__text__.path):
try:
with io.open(__text__.path) as f:
t = Text(resource=self.xmlparse(f))
cites = list()
for cite in [c for c in t.citation][::-1]:
if len(cites) >= 1:
cites.append(Citation(
xpath=cite.xpath.replace("'", '"'),
scope=cite.scope.replace("'", '"'),
name=cite.name,
child=cites[-1]
))
else:
cites.append(Citation(
xpath=cite.xpath.replace("'", '"'),
scope=cite.scope.replace("'", '"'),
name=cite.name
))
__text__.citation = cites[-1]
self.logger.info("%s has been parsed ", __text__.path)
if __text__.citation:
self.__texts__.append(__text__)
else:
self.logger.error("%s has no passages", __text__.path)
except Exception:
self.logger.error(
"%s does not accept parsing at some level (most probably citation) ",
__text__.path
)
else:
self.logger.error("%s is not present", __text__.path)
except Exception as E:
self.logger.error("Error parsing %s ", __cts__)
if cache:
self.cache(self.inventory, self.__texts__)
return self.inventory, self.__texts__
[docs] def getText(self, urn):
""" Returns a Text object
:param urn: URN of a text to retrieve
:type urn: str, URN
:return: Textual resource and metadata
:rtype: (text.Text, inventory.Text)
"""
if not isinstance(urn, URN):
urn = URN(urn)
if len(urn) != 5:
raise InvalidURN
text = self.inventory[str(urn)]
with io.open(text.path) as __xml__:
resource = self.TEXT_CLASS(urn=urn, resource=self.xmlparse(__xml__))
return resource, text
[docs] def getCapabilities(self,
urn=None, page=None, limit=None,
inventory=None, lang=None, category=None, pagination=True
):
""" Retrieve a slice of the inventory filtered by given arguments
:param urn: Partial URN to use to filter out resources
:type urn: str
:param page: Page to show
:type page: int
:param limit: Item Per Page
:type limit: int
:param inventory: Inventory name
:type inventory: str
:param lang: Language to filter on
:type lang: str
:param category: Type of elements to show
:type category: str
:param pagination: Activate pagination
:type pagination: bool
:return: ([Matches], Page, Count)
:rtype: ([Text], int, int)
"""
__PART = None
if urn is not None:
_urn = URN(urn)
__PART = [None, None, URN.NAMESPACE, URN.TEXTGROUP, URN.WORK, URN.VERSION, URN.COMPLETE][len(_urn)]
matches = [
text
for text in self.__texts__
if (lang is None or (lang is not None and lang == text.lang)) and
(urn is None or (urn is not None and text.urn.upTo(__PART) == urn)) and
(text.citation is not None) and
(category not in ["edition", "translation"] or (category in ["edition", "translation"] and category.lower() == text.subtype.lower()))
]
if pagination:
start_index, end_index, page, count = XMLFolderResolver.pagination(page, limit, len(matches))
else:
start_index, end_index, page, count = None, None, 0, len(matches)
return matches[start_index:end_index], page, count