# Source code for oc_ocdm.graph.graph_set

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
from __future__ import annotations

from typing import TYPE_CHECKING

from SPARQLWrapper import SPARQLWrapper, RDFXML

from oc_ocdm.reader import Reader
from oc_ocdm.abstract_set import AbstractSet
from oc_ocdm.support.support import get_count, get_short_name

if TYPE_CHECKING:
    from typing import Dict, ClassVar, Tuple, Optional, List, Set
    from rdflib import ConjunctiveGraph

from rdflib import Graph, Namespace, URIRef

from oc_ocdm.graph.graph_entity import GraphEntity
from oc_ocdm.counter_handler.counter_handler import CounterHandler
from oc_ocdm.counter_handler.filesystem_counter_handler import FilesystemCounterHandler
from oc_ocdm.counter_handler.in_memory_counter_handler import InMemoryCounterHandler
from oc_ocdm.graph.entities.identifier import Identifier
from oc_ocdm.graph.entities.bibliographic.agent_role import AgentRole
from oc_ocdm.graph.entities.bibliographic.bibliographic_reference import BibliographicReference
from oc_ocdm.graph.entities.bibliographic.bibliographic_resource import BibliographicResource
from oc_ocdm.graph.entities.bibliographic.citation import Citation
from oc_ocdm.graph.entities.bibliographic.discourse_element import DiscourseElement
from oc_ocdm.graph.entities.bibliographic.pointer_list import PointerList
from oc_ocdm.graph.entities.bibliographic.reference_annotation import ReferenceAnnotation
from oc_ocdm.graph.entities.bibliographic.reference_pointer import ReferencePointer
from oc_ocdm.graph.entities.bibliographic.resource_embodiment import ResourceEmbodiment
from oc_ocdm.graph.entities.bibliographic.responsible_agent import ResponsibleAgent


class GraphSet(AbstractSet):
    """Factory and lookup table for the bibliographic graph entities of a dataset.

    Every ``add_*`` method builds one entity of a given kind inside its own
    named graph (``<base_iri><short name>/``), while ``res_to_entity`` maps
    each entity URI to the corresponding entity object.

    NOTE(review): nothing in this class inserts into ``res_to_entity``;
    presumably the entity constructors (which receive this set) register
    themselves -- confirm in ``GraphEntity``.
    """

    # Labels
    labels: ClassVar[Dict[str, str]] = {
        "an": "annotation",
        "ar": "agent role",
        "be": "bibliographic entry",
        "br": "bibliographic resource",
        "ci": "citation",
        "de": "discourse element",
        "id": "identifier",
        "pl": "single location pointer list",
        "ra": "responsible agent",
        "re": "resource embodiment",
        "rp": "in-text reference pointer"
    }

    def __init__(self, base_iri: str, info_dir: str = "", supplier_prefix: str = "",
                 wanted_label: bool = True) -> None:
        """Initialise an empty set.

        :param base_iri: prefix from which the IRI of every named graph is derived
        :param info_dir: when neither ``None`` nor empty, counters are handled by a
            ``FilesystemCounterHandler`` built on this value; otherwise an
            ``InMemoryCounterHandler`` is used
        :param supplier_prefix: string prepended to every newly generated count
        :param wanted_label: whether a textual label is generated for new entities
        """
        super().__init__()
        # The following variable maps a URIRef with the related graph entity
        self.res_to_entity: Dict[URIRef, GraphEntity] = {}
        self.base_iri: str = base_iri
        self.supplier_prefix: str = supplier_prefix
        self.wanted_label: bool = wanted_label

        # Graphs
        # The following structure of URL is quite important for the other classes
        # developed and should not be changed. The only part that can change is the
        # value of the base_iri
        self.g_an: str = base_iri + "an/"
        self.g_ar: str = base_iri + "ar/"
        self.g_be: str = base_iri + "be/"
        self.g_br: str = base_iri + "br/"
        self.g_ci: str = base_iri + "ci/"
        self.g_de: str = base_iri + "de/"
        self.g_id: str = base_iri + "id/"
        self.g_pl: str = base_iri + "pl/"
        self.g_ra: str = base_iri + "ra/"
        self.g_re: str = base_iri + "re/"
        self.g_rp: str = base_iri + "rp/"

        if info_dir is not None and info_dir != "":
            self.counter_handler: CounterHandler = FilesystemCounterHandler(info_dir)
        else:
            self.counter_handler: CounterHandler = InMemoryCounterHandler()

    def get_entity(self, res: URIRef) -> Optional[GraphEntity]:
        """Return the entity registered under ``res``, or ``None`` if there is none."""
        return self.res_to_entity.get(res)

    # Add resources related to bibliographic entities
    def add_an(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferenceAnnotation:
        """Return a ``ReferenceAnnotation``, reusing the registered one when possible.

        :param resp_agent: forwarded unchanged to the entity constructor
        :param source: forwarded unchanged to the entity constructor
        :param res: URI of an already existing entity; when ``None`` a new count
            (and, if wanted, a label) is generated through the counter handler
        :param preexisting_graph: forwarded unchanged to the entity constructor
        :raises ValueError: if ``res`` is given and its short name is not ``"an"``
        :return: the entity already registered under ``res`` or a new one
        """
        if res is not None and get_short_name(res) != "an":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferenceAnnotation entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_an, "an", res)
        return ReferenceAnnotation(cur_g, self, res, GraphEntity.iri_note,
                                   resp_agent, source, count, label, "an", preexisting_graph)

    def add_ar(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> AgentRole:
        """Return an ``AgentRole``; same contract as :meth:`add_an` with short name ``"ar"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"ar"``
        """
        if res is not None and get_short_name(res) != "ar":
            raise ValueError(f"Given res: <{res}> is inappropriate for an AgentRole entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_ar, "ar", res)
        return AgentRole(cur_g, self, res, GraphEntity.iri_role_in_time,
                         resp_agent, source, count, label, "ar", preexisting_graph)

    def add_be(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicReference:
        """Return a ``BibliographicReference``; same contract as :meth:`add_an` with short name ``"be"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"be"``
        """
        if res is not None and get_short_name(res) != "be":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicReference entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_be, "be", res)
        return BibliographicReference(cur_g, self, res, GraphEntity.iri_bibliographic_reference,
                                      resp_agent, source, count, label, "be", preexisting_graph)

    def add_br(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> BibliographicResource:
        """Return a ``BibliographicResource``; same contract as :meth:`add_an` with short name ``"br"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"br"``
        """
        if res is not None and get_short_name(res) != "br":
            raise ValueError(f"Given res: <{res}> is inappropriate for a BibliographicResource entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_br, "br", res)
        return BibliographicResource(cur_g, self, res, GraphEntity.iri_expression,
                                     resp_agent, source, count, label, "br", preexisting_graph)

    def add_ci(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Citation:
        """Return a ``Citation``; same contract as :meth:`add_an` with short name ``"ci"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"ci"``
        """
        if res is not None and get_short_name(res) != "ci":
            raise ValueError(f"Given res: <{res}> is inappropriate for a Citation entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_ci, "ci", res)
        return Citation(cur_g, self, res, GraphEntity.iri_citation,
                        resp_agent, source, count, label, "ci", preexisting_graph)

    def add_de(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> DiscourseElement:
        """Return a ``DiscourseElement``; same contract as :meth:`add_an` with short name ``"de"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"de"``
        """
        if res is not None and get_short_name(res) != "de":
            raise ValueError(f"Given res: <{res}> is inappropriate for a DiscourseElement entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_de, "de", res)
        return DiscourseElement(cur_g, self, res, GraphEntity.iri_discourse_element,
                                resp_agent, source, count, label, "de", preexisting_graph)

    def add_id(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> Identifier:
        """Return an ``Identifier``; same contract as :meth:`add_an` with short name ``"id"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"id"``
        """
        if res is not None and get_short_name(res) != "id":
            raise ValueError(f"Given res: <{res}> is inappropriate for an Identifier entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_id, "id", res)
        return Identifier(cur_g, self, res, GraphEntity.iri_identifier,
                          resp_agent, source, count, label, "id", preexisting_graph)

    def add_pl(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> PointerList:
        """Return a ``PointerList``; same contract as :meth:`add_an` with short name ``"pl"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"pl"``
        """
        if res is not None and get_short_name(res) != "pl":
            raise ValueError(f"Given res: <{res}> is inappropriate for a PointerList entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_pl, "pl", res)
        return PointerList(cur_g, self, res, GraphEntity.iri_singleloc_pointer_list,
                           resp_agent, source, count, label, "pl", preexisting_graph)

    def add_rp(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ReferencePointer:
        """Return a ``ReferencePointer``; same contract as :meth:`add_an` with short name ``"rp"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"rp"``
        """
        if res is not None and get_short_name(res) != "rp":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ReferencePointer entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_rp, "rp", res)
        return ReferencePointer(cur_g, self, res, GraphEntity.iri_intextref_pointer,
                                resp_agent, source, count, label, "rp", preexisting_graph)

    def add_ra(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResponsibleAgent:
        """Return a ``ResponsibleAgent``; same contract as :meth:`add_an` with short name ``"ra"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"ra"``
        """
        if res is not None and get_short_name(res) != "ra":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResponsibleAgent entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_ra, "ra", res)
        return ResponsibleAgent(cur_g, self, res, GraphEntity.iri_agent,
                                resp_agent, source, count, label, "ra", preexisting_graph)

    def add_re(self, resp_agent: str, source: Optional[str] = None, res: Optional[URIRef] = None,
               preexisting_graph: Optional[Graph] = None) -> ResourceEmbodiment:
        """Return a ``ResourceEmbodiment``; same contract as :meth:`add_an` with short name ``"re"``.

        :raises ValueError: if ``res`` is given and its short name is not ``"re"``
        """
        if res is not None and get_short_name(res) != "re":
            raise ValueError(f"Given res: <{res}> is inappropriate for a ResourceEmbodiment entity.")
        if res is not None and res in self.res_to_entity:
            return self.res_to_entity[res]
        cur_g, count, label = self._add(self.g_re, "re", res)
        return ResourceEmbodiment(cur_g, self, res, GraphEntity.iri_manifestation,
                                  resp_agent, source, count, label, "re", preexisting_graph)

    def _add(self, graph_url: str, short_name: str,
             res: Optional[URIRef] = None) -> Tuple[Graph, Optional[str], Optional[str]]:
        """Prepare the graph, count and label for an entity about to be created.

        :param graph_url: identifier given to the freshly created ``Graph``
        :param short_name: two-letter entity kind, used as counter name and label key
        :param res: URI of an existing entity, or ``None`` to mint a new count
        :return: ``(graph, count, label)``; ``count`` and ``label`` are both ``None``
            when ``res`` is given, and ``label`` is ``None`` when labels are not wanted
        """
        cur_g: Graph = Graph(identifier=graph_url)
        self._set_ns(cur_g)

        if res is not None:
            # An existing URI was supplied: no new count is minted, but the stored
            # counter is raised if the count found in ``res`` is ahead of it.
            try:
                res_count: int = int(get_count(res))
            except ValueError:
                # Non-numeric count: -1 is used so the comparison below is
                # presumably never true (assumes counters are non-negative).
                res_count = -1
            if res_count > self.counter_handler.read_counter(short_name):
                self.counter_handler.set_counter(res_count, short_name)
            return cur_g, None, None

        count: Optional[str] = self.supplier_prefix + str(self.counter_handler.increment_counter(short_name))
        label: Optional[str] = None
        if self.wanted_label:
            label = f"{self.labels[short_name]} {count} [{short_name}/{count}]"
        return cur_g, count, label

    def get_orphans(self) -> List[GraphEntity]:
        """Return the registered entities that no registered entity points to.

        An entity counts as referenced when its URI appears as the object of a
        triple whose subject is a registered entity, looking at that entity's
        own graph (``entity.g``).
        """
        full_set_of_entities: Set[URIRef] = set(self.res_to_entity)
        referenced_entities: Set[URIRef] = set()
        for res, entity in self.res_to_entity.items():
            for obj in entity.g.objects(subject=res, predicate=None):
                if isinstance(obj, URIRef):
                    referenced_entities.add(obj)

        set_of_orphan_res: Set[URIRef] = full_set_of_entities - referenced_entities

        result_list: List[GraphEntity] = []
        for orphan_res in set_of_orphan_res:
            orphan: Optional[GraphEntity] = self.get_entity(orphan_res)
            if orphan is not None:
                result_list.append(orphan)
        return result_list

    def remove_orphans_from_triplestore(self, ts_url: str, resp_agent: str) -> None:
        """Detach the entities flagged for deletion from whatever references them remotely.

        For every registered entity whose ``to_be_deleted`` flag is set, the
        triplestore at ``ts_url`` is asked (SPARQL ``CONSTRUCT`` over GET) for the
        triples of all subjects that have that entity as an object. The result is
        imported through ``Reader.import_entities_from_graph`` and each imported
        entity loses its triples pointing at the entity being deleted.

        :param ts_url: endpoint URL handed to ``SPARQLWrapper``
        :param resp_agent: forwarded to ``Reader.import_entities_from_graph``
        """
        sparql: SPARQLWrapper = SPARQLWrapper(ts_url)

        # Iterate over a snapshot: the import below receives ``self`` and
        # presumably registers new entities in ``res_to_entity``, which would
        # otherwise invalidate a live dict iterator.
        for entity_res, entity in list(self.res_to_entity.items()):
            if not entity.to_be_deleted:
                continue

            query: str = f"CONSTRUCT {{?s ?p ?o}} WHERE {{?s ?p ?o ; ?p_1 <{entity_res}>}}"
            sparql.setQuery(query)
            sparql.setMethod('GET')
            sparql.setReturnFormat(RDFXML)

            result: ConjunctiveGraph = sparql.query().convert()
            if result is None:
                continue

            imported_entities: List[GraphEntity] = Reader.import_entities_from_graph(self, result, resp_agent)
            for imported_entity in imported_entities:
                imported_entity.g.remove((imported_entity.res, None, entity_res))

    def commit_changes(self) -> None:
        """Commit every registered entity and unregister the ones flagged for deletion."""
        # A snapshot is required because entries are deleted from the dict
        # inside the loop; deleting while iterating the live view raises
        # "RuntimeError: dictionary changed size during iteration".
        for res, entity in list(self.res_to_entity.items()):
            entity.commit_changes()
            if entity.to_be_deleted:
                del self.res_to_entity[res]

    def _set_ns(self, g: Graph) -> None:
        """Bind on ``g`` the prefixes of this set's graphs and of the ``GraphEntity`` namespaces."""
        bindings = (
            # Named graphs of this set (derived from base_iri)
            ("an", Namespace(self.g_an)),
            ("ar", Namespace(self.g_ar)),
            ("be", Namespace(self.g_be)),
            ("ci", Namespace(self.g_ci)),
            ("de", Namespace(self.g_de)),
            ("br", Namespace(self.g_br)),
            ("id", Namespace(self.g_id)),
            ("pl", Namespace(self.g_pl)),
            ("ra", Namespace(self.g_ra)),
            ("re", Namespace(self.g_re)),
            ("rp", Namespace(self.g_rp)),
            # Namespaces exposed by GraphEntity
            ("biro", GraphEntity.BIRO),
            ("co", GraphEntity.CO),
            ("c4o", GraphEntity.C4O),
            ("cito", GraphEntity.CITO),
            ("datacite", GraphEntity.DATACITE),
            ("dcterms", GraphEntity.DCTERMS),
            ("deo", GraphEntity.DEO),
            ("doco", GraphEntity.DOCO),
            ("fabio", GraphEntity.FABIO),
            ("foaf", GraphEntity.FOAF),
            ("frbr", GraphEntity.FRBR),
            ("literal", GraphEntity.LITERAL),
            ("oa", GraphEntity.OA),
            ("oco", GraphEntity.OCO),
            ("prism", GraphEntity.PRISM),
            ("pro", GraphEntity.PRO),
        )
        for prefix, namespace in bindings:
            g.namespace_manager.bind(prefix, namespace)

    def get_an(self) -> Tuple[ReferenceAnnotation, ...]:
        """Return all registered ``ReferenceAnnotation`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, ReferenceAnnotation))

    def get_ar(self) -> Tuple[AgentRole, ...]:
        """Return all registered ``AgentRole`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, AgentRole))

    def get_be(self) -> Tuple[BibliographicReference, ...]:
        """Return all registered ``BibliographicReference`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, BibliographicReference))

    def get_br(self) -> Tuple[BibliographicResource, ...]:
        """Return all registered ``BibliographicResource`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, BibliographicResource))

    def get_ci(self) -> Tuple[Citation, ...]:
        """Return all registered ``Citation`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, Citation))

    def get_de(self) -> Tuple[DiscourseElement, ...]:
        """Return all registered ``DiscourseElement`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, DiscourseElement))

    def get_id(self) -> Tuple[Identifier, ...]:
        """Return all registered ``Identifier`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, Identifier))

    def get_pl(self) -> Tuple[PointerList, ...]:
        """Return all registered ``PointerList`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, PointerList))

    def get_rp(self) -> Tuple[ReferencePointer, ...]:
        """Return all registered ``ReferencePointer`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, ReferencePointer))

    def get_ra(self) -> Tuple[ResponsibleAgent, ...]:
        """Return all registered ``ResponsibleAgent`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, ResponsibleAgent))

    def get_re(self) -> Tuple[ResourceEmbodiment, ...]:
        """Return all registered ``ResourceEmbodiment`` entities, in registry order."""
        return tuple(entity for entity in self.res_to_entity.values()
                     if isinstance(entity, ResourceEmbodiment))