Source code for oc_ocdm.graph.entities.bibliographic_entity

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
from __future__ import annotations

from typing import TYPE_CHECKING

from oc_ocdm.decorators import accepts_only

if TYPE_CHECKING:
    from typing import List, Dict, Optional
    from rdflib import URIRef
    from oc_ocdm.graph.entities.identifier import Identifier
from oc_ocdm.graph.graph_entity import GraphEntity


class BibliographicEntity(GraphEntity):
    """The base class for each bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def merge(self, other: BibliographicEntity) -> None:
        """
        **WARNING:** ``BibliographicEntity`` **is an abstract class that cannot be instantiated
        at runtime. As such, it's only possible to execute this method on entities generated
        from** ``BibliographicEntity``'s **subclasses. Please, refer to their documentation of
        the** `merge` **method.**

        After delegating to the parent class' ``merge``, every identifier returned by
        ``other.get_identifiers()`` is attached to the current entity and the resulting
        identifier set is deduplicated through ``remove_duplicated_identifiers``.

        :param other: The entity which will be marked as to be deleted and whose properties
          will be merged into the current entity.
        :type other: BibliographicEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        super().merge(other)

        for cur_id in other.get_identifiers():
            self.has_identifier(cur_id)

        # The special semantics associated to the identifiers
        # of a bibliographic entity requires them to be uniquely
        # defined based on their scheme and literal value:
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        :return: A list holding one ``Identifier`` for each URI reference found for the
          predicate; the list is empty (never None) when no reference is found
        """
        uri_list: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        # Each URI is turned into an Identifier object through the graph set.
        return [self.g_set.add_id(self.resp_agent, self.source, uri) for uri in uri_list]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        self.g.add((self.res, GraphEntity.iri_has_identifier, id_res.res))

    @accepts_only('id')
    def remove_identifier(self, id_res: Optional[Identifier] = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
          related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A ``None`` object in the triple pattern acts as a wildcard: every
        # value of the predicate is removed for this entity.
        target = id_res.res if id_res is not None else None
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that automatically scans the list of Identifier entities associated to the
        current bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate)
        and it removes duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value
        (``literal:hasLiteralValue``).

        **NOTE:** all the identifiers are detached first and only the unique ones that expose
        both a scheme and a literal value are re-attached. An identifier lacking either of
        the two is therefore no longer associated with this entity once the method returns.

        :return: None
        """
        # Identifiers should be merged based on the
        # correspondence between both their scheme and literal value!
        id_list: List[Identifier] = self.get_identifiers()

        # We remove every identifier from 'self': only unique ones
        # will be re-associated with 'self'.
        self.remove_identifier()

        # We use a nested dictionary which associates the 'schema-literal_value'
        # pair to the corresponding identifier object
        # (ex. id_dict[ISSN][1234-5678] <- base_iri:id/34).
        id_dict: Dict[URIRef, Dict[str, Identifier]] = {}
        for identifier in id_list:
            schema: Optional[URIRef] = identifier.get_scheme()
            literal_value: Optional[str] = identifier.get_literal_value()
            if schema is None or literal_value is None:
                # Incomplete identifiers cannot be compared: they are skipped
                # (and, having been detached above, stay detached).
                continue

            same_scheme_ids: Dict[str, Identifier] = id_dict.setdefault(schema, {})
            if literal_value in same_scheme_ids:
                # Duplicate: fold it into the first one seen.
                id_to_be_kept: Identifier = same_scheme_ids[literal_value]
                id_to_be_kept.merge(identifier)  # the Identifier is dropped!
            else:
                same_scheme_ids[literal_value] = identifier
                self.has_identifier(identifier)  # the Identifier is kept!