# Source code for oc_ocdm.graph.entities.bibliographic_entity

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2016, Silvio Peroni <essepuntato@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright notice
# and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.
from __future__ import annotations

from typing import TYPE_CHECKING

from oc_ocdm.decorators import accepts_only

if TYPE_CHECKING:
    from typing import List, Dict, Optional
    from rdflib import URIRef
    from oc_ocdm.graph.entities.identifier import Identifier
from oc_ocdm.graph.graph_entity import GraphEntity


class BibliographicEntity(GraphEntity):
    """The base class for each bibliographic entity of the OpenCitations DataModel (OCDM)."""

    def merge(self, other: BibliographicEntity) -> None:
        """
        **WARNING:** ``BibliographicEntity`` **is an abstract class that cannot be instantiated at runtime.
        As such, it's only possible to execute this method on entities generated from**
        ``BibliographicEntity``'s **subclasses. Please, refer to their documentation of the** `merge` **method.**

        After delegating to the parent class' ``merge``, every identifier of ``other`` is
        attached to the current entity and duplicated identifiers are then collapsed
        (see :meth:`remove_duplicated_identifiers`).

        :param other: The entity which will be marked as to be deleted and whose properties will
         be merged into the current entity.
        :type other: BibliographicEntity
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        super().merge(other)

        # Bring over every identifier that was attached to 'other'.
        for cur_id in other.get_identifiers():
            self.has_identifier(cur_id)

        # The special semantics associated to the identifiers
        # of a bibliographic entity requires them to be uniquely
        # defined based on their scheme and literal value:
        self.remove_duplicated_identifiers()

    # HAS IDENTIFIER
    def get_identifiers(self) -> List[Identifier]:
        """
        Getter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        :return: A list with one ``Identifier`` for each value found; the list is empty
         when no value is found (this method never returns None)
        """
        uri_list: List[URIRef] = self._get_multiple_uri_references(GraphEntity.iri_has_identifier, 'id')
        # Each URI is turned into an Identifier entity through the enclosing graph set.
        return [self.g_set.add_id(self.resp_agent, self.source, uri) for uri in uri_list]

    @accepts_only('id')
    def has_identifier(self, id_res: Identifier) -> None:
        """
        Setter method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        `In addition to the internal dataset identifier assigned to the entity upon initial
        curation (format: [entity short name]/[local identifier]), other external third-party
        identifiers can be specified through this attribute (e.g. DOI, ORCID, PubMedID).`

        :param id_res: The value that will be set as the object of the property related to this method
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        self.g.add((self.res, GraphEntity.iri_has_identifier, id_res.res))

    @accepts_only('id')
    def remove_identifier(self, id_res: Optional[Identifier] = None) -> None:
        """
        Remover method corresponding to the ``datacite:hasIdentifier`` RDF predicate.

        **WARNING: this is a non-functional property, hence, if the parameter
        is None, any existing value will be removed!**

        :param id_res: If not None, the specific object value that will be removed from the property
         related to this method (defaults to None)
        :type id_res: Identifier
        :raises TypeError: if the parameter is of the wrong type
        :return: None
        """
        # A None object in the triple pattern acts as a wildcard: every
        # 'hasIdentifier' value of this entity is removed in that case.
        target = id_res.res if id_res is not None else None
        self.g.remove((self.res, GraphEntity.iri_has_identifier, target))

    def remove_duplicated_identifiers(self) -> None:
        """
        Utility function that automatically scans the list of Identifier entities associated to the
        current bibliographic entity (through the ``datacite:hasIdentifier`` RDF predicate) and it removes
        duplicated entries.

        Two distinct ``Identifier`` entities are considered the same if they share both
        the scheme (``datacite:usesIdentifierScheme``) and the literal value (``literal:hasLiteralValue``).

        For each group of equivalent identifiers, the first one encountered is kept attached
        to the current entity and each of the others is merged into it.

        **WARNING: identifiers lacking either the scheme or the literal value cannot be
        compared and are NOT re-attached to the current entity.**

        :return: None
        """

        # Identifiers should be merged based on the
        # correspondence between both their scheme and literal value!
        id_list: List[Identifier] = self.get_identifiers()
        # We remove every identifier from 'self': only unique ones
        # will be re-associated with 'self'.
        self.remove_identifier()

        # We use a nested dictionary which associates the 'schema-literal_value'
        # pair to the corresponding identifier object
        # (ex. id_dict[ISSN][1234-5678] <- base_iri:id/34).
        id_dict: Dict[URIRef, Dict[str, Identifier]] = {}
        for identifier in id_list:
            schema: Optional[URIRef] = identifier.get_scheme()
            literal_value: Optional[str] = identifier.get_literal_value()
            if schema is None or literal_value is None:
                # NOTE(review): incomplete identifiers stay detached from 'self'
                # after this call -- confirm that this is the intended behaviour.
                continue

            same_schema_ids: Dict[str, Identifier] = id_dict.setdefault(schema, {})
            if literal_value in same_schema_ids:
                # An equivalent Identifier has already been kept: the current
                # one is merged into it and is not re-attached to 'self'.
                same_schema_ids[literal_value].merge(identifier)
            else:
                same_schema_ids[literal_value] = identifier
                self.has_identifier(identifier)  # the Identifier is kept!