Source code for qwikidata.claim

# Copyright 2019 Kensho Technologies, LLC.
"""Module for Wikidata Claims (aka Statements)."""

from collections import OrderedDict
from collections.abc import Sequence
from typing import List, Union, overload

from qwikidata import typedefs
from qwikidata.snak import WikidataSnak


class WikidataReference:
    """A reference backing a claim about a Wikidata Entity.

    See: https://www.wikidata.org/wiki/Help:Sources

    An instance can be built from an entity dictionary as,

    .. code-block:: python

        >>> reference_dict = q42_dict['claims']['P69'][0]['references'][0]
        >>> wikidata_reference = WikidataReference(reference_dict)

    Parameters
    ----------
    reference_dict
        A dictionary representing a Wikidata reference.
        See `the wikibase JSON data model docs`_ for a description of the format.

    Attributes
    ----------
    referencehash: str
        Unique id for this reference
    snaks: collections.OrderedDict
        Maps property id to list of :py:class:`.WikidataSnak`
    """

    def __init__(self, reference_dict: typedefs.ReferenceDict) -> None:
        self._validate_reference_dict(reference_dict)
        self._reference_dict = reference_dict
        self.referencehash = reference_dict["hash"]

        # Walk "snaks-order" so the mapping keeps the order given by the input.
        self.snaks = OrderedDict(
            (
                ordered_id,
                [WikidataSnak(raw_snak) for raw_snak in reference_dict["snaks"][ordered_id]],
            )
            for ordered_id in reference_dict["snaks-order"]
        )  # type: OrderedDict

    def _validate_reference_dict(self, reference_dict: typedefs.ReferenceDict) -> None:
        """Raise ValueError if any required key is absent from reference_dict."""
        required_keys = ["hash", "snaks", "snaks-order"]
        if any(key not in reference_dict for key in required_keys):
            raise ValueError(
                "required reference_dict keys are {} but only found {}".format(
                    required_keys, list(reference_dict.keys())
                )
            )

    def __str__(self) -> str:
        template = "WikidataReference(hash={}, snaks={})"
        return template.format(self.referencehash, self.snaks)

    def __repr__(self) -> str:
        # repr deliberately mirrors str.
        return self.__str__()
class WikidataQualifier:
    """A qualifier attached to a claim about a Wikidata Entity.

    See: https://www.wikidata.org/wiki/Help:Qualifiers

    An instance can be built from an entity dictionary as,

    .. code-block:: python

        >>> qualifier_dict = q42_dict['claims']['P69'][0]['qualifiers']['P582'][0]
        >>> wikidata_qualifier = WikidataQualifier(qualifier_dict)

    Parameters
    ----------
    qualifier_dict
        A dictionary representing a Wikidata qualifier.
        See `the wikibase JSON data model docs`_ for a description of the format.

    Attributes
    ----------
    qualifierhash: str
        Unique id for this qualifier
    snak: WikidataSnak
        The snak for this qualifier
    """

    def __init__(self, qualifier_dict: typedefs.QualifierDict) -> None:
        self._validate_qualifier_dict(qualifier_dict)
        self._qualifier_dict = qualifier_dict
        self.qualifierhash = qualifier_dict["hash"]
        # The whole qualifier dictionary is handed to WikidataSnak as-is.
        self.snak = WikidataSnak(qualifier_dict)

    def _validate_qualifier_dict(self, qualifier_dict: typedefs.QualifierDict) -> None:
        """Raise ValueError if any required key is absent from qualifier_dict."""
        required_keys = ["hash", "snaktype", "property", "datatype"]
        if any(key not in qualifier_dict for key in required_keys):
            raise ValueError(
                "required qualifier_dict keys are {} but only found {}".format(
                    required_keys, list(qualifier_dict.keys())
                )
            )

    def __str__(self) -> str:
        template = "WikidataQualifier(hash={}, snak={})"
        return template.format(self.qualifierhash, self.snak)

    def __repr__(self) -> str:
        # repr deliberately mirrors str.
        return self.__str__()
class WikidataClaim:
    """A claim aka statement about a Wikidata Entity.

    From the Wikibase data model docs,

    "Statements describe the claim of a statement and list references for this claim.
    Every Statement refers to one particular Entity, called the subject of the Statement.
    There is always one main Snak that forms the most important part of the statement.
    Moreover, there can be zero or more additional PropertySnaks that describe the
    Statement in more detail. These qualifier Snaks (or "qualifiers" for short) store
    additional information that does not directly refer to the subject (e.g., the time
    at which the main part of the statement was valid). References are provided as a
    list (the order is significant in some contexts, especially for displaying a main
    reference)."
    -- https://www.mediawiki.org/wiki/Wikibase/DataModel

    This class can be initialized from an entity dictionary as,

    .. code-block:: python

        >>> claim_dict = q42_dict['claims']['P551'][0]
        >>> wikidata_claim = WikidataClaim(claim_dict)

    Parameters
    ----------
    claim_dict: dict
        A dictionary representing a Wikidata claim.
        See `the wikibase JSON data model docs`_ for a description of the format.

    Attributes
    ----------
    claim_id: str
        Unique id for this claim
    property_id: PropertyId
        A Wikidata property id (e.g. "P551")
    claim_type: str
        One of ["claim", "statement"] (statements may have references)
    rank: str
        One of ["preferred", "normal", "deprecated"]
    mainsnak: :py:class:`.WikidataSnak`
        The mainsnak of this claim
    qualifiers: collections.OrderedDict
        Maps property id to list of :py:class:`WikidataQualifier`
    references: list
        A list of :py:class:`WikidataReference`
    qualifiers_order: list
        The order of the property ids in qualifiers

    Raises
    ------
    ValueError
        If `claim_dict` lacks any of the keys "id", "type", "rank", "mainsnak".

    .. _the wikibase JSON data model docs: https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON
    """

    def __init__(self, claim_dict: typedefs.ClaimDict) -> None:
        self._validate_claim_dict(claim_dict)
        self._claim_dict = claim_dict

        # Attributes are populated here, not inside the validator, so that
        # validation stays free of side effects and `mainsnak` is guaranteed
        # to exist before `property_id` is derived from it.
        self.claim_id = claim_dict["id"]
        self.claim_type = claim_dict["type"]
        self.rank = claim_dict["rank"]
        self.mainsnak = WikidataSnak(claim_dict["mainsnak"])
        self.property_id = self.mainsnak.property_id

        self.qualifiers = (
            OrderedDict()
        )  # type: OrderedDict[typedefs.PropertyId, List[WikidataQualifier]]
        self.qualifiers_order = claim_dict.get("qualifiers-order", [])
        if "qualifiers" in claim_dict:
            # Iterate in "qualifiers-order" so the mapping preserves that order.
            for property_id in self.qualifiers_order:
                qualifier_dicts = claim_dict["qualifiers"][property_id]
                self.qualifiers[property_id] = [WikidataQualifier(qd) for qd in qualifier_dicts]

        # "references" is optional; an absent key yields an empty list.
        self.references = [
            WikidataReference(reference_dict)
            for reference_dict in claim_dict.get("references", [])
        ]  # type: List[WikidataReference]

    def _validate_claim_dict(self, claim_dict: typedefs.ClaimDict) -> None:
        """Raise ValueError if claim_dict lacks a required key; has no side effects."""
        _REQUIRED_KEYS = ["id", "type", "rank", "mainsnak"]
        for req_key in _REQUIRED_KEYS:
            if req_key not in claim_dict:
                raise ValueError(
                    "required claim_dict keys are {} but only found {}".format(
                        _REQUIRED_KEYS, list(claim_dict.keys())
                    )
                )

    def __str__(self) -> str:
        return "WikidataClaim(type={}, rank={}, mainsnak={}, qualifiers={})".format(
            self.claim_type, self.rank, self.mainsnak, self.qualifiers
        )

    def __repr__(self) -> str:
        return self.__str__()
class WikidataClaimGroup(Sequence):
    """A sequence of :py:class:`WikidataClaim` instances sharing one property id.

    For example the claim group for "Douglas Adams" (Q42) with property
    "residence" (P551) has three elements.

    An instance can be built from an entity dictionary as,

    .. code-block:: python

        >>> claim_group = WikidataClaimGroup(q42_dict['claims']['P551'])

    Parameters
    ----------
    claim_list: list
        A list of claim dictionaries representing a Wikidata claim group.
        See `the wikibase JSON data model docs`_ for a description of the format.

    .. _the wikibase JSON data model docs: https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON
    """

    def __init__(self, claim_list: typedefs.ClaimList) -> None:
        super().__init__()
        self._validate_claim_list(claim_list)
        self._claim_list = claim_list
        self._claims = [WikidataClaim(raw_claim) for raw_claim in claim_list]

        distinct_ids = {claim.mainsnak.property_id for claim in self._claims}
        if len(distinct_ids) > 1:
            raise ValueError(
                "claims in a claim list must all have the same property id but found multiple property ids {}".format(
                    distinct_ids
                )
            )
        # At this point there is either exactly one id or none (empty group).
        self.property_id = (
            distinct_ids.pop() if distinct_ids else None
        )  # type: Union[typedefs.PropertyId, None]

    def _validate_claim_list(self, claim_list: typedefs.ClaimList) -> None:
        """Raise TypeError unless claim_list is a list."""
        if isinstance(claim_list, list):
            return
        raise TypeError("claim_list must be a list but got {}.".format(type(claim_list)))

    @overload
    def __getitem__(self, indx: int) -> WikidataClaim:
        ...

    @overload
    def __getitem__(self, indx: slice) -> List[WikidataClaim]:
        ...

    def __getitem__(self, indx: Union[int, slice]) -> Union[WikidataClaim, List[WikidataClaim]]:
        # Indexing and slicing are delegated to the underlying list of claims.
        return self._claims[indx]

    def __len__(self) -> int:
        return len(self._claims)

    def __str__(self) -> str:
        template = "WikidataClaimGroup(property_id={}, claims={})"
        return template.format(self.property_id, self._claims)

    def __repr__(self) -> str:
        # repr deliberately mirrors str.
        return self.__str__()