# Source code for qwikidata.linked_data_interface

# Copyright 2019 Kensho Technologies, LLC.
"""Module for Wikidata linked data interface endpoints."""
import logging

import requests
from qwikidata import typedefs

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default base URL of the Wikidata linked data interface; entity URLs are built
# from it as "<base_url>/<entity_id>.json".
WIKIDATA_LDI_URL = "https://www.wikidata.org/wiki/Special:EntityData"
# Entity ids accepted by this module must begin with one of these prefixes.
VALID_ENTITY_PREFIXES = ("Q", "P", "L")


class LdiResponseNotOk(Exception):
    """Raised when the linked data interface returns a response that is not OK."""
class InvalidEntityId(Exception):
    """Raised when an entity id is not a string or lacks a valid entity prefix."""
def get_entity_dict_from_api(
    entity_id: typedefs.EntityId, base_url: str = WIKIDATA_LDI_URL
) -> typedefs.EntityDict:
    """Get a dictionary representing a wikidata entity from the linked data interface API.

    https://www.wikidata.org/wiki/Wikidata:Data_access#Linked_Data_interface

    Parameters
    ----------
    entity_id
      A Wikidata entity id beginning with "Q", "P", or "L" (e.g. "Q42")
    base_url
      The linked data interface URL to use

    Returns
    -------
    typedefs.EntityDict
      The dictionary stored under the first key of the response's top level
      "entities" mapping.

    Raises
    ------
    InvalidEntityId
      If `entity_id` is not a string, is empty, or does not start with one of
      `VALID_ENTITY_PREFIXES`.
    LdiResponseNotOk
      If the HTTP response is not OK.

    Notes
    -----
    When the id returned by the API differs from `entity_id`, a warning is
    logged and the returned entity's dictionary is still handed back.

    Examples
    --------
    Get the entity dictionary for item Q42, ::

        >>> from pprint import pprint
        >>> entity_dict = get_entity_dict_from_api('Q42')
        >>> pprint(entity_dict, indent=4, depth=1)
        {   'aliases': {...},
            'claims': {...},
            'descriptions': {...},
            'id': 'Q42',
            'labels': {...},
            'lastrevid': 716282445,
            'modified': '2018-07-27T08:03:25Z',
            'ns': 0,
            'pageid': 138,
            'sitelinks': {...},
            'title': 'Q42',
            'type': 'item'}

    """
    if not isinstance(entity_id, str):
        raise InvalidEntityId(
            'entity_id must be a string (e.g. "Q42") but got entity_id={}.'.format(entity_id)
        )

    # str.startswith accepts a tuple of prefixes and is False for "", so an empty
    # id raises InvalidEntityId instead of the IndexError that entity_id[0] gives.
    if not entity_id.startswith(VALID_ENTITY_PREFIXES):
        raise InvalidEntityId(
            "entity_id must start with one of {} but got entity_id={}.".format(
                VALID_ENTITY_PREFIXES, entity_id
            )
        )

    url = "{}/{}.json".format(base_url, entity_id)
    response = requests.get(url)
    if not response.ok:
        raise LdiResponseNotOk(
            "input entity id: {}, "
            "response.headers: {}, "
            "response.status_code: {}, "
            "response.text: {}".format(
                entity_id, response.headers, response.status_code, response.text
            )
        )
    entity_dict_full = response.json()

    # remove redundant top level keys
    returned_entity_id = next(iter(entity_dict_full["entities"]))
    entity_dict = entity_dict_full["entities"][returned_entity_id]

    # The key under "entities" can differ from the requested id; report that
    # case as a redirect but still return the entity that came back.
    if entity_id != returned_entity_id:
        logger.warning(
            "Wikidata redirect detected.  Input entity id={}. Returned entity id={}.".format(
                entity_id, returned_entity_id
            )
        )

    return entity_dict