Source code for qwikidata.sparql

# Copyright 2019 Kensho Technologies, LLC.
"""Module for the Wikidata SPARQL endpoint."""
from typing import Dict, List, Union

import requests

WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"


def return_sparql_query_results(
    query_string: str, wikidata_sparql_url: str = WIKIDATA_SPARQL_URL
) -> Dict:
    """Run a SPARQL query against an endpoint and return the decoded JSON reply.

    The query is sent as an HTTP GET request with the query text and a
    ``format=json`` parameter in the query string.

    Parameters
    ----------
    query_string: str
        The SPARQL query to execute.
    wikidata_sparql_url: str, optional
        URL of the SPARQL endpoint to query. Defaults to
        ``WIKIDATA_SPARQL_URL``.

    Returns
    -------
    Dict
        The response body parsed as JSON.
    """
    request_params = {"query": query_string, "format": "json"}
    response = requests.get(wikidata_sparql_url, params=request_params)
    return response.json()
def get_subclasses_of_item(item_id: str, return_qids: bool = True) -> Union[List[str], Dict]:
    """Look up every subclass of a Wikidata item.

    An item is a match when it is linked to ``item_id`` by a chain of zero or
    more P279 ("subclass of") statements::

        Qid_i -[P279]-> Qid_j ... Qid_k -[P279]-> item_id

    Because the chain may be empty, ``item_id`` itself is always part of the
    results.

    Parameters
    ----------
    item_id: str
        The item that terminates the chain.
    return_qids: bool, optional
        If true (the default), a list of item ID strings is returned.
        If false, the raw SPARQL query result is returned unaltered.

    Examples
    --------
    Item IDs of all subclasses of `Q6256`::

        >>> get_subclasses_of_item('Q6256')
        ['Q6256', 'Q112099', 'Q123480', ... 'Q4994005', 'Q6805624', 'Q15895923']

    The unprocessed query result for the same item::

        >>> get_subclasses_of_item('Q6256', return_qids=False)
        {'head': {'vars': ['WDid']},
         'results': {'bindings': [
           {'WDid': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q6256'}},
           {'WDid': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q112099'}},
           ...
           {'WDid': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q6805624'}},
           {'WDid': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q15895923'}}]}}
    """
    # Doubled braces are literal braces for str.format; the lone {} receives item_id.
    query_template = """ SELECT $WDid WHERE {{ ?WDid (wdt:P279)* wd:{} . }} """
    response = return_sparql_query_results(query_template.format(item_id))

    if not return_qids:
        return response

    # Each binding holds a full entity URI; the item ID is its last path segment.
    return [
        row["WDid"]["value"].rsplit("/", 1)[-1]
        for row in response["results"]["bindings"]
    ]