# Source code for kgdata.wikidata.config

"""Locations of Wikidata dumps and datasets on disk."""

from dataclasses import dataclass
from pathlib import Path
from typing import Union
from glob import glob


class WDDataDirCfg:
    """Locations of Wikidata dumps and datasets on disk.

    Every attribute set in ``__init__`` is a path directly under ``datadir``
    whose last component equals the attribute name (for example
    ``self.entity_ids == datadir / "entity_ids"``). Constructing the object
    only joins paths; nothing is read from or created on disk.

    A single shared configuration is kept in the class attribute
    ``instance``; it is set by :meth:`init` and read by :meth:`get_instance`.
    """

    # Shared configuration object; ``None`` until ``init`` has been called.
    instance: Union["WDDataDirCfg", None] = None

    def __init__(self, datadir: Union[str, Path]) -> None:
        """Derive all dataset locations from ``datadir``.

        Args:
            datadir: root directory of the Wikidata data. A plain string is
                accepted as well as a ``Path`` so that direct construction
                behaves the same as :meth:`init`.
        """
        # Coerce first: ``str / str`` would raise TypeError below.
        datadir = Path(datadir)
        self.datadir = datadir

        # Directories containing the dumps and their split files.
        # For the names of the dump files, see the corresponding
        # `self.get_X_file` methods in this class.
        self.dumps = datadir / "dumps"
        self.entity_dump = datadir / "entity_dump"
        self.page_dump = datadir / "page_dump"
        self.entity_redirection_dump = datadir / "entity_redirection_dump"

        self.entity_ids = datadir / "entity_ids"
        self.entity_types = datadir / "entity_types"
        self.entity_wikilinks = datadir / "entity_wikilinks"
        self.entity_metadata = datadir / "entity_metadata"
        self.entity_pagerank = datadir / "entity_pagerank"
        self.page_ids = datadir / "page_ids"
        self.entities = datadir / "entities"
        self.classes = datadir / "classes"
        self.class_count = datadir / "class_count"
        self.properties = datadir / "properties"
        self.property_domains = datadir / "property_domains"
        self.property_ranges = datadir / "property_ranges"
        self.wp2wd = datadir / "wp2wd"
        self.search = datadir / "search"
        self.entity_redirections = datadir / "entity_redirections"

    def get_entity_dump_file(self) -> Path:
        """Return the single file in ``self.dumps`` matching
        ``*wikidata-*all*.json.bz2``.

        Raises:
            FileNotFoundError: if no file matches.
            RuntimeError: if more than one file matches.
        """
        return self._get_file(self.dumps / "*wikidata-*all*.json.bz2")

    def get_page_dump_file(self) -> Path:
        """Return the single file in ``self.dumps`` matching
        ``*wikidatawiki-*page*.sql.gz``.

        Raises:
            FileNotFoundError: if no file matches.
            RuntimeError: if more than one file matches.
        """
        return self._get_file(self.dumps / "*wikidatawiki-*page*.sql.gz")

    def get_redirect_dump_file(self) -> Path:
        """Return the single file in ``self.dumps`` matching
        ``*wikidatawiki-*redirect*.sql.gz``.

        Raises:
            FileNotFoundError: if no file matches.
            RuntimeError: if more than one file matches.
        """
        return self._get_file(self.dumps / "*wikidatawiki-*redirect*.sql.gz")

    def _get_file(self, file: Union[str, Path]) -> Path:
        """Resolve a glob pattern that must match exactly one path.

        Args:
            file: glob pattern, given as a string or a ``Path``. The whole
                string is handed to ``glob``, so glob metacharacters in the
                directory part are interpreted as a pattern too.

        Returns:
            The unique matching path.

        Raises:
            FileNotFoundError: if nothing matches the pattern.
            RuntimeError: if the pattern is ambiguous (several matches).
                Both are ``Exception`` subclasses, so callers that catch
                ``Exception`` are unaffected.
        """
        pattern = str(file)
        matches = glob(pattern)
        if not matches:
            raise FileNotFoundError("No file found: {}".format(pattern))
        if len(matches) > 1:
            # List the candidates (sorted for a stable message) so the user
            # can see which files need to be removed or renamed.
            raise RuntimeError(
                "Multiple files found: {} (matches: {})".format(
                    pattern, ", ".join(sorted(matches))
                )
            )
        return Path(matches[0])

    @staticmethod
    def get_instance() -> "WDDataDirCfg":
        """Return the shared config object set by :meth:`init`.

        Raises:
            RuntimeError: if :meth:`init` has not been called yet.
        """
        if WDDataDirCfg.instance is None:
            raise RuntimeError("The config object must be initialized before use")
        return WDDataDirCfg.instance

    @staticmethod
    def init(datadir: Union[str, Path]) -> "WDDataDirCfg":
        """Initialize or update the config object to use the given directory.

        Replaces any previously stored shared instance and returns the new one.
        """
        WDDataDirCfg.instance = WDDataDirCfg(Path(datadir))
        return WDDataDirCfg.instance