# Source code for filip.semantics.vocabulary.vocabulary

"""Main Vocabulary Model, and Submodels"""

import operator
from enum import Enum

from pydantic import BaseModel, Field
from . import *
from typing import List, Dict, Union, Optional, Tuple

from ...models.base import LogLevel


class LabelSummary(BaseModel):
    """
    Model holding all information for label conflicts in a vocabulary

    Every field collects one kind of conflict. The summary counts as valid
    only if all of the collections are empty.
    """

    class_label_duplicates: Dict[str, List[Entity]] = Field(
        description="All Labels that are used more than once for class_names "
                    "on export. "
                    "Key: Label, Values: List of entities with key label"
    )
    field_label_duplicates: Dict[str, List[Entity]] = Field(
        description="All Labels that are used more than once for "
                    "property_names on export. "
                    "Key: Label, Values: List of entities with key label"
    )
    datatype_label_duplicates: Dict[str, List[Entity]] = Field(
        description="All Labels that are used more than once for datatype "
                    "on export. "
                    "Key: Label, Values: List of entities with key label"
    )
    blacklisted_labels: List[Tuple[str, Entity]] = Field(
        description="All Labels that are blacklisted, "
                    "Tuple(Label, Entity with label)"
    )
    labels_with_illegal_chars: List[Tuple[str, Entity]] = Field(
        description="All Labels that contain illegal characters, "
                    "Tuple(Label, Entity with label)"
    )

    def is_valid(self) -> bool:
        """Test if Labels are valid

        Returns:
            bool, True if none of the conflict collections has an entry
        """
        conflict_collections = (
            self.class_label_duplicates,
            self.field_label_duplicates,
            self.datatype_label_duplicates,
            self.blacklisted_labels,
            self.labels_with_illegal_chars,
        )
        # empty dicts and lists are falsy
        return not any(conflict_collections)

    def __str__(self) -> str:
        """Render all conflict collections as an indented text overview

        Each collection is introduced by its field name. An empty collection
        is rendered as a single "/" entry.

        Returns:
            str
        """

        def format_duplicates(duplicates) -> str:
            # mapping label -> entities: the label, then one iri per line
            if not duplicates:
                return "\t/\n"
            blocks = []
            for label, entities in duplicates.items():
                iris = "".join(f" \n\t\t{entity.iri}" for entity in entities)
                blocks.append(f"\t{label}: {iris}\n")
            return "".join(blocks)

        def format_pairs(pairs) -> str:
            # list of (label, entity) tuples: one "label: iri" line each
            if not pairs:
                return "\t/\n"
            return "".join(
                f"\t{label}: \t {entity.iri}\n" for label, entity in pairs
            )

        sections = [
            "class_label_duplicates:\n",
            format_duplicates(self.class_label_duplicates),
            "field_label_duplicates:\n",
            format_duplicates(self.field_label_duplicates),
            "datatype_label_duplicates:\n",
            format_duplicates(self.datatype_label_duplicates),
            "blacklisted_labels:\n",
            format_pairs(self.blacklisted_labels),
            "labels_with_illegal_chars:\n",
            format_pairs(self.labels_with_illegal_chars),
        ]
        return "".join(sections)
class IdType(str, Enum):
    """Kind of object that an id/iri of the vocabulary refers to

    The members are strings as well, so a member compares equal to its
    plain string value.
    """

    # ontology entities
    class_ = "Class"
    object_property = "Object Property"
    data_property = "Data Property"
    datatype = "Datatype"

    # relations
    relation = "Relation"
    combined_relation = "Combined Relation"

    # remaining kinds
    individual = "Individual"
    source = "Source"
class VocabularySettings(BaseModel):
    """
    Settings that state how labels of ontology entities should be
    automatically converted on parsing

    Every switch is enabled by default.
    """

    pascal_case_class_labels: bool = Field(
        default=True,
        description="If true, convert all class labels given in the "
                    "ontologies to PascalCase"
    )
    pascal_case_individual_labels: bool = Field(
        default=True,
        description="If true, convert all labels of individuals given in the "
                    "ontologies to PascalCase"
    )
    camel_case_property_labels: bool = Field(
        default=True,
        description="If true, convert all labels of properties given in the "
                    "ontologies to camelCase"
    )
    camel_case_datatype_labels: bool = Field(
        default=True,
        description="If true, convert all labels of datatypes given in the "
                    "ontologies to camelCase"
    )
    pascal_case_datatype_enum_labels: bool = Field(
        default=True,
        description="If true, convert all values of enum datatypes given in "
                    "the ontologies to PascalCase"
    )
class Vocabulary(BaseModel):
    """
    Semantic Vocabulary of a project

    This class holds all objects in a vocabulary as central unit.
    These objects can be accessed with the according ids/iris.

    The vocabulary consists of multiple sources, that each contribute
    objects.

    Nothing should be added to or deleted from the vocabulary manually.
    The sources are added and removed through the respective methods.
    Everything else should be used as READ-ONLY
    """

    classes: Dict[str, Class] = Field(
        default={},
        description="Classes of the vocabulary. Key: class_iri")
    object_properties: Dict[str, ObjectProperty] = Field(
        default={},
        description="ObjectProperties of the vocabulary. "
                    "Key: object_property_iri")
    data_properties: Dict[str, DataProperty] = Field(
        default={},
        description="DataProperties of the vocabulary. Key: data_property_iri")
    datatypes: Dict[str, Datatype] = Field(
        default={},
        description="Datatypes of the vocabulary. Key: datatype_iri")
    individuals: Dict[str, Individual] = Field(
        default={},
        description="Individuals in the vocabulary. Key: individual_iri")
    relations: Dict[str, Relation] = Field(
        default={},
        description="Relations of classes in the vocabulary. Key: relation_id")
    combined_object_relations: Dict[str, CombinedObjectRelation] = Field(
        default={},
        description="CombinedObjectRelations of classes in the vocabulary."
                    " Key: combined_relation_id")
    combined_data_relations: Dict[str, CombinedDataRelation] = Field(
        default={},
        description="CombinedDataRelations of classes in the vocabulary."
                    " Key: combined_data_id")
    sources: Dict[str, Source] = Field(
        default={},
        description="Sources of the vocabulary. Key: source_id")
    id_types: Dict[str, IdType] = Field(
        default={},
        description="Maps all entity iris and (combined)relations to their "
                    "Entity/Object type, to speed up lookups")
    original_label_summary: Optional[LabelSummary] = Field(
        default=None,
        description="Original label after parsing, before the user made "
                    "changes")
    settings: VocabularySettings = Field(
        default=VocabularySettings(),
        description="Settings how to auto transform the entity labels")

    def get_type_of_id(self, id: str) -> Optional[IdType]:
        """Get the type (class, relation,...) of an iri/id

        Args:
            id (str): id/iri of which the type should be returned

        Returns:
            IdType or None if id/iri not registered
        """
        return self.id_types.get(id)

    def get_class_by_iri(self, class_iri: str) -> Optional[Class]:
        """Get the class belonging to the class_iri

        Args:
            class_iri (str): iri

        Returns:
            Class or None if iri not a registered class
        """
        return self.classes.get(class_iri)

    def get_object_property(self, obj_prop_iri: str) -> ObjectProperty:
        """Get the object property belonging to the iri

        Args:
            obj_prop_iri (str): iri

        Returns:
            ObjectProperty

        Raises:
            KeyError: if not a registered object property
        """
        return self.object_properties[obj_prop_iri]

    def get_data_property(self, data_prop_iri: str) -> DataProperty:
        """Get the data property belonging to the iri

        Args:
            data_prop_iri (str): iri

        Returns:
            DataProperty

        Raises:
            KeyError: if not a registered data property
        """
        return self.data_properties[data_prop_iri]

    def iri_is_predefined_datatype(self, iri: str) -> bool:
        """Test if an iri belongs to a predefined datatype

        Args:
            iri (str): Iri to test

        Returns:
            bool, False if the iri is not registered or not a datatype

        Raises:
            KeyError: if the iri is typed as datatype in id_types but
                missing in datatypes
        """
        # Unregistered iris yield None here, so both "unknown" and
        # "registered as something else" end up as an explicit False
        if self.get_type_of_id(iri) != IdType.datatype:
            return False
        return self.get_datatype(iri).predefined

    def get_datatype(self, datatype_iri: str) -> Datatype:
        """Get the datatype belonging to the iri

        Args:
            datatype_iri (str): iri

        Returns:
            Datatype

        Raises:
            KeyError: if not a registered datatype
        """
        return self.datatypes[datatype_iri]

    def get_individual(self, individual_iri: str) -> Individual:
        """Get the individual belonging to the iri

        Args:
            individual_iri (str): iri

        Returns:
            Individual

        Raises:
            KeyError: if not a registered Individual
        """
        return self.individuals[individual_iri]

    def get_all_individuals_of_class(self, class_iri: str) -> List[Individual]:
        """Get all individuals that have the class_iri as parent

        Args:
            class_iri (str): iri of parent class

        Returns:
            List[Individual]
        """
        return [individual for individual in self.individuals.values()
                if class_iri in individual.parent_class_iris]

    def is_id_from_individual(self, id: str) -> bool:
        """Test if an id is from an Individual.

        Used to distinguish between instances and individuals

        Args:
            id (str): id

        Returns:
            bool, False if the id is not registered
        """
        try:
            return self.get_type_of_id(id) == IdType.individual
        except TypeError:
            # an unhashable id can not be looked up, hence is not registered
            return False

    def get_classes(self) -> List[Class]:
        """Get all classes in this vocabulary

        Returns:
            List[Class]
        """
        return list(self.classes.values())

    def get_classes_sorted_by_label(self) -> List[Class]:
        """Get all classes sorted by their labels

        Returns:
            List[Class]: sorted classes, ascending
        """
        return sorted(self.classes.values(),
                      key=operator.methodcaller("get_label"))

    def get_entity_list_sorted_by_label(self, list: List[Entity]) \
            -> List[Entity]:
        """Sort a given entity list by their labels

        The given list is left untouched, a new list is returned.

        Args:
            list (List[Entity]) : entities to be sorted

        Returns:
            List[Entity]: sorted list, ascending
        """
        # NOTE: the parameter shadows the builtin `list`; the name is kept
        # as callers may pass it as keyword argument
        return sorted(list, key=operator.methodcaller("get_label"))

    def get_object_properties_sorted_by_label(self) -> List[ObjectProperty]:
        """Get all object properties of the vocabulary sorted by their labels

        Returns:
            List[ObjectProperty], sorted by ascending labels
        """
        return sorted(self.object_properties.values(),
                      key=operator.methodcaller("get_label"))

    def get_data_properties_sorted_by_label(self) -> List[DataProperty]:
        """Get all data properties of the vocabulary sorted by their labels

        Returns:
            List[DataProperty], sorted by ascending labels
        """
        return sorted(self.data_properties.values(),
                      key=operator.methodcaller("get_label"))

    def get_individuals_sorted_by_label(self) -> List[Individual]:
        """Get all individuals of the vocabulary sorted by their labels

        Returns:
            List[Individual], sorted by ascending labels
        """
        return sorted(self.individuals.values(),
                      key=operator.methodcaller("get_label"))

    def get_datatypes_sorted_by_label(self) -> List[Datatype]:
        """Get all datatypes of the vocabulary sorted by their labels

        Returns:
            List[Datatype], sorted by ascending labels
        """
        return sorted(self.datatypes.values(),
                      key=operator.methodcaller("get_label"))

    def get_relation_by_id(self, id: str) -> Relation:
        """Get Relation by relation id

        Args:
            id (str): relation_id

        Returns:
            Relation

        Raises:
            KeyError: if id not registered as relation
        """
        return self.relations[id]

    def get_combined_relation_by_id(self, id: str) -> CombinedRelation:
        """Get CombinedRelation by id

        The object relations are searched first, then the data relations.

        Args:
            id (str): combined_relation_id

        Returns:
            CombinedRelation

        Raises:
            KeyError: if id not registered as CombinedObjectRelation or
                CombinedDataRelation
        """
        if id in self.combined_object_relations:
            return self.combined_object_relations[id]
        return self.combined_data_relations[id]

    def get_combined_data_relation_by_id(self, id: str) -> CombinedDataRelation:
        """Get CombinedDataRelation by id

        Args:
            id (str): combined_relation_id

        Returns:
            CombinedDataRelation

        Raises:
            KeyError: if id not registered as CombinedDataRelation
        """
        return self.combined_data_relations[id]

    def get_combined_object_relation_by_id(self, id: str) \
            -> CombinedObjectRelation:
        """Get CombinedObjectRelation by id

        Args:
            id (str): combined_relation_id

        Returns:
            CombinedObjectRelation

        Raises:
            KeyError: if id not registered as CombinedObjectRelation
        """
        return self.combined_object_relations[id]

    def get_source(self, source_id: str) -> Source:
        """Get the source with the given id

        Args:
            source_id (str): id

        Returns:
            Source

        Raises:
            KeyError: if source_id is not registered
        """
        return self.sources[source_id]

    def get_source_list(self) -> List[Source]:
        """Get all source objects of the vocabulary as list

        Returns:
            List[Source]
        """
        return list(self.sources.values())

    def has_source(self, source_id: str) -> bool:
        """Test if the vocabulary contains a source with the given id

        Args:
            source_id (str): id to test

        Returns:
            bool
        """
        return source_id in self.sources

    def is_id_of_type(self, id: str, type: IdType) -> bool:
        """Test if an iri/id is of a given type

        Args:
            id (str): id to test
            type (IdType): Type to test against

        Returns:
            bool

        Raises:
            KeyError: if id not registered
        """
        return self.id_types[id] == type

    def get_label_for_entity_iri(self, iri: str) -> str:
        """Get the label of the entity with the given iri

        Args:
            iri (str)

        Returns:
            str, "" if iri does not belong to an entity
        """
        entity = self.get_entity_by_iri(iri)
        if entity is None:
            return ""
        return entity.get_label()

    @staticmethod
    def get_base_out_of_iri(iri: str) -> str:
        """Extract out of a given iri the base aka ontology name

        The base is everything in front of the first "#". If the iri has no
        "#", the base is everything in front of the last "/".

        Args:
            iri (str), iri to extract

        Returns:
            str, base of iri; the unchanged iri if it contains neither "#"
            nor "/"
        """
        if "#" in iri:
            return iri.partition("#")[0]

        # for example if uri looks like:
        # http://webprotege.stanford.edu/RDwpQ8vbi7HaApq8VoqJUXH
        base, separator, _ = iri.rpartition("/")
        # Without any separator there is nothing to cut off. Slicing with
        # the -1 of a failed rfind would silently drop the last character
        return base if separator else iri

    def get_entity_by_iri(self, iri: str) -> Union[None, Entity]:
        """Get the entity with the given iri

        The type registered in id_types decides which collection is
        searched, so only one collection is touched per lookup.

        Args:
            iri (str)

        Returns:
            Entity or None if iri does not belong to an Entity
        """
        # None for unregistered iris, which matches none of the cases below
        id_type = self.get_type_of_id(iri)

        if id_type == IdType.individual:
            return self.get_individual(iri)
        if id_type == IdType.class_:
            return self.get_class_by_iri(iri)
        if id_type == IdType.datatype:
            return self.get_datatype(iri)
        if id_type == IdType.object_property:
            return self.get_object_property(iri)
        if id_type == IdType.data_property:
            return self.get_data_property(iri)
        # relations, combined relations and sources are no entities
        return None

    def is_iri_registered(self, iri: str) -> bool:
        """Test if iri/id is registered (Entities or (Combined)relations)

        Args:
            iri (str): iri to test

        Returns:
            bool
        """
        return iri in self.id_types

    def set_label_for_entity(self, iri: str, label: str):
        """Set a userset label for the given entity

        An empty label, or a label equal to the original label of the
        entity, resets the userset label to "".

        Args:
            iri (str): entity iri
            label (str): new label

        Returns:
            None

        Raises:
            KeyError: if the iri does not belong to a registered entity
        """
        entity = self.get_entity_by_iri(iri)
        if entity is None:
            raise KeyError(f"No entity registered for iri: {iri}")

        if label == "" or label == entity.get_original_label():
            entity.user_set_label = ""
        else:
            entity.user_set_label = label

    def get_all_entities(self) -> List[Entity]:
        """Get all registered Entities

        Returns:
            List[Entity], ordered: classes, object properties,
            data properties, datatypes, individuals
        """
        return [
            *self.classes.values(),
            *self.object_properties.values(),
            *self.data_properties.values(),
            *self.datatypes.values(),
            *self.individuals.values(),
        ]

    def get_enum_dataytypes(self) -> Dict[str, Datatype]:
        """Get all datatypes of the vocabulary that are of type ENUM

        A datatype is selected if it has at least one enum value and is not
        predefined.

        Returns:
            Dict[str, Datatype], {datatype.iri: Datatype}
        """
        return {
            datatype.iri: datatype
            for datatype in self.datatypes.values()
            if len(datatype.enum_values) > 0 and not datatype.predefined
        }