Skip to content

models

Dataset

Bases: TaskMixin, TimeStampMixin, OwnableMixin

The internal dataset model.

Source code in backend/datasets/models.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Dataset(TaskMixin, TimeStampMixin, OwnableMixin):
    """
    The internal dataset model.
    """
    STATES = ((state.value, state.value) for state in DatasetState)

    class Mode(models.TextChoices):
        """
        The Mode class is an enumeration of the possible modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        SPARQL = 'SPARQL', _('From SPARQL endpoint')

    class SearchMode(models.TextChoices):
        """
        The SearchMode class is an enumeration of the possible search modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        WIKIDATA = 'WIKIDATA', _('From Wikidata')
        TRIPLYDB = 'TRIPLYDB', _('From TripyDB')

    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
    """The identifier of the dataset."""
    name = models.CharField(max_length=255)
    """The name of the dataset."""
    description = models.TextField(blank=True)
    """The description of the dataset."""
    source = models.JSONField()
    """The source of the dataset."""
    mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL)
    """The mode of the dataset."""
    search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL)
    """The search mode of the dataset."""
    creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True)
    """The user who created the dataset."""

    local_database: str = models.CharField(max_length=255, null=True)
    """The local blazegraph database identifier of the dataset."""
    sparql_endpoint = models.CharField(max_length=255, null=True)
    """The SPARQL endpoint of the dataset."""

    statistics = models.JSONField(null=True)
    """The statistics of the dataset."""
    namespaces = models.JSONField(null=True)
    """The list of sparql namespaces/prefixes in the dataset."""
    state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255)
    """The import state of the dataset."""
    import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True)
    """The import task of the dataset."""

    objects = models.Manager()

    @property
    def search_index_name(self) -> str:
        """
        The path to the search index of the dataset.
        :return:
        """
        return self.local_database if self.local_database else None

    def get_search_service(self) -> SearchService:
        """
        Return appropriate search service depending on the search mode
        """
        match self.search_mode:
            case self.SearchMode.LOCAL:
                if not self.search_index_name:
                    raise Exception('Dataset search index has not been created yet')
                return LocalSearchService(index_name=self.search_index_name)
            case self.SearchMode.WIKIDATA:
                return WikidataSearchService()
            case self.SearchMode.TRIPLYDB:
                if 'tdb_id' not in self.source:
                    raise Exception('Dataset is not a TriplyDB dataset')
                return TriplyDBSearchService(self.source['tdb_id'])
            case _:
                raise ValueError(f'Unknown search mode {self.search_mode}')

    def get_query_service(self) -> QueryService:
        """
        If the mode is local, return a local query service, otherwise return a SPARQL query service
        """
        match self.mode:
            case self.Mode.LOCAL:
                if not self.local_database:
                    raise Exception('Dataset local database has not been imported yet')
                return LocalQueryService(str(self.local_database))
            case self.Mode.SPARQL:
                return SPARQLQueryService(str(self.sparql_endpoint))
            case _:
                raise ValueError(f'Unknown mode {self.mode}')

    def can_view(self, user: User):
        return bool(user)

    def can_edit(self, user: User):
        return super().can_edit(user) or self.creator == user

creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The user who created the dataset.

description = models.TextField(blank=True) class-attribute instance-attribute

The description of the dataset.

id = models.UUIDField(default=uuid.uuid4, primary_key=True) class-attribute instance-attribute

The identifier of the dataset.

import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True) class-attribute instance-attribute

The import task of the dataset.

local_database: str = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The local blazegraph database identifier of the dataset.

mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL) class-attribute instance-attribute

The mode of the dataset.

name = models.CharField(max_length=255) class-attribute instance-attribute

The name of the dataset.

namespaces = models.JSONField(null=True) class-attribute instance-attribute

The list of sparql namespaces/prefixes in the dataset.

search_index_name: str property

The name of the search index of the dataset: the local database identifier, or None when no local database is set.

search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL) class-attribute instance-attribute

The search mode of the dataset.

source = models.JSONField() class-attribute instance-attribute

The source of the dataset.

sparql_endpoint = models.CharField(max_length=255, null=True) class-attribute instance-attribute

The SPARQL endpoint of the dataset.

state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255) class-attribute instance-attribute

The import state of the dataset.

statistics = models.JSONField(null=True) class-attribute instance-attribute

The statistics of the dataset.

Mode

Bases: TextChoices

The Mode class is an enumeration of the possible modes of a dataset

Source code in backend/datasets/models.py
33
34
35
36
37
38
class Mode(models.TextChoices):
    """
    Enumeration of the ways a dataset can be queried.

    Each member pairs the stored value with its translatable display label.
    """
    # Stored value 'LOCAL'.
    LOCAL = 'LOCAL', _('Imported locally ')
    # Stored value 'SPARQL'.
    SPARQL = 'SPARQL', _('From SPARQL endpoint')

SearchMode

Bases: TextChoices

The SearchMode class is an enumeration of the possible search modes of a dataset

Source code in backend/datasets/models.py
40
41
42
43
44
45
46
class SearchMode(models.TextChoices):
    """
    The SearchMode class is an enumeration of the possible search modes of a dataset

    Each member pairs the stored value with its translatable display label.
    """
    LOCAL = 'LOCAL', _('Imported locally ')
    WIKIDATA = 'WIKIDATA', _('From Wikidata')
    TRIPLYDB = 'TRIPLYDB', _('From TriplyDB')

get_query_service()

If the mode is local, return a local query service, otherwise return a SPARQL query service

Source code in backend/datasets/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_query_service(self) -> QueryService:
    """
    If the mode is local, return a local query service, otherwise return a SPARQL query service
    """
    match self.mode:
        case self.Mode.LOCAL:
            if not self.local_database:
                raise Exception('Dataset local database has not been imported yet')
            return LocalQueryService(str(self.local_database))
        case self.Mode.SPARQL:
            return SPARQLQueryService(str(self.sparql_endpoint))
        case _:
            raise ValueError(f'Unknown mode {self.mode}')

get_search_service()

Return appropriate search service depending on the search mode

Source code in backend/datasets/models.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_search_service(self) -> SearchService:
    """
    Build the search service that corresponds to the dataset's search mode.

    :return: A local, Wikidata or TriplyDB search service.
    :raises Exception: If the search mode is local but no search index name is
        available, or if the search mode is TriplyDB but ``source`` has no
        ``tdb_id`` entry.
    :raises ValueError: If the search mode is not one of the known modes.
    """
    selected = self.search_mode
    known = self.SearchMode

    if selected == known.LOCAL:
        index_name = self.search_index_name
        if not index_name:
            raise Exception('Dataset search index has not been created yet')
        return LocalSearchService(index_name=index_name)

    if selected == known.WIKIDATA:
        return WikidataSearchService()

    if selected == known.TRIPLYDB:
        source = self.source
        if 'tdb_id' not in source:
            raise Exception('Dataset is not a TriplyDB dataset')
        return TriplyDBSearchService(source['tdb_id'])

    raise ValueError(f'Unknown search mode {self.search_mode}')

DatasetState

Bases: Enum

The DatasetState class is an enumeration of the possible states of a dataset

Source code in backend/datasets/models.py
17
18
19
20
21
22
23
24
class DatasetState(Enum):
    """Enumeration of the import states a dataset can be in.

    Every member's value is the member's own name as a string.
    """

    QUEUED = 'QUEUED'
    IMPORTING = 'IMPORTING'
    IMPORTED = 'IMPORTED'
    FAILED = 'FAILED'