Skip to content

datasets

DatasetViewSet

Bases: ModelViewSet

API endpoint that allows datasets to be viewed or edited.

Source code in backend/datasets/views/datasets.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class DatasetViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows datasets to be viewed or edited.

    Supports filtering on mode/search_mode/state/id/creator and free-text
    search over name, source and description. Creation and deletion are
    performed asynchronously via background tasks.
    """
    queryset = Dataset.objects.all()
    serializer_class = DatasetSerializer
    pagination_class = LimitOffsetPagination
    filter_backends = [DjangoFilterBackend, filters.SearchFilter, filters.OrderingFilter]
    filterset_fields = ['mode', 'search_mode', 'state', 'id', 'creator']
    search_fields = ['name', 'source', 'description']

    def perform_create(self, serializer):
        """
        Validate the mode/search-mode combination, persist the dataset with
        the requesting user as creator, stage any uploaded files on disk, and
        schedule the asynchronous import task.

        Raises:
            ValidationError: if the requested mode/search_mode combination is
                unsupported or disabled on this server.
        """
        # Local search indexing over a remote SPARQL endpoint is not implemented.
        if serializer.validated_data.get('mode') == Dataset.Mode.SPARQL.value and \
                serializer.validated_data.get('search_mode') == Dataset.SearchMode.LOCAL.value:
            raise ValidationError('Local search index for sparql datasets is not yet supported')

        # TriplyDB search mode requires the source to reference a TriplyDB
        # dataset (identified by 'tdb_id' in the source payload).
        if serializer.validated_data.get('search_mode', None) == Dataset.SearchMode.TRIPLYDB.value and \
                'tdb_id' not in serializer.validated_data.get('source', {}):
            raise ValidationError('TriplyDB dataset must be a TriplyDB dataset')

        # Without Blazegraph this server can only host pure-SPARQL datasets
        # with a non-local search index; everything else needs local storage.
        if not settings.BLAZEGRAPH_ENABLE and (
            serializer.validated_data.get('mode') != Dataset.Mode.SPARQL.value or
            serializer.validated_data.get('search_mode') == Dataset.SearchMode.LOCAL.value
        ):
            raise ValidationError('Local datasets are not enabled on this server')

        # Persist with the creator set in a single save (DRF-recommended
        # pattern) instead of saving once and then updating the creator.
        instance: Dataset = serializer.save(creator=self.request.user)

        files = None
        # If files are uploaded, store them in a temporary folder so the
        # asynchronous import task can pick them up by absolute path.
        if instance.source.get('source_type') == 'upload':
            tmp_dir = DOWNLOAD_DIR / random_string(10)
            tmp_dir.mkdir(parents=True)
            files = []
            for file in self.request.FILES.getlist('files'):
                file_path = tmp_dir / file.name
                with file_path.open('wb+') as destination:
                    # Stream in chunks to avoid loading large uploads in memory.
                    for chunk in file.chunks():
                        destination.write(chunk)
                files.append(str(file_path.absolute()))

        instance.apply_async(
            import_dataset,
            (instance.id, files),
            creator=self.request.user,
            name=f'Import dataset {instance.name}'
        )

    def perform_destroy(self, instance):
        """Schedule asynchronous deletion of the dataset instead of deleting inline."""
        instance.apply_async(
            delete_dataset,
            (instance.id,),
            creator=self.request.user,
            name=f'Deleting dataset {instance.name}'
        )

    def get_permissions(self):
        """Require dataset ownership, on top of the defaults, for deletion."""
        permissions = super().get_permissions()

        if self.action in ['destroy']:
            permissions.append(IsOwner())

        return permissions