Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from django_elasticsearch_dsl import Document, fields
2from django_elasticsearch_dsl.registries import registry
4from discuss_data.core.models import KeywordTagged, LanguageTagged
5from discuss_data.dddatasets.models import (
6 Category,
7 DataSet,
8 DataSetCreator,
9 CollectionMethodsTagged,
10 AnalysisMethodsTagged,
11 DisciplinesTagged,
12)
14from discuss_data.ddusers.models import Affiliation, Country, User
@registry.register_document
class DataSetDocument(Document):
    """Search index document for published ``DataSet`` objects.

    Only datasets whose ``published`` flag is set are meant to be indexed.
    The restriction is applied in three places: ``get_queryset`` (index
    build), ``update``/``filter_instance`` (a single dataset passed in by a
    signal) and ``get_instances_from_related`` (datasets looked up because a
    related model changed).
    """

    countries = fields.ObjectField(properties={"name": fields.TextField()})
    published_categories = fields.ObjectField(properties={"name": fields.TextField()})
    published_main_category = fields.ObjectField(
        properties={"name": fields.TextField()}
    )
    owner = fields.ObjectField(
        properties={"first_name": fields.TextField(), "last_name": fields.TextField()}
    )
    dataset_creator = fields.ObjectField(properties={"name": fields.TextField()})
    keywords = fields.ObjectField(properties={"name": fields.TextField()})
    languages_of_data = fields.ObjectField(properties={"name": fields.TextField()})
    methods_of_data_collection = fields.ObjectField(
        properties={"name": fields.TextField()}
    )
    methods_of_data_analysis = fields.ObjectField(
        properties={"name": fields.TextField()}
    )
    disciplines = fields.ObjectField(properties={"name": fields.TextField()})
    # BleachField() instances are not recognized by django-elasticsearch-dsl
    # and need to be accessed as model attributes and added as a TextField()
    description = fields.TextField(attr="description")
    related_dataset_text = fields.TextField(attr="related_dataset_text")
    sources_of_data = fields.TextField(attr="sources_of_data")
    funding = fields.TextField(attr="funding")
    institutional_affiliation = fields.TextField(attr="institutional_affiliation")

    class Index:
        # Name of the Elasticsearch index
        name = "datasets"
        # See Elasticsearch Indices API reference for available settings
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    class Django:
        model = DataSet  # The model associated with this Document

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            "title",
            "subtitle",
            "version",
            "time_period_text",
            "time_period_from",
            "time_period_to",
        ]
        # Every model handled in get_instances_from_related() must be listed
        # here. LanguageTagged was handled there but missing from this list,
        # which made its branch unreachable.
        related_models = [
            Country,
            Category,
            User,
            DataSetCreator,
            KeywordTagged,
            LanguageTagged,
            CollectionMethodsTagged,
            AnalysisMethodsTagged,
            DisciplinesTagged,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the datasets to re-index after ``related_instance`` changed.

        The related_models option should be used with caution because it can
        lead in the index to the updating of a lot of items.

        Querysets built here are restricted to published datasets:
        ``filter_instance`` only inspects single ``DataSet`` objects, so an
        unfiltered queryset would pass through ``update`` unchecked and
        unpublished datasets would end up in the index.

        Returns a queryset (or whatever the model helper returns) of
        datasets, a single ``DataSet`` for a ``DataSetCreator``, or ``None``
        for a type this document does not handle.
        """
        if isinstance(related_instance, Country):
            return related_instance.dataset_country.filter(published=True)
        if isinstance(related_instance, User):
            return related_instance.get_published_datasets()
        if isinstance(related_instance, Category):
            return related_instance.get_published_datasets_category_all()
        if isinstance(related_instance, KeywordTagged):
            return DataSet.objects.filter(
                keywords__name=related_instance.name, published=True
            )
        if isinstance(related_instance, CollectionMethodsTagged):
            return DataSet.objects.filter(
                methods_of_data_collection__name=related_instance.name,
                published=True,
            )
        if isinstance(related_instance, AnalysisMethodsTagged):
            return DataSet.objects.filter(
                methods_of_data_analysis__name=related_instance.name,
                published=True,
            )
        if isinstance(related_instance, DisciplinesTagged):
            return DataSet.objects.filter(
                disciplines__name=related_instance.name, published=True
            )
        if isinstance(related_instance, LanguageTagged):
            return DataSet.objects.filter(
                languages_of_data__name=related_instance.name, published=True
            )
        if isinstance(related_instance, DataSetCreator):
            # A single DataSet; update() drops it via filter_instance() when
            # it is not published.
            return related_instance.dataset
        # Unknown type: return None explicitly instead of failing with an
        # UnboundLocalError as the previous if/elif chain did.
        return None

    def get_queryset(self):
        """Return the queryset of datasets to index, limited to published ones.

        Mandatory for excluding instances from index by filtering by
        attributes. Works only for build_index command.
        Also to improve performance we can select related in one sql request.
        """
        return super().get_queryset().filter(published=True).select_related("owner")

    @staticmethod
    def filter_instance(instance):
        """Wrap a single ``DataSet`` in a list, or drop it if unpublished.

        Anything that is not a ``DataSet`` instance (for example a queryset
        produced by ``get_instances_from_related``) is returned unchanged.
        """
        if not isinstance(instance, DataSet):
            return instance
        return [instance] if instance.published else []

    def update(self, instance, refresh=None, action="index", **kwargs):
        """Apply the exclude-from-index filter also for index updates by signal.

        ``instance`` is passed through ``filter_instance`` before being
        handed to the parent implementation, so an unpublished dataset
        results in an empty update.
        """
        return super().update(self.filter_instance(instance), refresh, action, **kwargs)
@registry.register_document
class UserDocument(Document):
    """Search index document for publicly listed ``User`` profiles.

    Hidden profiles, inactive accounts, accounts without a first or last
    name and the ``AnonymousUser`` account are kept out of the index. The
    restriction is applied in ``get_queryset`` (index build),
    ``update``/``filter_instance`` (a single user passed in by a signal) and
    ``get_instances_from_related`` (users looked up because a related model
    changed).
    """

    # ObjectField works for ForeignKey relations using related_name
    # NestedField problematic in Search
    countries = fields.ObjectField(properties={"name": fields.TextField()})
    interests = fields.ObjectField(properties={"name": fields.TextField()})

    affiliation_user = fields.ObjectField(
        properties={
            "position": fields.TextField(),
            "name_of_institution": fields.TextField(),
            "place_of_institution": fields.TextField(),
            "country_of_institution": fields.TextField(),
        }
    )

    class Index:
        # Name of the Elasticsearch index
        name = "users"
        # See Elasticsearch Indices API reference for available settings
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    class Django:
        model = User  # The model associated with this Document

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            "first_name",
            "last_name",
        ]
        related_models = [
            Country,
            KeywordTagged,
            Affiliation,
        ]

    @staticmethod
    def _exclude_unlisted(queryset):
        """Drop users that must not be indexed from a ``User`` queryset.

        Mirrors the per-instance checks in ``filter_instance``.
        """
        return (
            queryset.exclude(profile_accessibility="HID")
            .exclude(is_active=False)
            .exclude(first_name="")
            .exclude(last_name="")
            .exclude(username="AnonymousUser")
        )

    def get_instances_from_related(self, related_instance):
        """Return the users to re-index after ``related_instance`` changed.

        The related_models option should be used with caution because it can
        lead in the index to the updating of a lot of items.

        Querysets built here go through ``_exclude_unlisted``:
        ``filter_instance`` only inspects single ``User`` objects, so an
        unfiltered queryset would pass through ``update`` unchecked and
        hidden or internal accounts would end up in the index.

        Returns a queryset of users, a single ``User`` for an
        ``Affiliation``, or ``None`` for a type this document does not
        handle.
        """
        if isinstance(related_instance, Country):
            return self._exclude_unlisted(related_instance.user_country.all())
        if isinstance(related_instance, KeywordTagged):
            return self._exclude_unlisted(
                User.objects.filter(interests__name=related_instance.name)
            )
        if isinstance(related_instance, Affiliation):
            # A single User; update() drops it via filter_instance() when it
            # must not be indexed.
            return related_instance.user
        return None

    def get_queryset(self):
        """Return the queryset of users to index, without unlisted accounts.

        Mandatory for excluding instances from index by filtering by
        attributes. Works only for build_index command.
        Also to improve performance we can select related in one sql request.
        """
        return self._exclude_unlisted(super().get_queryset())

    @staticmethod
    def filter_instance(instance):
        """Filter hidden and internally used user accounts.

        A single ``User`` is returned wrapped in a list, or as an empty list
        when it must not be indexed. Anything that is not a ``User``
        instance (for example a queryset produced by
        ``get_instances_from_related``) is returned unchanged.
        """
        if not isinstance(instance, User):
            return instance
        unlisted = (
            instance.profile_accessibility == "HID"
            or not instance.is_active
            or instance.first_name == ""
            or instance.last_name == ""
            or instance.username == "AnonymousUser"
        )
        return [] if unlisted else [instance]

    def update(self, instance, refresh=None, action="index", **kwargs):
        """Apply the exclude-from-index filter also for index updates by signal.

        ``instance`` is passed through ``filter_instance`` before being
        handed to the parent implementation, so an unlisted user results in
        an empty update.
        """
        return super().update(self.filter_instance(instance), refresh, action, **kwargs)