Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import logging 

2import os 

3import uuid 

4from zipfile import ZipFile 

5 

6import filetype 

7from actstream import action 

8from datacite.errors import DataCiteNotFoundError, DataCiteUnauthorizedError 

9from dcxml import simpledc 

10from django.conf import settings 

11from django.contrib.auth.models import Group 

12from django.contrib.contenttypes.fields import GenericRelation 

13from django.contrib.contenttypes.models import ContentType 

14from django.core.exceptions import ( 

15 ImproperlyConfigured, 

16 ObjectDoesNotExist, 

17 PermissionDenied, 

18) 

19from django.core.files.base import ContentFile 

20from django.core.files.storage import FileSystemStorage 

21from django.db import models 

22from django.forms import ModelForm 

23from django.template.loader import render_to_string 

24from django.urls import reverse 

25from django.utils.html import format_html 

26from django.utils.text import slugify 

27from django.utils.translation import gettext as _ 

28from django_bleach.models import BleachField 

29from django.utils import timezone 

30from guardian.shortcuts import assign_perm, get_perms, get_users_with_perms, remove_perm 

31from PIL import Image 

32from taggit.managers import TaggableManager 

33from taggit.models import GenericTaggedItemBase, GenericUUIDTaggedItemBase, TagBase 

34from weasyprint import HTML 

35 

36from discuss_data.core.errors import MetaDataError 

37from discuss_data.core.models import KeywordTags, LanguageTags, Link 

38from discuss_data.core.utils import generate_discuss_data_doi, qs_to_str 

39from discuss_data.ddcomments.models import Comment, Notification 

40from discuss_data.ddusers.models import User 

41from discuss_data.pages.models import LicensePage, ManualPage 

42from discuss_data.utils.cropped_thumbnail import cropped_thumbnail 

43from discuss_data.core.utils import send_update_email 

44 

45from wagtail.admin.edit_handlers import FieldPanel 

46 

47logger = logging.getLogger(__name__) 

48 

49# provide a storage independent of 'media' file storage 

50datafile_storage = FileSystemStorage(location=settings.DATA_ROOT) 

51 

52 

53class DataList(models.Model): 

54 uuid = models.UUIDField(default=uuid.uuid4, editable=False) 

55 name = models.CharField(max_length=400) 

56 description = BleachField(max_length=2000) 

57 owner = models.ForeignKey(User, on_delete=models.PROTECT, null=True) 

58 datasets = models.ManyToManyField( 

59 "DataSet", blank=True, related_name="datalist_datasets" 

60 ) 

61 public = models.BooleanField(default="False") 

62 

63 class Meta: 

64 ordering = ["name"] 

65 

66 def get_status(self): 

67 if self.public: 

68 return _("Public") 

69 else: 

70 return _("Private") 

71 

72 def __str__(self): 

73 return self.name 

74 

75 def class_name(self): 

76 return self.__class__.__name__ 

77 

78 

79class Category(models.Model): 

80 class Meta: 

81 ordering = ["name"] 

82 

83 name = models.CharField(max_length=400) 

84 slug = models.SlugField() 

85 image = models.ImageField(blank=True, null=True) 

86 description = BleachField(max_length=2000) 

87 sponsors = models.ManyToManyField( 

88 "Sponsor", blank=True, related_name="category_sponsors_sponsor" 

89 ) 

90 curators = models.ManyToManyField( 

91 User, blank=True, related_name="category_curators_user" 

92 ) 

93 

94 # panels for wagtail ModelAdmin integration of django model 

95 # https://docs.wagtail.io/en/stable/reference/contrib/modeladmin/index.html 

96 panels = [ 

97 FieldPanel("name"), 

98 FieldPanel("slug"), 

99 FieldPanel("image"), 

100 FieldPanel("description"), 

101 FieldPanel("sponsors"), 

102 FieldPanel("curators"), 

103 ] 

104 

105 def get_curators_emails(self): 

106 curators_emails = list() 

107 for curator in self.curators.all(): 

108 curators_emails.append(curator.get_email()) 

109 return curators_emails 

110 

111 def get_published_datasets_category(self): 

112 return DataSet.objects.filter(published=True, published_categories__id=self.id) 

113 

114 def get_published_datasets_category_main(self): 

115 return DataSet.objects.filter( 

116 published=True, published_main_category__id=self.id 

117 ) 

118 

119 def get_published_datasets_category_all(self): 

120 datasets_categories = DataSet.objects.filter( 

121 published=True, published_categories__id=self.id 

122 ) 

123 dataset_main_category = DataSet.objects.filter( 

124 published=True, published_main_category__id=self.id 

125 ) 

126 return datasets_categories | dataset_main_category 

127 

128 def get_published_datasets_category_all_count(self): 

129 return self.get_published_datasets_category_all().count() 

130 

131 def __str__(self): 

132 return self.name 

133 

134 

135class Sponsor(models.Model): 

136 name = models.CharField(max_length=400) 

137 institution = models.ForeignKey( 

138 "ddusers.Institution", 

139 blank=True, 

140 null=True, 

141 related_name="sponsor_institution", 

142 on_delete=models.CASCADE, 

143 ) 

144 logo = models.ImageField(blank=True, null=True) 

145 url = models.URLField(blank=True) 

146 

147 def __str__(self): 

148 return self.name 

149 

150 

151def datafile_file_path(instance, filename): 

152 return "datasets/" + str(instance.uuid) 

153 

154 

155class DataFile(models.Model): 

156 FILE_FORMATS = { 

157 "png": "image", 

158 "jpeg": "image", 

159 "jpg": "image", 

160 } 

161 

162 FILE_TYPE_DATA = "DAT" 

163 FILE_TYPE_METADATA = "MET" 

164 FILE_TYPE_DOC = "DOC" 

165 FILE_TYPE_CONVERTED = "CFF" 

166 FILE_TYPE_GENERATED_PDF = "PDF" 

167 FILE_TYPE_GENERATED_ZIP = "ZIP" 

168 

169 DATA_FILE_TYPES = ( 

170 (FILE_TYPE_DATA, _("data")), 

171 (FILE_TYPE_METADATA, _("metadata")), 

172 (FILE_TYPE_DOC, _("data documentation")), 

173 (FILE_TYPE_CONVERTED, _("converted file format")), 

174 ) 

175 uuid = models.UUIDField(default=uuid.uuid4, editable=False) 

176 dataset = models.ForeignKey( 

177 "DataSet", related_name="datafile_dataset_dataset", on_delete=models.CASCADE, 

178 ) 

179 file = models.FileField( 

180 upload_to=datafile_file_path, storage=datafile_storage, max_length=1000 

181 ) 

182 data_file_type = models.CharField( 

183 max_length=3, choices=DATA_FILE_TYPES, default=FILE_TYPE_DATA, 

184 ) 

185 data_file_format = models.CharField(max_length=200, default="unknown") 

186 data_file_size = models.IntegerField(default=1) 

187 content_type = models.CharField(max_length=200, default="text/plain") 

188 name = models.CharField(max_length=200,) 

189 repository_file_id = models.CharField(max_length=200, default="not set") 

190 repository = models.CharField(max_length=200, default="dariah-repository") 

191 dhdoi = models.CharField(max_length=200, blank=True) 

192 

193 def get_download_file_name(self): 

194 return "DiscussData-{}-{}".format( 

195 self.dataset.title.replace(" ", "_"), self.name.replace(" ", "_") 

196 ) 

197 

198 def extension(self): 

199 extension = os.path.splitext(self.file.name) 

200 return extension 

201 

202 def get_file_format(self): 

203 try: 

204 file_format = filetype.guess(self.file) 

205 if file_format is None: 

206 return "other" 

207 except FileNotFoundError: 

208 logger.error("file {} not found".format(self.file)) 

209 return _("file not found") 

210 return file_format.mime 

211 

212 def get_user(self): 

213 return self.dataset.owner 

214 

215 def get_group(self): 

216 return self.dataset.group 

217 

218 def __str__(self): 

219 return self.name 

220 

221 def clone(self, new_ds): 

222 self.save() 

223 new_file = ContentFile(self.file.read()) 

224 new_file.name = self.file.name 

225 df = self 

226 df.pk = None 

227 df.uuid = uuid.uuid4() 

228 df.file = new_file 

229 df.dataset = new_ds 

230 df.save() 

231 

232 class Meta: 

233 permissions = (("view_dddatafile", "View Datafile"),) 

234 

235 

236class DataRepository(models.Model): 

237 name = models.CharField(max_length=400) 

238 logo = models.ImageField(blank=True, null=True) 

239 

240 def __str__(self): 

241 return self.name 

242 

243 

244class DataType(models.Model): 

245 name = models.CharField(max_length=400) 

246 logo = models.ImageField(blank=True, null=True) 

247 

248 # panels for wagtail ModelAdmin integration of django model 

249 # https://docs.wagtail.io/en/stable/reference/contrib/modeladmin/index.html 

250 panels = [ 

251 FieldPanel("name"), 

252 FieldPanel("logo"), 

253 ] 

254 

255 class Meta: 

256 ordering = ["name"] 

257 

258 def __str__(self): 

259 return self.name 

260 

261 

262class License(models.Model): 

263 INDIVIDUAL = "IND" 

264 STANDARD = "STD" 

265 LICENSE_TYPE_CHOICES = ( 

266 (INDIVIDUAL, _("individual")), 

267 (STANDARD, _("standard")), 

268 ) 

269 

270 license_type = models.CharField( 

271 max_length=4, choices=LICENSE_TYPE_CHOICES, default=STANDARD, 

272 ) 

273 standard_license = models.ForeignKey( 

274 "pages.LicensePage", 

275 blank=True, 

276 null=True, 

277 related_name="license_license_page", 

278 on_delete=models.PROTECT, 

279 ) 

280 license_text = BleachField(max_length=12000) 

281 license_name = models.CharField(max_length=400, blank=True) 

282 # TODO: license_slug field only unique for standard licenses, but not unique for individual licenses 

283 license_slug = models.CharField(max_length=200, blank=True) 

284 

285 def get_datasets(self): 

286 return self.dataset_license.all() 

287 

288 def get_license_name(self): 

289 if self.license_type == "STD": 

290 try: 

291 return self.standard_license.name 

292 except AttributeError: 

293 return "Standard license name not set" 

294 else: 

295 return "individual license" 

296 

297 def get_license_title(self): 

298 if self.license_type == "STD": 

299 try: 

300 return self.standard_license.title 

301 except AttributeError: 

302 return "Standard license title not set" 

303 else: 

304 return "individual license" 

305 

306 def get_license_slug(self): 

307 if self.license_type == "STD": 

308 try: 

309 return self.standard_license.slug 

310 except AttributeError: 

311 return "Standard license slug not set" 

312 else: 

313 return "license-individual" 

314 

315 def get_license_text(self): 

316 if self.license_type == "STD": 

317 try: 

318 return self.standard_license.text 

319 except AttributeError: 

320 return "Standard license text not set" 

321 else: 

322 return self.license_text 

323 

324 def list_licenses(self): 

325 return LicensePage.objects.all() 

326 

327 def __str__(self): 

328 return self.get_license_name() 

329 

330 

331class CollectionMethodsTagged(TagBase): 

332 class Meta: 

333 verbose_name = "Collection Methods Tag" 

334 verbose_name_plural = "Collection Methods Tags" 

335 

336 panels = [ 

337 FieldPanel("name"), 

338 ] 

339 

340 

341class CollectionMethodsTags(GenericTaggedItemBase): 

342 tag = models.ForeignKey( 

343 "CollectionMethodsTagged", 

344 null=True, 

345 related_name="collectionmethodstags_collectionmethodstagged", 

346 on_delete=models.CASCADE, 

347 ) 

348 

349 def __str__(self): 

350 return self.tag.name 

351 

352 

353class AnalysisMethodsTagged(TagBase): 

354 class Meta: 

355 verbose_name = "Analysis Methods Tag" 

356 verbose_name_plural = "Analysis Methods Tags" 

357 

358 panels = [ 

359 FieldPanel("name"), 

360 ] 

361 

362 

363class AnalysisMethodsTags(GenericTaggedItemBase): 

364 tag = models.ForeignKey( 

365 "AnalysisMethodsTagged", 

366 related_name="analysismethodstags_analysismethodstagged", 

367 on_delete=models.CASCADE, 

368 null=True, 

369 ) 

370 

371 def __str__(self): 

372 return self.tag.name 

373 

374 

375class DisciplinesTagged(TagBase): 

376 class Meta: 

377 verbose_name = "Disciplines Tag" 

378 verbose_name_plural = "Disciplines Tags" 

379 

380 panels = [ 

381 FieldPanel("name"), 

382 ] 

383 

384 

385class DisciplinesTags(GenericTaggedItemBase): 

386 tag = models.ForeignKey( 

387 "DisciplinesTagged", 

388 null=True, 

389 related_name="disciplinestags_disciplinestagged", 

390 on_delete=models.CASCADE, 

391 ) 

392 

393 def __str__(self): 

394 return self.tag.name 

395 

396 

397class Documentation(models.Model): 

398 DOCU_TYPES = ( 

399 ("GEN", _("general documentation")), 

400 ("LEG", _("legal documents")), 

401 ("BPR", _("best practices")), 

402 ("USE", _("user documentation")), 

403 ) 

404 title = models.CharField(max_length=200) 

405 slug = models.SlugField( 

406 max_length=200, 

407 help_text=_("Text used for URL generation, please use '_' between words"), 

408 ) 

409 subtitle = models.CharField(max_length=200, blank=True) 

410 docu_type = models.CharField(max_length=3, choices=DOCU_TYPES, default="GEN",) 

411 file = models.FileField(blank=True, null=True) # upload_to='datasets/') 

412 description = BleachField(max_length=2000, blank=True) 

413 text = BleachField(blank=True, max_length=12000) 

414 public = models.BooleanField(default=False) 

415 

416 def __str__(self): 

417 return self.title 

418 

419 

420class DataSetPublication(models.Model): 

421 PUB_TYPES = ( 

422 ("DIS", _("discusses dataset")), 

423 ("USE", _("uses dataset")), 

424 ("DES", _("describes dataset")), 

425 # ('MET', 'similiar methodology'), 

426 # ('SIM', 'similiar approach'), 

427 ) 

428 uuid = models.UUIDField(default=uuid.uuid4, editable=False) 

429 dataset = models.ForeignKey("dddatasets.DataSet", on_delete=models.CASCADE) 

430 publication = models.ForeignKey( 

431 "ddpublications.Publication", on_delete=models.CASCADE, null=True 

432 ) 

433 pub_type = models.CharField( 

434 max_length=3, 

435 choices=PUB_TYPES, 

436 default="DIS", 

437 verbose_name="Type of publication", 

438 help_text="Relation between dataset and publication", 

439 ) 

440 

441 def __str__(self): 

442 return format_html("{} ({})", self.publication, self.get_pub_type_display()) 

443 

444 

445class DataSetExternalLink(models.Model): 

446 dataset = models.ForeignKey("DataSet", on_delete=models.CASCADE) 

447 link = models.URLField(help_text=_("URL to external Dataset")) 

448 site_title = models.CharField( 

449 max_length=200, help_text=_("Title of the external repository or web site"), 

450 ) 

451 site_description = BleachField( 

452 max_length=400, 

453 blank=True, 

454 help_text=_("Brief description of the external repository or web site"), 

455 ) 

456 site_text = BleachField( 

457 blank=True, 

458 max_length=2000, 

459 help_text=_("In-depth description of the external repository or web site"), 

460 ) 

461 

462 def __str__(self): 

463 return self.site_title 

464 

465 

466class DataSetAccessRequest(models.Model): 

467 uuid = models.UUIDField(default=uuid.uuid4, editable=False) 

468 dataset = models.ForeignKey("DataSet", on_delete=models.CASCADE) 

469 user = models.ForeignKey(User, on_delete=models.CASCADE) 

470 created_at = models.DateTimeField(auto_now_add=True) 

471 notification = models.ForeignKey( 

472 "ddcomments.Notification", on_delete=models.PROTECT, null=True 

473 ) 

474 

475 def save(self, *args, **kwargs): 

476 # send notification on save 

477 ct = ContentType.objects.get_for_model(self.dataset) 

478 text = _("Dataset access request for dataset {}".format(self.dataset)) 

479 notification = Notification( 

480 owner=self.user, 

481 content_type=ct, 

482 object_id=self.dataset.id, 

483 text=text, 

484 notification_type=Notification.ACCESS_REQUEST, 

485 ) 

486 notification.save() 

487 self.notification = notification 

488 logger.error("[DataSetAccessRequest] send mail") 

489 # send email to dataset owner 

490 subject = text 

491 message = _( 

492 'A user requested access to your dataset "{}" at {}.'.format( 

493 self.dataset, self.dataset.get_absolute_url() 

494 ) 

495 ) 

496 email_to = [self.dataset.owner.get_email()] 

497 send_update_email(subject, message, email_to) 

498 logger.error("[DataSetAccessRequest] save") 

499 models.Model.save(self, *args, **kwargs) 

500 logger.error("[DataSetAccessRequest] saved") 

501 

502 def __str__(self): 

503 ar_string = "Access request pending since {}".format( 

504 self.created_at.strftime("%d.%m.%Y, %H:%M:%S") 

505 ) 

506 return _(ar_string) 

507 

508 

509class DataSetPublicationRequest(models.Model): 

510 uuid = models.UUIDField(default=uuid.uuid4, editable=False) 

511 dataset = models.ForeignKey("DataSet", on_delete=models.CASCADE) 

512 user = models.ForeignKey(User, on_delete=models.CASCADE) 

513 category = models.ForeignKey("Category", on_delete=models.CASCADE) 

514 created_at = models.DateTimeField(auto_now_add=True) 

515 notification = models.ForeignKey( 

516 "ddcomments.Notification", on_delete=models.PROTECT, null=True 

517 ) 

518 

519 def save(self, *args, **kwargs): 

520 logger.error("[DataSetPublicationRequest] start save") 

521 # send notification on save 

522 ct = ContentType.objects.get_for_model(self.dataset) 

523 text = _("Publication request for category {}".format(self.category)) 

524 notification = Notification( 

525 owner=self.user, 

526 content_type=ct, 

527 object_id=self.dataset.id, 

528 text=text, 

529 notification_type=Notification.PUB_REQUEST, 

530 ) 

531 logger.error("[DataSetPublicationRequest] notification save") 

532 notification.save() 

533 logger.error("[DataSetPublicationRequest] notification saved") 

534 

535 self.notification = notification 

536 # send email to all curators 

537 subject = "[Curation] {}".format(text,) 

538 message = _( 

539 "The dataset {} has been submitted for publication in the category {}:\n\n{}\n\nPlease conduct a technical review of the uploaded data and metadata prior to your decision about the publication.".format( 

540 self.dataset, self.category, self.dataset.get_absolute_url_curation() 

541 ) 

542 ) 

543 logger.error(self.category.get_curators_emails()) 

544 email_to = self.category.get_curators_emails() 

545 logger.error("[DataSetPublicationRequest] send mail") 

546 send_update_email(subject, message, email_to) 

547 logger.error("[DataSetPublicationRequest] save") 

548 models.Model.save(self, *args, **kwargs) 

549 logger.error("[DataSetPublicationRequest] saved") 

550 

551 def __str__(self): 

552 ar_string = "Publication request for {} by {} pending since {}".format( 

553 self.dataset, self.user, self.created_at.strftime("%d.%m.%Y, %H:%M:%S") 

554 ) 

555 return _(ar_string) 

556 

557 

558class DataSetCreator(models.Model): 

559 PERSON = "PER" 

560 INSTITUTION = "INS" 

561 

562 CREATOR_TYPE_CHOICES = [ 

563 (PERSON, _("Person")), 

564 (INSTITUTION, _("Institution")), 

565 ] 

566 name = models.CharField(max_length=200) 

567 first_name = models.CharField(max_length=200, blank=True) 

568 url = models.URLField(blank=True) 

569 

570 creator_type = models.CharField( 

571 max_length=3, choices=CREATOR_TYPE_CHOICES, default="PER", 

572 ) 

573 dataset = models.ForeignKey( 

574 "DataSet", 

575 on_delete=models.CASCADE, 

576 related_name="dataset_creator", 

577 null=True, 

578 blank=True, 

579 ) 

580 

581 def __str__(self): 

582 return self.name 

583 

584 def clone(self, new_ds): 

585 dsc = self 

586 dsc.id = None 

587 dsc.dataset = new_ds 

588 dsc.save() 

589 

590 

591def dsa_file_path(instance, filename): 

592 return "datasets/dsa/" + str(uuid.uuid4()) 

593 

594 

595class DataSubmissionAgreement(models.Model): 

596 text = models.TextField(default=_("Not generated yet")) 

597 birthdate = models.DateField() 

598 postal_address = BleachField(max_length=1200) 

599 project_type = BleachField(max_length=1200) 

600 project_title = BleachField(max_length=1200) 

601 dsa_accepted = models.BooleanField( 

602 default=False, verbose_name=_("I do accept the Data Submission Agreement.") 

603 ) 

604 dsa_accepted_date = models.DateField(blank=True, null=True) 

605 pdf = models.FileField( 

606 upload_to=dsa_file_path, storage=datafile_storage, blank=True 

607 ) 

608 

609 

610class DataSet(models.Model): 

611 OPEN_ACCESS = "OA" 

612 METADATA_ONLY = "MO" 

613 RESTRICTED_ACCESS = "RA" 

614 DATA_ACCESS_CHOICES = [ 

615 (OPEN_ACCESS, _("Open Access")), 

616 (METADATA_ONLY, _("Metadata only")), 

617 (RESTRICTED_ACCESS, _("Restricted access")), 

618 ] 

619 

620 uuid = models.UUIDField( 

621 default=uuid.uuid4, editable=False 

622 ) # uuid _not_ as pk as this disturbs django 3rd party apps 

623 owner = models.ForeignKey(User, on_delete=models.PROTECT, null=True) 

624 dsa = models.OneToOneField( 

625 "DataSubmissionAgreement", 

626 blank=True, 

627 null=True, 

628 related_name="dataset_dsa", 

629 on_delete=models.CASCADE, 

630 ) 

631 data_access = models.CharField( 

632 max_length=2, choices=DATA_ACCESS_CHOICES, default="RA", 

633 ) 

634 doi = models.CharField(max_length=200, blank=True) 

635 dhdoi = models.CharField(max_length=200, blank=True) 

636 institution = models.ForeignKey( 

637 "ddusers.Institution", 

638 blank=True, 

639 null=True, 

640 related_name="dataset_institution", 

641 on_delete=models.CASCADE, 

642 ) 

643 title = models.CharField(max_length=200) 

644 subtitle = models.CharField(max_length=200, blank=True) 

645 shorttitle = models.CharField(max_length=50) 

646 image = models.ImageField(blank=True, null=True) 

647 countries = models.ManyToManyField( 

648 "ddusers.Country", related_name="dataset_country", blank=True 

649 ) 

650 # access = models.CharField(max_length=3, choices=ACCESS_RULES_CHOICES, default='NET') 

651 link = models.ForeignKey( 

652 Link, 

653 blank=True, 

654 null=True, 

655 related_name="dataset_link", 

656 on_delete=models.CASCADE, 

657 ) 

658 data_repository = models.ForeignKey( 

659 "DataRepository", 

660 blank=True, 

661 null=True, 

662 related_name="dataset_datarepository_data_repository", 

663 on_delete=models.CASCADE, 

664 ) 

665 date_of_data_creation_from = models.DateField( 

666 blank=True, 

667 null=True, 

668 help_text=_("Date format YYYY-MM-DD"), 

669 verbose_name=_("Date/period of data creation (from)"), 

670 ) 

671 date_of_data_creation_to = models.DateField( 

672 blank=True, 

673 null=True, 

674 help_text=_("Date format YYYY-MM-DD"), 

675 verbose_name=_("Date/period of data creation (to)"), 

676 ) 

677 date_of_data_creation_text = models.CharField( 

678 max_length=400, 

679 help_text=_( 

680 'If you cannot specify the exact date please indicate the approximate date here (eq. "October 2010")' 

681 ), 

682 blank="True", 

683 ) 

684 version = models.FloatField(default=1.0) 

685 datatypes = models.ManyToManyField( 

686 "DataType", verbose_name="Data types", related_name="dataset_datatypes_datatype" 

687 ) 

688 datatype_text = models.CharField(max_length=400, blank=True) 

689 description = BleachField(verbose_name="Technical description", max_length=2400) 

690 keywords = TaggableManager("KeywordTags", blank=True, through=KeywordTags) 

691 sources_of_data = BleachField(max_length=6000) 

692 languages_of_data = TaggableManager( 

693 "LanguageTags", blank=True, through=LanguageTags 

694 ) 

695 methods_of_data_collection = TaggableManager( 

696 "CollectionMethodsTags", blank=True, through=CollectionMethodsTags 

697 ) 

698 methods_of_data_analysis = TaggableManager( 

699 "AnalysisMethodsTags", blank=True, through=AnalysisMethodsTags 

700 ) 

701 disciplines = TaggableManager( 

702 "DisciplinesTags", blank=True, through=DisciplinesTags 

703 ) 

704 time_period_text = models.CharField( 

705 max_length=400, 

706 blank=True, 

707 help_text=_( 

708 'If you cannot specify the exact date please indicate the approximate date here (eq. "October 2010")' 

709 ), 

710 verbose_name="Time period covered (text)", 

711 ) 

712 time_period_from = models.DateField( 

713 blank=True, 

714 null=True, 

715 help_text=_("Date format YYYY-MM-DD"), 

716 verbose_name=_("Time period covered in data collection (from)"), 

717 ) 

718 time_period_to = models.DateField( 

719 blank=True, 

720 null=True, 

721 help_text=_("Date format YYYY-MM-DD"), 

722 verbose_name=_("Time period covered in data collection (to)"), 

723 ) 

724 license = models.ForeignKey( 

725 "License", 

726 related_name="dataset_license", 

727 on_delete=models.PROTECT, 

728 blank=True, 

729 null=True, 

730 ) 

731 

732 related_dataset_text = BleachField(blank=True, max_length=1200) 

733 related_dataset = models.ManyToManyField( 

734 "DataSet", 

735 related_name="dataset_related_dataset_dataset", 

736 blank=True, 

737 help_text=_("Undo choice with CTRL–Mouse Click"), 

738 ) 

739 related_projects = BleachField(blank=True, max_length=1200) 

740 institutional_affiliation = BleachField(blank=True, max_length=1200) 

741 sponsors = models.ManyToManyField( 

742 "Sponsor", blank=True, related_name="dataset_sponsors" 

743 ) 

744 funding = BleachField(blank=True, max_length=1200) 

745 publications = models.ManyToManyField( 

746 "ddpublications.Publication", 

747 blank=True, 

748 through="dddatasets.DataSetPublication", 

749 ) 

750 comments = GenericRelation("ddcomments.Comment", related_query_name="comments") 

751 dataset_management_object = models.ForeignKey( 

752 "DataSetManagementObject", on_delete=models.PROTECT, blank=True, null=True, 

753 ) 

754 published = models.BooleanField(default=False) 

755 published_main_category = models.ForeignKey( 

756 "Category", 

757 related_name="dataset_published_main_category", 

758 on_delete=models.PROTECT, 

759 blank=True, 

760 null=True, 

761 ) 

762 published_categories = models.ManyToManyField( 

763 "Category", related_name="dataset_published_categories", blank=True 

764 ) 

765 created_at = models.DateField(default=timezone.now) 

766 publication_date = models.DateField(null=True, blank=True) 

767 publication_accepted = models.BooleanField(default=False) 

768 publication_accepted_by = models.ForeignKey( 

769 User, 

770 related_name="dataset_publication_accepted_by", 

771 on_delete=models.PROTECT, 

772 null=True, 

773 blank=True, 

774 ) 

775 

776 COPYRIGHT_DECLARATION_STRING = _("I declare, that no copyrights are violated.") 

777 copyright_declaration = models.BooleanField( 

778 default=False, verbose_name=COPYRIGHT_DECLARATION_STRING 

779 ) 

780 copyright_declaration_text = BleachField( 

781 default="I declare, that no copyrights are violated.", 

782 blank=True, 

783 max_length=400, 

784 ) 

785 

786 PRIVACY_RIGHTS_DECLARATION_STRING = _( 

787 "I declare, that no privacy and data protection regulations are violated." 

788 ) 

789 privacy_rights_declaration = models.BooleanField( 

790 default=False, verbose_name=PRIVACY_RIGHTS_DECLARATION_STRING, 

791 ) 

792 privacy_rights_declaration_text = BleachField(blank=True, max_length=400,) 

793 

794 class Meta: 

795 permissions = ( 

796 ("ra_view_dataset", _("View data set in restricted access mode")), 

797 ) 

798 ordering = ["title"] 

799 

800 def get_license_name_from_ds(self): 

801 if not self.license: 

802 return "No license" 

803 else: 

804 return self.license.get_license_name() 

805 

806 def get_license_text_from_ds(self): 

807 if not self.license: 

808 return "No license" 

809 else: 

810 return self.license.get_license_text() 

811 

812 def license_is_valid(self): 

813 """ use the DD_LICENSE_MATRIX map to check if a license 

814 (given by slug) is in the list of a data access model 

815 """ 

816 license_valid = False 

817 if not self.license: 

818 # license = None is only valid in "nolicense" exists in matrix list 

819 if "nolicense" in settings.DD_LICENSE_MATRIX[self.data_access]: 

820 license_valid = True 

821 elif ( 

822 # if license is set check validity 

823 self.license.get_license_slug() 

824 in settings.DD_LICENSE_MATRIX[self.data_access] 

825 ): 

826 license_valid = True 

827 

828 return license_valid 

829 

830 def generate_dsa(self): 

831 try: 

832 dsa_page = ManualPage.objects.get(slug="data-submission-agreement-skeleton") 

833 except Exception: 

834 dsa_page = None # geht so nicht, sonst scheitert zugriff auf .body 

835 

836 if self.data_access == self.OPEN_ACCESS: 

837 access_text_slug = "access-open-access" 

838 elif self.data_access == self.RESTRICTED_ACCESS: 

839 access_text_slug = "access-restricted-access" 

840 elif self.data_access == self.METADATA_ONLY: 

841 access_text_slug = "access-metadata-only" 

842 else: 

843 access_text_slug = "" 

844 

845 try: 

846 access_text_page = ManualPage.objects.get(slug=access_text_slug) 

847 access_text = str(access_text_page.body) 

848 except Exception: 

849 access_text = "" 

850 

851 files_info = "<ul>" 

852 for datafile in self.get_datafiles(): 

853 files_info += "<li>{}, {} bytes</li>".format( 

854 datafile.name, datafile.file.size 

855 ) 

856 files_info += "</ul>" 

857 

858 vars_dict = { 

859 "[[NAME]]": self.owner.get_academic_name(), 

860 "[[EMAIL]]": self.owner.email, 

861 "[[BIRTHDATE]]": str(self.dsa.birthdate), 

862 "[[ADDRESS]]": self.dsa.postal_address, 

863 "[[PROJECT_TYPE]]": self.dsa.project_type, 

864 "[[PROJECT_TITLE]]": self.dsa.project_title, 

865 "[[DESCRIPTION]]": self.description, 

866 "[[FILES]]": files_info, 

867 "[[AFFILIATION]]": self.institutional_affiliation, 

868 "[[SPONSORS]]": self.funding, 

869 "[[ACCESS_MODEL]]": self.get_data_access_display(), 

870 "[[ACCESS_TEXT]]": access_text, 

871 "[[LICENSE]]": self.get_license_name_from_ds(), 

872 "[[LICENSE_TEXT]]": self.get_license_text_from_ds(), 

873 } 

874 if dsa_page: 

875 dsa_text = str(dsa_page.body) 

876 else: 

877 dsa_text = "" 

878 

879 for key, value in vars_dict.items(): 

880 dsa_text = dsa_text.replace(key, value) 

881 

882 self.dsa.text = dsa_text 

883 self.dsa.save() 

884 self.dsa.pdf = self.create_dsa_pdf() 

885 self.dsa.save() 

886 

887 def create_dsa_pdf(self): 

888 # create pdf file 

889 file_name = "{}-data-submission-agreement.pdf".format(self.uuid) 

890 path = os.path.join(settings.DATA_ROOT, "datasets", file_name) 

891 html_string = render_to_string("dddatasets/pdf_dsa.html", {"ds": self}) 

892 html = HTML(string=html_string) 

893 html.write_pdf(path) 

894 return path 

895 

896 def pub_request_pending(self): 

897 try: 

898 pub_request = DataSetPublicationRequest.objects.get(dataset=self) 

899 if pub_request: 

900 return True 

901 except ObjectDoesNotExist: 

902 pass 

903 return False 

904 

905 def is_frozen(self): 

906 if self.published: 

907 return True 

908 if self.publication_accepted: 

909 return True 

910 if self.pub_request_pending(): 

911 return True 

912 return False 

913 

914 def get_dsa(self): 

915 return DataSubmissionAgreement.objects.get(dataset=self) 

916 

917 def get_escaped_description(self): 

918 description = self.description.replace("\n", "<br>") 

919 return description 

920 

921 def get_creators(self): 

922 return self.dataset_creator.all().order_by("name") 

923 

924 def creators_str(self): 

925 creators_list = list() 

926 for creator in self.get_creators(): 

927 if creator.creator_type == creator.PERSON: 

928 if creator.first_name: 

929 creators_list.append( 

930 "{} {}".format(creator.first_name, creator.name) 

931 ) 

932 else: 

933 creators_list.append(creator.name) 

934 else: 

935 creators_list.append(creator.name) 

936 return ", ".join(creators_list) 

937 

938 def get_all_access_requests(self): 

939 return DataSetAccessRequest.objects.filter(dataset=self) 

940 

941 def get_access_request(self, user): 

942 return DataSetAccessRequest.objects.filter(dataset=self, user=user).first() 

943 

944 def clear_user_permissions(self, user): 

945 for perm in get_perms(user, self): 

946 remove_perm(perm, user, self) 

947 

948 def assign_user_permissions(self, user, perm): 

949 # assign new permission 

950 assign_perm(perm, user, self) 

951 

952 def get_ra_with_perms(self): 

953 # get all users with permissions, but exclude owner 

954 return get_users_with_perms(self).exclude(uuid=self.owner.uuid) 

955 

956 def user_has_ra_view_right(self, user): 

957 return user.has_perm("ra_view_dataset", self) 

958 

959 def user_has_admin_right(self, user): 

960 return user.has_perm("admin_dsmo", self.dataset_management_object) 

961 

962 def class_name(self): 

963 return self.__class__.__name__ 

964 

965 def is_main_published(self): 

966 """check if a dataset is the main published ds of a dsmo""" 

967 return bool(self.dataset_management_object.main_published_ds == self) 

968 

969 def generate_html_string(self): 

970 class PDFForm(ModelForm): 

971 class Meta: 

972 model = DataSet 

973 exclude = ( 

974 "html_string", 

975 # "owner", 

976 # "group", 

977 # "institution", 

978 # "title", 

979 # "title_en", 

980 # "title_ru", 

981 # "subtitle", 

982 # "subtitle_en", 

983 # "subtitle_ru", 

984 # "image", 

985 # "categories", 

986 # "countries", 

987 # "description", 

988 ) 

989 

990 form = PDFForm(instance=self) 

991 html_string = render_to_string( 

992 "dddatasets/pdf_dataset.html", {"fields": form, "ds": self} 

993 ) 

994 

995 return html_string 

996 

997 # commented out because of mypy error 

998 # def get_users(self): 

999 # # strange import to avoid getting caught in circular import triggerd by guardian; conflict because guardian uses User Model defined in this file 

1000 # # TODO: define User model in seperate file 

1001 # from .views import dataset_get_users 

1002 # 

1003 # return dataset_get_users(self) 

1004 

1005 def get_prep_comments_all(self): 

1006 # return all prep comments in reverse date order 

1007 ct = ContentType.objects.get_for_model(DataSet) 

1008 return Comment.objects.filter(content_type=ct, object_id=self.id,).order_by( 

1009 "-date_added" 

1010 ) 

1011 

1012 def get_prep_comments_root(self): 

1013 # return prep comments root nodes in reverse date order 

1014 ct = ContentType.objects.get_for_model(DataSet) 

1015 return ( 

1016 Comment.objects.root_nodes() 

1017 .filter(content_type=ct, object_id=self.id,) 

1018 .order_by("-date_added") 

1019 ) 

1020 

1021 def get_public_comments_all(self): 

1022 # return all public comments in reverse date order 

1023 ct = ContentType.objects.get_for_model(DataSet) 

1024 return Comment.objects.filter( 

1025 content_type=ct, 

1026 object_id=self.id, 

1027 comment_type__in=(Comment.PUBLIC, Comment.PERMANENT), 

1028 ).order_by("-date_added") 

1029 

1030 def get_public_comments_root(self): 

1031 # return public comments root nodes in reverse date order 

1032 ct = ContentType.objects.get_for_model(DataSet) 

1033 return ( 

1034 Comment.objects.root_nodes() 

1035 .filter( 

1036 content_type=ct, 

1037 object_id=self.id, 

1038 comment_type__in=(Comment.PUBLIC, Comment.PERMANENT), 

1039 ) 

1040 .order_by("-date_added") 

1041 ) 

1042 

1043 def get_pdf_file_name(self): 

1044 return "DiscussData-{}-v{}-description.pdf".format( 

1045 slugify(self.title), self.version 

1046 ) 

1047 

1048 def get_zip_file_name(self): 

1049 return "DiscussData-{}-v{}-all-files.zip".format( 

1050 slugify(self.title), self.version 

1051 ) 

1052 

1053 # def generate_pdf(self): 

1054 # html = HTML(string=self.html_string, base_url=request.build_absolute_uri()) 

1055 # file_name = "/tmp/dataset_%s.pdf" % (self.id, ) 

1056 # return html.write_pdf() 

1057 

1058 def get_absolute_url(self): 

1059 return settings.DISCUSS_DATA_HOST + reverse( 

1060 "dddatasets:detail", args=[str(self.uuid)] 

1061 ) 

1062 

1063 def get_absolute_url_prep(self): 

1064 return settings.DISCUSS_DATA_HOST + reverse( 

1065 "dddatasets:prep_edit", args=[str(self.uuid)] 

1066 ) 

1067 

1068 def get_absolute_url_prep_versions(self): 

1069 return settings.DISCUSS_DATA_HOST + reverse( 

1070 "dddatasets:prep_edit_versions", args=[str(self.uuid)] 

1071 ) 

1072 

1073 def get_absolute_url_curation(self): 

1074 return settings.DISCUSS_DATA_HOST + reverse( 

1075 "dddatasets:prep_curation", args=[str(self.uuid)] 

1076 ) 

1077 

1078 def get_fulltitle(self): 

1079 if self.subtitle: 

1080 fulltitle = "%s – %s" % (self.title, self.subtitle) 

1081 else: 

1082 fulltitle = self.title 

1083 return fulltitle 

1084 

1085 def create_new_version(self): 

1086 # orig_id = self.id 

1087 datafiles = self.get_datafiles() 

1088 datatypes = self.get_datatypes() 

1089 countries = self.get_countries() 

1090 keyword_tags = self.get_keyword_tags() 

1091 language_tags = self.get_language_tags() 

1092 disciplines_tags = self.get_disciplines_tags() 

1093 methods_of_data_collection = self.get_methods_of_data_collection_tags() 

1094 methods_of_data_analysis = self.get_methods_of_data_analysis_tags() 

1095 data_creators = self.get_creators() 

1096 

1097 # create new version 

1098 ds = self 

1099 ds.pk = None 

1100 ds.published = False 

1101 ds.version = self.dataset_management_object.get_top_version() + 0.1 

1102 ds.publication_accepted = False 

1103 ds.publication_accepted_by = None 

1104 ds.copyright_declaration = False 

1105 ds.publication_date = None 

1106 ds.created_at = timezone.now() 

1107 ds.doi = "" 

1108 ds.dhdoi = "" 

1109 ds.dsa = None 

1110 ds.published_main_category = None 

1111 ds.privacy_rights_declaration = False 

1112 ds.save() 

1113 ds.published_categories.set([]) 

1114 

1115 for datafile in datafiles: 

1116 datafile.clone(ds) 

1117 

1118 for datatype in datatypes: 

1119 ds.datatypes.add(datatype) 

1120 

1121 for country in countries: 

1122 ds.countries.add(country) 

1123 

1124 for tag in keyword_tags: 

1125 ds.keywords.add(tag) 

1126 

1127 for tag in language_tags: 

1128 ds.languages_of_data.add(tag) 

1129 

1130 for tag in disciplines_tags: 

1131 ds.disciplines.add(tag) 

1132 

1133 for tag in methods_of_data_collection: 

1134 ds.methods_of_data_collection.add(tag) 

1135 

1136 for tag in methods_of_data_analysis: 

1137 ds.methods_of_data_analysis.add(tag) 

1138 

1139 for creator in data_creators: 

1140 creator.clone(ds) 

1141 

1142 return ds 

1143 

1144 def get_publications(self): 

1145 return self.publications.all() 

1146 

1147 def get_external_links(self): 

1148 return DataSetExternalLink.objects.filter(dataset=self) 

1149 

1150 def get_dataset_publications(self): 

1151 return DataSetPublication.objects.filter(dataset=self) 

1152 

1153 # deprecated begin 

1154 def get_countries_list(self): 

1155 return qs_to_str(self.get_countries(), "name") 

1156 

1157 def get_keyword_tags_list(self): 

1158 return qs_to_str(self.get_keyword_tags(), "name") 

1159 

1160 def get_disciplines_tags_list(self): 

1161 return qs_to_str(self.get_disciplines_tags(), "name") 

1162 

1163 # deprecated end 

1164 

1165 def get_countries(self): 

1166 return self.countries.all().order_by("name") 

1167 

1168 def get_keyword_tags(self): 

1169 return self.keywords.all().order_by("name") 

1170 

1171 def get_disciplines_tags(self): 

1172 return self.disciplines.all().order_by("name") 

1173 

1174 def get_language_tags(self): 

1175 return self.languages_of_data.all().order_by("name") 

1176 

1177 def get_methods_of_data_collection_tags(self): 

1178 return self.methods_of_data_collection.all().order_by("name") 

1179 

1180 def get_methods_of_data_analysis_tags(self): 

1181 return self.methods_of_data_analysis.all().order_by("name") 

1182 

1183 def get_datatypes(self): 

1184 return self.datatypes.all() 

1185 

1186 def get_data_citation(self): 

1187 """ 

1188 This citation has seven components of which five are human readable: 

1189 the author(s), title, year, data repository (or distributor), and version number. 

1190 Two components are machine-readable (UNF = universal numerical fingerprint; DOI = Digital Object Identifier; 

1191 ‘hdl’ refers to the international HANDLE.NET system). 

1192 

1193 TODO: refactor to provide different citation styles, 

1194 see https://gitlab.gwdg.de/discuss-data/discuss-data/-/issues/180 

1195 """ 

1196 if self.doi: 

1197 doi = self.doi 

1198 else: 

1199 doi = "00.00000/00000000-0000-0000-0000-000000000000" 

1200 

1201 if self.publication_date: 

1202 year = self.publication_date.year 

1203 else: 

1204 year = timezone.now().year 

1205 

1206 return format_html( 

1207 "{} ({}): {}, v. {}, Discuss Data, <a href='https://doi.org/{}'>&lt;doi:{}&gt;</a>.", 

1208 self.creators_str(), 

1209 year, 

1210 self.title, 

1211 self.version, 

1212 doi, 

1213 doi, 

1214 ) 

1215 

1216 def get_data_citation_pdf(self): 

1217 return self.get_data_citation() 

1218 

1219 groups_dict = { 

1220 "view": ("view_dataset",), 

1221 "edit": ("view_dataset", "edit_dataset"), 

1222 "admin": ("view_dataset", "edit_dataset", "admin_dataset"), 

1223 } 

1224 

1225 def create_groups(self, groups_dict): 

1226 dsid = str(self.id) 

1227 for gkey in groups_dict.keys(): 

1228 group_name = "%s_%s" % (dsid, gkey) 

1229 logger.debug("create %s" % (group_name,)) 

1230 group = Group.objects.create(name=group_name, description=gkey) 

1231 logger.debug("add %s to dataset" % (group_name,)) 

1232 self.groups.add(group) 

1233 # assign permissions to group 

1234 for perm in groups_dict[gkey]: 

1235 logger.debug("assign %s to %s" % (perm, group,)) 

1236 assign_perm(perm, group, self) 

1237 

1238 def save(self, *args, **kwargs): 

1239 super().save(*args, **kwargs) 

1240 

1241 if not self.dataset_management_object: 

1242 dsmo = DataSetManagementObject() 

1243 dsmo.owner = self.owner 

1244 dsmo.save() 

1245 self.dataset_management_object = dsmo 

1246 

1247 # if self.data_access != self.METADATA_ONLY and not self.license: 

1248 # # set odc-by as default license or individual if no odc-by licensepage exists 

1249 # try: 

1250 # license_page = LicensePage.objects.get(slug="license-odc-by-v1-0") 

1251 # license = License.objects.create(license_type=License.STANDARD, standard_license=license_page) 

1252 # license.save() 

1253 # except LicensePage.DoesNotExist(): 

1254 # license = License.objects.create(license_type=License.INDIVIDUAL) 

1255 # self.license = license 

1256 

1257 if self.image and hasattr(self.image, "url"): 

1258 # in situ thumbnail generation at upload 

1259 im = Image.open(self.image.path) 

1260 thumbnail = cropped_thumbnail(im, [320, 160]) 

1261 thumbnail.save(self.image.path) 

1262 

1263 # for using save a second time, force_insert may not be used 

1264 # see https://code.djangoproject.com/ticket/28253#comment:4 

1265 if kwargs.get("force_insert"): 

1266 kwargs.pop("force_insert") 

1267 super().save(*args, **kwargs) 

1268 

1269 def get_all_files(self): 

1270 return DataFile.objects.filter(dataset=self).order_by("name") 

1271 

1272 def get_datafiles(self): 

1273 datafiles = ( 

1274 DataFile.objects.filter(dataset=self) 

1275 .order_by("name") 

1276 .exclude(data_file_type=DataFile.FILE_TYPE_GENERATED_PDF) 

1277 .exclude(data_file_type=DataFile.FILE_TYPE_GENERATED_ZIP) 

1278 ) 

1279 

1280 # no datafiles/converted files if access is "metadata only" 

1281 if self.data_access == self.METADATA_ONLY: 

1282 datafiles = datafiles.exclude( 

1283 data_file_type=DataFile.FILE_TYPE_DATA 

1284 ).exclude(data_file_type=DataFile.FILE_TYPE_CONVERTED) 

1285 

1286 return datafiles 

1287 

1288 def get_all_datafiles(self): 

1289 return ( 

1290 DataFile.objects.filter(dataset=self) 

1291 .order_by("name") 

1292 .exclude(data_file_type=DataFile.FILE_TYPE_GENERATED_PDF) 

1293 .exclude(data_file_type=DataFile.FILE_TYPE_GENERATED_ZIP) 

1294 ) 

1295 

1296 def get_datafile(self, uuid): 

1297 return DataFile.objects.filter(dataset=self, uuid=uuid).get() 

1298 

1299 def get_datafiles_count(self): 

1300 return self.get_datafiles().count() 

1301 

1302 def dublin_core(self): 

1303 dc_dict = dict( 

1304 titles=[self.title_en, self.subtitle_en], 

1305 # creators = [self.get_owners()], 

1306 ) 

1307 # dc_dict['titles'] = "<dc:title>%s</dc:title>\n" % (self.title,) 

1308 # dc_str += "<dc:subtitle>%s</dc:subtitle>\n" % (self.subtitle,) 

1309 return simpledc.tostring(dc_dict) 

1310 

1311 def create_description_pdf(self): 

1312 ds = self 

1313 # create pdf file 

1314 file_name = "{}-description.pdf".format(ds.uuid) 

1315 path = os.path.join(settings.DATA_ROOT, "datasets", file_name) 

1316 html = HTML(string=ds.generate_html_string(), base_url=ds.get_absolute_url()) 

1317 html.write_pdf(path) 

1318 # create data file 

1319 df, created = DataFile.objects.get_or_create( 

1320 dataset=ds, data_file_type=DataFile.FILE_TYPE_GENERATED_PDF 

1321 ) 

1322 df.name = self.get_pdf_file_name() 

1323 df.file = path 

1324 df.save() 

1325 

1326 def get_description_pdf_file(self): 

1327 try: 

1328 pdf_file = DataFile.objects.filter(dataset=self, data_file_type="PDF")[0] 

1329 except Exception: 

1330 pdf_file = None 

1331 return pdf_file 

1332 

1333 def create_files_zip(self): 

1334 ds = self 

1335 file_name = "{}-all-files.zip".format(ds.uuid) 

1336 path = os.path.join(settings.DATA_ROOT, "datasets", file_name) 

1337 # create zip file 

1338 with ZipFile(path, "a") as zipfile: 

1339 # add autogenerated pdf file 

1340 pdf_file = self.get_description_pdf_file() 

1341 zipfile.writestr(pdf_file.name, pdf_file.file.read()) 

1342 for file in ds.get_datafiles(): 

1343 zipfile.writestr(file.name, file.file.read()) 

1344 # fix for Linux zip files read in Windows 

1345 for file in zipfile.filelist: 

1346 file.create_system = 0 

1347 

1348 # create data file 

1349 df, created = DataFile.objects.get_or_create( 

1350 dataset=ds, data_file_type=DataFile.FILE_TYPE_GENERATED_ZIP 

1351 ) 

1352 df.name = self.get_zip_file_name() 

1353 df.file = path 

1354 df.save() 

1355 

1356 def valid_licenses(self): 

1357 return settings.DD_LICENSE_MATRIX[self.data_access] 

1358 

1359 def request_publication(self, user): 

1360 logger.error("in request publication") 

1361 pub_request = DataSetPublicationRequest.objects.filter(dataset=self, user=user) 

1362 # TODO: add a check for compatibility between access model and chosen license 

1363 logger.error("not self published if") 

1364 if not self.published: 

1365 logger.error("pub_request.count() < 1 if") 

1366 if pub_request.count() < 1: 

1367 logger.error("user_has_admin_right if") 

1368 if self.user_has_admin_right(user): 

1369 self.dataset_management_object.save() 

1370 self.published_main_category = self.get_main_category() 

1371 self.published_categories.set(self.get_categories()) 

1372 self.copyright_declaration_text = self.COPYRIGHT_DECLARATION_STRING 

1373 self.privacy_rights_declaration_text = ( 

1374 self.PRIVACY_RIGHTS_DECLARATION_STRING 

1375 ) 

1376 logger.error("save ds") 

1377 self.save() 

1378 logger.error("saved ds") 

1379 # trigger publication request 

1380 pubreq = DataSetPublicationRequest() 

1381 pubreq.dataset = self 

1382 pubreq.user = user 

1383 pubreq.category = self.get_main_category() 

1384 logger.error("save pubreq") 

1385 pubreq.save() 

1386 logger.error("saved pubreq") 

1387 

1388 return _("Dataset publication requested") 

1389 else: 

1390 logger.error("user has no admin right") 

1391 return _("Operation not allowed") 

1392 else: 

1393 logger.error("pub_request.count() < 1 false") 

1394 return _("Dataset publication request pending") 

1395 

1396 else: 

1397 return _( 

1398 "Dataset is already published. Please create new version and publish again" 

1399 ) 

1400 

1401 def get_publication_request(self): 

1402 return DataSetPublicationRequest.objects.filter(dataset=self) 

1403 

1404 def deny_publication(self, user, message=None): 

1405 if user in self.get_curators(): 

1406 pubreq = DataSetPublicationRequest.objects.get(dataset=self) 

1407 ct = ContentType.objects.get_for_model(self) 

1408 text = _( 

1409 "The Publication request for category {} has been denied".format( 

1410 self.get_main_category() 

1411 ) 

1412 ) 

1413 subject = text 

1414 if message: 

1415 text = "{}\n\n{}'s curators message:\n{}".format(text, user, message) 

1416 

1417 notification = Notification( 

1418 owner=user, 

1419 content_type=ct, 

1420 object_id=self.id, 

1421 text=text, 

1422 parent=pubreq.notification, 

1423 notification_type=Notification.PUB_REQUEST, 

1424 ) 

1425 notification.save() 

1426 # send email to dataset owner 

1427 mail_message = "Dataset {} at {}:\n{}".format( 

1428 self, self.get_absolute_url_prep(), text 

1429 ) 

1430 email_to = [self.owner.get_email()] 

1431 send_update_email(subject, mail_message, email_to) 

1432 # send emails to category curators 

1433 curators_subject = "[Curation] {}".format(subject,) 

1434 curators_to = self.get_main_category().get_curators_emails() 

1435 send_update_email(curators_subject, mail_message, curators_to) 

1436 pubreq.delete() 

1437 else: 

1438 raise PermissionDenied 

1439 

1440 def perform_checks(self): 

1441 error = False 

1442 message_list = list() 

1443 if not self.license_is_valid(): 

1444 message_list.append(_("License is not valid.")) 

1445 error = True 

1446 return error, message_list 

1447 

1448 def accept_publication(self, user, message=None): 

1449 if user in self.get_curators(): 

1450 self.publication_accepted = True 

1451 self.publication_accepted_by = user 

1452 self.save() 

1453 pubreq = DataSetPublicationRequest.objects.get(dataset=self) 

1454 

1455 ct = ContentType.objects.get_for_model(self) 

1456 text = _( 

1457 "The Publication request for category {} has been accepted".format( 

1458 self.get_main_category() 

1459 ) 

1460 ) 

1461 subject = text 

1462 if message: 

1463 text = "{}\n\n{}'s curators message:\n{}\n".format(text, user, message) 

1464 

1465 notification = Notification( 

1466 owner=user, 

1467 content_type=ct, 

1468 object_id=self.id, 

1469 text=text, 

1470 parent=pubreq.notification, 

1471 notification_type=Notification.PUB_REQUEST, 

1472 ) 

1473 notification.save() 

1474 # send email to dataset owner 

1475 mail_message = "Dataset {} at {}:\n{}".format( 

1476 self, self.get_absolute_url_prep(), text 

1477 ) 

1478 mail_message += "\nYou can now publish the dataset at {}.\n".format( 

1479 self.get_absolute_url_prep_versions() 

1480 ) 

1481 email_to = [self.owner.get_email()] 

1482 send_update_email(subject, mail_message, email_to) 

1483 # send emails to category curators 

1484 curators_subject = "[Curation] {}".format(subject,) 

1485 curators_to = self.get_main_category().get_curators_emails() 

1486 send_update_email(curators_subject, mail_message, curators_to) 

1487 pubreq.delete() 

1488 else: 

1489 raise PermissionDenied 

1490 

1491 def publish(self, user): 

1492 logger.debug("DS PUBLISH started") 

1493 if not self.publication_accepted: 

1494 return _("Dataset not accepted for publication") 

1495 if self.user_has_admin_right(user): 

1496 # if self.data_access == DataSet.OPEN_ACCESS: 

1497 # publish_to_dhrep(self, token) 

1498 # run checks 

1499 self.perform_checks() 

1500 

1501 self.published_main_category = self.get_main_category() 

1502 self.dataset_published_categories = self.get_categories() 

1503 self.dataset_management_object.published = True 

1504 self.dataset_management_object.main_published_ds = self 

1505 self.dataset_management_object.save() 

1506 self.publication_date = timezone.now() 

1507 self.save() 

1508 try: 

1509 self.doi = generate_discuss_data_doi(self) 

1510 except DataCiteUnauthorizedError as e: 

1511 raise ImproperlyConfigured from e 

1512 except DataCiteNotFoundError as e: 

1513 raise MetaDataError from e 

1514 self.create_description_pdf() 

1515 # TODO: do not create zip file as we will need 

1516 # more than one to reflect multiple access models 

1517 # self.create_files_zip() 

1518 self.published = True 

1519 self.save() 

1520 action.send( 

1521 user, verb="published the dataset", target=self, 

1522 ) 

1523 # send email to dataset owner 

1524 subject = "Dataset {} has been published successfully!".format(self,) 

1525 message = "{}\n\nIt is accessible via DOI https://doi.org/{}".format( 

1526 subject, self.doi 

1527 ) 

1528 if self.dhdoi: 

1529 message += "\nDARIAH-Repository entry: https://doi.org/{}".format( 

1530 self.dhdoi 

1531 ) 

1532 email_to = [self.owner.get_email()] 

1533 send_update_email(subject, message, email_to) 

1534 # send emails to category curators 

1535 curators_subject = "[Curation] {}".format(subject,) 

1536 curators_to = self.get_main_category().get_curators_emails() 

1537 send_update_email(curators_subject, message, curators_to) 

1538 else: 

1539 raise PermissionDenied 

1540 logger.debug("DS PUBLISH ended") 

1541 

1542 def get_published_versions(self): 

1543 return self.dataset_management_object.get_published_datasets().order_by( 

1544 "-version" 

1545 ) 

1546 

1547 def get_versions(self): 

1548 return self.dataset_management_object.get_all_datasets().order_by("-version") 

1549 # return DataSet.objects.filter(parent_dataset = self.parent_dataset).order_by('-version') 

1550 # else: 

1551 # return None 

1552 

1553 def get_main_category(self): 

1554 if self.published: 

1555 return self.published_main_category 

1556 else: 

1557 return self.dataset_management_object.main_category 

1558 

1559 def get_categories(self): 

1560 if self.published: 

1561 return self.published_categories.order_by("name") 

1562 else: 

1563 return self.dataset_management_object.categories.order_by("name") 

1564 

1565 def get_curators(self): 

1566 if self.published: 

1567 return self.published_main_category.curators.order_by("last_name") 

1568 else: 

1569 return self.dataset_management_object.main_category.curators.order_by( 

1570 "last_name" 

1571 ) 

1572 

1573 def __str__(self): 

1574 return "{}, v{}".format(self.title, self.version) 

1575 

1576 

1577class DataSetManagementObject(models.Model): 

1578 uuid = models.UUIDField( 

1579 default=uuid.uuid4, editable=False 

1580 ) # uuid _not_ as pk as this disturbs django 3rd party apps 

1581 groups = models.ManyToManyField(Group, related_name="dsmo_groups_group") 

1582 owner = models.ForeignKey(User, on_delete=models.PROTECT, null=True) 

1583 created_at = models.DateTimeField(auto_now_add=True) 

1584 updated_at = models.DateTimeField(auto_now=True) 

1585 published = models.BooleanField(default=False) 

1586 doi = models.CharField(max_length=200, blank=True,) 

1587 main_category = models.ForeignKey( 

1588 "Category", 

1589 related_name="dsmo_main_category", 

1590 on_delete=models.PROTECT, 

1591 default=1, 

1592 ) 

1593 categories = models.ManyToManyField( 

1594 "Category", related_name="dsmo_categories", blank=True 

1595 ) 

1596 main_published_ds = models.OneToOneField( 

1597 "DataSet", null=True, blank=True, on_delete=models.PROTECT, 

1598 ) 

1599 

1600 # max_version = models.DecimalField(max_digits=5, decimal_places=1,default=1.0) 

1601 # max_version_published = models.DecimalField(max_digits=5, decimal_places=1,default=1.0) 

1602 

1603 class Meta: 

1604 """ 

1605 Definition of individual dsmo permissions. 

1606 DSMO permissions override DataSet permissions which are 

1607 deprecated and will be removed in the near future 

1608 """ 

1609 

1610 permissions = ( 

1611 ("view_dsmo", "View Data Set Management Object"), 

1612 ("edit_dsmo", "Edit Data Set Management Object"), 

1613 ("admin_dsmo", "Admin Data Set Management Object"), 

1614 ) 

1615 

1616 # group individual permissions together 

1617 perms_groups = { 

1618 "view": ("view_dsmo",), 

1619 "edit": ("view_dsmo", "edit_dsmo"), 

1620 "admin": ("view_dsmo", "edit_dsmo", "admin_dsmo"), 

1621 } 

1622 

1623 def clear_user_permissions(self, user): 

1624 for perm in get_perms(user, self): 

1625 remove_perm(perm, user, self) 

1626 

1627 def assign_user_permissions(self, user, perm): 

1628 # clear all permissions 

1629 self.clear_user_permissions(user) 

1630 # assign new permissions 

1631 for entry in self.perms_groups[perm]: 

1632 assign_perm(entry, user, self) 

1633 

1634 def save(self, *args, **kwargs): 

1635 models.Model.save(self, *args, **kwargs) 

1636 

1637 # set admin permissions for owner on first save 

1638 if not get_perms(self.owner, self): 

1639 self.assign_user_permissions(self.owner, "admin") 

1640 

1641 super(DataSetManagementObject, self).save(*args, **kwargs) 

1642 models.Model.save(self, *args, **kwargs) 

1643 

1644 # Call the "real" save() method in the base class 'models.Model'. 

1645 models.Model.save(self, *args, **kwargs) 

1646 

1647 def user_has_admin_right(self, user): 

1648 return user.has_perm("admin_dsmo", self) 

1649 

1650 def get_dsmo_users(self): 

1651 # get all users with permissions, but exclude owner 

1652 return get_users_with_perms(self).exclude(uuid=self.owner.uuid) 

1653 

1654 def get_dsmo_users_with_perms(self): 

1655 user_admin = set() 

1656 user_edit = set() 

1657 user_view = set() 

1658 user_groups = dict() 

1659 

1660 for user, perms in get_users_with_perms(self, attach_perms=True).items(): 

1661 if "admin_dsmo" in perms: 

1662 user_admin.add(user) 

1663 continue 

1664 if "edit_dsmo" in perms: 

1665 user_edit.add(user) 

1666 continue 

1667 if "view_dsmo" in perms: 

1668 user_view.add(user) 

1669 

1670 user_groups = { 

1671 "admin": user_admin, 

1672 "edit": user_edit, 

1673 "view": user_view, 

1674 } 

1675 return user_groups 

1676 

1677 def get_all_datasets(self): 

1678 return DataSet.objects.filter(dataset_management_object=self) 

1679 

1680 def get_published_datasets(self): 

1681 return DataSet.objects.filter(dataset_management_object=self).filter( 

1682 published=True 

1683 ) 

1684 

1685 def get_top_version(self): 

1686 all_ds = self.get_all_datasets() 

1687 max_version = all_ds.aggregate(models.Max("version"))["version__max"] 

1688 return max_version 

1689 

1690 def get_top_version_dataset(self): 

1691 try: 

1692 return self.get_all_datasets().order_by("-version")[0] 

1693 except IndexError: 

1694 return None 

1695 # max_version = self.get_top_version() 

1696 # TODO: bug in returning 1.2 as top version, nonfix applied 

1697 # return DataSet.objects.all()[0] #filter(dataset_management_object=self)[0] 

1698 

1699 def get_top_version_published_dataset(self): 

1700 try: 

1701 return self.get_published_datasets().order_by("-version")[0] 

1702 except IndexError: 

1703 return None 

1704 

1705 # max_version = all_ds.aggregate(models.Max('version'))['version__max'] 

1706 # return all_ds.filter(dataset_management_object=self).filter(published=True).filter(version = max_version)[0] 

1707 

1708 def get_absolute_url(self): 

1709 return reverse( 

1710 "dddatasets:detail", args=[str(self.get_top_version_dataset().uuid)] 

1711 ) 

1712 

1713 def class_name(self): 

1714 return self.__class__.__name__ 

1715 

1716 def __str__(self): 

1717 formatedDate = self.created_at.strftime("%Y-%m-%d %H:%M:%S") 

1718 top_ds = self.get_top_version_dataset() 

1719 return "{}, currently v{} – created at {}".format( 

1720 top_ds.title, top_ds.version, formatedDate 

1721 ) 

1722 # return "{}".format(self.id,) 

1723 

1724 

1725# class PublishedDataSet(models.Model): 

1726# created_at = models.DateTimeField(auto_now_add=True) 

1727# updated_at = models.DateTimeField(auto_now=True) 

1728# doi = models.CharField(max_length=200, blank=True) 

1729# #version = models.FloatField(default=1.0) 

1730# #dataset = models.OneToOneField('DataSet', on_delete=models.PROTECT) 

1731# #dataset_uuid = models.UUIDField(editable=False, default=uuid.uuid4) 

1732 

1733# def get_top_version_dataset(self): 

1734# all_ds = self.get_all_datasets() 

1735# max_version = all_ds.aggregate(models.Max('version'))['version__max'] 

1736 

1737# try: 

1738# return all_ds.filter(version = max_version)[0] 

1739# except: 

1740# return None 

1741 

1742 

1743# def get_all_datasets(self): 

1744# return DataSet.objects.filter(published_dataset=self).filter(published=True) 

1745 

1746 

1747# def __str__(self): 

1748# #return self.get_top_version_dataset().title 

1749# return "{}, v{}".format(self.get_top_version_dataset().title, self.id)