Skip to content

Commit 31b5861

Browse files
authored
openaire optimize contact (#339)
* openaire optimize contact * fix blank line error * fix blank line error
1 parent 1f0d2b7 commit 31b5861

1 file changed

Lines changed: 84 additions & 46 deletions

File tree

pygeometa/schemas/openaire/__init__.py

Lines changed: 84 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def import_(self, metadata: str) -> dict:
158158
mcf['identification']['language'] = language_
159159

160160
main_title = metadata_.get('mainTitle')
161-
# subtitle also exists
161+
162162
if main_title is not None:
163163
mcf['identification']['title'] = main_title
164164

@@ -170,7 +170,6 @@ def import_(self, metadata: str) -> dict:
170170
if version_ is not None:
171171
mcf['identification']['edition'] = version_
172172

173-
# topiccategory
174173
right_ = metadata_.get('bestAccessRight', {}).get('label')
175174
instance_right_ = None
176175
if main_instance_:
@@ -189,7 +188,6 @@ def import_(self, metadata: str) -> dict:
189188
'url': ''
190189
}
191190

192-
# url
193191
dates_dict = {}
194192
p_date = metadata_.get('publicationDate')
195193
e_date = metadata_.get('embargoEndDate')
@@ -207,14 +205,15 @@ def import_(self, metadata: str) -> dict:
207205
elif isinstance(subjects_, list):
208206
mcf['identification']['keywords'] = process_keywords(subjects_)
209207

210-
# contact point
211-
authors_ = metadata_.get('authors', [])
212-
orgs_ = metadata_.get('organizations', [])
213-
authors_ = authors_ or []
214-
orgs_ = orgs_ or []
215-
contact_ = authors_ + orgs_
216-
if len(contact_) > 0:
217-
mcf['contact'] = process_contact(contact_)
208+
# contact
209+
authors_ = metadata_.get('authors', []) or []
210+
orgs_ = metadata_.get('organizations', []) or []
211+
publisher_ = metadata_.get('publisher') or ''
212+
contribs_ = metadata_.get('contributors', []) or []
213+
214+
contact_ = process_contact(authors_, orgs_, publisher_, contribs_)
215+
if contact_:
216+
mcf['contact'] = contact_
218217

219218
# distribution
220219
if isinstance(children_instances_, list) and children_instances_:
@@ -371,51 +370,90 @@ def process_keywords(subjects: list) -> dict:
371370
return keywords_dict
372371

373372

374-
def process_contact(contact_list: list) -> dict:
373+
def _make_contactpoint(role: str, individualname: str = None,
                       organization: str = None, url: str = None) -> dict:
    """
    Build a single MCF contact point

    :param role: role assigned to the contact (e.g. `author`)
    :param individualname: name of the person, if any
    :param organization: name of the organization, if any
    :param url: URL derived from a persistent identifier, if any

    :returns: dict with `individualname`, `organization`, `url`, `role` keys
    """

    return {
        'individualname': individualname,
        'organization': organization,
        'url': url,
        'role': role
    }


def process_contact(author_list: list,
                    organization_list: list,
                    publisher: str,
                    contributor_list: list) -> dict:
    """
    Process authors, organizations, publisher, and contributors into MCF
    contact format

    Entries are added in the order authors, organizations, publisher,
    contributors.  Entries without a usable name are skipped.  Any of the
    arguments may be None or empty.

    :param author_list: list of author objects (dicts with `fullName` and
                        optionally `pid`)
    :param organization_list: list of organization objects (dicts with
                              `legalName` and optionally `pids`)
    :param publisher: publisher string
    :param contributor_list: list of contributor objects (each is
                             stringified into the organization field)

    :returns: dict with UUID keys and contact point values
    """

    # organization identifier schemes that id2url is asked to resolve
    org_pid_schemes = ('ror', 'grid', 'wikidata', 'isni')

    contact_dict = {}

    # Process authors
    for author in author_list or []:
        full_name = author.get('fullName', '')
        if not full_name:
            continue

        url = None
        pid_id = (author.get('pid') or {}).get('id')
        if pid_id is not None:
            pid_scheme = pid_id.get('scheme')
            pid_value = pid_id.get('value')
            if None not in [pid_scheme, pid_value]:
                url = id2url(pid_scheme, pid_value)

        contact_dict[str(uuid.uuid4())] = _make_contactpoint(
            'author', individualname=full_name, url=url)

    # Process organizations
    for org in organization_list or []:
        legal_name = org.get('legalName', '')
        if not legal_name:
            continue

        url = None
        for p in org.get('pids') or []:
            scheme = p.get('scheme')
            value = p.get('value')
            # the first supported scheme that actually carries a value wins;
            # a PID without a value cannot be turned into a URL
            if (scheme and value is not None
                    and scheme.lower() in org_pid_schemes):
                url = id2url(scheme, value)
                break

        contact_dict[str(uuid.uuid4())] = _make_contactpoint(
            'contributor', organization=legal_name, url=url)

    # Process publisher
    if publisher:
        contact_dict[str(uuid.uuid4())] = _make_contactpoint(
            'publisher', organization=publisher)

    # Process contributors
    for contrib in contributor_list or []:
        contrib_name = str(contrib) if contrib else ''
        if not contrib_name:
            continue

        contact_dict[str(uuid.uuid4())] = _make_contactpoint(
            'contributor', organization=contrib_name)

    return contact_dict

0 commit comments

Comments
 (0)