11# coding: utf-8
2- from glom import Coalesce , Iter , glom
32
4- from .extractors import extract_doi , extract_orcid , extract_ror_id
5- from .reports import RelatedWorkReports
3+ from .extractors import extract_doi
4+ from .resource_type_graph import RelatedWorkReports
65from .searchers import DoiListSearcher , DoiSearcher
76
87
def is_a_doi(rid):
    """Return True when the related identifier held by `rid` is a DOI.

    `rid` is a mapping; its "relatedIdentifier" value is handed to
    `extract_doi` and the truthiness of that result is returned.
    """
    # A key that is present but None/empty is treated like a missing key, so
    # extract_doi always receives a string.
    identifier = rid.get("relatedIdentifier") or ""
    return bool(extract_doi(identifier))
11-
12-
def _extracted_ids_spec(people_key, container_key, identifier_key, extractor):
    """Build the glom spec shared by every ORCID / ROR identifier field.

    The spec walks `people_key` -> `container_key` -> `identifier_key`,
    flattens the per-person lists, runs `extractor` on each raw value and
    keeps only the results that are not None. The whole spec is wrapped in
    Coalesce with an empty list as its default.
    """
    return Coalesce(
        (
            people_key,
            [(container_key, [identifier_key])],
            Iter()
            .flatten()
            .map(extractor)
            .filter(lambda extracted: extracted is not None)
            .all(),
        ),
        default=[],
    )


def parse_attributes(doi_result):
    """Reduce one DOI record to the fields selected by the spec below.

    Args:
        doi_result: a mapping that either is the attribute dict itself or
            wraps it under an "attributes" key.

    Returns:
        {} when there is nothing to parse, otherwise a dict with the keys
        "doi", "resourceTypeGeneral", "resourceType", the six
        creator/contributor ORCID and ROR id lists, and
        "related_identifiers" (only the entries accepted by `is_a_doi`).
    """
    # Unwrap the "attributes" envelope when present and non-empty; otherwise
    # treat the argument itself as the attribute dict.
    doi_result = doi_result.get("attributes", {}) or doi_result
    if not doi_result:
        return {}
    spec = {
        "doi": "doi",
        "resourceTypeGeneral": Coalesce("types.resourceTypeGeneral", default=""),
        "resourceType": Coalesce("types.resourceType", default=""),
        "creator_orcid_ids": _extracted_ids_spec(
            "creators", "nameIdentifiers", "nameIdentifier", extract_orcid
        ),
        # Reads "creators" like the sibling creator fields; the previous
        # "creator" path differed from them.
        "creator_ror_ids": _extracted_ids_spec(
            "creators", "nameIdentifiers", "nameIdentifier", extract_ror_id
        ),
        "creator_affiliation_ror_ids": _extracted_ids_spec(
            "creators", "affiliation", "affiliationIdentifier", extract_ror_id
        ),
        "contributor_orcid_ids": _extracted_ids_spec(
            "contributors", "nameIdentifiers", "nameIdentifier", extract_orcid
        ),
        "contributor_ror_ids": _extracted_ids_spec(
            "contributors", "nameIdentifiers", "nameIdentifier", extract_ror_id
        ),
        # Defaults to [] like every other id list (was the placeholder "BOB").
        "contributor_affiliation_ror_ids": _extracted_ids_spec(
            "contributors", "affiliation", "affiliationIdentifier", extract_ror_id
        ),
        "related_identifiers": Coalesce(
            ("relatedIdentifiers", Iter().filter(is_a_doi).all()),
            default=[],
        ),
    }
    return glom(doi_result, spec)
102-
103-
def parse_list(doi_list, parser=None):
    """Index parsed DOI records by their "id".

    Args:
        doi_list: iterable of mappings, each carrying an "id" key.
        parser: callable applied to every record; when omitted,
            `parse_attributes` is used, which keeps the one-argument call
            form working.

    Returns:
        dict mapping each record's "id" to `parser(record)`. When two
        records share an id, the later one wins.
    """
    if parser is None:
        parser = parse_attributes
    return {d["id"]: parser(d) for d in doi_list}
106-
107-
1088def get_relation_types_grouped_by_doi (related_dois ):
1099 res = {}
11010 for r in related_dois :
@@ -114,31 +14,37 @@ def get_relation_types_grouped_by_doi(related_dois):
11414 return res
11515
11616
117- def get_incoming_and_primary_attributes (doi_query , doi_url ):
def parse_list(doi_list, parser):
    """Index parsed DOI records by their "id".

    Args:
        doi_list: iterable of mappings, each carrying an "id" key.
        parser: callable applied to every record to produce its value.

    Returns:
        dict mapping each record's "id" to `parser(record)`. When two
        records share an id, the later one wins.
    """
    return {d["id"]: parser(d) for d in doi_list}
19+
20+
def get_incoming_and_primary_attributes(doi_query, doi_url, parser):
    """Search for `doi_query` and return the parsed results keyed by id.

    Args:
        doi_query: query handed to `DoiSearcher`.
        doi_url: API endpoint handed to `DoiSearcher`.
        parser: callable applied to each search result by `parse_list`.

    Returns:
        dict mapping each result's "id" to its parsed attributes.
    """
    # Get incoming links and primary doi
    doi_list = DoiSearcher(doi_query, doi_url).search()
    doi_attributes = parse_list(doi_list, parser)
    return doi_attributes
12226
12327
def get_outgoing_link_attributes(primary_doi, doi_url, parser):
    """Fetch and parse the DOIs that `primary_doi` links out to.

    Args:
        primary_doi: parsed attribute dict; its "related_identifiers" list
            (empty when absent) determines which DOIs are looked up.
        doi_url: API endpoint handed to `DoiListSearcher`.
        parser: callable applied to each search result by `parse_list`.

    Returns:
        dict mapping each fetched record's "id" to its parsed attributes.
    """
    relations_grouped_by_doi = get_relation_types_grouped_by_doi(
        primary_doi.get("related_identifiers", [])
    )
    # Get outgoing links
    outgoing_dois = relations_grouped_by_doi.keys()
    outgoing_doi_list = DoiListSearcher(outgoing_dois, doi_url).search()
    outgoing_doi_attributes = parse_list(outgoing_doi_list, parser)
    return outgoing_doi_attributes
13337
13438
13539def get_full_corpus_doi_attributes (
136- doi_query , api_url = "https://api.stage.datacite.org/dois/"
40+ doi_query , parser , api_url = "https://api.stage.datacite.org/dois/"
13741):
138- doi_attributes = get_incoming_and_primary_attributes (doi_query , api_url )
42+ doi_attributes = get_incoming_and_primary_attributes (doi_query , api_url , parser )
13943 if doi_query in doi_attributes .keys ():
14044 primary_doi = doi_attributes .get (doi_query , {})
141- outgoing_doi_attributes = get_outgoing_link_attributes (primary_doi , api_url )
45+ outgoing_doi_attributes = get_outgoing_link_attributes (
46+ primary_doi , api_url , parser
47+ )
14248 else :
14349 outgoing_doi_attributes = {}
14450
@@ -164,7 +70,9 @@ def _get_query():
16470 DOI_API = "https://api.stage.datacite.org/dois/"
16571 DOI_API = "https://api.datacite.org/dois/"
16672 doi_query = _get_query ()
167- full_doi_attributes = get_full_corpus_doi_attributes (doi_query , DOI_API )
73+ full_doi_attributes = get_full_corpus_doi_attributes (
74+ doi_query , RelatedWorkReports .parser , DOI_API
75+ )
16876 report = RelatedWorkReports (full_doi_attributes )
16977
17078 graph = {"nodes" : report .aggregate_counts , "edges" : report .type_connection_report }
0 commit comments