55Focus: Journal-level CC license adoption and temporal trends.
66Note: Articles do not contain license information in DOAJ API.
77
8- This script focuses on essential data for quantifying Creative Commons adoption:
8+ This script focuses on essential data for quantifying Creative Commons
9+ adoption:
910- Journal CC license counts by type
1011- Temporal trends (year-by-year adoption)
1112
6364
6465# File Paths
6566FILE_DOAJ_COUNT = shared .path_join (PATHS ["data_1-fetch" ], "doaj_1_count.csv" )
66- FILE_DOAJ_COUNTRY = shared .path_join (PATHS ["data_1-fetch" ], "doaj_3_count_by_country.csv" )
67- FILE_DOAJ_LANGUAGE = shared .path_join (PATHS ["data_1-fetch" ], "doaj_5_count_by_language.csv" )
67+ FILE_DOAJ_COUNTRY = shared .path_join (
68+ PATHS ["data_1-fetch" ], "doaj_3_count_by_country.csv"
69+ )
70+ FILE_DOAJ_LANGUAGE = shared .path_join (
71+ PATHS ["data_1-fetch" ], "doaj_5_count_by_language.csv"
72+ )
6873FILE_PROVENANCE = shared .path_join (
6974 PATHS ["data_1-fetch" ], "doaj_provenance.yaml"
7075)
7580# CSV Headers
7681HEADER_COUNT = ["TOOL_IDENTIFIER" , "COUNT" ]
7782HEADER_COUNTRY = ["TOOL_IDENTIFIER" , "COUNTRY_CODE" , "COUNTRY_NAME" , "COUNT" ]
78- HEADER_LANGUAGE = ["TOOL_IDENTIFIER" , "LANGUAGE_CODE" , "LANGUAGE_NAME" , "COUNT" ]
83+ HEADER_LANGUAGE = [
84+ "TOOL_IDENTIFIER" ,
85+ "LANGUAGE_CODE" ,
86+ "LANGUAGE_NAME" ,
87+ "COUNT" ,
88+ ]
7989HEADER_YEAR = ["TOOL_IDENTIFIER" , "YEAR" , "COUNT" ]
8090
8191# Runtime variables
@@ -165,13 +175,13 @@ def extract_license_types(license_info):
165175 """Extract all CC license types from DOAJ license information."""
166176 if not license_info :
167177 return []
168-
178+
169179 cc_licenses = []
170180 for lic in license_info :
171181 lic_type = lic .get ("type" , "" )
172182 if lic_type in CC_LICENSE_TYPES :
173183 cc_licenses .append (lic_type )
174-
184+
175185 return cc_licenses
176186
177187
@@ -183,7 +193,9 @@ def process_journals(session, args):
183193 country_counts = defaultdict (Counter )
184194 language_counts = defaultdict (Counter )
185195 year_counts = defaultdict (Counter )
186- processed_journals = set () # Track unique journals to avoid double counting
196+ processed_journals = (
197+ set ()
198+ ) # Track unique journals to avoid double counting
187199
188200 total_processed = 0
189201 page = 1
@@ -200,6 +212,10 @@ def process_journals(session, args):
200212 response .raise_for_status ()
201213 data = response .json ()
202214 except requests .HTTPError as e :
215+ # Handle 400 errors as end of data (DOAJ API behavior)
216+ if hasattr (e , "response" ) and e .response .status_code == 400 :
217+ LOGGER .info (f"Reached end of available data at page { page } " )
218+ break
203219 raise shared .QuantifyingException (f"HTTP Error: { e } " , 1 )
204220 except requests .RequestException as e :
205221 raise shared .QuantifyingException (f"Request Exception: { e } " , 1 )
@@ -260,7 +276,7 @@ def process_journals(session, args):
260276 if isinstance (publisher_info , dict ):
261277 country_code = publisher_info .get ("country" , "Unknown" )
262278 country_counts [license_type ][country_code ] += 1
263-
279+
264280 # Extract language information
265281 languages = bibjson .get ("language" , [])
266282 if languages :
@@ -332,12 +348,14 @@ def save_count_data(
332348 for lic , countries in country_counts .items ():
333349 for country_code , count in countries .items ():
334350 country_name = get_country_name (country_code )
335- writer .writerow ({
336- "TOOL_IDENTIFIER" : lic ,
337- "COUNTRY_CODE" : country_code ,
338- "COUNTRY_NAME" : country_name ,
339- "COUNT" : count ,
340- })
351+ writer .writerow (
352+ {
353+ "TOOL_IDENTIFIER" : lic ,
354+ "COUNTRY_CODE" : country_code ,
355+ "COUNTRY_NAME" : country_name ,
356+ "COUNT" : count ,
357+ }
358+ )
341359
342360 # Save language counts with pycountry names
343361 with open (
@@ -350,12 +368,14 @@ def save_count_data(
350368 for lic , languages in language_counts .items ():
351369 for lang_code , count in languages .items ():
352370 lang_name = get_language_name (lang_code )
353- writer .writerow ({
354- "TOOL_IDENTIFIER" : lic ,
355- "LANGUAGE_CODE" : lang_code ,
356- "LANGUAGE_NAME" : lang_name ,
357- "COUNT" : count ,
358- })
371+ writer .writerow (
372+ {
373+ "TOOL_IDENTIFIER" : lic ,
374+ "LANGUAGE_CODE" : lang_code ,
375+ "LANGUAGE_NAME" : lang_name ,
376+ "COUNT" : count ,
377+ }
378+ )
359379
360380 # Save year counts
361381 with open (
@@ -405,7 +425,10 @@ def query_doaj(args):
405425 "quarter" : QUARTER ,
406426 "script" : os .path .basename (__file__ ),
407427 "api_version" : "v4" ,
408- "note" : "Journal-level CC license data only - article counts not available via DOAJ API" ,
428+ "note" : (
429+ "Journal-level CC license data only - "
430+ "article counts not available via DOAJ API"
431+ ),
409432 }
410433
411434 try :
@@ -425,11 +448,14 @@ def query_doaj(args):
425448 )
426449
427450 LOGGER .info (f"Unique CC-licensed journals processed: { journals_processed } " )
428-
451+
429452 # Calculate total license availability instances
430453 total_license_instances = sum (license_counts .values ())
431454 LOGGER .info (f"Total CC license type instances: { total_license_instances } " )
432- LOGGER .info ("Note: Journals supporting multiple CC license types are counted once per license type" )
455+ LOGGER .info (
456+ "Note: Journals supporting multiple CC license types are "
457+ "counted once per license type"
458+ )
433459
434460
435461def main ():
0 commit comments