Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
74a099c
Add simple name output script
Opsmithe Nov 8, 2025
6cec0a3
feat: Complete DOAJ integration with API v4 and country code generation
Opsmithe Nov 10, 2025
8c80845
Delete output_name.py
Opsmithe Nov 10, 2025
9f8df08
Make doaj_fetch.py executable
Opsmithe Nov 10, 2025
2986e45
Make generate_country_codes.py executable
Opsmithe Nov 10, 2025
521b158
Move file path constants to top-level before function definitions
Opsmithe Nov 11, 2025
1b5312a
Organize constants alphabetically within logical groups
Opsmithe Nov 11, 2025
cd0d3f5
Move subprocess import to top-level imports section
Opsmithe Nov 11, 2025
29856ee
Replace 'f' with 'file_object' for descriptive variable naming
Opsmithe Nov 11, 2025
33fa723
Replace 'fh' with 'file_object' for descriptive variable naming
Opsmithe Nov 11, 2025
ae20f24
Fix static analysis issues and code formatting
Opsmithe Nov 11, 2025
3d75671
Fix duplicate counting for journals with multiple CC license types
Opsmithe Nov 15, 2025
37e3855
Remove article counting logic due to DOAJ API limitations
Opsmithe Nov 15, 2025
8c7bea5
Add DOAJ API documentation and technical details to sources.md
Opsmithe Nov 24, 2025
1cdf721
Add country and language data collection using pycountry library
Opsmithe Nov 24, 2025
f40dbf4
Update exception handling to use structured QuantifyingException appr…
Opsmithe Nov 24, 2025
daedef4
Fix static analysis issues and handle 400 errors as end-of-data
Opsmithe Nov 24, 2025
f44ea66
Order DOAJ API documentation links alphabetically
Opsmithe Nov 24, 2025
85f6faf
Remove generate_country_codes.py - using pycountry library instead
Opsmithe Nov 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
286 changes: 286 additions & 0 deletions dev/generate_country_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
#!/usr/bin/env python
"""
Generate ISO 3166-1 alpha-2 country codes YAML file for DOAJ fetch script.
"""
import os
import sys
import yaml

# Add parent directory so shared can be imported
sys.path.append(os.path.join(os.path.dirname(__file__), "..", "scripts"))
import shared

# ISO 3166-1 alpha-2 country codes: 249 entries, one {"code", "name"} mapping
# per entry, listed in ascending order of code. main() serializes this list
# to YAML unchanged.
# NOTE(review): the names look like common English names rather than the
# official ISO 3166-1 short names (e.g. "Russia", "Vatican City") -- confirm
# this matches what the DOAJ fetch script expects before relying on them.
COUNTRIES = [
{"code": "AD", "name": "Andorra"},
{"code": "AE", "name": "United Arab Emirates"},
{"code": "AF", "name": "Afghanistan"},
{"code": "AG", "name": "Antigua and Barbuda"},
{"code": "AI", "name": "Anguilla"},
{"code": "AL", "name": "Albania"},
{"code": "AM", "name": "Armenia"},
{"code": "AO", "name": "Angola"},
{"code": "AQ", "name": "Antarctica"},
{"code": "AR", "name": "Argentina"},
{"code": "AS", "name": "American Samoa"},
{"code": "AT", "name": "Austria"},
{"code": "AU", "name": "Australia"},
{"code": "AW", "name": "Aruba"},
{"code": "AX", "name": "Åland Islands"},
{"code": "AZ", "name": "Azerbaijan"},
{"code": "BA", "name": "Bosnia and Herzegovina"},
{"code": "BB", "name": "Barbados"},
{"code": "BD", "name": "Bangladesh"},
{"code": "BE", "name": "Belgium"},
{"code": "BF", "name": "Burkina Faso"},
{"code": "BG", "name": "Bulgaria"},
{"code": "BH", "name": "Bahrain"},
{"code": "BI", "name": "Burundi"},
{"code": "BJ", "name": "Benin"},
{"code": "BL", "name": "Saint Barthélemy"},
{"code": "BM", "name": "Bermuda"},
{"code": "BN", "name": "Brunei"},
{"code": "BO", "name": "Bolivia"},
{"code": "BQ", "name": "Caribbean Netherlands"},
{"code": "BR", "name": "Brazil"},
{"code": "BS", "name": "Bahamas"},
{"code": "BT", "name": "Bhutan"},
{"code": "BV", "name": "Bouvet Island"},
{"code": "BW", "name": "Botswana"},
{"code": "BY", "name": "Belarus"},
{"code": "BZ", "name": "Belize"},
{"code": "CA", "name": "Canada"},
{"code": "CC", "name": "Cocos Islands"},
{"code": "CD", "name": "Democratic Republic of the Congo"},
{"code": "CF", "name": "Central African Republic"},
{"code": "CG", "name": "Republic of the Congo"},
{"code": "CH", "name": "Switzerland"},
{"code": "CI", "name": "Côte d'Ivoire"},
{"code": "CK", "name": "Cook Islands"},
{"code": "CL", "name": "Chile"},
{"code": "CM", "name": "Cameroon"},
{"code": "CN", "name": "China"},
{"code": "CO", "name": "Colombia"},
{"code": "CR", "name": "Costa Rica"},
{"code": "CU", "name": "Cuba"},
{"code": "CV", "name": "Cape Verde"},
{"code": "CW", "name": "Curaçao"},
{"code": "CX", "name": "Christmas Island"},
{"code": "CY", "name": "Cyprus"},
{"code": "CZ", "name": "Czech Republic"},
{"code": "DE", "name": "Germany"},
{"code": "DJ", "name": "Djibouti"},
{"code": "DK", "name": "Denmark"},
{"code": "DM", "name": "Dominica"},
{"code": "DO", "name": "Dominican Republic"},
{"code": "DZ", "name": "Algeria"},
{"code": "EC", "name": "Ecuador"},
{"code": "EE", "name": "Estonia"},
{"code": "EG", "name": "Egypt"},
{"code": "EH", "name": "Western Sahara"},
{"code": "ER", "name": "Eritrea"},
{"code": "ES", "name": "Spain"},
{"code": "ET", "name": "Ethiopia"},
{"code": "FI", "name": "Finland"},
{"code": "FJ", "name": "Fiji"},
{"code": "FK", "name": "Falkland Islands"},
{"code": "FM", "name": "Micronesia"},
{"code": "FO", "name": "Faroe Islands"},
{"code": "FR", "name": "France"},
{"code": "GA", "name": "Gabon"},
{"code": "GB", "name": "United Kingdom"},
{"code": "GD", "name": "Grenada"},
{"code": "GE", "name": "Georgia"},
{"code": "GF", "name": "French Guiana"},
{"code": "GG", "name": "Guernsey"},
{"code": "GH", "name": "Ghana"},
{"code": "GI", "name": "Gibraltar"},
{"code": "GL", "name": "Greenland"},
{"code": "GM", "name": "Gambia"},
{"code": "GN", "name": "Guinea"},
{"code": "GP", "name": "Guadeloupe"},
{"code": "GQ", "name": "Equatorial Guinea"},
{"code": "GR", "name": "Greece"},
{"code": "GS", "name": "South Georgia"},
{"code": "GT", "name": "Guatemala"},
{"code": "GU", "name": "Guam"},
{"code": "GW", "name": "Guinea-Bissau"},
{"code": "GY", "name": "Guyana"},
{"code": "HK", "name": "Hong Kong"},
{"code": "HM", "name": "Heard Island"},
{"code": "HN", "name": "Honduras"},
{"code": "HR", "name": "Croatia"},
{"code": "HT", "name": "Haiti"},
{"code": "HU", "name": "Hungary"},
{"code": "ID", "name": "Indonesia"},
{"code": "IE", "name": "Ireland"},
{"code": "IL", "name": "Israel"},
{"code": "IM", "name": "Isle of Man"},
{"code": "IN", "name": "India"},
{"code": "IO", "name": "British Indian Ocean Territory"},
{"code": "IQ", "name": "Iraq"},
{"code": "IR", "name": "Iran"},
{"code": "IS", "name": "Iceland"},
{"code": "IT", "name": "Italy"},
{"code": "JE", "name": "Jersey"},
{"code": "JM", "name": "Jamaica"},
{"code": "JO", "name": "Jordan"},
{"code": "JP", "name": "Japan"},
{"code": "KE", "name": "Kenya"},
{"code": "KG", "name": "Kyrgyzstan"},
{"code": "KH", "name": "Cambodia"},
{"code": "KI", "name": "Kiribati"},
{"code": "KM", "name": "Comoros"},
{"code": "KN", "name": "Saint Kitts and Nevis"},
{"code": "KP", "name": "North Korea"},
{"code": "KR", "name": "South Korea"},
{"code": "KW", "name": "Kuwait"},
{"code": "KY", "name": "Cayman Islands"},
{"code": "KZ", "name": "Kazakhstan"},
{"code": "LA", "name": "Laos"},
{"code": "LB", "name": "Lebanon"},
{"code": "LC", "name": "Saint Lucia"},
{"code": "LI", "name": "Liechtenstein"},
{"code": "LK", "name": "Sri Lanka"},
{"code": "LR", "name": "Liberia"},
{"code": "LS", "name": "Lesotho"},
{"code": "LT", "name": "Lithuania"},
{"code": "LU", "name": "Luxembourg"},
{"code": "LV", "name": "Latvia"},
{"code": "LY", "name": "Libya"},
{"code": "MA", "name": "Morocco"},
{"code": "MC", "name": "Monaco"},
{"code": "MD", "name": "Moldova"},
{"code": "ME", "name": "Montenegro"},
{"code": "MF", "name": "Saint Martin"},
{"code": "MG", "name": "Madagascar"},
{"code": "MH", "name": "Marshall Islands"},
{"code": "MK", "name": "North Macedonia"},
{"code": "ML", "name": "Mali"},
{"code": "MM", "name": "Myanmar"},
{"code": "MN", "name": "Mongolia"},
{"code": "MO", "name": "Macao"},
{"code": "MP", "name": "Northern Mariana Islands"},
{"code": "MQ", "name": "Martinique"},
{"code": "MR", "name": "Mauritania"},
{"code": "MS", "name": "Montserrat"},
{"code": "MT", "name": "Malta"},
{"code": "MU", "name": "Mauritius"},
{"code": "MV", "name": "Maldives"},
{"code": "MW", "name": "Malawi"},
{"code": "MX", "name": "Mexico"},
{"code": "MY", "name": "Malaysia"},
{"code": "MZ", "name": "Mozambique"},
{"code": "NA", "name": "Namibia"},
{"code": "NC", "name": "New Caledonia"},
{"code": "NE", "name": "Niger"},
{"code": "NF", "name": "Norfolk Island"},
{"code": "NG", "name": "Nigeria"},
{"code": "NI", "name": "Nicaragua"},
{"code": "NL", "name": "Netherlands"},
{"code": "NO", "name": "Norway"},
{"code": "NP", "name": "Nepal"},
{"code": "NR", "name": "Nauru"},
{"code": "NU", "name": "Niue"},
{"code": "NZ", "name": "New Zealand"},
{"code": "OM", "name": "Oman"},
{"code": "PA", "name": "Panama"},
{"code": "PE", "name": "Peru"},
{"code": "PF", "name": "French Polynesia"},
{"code": "PG", "name": "Papua New Guinea"},
{"code": "PH", "name": "Philippines"},
{"code": "PK", "name": "Pakistan"},
{"code": "PL", "name": "Poland"},
{"code": "PM", "name": "Saint Pierre and Miquelon"},
{"code": "PN", "name": "Pitcairn Islands"},
{"code": "PR", "name": "Puerto Rico"},
{"code": "PS", "name": "Palestine"},
{"code": "PT", "name": "Portugal"},
{"code": "PW", "name": "Palau"},
{"code": "PY", "name": "Paraguay"},
{"code": "QA", "name": "Qatar"},
{"code": "RE", "name": "Réunion"},
{"code": "RO", "name": "Romania"},
{"code": "RS", "name": "Serbia"},
{"code": "RU", "name": "Russia"},
{"code": "RW", "name": "Rwanda"},
{"code": "SA", "name": "Saudi Arabia"},
{"code": "SB", "name": "Solomon Islands"},
{"code": "SC", "name": "Seychelles"},
{"code": "SD", "name": "Sudan"},
{"code": "SE", "name": "Sweden"},
{"code": "SG", "name": "Singapore"},
{"code": "SH", "name": "Saint Helena"},
{"code": "SI", "name": "Slovenia"},
{"code": "SJ", "name": "Svalbard and Jan Mayen"},
{"code": "SK", "name": "Slovakia"},
{"code": "SL", "name": "Sierra Leone"},
{"code": "SM", "name": "San Marino"},
{"code": "SN", "name": "Senegal"},
{"code": "SO", "name": "Somalia"},
{"code": "SR", "name": "Suriname"},
{"code": "SS", "name": "South Sudan"},
{"code": "ST", "name": "São Tomé and Príncipe"},
{"code": "SV", "name": "El Salvador"},
{"code": "SX", "name": "Sint Maarten"},
{"code": "SY", "name": "Syria"},
{"code": "SZ", "name": "Eswatini"},
{"code": "TC", "name": "Turks and Caicos Islands"},
{"code": "TD", "name": "Chad"},
{"code": "TF", "name": "French Southern Territories"},
{"code": "TG", "name": "Togo"},
{"code": "TH", "name": "Thailand"},
{"code": "TJ", "name": "Tajikistan"},
{"code": "TK", "name": "Tokelau"},
{"code": "TL", "name": "Timor-Leste"},
{"code": "TM", "name": "Turkmenistan"},
{"code": "TN", "name": "Tunisia"},
{"code": "TO", "name": "Tonga"},
{"code": "TR", "name": "Turkey"},
{"code": "TT", "name": "Trinidad and Tobago"},
{"code": "TV", "name": "Tuvalu"},
{"code": "TW", "name": "Taiwan"},
{"code": "TZ", "name": "Tanzania"},
{"code": "UA", "name": "Ukraine"},
{"code": "UG", "name": "Uganda"},
{"code": "UM", "name": "U.S. Minor Outlying Islands"},
{"code": "US", "name": "United States"},
{"code": "UY", "name": "Uruguay"},
{"code": "UZ", "name": "Uzbekistan"},
{"code": "VA", "name": "Vatican City"},
{"code": "VC", "name": "Saint Vincent and the Grenadines"},
{"code": "VE", "name": "Venezuela"},
{"code": "VG", "name": "British Virgin Islands"},
{"code": "VI", "name": "U.S. Virgin Islands"},
{"code": "VN", "name": "Vietnam"},
{"code": "VU", "name": "Vanuatu"},
{"code": "WF", "name": "Wallis and Futuna"},
{"code": "WS", "name": "Samoa"},
{"code": "YE", "name": "Yemen"},
{"code": "YT", "name": "Mayotte"},
{"code": "ZA", "name": "South Africa"},
{"code": "ZM", "name": "Zambia"},
{"code": "ZW", "name": "Zimbabwe"},
]


def main():
    """
    Write the COUNTRIES list to data/iso_country_codes.yaml.

    The output path is resolved relative to the parent of this script's
    directory. The file begins with a three-line "#" comment header and is
    followed by the block-style YAML dump of COUNTRIES. A one-line summary
    (output path and entry count) is printed to stdout afterwards.

    Raises:
        OSError: if the output directory cannot be created or the output
            file cannot be opened or written.
    """
    repo_path = shared.path_join(os.path.dirname(__file__), "..")
    output_file = shared.path_join(repo_path, "data", "iso_country_codes.yaml")

    header = [
        "# ISO 3166-1 alpha-2 country codes to country names mapping",
        "# Used by DOAJ API for publisher country identification",
        "# Generated programmatically by dev/generate_country_codes.py",
    ]

    # open() does not create missing parent directories; make sure data/
    # exists so the script does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as file_object:
        file_object.write("\n".join(header) + "\n")
        # allow_unicode writes names such as "Côte d'Ivoire" as UTF-8 text
        # instead of escape sequences; default_flow_style=False selects
        # block style (one key per line).
        yaml.dump(
            COUNTRIES,
            file_object,
            default_flow_style=False,
            allow_unicode=True,
        )

    print(f"Generated {output_file} with {len(COUNTRIES)} country codes")


# Run the generator only when this file is executed as a script, so that
# importing the module (e.g. to reuse COUNTRIES) does not write any file.
if __name__ == "__main__":
    main()
Loading