#!/usr/bin/env python3
#
# Create deprecated iso-codes XML from JSON
#
# Copyright © 2016 Dr. Tobias Quathamer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import itertools
import json
import sys

# NOTE(review): in this copy of the script every string literal that is
# written to the output file consists only of whitespace (the headers only
# of " ]> "). Presumably the XML markup was lost before review -- confirm
# against the upstream file. The literals are reproduced here unchanged.

#
# Define the headers of the XML files
#
headers = {
    "639": """ ]> """,
    "639-3": """ ]> """,
    "639-5": """ ]> """,
    "3166": """ ]> """,
    "3166-2": """ ]> """,
    "15924": """ ]> """,
    "4217": """ ]> """,
}

# Domains whose output is a flat list of entries. Maps the domain given on
# the command line to (key into ``headers``, standards whose JSON entries
# are written, in this order).
_FLAT_DOMAINS = {
    "iso_639-2": ("639", ("639-2",)),
    "iso_639-3": ("639-3", ("639-3",)),
    "iso_639-5": ("639-5", ("639-5",)),
    "iso_3166-1": ("3166", ("3166-1", "3166-3")),
    "iso_15924": ("15924", ("15924",)),
    "iso_4217": ("4217", ("4217",)),
}


def get_iso_entries(standard, datapath=None):
    """
    Return all entries from the given standard

    Reads ``<datapath>/iso_<standard>.json`` as UTF-8 JSON and returns the
    value stored under the key ``standard``.

    ``datapath`` defaults to the second command line argument, so calling
    the function with only ``standard`` still works.
    """
    if datapath is None:
        datapath = sys.argv[2]
    filename = datapath + "/iso_" + standard + ".json"
    with open(filename, encoding="utf-8") as input_file:
        iso = json.load(input_file)
    return iso[standard]


def _country_code(entry):
    """Return the part of the entry's "code" before the first "-"."""
    return entry["code"].split("-")[0]


def _write_flat_domain(outfile, domain, datapath):
    """Write the header and one line per entry for a domain in _FLAT_DOMAINS."""
    header_key, standards = _FLAT_DOMAINS[domain]
    outfile.write(headers[header_key])
    outfile.write("\n")
    outfile.write("\n")
    for standard in standards:
        for _entry in get_iso_entries(standard, datapath):
            outfile.write("\t\n")
    if domain == "iso_4217":
        # Insert the obsolete historic entries, which are no
        # longer included in the JSON data files.
        outfile.write(""" """)
    outfile.write("\n")


def _write_country_subsets(outfile, subsets):
    """Write one country's entries, subset by subset in sorted type order."""
    outfile.write("\n")
    for subset in sorted(subsets):
        outfile.write("\n")
        for _item in subsets[subset]:
            outfile.write("\t\n")
        outfile.write("\n")
    outfile.write("\n")


def _write_iso_3166_2(outfile, datapath):
    """
    Write the ISO 3166-2 output: entries grouped by country, then by type

    Consecutive entries sharing the same country code form one country.
    """
    outfile.write(headers["3166-2"])
    outfile.write("\n")
    outfile.write("\n")
    entries = get_iso_entries("3166-2", datapath)
    # groupby() yields every run of equal country codes, including the last
    # one. The previous hand-written loop only wrote a country when the
    # *next* country started, so the final country was never written.
    for _code, country_entries in itertools.groupby(entries, key=_country_code):
        # Group by subset types
        subsets = {}
        for entry in country_entries:
            subsets.setdefault(entry["type"], []).append(entry)
        _write_country_subsets(outfile, subsets)
    outfile.write("\n")


def main(argv=None):
    """
    Create the XML output file for one ISO code domain

    ``argv`` is the full argument vector (program name, domain, path to the
    JSON data dir, XML output file) and defaults to ``sys.argv``. Exits via
    ``sys.exit`` with a usage message unless exactly three arguments follow
    the program name.
    """
    if argv is None:
        argv = sys.argv
    # Get the current ISO code domain, the path to the JSON data dir, and the XML output file
    if len(argv) != 4:
        sys.exit("Please provide the domain, the path to the JSON data dir, and the XML output file.")
    domain, datapath, xml_file = argv[1:]

    # Create the output file. It is opened before the domain is inspected,
    # so an unrecognised domain leaves an empty file behind.
    with open(xml_file, "w", encoding="utf-8") as outfile:
        if domain == "iso_3166-2":
            _write_iso_3166_2(outfile, datapath)
        elif domain in _FLAT_DOMAINS:
            _write_flat_domain(outfile, domain, datapath)


if __name__ == "__main__":
    main()