from collections import OrderedDict
from os.path import join
from typing import Dict

import pandas as pd
import yaml

from item.common import paths

# Metadata on database dimensions; filled in by load()
INFO: Dict[str, dict] = {}

# List of the index columns required to identify all data series
# NOTE(review): no such list follows this comment in this module — confirm
# whether it is defined elsewhere or was lost.

# Constants, for e.g. select()
ALL = "All"
# Sets of passenger and freight mode labels. Both are placeholders until
# load() assigns them; FREIGHT is initialised here alongside PAX so that
# reading it before load() gives None rather than raising NameError.
PAX = None
FREIGHT = None

def check(A, out_file):
    """Compare a table of quantities, *A*, to the official list.

    The official list is read from ``quantities.tsv`` in the model dimensions
    data directory. The two tables are combined with a database 'outer' join
    on the variable/mode/technology/fuel columns, which preserves rows in
    either table that do not appear in the other.

    A summary of the number of excess and missing quantities is printed, and
    the joined table is written, tab-separated, to *out_file*. Its last two
    columns, 'unit_generated' and 'unit_official', are filled iff the quantity
    is present in the respective list.
    """
    official_path = join(paths["data"], "model", "dimensions", "quantities.tsv")
    official = pd.read_table(official_path, sep="\t", comment="#")

    key_cols = ["variable", "mode", "technology", "fuel"]
    combined = pd.merge(
        A,
        official,
        how="outer",
        on=key_cols,
        suffixes=("_generated", "_official"),
    )

    # Non-null counts per column: 'variable' is a join key and so is set on
    # every row, while each unit column is only set where that side matched.
    counts = combined.count().to_dict()
    n_excess = counts["variable"] - counts["unit_official"]
    n_missing = counts["variable"] - counts["unit_generated"]
    print(
        "Excess generated quantities: {}\nMissing official quantities: {}".format(
            n_excess, n_missing
        )
    )

    combined.sort_values(by=key_cols).to_csv(out_file, sep="\t")


def _with_all(labels):
    """Return *labels* as a list that is guaranteed to contain "All"."""
    labels = list(labels or [])
    return labels if "All" in labels else ["All"] + labels


def load():
    """Read the dimension metadata files and populate the module-level state.

    One YAML file per dimension ("variable", "mode", "technology", "fuel",
    "match") is read from the model dimensions data directory. Afterwards:

    - ``PAX`` is the frozenset of modes whose "type" is "passenger", and
      ``FREIGHT`` is the frozenset of all remaining modes.
    - ``INFO`` holds the contents of each file under its dimension name, plus
      "modes_all" (frozenset of all mode labels), "mode_tech" and "tech_fuel"
      (mappings from a label on one dimension to the allowable labels on the
      next dimension).
    """
    global PAX, FREIGHT

    # Read the lists of allowable labels for each data dimension
    data = OrderedDict()
    path = join(paths["data"], "model", "dimensions")
    for k in ["variable", "mode", "technology", "fuel", "match"]:
        with open(join(path, "{}.yaml".format(k))) as f:
            data[k] = yaml.load(f, Loader=yaml.SafeLoader)

    mode = data["mode"]
    match = data["match"]

    # Sets of modes, for convenience
    all_modes = frozenset(mode)
    pax_modes = frozenset(
        m for m, info in mode.items() if info["type"] == "passenger"
    )
    PAX = pax_modes
    FREIGHT = all_modes - pax_modes

    # Read allowable combinations of data dimensions. "All" is made an
    # allowable label for every entry. The previous loops only rebound their
    # loop variable and so never changed the mappings; the result is now
    # written back. Iterate over a snapshot of the keys while assigning.

    # mode → technology
    mode_tech = match["mode_technology"]
    for m in list(mode_tech):
        mode_tech[m] = _with_all(mode_tech[m])
    mode_tech["All"] = ["All"]

    # technology → fuel
    tech_fuel = match["technology_fuel"]
    for t in list(tech_fuel):
        tech_fuel[t] = _with_all(tech_fuel[t])
    tech_fuel["All"] = ["All"]

    INFO.update(data)
    INFO.update(
        {"modes_all": all_modes, "mode_tech": mode_tech, "tech_fuel": tech_fuel}
    )


def generate():
    """Attempt to generate the reporting quantities from simple rules.

    Reads the module-level ``INFO``, ``PAX`` and ``FREIGHT``, which are
    populated by load().

    Returns a :class:`pandas.DataFrame` with the columns "variable", "mode",
    "technology", "fuel" and "unit": one row per allowable combination of
    dimension labels for each variable.
    """
    # Generate the list of quantities
    rows = []

    # Iterate through each variable, in order
    for name, var_info in INFO["variable"].items():
        # Determine which modes are reported for this variable
        if var_info.get("global", False):
            # Global quantities are reported only for the 'All' mode, which is
            # always added below. This must be an empty *set* (``{}`` is a
            # dict) so that the set operations below remain valid.
            modes = set()
        else:
            # Some variables are only for either passenger or freight modes
            var_type = var_info.get("type", None)
            if var_type == "passenger":
                modes = set(PAX)
            elif var_type == "freight":
                modes = set(FREIGHT)
            elif name == "intensity_new":
                # A specific subset is used for this variable
                modes = {"2W", "Aviation", "Bus", "HDT", "LDV", "Passenger Rail"}
            else:
                # Other variables are reported for all modes, minus the
                # exclusions below
                modes = set(INFO["modes_all"])

        # Further exclusions from some variables. Removing a label from a set
        # that does not contain it (e.g. the empty set used for global
        # quantities) is a no-op.
        if name in [
            "ef_bc",
            "intensity_service",
            "tkm",
            "ttw_bc",
            "ttw_ch4",
            "ttw_co2e",
            "ttw_n2o",
            "ttw_pm2.5",
            "vkt",
            "wtt_co2e",
            "wtw_co2e",
        ]:
            modes -= {"Freight Rail and Air and Ship"}
        if name in ["ttw_pm2.5", "wtt_co2e", "wtw_co2e"]:
            modes -= {"International Shipping"}
        if name in ["wtt_co2e", "wtw_co2e"]:
            modes -= {"Domestic Shipping"}

        # Add one entry to quantities for each allowable combination of
        # dimensions
        for m in ["All"] + sorted(modes):
            for t in INFO["mode_tech"][m]:
                for f in INFO["tech_fuel"][t]:
                    rows.append([name, m, t, f, var_info["unit"]])

    # Combine into a single table and return
    return pd.DataFrame(
        rows, columns=["variable", "mode", "technology", "fuel", "unit"]
    )


def list_pairs(in_file, out_file):
    """Helper function for preparing the quantities list.

    Reads the tab-separated table *in_file* (lines starting with '#' are
    treated as comments) and writes a file *out_file* with all unique
    combinations of values between successive dimensions; that is, all
    combinations of:

    - variable & mode,
    - mode & technology, and
    - technology & fuel.

    Each section consists of a "first → second" heading, a blank line, one
    sorted combination per line, and a trailing blank line.
    """
    qty = pd.read_table(in_file, sep="\t", comment="#")

    pairs = [
        ["variable", "mode"],
        ["mode", "technology"],
        ["technology", "fuel"],
    ]

    # The headings contain a non-ASCII arrow, so the encoding is given
    # explicitly rather than depending on the platform/locale default.
    with open(out_file, "w", encoding="utf-8") as f:
        for pair in pairs:
            # Find unique combinations of values in the two columns; sort
            unique = sorted(qty.groupby(pair).groups.keys())

            # Write to file
            f.write(" → ".join(pair) + "\n\n" + "\n".join(map(str, unique)) + "\n\n")


def load_template(version, directory=None):
    """Load a data submission template for *version*.

    The file ``iTEM{version}_template.csv`` is read from *directory*. Empty
    "model", "scenario" and "region" columns are added, along with a "2005"
    column set to :obj:`True` for every row.

    Parameters
    ----------
    version
        Template version, formatted into the file name.
    directory : str or path-like, optional
        Directory containing the template file.

    Returns
    -------
    pandas.DataFrame
    """
    if directory is None:
        # NOTE(review): the original code passed the builtin function `dir`
        # to join(), which always raises TypeError. The intended location of
        # the template files is not visible in this module; the fallback
        # below is an assumption — confirm the correct directory.
        directory = join(paths["data"], "model")

    qty = pd.read_csv(join(directory, "iTEM{}_template.csv".format(version)))
    qty["model"] = ""
    qty["scenario"] = ""
    qty["region"] = ""
    qty["2005"] = True

    return qty