Source code for item.model.dimensions

from collections.abc import Sequence
from functools import cache
from itertools import chain
from os.path import join
from typing import TYPE_CHECKING, Dict

import pandas as pd

from item.common import paths

if TYPE_CHECKING:
    from sdmx.model.common import Code

# Metadata on database dimensions. Empty until load() runs; load() stores one
# codelist per dimension under the keys "fuel", "mode", "technology" and
# "variable", plus a frozenset of every mode ID under "modes_all".
INFO: Dict[str, dict] = {}

# List of the index columns required to identify all data series
INDEX = [
    "model",
    "scenario",
    "region",
    "variable",
    "mode",
    "technology",
    "fuel",
    "unit",
]

# Constants, for e.g. select()
ALL = "All"
# IDs of the passenger and freight modes, respectively. These are empty
# placeholders that load() rebinds to frozensets built from the "SERVICE"
# annotation of each mode.
PAX: frozenset[str] = frozenset()
FREIGHT: frozenset[str] = frozenset()



[docs]
def check(A, out_file):
    """Compare the quantities table *A* against the official quantities list.

    The official list is read from ``quantities.tsv`` in the model dimensions
    data directory and outer-joined with *A* on the columns 'variable', 'mode',
    'technology' and 'fuel', so rows present in only one of the two tables are
    kept.

    A summary of the number of rows missing from either side is printed, and
    the joined table, sorted by the join columns, is written to *out_file* as
    tab-separated values. In that output, 'unit_generated' is filled only for
    rows that appear in *A*, and 'unit_official' only for rows that appear in
    the official list.

    """
    key_columns = ["variable", "mode", "technology", "fuel"]

    official_path = join(paths["data"], "model", "dimensions", "quantities.tsv")
    official = pd.read_table(official_path, sep="\t", comment="#")

    # Overlapping non-key columns (i.e. 'unit') receive a suffix identifying
    # the table they came from
    combined = pd.merge(
        A,
        official,
        how="outer",
        on=key_columns,
        suffixes=("_generated", "_official"),
    )

    # Number of non-null entries per column: 'variable' is filled on every
    # row, while each 'unit_*' column is only filled for rows from its table
    filled = combined.count().to_dict()
    excess_generated = filled["variable"] - filled["unit_official"]
    missing_official = filled["variable"] - filled["unit_generated"]

    summary = "Excess generated quantities: {}\nMissing official quantities: {}"
    print(summary.format(excess_generated, missing_official))

    ordered = combined.sort_values(by=key_columns)
    ordered.to_csv(out_file, sep="\t")



def load():
    """Populate the module-level dimension metadata.

    The fuel, mode, technology and measure codelists are retrieved from the
    sibling ``structure`` module and stored in :data:`INFO` under the keys
    "fuel", "mode", "technology" and "variable".

    Each mode is then classified by the text of its "SERVICE" annotation:
    :data:`PAX` is rebound to the IDs of "passenger" modes and :data:`FREIGHT`
    to the IDs of "freight" modes. The IDs of all modes, classified or not, are
    stored as a frozenset under ``INFO["modes_all"]``.
    """
    from . import structure

    global PAX, FREIGHT

    # One codelist per data dimension
    INFO["fuel"] = structure.get_cl_fuel()
    cl_mode = structure.get_cl_mode()
    INFO["mode"] = cl_mode
    INFO["technology"] = structure.get_cl_technology()
    INFO["variable"] = structure.get_cl_measure()

    # Mode IDs grouped by the service they provide
    by_service = {"passenger": set(), "freight": set()}
    every_mode = set()

    for code in cl_mode:
        try:
            service = str(code.get_annotation(id="SERVICE").text)
        except KeyError:
            # No SERVICE annotation: the mode belongs to neither group
            service = None

        group = by_service.get(service)
        if group is not None:
            group.add(code.id)

        every_mode.add(code.id)

    PAX = frozenset(by_service["passenger"])
    FREIGHT = frozenset(by_service["freight"])
    INFO["modes_all"] = frozenset(every_mode)


#: Exclusions for :func:`generate`. Each key is a measure ID; the value is the set
#: of mode IDs removed from the modes considered for that measure.
#: :func:`generate` applies these only to measures whose "is-global" annotation
#: is not truthy.
EXCLUDE_MODE = {
    "ef_bc": {"Freight Rail and Air and Ship"},
    "intensity_service": {"Freight Rail and Air and Ship"},
    "tkm": {"Freight Rail and Air and Ship"},
    "ttw_bc": {"Freight Rail and Air and Ship"},
    "ttw_ch4": {"Freight Rail and Air and Ship"},
    "ttw_co2e": {"Freight Rail and Air and Ship"},
    "ttw_n2o": {"Freight Rail and Air and Ship"},
    "ttw_pm2.5": {"Freight Rail and Air and Ship", "International Shipping"},
    "vkt": {"Freight Rail and Air and Ship"},
    "wtt_co2e": {
        "Domestic Shipping",
        "Freight Rail and Air and Ship",
        "International Shipping",
    },
    "wtw_co2e": {
        "Domestic Shipping",
        "Freight Rail and Air and Ship",
        "International Shipping",
    },
}



[docs]
def generate():
    """Attempt to generate the reporting quantities from simple rules.

    For every measure in ``INFO["variable"]``, the relevant modes are selected
    and each mode is expanded into its valid (technology, fuel) combinations,
    using the "TECHNOLOGY" annotation of the mode and the "FUEL" annotation of
    each technology.

    Returns
    -------
    pandas.DataFrame
        One row per quantity, with the columns "variable", "mode",
        "technology", "fuel" and "unit". Measures appear in codelist order;
        within a measure, modes appear sorted by ID, so the row order is the
        same on every run.
    """
    # Rows of the quantities table
    rows = []

    @cache
    def _tf(technology: "Code") -> Sequence[tuple[str, str]]:
        """Return a sequence of valid (t, f) indices for `technology`."""
        # Fuels named by the FUEL annotation; if that is missing or empty, fall
        # back to every entry of the fuel codelist.
        # NOTE(review): the fallback iterates the codelist itself, which
        # appears to yield code objects rather than ID strings — confirm.
        fuels = technology.eval_annotation(id="FUEL") or INFO["fuel"]
        return tuple((technology.id, f) for f in fuels)

    @cache
    def _mtf(mode: "Code") -> Sequence[tuple[str, str, str]]:
        """Return a sequence of valid (m, t, f) indices for `mode`."""
        result: list[tuple[str, str, str]] = []

        # Technologies named by the TECHNOLOGY annotation; if that is missing
        # or empty, fall back to every entry of the technology codelist
        for t in mode.eval_annotation(id="TECHNOLOGY") or INFO["technology"]:
            t_code = INFO["technology"][t]
            result.extend((mode.id, t_id, f) for (t_id, f) in _tf(t_code))

        return tuple(result)

    # Iterate through each measure concept, in order
    for measure in INFO["variable"]:
        # Retrieve information from annotations
        global_ = measure.eval_annotation(id="is-global")
        u = str(measure.get_annotation(id="UNIT_MEASURE").text)

        # Determine which modes are relevant for this measure; by default, all
        modes = {m.id for m in INFO["mode"]}
        if not global_:
            # Some measures are relevant only for either passenger or freight modes
            try:
                service = str(measure.get_annotation(id="SERVICE").text)
            except KeyError:
                service = None

            if service == "passenger":
                modes = set(PAX)
            elif service == "freight":
                modes = set(FREIGHT)
            elif measure.id == "intensity_new":
                # A specific subset is used for this measure
                modes = {"2W", "Aviation", "Bus", "HDT", "LDV", "Passenger Rail"}

            # Further exclude
            modes -= EXCLUDE_MODE.get(measure.id, set())

        # Convert set of mode IDs to a list of codes. Sort the IDs first: set
        # iteration order for strings varies between interpreter runs (hash
        # randomization), which would otherwise shuffle the output rows.
        m_codes = [INFO["mode"][m] for m in sorted(modes)]

        # Add one entry to quantities for each allowable combination of dimensions
        rows.extend(
            [measure.id, m, t, f, u]
            for (m, t, f) in chain.from_iterable(map(_mtf, m_codes))
        )

    # Combine into a single table and return
    return pd.DataFrame(
        rows, columns=["variable", "mode", "technology", "fuel", "unit"]
    )




[docs]
def list_pairs(in_file, out_file):
    """Helper function for preparing the quantities list.

    Writes a file *out_file* with all unique combinations of values between
    successive dimensions in *in_file*; that is, all combinations of:

    - variable & mode,
    - mode & technology, and
    - technology & fuel.

    Parameters
    ----------
    in_file
        Path to a tab-separated file with at least the columns "variable",
        "mode", "technology" and "fuel". Text following "#" is treated as a
        comment.
    out_file
        Path of the text file to write. It is always encoded as UTF-8.
    """
    qty = pd.read_table(in_file, sep="\t", comment="#")

    pairs = (
        ["variable", "mode"],
        ["mode", "technology"],
        ["technology", "fuel"],
    )

    # Explicit UTF-8: the "→" in the section headers cannot be encoded by some
    # platform default encodings (e.g. cp1252), which would raise
    # UnicodeEncodeError.
    with open(out_file, "w", encoding="utf-8") as f:
        for pair in pairs:
            # Find unique combinations of values in the two columns; sort
            unique = sorted(qty.groupby(pair).groups.keys())
            # Write to file: a header, then one combination per line
            f.write(" → ".join(pair) + "\n\n" + "\n".join(map(str, unique)) + "\n\n")




[docs]
def load_template(version):
    """Load a data submission template for *version*.

    Parameters
    ----------
    version
        Template version; it is interpolated into the file name
        ``iTEM{version}_template.csv``.

    Returns
    -------
    pandas.DataFrame
        The contents of the template file, with empty-string "model",
        "scenario" and "region" columns added and a "2005" column set to True.
    """
    # The directory must be a real path: the bare name `dir` resolves to the
    # builtin function, which makes join() raise TypeError.
    # NOTE(review): the templates are assumed to be stored next to
    # quantities.tsv in the model dimensions data directory — confirm.
    template_dir = join(paths["data"], "model", "dimensions")

    qty = pd.read_csv(join(template_dir, f"iTEM{version}_template.csv"))
    qty["model"] = ""
    qty["scenario"] = ""
    qty["region"] = ""
    qty["2005"] = True

    # NOTE: an earlier conversion of the table to xarray is disabled; the
    # DataFrame is returned as-is.
    return qty



# Populate INFO, PAX and FREIGHT as a side effect of importing this module
load()