"""Data cleaning code and configuration for T002."""
from functools import lru_cache

import pandas as pd

from item.util import dropna_logged

#: iTEM data flow matching the data from this source.
DATAFLOW = "ACTIVITY"

#: Dimensions and attributes which do not vary across this data set.
COMMON_DIMS = dict(
    # Add the same source to all rows, since all data comes from the same source.
    source="International Transport Forum",
    # Since all the data is associated with "Freight", the service is "Freight".
    service="Freight",
    vehicle="Container",
    # The data set does not provide any data on the following columns, so the
    # default value of "All" (code "_T") is used for each of them.
    automation="_T",
    fuel="_T",
    operator="_T",
    technology="_T",
)

#: Column handling for the raw data; the ``drop`` entry lists the columns to drop.
COLUMNS = dict(
    drop=[
        "COUNTRY",
        "VARIABLE",
        "YEAR",
        "Unit Code",
        "PowerCode Code",
        "PowerCode",
        "Reference Period Code",
        "Reference Period",
        "Flag Codes",
        "Flags",
    ],
    # Column containing country name for determining ISO 3166 alpha-3 codes and
    # iTEM regions. Commented, because this is the default value.
    # country_name='Country',
)
def process(df):
    """Process data set T002.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. The code reads the "Value", "Country", "Variable" and "Unit"
        columns, so all four must be present.

    Returns
    -------
    pandas.DataFrame
        The input without its original "Variable" and "Unit" columns,
        concatenated column-wise with the results of applying
        :func:`map_variable` to "Variable" and :func:`map_unit` to "Unit".
    """
    # NOTE(review): dropna_logged is defined in item.util, outside this view;
    # presumably it drops rows with a missing "Value" and logs the affected
    # "Country" values. Confirm against its implementation.
    df = df.pipe(dropna_logged, "Value", ["Country"])

    # Assign 'Mode', 'Variable', and 'Unit' values.
    # NOTE(review): presumably map_variable/map_unit return a pandas.Series per
    # input value, so each .apply() yields the replacement column(s); verify.
    return pd.concat(
        [
            df.drop(columns=["Variable", "Unit"]),
            df["Variable"].apply(map_variable),
            df["Unit"].apply(map_unit),
        ],
        axis=1,
    )