Preparation of CMIP6 GHG forcing data

We will have a look at the greenhouse gas concentrations (ghg) that are used for REMO’s radiation code to force different emissions scenarios according to CMIP5 and CMIP6. We define some helper functions here to look at the origin REMO ghg tables for CMIP5 which can be found in the DKRZ data pool.

CMIP5

[1]:

import glob
import re
from os import path as op

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr


def read_remo_ghg(fname):
    return pd.read_csv(
        fname,
        skiprows=4,
        header=None,
        delim_whitespace=True,
        names=["time", "CO2", "CH4", "N2O", "CFC_11", "CFC_12"],
    )


def get_remo_data(files):
    return {re.findall("rcp[0-9]+", op.basename(f))[0]: read_remo_ghg(f) for f in files}


def get_echam_data(files):
    return {
        re.findall("ssp[0-9]+", op.basename(f))[0]: xr.open_dataset(f) for f in files
    }


def concat(dsets):
    concat_dim = xr.DataArray(list(dsets.keys()), dims="scenario")
    ds = xr.concat(dsets.values(), dim=concat_dim, join="override").squeeze(drop=True)
    ds["time"] = ds.time.astype(int)
    return ds.drop("time_bnds")


def remo_cmip5_df(df_dict):
    for scen, df in df_dict.items():
        df["scenario"] = scen
    return pd.concat(df_dict.values())


def plot_species(ds):
    """plot all species and scenarios"""
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(10, 15))
    axes = []
    axes.append(plt.subplot2grid((4, 2), (0, 0), colspan=2, rowspan=2))
    axes.append(plt.subplot2grid((4, 2), (2, 0), colspan=1, rowspan=1))
    axes.append(plt.subplot2grid((4, 2), (2, 1), colspan=1, rowspan=1))
    axes.append(plt.subplot2grid((4, 2), (3, 0), colspan=1, rowspan=1))
    axes.append(plt.subplot2grid((4, 2), (3, 1), colspan=1, rowspan=1))

    for i, var in enumerate(ds.data_vars):
        ds[var].plot(ax=fig.axes[i], hue="scenario")
    plt.tight_layout()

[2]:

remo_ghg = glob.glob("/pool/data/remo/GHG_*_1850-2101.txt")
remo_ghg.sort()
remo_cmip5 = get_remo_data(remo_ghg)
remo_cmip5_ds = remo_cmip5_df(remo_cmip5).set_index(["time", "scenario"]).to_xarray()
plot_species(remo_cmip5_ds.sel(time=slice("1960", "2100")))

Those are the CMIP5 rcp scenarios, that have been used for most of the recent REMO runs that were driven by CMIP5 datasets.

input4MIPs

For CMIP6, we will use the data from input4MIPs where ESGF provides forcing datasets for CMIP6. At DKRZ we find:

[3]:

scenarios = [
    op.basename(f)
    for f in glob.glob(
        "/work/kd0956/INPUT4MIPS/data/input4MIPs/CMIP6/ScenarioMIP/UoM/*"
    )
]
scenarios

[3]:

['UoM-REMIND-MAGPIE-ssp585-1-2-1',
 'UoM-GCAM4-ssp434-1-2-1',
 'UoM-REMIND-MAGPIE-ssp534-over-1-2-0',
 'UoM-IMAGE-ssp119-1-2-1',
 'UoM-GCAM4-ssp460-1-2-0',
 'UoM-GCAM4-ssp460-1-2-1',
 'UoM-GCAM4-ssp434-1-2-0',
 'UoM-IMAGE-ssp126-1-2-1',
 'UoM-MESSAGE-GLOBIOM-ssp245-1-2-0',
 'UoM-AIM-ssp370-1-2-0',
 'UoM-IMAGE-ssp126-1-2-0',
 'UoM-REMIND-MAGPIE-ssp585-1-2-0',
 'UoM-IMAGE-ssp119-1-2-0',
 'UoM-MESSAGE-GLOBIOM-ssp245-1-2-1',
 'UoM-REMIND-MAGPIE-ssp534-over-1-2-1',
 'UoM-AIM-ssp370-1-2-1',
 'UoM-ssp126-1-1-0']

For REMO, we requires the following species and experiment ids:

[4]:

mapping = {
    "mole_fraction_of_carbon_dioxide_in_air": "CO2",
    "mole_fraction_of_methane_in_air": "CH4",
    "mole_fraction_of_nitrous_oxide_in_air": "N2O",
    "mole_fraction_of_cfc11_in_air": "CFC_11",
    "mole_fraction_of_cfc12_in_air": "CFC_12",
}

scenarios = {
    "UoM-IMAGE-ssp119-1-2-0": "ssp119",
    "UoM-IMAGE-ssp126-1-2-0": "ssp126",
    "UoM-MESSAGE-GLOBIOM-ssp245-1-2-1": "ssp245",
    "UoM-AIM-ssp370-1-2-1": "ssp370",
    "UoM-REMIND-MAGPIE-ssp585-1-2-1": "ssp585",
}

Let’s define some helpful functions to access the different scenario and species datasets at DKRZ:

[5]:

def get_url(source_id, variable_id, activity_id):
    return f"/work/kd0956/INPUT4MIPS/data/input4MIPs/CMIP6/{activity_id}/UoM/{source_id}/atmos/yr/{variable_id}/gr1-GMNHSH/*/*"


def create_historical_dataset(mapping, sector=0):
    """ """
    # decode_times = False, since calendar is no cf standard.
    ds = create_dataset(
        "UoM-CMIP-1-2-0", mapping, "CMIP", sector=sector, decode_times=False
    )
    ds["time"] = np.arange(ds.dims["time"])

    return ds


def create_dataset(source_id, mapping, activity_id, sector=0, **kwargs):
    mapp = mapping
    if source_id == "UoM-CMIP-1-2-0":
        # For some reason, here the variables names are written with "minus"
        # instead of underscore(?), weired... we replace underscore with minus then...
        mapp = {k.replace("_", "-"): v for k, v in mapp.items()}
    urls = []
    for key in mapp.keys():
        url = get_url(source_id, key, activity_id)
        paths = glob.glob(url)
        if len(paths) > 1:
            raise Exception("no unique selection")
        elif len(paths) == 0:
            print(url)
            raise Exception("no files found")
        urls.append(paths[0])
    ds = (
        xr.open_mfdataset(urls, **kwargs)
        .isel(sector=0, drop=True)
        .drop(["time_bnds", "sector_bnds"])
    )
    for var in ds.data_vars:
        ds[var].attrs["long_name"] = var
    return ds.rename(mapping)


def all_scenarios(mapping, scenarios):
    dsets = {
        s: create_dataset(key, mapping, "ScenarioMIP") for key, s in scenarios.items()
    }
    dim = xr.DataArray(list(dsets.keys()), dims="scenario")
    return xr.concat(list(dsets.values()), dim=dim)

[6]:

hist = create_historical_dataset(mapping)
scens = all_scenarios(mapping, scenarios)
scens["time"] = scens.time.dt.year

We create one big datasets containing all species and scenarios to get an overview. Then we can have a look.

[7]:

ds = xr.concat([hist, scens], dim="time")
plot_species(ds.sel(time=slice("1960", "2100")))

write the tables to REMO formatted txt input files:

[8]:

def write_table(ds, scen, start=1850, end=2101):
    data = ds.sel(scenario=scen)  # .squeeze(drop=True)
    df = (
        data.rename(time="year")
        .sel(year=slice(start, end))
        .drop("scenario")
        .to_dataframe()
        .reset_index()[["year", "CO2", "CH4", "N2O", "CFC_11", "CFC_12"]]
    )
    data = df.to_string(
        index=False, header=False, float_format="%.2f", col_space=8, justify="left"
    )
    header = (
        f"{scen}\n"
        "time     CO2       CH4     N2O        CFC_11   CFC_12\n"
        "year     ppmv      ppbv    ppbv       pptv     pptv\n\n"
    )
    ref = "\n\nreference: https://pyremo.readthedocs.io/en/stable/ghg-forcing.html"
    txt = header + data + ref
    filename = f"GHG_{scen}_{start}-{end}.txt"
    with open(filename, "w") as f:
        f.write(txt)
    return filename


[write_table(ds, scen) for scen in ds.scenario.values]

[8]:

['GHG_ssp119_1850-2101.txt',
 'GHG_ssp126_1850-2101.txt',
 'GHG_ssp245_1850-2101.txt',
 'GHG_ssp370_1850-2101.txt',
 'GHG_ssp585_1850-2101.txt']