Source code for pyremo.tutorial

"""
Useful for:

* users learning pyremo
* building tutorials in the documentation

The code is adapted from xarray.
"""

# This code is adapted from xarray.
import os
import pathlib

import xarray as xr
from xarray import open_dataset as _open_dataset

# Root URL of the repository the tutorial files are downloaded from.
base_url = "https://github.com/remo-rcm/pyremo-data"
# Ref segment inserted into the download URL: {base_url}/raw/{version}/<file>.
version = "main"

# Directory name passed to pooch.os_cache when no cache directory is given.
_default_cache_dir_name = "pyremo_tutorial_data"

# Maps dataset names to complete download URLs; a name found here is fetched
# from that URL instead of one built from base_url. Empty by default.
external_urls = {}  # type: dict

# Maps dataset names to a netCDF format version (3 or 4); read by
# _check_netcdf_engine_installed to decide which engines to probe.
# Empty by default, so no dataset triggers an engine check.
file_formats = {}


def _construct_cache_dir(path):
    """Resolve the directory used to cache downloaded tutorial files.

    Parameters
    ----------
    path : path-like, str or None
        Requested cache directory. ``os.PathLike`` objects are converted with
        ``os.fspath``; ``None`` selects the location returned by
        ``pooch.os_cache`` for ``_default_cache_dir_name``; any other value
        is returned unchanged.

    Returns
    -------
    The resolved cache directory.

    Raises
    ------
    ImportError
        If ``path`` is None and pooch is not installed.
    """
    if path is None:
        # pooch is only needed to locate the default cache, so import it
        # lazily: an explicit path then works even when pooch is unavailable.
        import pooch

        return pooch.os_cache(_default_cache_dir_name)

    if isinstance(path, os.PathLike):
        return os.fspath(path)

    return path


def _check_netcdf_engine_installed(name):
    """Verify that an engine able to read tutorial dataset ``name`` is importable.

    The format version registered for ``name`` in ``file_formats`` selects
    which engines are probed: scipy or netCDF4 for version 3, h5netcdf or
    netCDF4 for version 4. Names without a registered version are not checked.

    Parameters
    ----------
    name : str
        Dataset name used as the key into ``file_formats``.

    Raises
    ------
    ImportError
        If neither engine suitable for the dataset's format can be imported.
    """
    netcdf_format = file_formats.get(name)

    if netcdf_format == 3:
        # Preferred engine first, netCDF4 as the fallback.
        try:
            import scipy  # noqa
        except ImportError:
            try:
                import netCDF4  # noqa
            except ImportError:
                raise ImportError(
                    f"opening tutorial dataset {name} requires either scipy "
                    "or netCDF4 to be installed."
                )
    elif netcdf_format == 4:
        # Preferred engine first, netCDF4 as the fallback.
        try:
            import h5netcdf  # noqa
        except ImportError:
            try:
                import netCDF4  # noqa
            except ImportError:
                raise ImportError(
                    f"opening tutorial dataset {name} requires either "
                    "h5netcdf or netCDF4 to be installed."
                )


# idea borrowed from Seaborn
def open_dataset(
    name="remo_EUR-44",
    cache=True,
    cache_dir=None,
    *,
    engine=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"remo_EUR-44"``: remo example output on EUR-44 domain
    * ``"remo_EUR-11_TEMP2"``: remo 2m temperature time series on EUR-11 domain

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'remo_EUR-44'
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the data is loaded into memory and the downloaded file
        is deleted.
    engine : str, optional
        Engine passed to xarray.open_dataset. When omitted, names ending in
        ``.grib`` use ``"cfgrib"``; names without a suffix get ``.nc``
        appended and the engine choice is left to xarray.
    **kws : dict, optional
        Passed to xarray.open_dataset

    Raises
    ------
    ImportError
        If pooch is not installed.

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Silence pooch's informational download messages.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, engine=engine, **kws)
    if not cache:
        # Pull the data into memory, then release the file handle before
        # deleting the download; removing a file that is still open fails
        # on some platforms.
        ds = ds.load()
        ds.close()
        pathlib.Path(filepath).unlink()

    return ds
def load_dataset(*args, **kwargs):
    """
    Open, load into memory, and close a dataset from the online repository
    (requires internet).

    All arguments are forwarded unchanged to ``open_dataset``.

    See Also
    --------
    open_dataset
    """
    dataset = open_dataset(*args, **kwargs)
    # The context manager closes the dataset once its data is in memory.
    with dataset:
        return dataset.load()
def mpi_esm(**kwargs):
    """Load the MPI-ESM1-2-HR example files and merge them into one dataset.

    Each file is fetched with ``load_dataset`` (keyword arguments are
    forwarded to it) and the results are combined with ``xarray.merge``
    using ``compat="override"`` and ``join="override"``.
    """
    file_names = (
        "ta_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "ua_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "va_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "hus_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "ps_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "orog_fx_MPI-ESM1-2-HR_historical_r1i1p1f1_gn",
        "sftlf_fx_MPI-ESM1-2-HR_historical_r1i1p1f1_gn",
    )
    datasets = []
    for file_name in file_names:
        datasets.append(load_dataset(file_name, **kwargs))
    return xr.merge(datasets, compat="override", join="override")


def mpi_esm_tos(**kwargs):
    """Open the MPI-ESM1-2-HR ``tos`` example file via ``open_dataset``.

    Keyword arguments are forwarded to ``open_dataset``.
    """
    tos_file = "tos_Oday_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_19781231-19790102"
    return open_dataset(tos_file, **kwargs)