"""
Useful for:
* users learning pyremo
* building tutorials in the documentation.
* stolen from xarray!
"""
# code stolen from xarray, I am sorry!
import os
import pathlib
import xarray as xr
from xarray import open_dataset as _open_dataset
# Root of the GitHub repository that tutorial files are downloaded from.
base_url: str = "https://github.com/remo-rcm/pyremo-data"
# Git ref inserted into the download URL: ``{base_url}/raw/{version}/<file>``.
version: str = "main"
# Directory name handed to ``pooch.os_cache`` when no cache directory is given.
_default_cache_dir_name: str = "pyremo_tutorial_data"
# Dataset name -> URL; a match is used as-is instead of a URL built from ``base_url``.
external_urls: dict = {}
# Dataset name -> netCDF format version (3 or 4), consulted by the engine check.
file_formats: dict = {}
def _construct_cache_dir(path):
import pooch
if isinstance(path, os.PathLike):
path = os.fspath(path)
elif path is None:
path = pooch.os_cache(_default_cache_dir_name)
return path
def _check_netcdf_engine_installed(name):
    """Verify that a reader for the dataset's netCDF format can be imported.

    The format version is looked up in ``file_formats``. Datasets without an
    entry (or with a version other than 3 or 4) are not checked.

    Parameters
    ----------
    name : str
        Dataset name used as the key into ``file_formats``.

    Raises
    ------
    ImportError
        If neither of the two candidate packages for the format is importable.
    """
    nc_format = file_formats.get(name)

    if nc_format == 3:
        # netCDF3: scipy is tried first, netCDF4 is the fallback.
        try:
            import scipy  # noqa
        except ImportError:
            try:
                import netCDF4  # noqa
            except ImportError:
                raise ImportError(
                    f"opening tutorial dataset {name} requires either scipy or "
                    "netCDF4 to be installed."
                )
    elif nc_format == 4:
        # netCDF4/HDF5: h5netcdf is tried first, netCDF4 is the fallback.
        try:
            import h5netcdf  # noqa
        except ImportError:
            try:
                import netCDF4  # noqa
            except ImportError:
                raise ImportError(
                    f"opening tutorial dataset {name} requires either h5netcdf "
                    "or netCDF4 to be installed."
                )
# idea borrowed from Seaborn
def open_dataset(
    name="remo_EUR-44",
    cache=True,
    cache_dir=None,
    *,
    engine=None,
    **kws,
):
    """
    Open a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"remo_EUR-44"``: remo example output on EUR-44 domain
    * ``"remo_EUR-11_TEMP2"``: remo 2m temperature time series on EUR-11 domain

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'remo_EUR-44'
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the data is loaded into memory and the downloaded file
        is deleted before returning.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    engine : str, optional
        Passed to xarray.open_dataset. When omitted, names ending in
        ``.grib`` use ``"cfgrib"``, and names without a suffix are checked
        for an installed netCDF backend before ``.nc`` is appended.
    **kws : dict, optional
        Passed to xarray.open_dataset

    Returns
    -------
    The dataset returned by xarray.open_dataset.

    Raises
    ------
    ImportError
        If pooch is not installed, or if the netCDF backend check fails.

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import pooch
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on pooch to download and manage datasets."
            " To proceed please install pooch."
        ) from e

    # Lower the verbosity of pooch's logger (only warnings and above).
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    if name in external_urls:
        url = external_urls[name]
    else:
        path = pathlib.Path(name)
        if not path.suffix:
            # process the name
            default_extension = ".nc"
            if engine is None:
                _check_netcdf_engine_installed(name)
            path = path.with_suffix(default_extension)
        elif path.suffix == ".grib":
            if engine is None:
                engine = "cfgrib"

        url = f"{base_url}/raw/{version}/{path.name}"

    # retrieve the file (known_hash=None: the download is not checksum-verified)
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    ds = _open_dataset(filepath, engine=engine, **kws)
    if not cache:
        # Pull everything into memory, then release the file handle before
        # deleting the download; removing a file that is still open fails on
        # Windows and leaves a dangling handle elsewhere.
        ds = ds.load()
        ds.close()
        pathlib.Path(filepath).unlink()

    return ds
def load_dataset(*args, **kwargs):
    """
    Open, load into memory, and close a dataset from the online repository
    (requires internet).

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``open_dataset``.

    Returns
    -------
    The result of calling ``.load()`` on the opened dataset.

    See Also
    --------
    open_dataset
    """
    # The context manager closes the dataset once the data has been loaded.
    with open_dataset(*args, **kwargs) as ds:
        return ds.load()
def mpi_esm(**kwargs):
    """Load the MPI-ESM1-2-HR example files and merge them into one object.

    Each file named below is fetched with ``load_dataset`` and the results
    are combined with ``xr.merge`` using ``compat="override"`` and
    ``join="override"``.

    Parameters
    ----------
    **kwargs
        Forwarded to every ``load_dataset`` call.

    Returns
    -------
    The result of ``xr.merge`` over the loaded datasets.
    """
    dataset_names = (
        "ta_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "ua_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "va_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "hus_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "ps_6hrLev_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197901010600",
        "orog_fx_MPI-ESM1-2-HR_historical_r1i1p1f1_gn",
        "sftlf_fx_MPI-ESM1-2-HR_historical_r1i1p1f1_gn",
    )

    loaded = []
    for dataset_name in dataset_names:
        loaded.append(load_dataset(dataset_name, **kwargs))

    return xr.merge(loaded, compat="override", join="override")
def mpi_esm_tos(**kwargs):
    """Open the MPI-ESM1-2-HR ``tos`` example file.

    Uses ``open_dataset`` (not ``load_dataset``), so the data is not
    explicitly loaded into memory here.

    Parameters
    ----------
    **kwargs
        Forwarded to ``open_dataset``.

    Returns
    -------
    Whatever ``open_dataset`` returns for this file.
    """
    dataset_name = "tos_Oday_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_19781231-19790102"
    return open_dataset(dataset_name, **kwargs)