diff --git a/Snakefile b/Snakefile
index e2b0b27c9..d639e0607 100644
--- a/Snakefile
+++ b/Snakefile
@@ -150,6 +150,7 @@ if config["enable"].get("retrieve_databundle", True):
         params:
             countries=config["countries"],
             tutorial=config["tutorial"],
+            hydrobasins_level=config["renewable"]["hydro"]["hydrobasins_level"],
         output:
             #expand(directory('{file}') if isdir('{file}') else '{file}', file=datafiles)
             expand("{file}", file=datafiles_retrivedatabundle(config)),
             directory("data/landcover"),
diff --git a/config.default.yaml b/config.default.yaml
index ba638576a..dd05a9fe2 100644
--- a/config.default.yaml
+++ b/config.default.yaml
@@ -292,9 +292,10 @@ renewable:
     extendable: true
   hydro:
     cutout: cutout-2013-era5
+    hydrobasins_level: 6
     resource:
       method: hydro
-      hydrobasins: data/hydrobasins/hybas_world_lev06_v1c.shp
+      hydrobasins: data/hydrobasins/hybas_world.shp
       flowspeed: 1.0 # m/s
       # weight_with_height: false
       # show_progress: true
diff --git a/config.tutorial.yaml b/config.tutorial.yaml
index 410d93683..6aba3e46c 100644
--- a/config.tutorial.yaml
+++ b/config.tutorial.yaml
@@ -290,9 +290,10 @@ renewable:
     extendable: true
   hydro:
     cutout: cutout-2013-era5-tutorial
+    hydrobasins_level: 4
     resource:
       method: hydro
-      hydrobasins: data/hydrobasins/hybas_world_lev04_v1c.shp
+      hydrobasins: data/hydrobasins/hybas_world.shp
       flowspeed: 1.0 # m/s
       # weight_with_height: false
       # show_progress: true
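Note on the config change above: the `hydrobasins` path no longer encodes the basin level in the filename. The level is set once via `hydrobasins_level` and only controls which HydroBASINS archives are fetched before being merged into a single `hybas_world.shp`. A minimal sketch of that naming scheme, mirroring the retrieval code further down (the helper name and values here are illustrative, not part of the PR):

```python
# Illustrative only: how a hydrobasins_level value maps to the
# HydroBASINS archive names used by the retrieval code below.
BASE_URL = "https://data.hydrosheds.org/file/HydroBASINS/standard/"


def hydrobasins_archive_urls(level, suffixes=("af",)):
    # zero-pad the level, e.g. 6 -> "06", as the download loop does
    level_code = "{:02d}".format(int(level))
    return [f"{BASE_URL}hybas_{rg}_lev{level_code}_v1c.zip" for rg in suffixes]


print(hydrobasins_archive_urls(6))  # config.default.yaml: ...hybas_af_lev06_v1c.zip
print(hydrobasins_archive_urls(4))  # config.tutorial.yaml: ...hybas_af_lev04_v1c.zip
```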
diff --git a/configs/bundle_config.yaml b/configs/bundle_config.yaml
index 987b2c524..2fa1c74f9 100644
--- a/configs/bundle_config.yaml
+++ b/configs/bundle_config.yaml
@@ -36,7 +36,7 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_NGBJ.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_NGBJ.zip?download=1
       gdrive: https://drive.google.com/file/d/1Vb1ISjhy7iwTTZYeezGd6S4nLt-EDGme/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
@@ -49,7 +49,7 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_BW.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_BW.zip?download=1
       gdrive: https://drive.google.com/file/d/19IXvTD8gVSzgTInL85ta7QjaNI8ZPCCY/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
@@ -62,12 +62,26 @@ databundles:
     category: data
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_data_MA.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_tutorial_MA.zip?download=1
       gdrive: https://drive.google.com/file/d/1VGzE8ZJHAvAQ9X44QNSX4rH3QF7Yi37D/view?usp=drive_link
     output:
       - data/gebco/GEBCO_2021_TID.nc
       - data/copernicus/PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif
+
+  # load tutorial hydrobasins bundle for Africa only
+  bundle_tutorial_hydrobasins:
+    countries: [Africa]
+    tutorial: true
+    category: hydrobasins
+    destination: "data/hydrobasins"
+    urls:
+      hydrobasins:
+        base_url: https://data.hydrosheds.org/file/HydroBASINS/standard/
+        suffixes: ["af"]
+    unzip: true
+    output:
+      - data/hydrobasins/hybas_world.shp
 
   # tutorial bundle specific for Nigeria and Benin only
   bundle_cutouts_tutorial_NGBJ:
     countries: [NG, BJ]
     tutorial: true
@@ -75,7 +89,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_NGBJ.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_NGBJ.zip?download=1
       gdrive: https://drive.google.com/file/d/1xnomHdXf_c5STrf7jtDiuRlN2zW0FSVC/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -88,7 +102,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_BW.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_BW.zip?download=1
       gdrive: https://drive.google.com/file/d/1DDQAtnIDM0FNC3vCldfHeH__IpTbyIJt/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -101,7 +115,7 @@ databundles:
     category: cutouts
     destination: "cutouts"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1223907/files/tutorial_cutouts_MA.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/bundle_cutouts_tutorial_MA.zip?download=1
       gdrive: https://drive.google.com/file/d/1j5v2f4E756jmDMa707QvdNJq3xM4bYUk/view?usp=drive_link
     output: [cutouts/cutout-2013-era5-tutorial.nc]
     disable_by_opt:
@@ -125,12 +139,9 @@ databundles:
     category: common
     destination: "data"
     urls:
-      zenodo: https://sandbox.zenodo.org/record/1183583/files/tutorial_data_general.zip?download=1
+      zenodo: https://sandbox.zenodo.org/records/3853/files/tutorial_data_general.zip?download=1
       gdrive: https://drive.google.com/file/d/1nRLrs_kP0qVl-IHC4BFLjpoKa3HLk2Py/view
     output:
-      - data/costs.csv
-      - data/hydro_capacities.csv
-      - data/custom_powerplants.csv
       - data/eez/eez_v11.gpkg
       - data/ssp2-2.6/2030/era5_2013/Africa.nc
       - data/ssp2-2.6/2030/era5_2013/Asia.nc
@@ -138,8 +149,20 @@ databundles:
       - data/ssp2-2.6/2030/era5_2013/NorthAmerica.nc
       - data/ssp2-2.6/2030/era5_2013/SouthAmerica.nc
       - data/ssp2-2.6/2030/era5_2013/Oceania.nc
-      - data/hydrobasins/hybas_world_lev04_v1c.shp
-      - data/hydrobasins/hybas_world_lev05_v1c.shp
+
+  # global data for hydrobasins
+  bundle_hydrobasins:
+    countries: [Earth]
+    tutorial: false
+    category: hydrobasins
+    destination: "data/hydrobasins"
+    urls:
+      hydrobasins:
+        base_url: https://data.hydrosheds.org/file/HydroBASINS/standard/
+        suffixes: ["af", "ar", "as", "au", "eu", "gr", "na", "sa", "si"]
+    unzip: true
+    output:
+      - data/hydrobasins/hybas_world.shp
 
   # data bundle containing the data of the data folder common to all regions of the world
   bundle_data_earth:
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 3f50f3920..121c8fd6d 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -20,6 +20,8 @@ E.g. if a new rule becomes available describe how to use it `snakemake -j1 run_t
 
 * Improve retrieve_databundle to prioritize smallest databundles `PR #911 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/911>`__
 
+* Add functionality to load hydrobasins shapefiles directly from the data source `PR #919 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/919>`__
+
 PyPSA-Earth 0.2.3
 =================
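Unlike the single-link `zenodo`/`gdrive` entries, the new `hydrobasins` entry under `urls:` is a base URL plus a list of region suffixes, so it needs its own handler. A hypothetical sketch of how a bundle can be routed to a `download_and_unzip_<host>` function by the keys of its `urls:` mapping (the handler names match the functions defined in `scripts/retrieve_databundle_light.py`, but this dispatch loop is only an illustration, not the script's actual selection logic):

```python
# Hypothetical dispatch sketch: pick a download_and_unzip_<host> handler
# based on the keys of a bundle's `urls:` mapping.
def route_bundle(b_name, config_bundle, rootpath="."):
    for host in config_bundle["urls"]:  # e.g. "zenodo", "gdrive", "hydrobasins"
        handler = globals().get(f"download_and_unzip_{host}")
        if handler is None:
            continue  # no handler for this host, try the next mirror
        if handler(config_bundle, rootpath):
            return True  # first successful host wins
    return False
```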
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index b3bb8e90f..d936c4c47 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -386,7 +386,9 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
     return costs
 
 
-def progress_retrieve(url, file, data=None, disable_progress=False, roundto=1.0):
+def progress_retrieve(
+    url, file, data=None, headers=None, disable_progress=False, roundto=1.0
+):
     """
     Function to download data from a url with a progress bar progress in
     retrieving data.
@@ -418,6 +420,11 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto):
     if data is not None:
         data = urllib.parse.urlencode(data).encode()
 
+    if headers:
+        opener = urllib.request.build_opener()
+        opener.addheaders = headers
+        urllib.request.install_opener(opener)
+
     urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data)
 
 
@@ -523,7 +530,7 @@ def make_accessable(*ios):
     return snakemake
 
 
-def getContinent(code):
+def getContinent(code, world_iso=read_osm_config("world_iso")):
     """
     Returns continent names that contains list of iso-code countries.
@@ -547,7 +554,6 @@ def getContinent(code):
 
     continent_list = []
     code_set = set(code)
-    world_iso = read_osm_config("world_iso")
     for continent in world_iso.keys():
         single_continent_set = set(world_iso[continent])
         if code_set.intersection(single_continent_set):
@@ -787,9 +793,10 @@ def filter_codes(c_list, iso_coding=True):
 
     full_codes_list = []
 
+    world_iso, continent_regions = read_osm_config("world_iso", "continent_regions")
+
     for value1 in input:
         codes_list = []
-        world_iso, continent_regions = read_osm_config("world_iso", "continent_regions")
 
         # extract countries in world
         if value1 == "Earth":
             for continent in world_iso.keys():
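The `headers` argument added to `progress_retrieve` works by installing a process-wide opener before `urlretrieve` runs, exactly as the hunk above shows. A self-contained sketch of the same mechanism (a standalone rewrite with an illustrative URL, not the PR's function); note that `install_opener` mutates global urllib state, so the custom headers also apply to any later urllib call in the same process:

```python
import urllib.request


def retrieve_with_headers(url, file, headers=None):
    """Download url to file, optionally replacing urllib's default headers."""
    if headers:
        opener = urllib.request.build_opener()
        opener.addheaders = headers  # replaces the default Python-urllib agent
        urllib.request.install_opener(opener)
    urllib.request.urlretrieve(url, file)


# The HydroSHEDS downloads below send a browser-like User-agent, presumably
# because the server rejects urllib's default one:
retrieve_with_headers(
    "https://example.com/hybas_af_lev06_v1c.zip",  # illustrative URL
    "hybas_af_lev06_v1c.zip",
    headers=[("User-agent", "Mozilla/5.0")],
)
```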
diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index c05586642..580ba20f7 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -80,11 +80,13 @@
 - ``cutouts``: input data unzipped into the cutouts folder
 """
+import glob
 import logging
 import os
 import re
 from zipfile import ZipFile
 
+import geopandas as gpd
 import pandas as pd
 import yaml
 from _helpers import (
@@ -95,6 +97,7 @@
     sets_path_to_root,
 )
 from google_drive_downloader import GoogleDriveDownloader as gdd
+from tqdm import tqdm
 
 logger = create_logger(__name__)
@@ -303,20 +306,24 @@ def download_and_unzip_protectedplanet(
     return True
 
 
-def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False):
+def download_and_unpack(
+    url,
+    file_path,
+    resource,
+    destination,
+    headers=None,
+    hot_run=True,
+    unzip=True,
+    disable_progress=False,
+):
     """
-    download_and_unzip_direct(config, rootpath, dest_path, hot_run=True,
-    disable_progress=False)
+    download_and_unpack(url, file_path, resource, destination, headers=None,
+    hot_run=True, unzip=True, disable_progress=False)
 
-    Function to download the data by category from a direct url with no processing.
-    If in the configuration file the unzip is specified True, then the downloaded data is unzipped.
+    Helper function that downloads a file from a url and, when unzip is True,
+    extracts it into the destination folder.
 
     Inputs
     ------
-    config : Dict
-        Configuration data for the category to download
-    rootpath : str
-        Absolute path of the repository
+    url : str
+        Url of the file to download
+    file_path : str
+        File path where the downloaded file is stored
+    resource : str
+        Name of the downloaded resource, used for logging
+    destination : str
+        Folder where a zip archive is extracted, when unzip is True
+    headers : list of tuples (default None)
+        Custom headers for the download request, e.g. [("User-agent", "Mozilla/5.0")]
+    unzip : Bool (default True)
+        When true, the downloaded archive is unzipped into destination and removed
     hot_run : Bool (default True)
         When true the data are downloaded
         When false, the workflow is run without downloading and unzipping
@@ -327,24 +334,21 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F
     disable_progress : Bool (default False)
         When true the progress bar to download data is disabled
 
     Outputs
     -------
     True when download is successful, False otherwise
     """
-    resource = config["category"]
-    url = config["urls"]["direct"]
-
-    file_path = os.path.join(config["destination"], os.path.basename(url))
-
     if hot_run:
         if os.path.exists(file_path):
             os.remove(file_path)
 
     try:
         logger.info(f"Downloading resource '{resource}' from cloud '{url}'.")
-        progress_retrieve(url, file_path, disable_progress=disable_progress)
+        progress_retrieve(
+            url, file_path, headers=headers, disable_progress=disable_progress
+        )
 
         # if the file is a zipfile and unzip is enabled
         # then unzip it and remove the original file
-        if config.get("unzip", False):
+        if unzip:
             with ZipFile(file_path, "r") as zipfile:
-                zipfile.extractall(config["destination"])
+                zipfile.extractall(destination)
 
         os.remove(file_path)
         logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.")
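For reference, a usage sketch of the new `download_and_unpack` helper as `download_and_unzip_direct` below calls it. All values here are illustrative; the helper returns True on success and False otherwise:

```python
# Illustrative call: download a zip archive and extract it into data/.
ok = download_and_unpack(
    url="https://sandbox.zenodo.org/records/3853/files/tutorial_data_general.zip?download=1",
    file_path="data/tutorial_data_general.zip",  # where the archive lands
    resource="common",                           # only used for log messages
    destination="data",                          # extraction target
    headers=None,       # keep urllib's default headers for direct downloads
    hot_run=True,       # False dry-runs the workflow without downloading
    unzip=True,         # extract into destination and delete the archive
    disable_progress=False,
)
if not ok:
    print("download failed")
```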
@@ -355,6 +359,108 @@
     return True
 
 
+def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False):
+    """
+    download_and_unzip_direct(config, rootpath, hot_run=True,
+    disable_progress=False)
+
+    Function to download the data by category from a direct url with no processing.
+    If unzip is set to True in the configuration file, the downloaded data are unzipped.
+
+    Inputs
+    ------
+    config : Dict
+        Configuration data for the category to download
+    rootpath : str
+        Absolute path of the repository
+    hot_run : Bool (default True)
+        When true the data are downloaded
+        When false, the workflow is run without downloading and unzipping
+    disable_progress : Bool (default False)
+        When true the progress bar to download data is disabled
+
+    Outputs
+    -------
+    True when download is successful, False otherwise
+    """
+    resource = config["category"]
+    destination = config["destination"]
+    url = config["urls"]["direct"]
+
+    file_path = os.path.join(destination, os.path.basename(url))
+
+    unzip = config.get("unzip", False)
+
+    return download_and_unpack(
+        url=url,
+        file_path=file_path,
+        resource=resource,
+        destination=destination,
+        hot_run=hot_run,
+        unzip=unzip,
+        disable_progress=disable_progress,
+    )
+
+
+def download_and_unzip_hydrobasins(
+    config, rootpath, hot_run=True, disable_progress=False
+):
+    """
+    download_and_unzip_hydrobasins(config, rootpath, hot_run=True,
+    disable_progress=False)
+
+    Function to download and unzip the data for hydrobasins from the HydroBASINS database
+    available via https://www.hydrosheds.org/products/hydrobasins
+
+    We are using data from the HydroSHEDS version 1 database
+    which is © World Wildlife Fund, Inc. (2006-2022) and has been used herein under license.
+    WWF has not evaluated our data pipeline and therefore gives no warranty regarding its
+    accuracy, completeness, currency or suitability for any particular purpose.
+    Portions of the HydroSHEDS v1 database incorporate data which are the intellectual property
+    rights of © USGS (2006-2008), NASA (2000-2005), ESRI (1992-1998), CIAT (2004-2006),
+    UNEP-WCMC (1993), WWF (2004), Commonwealth of Australia (2007), and Her Royal Majesty
+    and the British Crown and are used under license. The HydroSHEDS v1 database and
+    more information are available at https://www.hydrosheds.org.
+
+    Inputs
+    ------
+    config : Dict
+        Configuration data for the category to download
+    rootpath : str
+        Absolute path of the repository
+    hot_run : Bool (default True)
+        When true the data are downloaded
+        When false, the workflow is run without downloading and unzipping
+    disable_progress : Bool (default False)
+        When true the progress bar to download data is disabled
+
+    Outputs
+    -------
+    True when download is successful, False otherwise
+    """
+    resource = config["category"]
+    destination = config["destination"]
+    url_templ = config["urls"]["hydrobasins"]["base_url"]
+    suffix_list = config["urls"]["hydrobasins"]["suffixes"]
+
+    level_code = snakemake.config["renewable"]["hydro"]["hydrobasins_level"]
+    level_code = "{:02d}".format(int(level_code))
+
+    for rg in suffix_list:
+        url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip"
+        file_path = os.path.join(destination, os.path.basename(url))
+
+        if not download_and_unpack(
+            url=url,
+            file_path=file_path,
+            resource=resource,
+            destination=destination,
+            headers=[("User-agent", "Mozilla/5.0")],
+            hot_run=hot_run,
+            unzip=True,
+            disable_progress=disable_progress,
+        ):
+            return False
+
+    return True
+
+
 def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False):
     """
     download_and_unzip_post(config, rootpath, dest_path, hot_run=True,
     disable_progress=False)
@@ -395,7 +501,10 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal
         logger.info(f"Downloading resource '{resource}' from cloud '{url}'.")
 
         progress_retrieve(
-            url, file_path, data=postdata, disable_progress=disable_progress
+            url,
+            file_path,
+            data=postdata,
+            disable_progress=disable_progress,
         )
 
         # if the file is a zipfile and unzip is enabled
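After `download_and_unzip_hydrobasins` has run with the default config (level 6, nine region suffixes), `data/hydrobasins` should contain one regional shapefile per suffix plus the usual sidecar files. A quick sanity check (illustrative, assumes the default level):

```python
import glob

# Each unzipped archive yields hybas_<rg>_lev06_v1c.shp together with its
# .dbf/.prj/.shx sidecars; merge_hydrobasins_shape below relies on this naming.
for shp in sorted(glob.glob("data/hydrobasins/hybas_*_lev06_v1c.shp")):
    print(shp)
# data/hydrobasins/hybas_af_lev06_v1c.shp
# data/hydrobasins/hybas_ar_lev06_v1c.shp
# ... one shapefile per suffix in the bundle config
```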
@@ -623,6 +732,26 @@ def datafiles_retrivedatabundle(config):
     return listoutputs
 
 
+def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level):
+    """
+    Merge the regional hydrobasins shapefiles into a single global shapefile.
+    """
+    basins_path = config_hydrobasin["destination"]
+    output_fl = config_hydrobasin["output"][0]
+
+    files_to_merge = [
+        "hybas_{0:s}_lev{1:02d}_v1c.shp".format(suffix, hydrobasins_level)
+        for suffix in config_hydrobasin["urls"]["hydrobasins"]["suffixes"]
+    ]
+
+    gpdf_list = [None] * len(files_to_merge)
+    logger.info("Reading hydrobasins files")
+    for i, f_name in tqdm(enumerate(files_to_merge), total=len(files_to_merge)):
+        gpdf_list[i] = gpd.read_file(os.path.join(basins_path, f_name))
+    fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates(
+        subset="HYBAS_ID", ignore_index=True
+    )
+    logger.info("Merging single files into:\n\t" + output_fl)
+    fl_merged.to_file(output_fl, driver="ESRI Shapefile")
+
+
 if __name__ == "__main__":
     if "snakemake" not in globals():
         os.chdir(os.path.dirname(os.path.abspath(__file__)))
@@ -687,6 +816,16 @@
         if not downloaded_bundle:
             logger.error(f"Bundle {b_name} cannot be downloaded")
 
+    hydrobasin_bundles = [
+        b_name for b_name in bundles_to_download if "hydrobasins" in b_name
+    ]
+    if len(hydrobasin_bundles) > 0:
+        logger.info("Merging regional hydrobasins files into a global shapefile")
+        hydrobasins_level = snakemake.params["hydrobasins_level"]
+        merge_hydrobasins_shape(
+            config_bundles[hydrobasin_bundles[0]], hydrobasins_level
+        )
+
     logger.info(
         "Bundle successfully loaded and unzipped:\n\t"
         + "\n\t".join(bundles_to_download)
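Putting the two new steps together, a hypothetical standalone run (inside the workflow the level comes from `snakemake.params` and the bundle dict from `configs/bundle_config.yaml`; note that `download_and_unzip_hydrobasins` additionally reads the level from the `snakemake` global, which must exist in the calling context):

```python
# Hypothetical end-to-end run of the new retrieval + merge steps.
config_bundle = {
    "category": "hydrobasins",
    "destination": "data/hydrobasins",
    "urls": {
        "hydrobasins": {
            "base_url": "https://data.hydrosheds.org/file/HydroBASINS/standard/",
            "suffixes": ["af"],  # one region keeps the example small
        }
    },
    "output": ["data/hydrobasins/hybas_world.shp"],
}

# 1. fetch and unzip the regional archives (level read via snakemake.config)
download_and_unzip_hydrobasins(config_bundle, rootpath=".")

# 2. merge them, dropping duplicate HYBAS_IDs, into the single global
#    shapefile that config.default.yaml now points to
merge_hydrobasins_shape(config_bundle, hydrobasins_level=6)
```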