Skip to content

Commit

Permalink
Merge branch 'develop' into Docker-Stack-Testing
Browse files Browse the repository at this point in the history
  • Loading branch information
allgandalf authored Jun 15, 2023
2 parents 5097088 + c6a4d2d commit e8cc5f7
Show file tree
Hide file tree
Showing 10 changed files with 299 additions and 126 deletions.
1 change: 1 addition & 0 deletions docker/depends/pecan.depends.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ remotes::install_github(c(
wanted <- c(
'abind',
'amerifluxr',
'arrow',
'assertthat',
'BayesianTools',
'BioCro',
Expand Down
1 change: 1 addition & 0 deletions modules/data.atmosphere/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Depends:
Imports:
abind (>= 1.4.5),
amerifluxr,
arrow,
curl,
data.table,
dplyr,
Expand Down
1 change: 1 addition & 0 deletions modules/data.atmosphere/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export(metgapfill.NOAA_GEFS)
export(model.train)
export(nc.merge)
export(noaa_grid_download)
export(noaa_stage2)
export(par2ppfd)
export(pecan_standard_met_table)
export(permute.nc)
Expand Down
111 changes: 111 additions & 0 deletions modules/data.atmosphere/R/download_noaa_gefs_efi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' download_NOAA_GEFS_EFI
#'
#' Opens the NOAA GEFS stage 2 dataset via `noaa_stage2()`, keeps the rows for
#' one site and one forecast start date, derives specific humidity and wind
#' speed, and writes one netCDF file per ensemble member.
#'
#' @param sitename NEON site name
#' @param outfolder filepath to save ensemble member .nc files
#' @param start_date start date for met forecast
#' @param site.lat site lat
#' @param site.lon site lon
#'
#' @return message confirming download complete and location of .nc files
#'
#'
#' @author Alexis Helgeson
#'
download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, site.lon) {
  # Using the stage2 function means that the met has already been downscaled
  # and gapfilled to 1 hr intervals.
  met <- PEcAn.data.atmosphere::noaa_stage2(
    cycle = 0,
    version = "v12",
    endpoint = "data.ecoforecast.org",
    verbose = TRUE,
    start_date = start_date
  )

  # `!!sitename` injects the value of the function argument. A bare
  # `sitename == sitename` is resolved by data masking to the column on both
  # sides, which is always TRUE and therefore keeps every site.
  weather <- met %>%
    dplyr::filter(
      .data$reference_datetime == as.POSIXct(start_date, tz = "UTC"),
      .data$sitename == !!sitename
    ) %>%
    dplyr::collect() %>%
    dplyr::select(
      "sitename", "prediction", "variable", "horizon", "parameter", "datetime"
    )

  PEcAn.logger::logger.info("Met Aquired for", sitename, "on", as.character(start_date))

  forecast_date <- start_date

  # The neon4cast function already has the weather variable names in CF standard.
  cf_var_names <- unique(weather$variable)

  # Fail early with a readable message when the query returned nothing usable,
  # instead of failing later while assembling the data frame.
  required_vars <- c(
    "air_temperature", "air_pressure", "relative_humidity",
    "surface_downwelling_longwave_flux_in_air",
    "surface_downwelling_shortwave_flux_in_air",
    "precipitation_flux", "eastward_wind", "northward_wind"
  )
  missing_vars <- setdiff(required_vars, cf_var_names)
  if (length(missing_vars) > 0) {
    PEcAn.logger::logger.severe(
      "GEFS forecast for", sitename, "on", as.character(start_date),
      "is missing variables:", paste(missing_vars, collapse = ", ")
    )
  }

  # One entry per met variable, holding the predictions, ensemble ids and
  # timestamps.
  # Ensemble member 31 is removed because it does not cover the full timeseries.
  # This is a HACK: a generalized method for ensemble member outlier detection
  # should be added.
  # NOTE(review): the columns assembled below are combined by position, so this
  # assumes every variable has its rows in the same (member, datetime) order --
  # confirm against the stage2 dataset.
  noaa_data <- lapply(cf_var_names, function(var_name) {
    curr_var <- dplyr::filter(
      weather,
      .data$variable == !!var_name,
      .data$parameter <= 30
    )
    list(
      value = curr_var$prediction,
      ensembles = curr_var$parameter,
      forecast.date = curr_var$datetime
    )
  })
  names(noaa_data) <- cf_var_names

  # Units passed on to write_noaa_gefs_netcdf(). As written they correspond to:
  # longwave, shortwave, precipitation_flux, air_pressure, relative_humidity,
  # air_temperature, specific_humidity, wind_speed.
  # NOTE(review): this order differs from the column order of `forecast_noaa`
  # below (air_temperature, air_pressure, relative_humidity, longwave, ...).
  # If write_noaa_gefs_netcdf() pairs units with columns by position, the units
  # are mislabelled -- confirm against that function before relying on them.
  cf_var_units1 <- c("Wm-2", "Wm-2", "kgm-2s-1", "Pa", "1", "K", "1", "ms-1")

  # Calculate specific humidity from relative humidity (already in range 0-1),
  # air temperature (already in K) and air pressure (already in Pa); no unit
  # conversion is required. Rows with missing relative humidity stay NA.
  has_rh <- which(!is.na(noaa_data$relative_humidity$value))
  specific_humidity <- rep(NA, length(noaa_data$relative_humidity$value))
  specific_humidity[has_rh] <- PEcAn.data.atmosphere::rh2qair(
    rh = noaa_data$relative_humidity$value[has_rh],
    T = noaa_data$air_temperature$value[has_rh],
    press = noaa_data$air_pressure$value[has_rh]
  )

  # Calculate wind speed from east and north components.
  wind_speed <- sqrt(
    noaa_data$eastward_wind$value^2 + noaa_data$northward_wind$value^2
  )

  forecast_noaa <- tibble::tibble(
    time = noaa_data$surface_downwelling_longwave_flux_in_air$forecast.date,
    NOAA.member = noaa_data$surface_downwelling_longwave_flux_in_air$ensembles,
    air_temperature = noaa_data$air_temperature$value,
    air_pressure = noaa_data$air_pressure$value,
    relative_humidity = noaa_data$relative_humidity$value,
    surface_downwelling_longwave_flux_in_air =
      noaa_data$surface_downwelling_longwave_flux_in_air$value,
    surface_downwelling_shortwave_flux_in_air =
      noaa_data$surface_downwelling_shortwave_flux_in_air$value,
    precipitation_flux = noaa_data$precipitation_flux$value,
    specific_humidity = specific_humidity,
    wind_speed = wind_speed
  )

  PEcAn.logger::logger.info("Met df complied including specific humidity and wind speed")

  # Create the directory for the ensemble member files if it does not exist.
  # Path components are passed separately so file.path() inserts the
  # separators itself (no doubled slashes).
  output_path <- file.path(
    outfolder, "noaa", "NOAAGEFS_1hr", sitename, forecast_date, "00"
  )
  if (!dir.exists(output_path)) {
    dir.create(output_path, recursive = TRUE)
  }

  # UTC is used for the start stamp so the file name does not depend on the
  # machine's local time zone.
  start_stamp <- format(
    as.POSIXct(forecast_date, tz = "UTC"), "%Y-%m-%dT%H:%M", tz = "UTC"
  )

  # `ens` is the ensemble number.
  # NOTE(review): this assumes members are numbered 1..n. If the dataset
  # numbers them from 0, member 0 is never written -- confirm.
  for (ens in seq_along(unique(forecast_noaa$NOAA.member))) {
    forecast_noaa_ens <- forecast_noaa %>%
      dplyr::filter(.data$NOAA.member == ens) %>%
      dplyr::filter(!is.na(.data$air_temperature))

    # Nothing to write for this member; skip before computing max() on an
    # empty vector, which would only produce a warning.
    if (nrow(forecast_noaa_ens) == 0) {
      next
    }

    end_time <- max(forecast_noaa_ens$time)

    identifier <- paste(
      "NOAA_GEFS", sitename, ens, start_stamp,
      format(end_time, "%Y-%m-%dT%H:%M"),
      sep = "_"
    )
    output_file <- file.path(output_path, paste0(identifier, ".nc"))

    # Write netCDF
    PEcAn.data.atmosphere::write_noaa_gefs_netcdf(
      df = forecast_noaa_ens,
      ens,
      lat = site.lat,
      lon = site.lon,
      cf_units = cf_var_units1,
      output_file = output_file,
      overwrite = TRUE
    )
  }

  PEcAn.logger::logger.info("Met download complete and saved as .nc files at", output_path)
}




77 changes: 77 additions & 0 deletions modules/data.atmosphere/R/noaa_gefs_efi_helper.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#code taken from https://github.com/eco4cast/neon4cast/blob/main/R/noaa_gefs.R
#' noaa_stage2
#'
#' Opens the stage 2 parquet dataset for one forecast cycle and start date by
#' delegating to the internal `noaa_gefs_stage()` helper.
#'
#' @param cycle Hour at which forecast was made (`"00"`, `"06"`, `"12"` or
#'   `"18"`). Only `"00"` has 30 days horizon. The value is pasted into the
#'   dataset path as given; the default is the number `0`.
#' @param version GEFS forecast version. Prior versions correspond to forecasts
#'   issued before 2020-09-25 which have different ensemble number and horizon,
#'   among other changes, and are not made available here. Leave as default.
#' @param endpoint the EFI host address (leave as default)
#' @param verbose logical, displays or hides messages
#' @param start_date forecast start date yyyy-mm-dd format
#'
#' @return the dataset object returned by `noaa_gefs_stage()`
#'
#' @export
#'
#' @author Alexis Helgeson (taken from neon4cast package)
noaa_stage2 <- function(cycle = 0,
                        version = "v12",
                        endpoint = "data.ecoforecast.org",
                        verbose = TRUE,
                        start_date = "") {
  # Location of the requested forecast inside the bucket:
  # stage2/parquet/<cycle>/<start_date>
  stage_path <- file.path("stage2/parquet", cycle, start_date)

  noaa_gefs_stage(
    stage_path,
    partitioning = "start_date",
    version = version,
    endpoint = endpoint,
    verbose = verbose,
    start_date = start_date
  )
}

# Open one stage of the GEFS archive as an arrow dataset.
#
# stage        path of the stage (relative to the bucket root) to open
# partitioning partition column names; only used when `start_date` is not a
#              parseable date
# cycle        currently unused in the body; kept for interface compatibility
# version      GEFS version, forwarded to noaa_gefs()
# endpoint     host address, forwarded to noaa_gefs()
# verbose      if TRUE, emit progress messages
# start_date   forecast start date; when it parses as a date the dataset is
#              opened without an explicit partitioning
#
# Returns the object produced by arrow::open_dataset().
noaa_gefs_stage <- function(stage = "stage1",
                            partitioning = c("cycle", "start_date"),
                            cycle = 0,
                            version = "v12",
                            endpoint = "data.ecoforecast.org",
                            verbose = getOption("verbose", TRUE),
                            # The default used to be `start_date = start_date`,
                            # which refers to itself and fails with "promise
                            # already under evaluation" when the argument is
                            # omitted.
                            start_date = "") {
  if (verbose) {
    message(paste("establishing connection to", stage, "at", endpoint, "..."))
  }

  s3 <- noaa_gefs(version, endpoint)

  # Treat anything as.Date() cannot parse as "no start date" rather than
  # letting the parse error escape.
  parsed_date <- tryCatch(
    as.Date(start_date),
    error = function(e) as.Date(NA)
  )

  if (!is.na(parsed_date)) {
    ds <- arrow::open_dataset(s3$path(stage))
  } else {
    ds <- arrow::open_dataset(s3$path(stage), partitioning = partitioning)
  }

  if (verbose) {
    message(paste0("connected! Use dplyr functions to filter and summarise.\n",
                   "Then, use collect() to read result into R\n"))
  }
  ds
}
# Build an anonymous S3 bucket handle for the GEFS drivers of one version.
#
# version   GEFS version, appended to the bucket path as "gefs-<version>"
# endpoint  host address used as the S3 endpoint override
#
# The AWS environment variables changed by arrow_env_vars() are restored when
# this function exits, whether it returns normally or with an error.
noaa_gefs <- function(version = "v12",
                      endpoint = "data.ecoforecast.org") {
  # Without S3 support arrow::s3_bucket() fails with
  # "NotImplemented: Got S3 URI but Arrow compiled without S3 support";
  # report that up front with an actionable message.
  if (!arrow::arrow_with_s3()) {
    stop(
      "The installed 'arrow' package was built without S3 support, ",
      "which is required to read the NOAA GEFS forecasts.",
      call. = FALSE
    )
  }

  vars <- arrow_env_vars()
  # Register the restore before s3_bucket() is called, so a failure there
  # cannot leave the session's environment variables modified.
  on.exit(unset_arrow_vars(vars), add = TRUE)

  arrow::s3_bucket(
    paste0("neon4cast-drivers/noaa/gefs-", version),
    endpoint_override = endpoint,
    anonymous = TRUE
  )
}

# Save the current values of AWS_DEFAULT_REGION and AWS_EC2_METADATA_DISABLED,
# then unset the former and set the latter to "TRUE".
#
# Returns a list with elements `user_region` and `user_meta` holding the
# previous values (NA when the variable was not set), suitable for passing to
# unset_arrow_vars().
arrow_env_vars <- function() {
  # `unset = NA` makes "not set" distinguishable from "set to an empty string".
  saved <- list(
    user_region = Sys.getenv("AWS_DEFAULT_REGION", unset = NA),
    user_meta = Sys.getenv("AWS_EC2_METADATA_DISABLED", unset = NA)
  )

  Sys.unsetenv("AWS_DEFAULT_REGION")
  Sys.setenv(AWS_EC2_METADATA_DISABLED = "TRUE")

  saved
}

# Restore the environment variables saved by arrow_env_vars().
#
# vars  list with elements `user_region` and `user_meta`
#
# A variable whose saved value is NULL, NA or "" is unset; otherwise it is set
# back to the saved value. Previously AWS_EC2_METADATA_DISABLED was left at
# "TRUE" when it had not been set beforehand, and AWS_DEFAULT_REGION was
# re-created as an empty string.
unset_arrow_vars <- function(vars) {
  restore_var <- function(name, value) {
    if (is.null(value) || is.na(value) || !nzchar(value)) {
      Sys.unsetenv(name)
    } else {
      args <- list(value)
      names(args) <- name
      do.call(Sys.setenv, args)
    }
  }

  restore_var("AWS_DEFAULT_REGION", vars$user_region)
  restore_var("AWS_EC2_METADATA_DISABLED", vars$user_meta)
  invisible(NULL)
}
28 changes: 28 additions & 0 deletions modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions modules/data.atmosphere/man/noaa_stage2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions modules/data.atmosphere/tests/Rcheck_reference.log
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ Found the following (possibly) invalid URLs:
The Date field is over a month old.
* checking package namespace information ... OK
* checking package dependencies ... WARNING
Imports includes 38 non-default packages.
Imports includes 39 non-default packages.
Importing from so many packages makes the package vulnerable to any of
them becoming unavailable. Move as many as possible to Suggests and
use conditionally.

* checking package dependencies ... NOTE
Imports includes 38 non-default packages.
Imports includes 39 non-default packages.
Importing from so many packages makes the package vulnerable to any of
them becoming unavailable. Move as many as possible to Suggests and
use conditionally.
Expand Down
Loading

0 comments on commit e8cc5f7

Please sign in to comment.