-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathprepare_forcings_for_clm.R
2011 lines (1756 loc) · 89.3 KB
/
prepare_forcings_for_clm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
##############################################################################################
#' title Workflow to NCAR CLM data set
#' author
#' Hannah Holland-Moritz (hhollandmoritz AT gmail.com), based on script by David Durden (eddy4R.info AT gmail.com)
#'
#' description
#' Workflow for collating NIWOT LTER data, gap-filling, and packaging in NCAR CLM netcdf format.
# Modified from David Durden's flow.api.clm.R script for NEON data
# changelog and author contributions / copyrights
# David Durden (2019-07-05)
# original creation
# David Durden (2020-05-31)
# Updating to use neonUtilities for all data retrieval from API
##############################################################################
##############################################################################
# Dependencies
##############################################################################
#Call the R HDF5 Library
packReq <- c("rhdf5","REddyProc", "ncdf4","devtools","magrittr","EML", "dplyr",
"ggplot2", "purrr", "tidyr", "lubridate","RCurl", "httr", "jsonlite", "stringr")
#Install and load all required packages
lapply(packReq, function(x) {
print(x)
if (require(x, character.only = TRUE) == FALSE) {
install.packages(x)
library(x, character.only = TRUE)
}})
#Setup Environment
options(stringsAsFactors = F)
##############################################################################
#Workflow parameters
##############################################################################
#### Ploting options ####
# Should plots be made of gap-filled data?
makeplots <- TRUE # FALSE
#### Output Options ####
# Base directory for all files
DirBase <- "~/Desktop/Working_files/Niwot/"
# Base directory for output
DirOutBase <- paste0(DirBase,"CLM/data")
#### Download and input options ####
# Directory to download precipitation and radidation data to
DirDnld = paste0(DirBase,"lter_flux")
# Should a newer version of precip data be automatically
# downloaded if one is available?
getNewData = TRUE
# Ameriflux username and email
# NOTE: you cannot download Ameriflux data without a valid username and email
# to create an account, visit the Ameriflux website: https://ameriflux.lbl.gov/
# Please also read their data-use policy, by downloading their data you are agreeing
# to follow it. The policy can be found here: https://ameriflux.lbl.gov/data/data-policy/
amf_usr <- "jayka" # CHANGE ME
amf_email <- "[email protected]" # CHANGE ME
#### Tower Use Options ####
# What tvan tower should be used?
tower <- "Both" # Options are "East", "West", or "Both"
# if "Both" the one tower will be used to gapfill the other tower
# basetower provides which tower is the baseline that will be filled
# with the other tower. Currently the East tower record is more complete
# and has fewer gaps and errors, so it is being used as the basetower.
basetower <- "East" # West
#### Tvan data location ####
# Only necessary to set the location of the tower that you are processing, or
# both, if tower = "Both"
# The data should be formatted with ReddyProc file format.
# Briefly the file should be formated as follows: the file should be
# tab-delimited with the first row specifying the name of the variable
# and the second specifying the units of that variable. The columns should have names
# and units that follow the guidelines below:
# Column formating guidelines for Tvan data
# (optional indicates a column is not necessary for producing the final netcdf,
# it includes variables that are necessary for CLM, and also variables that are
# necessary for ReddyProc gapfilling of the data in preparation for CLM).
# | Column Name | Column Description | Units | Optional? |
# | ----------- | -------------------------------- | -------------- | --------- |
# | NEE | Net ecosystem exchange | umol m^-2 s^-1 | Yes |
# | LE | Latent heat flux | W m^-2 | No |
# | H | Sensible heat flux | W m^-2 | No |
# | Ustar | Friction velocity | m s^-1 | Yes |
# | Tair | Air temperature | degC | No |
# | VPD | Vapor pressure density | kPa | No |
# | rH | relative humidity | % | No |
# | U | Wind speed | m s^-1 | No |
# | P | Atmospheric pressure | kPa | No |
# | Tsoil | Soil temperature | degC | Yes |
# | Year | Year | - | No |
# | DoY | The day of year (1-365/366) | - | No |
# | Hour | Decimal hour of the day (0.5-24) | - | No |
# The location of the east tvan data filepath, use "", if tower = "West"
DirIN = paste0(DirBase,"Tvan_out_new/supp_filtering/")
east_data_fp <- paste0(DirIN,"tvan_East_2007-05-10_00-30-00_to_2021-11-08_flux_P_reddyproc_cleaned.txt")
# The location of the west tvan data filepath, use "", if tower = "East"
west_data_fp <- paste0(DirIN,"tvan_West_2007-05-10_00-30-00_to_2021-11-08_flux_P_reddyproc_cleaned.txt")
#### Simulated Runoff Option ####
# WARNING THIS FEATURE IS UNTESTED; CHANGE AT YOUR OWN RISK
# The user can provide a data file from a simulated Moist Meadow run that
# contains two columns, a timestamp column (every timestamp represents the
# state at the *end* of the 30 minute sampling period) called "time",
# and a column containing the QRUNOFF amounts in mm/s from a Moist Meadow
# simulation. If provided, this data will be added to the Wet meadow
# precipitation. If not provided, wet meadow precipitation will be 75% of
# observed precipitation.
# As done in Wieder et al. 2017, JGR-B. doi:10.1002/2016JG003704.
# Provide a character string specifying the location of the simulated runoff data
# if NA, no simulated runoff will be used
simulated_runoff_fp <- paste0(DirIN,'QRUNOFF_clm50bgc_NWT_mm_newPHS_lowSLA.csv')
##############################################################################
# Static workflow parameters - these are unlikely to change
##############################################################################
#Append the site to the base output directory
DirOut <- paste0(DirOutBase, "/", "data")
plots_dir <- paste0(DirOutBase, "/plots")
# Check if directory exists and create if not
if (!dir.exists(DirOut)) dir.create(DirOut, recursive = TRUE)
if (!dir.exists(DirDnld)) dir.create(DirDnld, recursive = TRUE)
if (!dir.exists(plots_dir)) dir.create(plots_dir, recursive = TRUE)
# the EDI id for precip data from the saddle and C1 weather stations
saddle_precip_data <- "416" # NWT LTER EDI id
# Lat/long coords - shouldn't need to change unless modified in surface
# dataset lat/long
latSite <- 40.05 # should match the lat of the surface dataset
lonSite <- 360 - 254.42 # should match the long of the surface dataset
# Should simulated runoff mode be activated?
if (is.na(simulated_runoff_fp)) {
simulated_runoff_present <- FALSE
writeLines(paste0("No simulated runoff file supplied. Wet meadow precipitation",
" will be calculated without any added runoff."))
} else {
simulated_runoff_present <- TRUE
writeLines(paste0("You have supplied the following simulated runoff file: \n",
simulated_runoff_fp,
"\nIt will be added when wet meadow precipitation",
" is calculated."))
}
##############################################################################
# Helper functions - for downloading and loading data
##############################################################################
# Functions for downloading LTER Precip data are from Sarah Elmendorf's
# utility_functions_all.R script
# https://github.com/NWTlter/long-term-trends/blob/master/utility_functions/utility_functions_all.R
# function to determine current version of data package on EDI
getCurrentVersion <- function(edi_id){
require(magrittr)
versions = readLines(paste0('https://pasta.lternet.edu/package/eml/knb-lter-nwt/', edi_id),
warn = FALSE) %>%
as.numeric() %>% (max)
packageid = paste0('knb-lter-nwt.', edi_id, '.', versions)
return(packageid)
}
#function to download the EML file from EDI
getEML <- function(packageid){
require(magrittr)
myurl<-paste0("https://portal.edirepository.org/nis/metadataviewer?packageid=",
packageid,
"&contentType=application/xml")
#myeml<-xml2::download_html(myurl)%>%xml2::read_xml()%>%EML::read_eml()
myeml<-xml2::read_xml(paste0("https://portal.edirepository.org/nis/metadataviewer?packageid=",
packageid,
"&contentType=application/xml")) %>% EML::read_eml()
}
# Function for downloading from EDI
download_EDI <- function(edi_id, dest_dir, getNewData = TRUE) {
# This section heavily borrowed from Sarah Elmendorf's generic_timeseries_workflow.R script
# https://github.com/NWTlter/long-term-trends/blob/master/plotting_scripts/generic_timeseries_workflow.R
# Depends on getCurrentVersion() and getEML()
packageid = getCurrentVersion(edi_id)
if (any(grepl(packageid, list.files(dest_dir)) == TRUE)) {
writeLines(paste0("Most recent package version ",
packageid, " is already downloaded. Nothing to do."))
return(list.files(dest_dir, pattern = paste0(packageid, ".{1,}csv"), full.names = T))
} else if (getNewData == FALSE) {
writeLines(paste0("A more recent version of the data (version ",
packageid, ") is available. ",
"But since you have specified getNewData = FALSE, ",
"the latest version will not be downloaded."))
return(list.files(dest_dir, pattern = paste0(".{1,}csv"), full.names = T))
} else {
writeLines(paste0("Downloading package ", packageid, " from EDI."))
myeml = getEML(packageid)
# Create output directory for data
ifelse(!dir.exists(file.path(dest_dir)),
dir.create(file.path(dest_dir)), FALSE)
### eml reading and downloading of csv
if (is.null(names(myeml$dataset$dataTable))) {
attributeList = lapply(myeml$dataset$dataTable, function(x){
EML::get_attributes(x$attributeList)
})
names(attributeList) = lapply(myeml$dataset$dataTable, function(x){
x$physical$objectName})
if (getNewData) {
#download all the datatables in the package
csv_list <- list()
csv_list <- lapply(myeml$dataset$dataTable, function(x){
url_to_get = x$physical$distribution$online$url$url
download.file(url_to_get,
destfile = paste0(dest_dir, "/",
packageid, "_",
myeml$dataset$dataTable$physical$objectName),
method = "curl")
output_csv_file <- paste0(dest_dir, "/",
packageid, "_",
myeml$dataset$dataTable$physical$objectName)
})
}
}else{
#if only one data table
attributeList = list(EML::get_attributes(myeml$dataset$dataTable$attributeList))
names(attributeList) = myeml$dataset$dataTable$physical$objectName
if (getNewData) {
url_to_get = myeml$dataset$dataTable$physical$distribution$online$url$url
download.file(url_to_get,
destfile = paste0(dest_dir, "/",
packageid, "_",
myeml$dataset$dataTable$physical$objectName),
method = "curl")
output_csv_file <- paste0(dest_dir, "/",
packageid, "_",
myeml$dataset$dataTable$physical$objectName)
}
}
# Also save the full xml
write_eml(myeml, file = paste0(dest_dir, "/", packageid, ".xml"))
writeLines(paste0("Downloaded data can be found in: ", dest_dir))
return(output_csv_file)
}
}
# Function for downloading USCRN precip
download_USCRN <- function(start_date, end_date, dest_dir, DoNotOverwrite = TRUE) {
# This function downloads precipitation data from the Boulder USCRN weather
# station at C1. It returns a list of the files it tried to download. By
# default it will not download files that are already in the destination directory.
# Arguments:
# start_date = the start date of tvan data in character form (or other form
# that lubridate can coerce with its `year()` function)
# end_date = the end date of tvan data in character form (or other form
# that lubridate can coerce with its `year()` function)
# dest_dir = the destination directory where the files will be downloaded
# DoNotOverwrite = should existing files with the same name be overwritten? If
# TRUE, files will not be overwritten, if FALSE, files will be
#overwritten.
require(lubridate)
require(RCurl)
# To do: replace this warning with a check for the tvan data
message("Please note, end_date of USCRN data must not be less than the end_date of the tvan data.")
# make dest_dir if it doesn't exist
made_dir <- ifelse(!dir.exists(file.path(dest_dir)),
dir.create(file.path(dest_dir), recursive = TRUE), FALSE)
if (!made_dir) {
writeLines("Data download directory not created, it already exists.")
}
# Create a list of urls - one for each year of data
url_list <- vector(mode = "list",
length = lubridate::year(end_date) - lubridate::year(start_date) + 1)
file_list <- vector(mode = "list",
length = lubridate::year(end_date) - lubridate::year(start_date) + 1)
# get the names for each year (including unfinished partial years at the end)
names(url_list) <- lubridate::year(seq(from = lubridate::ymd(as.Date(start_date)),
length.out = (lubridate::year(end_date) -
lubridate::year(start_date) + 1),
by = "years"))
names(file_list) <- lubridate::year(seq(from = lubridate::ymd(as.Date(start_date)),
length.out = (lubridate::year(end_date) -
lubridate::year(start_date) + 1),
by = "years"))
for (i in seq_along(url_list)) {
url_list[[i]] <- paste0("https://www1.ncdc.noaa.gov/pub/data/uscrn/products/subhourly01/", names(url_list[i]), "/CRNS0101-05-", names(url_list[i]),"-CO_Boulder_14_W.txt")
}
# Check if url exists and if it does, download file
for (i in seq_along(url_list)) {
writeLines(paste0("Checking if ", url_list[[i]], " exists..."))
if (!url.exists(url_list[[i]])) {
stop(paste0("Url ", x, " is not accessible."))
} else {
writeLines("TRUE")
}
# Check if destination file already exists
dest_fp <- paste0(dest_dir, "/CRNS0101-05-",
names(url_list[i]),"-CO_Boulder_14_W.txt")
file_list[[i]] <- dest_fp
if (file.exists(dest_fp) & DoNotOverwrite == TRUE) {
writeLines(paste0(dest_fp, " already exits, skipping..."))
} else { # if file doesn't exist or if overwrite is TRUE, download
try(download.file(url = url_list[[i]],
destfile = dest_fp))
}
}
return(file_list)
}
# Function for reading in USCRN precip text files
read_USCRN_precip_data <- function(USCRN_precip_fp) {
# This function reads in USCRN precipitation data files. It adds column
# names and then it 1) collapses the time from 5-minute increments to half-
# hourly by summing the precipitation over each 1/2-hour period; 2) Changes -9999
# to NAs; and 3) selects only the local date, local time, and precpitation variables
# for the final data frame. It returns the resulting dataframe.
# Arguments:
# USCRN_precip_fp = file path to the USCRN text file you want to load
# USCRN Fields and information can be found here:
# https://www1.ncdc.noaa.gov/pub/data/uscrn/products/subhourly01/README.txt
# Field# Name Units
# ---------------------------------------------
# 1 WBANNO XXXXX
# 2 UTC_DATE YYYYMMDD
# 3 UTC_TIME HHmm
# 4 LST_DATE YYYYMMDD
# 5 LST_TIME HHmm
# 6 CRX_VN XXXXXX
# 7 LONGITUDE Decimal_degrees
# 8 LATITUDE Decimal_degrees
# 9 AIR_TEMPERATURE Celsius
# 10 PRECIPITATION mm
# 11 SOLAR_RADIATION W/m^2
# 12 SR_FLAG X
# 13 SURFACE_TEMPERATURE Celsius
# 14 ST_TYPE X
# 15 ST_FLAG X
# 16 RELATIVE_HUMIDITY %
# 17 RH_FLAG X
# 18 SOIL_MOISTURE_5 m^3/m^3
# 19 SOIL_TEMPERATURE_5 Celsius
# 20 WETNESS Ohms
# 21 WET_FLAG X
# 22 WIND_1_5 m/s
# 23 WIND_FLAG X
#
# ----------------------- Begin Function -------------------- #
require(dplyr)
# read in text file
writeLines(paste0("Reading in ", USCRN_precip_fp))
precip <- read.table(USCRN_precip_fp, sep = "",
colClasses = c(rep("character", times = 6),
rep("numeric", times = 7),
"character",
rep("numeric", times = 9)))
# Assign column names
names(precip) <- c("WBANNO", "UTC_DATE", "UTC_TIME", "LST_DATE", "LST_TIME",
"CRX_VN", "LONGITUDE", "LATITUDE", "AIR_TEMPERATURE",
"PRECIPITATION", "SOLAR_RADIATION", "SR_FLAG",
"SURFACE_TEMPERATURE", "ST_TYPE", "ST_FLAG",
"RELATIVE_HUMIDITY", "RH_FLAG", "SOIL_MOISTURE_5",
"SOIL_TEMPERATURE_5", "WETNESS", "WET_FLAG", "WIND_1_5",
"WIND_FLAG")
# Clean data frame
precip <- precip %>%
# Split local time string and convert to decimal time
dplyr::mutate(UTC_TIME = gsub("(..)(..)", "\\1:\\2:00", UTC_TIME),
cleanTime_UTC =
strsplit(UTC_TIME, ":") %>%
sapply(function(x){
x <- as.numeric(x)
x[1] + x[2]/60 + x[3]/(60*60)
}),
decimalTime_UTC = floor(cleanTime_UTC * 2)/2) %>%
dplyr::mutate(LST_TIME = gsub("(..)(..)", "\\1:\\2:00", LST_TIME),
cleanTime_LST =
strsplit(LST_TIME, ":") %>%
sapply(function(x){
x <- as.numeric(x)
x[1] + x[2]/60 + x[3]/(60*60)
}),
decimalTime_LST = floor(cleanTime_LST * 2)/2) %>%
# select only columns used for precipitation and time stamp
dplyr::select(UTC_DATE, UTC_TIME, LST_DATE, LST_TIME,
cleanTime_UTC, decimalTime_UTC,
cleanTime_LST, decimalTime_LST, PRECIPITATION) %>%
# set NAs from -9999
dplyr::mutate_all(list(~na_if(., -9999))) %>%
# sum all precip events in each 1/2 period
dplyr::group_by(UTC_DATE, decimalTime_UTC) %>%
dplyr::mutate(PRECIP_TOT = sum(PRECIPITATION)) %>%
# remove extra time steps
dplyr::select(-PRECIPITATION, -LST_TIME, -UTC_TIME,
-cleanTime_UTC, -cleanTime_LST) %>%
unique() %>%
# create 1/2-hourly time stamps
dplyr::mutate(UTC_DATE = as.Date(UTC_DATE, format = "%Y%m%d"),
timestamp_UTC = as.POSIXct(paste0(UTC_DATE," 00:00:00"),
tz = "UTC") + 3600*decimalTime_UTC) %>%
dplyr::mutate(LST_DATE = as.Date(LST_DATE, format = "%Y%m%d"),
timestamp_LST = as.POSIXct(paste0(LST_DATE," 00:00:00"),
tz = "MST") + 3600*decimalTime_LST)
return(precip)
}
# Function for downloading radiation data from Ameriflux
download_amflx <- function(dest_dir, username, useremail,
site = "US-NR1", DescriptionOfDataUse,
DoNotOverwrite = TRUE,
verbose = FALSE) {
# This function downloads radiation data from the Ameriflux webiste
# It returns a list of the files it tried to download. By default it will
# not download files that are already in the destination directory.
# Arguments:
# dest_dir -------------- the destination directory where the files will be
# downloaded
# username -------------- the Ameriflux username of the user - this function
# will fail without a valid username.
# useremail -------------- the email address of the user, provied to Ameriflux
# upon creating an account.
# site ------------------ the Ameriflux site to get the data from; defaults to
# US-NR1
# DescriptionOfDataUse --- the description to provide to Ameriflux for the intended
# use of the data. If not provided by the user, the
# description will read:
#
# These data will be used as atmospheric forcings
# to run a local point-simulation for the alpine
# tundra at the Niwot Ridge LTER site.
#
# DoNotOverwrite --------- should existing files with the same name be overwritten?
# If TRUE, files will not be overwritten, if FALSE, files
# will be overwritten.
# verbose ---------------- Should the communication with the website be verbose?
# default is FALSE.
require(httr)
require(jsonlite)
require(RCurl)
# Testing
# site <- "US-NR1"
# username <- amf_usr
# dest_dir <- "~/Downloads/lter_flux/rad2"
writeLines("Connecting with Ameriflux endpoint...")
# NOTE THIS ENDPOINT MAY CHANGE
ameriflux_endpoint <- "https://amfcdn.lbl.gov/api/v1/data_download"
if (missing(DescriptionOfDataUse)) {
DescriptionOfDataUse = "These data will be used as atmospheric forcings to run a local point-simulation for the alpine tundra at the Niwot Ridge LTER site."
}
# Construct Payload request for ameriflux endpoint
Payload <- paste0('{',
'"user_id":"', username, '",',
'"user_email":"', useremail, '",',
'"data_product":', '"BASE-BADM"',',',
'"data_variant":', '"FULLSET"',',',
'"data_policy":', '"LEGACY"',',',
'"site_ids":["', site, '"],',
'"intended_use": "Research - Land model/Earth system model",',
'"description": "', DescriptionOfDataUse, '"',
# '"is_test": ', "false",
'}')
# Get download information from Ameriflux endpoint
if (verbose) {
tmp <- httr::POST(url = ameriflux_endpoint,
body = Payload, verbose(), content_type_json())
} else {
tmp <- httr::POST(url = ameriflux_endpoint,
body = Payload, content_type_json())
}
# Check that the connection was successful
if (tmp$status_code < 200 | tmp$status_code > 299) {
stop(paste0("Attempt to connect to the website was not successful.\n",
"This may be because Ameriflux has changed its endpoint url \n",
"and you may need to contact Ameriflux support for an updated \n",
"address, or it may be due to a mistake in the request payload \n",
"syntax. Please check that the Ameriflux endpoing url and the \n",
"payload syntax are valid. \n\n",
"Current endpoint: ", ameriflux_endpoint, "\n",
"Current payload: ", Payload))
} else {
writeLines("Connection to Ameriflux successful.")
}
# extract content from the response
r <- content(tmp)
# Check if the content is successfully received
if (class(r) == "raw" | length(r$data_urls) == 0) {
stop(paste0("No data was received from Ameriflux. Please check that your ",
"username is valid and that both it and the site name are ",
"spelled correctly."))
}
# Extract list of ftp urls
url_list <- unlist(lapply(1:length(r$data_urls),
function(x){r$data_urls[[x]]$url}))
if(length(url_list) == 1){
file_list <- url_list # url_list is actually not a list
} else {
file_list <- as.vector(mode = "list",
length = length(url_list))
}
# Notify user of the data policy prior to download
message(paste0("Thank you for using Ameriflux data. Please be aware of the data \n",
"policy. By downloading this data you are acknowledging that you \n",
"have read and agree to that policy. \n\n",
"The following is how you described how you intend to use the data.\n\n",
"\tIntended Use: Research - Land model/Earth system model \n",
"\tDescription: These data will be used as atmospheric forcings \n",
"\tto run a local point-simulation for the alpine tundra at the \n",
"\tNiwot Ridge LTER site)\n\n",
"By downloading the data, the data contributors have been informed \n",
"of your use. If you are planning an in-depth analysis that may \n",
"result in a publication, please contact the data contributors \n",
"directly so they have the opportunity to contribute substantially \n",
"and become a co-author. \n\n",
"The contact email for this site is: ",
unlist(r$manifest$emailForSitePIs), "\n\n",
"You should also acknowledge Ameriflux in your presentations and \n",
"publications. Details about how this should be done can be found \n",
"on the Ameriflux website. \n\n",
"The full policy along with details about how to properly cite the \n",
"data can found here: \n",
"https://ameriflux.lbl.gov/data/data-policy/"))
# make dest_dir if it doesn't exist
made_dir <- ifelse(!dir.exists(file.path(dest_dir)),
dir.create(file.path(dest_dir), recursive = TRUE), FALSE)
writeLines("Downloading data...")
if(!made_dir) {
writeLines("Data download directory not created, it already exists.")
}
# Check if downloaded files already exist and if not, download file
for (i in seq_along(url_list)) {
# Check if destination file already exists
dest_fp <- paste0(dest_dir, "/", basename(url_list[[i]]))
file_list[[i]] <- dest_fp
if (file.exists(dest_fp) & DoNotOverwrite == TRUE) {
writeLines(paste0(dest_fp, " already exits, skipping..."))
} else { # if file doesn't exist or if overwrite is TRUE, download
# try(download.file(url = url_list[[i]],
# destfile = dest_fp,
# method = "curl"))
try(GET(url = url_list[[i]],
write_disk(dest_fp, overwrite=FALSE), progress(), verbose()))
}
}
return(unlist(file_list))
}
##############################################################################
# Read in L1 flux tower data product
##############################################################################
# Read in East & West tower
if (tower == "East" | tower == "Both") {
# East data
tvan_east <- read.table(file = east_data_fp, sep = "\t",
skip = 2, header = FALSE)
tvan_east_names <- read.table(file = east_data_fp, sep = "\t",
header = TRUE, nrows = 1)
tvan_east_units <- as.character(unname(unlist(tvan_east_names[1,])))
colnames(tvan_east) <- names(tvan_east_names)
}
if (tower == "West" | tower == "Both") {
# West data
tvan_west <- read.csv(file = west_data_fp, sep = "\t",
skip = 2, header = FALSE)
tvan_west_names <- read.table(file = west_data_fp, sep = "\t",
header = TRUE, nrows = 1)
tvan_west_units <- as.character(unname(unlist(tvan_west_names[1,])))
colnames(tvan_west) <- names(tvan_west_names)
}
# Get the start and end dates of the tvan data. If tower = "Both",
# combine East and West data into one dataframe for convenience
if (tower == "Both") {
tvan_east$Tower <- "East"
tvan_west$Tower <- "West"
tvan_all <- bind_rows(tvan_east, tvan_west) %>%
mutate_all(list(~na_if(., -9999))) %>%
mutate(date = as.Date(DoY - 1, origin = paste0(Year, "-01-01")),
timestamp = as.POSIXct(paste0(date," 00:00:00"),
format = "%Y-%m-%d %H:%M:%OS",
tz = "MST") + 3600*Hour) %>%
group_by(Tower, Year, DoY) %>%
mutate_at(vars(NEE:Ustar), list(daily_mean = mean), na.rm = TRUE) %>%
select(date, timestamp, Year, DoY, Hour, Tower, everything())
# Set a start/end date for the precip and radiation data based on the tvan data
# make sure it's a round number or rEddyProc will complain
start_date <- ceiling_date(min(tvan_all$timestamp, na.rm = TRUE), unit = "day")
end_date <- floor_date(max(tvan_all$timestamp, na.rm = TRUE), unit = "day")
} else if (tower == "East") {
tvan_east$Tower <- "East"
# Set a start/end date for the precip and radiation data based on the tvan data
start_date <- min(tvan_east$timestamp, na.rm = TRUE)
end_date <- max(tvan_east$timestamp, na.rm = TRUE)
} else if (tower == "West") {
tvan_west$Tower <- "West"
# Set a start/end date for the precip and radiation data based on the tvan data
start_date <- min(tvan_west$timestamp, na.rm = TRUE)
end_date <- max(tvan_west$timestamp, na.rm = TRUE)
}
# Create a timeseries dataframe with the timestamps (this is in MST since start_date
# and end_date are in MST):
posix_complete <- as.data.frame(seq.POSIXt(start_date, end_date, by = "30 mins"))
colnames(posix_complete) <- "timestamp"
# get rid of first timestep, which is at midnight and not 00:30:00; it makes rEddyProc complain
posix_complete <- data.frame(timestamp = posix_complete[-1,])
##############################################################################
# Download Precipitation
##############################################################################
# Download precip data
# From here: https://portal.edirepository.org/nis/mapbrowse?packageid=knb-lter-nwt.416.10
writeLines("Downloading Saddle Precip data from EDI...")
saddle_precip_data_fp <- download_EDI(edi_id = saddle_precip_data,
dest_dir = paste0(DirDnld, "/precip_data"),
getNewData = getNewData)
writeLines("Downloading C1 precipitation data from USCRN...")
USCRN_precip_data_fp <- download_USCRN(start_date = start_date,
end_date = end_date,
dest_dir = paste0(DirDnld, "/precip_data"),
DoNotOverwrite = TRUE)
##############################################################################
# Handling Precip data
##############################################################################
# Saddle precip data must be corrected for blowing snow events, and extended to
# half-hourly precip using Will's formula (see below for details).
writeLines("Reading in Saddle data...")
# Read in Saddle and USCRN Precip data; also collapse USCRN data into one dataframe
saddle_precip <- read.csv(saddle_precip_data_fp,
sep = ",", quot = '"', check.names = TRUE)
writeLines("Reading in C1 precipitation data from USCRN. This may take a while.")
USCRN_precip_list <- lapply(USCRN_precip_data_fp, read_USCRN_precip_data)
USCRN_precip <- plyr::rbind.fill(USCRN_precip_list) %>%
unique() # make sure to remove duplicates caused by aggregating to 30-minute time steps
# Check for duplicated time stamps - should be 0 (aka no TRUEs)
if (sum(duplicated(USCRN_precip$timestamp_UTC)) > 0) {
warning("USCRN precipitation data still contains ",
sum(duplicated(USCRN_precip$timestamp_UTC)),
" duplicates!")
} else {
writeLines(paste0("USCRN precipitation data has been loaded. ",
sum(duplicated(USCRN_precip$timestamp_UTC)),
" duplicated timestamps have been detected."))
}
# Filter the precip data by exact start and end dates
saddle_precip <- saddle_precip %>%
mutate(date = as.Date(date)) %>%
filter(date >= floor_date(start_date, unit = "day") &
date <= ceiling_date(end_date, unit = "day"))
USCRN_precip <- USCRN_precip %>%
rename(date = LST_DATE) %>%
mutate(timestamp_LST = as.POSIXct(timestamp_LST, tz = "MST")) %>%
filter(timestamp_LST >= floor_date(start_date, unit = "day") &
timestamp_LST <= ceiling_date(end_date, unit = "day"))
# Apply blowing snow correction to months of Oct-May Saddle data
# Due to blowing snow events where the belfort gauge has an oversampling of precipitation,
# it is recommended to add a correction for the precipitation total in the months Oct-May.
# The recommended correction for these events should be (0.39 * the recorded total). More
# information on this can be found in:
# Williams, M.W., Bardsley, T., Rikkers, M., (1998) Overestimation of snow depth and inorganic nitrogen wetfall using NADP data, Niwot Ridge, Colorado. Atmospheric Environment 32 (22) :3827-3833
writeLines("Applying blowing snow correction to Saddle precip data.")
saddle_precip <- saddle_precip %>%
mutate(month = month(date),
ppt_tot_corr = ifelse(month %in% c(10, 11, 12, 1, 2, 3, 4, 5),
ppt_tot * 0.39, ppt_tot))
# Change any Nas or NaNs to zero
saddle_precip <- saddle_precip %>%
mutate(ppt_tot_corr = ifelse(is.na(ppt_tot_corr), 0, ppt_tot_corr))
USCRN_precip <- USCRN_precip %>%
mutate(PRECIP_TOT = ifelse(is.na(PRECIP_TOT), 0, PRECIP_TOT))
# Apply Will's algorithm for Precip data from paper:
# Use half-hourly precipitation recordfrom the U.S. Climate Reference Network (USCRN; data from https://www1.ncdc.noaa.gov/pub/data/uscrn/products/subhourly01/;), measured nearby (4 km) at the lower elevation(3050 m asl) C-1 site. Proportioanlly allocate the daily saddle precip measurements to the half-hourly precip record from USCRN. On days when Saddle record reports measurable precip, but the USCRN does not, distribute the daily saddle precip evenly across the day for model simulations.
# Code modified from his TVAN_daily_ppt.R script
writeLines(paste0("Applying Will Wieder's algorithm for allocating daily Saddle ",
"precipitation totals into 30-minute increments."))
Tvan_ppt <- saddle_precip$ppt_tot_corr
CRNS_ppt <- USCRN_precip$PRECIP_TOT
CRNS_date <- USCRN_precip$date
CRNS_mo <- month(USCRN_precip$date)
CRNS_hour <- USCRN_precip$decimalTime
CRNS_d <- tapply(CRNS_ppt, CRNS_date, sum) # daily precip totals
CRNS_day <- tapply(CRNS_date, CRNS_date, mean) # num of days since 1970-01-01 - see date.mean()
CRNS_month <- tapply(CRNS_mo, CRNS_date, mean) # months
#------------------------------------------------------
# distribute Tvan ppt when observed in half-hourly CRNS
#------------------------------------------------------
ndays <- length(Tvan_ppt)
nsteps <- length(CRNS_ppt)
Tvan_fine <- rep(NA, nsteps)
Tvan_note <- rep(NA, nsteps)
Tvan_flag <- rep(NA, ndays)
Tvan_flag_mo <- rep(NA, ndays)
Tvan_date <- USCRN_precip$date # MST date
Tvan_hour <- USCRN_precip$decimalTime_LST # MST hour
start <- 1
# code below does the following:
# (0) if no daily precip at Tvan, add zeros to half hourly results
# (1) if precip at Tvan, but not recorded @ CRNS, distribute evenly in day and add 1 the flag
# (2) if both precip at Tvan and CRNS, distribute Tvan in same proportion as CRNS
for (d in 1:ndays) {
end <- start + 47
if (Tvan_ppt[d] == 0) {
Tvan_fine[start:end] <- 0
Tvan_note[start:end] <- 0
} else if (CRNS_d[d] == 0){
Tvan_fine[start:end] <- Tvan_ppt[d] / 48
Tvan_note[start:end] <- 1
Tvan_flag[d] <- 1
Tvan_flag_mo[d] <- CRNS_month[d]
} else {
temp_frac <- CRNS_ppt[start:end] / CRNS_d[d]
Tvan_fine[start:end] <- Tvan_ppt[d] * temp_frac
Tvan_note[start:end] <- 2
}
if (round(sum(Tvan_fine[start:end], na.rm = TRUE), digits = 7) !=
round(sum(Tvan_ppt[d], na.rm = TRUE), digits = 7)) {
warning(paste0("Running precip totals don't match at day ", d))
}
start <- end + 1
}
# Check that the total precip that fell at the saddle is the same as the total precip
# when allocated over 30-minute time steps
if (sum(Tvan_fine, na.rm=T) == sum(Tvan_ppt)) {
writeLines(paste0("Total precip that fell at the Saddle (", sum(Tvan_ppt),
") matches the amount of total precip that has been ",
"allocated to the for the tvan data (", sum(Tvan_fine, na.rm=T), ")."))
} else {
warning(paste0("Total precip that fell at the Saddle (", sum(Tvan_ppt),
") does NOT match the amount of total precip that has been ",
"allocated to the for the tvan data (", sum(Tvan_fine, na.rm=T), ")!"))
}
writeLines(paste0("Number of total days = ",ndays, " [", ddays(ndays), "]"))
writeLines(paste0("Number of days w/ precip at Tvan = ",
length(Tvan_ppt[Tvan_ppt > 0])))
writeLines(paste0("Number of days with Tvan precip but w/o recorded CRNS precip = ",
sum(Tvan_flag, na.rm = T)))
hist(Tvan_flag_mo, xlim = c(1,12),
main = paste0("Montly frequency of days with Tvan precip but ",
"w/o recorded CRNS precip"),
xlab = "Months"
)
# Convert precip from mm/30 minutes into mm/s
Precip = Tvan_fine[1:nsteps] # mm every 30 minutes
PRECTmms <- Precip / (30*60) # mm/s
# Combine date and 1/2-hourly precip into one dataframe and add a timestamp
hlf_hr_precip <- data.frame(PRECTmms = PRECTmms, # mm/s
MST_HOUR = Tvan_hour[1:nsteps], # decimal hours
MST_DATE = Tvan_date[1:nsteps]) %>% # date
mutate(timestamp = as.POSIXct(paste0(MST_DATE," 00:00:00"), tz = "MST") +
3600*MST_HOUR) %>%
# fix date so that "0" hour readings are converted into 24
mutate(MST_DATE = if_else(MST_HOUR == 0, MST_DATE - 1, MST_DATE),
MST_HOUR = if_else(MST_HOUR == 0.0, 24, MST_HOUR))
##############################################################################
# Download Radiation data
##############################################################################
writeLines("Downloading Ameriflux radiation data...")
rad_data_fp <- download_amflx(dest_dir = paste0(DirDnld, "/rad_data"),
username = amf_usr, useremail = amf_email, verbose = TRUE)
# NOTE: the files are saved as a zipfile with the username included after the .zip
# Need to remove the username and extra characters before unzipping
file <- rad_data_fp
if (file.exists(file)) {
file.rename(rad_data_fp, paste0(DirDnld, "/rad_data/AMF_US-NR1_BASE-BADM_18-5.zip"))
} else {
cat("The file does not exist")
} # if returns TRUE then the file was renamed
# Now change rad_data_fp to remove username and extra characters
rad_data_fp <- str_remove(rad_data_fp, amf_usr)
rad_data_fp <- substr(rad_data_fp,1,nchar(rad_data_fp)-2)
# Check if the files have already been unzipped, if not, unzip the zip file
for (i in seq_along(rad_data_fp)) {
if (grepl(".zip", basename(rad_data_fp[i]))) {
writeLines(paste0("Unzipping ", rad_data_fp[i]))
# check if the unzipped files exist
unzip_list <- unzip(zipfile = rad_data_fp[i],
exdir = dirname(rad_data_fp[i]),
overwrite = FALSE)
}
}
amf_data_fp <- list.files(dirname(rad_data_fp[i]),
full.names = TRUE,
pattern = "*.csv")
##############################################################################
# Handle Radiation data
##############################################################################
# Note: Radiation data comes from the Ameriflux NR-1 site. Currently this
# data cannot be downloaded automatically and has to be downloaded by hand from
# the Ameriflux site after getting a user account: https://ameriflux.lbl.gov/data/download-data/
# For CLM we will pull out incoming shortwave (necessary) and incoming longwave (optional).
# The net radation is provided by the Tvan tower datasets.
# The possible Ameriflux variables are:
# NETRAD_1_1_2 (W m-2): Net radiation (no QA/QC or gapfilling)
# NETRAD_PI_F_1_1_2 (W m-2): Net radiation (gapfilled by tower team)
# SW_IN_1_1_1 (W m-2): Shortwave radiation, incoming (no QA/QC or gapfilling)
# LW_IN_1_1_1 (W m-2): Longwave radiation, incoming (no QA/QC or gapfilling)
# SW_IN_PI_F_1_1_1 (W m-2): Shortwave radiation, incoming (gapfilled by tower team)
# LW_IN_PI_F_1_1_1 (W m-2): Longwave radiation, incoming (gapfilled by tower team)
# SW_OUT_1_1_1 (W m-2): Shortwave radiation, outgoing (no QA/QC or gapfilling)
# LW_OUT_1_1_1 (W m-2): Longwave radiation, outgoing (no QA/QC or gapfilling)
# SW_OUT_PI_F_1_1_1 (W m-2): Shortwave radiation, outgoing (gapfilled by tower team)
# LW_OUT_PI_F_1_1_1 (W m-2): Longwave radiation, outgoing (gapfilled by tower team)
writeLines("Reading in Ameriflux radiation data...")
# Load in Radiation data:
amf_data <- read.csv(file = amf_data_fp[1],
skip = 2,
header = TRUE,
na.strings = "-9999",
as.is = TRUE)
# Select timestamps, and radiation variables
rad_data <- amf_data[,c("TIMESTAMP_START", "TIMESTAMP_END",
"SW_IN_1_1_1", # also sometimes called Rg
"LW_IN_1_1_1", # also sometimes called FLDS
"SW_IN_PI_F_1_1_1", # also sometimes called Rg
"LW_IN_PI_F_1_1_1", # also sometimes called FLDS
"SW_OUT_1_1_1",
"LW_OUT_1_1_1",
"SW_OUT_PI_F_1_1_1",
"LW_OUT_PI_F_1_1_1",
"NETRAD_1_1_2",
"NETRAD_PI_F_1_1_2")]
rad_data$TIMESTAMP_START <- as.POSIXct(as.character(rad_data$TIMESTAMP_START), format = "%Y%m%d%H%M%OS", tz = "MST")
rad_data$TIMESTAMP_END <- as.POSIXct(as.character(rad_data$TIMESTAMP_END), format = "%Y%m%d%H%M%OS", tz = "MST")
# Subset the radiation data to the Tvan time period, reformat the times to get hours
# and dates, finally, select only the radiation, hour, and date variables.
hlf_hr_rad <- rad_data %>%
mutate(date = lubridate::date(TIMESTAMP_END)) %>%
filter(date >= floor_date(start_date, unit = "day") &
date <= floor_date(end_date, unit = "day")) %>%
# Take reading from end of period, keep the date at midnight as the day before
# to be consistent with other variables
mutate(MST_HOUR = lubridate::hour(TIMESTAMP_END) +
lubridate::minute(TIMESTAMP_END)/60,
MST_DATE = lubridate::date(TIMESTAMP_START)) %>%
# fix date so that "0" hour readings are converted into 24
mutate(MST_HOUR = if_else(MST_HOUR == 0.0, 24, MST_HOUR)) %>%
# Calculate net radiation from in/out radiation
mutate(radNet = (SW_IN_PI_F_1_1_1 - SW_OUT_PI_F_1_1_1) +
(LW_IN_PI_F_1_1_1 - LW_OUT_PI_F_1_1_1)) %>%
rename(Rg_usnr1 = SW_IN_PI_F_1_1_1, FLDS = LW_IN_PI_F_1_1_1,
SW_OUT = SW_OUT_PI_F_1_1_1, LW_OUT = LW_OUT_PI_F_1_1_1,
timestamp = TIMESTAMP_END) %>%
select(timestamp, MST_DATE, MST_HOUR, Rg_usnr1, FLDS, radNet)
##############################################################################
# Combine flux and met data
##############################################################################
if (tower == "East" | tower == "Both") {
# East tower
tvan_east_tms <- tvan_east %>%
mutate_all(list(~na_if(., -9999))) %>%
mutate(date = as.Date(DoY - 1, origin = paste0(Year, "-01-01")),
timestamp = as.POSIXct(paste0(date," 00:00:00"),
format = "%Y-%m-%d %H:%M:%OS",
tz = "MST") + 3600*Hour)
}
if (tower == "West" | tower == "Both") {
# West tower
tvan_west_tms <- tvan_west %>%
mutate_all(list(~na_if(., -9999))) %>%
mutate(date = as.Date(DoY - 1, origin = paste0(Year, "-01-01")),
timestamp = as.POSIXct(paste0(date," 00:00:00"),
format = "%Y-%m-%d %H:%M:%OS",
tz = "MST") + 3600*Hour)
}
# Join the flux data to the posix_complete date sequence
if (tower == "Both") {
tmp_east <- left_join(posix_complete, tvan_east_tms, by = "timestamp") %>%
mutate(Tower = "East")
tmp_west <- left_join(posix_complete, tvan_west_tms, by = "timestamp") %>%
mutate(Tower = "West")
tvan_comb_tms <- bind_rows(tmp_east, tmp_west)
tvan_tms <- tvan_comb_tms %>%
# Fill in the DoY, Hour, Date, and Year that are NAs
mutate(date = lubridate::date(timestamp)) %>%
# Take reading from end of period, keep the date at midnight as the day before
# to be consistent with other variables
mutate(Hour = lubridate::hour(timestamp) +
lubridate::minute(timestamp)/60,
date = lubridate::date(timestamp)) %>%
# fix date so that "0" hour readings are converted into 24
mutate(Hour = if_else(Hour == 0.0, 24, Hour),
date = if_else(Hour == 24, date-1, date),
DoY = yday(date),
Year = year(date))
} else if (tower == "West") {
tmp_west <- left_join(posix_complete, tvan_west_tms, by = "timestamp") %>%
mutate(Tower = "West")
tvan_tms <- tmp_west %>%
# Fill in the DoY, Hour, Date, and Year that are NAs