-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2. Random Forest_Council_github.Rmd
1016 lines (771 loc) · 39.2 KB
/
2. Random Forest_Council_github.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "2.from kyle_Random Forest Council (Neural Network)"
#Here, we use the Random Forest model to gapfill data based on predictors and real data - it runs simulations and learns patterns, attempting to fill in gaps based on training data and validation data
#Random Forest is used to do a lot of the gap-filling --we could try to improve it by increasing the trees to allow for more complex relationships and we can clean the data a little more to take out outliers (which Dani did Oct 28, 2024 - we use the re-cleaned version here)
##For Council, NOTE: primary windirection is N-S at this location, S is thermokarst and degradation which can be a big methane source, so wind direction could be important at this site --> NOTE: model is elevating CH4 in the winter, may need to revisit training / other relationships we can use to make sure it's more reasonable (no real winter data available to better train model with)
output: html_document
date: "2024-09-17"
---
#Set working directory
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,dev = 'png')
#knitr::opts_knit$set(root.dir = 'C:/Users/karndt.WHRC/Desktop/sites/council/data') #Edit this to set your working directory
```
#Load in required packages
```{r,error=FALSE,warning=FALSE}
rm(list = ls()) #clear obj from environment
gc() #garbage collection to free up R memory
library(data.table)
library(ggplot2)
library(caret)
library(randomForest)
library(Boruta)
#test
Sys.setenv(TZ='UTC')
```
#Load in site data & make training datasets
```{r,error=FALSE,warning=FALSE}
cf = fread("C:/Users/kkent/Documents/Council Data/Council BASE gapfilling/council_2016_2023_era.csv") #using the re-cleaned df from Dani
#subset out by which has complete air t data
cf = cf[complete.cases(cf$airt.eramod),]
```
#Create subset of data to use for RF training -- use only ERA5 variables for training since RF needs a continuous dataset (no NAs)
```{r}
#create a sub data frame of the variables you want for a neural network (random forest)
#KK Nov 27: added in u, v, wd, and cardinal direction from era df; also changed FC and FCH4 to FC.c, FCH4.c, and SWC_1_1_1.c to use re-cleaned data --> adj to WD from actual data and Cardinal_Direction from actual data as these seem to be era5 data adj by actual data and this uses the actual SWC data from site.
#####
#create subset of the variables wanted for training RF
ncf = data.frame(cf$date,cf$FC.c,cf$FCH4.c,
cf$airt.eramod, cf$wd, cf$cardinal_direction, cf$rh.eramod,cf$rad.eramod,
cf$ws.eramod,cf$tsoil.eramod,#cf$SWC_1_1_1.c,cf$SWC_2_1_1.c,
cf$SWC_3_1_1.c, #SWC 3 = the tussock location
cf$h.eramod,cf$le.eramod)
#SWC also broken up by location
# SWC_1_1_1 % Soil water content (15cm depth) – margin pond
# SWC_2_1_1 % Soil water content (15cm depth) – lichen/berries
# SWC_3_1_1 % Soil water content (15cm depth) - tussock ***** switch to using this one ** (used SWC1 before, by margin pond, switching to SWC3, tussock)
#another way to subset a new df
# library(dplyr)
#
# # Select specific variables from 'cf' and store them in 'ncf'
# ncf <- cf %>%
# select(date, FC.c, FCH4.c, airt.eramod, WD, Cardinal_Direction, rh.eramod, rad.eramod, ws.eramod, tsoil.eramod, SWC_1_1_1.c, h.eramod, le.eramod)
# View the new dataframe
head(ncf)
#rename for easier names
names(ncf) = c('date','nee',"fch4",'tair', 'wd', 'cardDir','rh','rg','ws','tsoil','swc','h','le')
#make the ERA5 wd numeric
# #can't have NAs, so need to remove them or use the ERA5 data -- tried this, model still didn't want to run...
# ncf$wd <- na.omit(ncf$wd)
# ncf$cardDir <- na.omit(ncf$cardDir)
#make card dir as factor - 8 levels
ncf$cardDir <- as.factor(ncf$cardDir)
```
#Calc VPD from air temp and RH
```{r}
#calculate VPD from air t and RH
svp = 610.7*10^((7.5*ncf$tair)/(237.3+ncf$tair))
ncf$vpd = ((100 - ncf$rh)/100)*svp
```
#create training dataset and testing dataset
```{r}
#*KK edit 12/27/24 - changing to SWC3 to reflect tussock
set.seed(123)#sets the start point of models, good for repeatability
cc = ncf[complete.cases(ncf$swc),]#create a gap free data set of the target variable
#use 80% of data set as training set and 20% as test set
sample = sample(c(TRUE, FALSE), nrow(cc), replace=TRUE, prob=c(0.8,0.2))
train = cc[sample, ]
test = cc[!sample, ]
#compare to ensure all data sets are representative of total data
hist(cc$swc)
hist(train$swc)
hist(test$swc)
#run random forest to predict missing SWC values - leave out or include wd? included wd here*
rf.swc = randomForest(formula = swc ~ tair + rh + rg + ws + wd + tsoil + vpd + le + h,data = train,ntree = 150)
#tried various tree numbers, no sig improvement after 300....very minimal diff between 150 and 300, and 150 looks slightly better.
#predict it on the full data set
swc.rf = predict(object = rf.swc,newdata = ncf)
ncf$swc.rf = swc.rf #add the variable to the main data frame
#run random forest to predict missing SWC values - used wd and carDir --> very similar results to when just using wd
# rf.swc2 = randomForest(formula = swc ~ tair + rh + rg + ws + wd + cardDir + tsoil + vpd + le + h,data = train,ntree = 150)
# #tried various tree numbers, no sig improvement after 300....very minimal diff between 150 and 300, and 150 looks slightly better.
#
# #predict it on the full data set
# swc.rf2 = predict(object = rf.swc2,newdata = ncf)
# ncf$swc.rf2 = swc.rf2 #add the variable to the main data frame
```
#Check out the SWC gap filling
```{r}
#time series plot of gap filled vs real
ggplot(data = ncf)+
geom_point(aes(date,swc.rf*100,col='RF'))+
geom_point(aes(date,swc*100,col='Measured'))+
scale_y_continuous("SWC (%)")
#validation dataset from only test data
val = merge(test,ncf,by = "date",all.x = T)
ggplot(data = val,aes(swc.rf,swc.x))+theme_bw()+geom_abline(slope = 1,intercept = 0,col='red')+
geom_point(alpha=0.1)+
scale_x_continuous(limits = c(0,70),"RF SWC (%)")+
scale_y_continuous(limits = c(0,70),"Measured SWC (%)")
#summary stats on gap filling
summary(lm(val$swc.x ~ val$swc.rf)) #R2 = 0.97, slope = 1.02
#create final gap free soil moisture
ncf$swc = ifelse(is.na(ncf$swc),ncf$swc.rf,ncf$swc)
#results from using cardinal directions as well as wd in RF --> very similar results, using the version with just wd for simplicity
#time series plot of gap filled vs real
# ggplot(data = ncf)+
# geom_point(aes(date,swc.rf2*100,col='RF'))+
# geom_point(aes(date,swc*100,col='Measured'))+
# scale_y_continuous("SWC (%)")
#
# #validation dataset from only test data
# val = merge(test,ncf,by = "date",all.x = T)
#
# ggplot(data = val,aes(swc.rf2,swc.x))+theme_bw()+geom_abline(slope = 1,intercept = 0,col='red')+
# geom_point(alpha=0.1)+
# scale_x_continuous(limits = c(0,70),"RF SWC (%)")+
# scale_y_continuous(limits = c(0,70),"Measured SWC (%)")
#
# #summary stats on gap filling
# summary(lm(val$swc.x ~ val$swc.rf2)) #R2 = 0.945
#create final gap free soil moisture
#ncf$swc = ifelse(is.na(ncf$swc),ncf$swc.rf,ncf$swc)
```
#NEE *********************
####Look at range of data
```{r}
ggplot(data = ncf, aes(x=date, y = nee)) + geom_point()
```
####Create NEE dataset & Examine variable importance
```{r}
set.seed(123)
cc = ncf[complete.cases(ncf$nee),] #complete NEE data only
cc = subset(cc,cc$nee < 12 & cc$nee > -14) #the range of nee for ncf dataset is -16 - 10 and there are no real major outliers, so this range captures the data well, otherwise use these to set limits on the spread of data to exclude large outliers and tidy up the model - may need to adjust these limits based on your data*
#this attempts to establish which of the variables have the most important influence on NEE - kyle commented this out, may not be necessary & can take a little while to run but cool to see
#added in wd (degrees)
boruta = Boruta(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,data = cc,doTrace = 2,maxRuns = 100)
#plots the variable importance from boruta
plot(boruta,las = 2)
```
#Let's try to fill NEE with random forest
####Create training and testing dataset
```{r,error=FALSE,warning=FALSE}
#use 80% of data set as training set and 20% as test set
sample.nee = sample(c(TRUE, FALSE), nrow(cc), replace=TRUE, prob=c(0.8,0.2))
train.nee = cc[sample.nee, ]
test.nee = cc[!sample.nee, ]
#Make sure the patterns here look similar so you know the training and testing datasets are representative of the overall dataset
hist(cc$nee)
hist(train.nee$nee)
hist(test.nee$nee)
#sum(is.na(cc$swc)) #make sure there are no NAs in dataset or RF model won't work
```
#TO Do -- add in rf n=1000 data into merged dataset to plot and compare to tree = 150 -- re-run processing steps with new SWC -- re-run processing steps for just Methane based on pond margin, or just 2019 data??
#Find the optimum number of trees for the RF model using minimum out-of-bag error (OOB error)
#Can't get this to work, look over it with Kyle?
```{r}
library(randomForest)
rf_model_test <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = 100,
importance = TRUE)
# Check the structure of err.rate
str(rf_model_test$err.rate) #returns NULL, which means error rate is possibly not included or not in the model config *Ask Kyle about this
#Code if error rate worked (but does not, so see alt code below)
# Define a range of ntree values
ntree_values <- seq(100, 1000, by = 50) # Adjust the range and step as needed
oob_errors <- numeric(length(ntree_values)) # Initialize a vector for storing OOB errors
# Loop through ntree values and extract the final OOB error -- can't get this portion to work
for (i in seq_along(ntree_values)) {
rf_model_ntree <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = ntree_values[i],
importance = TRUE)
# Extract the final OOB error (last row corresponds to the last tree)
oob_errors[i] <- rf_model_ntree$err.rate[nrow(rf_model_ntree$err.rate), "OOB"]
}
# Find the optimal number of trees
optimal_ntree <- ntree_values[which.min(oob_errors)]
# Plot the OOB errors
plot(ntree_values, oob_errors, type = "b", pch = 19, col = "blue",
xlab = "Number of Trees (ntree)", ylab = "OOB Error",
main = "Optimal ntree for Random Forest")
abline(v = optimal_ntree, col = "red", lty = 2) # Add a vertical line for the optimal ntree
# Print the optimal number of trees
cat("Optimal number of trees:", optimal_ntree, "\n")
#Alt code for calculating OOB error ---> this was running for 24 hours and was still not complete, trying the alternate code in the chunk below to try and optimize
# Define a range of ntree values
ntree_values <- seq(100, 1000, by = 50)
oob_errors <- numeric(length(ntree_values)) # Initialize a vector for OOB errors
# Loop through ntree values and calculate OOB error
for (i in seq_along(ntree_values)) {
rf_model <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = ntree_values[i],
importance = TRUE)
# Predict OOB data
oob_pred <- predict(rf_model, train.nee, type = "response", predict.all = TRUE)
# Calculate OOB error as RMSE
oob_error <- sqrt(mean((train.nee$nee - oob_pred$aggregate)^2, na.rm = TRUE))
oob_errors[i] <- oob_error
}
# Find the optimal number of trees
optimal_ntree <- ntree_values[which.min(oob_errors)]
# Plot the OOB errors
plot(ntree_values, oob_errors, type = "b", pch = 19, col = "blue",
xlab = "Number of Trees (ntree)", ylab = "OOB RMSE",
main = "Optimal ntree for Random Forest")
abline(v = optimal_ntree, col = "red", lty = 2) # Add a vertical line for the optimal ntree
# Print the optimal number of trees
cat("Optimal number of trees:", optimal_ntree, "\n")
```
#Parallel progress for calculating OOB error based on number of RF trees
```{r}
library(doSNOW)
library(foreach)
library(parallel)
library(doParallel)
# Define the number of trees and prepare progress bar
ntree_values <- seq(100, 1000, by = 100) # Example range of trees
pb <- txtProgressBar(min = 0, max = length(ntree_values), style = 3)
# Initialize cluster and progress bar
cl <- makeCluster(detectCores() - 1) # Use all but one core
registerDoSNOW(cl)
# Function to run Random Forest and track progress
oob_errors <- foreach(ntree = ntree_values, .combine = c, .packages = "randomForest", .options.snow = list(progress = function(n) setTxtProgressBar(pb, n))) %dopar% {
model <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = ntree,
importance = TRUE)
tail(model$err.rate[, 1], 1) # Get the final OOB error
}
# Close progress bar and cluster
close(pb)
stopCluster(cl)
# Plot OOB errors vs. number of trees
plot(ntree_values, oob_errors, type = "b", xlab = "Number of Trees", ylab = "OOB Error") #plot does not look correct
#checking out the error
print(ntree_values) # correct 100 - 1000 by 100
print(oob_errors) #returns "NULL"
length(ntree_values) == length(oob_errors) # null, oob_errors doesn't seem to exist
#testing OOB error
library(foreach)
result <- foreach(i = 1:5, .combine = c) %dopar% {
i^2
}
print(result) # Should return 1, 4, 9, 16, 25
#checking parallel backend
oob_errors <- foreach(ntree = ntree_values, .combine = c,
.packages = "randomForest",
.options.snow = list(progress = function(n) setTxtProgressBar(pb, n))) %dopar% {
print(ntree) # Check which ntree value is being processed
model <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = ntree,
importance = TRUE)
tail(model$err.rate[, 1], 1) # Return the final OOB error
}
#oob_errors still returns NULL
#testing to see if error rate is in the rf model
test_model <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,
data = train.nee,
ntree = 100,
importance = TRUE)
print(test_model$err.rate) # Check if OOB error rates are available --> returns NULL, so definitely not working, might not be included in the model?
#double checking predictor variables have no NA's
colSums(is.na(train.nee[, c("tair", "rh", "rg", "ws", "wd", "tsoil", "swc", "vpd", "le", "h")])) #--> no NAs or missing values
```
#Sticking with the original 150 for now - will revisit after disc with Kyle
####1000 trees for RF -- have the original 150 trees in code chunks below...
```{r}
# Train the Random Forest model with a large number of trees
rf_model_1000 <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h, data = train.nee, ntree = 1000, importance = TRUE)
pnee.1000 = predict(object = rf_model_1000,newdata = ncf)
cf$rf_model_1000 = pnee.1000
# Extract variable importance
importance_values_1000 <- importance(rf_model_1000, type = 1) # type = 1 for %IncMSE
# Print the importance values
print(importance_values_1000)
#Results:
# %IncMSE
# tair 44.60310
# rh 80.63140
# rg 47.78442
# ws 106.25952 --> 3
# wd 124.84380 --> 2
# tsoil 81.23716
# swc 139.01015--> 1
# vpd 53.10850
# le 72.99186
# h 85.80990
# Plot the importance values
varImpPlot(rf_model_1000, type = 1, main = "Variable Importance (%IncMSE)")
```
#Training RF; *ended up using the model with "wd" - changed SWC to tussock location
```{r}
#original, no wind
rfnee = randomForest(formula = nee ~ tair + rh + rg + ws + tsoil + swc + vpd + le + h,data = train.nee,ntree = 150,importance=T)
pnee = predict(object = rfnee,newdata = ncf)
cf$rfnee = pnee
# Extract variable importance
importance_values1 <- importance(rfnee, type = 1) # type = 1 for %IncMSE
# Print the importance values
print(importance_values1)
# Plot the importance values
varImpPlot(rfnee, type = 1, main = "Variable Importance (%IncMSE)")
#Results:
# %IncMSE
# tair 16.62267
# rh 34.39371
# rg 18.50870
# ws 44.29823 --> 2
# tsoil 29.00674
# swc 57.89705 --> 1
# vpd 17.67771
# le 28.87872
# h 36.95337 --> 3
#using just wd (no cardDir) ---- using this one **************
rfnee.2 = randomForest(formula = nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,data = train.nee,ntree = 150,importance=T)
pnee.2 = predict(object = rfnee.2,newdata = ncf)
cf$rfnee.2 = pnee.2
#trying with just cardinal directions (no wd)
rfnee.3 = randomForest(formula = nee ~ tair + cardDir + rh + rg + ws + cardDir + tsoil + swc + vpd + le + h,data = train.nee,ntree = 150,importance=T)
pnee.3 = predict(object = rfnee.3,newdata = ncf)
cf$rfnee.3 = pnee.3
```
#Checking variable importance using IncMSE
```{r}
#Higher %IncMSE: A higher %IncMSE value means that the variable is more important for the model. Removing this variable would result in a larger increase in the model's prediction error.
#Lower %IncMSE: A lower %IncMSE value means that the variable is less important. Removing this variable would result in a smaller increase in the model's prediction error.
# Train the Random Forest model - same as rfnee.2 in code chunk above
rf_model_nee <- randomForest(nee ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h, data = train.nee, importance = TRUE, ntree = 150)
# Extract variable importance
importance_values <- importance(rf_model_nee, type = 1) # type = 1 for %IncMSE
# Print the importance values
print(importance_values)
# Plot the importance values
varImpPlot(rf_model_nee, type = 1, main = "Variable Importance (%IncMSE)")
#Most important variables for NEE are wd (49.81), swc (49.11), ws(41.35)
```
#RSME of RF NEE ntree 150 and ntree 1000
```{r}
#original, no wd, ntree = 150
rmse_nee <- sqrt(mean((test.data.nee$FC.c - test.data.nee$rfnee)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 150 ", rmse_nee)) #RSME = 1.51710632421705"
#with wd, ntree 150
rmse_nee150 <- sqrt(mean((test.data.nee$FC.c - test.data.nee$rfnee.2)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 150 ", rmse_nee150)) #RSME 1.49099361362971
#with wd, ntree 1000
rmse_nee1000 <- sqrt(mean((test.data.nee$FC.c - test.data.nee$rf_model_1000)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 1000 ", rmse_nee1000)) #RSME 1.49090148870823" --> very slightly better fit
#with Card Dir, ntree 150
rmse_nee_CD <- sqrt(mean((test.data.nee$FC.c - test.data.nee$rfnee.3)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 150 ", rmse_nee_CD)) #RSME 1.49438824446229"
```
####Time series plots
```{r,error=FALSE,warning=FALSE}
#This overlays the random forest modeled data points with the real data points so you can observe model agreement
# #plot of certain timeframe - no wind
# ggplot(data = cf)+theme_bw()+
# geom_point(aes(date,rfnee,col='RF'),alpha=0.2)+
# geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
# scale_x_datetime(limits = as.POSIXct(c("2019-06-15","2019-06-30")))
# #plot of all years
# ggplot(data = cf)+theme_bw()+
# geom_point(aes(date,rfnee,col='RF'),alpha=0.2)+
# geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
# geom_hline(yintercept = 0)
#tree = 150; plot of certain timeframes using wd (degrees) -- used this version of the model
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfnee.2,col='RF'),alpha=0.2)+
geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
labs(title = "with WD (degrees), ntree=150")+
scale_x_datetime(limits = as.POSIXct(c("2019-06-15","2019-06-30")))
#plot of all years; tree = 150
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfnee.2,col='RF'),alpha=0.2)+
geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
geom_hline(yintercept = 0)+
labs(title = "with WD (degrees), ntree=150")
#tree = 1000; plot of certain timeframes using wd (degrees) -- used this version of the model
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rf_model_1000,col='RF'),alpha=0.2)+
geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
labs(title = "with WD (degrees), ntree=1000")+
scale_x_datetime(limits = as.POSIXct(c("2019-06-15","2019-06-30")))
#plot of all years; tree = 1000
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rf_model_1000,col='RF'),alpha=0.2)+
geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
geom_hline(yintercept = 0)+
labs(title = "with WD (degrees), ntree = 1000")
#ntree = 15- and ntree = 1000 look very similar*
# #plot of certain timeframe - cardinal directions, no wd
# ggplot(data = cf)+theme_bw()+
# geom_point(aes(date,rfnee.3,col='RF'),alpha=0.2)+
# geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
# labs(title = "Cardinal Direction (no wd)")+
# scale_x_datetime(limits = as.POSIXct(c("2019-06-15","2019-06-30")))
# #plot of all years
# ggplot(data = cf)+theme_bw()+
# geom_point(aes(date,rfnee.3,col='RF'),alpha=0.2)+
# geom_point(aes(date,FC.c,col='Real'),alpha=0.2)+
# geom_hline(yintercept = 0)+
# labs(title = "Cardinal Direction (no wd)")
#Results very similar, also seems to be predicting slightly lower/more conservative nee compared to site data
```
####Validation - looking at agreement between modeled data and real data
```{r,error=FALSE,warning=FALSE}
#merge the predicted datasets with the orig to validate
test.data.nee = merge(test.nee,cf,by = 'date',all.x = T)
#changed to FC.c to use re-cleaned variable
#original model, no wind direction
summary(lm(test.data.nee$FC.c ~ test.data.nee$rfnee)) #R2 = 0.63, slope = 1.01
# councilnee = ggplot(data = test.data.nee,aes(rfnee,FC.c))+theme_bw()+
# geom_hline(yintercept = 0,lty=2)+
# geom_vline(xintercept = 0,lty=2)+
# geom_point(alpha=0.2)+
# scale_fill_viridis_c()+
# geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
# scale_x_continuous(limits = c(-20,20),expression('Random Forest NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# scale_y_continuous(limits = c(-20,20),expression('Eddy Covariance NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.64"),size = 3)+
# annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 1.02"),size = 3)+
# theme(text = element_text(size = 8)) +
# labs(title = "Orig - no wind")
#
# councilnee
#Added wd (degrees) - RF ntree = 150
summary(lm(test.data.nee$FC.c ~ test.data.nee$rfnee.2)) #R2 = 0.646, slope = 1.02; p<0.001
councilnee2 = ggplot(data = test.data.nee,aes(rfnee.2,FC.c))+theme_bw()+
geom_hline(yintercept = 0,lty=2)+
geom_vline(xintercept = 0,lty=2)+
geom_point(alpha=0.2)+
scale_fill_viridis_c()+
geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
scale_x_continuous(limits = c(-20,20),expression('Random Forest NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
scale_y_continuous(limits = c(-20,20),expression('Eddy Covariance NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.65"),size = 3)+
annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 1.02"),size = 3)+
theme(text = element_text(size = 8)) +
labs(title = "with WD (degrees), ntree = 150")
councilnee2
#Added wd (degrees) - RF ntree = 1000
summary(lm(test.data.nee$FC.c ~ test.data.nee$rf_model_1000)) #R2 = 0.648, slope = 1.02
councilnee3 = ggplot(data = test.data.nee,aes(rf_model_1000,FC.c))+theme_bw()+
geom_hline(yintercept = 0,lty=2)+
geom_vline(xintercept = 0,lty=2)+
geom_point(alpha=0.2)+
scale_fill_viridis_c()+
geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
scale_x_continuous(limits = c(-20,20),expression('Random Forest NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
scale_y_continuous(limits = c(-20,20),expression('Eddy Covariance NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.65"),size = 3)+
annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 1.02"),size = 3)+
theme(text = element_text(size = 8)) +
labs(title = "with WD (degrees), ntree = 1000")
councilnee3
#NOTE: ntree = 150 and ntree = 1000 appear essentially the same
#used cardinal direction only
summary(lm(test.data.nee$FC.c ~ test.data.nee$rfnee.3)) #R2 = 0.646, slope = 1.02
#
# councilnee3 = ggplot(data = test.data.nee,aes(rfnee.3,FC.c))+theme_bw()+
# geom_hline(yintercept = 0,lty=2)+
# geom_vline(xintercept = 0,lty=2)+
# geom_point(alpha=0.2)+
# scale_fill_viridis_c()+
# geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
# scale_x_continuous(limits = c(-20,20),expression('Random Forest NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# scale_y_continuous(limits = c(-20,20),expression('Eddy Covariance NEE ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.65"),size = 3)+
# annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 1.03"),size = 3)+
# theme(text = element_text(size = 8))+
# labs(title = "with wind cardinal direction")
#
# councilnee3
```
#Methane
####Examine variable importance for FCH4
```{r}
set.seed(123)
cc = ncf[complete.cases(ncf$fch4),]
#added wd (degrees)
boruta = Boruta(fch4 ~ tair + rh + rg + ws + wd + tsoil + swc + vpd + le + h,data = cc,doTrace = 2,maxRuns = 100)
boruta$finalDecision #indicates all above are confirmed important variables via a list
plot(boruta,las = 2) #plots the variables; green = important, yellow = tentative, red = not important
#added wd - cardinal direction
# boruta2 = Boruta(fch4 ~ tair + rh + rg + ws + cardDir + tsoil + swc + vpd + le + h,data = cc,doTrace = 2,maxRuns = 100)
# plot(boruta2,las = 2)
```
#try random forest for CH4
####Subset training and testing datasets
```{r,error=FALSE,warning=FALSE}
#looking at range of data
ggplot(data = cc, aes(x=date, y = fch4)) + geom_point()
#KK edit 11.12.2024 - adding trees to see if the model can constrain winter CH4 better
#use 80% of data set as training set and 20% as test set, expanded to try to represent the larger points better
#cc = subset(cc,cc$fch4 < 50) #50 cuts off what looks like real data for one of the years...adj to perhaps 60
cc = subset(cc,cc$fch4 < 60)
cc = subset(cc,cc$fch4 > -25)
sample.ch4 = sample(c(TRUE, FALSE), nrow(cc), replace=TRUE, prob=c(0.8,0.2))
train.ch4 = cc[sample.ch4, ]
test.ch4 = cc[!sample.ch4, ]
hist(cc$fch4)
hist(train.ch4$fch4)
hist(test.ch4$fch4)
summary(cc$fch4)
summary(train.ch4$fch4)
summary(test.ch4$fch4)
```
#### RF model for CH4
```{r}
#no wind direction
rfch4 = randomForest(formula = fch4 ~ tair + rh + rg + ws + tsoil + vpd + swc + h + le,data = train.ch4,ntree = 150)
pch4 = predict(object = rfch4,newdata = ncf)
cf$rfch4 = pch4
#added wd (degrees) -- used this version of the model*********** ntree = 150
rfch4.2 = randomForest(formula = fch4 ~ tair + rh + rg + ws + wd + tsoil + vpd + swc + h + le,data = train.ch4,ntree = 150)
pch4.2 = predict(object = rfch4.2,newdata = ncf)
cf$rfch4.2 = pch4.2
#added wd (degrees) -- trying with ntree = 1000
rfch4.1000 = randomForest(formula = fch4 ~ tair + rh + rg + ws + wd + tsoil + vpd + swc + h + le,data = train.ch4,ntree = 1000)
pch4.1000 = predict(object = rfch4.1000,newdata = ncf)
cf$rfch4.1000 = pch4.1000
#added cardinal direction (no wd)
# rfch4.3 = randomForest(formula = fch4 ~ tair + rh + rg + ws + cardDir + tsoil + vpd + swc + h + le,data = train.ch4,ntree = 150)
#
#
# pch4.3 = predict(object = rfch4.3,newdata = ncf)
#
# cf$rfch4.3 = pch4.3
#added cardinal direction and wd
# rfch4.4 = randomForest(formula = fch4 ~ tair + rh + rg + ws + wd + cardDir + tsoil + vpd + swc + h + le,data = train.ch4,ntree = 150)
#
#
# pch4.4 = predict(object = rfch4.4,newdata = ncf)
#
# cf$rfch4.4 = pch4.4
```
#IncMSE for CH4 variable importance
```{r}
#Higher %IncMSE: A higher %IncMSE value means that the variable is more important for the model. Removing this variable would result in a larger increase in the model's prediction error.
#Lower %IncMSE: A lower %IncMSE value means that the variable is less important. Removing this variable would result in a smaller increase in the model's prediction error.
#Re-running the RF models rom above here as well since it seems the importance_values don't calc properly unless its done directly following the model run
# Train the Random Forest model - same as rfch4.2 model above
rf_model_CH4 = randomForest(formula = fch4 ~ tair + rh + rg + ws + wd + tsoil + vpd + swc + h + le, data = train.ch4,importance = TRUE, ntree = 150)
# Extract variable importance
importance_values <- importance(rf_model_CH4, type = 1) # type = 1 for %IncMSE
# Print the importance values
print(importance_values)
# Plot the importance values
varImpPlot(rf_model_CH4, type = 1, main = "FCH4: Variable Importance (%IncMSE), ntree = 150")
#Results:
# %IncMSE
# tair 28.16018
# rh 35.06199
# rg 23.75861
# ws 45.06025 --> 3
# wd 53.83652 --> 2
# tsoil 40.79492
# vpd 25.60573
# swc 70.87396 --> 1
# h 31.45271
# le 31.76639
#Now incMSE for ntree = 1000
# Train the Random Forest model - same as rfch4.1000 model above
rf_model_CH4_1000 = randomForest(formula = fch4 ~ tair + rh + rg + ws + wd + tsoil + vpd + swc + h + le, data = train.ch4,importance = TRUE, ntree = 1000)
# Extract variable importance
importance_values2 <- importance(rf_model_CH4_1000, type = 1) # type = 1 for %IncMSE
# Print the importance values
print(importance_values2)
# Plot the importance values
varImpPlot(rf_model_CH4_1000, type = 1, main = "FCH4: Variable Importance (%IncMSE), ntree = 1000")
#Results:
# %IncMSE
# tair 71.23076
# rh 99.79669
# rg 62.36291
# ws 127.36021
# wd 150.88643
# tsoil 103.55460
# vpd 62.92133
# swc 183.88281
# h 81.56650
# le 73.25064
```
#Validation - looking at agreement between modeled data and real data
## Methane - no wind direction, RF tree = 150
```{r,error=FALSE,warning=FALSE}
#changed to FCH4.c to use the re-cleaned data - orig model, no wind direction, ntree = 150
test.data.ch4 = merge(test.ch4,cf,by = 'date',all.x = T)
councilch4 = ggplot(data = test.data.ch4,aes(rfch4,FCH4.c))+theme_bw()+
geom_hline(yintercept = 0,lty=2)+
geom_vline(xintercept = 0,lty=2)+
geom_point(alpha=0.2)+
scale_fill_viridis_c()+
geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
scale_x_continuous(limits = c(-50,150),expression('Random Forest '*CH[4]~flux~" ("*mu*mol~CH[4]~m^-2~s^-1*')'))+
scale_y_continuous(limits = c(-50,150),expression('Eddy Covariance '*CH[4]~flux~" ("*mu*mol~CH[4]~m^-2~s^-1*')'))+
annotate(geom = 'text',x = 100,y = 20,label = expression(R^2~"= 0.44"),size = 3)+
annotate(geom= 'text',x = 100,y = 10,label = expression(Slope~"= 1.02"),size = 3)+
theme(text = element_text(size = 8))
councilch4
summary(lm(test.data.ch4$FCH4.c ~ test.data.ch4$rfch4)) #R2 = 0.51, slope = 1.04, p<0.001
```
##Methane - adding wind direction, RF tree = 150 & 1000
```{r,error=FALSE,warning=FALSE}
#added wd (degrees) -- ntree = 150
#test.data.ch4 = merge(test.ch4,cf,by = 'date',all.x = T) #don't need this here if merged in code chunk above
summary(lm(test.data.ch4$FCH4.c ~ test.data.ch4$rfch4.2)) #R2 = 0.548, slope = 1.0457; Residual standard error: 5.878 on 4441 degrees of freedom
councilch4.2= ggplot(data = test.data.ch4,aes(rfch4.2,FCH4.c))+theme_bw()+
geom_hline(yintercept = 0,lty=2)+
geom_vline(xintercept = 0,lty=2)+
geom_point(alpha=0.2)+
scale_fill_viridis_c()+
geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
scale_x_continuous(limits = c(-50,150),expression('Random Forest FCH4 - with WD ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
scale_y_continuous(limits = c(-50,150),expression('Eddy Covariance FCH4 ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
annotate(geom = 'text',x = 50, y = -8,label=expression(R^2~"= 0.55"),size = 3)+
annotate(geom = 'text',x = 50,y = -17,label=expression(Slope~"= 1.04"),size = 3)+
theme(text = element_text(size = 8)) +
labs(title = "CH4 with WD (degrees), ntree = 150")
councilch4.2
#using wd and ntree = 1000
summary(lm(test.data.ch4$FCH4.c ~ test.data.ch4$rfch4.1000)) #R2 = 0.5497, slope = 1.050; Residual standard error: 5.868 on 4441 degrees of freedom
councilch4.1000= ggplot(data = test.data.ch4,aes(rfch4.2,FCH4.c))+theme_bw()+
geom_hline(yintercept = 0,lty=2)+
geom_vline(xintercept = 0,lty=2)+
geom_point(alpha=0.2)+
scale_fill_viridis_c()+
geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
scale_x_continuous(limits = c(-50,150),expression('Random Forest FCH4 - with WD ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
scale_y_continuous(limits = c(-50,150),expression('Eddy Covariance FCH4 ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
annotate(geom = 'text',x = 50, y = -8,label=expression(R^2~"= 0.55"),size = 3)+
annotate(geom = 'text',x = 50,y = -17,label=expression(Slope~"= 1.05"),size = 3)+
theme(text = element_text(size = 8))+
labs(title = "CH4 with WD (degrees), ntree = 1000")
councilch4.1000
#NOTE: ntree = 150 and ntree = 1000 appear essentially the same*
#used cardinal direction
# summary(lm(test.data.ch4$FCH4.c ~ test.data.ch4$rfch4.3)) #R2 =0.46, slope = 0.99
#
# councilch4= ggplot(data = test.data.ch4,aes(rfch4.3,FCH4.c))+theme_bw()+
# geom_hline(yintercept = 0,lty=2)+
# geom_vline(xintercept = 0,lty=2)+
# geom_point(alpha=0.2)+
# scale_fill_viridis_c()+
# geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
# scale_x_continuous(limits = c(-50,150),expression('Random Forest FCH4 with card Dir ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# scale_y_continuous(limits = c(-50,150),expression('Eddy Covariance FCH4 ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.46"),size = 3)+
# annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 0.99"),size = 3)+
# theme(text = element_text(size = 8))
#
# councilch4.3
#used cardinal direction and wd
# summary(lm(test.data.ch4$FCH4.c ~ test.data.ch4$rfch4.4)) #R2 =0.46, slope = 1.00
#
# councilch4= ggplot(data = test.data.ch4,aes(rfch42,FCH4.c))+theme_bw()+
# geom_hline(yintercept = 0,lty=2)+
# geom_vline(xintercept = 0,lty=2)+
# geom_point(alpha=0.2)+
# scale_fill_viridis_c()+
# geom_abline(slope = 1,intercept = 0,col='red',lty=1)+
# scale_x_continuous(limits = c(-50,150),expression('Random Forest FCH4 with cardDir and WD ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# scale_y_continuous(limits = c(-50,150),expression('Eddy Covariance FCH4 ('*mu*mol~CO[2]~m^-2~s^-1*')'))+
# annotate(geom = 'text',x = 6, y = -8,label=expression(R^2~"= 0.46"),size = 3)+
# annotate(geom = 'text',x = 6,y = -10,label=expression(Slope~"= 1.00"),size = 3)+
# theme(text = element_text(size = 8))
#
# councilch4.4
```
#RSME of RF ntree 150 and ntree 1000
```{r}
#ntree 150, no wd
rmse_noWD<- sqrt(mean((test.data.ch4$FCH4.c - test.data.ch4$rfch4)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 150 ", rmse_noWD)) #RSME 6.11332419169469"
#ntree 150, using wd
rmse150 <- sqrt(mean((test.data.ch4$FCH4.c - test.data.ch4$rfch4.2)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 150 ", rmse150)) #RSME 5.88378022838222"
#ntree 1000, using wd
rmse1000 <- sqrt(mean((test.data.ch4$FCH4.c - test.data.ch4$rfch4.1000)^2, na.rm = TRUE))
print(paste("RMSE: ntree = 1000 ", rmse1000)) #RSME 5.8754464185906" --> very slightly better fit
```
#Plots and Validation
```{r,error=FALSE,warning=FALSE}
#changed to FCH4.c to use re-cleaned data
#Plots modeled vs real data among several dates to observe agreement
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2019 July 1 - July 30")+
scale_x_datetime(limits = as.POSIXct(c("2019-07-01","2019-07-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2017")+
scale_x_datetime(limits = as.POSIXct(c("2017-01-01","2017-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2018")+
scale_x_datetime(limits = as.POSIXct(c("2018-01-01","2018-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2019")+
scale_x_datetime(limits = as.POSIXct(c("2019-01-01","2019-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2020")+
scale_x_datetime(limits = as.POSIXct(c("2020-01-01","2020-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2021")+
scale_x_datetime(limits = as.POSIXct(c("2021-01-01","2021-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2022")+
scale_x_datetime(limits = as.POSIXct(c("2022-01-01","2022-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.5)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.5)+
labs(title = "FCH4 2023")+
scale_x_datetime(limits = as.POSIXct(c("2023-01-01","2023-12-30")))
# scale_y_continuous(limits = c(-0.05,0.05))
ggplot(data = cf)+theme_bw()+
geom_point(aes(date,rfch4.2,col='RF'),alpha=0.2)+
geom_point(aes(date,FCH4.c,col='Real'),alpha=0.2)
```
#Observing agreement between modeled and real NEE and CH4 flux data
```{r}
library(cowplot)
#png(filename = 'C:/Users/karndt.WHRC/Desktop/sites/YKD/plots/2023 yk2unburned gapfilling validation.png',width = 6,height = 4,units = 'in',res = 1000)
plot_grid(councilnee2,councilch4.2)
#dev.off()
```
#Rename refnee.2 and fch4.2
```{r}
library(dplyr)
#removing all the other exploratory rf nee and rf ch4 model results from the cf dataset to leave the two chosen models (which include the wd in degrees, SWC for the tussock location, and ntree=150))
library(dplyr)