-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMW_SDDmodeling.Rmd
142 lines (116 loc) · 5.22 KB
/
MW_SDDmodeling.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
---
title: "MW_SDDModeling"
author: "Janneke Hille Ris Lambers, Aji John, Meera Sethi, Elli Theobald"
date: "12/15/2020"
output: html_document
---
#Setup for R script and R markdown
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
#Read in SDD data (all sources)
This code chunk reads in SDD data from several data sets: MeadoWatch, Forest Demography, Elli's phenology plots, and Meera's phenology / herbivory plots. Some observations are removed due to data issues (those with faulty hobo's) in Meera and the FD dataset. Additionally, data from years where MeadoWatch data were not collected are removed from Elli's data. Finally, the FD data set was pared down to only include higher elevation stands (above 1000 meters) and stands in the SW and NE parts of the park (where MeadoWatch trails are located).
```{r}
# Read in SDD data from MeadoWatch
MW_SDD <- read.csv("data/MeadoWatchSDD.csv", header=TRUE)
MW_SDD_obs <- MW_SDD[is.na(MW_SDD$SDD)==FALSE,]
# Read in Forest Demo data; remove rows with HOBO issues
FD_SDD <- read.csv("data/ForestDemoSDD.csv", header=TRUE)
FD_SDD <- FD_SDD[FD_SDD$Issue=="",]
# Extract stands closer to GB and RL, at higher elevation
FD_stands <- substr(FD_SDD$Site_Loc, 1, 4)
relevant_stands <- c("AB08", "AE10", "AM16", "AR07","AV06","PARA",
"PP17", "SPRY", "SUNR")
standskeep <- FD_stands %in% relevant_stands
FD_SDD <- FD_SDD[standskeep,]
# Read in MLS data; remove duplicate rows and plots with 1 measurement
MLS_SDD <- read.csv("data/MeeraSDD.csv", header=TRUE)
MLS_SDD <- MLS_SDD[MLS_SDD$duplicate=="",]
MLS_SDD <- MLS_SDD[MLS_SDD$single=="",]
# Read in EJT data; remove 2010-2012 data
EJT_SDD <- read.csv("data/ElliSDD.csv", header=TRUE)
EJT_SDD <- EJT_SDD[EJT_SDD$Year>2012,]
```
#Create model of SDD vs year and plot
This code chunk fits a simple linear regression to the SDD data, relating the SDD observations to location and year. Includes creation of Fig B1 (appendix)
```{r}
# Now merge data
Site_Loc <- as.factor(c(MW_SDD_obs$Site_Loc, FD_SDD$Site_Loc,
MLS_SDD$Site_Loc, EJT_SDD$Site_Loc))
Year <- as.factor(c(MW_SDD_obs$Year, FD_SDD$Year,
MLS_SDD$Year, EJT_SDD$Year))
SDD <- c(MW_SDD_obs$SDD, FD_SDD$SDD, MLS_SDD$SDD, EJT_SDD$SDD)
SDDsource <- c(rep("lightblue", times=dim(MW_SDD)[1]),
rep("green", times=dim(FD_SDD)[1]),
rep("orange", times=dim(MLS_SDD)[1]),
rep("pink", times=dim(EJT_SDD)[1]))
yrcol <- c(MW_SDD_obs$Year, FD_SDD$Year, MLS_SDD$Year, EJT_SDD$Year)
yrcol[yrcol==2013] <- "yellowgreen"
yrcol[yrcol==2014] <- "navyblue"
yrcol[yrcol==2015] <- "orange"
yrcol[yrcol==2016] <- "darkgreen"
yrcol[yrcol==2017] <- "pink"
yrcol[yrcol==2018] <- "lightblue"
yrcol[yrcol==2019] <- "purple"
yrcol[yrcol==2020] <- "red"
# Now fit model
SDDModel <- lm(SDD ~ Site_Loc + Year)
SDDR2 <- round(summary(SDDModel)$r.squared, 3)
# Other models
SDDModel_0 <- lm(SDD ~ 1)
SDDModel_yr <- lm(SDD ~ Year)
SDDModel_loc <- lm(SDD ~ Site_Loc)
SDDModel_int <- lm(SDD ~ Site_Loc*Year)
# Examine model fits
anova(SDDModel)
anova(SDDModel_0, SDDModel_yr, SDDModel_loc,
SDDModel, SDDModel_int)
# Rsquared
summary(SDDModel)$r.squared
summary(SDDModel_yr)$r.squared
summary(SDDModel_loc)$r.squared
summary(SDDModel_int)$r.squared
# Graph of predicted vs. observed; color by data source
par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02,
pty="s", mgp=c(1.2,0.35,0))
plot(SDDModel$fitted.values, SDD, pch=21, bg=SDDsource,
xlab="Predicted", ylab="Observed", main="SDD model - all data")
abline(0,1)
mtext(paste("R2=",SDDR2, sep=""), line=-1, side=3, adj=0.05, cex=0.75)
legend(x="bottomright", legend=c("Mwatch","ForestDemo","MLS","EJT"),
pch=21, pt.bg=c("lightblue","green","orange","pink"),
cex = 0.75, pt.cex = 1)
# Graph of predicted vs. observed; color by year (Fig B1 in Appendix B)
tiff(file="output/figures/FigC1.tif", width=5, height=5, units="in", res=600)
par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02,
pty="s", mgp=c(1.2,0.35,0))
plot(SDDModel$fitted.values, SDD, pch=21, bg=yrcol,
xlab="Predicted", ylab="Observed", main="SDD model")
abline(0,1)
mtext(paste("R2=",SDDR2, sep=""), line=-1, side=3, adj=0.05, cex=0.75)
legend(x="bottomright", legend=c(seq(2013,2020)), title="Year",
pch=21, pt.bg=c("yellowgreen", "navyblue", "orange", "darkgreen",
"pink", "lightblue", "purple", "red"),
cex = 0.75, pt.cex = 1)
dev.off()
```
#MeadoWatch SDD data - predict missing data
This code chunk uses the fitted model (to all data) to predict SDD for just MeadoWatch sites. It saves the predicted SDD data to a file.
```{r}
MW_SDD$Year <- as.factor(MW_SDD$Year)
SDD_pred <- predict.lm(SDDModel, MW_SDD)
# correlate predicted, observed
test <- cor.test(SDD_pred, MW_SDD$SDD)
print(test)
# Graph of predicted vs. observed: only MW
par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02,
pty="s", mgp=c(1.2,0.5,0))
plot(SDD_pred, MW_SDD$SDD, pch=21, bg="lightblue",
xlab="Predicted SDD", ylab="Observed SDD", main="MeadoWatch only")
abline(0,1)
mtext(paste("r=", round(test$estimate,3), sep=""), line=-1, cex=0.75, adj=0.05)
# Now write data
MW_SDD$predSDD <- SDD_pred
write.csv(MW_SDD, "cleandata/MW_SDDall.csv", quote=FALSE,
row.names=FALSE)
```