MW_SDDmodeling.Rmd

---
title: "MW_SDDModeling"
author: "Janneke Hille Ris Lambers, Aji John, Meera Sethi, Elli Theobald"
date: "12/15/2020"
output: html_document
---

#Setup for R script and R markdown
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

#Read in SDD data (all sources)
This code chunk reads in SDD data from several data sets: MeadoWatch, Forest Demography, Elli's phenology plots, and Meera's phenology / herbivory plots. Some observations are removed due to data issues (those with faulty hobo's) in Meera and the FD dataset. Additionally, data from years where MeadoWatch data were not collected are removed from Elli's data. Finally, the FD data set was pared down to only include higher elevation stands (above 1000 meters) and stands in the SW and NE parts of the park (where MeadoWatch trails are located).
```{r}
# Read in SDD data from MeadoWatch
MW_SDD <- read.csv("data/MeadoWatchSDD.csv", header=TRUE)
MW_SDD_obs <- MW_SDD[is.na(MW_SDD$SDD)==FALSE,]

# Read in Forest Demo data; remove rows with HOBO issues
FD_SDD <- read.csv("data/ForestDemoSDD.csv", header=TRUE)
FD_SDD <- FD_SDD[FD_SDD$Issue=="",]

# Extract  stands closer to GB and RL, at higher elevation
FD_stands <- substr(FD_SDD$Site_Loc, 1, 4)
relevant_stands <- c("AB08", "AE10", "AM16", "AR07","AV06","PARA",
                     "PP17", "SPRY", "SUNR")
standskeep <- FD_stands %in% relevant_stands
FD_SDD <- FD_SDD[standskeep,]

# Read in MLS data; remove duplicate rows and plots with 1 measurement
MLS_SDD <- read.csv("data/MeeraSDD.csv", header=TRUE)
MLS_SDD <- MLS_SDD[MLS_SDD$duplicate=="",]
MLS_SDD <- MLS_SDD[MLS_SDD$single=="",]

# Read in EJT data; remove 2010-2012 data
EJT_SDD <- read.csv("data/ElliSDD.csv", header=TRUE)
EJT_SDD <- EJT_SDD[EJT_SDD$Year>2012,]

```

#Create model of SDD vs year and plot
This code chunk fits a simple linear regression to the SDD data, relating the SDD observations to location and year. Includes creation of Fig B1 (appendix)
```{r}
# Now merge data
Site_Loc <- as.factor(c(MW_SDD_obs$Site_Loc, FD_SDD$Site_Loc, 
                        MLS_SDD$Site_Loc, EJT_SDD$Site_Loc))
Year <- as.factor(c(MW_SDD_obs$Year, FD_SDD$Year, 
                        MLS_SDD$Year, EJT_SDD$Year))
SDD <- c(MW_SDD_obs$SDD, FD_SDD$SDD, MLS_SDD$SDD, EJT_SDD$SDD)
SDDsource <- c(rep("lightblue", times=dim(MW_SDD)[1]), 
               rep("green", times=dim(FD_SDD)[1]),
               rep("orange", times=dim(MLS_SDD)[1]),
               rep("pink", times=dim(EJT_SDD)[1]))

yrcol <- c(MW_SDD_obs$Year, FD_SDD$Year, MLS_SDD$Year, EJT_SDD$Year)
yrcol[yrcol==2013] <- "yellowgreen"
yrcol[yrcol==2014] <- "navyblue"
yrcol[yrcol==2015] <- "orange"
yrcol[yrcol==2016] <- "darkgreen"
yrcol[yrcol==2017] <- "pink"
yrcol[yrcol==2018] <- "lightblue"
yrcol[yrcol==2019] <- "purple"
yrcol[yrcol==2020] <- "red"


# Now fit model
SDDModel <- lm(SDD ~ Site_Loc + Year)
SDDR2 <- round(summary(SDDModel)$r.squared, 3)

# Other models
SDDModel_0 <- lm(SDD ~ 1)
SDDModel_yr <- lm(SDD ~ Year)
SDDModel_loc <- lm(SDD ~ Site_Loc)
SDDModel_int <- lm(SDD ~ Site_Loc*Year)

# Examine model fits
anova(SDDModel)
anova(SDDModel_0, SDDModel_yr, SDDModel_loc, 
      SDDModel, SDDModel_int)

# Rsquared
summary(SDDModel)$r.squared
summary(SDDModel_yr)$r.squared
summary(SDDModel_loc)$r.squared
summary(SDDModel_int)$r.squared

# Graph of predicted vs. observed; color by data source
par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02, 
    pty="s", mgp=c(1.2,0.35,0))
plot(SDDModel$fitted.values, SDD, pch=21, bg=SDDsource,
     xlab="Predicted", ylab="Observed", main="SDD model - all data")
abline(0,1)
mtext(paste("R2=",SDDR2, sep=""), line=-1, side=3, adj=0.05, cex=0.75)
legend(x="bottomright", legend=c("Mwatch","ForestDemo","MLS","EJT"),
       pch=21, pt.bg=c("lightblue","green","orange","pink"),
       cex = 0.75, pt.cex = 1)

# Graph of predicted vs. observed; color by year (Fig B1 in Appendix B)
tiff(file="output/figures/FigC1.tif", width=5, height=5, units="in", res=600)

par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02, 
    pty="s", mgp=c(1.2,0.35,0))
plot(SDDModel$fitted.values, SDD, pch=21, bg=yrcol,
     xlab="Predicted", ylab="Observed", main="SDD model")
abline(0,1)
mtext(paste("R2=",SDDR2, sep=""), line=-1, side=3, adj=0.05, cex=0.75)
legend(x="bottomright", legend=c(seq(2013,2020)), title="Year",
       pch=21, pt.bg=c("yellowgreen", "navyblue", "orange", "darkgreen",
                       "pink", "lightblue", "purple", "red"),
       cex = 0.75, pt.cex = 1)
dev.off()


```

#MeadoWatch SDD data - predict missing data
This code chunk uses the fitted model (to all data) to predict SDD for just MeadoWatch sites. It saves the predicted SDD data to a file.
```{r}
MW_SDD$Year <- as.factor(MW_SDD$Year)
SDD_pred <- predict.lm(SDDModel, MW_SDD)

# correlate predicted, observed
test <- cor.test(SDD_pred, MW_SDD$SDD)
print(test)

# Graph of predicted vs. observed: only MW
par(omi=c(0,0,0,0), mai=c(0.5,0.5,0.5,0.4), tck=-0.02, 
    pty="s", mgp=c(1.2,0.5,0))
plot(SDD_pred, MW_SDD$SDD, pch=21, bg="lightblue",
     xlab="Predicted SDD", ylab="Observed SDD", main="MeadoWatch only")
abline(0,1)
mtext(paste("r=", round(test$estimate,3), sep=""), line=-1, cex=0.75, adj=0.05)

# Now write data
MW_SDD$predSDD <- SDD_pred
write.csv(MW_SDD, "cleandata/MW_SDDall.csv", quote=FALSE,
          row.names=FALSE)

```