-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathday2.R
146 lines (100 loc) · 3.54 KB
/
day2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#follow-up from 1st day
# what are variables
# what are classes
# arrays immutable ?
# repeat the questions when asked
# if we have time leftover
# helpful forays - writing functions
# - writing packages
#ls() - list the veiables in space
download.file("https://ndownloader.figshare.com/files/2292169",
"data/portal_data_joined.csv")
# read from the surveys
surveys <- read.csv("data/portal_data_joined.csv")
names(surveys)
as.character(surveys$species)
as.numeric(c(1,3,4,5))
summary(surveys)
head(surveys)
tail(surveys)
surveys[1,1]
surveys[1,4]
surveys[34786,4]
surveys[,4]
surveys[1,]
dim(surveys)
nrow(surveys)
ncol(surveys)
levels(surveys$sex)
names(surveys)
surveys[1,'record_id']
#c1 - rename column 'sex' to gender
#c2 - count by gender
summary(surveys$gender)
#
#mix and match row s
surveys[c(1:6,8),1]
#select certain columns from sdurveys
surveys %>% select(gender,species_id)
#first 6 columns
surveys %>% select(names(surveys)[1:6])
#filter
surveys %>% select(names(surveys)[1:6]) %>% filter(plot_id == 2)
#c3
#filter plot_ids 2,3,6
surveys %>% select(names(surveys)[1:6]) %>% filter(plot_id %in% c(2,3,6)) %>% head()
#filter plot_ids 2,3,6 and day 16 or day 1
surveys %>% select(names(surveys)[1:6]) %>%
filter(plot_id %in% c(2,3,6) & (day==16 | day==1)) %>% head()
#c4
#filter plot_ids 2,3,6 and day 16 or day 1 and for January only(1)
surveys %>% select(names(surveys)[1:6]) %>%
filter(plot_id %in% c(2,3,6) & (day==16 | day==1) & month==1) %>% head()
#adding new columns
surveys %>% select(names(surveys)[1:6]) %>%
filter(plot_id %in% c(2,3,6) & (day==16 | day==1) & month==1) %>% mutate(season='winter')
#c5 tag season to all the observations
# skip days
#winter
winter <- surveys %>% select(names(surveys)[1:6]) %>%
filter( month %in% c(1,2,3)) %>% mutate(season='winter') %>% as.data.frame()
#summer
summer <- surveys %>% select(names(surveys)[1:6]) %>%
filter( month %in% c(7,8,9)) %>% mutate(season='winter') %>% as.data.frame()
#rbind
all<- rbind(summer,winter)
#group by plots
surveys %>% group_by(plot_id) %>% head()
#group by plots
surveys %>% group_by(plot_id,year) %>% head()
#summarize()
surveys %>% group_by(plot_id,year) %>% summarise(mean_weight=mean(weight))
surveys %>% filter(!is.na(weight)) %>%
group_by(plot_id,year) %>%
summarise(mean_weight=mean(weight))
#using tidyr
surveys %>% drop_na() %>% group_by(plot_id,year) %>%
summarise(mean_weight=mean(weight))
surveys %>% drop_na() %>% group_by(plot_id,year) %>%
summarise(mean_weight=mean(weight),min_weight=min(weight))
#find out how many rows contributed to the grouping functions
surveys %>% drop_na() %>% group_by(plot_id,year) %>%
summarise(mean_weight=mean(weight),min_weight=min(weight),count=n())
#using complete cases
surveys[complete.cases(surveys),] %>% group_by(plot_id,year) %>%
summarise(mean_weight=mean(weight),min_weight=min(weight),count=n())
library(lme4)
fm1 <- glmer(Reaction ~ Days + (Days|Subject), sleepstudy,family=binomial(link = "logit"))
#lets look at the iris dataset
library(lme4)
#add a new binary column to iris dataset
iris_clone <- iris
iris_clone$oblong <- iris$Petal.Length/iris$Petal.Width< 5
str(iris_clone)
#Petal width being a random effect, and length being linear
model_1 <- glmer(oblong ~ Petal.Length + (1| Petal.Width),
iris_clone,family=binomial(link = "logit"))
#Petal width still being a random effect
model_2 <- glmer(oblong ~ Petal.Length + I(Petal.Length^2) + (1|Petal.Width) ,
iris_clone,family=binomial(link = "logit"))
AIC(model_1,model_2)