-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathSurrogate pairs.R
78 lines (68 loc) · 3.81 KB
/
Surrogate pairs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Make the group IDs numeric. Going through character first matters in case
# Group is a factor: as.numeric() on a factor returns the internal level codes,
# not the printed IDs.
d$Group <- as.numeric(as.character(d$Group))

# Candidate surrogate pairs: every ordered pair (a, b) of two *different*
# groups that belong to the same study. Groups are never paired across studies.
# The per-study tables are stacked in study order (1, 2, 3) into one data frame
# with columns `a` and `b`.
SurrogateList <- do.call(rbind, lapply(c(1, 2, 3), function(study) {
  study_groups <- unique(d$Group[d$Study == study])
  pairs <- expand.grid(a = study_groups, b = study_groups)
  # drop the self-pairs (a group paired with itself)
  subset(pairs, a != b)
}))
#### This is just an example to help you understand how the row numbers work. You can uncomment it to try it out; otherwise, move straight to the loop.
# Subset the data to the two groups named in the first row of SurrogateList (columns a and b):
#z1 <- subset(d, Group == SurrogateList$a[1])
# z2 <- subset(d, Group == SurrogateList$b[1])
# Remove the extra rows. Here z1 has 631 rows and z2 has 639, and both need to have 631 (the length of the shorter one).
# So we cut z2 down to the same number of rows as z1 by dropping everything from row 632 up to its last row (639):
# z2 <- z2[-(632:639),]
# Now the same thing written as general code; the for loop below trims the longer dataset in the same way:
# z2 <- z2[-((nrow(z1)+1):nrow(z2)),]
## Build the surrogate data. For every ordered pair of groups (a, b) listed in
## SurrogateList, and for every condition that BOTH groups have data for, keep
## group a's rows but replace the partner ("2") columns with the values recorded
## in group b. The result is collected in d_surrogate.
conditions <- c("Synchronous", "TurnTaking", "SelfPaced", "Conversation")

# One slot per (pair, condition). The pieces are bound together once after the
# loop, so re-running this block rebuilds d_surrogate from scratch instead of
# appending to (and doubling) a d_surrogate left over from an earlier run.
surrogate_parts <- vector("list", nrow(SurrogateList) * length(conditions))
n_parts <- 0L

# seq_len() (rather than 1:nrow()) so that an empty SurrogateList runs the loop
# zero times.
for (i in seq_len(nrow(SurrogateList))) {
  # rows of the main dataset for the group in column a / column b of row i
  x <- subset(d, Group == SurrogateList$a[i])
  y <- subset(d, Group == SurrogateList$b[i])
  # new ID for the surrogate group
  group <- 800 + i

  # surrogate pairs are built separately for each condition
  for (co in conditions) {
    # Skip conditions that are missing from either group. Without this, z1/z2
    # left over from the previous iteration would be reused and appended again.
    if (!(co %in% x$condition && co %in% y$condition)) {
      next
    }
    z1 <- subset(x, condition == co)
    z2 <- subset(y, condition == co)

    # Both pieces must have the same number of rows: keep only as many leading
    # rows as the shorter one has.
    n_keep <- min(nrow(z1), nrow(z2))
    z1 <- z1[seq_len(n_keep), , drop = FALSE]
    z2 <- z2[seq_len(n_keep), , drop = FALSE]

    # the "other" (e.g. HR2) of z1 becomes the "other" of z2, i.e. of a
    # different group
    w <- z1 %>% mutate(
      HR2 = z2$HR2,
      Resp2 = z2$Resp2,
      HR2_lead = z2$HR2_lead,
      Resp2_lead = z2$Resp2_lead,
      HR2_change = z2$HR2_change,
      Resp2_change = z2$Resp2_change
    )
    # label the rows with the surrogate group ID and mark them as surrogate, so
    # they can be identified after merging with the real data
    w$Group <- group
    w$Type <- "Surrogate"

    n_parts <- n_parts + 1L
    surrogate_parts[[n_parts]] <- w
  }
}

# Stack all pieces in the order they were produced. If nothing was produced,
# d_surrogate is NULL.
d_surrogate <- do.call(rbind, surrogate_parts[seq_len(n_parts)])
# Tag every row of the original data as "Real" so it can be told apart from the
# surrogate rows after the two are combined.
d[["Type"]] <- "Real"
# Stack the real rows on top of the surrogate rows in a single data frame.
dd <- rbind(d, d_surrogate)
# N.B. The output of the for loop is still in wide format. Either make it long and then merge it with the long real dataframe,
# or merge the two in wide format and make the result long afterwards.