-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnlp.R
60 lines (48 loc) · 1.46 KB
/
nlp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library(tidyverse)
library(lubridate)
library(tidytext)
library(wordcloud)
library(reshape2)
data("stop_words")
# Load the Bing sentiment lexicon (via tidytext); used by the sentiment
# comparison cloud further down.
sentiments_bing <- tidytext::get_sentiments(lexicon = "bing")

# Load the conference talks (1971 - 2018). The path is resolved relative to
# the current working directory.
text <- readr::read_csv(file = "conference_talks.csv")
# Talks per conference over time: one point per conference date plus a
# smoothed trend line.
# NOTE(review): assumes `conference` holds month-year strings (e.g.
# "April 1971") so that prefixing "01" gives dmy() a complete date -- confirm
# against conference_talks.csv.
text %>%
  count(date = dmy(paste("01", conference)), name = "number_talks") %>%
  ggplot(mapping = aes(x = date, y = number_talks)) +
  geom_point() +
  geom_smooth() +
  labs(x = "", y = "Number of Talks per Conference") +
  theme_bw()
# Total number of words per conference over time: one point per conference
# date plus a smoothed trend line.
#
# Previously the summarise() argument was left empty and the plot mapped y to
# `number_talks`, a column this pipeline never creates; the y-axis label was
# also copied from the talks-per-conference plot.
#
# NOTE(review): `talk_text_col` is an assumption -- the column holding each
# talk's body text is not referenced anywhere else in this script. Confirm the
# name against conference_talks.csv before relying on this plot.
talk_text_col <- "text"

text %>%
  mutate(date = dmy(paste("01", conference))) %>%
  group_by(date) %>%
  summarise(
    # Count words per talk at word boundaries, then total them per conference;
    # talks with missing text contribute nothing instead of turning the whole
    # conference total into NA.
    number_words = sum(
      str_count(.data[[talk_text_col]], boundary("word")),
      na.rm = TRUE
    )
  ) %>%
  ggplot(aes(x = date, y = number_words)) +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Words per Conference") +
  geom_smooth()
# Word cloud of talk-title words (stop words removed), showing up to 100 words
# sized by how often they occur.
text %>%
  unnest_tokens(word, title) %>%
  # Explicit join key: silences the "Joining with `by = ...`" message and
  # keeps the join from silently widening if another shared column appears.
  anti_join(stop_words, by = "word") %>%
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  count(word, sort = TRUE) %>%
  with(wordcloud(word, n, max.words = 100))
# Comparison cloud of talk-title words, split by their Bing sentiment label.
text %>%
  unnest_tokens(word, title) %>%
  # Explicit join keys: silence the "Joining with `by = ...`" messages and
  # keep the joins from silently widening if another shared column appears.
  anti_join(stop_words, by = "word") %>%
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  count(word, sort = TRUE) %>%
  inner_join(sentiments_bing, by = "word") %>%
  # Reshape to a word x sentiment matrix of counts (0 where a word lacks that
  # sentiment), which is the input comparison.cloud() takes.
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(
    colors = c("gray20", "gray80"),
    max.words = 100
  )
# wordcloud speakers
# examine sentiment over time of each talk or each conference
# could look at sentiment by sentence or paragraph