Packages
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (tidytext)
library (wordcloud2)
library (readtext)
library (friends)
library (tm) # for the Corpus function
Loading required package: NLP
Attaching package: 'NLP'
The following object is masked from 'package:ggplot2':
annotate
Loading required package: jiebaRD
With demo data
# Word cloud of the demoFreqC sample data, drawn with randomly chosen
# light colours on a black background.
wordcloud2(
  data = demoFreqC,
  color = "random-light",
  backgroundColor = "black"
)
# Word cloud of demoFreqC where the rotation range is pinned to a single
# angle: minRotation and maxRotation are both -pi / 6, and rotateRatio = 1.
wordcloud2(
  data = demoFreqC,
  size = 0.5,
  shape = "circle",
  color = "random-light",
  backgroundColor = "black",
  minRotation = -pi / 6,
  maxRotation = -pi / 6,
  rotateRatio = 1
)
# Restrict demoFreq to its top 50 rows by `freq` before plotting, then draw
# a circular cloud with size = 1.5 and the package's default colours.
wordcloud2(
  data = slice_max(demoFreq, order_by = freq, n = 50),
  size = 1.5,
  shape = "circle"
)
A more complex example using the Friends data
# Build a cleaned tm corpus from every line spoken by Ross Geller.
#
# VCorpus() is used rather than Corpus(): Corpus() on a VectorSource yields a
# SimpleCorpus, and each tm_map() step on it raised the
# "transformation drops documents" warning. pull(text) replaces the
# equivalent select(text) |> pull() pair.
docs <- friends |>
  filter(speaker == "Ross Geller") |>
  pull(text) |>
  VectorSource() |>
  VCorpus() |>
  # tolower() is a base function, so it must be wrapped to act on content
  tm_map(content_transformer(tolower)) |>
  tm_map(removePunctuation) |>
  tm_map(removeNumbers) |>
  # drop English stop words from tm's built-in list
  tm_map(removeWords, stopwords("en")) |>
  # run last so the gaps left by the removals above are tidied up
  tm_map(stripWhitespace)
Warning in tm_map.SimpleCorpus(Corpus(VectorSource(pull(select(filter(friends,
: transformation drops documents
Warning in
tm_map.SimpleCorpus(tm_map(Corpus(VectorSource(pull(select(filter(friends, :
transformation drops documents
Warning in
tm_map.SimpleCorpus(tm_map(tm_map(Corpus(VectorSource(pull(select(filter(friends,
: transformation drops documents
Warning in
tm_map.SimpleCorpus(tm_map(tm_map(tm_map(Corpus(VectorSource(pull(select(filter(friends,
: transformation drops documents
Warning in
tm_map.SimpleCorpus(tm_map(tm_map(tm_map(tm_map(Corpus(VectorSource(pull(select(filter(friends,
: transformation drops documents
# Tidy-text word counts for Ross Geller's lines: one row per word, stop
# words removed, counted in descending order, keeping words with n > 10.
docs1 <- friends |>
  filter(speaker == "Ross Geller") |>
  select(text) |>
  # split the text into one word per row
  unnest_tokens(output = word, input = text) |>
  # explicit join key: avoids the implicit "Joining with `by = ...`" message
  # and keeps the join stable if either table gains a shared column
  anti_join(stop_words, by = join_by(word)) |>
  count(word, sort = TRUE) |>
  filter(n > 10)
Joining with `by = join_by(word)`
# Circular word cloud of the tidytext word counts in docs1,
# random light colours on black.
wordcloud2(
  data = docs1,
  size = 1.5,
  shape = "circle",
  color = "random-light",
  backgroundColor = "black"
)
# Term-document matrix of the tm corpus, converted to a plain matrix
# (one row per term, one column per document).
dtm <- as.matrix(TermDocumentMatrix(docs))

# Total occurrences of each term across all documents, largest first.
words <- sort(rowSums(dtm), decreasing = TRUE)

# Two-column word/frequency table, which is passed to wordcloud2() below.
df <- data.frame(word = names(words), freq = words)

# Circular word cloud of the tm-based counts, random light colours on black.
wordcloud2(
  data = df,
  size = .5,
  shape = "circle",
  color = "random-light",
  backgroundColor = "black"
)
Introduction to Text Analysis in R: David Caughlin
Lexicon-based sentiment analysis
# Load the SHRM discussion-board posts from the working directory.
db <- readr::read_csv(file = "SHRM Discussion Board.csv")
Rows: 46 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Thread, Text
dbl (1): Post
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Word counts for the discussion-board posts: tokenise the Text column,
# remove stop words, count in descending order, keep words with n > 10.
db_new <- db |>
  # split the text into one word per row
  unnest_tokens(output = word, input = Text) |>
  # explicit join key: avoids the implicit "Joining with `by = ...`" message
  # and keeps the join stable if either table gains a shared column
  anti_join(stop_words, by = join_by(word)) |>
  count(word, sort = TRUE) |>
  filter(n > 10)
Joining with `by = join_by(word)`
# Circular word cloud of the discussion-board word counts,
# random light colours on black.
wordcloud2(
  data = db_new,
  size = 1.5,
  shape = "circle",
  color = "random-light",
  backgroundColor = "black"
)
# Load the Shijing CSV; the `zhengwen` column is the one segmented further down.
shi <- readr::read_csv(file = "../ebooks/诗/诗经/shijing.csv")
New names:
Rows: 305 Columns: 6
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(5): h2, h3, title, hplinks, zhengwen dbl (1): ...1
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
# Token counts for the Shijing text: segment the `zhengwen` column into words
# with a default jiebaR worker, then count each token, most frequent first.
# The token column keeps the name `zhengwen`.
shi_new <- tibble(zhengwen = segment(pull(shi, zhengwen), worker())) |>
  count(zhengwen, sort = TRUE)
# Circular word cloud of the Shijing token counts,
# random light colours on black.
wordcloud2(
  data = shi_new,
  size = 1.5,
  shape = "circle",
  color = "random-light",
  backgroundColor = "black"
)
回到顶部