# 点击查看 R 代码 (click to view the R code)
library(dplyr)
library(here)
library(jiebaR)
library(purrr)
library(stringr)
library(tibble)
library(tidytext)
library(wordcloud2)
# Word-cloud script: segment a report with jiebaR, drop stop words, count the
# remaining words and render the counts with wordcloud2().
#
# Besides the packages providing segment()/worker(), unnest_tokens(), here()
# and wordcloud2(), this script relies on dplyr (pull, filter, anti_join,
# count, join_by), stringr (str_trim, str_detect), purrr (discard) and tibble
# being attached.

# Read the report one line per element. readLines() is used rather than
# read.csv(sep = "\n") so that quote characters in the text are not treated as
# CSV quoting, numeric-only content is not type-converted, and the encoding is
# declared the same way as for the stop-word file below.
report_lines <- readLines("report.txt", encoding = "UTF-8")

# Keep `text` as a one-column data frame (column `V1`) so code that expects
# that shape keeps working. Empty and whitespace-only lines carry no words and
# are dropped.
text <- data.frame(V1 = report_lines[nzchar(trimws(report_lines))])

# Stop-word list: one entry per line, trimmed, empty entries removed, stored
# in a `word` column so it can be anti-joined against the token table.
stopwords <- readLines(here("data/stopwords-zh.txt"), encoding = "UTF-8") |>
  str_trim() |>
  discard(\(x) x == "") |>
  tibble(word = _)

text |>
  pull(V1) |>
  # Segment the lines into words with a default jiebaR worker.
  segment(worker()) |>
  tibble(word = _) |>
  # Normalise the tokens with tidytext's default tokenizer.
  unnest_tokens(input = word, output = word) |>
  # Keep tokens of at least two characters that contain no digit,
  # punctuation or percent sign.
  filter(nchar(word) >= 2, !str_detect(word, "[0-9[:punct:]%]")) |>
  anti_join(stopwords, join_by(word)) |>
  # Word frequencies, most frequent first; wordcloud2() draws them.
  count(word, sort = TRUE) |>
  wordcloud2()