wfm(text.var = NULL, grouping.var = NULL, output = "raw", stopwords = NULL, char2space = "~~", ...)
"wfm"(text.var = NULL, grouping.var = NULL, output = "raw", stopwords = NULL, char2space = "~~", ...)
"wfm"(text.var = NULL, grouping.var = NULL, output = "raw", stopwords = NULL, char2space = "~~", ...)
"wfm"(text.var = NULL, grouping.var = NULL, output = "raw", stopwords = NULL, char2space = "~~", ...)
wfdf(text.var, grouping.var = NULL, stopwords = NULL, margins = FALSE, output = "raw", digits = 2, char2space = "~~", ...)
wfm_expanded(text.var, grouping.var = NULL, ...)
wfm_combine(wf.obj, word.lists, matrix = TRUE)
"weight"(x, type = "prop", ...)
"weight"(x, type = "prop", ...)
as.wfm(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, ...)
"as.wfm"(x, col = "docs", row = "text", ...)
"wfm"(text.var, ...)
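grouping.var: Default NULL generates
one word list for all text. Also takes a single grouping variable or a list
of 1 or more grouping variables.
output: Output type ("raw" by default; otherwise "proportion" or "percent").
char2space: If char.keep is NULL, char2space will activate this
argument.
margins: If TRUE provides grouping.var and word
variable totals.
wf.obj: A wfm or wfdf object.
word.lists: The word lists to pass to wfm_combine.
matrix: If TRUE returns the output as a
wfm rather than a wfdf object.
type: The type of weighting to use: c("prop", "max",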
"scaled"). All weight by column. "prop" uses a proportion
weighting and all columns sum to 1. "max" weights in proportion to
the max value; all values are integers and column sums may not be equal.
"scaled" uses scale to scale with
center = FALSE; output is not integer and column sums may not be
equal.
...: Other arguments supplied to Corpus or
TermDocumentMatrix. For as.wfm, these are other
arguments passed to as.wfm methods (currently ignored).
wfm - returns a word frequency matrix of the class matrix.
wfdf - returns a word frequency data frame of the class data.frame with
a words column and optional margin sums.
wfm_expanded - returns a matrix similar to a word frequency
matrix (wfm) but the rows are expanded to represent the maximum usages
of the word and cells are dummy coded to indicate that number of uses.
wfm_combine - returns a word frequency matrix (wfm) or
dataframe (wfdf) with counts for the combined word.lists merged and
remaining terms (else).
weight - Returns a weighted matrix for use with other R
packages. The output is not of the class "wfm".
as.wfm - Returns a matrix of the class "wfm".
wfm - Generate a word frequency matrix by grouping variable(s).
wfm.wfdf - wfdf method for wfm.
wfm.character - character method for wfm.
wfm.factor - factor method for wfm.
wfdf - Generate a word frequency data frame by grouping variable.
wfm_expanded - Expand a word frequency matrix to have multiple rows
for each word.
wfm_combine - Combines words (rows) of a word frequency matrix
(wfm) or word frequency data frame (wfdf) together.
weight - Weight a word frequency matrix for analysis where such
weighting is sensible.
weight.wfdf - Weight a word frequency matrix for analysis where such
weighting is sensible.
as.wfm - Attempts to coerce a matrix to a wfm.
as.wfm.matrix - matrix method for as.wfm used to
convert matrices to a wfm.
as.wfm.default - Default method for as.wfm used to
convert matrices to a wfm.
as.wfm.TermDocumentMatrix - TermDocumentMatrix method for
as.wfm used to convert a TermDocumentMatrix to a wfm.
as.wfm.DocumentTermMatrix - DocumentTermMatrix method for
as.wfm used to convert a DocumentTermMatrix to a wfm.
as.wfm.data.frame - data.frame method for as.wfm used to
convert a data.frame to a wfm.
as.wfm.wfdf - wfdf method for as.wfm used to
convert a wfdf to a wfm.
as.wfm.Corpus - Corpus method for as.wfm used to
convert a Corpus to a wfm.
wfm.Corpus - Corpus method for wfm.
Words can be kept together as a single word/entry by inserting a double
tilde ("~~"), or another character string passed to char2space, between them.
This is useful for keeping proper names as a single unit.
## Not run:
# ## word frequency matrix (wfm) example:
# with(DATA, wfm(state, list(sex, adult)))[1:15, ]
# with(DATA, wfm(state, person))[1:15, ]
# Filter(with(DATA, wfm(state, list(sex, adult))), 5)
# with(DATA, wfm(state, list(sex, adult)))
#
# ## Filter particular words based on max/min values in wfm
# v <- with(DATA, wfm(state, list(sex, adult)))
# Filter(v, 5)
# Filter(v, 5, count.apostrophe = FALSE)
# Filter(v, 5, 7)
# Filter(v, 4, 4)
# Filter(v, 3, 4)
# Filter(v, 3, 4, stopwords = Top25Words)
#
# ## insert double tilde ("~~") to keep phrases (i.e., first last name)
# alts <- c(" fun", "I ")
# state2 <- space_fill(DATA$state, alts, rm.extra = FALSE)
# with(DATA, wfm(state2, list(sex, adult)))[1:18, ]
#
# ## word frequency dataframe (wfdf) example:
# with(DATA, wfdf(state, list(sex, adult)))[1:15, ]
# with(DATA, wfdf(state, person))[1:15, ]
#
# ## wfm_expanded example:
# z <- wfm(DATA$state, DATA$person)
# wfm_expanded(z)[30:45, ] #two "you"s
#
# ## wfm_combine examples:
# #===================
# ## raw no margins (will work)
# x <- wfm(DATA$state, DATA$person)
#
# ## raw with margin (will work)
# y <- wfdf(DATA$state, DATA$person, margins = TRUE)
#
# ## Proportion matrix
# z2 <- wfm(DATA$state, DATA$person, output="proportion")
#
# WL1 <- c(y[, 1])
# WL2 <- list(c("read", "the", "a"), c("you", "your", "you're"))
# WL3 <- list(bob = c("read", "the", "a"), yous = c("you", "your", "you're"))
# WL4 <- list(bob = c("read", "the", "a"), yous = c("a", "you", "your", "your're"))
# WL5 <- list(yous = c("you", "your", "your're"))
# WL6 <- list(c("you", "your", "your're")) #no name so will be called words 1
# WL7 <- c("you", "your", "your're")
#
# wfm_combine(z2, WL2) #Won't work not a raw frequency matrix
# wfm_combine(x, WL2) #Works (raw and no margins)
# wfm_combine(y, WL2) #Works (raw with margins)
# wfm_combine(y, c("you", "your", "your're"))
# wfm_combine(y, WL1)
# wfm_combine(y, WL3)
# ## wfm_combine(y, WL4) #Error
# wfm_combine(y, WL5)
# wfm_combine(y, WL6)
# wfm_combine(y, WL7)
#
# worlis <- c("you", "it", "it's", "no", "not", "we")
# y <- wfdf(DATA$state, list(DATA$sex, DATA$adult), margins = TRUE)
# z <- wfm_combine(y, worlis)
#
# chisq.test(z)
# chisq.test(wfm(y))
#
# ## Dendrogram
# presdeb <- with(pres_debates2012, wfm(dialogue, list(person, time)))
# library(sjPlot)
# sjc.dend(t(presdeb), 2:4)
#
# ## Words correlated within turns of talk
# ## EXAMPLE 1
# library(reports)
# x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|")))
# dat <- wfm(rajSPLIT$dialogue, x)
#
# cor(t(dat)[, c("romeo", "juliet")])
# cor(t(dat)[, c("romeo", "banished")])
# cor(t(dat)[, c("romeo", "juliet", "hate", "love")])
# qheat(cor(t(dat)[, c("romeo", "juliet", "hate", "love")]),
#     diag.na = TRUE, values = TRUE, digits = 3, by.column = NULL)
#
# dat2 <- wfm(DATA$state, id(DATA))
# qheat(cor(t(dat2)), low = "yellow", high = "red",
#     grid = "grey90", diag.na = TRUE, by.column = NULL)
#
# ## EXAMPLE 2
# x2 <- factor(with(pres_debates2012, paste(time, pad(TOT(tot)), sep = "|")))
# dat2 <- wfm(pres_debates2012$dialogue, x2)
# wrds <- word_list(pres_debates2012$dialogue,
#     stopwords = c("it's", "that's", Top200Words))
# wrds2 <- tolower(sort(wrds$rfswl[[1]][, 1]))
# qheat(word_cor(t(dat2), word = wrds2, r = NULL),
#     diag.na = TRUE, values = TRUE, digits = 3, by.column = NULL,
#     high="red", low="yellow", grid=NULL)
#
# ## EXAMPLE 3
# library(gridExtra); library(ggplot2); library(grid)
# dat3 <- lapply(qcv(OBAMA, ROMNEY), function(x) {
#     with(pres_debates2012, wfm(dialogue[person == x], x2[person == x]))
# })
#
#
# # Presidential debates by person
# dat5 <- pres_debates2012
# dat5 <- dat5[dat5$person %in% qcv(ROMNEY, OBAMA), ]
#
# disp <- with(dat5, dispersion_plot(dialogue, wrds2, grouping.var = person,
#     total.color = NULL, rm.vars=time))
#
#
# cors <- lapply(dat3, function(m) {
#     word_cor(t(m), word = wrds2, r = NULL)
# })
#
# plots <- lapply(cors, function(x) {
#     qheat(x, diag.na = TRUE, values = TRUE, digits = 3, plot = FALSE,
#     by.column = NULL, high="red", low="yellow", grid=NULL)
# })
#
# plots <- lapply(1:2, function(i) {
#     plots[[i]] + ggtitle(qcv(OBAMA, ROMNEY)[i]) +
#     theme(axis.title.x = element_blank(),
#         plot.margin = unit(rep(0, 4), "lines"))
# })
#
# grid.arrange(disp, arrangeGrob(plots[[1]], plots[[2]], ncol=1), ncol=2)
#
# ## With `word_cor`
# worlis <- list(
#     pronouns = c("you", "it", "it's", "we", "i'm", "i"),
#     negative = qcv(no, dumb, distrust, not, stinks),
#     literacy = qcv(computer, talking, telling)
# )
# y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE))
# z <- wfm_combine(y, worlis)
#
# word_cor(t(z), word = names(worlis), r = NULL)
#
# ## Plotting method
# plot(y, TRUE)
# plot(z)
#
# ## Correspondence Analysis
# library(ca)
#
# dat <- pres_debates2012
# dat <- dat[dat$person %in% qcv(ROMNEY, OBAMA), ]
#
# speech <- stemmer(dat$dialogue)
# mytable1 <- with(dat, wfm(speech, list(person, time), stopwords = Top25Words))
#
# fit <- ca(mytable1)
# summary(fit)
# plot(fit)
# plot3d.ca(fit, labels=1)
#
#
# mytable2 <- with(dat, wfm(speech, list(person, time), stopwords = Top200Words))
#
# fit2 <- ca(mytable2)
# summary(fit2)
# plot(fit2)
# plot3d.ca(fit2, labels=1)
#
# ## Weight a wfm
# WFM <- with(DATA, wfm(state, list(sex, adult)))
# plot(weight(WFM, "scaled"), TRUE)
# weight(WFM, "prop")
# weight(WFM, "max")
# weight(WFM, "scaled")
## End(Not run)