word_cor. qdap 2.2.0

Usage

word_cor(text.var, grouping.var = NULL, word, r = 0.7, values = TRUE, method = "pearson", ...)

Arguments

text.var: The text variable (or frequency matrix).
grouping.var: The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
word: The word(s) vector to find associated words for.
r: The correlation level at which to find associated words. If positive, this is the minimum value; if negative, this is the maximum value.
values: logical. If TRUE returns the named correlates (names are the words). If FALSE only the associated words are returned.
method: A character string indicating which correlation coefficient is to be computed ("pearson", "kendall", or "spearman").
...: Other arguments passed to wfm.

Find Correlated Words

Value

Returns a vector of associated words, or a correlation matrix if r = NULL.

Description

Find associated words within grouping variable(s).

References

The plotting method for the list output was inspired by Ben Marwick; see http://stackoverflow.com/a/19925445/1000343 for more.

Examples

## Not run:
# x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|")))
# word_cor(rajSPLIT$dialogue, x, "romeo", .45)
# word_cor(rajSPLIT$dialogue, x, "love", .5)
# 
# ## Negative correlation
# word_cor(rajSPLIT$dialogue, x, "you", -.1)
# with(rajSPLIT, word_cor(dialogue, list(person, act), "hate"))
# 
# words <- c("hate", "i", "love", "ghost")
# with(rajSPLIT, word_cor(dialogue, x, words, r = .5))
# with(rajSPLIT, word_cor(dialogue, x, words, r = .4))
# 
# ## Set `r = NULL` to get matrix between words
# with(rajSPLIT, word_cor(dialogue, x, words, r = NULL))
# 
# ## Plotting
# library(tm)
# data("crude")
# oil_cor1 <- apply_as_df(crude, word_cor, word = "oil", r=.7)
# plot(oil_cor1)
# 
# oil_cor2 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=.7)
# plot(oil_cor2)
# plot(oil_cor2, ncol=2)
# 
# oil_cor3 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=NULL)
# plot(oil_cor3)
# 
# ## Run on multiple times/person/nested
# ## Split and apply to data sets
# ## Suggested use of stemming
# DATA3 <- split(DATA2, DATA2$person)
# 
# ## Find correlations between words per turn of talk by person
# ## Throws multiple warnings because the data set is small
# library(qdapTools)
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good), r = NULL)
# })
# 
# ## Find words correlated per turn of talk by person
# ## Throws multiple warnings because the data set is small
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good))
# })
# 
# 
# ## A real example
# dat <- pres_debates2012
# dat$TOT <- factor(with(dat, paste(time, pad(TOT(tot)), sep = "|")))
# dat <- dat[dat$person %in% qcv(OBAMA, ROMNEY), ]
# dat$person <- factor(dat$person)
# dat.split <- with(dat, split(dat, list(person, time)))
# 
# wrds <- qcv(america, debt, dollar, people, tax, health)
# lapply(dat.split, function(x) {
#     word_cor(x[, "dialogue"], x[, "TOT"], wrds, r=NULL)
# })
# 
# ## Supply a matrix (make sure to use `t` on a `wfm` matrix)
# worlis <- list(
#     pronouns = c("you", "it", "it's", "we", "i'm", "i"),
#     negative = qcv(no, dumb, distrust, not, stinks),
#     literacy = qcv(computer, talking, telling)
# )
# y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE))
# z <- wfm_combine(y, worlis)
# 
# out <- word_cor(t(z), word = c(names(worlis), "else.words"), r = NULL)
# out
# plot(out)
# 
# ## Additional plotting/viewing
# require(tm)
# data("crude")
# 
# out1 <- word_cor(t(as.wfm(crude)), word = "oil", r=.7)
# vect2df(out1[[1]], "word", "cor")
# 
# plot(out1)
# qheat(vect2df(out1[[1]], "word", "cor"), values=TRUE, high="red",
#     digits=2, order.by ="cor", plot=FALSE) + coord_flip()
# 
# 
# out2 <- word_cor(t(as.wfm(crude)), word = c("oil", "country"), r=.7)
# plot(out2)
# ## End(Not run)

Find Correlated Words

Usage

Arguments

Find Correlated Words

Value

Description

References

Examples

See also