Find Correlated Words

Usage

word_cor(text.var, grouping.var = NULL, word, r = 0.7, values = TRUE, method = "pearson", ...)

Arguments

text.var
The text variable (or frequency matrix).
grouping.var
The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
word
The word(s) vector to find associated words for.
r
The correlation level to find associated words for. If positive, this is the minimum value; if negative, this is the maximum value.
values
logical. If TRUE returns the named correlates (names are the words). If FALSE only the associated words are returned.
method
A character string indicating which correlation coefficient is to be computed ("pearson", "kendall", or "spearman").
...
Other arguments passed to wfm.

Find Correlated Words

Value

Returns a vector of associated words, or a correlation matrix if r = NULL.

Description

Find associated words within grouping variable(s).

References

The plotting method for the list output was inspired by Ben Marwick; see http://stackoverflow.com/a/19925445/1000343 for more.

Examples

## <strong>Not run</strong>:
# x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|")))
# word_cor(rajSPLIT$dialogue, x, "romeo", .45)
# word_cor(rajSPLIT$dialogue, x, "love", .5)
#
# ## Negative correlation
# word_cor(rajSPLIT$dialogue, x, "you", -.1)
# with(rajSPLIT, word_cor(dialogue, list(person, act), "hate"))
#
# words <- c("hate", "i", "love", "ghost")
# with(rajSPLIT, word_cor(dialogue, x, words, r = .5))
# with(rajSPLIT, word_cor(dialogue, x, words, r = .4))
#
# ## Set `r = NULL` to get matrix between words
# with(rajSPLIT, word_cor(dialogue, x, words, r = NULL))
#
# ## Plotting
# library(tm)
# data("crude")
# oil_cor1 <- apply_as_df(crude, word_cor, word = "oil", r=.7)
# plot(oil_cor1)
#
# oil_cor2 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=.7)
# plot(oil_cor2)
# plot(oil_cor2, ncol=2)
#
# oil_cor3 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=NULL)
# plot(oil_cor3)
#
# ## Run on multiple times/person/nested
# ## Split and apply to data sets
# ## Suggested use of stemming
# DATA3 <- split(DATA2, DATA2$person)
#
# ## Find correlations between words per turn of talk by person
# ## Throws multiple warnings because of small data set
# library(qdapTools)
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good), r = NULL)
# })
#
# ## Find words correlated per turn of talk by person
# ## Throws multiple warnings because of small data set
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good))
# })
#
#
# ## A real example
# dat <- pres_debates2012
# dat$TOT <- factor(with(dat, paste(time, pad(TOT(tot)), sep = "|")))
# dat <- dat[dat$person %in% qcv(OBAMA, ROMNEY), ]
# dat$person <- factor(dat$person)
# dat.split <- with(dat, split(dat, list(person, time)))
#
# wrds <- qcv(america, debt, dollar, people, tax, health)
# lapply(dat.split, function(x) {
#     word_cor(x[, "dialogue"], x[, "TOT"], wrds, r=NULL)
# })
#
# ## Supply a matrix (make sure to use `t` on a `wfm` matrix)
# worlis <- list(
#     pronouns = c("you", "it", "it's", "we", "i'm", "i"),
#     negative = qcv(no, dumb, distrust, not, stinks),
#     literacy = qcv(computer, talking, telling)
# )
# y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE))
# z <- wfm_combine(y, worlis)
#
# out <- word_cor(t(z), word = c(names(worlis), "else.words"), r = NULL)
# out
# plot(out)
#
# ## Additional plotting/viewing
# require(tm)
# data("crude")
#
# out1 <- word_cor(t(as.wfm(crude)), word = "oil", r=.7)
# vect2df(out1[[1]], "word", "cor")
#
# plot(out1)
# qheat(vect2df(out1[[1]], "word", "cor"), values=TRUE, high="red",
#     digits=2, order.by ="cor", plot=FALSE) + coord_flip()
#
#
# out2 <- word_cor(t(as.wfm(crude)), word = c("oil", "country"), r=.7)
# plot(out2)
# ## <strong>End(Not run)</strong>

See also

word_proximity, findAssocs, word_associate, wfm, cor