Find Correlated Words


word_cor(text.var, grouping.var = NULL, word, r = 0.7, values = TRUE, method = "pearson", ...)


The text variable (or frequency matrix).
The grouping variables. Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
The word(s) vector to find associated words for.
The correlation level to find associated words for. If positive this is the minimum value, if negative this is the maximum value.
logical. If TRUE returns the named correlates (names are the words). If FALSE only the associated words are returned.
A character string indicating which correlation coefficient is to be computed ("pearson", "kendall", or "spearman").
Other arguments passed to wfm.

Find Correlated Words


Returns a vector of associated words, or a correlation matrix if r = NULL.


Find associated words within grouping variable(s).


The plotting method for the list output was inspired by Ben Marwick.


## <strong>Not run</strong>: # x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|"))) # word_cor(rajSPLIT$dialogue, x, "romeo", .45) # word_cor(rajSPLIT$dialogue, x, "love", .5) # # ## Negative correlation # word_cor(rajSPLIT$dialogue, x, "you", -.1) # with(rajSPLIT, word_cor(dialogue, list(person, act), "hate")) # # words <- c("hate", "i", "love", "ghost") # with(rajSPLIT, word_cor(dialogue, x, words, r = .5)) # with(rajSPLIT, word_cor(dialogue, x, words, r = .4)) # # ## Set `r = NULL` to get matrix between words # with(rajSPLIT, word_cor(dialogue, x, words, r = NULL)) # # ## Plotting # library(tm) # data("crude") # oil_cor1 <- apply_as_df(crude, word_cor, word = "oil", r=.7) # plot(oil_cor1) # # oil_cor2 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=.7) # plot(oil_cor2) # plot(oil_cor2, ncol=2) # # oil_cor3 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=NULL) # plot(oil_cor3) # # ## Run on multiple times/person/nested # ## Split and apply to data sets # ## Suggested use of stemming # DATA3 <- split(DATA2, DATA2$person) # # ## Find correlations between words per turn of talk by person # ## Throws multiple warning because small data set # library(qdapTools) # lapply(DATA3, function(x) { # word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good), r = NULL) # }) # # ## Find words correlated per turn of talk by person # ## Throws multiple warning because small data set # lapply(DATA3, function(x) { # word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good)) # }) # # # ## A real example # dat <- pres_debates2012 # dat$TOT <- factor(with(dat, paste(time, pad(TOT(tot)), sep = "|"))) # dat <- dat[dat$person %in% qcv(OBAMA, ROMNEY), ] # dat$person <- factor(dat$person) # dat.split <- with(dat, split(dat, list(person, time))) # # wrds <- qcv(america, debt, dollar, people, tax, health) # lapply(dat.split, function(x) { # word_cor(x[, "dialogue"], x[, "TOT"], wrds, r=NULL) # }) # # ## Supply a matrix (make 
sure to use `t` on a `wfm` matrix) # worlis <- list( # pronouns = c("you", "it", "it's", "we", "i'm", "i"), # negative = qcv(no, dumb, distrust, not, stinks), # literacy = qcv(computer, talking, telling) # ) # y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE)) # z <- wfm_combine(y, worlis) # # out <- word_cor(t(z), word = c(names(worlis), "else.words"), r = NULL) # out # plot(out) # # ## Additional plotting/viewing # require(tm) # data("crude") # # out1 <- word_cor(t(as.wfm(crude)), word = "oil", r=.7) # vect2df(out1[[1]], "word", "cor") # # plot(out1) # qheat(vect2df(out1[[1]], "word", "cor"), values=TRUE, high="red", # digits=2, order.by ="cor", plot=FALSE) + coord_flip() # # # out2 <- word_cor(t(as.wfm(crude)), word = c("oil", "country"), r=.7) # plot(out2) # ## <strong>End(Not run)</strong>

See also

word_proximity, findAssocs, word_associate, wfm, cor