word_cor(text.var, grouping.var = NULL, word, r = 0.7, values = TRUE, method = "pearson", ...)
Arguments:
grouping.var: Default NULL generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
values: If TRUE returns the named correlates (names are the words). If FALSE only the associated words are returned.
method: The correlation coefficient to compute ("pearson", "kendall", or "spearman").
...: Other arguments passed to wfm.

Value:
Returns a vector of associated words or correlation matrix if r = NULL.
Find associated words within grouping variable(s).
The plotting method for the list output was inspired by Ben Marwick; see http://stackoverflow.com/a/19925445/1000343 for more.
## Not run:
# x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep = "|")))
# word_cor(rajSPLIT$dialogue, x, "romeo", .45)
# word_cor(rajSPLIT$dialogue, x, "love", .5)
#
# ## Negative correlation
# word_cor(rajSPLIT$dialogue, x, "you", -.1)
# with(rajSPLIT, word_cor(dialogue, list(person, act), "hate"))
#
# words <- c("hate", "i", "love", "ghost")
# with(rajSPLIT, word_cor(dialogue, x, words, r = .5))
# with(rajSPLIT, word_cor(dialogue, x, words, r = .4))
#
# ## Set `r = NULL` to get matrix between words
# with(rajSPLIT, word_cor(dialogue, x, words, r = NULL))
#
# ## Plotting
# library(tm)
# data("crude")
# oil_cor1 <- apply_as_df(crude, word_cor, word = "oil", r=.7)
# plot(oil_cor1)
#
# oil_cor2 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=.7)
# plot(oil_cor2)
# plot(oil_cor2, ncol=2)
#
# oil_cor3 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=NULL)
# plot(oil_cor3)
#
# ## Run on multiple times/person/nested
# ## Split and apply to data sets
# ## Suggested use of stemming
# DATA3 <- split(DATA2, DATA2$person)
#
# ## Find correlations between words per turn of talk by person
# ## Throws multiple warning because small data set
# library(qdapTools)
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good), r = NULL)
# })
#
# ## Find words correlated per turn of talk by person
# ## Throws multiple warning because small data set
# lapply(DATA3, function(x) {
#     word_cor(x[, "state"], qdapTools::id(x), qcv(computer, i, no, good))
# })
#
#
# ## A real example
# dat <- pres_debates2012
# dat$TOT <- factor(with(dat, paste(time, pad(TOT(tot)), sep = "|")))
# dat <- dat[dat$person %in% qcv(OBAMA, ROMNEY), ]
# dat$person <- factor(dat$person)
# dat.split <- with(dat, split(dat, list(person, time)))
#
# wrds <- qcv(america, debt, dollar, people, tax, health)
# lapply(dat.split, function(x) {
#     word_cor(x[, "dialogue"], x[, "TOT"], wrds, r=NULL)
# })
#
# ## Supply a matrix (make sure to use `t` on a `wfm` matrix)
# worlis <- list(
#     pronouns = c("you", "it", "it's", "we", "i'm", "i"),
#     negative = qcv(no, dumb, distrust, not, stinks),
#     literacy = qcv(computer, talking, telling)
# )
# y <- wfdf(DATA$state, qdapTools::id(DATA, prefix = TRUE))
# z <- wfm_combine(y, worlis)
#
# out <- word_cor(t(z), word = c(names(worlis), "else.words"), r = NULL)
# out
# plot(out)
#
# ## Additional plotting/viewing
# require(tm)
# data("crude")
#
# out1 <- word_cor(t(as.wfm(crude)), word = "oil", r=.7)
# vect2df(out1[[1]], "word", "cor")
#
# plot(out1)
# qheat(vect2df(out1[[1]], "word", "cor"), values=TRUE, high="red",
#     digits=2, order.by ="cor", plot=FALSE) + coord_flip()
#
#
# out2 <- word_cor(t(as.wfm(crude)), word = c("oil", "country"), r=.7)
# plot(out2)
#
## End(Not run)