# Dissimilarity Statistics

## Usage

`Dissimilarity(text.var, grouping.var = NULL, method = "prop", diag = FALSE, upper = FALSE, p = 2, ...)`

## Arguments

text.var
A text variable or word frequency matrix object.
grouping.var
The grouping variables. Default `NULL` generates one word list for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.
method
Distance methods (see `dist` function). If `"prop"` (the default) the result is 1 - `"binary"`.
diag
logical. If `TRUE` returns the diagonals of the matrix. If `method = "prop"` diagonals will not be returned.
upper
logical. If `TRUE` returns the upper triangle of the matrix.
p
The power of the Minkowski distance.
...
Other arguments passed to `wfm`.

## Value

Returns a matrix of dissimilarity values (the agreement between text).

## Description

Uses the distance function to calculate dissimilarity statistics by grouping variables.

## Examples

```## <strong>Not run</strong>:
# with(DATA, Dissimilarity(state, person, diag = TRUE))
#
# ## Clustering: Dendrogram
# (x <- with(pres_debates2012, Dissimilarity(dialogue, list(person, time))))
# fit <- hclust(x)
# plot(fit)
# ## draw dendrogram with red borders around the 3 clusters
# rect.hclust(fit, k=3, border=c("red", "purple", "seagreen"))
#
# ## Clustering: Dendrogram with p.values
# library(pvclust)
# wfm.mod <- with(pres_debates2012, wfm(dialogue, list(person, time)))
# fit <- suppressMessages(pvclust(wfm.mod, method.hclust="ward",
#     method.dist="euclidean"))
# plot(fit)
# pvrect(fit, alpha=.95)
#
# ## Multidimentional Scaling
# ## Based on blog post from Bodong Chen
# ## http://bodongchen.com/blog/?p=301
#
# ## Fit it: 2-D
# (diss <- with(pres_debates2012, Dissimilarity(dialogue, list(person, time),
#     method = "euclidean")))
# fit <- cmdscale(diss, eig = TRUE, k = 2)
#
# ## Plot it 2-D
# points <- data.frame(x = fit\$points[, 1], y = fit\$points[, 2])
# ggplot(points, aes(x = x, y = y)) +
#     geom_point(data = points, aes(x = x, y = y, color = rownames(points))) +
#     geom_text(data = points, aes(x = x, y = y - 0.2, label = row.names(points)))
#
# ## Fit it: 3-D
# library(scatterplot3d)
# fit <- cmdscale(diss, eig = TRUE, k = 3)
#
# points <- data.frame(colSplit(names(fit\$points[, 1])))
# library(qdapTools)
# points\$colors <- points\$X1 %l% data.frame(levels(points\$X1),
#     qcv(yellow, yellow, blue, yellow, red, yellow))
# points\$shape <- points\$X2 %l% data.frame(levels(points\$X2), c(15, 17, 19))
#
# ## Plot it: 3-D
# scatterplot3d(fit\$points[, 1], fit\$points[, 2], fit\$points[, 3],
#     color = points\$colors, pch = points\$shape,
#     main = "Semantic Space Scaled to 3D", xlab = "x", ylab = "y",
#     zlab = "z", type = "h")
#
# legend("bottomright", title="Person",
#    qcv(Obama, Romney, Other), fill=qcv(blue, red, yellow))
# legend("topleft",  paste("Time", 1:3), pch=c(15, 17, 19))
#
# ## Compare to Cosine Similarity
# cos_sim <- function(x, y) x %*% y / sqrt(x%*%x * y%*%y)
# mat <- matrix(rbinom(500, 0:1, .45), ncol=10)
# v_outer(mat, cos_sim)
#
# v_outer(with(DATA, wfm(state, person)), cos_sim)
# with(DATA, Dissimilarity(state, person))
# ## <strong>End(Not run)</strong>
```

`dist`