check_spelling(text.var, range = 2, assume.first.correct = TRUE, method = "jw", dictionary = qdapDictionaries::GradyAugmented, parallel = TRUE, cores = parallel::detectCores()/2, n.suggests = 8)
which_misspelled(x, suggest = FALSE, range = 2, assume.first.correct = TRUE, dictionary = qdapDictionaries::GradyAugmented, method = "jw", nchar.dictionary = nchar(dictionary), first.char.dictionary = substring(dictionary, 1, 1), n.suggests = 8)
check_spelling_interactive(text.var, range = 2, assume.first.correct = TRUE, click = TRUE, method = "jw", dictionary = qdapDictionaries::GradyAugmented, parallel = TRUE, cores = parallel::detectCores()/2, n.suggests = 8, ...)
correct(x, ...)
dictionary
, to initially limit dictionary
size and thus time to
find a suggested replacement term. This may be expanded if no suitable
suggestion is returned. If TRUE
it is assumed that the
first letter of the misspelled word is correct. This reduces the dictionary
size, thus speeding up computation. (See stringdist
for details). If TRUE
attempts to run the function on
multiple cores. Note that this may not mean a speed boost if you have one
core or if the data set is smaller as the cluster takes time to create.parallel = TRUE
. Default
is half the number of available cores.n.suggests
suggested terms.which_misspelled
- A character string. If correct
-
An object from check_spelling_interactive
. If TRUE
returns a
data.frame
with possible suggestions for misspelled words
(words not found in the dictionary).dictionary
with elements that are the precalculated number of
characters for each word in the dictionary.dictionary
with elements that are the pre-allotted first characters
of each word in the dictionary. If TRUE
the interface is a point and click GUI.
If FALSE
the interface is command line driven.check_spelling
- Returns a data.frame
with
row
(row number),
word.no
(number of
misspelled word), not.found
(a word not found in the dictionary),
suggestion
(the most likely replacement for the word), and
more.suggestions
(A list of vectors of up to 10 most likely replacements).
which_misspelled
- Returns either a named vector (names are
the word number) of possible misspelled words (if suggest = FALSE
)
or a data.frame
with word.no
(number of misspelled
word), not.found
(a word not found in the dictionary),
suggestion
(the most likely replacement for the word), and
more.suggestions
(A list of vectors of up to 10 most likely replacements).
check_spelling_interactive
- Returns a character vector with
the corrected text, the replacement list (via an attribute
to the
character vector), and a function to correct the same spelling errors in
subsequent text character vectors.
correct
- Returns a function for correcting spelling errors.
check_spelling
- Check the spelling for a vector of strings. The
function uses the following technique:
Use stringdist
to find string distances between possible replacements and the misspelled term.
Select the n (n.suggests
) terms from dictionary that are closest to the misspelled term.
which_misspelled
- Check the spelling for a string.
check_spelling_interactive
- Interactively check spelling.
correct
- Access the spell corrector function from a
"check_spelling_interactive"
object for subsequent text character
vector spelling corrections.
A possible misspelled word is defined as not found in the
dictionary
.
check_spelling_interactive
- The user may go back (undo) by
pressing "TYPE MY OWN"
and entering either "!"
(not) or "0"
(similar to a phone system). The second choice in the
"SELECT REPLACEMNT:"
will be the original word and is prefixed with
"IGNORE:"
. Press this to keep the original word.
http://stackoverflow.com/a/24454727/1000343
http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Hornik+Murdoch.pdf
## Not run:
# x <- "Robots are evl creatres and deserv exterimanitation."
# which_misspelled(x, suggest=FALSE)
# which_misspelled(x, suggest=TRUE)
#
# check_spelling(DATA$state)
#
# ## browseURL("http://stackoverflow.com/a/24454727/1000343")
# terms <- c("accounts", "account", "accounting", "acounting", "acount", "acounts", "accounnt")
#
# set.seed(10)
# (fake_text <- unlist(lapply(terms, function(x) {
#     unbag(sample(c(x, sample(DICTIONARY[[1]], sample(1:5, 1)))))
# })))
#
# check_spelling(fake_text)
#
# ##============================##
# ## INTERACTIVE SPELL CHECKING ##
# ##============================##
#
# ## No misspellings found
# check_spelling_interactive(DATA$state)
#
# ## character method approach (minimal example)
# dat <- DATA$state; dat[1] <- "I likedd the cokie icekream"
# (o <- check_spelling_interactive(dat))
# preprocessed(o)
# fixit <- attributes(o)$correct
# fixit(dat)
#
# ## character method approach (larger example)
# m <- check_spelling_interactive(mraja1spl$dialogue[1:75])
# preprocessed(m)
# fixit <- attributes(m)$correct
# fixit(mraja1spl$dialogue[1:75])
#
# ## check_spelling method approach
# out <- check_spelling(mraja1spl$dialogue[1:75])
# (x <- check_spelling_interactive(out))
# preprocessed(x)
# correct(x)(mraja1spl$dialogue[1:75])
# (y <- check_spelling_interactive(out, click=FALSE))
# preprocessed(y)
#
# ## Examine Methods (?stringdist::stringdist)
# strings <- c(
#     "Robots are evl creatres and deserv exterimanitation kream.",
#     "I gots me a biggert measrue, tommorrow"
# )
#
# meths <- c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw")
#
# setNames(lapply(meths, function(x) check_spelling(strings, method=x)), meths)
#
## End(Not run)
stringdist