Search(dataframe, term, column.name = NULL, max.distance = 0.02, ...)

boolean_search(text.var, terms, ignore.case = TRUE, values = FALSE, exclude = NULL, apostrophe.remove = FALSE, char.keep = NULL, digit.remove = FALSE)

text.var %bs% terms
Terms are combined with AND (use AND or && to connect terms
together) and OR (use OR or || to allow for searches of
either set of terms). Spaces may be used to control what is searched for.
For example using " I "
on c("I'm", "I want", "in")
will result
in FALSE TRUE FALSE
whereas "I"
will match all three (if case
is ignored).
ignore.case: If TRUE, case is ignored.
apostrophe.remove: If TRUE, removes apostrophes from the text before examining.
char.keep: termco attempts to auto detect characters to keep based on the elements in match.list.
digit.remove: If TRUE, strips digits from the text before counting. termco attempts to auto detect if digits should be retained based on the elements in match.list.
See agrep.
Search
- Returns the rows of the data frame that match the
search term.
boolean_search
- Returns the values (or indices) of a vector of strings that match
given terms.
Search
- Find terms located in columns of a data frame.
boolean_search
- Conducts a Boolean search for terms/strings within a
character vector.
%bs%
- Binary operator version of boolean_search.
The terms string is first split by the OR separators into a list. Next, each element of the list is split on the AND separator to produce a list of vectors of search terms. Each sentence is then matched against the terms. For a sentence to be counted, it must fit all of the terms in an AND Boolean or one of the conditions in an OR Boolean.
## Not run:
# ## Dataframe search:
# (SampDF <- data.frame("islands"=names(islands)[1:32],mtcars, row.names=NULL))
#
# Search(SampDF, "Cuba", "islands")
# Search(SampDF, "New", "islands")
# Search(SampDF, "Ho")
# Search(SampDF, "Ho", max.distance = 0)
# Search(SampDF, "Axel Heiberg")
# Search(SampDF, 19) #too much tolerance in max.distance
# Search(SampDF, 19, max.distance = 0)
# Search(SampDF, 19, "qsec", max.distance = 0)
#
# ##Boolean search:
# boolean_search(DATA$state, " I ORliar&&stinks")
# boolean_search(DATA$state, " I &&.", values=TRUE)
# boolean_search(DATA$state, " I OR.", values=TRUE)
# boolean_search(DATA$state, " I &&.")
#
# ## Exclusion:
# boolean_search(DATA$state, " I ||.", values=TRUE)
# boolean_search(DATA$state, " I ||.", exclude = c("way", "truth"), values=TRUE)
#
# ## From stackoverflow: http://stackoverflow.com/q/19640562/1000343
# dat <- data.frame(x = c("Doggy", "Hello", "Hi Dog", "Zebra"), y = 1:4)
# z <- data.frame(z =c("Hello", "Dog"))
#
# dat[boolean_search(dat$x, paste(z$z, collapse = "OR")), ]
#
# ## Binary operator version
# dat[dat$x %bs% paste(z$z, collapse = "OR"), ]
#
# ## Passing to `trans_context`
# inds <- boolean_search(DATA.SPLIT$state, " I&&.|| I&&!", ignore.case = FALSE)
# with(DATA.SPLIT, trans_context(state, person, inds=inds))
#
# (inds2 <- boolean_search(raj$dialogue, spaste(paste(negation.words,
#     collapse = " || "))))
# trans_context(raj$dialogue, raj$person, inds2)
#
## End(Not run)
trans_context
termco