rm_between(text.var, left, right, fixed = TRUE, trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_between_multiple(text.var, left, right, fixed = TRUE, trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = FALSE, merge = TRUE)
fixed: If TRUE, regular expression special characters
(c(".", "|", "(", ")", "[", "]", "{", "}", "^", "$", "*", "+", "?"))
will be treated as typical characters. If the user wants to pass a regular
expression with special characters then fixed = FALSE
should be used.
trim: If TRUE,
removes leading and trailing white
spaces.
clean: If TRUE, extra white spaces and escaped
characters will be removed.
replacement: Replacement for the matched pattern.
extract: If TRUE,
the strings are extracted into a
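list of vectors.
include.markers: If TRUE and extract = TRUE,
returns the markers (left/right) and the text between.
dictionary: A dictionary of canned regular expressions to search within if pattern
begins with "@rm_".
merge: If TRUE,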
the results of each bracket type will
be merged by string. FALSE
returns a named list of lists of vectors
of markered text per marker type.
...: Other arguments passed to gsub.

Value: rm_between
returns merged strings and is significantly faster. With
rm_between_multiple
the strings are optionally merged by left/right
symbols. The latter approach is more flexible and
names extracted strings by symbol boundaries; however, it is slower than
rm_between.
Remove/replace/extract strings bounded between a left and right marker.
x <- "I like [bots] (not)."
rm_between(x, "(", ")")
[1] "I like [bots] ."
rm_between(x, "(", ")", extract=TRUE)
[[1]]
[1] "not"
rm_between(x, c("(", "["), c(")", "]"))
[1] "I like ."
rm_between(x, c("(", "["), c(")", "]"), extract=TRUE)
[[1]]
[1] "bots" "not"
rm_between(x, c("(", "["), c(")", "]"), include.markers=FALSE)
[1] "I like [] ()."
rm_between(x, c("(", "["), c(")", "]"), extract=TRUE, include.markers=TRUE)
[[1]]
[1] "[bots]" "(not)"

## multiple (naming and ability to keep separate bracket types but slower)
x <- c("Where is the /big dog#?", "I think he's @arunning@b with /little cat#.")
rm_between_multiple(x, "@a", "@b")
[1] "Where is the /big dog#?" "I think he's with /little cat#."
rm_between_multiple(x, "@a", "@b", extract=TRUE)
$`@a : @b1`
character(0)
$`@a : @b2`
[1] "running"
rm_between_multiple(x, c("/", "@a"), c("#", "@b"))
[1] "Where is the?" "I think he's with."
rm_between_multiple(x, c("/", "@a"), c("#", "@b"), extract=TRUE)
[[1]]
[1] "big dog"
[[2]]
[1] "little cat" "running"
x2 <- c("Where is the L1big dogL2?", "I think he's 98running99 with L1little catL2.")
rm_between_multiple(x2, c("L1", 98), c("L2", 99))
[1] "Where is the?" "I think he's with."
rm_between_multiple(x2, c("L1", 98), c("L2", 99), extract=TRUE)
[[1]]
[1] "big dog"
[[2]]
[1] "little cat" "running"
state <- c("Computer is fun. Not too fun.", "No it's not, it's dumb.", "What should we do?", "You liar, it stinks!", "I am telling the truth!", "How can we be certain?", "There is no way.", "I distrust you.", "What are you talking about?", "Shall we move on? Good then.", "I'm hungry. Let's eat. You already?")
rm_between_multiple(state, c("is", "we"), c("too", "on"))
[1] "Computer fun." "No it's not, it's dumb." "What should we do?"
[4] "You liar, it stinks!" "I am telling the truth!" "How can we be certain?"
[7] "There is no way." "I distrust you." "What are you talking about?"
[10] "Shall? Good then." "I'm hungry. Let's eat. You already?"

## Use Grouping
s <- "something before stuff $some text$ in between $1$ and after"
rm_between(s, "$", "$", replacement="<B>\\2<E>")
[1] "something before stuff <B>some text<E> in between <B>1<E> and after"

## Using regular expressions as boundaries (fixed = FALSE)
x <- c(
    "There are 2.3 million species in the world",
    "There are 2.3 billion species in the world"
)
rm_between(x, left='There', right = '[mb]illion', fixed = FALSE, extract=TRUE, include=TRUE)
[[1]]
[1] "There are 2.3 million"
[[2]]
[1] "There are 2.3 billion"
See also: gsub, rm_bracket, stri_extract_all_regex
Other rm_ functions: as_numeric, as_numeric2, rm_number; as_time, as_time2, rm_time, rm_transcript_time; rm_abbreviation; rm_angle, rm_bracket, rm_bracket_multiple, rm_curly, rm_round, rm_square; rm_caps_phrase; rm_caps; rm_citation_tex; rm_citation; rm_city_state_zip; rm_city_state; rm_date; rm_default; rm_dollar; rm_email; rm_emoticon; rm_endmark; rm_hash; rm_nchar_words; rm_non_ascii; rm_non_words; rm_percent; rm_phone; rm_postal_code; rm_repeated_characters; rm_repeated_phrases; rm_repeated_words; rm_tag; rm_title_name; rm_twitter_url, rm_url; rm_white, rm_white_bracket, rm_white_colon, rm_white_comma, rm_white_endmark, rm_white_lead, rm_white_lead_trail, rm_white_multiple, rm_white_punctuation, rm_white_trail; rm_zip