rm_bracket(text.var, pattern = "all", trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_round(text.var, pattern = "(", trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_square(text.var, pattern = "[", trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_curly(text.var, pattern = "{", trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_angle(text.var, pattern = "<", trim = TRUE, clean = TRUE, replacement = "", extract = FALSE, include.markers = ifelse(extract, FALSE, TRUE), dictionary = getOption("regex.library"), ...)
rm_bracket_multiple(text.var, trim = TRUE, clean = TRUE, pattern = "all", replacement = "", extract = FALSE, include.markers = FALSE, merge = TRUE)
"curly"/"\{", "square"/"[",
"round"/"(", "angle"/"<" and "all". These
strings correspond to: {, [, (, < or all four types.TRUE removes leading and trailing white
spaces.TRUE extra white spaces and escaped
character will be removed.pattern.TRUE the bracketed text is extracted into
a list of vectors.TRUE and extract = TRUE returns
the markers (left/right) and the text between.pattern begins with "@rm_".TRUE the results of each bracket type will
be merged by string. FALSE returns a named list of lists of vectors
of bracketed text per bracket type.gsub.rm_bracket - returns a character string with
multiple brackets removed. If extract = TRUE the bracketed text is
returned as a list of vectors instead.
rm_round - returns a character string with round brackets removed.
rm_square - returns a character string with square brackets
removed.
rm_curly - returns a character string with curly brackets
removed.
rm_angle - returns a character string with angle brackets
removed.
rm_bracket_multiple - returns a character string with
multiple brackets removed. If extract = TRUE the results are
optionally merged and named by bracket type. This is more flexible than
rm_bracket but slower.
Remove/replace/extract bracketed strings.
examp <- structure(list(person = structure(c(1L, 2L, 1L, 3L), .Label = c("bob", "greg", "sue"), class = "factor"), text = c("I love chicken [unintelligible]!", "Me too! (laughter) It's so good.[interrupting]", "Yep it's awesome {reading}.", "Agreed. {is so much fun}")), .Names = c("person", "text"), row.names = c(NA, -4L), class = "data.frame")
examp
  person                                           text
1    bob               I love chicken [unintelligible]!
2   greg Me too! (laughter) It's so good.[interrupting]
3    bob                    Yep it's awesome {reading}.
4    sue                       Agreed. {is so much fun}

rm_bracket(examp$text, pattern = "square")
[1] "I love chicken !" "Me too! (laughter) It's so good." "Yep it's awesome {reading}."
[4] "Agreed. {is so much fun}"

rm_bracket(examp$text, pattern = "curly")
[1] "I love chicken [unintelligible]!" "Me too! (laughter) It's so good.[interrupting]"
[3] "Yep it's awesome ." "Agreed."

rm_bracket(examp$text, pattern = c("square", "round"))
[1] "I love chicken !" "Me too! It's so good." "Yep it's awesome {reading}." "Agreed. {is so much fun}"

rm_bracket(examp$text)
[1] "I love chicken !" "Me too! It's so good." "Yep it's awesome ." "Agreed."

rm_bracket(examp$text, pattern = "square", extract=TRUE)
[[1]]
[1] "unintelligible"

[[2]]
[1] "interrupting"

[[3]]
[1] NA

[[4]]
[1] NA

rm_bracket(examp$text, pattern = "curly", extract=TRUE)
[[1]]
[1] NA

[[2]]
[1] NA

[[3]]
[1] "reading"

[[4]]
[1] "is so much fun"

rm_bracket(examp$text, pattern = c("square", "round"), extract=TRUE)
[[1]]
[1] "unintelligible"

[[2]]
[1] "laughter" "interrupting"

[[3]]
[1] NA

[[4]]
[1] NA

rm_bracket(examp$text, pattern = c("square", "round"), merge = FALSE, extract=TRUE)
[[1]]
[1] "unintelligible"

[[2]]
[1] "laughter" "interrupting"

[[3]]
[1] NA

[[4]]
[1] NA

rm_bracket(examp$text, extract=TRUE)
[[1]]
[1] "unintelligible"

[[2]]
[1] "laughter" "interrupting"

[[3]]
[1] "reading"

[[4]]
[1] "is so much fun"

rm_bracket(examp$tex, include.markers=TRUE, extract=TRUE)
[[1]]
[1] "[unintelligible]"

[[2]]
[1] "(laughter)" "[interrupting]"

[[3]]
[1] "{reading}"

[[4]]
[1] "{is so much fun}"

## Not run:
# library(qdap)
# rm_bracket(examp$tex, pattern="curly", extract=TRUE) %>%
#   unlist() %>%
#   na.omit() %>%
#   paste2()
#
## End(Not run)

x <- "I like [bots] (not). And <likely> many do not {he he}"

rm_round(x)
[1] "I like [bots] . And <likely> many do not {he he}"

rm_round(x, extract = TRUE)
[[1]]
[1] "not"

rm_round(x, include.marker = FALSE)
[1] "I like [bots] (). And <likely> many do not {he he}"

rm_round(x, extract = TRUE, include.marker = TRUE)
[[1]]
[1] "(not)"

rm_square(x)
[1] "I like (not). And <likely> many do not {he he}"

rm_square(x, extract = TRUE)
[[1]]
[1] "bots"

rm_curly(x)
[1] "I like [bots] (not). And <likely> many do not"

rm_curly(x, extract = TRUE)
[[1]]
[1] "he he"

rm_angle(x)
[1] "I like [bots] (not). And many do not {he he}"

rm_angle(x, extract = TRUE)
[[1]]
[1] "likely"

lapply(rm_between('She said, "I am!" and he responded..."Am what?".', left='"', right='"', extract = TRUE), "[", c(TRUE, FALSE))
[[1]]
[1] "I am!"
gsub,
rm_between,
stri_extract_all_regex
Other rm_ functions: as_numeric,
as_numeric2, rm_number;
as_time, as_time2,
rm_time, rm_transcript_time;
rm_abbreviation; rm_between,
rm_between_multiple;
rm_caps_phrase; rm_caps;
rm_citation_tex; rm_citation;
rm_city_state_zip;
rm_city_state; rm_date;
rm_default; rm_dollar;
rm_email; rm_emoticon;
rm_endmark; rm_hash;
rm_nchar_words; rm_non_ascii;
rm_non_words; rm_percent;
rm_phone; rm_postal_code;
rm_repeated_characters;
rm_repeated_phrases;
rm_repeated_words; rm_tag;
rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket,
rm_white_colon,
rm_white_comma,
rm_white_endmark,
rm_white_lead,
rm_white_lead_trail,
rm_white_multiple,
rm_white_punctuation,
rm_white_trail; rm_zip