rm_email(text.var, trim = !extract, clean = TRUE, pattern = "@rm_email", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
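trim: logical. If TRUE removes leading and trailing white spaces.
clean: logical. If TRUE extra white spaces and escaped characters will be removed.
pattern: A character string containing a regular expression (or character string for fixed = TRUE) to be matched in the given character vector. Default, @rm_email uses the rm_email regex from the regular expression dictionary from the dictionary argument.
replacement: Replacement for matched pattern.
extract: logical. If TRUE the emails are extracted into a list of vectors.
dictionary: A dictionary of canned regular expressions to search within if pattern begins with "@rm_".
...: Other arguments passed to gsub.

Remove/replace/extract email addresses from a string.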
The email regular expression was taken from: http://stackoverflow.com/a/25077704/1000343
x <- paste("fred is fred@foo.com and joe is joe@example.com - but @this is a twitter handle for twit@here.com or foo+bar@google.com/fred@foo.fnord")
x2 <- c("fred is fred@foo.com and joe is joe@example.com - but @this is a", "twitter handle for twit@here.com or foo+bar@google.com/fred@foo.fnord", "hello world")

rm_email(x)
[1] "fred is and joe is - but @this is a twitter handle for or /"

rm_email(x, replacement = '<a href="mailto:\\1" target="_blank">\\1</a>')
[1] "fred is <a href=\"mailto:fred@foo.com\" target=\"_blank\">fred@foo.com</a> and joe is <a href=\"mailto:joe@example.com\" target=\"_blank\">joe@example.com</a> - but @this is a twitter handle for <a href=\"mailto:twit@here.com\" target=\"_blank\">twit@here.com</a> or <a href=\"mailto:foo+bar@google.com\" target=\"_blank\">foo+bar@google.com</a>/<a href=\"mailto:fred@foo.fnord\" target=\"_blank\">fred@foo.fnord</a>"

rm_email(x, extract=TRUE)
[[1]]
[1] "fred@foo.com" "joe@example.com" "twit@here.com" "foo+bar@google.com" "fred@foo.fnord"

rm_email(x2, extract=TRUE)
[[1]]
[1] "fred@foo.com" "joe@example.com"

[[2]]
[1] "twit@here.com" "foo+bar@google.com" "fred@foo.fnord"

[[3]]
[1] NA
gsub,
stri_extract_all_regex
Other rm_ functions: as_numeric,
as_numeric2, rm_number;
as_time, as_time2,
rm_time, rm_transcript_time;
rm_abbreviation; rm_angle,
rm_bracket,
rm_bracket_multiple,
rm_curly, rm_round,
rm_square; rm_between,
rm_between_multiple;
rm_caps_phrase; rm_caps;
rm_citation_tex; rm_citation;
rm_city_state_zip;
rm_city_state; rm_date;
rm_default; rm_dollar;
rm_emoticon; rm_endmark;
rm_hash; rm_nchar_words;
rm_non_ascii; rm_non_words;
rm_percent; rm_phone;
rm_postal_code;
rm_repeated_characters;
rm_repeated_phrases;
rm_repeated_words; rm_tag;
rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket,
rm_white_colon,
rm_white_comma,
rm_white_endmark,
rm_white_lead,
rm_white_lead_trail,
rm_white_multiple,
rm_white_punctuation,
rm_white_trail; rm_zip