pastex(..., sep = "|", dictionary = getOption("regex.library"))
x %|% y
x %+% y
(@) (e.g., "@rm_hash") or a regular expression from the regex_supplement dictionary prefixed with an at (@) (e.g., "@time_12_hours").
(|).
pastex - A wrapper for paste(collapse="|") that also searches the default and supplemental (regex_supplement) dictionaries for regular expressions before pasting them together with a pipe (|) separator.
%|% - A binary operator version of pastex that joins two character strings with a regex or ("|"). Equivalent to pastex(x, y, sep="|").
%+% - A binary operator version of pastex that joins two character strings with no space. Equivalent to pastex(x, y, sep="").
Note that while pastex is designed for pasting purposes, it can also be used to call a single regex from the default regional dictionary or the supplemental dictionary (regex_supplement) (see Examples).
x <- c("There is $5.50 for me.", "that's 45.6% of the pizza",
    "14% is $26 or $25.99", "It's 12:30 pm to 4:00 am")

pastex("@rm_percent", "@rm_dollar")
#> [1] "\\(?[0-9.]+\\)?%|\\$\\(?[0-9.]+\\)?"
pastex("@rm_percent", "@time_12_hours")
#> [1] "\\(?[0-9.]+\\)?%|(1[012]|[1-9]):[0-5][0-9](\\s?)(am|pm)"

rm_dollar(x, extract=TRUE, pattern=pastex("@rm_percent", "@rm_dollar"))
#> [[1]]
#> [1] "$5.50"
#> [[2]]
#> [1] "45.6%"
#> [[3]]
#> [1] "14%" "$26" "$25.99"
#> [[4]]
#> [1] NA
rm_dollar(x, extract=TRUE, pattern=pastex("@rm_dollar", "@rm_percent", "@time_12_hours"))
#> [[1]]
#> [1] "$5.50"
#> [[2]]
#> [1] "45.6%"
#> [[3]]
#> [1] "14%" "$26" "$25.99"
#> [[4]]
#> [1] "12:30 pm" "4:00 am"

## retrieve regexes from dictionary
pastex("@rm_email")
#> [1] "([_+a-z0-9-]+(\\.[_+a-z0-9-]+)*@[a-z0-9-]+(\\.[a-z0-9-]+)*(\\.[a-z]{2,14}))"
pastex("@rm_url3")
#> [1] "(https?|ftps?)://(-\\.)?([^\\s/?\\.#-]+\\.?)+(/[^\\s]*)?"
pastex("@version")
#> [1] "(?<=\\b(v|version)\\s?)([0-9]+)\\.([0-9]+)\\.([0-9]+)(?:\\.([0-9]+))?\\b"

## pipe operator (%|%)
"x" %|% "y"
#> [1] "x|y"
"@rm_url" %|% "@rm_twitter_url"
#> [1] "(http[^ ]*)|(ftp[^ ]*)|(www\\.[^ ]*)|(https?://t\\.co[^ ]*)|(t\\.co[^ ]*)"

## paste operator (%+%)
"x" %+% "y"
#> [1] "xy"
"@rm_time" %+% "\\s[AP]M"
#> [1] "\\d{0,2}:\\d{2}(?:[:.]\\d+)?\\s[AP]M"

## Remove Twitter Short URL
x <- c("download file from http://example.com",
    "this is the link to my website http://example.com",
    "go to http://example.com from more info.",
    "Another url ftp://www.example.com",
    "And https://www.example.net",
    "twitter type: t.co/N1kq0F26tG",
    "still another one https://t.co/N1kq0F26tG :-)")

rm_twitter_url(x)
#> [1] "download file from http://example.com" "this is the link to my website http://example.com"
#> [3] "go to http://example.com from more info." "Another url ftp://www.example.com"
#> [5] "And https://www.example.net" "twitter type:"
#> [7] "still another one :-)"
rm_twitter_url(x, extract=TRUE)
#> [[1]]
#> [1] NA
#> [[2]]
#> [1] NA
#> [[3]]
#> [1] NA
#> [[4]]
#> [1] NA
#> [[5]]
#> [1] NA
#> [[6]]
#> [1] "t.co/N1kq0F26tG"
#> [[7]]
#> [1] "https://t.co/N1kq0F26tG"

## Combine removing Twitter URLs and standard URLs
rm_twitter_n_url <- rm_(pattern="@rm_twitter_url" %|% "@rm_url")
rm_twitter_n_url(x)
#> [1] "download file from" "this is the link to my website" "go to from more info."
#> [4] "Another url" "And" "twitter type:"
#> [7] "still another one :-)"
rm_twitter_n_url(x, extract=TRUE)
#> [[1]]
#> [1] "http://example.com"
#> [[2]]
#> [1] "http://example.com"
#> [[3]]
#> [1] "http://example.com"
#> [[4]]
#> [1] "ftp://www.example.com"
#> [[5]]
#> [1] "https://www.example.net"
#> [[6]]
#> [1] "t.co/N1kq0F26tG"
#> [[7]]
#> [1] "https://t.co/N1kq0F26tG"
paste