rm_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)rm_transcript_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_transcript_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)as_time(x, as.POSIXlt = FALSE, millisecond = TRUE)as_time2(x, ...)
TRUE
removes leading and trailing white
spaces.TRUE
extra white spaces and escaped
characters will be removed.fixed = TRUE
) to be matched in the given
character vector (see Details for additional information). Default,
@rm_time
uses the rm_time
regex from the regular expression
dictionary from the dictionary
argument.pattern
.TRUE
the times are extracted into a
list of vectors.pattern
begins with "@rm_"
.TRUE
the output will be converted to
as.POSIXlt
.TRUE
milliseconds are retained. If
FALSE
they are rounded and added to seconds.gsub
.rm_time
- Remove/replace/extract time from a string.
rm_transcript_time
- Remove/replace/extract transcript specific time
stamps from a string.
as_time
- Convert a time stamp removed by rm_time
or
rm_transcript_time
to a standard time format (HH:MM:SS.OS) and
optionally convert to as.POSIXlt
.
as_time2
- A convenience function for as_time
that unlists and
returns a vector rather than a list.
The default regular expression used by rm_time
finds
time with no AM/PM. This behavior can be altered by using a
secondary regular expression from the regex_usa
data (or other dictionary) via (pattern = "@rm_time2")
. See
Examples for example usage.
... in as_time2
are the other arguments passed to as_time
.
The time regular expression was taken from: http://stackoverflow.com/a/25111133/1000343
x <- c("R uses 1:5 for 1, 2, 3, 4, 5.", "At 3:00 we'll meet up and leave by 4:30:20", "We'll meet at 6:33.", "He ran it in :22.34") rm_time(x)[1] "R uses 1:5 for 1, 2, 3, 4, 5." "At we'll meet up and leave by" "We'll meet at ." "He ran it in"rm_time(x, extract=TRUE)[[1]] [1] NA [[2]] [1] "3:00" "4:30:20" [[3]] [1] "6:33" [[4]] [1] ":22.34"## With AM/PM x <- c( "I'm getting 3:04 AM just fine, but...", "for 10:47 AM I'm getting 0:47 AM instead.", "no time here", "Some time has 12:04 with no AM/PM after it", "Some time has 12:04 a.m. or the form 1:22 pm" ) rm_time(x, extract=TRUE)[[1]] [1] "3:04" [[2]] [1] "10:47" "0:47" [[3]] [1] NA [[4]] [1] "12:04" [[5]] [1] "12:04" "1:22"rm_time(x, pat="@rm_time2", extract=TRUE)[[1]] [1] "3:04 AM" [[2]] [1] "10:47 AM" "0:47 AM" [[3]] [1] NA [[4]] [1] NA [[5]] [1] "12:04 a.m." "1:22 pm"rm_time(x, pat="@rm_time2")[1] "I'm getting just fine, but..." "for I'm getting instead." [3] "no time here" "Some time has 12:04 with no AM/PM after it" [5] "Some time has or the form"rm_time(x, pat=pastex("@rm_time2", "@rm_time"), extract=TRUE)[[1]] [1] "3:04 AM" [[2]] [1] "10:47 AM" "0:47 AM" [[3]] [1] NA [[4]] [1] "12:04" [[5]] [1] "12:04 a.m." 
"1:22 pm"# Convert to standard format as_time(rm_time(x, extract=TRUE))[[1]] [1] "00:03:04.0" [[2]] [1] "00:10:47.0" "00:00:47.0" [[3]] [1] NA [[4]] [1] "00:12:04.0" [[5]] [1] "00:12:04.0" "00:01:22.0"as_time(rm_time(x, extract=TRUE), as.POSIXlt = TRUE)[[1]] [1] "2015-08-16 00:03:04 EDT" [[2]] [1] "2015-08-16 00:10:47 EDT" "2015-08-16 00:00:47 EDT" [[3]] [1] NA [[4]] [1] "2015-08-16 00:12:04 EDT" [[5]] [1] "2015-08-16 00:12:04 EDT" "2015-08-16 00:01:22 EDT"as_time(rm_time(x, extract=TRUE), as.POSIXlt = FALSE, millisecond = FALSE)[[1]] [1] "00:03:4" [[2]] [1] "00:10:47" "00:00:47" [[3]] [1] NA [[4]] [1] "00:12:4" [[5]] [1] "00:12:4" "00:01:22"# Transcript specific time stamps x2 <-c( '08:15 8 minutes and 15 seconds 00:08:15.0', '3:15 3 minutes and 15 seconds not 1:03:15.0', '01:22:30 1 hour 22 minutes and 30 seconds 01:22:30.0', '#00:09:33-5# 9 minutes and 33.5 seconds 00:09:33.5', '00:09.33,75 9 minutes and 33.5 seconds 00:09:33.75' ) rm_transcript_time(x2)[1] "8 minutes and 15 seconds" "3 minutes and 15 seconds not" "1 hour 22 minutes and 30 seconds" [4] "# 9 minutes and 33.5 seconds" "9 minutes and 33.5 seconds"(out <- rm_transcript_time(x2, extract=TRUE))[[1]] [1] "08:15" "00:08:15.0" [[2]] [1] "3:15" "1:03:15.0" [[3]] [1] "01:22:30" "01:22:30.0" [[4]] [1] "#00:09:33-5" "00:09:33.5" [[5]] [1] "00:09.33,75" "00:09:33.75"as_time(out)[[1]] [1] "00:08:15.0" "00:08:15.0" [[2]] [1] "00:03:15.0" "01:03:15.0" [[3]] [1] "01:22:30.0" "01:22:30.0" [[4]] [1] "00:09:33.5" "00:09:33.5" [[5]] [1] "00:09:33.75" "00:09:33.75"as_time(out, TRUE)[[1]] [1] "2015-08-16 00:08:15 EDT" "2015-08-16 00:08:15 EDT" [[2]] [1] "2015-08-16 00:03:15 EDT" "2015-08-16 01:03:15 EDT" [[3]] [1] "2015-08-16 01:22:30 EDT" "2015-08-16 01:22:30 EDT" [[4]] [1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT" [[5]] [1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT"as_time(out, ,FALSE)[[1]] [1] "00:08:15" "00:08:15" [[2]] [1] "00:03:15" "01:03:15" [[3]] [1] "01:22:30" "01:22:30" [[4]] [1] 
"00:09:33" "00:09:33" [[5]] [1] "00:09:34" "00:09:34"## <strong>Not run</strong>: # if (!require("pacman")) install.packages("pacman") # pacman::p_load(chron) # lapply(as_time(out), chron::times) # lapply(as_time(out, , FALSE), chron::times) # ## <strong>End(Not run)</strong>
gsub
,
stri_extract_all_regex
Other rm_ functions: as_numeric
,
as_numeric2
, rm_number
;
rm_abbreviation
; rm_angle
,
rm_bracket
,
rm_bracket_multiple
,
rm_curly
, rm_round
,
rm_square
; rm_between
,
rm_between_multiple
;
rm_caps_phrase
; rm_caps
;
rm_citation_tex
; rm_citation
;
rm_city_state_zip
;
rm_city_state
; rm_date
;
rm_default
; rm_dollar
;
rm_email
; rm_emoticon
;
rm_endmark
; rm_hash
;
rm_nchar_words
; rm_non_ascii
;
rm_non_words
; rm_percent
;
rm_phone
; rm_postal_code
;
rm_repeated_characters
;
rm_repeated_phrases
;
rm_repeated_words
; rm_tag
;
rm_title_name
;
rm_twitter_url
, rm_url
;
rm_white
, rm_white_bracket
,
rm_white_colon
,
rm_white_comma
,
rm_white_endmark
,
rm_white_lead
,
rm_white_lead_trail
,
rm_white_multiple
,
rm_white_punctuation
,
rm_white_trail
; rm_zip
Other rm_ functions: as_numeric
,
as_numeric2
, rm_number
;
rm_abbreviation
; rm_angle
,
rm_bracket
,
rm_bracket_multiple
,
rm_curly
, rm_round
,
rm_square
; rm_between
,
rm_between_multiple
;
rm_caps_phrase
; rm_caps
;
rm_citation_tex
; rm_citation
;
rm_city_state_zip
;
rm_city_state
; rm_date
;
rm_default
; rm_dollar
;
rm_email
; rm_emoticon
;
rm_endmark
; rm_hash
;
rm_nchar_words
; rm_non_ascii
;
rm_non_words
; rm_percent
;
rm_phone
; rm_postal_code
;
rm_repeated_characters
;
rm_repeated_phrases
;
rm_repeated_words
; rm_tag
;
rm_title_name
;
rm_twitter_url
, rm_url
;
rm_white
, rm_white_bracket
,
rm_white_colon
,
rm_white_comma
,
rm_white_endmark
,
rm_white_lead
,
rm_white_lead_trail
,
rm_white_multiple
,
rm_white_punctuation
,
rm_white_trail
; rm_zip