rm_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)

rm_transcript_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_transcript_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)

as_time(x, as.POSIXlt = FALSE, millisecond = TRUE)

as_time2(x, ...)
Arguments:
trim: If TRUE removes leading and trailing white
spaces.
clean: If TRUE extra white spaces and escaped
character will be removed.
pattern: A regular expression (or a character string for
fixed = TRUE) to be matched in the given
character vector (see Details for additional information). Default,
@rm_time uses the rm_time regex from the regular expression
dictionary from the dictionary argument.
replacement: Replacement for the matched pattern.
extract: If TRUE the times are extracted into a
list of vectors.
dictionary: A dictionary of regular expressions that is searched when
pattern begins with "@rm_".
as.POSIXlt: If TRUE the output will be converted to
as.POSIXlt.
millisecond: If TRUE milliseconds are retained. If
FALSE they are rounded and added to seconds.
...: Other arguments passed to gsub.

rm_time - Remove/replace/extract time from a string.
rm_transcript_time - Remove/replace/extract transcript specific time
stamps from a string.
as_time - Convert a time stamp removed by rm_time or
rm_transcript_time to a standard time format (HH:MM:SS.OS) and
optionally convert to as.POSIXlt.
as_time2 - A convenience function for as_time that unlists and
returns a vector rather than a list.
The default regular expression used by rm_time finds
times with no AM/PM. This behavior can be altered by using a
secondary regular expression from the regex_usa
data (or other dictionary) via pattern = "@rm_time2". See
Examples for example usage.
... in as_time2 are the other arguments passed to as_time.
The time regular expression was taken from: http://stackoverflow.com/a/25111133/1000343
x <- c("R uses 1:5 for 1, 2, 3, 4, 5.", "At 3:00 we'll meet up and leave by 4:30:20", "We'll meet at 6:33.", "He ran it in :22.34") rm_time(x)[1] "R uses 1:5 for 1, 2, 3, 4, 5." "At we'll meet up and leave by" "We'll meet at ." "He ran it in"rm_time(x, extract=TRUE)[[1]] [1] NA [[2]] [1] "3:00" "4:30:20" [[3]] [1] "6:33" [[4]] [1] ":22.34"## With AM/PM x <- c( "I'm getting 3:04 AM just fine, but...", "for 10:47 AM I'm getting 0:47 AM instead.", "no time here", "Some time has 12:04 with no AM/PM after it", "Some time has 12:04 a.m. or the form 1:22 pm" ) rm_time(x, extract=TRUE)[[1]] [1] "3:04" [[2]] [1] "10:47" "0:47" [[3]] [1] NA [[4]] [1] "12:04" [[5]] [1] "12:04" "1:22"rm_time(x, pat="@rm_time2", extract=TRUE)[[1]] [1] "3:04 AM" [[2]] [1] "10:47 AM" "0:47 AM" [[3]] [1] NA [[4]] [1] NA [[5]] [1] "12:04 a.m." "1:22 pm"rm_time(x, pat="@rm_time2")[1] "I'm getting just fine, but..." "for I'm getting instead." [3] "no time here" "Some time has 12:04 with no AM/PM after it" [5] "Some time has or the form"rm_time(x, pat=pastex("@rm_time2", "@rm_time"), extract=TRUE)[[1]] [1] "3:04 AM" [[2]] [1] "10:47 AM" "0:47 AM" [[3]] [1] NA [[4]] [1] "12:04" [[5]] [1] "12:04 a.m." 
"1:22 pm"# Convert to standard format as_time(rm_time(x, extract=TRUE))[[1]] [1] "00:03:04.0" [[2]] [1] "00:10:47.0" "00:00:47.0" [[3]] [1] NA [[4]] [1] "00:12:04.0" [[5]] [1] "00:12:04.0" "00:01:22.0"as_time(rm_time(x, extract=TRUE), as.POSIXlt = TRUE)[[1]] [1] "2015-08-16 00:03:04 EDT" [[2]] [1] "2015-08-16 00:10:47 EDT" "2015-08-16 00:00:47 EDT" [[3]] [1] NA [[4]] [1] "2015-08-16 00:12:04 EDT" [[5]] [1] "2015-08-16 00:12:04 EDT" "2015-08-16 00:01:22 EDT"as_time(rm_time(x, extract=TRUE), as.POSIXlt = FALSE, millisecond = FALSE)[[1]] [1] "00:03:4" [[2]] [1] "00:10:47" "00:00:47" [[3]] [1] NA [[4]] [1] "00:12:4" [[5]] [1] "00:12:4" "00:01:22"# Transcript specific time stamps x2 <-c( '08:15 8 minutes and 15 seconds 00:08:15.0', '3:15 3 minutes and 15 seconds not 1:03:15.0', '01:22:30 1 hour 22 minutes and 30 seconds 01:22:30.0', '#00:09:33-5# 9 minutes and 33.5 seconds 00:09:33.5', '00:09.33,75 9 minutes and 33.5 seconds 00:09:33.75' ) rm_transcript_time(x2)[1] "8 minutes and 15 seconds" "3 minutes and 15 seconds not" "1 hour 22 minutes and 30 seconds" [4] "# 9 minutes and 33.5 seconds" "9 minutes and 33.5 seconds"(out <- rm_transcript_time(x2, extract=TRUE))[[1]] [1] "08:15" "00:08:15.0" [[2]] [1] "3:15" "1:03:15.0" [[3]] [1] "01:22:30" "01:22:30.0" [[4]] [1] "#00:09:33-5" "00:09:33.5" [[5]] [1] "00:09.33,75" "00:09:33.75"as_time(out)[[1]] [1] "00:08:15.0" "00:08:15.0" [[2]] [1] "00:03:15.0" "01:03:15.0" [[3]] [1] "01:22:30.0" "01:22:30.0" [[4]] [1] "00:09:33.5" "00:09:33.5" [[5]] [1] "00:09:33.75" "00:09:33.75"as_time(out, TRUE)[[1]] [1] "2015-08-16 00:08:15 EDT" "2015-08-16 00:08:15 EDT" [[2]] [1] "2015-08-16 00:03:15 EDT" "2015-08-16 01:03:15 EDT" [[3]] [1] "2015-08-16 01:22:30 EDT" "2015-08-16 01:22:30 EDT" [[4]] [1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT" [[5]] [1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT"as_time(out, ,FALSE)[[1]] [1] "00:08:15" "00:08:15" [[2]] [1] "00:03:15" "01:03:15" [[3]] [1] "01:22:30" "01:22:30" [[4]] [1] 
"00:09:33" "00:09:33" [[5]] [1] "00:09:34" "00:09:34"## <strong>Not run</strong>: # if (!require("pacman")) install.packages("pacman") # pacman::p_load(chron) # lapply(as_time(out), chron::times) # lapply(as_time(out, , FALSE), chron::times) # ## <strong>End(Not run)</strong>
gsub,
stri_extract_all_regex
Other rm_.functions: as_numeric,
as_numeric2, rm_number;
rm_abbreviation; rm_angle,
rm_bracket,
rm_bracket_multiple,
rm_curly, rm_round,
rm_square; rm_between,
rm_between_multiple;
rm_caps_phrase; rm_caps;
rm_citation_tex; rm_citation;
rm_city_state_zip;
rm_city_state; rm_date;
rm_default; rm_dollar;
rm_email; rm_emoticon;
rm_endmark; rm_hash;
rm_nchar_words; rm_non_ascii;
rm_non_words; rm_percent;
rm_phone; rm_postal_code;
rm_repeated_characters;
rm_repeated_phrases;
rm_repeated_words; rm_tag;
rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket,
rm_white_colon,
rm_white_comma,
rm_white_endmark,
rm_white_lead,
rm_white_lead_trail,
rm_white_multiple,
rm_white_punctuation,
rm_white_trail; rm_zip
Other rm_.functions: as_numeric,
as_numeric2, rm_number;
rm_abbreviation; rm_angle,
rm_bracket,
rm_bracket_multiple,
rm_curly, rm_round,
rm_square; rm_between,
rm_between_multiple;
rm_caps_phrase; rm_caps;
rm_citation_tex; rm_citation;
rm_city_state_zip;
rm_city_state; rm_date;
rm_default; rm_dollar;
rm_email; rm_emoticon;
rm_endmark; rm_hash;
rm_nchar_words; rm_non_ascii;
rm_non_words; rm_percent;
rm_phone; rm_postal_code;
rm_repeated_characters;
rm_repeated_phrases;
rm_repeated_words; rm_tag;
rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket,
rm_white_colon,
rm_white_comma,
rm_white_endmark,
rm_white_lead,
rm_white_lead_trail,
rm_white_multiple,
rm_white_punctuation,
rm_white_trail; rm_zip