rm_time. qdapRegex 0.5.0

Usage

rm_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
rm_transcript_time(text.var, trim = !extract, clean = TRUE, pattern = "@rm_transcript_time", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
as_time(x, as.POSIXlt = FALSE, millisecond = TRUE)
as_time2(x, ...)

Arguments

text.var: The text variable.
trim: logical. If TRUE removes leading and trailing white spaces.
clean: logical. If TRUE extra white spaces and escaped characters will be removed.
pattern: A character string containing a regular expression (or character string for fixed = TRUE) to be matched in the given character vector (see Details for additional information). Default, @rm_time uses the rm_time regex from the regular expression dictionary from the dictionary argument.
replacement: Replacement for matched pattern.
extract: logical. If TRUE the times are extracted into a list of vectors.
dictionary: A dictionary of canned regular expressions to search within if pattern begins with "@rm_".
x: A list with extracted time stamps.
as.POSIXlt: logical. If TRUE the output will be converted to POSIXlt (via as.POSIXlt).
millisecond: logical. If TRUE milliseconds are retained. If FALSE they are rounded and added to seconds.
...: Other arguments passed to gsub.

Value

Returns a character string with time removed. If extract = TRUE the times are instead extracted into a list of vectors.

Description

rm_time - Remove/replace/extract time from a string.

rm_transcript_time - Remove/replace/extract transcript specific time stamps from a string.

as_time - Convert a time stamp removed by rm_time or rm_transcript_time to a standard time format (HH:MM:SS.OS) and optionally convert to POSIXlt.

as_time2 - A convenience function for as_time that unlists and returns a vector rather than a list.

Details

The default regular expression used by rm_time finds time with no AM/PM. This behavior can be altered by using a secondary regular expression from the regex_usa data (or other dictionary) via pattern = "@rm_time2". See Examples for example usage.

Note

... in as_time2 are the other arguments passed to as_time.

References

The time regular expression was taken from: http://stackoverflow.com/a/25111133/1000343

Examples

x <-  c("R uses 1:5 for 1, 2, 3, 4, 5.",
    "At 3:00 we'll meet up and leave by 4:30:20",
    "We'll meet at 6:33.", "He ran it in :22.34")

rm_time(x)

[1] "R uses 1:5 for 1, 2, 3, 4, 5." "At we'll meet up and leave by" "We'll meet at ."               "He ran it in"                 

rm_time(x, extract=TRUE)

[[1]]
[1] NA

[[2]]
[1] "3:00"    "4:30:20"

[[3]]
[1] "6:33"

[[4]]
[1] ":22.34"



## With AM/PM
x <- c(
    "I'm getting 3:04 AM just fine, but...",
    "for 10:47 AM I'm getting 0:47 AM instead.",
    "no time here",
    "Some time has 12:04 with no AM/PM after it",
    "Some time has 12:04 a.m. or the form 1:22 pm"
)

rm_time(x, extract=TRUE)

[[1]]
[1] "3:04"

[[2]]
[1] "10:47" "0:47" 

[[3]]
[1] NA

[[4]]
[1] "12:04"

[[5]]
[1] "12:04" "1:22" 


rm_time(x, pat="@rm_time2", extract=TRUE)

[[1]]
[1] "3:04 AM"

[[2]]
[1] "10:47 AM" "0:47 AM" 

[[3]]
[1] NA

[[4]]
[1] NA

[[5]]
[1] "12:04 a.m." "1:22 pm"   


rm_time(x, pat="@rm_time2")

[1] "I'm getting just fine, but..."              "for I'm getting instead."                  
[3] "no time here"                               "Some time has 12:04 with no AM/PM after it"
[5] "Some time has or the form"                 

rm_time(x, pat=pastex("@rm_time2", "@rm_time"), extract=TRUE)

[[1]]
[1] "3:04 AM"

[[2]]
[1] "10:47 AM" "0:47 AM" 

[[3]]
[1] NA

[[4]]
[1] "12:04"

[[5]]
[1] "12:04 a.m." "1:22 pm"   



# Convert to standard format
as_time(rm_time(x, extract=TRUE))

[[1]]
[1] "00:03:04.0"

[[2]]
[1] "00:10:47.0" "00:00:47.0"

[[3]]
[1] NA

[[4]]
[1] "00:12:04.0"

[[5]]
[1] "00:12:04.0" "00:01:22.0"


as_time(rm_time(x, extract=TRUE), as.POSIXlt = TRUE)

[[1]]
[1] "2015-08-16 00:03:04 EDT"

[[2]]
[1] "2015-08-16 00:10:47 EDT" "2015-08-16 00:00:47 EDT"

[[3]]
[1] NA

[[4]]
[1] "2015-08-16 00:12:04 EDT"

[[5]]
[1] "2015-08-16 00:12:04 EDT" "2015-08-16 00:01:22 EDT"


as_time(rm_time(x, extract=TRUE), as.POSIXlt = FALSE, millisecond = FALSE)

[[1]]
[1] "00:03:4"

[[2]]
[1] "00:10:47" "00:00:47"

[[3]]
[1] NA

[[4]]
[1] "00:12:4"

[[5]]
[1] "00:12:4"  "00:01:22"



# Transcript specific time stamps
x2 <-c(
    '08:15 8 minutes and 15 seconds	00:08:15.0',
    '3:15 3 minutes and 15 seconds	not 1:03:15.0',
    '01:22:30 1 hour 22 minutes and 30 seconds	01:22:30.0',
    '#00:09:33-5# 9 minutes and 33.5 seconds	00:09:33.5',
    '00:09.33,75 9 minutes and 33.5 seconds	00:09:33.75'
)

rm_transcript_time(x2)

[1] "8 minutes and 15 seconds"         "3 minutes and 15 seconds not"     "1 hour 22 minutes and 30 seconds"
[4] "# 9 minutes and 33.5 seconds"     "9 minutes and 33.5 seconds"      

(out <- rm_transcript_time(x2, extract=TRUE))

[[1]]
[1] "08:15"      "00:08:15.0"

[[2]]
[1] "3:15"      "1:03:15.0"

[[3]]
[1] "01:22:30"   "01:22:30.0"

[[4]]
[1] "#00:09:33-5" "00:09:33.5" 

[[5]]
[1] "00:09.33,75" "00:09:33.75"



as_time(out)

[[1]]
[1] "00:08:15.0" "00:08:15.0"

[[2]]
[1] "00:03:15.0" "01:03:15.0"

[[3]]
[1] "01:22:30.0" "01:22:30.0"

[[4]]
[1] "00:09:33.5" "00:09:33.5"

[[5]]
[1] "00:09:33.75" "00:09:33.75"


as_time(out, TRUE)

[[1]]
[1] "2015-08-16 00:08:15 EDT" "2015-08-16 00:08:15 EDT"

[[2]]
[1] "2015-08-16 00:03:15 EDT" "2015-08-16 01:03:15 EDT"

[[3]]
[1] "2015-08-16 01:22:30 EDT" "2015-08-16 01:22:30 EDT"

[[4]]
[1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT"

[[5]]
[1] "2015-08-16 00:09:33 EDT" "2015-08-16 00:09:33 EDT"


as_time(out, ,FALSE)

[[1]]
[1] "00:08:15" "00:08:15"

[[2]]
[1] "00:03:15" "01:03:15"

[[3]]
[1] "01:22:30" "01:22:30"

[[4]]
[1] "00:09:33" "00:09:33"

[[5]]
[1] "00:09:34" "00:09:34"



## Not run: 
# if (!require("pacman")) install.packages("pacman")
# pacman::p_load(chron)
# lapply(as_time(out), chron::times)
# lapply(as_time(out, , FALSE), chron::times)
# ## End(Not run)

See also: gsub, stri_extract_all_regex. Other rm_ functions: as_numeric, as_numeric2, rm_number; rm_abbreviation; rm_angle, rm_bracket, rm_bracket_multiple, rm_curly, rm_round, rm_square; rm_between, rm_between_multiple; rm_caps_phrase; rm_caps; rm_citation_tex; rm_citation; rm_city_state_zip; rm_city_state; rm_date; rm_default; rm_dollar; rm_email; rm_emoticon; rm_endmark; rm_hash; rm_nchar_words; rm_non_ascii; rm_non_words; rm_percent; rm_phone; rm_postal_code; rm_repeated_characters; rm_repeated_phrases; rm_repeated_words; rm_tag; rm_title_name; rm_twitter_url, rm_url; rm_white, rm_white_bracket, rm_white_colon, rm_white_comma, rm_white_endmark, rm_white_lead, rm_white_lead_trail, rm_white_multiple, rm_white_punctuation, rm_white_trail; rm_zip.

Author

stackoverflow's hwnd and Tyler Rinker.

Remove/Replace/Extract Time