rm_number(text.var, trim = !extract, clean = TRUE, pattern = "@rm_number", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
as_numeric(x)
as_numeric2(x)
TRUE
removes leading and trailing white
spaces.TRUE
extra white spaces and escaped
characters will be removed. fixed = TRUE
) to be matched in the given
character vector. Default, @rm_number
uses the
rm_number
regex from the regular expression dictionary from
the dictionary
argument.pattern
.TRUE
the numbers are extracted into a
list of vectors.pattern
begins with "@rm_"
.gsub
.rm_number
- Returns a character string with numbers removed.
as_numeric
- Returns a list of vectors of numbers.
as_numeric2
- Returns an unlisted vector of numbers.
rm_number
- Remove/replace/extract numbers from a string (works on
numbers with commas, decimals and negatives).
as_numeric
- A wrapper for as.numeric(gsub(",", "", x))
, which
removes commas and converts a list of vectors of strings to numeric. If the
string cannot be converted to numeric, NA
is returned.
as_numeric2
- A convenience function for as_numeric
that
unlists and returns a vector rather than a list.
The number regular expression was created by Jason Gray.
x <- c("-2 is an integer. -4.3 and 3.33 are not.",
    "123,456 is 0 alot -123456 more than -.2",
    "and 3456789123 fg for 345.",
    "fg 12,345 23 .44 or 18.",
    "don't remove this 444,44",
    "hello world -.q")

rm_number(x)
[1] "is an integer. and are not." "is alot more than" "and fg for ." "fg or ."
[5] "don't remove this 444,44" "hello world -.q"

rm_number(x, extract=TRUE)
[[1]]
[1] "-2" "-4.3" "3.33"

[[2]]
[1] "123,456" "0" "-123456" "-.2"

[[3]]
[1] "3456789123" "345"

[[4]]
[1] "12,345" "23" ".44" "18"

[[5]]
[1] NA

[[6]]
[1] NA

## Convert to numeric
as_numeric(rm_number(x, extract=TRUE)) # retain list
[[1]]
[1] -2.00 -4.30 3.33

[[2]]
[1] 123456.0 0.0 -123456.0 -0.2

[[3]]
[1] 3456789123 345

[[4]]
[1] 12345.00 23.00 0.44 18.00

[[5]]
[1] NA

[[6]]
[1] NA

as_numeric2(rm_number(x, extract=TRUE)) # unlist
[1] -2.00 -4.30 3.33 123456.00 0.00 -123456.00 -0.20 3456789123.00 345.00
[10] 12345.00 23.00 0.44 18.00 NA NA
gsub
,
stri_extract_all_regex
Other rm_ functions: as_time
,
as_time2
, rm_time
,
rm_transcript_time
;
rm_abbreviation
; rm_angle
,
rm_bracket
,
rm_bracket_multiple
,
rm_curly
, rm_round
,
rm_square
; rm_between
,
rm_between_multiple
;
rm_caps_phrase
; rm_caps
;
rm_citation_tex
; rm_citation
;
rm_city_state_zip
;
rm_city_state
; rm_date
;
rm_default
; rm_dollar
;
rm_email
; rm_emoticon
;
rm_endmark
; rm_hash
;
rm_nchar_words
; rm_non_ascii
;
rm_non_words
; rm_percent
;
rm_phone
; rm_postal_code
;
rm_repeated_characters
;
rm_repeated_phrases
;
rm_repeated_words
; rm_tag
;
rm_title_name
;
rm_twitter_url
, rm_url
;
rm_white
, rm_white_bracket
,
rm_white_colon
,
rm_white_comma
,
rm_white_endmark
,
rm_white_lead
,
rm_white_lead_trail
,
rm_white_multiple
,
rm_white_punctuation
,
rm_white_trail
; rm_zip