rm_number(text.var, trim = !extract, clean = TRUE, pattern = "@rm_number", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
as_numeric(x)
as_numeric2(x)
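trim: If TRUE removes leading and trailing white
spaces.
clean: If TRUE extra white spaces and escaped
characters will be removed.
pattern: A regular expression (or a character string when fixed = TRUE) to be matched in the given
character vector. Default, @rm_number uses the
rm_number regex from the regular expression dictionary from
the dictionary argument.
replacement: Replacement for the matched pattern.
extract: If TRUE the numbers are extracted into a
list of vectors.
dictionary: A dictionary of regular expressions to search within if pattern begins with "@rm_".
...: Other arguments passed to gsub.

rm_number - Returns a character string with numbers removed.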
as_numeric - Returns a list of vectors of numbers.
as_numeric2 - Returns an unlisted vector of numbers.
rm_number - Remove/replace/extract numbers from a string (works on
numbers with commas, decimals and negatives).
as_numeric - A wrapper for as.numeric(gsub(",", "", x)), which
removes commas and converts a list of vectors of strings to numeric. If the
string cannot be converted to numeric, NA is returned.
as_numeric2 - A convenience function for as_numeric that
unlists and returns a vector rather than a list.
The number regular expression was created by Jason Gray.
x <- c("-2 is an integer. -4.3 and 3.33 are not.",
    "123,456 is 0 alot -123456 more than -.2",
    "and 3456789123 fg for 345.",
    "fg 12,345 23 .44 or 18.",
    "don't remove this 444,44",
    "hello world -.q")

rm_number(x)
[1] "is an integer. and are not." "is alot more than" "and fg for ." "fg or ."
[5] "don't remove this 444,44" "hello world -.q"

rm_number(x, extract=TRUE)
[[1]]
[1] "-2" "-4.3" "3.33"

[[2]]
[1] "123,456" "0" "-123456" "-.2"

[[3]]
[1] "3456789123" "345"

[[4]]
[1] "12,345" "23" ".44" "18"

[[5]]
[1] NA

[[6]]
[1] NA

##Convert to numeric
as_numeric(rm_number(x, extract=TRUE)) # retain list
[[1]]
[1] -2.00 -4.30 3.33

[[2]]
[1] 123456.0 0.0 -123456.0 -0.2

[[3]]
[1] 3456789123 345

[[4]]
[1] 12345.00 23.00 0.44 18.00

[[5]]
[1] NA

[[6]]
[1] NA

as_numeric2(rm_number(x, extract=TRUE)) # unlist
[1] -2.00 -4.30 3.33 123456.00 0.00 -123456.00 -0.20 3456789123.00 345.00
[10] 12345.00 23.00 0.44 18.00 NA NA
gsub,
stri_extract_all_regex
Other rm_ functions: as_time,
as_time2, rm_time,
rm_transcript_time;
rm_abbreviation; rm_angle,
rm_bracket,
rm_bracket_multiple,
rm_curly, rm_round,
rm_square; rm_between,
rm_between_multiple;
rm_caps_phrase; rm_caps;
rm_citation_tex; rm_citation;
rm_city_state_zip;
rm_city_state; rm_date;
rm_default; rm_dollar;
rm_email; rm_emoticon;
rm_endmark; rm_hash;
rm_nchar_words; rm_non_ascii;
rm_non_words; rm_percent;
rm_phone; rm_postal_code;
rm_repeated_characters;
rm_repeated_phrases;
rm_repeated_words; rm_tag;
rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket,
rm_white_colon,
rm_white_comma,
rm_white_endmark,
rm_white_lead,
rm_white_lead_trail,
rm_white_multiple,
rm_white_punctuation,
rm_white_trail; rm_zip