rm_citation(text.var, trim = !extract, clean = TRUE, pattern = "@rm_citation", replacement = "", extract = FALSE, dictionary = getOption("regex.library"), ...)
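trim: logical. If TRUE, removes leading and trailing white spaces.
clean: logical. If TRUE, extra white spaces and escaped characters will be removed.
pattern: a character string containing a regular expression (or a character string for fixed = TRUE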
) to be matched in the given
character vector (see Details for additional information). Default,
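@rm_citation uses the rm_citation regex from the regular expression
dictionary supplied in the dictionary argument.
replacement: replacement for the matched pattern.
extract: logical. If TRUE, the citations are extracted into a list of vectors.
dictionary: the regular expression dictionary that is searched when pattern begins with "@rm_".

Remove/replace/extract APA6 style citations from a string.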
The default regular expression used by rm_citation finds both in-text and
parenthetical citations. This behavior can be altered by using a secondary
regular expression from the regex_usa data (or other dictionary) via
pattern = "@rm_citation2" (in-text citations only) or
pattern = "@rm_citation3" (parenthetical citations only). See Examples for
example usage.
## All Citations x <- c("Hello World (V. Raptor, 1986) bye", "Narcissism is not dead (Rinker, 2014)", "The R Core Team (2014) has many members.", paste("Bunn (2005) said, \"As for elegance, R is refined, tasteful, and", "beautiful. When I grow up, I want to marry R.\""), "It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).", "Wickham's (in press) Tidy Data should be out soon.", "Rinker's (n.d.) dissertation not so much.", "I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).", "Uwe Ligges (2007) says, \"RAM is cheap and thinking hurts\"" ) rm_citation(x)[1] "Hello World () bye" [2] "Narcissism is not dead ()" [3] "has many members." [4] "said, \"As for elegance, R is refined, tasteful, and beautiful. When I grow up, I want to marry R.\"" [5] "It is wrong to blame ANY tool for our own shortcomings ()." [6] "Tidy Data should be out soon." [7] "dissertation not so much." [8] "I always consult xkcd comics for guidance (; )." [9] "says, \"RAM is cheap and thinking hurts\""rm_citation(x, extract=TRUE)[[1]] [1] "V. Raptor, 1986" [[2]] [1] "Rinker, 2014" [[3]] [1] "The R Core Team (2014)" [[4]] [1] "Bunn (2005)" [[5]] [1] "Baer, 2005" [[6]] [1] "Wickham's (in press)" [[7]] [1] "Rinker's (n.d.)" [[8]] [1] "Foo, 2012" "Bar, 2014" [[9]] [1] "Uwe Ligges (2007)"rm_citation(x, replacement="[CITATION HERE]")[1] "Hello World ([CITATION HERE]) bye" [2] "Narcissism is not dead ([CITATION HERE])" [3] "[CITATION HERE] has many members." [4] "[CITATION HERE] said, \"As for elegance, R is refined, tasteful, and beautiful. When I grow up, I want to marry R.\"" [5] "It is wrong to blame ANY tool for our own shortcomings ([CITATION HERE])." [6] "[CITATION HERE] Tidy Data should be out soon." [7] "[CITATION HERE] dissertation not so much." [8] "I always consult xkcd comics for guidance ([CITATION HERE]; [CITATION HERE])." 
[9] "[CITATION HERE] says, \"RAM is cheap and thinking hurts\""## <strong>Not run</strong>: # qdapTools::vect2df(sort(table(unlist(rm_citation(x, extract=TRUE)))), # "citation", "count") # ## <strong>End(Not run)</strong> ## In-Text rm_citation(x, extract=TRUE, pattern="@rm_citation2")[[1]] [1] NA [[2]] [1] NA [[3]] [1] "The R Core Team (2014)" [[4]] [1] "Bunn (2005)" [[5]] [1] NA [[6]] [1] "Wickham's (in press)" [[7]] [1] "Rinker's (n.d.)" [[8]] [1] NA [[9]] [1] "Uwe Ligges (2007)"## Parenthetical rm_citation(x, extract=TRUE, pattern="@rm_citation3")[[1]] [1] "V. Raptor, 1986" [[2]] [1] "Rinker, 2014" [[3]] [1] NA [[4]] [1] NA [[5]] [1] "Baer, 2005" [[6]] [1] NA [[7]] [1] NA [[8]] [1] "Foo, 2012" "Bar, 2014" [[9]] [1] NA## <strong>Not run</strong>: # ## Mining Citation # url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx") # # (txt <- read_docx("whole_language_timeline-updated.docx")) # # library(qdapTools); library(ggplot2); library(qdap) # txt <- rm_non_ascii(txt) # # parts <- split_vector(txt, split = "References", include = TRUE, regex=TRUE) # # parts[[1]] # # rm_citation(unbag(parts[[1]]), extract=TRUE)[[1]] # # ## By line # rm_citation(parts[[1]], extract=TRUE) # # ## Frequency # left_just(cites <- list2df(sort(table(rm_citation(unbag(parts[[1]]), # extract=TRUE)), T), "freq", "citation")[2:1]) # # ## Distribution of citations (find locations and then plot) # cite_locs <- do.call(rbind, lapply(cites[[1]], function(x){ # m <- gregexpr(x, unbag(parts[[1]]), fixed=TRUE) # data.frame( # citation=x, # start = m[[1]] -5, # end = m[[1]] + 5 + attributes(m[[1]])[["match.length"]] # ) # })) # # ggplot(cite_locs) + # geom_segment(aes(x=start, xend=end, y=citation, yend=citation), size=3, # color="yellow") + # xlab("Duration") + # scale_x_continuous(expand = c(0,0), # limits = c(0, nchar(unbag(parts[[1]])) + 25)) + # theme_grey() + # theme( # panel.grid.major=element_line(color="grey20"), # 
panel.grid.minor=element_line(color="grey20"), # plot.background = element_rect(fill="black"), # panel.background = element_rect(fill="black"), # panel.border = element_rect(colour = "grey50", fill=NA, size=1), # axis.text=element_text(color="grey50"), # axis.title=element_text(color="grey50") # ) # ## <strong>End(Not run)</strong>
gsub, stri_extract_all_regex
Other rm_ functions: as_numeric, as_numeric2, rm_number;
as_time, as_time2, rm_time, rm_transcript_time;
rm_abbreviation;
rm_angle, rm_bracket, rm_bracket_multiple, rm_curly, rm_round, rm_square;
rm_between, rm_between_multiple;
rm_caps_phrase; rm_caps; rm_citation_tex;
rm_city_state_zip; rm_city_state; rm_date;
rm_default; rm_dollar; rm_email; rm_emoticon;
rm_endmark; rm_hash; rm_nchar_words; rm_non_ascii;
rm_non_words; rm_percent; rm_phone; rm_postal_code;
rm_repeated_characters; rm_repeated_phrases; rm_repeated_words;
rm_tag; rm_title_name;
rm_twitter_url, rm_url;
rm_white, rm_white_bracket, rm_white_colon, rm_white_comma,
rm_white_endmark, rm_white_lead, rm_white_lead_trail,
rm_white_multiple, rm_white_punctuation, rm_white_trail;
rm_zip