read_docx. qdapTools 1.3.1

Usage

read_docx(file, skip = 0)

Arguments

file: The path to the .docx file.
skip: The number of lines to skip.

Value

Returns a character vector.

Description

Read in the content from a .docx file.

Examples

## Not run:
# ## Mining Citation
# url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")
# 
# (txt <- read_docx("whole_language_timeline-updated.docx"))
# 
# library(qdapTools); library(ggplot2); library(qdap)
# txt <- rm_non_ascii(txt)
# 
# parts <- split_vector(txt, split = "References", include = TRUE, regex=TRUE)
# 
# parts[[1]]
# 
# rm_citation(unbag(parts[[1]]), extract=TRUE)[[1]]
# 
# ## By line
# rm_citation(parts[[1]], extract=TRUE)
# 
# ## Frequency
# left_just(cites <- list2df(sort(table(rm_citation(unbag(parts[[1]]),
#     extract=TRUE)), T), "freq", "citation")[2:1])
# 
# ## Distribution of citations (find locations and then plot)
# cite_locs <- do.call(rbind, lapply(cites[[1]], function(x){
#     m <- gregexpr(x, unbag(parts[[1]]), fixed=TRUE)
#     data.frame(
#         citation=x,
#         start = m[[1]] -5,
#         end =  m[[1]] + 5 + attributes(m[[1]])[["match.length"]]
#     )
# }))
# 
# ggplot(cite_locs) +
#     geom_segment(aes(x=start, xend=end, y=citation, yend=citation), size=3,
#         color="yellow") +
#     xlab("Duration") +
#     scale_x_continuous(expand = c(0,0),
#         limits = c(0, nchar(unbag(parts[[1]])) + 25)) +
#     theme_grey() +
#     theme(
#         panel.grid.major=element_line(color="grey20"),
#         panel.grid.minor=element_line(color="grey20"),
#         plot.background = element_rect(fill="black"),
#         panel.background = element_rect(fill="black"),
#         panel.border = element_rect(colour = "grey50", fill=NA, size=1),
#         axis.text=element_text(color="grey50"),
#         axis.title=element_text(color="grey50")
#     )
# ## End(Not run)

Author

Bryan Goodrich and Tyler Rinker.

Read in .docx Content

Usage

Arguments

Value

Description

Examples

Author