read.transcript(file, col.names = NULL, text.var = NULL, merge.broke.tot = TRUE, header = FALSE, dash = "", ellipsis = "...", quote2bracket = FALSE, rm.empty.rows = TRUE, na.strings = c("999", "NA", "", " "), sep = NULL, skip = 0, nontext2factor = TRUE, text, comment.char = "", ...)
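file: A file name that is not an absolute path is taken relative to the current working directory, getwd().
text.var: If NULL, read.transcript attempts to guess the text.variable (dialogue).
merge.broke.tot: If TRUE and if the file being read in is .docx with broken space between a single turn of talk, read.transcript will attempt to merge these into a single turn of talk.
header: If TRUE, the file contains the names of the variables as its first line.
quote2bracket: If TRUE, replaces curly quotes with curly braces (default is FALSE). If FALSE, curly quotes are removed.
rm.empty.rows: If TRUE, read.transcript attempts to remove empty rows.
na.strings: Character strings which are to be interpreted as NA values.
sep: The field separator. The default of NULL instructs read.transcript to use a separator suitable for the file type being read in.
nontext2factor: If TRUE, attempts to convert any non-text to a factor.
comment.char: Use "" to turn off the interpretation of comments altogether.
...: Further arguments passed to read.table.
Returns a dataframe of dialogue and people.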
Read .docx, .csv or .xlsx files into R.
If a transcript is a .docx file, read.transcript expects two columns (generally person and dialogue) with some sort of separator (default is colon separator). .doc files must be converted to .docx before reading in.
The output of read.transcript may contain errors if the file being read in is .docx. The researcher should carefully investigate each transcript for errors before further parsing the data.
## Not run:
# #Note: to view the document below use the path:
# system.file("extdata/transcripts/", package = "qdap")
# (doc1 <- system.file("extdata/transcripts/trans1.docx", package = "qdap"))
# (doc2 <- system.file("extdata/transcripts/trans2.docx", package = "qdap"))
# (doc3 <- system.file("extdata/transcripts/trans3.docx", package = "qdap"))
# (doc4 <- system.file("extdata/transcripts/trans4.xlsx", package = "qdap"))
#
# dat1 <- read.transcript(doc1)
# truncdf(dat1, 40)
# dat2 <- read.transcript(doc1, col.names = c("person", "dialogue"))
# truncdf(dat2, 40)
# dat2b <- rm_row(dat2, "person", "[C") #remove bracket row
# truncdf(dat2b, 40)
#
# ## read.transcript(doc2) #throws an error (need skip)
# dat3 <- read.transcript(doc2, skip = 1); truncdf(dat3, 40)
#
# ## read.transcript(doc3, skip = 1) #incorrect read; wrong sep
# dat4 <- read.transcript(doc3, sep = "-", skip = 1); truncdf(dat4, 40)
#
# dat5 <- read.transcript(doc4); truncdf(dat5, 40) #an .xlsx file
# trans <- "sam: Computer is fun. Not too fun.
# greg: No it's not, it's dumb.
# teacher: What should we do?
# sam: You liar, it stinks!"
#
# read.transcript(text=trans)
#
# ## Read in text specify spaces as sep
# ## EXAMPLE 1
#
# read.transcript(text="34 The New York Times reports a lot of words here.
# 12 Greenwire reports a lot of words.
# 31 Only three words.
# 2 The Financial Times reports a lot of words.
# 9 Greenwire short.
# 13 The New York Times reports a lot of words again.",
# col.names=qcv(NO, ARTICLE), sep=" ")
#
# ## EXAMPLE 2
#
# read.transcript(text="34.. The New York Times reports a lot of words here.
# 12.. Greenwire reports a lot of words.
# 31.. Only three words.
# 2.. The Financial Times reports a lot of words.
# 9.. Greenwire short.
# 13.. The New York Times reports a lot of words again.",
# col.names=qcv(NO, ARTICLE), sep="\\.\\.")
# ## End(Not run)
dir_map