read.table() and subfunctions; write.table() and write.csv(); readLines(); writeLines(); more about
save()
Feb 9, 2026
read.table() and subfunctions; write.table() and write.csv(); readLines(); writeLines(); save(). read.table():
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
file = file name you want to read in, as a character (in quotes). header = does it have a header? (TRUE or FALSE). sep = what symbol is separating the values in each column. read.table():
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
quote = What characters are used to denote quotes? dec = What’s used to denote decimals? Usually “.” or “,”. row.names = which column are the row names in? col.names = specify column names directly. read.table():
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
as.is = Which columns to keep as-is (prevent conversion of strings to factors)? na.strings = What character symbols should be interpreted as NAs? colClasses = A vector of classes to be assumed for the columns. read.table():
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
nrows = How many total rows to read in (after the header, if applicable). skip = How many rows to skip before starting to read; this includes skipping the header. check.names = Makes sure names are valid column names (no special characters, etc.). read.table():
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = FALSE,
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
fill = Should missing values be filled in? strip.white = Should spaces or tabs before or after values be removed? blank.lines.skip = Should blank rows be ignored? read.table(): if you know how these arguments work, you will save time by avoiding typing, find-and-replacing, etc.
read.csv() and read.delim(): “.csv” = comma-separated values
read.csv(file, header = TRUE, sep = ",", quote = "\"",
dec = ".", fill = TRUE, comment.char = "", ...)
Tab-delimited values
read.delim(file, header = TRUE, sep = "\t", quote = "\"",
dec = ".", fill = TRUE, comment.char = "", ...)
read.table(). Access the object read.csv:
read.csv
## function (file, header = TRUE, sep = ",", quote = "\"", dec = ".", ## fill = TRUE, comment.char = "", ...) ## read.table(file = file, header = header, sep = sep, quote = quote, ## dec = dec, fill = fill, comment.char = comment.char, ...) ## <bytecode: 0x140683ba0> ## <environment: namespace:utils>
List all files in the directory with the extension “.csv”
# List all files in the working directory ending in ".csv".
# `pattern` is a regular expression, so escape the dot and anchor the match:
# a bare "." matches ANY character, and an unanchored "csv" can match mid-name.
list.files(pattern = "\\.csv$")
## [1] "Puromycin_data.csv"
“The Puromycin data frame has 23 rows and 3 columns of the reaction velocity versus substrate concentration in an enzymatic reaction involving untreated cells or cells treated with Puromycin.”—R help
Written as a “.csv” file
"","conc","rate","state" "1",0.02,76,"treated" "2",0.02,47,"treated" "3",0.06,97,"treated" "4",0.06,107,"treated" "5",0.11,123,"treated" "6",0.11,139,"treated" "7",0.22,159,"treated" "8",0.22,152,"treated" "9",0.56,191,"treated" "10",0.56,201,"treated" "11",1.1,207,"treated" "12",1.1,200,"treated" "13",0.02,67,"untreated" "14",0.02,51,"untreated" "15",0.06,84,"untreated" "16",0.06,86,"untreated" "17",0.11,98,"untreated" "18",0.11,115,"untreated" "19",0.22,131,"untreated" "20",0.22,124,"untreated" "21",0.56,144,"untreated" "22",0.56,158,"untreated" "23",1.1,160,"untreated"
# Import the Puromycin CSV; the first column holds the row names.
pur <- read.csv("Puromycin_data.csv", row.names = 1)

# Inspect the first six rows.
head(pur)
## conc rate state ## 1 0.02 76 treated ## 2 0.02 47 treated ## 3 0.06 97 treated ## 4 0.06 107 treated ## 5 0.11 123 treated ## 6 0.11 139 treated
Change to tab-delimited text using write.table()
# Export a tab-delimited copy; row.names = FALSE drops the "1".."23" labels.
# Use TRUE/FALSE, never T/F — T and F are ordinary variables and can be reassigned.
write.table(pur, file = "Puromycin_data.delim", row.names = FALSE, sep = "\t")
Note: there is no write.delim() function in base R. "conc" "rate" "state" 0.02 76 "treated" 0.02 47 "treated" 0.06 97 "treated" 0.06 107 "treated" 0.11 123 "treated" 0.11 139 "treated" 0.22 159 "treated" 0.22 152 "treated" 0.56 191 "treated" 0.56 201 "treated" 1.1 207 "treated" 1.1 200 "treated" 0.02 67 "untreated" 0.02 51 "untreated" 0.06 84 "untreated" 0.06 86 "untreated" 0.11 98 "untreated" 0.11 115 "untreated" 0.22 131 "untreated" 0.22 124 "untreated" 0.56 144 "untreated" 0.56 158 "untreated" 1.1 160 "untreated"
readLines(): What if we just downloaded a massive file with an unknown extension, but we think it’s text?
# Check size, permissions, and timestamps before attempting to read the file.
file.info("usearch.otus.97.tax")
## size isdir mode mtime ctime ## usearch.otus.97.tax 16315973 FALSE 755 2026-01-09 14:30:06 2026-01-09 14:30:06 ## atime uid gid uname grname ## usearch.otus.97.tax 2026-01-09 14:43:28 502 20 hgreen staff
# readLines(): pull only the first 4 lines to peek at an unknown text file
# without loading all 16 MB into memory.
tax <- readLines("usearch.otus.97.tax", n = 4)
head(tax)
## [1] "Cluster_ID\tTaxonomy\tRank\tCluster_Size\tMin_GDist\tAvg_GDist\tVote\tMin_rank\tTaxa_counts\tMax_pcts\tNa_pcts" ## [2] "Cluster0\tBacteria;Proteobacteria;Betaproteobacteria;Burkholderiales;Alcaligenaceae;Alcaligenes\tgenus\t120779\t0\t0\t100\tgenus\t1;1;1;1;1;1;0;0\t100;100;100;100;100;100;0;0\t0;0;0;0;0;0;100;100" ## [3] "Cluster1\tBacteria;Firmicutes;Clostridia;Clostridiales;Lachnospiraceae;Roseburia\tgenus\t61062\t0\t0\t100\tgenus\t1;1;1;1;1;1;0;0\t100;100;100;100;100;100;0;0\t0;0;0;0;0;0;100;100" ## [4] "Cluster10\tBacteria;Actinobacteria;Actinobacteria;Bifidobacteriales;Bifidobacteriaceae;Bifidobacterium\tgenus\t26131\t0\t0\t100\tgenus\t1;1;1;1;1;1;0;0\t100;100;100;100;100;100;0;0\t0;0;0;0;0;0;100;100"
readLines() can be used to quickly sample and explore unfamiliar data. cat() and writeLines(): cat() = raw, flexible output to a file or the console. month.abb[1:4]
## [1] "Jan" "Feb" "Mar" "Apr"
# cat() joins the elements with a single space (its default separator).
cat(month.abb[1:4], sep = " ")
## Jan Feb Mar Apr
cat() and writeLines(): writeLines() = similar to cat(), but the default sep = "\n". month.abb[1:4]
## [1] "Jan" "Feb" "Mar" "Apr"
# writeLines() behaves like cat() with sep = "\n": one element per line.
writeLines(month.abb[1:4])
## Jan ## Feb ## Mar ## Apr
cat() is probably not what you want unless you specify sep. Read in the “tax” file:
# Time the full import, then check the dimensions and in-memory size.
system.time(tax <- read.delim("usearch.otus.97.tax"))
dim(tax)
object.size(tax)
## user system elapsed ## 0.153 0.006 0.160 ## [1] 93488 11 ## 13883864 bytes
# Persist the data frame as a gzip-compressed .Rdata file,
# then confirm the new file exists on disk.
save(tax, file = "TAX_data_GZIP.Rdata", compress = "gzip")
list.files()
## [1] "custom.css" "Importing and Exporting data.pdf" ## [3] "Importing-Exporting-Data.html" "Importing-Exporting-Data.Rmd" ## [5] "Puromycin_data.csv" "Puromycin_data.delim" ## [7] "qPCR-data.Rdata" "qPCR-data.txt" ## [9] "TAX_data_GZIP.Rdata" "usearch.otus.97.tax"
Compressed R object 10X smaller than original text file
# Compare on-disk sizes in megabytes (file.info()$size is reported in bytes).
file.info("usearch.otus.97.tax")[1] / 10^6   # original text file
file.info("TAX_data_GZIP.Rdata")[1] / 10^6   # compressed R object
## size ## usearch.otus.97.tax 16.31597 ## size ## TAX_data_GZIP.Rdata 1.223305
read.table() and related functions do not read Excel files; use a package such as RExcel, xlsx, XLConnect, or gdata for those. Exercise: 1. Read the qPCR-data.txt file into R, removing the metadata at the top of the file. I did it this way:
# Skip the 41 metadata lines at the top; read "Undetermined" wells as NA.
dat <- read.delim("qPCR-data.txt", skip = 41, na.strings = "Undetermined")
2. Find the median “CT” of all wells.
# Preview the first six rows of the first ten columns.
head(dat[, seq_len(10)])
## Well Well.Position Omit Sample.Name Target.Name Task Reporter Quencher ## 1 1 A1 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## 2 2 A2 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## 3 3 A3 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## 4 4 A4 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## 5 5 A5 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## 6 6 A6 FALSE Sample 1 HF183 STANDARD FAM NFQ-MGB ## CT Quantity ## 1 NA 1 ## 2 NA 1 ## 3 NA 1 ## 4 19.215 1 ## 5 18.648 1 ## 6 18.506 1
# Median CT across all wells; na.rm = TRUE drops the "Undetermined" wells
# that were read in as NA. Spell out TRUE — T is an ordinary, reassignable variable.
median(dat$CT, na.rm = TRUE)
## [1] 29.234
3. Save the data frame as a compressed R object.
# Save as a gzip-compressed R object, then verify the file was written.
# (The original had save() and list.files() fused onto one line, which is a
# syntax error in R — each statement goes on its own line.)
save(dat, file = "qPCR-data.Rdata", compress = "gzip")
list.files()
## [1] "custom.css" "Importing and Exporting data.pdf" ## [3] "Importing-Exporting-Data.html" "Importing-Exporting-Data.Rmd" ## [5] "Puromycin_data.csv" "Puromycin_data.delim" ## [7] "qPCR-data.Rdata" "qPCR-data.txt" ## [9] "TAX_data_GZIP.Rdata" "usearch.otus.97.tax"