Description
This is a workflow of using DADA2 to do feature(otu) picking on demultiplexed 16S sequencing data. This workflow should be ran after you run the 16S Amplicon Demultiplex Workflow
To run this workflow, you need to have R, Rstudio, and the package dada2 installed in your computer. To install dada2, run the following commands.
For more detail, please read the dada2 tutorial on: https://benjjneb.github.io/dada2/tutorial.html
source("http://bioconductor.org/biocLite.R")
biocLite("dada2")
Step 1. load the library
rm(list=ls())
library("dada2", quietly=TRUE, verbose=FALSE, warn.conflicts=FALSE)
cat(paste("dada2 version:", packageVersion("dada2")))
Step 2. Create a file list
## set your working directory here
setwd("/Users/chenghaozhu/sequence_data/FF/fastq")
## path to your fastq files
path <- paste0(getwd(),"/alldemultx")
fnFs <- list.files(path,pattern="R1",full.names=T)
fnRs <- list.files(path,pattern="R2",full.names=T)
sample_id = str_split(list.files(path,pattern="R1"),"_R1",simplify=T)[,1]
filt_path <- file.path(path,"filtered")
filtFs <- file.path(filt_path, paste0(sample_id,"_R1_filt.fastq"))
filtRs <- file.path(filt_path, paste0(sample_id,"_R2_filt.fastq"))
Step 3. Filter and Trim
- They only parameter you should specify in this step is the truncLen. The first number is the length to truncate for R1 and the second number is for R2. You can get the truncate length from the last step of the 16S Amplicon Demultiplex Workflow using fastQC
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(195,160),
maxN=0,maxEE=c(2,2), truncQ=2,
rm.phix=T, compress=T, multithread=T)
Step 4. Learn the Error Rates
- This is a very time consuming step. Each step should spend around 20 ~ 40 minutes depends on your computer. If you see a message of “failed to convergence after 6 times of calculation” or something similar, increase the MAX_CONSIST number.
errF <- learnErrors(filtFs, multithread=TRUE, MAX_CONSIST = 20)
errR <- learnErrors(filtRs, multithread=TRUE, MAX_CONSIST = 30)
dada2:::checkConvergence(errF)
dada2:::checkConvergence(errR)
plotErrors(errF, nominalQ=TRUE)
plotErrors(errR, nominalQ=TRUE)
Step 5. Dereplication
derepFs <- derepFastq(filtFs, verbose=TRUE)
derepRs <- derepFastq(filtRs, verbose=TRUE)
# Name the derep-class objects by the sample names
names(derepFs) <- sample_id
names(derepRs) <- sample_id
Step 6. Sample Inference
dadaFs <- dada(derepFs, err=errF, multithread=T)
dadaRs <- dada(derepRs, err=errR, multithread=T)
Step 7. Merge Paired Reads
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)
Step 8. Construct Sequence Table
seqtab <- makeSequenceTable(mergers)
Step 9. Remove Chimeras
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
dim(seqtab.nochim)
Step 10
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(mergers, getN), rowSums(seqtab), rowSums(seqtab.nochim))
# If processing a single sample, remove the sapply calls: e.g. replace sapply(dadaFs, getN) with getN(dadaFs)
colnames(track) <- c("input", "filtered", "denoised", "merged", "tabled", "nonchim")
rownames(track) <- sample_id
head(track)
Step 11. Asign Taxonomy
- In this step, you need to have the silva database available. The most recent version of silva is 132
## Asign Taxonomy
taxa <- assignTaxonomy(seqtab.nochim,
"Training/silva_nr_v128_train_set.fa.gz",
multithread=TRUE, verbose=T,tryRC=T)
## Add Species
taxa <- addSpecies(taxa, "Training/silva_species_assignment_v128.fa.gz",
tryRC=T,verbose=T)
taxa.print <- taxa # Removing sequence rownames for display only
rownames(taxa.print) <- NULL
head(taxa.print)
Step 12. Save
Save the data. The data cleaning will be done in another script/rmarkdown
save.image(file='dada2.Rdata')
LS0tCnRpdGxlOiAiMTZTIFNlcXVlbmNpbmcgRGF0YSBQcm9jZXNzaW5nIERBREEyIFdvcmtmbG93IgphdXRob3I6ICJDaGVuZ2hhbyBaaHUiCmRhdGU6ICIzLzcvMjAxOCIKb3V0cHV0OiBodG1sX25vdGVib29rCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKIyMgRGVzY3JpcHRpb24KCiogVGhpcyBpcyBhIHdvcmtmbG93IG9mIHVzaW5nIERBREEyIHRvIGRvIGZlYXR1cmUob3R1KSBwaWNraW5nIG9uIGRlbXVsdGlwbGV4ZWQgMTZTIHNlcXVlbmNpbmcgZGF0YS4gVGhpcyB3b3JrZmxvdyBzaG91bGQgYmUgcmFuIGFmdGVyIHlvdSBydW4gdGhlICoxNlMgQW1wbGljb24gRGVtdWx0aXBsZXggV29ya2Zsb3cqCgoqIFRvIHJ1biB0aGlzIHdvcmtmbG93LCB5b3UgbmVlZCB0byBoYXZlIFIsIFJzdHVkaW8sIGFuZCB0aGUgcGFja2FnZSBkYWRhMiBpbnN0YWxsZWQgaW4geW91ciBjb21wdXRlci4gVG8gaW5zdGFsbCBkYWRhMiwgcnVuIHRoZSBmb2xsb3dpbmcgY29tbWFuZHMuCgoqIEZvciBtb3JlIGRldGFpbCwgcGxlYXNlIHJlYWQgdGhlIGRhZGEyIHR1dG9yaWFsIG9uOgpodHRwczovL2JlbmpqbmViLmdpdGh1Yi5pby9kYWRhMi90dXRvcmlhbC5odG1sCgpgYGB7ciwgZXZhbCA9IEZ9CnNvdXJjZSgiaHR0cDovL2Jpb2NvbmR1Y3Rvci5vcmcvYmlvY0xpdGUuUiIpCmJpb2NMaXRlKCJkYWRhMiIpCmBgYAoKIyMgU3RlcCAxLiBsb2FkIHRoZSBsaWJyYXJ5CgpgYGB7ciBwYWNrYWdlcyx3YXJuaW5nID1GLCBlcnJvciA9IEYsIG1lc3NhZ2U9Rn0Kcm0obGlzdD1scygpKQpsaWJyYXJ5KCJkYWRhMiIsIHF1aWV0bHk9VFJVRSwgdmVyYm9zZT1GQUxTRSwgd2Fybi5jb25mbGljdHM9RkFMU0UpCmNhdChwYXN0ZSgiZGFkYTIgdmVyc2lvbjoiLCBwYWNrYWdlVmVyc2lvbigiZGFkYTIiKSkpCmBgYAoKIyMgU3RlcCAyLiBDcmVhdGUgYSBmaWxlIGxpc3QKCmBgYHtyfQojIyBzZXQgeW91ciB3b3JraW5nIGRpcmVjdG9yeSBoZXJlCnNldHdkKCIvVXNlcnMvY2hlbmdoYW96aHUvc2VxdWVuY2VfZGF0YS9GRi9mYXN0cSIpCiMjIHBhdGggdG8geW91ciBmYXN0cSBmaWxlcyAKcGF0aCA8LSBwYXN0ZTAoZ2V0d2QoKSwiL2FsbGRlbXVsdHgiKQpmbkZzIDwtIGxpc3QuZmlsZXMocGF0aCxwYXR0ZXJuPSJSMSIsZnVsbC5uYW1lcz1UKQpmblJzIDwtIGxpc3QuZmlsZXMocGF0aCxwYXR0ZXJuPSJSMiIsZnVsbC5uYW1lcz1UKQpzYW1wbGVfaWQgPSBzdHJfc3BsaXQobGlzdC5maWxlcyhwYXRoLHBhdHRlcm49IlIxIiksIl9SMSIsc2ltcGxpZnk9VClbLDFdCgpmaWx0X3BhdGggPC0gZmlsZS5wYXRoKHBhdGgsImZpbHRlcmVkIikKZmlsdEZzIDwtIGZpbGUucGF0aChmaWx0X3BhdGgsIHBhc3RlMChzYW1wbGVfaWQsIl9SMV9maWx0LmZhc3RxIikpCmZpbHRScyA8LSBmaWxlLnBhdGgoZmlsdF9wYXRoLCBwYXN0ZTAoc2FtcGxlX2lkLCJfUjJfZmlsdC5mYXN0cSIpKQpgYGAKCiMjIFN0ZXAgMy4gRmlsdGVyIGFuZCBUcmltCgoqIFRoZXkgb25seSBwYXJhbWV0ZXIgeW91IHNob3VsZCBzcGVjaWZ5IGluIHRoaXMgc3RlcCBpcyB0aGUgdHJ1bmNMZW4uIFRoZSBmaXJzdCBudW1iZXIgaXMgdGhlIGxlbmd0aCB0byB0cnVuY2F0ZSBmb3IgUjEgYW5kIHRoZSBzZWNvbmQgbnVtYmVyIGlzIGZvciBSMi4gWW91IGNhbiBnZXQgdGhlIHRydW5jYXRlIGxlbmd0aCBmcm9tIHRoZSBsYXN0IHN0ZXAgb2YgdGhlICoxNlMgQW1wbGljb24gRGVtdWx0aXBsZXggV29ya2Zsb3cqIHVzaW5nIGZhc3RRQwoKYGBge3IgZmlsaXRlciBhbmQgdHJpbX0Kb3V0IDwtIGZpbHRlckFuZFRyaW0oZm5GcywgZmlsdEZzLCBmblJzLCBmaWx0UnMsIHRydW5jTGVuPWMoMTk1LDE2MCksCiAgICAgICAgICAgICAgICAgICAgIG1heE49MCxtYXhFRT1jKDIsMiksIHRydW5jUT0yLAogICAgICAgICAgICAgICAgICAgICBybS5waGl4PVQsIGNvbXByZXNzPVQsIG11bHRpdGhyZWFkPVQpCmBgYAoKIyMgU3RlcCA0LiBMZWFybiB0aGUgRXJyb3IgUmF0ZXMKCiogVGhpcyBpcyBhIHZlcnkgdGltZSBjb25zdW1pbmcgc3RlcC4gRWFjaCBzdGVwIHNob3VsZCBzcGVuZCBhcm91bmQgMjAgfiA0MCBtaW51dGVzIGRlcGVuZHMgb24geW91ciBjb21wdXRlci4gSWYgeW91IHNlZSBhIG1lc3NhZ2Ugb2YgImZhaWxlZCB0byBjb252ZXJnZW5jZSBhZnRlciA2IHRpbWVzIG9mIGNhbGN1bGF0aW9uICIgb3Igc29tZXRoaW5nIHNpbWlsYXIsIGluY3JlYXNlIHRoZSBNQVhfQ09OU0lTVCBudW1iZXIuCgpgYGB7ciBsZWFybiB0aGUgZXJyb3IgcmF0ZXN9CmVyckYgPC0gbGVhcm5FcnJvcnMoZmlsdEZzLCBtdWx0aXRocmVhZD1UUlVFLCBNQVhfQ09OU0lTVCA9IDIwKQplcnJSIDwtIGxlYXJuRXJyb3JzKGZpbHRScywgbXVsdGl0aHJlYWQ9VFJVRSwgTUFYX0NPTlNJU1QgPSAzMCkKYGBgCgpgYGB7cn0KZGFkYTI6OjpjaGVja0NvbnZlcmdlbmNlKGVyckYpCmBgYAoKYGBge3J9CmRhZGEyOjo6Y2hlY2tDb252ZXJnZW5jZShlcnJSKQpgYGAKCmBgYHtyfQpwbG90RXJyb3JzKGVyckYsIG5vbWluYWxRPVRSVUUpCmBgYAoKYGBge3J9CnBsb3RFcnJvcnMoZXJyUiwgbm9taW5hbFE9VFJVRSkKYGBgCgojIyBTdGVwIDUuIERlcmVwbGljYXRpb24KCmBgYHtyIGRlcmVwbGljYXRpb259CmRlcmVwRnMgPC0gZGVyZXBGYXN0cShmaWx0RnMsIHZlcmJvc2U9VFJVRSkKZGVyZXBScyA8LSBkZXJlcEZhc3RxKGZpbHRScywgdmVyYm9zZT1UUlVFKQpgYGAKCmBgYHtyfQojIE5hbWUgdGhlIGRlcmVwLWNsYXNzIG9iamVjdHMgYnkgdGhlIHNhbXBsZSBuYW1lcwpuYW1lcyhkZXJlcEZzKSA8LSBzYW1wbGVfaWQKbmFtZXMoZGVyZXBScykgPC0gc2FtcGxlX2lkCmBgYAoKIyMgU3RlcCA2LiBTYW1wbGUgSW5mZXJlbmNlCgpgYGB7ciBzYW1wbGUgaW5mZXJlbmNlfQpkYWRhRnMgPC0gZGFkYShkZXJlcEZzLCBlcnI9ZXJyRiwgbXVsdGl0aHJlYWQ9VCkKZGFkYVJzIDwtIGRhZGEoZGVyZXBScywgZXJyPWVyclIsIG11bHRpdGhyZWFkPVQpCmBgYAoKIyMgU3RlcCA3LiBNZXJnZSBQYWlyZWQgUmVhZHMKCmBgYHtyIG1lcmdlIHBhaXJlZCByZWFkc30KbWVyZ2VycyA8LSBtZXJnZVBhaXJzKGRhZGFGcywgZGVyZXBGcywgZGFkYVJzLCBkZXJlcFJzLCB2ZXJib3NlPVRSVUUpCmBgYAoKIyMgU3RlcCA4LiBDb25zdHJ1Y3QgU2VxdWVuY2UgVGFibGUKCmBgYHtyIGNvbnN0cnVjdCBzZXF1ZW5jZSB0YWJsZX0Kc2VxdGFiIDwtIG1ha2VTZXF1ZW5jZVRhYmxlKG1lcmdlcnMpCmBgYAoKIyMgU3RlcCA5LiBSZW1vdmUgQ2hpbWVyYXMKCmBgYHtyIHJlbW92ZSBjaGltZXJhc30Kc2VxdGFiLm5vY2hpbSA8LSByZW1vdmVCaW1lcmFEZW5vdm8oc2VxdGFiLCBtZXRob2Q9ImNvbnNlbnN1cyIsIG11bHRpdGhyZWFkPVRSVUUsIHZlcmJvc2U9VFJVRSkKYGBgCgpgYGB7cn0KZGltKHNlcXRhYi5ub2NoaW0pCmBgYAoKIyMgU3RlcCAxMAoKYGBge3J9CmdldE4gPC0gZnVuY3Rpb24oeCkgc3VtKGdldFVuaXF1ZXMoeCkpCnRyYWNrIDwtIGNiaW5kKG91dCwgc2FwcGx5KGRhZGFGcywgZ2V0TiksIHNhcHBseShtZXJnZXJzLCBnZXROKSwgcm93U3VtcyhzZXF0YWIpLCByb3dTdW1zKHNlcXRhYi5ub2NoaW0pKQojIElmIHByb2Nlc3NpbmcgYSBzaW5nbGUgc2FtcGxlLCByZW1vdmUgdGhlIHNhcHBseSBjYWxsczogZS5nLiByZXBsYWNlIHNhcHBseShkYWRhRnMsIGdldE4pIHdpdGggZ2V0TihkYWRhRnMpCmNvbG5hbWVzKHRyYWNrKSA8LSBjKCJpbnB1dCIsICJmaWx0ZXJlZCIsICJkZW5vaXNlZCIsICJtZXJnZWQiLCAidGFibGVkIiwgIm5vbmNoaW0iKQpyb3duYW1lcyh0cmFjaykgPC0gc2FtcGxlX2lkIApoZWFkKHRyYWNrKQpgYGAKCiMjIFN0ZXAgMTEuIEFzaWduIFRheG9ub215CgoqIEluIHRoaXMgc3RlcCwgeW91IG5lZWQgdG8gaGF2ZSB0aGUgc2lsdmEgZGF0YWJhc2UgYXZhaWxhYmxlLiBUaGUgbW9zdCByZWNlbnQgdmVyc2lvbiBvZiBzaWx2YSBpcyAxMzIKCmBgYHtyIEFzaWduIFRheG9ub215fQojIyBBc2lnbiBUYXhvbm9teQp0YXhhIDwtIGFzc2lnblRheG9ub215KHNlcXRhYi5ub2NoaW0sIAogICAgICAgICAgICAgICAgICAgICAgICJUcmFpbmluZy9zaWx2YV9ucl92MTI4X3RyYWluX3NldC5mYS5neiIsIAogICAgICAgICAgICAgICAgICAgICAgIG11bHRpdGhyZWFkPVRSVUUsIHZlcmJvc2U9VCx0cnlSQz1UKQpgYGAKCmBgYHtyIEFkZCBTcGVjaWVzfQojIyBBZGQgU3BlY2llcwp0YXhhIDwtIGFkZFNwZWNpZXModGF4YSwgIlRyYWluaW5nL3NpbHZhX3NwZWNpZXNfYXNzaWdubWVudF92MTI4LmZhLmd6IiwKICAgICAgICAgICAgICAgICAgIHRyeVJDPVQsdmVyYm9zZT1UKQpgYGAKCmBgYHtyfQp0YXhhLnByaW50IDwtIHRheGEgIyBSZW1vdmluZyBzZXF1ZW5jZSByb3duYW1lcyBmb3IgZGlzcGxheSBvbmx5CnJvd25hbWVzKHRheGEucHJpbnQpIDwtIE5VTEwKaGVhZCh0YXhhLnByaW50KSAKYGBgCgojIyBTdGVwIDEyLiBTYXZlCgojIyBTYXZlIHRoZSBkYXRhLiBUaGUgZGF0YSBjbGVhbmluZyB3aWxsIGJlIGRvbmUgaW4gYW5vdGhlciBzY3JpcHQvcm1hcmtkb3duCgpgYGB7cn0Kc2F2ZS5pbWFnZShmaWxlPSdkYWRhMi5SZGF0YScpCmBgYA==