Description

source("http://bioconductor.org/biocLite.R")
biocLite("dada2")

Step 1. load the library

rm(list=ls())
library("dada2", quietly=TRUE, verbose=FALSE, warn.conflicts=FALSE)
cat(paste("dada2 version:", packageVersion("dada2")))

Step 2. Create a file list

## set your working directory here
setwd("/Users/chenghaozhu/sequence_data/FF/fastq")
## path to your fastq files 
path <- paste0(getwd(),"/alldemultx")
fnFs <- list.files(path,pattern="R1",full.names=T)
fnRs <- list.files(path,pattern="R2",full.names=T)
sample_id = str_split(list.files(path,pattern="R1"),"_R1",simplify=T)[,1]

filt_path <- file.path(path,"filtered")
filtFs <- file.path(filt_path, paste0(sample_id,"_R1_filt.fastq"))
filtRs <- file.path(filt_path, paste0(sample_id,"_R2_filt.fastq"))

Step 3. Filter and Trim

out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(195,160),
                     maxN=0,maxEE=c(2,2), truncQ=2,
                     rm.phix=T, compress=T, multithread=T)

Step 4. Learn the Error Rates

errF <- learnErrors(filtFs, multithread=TRUE, MAX_CONSIST = 20)
errR <- learnErrors(filtRs, multithread=TRUE, MAX_CONSIST = 30)
dada2:::checkConvergence(errF)
dada2:::checkConvergence(errR)
plotErrors(errF, nominalQ=TRUE)
plotErrors(errR, nominalQ=TRUE)

Step 5. Dereplication

derepFs <- derepFastq(filtFs, verbose=TRUE)
derepRs <- derepFastq(filtRs, verbose=TRUE)
# Name the derep-class objects by the sample names
names(derepFs) <- sample_id
names(derepRs) <- sample_id

Step 6. Sample Inference

dadaFs <- dada(derepFs, err=errF, multithread=T)
dadaRs <- dada(derepRs, err=errR, multithread=T)

Step 7. Merge Paired Reads

mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE)

Step 8. Construct Sequence Table

seqtab <- makeSequenceTable(mergers)

Step 9. Remove Chimeras

seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
dim(seqtab.nochim)

Step 10

getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(mergers, getN), rowSums(seqtab), rowSums(seqtab.nochim))
# If processing a single sample, remove the sapply calls: e.g. replace sapply(dadaFs, getN) with getN(dadaFs)
colnames(track) <- c("input", "filtered", "denoised", "merged", "tabled", "nonchim")
rownames(track) <- sample_id 
head(track)

Step 11. Asign Taxonomy

## Asign Taxonomy
taxa <- assignTaxonomy(seqtab.nochim, 
                       "Training/silva_nr_v128_train_set.fa.gz", 
                       multithread=TRUE, verbose=T,tryRC=T)
## Add Species
taxa <- addSpecies(taxa, "Training/silva_species_assignment_v128.fa.gz",
                   tryRC=T,verbose=T)
taxa.print <- taxa # Removing sequence rownames for display only
rownames(taxa.print) <- NULL
head(taxa.print) 

Step 12. Save

Save the data. The data cleaning will be done in another script/rmarkdown

save.image(file='dada2.Rdata')
LS0tCnRpdGxlOiAiMTZTIFNlcXVlbmNpbmcgRGF0YSBQcm9jZXNzaW5nIERBREEyIFdvcmtmbG93IgphdXRob3I6ICJDaGVuZ2hhbyBaaHUiCmRhdGU6ICIzLzcvMjAxOCIKb3V0cHV0OiBodG1sX25vdGVib29rCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKIyMgRGVzY3JpcHRpb24KCiogVGhpcyBpcyBhIHdvcmtmbG93IG9mIHVzaW5nIERBREEyIHRvIGRvIGZlYXR1cmUob3R1KSBwaWNraW5nIG9uIGRlbXVsdGlwbGV4ZWQgMTZTIHNlcXVlbmNpbmcgZGF0YS4gVGhpcyB3b3JrZmxvdyBzaG91bGQgYmUgcmFuIGFmdGVyIHlvdSBydW4gdGhlICoxNlMgQW1wbGljb24gRGVtdWx0aXBsZXggV29ya2Zsb3cqCgoqIFRvIHJ1biB0aGlzIHdvcmtmbG93LCB5b3UgbmVlZCB0byBoYXZlIFIsIFJzdHVkaW8sIGFuZCB0aGUgcGFja2FnZSBkYWRhMiBpbnN0YWxsZWQgaW4geW91ciBjb21wdXRlci4gVG8gaW5zdGFsbCBkYWRhMiwgcnVuIHRoZSBmb2xsb3dpbmcgY29tbWFuZHMuCgoqIEZvciBtb3JlIGRldGFpbCwgcGxlYXNlIHJlYWQgdGhlIGRhZGEyIHR1dG9yaWFsIG9uOgpodHRwczovL2JlbmpqbmViLmdpdGh1Yi5pby9kYWRhMi90dXRvcmlhbC5odG1sCgpgYGB7ciwgZXZhbCA9IEZ9CnNvdXJjZSgiaHR0cDovL2Jpb2NvbmR1Y3Rvci5vcmcvYmlvY0xpdGUuUiIpCmJpb2NMaXRlKCJkYWRhMiIpCmBgYAoKIyMgU3RlcCAxLiBsb2FkIHRoZSBsaWJyYXJ5CgpgYGB7ciBwYWNrYWdlcyx3YXJuaW5nID1GLCBlcnJvciA9IEYsIG1lc3NhZ2U9Rn0Kcm0obGlzdD1scygpKQpsaWJyYXJ5KCJkYWRhMiIsIHF1aWV0bHk9VFJVRSwgdmVyYm9zZT1GQUxTRSwgd2Fybi5jb25mbGljdHM9RkFMU0UpCmNhdChwYXN0ZSgiZGFkYTIgdmVyc2lvbjoiLCBwYWNrYWdlVmVyc2lvbigiZGFkYTIiKSkpCmBgYAoKIyMgU3RlcCAyLiBDcmVhdGUgYSBmaWxlIGxpc3QKCmBgYHtyfQojIyBzZXQgeW91ciB3b3JraW5nIGRpcmVjdG9yeSBoZXJlCnNldHdkKCIvVXNlcnMvY2hlbmdoYW96aHUvc2VxdWVuY2VfZGF0YS9GRi9mYXN0cSIpCiMjIHBhdGggdG8geW91ciBmYXN0cSBmaWxlcyAKcGF0aCA8LSBwYXN0ZTAoZ2V0d2QoKSwiL2FsbGRlbXVsdHgiKQpmbkZzIDwtIGxpc3QuZmlsZXMocGF0aCxwYXR0ZXJuPSJSMSIsZnVsbC5uYW1lcz1UKQpmblJzIDwtIGxpc3QuZmlsZXMocGF0aCxwYXR0ZXJuPSJSMiIsZnVsbC5uYW1lcz1UKQpzYW1wbGVfaWQgPSBzdHJfc3BsaXQobGlzdC5maWxlcyhwYXRoLHBhdHRlcm49IlIxIiksIl9SMSIsc2ltcGxpZnk9VClbLDFdCgpmaWx0X3BhdGggPC0gZmlsZS5wYXRoKHBhdGgsImZpbHRlcmVkIikKZmlsdEZzIDwtIGZpbGUucGF0aChmaWx0X3BhdGgsIHBhc3RlMChzYW1wbGVfaWQsIl9SMV9maWx0LmZhc3RxIikpCmZpbHRScyA8LSBmaWxlLnBhdGgoZmlsdF9wYXRoLCBwYXN0ZTAoc2FtcGxlX2lkLCJfUjJfZmlsdC5mYXN0cSIpKQpgYGAKCiMjIFN0ZXAgMy4gRmlsdGVyIGFuZCBUcmltCgoqIFRoZXkgb25seSBwYXJhbWV0ZXIgeW91IHNob3VsZCBzcGVjaWZ5IGluIHRoaXMgc3RlcCBpcyB0aGUgdHJ1bmNMZW4uIFRoZSBmaXJzdCBudW1iZXIgaXMgdGhlIGxlbmd0aCB0byB0cnVuY2F0ZSBmb3IgUjEgYW5kIHRoZSBzZWNvbmQgbnVtYmVyIGlzIGZvciBSMi4gWW91IGNhbiBnZXQgdGhlIHRydW5jYXRlIGxlbmd0aCBmcm9tIHRoZSBsYXN0IHN0ZXAgb2YgdGhlICoxNlMgQW1wbGljb24gRGVtdWx0aXBsZXggV29ya2Zsb3cqIHVzaW5nIGZhc3RRQwoKYGBge3IgZmlsaXRlciBhbmQgdHJpbX0Kb3V0IDwtIGZpbHRlckFuZFRyaW0oZm5GcywgZmlsdEZzLCBmblJzLCBmaWx0UnMsIHRydW5jTGVuPWMoMTk1LDE2MCksCiAgICAgICAgICAgICAgICAgICAgIG1heE49MCxtYXhFRT1jKDIsMiksIHRydW5jUT0yLAogICAgICAgICAgICAgICAgICAgICBybS5waGl4PVQsIGNvbXByZXNzPVQsIG11bHRpdGhyZWFkPVQpCmBgYAoKIyMgU3RlcCA0LiBMZWFybiB0aGUgRXJyb3IgUmF0ZXMKCiogVGhpcyBpcyBhIHZlcnkgdGltZSBjb25zdW1pbmcgc3RlcC4gRWFjaCBzdGVwIHNob3VsZCBzcGVuZCBhcm91bmQgMjAgfiA0MCBtaW51dGVzIGRlcGVuZHMgb24geW91ciBjb21wdXRlci4gSWYgeW91IHNlZSBhIG1lc3NhZ2Ugb2YgImZhaWxlZCB0byBjb252ZXJnZW5jZSBhZnRlciA2IHRpbWVzIG9mIGNhbGN1bGF0aW9uICIgb3Igc29tZXRoaW5nIHNpbWlsYXIsIGluY3JlYXNlIHRoZSBNQVhfQ09OU0lTVCBudW1iZXIuCgpgYGB7ciBsZWFybiB0aGUgZXJyb3IgcmF0ZXN9CmVyckYgPC0gbGVhcm5FcnJvcnMoZmlsdEZzLCBtdWx0aXRocmVhZD1UUlVFLCBNQVhfQ09OU0lTVCA9IDIwKQplcnJSIDwtIGxlYXJuRXJyb3JzKGZpbHRScywgbXVsdGl0aHJlYWQ9VFJVRSwgTUFYX0NPTlNJU1QgPSAzMCkKYGBgCgpgYGB7cn0KZGFkYTI6OjpjaGVja0NvbnZlcmdlbmNlKGVyckYpCmBgYAoKYGBge3J9CmRhZGEyOjo6Y2hlY2tDb252ZXJnZW5jZShlcnJSKQpgYGAKCmBgYHtyfQpwbG90RXJyb3JzKGVyckYsIG5vbWluYWxRPVRSVUUpCmBgYAoKYGBge3J9CnBsb3RFcnJvcnMoZXJyUiwgbm9taW5hbFE9VFJVRSkKYGBgCgojIyBTdGVwIDUuIERlcmVwbGljYXRpb24KCmBgYHtyIGRlcmVwbGljYXRpb259CmRlcmVwRnMgPC0gZGVyZXBGYXN0cShmaWx0RnMsIHZlcmJvc2U9VFJVRSkKZGVyZXBScyA8LSBkZXJlcEZhc3RxKGZpbHRScywgdmVyYm9zZT1UUlVFKQpgYGAKCmBgYHtyfQojIE5hbWUgdGhlIGRlcmVwLWNsYXNzIG9iamVjdHMgYnkgdGhlIHNhbXBsZSBuYW1lcwpuYW1lcyhkZXJlcEZzKSA8LSBzYW1wbGVfaWQKbmFtZXMoZGVyZXBScykgPC0gc2FtcGxlX2lkCmBgYAoKIyMgU3RlcCA2LiBTYW1wbGUgSW5mZXJlbmNlCgpgYGB7ciBzYW1wbGUgaW5mZXJlbmNlfQpkYWRhRnMgPC0gZGFkYShkZXJlcEZzLCBlcnI9ZXJyRiwgbXVsdGl0aHJlYWQ9VCkKZGFkYVJzIDwtIGRhZGEoZGVyZXBScywgZXJyPWVyclIsIG11bHRpdGhyZWFkPVQpCmBgYAoKIyMgU3RlcCA3LiBNZXJnZSBQYWlyZWQgUmVhZHMKCmBgYHtyIG1lcmdlIHBhaXJlZCByZWFkc30KbWVyZ2VycyA8LSBtZXJnZVBhaXJzKGRhZGFGcywgZGVyZXBGcywgZGFkYVJzLCBkZXJlcFJzLCB2ZXJib3NlPVRSVUUpCmBgYAoKIyMgU3RlcCA4LiBDb25zdHJ1Y3QgU2VxdWVuY2UgVGFibGUKCmBgYHtyIGNvbnN0cnVjdCBzZXF1ZW5jZSB0YWJsZX0Kc2VxdGFiIDwtIG1ha2VTZXF1ZW5jZVRhYmxlKG1lcmdlcnMpCmBgYAoKIyMgU3RlcCA5LiBSZW1vdmUgQ2hpbWVyYXMKCmBgYHtyIHJlbW92ZSBjaGltZXJhc30Kc2VxdGFiLm5vY2hpbSA8LSByZW1vdmVCaW1lcmFEZW5vdm8oc2VxdGFiLCBtZXRob2Q9ImNvbnNlbnN1cyIsIG11bHRpdGhyZWFkPVRSVUUsIHZlcmJvc2U9VFJVRSkKYGBgCgpgYGB7cn0KZGltKHNlcXRhYi5ub2NoaW0pCmBgYAoKIyMgU3RlcCAxMAoKYGBge3J9CmdldE4gPC0gZnVuY3Rpb24oeCkgc3VtKGdldFVuaXF1ZXMoeCkpCnRyYWNrIDwtIGNiaW5kKG91dCwgc2FwcGx5KGRhZGFGcywgZ2V0TiksIHNhcHBseShtZXJnZXJzLCBnZXROKSwgcm93U3VtcyhzZXF0YWIpLCByb3dTdW1zKHNlcXRhYi5ub2NoaW0pKQojIElmIHByb2Nlc3NpbmcgYSBzaW5nbGUgc2FtcGxlLCByZW1vdmUgdGhlIHNhcHBseSBjYWxsczogZS5nLiByZXBsYWNlIHNhcHBseShkYWRhRnMsIGdldE4pIHdpdGggZ2V0TihkYWRhRnMpCmNvbG5hbWVzKHRyYWNrKSA8LSBjKCJpbnB1dCIsICJmaWx0ZXJlZCIsICJkZW5vaXNlZCIsICJtZXJnZWQiLCAidGFibGVkIiwgIm5vbmNoaW0iKQpyb3duYW1lcyh0cmFjaykgPC0gc2FtcGxlX2lkIApoZWFkKHRyYWNrKQpgYGAKCiMjIFN0ZXAgMTEuIEFzaWduIFRheG9ub215CgoqIEluIHRoaXMgc3RlcCwgeW91IG5lZWQgdG8gaGF2ZSB0aGUgc2lsdmEgZGF0YWJhc2UgYXZhaWxhYmxlLiBUaGUgbW9zdCByZWNlbnQgdmVyc2lvbiBvZiBzaWx2YSBpcyAxMzIKCmBgYHtyIEFzaWduIFRheG9ub215fQojIyBBc2lnbiBUYXhvbm9teQp0YXhhIDwtIGFzc2lnblRheG9ub215KHNlcXRhYi5ub2NoaW0sIAogICAgICAgICAgICAgICAgICAgICAgICJUcmFpbmluZy9zaWx2YV9ucl92MTI4X3RyYWluX3NldC5mYS5neiIsIAogICAgICAgICAgICAgICAgICAgICAgIG11bHRpdGhyZWFkPVRSVUUsIHZlcmJvc2U9VCx0cnlSQz1UKQpgYGAKCmBgYHtyIEFkZCBTcGVjaWVzfQojIyBBZGQgU3BlY2llcwp0YXhhIDwtIGFkZFNwZWNpZXModGF4YSwgIlRyYWluaW5nL3NpbHZhX3NwZWNpZXNfYXNzaWdubWVudF92MTI4LmZhLmd6IiwKICAgICAgICAgICAgICAgICAgIHRyeVJDPVQsdmVyYm9zZT1UKQpgYGAKCmBgYHtyfQp0YXhhLnByaW50IDwtIHRheGEgIyBSZW1vdmluZyBzZXF1ZW5jZSByb3duYW1lcyBmb3IgZGlzcGxheSBvbmx5CnJvd25hbWVzKHRheGEucHJpbnQpIDwtIE5VTEwKaGVhZCh0YXhhLnByaW50KSAKYGBgCgojIyBTdGVwIDEyLiBTYXZlCgojIyBTYXZlIHRoZSBkYXRhLiBUaGUgZGF0YSBjbGVhbmluZyB3aWxsIGJlIGRvbmUgaW4gYW5vdGhlciBzY3JpcHQvcm1hcmtkb3duCgpgYGB7cn0Kc2F2ZS5pbWFnZShmaWxlPSdkYWRhMi5SZGF0YScpCmBgYA==