1) adapter removal /data/images/proton/DKlab/mr/rnaseq/cutadapt.sh #one sample example: ~/.local/bin/cutadapt -j 16 -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTAT -o wt01_R1_trm.fastq.gz -p wt01_R2_trm.fastq.gz /mnt/max/b/genomics_facility/DKlab/mr/rnaseq/RNA_Seq/HI.1370.002.Index_10.wt01_R1.fastq.gz /mnt/max/b/genomics_facility/DKlab/mr/rnaseq/RNA_Seq/HI.1370.002.Index_10.wt01_R2.fastq.gz # -o FILE, --output=FILE # Write trimmed reads to FILE. FASTQ or FASTA format is # chosen depending on input. The summary report is sent # to standard output. Use '{name}' in FILE to # demultiplex reads into multiple files. Default: write # to standard output # -p FILE, --paired-output=FILE # Write second read in a pair to FILE. 2) index building Hisat version 2.1.0 /data/results/tools/rnaseq/hisat/hisat2-2.1.0/hisat2-build -p 9 /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa mm9_hisat2 3) spliced alignment /data/images/proton/DKlab/mr/rnaseq/hisat.sh #one sample example: /data/results/tools/rnaseq/hisat/hisat2-2.1.0/hisat2 -k 1 -p 18 --dta -x mm9_hisat2 -1 wt01_R1_trm.fastq.gz -2 wt01_R2_trm.fastq.gz | /data/results/tools/samtools/samtools-1.3/samtools sort -@ 8 -o wt0h1_trm.bam # -k (default: 5) report up to alns per read # --dta reports alignments tailored for transcript assemblers (e.g. stringtie) hisat2 [options]* -x {-1 -2 | -U | --sra-acc } [-S ] Index filename prefix (minus trailing .X.ht2). Files with #1 mates, paired with files in . Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). Files with #2 mates, paired with files in . Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). Scoring options --mp MX,MN Sets the maximum (MX) and minimum (MN) mismatch penalties, both integers. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted quals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value. Default: MX = 6, MN = 2. --sp MX,MN Sets the maximum (MX) and minimum (MN) penalties for soft-clipping per base, both integers. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value. Default: MX = 2, MN = 1. --no-softclip Disallow soft-clipping. --np Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N. Default: 1. --rdg , Sets the read gap open () and extend () penalties. A read gap of length N gets a penalty of + N * . Default: 5, 3. --rfg , Sets the reference gap open () and extend () penalties. A reference gap of length N gets a penalty of + N * . Default: 5, 3. --score-min Sets a function governing the minimum alignment score needed for an alignment to be considered "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying L,0,-0.6 sets the minimum-score function f to f(x) = 0 + -0.6 * x, where x is the read length. See also: [setting function options]. The default is L,0,-0.2. Spliced alignment options --pen-cansplice Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0. --pen-noncansplice Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12. --pen-canintronlen Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1 --pen-noncanintronlen Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1 --min-intronlen Sets minimum intron length. Default: 20 --max-intronlen Sets maximum intron length. Default: 500000 4) quantitation /data/images/proton/DKlab/mr/rnaseq/stringtie2/README-per-gene.txt /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g "gene_name" -t "exon" -T 8 -a /data/images/proton/DKlab/mr/rnaseq/stringtie2/merged.gtf3 -o /data/images/proton/DKlab/mr/rnaseq/stringtie2/gene_counts_merged3.txt wt01_trm.bam wt02_trm.bam wt21_trm.bam wt22_trm.bam wt61_trm.bam wt62_trm.bam wtIFN_1_trm.bam wtIFN_2_trm.bam wtIL4_1_trm.bam wtIL4_2_trm.bam KO01_trm.bam KO02_trm.bam KO21_trm.bam KO22_trm.bam KO61_trm.bam KO62_trm.bam KOIFN_1_trm.bam KOIFN_2_trm.bam KOIL4_1_trm.bam KOIL4_2_trm.bam #with -k 20 multimaps: /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g "gene_name" -t "exon" -T 8 -a /data/images/proton/DKlab/mr/rnaseq/stringtie2/merged.gtf3 -o /data/images/proton/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20.txt wt01_trm_k20.bam wt02_trm_k20.bam wt21_trm_k20.bam wt22_trm_k20.bam wt61_trm_k20.bam wt62_trm_k20.bam wtIFN_1_trm_k20.bam wtIFN_2_trm_k20.bam wtIL4_1_trm_k20.bam wtIL4_2_trm_k20.bam KO01_trm_k20.bam KO02_trm_k20.bam KO21_trm_k20.bam KO22_trm_k20.bam KO61_trm_k20.bam KO62_trm_k20.bam KOIFN_1_trm_k20.bam KOIFN_2_trm_k20.bam KOIL4_1_trm_k20.bam KOIL4_2_trm_k20.bam /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g "gene_name" -t "exon" -T 8 -a /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -o /data/images/proton/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64.txt wt01_trm_k20.bam wt02_trm_k20.bam wt21_trm_k20.bam wt22_trm_k20.bam wt61_trm_k20.bam wt62_trm_k20.bam wtIFN_1_trm_k20.bam wtIFN_2_trm_k20.bam wtIL4_1_trm_k20.bam wtIL4_2_trm_k20.bam KO01_trm_k20.bam KO02_trm_k20.bam KO21_trm_k20.bam KO22_trm_k20.bam KO61_trm_k20.bam KO62_trm_k20.bam KOIFN_1_trm_k20.bam KOIFN_2_trm_k20.bam KOIL4_1_trm_k20.bam KOIL4_2_trm_k20.bam #use gene_id /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g "gene_id" -t "exon" -T 8 -a /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -o /data/images/proton/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt wt01_trm_k20.bam wt02_trm_k20.bam wt21_trm_k20.bam wt22_trm_k20.bam wt61_trm_k20.bam wt62_trm_k20.bam wtIFN_1_trm_k20.bam wtIFN_2_trm_k20.bam wtIL4_1_trm_k20.bam wtIL4_2_trm_k20.bam KO01_trm_k20.bam KO02_trm_k20.bam KO21_trm_k20.bam KO22_trm_k20.bam KO61_trm_k20.bam KO62_trm_k20.bam KOIFN_1_trm_k20.bam KOIFN_2_trm_k20.bam KOIL4_1_trm_k20.bam KOIL4_2_trm_k20.bam # paired-end /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g "gene_id" -t "exon" -T 8 -p -a /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -o /data/images/proton/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt wt01_trm_k20.bam wt02_trm_k20.bam wt21_trm_k20.bam wt22_trm_k20.bam wt61_trm_k20.bam wt62_trm_k20.bam wtIFN_1_trm_k20.bam wtIFN_2_trm_k20.bam wtIL4_1_trm_k20.bam wtIL4_2_trm_k20.bam KO01_trm_k20.bam KO02_trm_k20.bam KO21_trm_k20.bam KO22_trm_k20.bam KO61_trm_k20.bam KO62_trm_k20.bam KOIFN_1_trm_k20.bam KOIFN_2_trm_k20.bam KOIL4_1_trm_k20.bam KOIL4_2_trm_k20.bam # gene_counts_merged_k20_NCBIM37.64.txt 5) diff. exp. analysis require(edgeR) rnaseqMatrix1= read.table("gene_counts_merged3.txt",header=T,sep="\t",skip=1) rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]]; rownames(rnaseqMatrix0)=rnaseqMatrix1$Geneid; rnaseqMatrix = rnaseqMatrix0[rowSums(rnaseqMatrix0)>=2,] col_ordering = c(1,2,11,12)#0h rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study); et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") r <- rownames(topTags(et,n=NULL,adjust.method = "fdr")$table) c=cpm(exp_study)[r, order(exp_study$samples$group)] x=cbind(as.data.frame(tTags),c) summary(de <- decideTestsDGE(et, p=0.05)) #detags <- rownames(exp_study)[as.logical(de)] #plotSmear(et, de.tags=detags) #abline(h = c(-1, 1), col = "blue") write.table(tTags, file="edgeRgenes_featurecount-0h.csv") write.table(x, file="edgeRgenes_featurecountExpr-0h.csv")