#raw reads in: /data/images/proton/DKlab/mr/Polysomes/reads/Backup_Polysomes_TTP/ #adapter removal, also maps machine file id to sample #all samples: /data/images/proton/DKlab/mr/Polysomes/reads/cutadapt-calls.sh #one sample: This is cutadapt 1.9.1 with Python 2.7.9 Command line parameters: -q 20 -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L03_ugc_623_20_F3.csfasta /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L03_ugc_623_20_F3.QV.qual # fastq in /data/images/proton/DKlab/mr/Polysomes/reads/fastq/ -z, --zero-cap Change negative quality values to zero. This is enabled by default when -c/--colorspace is also enabled. Use the above option to disable it. -m LENGTH, --minimum-length=LENGTH Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted (default: 0). -q [5'CUTOFF,]3'CUTOFF, --quality-cutoff=[5'CUTOFF,]3'CUTOFF Trim low-quality bases from 5' and/or 3' ends of reads before adapter removal. If one value is given, only the 3' end is trimmed. If two comma-separated cutoffs are given, the 5' end is trimmed with the first cutoff, the 3' end with the second. See documentation for the algorithm. (default: no trimming) -a ADAPTER, --adapter=ADAPTER Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If the adapter sequence ends with the '$' character, the adapter is anchored to the end of the read and only found if it is a suffix of the read. -c, --colorspace Enable colorspace mode: Also trim the color that is adjacent to the found adapter. # adapter and qual-trimming stats in /data/images/proton/DKlab/mr/Polysomes/README-nreads.txt #alignment #one: /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --transcriptome-only -p 10 -n 2 -o KO_0h_replicate_Ib -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/KO_0h_replicate_I.fastq #more: /data/images/proton/DKlab/mr/Polysomes/reads/tophat/tophat-calls.sh1 -T/--transcriptome-only (map only to the transcriptome) -n/--transcriptome-mismatches [ default: 1 ] -G/--GTF (GTF/GFF with known transcripts) -C/--color (Solid - color space) #default settings, not modified: -g/--max-multihits [ default: 20 ] -x/--transcriptome-max-hits [ default: 60 ] # quantitation # A) per-transcript /data/images/proton/DKlab/mr/Polysomes/reads/stringtie/stringtie-calls-transcriptome.sh #one sample: /data/results/tools/rnaseq/stringtie/stringtie-1.3.3b.Linux_x86_64/stringtie ../tophat/KO_0h_replicate_Ib/accepted_hits.bam -p 10 -o ballgown/KO_0h_replicate_I/KO_0h_replicate_I.gtf -A ballgown/KO_0h_replicate_I/KO_0h_replicate_I.tab -B -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf &> KO_0h_replicate_I.log -B enable output of Ballgown table files which will be created in the same directory as the output GTF (requires -G, -o recommended) # merge transcripts per sample into common gtf /data/results/tools/rnaseq/stringtie/stringtie-1.3.3b.Linux_x86_64/stringtie --merge -o merged.gtf -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf ballgown/KO_0h_replicate_II/KO_0h_replicate_II.gtf ballgown/KO_0h_replicate_I/KO_0h_replicate_I.gtf ballgown/KO_2h_replicate_II/KO_2h_replicate_II.gtf ballgown/KO_2h_replicate_I/KO_2h_replicate_I.gtf ballgown/KO_6h_replicate_II/KO_6h_replicate_II.gtf ballgown/KO_6h_replicate_I/KO_6h_replicate_I.gtf ballgown/KO_IFN-gamma_replicate_II/KO_IFN-gamma_replicate_II.gtf ballgown/KO_IFN_replicate_I/KO_IFN_replicate_I.gtf ballgown/KO_IL4_replicate_II/KO_IL4_replicate_II.gtf ballgown/KO_IL4_replicate_I/KO_IL4_replicate_I.gtf ballgown/WT_0h_replicate_II/WT_0h_replicate_II.gtf ballgown/WT_0h_replicate_I/WT_0h_replicate_I.gtf ballgown/WT_2h_replicate_II/WT_2h_replicate_II.gtf ballgown/WT_2h_replicate_I/WT_2h_replicate_I.gtf ballgown/WT_6h_replicate_II/WT_6h_replicate_II.gtf ballgown/WT_6h_replicate_I/WT_6h_replicate_I.gtf ballgown/WT_IFN-gamma_replicate_II/WT_IFN-gamma_replicate_II.gtf ballgown/WT_IFN-gamma_replicate_I/WT_IFN-gamma_replicate_I.gtf ballgown/WT_IL4_replicate_II/WT_IL4_replicate_II.gtf ballgown/WT_IL4_replicate_I/WT_IL4_replicate_I.gtf # quantify again with merged.gtf /data/results/tools/rnaseq/stringtie/stringtie-1.3.3b.Linux_x86_64/stringtie ../tophat/KO_0h_replicate_Ib/accepted_hits.bam -p 10 -o ballgown/KO_0h_replicate_I/KO_0h_replicate_I.gtf -e -B -G merged.gtf &> KO_0h_replicate_I.log -e only estimate the abundance of given reference transcripts (requires -G) # results are collected in folder ballgown (needed for prepDE.py, see below) # generate count matrix for edgeR /data/results/tools/rnaseq/stringtie/stringtie-master/prepDE.py result in transcript_count_matrix.csv # diff. exp. Rscript edgeR2.r rnaseqMatrix0= read.table("transcript_count_matrix.csv",header=T,sep=",") .. #1st pairing col_ordering = c(11,12,1,2)#0h rnaseqMatrix =rnaseqMatrix0[,col_ordering]; # keep transcripts that have at least 5 total reads in one condition rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=5)|(rowSums(rnaseqMatrix[,3:4])>=5),]; .. write.table(tTags, file="edgeRtranscripts2-0h.csv") #with expression per sample: write.table(x, file="edgeRtranscripts2Expr-0h.csv") ls edgeRtranscripts2-?h.csv > edgeR-results2.txt ls edgeRtranscripts2-???.csv >> edgeR-results2.txt #awk -v f="/data/images/proton/DKlab/mr/Polysomes/reads/stringtie/merged.gtf" -f addGeneToEdger.awk # add columns "geneName geneID transcriptID " before results cat edgeR-results2.txt | awk -v f="/data/images/proton/DKlab/mr/Polysomes/reads/stringtie/merged.gtf" -f addGeneToEdger.awk # quantitation # B) per-gene /data/images/proton/DKlab/mr/Polysomes/reads/stringtie/per-gene/README-polysomeSeq-per-gene.txt /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O -g gene_name -t exon -T 8 -a ~/bak/doc/fleming/kafasla/DKlab/mr/Polysomes/reads/stringtie/merged.gtf -o gene_counts_merged3_featurecounts.txt ../../tophat/KO_0h_replicate_I/accepted_hits.bam ../../tophat/KO_0h_replicate_II/accepted_hits.bam ../../tophat/KO_2h_replicate_I/accepted_hits.bam ../../tophat/KO_2h_replicate_II/accepted_hits.bam ../../tophat/KO_6h_replicate_I/accepted_hits.bam ../../tophat/KO_6h_replicate_II/accepted_hits.bam ../../tophat/KO_IFN-gamma_replicate_II/accepted_hits.bam ../../tophat/KO_IFN_replicate_I/accepted_hits.bam ../../tophat/KO_IL4_replicate_I/accepted_hits.bam ../../tophat/KO_IL4_replicate_II/accepted_hits.bam ../../tophat/WT_0h_replicate_I/accepted_hits.bam ../../tophat/WT_0h_replicate_II/accepted_hits.bam ../../tophat/WT_2h_replicate_I/accepted_hits.bam ../../tophat/WT_2h_replicate_II/accepted_hits.bam ../../tophat/WT_6h_replicate_I/accepted_hits.bam ../../tophat/WT_6h_replicate_II/accepted_hits.bam ../../tophat/WT_IFN-gamma_replicate_I/accepted_hits.bam ../../tophat/WT_IFN-gamma_replicate_II/accepted_hits.bam ../../tophat/WT_IL4_replicate_I/accepted_hits.bam ../../tophat/WT_IL4_replicate_II/accepted_hits.bam -O Assign reads to all their overlapping meta-features (or features if -f is specified). -g Specify attribute type in GTF annotation. 'gene_id' by default. Meta-features used for read counting will be extracted from annotation using the provided value. -t Specify feature type in GTF annotation. 'exon' by default. Features used for read counting will be extracted from annotation using the provided value. -T Number of the threads. 1 by default. -a Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) is available in 'annotation' directory of the package. # Rscript for edgeR in /data/images/proton/DKlab/mr/Polysomes/reads/stringtie/per-gene/README-polysomeSeq-per-gene.txt rnaseqMatrix1= read.table("gene_counts_merged3_featurecounts.txt",header=T,sep="\t",skip=1) .. write.table(tTags, file="edgeRgenes_featurecount-0h.csv") write.table(x, file="edgeRgenes_featurecountExpr-0h.csv")