# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3156069/ "The frequency of characteristic mutations in the clustered sequenced reads, T to C transitions when using 4-SU and G to A transitions when using 6-SG, are indicative of successfully crosslinked sequences. In our experience uncrosslinked RNAs labeled with 4-SU show a background mutation rate of approximately 20%. This rate is increases to approx. 50-80% upon crosslinking. A detailed description of the bioinformatic analysis can be found in the Supplementary material of the publication by Hafner et al.18" #0 get tr db cd /data/results/reference/mmu/mm9/mRNA-stranded /data/results/tools/align/cufflinks-2.2.1.Linux_x86_64/gffread -w Mus_musculus.NCBIM37.64-toMM9.fa -g /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf Warning: very large intron (4384418) for transcript ENSMUST00000127664 ls -l Mus_musculus.NCBIM37.64-toMM9.fa -rw-r--r-- 1 reczko users 180894604 Nov 30 15:25 Mus_musculus.NCBIM37.64-toMM9.fa /data/results/tools/align/cufflinks-2.2.1.Linux_x86_64/gffread -C -M -d loci-info.txt -K -w Mus_musculus.NCBIM37.64-toMM9.fa -g /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C coding only: discard mRNAs that have no CDS feature -M/--merge : cluster the input transcripts into loci, collapsing matching transcripts (those with the same exact introns and fully contained) -d : for -M option, write collapsing info to file -K for -M option: also collapse shorter, fully contained transcripts with fewer introns than the container /data/results/tools/align/bowtie-0.12.9/bowtie-inspect -s /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 Colorspace 1 SA-Sample 1 in 32 FTab-Chars 10 Sequence-1 40442 chr10:3134304-3227478 ENSMUST00000015346 3447 # 1. 
build colorspace index (reczko@fix:/data/results/reference/mmu/mm9/bowtie1$ /data/results/tools/align/bowtie-1.2.1.1/bowtie-build -C ../../Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa mm9c) cd /data/results/reference/mmu/mm9/mRNA-stranded /data/results/tools/align/bowtie-1.2.1.1/bowtie-build -C Mus_musculus.NCBIM37.64-toMM9.fa Mus_musculus.NCBIM37.64-toMM9 # 2. get seqs reczko@max:/data/images/proton/DKlab/mr/parclip/raw$ tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_1.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_4.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_7.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_2.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_5.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_8.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_3.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_6.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_9.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_10.tar.gz tar xzvf /mnt/max/c/hazapis/Backups/ugc_604_11.tar.gz # 3. 
link to ids # ids in ~/bak/doc/fleming/kafasla/sample-ids.txt ln -s ugc_604_1 0hrep1 ln -s ugc_604_4 0hrep2 ln -s ugc_604_7 0hrep3 ln -s ugc_604_2 2hrep1 ln -s ugc_604_5 2hrep2 ln -s ugc_604_8 2hrep3 ln -s ugc_604_3 6hrep1 ln -s ugc_604_6 6hrep2 ln -s ugc_604_9 6hrep3 ln -s ugc_604_10 IFN ln -s ugc_604_11 IL4 ln -s ugc_604_12 IGG #adapter removal /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/0hrep1/ugc_604_1_F3.csfasta ../raw/0hrep1/ugc_604_1_F3.QV.qual >& 0hrep1.log > 0hrep1.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/0hrep2/ugc_604_4_F3.csfasta ../raw/0hrep2/ugc_604_4_F3.QV.qual >& 0hrep2.log > 0hrep2.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/0hrep3/ugc_604_7_F3.csfasta ../raw/0hrep3/ugc_604_7_F3.QV.qual >& 0hrep3.log > 0hrep3.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/2hrep1/ugc_604_2_F3.csfasta ../raw/2hrep1/ugc_604_2_F3.QV.qual >& 2hrep1.log > 2hrep1.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/2hrep2/ugc_604_5_F3.csfasta ../raw/2hrep2/ugc_604_5_F3.QV.qual >& 2hrep2.log > 2hrep2.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/2hrep3/ugc_604_8_F3.csfasta ../raw/2hrep3/ugc_604_8_F3.QV.qual >& 2hrep3.log > 2hrep3.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/6hrep1/ugc_604_3_F3.csfasta ../raw/6hrep1/ugc_604_3_F3.QV.qual >& 6hrep1.log > 6hrep1.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/6hrep2/ugc_604_6_F3.csfasta ../raw/6hrep2/ugc_604_6_F3.QV.qual >& 6hrep2.log > 6hrep2.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/6hrep3/ugc_604_9_F3.csfasta 
../raw/6hrep3/ugc_604_9_F3.QV.qual >& 6hrep3.log > 6hrep3.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/IGG/ugc_604_12_F3.csfasta ../raw/IGG/ugc_604_12_F3.QV.qual >& IGG.log > IGG.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/IL4/ugc_604_11_F3.csfasta ../raw/IL4/ugc_604_11_F3.QV.qual >& IL4.log > IL4.fastq /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../raw/IFN/ugc_604_10_F3.csfasta ../raw/IFN/ugc_604_10_F3.QV.qual >& IFN.log > IFN.fastq reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/0hrep1/ugc_604_1_F3.csfasta 0hrep1.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/0hrep2/ugc_604_4_F3.csfasta 0hrep2.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/0hrep3/ugc_604_7_F3.csfasta 0hrep3.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/2hrep1/ugc_604_2_F3.csfasta 2hrep1.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/2hrep2/ugc_604_5_F3.csfasta 2hrep2.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/2hrep3/ugc_604_8_F3.csfasta 2hrep3.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/6hrep1/ugc_604_3_F3.csfasta 6hrep1.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/6hrep2/ugc_604_6_F3.csfasta 6hrep2.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/6hrep3/ugc_604_9_F3.csfasta 6hrep3.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/IGG/ugc_604_12_F3.csfasta IGG.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/IL4/ugc_604_11_F3.csfasta IL4.csfasta reczko@max:/data/images/proton/DKlab/mr/parclip/tr$ ln -s ../raw/IFN/ugc_604_10_F3.csfasta IFN.csfasta #/data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 15 -c -a CGCCTTGGCCGTACAGCAG F3.csfasta F3.QV.qual 
>& .log > .fastq for i in *.fastq do echo $i awk -f /data/results/tools/formats/fastq2fasta.awk $i > $i.fasta done # 5. collapse reads for i in *.fasta do echo $i cat $i | /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > $i".coll" done # collapse reads stats: 0hrep1filt2b 11246516 11238549 99.92916028394927 pct_collapsed 0hrep2filt2b 5324894 5323730 99.97814040993117 pct_collapsed 0hrep3filt2b 16517025 16506254 99.93478849853409 pct_collapsed 2hrep1filt2b 7161880 7158466 99.95233095220809 pct_collapsed 2hrep2filt2b 7045507 7044606 99.98721170811412 pct_collapsed 2hrep3filt2b 9334804 9331015 99.95940996725801 pct_collapsed 6hrep1filt2b 16369479 16349163 99.87589097979233 pct_collapsed 6hrep2filt2b 11989612 11986104 99.97074133841862 pct_collapsed 6hrep3filt2b 5984369 5982891 99.9753023251073 pct_collapsed IFNfilt2b 5455740 5453906 99.9663840285644 pct_collapsed IGGfilt2b 5422503 5421323 99.97823883177198 pct_collapsed IL4filt2b 12613262 12604847 99.93328450641872 pct_collapsed # note: filt2b are the same as filt2 # 6. 
align # Recommended BOWTIE alignment parameters from: # PARalyzer documentation # /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/README.txt #>bowtie GENOME_INDEX -v 2 -m 10 --best --strata -f INPUT_FASTA_FILE OUTPUT_FILE #6a adapter removal vs stranded /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep1v3ar_st 0hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep2v3ar_st 0hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep3v3ar_st 0hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep1v3ar_st 2hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep2v3ar_st 2hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep3v3ar_st 2hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 #same aligner, can run in parallel using other /tmp dir /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep1v3ar_st 6hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep2v3ar_st 6hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 
/data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep3v3ar_st 6hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IGGv3ar_st IGG.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IL4v3ar_st IL4.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IFNv3ar_st IFN.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 #6b adapter removal vs unstranded /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep1v3ar 0hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep2v3ar 0hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep3v3ar 0hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep1v3ar 2hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep2v3ar 2hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep3v3ar 2hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 
#same aligner, can run in parallel using other /tmp dir /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep1v3ar 6hrep1.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep2v3ar 6hrep2.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep3v3ar 6hrep3.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IGGv3ar IGG.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IL4v3ar IL4.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IFNv3ar IFN.fastq.fasta.coll /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 #6c wo adapter removal /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep1v3 0hrep1.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep2v3 0hrep2.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep3v3 0hrep3.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep1v3 2hrep1.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 
/data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep2v3 2hrep2.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 2hrep3v3 2hrep3.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 #same aligner, can run in parallel using other /tmp dir /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep1v3 6hrep1.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep2v3 6hrep2.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o 6hrep3v3 6hrep3.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IGGv3 IGG.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IL4v3 IL4.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 3 -p 10 -i -o IFNv3 IFN.csfasta /data/results/reference/mmu/mm9/mRNA/Mus_musculus.NCBIM37.64-toMM9 #6d stranded wo adapter removal /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 0hrep1v3_st 0hrep1.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 0hrep2v3_st 0hrep2.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 
18 -v 3 -p 18 -i -o 0hrep3v3_st 0hrep3.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 2hrep1v3_st 2hrep1.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 2hrep2v3_st 2hrep2.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 2hrep3v3_st 2hrep3.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 #same aligner, can run in parallel using other /tmp dir /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 6hrep1v3_st 6hrep1.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 6hrep2v3_st 6hrep2.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o 6hrep3v3_st 6hrep3.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o IGGv3_st IGG.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o IL4v3_st IL4.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerFix -m 10 -l 18 -v 3 -p 18 -i -o IFNv3_st IFN.csfasta /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9 # 7. 
convert bowtie to sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/0hrep1v3.bowtie > 0hrep1v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/0hrep2v3.bowtie > 0hrep2v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/0hrep3v3.bowtie > 0hrep3v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/2hrep1v3.bowtie > 2hrep1v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/2hrep2v3.bowtie > 2hrep2v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/2hrep3v3.bowtie > 2hrep3v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/6hrep1v3.bowtie > 6hrep1v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/6hrep2v3.bowtie > 6hrep2v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/6hrep3v3.bowtie > 6hrep3v3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/IGGv3.bowtie > IGGv3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/IFNv3.bowtie > IFNv3.sam /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/raw/IL4v3.bowtie > IL4v3.sam # 8. 
get alignment stats for i in *filt2_T_*csfastq.fasta.coll do echo $i wc $i done for i in *4p3.sam do echo $i cat $i | awk -f /data/images/proton2/countMultimaps1.awk done 0hrep1F-4p3.sam 0hrep1filt2_T_F3.csfastq.fasta.coll 11238549 reads n_alignments 11755762 aligned_reads 11034381 98.18332419959196 pct_aligned unique_aligned_reads 10313000 93.4624 % multimapped_reads 721381 6.53758 % avg_multimapping 2 0hrep2F-4p3.sam 0hrep2filt2_T_F3.csfastq.fasta.coll 5323730 reads n_alignments 5589925 aligned_reads 5247928 98.57614867771281 pct_aligned unique_aligned_reads 4905931 93.4832 % multimapped_reads 341997 6.5168 % avg_multimapping 2 0hrep3F-4p3.sam 0hrep3filt2_T_F3.csfastq.fasta.coll 16506254 reads n_alignments 17482140 aligned_reads 16274617 98.59667129804255 pct_aligned unique_aligned_reads 15067094 92.5803 % multimapped_reads 1207523 7.41967 % avg_multimapping 2 2hrep1F-4p3.sam 2hrep1filt2_T_F3.csfastq.fasta.coll 7158466 reads n_alignments 7461810 aligned_reads 7031654 98.22850314578571 pct_aligned unique_aligned_reads 6601498 93.8826 % multimapped_reads 430156 6.11742 % avg_multimapping 2 2hrep2F-4p3.sam 2hrep2filt2_T_F3.csfastq.fasta.coll 7044606 reads n_alignments 7355984 aligned_reads 6914080 98.14714974833227 pct_aligned unique_aligned_reads 6472176 93.6086 % multimapped_reads 441904 6.39136 % avg_multimapping 2 2hrep3F-4p3.sam 2hrep3filt2_T_F3.csfastq.fasta.coll 9331015 reads n_alignments 9787276 aligned_reads 9164488 98.21533884577401 pct_aligned unique_aligned_reads 8541700 93.2043 % multimapped_reads 622788 6.79567 % avg_multimapping 2 6hrep1F-4p3.sam 6hrep1filt2_T_F3.csfastq.fasta.coll 16349163 reads n_alignments 17107909 aligned_reads 16049980 98.17004087609867 pct_aligned unique_aligned_reads 14992051 93.4085 % multimapped_reads 1057929 6.59147 % avg_multimapping 2 6hrep2F-4p3.sam 6hrep2filt2_T_F3.csfastq.fasta.coll 11986104 reads n_alignments 12470215 aligned_reads 11809383 98.52561766525636 pct_aligned unique_aligned_reads 11148551 94.4042 % 
multimapped_reads 660832 5.59582 % avg_multimapping 2 6hrep3F-4p3.sam 6hrep3filt2_T_F3.csfastq.fasta.coll 5982891 reads n_alignments 6295874 aligned_reads 5895256 98.53523990325078 pct_aligned unique_aligned_reads 5494638 93.2044 % multimapped_reads 400618 6.7956 % avg_multimapping 2 IFNF-4p3.sam IFNfilt2_T_F3.csfastq.fasta.coll 5453906 reads n_alignments 5704195 aligned_reads 5365122 98.37210248948185 pct_aligned unique_aligned_reads 5026049 93.6801 % multimapped_reads 339073 6.31995 % avg_multimapping 2 IGGF-4p3.sam IGGfilt2_T_F3.csfastq.fasta.coll 5421323 reads n_alignments 5640294 aligned_reads 5317648 98.08764392012799 pct_aligned unique_aligned_reads 4995002 93.9325 % multimapped_reads 322646 6.06746 % avg_multimapping 2 IL4F-4p3.sam IL4filt2_T_F3.csfastq.fasta.coll 12604847 reads n_alignments 13208457 aligned_reads 12354236 98.01178863971931 pct_aligned unique_aligned_reads 11500015 93.0856 % multimapped_reads 854221 6.9144 % avg_multimapping 2 # 9. PARalyzer # required: # reczko@max:/data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta$ /home/reczko/bin/faToTwoBit genome.fa genome.2bit # input is *F-4p3.sam #@ 3reps 0h, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 9G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-0hF-4p3-HAFNER_APPROACH.iniC I identified a total of 20940029 reads mapping to 3853196 Unique genomic coordinates Making up 235711 groups Consisting of 26608 clusters #@ 3reps 2h, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 9G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-2hF-4p3-HAFNER_APPROACH.iniC I identified a total of 15217111 reads mapping to 3780215 Unique genomic coordinates Making up 198738 groups Consisting of 21008 clusters #@ 3reps 6h, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 9G 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-6hF-4p3-HAFNER_APPROACH.iniC I identified a total of 22862597 reads mapping to 4105841 Unique genomic coordinates Making up 247379 groups Consisting of 27444 clusters #@ IL4, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniC I identified a total of 7961631 reads mapping to 2096976 Unique genomic coordinates Making up 114228 groups Consisting of 11874 clusters #@ IFN, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IFNF-4p3-HAFNER_APPROACH.iniC I identified a total of 3567454 reads mapping to 1251045 Unique genomic coordinates Making up 53339 groups Consisting of 5042 clusters #@ IGG, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IGGF-4p3-HAFNER_APPROACH.iniC I identified a total of 3581257 reads mapping to 860004 Unique genomic coordinates Making up 47740 groups Consisting of 4603 clusters ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniD # 10. groups+clusters to bed format # 10a. 
groups for i in groups-*-4p3-HAFNER_APPROACH.txtC do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk $i |sort -k1,1 -k2,2n |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/prep-bed2bigbed1.awk > $i.bed wc !$ done # group stats #bowtie 235711 groups-0hF-4p3-HAFNER_APPROACH.txtC.bed 198738 groups-2hF-4p3-HAFNER_APPROACH.txtC.bed 247379 groups-6hF-4p3-HAFNER_APPROACH.txtC.bed 53339 groups-IFNF-4p3-HAFNER_APPROACH.txtC.bed 47740 groups-IGGF-4p3-HAFNER_APPROACH.txtC.bed 114228 groups-IL4F-4p3-HAFNER_APPROACH.txtC.bed for i in groups-*-4p3-HAFNER_APPROACH.txtC.bed do bedtools intersect -a $i -b groups-IGGF-4p3-HAFNER_APPROACH.txtC.bed -v |sort -k1,1 -k2,2n > $i.noIGG.bed wc !$ done # group stats after IGG removal #bowtie 205689 groups-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 168222 groups-2hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 209138 groups-6hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 34142 groups-IFNF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 93325 groups-IL4F-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed # 10b. 
clusters for i in clus*-4p3-HAFNER_APPROACH.txtC do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk $i |sort -k1,1 -k2,2n |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/prep-bed2bigbed1.awk > $i.bed wc !$ done # cluster stats before IGG removal #bowtie 26608 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed 21008 clusters-2hF-4p3-HAFNER_APPROACH.txtC.bed 27444 clusters-6hF-4p3-HAFNER_APPROACH.txtC.bed 5042 clusters-IFNF-4p3-HAFNER_APPROACH.txtC.bed 4603 clusters-IGGF-4p3-HAFNER_APPROACH.txtC.bed 11874 clusters-IL4F-4p3-HAFNER_APPROACH.txtC.bed for i in clus*-4p3-HAFNER_APPROACH.txtC.bed do bedtools intersect -a $i -b clusters-IGGF-4p3-HAFNER_APPROACH.txtC.bed -v |sort -k1,1 -k2,2n > $i.noIGG.bed wc !$ done #bowtie 25116 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 19505 clusters-2hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 25367 clusters-6hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 4191 clusters-IFNF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 10943 clusters-IL4F-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed # 11. 
prepare UCSC tracks # get chr sizes #/data/results/tools/gbrowser/fetchChromSizes.sh mm9 > mm9.chrom.sizes # make bigBeds rm /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt for i in *4p3*txtC*noIGG.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/splitStrands1.awk $i /data/results/tools/gbrowser/tools/bedToBigBed -tab -type=bed6 $i".plus" mm9.chrom.sizes /data/images/proton/DKlab/mr/parclip/tracks2/$i".plus.bb" echo "track "$i".plus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "bigDataUrl "$i".plus.bb" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "shortLabel "$i".plus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "longLabel "$i".plus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "color 0,0,255" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt #echo "maxHeightPixels 128:30:11" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "autoScale on" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "visibility dense" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "type bigBed" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt /data/results/tools/gbrowser/tools/bedToBigBed -tab -type=bed6 $i".minus" mm9.chrom.sizes /data/images/proton/DKlab/mr/parclip/tracks2/$i".minus.bb" echo "track "$i".minus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "bigDataUrl "$i".minus.bb" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "shortLabel "$i".minus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "longLabel "$i".minus" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "color 0,0,255" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt #echo "maxHeightPixels 128:30:11" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "autoScale on" >> 
/data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "visibility dense" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo "type bigBed" >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt echo >> /data/images/proton/DKlab/mr/parclip/tracks2/trackDb.txt done awk -f /data/images/proton/orderTracks.awk ../../tracks2/trackDb.txt > ../../tracks2/trackDb2.txt cd /data/images/proton/DKlab/mr/parclip/tracks2 ln -s . mm9 #hub link http://genomics-lab.fleming.gr/cgi-bin/hgTracks?db=mm9&hubUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks2/hub.txt # 12. get genomic region stats cd /data/results/tools/chipseq/genomic_regions/defining_genomic_regions cat mm9-genes.bed | sort -k1V -k2,2n | bedtools2/bin/complementBed -i stdin -g mm9.genome > /data/results/reference/mmu/mm9/mm9-intergenic.bed # per genomic regions stats for each condition ./mget-region-stats1.sh # result in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/mregions.csv # get bed for each region: ./get-regions1.sh clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed # get avg. 
read counts and sd,se per region reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/regions$ for i in * > do > awk -f ../get-region-read-stats1.awk $i > done clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.3UTR.bed 163.143 271.898 22.4258 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.5UTR.bed 166.977 314.66 47.9852 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.CODING_EXONS.bed 150.355 263.816 18.7961 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.EXONS.bed 152.982 266.643 13.6965 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.INTERGENEIC.txt 139.846 252.591 2.08674 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.INTRAGENIC_stranded 142.396 253.853 3.49718 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.INTRAGENIC_unstranded 144.136 256.713 2.50956 clusters-0hF-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed.INTRONS.bed 141.278 252.366 3.60082