#@ pos. control ELAV1 T->C conversion2 in 2h at chr8: 2@4,289,667 + 1@4,289,669 chr8:4,289,661-4,289,674 minus RefSeq Accession: NM_010485 genome - : >mm9_dna range=chr8:4289613-4289723 5'pad=0 3'pad=0 strand=- repeatMasking=none GTTTGGTTTTGTGACCATGACAAACTATGAAGAAGCTGCAATGGCCATAG CAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACAGGTTTCCTTC ^ ^ AAAACCAACAA /data/results/reference/mmu/mm9/mm9-coding-exons.fa >NM_010485_cds_0_0_chr8_4289600_r GTTCTCCCCTATGGGTGTAGATCACATGAGTGGGATTTCTGGTGTCAATGTCCCCGGCAATGCTTCCTCGGGCTGGTGCATCTTCATCTACAACCTTGGGCAAGACGCCGATGAGGGGATCCTCTGGCAGATGTTTGGCCCCTTTGGTGCAGTTACCAATGTGAAAGTGATTCGTGATTTCAACACCAACAAGTGCAAAGGGTTTGGTTTTGTGACCATGACAAACTATGAAGAAGCTGCAATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACAGGTTTCCTTCAAAACCAACAAGTCCCACAAATAA grep NM_010485 2hrep3-Cexons-k-v3.bowtie | grep "T>C" | head 522_2800_2812_F3 - NM_010485_cds_0_0_chr8_4289600_r 17 GAGATCACACGAGTGGGATT IqqqqqqqqqqqqqqqqqqI 3 10:T>C,19:T>G #@ IL4, minlen18nt,filtered,collapsed reads,Hafner settings #bowtie input BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4F-4p3.bowtie=COLLAPSED ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniC I identified a total of 7961631 reads mapping to 2096976 Unique genomic coordinates Making up 114228 groups Consisting of 11874 clusters clusters-IL4F-4p3-HAFNER_APPROACH.txtC.bed.noIGG.bed 76 29 2206 170 6337 4606 2365 69 MINIMUM_CLUSTER_SIZE=13 ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniD I identified a total of 7961631 reads mapping to 2096976 Unique genomic coordinates Making up 114228 groups Consisting of 0 clusters MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=2 BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/saet/IL4F-s1.bowtie=COLLAPSED ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniE I identified a total of 7757884 reads mapping to 
1527856 Unique genomic coordinates Making up 109788 groups Consisting of 9863 clusters ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniF I identified a total of 7961631 reads mapping to 2096976 Unique genomic coordinates Making up 114228 groups Consisting of 0 clusters ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniG I identified a total of 7757884 reads mapping to 1527856 Unique genomic coordinates Making up 109788 groups Consisting of 9863 clusters BANDWIDTH=7 ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniI I identified a total of 7757884 reads mapping to 1527856 Unique genomic coordinates Making up 109788 groups Consisting of 9863 clusters BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4F-4p.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniJ I identified a total of 7555309 reads mapping to 1953187 Unique genomic coordinates Making up 107964 groups Consisting of 11273 clusters awk -f clusters2stats1.awk clusters-IL4F-4p3-HAFNER_APPROACH.txtJ clusters-IL4F-4p3-HAFNER_APPROACH.txtJ ndata 11273 avg 1 sdev 0 se 0 modelocation_from_end ndata 11273 avg 0 sdev 0 se 0 ./get-region-stats1.sh clusters-IL4F-4p3-HAFNER_APPROACH.txtJ.bed clusters-IL4F-4p3-HAFNER_APPROACH.txtJ.bed 73 25 2253 159 6531 4742 2402 66 #@ http://sgjlab.org/seqtrimmap-quick-guide/ Dealing with the first color of the reads The first color from SOLiD reads is problematic since it is also defined by the linker sequence. In the paper associated to this script (Marco and Griffiths-Jones 2012) we described why we should keep this first color. By default this pipeline removes the first character of each read, as a result the first color is kept (Ben Langmead, personal communication). 
However, Bowtie also offers a native option to deal with first colors. The option ‘--col-keepends’ allows the program to keep the first and last color of the read. We recommend running our pipeline with the default settings. However, you can force the program to use Bowtie's ‘--col-keepends’ option by adding the flag ‘-k’. BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/saet/IL4-s1k.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniK I identified a total of 9193920 reads mapping to 1733854 Unique genomic coordinates Making up 125263 groups Consisting of 22633 clusters I identified a total of 9173867 reads mapping to 2418682 Unique genomic coordinates Making up 134037 groups Consisting of 28399 clusters 133 48 4321 306 13361 9272 4612 129 BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4-cutad-k.bowtie ./PARalyzer 5g sample.ini-IL4F-4p3-HAFNER_APPROACH.iniN I identified a total of 7860847 reads mapping to 2322785 Unique genomic coordinates Making up 125845 groups Consisting of 26584 clusters clusters-IL4F-4p3-HAFNER_APPROACH.txtN.bed 166 45 5117 381 15650 10938 5460 177 clusters-IL4F-4p3-HAFNER_APPROACH.txtN ndata 26584 avg 8.74187 sdev 2.11576 se 0.0129764 modelocation_from_end ndata 26584 avg 3.94888 sdev 1.83232 se 0.011238 reczko@max:/data/images/proton/DKlab/mr/parclip/raw/saet$ /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 10 -l 18 -v 2 -p 10 -i -o IL4-saet-cutad-k IL4-cutad.fasta /data/results/reference/mmu/mm9/bowtie1/mm9c Total reads mapped: 24999508, out of 11411305 (219.07%) ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniO I identified a total of 7762587 reads mapping to 1615138 Unique genomic coordinates Making up 115734 groups Consisting of 20752 clusters 114 46 3990 276 12175 8579 4242 122 clusters-IL4F-4p3-HAFNER_APPROACH.txtO ndata 20752 avg 8.75925 sdev 2.07752 se 0.0144217 
modelocation_from_end ndata 20752 avg 3.92825 sdev 1.82136 se 0.0126434 reczko@max:/data/images/proton/DKlab/mr/parclip/raw$ /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 10 -l 18 -v 2 -p 10 -i -o IL4-k IL4/ugc_604_11_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/ BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4-k.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniL awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk clusters-IL4F-4p3-HAFNER_APPROACH.txtL |sort -k1,1 -k2,2n |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/prep-bed2bigbed1.awk > clusters-IL4F-4p3-HAFNER_APPROACH.txtL.bed ./get-region-stats1.sh clusters-IL4F-4p3-HAFNER_APPROACH.txtL.bed;cat regions.txt clusters-IL4F-4p3-HAFNER_APPROACH.txtL.bed 170 50 5465 398 16689 11710 5837 184 BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4-cutad-k-v3.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniS I identified a total of 9008801 reads mapping to 2751515 Unique genomic coordinates Making up 145664 groups Consisting of 52261 clusters 306 82 10240 718 30517 21754 10893 352 #@BEST BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/IL4-k-v3.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniQ I identified a total of 10478450 reads mapping to 2864673 Unique genomic coordinates Making up 155199 groups Consisting of 55554 clusters clusters-IL4F-4p3-HAFNER_APPROACH.txtQ.bed 318 86 10927 755 32429 23135 11617 372 awk -f clusters2stats1.awk clusters-IL4F-4p3-HAFNER_APPROACH.txtQ clusters-IL4F-4p3-HAFNER_APPROACH.txtQ ndata 55554 avgL 9.35891 sdev 1.98838 se 0.00843609 modelocation_from_end ndata 55554 avgP 4.24941 sdev 1.57634 se 
0.00668792 #r8 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 20 -l 18 -v 2 -p 10 -i -o IL4-k-m20 IL4/ugc_604_11_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=5 ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-IL4F-4p3-HAFNER_APPROACH.iniM #3mm @r4 /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 10 -l 18 -v 3 -p 10 -i -o IL4-k-v3 IL4/ugc_604_11_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c #cutadapt @r8 /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 18 -c -a CGCCTTGGCCGTACAGCAG IL4/ugc_604_11_F3.csfasta IL4/ugc_604_11_F3.QV.qual >& IL4/cutadapt.log > IL4/ugc_604_11_F3.fastq awk -f /data/results/tools/formats/fastq2fasta.awk IL4/ugc_604_11_F3.fastq > IL4/ugc_604_11_F3.fasta /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 10 -l 18 -v 2 -p 10 -i -o IL4-cutad-k IL4/ugc_604_11_F3.fasta /data/results/reference/mmu/mm9/bowtie1/mm9c #fix /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4ParalyzerM -k -m 10 -l 18 -v 2 -p 20 -i -o IL4-cutad-k-M IL4/ugc_604_11_F3.fasta /data/results/reference/mmu/mm9/bowtie1/mm9c #saet+cutadapt @r7 reczko@max:/data/images/proton/DKlab/mr/parclip/raw/saet$ /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -z -m 18 -c -a CGCCTTGGCCGTACAGCAG IL4/ugc_604_11_F3.csfasta IL4/ugc_604_11_F3.QV.qual >& IL4/cutadapt.log > IL4-cutad.fastq awk -f /data/results/tools/formats/fastq2fasta.awk IL4-cutad.fastq > IL4-cutad.fasta /data/images/proton/DKlab/mr/parclip/raw/SeqTrimMap4Paralyzer -k -m 10 -l 18 -v 2 -p 10 -i -o IL4-saet-cutad-k IL4-cutad.fasta /data/results/reference/mmu/mm9/bowtie1/mm9c #@ /data/images/proton/DKlab/mr/parclip/raw/IL4-3UTR-cutad-k-v3.bowtie_stats # KDE code /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1/ReadGroup.java , line122 while( currentPosition <= _endPosition) { // calculate background & signal _background[(int) (currentPosition - 
_startPosition)] = 1 / (numberOfNonConversions * Math.sqrt(2 * Math.pow(bandwidth,2) * Math.PI)); _signal[(int) (currentPosition - _startPosition)] = 1 / (numberOfConversions * Math.sqrt(2 * Math.pow(bandwidth,2) * Math.PI)); Iterator backgroundIt = _nonConversionMap.keySet().iterator(); while( backgroundIt.hasNext() ) { long position = backgroundIt.next(); if( _countMap.get(position) >= minimumCountPerNucleotideForKDE ) { _background[(int) (currentPosition - _startPosition)] += (_nonConversionMap.get(position) * Math.exp(-.5 * Math.pow(Math.abs(currentPosition - position) / bandwidth,2))); _signal[(int) (currentPosition - _startPosition)] += (_conversionMap.get(position) * Math.exp(-.5 * Math.pow(Math.abs(currentPosition - position) / bandwidth,2))); } } currentPosition++; } _background = pdfOfArray(_background); _signal = pdfOfArray(_signal); currentPosition = _startPosition; while( currentPosition <= _endPosition) { // calculate the KDE _kdeClassifier[(int) (currentPosition - _startPosition)] = _signal[(int) (currentPosition - _startPosition)] / ( _background[(int) (currentPosition - _startPosition)] + _signal[(int) (currentPosition - _startPosition)] ); currentPosition++; } #2h ./SeqTrimMap4Paralyzer -k -m 50 -l 18 -v 2 -p 10 -i -o 2hrep1-k 2hrep1/ugc_604_2_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c ./SeqTrimMap4Paralyzer -k -m 50 -l 18 -v 2 -p 10 -i -o 2hrep2-k 2hrep2/ugc_604_5_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c ./SeqTrimMap4Paralyzer -k -m 50 -l 18 -v 2 -p 20 -i -o 2hrep3-k-m50 2hrep3/ugc_604_8_F3.csfasta /data/results/reference/mmu/mm9/bowtie1/mm9c BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/2hrep3-k-m50.bowtie reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5$ ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-2hrep3-HAFNER_APPROACH.iniP I identified a total of 6745649 reads mapping to 1570608 Unique genomic coordinates Making up 84748 groups Consisting of 
17225 clusters reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5$ awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk clusters-2hrep3-HAFNER_APPROACH.txtP |sort -k1,1 -k2,2n |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/prep-bed2bigbed1.awk > clusters-2hrep3-HAFNER_APPROACH.txtP.bed reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5$ ./get-region-stats1.sh clusters-2hrep3-HAFNER_APPROACH.txtP.bed;cat regions.txt 97 33 3514 256 9863 7365 3748 132 BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/2hrep3-k-v3.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-2hrep3-HAFNER_APPROACH.iniQ I identified a total of 7528811 reads mapping to 1823654 Unique genomic coordinates Making up 95984 groups Consisting of 32592 clusters 216 56 6501 523 18847 13750 6983 265 BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/raw/2hrep3-cutad-k-v3.bowtie ./PARalyzer 5G /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-2hrep3-HAFNER_APPROACH.iniR I identified a total of 6035950 reads mapping to 1741593 Unique genomic coordinates Making up 89164 groups Consisting of 30173 clusters 195 53 6098 484 17345 12832 6538 250