At /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_with_sdev are the clusters with the sdev info added. Format:
EnsemblID = transcript ID
ClusterID = unique ID for the cluster
ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
ModeLocation = coordinate of the location with the highest signal / (signal + background) value
ConversionLocationCount = number of unique locations where at least 1 conversion occurred
ConversionEventCount = total number of conversions that occurred within the cluster
NonConversionEventCount = total number of possible conversion events that did not occur
ModeScore = score of the highest signal / (signal + background) value
AvgConversionPct = average conversion % of all conversions in the group containing the cluster
GroupConversionEventCount = number of all conversions in the group containing the cluster
SdevConversionPct = standard deviation (sdev) of conversion % of all conversions in the group containing the cluster
MaxConversionPct = max.
conversion % of all conversions in the group containing the cluster #@ cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c ./myp.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-mRNA/sh.2hrep1.ini &> 2hrep1.log2 ./myp.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-mRNA/sh.2hrep2.ini &> 2hrep2.mRNA-log2 ./myp.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-mRNA/sh.2hrep3.ini &> 2hrep3.mRNA-log2 ./mergeMyPar-dist-mRNA.sh sh-distributions-2hrep1.txt2 ./mergeMyPar-dist-mRNA.sh sh-distributions-2hrep2.txt2 ./mergeMyPar-dist-mRNA.sh sh-distributions-2hrep3.txt2 ./mergeMyPar-mRNA.sh sh-groups-2hrep1.txt2 ./mergeMyPar-mRNA.sh sh-groups-2hrep2.txt2 ./mergeMyPar-mRNA.sh sh-groups-2hrep3.txt2 ./myp.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-mRNA/sh.0hrep1.ini &> 0hrep1.log-mRNA & ./mergeMyPar-mRNA.sh sh-groups-0hrep1.txt2 ./mergeMyPar-dist-mRNA.sh sh-distributions-0hrep1.txt2 ./mergeMyPar-mRNA.sh sh-clusters-0hrep1.txt2 ./myp.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-mRNA/sh.IL4.ini &> IL4.log-mRNA & ./mergeMyPar-mRNA.sh sh-clusters-IL4.txt2 ./mergeMyPar-dist-mRNA.sh sh-distributions-IL4.txt2 ./mergeMyPar-mRNA.sh sh-groups-IL4.txt2 ( source myp-all2-mRNA.sh ./mergeMyPar2-mRNA.sh ./mergeMyPar.sh sh-clusters-0hrep2.txt2 ./mergeMyPar.sh sh-clusters-0hrep3.txt2 ./mergeMyPar.sh sh-clusters-2hrep1.txt2 ./mergeMyPar.sh sh-clusters-2hrep2.txt2 ./mergeMyPar.sh sh-clusters-2hrep3.txt2 ./mergeMyPar.sh sh-clusters-6hrep1.txt2 ./mergeMyPar.sh sh-clusters-6hrep2.txt2 ./mergeMyPar.sh sh-clusters-6hrep3.txt2 ./mergeMyPar.sh sh-clusters-IGG.txt2 ./mergeMyPar.sh sh-clusters-IL4.txt2 ./mergeMyPar.sh sh-clusters-IFN.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep2.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-2hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-2hrep2.txt2 ./mergeMyPar-dist.sh 
sh-distributions-2hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep2.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-IGG.txt2 ./mergeMyPar-dist.sh sh-distributions-IL4.txt2 ./mergeMyPar-dist.sh sh-distributions-IFN.txt2 ./mergeMyPar.sh sh-groups-0hrep1.txt2 ./mergeMyPar.sh sh-groups-0hrep2.txt2 ./mergeMyPar.sh sh-groups-0hrep3.txt2 ./mergeMyPar.sh sh-groups-2hrep1.txt2 ./mergeMyPar.sh sh-groups-2hrep2.txt2 ./mergeMyPar.sh sh-groups-2hrep3.txt2 ./mergeMyPar.sh sh-groups-6hrep1.txt2 ./mergeMyPar.sh sh-groups-6hrep2.txt2 ./mergeMyPar.sh sh-groups-6hrep3.txt2 ./mergeMyPar.sh sh-groups-IGG.txt2 ./mergeMyPar.sh sh-groups-IL4.txt2 ./mergeMyPar.sh sh-groups-IFN.txt2 ) ln -s ../PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv sh-clusters-0hrep2.txt2-mRNA.csv ln -s ../PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv sh-clusters-0hrep3.txt2-mRNA.csv ln -s ../PARalyzer_v1_1b/sh-distributions-0hrep2.txt2.csv sh-distributions-0hrep2.txt2-mRNA.csv ln -s ../PARalyzer_v1_1b/sh-distributions-0hrep3.txt2.csv sh-distributions-0hrep3.txt2-mRNA.csv # paralyzer to bed, more than 5 TtoC for i in sh-clusters*txt2-mRNA.csv do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk $i > $i.bed.gt5tc done # link files for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-distributions*.txt2.csv do ln -s $i done #Diff to README*5.txt: no max conv cutoff, direct to bed #get avg+max TtoC (note >100% bug in /data/images/proton/DKlab/mr/parclip/paralyzer/README.PARalyzer_v1_1_src.txt, clipped to 100%) for i in sh-distributions*.txt2-mRNA.csv do cat $i| awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk|awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk > $i".avg.csv3.bed" done # intersect maxTtoC_gt_0.25 with clusters 
#/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc/ sh-distributions-0hrep1.txt2.csv.avg.csv.bed sh-distributions-0hrep2.txt2.csv.avg.csv.bed sh-distributions-0hrep3.txt2.csv.avg.csv.bed sh-distributions-2hrep1.txt2.csv.avg.csv.bed sh-distributions-2hrep2.txt2.csv.avg.csv.bed sh-distributions-2hrep3.txt2.csv.avg.csv.bed sh-distributions-6hrep1.txt2.csv.avg.csv.bed sh-distributions-6hrep2.txt2.csv.avg.csv.bed sh-distributions-6hrep3.txt2.csv.avg.csv.bed sh-distributions-IFN.txt2.csv.avg.csv.bed sh-distributions-IGG.txt2.csv.avg.csv.bed sh-distributions-IL4.txt2.csv.avg.csv.bed #with sh-clusters-0hrep1.txt2.csv.bed.gt5tc sh-clusters-0hrep2.txt2.csv.bed.gt5tc sh-clusters-0hrep3.txt2.csv.bed.gt5tc sh-clusters-2hrep1.txt2.csv.bed.gt5tc sh-clusters-2hrep2.txt2.csv.bed.gt5tc sh-clusters-2hrep3.txt2.csv.bed.gt5tc sh-clusters-6hrep1.txt2.csv.bed.gt5tc sh-clusters-6hrep2.txt2.csv.bed.gt5tc sh-clusters-6hrep3.txt2.csv.bed.gt5tc sh-clusters-IFN.txt2.csv.bed.gt5tc sh-clusters-IGG.txt2.csv.bed.gt5tc sh-clusters-IL4.txt2.csv.bed.gt5tc # (note: /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk adds # AvgConversionPct_ConversionEventCount_SdevConversionPct MaxConversionPct # ) is bedtools intersect -a sh-clusters-0hrep1.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-0hrep1.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk > sh-clusters-0hrep1.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep2.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-0hrep2.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-0hrep2.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep3.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-0hrep3.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> 
sh-clusters-0hrep3.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep1.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-2hrep1.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep1.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep2.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-2hrep2.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep2.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep3.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-2hrep3.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep3.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep1.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-6hrep1.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep1.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep2.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-6hrep2.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep2.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep3.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-6hrep3.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep3.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IFN.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-IFN.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IFN.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IGG.txt2-mRNA.csv.bed.gt5tc -b 
sh-distributions-IGG.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IGG.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IL4.txt2-mRNA.csv.bed.gt5tc -b sh-distributions-IL4.txt2-mRNA.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IL4.txt2-mRNA.csv3.bed.gt5tc.gt0.25TtoC2 # wc *gt0.25TtoC2 # # strand stats for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2 do echo $i awk -f ../strandStats1.awk $i done # separate plus strand results for i in sh-clusters-*mRNA.csv3.bed.gt5tc.gt0.25TtoC2 do wc $i awk -f ../filterPlusStrand.awk $i > $i.plus wc !$ done 193393 1160358 29124166 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 101515 609090 15297865 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 106635 639810 15885435 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 56994 341964 8509777 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 290852 1745112 44230590 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 155385 932310 23682988 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 155713 934278 23431066 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 84327 505962 12716714 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 167355 1004130 25385298 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 90695 544170 13789543 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 178851 1073106 26828707 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 93054 558324 13960288 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 271231 1627386 40462705 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 147794 886764 22102104 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 238510 1431060 36279717 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 128111 768666 19518144 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 114574 687444 17133980 
sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 64028 384168 9599977 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 136129 816774 20357157 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2 73328 439968 10991821 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 96009 576054 14127569 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2 50953 305718 7511449 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 265482 1592892 40214310 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2 141457 848742 21447234 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus #remove IGG regions for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2.plus do wc $i bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus -v -s |sort -k1,1 -k2,2n > $i.noIGG wc !$ done 101515 609090 14384230 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 90630 543780 12803308 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 56994 341964 7996831 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 51226 307356 7172075 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 155385 932310 22284523 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 145758 874548 20852048 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 84327 505962 11957771 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 71326 427956 10065082 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 90695 544170 12973288 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 82695 496170 11801714 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 93054 558324 13122802 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 84938 509628 11946194 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 147794 886764 20771958 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 135573 813438 18989369 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 128111 768666 18365145 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 117245 
703470 16758381 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 64028 384168 9023725 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 58142 348852 8175812 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 73328 439968 10331869 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 64976 389856 9129517 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG #ok <0.25 50953 305718 7052872 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 0 0 0 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 141457 848742 20174121 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 124109 744654 17618125 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG ## To detect enriched high-scoring clusters, get highest modscore that is not uniformly distributed Rscript /data/images/proton/DKlab/mr/parclip/paralyzer/get_modescore_hist1.r modscore_cutoff for IFN 0.957 0.1134793327234 modscore_cutoff for IL4 0.969 0.216298175051088 modscore_cutoff for 0h_all_reps 0.961 0.120913954333128 modscore_cutoff for 2h_all_reps 0.962 0.149587618299848 modscore_cutoff for 6h_all_reps 0.962 0.0930348364107952 awk -v th=0.957 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 30743 184458 4532268 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.969 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 44558 267348 6689577 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.961 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 34499 206994 5148636 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.961 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 22598 135588 3331047 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.961 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 59867 359202 8990670 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 30436 182616 
4515502 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 39295 235770 5914510 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 33609 201654 4980914 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 52185 313110 7664397 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 
47072 282432 7097941 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.962 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 26115 156690 3875732 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt #prepare 2 of 3 intersection bedtools multiinter -i sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc # (note: use ../get-multiintersect-details-union1.awk of 1 of 3=union) #add details to intersection: awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc > 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed #merge book-ended regions: ENSMUST00000000391 1861 1885 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + ENSMUST00000000391 1885 1888 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G152.1_CCCCATAGGTTCCTGTGCTTGGATGCATGGTCATCAGCCGGTG_34_1893_5_6_52_0.9992483503045925_0.266667_5_0.500000|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + awk -f ../get-merged-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed #get max scoring cluster, add max avgConvPct as score awk -f ../get-max-intersect-score2.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk 
sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed # filter protein_coding_genes and add annotation from /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9.headers cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2/ for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*flt*bed3.gt5tc.details.merged.maxAvg.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv
cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_with_sdev
cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*bed3*anno.csv .
cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*csv3*anno.csv .
Rscript mapTRtoGenome7.r
for i in *2.bed
do
sort -k1,1 -k2,2n $i | awk -f correct-bed.awk > $i.srt
/data/results/tools/gbrowser/bedToBigBed $i.srt /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes $i.bb
echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/"$i.bb
done
track type=bigBed name="IFN clusters" description="IFN paralyzer" bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/IFN-mapped.bb