At /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_with_sdev are the clusters with the standard deviation (sdev) info added. Column format:
EnsemblID = transcript ID
ClusterID = unique ID for the cluster
ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
ModeLocation = coordinate of the location with the highest signal / (signal + background) value
ConversionLocationCount = number of unique locations where at least 1 conversion occurred
ConversionEventCount = total number of conversions that occurred within the cluster
NonConversionEventCount = total number of possible conversion events that did not occur
ModeScore = score of the highest signal / (signal + background) value
AvgConversionPct = average conversion % of all conversions in the group containing the cluster
GroupConversionEventCount = number of all conversions in the group containing the cluster
SdevConversionPct = standard deviation (sdev) of the conversion % of all conversions in the group containing the cluster
MaxConversionPct = max. 
conversion % of all conversions in the group containing the cluster #@ input: shrimp alignments at ls -lt /data/images/proton/DKlab/mr/parclip/shrimp/*bam.md.bam -rw-r--r-- 1 reczko users 2836636824 Jan 3 18:38 /data/images/proton/DKlab/mr/parclip/shrimp/6hrep3-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 5461722700 Jan 3 18:24 /data/images/proton/DKlab/mr/parclip/shrimp/6hrep2-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 6391396884 Jan 3 18:01 /data/images/proton/DKlab/mr/parclip/shrimp/6hrep1-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 4197518555 Jan 3 17:37 /data/images/proton/DKlab/mr/parclip/shrimp/2hrep3-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 3414659301 Jan 3 17:21 /data/images/proton/DKlab/mr/parclip/shrimp/2hrep2-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 3374490008 Jan 3 17:08 /data/images/proton/DKlab/mr/parclip/shrimp/2hrep1-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 6696981581 Jan 3 16:53 /data/images/proton/DKlab/mr/parclip/shrimp/0hrep3-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 2538372179 Jan 3 16:30 /data/images/proton/DKlab/mr/parclip/shrimp/0hrep2-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 4813349514 Jan 3 16:19 /data/images/proton/DKlab/mr/parclip/shrimp/0hrep1-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 5681200731 Jan 3 15:39 /data/images/proton/DKlab/mr/parclip/shrimp/IL4-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 2463158096 Jan 3 15:19 /data/images/proton/DKlab/mr/parclip/shrimp/IGG-15mMm.bam.md.bam -rw-r--r-- 1 reczko users 2774623092 Jan 3 15:09 /data/images/proton/DKlab/mr/parclip/shrimp/IFN-15mMm.bam.md.bam #@ cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b # step 1+2 of README-paralyze3.txt # paralyzer to bed, more than 5 TtoC #out: bed format # 1 2 3 4 5 6 7 8 9 10 11 12 #Chromosome,ClusterStart,ClusterEnd,ClusterID,ClusterSequence,ReadCount,ModeLocation,ConversionLocationCount,ConversionEventCount,NonConversionEventCount,ModeScore,Strand for i in sh-clusters*txt2.csv do echo $i awk -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk $i > $i.bed.gt5tc done # link files for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-distributions*.txt2.csv do ln -s $i done #Diff to README*5.txt: no max conv cutoff, direct to bed #get avg+max TtoC (note >100% bug in /data/images/proton/DKlab/mr/parclip/paralyzer/README.PARalyzer_v1_1_src.txt, clipped to 100%) for i in sh-distributions*.txt2.csv do cat $i| awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk|awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk > $i".avg.csv3.bed" done # intersect maxTtoC_gt_0.25 with clusters #/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc/ sh-distributions-0hrep1.txt2.csv.avg.csv.bed sh-distributions-0hrep2.txt2.csv.avg.csv.bed sh-distributions-0hrep3.txt2.csv.avg.csv.bed sh-distributions-2hrep1.txt2.csv.avg.csv.bed sh-distributions-2hrep2.txt2.csv.avg.csv.bed sh-distributions-2hrep3.txt2.csv.avg.csv.bed sh-distributions-6hrep1.txt2.csv.avg.csv.bed sh-distributions-6hrep2.txt2.csv.avg.csv.bed sh-distributions-6hrep3.txt2.csv.avg.csv.bed sh-distributions-IFN.txt2.csv.avg.csv.bed sh-distributions-IGG.txt2.csv.avg.csv.bed sh-distributions-IL4.txt2.csv.avg.csv.bed #with sh-clusters-0hrep1.txt2.csv.bed.gt5tc sh-clusters-0hrep2.txt2.csv.bed.gt5tc sh-clusters-0hrep3.txt2.csv.bed.gt5tc sh-clusters-2hrep1.txt2.csv.bed.gt5tc sh-clusters-2hrep2.txt2.csv.bed.gt5tc sh-clusters-2hrep3.txt2.csv.bed.gt5tc sh-clusters-6hrep1.txt2.csv.bed.gt5tc sh-clusters-6hrep2.txt2.csv.bed.gt5tc sh-clusters-6hrep3.txt2.csv.bed.gt5tc sh-clusters-IFN.txt2.csv.bed.gt5tc sh-clusters-IGG.txt2.csv.bed.gt5tc sh-clusters-IL4.txt2.csv.bed.gt5tc # (note: /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk adds # AvgConversionPct_ConversionEventCount_SdevConversionPct MaxConversionPct # ) is bedtools intersect -a sh-clusters-0hrep1.txt2.csv.bed.gt5tc -b 
sh-distributions-0hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk > sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep2.txt2.csv.bed.gt5tc -b sh-distributions-0hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep3.txt2.csv.bed.gt5tc -b sh-distributions-0hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep1.txt2.csv.bed.gt5tc -b sh-distributions-2hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep2.txt2.csv.bed.gt5tc -b sh-distributions-2hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep3.txt2.csv.bed.gt5tc -b sh-distributions-2hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep1.txt2.csv.bed.gt5tc -b sh-distributions-6hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep2.txt2.csv.bed.gt5tc -b sh-distributions-6hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a 
sh-clusters-6hrep3.txt2.csv.bed.gt5tc -b sh-distributions-6hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IFN.txt2.csv.bed.gt5tc -b sh-distributions-IFN.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IGG.txt2.csv.bed.gt5tc -b sh-distributions-IGG.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IL4.txt2.csv.bed.gt5tc -b sh-distributions-IL4.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2 # wc *gt0.25TtoC2 # # strand stats for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2 do echo $i awk -f ../strandStats1.awk $i done # separate plus strand results for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2 do wc $i awk -f ../filterPlusStrand.awk $i > $i.plus wc !$ done 193393 1160358 29124166 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 101515 609090 15297865 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 106635 639810 15885435 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 56994 341964 8509777 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 290852 1745112 44230590 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 155385 932310 23682988 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 155713 934278 23431066 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 84327 505962 12716714 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 167355 1004130 25385298 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 90695 544170 13789543 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 178851 
1073106 26828707 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 93054 558324 13960288 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 271231 1627386 40462705 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 147794 886764 22102104 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 238510 1431060 36279717 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 128111 768666 19518144 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 114574 687444 17133980 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 64028 384168 9599977 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 136129 816774 20357157 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2 73328 439968 10991821 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 96009 576054 14127569 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2 50953 305718 7511449 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 265482 1592892 40214310 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2 141457 848742 21447234 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus #remove IGG regions for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2.plus do wc $i bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus -v -s |sort -k1,1 -k2,2n > $i.noIGG wc !$ done 101515 609090 14384230 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 90630 543780 12803308 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 56994 341964 7996831 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 51226 307356 7172075 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 155385 932310 22284523 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 145758 874548 20852048 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 84327 505962 11957771 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 71326 427956 10065082 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 90695 544170 12973288 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 82695 496170 
11801714 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 93054 558324 13122802 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 84938 509628 11946194 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 147794 886764 20771958 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 135573 813438 18989369 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 128111 768666 18365145 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 117245 703470 16758381 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 64028 384168 9023725 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 58142 348852 8175812 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 73328 439968 10331869 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 64976 389856 9129517 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG #ok <0.25 50953 305718 7052872 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 0 0 0 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 141457 848742 20174121 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 124109 744654 17618125 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG ## To detect enriched high-scoring clusters, get highest modscore that is not uniformly distributed Rscript /data/images/proton/DKlab/mr/parclip/paralyzer/get_modescore_hist1.r modscore_cutoff for IFN 0.957 0.1134793327234 modscore_cutoff for IL4 0.969 0.216298175051088 modscore_cutoff for 0h_all_reps 0.959 0.100461110973894 modscore_cutoff for 2h_all_reps 0.96 0.207257556532147 modscore_cutoff for 6h_all_reps 0.955 0.179819664237969 awk -v th=0.957 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 30743 184458 4532268 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.969 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 44558 267348 6689577 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.959 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 34902 209412 5519193 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.959 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 22841 137046 3570050 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.959 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 60679 364074 9652082 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.96 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 30775 184650 4839359 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.96 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 39715 238290 6330032 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.96 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 33988 203928 5339585 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.955 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 54740 328440 
8512192 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.955 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 49078 294468 7823315 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.955 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 27015 162090 4242020 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt #prepare 2 of 3 intersection bedtools multiinter -i sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > 
sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc # (note: use ../get-multiintersect-details-union1.awk of 1 of 3=union) #add details to intersection: awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed #merge book-ended regions: ENSMUST00000000391 1861 1885 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + ENSMUST00000000391 1885 1888 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G152.1_CCCCATAGGTTCCTGTGCTTGGATGCATGGTCATCAGCCGGTG_34_1893_5_6_52_0.9992483503045925_0.266667_5_0.500000|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + awk -f ../get-merged-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed #get max scoring cluster, add max avgConvPct as score awk -f ../get-max-intersect-score2.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f 
../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed # filter protein_coding_genes and add annotation from /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9.headers cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2/ for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*flt*bed3.gt5tc.details.merged.maxAvg.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus |sort -k1,1 -k4,4 -k2,2n -k5,5n > 
sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_with_sdev cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*bed3*anno.csv . cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*csv3*anno.csv . Rscript mapTRtoGenome7.r for i in *2.bed do sort -k1,1 -k2,2n $i | awk -f correct-bed.awk > $i.srt /data/results/tools/gbrowser/bedToBigBed $i.srt /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes $i.bb echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/"$i.bb done track type=bigBed name="IFN clusters" description="IFN paralyzer" bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/IFN-mapped.bb #@ per sample, unfiltered cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings3_per_sample for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-*.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done Rscript get_modescore_hist4.r modscore_cutoff for IFN 0.957 0.1134793327234 modscore_cutoff for IL4 0.969 0.216298175051088 modscore_cutoff 
for 0h_rep1 0.962 0.0560802059206641 modscore_cutoff for 0h_rep2 0.963 0.0526080540848064 modscore_cutoff for 0h_rep3 0.961 0.120913954333128 modscore_cutoff for 2h_rep1 0.965 0.139925455405083 modscore_cutoff for 2h_rep2 0.964 0.0715323650379684 modscore_cutoff for 2h_rep3 0.962 0.149587618299848 modscore_cutoff for 6h_rep1 0.966 0.0579317957861902 modscore_cutoff for 6h_rep2 0.967 0.0800039136567543 modscore_cutoff for 6h_rep3 0.962 0.0930348364107952 awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 26930 80790 764853 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 22960 68880 651289 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 38714 116142 1096177 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 #intersect each replicate with merged regions # bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus -s bedtools intersect -a sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 #@ 3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 918 2754 26121 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 910 2730 25779 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 1518 4554 43124 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 mkdir /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2 cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2 for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/*gt0.25TtoC2*of3 do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv # link source of merged files for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-?hrep?.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG do ln -s $i done