/home/reczko/bin/faToTwoBit genome.fa genome.2bit
The directory /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_with_sdev contains the clusters with the standard deviation (sdev) info added. Column format:
EnsemblID = transcript ID
ClusterID = unique ID for the cluster
ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
ModeLocation = coordinate of the location with the highest signal / (signal + background) value
ConversionLocationCount = number of unique locations where at least 1 conversion occurred
ConversionEventCount = total number of conversions that occurred within the cluster
NonConversionEventCount = total number of possible conversion events that did not occur
ModeScore = score of the highest signal / (signal + background) value
AvgConversionPct = average conversion % of all conversions in the group containing the cluster
GroupConversionEventCount = number of all conversions in the group containing the cluster
SdevConversionPct = sdev of conversion % of all conversions in the group containing the cluster
MaxConversionPct = max.
conversion % of all conversions in the group containing the cluster #@ cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c source myp-all2.sh ./mergeMyPar.sh sh-clusters-0hrep1.txt2 ./mergeMyPar.sh sh-clusters-0hrep2.txt2 ./mergeMyPar.sh sh-clusters-0hrep3.txt2 ./mergeMyPar.sh sh-clusters-2hrep1.txt2 ./mergeMyPar.sh sh-clusters-2hrep2.txt2 ./mergeMyPar.sh sh-clusters-2hrep3.txt2 ./mergeMyPar.sh sh-clusters-6hrep1.txt2 ./mergeMyPar.sh sh-clusters-6hrep2.txt2 ./mergeMyPar.sh sh-clusters-6hrep3.txt2 ./mergeMyPar.sh sh-clusters-IGG.txt2 ./mergeMyPar.sh sh-clusters-IL4.txt2 ./mergeMyPar.sh sh-clusters-IFN.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep2.txt2 ./mergeMyPar-dist.sh sh-distributions-0hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-2hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-2hrep2.txt2 ./mergeMyPar-dist.sh sh-distributions-2hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep1.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep2.txt2 ./mergeMyPar-dist.sh sh-distributions-6hrep3.txt2 ./mergeMyPar-dist.sh sh-distributions-IGG.txt2 ./mergeMyPar-dist.sh sh-distributions-IL4.txt2 ./mergeMyPar-dist.sh sh-distributions-IFN.txt2 ./mergeMyPar.sh sh-groups-0hrep1.txt2 ./mergeMyPar.sh sh-groups-0hrep2.txt2 ./mergeMyPar.sh sh-groups-0hrep3.txt2 ./mergeMyPar.sh sh-groups-2hrep1.txt2 ./mergeMyPar.sh sh-groups-2hrep2.txt2 ./mergeMyPar.sh sh-groups-2hrep3.txt2 ./mergeMyPar.sh sh-groups-6hrep1.txt2 ./mergeMyPar.sh sh-groups-6hrep2.txt2 ./mergeMyPar.sh sh-groups-6hrep3.txt2 ./mergeMyPar.sh sh-groups-IGG.txt2 ./mergeMyPar.sh sh-groups-IL4.txt2 ./mergeMyPar.sh sh-groups-IFN.txt2 # paralyzer to bed, more than 5 TtoC for i in sh-clusters*txt2.csv do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk $i > $i.bed.gt5tc done wc sh-cl*csv 86812 86812 9729358 sh-clusters-0hrep1.txt2.csv 62547 62547 6868606 sh-clusters-0hrep2.txt2.csv 121933 121933 13731766 sh-clusters-0hrep3.txt2.csv 
68003 68003 7595478 sh-clusters-2hrep1.txt2.csv 102904 102904 11533603 sh-clusters-2hrep2.txt2.csv 87220 87220 9724885 sh-clusters-2hrep3.txt2.csv 105863 105863 11806316 sh-clusters-6hrep1.txt2.csv 106653 106653 11982486 sh-clusters-6hrep2.txt2.csv 68318 68318 7519037 sh-clusters-6hrep3.txt2.csv 76698 76698 8446438 sh-clusters-IFN.txt2.csv 50480 50480 5524868 sh-clusters-IGG.txt2.csv 102487 102487 11464303 sh-clusters-IL4.txt2.csv 1039918 1039918 115927144 total ( # link files for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-distributions*.txt2.csv do ln -s $i done ) #Diff to README*5.txt: no max conv cutoff, direct to bed #get avg+max TtoC (note >100% bug in /data/images/proton/DKlab/mr/parclip/paralyzer/README.PARalyzer_v1_1_src.txt, clipped to 100%) for i in sh-distributions*.txt2.csv do cat $i| awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk|awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk > $i".avg.csv3.bed" done # intersect maxTtoC_gt_0.25 with clusters #/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_gt5tc/ sh-distributions-0hrep1.txt2.csv.avg.csv.bed sh-distributions-0hrep2.txt2.csv.avg.csv.bed sh-distributions-0hrep3.txt2.csv.avg.csv.bed sh-distributions-2hrep1.txt2.csv.avg.csv.bed sh-distributions-2hrep2.txt2.csv.avg.csv.bed sh-distributions-2hrep3.txt2.csv.avg.csv.bed sh-distributions-6hrep1.txt2.csv.avg.csv.bed sh-distributions-6hrep2.txt2.csv.avg.csv.bed sh-distributions-6hrep3.txt2.csv.avg.csv.bed sh-distributions-IFN.txt2.csv.avg.csv.bed sh-distributions-IGG.txt2.csv.avg.csv.bed sh-distributions-IL4.txt2.csv.avg.csv.bed #with sh-clusters-0hrep1.txt2.csv.bed.gt5tc sh-clusters-0hrep2.txt2.csv.bed.gt5tc sh-clusters-0hrep3.txt2.csv.bed.gt5tc sh-clusters-2hrep1.txt2.csv.bed.gt5tc sh-clusters-2hrep2.txt2.csv.bed.gt5tc sh-clusters-2hrep3.txt2.csv.bed.gt5tc sh-clusters-6hrep1.txt2.csv.bed.gt5tc 
sh-clusters-6hrep2.txt2.csv.bed.gt5tc sh-clusters-6hrep3.txt2.csv.bed.gt5tc sh-clusters-IFN.txt2.csv.bed.gt5tc sh-clusters-IGG.txt2.csv.bed.gt5tc sh-clusters-IL4.txt2.csv.bed.gt5tc # (note: /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk adds # AvgConversionPct_ConversionEventCount_SdevConversionPct MaxConversionPct # ) is bedtools intersect -a sh-clusters-0hrep1.txt2.csv.bed.gt5tc -b sh-distributions-0hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk > sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep2.txt2.csv.bed.gt5tc -b sh-distributions-0hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-0hrep3.txt2.csv.bed.gt5tc -b sh-distributions-0hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep1.txt2.csv.bed.gt5tc -b sh-distributions-2hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep2.txt2.csv.bed.gt5tc -b sh-distributions-2hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-2hrep3.txt2.csv.bed.gt5tc -b sh-distributions-2hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep1.txt2.csv.bed.gt5tc -b sh-distributions-6hrep1.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep2.txt2.csv.bed.gt5tc -b sh-distributions-6hrep2.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-6hrep3.txt2.csv.bed.gt5tc -b sh-distributions-6hrep3.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IFN.txt2.csv.bed.gt5tc -b sh-distributions-IFN.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IGG.txt2.csv.bed.gt5tc -b sh-distributions-IGG.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2 bedtools intersect -a sh-clusters-IL4.txt2.csv.bed.gt5tc -b sh-distributions-IL4.txt2.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk> sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2 # wc *gt0.25TtoC2 # ( # strand stats for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2 do echo $i awk -f ../strandStats1.awk $i done ) # separate plus strand results for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2 do wc $i awk -f ../filterPlusStrand.awk $i > $i.plus wc !$ done 47599 285594 7056227 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 24767 148602 3677077 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 31785 190710 4635894 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 16927 101562 2476495 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 74112 444672 11012214 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 
38643 231858 5750871 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 34310 205860 5084794 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 17915 107490 2661722 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 53376 320256 7972853 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 28475 170850 4259007 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 48118 288708 7097802 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 25024 150144 3696092 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 66856 401136 9807800 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2 35285 211710 5188576 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 57900 347400 8628586 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2 30735 184410 4591174 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 34502 207012 5054044 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2 18531 111186 2719910 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 39263 235578 5748824 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2 21148 126888 3099601 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 27247 163482 3938727 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2 14438 86628 2090142 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 61854 371124 9139616 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2 32881 197286 4867022 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus #remove IGG regions for i in sh-clusters-*csv3.bed.gt5tc.gt0.25TtoC2.plus do wc $i bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus -v -s |sort -k1,1 -k2,2n > $i.noIGG wc !$ done 24767 148602 3677077 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 19948 119688 2939926 sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 16927 101562 2476495 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 13863 83178 2016967 sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 38643 231858 5750871 
sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 33564 201384 4965089 sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 17915 107490 2661722 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 15196 91176 2247456 sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 28475 170850 4259007 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 23090 138540 3426265 sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 25024 150144 3696092 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 20739 124434 3043788 sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 35285 211710 5188576 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 29731 178386 4338642 sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 30735 184410 4591174 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 25506 153036 3781598 sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 18531 111186 2719910 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 15381 92286 2246270 sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 21148 126888 3099601 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 17154 102924 2500590 sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 14438 86628 2090142 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 0 0 0 sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG 32881 197286 4867022 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus 25901 155406 3788710 sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG ## To detect enriched high-scoring clusters, get highest modscore that is not uniformly distributed Rscript get_modescore_hist_ncRNA1.r modscore_cutoff for IFN 0.975 0.0768265077960643 modscore_cutoff for IL4 0.981 0.0560981172482947 modscore_cutoff for 0h_all_reps 0.978 0.0790217963769725 modscore_cutoff for 2h_all_reps 0.976 0.0717192750332509 modscore_cutoff for 6h_all_reps 0.972 0.150049183949894 awk -v th=0.975 -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5988 35928 911808 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.981 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5842 35052 888441 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.978 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5537 33222 855811 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.978 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5071 30426 773365 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.978 -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 7510 45060 1146969 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.976 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5296 31776 822914 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.976 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5772 34632 889090 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.976 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 6120 36720 942707 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.972 -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 8533 51198 1293391 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.972 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 6744 40464 1037070 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt awk -v th=0.972 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt wc !$ # 5899 35394 901422 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt #prepare 2 of 3 intersection bedtools multiinter -i sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt 
sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc bedtools multiinter -i sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc # (note: use ../get-multiintersect-details-union1.awk of 1 of 3=union) #add details to intersection: awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed #merge book-ended regions: ENSMUST00000000391 1861 1885 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + ENSMUST00000000391 1885 1888 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G152.1_CCCCATAGGTTCCTGTGCTTGGATGCATGGTCATCAGCCGGTG_34_1893_5_6_52_0.9992483503045925_0.266667_5_0.500000|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 + awk -f ../get-merged-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed #get max scoring cluster, add max avgConvPct as score awk -f ../get-max-intersect-score2.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.bed > 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed awk -v f1="sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f2="sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -v f3="sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt" -f ../get-multiintersect-details1.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc |awk -f ../get-merged-multiintersect-details1.awk | awk -f ../get-max-intersect-score2.awk > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.intersection-all.bed3.gt5tc.details.merged.maxAvg.bed # add annotation from /data/results/reference/mmu/mm9/ncRNA/Mus_musculus.NCBIM37.64-toMM9.headers cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_ncRNA for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/*flt*bed3.gt5tc.details.merged.maxAvg.bed do echo $i awk -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_ncRNA.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates_ncRNA.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates_ncRNA.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1_no_replicates_ncRNA.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.csv cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_with_sdev cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/*bed3*anno.csv . cp /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/*csv3*anno.csv . 
Rscript mapTRtoGenome7.r for i in *2.bed do sort -k1,1 -k2,2n $i | awk -f correct-bed.awk > $i.srt /data/results/tools/gbrowser/bedToBigBed $i.srt /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes $i.bb echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/"$i.bb done track type=bigBed name="IFN clusters" description="IFN paralyzer" bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/IFN-mapped.bb awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 26930 80790 764853 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 22960 68880 651289 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 wc !$ 38714 116142 1096177 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 #intersect each replicate with merged regions # bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus -s bedtools intersect -a sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 bedtools intersect -a sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG -b sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt2of3 -s > sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.gt2of3 #@ 3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > 
sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 918 2754 26121 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 910 2730 25779 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 awk -v th=2 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter-gt-2of3samples.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc | bedtools merge -i - > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 wc !$ 1518 4554 43124 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed3.gt5tc.gt3of3 mkdir /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_gt5tc_gt0.25TtoC2 cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/res/recommended_settings_gt5tc_gt0.25TtoC2 for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/*gt0.25TtoC2*of3 do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno.csv ln -s !$ done awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IGG.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IFN.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate1.awk 
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG |sort -k1,1 -k4,4 -k2,2n -k5,5n > sh-clusters-IL4.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv # link source of merged files for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1c/sh-clusters-?hrep?.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG do ln -s $i done