#!/usr/bin/env bash
#
# PARalyzer post-processing: clusters with >5 TtoC, max TtoC conversion > 0.25,
# plus strand only, IGG regions removed, replicate intersection and annotation.
#
# This file was a command log whose line breaks had been lost. The commands
# are restored one per line; tool output that was pasted into the log is kept
# verbatim as "#" comments directly after the step that produced it.
#
# Changes relative to the raw log:
#   * interactive history expansion ("wc !$", "ln -s !$") is written out as the
#     explicit file name, because history expansion is off in scripts;
#   * the per-sample / per-timepoint commands are folded into loops over the
#     same sample names;
#   * the results directory is created before the first "cd" into it;
#   * the working directory is switched back to PARalyzer_v1_1b before the
#     2of3 / 3of3 steps, whose inputs and outputs live there;
#   * symlinks are only created when the name is still free, so a re-run does
#     not fail with "File exists".

PARCLIP_DIR=/data/images/proton/DKlab/mr/parclip/paralyzer
PA_DIR=$PARCLIP_DIR/PARalyzer_v1_1b
RES_DIR=$PA_DIR/res/recommended_settings_gt5tc_gt0.25TtoC2

# All PARalyzer samples, and the time points that have three replicates.
SAMPLES=(0hrep1 0hrep2 0hrep3 2hrep1 2hrep2 2hrep3 6hrep1 6hrep2 6hrep3 IFN IGG IL4)
TIMEPOINTS=(0h 2h 6h)

# Common file-name suffixes used throughout.
CL_SUFFIX=.txt2.csv2.bed.gt5tc.gt0.25TtoC2
INTER_SUFFIX=.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc

# Symlink $1 into the current directory under its base name, unless that name
# is already taken (plain "ln -s" would fail with "File exists").
link_here() {
  local target=$1
  local name=${target##*/}
  if [[ -e "$name" || -L "$name" ]]; then
    return 0
  fi
  ln -s -- "$target"
}

# Annotate BED file $1 with pa2annotate1.awk, write the sorted result next to
# it as "$1.anno.csv", and link that file into the current directory.
annotate_and_link() {
  local bed=$1
  printf '%s\n' "$bed"
  awk -f "$PARCLIP_DIR/pa2annotate1.awk" "$bed" \
    | sort -k1,1 -k4,4 -k2,2n -k5,5n > "$bed.anno.csv"
  link_here "$bed.anno.csv"
}

cd "$PA_DIR" || exit 1

# step 1+2 of README-paralyze3.txt
# paralyzer to bed, more than 5 TtoC
for f in sh-clusters*txt2.csv; do
  [[ -e "$f" ]] || continue
  printf '%s\n' "$f"
  awk -f "$PARCLIP_DIR/pa2bed-gt5tc.awk" "$f" > "$f.bed.gt5tc"
done

# link files
# NOTE(review): the link sources are in the directory we are already in, so
# these names already exist here and nothing is linked -- confirm which
# working directory was intended for this step.
for f in "$PA_DIR"/sh-distributions*.txt2.csv; do
  [[ -e "$f" ]] || continue
  link_here "$f"
done

# Diff to README*5.txt: no max conv cutoff, direct to bed
# get avg+max TtoC (note >100% bug in
# /data/images/proton/DKlab/mr/parclip/paralyzer/README.PARalyzer_v1_1_src.txt,
# clipped to 100%)
for f in sh-distributions*.txt2.csv; do
  [[ -e "$f" ]] || continue
  awk -f "$PARCLIP_DIR/get-TtoC-conversionPct-noCutoff.awk" < "$f" \
    | awk -f "$PARCLIP_DIR/pa-dist2bed.awk" > "$f.avg.csv2.bed"
done

# intersect maxTtoC_gt_0.25 with clusters
#/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc/
#   sh-distributions-<sample>.txt2.csv.avg.csv2.bed
# with
#   sh-clusters-<sample>.txt2.csv.bed.gt5tc
# for <sample> in: 0hrep1 0hrep2 0hrep3 2hrep1 2hrep2 2hrep3 6hrep1 6hrep2
# 6hrep3 IFN IGG IL4
# (the log listed the distribution files as *.avg.csv.bed; the files written
# above and read below are *.avg.csv2.bed)
for s in "${SAMPLES[@]}"; do
  bedtools intersect \
    -a "sh-clusters-$s.txt2.csv.bed.gt5tc" \
    -b "sh-distributions-$s.txt2.csv.avg.csv2.bed" \
    -wa -wb -s \
    | awk -f "$PARCLIP_DIR/add_conversion_pct1.awk" \
    > "sh-clusters-$s$CL_SUFFIX"
done

# wc *gt0.25TtoC2
#
# strand stats
for f in sh-clusters-*csv2.bed.gt5tc.gt0.25TtoC2; do
  [[ -e "$f" ]] || continue
  printf '%s\n' "$f"
  awk -f ../strandStats1.awk "$f"
done
# Recorded output:
# sh-clusters-0hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 193393 plus_strand 101515 52.4916 % minus_strand 91878 47.5084 %
# sh-clusters-0hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 106635 plus_strand 56994 53.4477 % minus_strand 49641 46.5523 %
# sh-clusters-0hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 290852 plus_strand 155385 53.4241 % minus_strand 135467 46.5759 %
# sh-clusters-2hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 155713 plus_strand 84327 54.1554 % minus_strand 71386 45.8446 %
# sh-clusters-2hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 167355 plus_strand 90695 54.1932 % minus_strand 76660 45.8068 %
# sh-clusters-2hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 178851 plus_strand 93054 52.0288 % minus_strand 85797 47.9712 %
# sh-clusters-6hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 271231 plus_strand 147794 54.4901 % minus_strand 123437 45.5099 %
# sh-clusters-6hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 238510 plus_strand 128111 53.7131 % minus_strand 110399 46.2869 %
# sh-clusters-6hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 114574 plus_strand 64028 55.8835 % minus_strand 50546 44.1165 %
# sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 136129 plus_strand 73328 53.8666 % minus_strand 62801 46.1334 %
# sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 96009 plus_strand 50953 53.0711 % minus_strand 45056 46.9289 %
# sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2 nclusters 265482 plus_strand 141457 53.2831 % minus_strand 124025 46.7169 %

# separate plus strand results
for f in sh-clusters-*csv2.bed.gt5tc.gt0.25TtoC2; do
  [[ -e "$f" ]] || continue
  wc "$f"
  awk -f ../filterPlusStrand.awk "$f" > "$f.plus"
  wc "$f.plus"
done
# Recorded output:
# 193393 1160358 27383629 sh-clusters-0hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 101515 609090 14384230 sh-clusters-0hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 106635 639810 14925720 sh-clusters-0hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 56994 341964 7996831 sh-clusters-0hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 290852 1745112 41612922 sh-clusters-0hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 155385 932310 22284523 sh-clusters-0hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 155713 934278 22029649 sh-clusters-2hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 84327 505962 11957771 sh-clusters-2hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 167355 1004130 23879103 sh-clusters-2hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 90695 544170 12973288 sh-clusters-2hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 178851 1073106 25219048 sh-clusters-2hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 93054 558324 13122802 sh-clusters-2hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 271231 1627386 38021626 sh-clusters-6hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 147794 886764 20771958 sh-clusters-6hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 238510 1431060 34133127 sh-clusters-6hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 128111 768666 18365145 sh-clusters-6hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 114574 687444 16102814 sh-clusters-6hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 64028 384168 9023725 sh-clusters-6hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 136129 816774 19131996 sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 73328 439968 10331869 sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 96009 576054 13263488 sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 50953 305718 7052872 sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 265482 1592892 37824972 sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2
# 141457 848742 20174121 sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus

#remove IGG regions
for f in sh-clusters-*csv2.bed.gt5tc.gt0.25TtoC2.plus; do
  [[ -e "$f" ]] || continue
  wc "$f"
  bedtools intersect -a "$f" \
    -b sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus -v -s \
    | sort -k1,1 -k2,2n > "$f.noIGG"
  wc "$f.noIGG"
done
# Recorded output:
# 101515 609090 14384230 sh-clusters-0hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 90630 543780 12803308 sh-clusters-0hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 56994 341964 7996831 sh-clusters-0hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 51226 307356 7172075 sh-clusters-0hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 155385 932310 22284523 sh-clusters-0hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 145758 874548 20852048 sh-clusters-0hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 84327 505962 11957771 sh-clusters-2hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 71326 427956 10065082 sh-clusters-2hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 90695 544170 12973288 sh-clusters-2hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 82695 496170 11801714 sh-clusters-2hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 93054 558324 13122802 sh-clusters-2hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 84938 509628 11946194 sh-clusters-2hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 147794 886764 20771958 sh-clusters-6hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 135573 813438 18989369 sh-clusters-6hrep1.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 128111 768666 18365145 sh-clusters-6hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 117245 703470 16758381 sh-clusters-6hrep2.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 64028 384168 9023725 sh-clusters-6hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 58142 348852 8175812 sh-clusters-6hrep3.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 73328 439968 10331869 sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 64976 389856 9129517 sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 50953 305718 7052872 sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 0 0 0 sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 141457 848742 20174121 sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus
# 124109 744654 17618125 sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG

#2 of 3 intersection
for tp in "${TIMEPOINTS[@]}"; do
  bedtools multiinter -i \
    "sh-clusters-${tp}rep1$CL_SUFFIX.plus.noIGG" \
    "sh-clusters-${tp}rep2$CL_SUFFIX.plus.noIGG" \
    "sh-clusters-${tp}rep3$CL_SUFFIX.plus.noIGG" \
    > "sh-clusters-$tp$INTER_SUFFIX"
done

#add details to intersection (0h, step by step, keeping the intermediates):
awk -v f1="sh-clusters-0hrep1$CL_SUFFIX.plus.noIGG" \
  -v f2="sh-clusters-0hrep2$CL_SUFFIX.plus.noIGG" \
  -v f3="sh-clusters-0hrep3$CL_SUFFIX.plus.noIGG" \
  -f ../get-multiintersect-details1.awk \
  "sh-clusters-0h$INTER_SUFFIX" \
  > "sh-clusters-0h$INTER_SUFFIX.details.bed"

#merge book-ended regions, e.g.:
# ENSMUST00000000391 1861 1885 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 +
# ENSMUST00000000391 1885 1888 G203.2_CTGTGGTGGTGTGAATGAGAACGGCCCC_82_1889_3_17_301_0.9799596000097828_0.073161_9_0.220339|G152.1_CCCCATAGGTTCCTGTGCTTGGATGCATGGTCATCAGCCGGTG_34_1893_5_6_52_0.9992483503045925_0.266667_5_0.500000|G245.1_CTGTGGTGGTGTGAATGAGAACGGCCCC_53_1889_4_14_203_0.9728772245435562_0.114361_4_0.200000 +
awk -f ../get-merged-multiintersect-details1.awk \
  "sh-clusters-0h$INTER_SUFFIX.details.bed" \
  > "sh-clusters-0h$INTER_SUFFIX.details.merged.bed"

#get max scoring cluster
awk -f ../get-max-intersect-score1.awk \
  "sh-clusters-0h$INTER_SUFFIX.details.merged.bed" \
  > "sh-clusters-0h$INTER_SUFFIX.details.merged.maxAvg.bed"

# The same three steps as one pipeline per time point (for 0h this rewrites
# the maxAvg file produced just above, as in the original log).
for tp in "${TIMEPOINTS[@]}"; do
  awk -v f1="sh-clusters-${tp}rep1$CL_SUFFIX.plus.noIGG" \
    -v f2="sh-clusters-${tp}rep2$CL_SUFFIX.plus.noIGG" \
    -v f3="sh-clusters-${tp}rep3$CL_SUFFIX.plus.noIGG" \
    -f ../get-multiintersect-details1.awk \
    "sh-clusters-$tp$INTER_SUFFIX" \
    | awk -f ../get-merged-multiintersect-details1.awk \
    | awk -f ../get-max-intersect-score1.awk \
    > "sh-clusters-$tp$INTER_SUFFIX.details.merged.maxAvg.bed"
done

# filter protein_coding_genes and add annotation from
# /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9.headers
# (the log ran mkdir only further down; the directory must exist before cd)
mkdir -p "$RES_DIR"
cd "$RES_DIR" || exit 1
for f in "$PA_DIR"/*maxAvg.bed; do
  [[ -e "$f" ]] || continue
  annotate_and_link "$f"
done

Rscript mapTRtoGenome7.r

for f in *2.bed; do
  [[ -e "$f" ]] || continue
  sort -k1,1 -k2,2n "$f" | awk -f correct-bed.awk > "$f.srt"
  /data/results/tools/gbrowser/bedToBigBed "$f.srt" \
    /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes \
    "$f.bb"
  echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/$f.bb"
done
# Track definition noted in the log:
# track type=bigBed name="IFN clusters" description="IFN paralyzer" bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/IFN-mapped.bb

# The steps below read and write files in PARalyzer_v1_1b using relative
# names, so switch back there.
cd "$PA_DIR" || exit 1

# regions passing the filter's default threshold, merged (labelled 2of3)
for tp in "${TIMEPOINTS[@]}"; do
  awk -f "$PARCLIP_DIR/filter-gt-2of3samples.awk" "sh-clusters-$tp$INTER_SUFFIX" \
    | bedtools merge -i - > "sh-clusters-$tp$INTER_SUFFIX.gt2of3"
  wc "sh-clusters-$tp$INTER_SUFFIX.gt2of3"
done
# Recorded output:
# 26930 80790 764853 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt2of3
# 22960 68880 651289 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt2of3
# 38714 116142 1096177 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt2of3

#intersect each replicate with merged regions
# bedtools intersect -a $i -b sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus -s
for tp in "${TIMEPOINTS[@]}"; do
  for rep in 1 2 3; do
    bedtools intersect \
      -a "sh-clusters-${tp}rep${rep}$CL_SUFFIX.plus.noIGG" \
      -b "sh-clusters-$tp$INTER_SUFFIX.gt2of3" -s \
      > "sh-clusters-${tp}rep${rep}$CL_SUFFIX.plus.noIGG.gt2of3"
  done
done

#@ 3of3 (same filter script, run with th=2)
for tp in "${TIMEPOINTS[@]}"; do
  awk -v th=2 -f "$PARCLIP_DIR/filter-gt-2of3samples.awk" "sh-clusters-$tp$INTER_SUFFIX" \
    | bedtools merge -i - > "sh-clusters-$tp$INTER_SUFFIX.gt3of3"
  wc "sh-clusters-$tp$INTER_SUFFIX.gt3of3"
done
# Recorded output:
# 918 2754 26121 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt3of3
# 910 2730 25779 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt3of3
# 1518 4554 43124 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.gt3of3

mkdir -p "$RES_DIR"
cd "$RES_DIR" || exit 1
for f in "$PA_DIR"/*gt0.25TtoC2*of3; do
  [[ -e "$f" ]] || continue
  annotate_and_link "$f"
done

# Annotate the IGG plus-strand clusters and the IFN / IL4 noIGG clusters; the
# results are written into the current (results) directory.
awk -f "$PARCLIP_DIR/pa2annotate1.awk" \
  "$PA_DIR/sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus" \
  | sort -k1,1 -k4,4 -k2,2n -k5,5n \
  > sh-clusters-IGG.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.anno.csv
awk -f "$PARCLIP_DIR/pa2annotate1.awk" \
  "$PA_DIR/sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG" \
  | sort -k1,1 -k4,4 -k2,2n -k5,5n \
  > sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv
awk -f "$PARCLIP_DIR/pa2annotate1.awk" \
  "$PA_DIR/sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG" \
  | sort -k1,1 -k4,4 -k2,2n -k5,5n \
  > sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv

# link source of merged files
for f in "$PA_DIR"/sh-clusters-?hrep?.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG; do
  [[ -e "$f" ]] || continue
  link_here "$f"
done