ENSMUST00000000254-chr6-+-128884399-128885000,+,356,387,G4.1,AGCCAGAGCTGTAGCTACAGCTTTCGTTCTTT,16,356,0.8201182708827222,2,4,121 (+ 128884399 356) 128884755 ENSMUST00000000254-chr6-+-128884399-128885000,+,331,389,128884732,1,0,128884739,3,0,128884743,3,0,128884748,3,0,128884749,3,0,128884751,2,0,128884764,16,0,128884766,16,0,128884770,16,0,128884776,15,0,128884777,14,3,128884778,14,1,128884781,10,0,128884782,8,0,128884784,6,0,128884785,6,0,128884786,4,0,128884788,1,0 at 128884732 -1 is a T for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-distributions-???*.txt2-1kIntrons.csv do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-nTtoC-and-nReads2.awk $i > $i".nT2C" done # cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/ # /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk adds: ## 1..6(same) AvgConversionPct MaxConversionPct ConversionEventCount SdevConversionPct #to dist # /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk #paralyzer+ format #Chromosome,Strand,ClusterStart,ClusterEnd,ClusterID,InfoType,AvgConversionPct,MaxConversionPct,ConversionEventCount,SdevConversionPct #1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ,10 #out: bed format # 1 2 3 4 _ (7) _ (9) _ (10) 5 _ 6 #Chromosome,ClusterStart,ClusterEnd,ClusterID,AvgConversionPct,ConversionEventCount,SdevConversionPct, MaxConversionPct ,Strand for i in sh-distributions-???*.txt2-1kIntrons.csv do echo $i cat $i| awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk|awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk > $i".avg.csv3.bed" done # intersect maxTtoC_gt_0.25 with clusters # (note: /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk adds # AvgConversionPct_ConversionEventCount_SdevConversionPct MaxConversionPct # ) awk -v f3="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v fc="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.best.per-gene-exons.bed #sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.best.per-gene-exons.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed reczko@fix:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d$ ls -lt sh-clusters-IL4.txt2* -rw-r--r-- 1 reczko users 899345 Jun 13 18:54 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed -rw-r--r-- 1 reczko users 803917 Jun 13 18:54 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed -rw-r--r-- 1 reczko users 139404 Jun 13 18:54 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed -rw-r--r-- 1 reczko users 669119 Jun 13 18:54 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed -rw-r--r-- 1 reczko users 1494376 Jun 13 18:52 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed -rw-r--r-- 1 reczko users 297490 Jun 13 20:29 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.ncRNA -rw-r--r-- 1 reczko users 711960 Jun 13 20:29 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.cds.bed -rw-r--r-- 1 reczko users 121227 Jun 13 20:29 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.5utr.bed -rw-r--r-- 1 reczko users 579048 Jun 13 20:29 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon.3utr.bed -rw-r--r-- 1 reczko users 3102654 Jun 13 20:28 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.intron -rw-r--r-- 1 reczko users 1605162 Jun 13 20:28 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2.exon -rw-r--r-- 1 reczko users 4707816 Jun 13 20:28 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.anno2 -rw-r--r-- 1 reczko users 4640998 Jun 13 20:27 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.ded.genomic2.bed.collapsed.best.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed 5353 3utr 783 5utr 5818 cds 997 other 1st max: 3utr 663 5utr 334 cds 0 2nd max: 3utr 0 5utr 21 cds 976 clusters in introns (both PGC/ncRNA): 1111 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.intron sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.per-gene.bed:chr16 56188661 56188751 ENSMUST00000089360_G7004.1_66_814_4_13_484_0.9999706665542395_0.198641_8_0.343782 1000 + Senp7 protein_coding mi1 m2 mi22 m20