# Column glossary for the PARalyzer cluster tables:
# EnsemblID = transcript ID
# ClusterID = unique ID for the cluster
# ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
# ModeLocation = coordinate of the location with the highest signal / (signal + background) value
# ConversionLocationCount = number of unique location where at least 1 conversion occurred
# ConversionEventCount = total number of conversions that occurred within the cluster
# NonConversionEventCount = total number of possible conversion events that did not occur
# ModeScore = score of the highest signal / (signal + background) value
# AvgConversionPct = average conversion % of all conversions in the group containing the cluster
# GroupConversionEventCount = number of all conversions in the group containing the cluster
# SdevConversionPct = sdev of conversion % of all conversions in the group containing the cluster
# MaxConversionPct = max. conversion % of all conversions in the group containing the cluster
# conversionFreq = ConversionEventCount / (ConversionEventCount + NonConversionEventCount)
# filter: conversionFreq >0.05 && (ReadCount>=5)

cd /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/
/home/reczko/bin/faToTwoBit mm9-mRNA-introns1k.fa mm9-mRNA-introns1k.2bit

# Add MD tags to each BAM (samtools calmd) and convert it to bowtie format.
# The notebook recorded several alternative loop headers, one per run; only
# the last one is active here. Swap in another header to redo that input.
#   for i in IFN-15mMm.bam
#   for i in 0hrep3-15mMm1kIntron.bam
#   for i in 6hrep3-15mMm1kIntron.bam
#   for i in IGG-15mMm.bam
for i in 2hrep?-15mMm1kIntron.bam
do
  echo "$i"
  # add MD flags for bowtie format
  # earlier reference (kept for the record):
  # /data/results/tools/samtools/samtools-1.3/samtools calmd -b $i /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/Mus_musculus.NCBIM37.64-toMM9.max-intron-1k >& /dev/null > foo.bam2
  # stdout (the BAM) goes straight to the final file, stderr is discarded;
  # this replaces the shared temp file foo.bam2 + mv of the original.
  /data/results/tools/samtools/samtools-1.3/samtools calmd -b "$i" \
    /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.fa \
    > "$i.md.bam" 2> /dev/null
  samtools view -h "$i.md.bam" \
    | awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/sam2bowtie.awk \
    > "$i.md.bt"
done

#uor
cd /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k

# Pool the three 0h replicates into one bowtie-format file.
cat 0hrep1-15mMm.bam.md.bt 0hrep2-15mMm.bam.md.bt 0hrep3-15mMm1kIntron.bam.md.bt > 0h-15mMm1kIntron.bam.md.bt
wc 0h-15mMm1kIntron.bam.md.bt
# 222931180 1783449423 29141795018 0h-15mMm1kIntron.bam.md.bt

gcc omit-reads.c -lJudy -o omit-reads
./omit-reads /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/0h-15mMm1kIntron.bam.md.bt

# sanity check
head -99999 /data/images/proton/DKlab/mr/parclip/shrimp-genomic-mRNA/IFN-15mMm1kIntron2.bam.md.bt | grep "T>C" | wc
# 13530 108240 1690155
# (next command was run at prompt reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d$)
head -99999 /data/images/proton/DKlab/mr/parclip/shrimp/0hrep1-15mMm.bam.md.bam.bt | grep "T>C" | wc
# 14657 117256 1463303

# At /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_with_sdev
# are the clusters with the sdev info added, format:
# EnsemblID = transcript ID
# ClusterID = unique ID for the cluster
# ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
# ModeLocation = coordinate of the location with the highest signal / (signal + background) value
# ConversionLocationCount = number of unique location where at least 1 conversion occurred
# ConversionEventCount = total number of conversions that occurred within the cluster
# NonConversionEventCount = total number of possible conversion events that did not occur
# ModeScore = score of the highest signal / (signal + background) value
# AvgConversionPct = average conversion % of all conversions in the group containing the cluster
# GroupConversionEventCount = number of all conversions in the group containing the cluster
# SdevConversionPct = sdev of conversion % of all conversions in the group containing the cluster
# MaxConversionPct = max. 
# conversion % of all conversions in the group containing the cluster

#needed for PARalyzer
# (original had "cs", a typo for cd)
cd /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k
/home/reczko/bin/faToTwoBit mm9-genomic-mRNA.fa mm9-genomic-mRNA.2bit
/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/myp-all-1kIntrons2.sh

# NOTE(review): the loops below use relative globs; later steps address the
# same files under .../paralyzer/PARalyzer_v1_1d, so they were presumably run
# from that directory -- the notebook does not record the cd. Confirm.

# paralyzer to bed, more than 5 TtoC
#/data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk
#in: paralyzer format
# 1 2 3 4 5 6 7 8 9 10 11 12
#Chromosome,Strand,ClusterStart,ClusterEnd,ClusterID,ClusterSequence,ReadCount,ModeLocation,ModeScore,ConversionLocationCount,ConversionEventCount,NonConversionEventCount
#out: bed format
# 1 2 3 4 _ 5 _ 6 _ 7 _ 8 _ 9 _ 10 11 12
# 1 2 3 4 5 6
#Chromosome,ClusterStart,ClusterEnd,ClusterID,ClusterSequence,ReadCount,ModeLocation,ConversionLocationCount,ConversionEventCount,NonConversionEventCount,ModeScore,Strand
for i in sh-clusters-*.txt2-1kIntrons.csv
do
  echo "$i"
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk "$i" > "$i.bed.gt5tc"
done

# /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk adds:
## 1..6(same) AvgConversionPct MaxConversionPct ConversionEventCount SdevConversionPct
#to dist
# /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk
#paralyzer+ format
#Chromosome,Strand,ClusterStart,ClusterEnd,ClusterID,InfoType,AvgConversionPct,MaxConversionPct,ConversionEventCount,SdevConversionPct
#1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ,10
#out: bed format
# 1 2 3 4 _ (7) _ (9) _ (10) 5 _ 6
#Chromosome,ClusterStart,ClusterEnd,ClusterID,AvgConversionPct,ConversionEventCount,SdevConversionPct, MaxConversionPct ,Strand
for i in sh-distributions-???*.txt2-1kIntrons.csv
do
  echo "$i"
  # input is fed on stdin (as the original "cat $i |" did) so the awk script
  # sees exactly the same stream
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk < "$i" \
    | awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk \
    > "$i.avg.csv3.bed"
done

# intersect maxTtoC_gt_0.25 with clusters
# (note: 
# /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk adds
# AvgConversionPct_ConversionEventCount_SdevConversionPct MaxConversionPct
# )
#in:
# 1 2 3 4 5 6 7 8 9 10 11 12
#in: ENSMUST00000000254-chr6-+-128884399-128885000 278 324 G5.2_ATTAGTTATTCTATTGGAGTATACAATACTCGAATAGTTCTCAGGCA_275_317_5_96_1643 0.7790820460717197 + ENSMUST00000000254-chr6-+-128884399-128885000 163 374 G5_0.004717_212_0.019691 0.126365 +
#input 2 parts
#cluster
# 1 2 3 4 _ 5 _ 6 _ 7 _ 8 _ 9 _ 10 11 12
# 1 2 3 4 5 6
#Chromosome,ClusterStart,ClusterEnd,ClusterID,ClusterSequence,ReadCount,ModeLocation,ConversionLocationCount,ConversionEventCount,NonConversionEventCount,ModeScore,Strand
#group/dist
# 1 2 3 4 _ (7) _ (9) _ (10) 5 6
#Chromosome,ClusterStart,ClusterEnd,ClusterID,AvgConversionPct,ConversionEventCount,SdevConversionPct, MaxConversionPct ,Strand
#input all:
#cluster
# 1 2 3 4 _ 5 _ 6 _ 7 _ 8 _ 9 _ 10 11 12
# 1 2 3 4 5 6
#Chromosome,ClusterStart,ClusterEnd,ClusterID,ClusterSequence,ReadCount,ModeLocation,ConversionLocationCount,ConversionEventCountC,NonConversionEventCount,ModeScore,Strand
#group/dist
# 7 8 9 10 11 12
#Chromosome,ClusterStart,ClusterEnd,ClusterID,AvgConversionPct,ConversionEventCountG,SdevConversionPct, MaxConversionPct ,Strand
#out
#Chromosome,ClusterStart,ClusterEnd,ClusterID_ClusterSequence_ReadCount_ModeLocation_ConversionLocationCount_ConversionEventCountC_NonConversionEventCount_ModeScore_AvgConversionPct_ConversionEventCountG_SdevConversionPct, MaxConversionPct ,Strand
#
# Strand-aware intersection of each sample's cluster bed with its
# distribution bed; add_conversion_pct2.awk post-processes the joined records.
# One iteration per sample, in the order the notebook ran them.
for sample in 0hrep1 2hrep1 6hrep1 0hrep2 2hrep2 6hrep2 0hrep3 2hrep3 6hrep3 IFN IGG IL4
do
  bedtools intersect \
    -a "sh-clusters-${sample}.txt2-1kIntrons.csv.bed.gt5tc" \
    -b "sh-distributions-${sample}.txt2-1kIntrons.csv.avg.csv3.bed" \
    -wa -wb -s \
    | awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk \
    > "sh-clusters-${sample}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2"
done

# separate plus strand results
for i in sh-clusters-*1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2
do
  wc "$i"
  awk -f ../filterPlusStrand.awk "$i" > "$i.plus"
  # was "wc !$": history expansion only works at an interactive prompt
  wc "$i.plus"
done
# recorded output:
# 754552 4527312 131501428 sh-clusters-0hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2
# 406385 2438310 71003130 sh-clusters-0hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 508797 3052782 88325829 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2
# 275171 1651026 47880004 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 372362 2234172 63783284 sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2
# 200678 1204068 34448891 sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 1118143 6708858 197457040 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2
# 607183 3643098 107466384 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus

#remove IGG regions
for i in sh-clusters-*1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
do
  wc "$i"
  # -v keeps only records of $i with no same-strand overlap in the IGG file
  bedtools intersect -a "$i" \
    -b sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus -v -s \
    | sort -k1,1 -k2,2n > "$i.noIGG"
  wc "$i.noIGG"
done
# recorded output:
# 406385 2438310 71003130 sh-clusters-0hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 372242 2233452 64914642 sh-clusters-0hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 275171 1651026 47880004 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 249803 1498818 43382238 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 200678 1204068 34448891 sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 0 0 0 sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG
# 607183 3643098 107466384 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus
# 543841 3263046 95912512 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG

#topuniform=max(which(lp8>0.05)); #highest modscore that is not uniformly distributed
Rscript /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/get_modescore_hist_1kIntrons.r
# recorded output (the second number on each line is the value passed as
# th= to the filter step for that sample):
# modscore_cutoff for IFN 0.917 0.551879409932128
# modscore_cutoff for IL4 0.959 0.117435487674906
# modscore_cutoff for 0h_rep1 0.959 0.0625273967265635
# "modscore_cutoff for 0h_rep2 0.945 0.103475484554532"
# "modscore_cutoff for 0h_rep3 0.945 0.124099813055241"
# "modscore_cutoff for 2h_rep1 0.962 0.0898276601965392"
# "modscore_cutoff for 2h_rep2 0.957 0.250358822416992"
# "modscore_cutoff for 2h_rep3 0.953 0.0684610535576249"
# "modscore_cutoff for 6h_rep1 0.951 0.157704846747281"
# "modscore_cutoff for 6h_rep2 0.952 0.397195483078005"
# "modscore_cutoff for 6h_rep3 0.918 0.0556072580989269"

# Mode-score filter for IFN.
awk -v th=0.917 \
  -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1_keep_best.awk \
  /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG \
  > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt
wc /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt
# 31723 190338 5688399 
# /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt

# Mode-score filter for the remaining samples. Each entry is sample:cutoff,
# in the order and with the th= values the notebook used.
# Recorded wc output for IL4:
# 48906 293436 8984444 /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt
for spec in IL4:0.959 0hrep1:0.959 0hrep2:0.945 0hrep3:0.945 \
            2hrep1:0.962 2hrep2:0.957 2hrep3:0.953 \
            6hrep1:0.951 6hrep2:0.952 6hrep3:0.918
do
  sample=${spec%%:*}
  th=${spec#*:}
  noigg="/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-${sample}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG"
  awk -v th="$th" \
    -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1_keep_best.awk \
    "$noigg" > "$noigg.flt"
  # was "wc !$": history expansion only works at an interactive prompt
  wc "$noigg.flt"
done

#2do include introns+cds
# filter protein_coding_genes and add annotation from /data/results/reference/mmu/mm9/1kIntrons-stranded/Mus_musculus.NCBIM37.64-toMM9.headers
for i in *txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt
do
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron1.awk "$i" \
    | sort -k1,1 -k4,4 -k2,2n -k5,5n > "$i.anno1.csv"
done

#make genomicbed+tracks:
cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d
for i in *1kI*flt
do
  echo "$i"
  awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks2.awk "$i" > "$i.genomic.bed"
done

cd /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks
# link the genomic beds into the tracks directory
for i in /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/*genomic.bed
do
  ln -s "$i"
done

# sort, correct and convert each bed to bigBed; print its hub bigDataUrl line
for i in *genomic.bed
do
  echo "$i"
  sort -k1,1 -k2,2n "$i" \
    | awk -f /data/images/proton/DKlab/mr/parclip/tracks/correct-bed.awk > "$i.srt"
  /data/results/tools/gbrowser/bedToBigBed "$i.srt" \
    /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes \
    "$i.bb"
  echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/$i.bb"
done
# hub URL:
# http://genomics-lab.fleming.gr/cgi-bin/hgTracks?db=mm9&hubUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/hub.txt

# pasted example records:
# ENSMUST00000100497-chr5---143664794-143668433 1056 1126 G4560.1_TGATAGTTCGCCATGGATGACGATATCGCTGCGCTGGTCGTCGACAACGGCTCCGGCATGTGCAAAGCCGG_20_1094_8_8_68_0.999106697968966_1.000000_113_0.000000 1.000000 + 0hrep1
# ENSMUST00000113676-chr5-+-67698194-67698936 425 459 G6588.1_AGGTCTTAGGGCACATCGCGGTGACCCAGGGTGAC_25_460_4_22_109_0.9998667980625998_0.833333_4_0.333333 1.000000 + 
# ENSMUST00000022142-chr13---101183104-101184631 814 873 G981.1_GCAGAGTTGTGTCCTCTAGTTGTGTCATAGTGATTCAGTCTGAATGTTATATATTGTTTC_14_874_9_14_137_0.9979809824870751_1.000000_60_0.000000 1.000000 +

#diff correct-bed2.awk to correct-bed.awk +: start-1 end-1 -: start-1 end-1

# test tracks version for G.Giagkas
#source:
cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/test-tracks
for i in *1kI*flt
do
  echo "$i"
  awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks2.awk "$i" > "$i.genomic.bed"
done

cd /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/test-tracks
for i in *genomic.bed
do
  echo "$i"
  sort -k1,1 -k2,2n "$i" \
    | awk -f /data/images/proton/DKlab/mr/parclip/tracks/correct-bed.awk > "$i.srt"
  /data/results/tools/gbrowser/bedToBigBed "$i.srt" \
    /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/mm9.chrom.sizes \
    "$i.bb"
  echo "bigDataUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/tracks/$i.bb"
done

# (the notebook listed this cd twice; once is enough)
cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/test-tracks
./update-hub.sh
# hub URLs (kept as comments: an unquoted "&" would background the "command"):
# http://genomics-lab.fleming.gr/cgi-bin/hgTracks?db=mm9&hubUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/shrimp-mRNA-introns1k/test-tracks/hub.txt
# http://genomics-lab.fleming.gr/cgi-bin/hgTracks?udcTimeout=0&db=mm9&hubUrl=http://genomics-lab.fleming.gr/fleming/DKlab/mr/parclip/shrimp-mRNA-introns1k/test-tracks/hub.txt

# outlier removal using MDS+trees: 0hrep1 2hrep3 6hrep3
# intersect clusters after outlier removal
#prepare 2 of 3 intersection
# NOTE(review): the glob below is relative; the notebook does not record which
# directory this loop was run from -- confirm before re-running.
for i in sh*rep*1kIn*flt
do
  sort -k1,1 -k2,2n "$i" > "$i.srt"
done

# sh-clusters-0hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt
# ENSMUST00000170376-chr10---13243648-13244249 268 337 G6665.1_TTGAGTATGATAACTTGTTGCAATAAACTATTTTAATAAAATATAGCTTTGTTTAGTTAATGCTTTTTAG_10_338_5_6_77_0.999817838122349_1.000000_111_0.000000 1.000000 +
# 
# sh-clusters-0hrep3.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt
# ENSMUST00000170376-chr10---13243648-13244249 240 273 G16315.1_GTATAAATTTAATAACTTTGTATTGATTTTGAGT_21_274_4_8_217_0.9780242797679841_0.708333_4_0.343592 1.000000 +
# ENSMUST00000170376-chr10---13243648-13244249 321 351 G16316.1_TAGTTAATGCTTTTTAGCCATTAATTTATTT_28_352_4_10_258_0.9461657648651448_1.000000_120_0.000000 1.000000 +
# => 2 clusters overlapping with 1 larger

# Per time point: multi-intersect the two retained replicates, then build the
# "2of2" set. Each entry is timepoint:replicateA:replicateB, using the same
# replicate pairs as the notebook's explicit commands.
for spec in 0h:rep2:rep3 2h:rep1:rep2 6h:rep1:rep2
do
  tp=${spec%%:*}
  reps=${spec#*:}
  rep_a=${reps%%:*}
  rep_b=${reps#*:}
  f1="sh-clusters-${tp}${rep_a}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt"
  f2="sh-clusters-${tp}${rep_b}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt"
  inter="sh-clusters-${tp}.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons"

  bedtools multiinter -i "$f1" "$f2" > "$inter"

  ## 2of2
  #add paralyzer details to intersection:
  awk -v f1="$f1" -v f2="$f2" -f ../get-multiintersect-details-2of2.awk "$inter" > "$inter.bed"

  #merge book-ended regions:
  awk -f ../get-merged-multiintersect-details1.awk "$inter.bed" > "$inter.merged.bed"

  #get max scoring cluster, add max avgConvPct as score
  awk -f ../get-max-intersect-score2.awk "$inter.merged.bed" > "$inter.merged.maxAvg.bed"
done

wc sh-clusters-?h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed
# recorded output:
# 1641 
# sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed
# 909 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed
# 2569 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed

## 1of2
# Per time point: build the "1of2" (union) set from the existing
# multi-intersect file. Each entry is timepoint:replicateA:replicateB, using
# the same replicate pairs as the notebook's explicit commands.
for spec in 0h:rep2:rep3 2h:rep1:rep2 6h:rep1:rep2
do
  tp=${spec%%:*}
  reps=${spec#*:}
  rep_a=${reps%%:*}
  rep_b=${reps#*:}
  f1="sh-clusters-${tp}${rep_a}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt"
  f2="sh-clusters-${tp}${rep_b}.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.srt"
  inter="sh-clusters-${tp}.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons"
  union="sh-clusters-${tp}.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons"

  #add paralyzer details to intersection:
  awk -v f1="$f1" -v f2="$f2" -f ../get-multiintersect-details-1of2.awk "$inter" > "$union.bed"

  #merge book-ended regions:
  awk -f ../get-merged-multiintersect-details1.awk "$union.bed" > "$union.merged.bed"

  #get max scoring cluster, add max avgConvPct as score
  awk -f ../get-max-intersect-score2.awk "$union.merged.bed" > "$union.merged.maxAvg.bed"
done

wc -l sh-clusters-?h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed
# recorded output:
# 84123 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed
# 61905 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed
# 95106 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed

# add E/I , PCG anno
for i in *maxAvg.bed
do
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron1.awk "$i" \
    | sort -k1,1 -k4,4 -k2,2n -k5,5n > "$i.anno1.csv"
done

#add 5/3utr/cds info:
for i in *maxAvg.bed
do
  echo "$i"
  awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks2.awk "$i" > "$i.genomic.bed"
done

grep ENSMUST00000170376 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed
# recorded output:
# ENSMUST00000170376-chr10---13243648-13244249 268 273 G6665.1_TTGAGTATGATAACTTGTTGCAATAAACTATTTTAATAAAATATAGCTTTGTTTAGTTAATGCTTTTTAG_10_338_5_6_77_0.999817838122349_1.000000_111_0.000000_1.000000 1.000000 +
# ENSMUST00000170376-chr10---13243648-13244249 321 337 
# G6665.1_TTGAGTATGATAACTTGTTGCAATAAACTATTTTAATAAAATATAGCTTTGTTTAGTTAATGCTTTTTAG_10_338_5_6_77_0.999817838122349_1.000000_111_0.000000_1.000000 1.000000 +
# cluster split due to:
# sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons:ENSMUST00000170376-chr10---13243648-13244249 268 273 2 1,2 1 1
# sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons:ENSMUST00000170376-chr10---13243648-13244249 273 321 1 1 1 0 #gap
# sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons:ENSMUST00000170376-chr10---13243648-13244249 321 337 2 1,2 1 1

# Strand-aware overlap of every genomic bed with the 3'UTR, 5'UTR and CDS
# annotation beds; prints the line count of each result. The notebook had two
# loops with identical bodies (time-course sets, then IFN/IL4); they are
# merged here, keeping the same file order.
for i in *maxAvg.bed.genomic.bed \
         sh-clusters-I??.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed
do
  for region in 3utr 5utr cds
  do
    bedtools intersect -a "$i" -b "/data/results/reference/mmu/${region}.bed" -wa -wb -s > "$i.${region}.bed"
    # was "wc -l !$": history expansion only works at an interactive prompt
    wc -l "$i.${region}.bed"
  done
done
# recorded output:
# 12306 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed
# 12342 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed
# 31984 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed
# 8311 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed
# 8025 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed
# 25702 
sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed 13654 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed 13264 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed 38698 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed 4136 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.3utr.bed 4366 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.5utr.bed 12943 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.cds.bed 6349 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.3utr.bed 6316 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.5utr.bed 18795 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.cds.bed 'high confidence 2of2': 185 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed 178 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed 408 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed 134 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed 137 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed 336 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed 328 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.3utr.bed 409 
sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.5utr.bed 1021 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.cds.bed # keep matching clusters for i in *maxAvg.bed.genomic.bed do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.cds.bed wc -l !$ done for i in sh-clusters-I??.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/keep_matching_clusters.awk > $i.cds.bed wc -l !$ done #@ merge bookended clusters bedtools merge -i A.bed -c 4 -o collapse ls -lt *srt*.genomic.bed -rw-r--r-- 1 reczko users 10796967 Jun 1 14:11 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed -rw-r--r-- 1 reczko users 294201 Jun 1 14:11 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed -rw-r--r-- 1 reczko users 7005100 Jun 1 14:11 
sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed -rw-r--r-- 1 reczko users 103642 Jun 1 14:11 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed -rw-r--r-- 1 reczko users 9545239 Jun 1 14:11 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed -rw-r--r-- 1 reczko users 187153 Jun 1 14:11 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d$ ls -lt *-I??.*.genomic.bed -rw-r--r-- 1 reczko users 5569621 May 23 12:53 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed -rw-r--r-- 1 reczko users 3593977 May 23 12:53 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed reczko@max:/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d$ for i in *srt*.genomic.bed do cat $i |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6 -o collapse,max,distinct > foo > $i".collapsed" done for i in *-I??.*.genomic.bed do cat $i |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6 -o collapse,max,distinct > foo > $i".collapsed" done for i in *collapsed do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-best-collapsed-cluster1.awk $i > $i".best.bed" done for i in *best.bed do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -wb -s |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,10 -o distinct,distinct,distinct,collapse > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -wb -s |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,10 -o distinct,distinct,distinct,collapse > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b 
/data/results/reference/mmu/cds.bed -wa -wb -s |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,10 -o distinct,distinct,distinct,collapse > $i.cds.bed wc -l !$ done 11657 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 2782 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 14086 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed 8144 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 1676 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 11311 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed 12300 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 3475 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 17066 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed 4035 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed 1094 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed 5638 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed 6226 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed 1389 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed 8300 
sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed 205 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 41 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 211 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed 134 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 21 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 159 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed 359 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed 83 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed 418 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed bedtools multiinter -i sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.3utr.5utr.cds.bed bedtools multiinter -i 
sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.3utr.5utr.cds.bed bedtools multiinter -i sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.3utr.5utr.cds.bed bedtools multiinter -i sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed > sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.3utr.5utr.cds.bed bedtools multiinter -i sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed > sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.3utr.5utr.cds.bed 
awk -v f3="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed" -v f5="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed" -v fc="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-stats1.awk > sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.per-gene.bed awk -v f3="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.3utr.bed" -v f5="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.5utr.bed" -v fc="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.bed.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-stats1.awk > sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.per-gene.bed awk -v f3="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed" -v f5="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed" -v fc="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-stats1.awk > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed awk -v f3="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed" -v f5="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed" -v 
fc="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-stats1.awk > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed awk -v f3="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.3utr.bed" -v f5="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.5utr.bed" -v fc="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.bed.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-stats1.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed Dear Margarita and George, the per-gene processing results as discussed are at /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d: sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.genomic.bed.collapsed.best.per-gene.bed sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.per-gene.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.genomic.bed.collapsed.best.per-gene.bed 0h 6 cds polymorphic_pseudogene 2 5utr IG_V_gene 47 cds IG_V_gene 2 other IG_V_gene 3utr 5utr 2 cds 0 1251 3utr nonsense_mediated_decay 10584 3utr protein_coding 1 cds IG_C_gene 159 5utr nonsense_mediated_decay 2567 5utr protein_coding 648 cds nonsense_mediated_decay 15802 cds protein_coding 156 other nonsense_mediated_decay 1st 
max: 3utr 114 5utr 42 cds 0 2nd max: 3utr 0 5utr 0 cds 156 1085 other protein_coding 1st max: 3utr 525 5utr 560 cds 0 2nd max: 3utr 0 5utr 2 cds 1083 2h 4 cds polymorphic_pseudogene 23 cds IG_V_gene 2 other IG_V_gene 3utr 5utr 2 cds 0 832 3utr nonsense_mediated_decay 7072 3utr protein_coding 4 cds IG_C_gene 45 5utr nonsense_mediated_decay 1149 5utr protein_coding 390 cds nonsense_mediated_decay 10611 cds protein_coding 86 other nonsense_mediated_decay 1st max: 3utr 74 5utr 12 cds 0 2nd max: 3utr 0 5utr 0 cds 86 576 other protein_coding 1st max: 3utr 310 5utr 266 cds 0 2nd max: 3utr 0 5utr 3 cds 573 6h 2 cds polymorphic_pseudogene 24 cds IG_V_gene 1098 3utr nonsense_mediated_decay 10236 3utr protein_coding 2 cds IG_C_gene 113 5utr nonsense_mediated_decay 2060 5utr protein_coding 542 cds nonsense_mediated_decay 13091 cds protein_coding 138 other nonsense_mediated_decay 1st max: 3utr 105 5utr 33 cds 0 2nd max: 3utr 0 5utr 0 cds 138 849 other protein_coding 1st max: 3utr 437 5utr 412 cds 0 2nd max: 3utr 0 5utr 6 cds 843 IFN 1 3utr IG_V_gene 1 cds polymorphic_pseudogene 10 cds IG_V_gene 383 3utr nonsense_mediated_decay 3532 3utr protein_coding 1 cds IG_C_gene 36 5utr nonsense_mediated_decay 765 5utr protein_coding 197 cds nonsense_mediated_decay 5265 cds protein_coding 48 other nonsense_mediated_decay 1st max: 3utr 36 5utr 12 cds 0 2nd max: 3utr 0 5utr 0 cds 48 325 other protein_coding 1st max: 3utr 129 5utr 196 cds 0 2nd max: 3utr 0 5utr 1 cds 324 IL4 2 cds polymorphic_pseudogene 13 cds IG_V_gene 632 3utr nonsense_mediated_decay 5375 3utr protein_coding 2 cds IG_C_gene 54 5utr nonsense_mediated_decay 982 5utr protein_coding 298 cds nonsense_mediated_decay 7738 cds protein_coding 56 other nonsense_mediated_decay 1st max: 3utr 45 5utr 11 cds 0 2nd max: 3utr 0 5utr 0 cds 56 481 other protein_coding 1st max: 3utr 257 5utr 224 cds 0 2nd max: 3utr 0 5utr 2 cds 479 #@ add intron as categ # add E/I , PCG anno for i in *maxAvg.bed do awk -f 
/data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron2.awk $i > $i.anno.bed done for i in sh-clusters-I??.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron2.awk $i > $i.anno.bed done #get genomic coords, keep anno for i in *anno.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks3.awk $i > $i".genomic2.bed" done #bookend merge for i in *.genomic2.bed do cat $i |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,7,8,9 -o collapse,max,distinct,collapse,collapse,collapse > foo > $i".collapsed" done #assign exon vs intron based on majority, exon for ties for i in *.genomic2.bed.collapsed do awk -f ../per-gene-exon-intron-stats1a.awk $i > $i.exon-intron done # double gene annotation for same region in exon-intron.log, manual check of >5 pairs => no exon/intron assignment conflict # get best cluster in collapsed for i in *genomic2.bed.collapsed do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-best-collapsed-cluster2.awk $i > $i".best.bed" done # intersect exon-intron-anno with transcript clusters for i in *.genomic2.bed.collapsed do #bedtools intersect -a $i.exon-intron -b $i.best.bed -wa -wb -s > $i.anno2 bedtools intersect -a $i.exon-intron -b $i.best.bed -wa -wb -s |awk 'BEGIN{OFS="\t"}{print $8,$9,$10,$11,$12,$13,$14,$7}' > $i.anno2 #bedtools intersect -a $i.exon-intron -b $i.best.bed -wa -wb -s |awk 'BEGIN{OFS="\t"}{print $8,$9,$10,$11,$12,$13,$14,$7}' > $i.anno2 done for i in *anno2 do awk '{if ($NF=="E"){print $0}}' $i > $i.exon done for i in *anno2 do awk '{if ($NF=="I"){print $0}}' $i > $i.intron wc -l !$ #awk '{if ($NF=="I"){print $0}}' $i|sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,7,8 -o collapse,collapse,collapse,collapse,collapse > $i.intron #wc !$ #same done (for i in *anno2 do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n |bedtools merge -i 
- -s -c 4,5,6,7,8 -o collapse,collapse,collapse,collapse,collapse > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,7,8 -o collapse,collapse,collapse,collapse,collapse > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,7,8 -o collapse,collapse,collapse,collapse,collapse > $i.cds.bed wc -l !$ done ) for i in *anno2.exon do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n > $i.cds.bed wc -l !$ done for i in *anno2.exon do bedtools intersect -v -a $i -b /data/results/reference/mmu/cds.bed /data/results/reference/mmu/5utr.bed /data/results/reference/mmu/3utr.bed -wa -s |sort -k1,1 -k2,2n > $i.ncRNA wc -l !$ done awk -v f3="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v fc="sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed awk -v f3="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v 
fc="sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed awk -v f3="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v fc="sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed awk -v f3="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v fc="sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed awk -v f3="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v 
fc="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed