# read quality assessment: /data/images/proton/DKlab/mr/parclip/raw/pipeline1.sh /data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl -a on -o 0hrep1filt2 -f 0hrep1/ugc_604_1_F3.csfasta -g 0hrep1/ugc_604_1_F3.QV.qual wc 0hrep1/ugc_604_1_F3.csfasta 22967710 22967710 798859510 0hrep1/ugc_604_1_F3.csfasta wc 0hrep1filt_T_F3.csfasta 9835260 9835260 341609082 0hrep1filt_T_F3.csfasta # standart qual. filter removes >50% of reads # INDEX BUILDING # get ENSEMBL gtf annotation (for mm10) /data/results/reference/mmu/Mus_musculus.NCBIM37.64.gtf # lift coordinates to mm9 using https://genome.ucsc.edu/cgi-bin/hgLiftOver # prepare gtf for index building cd /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/ awk -f get-introns-max1k-v2.awk /data/results/reference/mmu/Mus_musculus.NCBIM37.64.gtf > Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.gtf # extract fasta seqs from whole genome using gtf /data/results/tools/align/cufflinks-2.2.1.Linux_x86_64/gffread -O -F -w Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.fa -g /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.gtf -M -d dupinfo.tr-with-1k-introns.txt # generate 2bit file needed for PARalyzer ln -s Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.fa mm9-mRNA-introns1k.fa /home/reczko/bin/faToTwoBit mm9-mRNA-introns1k.fa mm9-mRNA-introns1k.2bit #build index for shrimp export SHRIMP_FOLDER=/data/results/tools/align/SHRiMP_2_2_3 python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 010111111111111,101011111111111,110101111111111,111010111111111,111101011111111,111110101111111,111111010111111,111111101011111,111111110101111,111111111010111,111111111101011,111111111110101,111111111111010,111111111111101,111111111111111 --h-flag --shrimp-mode cs mm9-mRNA-introns1k.fa #ALIGNMENT # doc in /data/results/tools/align/SHRiMP_2_2_3/README.SHRiMP132 #one sample export SHRIMP_FOLDER=/data/results/tools/align/SHRiMP_2_2_3 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/mm9-mRNA-introns1k-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep2/*.csfasta -N 11 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% -e -255 -f -255 | samtools view -Sb - | samtools sort -o - - >2hrep2-15mMm1kIntron.bam 2>2hrep2-15mMm1kIntron-bam.log -N/--threads Number of Threads (default: 1) -n/--cmw-mode Match Mode (default: unpaired:2 paired:4) --local Perform local alignment (default: disabled) -o/--report Maximum Hits per Read (default: 10) -v/--vec-threshold SW Vector Hit Threshold (default: 47.00%) -h/--full-threshold SW Full Hit Threshold (default: 50.00%) -r/--cmw-threshold Window Generation Threshold (default: 55.00%) -w/--match-window Match Window Length (default: 140.00%) -e/--ext-r SW Gap Extend Score(Reference)(default: -7) -f/--ext-q SW Gap Extend Score (Query) (default: -3) #all samples: /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/shrimp-map-mRNA1.sh /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/shrimp-map-mRNA2.sh #alignment statistics in: /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/shrimp-map-mRNA1.logB /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/shrimp-map-mRNA2.log #get mismatch stats /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/get-mismatch-stats1.sh #PARALYZER pre-processing #prepare flags in bam (input $i) and convert to bowtie format (output $i.md.bt): /data/images/proton/DKlab/mr/parclip/paralyzer/README-paralyzer15.txt for i in *-15mMm1kIntron.bam do echo $i # add MD flags for bowtie format /data/results/tools/samtools/samtools-1.3/samtools calmd -b $i /data/results/reference/mmu/mm9/mRNA-genomic-max-intron-15k/Mus_musculus.NCBIM37.64-toMM9.tr-with-1k-introns.fa >& /dev/null > foo.bam2 samtools view -h foo.bam2 | awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/sam2bowtie.awk2 > $i.md.bt mv foo.bam2 $i.md.bam done #PARALYZER #one sample: ./myp-100chunks.sh /data/images/proton/DKlab/mr/parclip/paralyzer/ini-files-1kIntron/sh.IGG.ini &> IGG.1kIntrons-log2 ./mergeMyPar-100chunks.sh sh-clusters-IGG.txt2-1kIntrons wc -l sh-clusters-IGG.txt2-1kIntrons.csv ./mergeMyPar-100chunks.sh sh-groups-IGG.txt2-1kIntrons ./mergeMyPar-100chunks.sh sh-distributions-IGG.txt2-1kIntrons #all samples: /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/myp-all-1kIntrons2.sh #post-processing # paralyzer to bed, more than 5 TtoC for i in sh-clusters-*.txt2-1kIntrons2.csv do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2bed-gt5tc.awk $i > $i.bed.gt5tc done # get info from distributions # /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk adds: ## 1..6(same) AvgConversionPct MaxConversionPct ConversionEventCount SdevConversionPct for i in sh-distributions-???*.txt2-1kIntrons2.csv do echo $i cat $i| awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-TtoC-conversionPct-noCutoff2.awk|awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa-dist2bed2.awk > $i".avg.csv3.bed" done # merge info from distributions and clusters bedtools intersect -a sh-clusters-0hrep1.txt2-1kIntrons.csv.bed.gt5tc -b sh-distributions-0hrep1.txt2-1kIntrons.csv.avg.csv3.bed -wa -wb -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/add_conversion_pct2.awk > sh-clusters-0hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2 # separate plus strand results for i in sh-clusters-*1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2 do wc $i awk -f ../filterPlusStrand.awk $i > $i.plus wc !$ done # more than 4 reads and convEvPct > 5: # cp=1.0*nc/(nc+nnc); # if ((cp>0.05)&&(nr>=5)) {print $0;} # and IGG removal: for i in sh-clusters-*1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus do wc $i bedtools intersect -a $i -b sh-clusters-IGG.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.f -v -s |awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_GE5reads_convEvPctGT5.awk |sort -k1,1 -k2,2n > $i.noIGG.f wc !$ done # adaptive filtering of modscore a) get cutoff #topuniform=max(which(lp8>0.05)); #highest modscore that is not uniformly distributed Rscript /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/get_modescore_hist_1kIntrons_f.r [1] "modscore_cutoff for IFN 0.946 0.0591007877911048" ^^^^^ this is the modscore cutoff # adaptive filtering of modscore b) filter awk -v th=0.945 -f /data/images/proton/DKlab/mr/parclip/paralyzer/filter_modescore1_keep_best.awk /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f > /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1d/sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt # filter protein_coding_genes and add annotation from /data/results/reference/mmu/mm9/1kIntrons-stranded/Mus_musculus.NCBIM37.64-toMM9.headers for i in *txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron1.awk $i |sort -k1,1 -k4,4 -k2,2n -k5,5n > $i.anno1.csv done for i in sh*rep*1kIn*f.flt do cat $i | sort -k1,1 -k2,2n > $i.srt done bedtools multiinter -i sh-clusters-0hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt sh-clusters-0hrep3.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons bedtools multiinter -i sh-clusters-2hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt sh-clusters-2hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons bedtools multiinter -i sh-clusters-6hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt sh-clusters-6hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons ## 2of2 #add paralyzer details to intersection: awk -v f1="sh-clusters-0hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -v f2="sh-clusters-0hrep3.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -f ../get-multiintersect-details-2of2.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed awk -v f1="sh-clusters-2hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -v f2="sh-clusters-2hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -f ../get-multiintersect-details-2of2.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed awk -v f1="sh-clusters-6hrep1.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -v f2="sh-clusters-6hrep2.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.srt" -f ../get-multiintersect-details-2of2.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed #merge book-ended regions: awk -f ../get-merged-multiintersect-details1.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed awk -f ../get-merged-multiintersect-details1.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed awk -f ../get-merged-multiintersect-details1.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.bed > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed #get max scoring cluster, add max avgConvPct as score awk -f ../get-max-intersect-score2.awk sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed awk -f ../get-max-intersect-score2.awk sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed > sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed awk -f ../get-max-intersect-score2.awk sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.bed > sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.intersection-all.bed3-1kIntrons.merged.maxAvg.bed (for i in *1kI*flt do echo $i awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks2.awk $i > $i".genomic.bed" done for i in *genomic.bed do echo $i sort -k1,1 -k2,2n $i | awk -f /data/images/proton/DKlab/mr/parclip/tracks/correct-bed.awk > $i.srt; done ) # add E/I , PCG anno for i in *.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron2.awk $i > $i.anno1.bed done for i in sh-clusters-I??.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt do echo $i awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/pa2annotate-ExonIntron2.awk $i > $i.anno1.bed done #get genomic coords, keep anno for i in *anno1.bed do echo $i awk -f /data/images/proton/DKlab/mr/parclip/shrimp-mRNA-introns1k/tracks/paralyzer2tracks3.awk $i > $i".genomic2.bed" done #bookend merge for i in *.anno1.bed.genomic2.bed do cat $i |sort -k1,1 -k2,2n |bedtools merge -i - -s -c 4,5,6,7,8,9 -o collapse,max,distinct,collapse,collapse,collapse > foo > $i".collapsed" done # get best cluster in collapsed for i in *.anno1.bed.genomic2.bed.collapsed do awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/get-best-collapsed-cluster2.awk $i > $i".best.bed" done # intersect exon-intron-anno with transcript clusters for i in *.anno1.bed.genomic2.bed.collapsed do bedtools intersect -a $i.exon-intron -b $i.best.bed -wa -wb -s |awk 'BEGIN{OFS="\t"}{print $8,$9,$10,$11,$12,$13,$14,$7}' > $i.anno2 done for i in *.anno1.bed.genomic2.bed.collapsed.anno2 do awk '{if ($NF=="E"){print $0}}' $i > $i.exon done for i in *.anno1.bed.genomic2.bed.collapsed.anno2 do awk '{if ($NF=="I"){print $0}}' $i > $i.intron wc -l !$ done for i in *.anno1.bed.genomic2.bed.collapsed.anno2.exon do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n > $i.cds.bed wc -l !$ done for i in *.anno1.bed.genomic2.bed.collapsed.anno2.exon do bedtools intersect -v -a $i -b /data/results/reference/mmu/cds.bed /data/results/reference/mmu/5utr.bed /data/results/reference/mmu/3utr.bed -wa -s |sort -k1,1 -k2,2n > $i.ncRNA wc -l !$ done #merge per-gene parts awk -v f3="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.3utr.bed" -v f5="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.5utr.bed" -v fc="sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.cds.bed" -f /data/images/proton/DKlab/mr/parclip/paralyzer/per-gene-exon-intron-stats1.awk > sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed #recollect per-gene for i in *f.flt.*anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n > $i.cds.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/intron.bed -wa -u -s |sort -k1,1 -k2,2n > $i.intron.bed wc -l !$ done for i in *f.flt*.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n > $i.cds.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/intron.bed -wa -u -s |sort -k1,1 -k2,2n > $i.intron.bed wc -l !$ done #all zero, as expected for i in *f.flt*.anno1.bed.genomic2.bed.collapsed.anno2.intron do bedtools intersect -a $i -b /data/results/reference/mmu/3utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.3utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/5utr.bed -wa -u -s |sort -k1,1 -k2,2n > $i.5utr.bed wc -l !$ bedtools intersect -a $i -b /data/results/reference/mmu/cds.bed -wa -u -s |sort -k1,1 -k2,2n > $i.cds.bed wc -l !$ done sh-clusters-0h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed sh-clusters-2h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed sh-clusters-6h.gt0.25TtoC2.plus.noIGG.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.flt.anno.bed.genomic2.bed.collapsed.best.per-gene-exons.bed 2nd annotation, clusters in PCGs: 7398 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed 1732 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed 8133 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed 9720 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed 5917 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed 1050 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed 6960 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed 7776 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed 8438 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed 2296 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed 10468 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed 11794 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed 2640 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed 663 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed 3175 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed 3612 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed 4213 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.3utr.bed 884 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.5utr.bed 5079 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.cds.bed 5674 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.best.per-gene-exons.bed.intron.bed 2nd annotation, ncRNA clusters overlapping with introns: 2523 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA.intron.bed 2084 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA.intron.bed 2985 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA.intron.bed 940 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA.intron.bed 1543 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.exon.ncRNA.intron.bed 2nd annotation, clusters in introns (both PGC/ncRNA): 292 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.3utr.bed 360 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.5utr.bed 2193 sh-clusters-0h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.cds.bed 291 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.3utr.bed 286 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.5utr.bed 2034 sh-clusters-2h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.cds.bed 329 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.3utr.bed 426 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.5utr.bed 2569 sh-clusters-6h.gt0.25TtoC2.plus.noIGG.f.flt.srt.union.bed3-1kIntrons.merged.maxAvg.bed.anno1.bed.genomic2.bed.collapsed.anno2.intron.cds.bed 94 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.3utr.bed 149 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.5utr.bed 752 sh-clusters-IFN.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.cds.bed 167 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.3utr.bed 202 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.5utr.bed 1137 sh-clusters-IL4.txt2-1kIntrons.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG.f.flt.anno1.bed.genomic2.bed.collapsed.anno2.intron.cds.bed