reczko@platia:/data/images/proton/run268$ bamToFastq -i /data/images/proton/run268/IonXpressRNA_015_rawlib.basecaller.bam -fq R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fq reczko@platia:/data/images/proton/run268$ /data/results/tools/align/bowtie2-2.2.3/bowtie2 -x /data/images/proton/run149/bb_wo_ins R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fq | /data/results/tools/samtools/samtools-0.1.19/samtools view -uhS -F4 - > R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.bam GSkP4-Ab42Lib (IonXpressRNA_015.bam) 3966886 reads; of these: 3966886 (100.00%) were unpaired; of these: 1710610 (43.12%) aligned 0 times 2256276 (56.88%) aligned exactly 1 time 0 (0.00%) aligned >1 times 56.88% overall alignment rate GSkP5_SOD-Lib (IonXpressRNA_016.bam) 5138815 reads; of these: 5138815 (100.00%) were unpaired; of these: 404728 (7.88%) aligned 0 times 4734087 (92.12%) aligned exactly 1 time 0 (0.00%) aligned >1 times 92.12% overall alignment rate bamToFastq -i /data/images/proton/run268/IonXpressRNA_016_rawlib.basecaller.bam -fq R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fq /data/results/tools/align/bowtie2-2.2.3/bowtie2 -x /data/images/proton/run149/bb_wo_ins R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fq | /data/results/tools/samtools/samtools-0.1.19/samtools view -uhS -F4 - > R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam cd t samtools view ../R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.bam |split -99999 ../get-ins2.sh for i in *ins do cat $i >> ins.fa done mv ins.fa ../GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa samtools view 
../R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam |split -99999 ../get-ins2.sh cd .. /data/results/tools/align/multiple/cd-hit-v4.6.1-2012-08-27/cd-hit-est -i GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa -o GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa.clust -c 0.95 -n 5 -S 0 -r 0 -M 40000 &> GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa.log cd .. /data/results/tools/align/multiple/cd-hit-v4.6.1-2012-08-27/cd-hit-est -i GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa -o GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa.clust -c 0.95 -n 5 -S 0 -r 0 -M 40000 &> GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa.log reczko@platia:/data/images/proton/run268$ echo "GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa.clust" | awk -f /data/results/tools/align/multiple/cd-hit-clust-sizes1.awk read GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa.clust nread 85397 VAQJ1:03985:08902 1 reczko@platia:/data/images/proton/run268$ echo "GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa.clust" | awk -f /data/results/tools/align/multiple/cd-hit-clust-sizes1.awk read GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa.clust nread 17643 VAQJ1:03974:08721 1 ls *.clstr.size.fa > insert_samples.txt awk -f merge-inserts2.awk insert_samples.txt > insert_samples.csv #@@ 29022016 reczko@estia:/data/images/proton/run149$ awk -f /data/images/proton/run149/count-exclusive1.awk insert_samples.csv 4407514 1 5 16 reczko@estia:/data/images/proton/run149$ awk -f /data/images/proton/run149/get-exclusive1.awk insert_samples.csv 4407514 1 5 16 IMP1 1459 AGCGGCGGCACCGGGCGC IMP2 1483 ACCGCCTCCTGGTGG IMP2 1277 ACCGCCGAGCTTCGTGG IMP2 1073 ACCTCGAGCGTTCTGG IMP2 1017 ACCGGCTCGTGTGG IMP2 1016 AGCGGCATCAGCAGCTGG IMP3 1030 TGCGCGAGGTGCGG IMP3 1037 ACCACCGAGCGCCCGC IMP3 1416 ACCACCTGGTCGCGG IMP3 1428 ACCCCGACCACGGTCCTG IMP3 1058 TGCATGGTCGTCTTC IMP3 1077 GCGCCGAGGTGCGG IMP3 1708 TGCGTCATCGTGCGGACG IMP3 1507 TGCTGCATCGCGTTC IMP3 1358 CACCCCCGGCCCTGGTTCGAC IMP3 1256 TCGCACGGTGATGATC IMP3 1667 ACGCGAACGTGAGG IMP3 1012 TGCACGATCCACCGG IMP3 1550 AGCAGCTGGGCGCGG IMP3 
1001 ACCACCTGGACGGTC IMP3 1126 ACCACGGTGACGATC IMP3 1521 CACCACGTCACGCCAGG reczko@estia:/data/images/proton/run149$ awk -f /data/images/proton/run149/get-avg-insert-len.awk insert_samples.csv 4407514 20.609 4^21= 4398046511104 3 start-codons 6 S positions 12 N positions =3 * 2^6 * 4^12 =3*64*16777216 =3221225472