#@
http://lindenb.github.io/jvarkit/PrettySam.html
export JAVA_HOME=/home/reczko/a/tools/libs/java/jdk1.8.0_66
git clone "https://github.com/lindenb/jvarkit.git"
reczko@max:/data/results/tools/align/jvarkit2021$ cd jvarkit/
reczko@max:/data/results/tools/align/jvarkit2021/jvarkit$ ./gradlew prettysam
Should you cite prettysam ? https://github.com/mr-c/shouldacite/blob/master/should-I-cite-this-software.md
The current reference is:
PrettySam : a SAM/BAM prettifier. Lindenbaum & al. 2018. figshare. https://doi.org/10.6084/m9.figshare.5853798.v1
cd /data/images/proton/external/Sketas/run454/
-r, -R, --reference
Indexed fasta Reference file. This file must be indexed with samtools
faidx and with picard CreateSequenceDictionary
java -jar /data/results/tools/align/picardtools/picard-tools-2.9.0/picard.jar CreateSequenceDictionary \
R=backbone_wo_insert.fa \
O=reference.dict
java -jar /data/results/tools/align/picardtools/picard-tools-2.9.0/picard.jar CreateSequenceDictionary R=backbone_wo_insert.fa O=reference.dict
reczko@max:/data/images/proton/external/Sketas/run454$ java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | head -999 > foo
time java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | awk -f extract-insert-with-quals1.awk >baz
real 19m47.532s
user 35m18.851s
sys 0m43.664s
mv baz GSkP7_A_v2.fa
-rw-r--r-- 1 reczko users 209343015 Oct 21 15:34 GSkP7_A.fa
-rw-r--r-- 1 reczko users 208204231 Jan 12 18:32 GSkP7_A_v2.fa
wc GSkP7_A.fa
12034444 12034444 209343015 GSkP7_A.fa
reczko@max:/data/images/proton/external/Sketas/run454$ wc GSkP7_A_v2.fa
11968702 11968702 208204231 GSkP7_A_v2.fa
(/ 11968702 12034444.0)
0.9945371801140127
=> 99.45% of the records from the 1st version are correct
time java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | awk -f extract-insert-with-quals2.awk > GSkP7_A_v2.fastq
time java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP8_b.bam | awk -f extract-insert-with-quals2.awk > GSkP8_b_v2.fastq
--no-unicode
/data/images/proton/external/Sketas/run454/inserts-with-qual
source get-inserts-with-qual.sh
for i in *q
> do
> echo $i
> wc $i
> done
GSkP7_A.fastq
23937404 23937404 314674495 GSkP7_A.fastq (/ 23937404 4)
5984351
GSkP8_b.fastq
82916172 82916172 1086074469 GSkP8_b.fastq (/ 82916172 4)
20729043
IonXpress_009_IMP1_4-5-6-7PeptideLinrary.aligned.bam.fastq
21708764 21708764 345166745 IonXpress_009_IMP1_4-5-6-7PeptideLinrary.aligned.bam.fastq (/ 21708764 4)
5427191
IonXpress_011_IMP2_A4VRound4.aligned.bam.fastq
22554428 22554428 296250575 IonXpress_011_IMP2_A4VRound4.aligned.bam.fastq (/ 22554428 4)
5638607
IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq
28394788 28394788 376865183 IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq (/ 28394788 4)
7098697
R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fastq
3506904 3506904 52137054 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fastq (/ 3506904 4)
876726
R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq
633712 633712 9112414 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq (/ 633712 4)
158428
R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP4-Ab42.IonXpress_015.fastq.fastq
4852964 4852964 73000731 R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP4-Ab42.IonXpress_015.fastq.fastq (/ 4852964 4)
1213241
R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq
1919972 1919972 27981101 R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq (/ 1919972 4)
479993
/data/images/proton/external/Sketas/run454/all-inserts
for i in *fa
> do
> echo $i
> wc $i
> done
009_IMP1_4-5-6-7PeptideLinrary.ins.fa
10713288 10713288 215914802 009_IMP1_4-5-6-7PeptideLinrary.ins.fa (/ 10713288 2)
(/ 5356644 5427191.0)
0.9870011945406012
011_IMP2_A4VRound4.ins.fa
11158496 11158496 193996716 011_IMP2_A4VRound4.ins.fa (/ 11158496 2)
(/ 5579248 5638607.0)
0.9894727545296206
015_IMP3_Ab42Round2.ins.fa
12100984 12100984 212084351 015_IMP3_Ab42Round2.ins.fa (/ 12100984 2)
(/ 6050492 7098697.0)
0.8523383939334218
GSKP4-Ab42.IonXpress_015.fa
2441408 2441408 47104068 GSKP4-Ab42.IonXpress_015.fa (/ 2441408 2)
(/ 1220704 876726.0)
1.3923437881390537
GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa
1767810 1767810 33806445 GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa (/ 1767810 2)
(/ 883905 876726.0)
1.0081884191868382
GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa
320124 320124 6100840 GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa (/ 320124 2)
(/ 160062 158428.0)
1.0103138334132855
GSKP5-SOD.IonXpress_016.fa
968270 968270 18303583 GSKP5-SOD.IonXpress_016.fa (/ 968270 2)
(/ 484135 479993.0)
1.0086292925105158
GSkP7_A.fa
12034444 12034444 209343015 GSkP7_A.fa (/ 12034444 2)
(/ 6017222 5984351.0)
1.0054928262062168
GSkP8_b.fa
41700414 41700414 723413201 GSkP8_b.fa (/ 41700414 2)
(/ 20850207 20729043.0)
1.0058451323584983
awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' /data/images/proton/external/Sketas/run454/inserts-with-qual/R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq > foo
diff foo ../all-inserts/GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa |head
318a319,320
> >VAQJ1:01431:12606
> TGCGAGTGGCCGCGGCGGG
580a583,584
> >VAQJ1:01478:12714
> ACCTGGATCTGGGGCCTG
656a661,662
> >VAQJ1:01505:11930
> TGCTTCGTCATGTAGGCCCGG
samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01431:12606
4176:VAQJ1:01431:12606 16 bb_wo_ins 111 42 8M1I77M * 0 0 TGCTTAAGTTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT 4442424/*////-67:6<<4<>0;;;6;6;:6;:;;;9948.8774993:7718287,778298999;:<7=:3::6;6==;748 AS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 NM:i:1 MD:Z:85 YT:Z:UU
# seq not in read!
samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n TGCGAGTGGCCGCGGCGGG
4158:VAQJ1:01422:12624 0 bb_wo_ins 6 0 105M19I70M1I9M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATTGCGAGTGGCCGCGGCGGGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCTATTGCCATTGGCAAAATTGTGAGTGAAAGAAATTA 186;=4:;6;:;;5;:;::::4::1:9949;::::>3894;;4?>;;:5;5://*/67,7167918896==>099::98883933333)36888777,439395:8----,*,15554778288884736777.75533+/.61556-41515758776////22-335456.2247688:9.2.142443,--*---*-03 AS:i:-87 XN:i:0 XM:i:5 XO:i:2 XG:i:20 NM:i:25 MD:Z:97G51A0T1G0C30 YT:Z:UU
1533136:VAQJ1:07604:13101 0 bb_wo_ins 6 0 87M1D17M19I24M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGCGATCGCCCACAATTGCGAGTGGCCGCGGCGGGTGCTTAAGTTTTGGCACCGAATTT 39-43*/516688477776896;;3:::59:;;:<;288289<=3;;8876959782777-85:;<154:2999,43989977-4-132)/577779817673636675131-/141213::2;6651504544,411111--*--- AS:i:-77 XN:i:0 XM:i:2 XO:i:2 XG:i:20 NM:i:22 MD:Z:87^G9G28A2 YT:Z:UU
samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01478:12714
6746:VAQJ1:01478:12714 0 bb_wo_ins 6 3 104M1D19M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATACCGGCGTCGGGGGTACG 6>288/894:;<>7<><>>DD7<=4<=?7<=4<==B@7=6;::-666+65;;;6::9-556+4::;;;;<4:33333)48889887.644-4995;47744.222'-++2 AS:i:-48 XN:i:0 XM:i:12 XO:i:1 XG:i:1 NM:i:13 MD:Z:97G6^T1G1T0T0A0A2T0T0T2C0A0C2 YT:Z:UU
# the reported insert sequence is not in this read, and its CIGAR (104M1D19M) contains no insertion
samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n ACCTGGATCTGGGGCCTG
6744:VAQJ1:01478:12711 16 bb_wo_ins 58 0 19M1D22M1D10M18I85M * 0 0 GTCTTCCCCAAGACCATAATTTCTGCTAGCCAATGGGGCGACGCCCACAATACCTGGATCTGGGGCCTGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT 87838/;;;5:9514..,.)./0456840,4344-66547761*33392<;;6::5<9:790888599344948397-8993<;;7;;4=;1;<;6:5:;6;:::;<=7<3<<<7=;3<<<7=6=<0;;<5:;:?;;::4:9/991<6=7;;68 AS:i:-77 XN:i:0 XM:i:1 XO:i:3 XG:i:20 NM:i:21 MD:Z:19^T22^T2G92 YT:Z:UU
samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01505:11930
7626:VAQJ1:01505:11930 16 bb_wo_ins 111 42 85M * 0 0 TGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT :;:695990:::58:8388188.779496::6877899?:>4?==7=<3?A<7>6<:/:;;5;:;9:::94:8/981;5:5;525 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:85 YT:Z:UU
# the reported insert sequence is not in this read, and its CIGAR (85M) contains no insertion
java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar --no-unicode -R reference.fasta R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -C30 VAQJ1:01431:12606 > foo
>>>>> 4176
Read-Name : VAQJ1:01431:12606
Flag : 16
16 : read reverse strand
MAPQ : 42
Contig : bb_wo_ins (index:0)
Start : 111
End : 195
Strand : <--
Read-Length : 86
Cigar : 8M1I77M (N=3)
Sequence :
Read (0) : TGCTTAAGTT TTTGGCACCG AAATTTTAAC CGTTGAGTAC GGCCCATTGC CCATTGGCAA
Middle :
Ref (111) : NNNNNNNN-N NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMIM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM
Qual : 4442424/*/ ///-67:6<< 4<>0;;;6;6 ;:6;:;;;99 48.8774993 :7718287,7
Ref-Pos : 111 120 130 140 150 160
Read (60) : AATTGTGAGT GAAGAAATTA ATTGTT
Middle :
Ref (170) : NNNNNNNNNN NNNNNNNNNN NNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMM
Qual : 78298999;: <7=:3::6;6 ==;748
Ref-Pos : 170 180 190
Tags :
MD : 85 "String for mismatching positions"
XG : 1 "Reserved for end users"
NM : 1 "Edit distance to the reference"
XM : 0 "Reserved for end users"
XN : 0 "Reserved for end users"
>VAQJ1:01422:12624
TGCGAGTGGCCGCGGCGGG
>VAQJ1:01431:12606
TGCGAGTGGCCGCGGCGGG
>VAQJ1:01478:12711
ACCTGGATCTGGGGCCTG
>VAQJ1:01478:12714
ACCTGGATCTGGGGCCTG
>VAQJ1:01503:11905
TGCTTCGTCATGTAGGCCCGG
>VAQJ1:01505:11930
TGCTTCGTCATGTAGGCCCGG
awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq > foo
awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq > foo
diff foo ../all-inserts/015_IMP3_Ab42Round2.ins.fa > ../test-prettysam.txt
sort foo > ../test-prettysam.txt
sort ../all-inserts/015_IMP3_Ab42Round2.ins.fa > ../test-prettysam.txt2
diff ../test-prettysam.txt ../test-prettysam.txt2 > ../test-prettysam.txt3
diff ../test-prettysam.txt ../test-prettysam.txt2 | head
26d25
< >ETB1X:00024:03588
53d51
< >ETB1X:00026:03626
271a270
> >ETB1X:00032:10869
284d282
< >ETB1X:00033:03624
309a308
> >ETB1X:00033:09631
in new:
>ETB1X:00024:03588
ACCGTGGAGTGGTTG
samtools view IonXpress_015_IMP3_Ab42Round2.aligned.bam.bam | grep ETB1X:00024:03588
ETB1X:00024:03588 0 bb_wo_ins 2 0 7M1D35M2D28M1D35M15I41M1I52M * 0 0 CATGGTTAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGATATTGATATTGGTCTTCCCCAAGACCATATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATACCGTGGAGTGGTTGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTTCTGTGTAC ////)/)0)/7379::7:=><<==7)///)/779977/-****(*-//94:)048-444(45:55/5555::0555;888?6969<;;1:9998888)///)///)77705;;=4:0/////)/)/8880:5;<:,//9)...(.).366166:8855-333'350333(4:5:-5555)53897::///5:8818)5/5/596555999;< AS:i:-93 XN:i:0 XM:i:3 XO:i:5 XG:i:20 NM:i:23 MD:Z:7^A35^AT0A0T26^A27G100 YT:Z:UU
>>>>> 1790102
Read-Name : ETB1X:00024:03588
Flag : 0
MAPQ : 0
Contig : bb_wo_ins (index:0)
Start : 2
End : 203
Strand : -->
Read-Length : 214
Cigar : 7M1D35M2D28M1D35M15I41M1I52M (N=11)
Sequence :
Read (0) : CATGGTT-AA GTTATCGGTC GTCGTTCCCT CGGAGTGCAA AGA--TATTG ATATTGGTCT
Middle :
Ref (2) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMDMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMDDMMMMM MMMMMMMMMM
Qual : ////)/) 0) /7379::7:= ><<==7)/ //)/779977 /-* ***(* -//94:)048
Ref-Pos : 2 12 22 32 42 52
Read (57) : TCCCCAAGAC CAT-ATTTTC TGCTAGCCAA TGGGGCGATC GCCCACAATA CCGTGGAGTG
Middle :
Ref (62) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNN- ----------
Cigar-Operator : MMMMMMMMMM MMMDMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMI IIIIIIIIII
Qual : -444(45:55 /55 55::05 55;888?696 9<;;1:9998 888)///)// /)77705;;=
Ref-Pos : 62 72 82 92 102
Read (116) : GTTGTGCTTA AGTTTTGGCA CCGAAATTTT AACCGTTGAG TACGGCCCCA TTGCCCATTG
Middle :
Ref (111) : ----NNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNN-NNNN NNNNNNNNNN
Cigar-Operator : IIIIMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMIMMMM MMMMMMMMMM
Qual : 4:0/////)/ )/8880:5;< :,//9)...( .).366166: 8855-333'3 50333(4:5:
Ref-Pos : 117 127 137 147 156
Read (176) : GCAAAATTGT GAGTGAAGAA ATTAATTGTT CTGTGTAC
Middle :
Ref (166) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNN
# pipeline-v1 reported the same insert multiple times
#@ fwd hit
samtools view GSkP7_A.bam | grep 241YN:07288:02804
241YN:07288:02804 0 bb_wo_ins 104 0 7M15I79M * 0 0 CCACAATTGCGGGGGGGGGGCGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTA <7<;829*//=<::::::9$8797:<9<8<>>>4=8<<<7@<=0999287?;==8<>><<<@9=>8=>;=<<6<<6<7<<==6=9=><:::==8@7=8= AS:i:-50 XN:i:0 XM:i:0 XO:i:1 XG:i:15 NM:i:15 MD:Z:86 YT:Z:UU
>>>>> 1
Read-Name : 241YN:07288:02804
Flag : 0
MAPQ : 0
Contig : bb_wo_ins (index:0)
Start : 104
End : 189
Strand : -->
Read-Length : 101
Cigar : 7M15I79M (N=3)
Sequence :
Read (0) : CCACAATTGC GGGGGGGGGG CGTGCTTAAG TTTTGGCACC GAAATTTTAA CCGTTGAGTA
Middle :
Ref (104) : NNNNNNN--- ---------- --NNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMIII IIIIIIIIII IIMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM
Qual : <7<;829*// =<::::::9$ 8797:<9<8< >>>4=8<<<7 @<=0999287 ?;==8<>><<
Ref-Pos : 104 119 129 139
Read (60) : CGGCCCATTG CCCATTGGCA AAATTGTGAG TGAAGAAATT A
Middle :
Ref (149) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN N
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM M
Qual : <@9=>8=>;= <<6<<6<7<< ==6=9=><:: :==8@7=8 =
Ref-Pos : 149 159 169 179 189
#@ reverse hit
samtools view GSkP7_A.bam | grep 241YN:06420:11476
241YN:06420:11476 16 bb_wo_ins 47 0 20M1I44M14I16M1I64M * 0 0 ATTTGATATTGGTCTTCCCCAAAGACCATAATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATTGCGCTCGGACAGCTGCTTAAGTTTTGGCACCCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTAAGTGAAGAAAATTA ;6=:::995:7<;<7;3984'55667;:8483>>=;;<<<<<8>8><4=;93336664<<::1903(((((((.444,.7<9584882;<;6;;5)777*88*<:<4<6=>8<<;<;<<9=7=885;73:961/)/,'---)44-)//;;4=9*9990<9 AS:i:-76 XN:i:0 XM:i:4 XO:i:3 XG:i:16 NM:i:20 MD:Z:56G72G10T1A1 YT:Z:UU
>>>>> 445
Read-Name : 241YN:06420:11476
Flag : 16
16 : read reverse strand
MAPQ : 0
Contig : bb_wo_ins (index:0)
Start : 47
End : 190
Strand : <--
Read-Length : 160
Cigar : 20M1I44M14I16M1I64M (N=7)
Sequence :
Read (0) : ATTTGATATT GGTCTTCCCC AAAGACCATA ATTTTCTGCT AGCCAATGGG GCGATCGCCC
Middle :
Ref (47) : NNNNNNNNNN NNNNNNNNNN -NNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM IMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM
Qual : ;6=:::995: 7<;<7;3984 '55667;:84 83>>=;;<<< <<8>8><4=; 93336664<<
Ref-Pos : 47 57 76 86 96
Read (60) : ACAATTGCGC TCGGACAGCT GCTTAAGTTT TGGCACCCGA AATTTTAACC GTTGAGTACG
Middle :
Ref (106) : NNNNN----- ---------N NNNNNNNNNN NNNNN-NNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMIIIII IIIIIIIIIM MMMMMMMMMM MMMMMIMMMM MMMMMMMMMM MMMMMMMMMM
Qual : ::1903(((( (((.444,.7 <9584882;< ;6;;5)777* 88*<:<4<6= >8<<;<;<<9
Ref-Pos : 106 112 122 131 141
Read (120) : GCCCATTGCC CATTGGCAAA ATTGTAAGTG AAGAAAATTA
Middle :
Ref (151) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM
Qual : =7=885;73: 961/)/,'-- -)44-)//;; 4=9*9990<9
Ref-Pos : 151 161 171 181
#@ deletions relative to the ref-seq (CIGAR operator D) show up as "-" in the read sequence and as " " in the qual line, respectively:
Read-Name : 241YN:01016:11916
Read (0) : CTGGCGCGCC TCGTCCGGGC GG-AAAATCC TGTCTGGGCA AAGATTG-CG GCCAGGTCAG
Middle :
Ref (3,205) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMDMMMMMMM MMMMMMMMMM MMMMMMMDMM MMMMMMMMMM
Qual : 883;;;:747 7@=707/988 6< /<<=<8= :;:<<2::91 98:6/7: <8 =9==9=<< xad.csv
cd /data/results/tools/align/jvarkit/my-jvarkit-master/
/mnt/fix/c/solid.data_results.recovered.data/tools/align/jvarkit/my-jvarkit-master/src/main/java/com/github/lindenb/jvarkit/tools/biostar/Biostar59647.java
export JAVA_HOME=/home/reczko/a/tools/libs/java/jdk1.8.0_66
make biostar59647
/data/results/tools/align/jvarkit/my-jvarkit-master/dist/biostar59647.jar
sem --id `hostname` -j $THREADS "cat backbone_wo_insert.dict $i | java -Djvarkit.log.name=biostar59647 -Dfile.encoding=UTF8 -Xmx500m -cp "/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/apache-ant-1.8.2-bzip2.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-compress-1.4.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-jexl-2.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-logging-1.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/htsjdk-2.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/ngs-java-1.2.2.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/snappy-java-1.0.3-rc3.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/xz-1.5.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/commons-cli/commons-cli/1.3.1/commons-cli-1.3.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/org/slf4j/slf4j-api/1.7.13/slf4j-api-1.7.13.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/org/slf4j/slf4j-simple/1.7.13/slf4j-simple-1.7.13.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/my-jvarkit-master/dist/biostar59647.jar" com.github.lindenb.jvarkit.tools.biostar.Biostar59647 -r backbone_wo_insert.fa foo ref:9-20 | xmllint 
--format - |awk -f /data/images/proton/run149/extract_insert1.awk > $i.ins"
# xml insert example:
241YN:00258:06961
ATTTGATATTGGTCTTCCCCAAGACCGATAATTTTCTGCTAGCCAATGGGCGATCGCCCACAATAGCGTCTGGTCGCAGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTTA
0
0
bb_wo_ins
47
26M1I20M1D17M15I41M1I39M
#@ verify 1st vs 2nd solution
diff GSkP7_A.fa baz > foo
43,44d42
< >241YN:02689:12766
< ACCGGGACGGGCTAG
>>>>> 599
Read-Name : 241YN:02689:12766
Flag : 16
16 : read reverse strand
MAPQ : 8
Contig : bb_wo_ins (index:0)
Start : 111
End : 189
Strand : <--
Read-Length : 82
Cigar : 37M1I24M1I7M1I11M (N=7)
Sequence :
Read (0) : TGCTTAAGTT TTGGCACCGA AATTTTAACC GTTGAGTAAC GGCCCATTGC CCATTGGCAA
Middle :
Ref (111) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNN-NN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMIMM MMMMMMMMMM MMMMMMMMMM
Qual : .4.).)..(9 :85665146) 96(777/7/7 7087:;:497 3709//*:<5 =::7<8<;5<
Ref-Pos : 111 121 131 141 150 160
Read (60) : AATTTGTGAG CTGAAGGAAT TA
Middle :
Ref (170) : NN-NNNNNNN -NNNNNNNNN NN
Cigar-Operator : MMIMMMMMMM IMMMMMMMMM MM
Qual : ::5<;;:888 8762-(*(+( +)
Ref-Pos : 170 188
Tags :
MD : 73A5 "String for mismatching positions"
XG : 3 "Reserved for end users"
NM : 4 "Edit distance to the reference"
XM : 1 "Reserved for end users"
XN : 0 "Reserved for end users"
XO : 3 "Reserved for end users"
AS : -26 "Alignment score generated by aligner"
YT : UU "Reserved for end users"
<<<<< 599
57,58d54
< >241YN:04372:12362
< ACCGGGCACGACATG
>>>>> 630
Read-Name : 241YN:04372:12362
Flag : 16
16 : read reverse strand
MAPQ : 0
Contig : bb_wo_ins (index:0)
Start : 111
End : 189
Strand : <--
Read-Length : 84
Cigar : 16M1I28M1I7M1I3M1I9M1I16M (N=11)
Sequence :
Read (0) : TGCTTAAGTT TTGGCACCCG AAATTTTAAC CGTTGAGTAC GGCCCAATTG CCCGATTGGG
Middle :
Ref (111) : NNNNNNNNNN NNNNNN-NNN NNNNNNNNNN NNNNNNNNNN NNNNN-NNNN NNN-NNN-NN
Cigar-Operator : MMMMMMMMMM MMMMMMIMMM MMMMMMMMMM MMMMMMMMMM MMMMMIMMMM MMMIMMMIMM
Qual : 885/5/55)5 55/:993?:: 3:5)555/5/ 55/5555:85 05/?<6<785 /555558/88
Ref-Pos : 111 121 130 140 150 159
Read (60) : CAAAATTCGA TGGTGAAGAA ATTA
Middle :
Ref (167) : NNNNNNN-NN NNNNNNNNNN NNNN
Cigar-Operator : MMMMMMMIMM MMMMMMMMMM MMMM
Qual : 7/9972//0/ /)--,)//)= ;7>9
Ref-Pos : 167 176 186
Tags :
MD : 64T0G0A12 "String for mismatching positions"
XG : 5 "Reserved for end users"
NM : 8 "Edit distance to the reference"
XM : 3 "Reserved for end users"
XN : 0 "Reserved for end users"
XO : 5 "Reserved for end users"
AS : -48 "Alignment score generated by aligner"
YT : UU "Reserved for end users"
<<<<< 630
349,350d344
< >241YN:07995:12980
< ACCCGCGCCAGGTCG
>>>>> 835
Read-Name : 241YN:07995:12980
Flag : 0
MAPQ : 3
Contig : bb_wo_ins (index:0)
Start : 47
End : 123
Strand : -->
Read-Length : 76
Cigar : 63M1D13M (N=3)
Sequence :
Read (0) : ATTTGATATT GGTCTTCCCC AAGACCATAA TTTTCTGCTA GCCAATGGGG CGATCGCCCA
Middle :
Ref (47) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN
Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM
Qual : ===8====>9 =8<><9<@@7 @9>==8==>9 ===4===>>? ;;5=7<;;;4 <>>?;=<>8>
Ref-Pos : 47 57 67 77 87 97
Read (60) : CAA-TACCTG AGTCCCG
Middle :
Ref (107) : NNNNNNNNNN NNNNNNN
Cigar-Operator : MMMDMMMMMM MMMMMMM
Qual : <<6 <<:078 775/7).
Ref-Pos : 107 117
Tags :
MD : 56G6^T1G1T1A3T0T0T1 "String for mismatching positions"
XG : 1 "Reserved for end users"
NM : 8 "Edit distance to the reference"
XM : 7 "Reserved for end users"
XN : 0 "Reserved for end users"
XO : 1 "Reserved for end users"
AS : -32 "Alignment score generated by aligner"
YT : UU "Reserved for end users"
<<<<< 835