#@ http://lindenb.github.io/jvarkit/PrettySam.html export JAVA_HOME=/home/reczko/a/tools/libs/java/jdk1.8.0_66 git clone "https://github.com/lindenb/jvarkit.git" reczko@max:/data/results/tools/align/jvarkit2021$ cd jvarkit/ reczko@max:/data/results/tools/align/jvarkit2021/jvarkit$ ./gradlew prettysam Should you cite prettysam ? https://github.com/mr-c/shouldacite/blob/master/should-I-cite-this-software.md The current reference is: PrettySam : a SAM/BAM prettifier. Lindenbaum & al. 2018. figshare. https://doi.org/10.6084/m9.figshare.5853798.v1 cd /data/images/proton/external/Sketas/run454/ -r, -R, --reference Indexed fasta Reference file. This file must be indexed with samtools faidx and with picard CreateSequenceDictionary java -jar /data/results/tools/align/picardtools/picard-tools-2.9.0/picard.jar CreateSequenceDictionary \ R=backbone_wo_insert.fa \ O=reference.dict java -jar /data/results/tools/align/picardtools/picard-tools-2.9.0/picard.jar CreateSequenceDictionary R=backbone_wo_insert.fa O=reference.dict reczko@max:/data/images/proton/external/Sketas/run454$ java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | head -999 > foo time java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | awk -f extract-insert-with-quals1.awk >baz real 19m47.532s user 35m18.851s sys 0m43.664s mv baz GSkP7_A_v2.fa -rw-r--r-- 1 reczko users 209343015 Oct 21 15:34 GSkP7_A.fa -rw-r--r-- 1 reczko users 208204231 Jan 12 18:32 GSkP7_A_v2.fa wc GSkP7_A.fa 12034444 12034444 209343015 GSkP7_A.fa reczko@max:/data/images/proton/external/Sketas/run454$ wc GSkP7_A_v2.fa 11968702 11968702 208204231 GSkP7_A_v2.fa (/ 11968702 12034444.0) 0.9945371801140127 99.45% of the 1st version are correct time java -jar 
/data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP7_A.bam | awk -f extract-insert-with-quals2.awk > GSkP7_A_v2.fastq time java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar -R reference.fasta GSkP8_b.bam | awk -f extract-insert-with-quals2.awk > GSkP8_b_v2.fastq --no-unicode /data/images/proton/external/Sketas/run454/inserts-with-qual source get-inserts-with-qual.sh for i in *q > do > echo $i > wc $i > done GSkP7_A.fastq 23937404 23937404 314674495 GSkP7_A.fastq (/ 23937404 4) 5984351 GSkP8_b.fastq 82916172 82916172 1086074469 GSkP8_b.fastq (/ 82916172 4) 20729043 IonXpress_009_IMP1_4-5-6-7PeptideLinrary.aligned.bam.fastq 21708764 21708764 345166745 IonXpress_009_IMP1_4-5-6-7PeptideLinrary.aligned.bam.fastq (/ 21708764 4) 5427191 IonXpress_011_IMP2_A4VRound4.aligned.bam.fastq 22554428 22554428 296250575 IonXpress_011_IMP2_A4VRound4.aligned.bam.fastq (/ 22554428 4) 5638607 IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq 28394788 28394788 376865183 IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq (/ 28394788 4) 7098697 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fastq 3506904 3506904 52137054 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_015.fastq (/ 3506904 4) 876726 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq 633712 633712 9112414 R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq (/ 633712 4) 158428 R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP4-Ab42.IonXpress_015.fastq.fastq 4852964 4852964 73000731 R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP4-Ab42.IonXpress_015.fastq.fastq (/ 4852964 4) 1213241 
R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq 1919972 1919972 27981101 R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq (/ 1919972 4) 479993 /data/images/proton/external/Sketas/run454/all-inserts for i in *fa > do > echo $i > wc $i > done 009_IMP1_4-5-6-7PeptideLinrary.ins.fa 10713288 10713288 215914802 009_IMP1_4-5-6-7PeptideLinrary.ins.fa (/ 10713288 2) (/ 5356644 5427191.0) 0.9870011945406012 011_IMP2_A4VRound4.ins.fa 11158496 11158496 193996716 011_IMP2_A4VRound4.ins.fa (/ 11158496 2) (/ 5579248 5638607.0) 0.9894727545296206 015_IMP3_Ab42Round2.ins.fa 12100984 12100984 212084351 015_IMP3_Ab42Round2.ins.fa (/ 12100984 2) (/ 6050492 7098697.0) 0.8523383939334218 GSKP4-Ab42.IonXpress_015.fa 2441408 2441408 47104068 GSKP4-Ab42.IonXpress_015.fa (/ 2441408 2) (/ 1220704 876726.0) 1.3923437881390537 GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa 1767810 1767810 33806445 GSkP4-Ab42Lib.IonXpressRNA_015.ins.fa (/ 1767810 2) (/ 883905 876726.0) 1.0081884191868382 GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa 320124 320124 6100840 GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa (/ 320124 2) (/ 160062 158428.0) 1.0103138334132855 GSKP5-SOD.IonXpress_016.fa 968270 968270 18303583 GSKP5-SOD.IonXpress_016.fa (/ 968270 2) (/ 484135 479993.0) 1.0086292925105158 GSkP7_A.fa 12034444 12034444 209343015 GSkP7_A.fa (/ 12034444 2) (/ 6017222 5984351.0) 1.0054928262062168 GSkP8_b.fa 41700414 41700414 723413201 GSkP8_b.fa (/ 41700414 2) (/ 20850207 20729043.0) 1.0058451323584983 awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' /data/images/proton/external/Sketas/run454/inserts-with-qual/R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.fastq > foo diff foo ../all-inserts/GSkP4-Ab42Lib.IonXpressRNA_016.ins.fa |head 318a319,320 > >VAQJ1:01431:12606 > TGCGAGTGGCCGCGGCGGG 580a583,584 > 
>VAQJ1:01478:12714 > ACCTGGATCTGGGGCCTG 656a661,662 > >VAQJ1:01505:11930 > TGCTTCGTCATGTAGGCCCGG samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01431:12606 4176:VAQJ1:01431:12606 16 bb_wo_ins 111 42 8M1I77M * 0 0 TGCTTAAGTTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT 4442424/*////-67:6<<4<>0;;;6;6;:6;:;;;9948.8774993:7718287,778298999;:<7=:3::6;6==;748 AS:i:-8 XN:i:0 XM:i:0 XO:i:1 XG:i:1 NM:i:1 MD:Z:85 YT:Z:UU # seq not in read! samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n TGCGAGTGGCCGCGGCGGG 4158:VAQJ1:01422:12624 0 bb_wo_ins 6 0 105M19I70M1I9M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATTGCGAGTGGCCGCGGCGGGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCTATTGCCATTGGCAAAATTGTGAGTGAAAGAAATTA 186;=4:;6;:;;5;:;::::4::1:9949;::::>3894;;;;:5;5://*/67,7167918896==>099::98883933333)36888777,439395:8----,*,15554778288884736777.75533+/.61556-41515758776////22-335456.2247688:9.2.142443,--*---*-03 AS:i:-87 XN:i:0 XM:i:5 XO:i:2 XG:i:20 NM:i:25 MD:Z:97G51A0T1G0C30 YT:Z:UU 1533136:VAQJ1:07604:13101 0 bb_wo_ins 6 0 87M1D17M19I24M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGCGATCGCCCACAATTGCGAGTGGCCGCGGCGGGTGCTTAAGTTTTGGCACCGAATTT 39-43*/516688477776896;;3:::59:;;:<;288289<=3;;8876959782777-85:;<154:2999,43989977-4-132)/577779817673636675131-/141213::2;6651504544,411111--*--- AS:i:-77 XN:i:0 XM:i:2 XO:i:2 XG:i:20 NM:i:22 MD:Z:87^G9G28A2 YT:Z:UU samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01478:12714 6746:VAQJ1:01478:12714 0 bb_wo_ins 6 3 104M1D19M * 0 0 GTTAAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGAATATTTGATATTGGTCTTCCCCAAGACCATAATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATACCGGCGTCGGGGGTACG 
6>288/894:;<>7<><>>DD7<=4<=?7<=4<==B@7=6;::-666+65;;;6::9-556+4::;;;;<4:33333)48889887.644-4995;47744.222'-++2 AS:i:-48 XN:i:0 XM:i:12 XO:i:1 XG:i:1 NM:i:13 MD:Z:97G6^T1G1T0T0A0A2T0T0T2C0A0C2 YT:Z:UU # seq not in read!, no ins samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n ACCTGGATCTGGGGCCTG 6744:VAQJ1:01478:12711 16 bb_wo_ins 58 0 19M1D22M1D10M18I85M * 0 0 GTCTTCCCCAAGACCATAATTTCTGCTAGCCAATGGGGCGACGCCCACAATACCTGGATCTGGGGCCTGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT 87838/;;;5:9514..,.)./0456840,4344-66547761*33392<;;6::5<9:790888599344948397-8993<;;7;;4=;1;<;6:5:;6;:::;<=7<3<<<7=;3<<<7=6=<0;;<5:;:?;;::4:9/991<6=7;;68 AS:i:-77 XN:i:0 XM:i:1 XO:i:3 XG:i:20 NM:i:21 MD:Z:19^T22^T2G92 YT:Z:UU samtools view R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -n VAQJ1:01505:11930 7626:VAQJ1:01505:11930 16 bb_wo_ins 111 42 85M * 0 0 TGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTT :;:695990:::58:8388188.779496::6877899?:>4?==7=<3?A<7>6<:/:;;5;:;9:::94:8/981;5:5;525 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:85 YT:Z:UU # seq not in read!, no ins java -jar /data/results/tools/align/jvarkit2021/jvarkit/dist/prettysam.jar --no-unicode -R reference.fasta R_2016_01_29_13_01_36_user_IONAS-268-GSR9_GSkP4-5_GKLex1-4_160129.GSkP4-Ab42Lib.IonXpressRNA_016.bam | grep -C30 VAQJ1:01431:12606 > foo >>>>> 4176 Read-Name : VAQJ1:01431:12606 Flag : 16 16 : read reverse strand MAPQ : 42 Contig : bb_wo_ins (index:0) Start : 111 End : 195 Strand : <-- Read-Length : 86 Cigar : 8M1I77M (N=3) Sequence : Read (0) : TGCTTAAGTT TTTGGCACCG AAATTTTAAC CGTTGAGTAC GGCCCATTGC CCATTGGCAA Middle : Ref (111) : NNNNNNNN-N NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMIM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM Qual : 4442424/*/ ///-67:6<< 
4<>0;;;6;6 ;:6;:;;;99 48.8774993 :7718287,7 Ref-Pos : 111 120 130 140 150 160 Read (60) : AATTGTGAGT GAAGAAATTA ATTGTT Middle : Ref (170) : NNNNNNNNNN NNNNNNNNNN NNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMM Qual : 78298999;: <7=:3::6;6 ==;748 Ref-Pos : 170 180 190 Tags : MD : 85 "String for mismatching positions" XG : 1 "Reserved for end users" NM : 1 "Edit distance to the reference" XM : 0 "Reserved for end users" XN : 0 "Reserved for end users" >VAQJ1:01422:12624 TGCGAGTGGCCGCGGCGGG >VAQJ1:01431:12606 TGCGAGTGGCCGCGGCGGG >VAQJ1:01478:12711 ACCTGGATCTGGGGCCTG >VAQJ1:01478:12714 ACCTGGATCTGGGGCCTG >VAQJ1:01503:11905 TGCTTCGTCATGTAGGCCCGG >VAQJ1:01505:11930 TGCTTCGTCATGTAGGCCCGG awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' R_2016_04_19_11_40_37_user_IONAS-288-ITlab-PHlab-DrSkretas_160419_ITC109_PHR16-17_GSKP4-5.GSKP5-SOD.IonXpress_016.fastq.fastq > foo awk '{print ">"substr($0,2);getline;print $0; getline;getline;}' IonXpress_015_IMP3_Ab42Round2.aligned.bam.fastq > foo diff foo ../all-inserts/015_IMP3_Ab42Round2.ins.fa > ../test-prettysam.txt sort foo > ../test-prettysam.txt sort ../all-inserts/015_IMP3_Ab42Round2.ins.fa > ../test-prettysam.txt2 diff ../test-prettysam.txt ../test-prettysam.txt2 > ../test-prettysam.txt3 diff ../test-prettysam.txt ../test-prettysam.txt2 | head 26d25 < >ETB1X:00024:03588 53d51 < >ETB1X:00026:03626 271a270 > >ETB1X:00032:10869 284d282 < >ETB1X:00033:03624 309a308 > >ETB1X:00033:09631 in new: >ETB1X:00024:03588 ACCGTGGAGTGGTTG samtools view IonXpress_015_IMP3_Ab42Round2.aligned.bam.bam | grep ETB1X:00024:03588 ETB1X:00024:03588 0 bb_wo_ins 2 0 7M1D35M2D28M1D35M15I41M1I52M * 0 0 CATGGTTAAGTTATCGGTCGTCGTTCCCTCGGAGTGCAAAGATATTGATATTGGTCTTCCCCAAGACCATATTTTCTGCTAGCCAATGGGGCGATCGCCCACAATACCGTGGAGTGGTTGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTAATTGTTCTGTGTAC ////)/)0)/7379::7:=><<==7>>>> 1790102 Read-Name : ETB1X:00024:03588 Flag : 0 MAPQ : 0 Contig : bb_wo_ins (index:0) Start : 
2 End : 203 Strand : --> Read-Length : 214 Cigar : 7M1D35M2D28M1D35M15I41M1I52M (N=11) Sequence : Read (0) : CATGGTT-AA GTTATCGGTC GTCGTTCCCT CGGAGTGCAA AGA--TATTG ATATTGGTCT Middle : Ref (2) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMDMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMDDMMMMM MMMMMMMMMM Qual : ////)/) 0) /7379::7:= ><<==7>>4=8<<<7@<=0999287?;==8<>><<<@9=>8=>;=<<6<<6<7<<==6=9=><:::==8>>>> 1 Read-Name : 241YN:07288:02804 Flag : 0 MAPQ : 0 Contig : bb_wo_ins (index:0) Start : 104 End : 189 Strand : --> Read-Length : 101 Cigar : 7M15I79M (N=3) Sequence : Read (0) : CCACAATTGC GGGGGGGGGG CGTGCTTAAG TTTTGGCACC GAAATTTTAA CCGTTGAGTA Middle : Ref (104) : NNNNNNN--- ---------- --NNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMIII IIIIIIIIII IIMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM Qual : <7<;829*// =<::::::9$ 8797:<9<8< >>>4=8<<<7 @<=0999287 ?;==8<>><< Ref-Pos : 104 119 129 139 Read (60) : CGGCCCATTG CCCATTGGCA AAATTGTGAG TGAAGAAATT A Middle : Ref (149) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN N Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM M Qual : <@9=>8=>;= <<6<<6<7<< ==6=9=><:: :==8>=;;<<<<<8>8><4=;93336664<<::1903(((((((.444,.7<9584882;<;6;;5)777*88*<:<4<6=>8<<;<;<<9=7=885;73:961/)/,'---)44-)//;;4=9*9990<9 AS:i:-76 XN:i:0 XM:i:4 XO:i:3 XG:i:16 NM:i:20 MD:Z:56G72G10T1A1 YT:Z:UU >>>>> 445 Read-Name : 241YN:06420:11476 Flag : 16 16 : read reverse strand MAPQ : 0 Contig : bb_wo_ins (index:0) Start : 47 End : 190 Strand : <-- Read-Length : 160 Cigar : 20M1I44M14I16M1I64M (N=7) Sequence : Read (0) : ATTTGATATT GGTCTTCCCC AAAGACCATA ATTTTCTGCT AGCCAATGGG GCGATCGCCC Middle : Ref (47) : NNNNNNNNNN NNNNNNNNNN -NNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM IMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM Qual : ;6=:::995: 7<;<7;3984 '55667;:84 83>>=;;<<< <<8>8><4=; 93336664<< Ref-Pos : 47 57 76 86 96 Read (60) : ACAATTGCGC TCGGACAGCT GCTTAAGTTT TGGCACCCGA 
AATTTTAACC GTTGAGTACG Middle : Ref (106) : NNNNN----- ---------N NNNNNNNNNN NNNNN-NNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMIIIII IIIIIIIIIM MMMMMMMMMM MMMMMIMMMM MMMMMMMMMM MMMMMMMMMM Qual : ::1903(((( (((.444,.7 <9584882;< ;6;;5)777* 88*<:<4<6= >8<<;<;<<9 Ref-Pos : 106 112 122 131 141 Read (120) : GCCCATTGCC CATTGGCAAA ATTGTAAGTG AAGAAAATTA Middle : Ref (151) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM Qual : =7=885;73: 961/)/,'-- -)44-)//;; 4=9*9990<9 Ref-Pos : 151 161 171 181 #@ deletions in ref-seq lead to - and " " in readseq and qual, respectively: Read-Name : 241YN:01016:11916 Read (0) : CTGGCGCGCC TCGTCCGGGC GG-AAAATCC TGTCTGGGCA AAGATTG-CG GCCAGGTCAG Middle : Ref (3,205) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMDMMMMMMM MMMMMMMMMM MMMMMMMDMM MMMMMMMMMM Qual : 883;;;:747 7@=707/988 6< /<<=<8= :;:<<2::91 98:6/7: <8 =9==9=<< xad.csv cd /data/results/tools/align/jvarkit/my-jvarkit-master/ /mnt/fix/c/solid.data_results.recovered.data/tools/align/jvarkit/my-jvarkit-master/src/main/java/com/github/lindenb/jvarkit/tools/biostar/Biostar59647.java export JAVA_HOME=/home/reczko/a/tools/libs/java/jdk1.8.0_66 make biostar59647 /data/results/tools/align/jvarkit/my-jvarkit-master/dist/biostar59647.jar sem --id `hostname` -j $THREADS "cat backbone_wo_insert.dict $i | java -Djvarkit.log.name=biostar59647 -Dfile.encoding=UTF8 -Xmx500m -cp 
"/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/apache-ant-1.8.2-bzip2.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-compress-1.4.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-jexl-2.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/commons-logging-1.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/htsjdk-2.1.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/ngs-java-1.2.2.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/snappy-java-1.0.3-rc3.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/htsjdk-8dd4559dced3a243dde094415a9974c03aceb3e3/dist/xz-1.5.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/commons-cli/commons-cli/1.3.1/commons-cli-1.3.1.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/org/slf4j/slf4j-api/1.7.13/slf4j-api-1.7.13.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/jvarkit-master/lib/org/slf4j/slf4j-simple/1.7.13/slf4j-simple-1.7.13.jar:/mnt/fix/b/solid.data_results.recovered.data/tools/align/jvarkit/my-jvarkit-master/dist/biostar59647.jar" com.github.lindenb.jvarkit.tools.biostar.Biostar59647 -r backbone_wo_insert.fa foo ref:9-20 | xmllint --format - |awk -f /data/images/proton/run149/extract_insert1.awk > $i.ins" # xml insert example: 241YN:00258:06961 
ATTTGATATTGGTCTTCCCCAAGACCGATAATTTTCTGCTAGCCAATGGGCGATCGCCCACAATAGCGTCTGGTCGCAGTGCTTAAGTTTTGGCACCGAAATTTTAACCGTTGAGTACGGCCCCATTGCCCATTGGCAAAATTGTGAGTGAAGAAATTTA 0 0 bb_wo_ins 47 26M1I20M1D17M15I41M1I39M #@ verify 1st vs 2nd solution diff GSkP7_A.fa baz > foo 43,44d42 < >241YN:02689:12766 < ACCGGGACGGGCTAG >>>>> 599 Read-Name : 241YN:02689:12766 Flag : 16 16 : read reverse strand MAPQ : 8 Contig : bb_wo_ins (index:0) Start : 111 End : 189 Strand : <-- Read-Length : 82 Cigar : 37M1I24M1I7M1I11M (N=7) Sequence : Read (0) : TGCTTAAGTT TTGGCACCGA AATTTTAACC GTTGAGTAAC GGCCCATTGC CCATTGGCAA Middle : Ref (111) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNN-NN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMIMM MMMMMMMMMM MMMMMMMMMM Qual : .4.).)..(9 :85665146) 96(777/7/7 7087:;:497 3709//*:<5 =::7<8<;5< Ref-Pos : 111 121 131 141 150 160 Read (60) : AATTTGTGAG CTGAAGGAAT TA Middle : Ref (170) : NN-NNNNNNN -NNNNNNNNN NN Cigar-Operator : MMIMMMMMMM IMMMMMMMMM MM Qual : ::5<;;:888 8762-(*(+( +) Ref-Pos : 170 188 Tags : MD : 73A5 "String for mismatching positions" XG : 3 "Reserved for end users" NM : 4 "Edit distance to the reference" XM : 1 "Reserved for end users" XN : 0 "Reserved for end users" XO : 3 "Reserved for end users" AS : -26 "Alignment score generated by aligner" YT : UU "Reserved for end users" <<<<< 599 57,58d54 < >241YN:04372:12362 < ACCGGGCACGACATG >>>>> 630 Read-Name : 241YN:04372:12362 Flag : 16 16 : read reverse strand MAPQ : 0 Contig : bb_wo_ins (index:0) Start : 111 End : 189 Strand : <-- Read-Length : 84 Cigar : 16M1I28M1I7M1I3M1I9M1I16M (N=11) Sequence : Read (0) : TGCTTAAGTT TTGGCACCCG AAATTTTAAC CGTTGAGTAC GGCCCAATTG CCCGATTGGG Middle : Ref (111) : NNNNNNNNNN NNNNNN-NNN NNNNNNNNNN NNNNNNNNNN NNNNN-NNNN NNN-NNN-NN Cigar-Operator : MMMMMMMMMM MMMMMMIMMM MMMMMMMMMM MMMMMMMMMM MMMMMIMMMM MMMIMMMIMM Qual : 885/5/55)5 55/:993?:: 3:5)555/5/ 55/5555:85 05/?<6<785 /555558/88 Ref-Pos : 111 121 130 140 150 159 Read (60) : CAAAATTCGA 
TGGTGAAGAA ATTA Middle : Ref (167) : NNNNNNN-NN NNNNNNNNNN NNNN Cigar-Operator : MMMMMMMIMM MMMMMMMMMM MMMM Qual : 7/9972//0/ /)--,)//)= ;7>9 Ref-Pos : 167 176 186 Tags : MD : 64T0G0A12 "String for mismatching positions" XG : 5 "Reserved for end users" NM : 8 "Edit distance to the reference" XM : 3 "Reserved for end users" XN : 0 "Reserved for end users" XO : 5 "Reserved for end users" AS : -48 "Alignment score generated by aligner" YT : UU "Reserved for end users" <<<<< 630 349,350d344 < >241YN:07995:12980 < ACCCGCGCCAGGTCG >>>>> 835 Read-Name : 241YN:07995:12980 Flag : 0 MAPQ : 3 Contig : bb_wo_ins (index:0) Start : 47 End : 123 Strand : --> Read-Length : 76 Cigar : 63M1D13M (N=3) Sequence : Read (0) : ATTTGATATT GGTCTTCCCC AAGACCATAA TTTTCTGCTA GCCAATGGGG CGATCGCCCA Middle : Ref (47) : NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN NNNNNNNNNN Cigar-Operator : MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM MMMMMMMMMM Qual : ===8====>9 =8<><9<@@7 @9>==8==>9 ===4===>>? ;;5=7<;;;4 <>>?;=<>8> Ref-Pos : 47 57 67 77 87 97 Read (60) : CAA-TACCTG AGTCCCG Middle : Ref (107) : NNNNNNNNNN NNNNNNN Cigar-Operator : MMMDMMMMMM MMMMMMM Qual : <<6 <<:078 775/7). Ref-Pos : 107 117 Tags : MD : 56G6^T1G1T1A3T0T0T1 "String for mismatching positions" XG : 1 "Reserved for end users" NM : 8 "Edit distance to the reference" XM : 7 "Reserved for end users" XN : 0 "Reserved for end users" XO : 1 "Reserved for end users" AS : -32 "Alignment score generated by aligner" YT : UU "Reserved for end users" <<<<< 835