]0;/data/images/proton/DKlab/mr/parclip/shrimpreczko@max:/data/images/proton/DKlab/mr/parclip/shrimp$ for i in /data/images/proton/DKlab/mr/parclip/raw/*/*.csfasta > do > echo $i > grep -C 2 T03233022020002200133010120230200221013012333332201 $i > done /data/images/proton/DKlab/mr/parclip/raw/0hrep1/ugc_604_1_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/0hrep2/ugc_604_4_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/0hrep3/ugc_604_7_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/2hrep1/ugc_604_2_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/2hrep2/ugc_604_5_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/2hrep3/ugc_604_8_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/6hrep1/ugc_604_3_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/6hrep2/ugc_604_6_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/6hrep3/ugc_604_9_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/IFN/ugc_604_10_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/IGG/ugc_604_12_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/IL4/ugc_604_11_F3.csfasta T22010303131123323311123311122310032003110033321323 >100_1000_159_F3 T03233022020002200133010120230200221013012333332201 >100_1000_3528_F3 T32103031311201203103313323301121133122310303300103 /data/images/proton/DKlab/mr/parclip/raw/saet/IL4saet1_T_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/saet/IL4saet1_U_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_10/ugc_604_10_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_11/ugc_604_11_F3.csfasta T22010303131123323311123311122310032003110033321323 >100_1000_159_F3 T03233022020002200133010120230200221013012333332201 >100_1000_3528_F3 T32103031311201203103313323301121133122310303300103 /data/images/proton/DKlab/mr/parclip/raw/ugc_604_12/ugc_604_12_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_1/ugc_604_1_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_2/ugc_604_2_F3.csfasta 
/data/images/proton/DKlab/mr/parclip/raw/ugc_604_3/ugc_604_3_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_4/ugc_604_4_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_5/ugc_604_5_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_6/ugc_604_6_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_7/ugc_604_7_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_8/ugc_604_8_F3.csfasta /data/images/proton/DKlab/mr/parclip/raw/ugc_604_9/ugc_604_9_F3.csfasta python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/splitreads.py 500000 /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta cd /data/results/reference/mmu/mm9/shrimp-mir/ ln -s /data/results/reference/mmu/mm9/mRNA-stranded/Mus_musculus.NCBIM37.64-toMM9.fa mm9-stranded-mRNA.fa export SHRIMP_FOLDER=/data/results/tools/align/SHRiMP_2_2_3 python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 00111111001111111100,00111111110011111100,00111111111100111100,00111111111111001100,00111111111111110000 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa cd /data/results/reference/mmu/mm9/shrimp python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 00111111011111111100,00111111110111111100,00111111111101111100,00111111111111011100,00111111111111101100,00111111111111110100,00111110111111111100,00111101111111110000,00111011111111110000,00110111111111110000,00101111111111110000 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa #16 masks mMm (m=mismatch,M=Match) python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 010111111111111111,101011111111111111,110101111111111111,111010111111111111,111101011111111111,111110101111111111,111111010111111111,111111101011111111,111111110101111111,111111111010111111,111111111101011111,111111111110101111,111111111111010111,111111111111101011,111111111111110101,1111111111111010 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa #16 masks mmm python 
/data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 000111111111111111,100011111111111111,110001111111111111,111000111111111111,111100011111111111,111110001111111111,111111000111111111,111111100011111111,111111110001111111,111111111000111111,111111111100011111,111111111110001111,111111111111000111,111111111111100011,111111111111110001,1111111111111000 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa #15 masks mmmm python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 000011111111111111,100001111111111111,110000111111111111,111000011111111111,111100001111111111,111110000111111111,111111000011111111,111111100001111111,111111110000111111,111111111000011111,111111111100001111,111111111110000111,111111111111000011,111111111111100001,111111111111110000 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa cd shrimp-14mmmmm/ #14 masks mmmmm python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 000001111111111111,100000111111111111,110000011111111111,111000001111111111,111100000111111111,111110000011111111,111111000001111111,111111100000111111,111111110000011111,111111111000001111,111111111100000111,111111111110000011,111111111111000001,111111111111100000 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa cd shrimp-18m/ #18 masks m python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 011111111111111111,101111111111111111,110111111111111111,111011111111111111,111101111111111111,111110111111111111,111111011111111111,111111101111111111,111111110111111111,111111111011111111,111111111101111111,111111111110111111,111111111111011111,111111111111101111,111111111111110111,111111111111111011,111111111111111101,111111111111111110 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa cd shrimp-16m/ #16 masks m python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 
011111111111111,101111111111111,110111111111111,111011111111111,111101111111111,111110111111111,111111011111111,111111101111111,111111110111111,111111111011111,111111111101111,111111111110111,111111111111011,111111111111101,111111111111110,111111111111111 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa cd shrimp-15mMm/ #15 masks mMm python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 010111111111111,101011111111111,110101111111111,111010111111111,111101011111111,111110101111111,111111010111111,111111101011111,111111110101111,111111111010111,111111111101011,111111111110101,111111111111010,111111111111101,111111111111111 --h-flag --shrimp-mode cs mm9-stranded-mRNA.fa python /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/utils/project-db.py --seed 010111111111111,101011111111111,110101111111111,111010111111111,111101011111111,111110101111111,111111010111111,111111101011111,111111110101111,111111111010111,111111111101011,111111111110101,111111111111010,111111111111101,111111111111111 --h-flag --shrimp-mode cs mapkap1.fa head -50000 /data/images/proton/DKlab/mr/parclip/raw/2hrep1/*.csfasta > test.csfasta $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-18m/mm9-stranded-mRNA-cs test.csfasta -N 20 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% --shrimp-format -e -255 -f -255 -P >test-18m.out 2>test-18m.log Reads Matched: 24,968 (99.8720%) ... with QV >= 10: 24,968 (99.8720%) Reads Dropped: 0 (0.0000%) Total Matches: 231,547 Avg Hits/Matched Read: 9.27 Duplicate Hits Pruned: 155 nTC 4053 nonTC 8790 TCrate 31.558 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-15mMm/mm9-stranded-mRNA-cs test.csfasta -N 20 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% --shrimp-format -e -255 -f -255 -P >test-15mMm.out 2>test-15mMm.log Reads Matched: 24,996 (99.9840%) ... 
with QV >= 10: 24,996 (99.9840%) Reads Dropped: 0 (0.0000%) Total Matches: 249,960 Avg Hits/Matched Read: 10.00 Duplicate Hits Pruned: 376 awk -f shrimp2TtoC1.awk test-18m.out | tail nTC 7339 nonTC 13438 TCrate 35.3227 >113_3385_3152_F3 ENSMUST00000098950 + 5252 5295 2 47 50 188 6x(A)2x5x1x1x4x5x2x2x1(C)15 G: 5252 TCCCTTG-TTGTACACTAGATGGCTAGACAT-TTTTGTATATTAGTGTGT 5295 ||||||x||X||||XXX|||X||||X|X|X ||||||||||||||| T: -CCCTTGaTTgTACActaGATgGCTAgAcAtCTTTTGTATATTAGTG--- R: 2 T31002010303131123122320323322122200011333303211321 47 ACCCTTGGCCGTACAGCAGATCCGATAGACTCTTTTGTATATTAGTGCTG R: 2 T31002011303131123122320323322122200011333303211321 47 ACCCTTGTAATGCACTACTCGAATCGCTCAGAGGGGTGCGCGGCTGTAGT R: 2 T31002012303131123122320323322122200011333303211321 47 (single color err: 0->1) ACCCTTGATTACGTGATGAGCTTAGCGAGTCTCCCCACGCGCCGACATCA R: 2 T31002012301131123122320323322122200011333303211321 47 (single color err: 3->1) ACCCTTGATTGTACAGCAGATCCGATAGACTCTTTTGTATATTAGTGCTG $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs 2hr3/9500000_to_9999999.csfasta -N 8 -n 1 -o 2 -F -v 50% -h 50% -P >map.out 2>map.log Reads Matched: 9 (0.0337%) ... with QV >= 10: 5 (0.0187%) Reads Dropped: 0 (0.0000%) Total Matches: 15 Avg Hits/Matched Read: 1.67 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs 2hr3/9500000_to_9999999.csfasta -N 8 -n 1 --local -o 2 -F -v 50% -h 50% -P >map2.out 2>map2.log Reads Matched: 15 (0.0562%) ... with QV >= 10: 15 (0.0562%) Reads Dropped: 0 (0.0000%) Total Matches: 26 Avg Hits/Matched Read: 1.73 Duplicate Hits Pruned: 3 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs 2hr3/9500000_to_9999999.csfasta -N 8 -n 1 --local -o 2 -F -v 50% -h 50% -r 30% -w 150% -P >map3.out 2>map3.log Reads Matched: 16 (0.0599%) ... 
with QV >= 10: 16 (0.0599%) Reads Dropped: 0 (0.0000%) Total Matches: 28 Avg Hits/Matched Read: 1.75 Duplicate Hits Pruned: 3 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 15 -n 1 --local -o 2 -F -v 50% -h 50% -r 30% -w 150% -P >map3.out 2>map3.log Reads Matched: 5,484 (0.0576%) ... with QV >= 10: 5,484 (0.0576%) Reads Dropped: 0 (0.0000%) Total Matches: 9,888 Avg Hits/Matched Read: 1.80 Duplicate Hits Pruned: 1,251 #SAM output ref: ACCAATTTTAA GTTTCTTAAGTTATTTCCTTTAAAGTATATATTAAAGTGAAACCGAAGT TACTAATTcTTAAGTTAttTCcTTTAAAATaTATAT 485_3319_1817_F3 0 ENSMUST00000098950 1445 255 3H11M1D1M5I30M * 0 0 ACCAATTTTAATACTAATTCTTAAGTTATTTCCTTTAAAATATATAT * AS:i:158 NM:i:7 CS:Z:T23131010303130331230303203021032202120030003233333 CM:i:7 XX:Z:ACCAATTttAA-TACTAATTcTTAAGTTAttTCcTTTAAAATaTATAT ACCAATTTTAA TACTAATTCTTAAGTTATTTCCTTTAAAATATATAT ACCAATTttAA-TACTAATTcTTAAGTTAttTCcTTTAAAATaTATAT /data/results/tools/align/SHRiMP_2_2_3/SHRiMP_2_2_3/README --shrimp-format >947_1567_1384_F3 reftig_991 + 22901 22923 3 25 25 2020 18x2x3 FMT: readname contigname strand contigstart contigend readstart readend readlength score editstring 'contigstart' Start of alignment in genome (beginning with 1, not 0). 'contigend' End of alignment in genome (inclusive). 'readstart' Start of alignment in read (beginning with 1, not 0). 'readend' End of alignment in read (inclusive). 'readlength' Length of the read in bases/colours. 'score' Alignment score 'editstring' = size of a matching substring = mismatch, value is the tag letter () = gap in the reference, value shows the letters in the tag - = one-base gap in the tag (i.e. 
insertion in the reference) x = crossover (inserted between the appropriate two bases) A perfect match for 25-bp tags is: "25" A SNP at the 16th base of the tag is: "15A9" A four-base insertion in the reference: "3(TGCT)20" A four-base deletion in the reference: "5----20" Two sequencing errors: "4x15x6" (i.e. 25 matches with 2 crossovers) /data/results/tools/align/SHRiMP_2_2_3/bin/shrimp2sam usage: shrimp2sam shrimp_output_file reads_file genome_file1 [genome_file2 ...] >1_54_1454_F3 ENSMUST00000026406 - 907 935 6 34 50 156 2x1A2x2x2x12x7 G: 935 GCAACTGCGGCGCTGTACTCGAAGATAAGTATCGGGCTGCCTGACCCCCT 907 ||X ||X|X|X|||||||||||X|||||| T: -----TGcAGCgCtGtACTCGAAGATAaGTATCG---------------- reference seq, (rev.comp => refA SNPto readG CGATACTTATCTTCGAGTACAGCGCCGCATCATGAACCTGA cgatacttatcttcgagtacagcgctgca >ENSMUST00000026406 gene=Rdh5 CDS=182-1135 ACTCCTGGTCCACAGCGAGAGTCCACCCACTCCAGACTTTGGCCTTAGCTGTAGCTAGTGTGGGAGCCTGGGAAGTCTAGGAGCAAAGTCTCTCAAGCAGACAGAAAGCTACAGCTTCACACATTGTGTTGCCTGCCAGCTTTCCCCAGAGGCTGCCCTCAGCAGGGCATCTCATCCCATCATGTGGCTGCCTCTGCTTCTGGGTGCCTTGCTGTGGGCAGTGCTGTGGTTGCTCAGAGACCGGCAGAGCCTGCCGGCCAGTGATGCTTTCATCTTCATCACTGGCTGTGACTCTGGCTTTGGGCGCCTTCTGGCACTGCAACTTGACCAGAAGGGCTTCCAAGTCCTGGCCGGCTGCCTGACCCCCTCTGGAGCAGAAGACCTGCAGCAGATGGCCTCCTCCCGCCTCCACACAACACTACTGGATATCACTGATCCCCAGAATGTCCAGCAAGTTGCCAAGTGGGTGAAGACACGTGTTGGAGAAACTGGACTTTTTGGTCTGGTGAATAACGCTGGCGTAGCTGGTATCATCGGGCCCACACCATGGCTAACACAGGATGATTTCCAGAGAGTACTGAGTGTGAACACACTGGGGCCCATCGGTGTCACCCTTGCCCTGCTGCCCCTGCTACAGCAGGCCAGGGGTCGGGTGGTCAACATCACCAGTGTCTTGGGCCGCATAGCAGCCAATGGCGGGGGCTACTGTGTCTCCAAGTTTGGCCTGGAGGCCTTCTCTGACAGCCTGAGGCGGGACATGGCTCCGTTCGGAGTACAAGTCTCCATTGTGGAGCCTGGCTTCTTTCGAACCCCTGTGACCAACCTGGAGAGTCTGGAGAGCACCCTGAAGGCTTGTTGGGCCCGGCTACCTCCAGCTATACAGGCCCACTACGGGGAAGCCTTCCTCGATACTTATCTTCGAGTACAGCGCCGCATCATGAACCTGATCTGTGACCCAGAACTAACGAAGGTGACCAGCTGCCTGGAGCATGCCCTGACTGCTCGCCACCCCCGAACACGCTACAGCCCAGGCTGGGATGCCAAGCTGCTCTGGCTGCCTGCCTCCTACCTTCCAGCCAGGGTGGTGGATGCTGTGCTCACCTGGATCCTTCCCCGGCCCGCCCAGTCAGTCTCCTGATTCCAGCTTTA
CAGCAAGAGGCTGATTTTGAAAAGCAAGGCATCTATTTCTGTGTCTACCCAGTGCTGCCTGGTTTCTGATACCAATTAGGCTCTCAATAAATATGTATTGCTTTAAATC ELAVl1: >28217 chr8:4284782-4325100 ENSMUST00000098950 >mm9_dna range=chr8:4289661-4289686 5'pad=0 3'pad=0 strand=- repeatMasking=none GCAATGGCCATAGCAAGTCTGAACGG CAAGTCT at pos 1154, T at 1158, T at 1160 l16 AAGCTGCAATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACAGGTTTCCTTCAA #20nt match wo mm /data/images/proton/DKlab/mr/parclip/shrimp/map8.out 805_1834_1851_F3 16 ENSMUST00000098950 1142 255 9H20M21H * 0 0 GTCTGAACGGCTACCGCCTG * AS:i:160 NM:i:0 CS:Z:T23.0201.2231.2023.123212033013232310012211.3.32323 CM:i:2 XX:Z:CAGGCGGTAGCcGTTcAGAC CAGGCGGTAGCcGTTcAGAC GGTAGCCGTTCAGACTTG =ref_rev_comp GTCTGAACGGCTACCGCCTG GTCTGAACGGCTACCGCCTG=ref #without mm reczko@fix:/data/images/proton/DKlab/mr/parclip/shrimp$ $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-14mmmmm/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 19 -n 1 --local -o 2 -F -v 20% -h 20% -r 30% -w 150% >map6.out 2>map6.log 810_3040_1271_F3 0 ENSMUST00000098950 1127 255 14H21M15H * 0 0 CAATGGGCATAGCAAGTCTGA * AS:i:156 NM:i:1 CS:Z:T100302012303331123100313323102122121200221.2.33202 CM:i:1 XX:Z:CAaTGGGCATAGCAAGTCTGA /data/images/proton/DKlab/mr/parclip/shrimp/map10.out >307_2261_2440_F3 ENSMUST00000098950 + 1141 1154 33 46 50 100 1x7x6 G: 1141 CAAACTATGAAGAAGCTGCAATGGCCATAGCAAGTCTGAACGGCTACCGC 1154 |X||||||X||||| T: --------------------------------AgTCTGAAcGGCTA---- R: 33 T23100013303101123223310023322122301221200303230110 46 >470_3763_2842_F3 ENSMUST00000098950 + 1129 1147 1 19 50 150 x4x15 G: 1129 ATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACA 1147 X|||X|||||||||||||| T: aTGGcCATAGCAAGTCTGA------------------------------- R: 1 T23101013323102122122320223322133221333122323021232 19 >579_1668_2448_F3 ENSMUST00000098950 + 1136 1155 8 27 50 100 2x4x3x4x1x6 G: 1136 ATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACA 1155 ||X|||X||X|||XX||||| T: 
-------TAgCAAgTCtGAAcgGCTAC----------------------- R: 8 T003020103031011231203203231022230101332101..3303.2 27 TTAAGGTTAATGGTGATGAATCCGATGGAGATTGGTATCAAC (nt wo X) >677_3224_4503_F3 ENSMUST00000098950 + 1129 1154 1 26 50 132 x4A12x2G3x3 G: 1129 ATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACA 1154 X||| ||||||||||||X| |||X|| T: aTGGACATAGCAAGTCTgAGCGGcTA------------------------ R: 1 T23102113323102122322330023322233321222.022...31... 26 >888_2139_2726_F3 ENSMUST00000098950 + 1129 1147 1 19 50 100 x7x1x5x6 G: 1129 ATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACA 1147 X||||||XX||||X||||| T: aTGGCCAtaGCAAgTCTGA------------------------------- R: 1 T23.03010123101122122321223312133121313.23021.12333 19 >895_2705_3912_F3 ENSMUST00000098950 + 1133 1147 5 19 50 150 15 G: 1133 ATGGCCATAGCAAGTCTGAACGGCTACCGCCTGGGGGACAAAATTTTACA 1147 ||||||||||||||| T: ----CCATAGCAAGTCTGA------------------------------- R: 5 T23102013323102122122320223322133321330300303122320 19 >287_2476_908_F3 ENSMUST00000098950 - 1134 1147 31 44 50 100 x7x7 TTTGTCCCCCAGGCGGTAGCCGTTCAGACTTGCTATGGCCATTGCAGCTTCTTCATAGTT (ref,recComp) G: 1147 TTGACACATTCTTAGGGGAGCAACAGAACATCAGACTTGCTATGAAATAA 1134 X||||||X|||||| T: ------------------------------tCAGACTtGCTATG------ R: 31 T13020103031311231203202333221232122123132331320302 44 >734_3811_172_F3 ENSMUST00000098950 - 1131 1148 32 49 50 120 4x2x8x4 G: 1148 ATTTGACACATTCTTAGGGGAGCAACAGAACTTCAGACTTGCTATGGCCT 1131 ||||X|X|||||||X||| T: -------------------------------TTCAgAcTTGCTATgGCC- R: 32 T2022103031311231201233101023211302132320132333030. 
49 /data/images/proton/DKlab/mr/parclip/shrimp/2hrep2.out >175_608_1669_F3 ENSMUST00000098950 - 1086 1107 21 42 50 126 4x1x2x7T7 (CtoT) G: 1107 GGACCCTCCCACTTCCACCTCATGGTCACAAAACCAAACCCTTTACATTT 1086 ||||XX|X|||||| ||||||| T: --------------------CATGgtCaCAAAACTAAACCCT-------- R: 21 T10303131123123203111013122231100012300100221123312 42 >35_1213_4196_F3 ENSMUST00000098950 - 1154 1182 19 47 50 130 4x3x3x6x3x1x4x2x3 TTTGTCCCCCAGGCGGTAGCCGTTCAGACTTGCTATGGCCATTGCAGCTTCTTCATAGTT (ref,recComp) G: 1182 TACTGTTGAGTGGCATTGAACCTGTAAAATTTTGTCCCCCAGGCGGTAGA 1154 ||||X||X||X|||||X||XX|||X|X|| T: ------------------AACCtGTaAAaTTTTGtCCccCAGgCgGT--- R: 19 T23102010300211131130101112002300013203101223101112 47 #TtoC on plus >52_2577_1782_F3 ENSMUST00000098950 + 4133 4171 9 47 50 188 4x3x1G3A1x2x1T3x18 G: 4133 GCCATCCTGGCTGTGCCTCTGGTCACAGAAGTGTGACTATATCTCCCTCC 4171 ||||X||X ||| |X|X |||X||||||||||||||||| T: --------GGCTgTGcGTCTAGtCaTAGAaGTGTGACTATATCTCCC--- R: 9 T23213012303231123122320223322121111212333322200032 47 ^p4154 >57_225_4873_F3 ENSMUST00000098950 + 1961 1993 14 46 50 182 4x2T2x1C3x2x17 G: 1961 AAGAGTTTCAGGGCAGCTCCAGTATATTCCAGAGTCAAACCTGAGCTCCA 1993 ||||X| ||X |||X|X|||||||||||||||| T: -------------CAGCtCTAGtCTATtCcAGAGTCAAACCTGAGC---- R: 14 T31002010303232123122320223322112221210010212231.21 46 ^ #in /data/images/proton/DKlab/mr/parclip/shrimp/map9.out >57_225_4873_F3 ENSMUST00000098950 + 1961 1993 14 46 50 182 4x2T2x1C3x2x17 G: 1961 AAGAGTTTCAGGGCAGCTCCAGTATATTCCAGAGTCAAACCTGAGCTCCA 1993 ||||X| ||X |||X|X|||||||||||||||| T: -------------CAGCtCTAGtCTATtCcAGAGTCAAACCTGAGC---- R: 14 T31002010303232123122320223322112221210010212231.21 46 ^p1955 >52_2577_1782_F3 ENSMUST00000098950 + 4133 4171 9 47 50 188 4x3x1G3A1x2x1T3x18 G: 4133 GCCATCCTGGCTGTGCCTCTGGTCACAGAAGTGTGACTATATCTCCCTCC 4171 ||||X||X ||| |X|X |||X||||||||||||||||| T: --------GGCTgTGcGTCTAGtCaTAGAaGTGTGACTATATCTCCC--- R: 9 T23213012303231123122320223322121111212333322200032 47 ^p4150 >159_2582_4600_F3 
ENSMUST00000098950 + 4417 4455 7 45 50 202 x5x3x3x3G4T1x6x12 G: 4417 CAGAGAAATTAATGAGCCTCCAAGACAACGGCAGTTACCTATATGGGGTT 4455 X||||X||X||X|| |||| |X|||||X||||||||||| T: ------aATTAaTGaGCcTCGAAGATAaCGGCAgTTACCTATATG----- R: 7 T32302020303131123122320223331303111031023333110322 45 ^p4436 >288_927_4859_F3 ENSMUST00000098950 + 3394 3427 15 48 50 166 2x6x2x1x1T2x2x2x15 G: 3394 AAAGGACCTGGTGCTCAGAGCTAGCACCTTAATCACTATACCATCCCTAC 3427 ||X|||||X|XX ||X|X|X|||||||||||||| T: --------------TCaGAGCTaGcaTCTtAaTcACTATACCATCCCT-- R: 15 T01001013323111123222320223322233331123331013200211 48 >202_3874_3535_F3 ENSMUST00000098950 + 61 98 2 39 50 184 4T1G5T1T6x1x1x15 G: 61 TCGCCGTCGCCGTCGCCGTCGCCGTCGCCGCTACCGCTACCGCTACCGCT 98 |||| | ||||| | ||||||XXX|||||||||||||| T: -CGCCTTGGCCGTTGTCGTCGCcgtCGCCGCTACCGCTA----------- R: 2 T31330201030310112312332222330332310332333010202010 39 >252_2740_3853_F3 ENSMUST00000098950 + 1973 1996 18 41 50 186 4T6x13 G: 1973 CAGGGCAGCTCCAGTATATTCCAGAGTCAAACCTGAGCTCCAGGCATGCC 1996 |||| ||||||X|||||||||||| T: -----------------ATTCTAGAGTCaAACCTGAGCTCC--------- R: 18 T22010301131123122230223222123001021223220023021023 41 >331_3082_3278_F3 ENSMUST00000098950 + 3549 3586 9 50 50 201 3x13x2T7x2x1x3(ACGC)6 G: 3549 TCTGGGTAGCACTCTAGATAAAAGTCCCAGTGCATTCCTT----GATGAG 3586 |||X||||||||||||X| |||||||X|XX|| |||||| T: --------GCAcTCTAGATAAAAGtCTCAGTGCAtTccTTACGCGATGAG R: 9 T03010103131022232233000232221211312003203133323122 50 >357_381_154_F3 ENSMUST00000098950 + 55 96 1 42 50 184 4x1G6T1T2x2x2x1C10x9 G: 55 CGCCGTCGCCGTCGCCGTCGCCGTCGCCGTCGCCGCTACCGCTACCGCTA 96 ||||X |||||| | ||X|X|X ||||||||||X|||||||| T: CGCCgGCGCCGTTGTCGtCgCcCTCGCCGTCGCcGCTACCGC-------- R: 1 T233000333031011233223202233031233233231033032322.3 42 #AtoG on minus in /data/images/proton/DKlab/mr/parclip/shrimp/map9.out >67_2840_3909_F3 ENSMUST00000098950 - 2964 3005 3 49 50 144 4x5x4x5x2C2x2x1A6x1(AGTGA)5x3 G: 3005 CCGAACAGGCAGCCTCAATTTGTACATGGGCTTCTT-----GATACACTG 2964 ||||X||||X|||X||||X| ||X|X ||||||X 
|||||X|| T: --GAACaGGCAgCCTcAATTtGCACaTgAGCTTCTtAGTGAGATACaCT- R: 3 T00120102031330201030313112322232022332112223312120 49 ^p2984 >29_333_4938_F3 ENSMUST00000098950 - 3544 3578 7 41 50 176 2x3A4x1x1x2x9x9x3 G: 3578 AAATTTGGAATGCACTGGGACTTTTATCTAGAGTGCTACCCTTTTACCTT 3544 ||X|| ||||XXX|X||||||||X||||||||X|| T: ------GGaATACACTgggAcTTTTATCTaGAGTGCTAcCC--------- R: 7 T23302010103311123122320003322122211323200313320032 41 ^p3573 >70_4278_3261_F3 ENSMUST00000098950 - 3545 3579 5 39 50 196 1x2x8x1x1x12x4A5 G: 3579 AATTAGGAATGCACTGGGACTTTTATCTAGAGTGCTACCGTTTTACCTTT 3545 |X|X|||||||XXX|||||||||||X||| ||||| T: ----AgGaATGCACTgggACTTTTATCTAgAGTACTACC----------- R: 5 T21132103031311231221200033223322131231010101220... 39 ^p3550 >142_3750_4784_F3 ENSMUST00000098950 - 3545 3581 4 40 50 182 1x4x7x1x1A2x3C11x5 G: 3581 AAACAAGGAATGCACTGGGACTTTTATCTAGAGTGCTACCGTTTTACCTT 3545 |X|||X||||||XX ||X|| |||||||||||X|||| T: ---CaAGGaATGCACtgAGAcTTCTATCTAGAGTGcTACC---------- R: 4 T10302020303131113222320223322322211023101012232212 40 ^p3567 $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 15 -n 1 --local -o 2 -F -v 20% -h 20% -r 30% -w 150% >map4.out 2>map4.log 453_527_1470_F3 0 ENSMUST00000098950 1127 255 8H18M24H * 0 0 CAATGGCCATAGCAAGTC * AS:i:160 NM:i:0 CS:Z:T20011303010310301332330212322232022333212213002012 CM:i:1 XX:Z:CAATGGCCATAGCaAGTC $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-mir/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 18 -n 1 --local -o 2 -F -v 10% -h 10% -r 30% -w 150% >map5.out 2>map5.log $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-14mmmmm/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 15 -n 1 --local -o 2 -F -v 20% -h 20% -r 30% -w 150% >map6.out 2>map6.log [ -g/--open-r ] The score to open a gap along the genome sequence. 
Should be negative. Note: In the current implementation, the gap_open score does not include any extension. That is, a gap of length 1 is scored as: + [ -q/--open-q ] The score to open a gap along the read sequence. Should be negative. Note: If -g is set and -q is not set, the gap open penalty for the query will be set to the same value as specified for the reference. $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-14mmmmm/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 18 -n 1 --local -o 2 -v 20% -h 20% -r 30% -w 150% -g -255 -q -255 >map7.out 2>map7.log $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp-14mmmmm/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 19 -n 1 --local -o 2 -v 20% -h 20% -r 30% -w 150% >map8.out 2>map8.log $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-18m/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 19 -n 1 --local -o 2 -v 20% -h 20% -r 30% -w 150% --shrimp-format >map9.out 2>map9.log $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-18m/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep3/*.csfasta -N 20 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% --shrimp-format -e -255 -f -255 >map10.out 2>map10.log time $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-18m/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep2/*.csfasta -N 20 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% -P -e -255 -f -255 >2hrep2.out 2>2hrep2.log & #fix time $SHRIMP_FOLDER/bin/gmapper-cs -L /data/results/reference/mmu/mm9/shrimp/shrimp-18m/mm9-stranded-mRNA-cs /data/images/proton/DKlab/mr/parclip/raw/2hrep1/*.csfasta -N 19 -n 1 --local -o 10 -v 20% -h 20% -r 30% -w 150% -P -e -255 -f -255 >2hrep1.out 2>2hrep1.log & #max In SHRiMP 2.0.4, a new command-line option was introduced that can be used to 
load several miRNA-specific settings: "-M mirna" or "--mode mirna" is equivalent to: loading the 5 seeds mentioned above; plus "-H -n 1 -w 100% -U -a 0 -g -255 -q -255 -Z" Assume we have a set of (not so many...) reads that we really need to find mappings for, and the appropriate genome projection. We can try: $ $SHRIMP_FOLDER/bin/gmapper-cs -L database reads.csfasta \ -V -w 150% -n 1 -r 50% -v 55% -l 40% -Z -h 60% -a -1 \ >map.out 2>map.log The significance of these options is the following: -V : Do not automatically trim genome index lists that are unusually long. In our tests with hg18, this results in about 1-2% more hits, but the running time can increase dramatically, to 3x or more. -w 150% : Enlarge the length of the genome window against which each read is being mapped. -n 1 : Enable window generation mapping mode 1. This is very costly when working with a large genome. The setting means a single spaced kmer match between a read and the genome can be enough to create a candidate mapping window. With seeds of weight 12 and a uniform random genome, a random match occurs once in every 4^12 locations. For 1/4 of hg18, this means about 44 random matches, for every spaced kmer and for every seed. With 4 seeds of average span 20 and reads of length 50, that's about (50-20)x4x44 ~= 5,000 random single spaced kmer matches. Investigating a candidate window location around each of those is costly. The default window generation mode (-n 2) requires at least 2 spaced kmer matches, which dramatically reduces the number of windows created around random matches. (Even though a match of 2 spaced seeds of weight 12 in a genome window does not guarantee 2x12=24 matches, by design of the seeds it does guarantee about 17 matches. As a result, the number of windows expected to be placed around random matches drops by a factor of at least 4^5 = 1024.) For paired mapping mode, you can try the window generation mode "-n 3" first, and, at the extreme, "-n 2". 
-r 50% : Set the window generation threshold to 50% of the read length. Combined with -n 1, the effect of this is that every single spaced kmer match from a seed with span greater than 50% of read length will generate a candidate mapping window. -v 55% : Lower the threshold for the vector SW filter. -l 40% : Increase the allowable overlap of one mapping location with a previously inspected mapping location that passed the vector filter. This helps if for some reason the data makes it hard for gmapper to center genome mapping locations around spaced kmer matches. -Z : Disable caching of previous vector SW runs. -h 60% : Lower the threshold for the full SW filter. -a -1 : Disable the restriction of the full SW dynamic programming algorithm to areas around the matching spaced kmers. This can help detect larger indels, but the runtime of the full SW filter will increase drastically. Still, in some cases this is ok to do, as this runtime is much smaller than, say, the time spent in the vector SW filter. See the log file from a relevant run for these timings. #@ https://www.biostars.org/p/110955/ So happy to hear from you. I have tried default arguments, and got a terrible result: reads matched(17.4191%). 
I used the command: gmapper-cs SRR_test.fastq --load-mmap /Ssc_mmap --local -Q -N 15 --all-contigs --sigle-best-mapping >SRR_test.sam 2>SRR_test.log Another better result:reads matched (54.3101%) I used the command: gmapper-cs SRR_test.fastq --load-mmap /Ssc_mmap --local -Q -N 15 -r 45% -v 20% -h 20% --all-contigs --sigle-best-mapping >SRR_test.sam 2>SRR_test.log o #@ grep -A4 ENSMUST00000147337 2hrep2-15mMm.out > f grep -A4 ENSMUST00000147337 0hrep1-15mMm.out > foo T pos10 21 >2_3266_2931_F3 ENSMUST00000147337 + 2687 2716 11 40 50 166 4x1x3x1x1C1x18 G: 2687 AGGCAAGGTACCCAGCACCCTGACACCACACCACACCTCTGTCTACACCA 2716 ||||XX||XX |X||||||||||||||||| T: ----------CCCAgcACccCGaCACCACACCACACCTCT---------- R: 11 T23302010300001301123033111011110111102222210000000 40 3UTR of Mapkap1: ACTIONS QUERY SCORE START END QSIZE IDENTITY CHRO STRAND START END SPAN --------------------------------------------------------------------------------------------------- browser details YourSeq 34 1 34 34 100.0% 2 + 34480044 34480077 34 #shrimp2sam: 2_2303_3354_F3 16 ENSMUST00000147337 171 255 41M9H * 0 0 TCTAGCTCATATTCGACAGTCACACGAGACCAGTGATGACA * AS:i:176 NM:i:1 CS:Z:T23302010103131123123022223111121211222233333232322 CM:i:10 XX:Z:TgTcAtCaCTgGtCTCGTGTGACTGTCgAaTATgaGCTAGA >2_2303_3354_F3 ENSMUST00000147337 - 171 211 10 50 50 176 1x2x2x2x3x2x2T12x2x4x1x7 G: 211 GCCTCTCTCTGTCATCACTGGTCACGTGTGACTGTCGAATATGAGCTAGA 171 |X|X|X|X||X|X| ||||||||||||X|X|||XX|||||| T: ---------TgTcAtCaCTgGtCTCGTGTGACTGTCgAaTATgaGCTAGA R: 10 T23302010103131123123022223111121211222233333232322 50 cat sample.sam | samtools view -Sb - | samtools sort -o - - > sample.bam cat 2hrep2-15mMm-mapkap1.sam | samtools view -Sb - | samtools sort -o - - > 2hrep2-15mMm-mapkap1.bam samtools index 2hrep2-15mMm-mapkap1.bam cat $i | samtools view -Sb - | samtools sort -o - - > $i.bam samtools index $i.bam samtools calmd -b 0hrep1-15mMm-mapkap1.sam.bam /data/results/reference/mmu/mm9/shrimp/shrimp-15mMm/mapkap1.fa > foo.bam 48_934_1706_F3 
16 ENSMUST00000124443 5 255 46M4H * 0 0 CCGGGTCGTGGCGCGGCTCAGTGTAACAGGGCTGCTGTTGCCGGCC * AS:i:132 NM:i:2 CS:Z:T33302013303101123123022203111121220133300012200030 CM:i:13 XX:Z:GGccGGCAACAGCAGCCcTgttaCACTGAGccGCGcCaCgAcCCGG MD:Z:19T1G24 # 2 15G12A6 410_3060_612_F3 16 ENSMUST00000124443 2 255 5H35M10H * 0 0 GTTCCGGGTCGTGGCACGGCTCTGGGTATCAGGGC * AS:i:122 NM:i:2 CS:Z:T10130201230310212331302222333112311322203020131003 CM:i:8 XX:Z:GCcCTGATACcCaGAGCcGTGccACGAccCGGAAC MD:Z:15G12A6 ENSMUST00000124443: GTTCCGGGTCGTGGCGCGGCTCTGGGTAACAGGGC x x GTTCCGGGTCGTGGCACGGCTCTGGGTATCAGGGC samtools calmd -b 0hrep1-15mMm-mapkap1.sam.bam /data/results/reference/mmu/mm9/shrimp/shrimp-15mMm/mapkap1.fa > foo.bam samtools view -h foo.bam | awk -f sam2tc1.awk | samtools view -Sb - | samtools sort -o - - > 0hrep1-15mMm-mapkap1.sam.tc.bam