1. adapter removal cd /data/images/proton/DKlab/mr/Polysomes/reads/fastq/ ../cutadapt-calls.sh &> ../cutadapt-calls.log & #@ OH mappings /data/images/proton/DKlab/mr/Polysomes/www/reads.txt WT_0h_replicate_I.bam 10662629 WT_0h_replicate_II_.bam 6199351 WT_2h_replicate_I.bam 7747392 WT_2h_replicate_II.bam 7116324 WT_6h_replicate_I.bam 4046875 WT_6h_replicate_II.bam 7144523 WT_IL4_replicate_I.bam 5723737 WT_IL4_replicate_II.bam 4102817 WT_IFN-gamma_replicate_I.bam 5376002 WT_IFN-gamma_replicate_II.bam 6428271 KO_0h_replicate_I.bam 5998807 KO_0h_replicate_II.bam 6896479 KO_2h_replicate_I.bam 8342830 KO_2h_replicate_II.bam 6396953 KO_6h_replicate_I.bam 7100017 KO_6h_replicate_II.bam 7406200 KO_IFN_replicate_I.bam 11375355 KO_IFN_replicate_I.bam 11375355 KO_IL4_replicate_I.bam 5554337 KO_IL4_replicate_II.bam 4865558 KO_IFN-gamma_replicate_II.bam 7517556 KO_IFN-gamma_replicate_II.bam 7517556 #@OH mapping example reczko@max:/data/images/proton/DKlab/mr/Polysomes/www$ samtools view WT_0h_replicate_I.bam | head 44_2736_4123 0 chr1 3015785 10 10H35M30H * 0 0 AGCAGATNNNNAGAATGTGNNGAAAGAGGAACAAT -5FGCB?(('*-4>@9841&'88>>>3;--2@FDA RG:Z:RNAseq1 NH:i:5 CM:i:5 NM:i:0 CQ:Z:@@@@@@;/@@>0/;2>@->2>*>/*28:/82*.?0/2*8--6.**.28/*02///2.= CS:Z:T233020103032312231232222031111222002223201103300322222311210233223310113320 143_3595_940 0 chr1 3015786 1 20H25M30H * 0 0 GCAGATGTGGAGAATGTGNNNNNAT 9:82;6@@@@@8/?@@6-?2?/-@/@@828?@2@82///2/*.-/!**2=@*3.8>0=*/<***84*-2 CS:Z:T023011023233020103031312231110222031111222003122023.11300033030213111331110 reczko@max:/data/images/proton/DKlab/mr/Polysomes/reads/bowtie$ samtools view 0hrep1b2.bam | grep 44_2736_4123 44_2736_4123_F3 4 * 0 0 * * 0 0 TTAGACATATGTCGGTCGTGGGGATCCCCGGGAAGGGTGACCATTAATGGGGGTCCGCAGTTGGTTCACCTTGA @@@@@;/@@>0/;2>@->2>*>/*28:/82*.?0/2*8--6.**.28/*02///2.= XM:i:0 #tophat wo adapter rem reczko@fix:/data/images/proton/DKlab/mr/Polysomes/reads/tophat$ /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat -p 10 -o wt0hrep1 -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.csfasta /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.QV.qual #tophat w adapter rem /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat -p 10 -o wt0hrep1b -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq reczko@fix:/data/results/reference/mmu/mm9/bwa$ samtools flagstat /data/images/proton/DKlab/mr/Polysomes/reads/tophat/wt0hrep1b/accepted_hits.bam 4335821 + 0 in total (QC-passed reads + QC-failed reads) 0 + 0 duplicates 4335821 + 0 mapped (100.00%:-nan%) #tophat w adapter rem, w annotation /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat -p 10 -o wt0hrep1c -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq #tophat w adapter rem, w annotation, 2mm /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat -p 10 -n 2 -o wt0hrep1d -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq 6712044 + 0 in total (QC-passed reads + QC-failed reads) 0 + 0 duplicates 6712044 + 0 mapped (100.00%:-nan%) reczko@max:/data/images/proton/DKlab/mr/Polysomes/reads/tophat$ /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --transcriptome-only -p 10 -n 2 -o wt0hrep1h -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq 6712414 + 0 mapped (100.00%:-nan%) @BEST #--no-novel-juncs /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --no-novel-juncs -p 10 -n 2 -o wt0hrep1e -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq 6699716 + 0 mapped (100.00%:-nan%) # 3mm /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --no-novel-juncs -p 10 -n 3 -o wt0hrep1f -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq #tophat w adapter rem, w annotation, 2mm, fr-1st /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --library-type fr-firststrand -p 10 -n 2 -o wt0hrep1d-fr-1st -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq 6706021 mapped (100.00%) #tophat w adapter rem, w annotation, 2mm, fr-2nd /data/results/tools/align/tophat-1.4.0.Linux_x86_64/tophat --library-type fr-secondstrand -p 10 -n 2 -o wt0hrep1d-fr-2nd -G /data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.gtf -C /data/results/reference/mmu/mm9/bowtie1/mm9c ../fastq/0hrep1b.fastq 6710672 mapped (100.00%) # bowtie only /data/results/tools/align/bowtie-0.12.9/bowtie /data/results/reference/mmu/mm9/bowtie1/mm9c -p 10 -C -v 2 -m 10 -S --best --strata -f ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.csfasta -Q /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.QV.qual |samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - | samtools sort -o - - > 0hrep1.bam # reads processed: 45640751 # reads with at least one reported alignment: 109407 (0.24%) # reads that failed to align: 45475235 (99.64%) # reads with alignments suppressed due to -m: 56109 (0.12%) Reported 109407 alignments to 1 output stream(s) [bam_sort_core] merging from 19 files... reczko@max:/data/images/proton/DKlab/mr/Polysomes/reads/bowtie$ samtools flagstat 0hrep1.bam 45640751 + 0 in total (QC-passed reads + QC-failed reads) 0 + 0 duplicates 109407 + 0 mapped (0.24%:-nan%) #with cutadapt /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -q 20 -z -m 20 -c -a CGCCTTGGCCGTACAGCAG ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.csfasta /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.QV.qual >& 0hrep1.log > 0hrep1.fastq Total reads processed: 45,640,751 Reads with adapters: 17,161,839 (37.6%) Reads that were too short: 18,731,020 (41.0%) Reads written (passing filters): 26,909,731 (59.0%) Total basepairs processed: 3,423,056,325 bp Quality-trimmed: 903,636,225 bp (26.4%) Total written (filtered): 1,555,805,373 bp (45.5%) /data/results/tools/align/bowtie-0.12.9/bowtie /data/results/reference/mmu/mm9/bowtie1/mm9c -p 10 -C -v 2 -m 10 -S --best --strata ../fastq/0hrep1.fastq |samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - | samtools sort -o - - > 0hrep1b.bam # reads processed: 26909731 # reads with at least one reported alignment: 1044247 (3.88%) # reads that failed to align: 25675048 (95.41%) # reads with alignments suppressed due to -m: 190436 (0.71%) Reported 1044247 alignments to 1 output stream(s) #cutadapt min.len 15: /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -q 20 -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.csfasta /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.QV.qual >& 0hrep1b.log > 0hrep1b.fastq Total reads processed: 45,640,751 Reads with adapters: 17,161,839 (37.6%) Reads that were too short: 17,573,802 (38.5%) Reads written (passing filters): 28,066,949 (61.5%) Total basepairs processed: 3,423,056,325 bp Quality-trimmed: 903,636,225 bp (26.4%) Total written (filtered): 1,575,877,359 bp (46.0%) /data/results/tools/align/bowtie-0.12.9/bowtie /data/results/reference/mmu/mm9/bowtie1/mm9c -p 10 -C -v 2 -m 10 -S --best --strata ../fastq/0hrep1b.fastq |samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - | samtools sort -o - - > 0hrep1b2.bam # reads processed: 28066949 # reads with at least one reported alignment: 1694297 (6.04%) # reads that failed to align: 25709002 (91.60%) # reads with alignments suppressed due to -m: 663650 (2.36%) Reported 1694297 alignments to 1 output stream(s) #cutadapt min.qual 15 /data/results/tools/adapter/cutadapt-1.9.1/bin/cutadapt -q 15 -z -m 15 -c -a CGCCTTGGCCGTACAGCAG ../Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.csfasta /data/images/proton/DKlab/polysomes/reads/Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_F3.QV.qual >& 0hrep1c.log > 0hrep1c.fastq /data/results/tools/align/bowtie-0.12.9/bowtie /data/results/reference/mmu/mm9/bowtie1/mm9c -p 10 -C -v 2 -m 10 -S --best --strata ../fastq/0hrep1c.fastq |samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - | samtools sort -o - - > 0hrep1c.bam #@ seqtrimmap awk -f /data/results/tools/formats/fastq2fasta.awk /data/images/proton/DKlab/mr/Polysomes/reads/fastq/0hrep1b.fastq > /data/images/proton/DKlab/mr/Polysomes/reads/fastq/0hrep1b.fa ./SeqTrimMap4Paralyzer -c -m 10 -l 20 -v 3 -p 10 -i -o wt0hrep1b /data/images/proton/DKlab/mr/Polysomes/reads/fastq/0hrep1b.fa /data/results/reference/mmu/mm9/bowtie1/mm9c #@ bwa reczko@fix:/data/images/proton/DKlab/mr/Polysomes/reads$ /data/results/tools/solid/solid2fastq.pl Backup_Polysomes_TTP/ugc_623_BC_FRAG_W_Paulina_150506_L02_ugc_623_9_ WT0hrep1 /data/results/tools/align/bwa/bwa-0.6.0/bwa aln -c -n 0.06 -o 2 -t 8 /data/results/reference/mmu/mm9/bwa/mm9c /data/images/proton/DKlab/mr/Polysomes/reads/WT0hrep1.single.fastq.gz > /data/images/proton/DKlab/mr/Polysomes/reads/WT0hrep1.single.fastq.bai https://www.biostars.org/p/13780/ bwa aln -c -n 0.06 -o 2 -t 8 -q 10 ~/genomes/hydra/ACZUJGI/color/hydra ~/hydra/solid/hsamp_F3.fastq.gz > /scratch/hydra/hsamp_F3.sai bwa aln -c -n 0.06 -o 2 -t 8 -q 10 ~/genomes/hydra/ACZUJGI/color/hydra ~/hydra/solid/hsamp_R3.fastq.gz > /scratch/hydra/hsamp_R3.sai bwa sampe -P ~/genomes/hydra/ACZUJGI/color/hydra /scratch/hydra/hsamp_F3.sai /scratch/hydra/hsamp_R3.sai ~/hydra/solid/hsamp_F3.fastq.gz ~/hydra/solid/hsamp_R3.fastq.gz | samtools view -bS -|samtools sort - /scratch/hydra/hsamp_solid I think the proper answer is don't use -q with colorspace as it's designed for base-space. If you disregard that, you can pipe the output to this command (then to SAM) $BWA_COMMAND \ | awk 'BEGIN{FS=OFS="\t"} \ ($1 ~ /^@/){ print $0} \ ($1 !~ /^@/){ $11 = substr($11, 0, length($10)); print $0}' \ | $SAMTOOLS_COMMAND > $OUT That makes sure the qualities are the same length as the sequence. You could also trim your reads with this: https://github.com/brentp/bio-playground/blob/master/solidstuff/solid-trimmer.py #@ check #examples: Unstranded Interpretation: 1.72% of total reads were mapped to genome regions that we cannot determine the “standness of transcripts” (such as regions that having both strands transcribed). For the remaining 98.28% (1 - 0.0172 = 0.9828) of reads, half can be explained by “1++,1–,2+-,2-+”, while the other half can be explained by “1+-,1-+,2++,2–”. We conclude that this is NOT a strand specific dataset because “strandness of reads” was independent of “standness of transcripts” Stranded Interpretation: 0.72% of total reads were mapped to genome regions that we cannot determine the “standness of transcripts” (such as regions that having both strands transcribed). For the remaining 99.28% (1 - 0.0072 = 0.9928) of reads, the vast majority was explained by “1++,1–,2+-,2-+”, suggesting a strand-specific dataset. reczko@max:/data/images/proton/DKlab/mr/Polysomes/reads/bowtie$ for i in *bam do echo $i infer_experiment.py --r /data/results/reference/mmu/mm9/mm9_Ensembl.bed -i $i -s 1000000 done 0hrep1b2.bam Total 891205 usable reads were sampled This is SingleEnd Data Fraction of reads failed to determine: 0.0425 Fraction of reads explained by "++,--": 0.6367 Fraction of reads explained by "+-,-+": 0.3208 0hrep1b2-fr.bam Fraction of reads failed to determine: 0.1070 Fraction of reads explained by "++,--": 0.4934 Fraction of reads explained by "+-,-+": 0.3996 0hrep1b2-fr-norc.bam Fraction of reads failed to determine: 0.0223 Fraction of reads explained by "++,--": 0.6424 Fraction of reads explained by "+-,-+": 0.3353 0hrep1b2-rf.bam Fraction of reads failed to determine: 0.1070 Fraction of reads explained by "++,--": 0.4934 Fraction of reads explained by "+-,-+": 0.3996 0hrep1b2-rf-norc.bam Fraction of reads failed to determine: 0.0223 Fraction of reads explained by "++,--": 0.6424 Fraction of reads explained by "+-,-+": 0.3353 0hrep1b.bam Fraction of reads failed to determine: 0.0476 Fraction of reads explained by "++,--": 0.7086 Fraction of reads explained by "+-,-+": 0.2437 wt0hrep1b/accepted_hits.bam Fraction of reads failed to determine: 0.1053 Fraction of reads explained by "++,--": 0.7331 Fraction of reads explained by "+-,-+": 0.1616 wt0hrep1d/accepted_hits.bam Fraction of reads failed to determine: 0.0677 Fraction of reads explained by "++,--": 0.7396 Fraction of reads explained by "+-,-+": 0.1928 wt0hrep1d-fr-1st/accepted_hits.bam Fraction of reads failed to determine: 0.0679 Fraction of reads explained by "++,--": 0.7380 Fraction of reads explained by "+-,-+": 0.1941 wt0hrep1d-fr-2nd/accepted_hits.bam Fraction of reads failed to determine: 0.0677 Fraction of reads explained by "++,--": 0.7396 Fraction of reads explained by "+-,-+": 0.1928 WT_0hrep1 common mappings: disagree chr1:103,551,597-103,551,655 agree chr1:103,765,788-103,765,955