for i in ../*fastq
do
awk -f ~/tools/formats/fastq2fasta.awk $i > $i.fa
done
grep ">" -n R_2019_07_23_14_35_17_user_IONAS-425-GKlab_SPlab_190722_GK3R249-264_SPD1-2.SPD1_300bpfrag.IonXpress_049.fa |wc
9016 9016 219845
reczko@max:/data/images/proton2/run425/patalano$ grep ">" -n R_2019_07_23_14_35_17_user_IONAS-425-GKlab_SPlab_190722_GK3R249-264_SPD1-2.SPD2_300bpfrag.IonXpress_050.fa |wc
660 660 15285
/data/results/tools/blast/ncbi-blast-2.2.25+/bin/makeblastdb -in RBCL.fa -parse_seqids -dbtype nucl
cat R_2019_07_23_14_35_17_user_IONAS-425-GKlab_SPlab_190722_GK3R249-264_SPD1-2.SPD2_300bpfrag.IonXpress_050.fa| /data/results/tools/blast/ncbi-blast-2.2.25+/bin/blastn -num_threads 2 -evalue 6 -max_target_seqs 1 -outfmt 6 -db /data/images/proton2/run425/patalano/RBCL.fa > SPD2.blast.txt
cat R_2019_07_23_14_35_17_user_IONAS-425-GKlab_SPlab_190722_GK3R249-264_SPD1-2.SPD1_300bpfrag.IonXpress_049.fa| /data/results/tools/blast/ncbi-blast-2.2.25+/bin/blastn -num_threads 2 -evalue 6 -max_target_seqs 1 -outfmt 6 -db /data/images/proton2/run425/patalano/RBCL.fa > SPD1.blast.txt
http://www.metagenomics.wiki/tools/blast/blastn-output-format-6
BLASTn tabular output format 6
Column headers:
qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore
1. qseqid query (e.g., gene) sequence id
2. sseqid subject (e.g., reference genome) sequence id
3. pident percentage of identical matches
4. length alignment length
5. mismatch number of mismatches
6. gapopen number of gap openings
7. qstart start of alignment in query
8. qend end of alignment in query
9. sstart start of alignment in subject
10. send end of alignment in subject
11. evalue expect value
12.
bitscore bit score
awk -f count-species.awk SPD1.blast.txt
Number of aligened reads: 8699 of 9016
NC_016734.1:5364555084 6460 74.2614 pct Brassica napus chloroplast, complete genome
NC_007944.1:5871760159 1340 15.4041 pct Gossypium hirsutum chloroplast, complete genome
NC_007977.1:5455356010 701 8.0584 pct Helianthus annuus chloroplast, complete genome
NC_034671.1:5867860105 195 2.24164 pct Citrus reticulata plastid, complete genome
NC_001666.2:5687458304 3 0.0344867 pct Zea mays chloroplast, complete genome
awk -f count-species.awk SPD2.blast.txt
Number of aligened reads: 621 of 660
NC_007944.1:5871760159 323 52.0129 pct Gossypium hirsutum chloroplast, complete genome cotton
NC_016734.1:5364555084 244 39.2915 pct Brassica napus chloroplast, complete genome rapeseed
NC_007977.1:5455356010 32 5.15298 pct Helianthus annuus chloroplast, complete genome sunflower
NC_034671.1:5867860105 19 3.05958 pct Citrus reticulata plastid, complete genome mandarin
NC_001666.2:5687458304 3 0.483092 pct Zea mays chloroplast, complete genome
1. qseqid query (e.g., gene) sequence id
2. sseqid subject (e.g., reference genome) sequence id
3. pident percentage of identical matches
4. length alignment length
5. mismatch number of mismatches
6. gapopen number of gap openings
7. qstart start of alignment in query
8. qend end of alignment in query
9. sstart start of alignment in subject
10. send end of alignment in subject
11. evalue expect value
12.
bitscore bit score
max 8 mm:
reczko@max:/data/images/proton2/run425/patalano$ awk -f count-species2.awk SPD1.blast.txt
Number of aligened reads: 6693
NC_016734.1:5364555084 5491 82.0409 pct Brassica napus chloroplast, complete genome
NC_007944.1:5871760159 605 9.03929 pct Gossypium hirsutum chloroplast, complete genome
NC_007977.1:5455356010 446 6.66368 pct Helianthus annuus chloroplast, complete genome
NC_034671.1:5867860105 151 2.25609 pct Citrus reticulata plastid, complete genome
reczko@max:/data/images/proton2/run425/patalano$ awk -f count-species2.awk SPD2.blast.txt
Number of aligened reads: 97
NC_016734.1:5364555084 72 74.2268 pct Brassica napus chloroplast, complete genome
NC_034671.1:5867860105 13 13.4021 pct Citrus reticulata plastid, complete genome
NC_007944.1:5871760159 4 4.12371 pct Gossypium hirsutum chloroplast, complete genome
NC_007977.1:5455356010 7 7.21649 pct Helianthus annuus chloroplast, complete genome
NC_001666.2:5687458304 1 1.03093 pct Zea mays chloroplast, complete genome