#!/bin/bash
#
# Gene expression estimation for every *.bam file in the current directory.
#
# For each <sample>.bam this script:
#   1. runs featureCounts (QuantSeq-style parameters, see FEATURECOUNTS_OPTS)
#      and appends the tool's stdout/stderr to featurecounts.log;
#   2. reduces the resulting table to two tab-separated, header-less columns
#      (feature id, count) in <sample>_counts.txt.
#
# Requirements: bash >= 4 (for '&>>'), featureCounts, sed, cut, mktemp.
# Exit status: 0 when every file was processed, 1 if any file failed.

# Make a pipeline fail when any of its stages fails, so a broken sed/cut step
# in clean_counts_table is not hidden by a succeeding last stage.
set -o pipefail

readonly ANNOTATION_GTF='Mus_musculus.GRCm38.92.gtf'
readonly LOG_FILE='featurecounts.log'

# featureCounts parameters used for QuantSeq data.
readonly -a FEATURECOUNTS_OPTS=(
    -t 'three_prime_utr'
    -g 'gene_name'
    --largestOverlap
    --readExtension3 500
    --readExtension5 500
    -s 1
)

#######################################
# Reduce a raw featureCounts table, in place, to "id<TAB>count" rows.
# Steps (same order as the original post-processing):
#   - drop every line starting with '#';
#   - remove the literal ".bam" suffix wherever it occurs (the dot is
#     escaped so that names such as "Xbam1" are not mangled);
#   - drop the first remaining line (the column header);
#   - keep only columns 1 and 7.
# Arguments: $1 - path of the table to rewrite
# Returns:   0 on success, 1 if the file is missing or a step failed
#            (the table is left untouched when the pipeline fails).
#######################################
clean_counts_table() {
    local table=$1
    local tmp

    if [[ ! -f "$table" ]]; then
        echo "clean_counts_table: no such file: $table" >&2
        return 1
    fi

    tmp=$(mktemp) || return 1

    # Optional, currently disabled (as before): strip the GK sample prefix
    # by adding  -e 's/GK3R[0-9]\+-//g'  to the first sed call.
    # The result goes to a temp file first because the input cannot be read
    # and truncated at the same time; 'cat >' keeps the table's permissions.
    if sed -e '/^#/d' -e 's/\.bam//g' -- "$table" \
        | sed '1d' \
        | cut -f 1,7 > "$tmp" \
        && cat -- "$tmp" > "$table"; then
        rm -f -- "$tmp"
        return 0
    fi

    rm -f -- "$tmp"
    return 1
}

echo ""
echo "Gene expression estimation from bam files using FeatureCounts has been started."
echo "--------------------------------------------------------------------------------"

# Collect the inputs with nullglob so that "no match" yields an empty list
# instead of the literal string '*.bam'.
shopt -s nullglob
bam_files=(*.bam)
shopt -u nullglob

if (( ${#bam_files[@]} == 0 )); then
    echo "No .bam files found in the current directory; nothing to do." >&2
fi

failures=0

for bam in "${bam_files[@]}"; do
    # Sample name: everything before the FIRST dot of the file name
    # (so "a.sorted.bam" -> "a"); kept as-is to preserve output names.
    sample="${bam%%.*}"

    # Print the file that is currently processed.
    echo "generating counts for $bam..."

    # File name for the featureCounts output.
    counts_file="${sample}_counts.txt"

    # Run featureCounts; its messages are appended to the shared log.
    if ! featureCounts -a "$ANNOTATION_GTF" -o "$counts_file" \
        "${FEATURECOUNTS_OPTS[@]}" "$bam" &>> "$LOG_FILE"; then
        echo "featureCounts failed for $bam; see $LOG_FILE." >&2
        failures=$((failures + 1))
        continue
    fi

    # Keep only feature id and count columns, without comment/header lines.
    if ! clean_counts_table "$counts_file"; then
        echo "post-processing failed for $counts_file." >&2
        failures=$((failures + 1))
        continue
    fi

    echo "finished counting for $sample."
    echo "Count table and summary statistics has been stored in seperate files."
    echo "--------------------------------------------------------------------------------"

done

if (( failures > 0 )); then
    echo "Operation finished with $failures failed file(s); see $LOG_FILE." >&2
    echo ""
    exit 1
fi

echo "Operation has been completed!"
echo ""

