#!/bin/bash
#########################
# script BMix - contains the entire preprocessing pipeline
#
# input in a config file: 
#        bam_file - full path of the sorted alignment file in .bam format
#        ref_file - full path of the .fasta file corresponding to the reference genome
#        sample_name - name of the sample
#        work_folder - full path of the folder where the result files should be stored
#        cov_min - minimum coverage
#        refine_cov - minimum coverage of the binding site boundaries
#        separate_strands - indicates if the strands are considered separately or not for learning parameters
#
# example of use: ./BMix CONFIG.txt
#
# Written by Monica Golumbeanu
# monica.golumbeanu@bsse.ethz.ch
#########################

start=`date +%s`

# load matlab module
# module load matlab/R2013a

# Load all the necessary variables
source $1

# Create the work folder if not existent
if [ ! -d $WORK_FOLDER ]; then
    mkdir $WORK_FOLDER
    echo $WORK_FOLDER " created"
fi

## Construct the mpileup file
echo "Constructing mpileup file ..."
mpileup_file=$WORK_FOLDER$SAMPLE_NAME".mpileup"
samtools mpileup -ABQ0 -d10000000 -f $REF_FILE $BAM_FILE > $mpileup_file 2> "/dev/null"


## Extract the mismatch information and build the mismatch profiles
echo "Extracting mismatch information ..."
mismatch_folder=$WORK_FOLDER"MismSummaries/"
# Create the mismatch summary folder if not existent
if [ ! -d $mismatch_folder ]; then
    mkdir $mismatch_folder
    echo $mismatch_folder " created"
fi
awk -v cov_min=$COV_MIN -v path=$mismatch_folder -f extract_mism $mpileup_file


## Apply the model and classify the TCs
echo "Classifying mismatches"
tc_folder=$WORK_FOLDER"TC_Results/"
if [ ! -d $tc_folder ]; then
    mkdir $tc_folder
    echo $tc_folder " created"
fi
matlab -nodesktop -nosplash -r "BMix_learn('$mismatch_folder', '$tc_folder', $COV_MIN, $CONFIDENCE_PER, $SEPARATE_STRANDS); exit();"


## Construct the binding sites
bash construct_sites $tc_folder $WORK_FOLDER $REFINE_COV $CONFIDENCE_PER $BAM_FILE

## Write execution time to the log file
end=`date +%s`
runtime=$((end-start))
echo "Total execution time (s): " $runtime > $WORK_FOLDER"Log.txt"
