import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses a tab-separated alignment file into a {@link Genome} of aligned PAR-CLIP reads.
 *
 * <p>Each input line is split on tabs and read as: column 0 = read ID, column 1 = strand,
 * column 2 = chromosome name, column 3 = zero-based start offset, column 4 = read sequence and,
 * when the line has exactly eight columns, column 7 = comma-separated mismatch descriptors.
 * NOTE(review): this matches the layout of bowtie's default output as far as this class uses it;
 * confirm against the aligner version actually in use.
 */
public class PARCLIPbowtieParser {
	/** Number of processed lines between two progress messages on standard output. */
	private static final int PROGRESS_INTERVAL = 500000;

	/** Upper bound on processed lines; parsing stops as soon as the counter exceeds it. */
	private static final int MAXIMUM_PROCESSED_LINES = 45000000;

	String _filename;
	private static final Transliteration _oppositeStrandTransliterate = Transliteration.compile("ACGT", "TGCA");
	
	/**
	 * Creates a parser for the given alignment file. The file is not opened until
	 * {@link #parseFile(boolean, String, int, int, int)} is called.
	 *
	 * @param filename path of the alignment file to parse
	 */
	public PARCLIPbowtieParser(String filename) {
		_filename = filename;
	}
	
	/**
	 * Reads the alignment file and collects the accepted reads of one chromosome chunk.
	 *
	 * <p>Consecutive lines with the same read ID are treated as alternative alignments of one read:
	 * the alignment with the fewest non-conversion mismatches is kept, and the read is flagged as
	 * non-unique when another alignment ties with the currently kept one. A read is added to the
	 * result only if it is unique, has a positive read count, does not exceed
	 * {@code maximumNonConversionMismatchesAllowed} and is at least {@code minimumReadLength} long.
	 *
	 * <p>The input reader is always closed, including when parsing fails with an exception.
	 *
	 * @param isCollapsed whether read IDs have the form {@code <id>-<count>}; if so the part after
	 *        the first {@code '-'} is parsed as the read count, otherwise every line counts as 1
	 * @param conversion mismatch descriptor suffix that marks a conversion event; it is inserted
	 *        unescaped into the regular expression {@code ^(\d+):<conversion>$}
	 * @param minimumReadLength minimum length a read must have to be accepted
	 * @param maximumNonConversionMismatchesAllowed maximum number of mismatches that are not
	 *        conversion events a read may have to be accepted
	 * @param chunk only lines whose chromosome name yields this value are processed; the value is
	 *        computed from the last two characters of the chromosome name as
	 *        {@code 10 * (secondToLast - '0') + (last - '0')}
	 * @return the genome holding every accepted read
	 * @throws IOException if the file cannot be opened, read or closed
	 */
    public Genome parseFile( boolean isCollapsed, String conversion, int minimumReadLength, int maximumNonConversionMismatchesAllowed,int chunk) throws IOException {
		Genome currentGenome = new Genome();
		Pattern crosslinkPattern = Pattern.compile("^(\\d+):"+conversion+"$");
		BufferedReader inputFile = new BufferedReader( new FileReader( _filename ) );
		try {
			String currentReadID = "";
			// Placeholder "previous read" with a read count of 0, so it can never pass the acceptance filter.
			PARCLIPread currentRead = new PARCLIPread("", '\u0000', (long) 0, (long) 0, 0, Byte.valueOf("0"), new HashMap<Byte, Integer>(0));
			
			String line;
			int nread = 0;
			while( ( line = inputFile.readLine() ) != null ) {
				String[] lineArray = line.split("\t");
				String readID = lineArray[0];
				int readCount = 1;
				
				// If the FASTA file was collapsed, we need to get the correct count
				if( isCollapsed ) {
					String[] collapsedArray = readID.split("-");
					readID = collapsedArray[0];
					readCount = Integer.parseInt(collapsedArray[1]);
				}
				
				char strand = lineArray[1].charAt(0);
				String chromosome = lineArray[2];
				// The file offset is zero-based; positions are stored one-based and inclusive.
				long startPosition = Long.parseLong(lineArray[3]) + 1;
				long endPosition = startPosition + (long)lineArray[4].length() - 1;
				
				// Chunk number derived from the last two characters of the chromosome name.
				int c = 10*(chromosome.charAt(chromosome.length()-2)-'0') + (chromosome.charAt(chromosome.length()-1)-'0');
				if( c != chunk ) {
					continue;
				}
				
				nread++;
				if( (nread % PROGRESS_INTERVAL) == 0 ) {
					System.out.println("nr "+nread+" chr "+chromosome+" l"+chromosome.length()+"  e "+c);
				}
				if( nread > MAXIMUM_PROCESSED_LINES ) {
					System.out.println("OOM prevention. Input chopped here");
					break;
				}
				
				HashMap<Byte, Integer> conversionLocations = new HashMap<Byte, Integer>(0);
				byte mismatchCount = 0;
				if( lineArray.length == 8 ) {
					String[] mismatchInfo = lineArray[7].split(",");
					for( String currentMismatch : mismatchInfo ) {
						String currentMismatchCrosslink = currentMismatch;
						if( strand == '-' ) {
							// Translate the descriptor with the ACGT -> TGCA table before matching.
							currentMismatchCrosslink = _oppositeStrandTransliterate.translate(currentMismatchCrosslink);
						}
						Matcher match = crosslinkPattern.matcher( currentMismatchCrosslink );
						if( match.find() ) {
							if( strand == '+' ) {
								conversionLocations.put( Byte.valueOf(match.group(1)), readCount);
							}
							else {
								// Mirror the offset within the read for every strand other than '+'.
								int mirroredOffset = lineArray[4].length() - 1 - Integer.parseInt(match.group(1));
								conversionLocations.put( Byte.valueOf(Integer.toString(mirroredOffset)), readCount);
							}
						}
						else {
							mismatchCount++;
						}
					}
				}
				
				if( readID.equals(currentReadID) ) {
					// Another alignment of the read currently being tracked.
					if( mismatchCount == currentRead.getNumberOfMismatches() ) {
						currentRead.setUnique(false);
					}
					if( mismatchCount < currentRead.getNumberOfMismatches() ) {
						// Strictly better alignment: it replaces the tracked one.
						currentRead = new PARCLIPread(
								chromosome,
								strand,
								startPosition,
								endPosition,
								readCount,
								mismatchCount,
								conversionLocations
						);
					}
				}
				else {
					// A new read ID starts: emit the previous read if it qualifies, then track the new one.
					addReadIfAccepted(currentGenome, currentRead, minimumReadLength, maximumNonConversionMismatchesAllowed);
					currentReadID = readID;
					currentRead = new PARCLIPread(
							chromosome,
							strand,
							startPosition,
							endPosition,
							readCount,
							mismatchCount,
							conversionLocations
					);
				}
			}
			// The last tracked read has no following line to trigger its emission.
			addReadIfAccepted(currentGenome, currentRead, minimumReadLength, maximumNonConversionMismatchesAllowed);
		}
		finally {
			// Release the file handle even when a malformed line aborts parsing.
			inputFile.close();
		}
		
		return currentGenome;
	}
	
	/**
	 * Adds {@code read} to {@code genome} if it is unique, has a positive read count, has at most
	 * {@code maximumNonConversionMismatchesAllowed} mismatches and is at least
	 * {@code minimumReadLength} long; otherwise does nothing.
	 */
	private static void addReadIfAccepted( Genome genome, PARCLIPread read, int minimumReadLength, int maximumNonConversionMismatchesAllowed ) throws IOException {
		if( read.isUnique()
				&& read.getReadCount() > 0
				&& read.getNumberOfMismatches() <= maximumNonConversionMismatchesAllowed
				&& read.getLength() >= minimumReadLength
		) {
			AlignedPARCLIPread alignedRead = new AlignedPARCLIPread(
					read.getStrand(),
					read.getStartPosition(),
					read.getEndPosition(),
					read.getReadCount(),
					read.getNumberOfMismatches(),
					read.getConversionMap()
			);
			genome.addRead(read.getChromosome(), alignedRead);
		}
	}
}
