import java.io.EOFException;
import java.io.File;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;


/**
 * Random-access reader for UCSC <code>.2bit</code> genome files.
 *
 * <p>The constructor reads the file header and the index that maps each
 * sequence name to the offset of its record. {@link #getSequence} then loads
 * one chromosome record at a time (lazily, on first use) and decodes the
 * requested range, substituting <code>N</code> for positions that fall inside
 * an N block. Soft-mask blocks are parsed but not applied to the output.</p>
 *
 * <p>Only little-endian files are supported. Instances keep mutable cursor
 * state for the currently loaded chromosome and perform no synchronization.</p>
 */
public class MyTwoBitParser {

	/** Signature value of a .2bit file whose integers are stored little-endian. */
	private static final long TWO_BIT_SIGNATURE = 0x1A412743L;
	/** The same signature as it reads when the file was written big-endian. */
	private static final long TWO_BIT_SIGNATURE_SWAPPED = 0x4327411AL;

	/** Base for each 2-bit code: 0 = T, 1 = C, 2 = A, 3 = G. */
	private final char[] _nucleotides = { 'T','C','A','G' };
	private final Transliteration _oppositeStrandTransliterate = Transliteration.compile("ACGT", "TGCA");

	private File _inputFileLocation;
	private RandomAccessFile _inputFile;
	/** Sequence name to file offset of that sequence's record. */
	private HashMap<String, Long> _chromosomeLocations = new HashMap<String, Long>(10);

	/** Name of the chromosome whose record is loaded, or null when none is usable. */
	private String _currentChromosome;
	/** File offset of the first packed-DNA byte of the loaded chromosome. */
	private long _currentChromosomeStartPosition;
	/** 0-based position of the next base the N-block cursor has not yet passed. */
	private long _currentPositionWithinSequence;
	private long _currentChromosomelength;
	/** N blocks of the loaded chromosome; index 0 is a sentinel that matches no position. */
	private ArrayList<GenomicBlock> _nBlockList = new ArrayList<GenomicBlock>();
	/** Soft-mask blocks of the loaded chromosome (same layout as the N block list). */
	private ArrayList<GenomicBlock> _maskedBlockList = new ArrayList<GenomicBlock>();
	private GenomicBlock _currentNBlock;
	private Iterator<GenomicBlock> _nBlockIterator;

	/**
	 * Opens the file and reads its sequence index.
	 *
	 * @param fileLocation path of the .2bit file
	 * @throws Exception if the file cannot be opened, is not a little-endian
	 *         .2bit file, or ends before the index is complete
	 */
	public MyTwoBitParser( String fileLocation ) throws Exception {
		_inputFileLocation = new File(fileLocation);
		_inputFile = new RandomAccessFile(_inputFileLocation,"r");
		try {
			readIndex();
		} catch( Exception e ) {
			// Do not leak the file handle when the header turns out to be unusable.
			try {
				_inputFile.close();
			} catch( Exception ignored ) {
				// The parsing failure is the more useful error to report.
			}
			throw e;
		}
	}

	/** Reads the 16-byte header and the name/offset index that follows it. */
	private void readIndex() throws Exception {
		long signature = readFourBytes();
		if( signature == TWO_BIT_SIGNATURE_SWAPPED ) {
			throw new Exception("Big-endian .2bit files are not supported: " + _inputFileLocation);
		}
		if( signature != TWO_BIT_SIGNATURE ) {
			throw new Exception("Not a .2bit file: " + _inputFileLocation);
		}
		readFourBytes(); // version
		int chromosomeCount = readCount("sequence count");
		readFourBytes(); // reserved

		for( int i = 0; i < chromosomeCount; i++ ) {
			int chromosomeNameLength = readByte();
			StringBuilder chromosomeName = new StringBuilder(chromosomeNameLength);
			for( int j = 0; j < chromosomeNameLength; j++ ) {
				chromosomeName.append((char) readByte());
			}
			_chromosomeLocations.put(chromosomeName.toString(), Long.valueOf(readFourBytes()));
		}
	}

	/**
	 * Loads the record header (length, N blocks, mask blocks) of a chromosome
	 * and positions the cursor at its first base.
	 */
	private void loadChromosome( String chromosome ) throws Exception {
		Long recordOffset = _chromosomeLocations.get(chromosome);
		if( recordOffset == null ) {
			throw new Exception(chromosome + " is not found in the .2bit file");
		}
		// From here on the previously loaded state is being overwritten; forget
		// its name so that a failure below forces a clean reload next time.
		_currentChromosome = null;

		_inputFile.seek(recordOffset.longValue());
		_currentChromosomelength = readFourBytes();
		_nBlockList = readBlockList();
		_maskedBlockList = readBlockList();
		readFourBytes(); // reserved
		_currentChromosomeStartPosition = _inputFile.getFilePointer();
		resetChromosome();
	}

	/**
	 * Reads one block table: a count, then that many start positions, then that
	 * many block sizes. The returned list starts with a sentinel block
	 * (start = end = -1) followed by the blocks with inclusive end positions.
	 */
	private ArrayList<GenomicBlock> readBlockList() throws Exception {
		int blockCount = readCount("block count");
		long[] blockStarts = new long[blockCount];
		for( int i = 0; i < blockCount; i++ ) {
			blockStarts[i] = readFourBytes();
		}

		ArrayList<GenomicBlock> blocks = new ArrayList<GenomicBlock>(blockCount + 1);
		GenomicBlock sentinel = new GenomicBlock();
		sentinel.setStartPosition(-1);
		sentinel.setEndPosition(-1);
		blocks.add(sentinel);

		for( int i = 0; i < blockCount; i++ ) {
			// The second array holds block sizes, not end positions.
			long blockSize = readFourBytes();
			GenomicBlock block = new GenomicBlock();
			block.setStartPosition(blockStarts[i]);
			block.setEndPosition(blockStarts[i] + blockSize - 1);
			blocks.add(block);
		}
		return blocks;
	}

	/** Rewinds the N-block cursor to the start of the loaded chromosome. */
	private void resetChromosome() {
		_currentPositionWithinSequence = 0;
		_nBlockIterator = _nBlockList.iterator();
		// The list always starts with the sentinel, so next() cannot fail here.
		_currentNBlock = _nBlockIterator.next();
	}

	/**
	 * Returns the bases of a chromosome range.
	 *
	 * @param chromosome    sequence name as stored in the file index
	 * @param strand        '-' to get the range reversed and passed through the
	 *                      ACGT to TGCA transliteration; any other value returns
	 *                      the range as stored
	 * @param startPosition first base of the range, 1-based, inclusive
	 * @param endPosition   last base of the range, 1-based, inclusive
	 * @return the decoded bases, with 'N' at positions inside an N block
	 * @throws Exception if the chromosome is unknown, the coordinates fall
	 *         outside the chromosome or are reversed, or the file is truncated
	 */
	public String getSequence(String chromosome, char strand, long startPosition, long endPosition) throws Exception {
		// The file is indexed from 0, callers count from 1.
		startPosition--;
		endPosition--;
		if( !chromosome.equals(_currentChromosome) ) {
			loadChromosome(chromosome);
			_currentChromosome = chromosome;
		}
		if( startPosition < 0 || endPosition < startPosition || endPosition >= _currentChromosomelength ) {
			throw new Exception("Coordinates entered are not valid!");
		}
		long baseCount = endPosition - startPosition + 1;
		if( baseCount > Integer.MAX_VALUE ) {
			throw new Exception("Requested range is too long to return as a String");
		}
		StringBuilder sequence = new StringBuilder((int) baseCount);

		// The N-block cursor only moves forward; going backwards needs a rewind.
		if( startPosition < _currentPositionWithinSequence ) { resetChromosome(); }

		// Four bases per byte: jump straight to the byte holding the first base.
		_inputFile.seek(_currentChromosomeStartPosition + startPosition / 4);
		_currentPositionWithinSequence = startPosition - (startPosition % 4);
		do {
			int currentByte = readByte();
			// The first base of a byte sits in its two most significant bits.
			for( int shift = 6; shift >= 0; shift -= 2 ) {
				if( _currentPositionWithinSequence >= startPosition && _currentPositionWithinSequence <= endPosition ) {
					if( isWithinNBlock(_currentPositionWithinSequence) ) {
						sequence.append('N');
					}
					else {
						sequence.append(_nucleotides[(currentByte >> shift) & 3]);
					}
				}
				_currentPositionWithinSequence++;
			}
		} while( _currentPositionWithinSequence <= endPosition );

		if( strand == '-' ) {
			sequence.reverse();
			return _oppositeStrandTransliterate.translate(sequence.toString());
		}
		return sequence.toString();
	}

	/**
	 * Advances the N-block cursor to the first block that does not end before
	 * the position and reports whether that block covers it.
	 */
	private boolean isWithinNBlock( long position ) {
		while( _currentNBlock.getEndPosition() < position && _nBlockIterator.hasNext() ) {
			_currentNBlock = _nBlockIterator.next();
		}
		return position >= _currentNBlock.getStartPosition() && position <= _currentNBlock.getEndPosition();
	}

	/** Closes the underlying file. */
	public void close() throws Exception {
		_inputFile.close();
	}

	/** Reads one unsigned byte, failing instead of returning -1 at end of file. */
	private int readByte() throws Exception {
		int value = _inputFile.read();
		if( value < 0 ) {
			throw new EOFException("Unexpected end of .2bit file: " + _inputFileLocation);
		}
		return value;
	}

	/** Reads a 32-bit count and rejects values that cannot be used as an int. */
	private int readCount( String description ) throws Exception {
		long count = readFourBytes();
		if( count > Integer.MAX_VALUE ) {
			throw new Exception("Invalid " + description + " in .2bit file: " + count);
		}
		return (int) count;
	}

	/**
	 * Reads an unsigned 32-bit little-endian integer.
	 * The value is assembled in a long so a high byte of 0x80 or above does
	 * not overflow into a negative number.
	 */
	private long readFourBytes() throws Exception {
		long value = 0;
		for( int shift = 0; shift < 32; shift += 8 ) {
			value |= ((long) readByte()) << shift;
		}
		return value;
	}

}
