import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ListIterator;


public class ReadGroup implements Comparable<ReadGroup> {
	
	private String _chromosome;
	private char _strand;
	private long _startPosition;
	private long _endPosition;
	private String _sequence;
	private ArrayList<AlignedPARCLIPread> _readList = new ArrayList<AlignedPARCLIPread>(10);
	private HashMap<Long, Integer> _conversionMap = new HashMap<Long, Integer>(30, (float) 0.99);
	private HashMap<Long, Integer> _nonConversionMap = new HashMap<Long, Integer>(30, (float) 0.99);
	private HashMap<Long, Integer> _countMap = new HashMap<Long, Integer>(30, (float) 0.99);
	private String _filterType;
	private double[] _signal;
	private double[] _background;
	private double[] _kdeClassifier;
	private ArrayList<KDECluster> _clusterList = new ArrayList<KDECluster>(1);
	
	
	public ReadGroup(String chromosome, char strand, long startPosition, long endPosition ) {
		_chromosome = chromosome;
		_strand = strand;
		_startPosition = startPosition;
		_endPosition = endPosition;
	}
	public void setSequence( String sequence ) {
		_sequence = sequence;
	}
	public void addRead( AlignedPARCLIPread read ) {
		if( read.getStartPosition() < _startPosition ) { _startPosition = read.getStartPosition(); }
		if( read.getEndPosition() > _endPosition ) { _endPosition = read.getEndPosition(); }
		incorporateConversionMap(read.getStartPosition(), read.getEndPosition(), read.getConversionMap());
		long currentPosition = read.getStartPosition();
		while( currentPosition <= read.getEndPosition() ) {
			if( _countMap.containsKey(currentPosition) ) {
				_countMap.put(currentPosition, _countMap.get(currentPosition) + read.getReadCount() );
			}
			else {
				_countMap.put(currentPosition, read.getReadCount());
			}
			currentPosition++;
		}
		_readList.add(read);
	}
	
	
	
	
	
	public String getChromosome() { 
		return _chromosome;
	}
	public char getStrand() {
		return _strand;
	}
	public long getStartPosition() {
		return _startPosition;
	}
	public long getEndPosition() {
		return _endPosition;
	}
	public String getSequence() {
		return _sequence;
	}
	public int getReadCount() {
		int sum = 0;
		if( _readList.size() > 0 ) {
			Iterator<AlignedPARCLIPread> readIt = _readList.iterator();
			while( readIt.hasNext() ) {
				sum += readIt.next().getReadCount();
			}
		}
		return sum;
	}
	public int getNumberOfClusters() {
		return _clusterList.size();
	}
	public ArrayList<KDECluster> getClusterList() {
		return _clusterList;
	}
	
//***********************************************************************	
// Caculate the the distributions	
	public void calculateDistributions(			
			String conversion,
			double bandwidth,
			int minimumCountPerNucleotideForKDE
		) {
		int numberOfNonConversions = 0;
		int numberOfConversions = 0;
// Calculate Signal & Background Distributions
		_signal = new double[(int) (_endPosition - _startPosition + 1)];
		_background = new double[(int) (_endPosition - _startPosition + 1)];
		_kdeClassifier = new double[(int) (_endPosition - _startPosition + 1)];
		
		String sequence = _sequence;
		if( _strand == '-' ) { sequence = new StringBuffer(sequence).reverse().toString(); }
		long currentPosition = _startPosition;
		while( currentPosition <= _endPosition) {
// Does this position of the sequence contain a 'T'			
			if( sequence.charAt((int) (currentPosition - _startPosition)) == conversion.charAt(0) ){
				if( _conversionMap.containsKey(currentPosition) ) {
					_nonConversionMap.put(currentPosition, _countMap.get(currentPosition) - _conversionMap.get(currentPosition));
					numberOfNonConversions += _countMap.get(currentPosition) - _conversionMap.get(currentPosition);
					numberOfConversions += _conversionMap.get(currentPosition);
				}
				else { 
					_nonConversionMap.put(currentPosition, _countMap.get(currentPosition));
					_conversionMap.put(currentPosition, 0);
					numberOfNonConversions += _countMap.get(currentPosition);
				}
			}
			currentPosition++;
		}
		currentPosition = _startPosition;
		while( currentPosition <= _endPosition) {
// calculate background & signal
			_background[(int) (currentPosition - _startPosition)] = 1 / (numberOfNonConversions * Math.sqrt(2 * Math.pow(bandwidth,2) * Math.PI));
			_signal[(int) (currentPosition - _startPosition)] = 1 / (numberOfConversions * Math.sqrt(2 * Math.pow(bandwidth,2) * Math.PI));
			Iterator<Long> backgroundIt = _nonConversionMap.keySet().iterator();
			while( backgroundIt.hasNext() ) {
				long position = backgroundIt.next();
				if( _countMap.get(position) >= minimumCountPerNucleotideForKDE ) {
					_background[(int) (currentPosition - _startPosition)] += (_nonConversionMap.get(position) * Math.exp(-.5 * Math.pow(Math.abs(currentPosition - position) / bandwidth,2)));
					_signal[(int) (currentPosition - _startPosition)] += (_conversionMap.get(position) * Math.exp(-.5 * Math.pow(Math.abs(currentPosition - position) / bandwidth,2)));
				}
			}
			currentPosition++;
		}
		_background = pdfOfArray(_background);
		_signal = pdfOfArray(_signal);
		
		currentPosition = _startPosition;
		while( currentPosition <= _endPosition) {
// calculate the KDE
			_kdeClassifier[(int) (currentPosition - _startPosition)] = _signal[(int) (currentPosition - _startPosition)] / ( _background[(int) (currentPosition - _startPosition)] + _signal[(int) (currentPosition - _startPosition)] );
			currentPosition++;
		}		
		
	}
//***********************************************************************	

//***********************************************************************	
// Generate clusters based on distribution only
	public void createClustersBasedOnDistribution( int minimumCountPerNucleotideForCluster, int additionalNucleotides ) {
		String sequence = _sequence;
		if( _strand == '-' ) { sequence = new StringBuffer(sequence).reverse().toString(); }		
		
		KDECluster currentCluster = new KDECluster();
		currentCluster.setStartPosition(-2000000);
		currentCluster.setEndPosition(-2000000);
		currentCluster.setMode(-2000000, -0.1);
		boolean withinCluster = false;
		
		
		long currentPosition = _startPosition;
		while( currentPosition <= _endPosition ) {
			
			if( !withinCluster 
				&& _kdeClassifier[(int) (currentPosition - _startPosition)] > 0.5
				&& _countMap.get(currentPosition) >= minimumCountPerNucleotideForCluster
			) {
// Does it overlap with the previous cluster??			
				boolean combineClusters = false;
				if( currentCluster.getEndPosition() + (long) additionalNucleotides >= currentPosition ) {
// Do those in between nucleotides have a large enough count
					long tempPosition = currentCluster.getEndPosition();
					while( tempPosition <= currentPosition ) {
						if( _countMap.get(tempPosition) >= minimumCountPerNucleotideForCluster ) {
							combineClusters = true;
						}
						else {
							combineClusters = false;
							break;
						}
						tempPosition++;
					}					
				}
				if( combineClusters ) {
					long tempPosition = currentPosition;
					while( tempPosition <= Math.min(currentPosition + (long) additionalNucleotides, _endPosition) ) {
						if( _countMap.get(tempPosition) >= minimumCountPerNucleotideForCluster ) {
							currentCluster.setEndPosition(tempPosition);
						}
						else { break; }
						tempPosition++;
					}
				}
				else {
// If they don't... do we store the previous current cluster?
					tryToaddCluster(currentCluster);
					currentCluster = new KDECluster();
// make sure that all of the nucleotdies have a large enough count					
					long tempPosition = currentPosition;
					while( tempPosition >= Math.max(currentPosition - additionalNucleotides, _startPosition) ) {
						if( _countMap.get(tempPosition) >= minimumCountPerNucleotideForCluster ) {
							currentCluster.setStartPosition(tempPosition);
						}
						else { break; }
						
						tempPosition--;
					}
					tempPosition = currentPosition;
					while( tempPosition <= Math.min(currentPosition + additionalNucleotides, _endPosition) ) {
						if( _countMap.get(tempPosition) >= minimumCountPerNucleotideForCluster ) {
							currentCluster.setEndPosition(tempPosition);
						}
						else { break; }
						tempPosition++;
					}					
				}
				withinCluster = true;
			}
			else if ( withinCluster
				&& _kdeClassifier[(int) (currentPosition - _startPosition)] > 0.5
				&& _countMap.get(currentPosition) >= minimumCountPerNucleotideForCluster
			) {
				long tempPosition = currentCluster.getEndPosition();
				while( tempPosition <= Math.min(currentPosition + additionalNucleotides, _endPosition) ) {
					if( _countMap.get(tempPosition) >= minimumCountPerNucleotideForCluster ) {
						currentCluster.setEndPosition(tempPosition);
					}
					else { break; }
					tempPosition++;
				}				
			}
			else if ( withinCluster && currentPosition > currentCluster.getEndPosition() ) {
				withinCluster = false;
			}
			
			currentPosition++;
		}
		tryToaddCluster(currentCluster);
	}
	
	
	private void tryToaddCluster( KDECluster currentCluster ) {
		String sequence = _sequence;
		if( _strand == '-' ) { sequence = new StringBuffer(sequence).reverse().toString(); }

		if( currentCluster.getStartPosition() - _startPosition  >= 0 ) {
			long tempPosition = currentCluster.getStartPosition();
			while( tempPosition <= currentCluster.getEndPosition() ) {
				if( _kdeClassifier[(int) (tempPosition - _startPosition)] > currentCluster.getModeScore() ) {
					currentCluster.setMode(tempPosition, _kdeClassifier[(int) (tempPosition - _startPosition)]);
				}
				tempPosition++;
			}
			if( currentCluster.getModeScore() > 0.5 ) {
				if( _strand == '+' ) {
					currentCluster.setSequence(
							sequence.substring( 
									(int) (currentCluster.getStartPosition() - _startPosition), 
									(int) (currentCluster.getEndPosition() - _startPosition) + 1
							)
					);
				}
				else {
					currentCluster.setSequence(
							new StringBuffer(sequence.substring( 
									(int) (currentCluster.getStartPosition() - _startPosition), 
									(int) (currentCluster.getEndPosition() - _startPosition) + 1
									)
							).reverse().toString()
					);									
				}
				_clusterList.add(currentCluster);
			}
		}
	}
//***********************************************************************	
	
//***********************************************************************	
// Generate clusters based on the read data	
	public void createClustersBasedOnReads( int minimumCountPerNucleotideForCluster ) {
		String sequence = _sequence;
		if( _strand == '-' ) { sequence = new StringBuffer(sequence).reverse().toString(); }
		ArrayList<AlignedPARCLIPread> signalReadList = new ArrayList<AlignedPARCLIPread>(_readList.size());
		Iterator<AlignedPARCLIPread> readIt = _readList.iterator();
		while( readIt.hasNext() ) {
			AlignedPARCLIPread currentRead = readIt.next();
			if( currentRead.getConversionMap().keySet().size() > 0 ) {
				long currentPosition = currentRead.getStartPosition();
				while( currentPosition <= currentRead.getEndPosition() ) {
					if( _kdeClassifier[(int) (currentPosition - _startPosition)] > 0.5 ) {
						signalReadList.add(currentRead);
						break;
					}
					currentPosition++;
				}
			}
		}
		Collections.sort(signalReadList);		
		readIt = signalReadList.iterator();
		
		long groupStart = -1;
		long groupEnd = -1;
		boolean firstReadSeen = false;

		while( readIt.hasNext() ) {
			AlignedPARCLIPread currentRead = readIt.next();
			if( !firstReadSeen ) {
				groupStart = currentRead.getStartPosition();
				groupEnd = currentRead.getEndPosition();
				firstReadSeen = true;
			}
			if( currentRead.getStartPosition() > groupEnd || !readIt.hasNext() ) {
				if( groupEnd > groupStart ) {

					KDECluster currentCluster = new KDECluster();
					boolean clusterStarted = false;
					long currentPosition = groupStart;
					while( currentPosition <= groupEnd ) {
						if(  _countMap.get(currentPosition) >= minimumCountPerNucleotideForCluster && !clusterStarted ) {
							currentCluster.setStartPosition(currentPosition);
							currentCluster.setEndPosition(currentPosition);
							currentCluster.setMode(currentPosition, _kdeClassifier[(int) (currentPosition - _startPosition)]);
							clusterStarted = true;							
						}
						else if ( _countMap.get(currentPosition) >= minimumCountPerNucleotideForCluster && clusterStarted ) {
							currentCluster.setEndPosition(currentPosition);
							if( _kdeClassifier[(int) (currentPosition - _startPosition)] > currentCluster.getModeScore() ) {
								currentCluster.setMode(currentPosition, _kdeClassifier[(int) (currentPosition - _startPosition)]);
							}
							
						}						
						if ( 
							( _countMap.get(currentPosition) < minimumCountPerNucleotideForCluster && clusterStarted )
							|| currentPosition == groupEnd
						) {
							if( currentCluster.getModeScore() > 0.5 ) {
								if( _strand == '+' ) {
									currentCluster.setSequence(
											sequence.substring( 
													(int) (currentCluster.getStartPosition() - _startPosition), 
													(int) (currentCluster.getEndPosition() - _startPosition) + 1
											)
									);
								}
								else {
									currentCluster.setSequence(
											new StringBuffer(sequence.substring( 
													(int) (currentCluster.getStartPosition() - _startPosition), 
													(int) (currentCluster.getEndPosition() - _startPosition) + 1
											)
									).reverse().toString());									
								}
								_clusterList.add(currentCluster);
							}
							currentCluster = new KDECluster();
							clusterStarted = false;
						}
						currentPosition++;
					}
				}
				groupStart = currentRead.getStartPosition();
				groupEnd =currentRead.getEndPosition();								
			}
			else if( currentRead.getEndPosition() > groupEnd ) {
				groupEnd = currentRead.getEndPosition();
			}					
		}
	}
//***********************************************************************	
// Make clusters using a slightly modified version of CCR calling from the Haffner et al paper
	
	public void createHaffnerClusters(int minimumCountPerNucleotideForCluster, int additionalNucleotides ) {

		long modePosition = -1;
		int modeConversionCount = 0;
		for( long i = _startPosition; i <= _endPosition; i++ ) {
			int currentConversionCount = 0;
			if( _conversionMap.containsKey(i) ) { currentConversionCount = _conversionMap.get(i); }
			int currentCount = _countMap.get(i);
			if( currentCount >= minimumCountPerNucleotideForCluster && currentConversionCount > modeConversionCount ) {
				modeConversionCount = currentConversionCount;
				modePosition = i;
			}
		}
		if( modePosition >= 0 ) {
		
			String sequence = _sequence;
			if( _strand == '-' ) { sequence = new StringBuffer(sequence).reverse().toString(); }
			
			long clusterStartPosition = 0;
			long clusterEndPosition = 0;
			long smallestPosition = Math.max(_startPosition, modePosition - (long) additionalNucleotides);
			for( long i = modePosition; i >= smallestPosition; i-- ) {
				int currentCount = _countMap.get(i);
				if( currentCount >= minimumCountPerNucleotideForCluster ) { clusterStartPosition = i; }
				else { break; }
			}
			
			long largestPosition = Math.min(_endPosition, modePosition + (long) additionalNucleotides);
			for( long i = modePosition; i <= largestPosition; i++ ) {
				int currentCount = _countMap.get(i);
				if( currentCount >= minimumCountPerNucleotideForCluster ) { clusterEndPosition = i; }
				else { break; }
			}
			KDECluster currentCluster = new KDECluster();
			
			currentCluster.setStartPosition(clusterStartPosition);
			currentCluster.setEndPosition(clusterEndPosition);
			currentCluster.setMode(modePosition, _conversionMap.get(modePosition) / _countMap.get(modePosition));
			if( currentCluster.getStartPosition() - currentCluster.getStartPosition() >= 42 ) { System.exit(0); }
			Iterator<AlignedPARCLIPread> readIt = _readList.iterator();
			while( readIt.hasNext() ) {
				AlignedPARCLIPread currentRead = readIt.next();
				if( anyOverlap(clusterStartPosition, clusterEndPosition, currentRead.getStartPosition(), currentRead.getEndPosition())) {
					currentCluster.addRead(currentRead);
				}
			}	
			if( _strand == '+' ) {
				currentCluster.setSequence(
						sequence.substring( 
								(int) (currentCluster.getStartPosition() - _startPosition), 
								(int) (currentCluster.getEndPosition() - _startPosition) + 1
						)
				);
			}
			else {
				currentCluster.setSequence(
						new StringBuffer(sequence.substring( 
								(int) (currentCluster.getStartPosition() - _startPosition), 
								(int) (currentCluster.getEndPosition() - _startPosition) + 1
						)
				).reverse().toString());									
			}
			_clusterList.add(currentCluster);
		}

	}
//***********************************************************************	
// Incorporate conversion information from reads..		
	private void incorporateConversionMap( long readStartPosition, long readEndPosition, HashMap<Byte, Integer> readConversionMap ) {
		Iterator<Byte> locIt = readConversionMap.keySet().iterator();
		while( locIt.hasNext() ) {
			Byte readLocation = locIt.next();
			long location = readStartPosition + (long) readLocation;
			if( _conversionMap.containsKey(location) ) {
				_conversionMap.put(location, _conversionMap.get(location) + readConversionMap.get(readLocation));
			}
			else {
				_conversionMap.put(location,readConversionMap.get(readLocation));
			}
		}
	}
//***********************************************************************	

//********************************************************************
// Generate a PDF of an Array	
	private double[] pdfOfArray( double[] array ) {
		double arraySum = 0;
		for( int i = 0; i < array.length; i++ ) {
			arraySum += array[i];
		}
		for( int i = 0; i < array.length; i++ ) {
			array[i] = array[i] / arraySum;
		}
		return array;
	}	
//********************************************************************	
//***********************************************************************	
// For Comparing...	
	@Override
	public int hashCode() {
		return Integer.parseInt(_strand + Long.toString( (_startPosition*31) + (_endPosition*29) ));
	}
	@Override
	public boolean equals( Object other ) {
		if( other == null ) { return false; }
		if( other == this ) { return true; }
		if( this.getClass() != other.getClass() ) { return false; }
		ReadGroup otherGroup = (ReadGroup) other;
		if( otherGroup.getChromosome().equals(this.getChromosome())
				&& otherGroup.getStrand() == this.getStrand() 
				&& otherGroup.getStartPosition() == this.getStartPosition()
				&& otherGroup.getEndPosition() == this.getEndPosition()
		) {
			return true;
		}
		else { return false; }
	}
//***********************************************************************	
//***************************************************************			
// For Sorting...
	public int compareTo(ReadGroup read2) {
		if( this.getChromosome().equals(read2.getChromosome()) && this.getStrand() == read2.getStrand() ) {
			if( this.getStartPosition() == read2.getStartPosition() ) {
				return (int) (this.getEndPosition() - read2.getEndPosition());
			}
			return (int) (this.getStartPosition() - read2.getStartPosition());
		}
		if( this.getChromosome().equals(read2.getChromosome()) ) {
			if( this.getStrand() == '+' ) { return -1; }
			return 1;
		}
		Collator chromCompare = Collator.getInstance();
		return chromCompare.compare( this.getChromosome(), read2.getChromosome() );
	}
//***************************************************************		
	public void determineClusterReads() {
		ListIterator<KDECluster> clusterIt = _clusterList.listIterator();
		while( clusterIt.hasNext() ) {
			KDECluster currentCluster = clusterIt.next();
			Iterator<AlignedPARCLIPread> readIt = _readList.iterator();
			while( readIt.hasNext() ) {
				AlignedPARCLIPread currentRead = readIt.next();
				if( anyOverlap(
						currentRead.getStartPosition(), currentRead.getEndPosition(),
						currentCluster.getStartPosition(), currentCluster.getEndPosition()
					)
				) {
					currentCluster.addRead(currentRead);
				}
			}
			clusterIt.set(currentCluster);
		}
	}
	
	
//********************************************************************		
// join an array list into a comma separated string of the values		
	public String displayKDE() {
		return join(_kdeClassifier);
	}
	
	
//********************************************************************		

//********************************************************************		
// join an array list into a comma separated string of the values		
	public String displaySignal() {
		return join(_signal);
	}	
//********************************************************************		
//********************************************************************		
// join an array list into a comma separated string of the values		
	public String displayBackground() {
		return join(_background);
	}	
//********************************************************************		
//********************************************************************		
// join an array list into a comma separated string of the values		
	public String displayConversionPercent() {
		double[] result = new double[(int) (_endPosition - _startPosition + 1)];
		long currentPosition = _startPosition;
		while( currentPosition <= _endPosition ) {
			if( _conversionMap.containsKey(currentPosition) ) {
				result[(int) (currentPosition - _startPosition)] =
					(double) _conversionMap.get(currentPosition) / (double) _countMap.get(currentPosition);
			}
			else { result[(int) (currentPosition - _startPosition)] = -1.0; }
			currentPosition++;
		}
		
		return join(result);
	}	
//********************************************************************			
//********************************************************************		
// join an array list into a comma separated string of the values		
	public String displayReadCount() {
		double[] result = new double[(int) (_endPosition - _startPosition + 1)];
		long currentPosition = _startPosition;
		while( currentPosition <= _endPosition ) {
			result[(int) (currentPosition - _startPosition)] = _countMap.get(currentPosition);
			currentPosition++;
		}
		
		return join(result);
	}	
//********************************************************************		
	
//********************************************************************		
// join an array list into a comma separated string of the values	
	private String join(double[] array ) {
		String result = "";
		for(int i = 0; i < array.length; i++ ) {
			result = result + "," + Double.toString(array[i]);
		}
		return result;
	}
//********************************************************************	
	
	
	public int getNumberOfConversions() {
		int sum = 0;
		Iterator<Long> longIt = _conversionMap.keySet().iterator();
		while( longIt.hasNext() ) {
			sum += _conversionMap.get( longIt.next() );
		}
		return sum;
	}
	public int getNumberOfConversionlocations() {
		int sum = 0;
		Iterator<Long> longIt = _conversionMap.keySet().iterator();
		while( longIt.hasNext() ) {
			if( _conversionMap.get( longIt.next() ) >= 1 ) { sum++; }
		}
		return sum;
	}
	
//***************************************************************			
// Determine if there is any overlap between any two genomic blocks
	private boolean anyOverlap(long start1, long end1, long start2, long end2 ) {
		if( 
			( start1 >= start2 && start1 <= end2 ) ||
			( end1 >= start2 && end1 <= end2 ) ||
			( start2 >= start1 && start2 <= end1 ) ||
			( end2 >= start1 && end2 <= end1 ) 
		) { 
			return true;
		}
		else {
			return false;
		}
	}
//***************************************************************		
	public void setFilterType(String type) {
		_filterType = type;
	}
	public String getFilterType() {
		return _filterType;
	}

	
}
