#/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl --help
#-e|e_cnt - error analysis max error count allowed - optional (default 3) 
#	- This is the maximum number of errors allowed per read. 
#	  If the number is greater than the read length it is equivalent to 
#	  having the error analysis turned off.  For example if this is set to 5, 
#	  then if 5 such errors are found, the read is kept, but if 6 are found 
#	  then the read does not pass the analysis.
#-d|e_sc - error analysis max QV score - optional (default 10) 
#	- this is the maximum score for error analysis.  This must be a number 
#	  between 0-34 since there are currently no scores above 34. 
#	  So if the score is 10 then an error is tagged if it has a QV value of 10 
#	  or less.
#-p|p_cnt - Polyclonal analysis count required - optional (default 1) 
#	- This is the count required for the polyclonal analysis. 
#	  This number must be between [0-10].  Zero is equivalent to having the 
#	  polyclonal analysis turned off.  For example if this is set to 5, 
#	  then at least 5 of the first 10 positions must exceed the passed 
#	  polyclonal QV score.
#-q|p_qv - Polyclonal analysis min QV score - optional (default 25) 
#	- This is the minimum score for the polyclonal analysis. 
#	  This must be a number between 0-34 since there are currently 
#	  no scores above 34.
#-u|tr_len - desired length of read after truncation - optional 
#	- This is the length of the sequence desired, any color calls after this 
#	  length are removed. This option must be filled in if truncation is turned on 
#	  and be an integer greater than 0. 
#-a|qv_analysis - turn on/off analysis of quality values - optional (default off) 
#	- Analysis of the quality values for all of the inputted reads and 
#	  the passing reads.  Analysis returns a file with a matrix of a count 
#	  of scores by position. 
#	  [on,yes,y,off,no,n]
#example: SOLiD_preprocess_filter_v2_MR1.pl -i mp -f a_F3.csfasta -g a_F3_QV.qual -r b_R3.csfasta 
# -s b_R3_QV.qual -p 3 -q 22 -e 10 -d 9 -v off -o test_test 




#9 highqual start
# Filter with QV analysis on (-a on) and a polyclonal count of 9 (-p 9).
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 9 -o 0hrep1filt7 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# Pipe the filtered reads through collapseFA.pl.
cat 0hrep1filt7_T_F3.csfasta |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > 0hrep1filt7_T_F3.csfasta.coll
# Map the collapsed reads against the mm9c bowtie1 index
# (-v is presumably the allowed mismatch count -- TODO confirm).
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o 0hrep1F-4p10 \
  0hrep1filt7_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM, then index it.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p10.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p10.bam
samtools index 0hrep1F-4p10.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p10.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p10.conversion.stats
AtoC 517998 9.74356
GtoA 494720 9.3057
CtoA 393703 7.40557
TtoG 390794 7.35085
GtoC 288313 5.42318
AtoT 363640 6.84008
AtoG 408175 7.67779
GtoT 244998 4.60842
CtoT 406600 7.64816
CtoG 443212 8.33683
TtoA 669083 12.5855
TtoC 695075 13.0744

#8 highqual start
# Filter with QV analysis on (-a on) and a polyclonal count of 8 (-p 8).
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 8 -o 0hrep1filt6 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# Pipe the filtered reads through collapseFA.pl.
cat 0hrep1filt6_T_F3.csfasta |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > 0hrep1filt6_T_F3.csfasta.coll
# Map the collapsed reads against the mm9c bowtie1 index.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o 0hrep1F-4p9 \
  0hrep1filt6_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p9.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p9.bam
# Index the BAM by name: the former `!$` is history expansion, which on the
# same command line refers to the PREVIOUS history entry and is not expanded
# at all in a non-interactive shell.
samtools index 0hrep1F-4p9.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p9.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p9.conversion.stats
AtoC 1149850 7.5904
GtoA 1240966 8.19188
TtoG 1608036 10.615
CtoA 1132298 7.47454
GtoC 1120880 7.39917
AtoT 1245940 8.22471
AtoG 1291815 8.52755
GtoT 548708 3.62214
CtoT 1344343 8.87429
CtoG 1059029 6.99088
TtoA 1893276 12.4979
TtoC 1513591 9.99154


#5 highqual start, max. 1err
# Filter with QV analysis on (-a on), a polyclonal count of 5 (-p 5) and at
# most one error per read (-e 1).
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 5 -e 1 -o 0hrep1filt4 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# Pipe the filtered reads through collapseFA.pl.
cat 0hrep1filt4_T_F3.csfasta |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > 0hrep1filt4_T_F3.csfasta.coll
# Map the collapsed reads against the mm9c bowtie1 index.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o 0hrep1F-4p7 \
  0hrep1filt4_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p7.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p7.bam
# Index the BAM by name: the former `!$` is history expansion, which on the
# same command line refers to the PREVIOUS history entry and is not expanded
# at all in a non-interactive shell.
samtools index 0hrep1F-4p7.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p7.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p7.conversion.stats
AtoC 4470251 8.30193
GtoA 4059403 7.53893
TtoG 4819767 8.95104
CtoA 4181503 7.76568
GtoC 3606336 6.69751
AtoT 5108293 9.48687
AtoG 4992818 9.27242
GtoT 2463548 4.57518
CtoT 4260106 7.91166
CtoG 3603078 6.69146
TtoA 6238741 11.5863
TtoC 6042057 11.221


#10 highqual start, max. 1err
# Filter with QV analysis on (-a on), a polyclonal count of 10 (-p 10) and at
# most one error per read (-e 1).
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 10 -e 1 -o 0hrep1filt3 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# Pipe the filtered reads through collapseFA.pl.
cat 0hrep1filt3_T_F3.csfasta |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > 0hrep1filt3_T_F3.csfasta.coll
# Map the collapsed reads against the mm9c bowtie1 index.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o 0hrep1F-4p5 \
  0hrep1filt3_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p5.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p5.bam
# Index the BAM by name: the former `!$` is history expansion, which on the
# same command line refers to the PREVIOUS history entry and is not expanded
# at all in a non-interactive shell.
samtools index 0hrep1F-4p5.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p5.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p5.conversion.stats
AtoC 56196 6.07166
GtoA 50695 5.47731
TtoG 109458 11.8263
CtoA 96148 10.3882
GtoC 66328 7.16636
AtoG 81383 8.79297
AtoT 78226 8.45188
GtoT 42861 4.63089
CtoG 73822 7.97605
CtoT 35355 3.81991
TtoA 109382 11.8181
TtoC 125692 13.5803

#10 highqual start, (default max. 3err)
# Filter with QV analysis on (-a on) and a polyclonal count of 10 (-p 10);
# the error count is left at the tool's default.
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 10 -o 0hrep1filt2 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# Pipe the filtered reads through collapseFA.pl.
cat 0hrep1filt2_T_F3.csfasta |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > 0hrep1filt2_T_F3.csfasta.coll
# Map the collapsed reads against the mm9c bowtie1 index.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o 0hrep1F-4p4 \
  0hrep1filt2_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM, then index it.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p4.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p4.bam
samtools index 0hrep1F-4p4.bam
(
bam=readSortedBam("0hrep1F-4p4.bam")
countTable <- getAllSub( bam, minCov = 10 )
png("substPlot-0hrep1F-4p4.png",width=1024,height=1200,pointsize = 22)
plotSubstitutions( countTable, highlight = "TC" ) # TC 3.06% (without -p10: 2.7%)
dev.off()
)
# Summarise the rnaseqmut output for 0hrep1F-4p4.bam with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p4.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p4.conversion.stats
AtoC 79338 8.37926
GtoA 51295 5.41751
TtoG 87165 9.2059
CtoA 31930 3.37228
GtoC 68090 7.1913
AtoG 79917 8.44041
AtoT 41927 4.42811
GtoT 36403 3.84469
CtoG 69022 7.28974
CtoT 103401 10.9207
TtoA 151570 16.008
TtoC 146780 15.5021

(
# 1mm: same collapsed filt2 reads, mapped with -v 1; runs in a subshell.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 1 -p 10 -i -o 0hrep1F-4p8 \
  0hrep1filt2_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM, then index it.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p8.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p8.bam
samtools index 0hrep1F-4p8.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p8.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p8.conversion.stats
# not mm's !
)
# Filter run with a polyclonal count of 5 (-p 5) and the default error count.
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -p 5 -a on -o 0hrep1filt5 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual

# Filter run with a polyclonal count of 10 (-p 10), written to 0hrep1filt2.
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 10 -o 0hrep1filt2 \
  -f 0hrep1/ugc_604_1_F3.csfasta \
  -g 0hrep1/ugc_604_1_F3.QV.qual
# 3mm: collapsed filt2 reads mapped with -v 3.
./SeqTrimMap4Paralyzer -m 10 -l 18 -v 3 -p 10 -i -o 0hrep1F-4p6 \
  0hrep1filt2_T_F3.csfasta.coll \
  /data/results/reference/mmu/mm9/bowtie1/mm9c
# bowtie output -> SAM -> BAM -> sorted BAM.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl 0hrep1F-4p6.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > 0hrep1F-4p6.bam
# Index the BAM by name; the former `!$` only works as interactive history
# expansion and is taken literally in a non-interactive shell.
samtools index 0hrep1F-4p6.bam
# Summarise the rnaseqmut output with count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p6.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p6.conversion.stats
AtoC 159044 11.4624
GtoA 79407 5.72293
CtoA 75531 5.44358
TtoG 91401 6.58735
GtoC 143173 10.3186
AtoT 135697 9.77979
AtoG 153727 11.0792
GtoT 49017 3.5327
CtoT 55773 4.01961
CtoG 87409 6.29964
TtoA 224802 16.2017
TtoC 132543 9.55248


# get TC stats for old TIA alignment:
# Originally run from /data/images/proton/DKlab/mr/parclip/raw; the pasted
# shell prompt has been removed so the command line is runnable as written.
# bowtie output -> SAM -> BAM -> sorted BAM.
/data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl /data/images/proton/DKlab/mr/parclip/TIA_PARCLIPraw/csfasta/ugc_604_10_Genome.bowtie |
  samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai - |
  samtools sort -o - - > TIA_ugc_604_10_Genome_old.bam
# Index the BAM by name; the former `!$` only works as interactive history
# expansion and is taken literally in a non-interactive shell.
samtools index TIA_ugc_604_10_Genome_old.bam
bam=readSortedBam("TIA_ugc_604_10_Genome_old.bam")
countTable <- getAllSub( bam, minCov = 10 )
png("substPlot-TIA_ugc_604_10_Genome_old.png",width=1024,height=1200,pointsize = 22)
plotSubstitutions( countTable, highlight = "TC" )
dev.off()
# Summarise the rnaseqmut output for the old TIA alignment with
# count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 TIA_ugc_604_10_Genome_old.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > TIA_ugc_604_10_Genome_old.bam.conversion.stats
AtoC 45591433 8.66212
GtoA 34800405 6.61188
CtoA 31338114 5.95407
TtoG 57667920 10.9566
GtoC 32066772 6.09251
AtoG 54808564 10.4133
AtoT 55939168 10.6281
GtoT 30513152 5.79733
CtoT 38702321 7.35323
CtoG 33019686 6.27356
TtoA 52301639 9.93702
TtoC 59581961 11.3202


# Summarise the rnaseqmut output for 0hrep1F-4p3.sam.bam with
# count-conversions1.awk.
/data/results/tools/align/rnaseqmut/rnaseqmut-master/bin/rnaseqmut.linux.x64 \
  -r /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa \
  -m 0 0hrep1F-4p3.sam.bam |
  awk -f /data/results/tools/align/rnaseqmut/count-conversions1.awk > 0hrep1F-4p3.conversion.stats
AtoC 6071730 8.08974
GtoA 5410862 7.20922
TtoG 7049780 9.39285
CtoA 6334372 8.43967
GtoC 5262681 7.01179
AtoT 6778973 9.03204
AtoG 7536796 10.0417
GtoT 3268056 4.35423
CtoT 5592896 7.45176
CtoG 4956693 6.60411
TtoA 8369983 11.1518
TtoC 8421894 11.221




# Two runs with a polyclonal count of 10 (-p 10): IGG and 0hrep1.
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 10 -o IGGfilt3 \
  -f IGG/ugc_604_12_F3.csfasta -g IGG/ugc_604_12_F3.QV.qual
/data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
  -a on -p 10 -o 0hrep1filt2 \
  -f 0hrep1/ugc_604_1_F3.csfasta -g 0hrep1/ugc_604_1_F3.QV.qual
cd /data/images/proton/DKlab/mr/parclip/raw
#errors (encoded by ".") are counted only in the first 20 colors
# "filt7" runs (-p 9) for every sample, given as <sample dir>:<run number>.
# 0hrep1 (run 1) is deliberately absent: its command was commented out in the
# original notes.
for pair in IGG:12 0hrep2:4 0hrep3:7 2hrep1:2 2hrep2:5 2hrep3:8 \
            6hrep1:3 6hrep2:6 6hrep3:9 IL4:11 IFN:10
do
  sample=${pair%%:*}
  run=${pair##*:}
  /data/results/tools/solid/qc-filter/SOLiD_preprocess_filter_v2_MR1.pl \
    -a on -o "${sample}filt7" -p 9 \
    -f "${sample}/ugc_604_${run}_F3.csfasta" \
    -g "${sample}/ugc_604_${run}_F3.QV.qual"
done


# Run collapseFA.pl over every filt7 read file in the current directory,
# writing the result next to the input as <input>.coll.
for i in *filt7_T_*csfasta; do
  # An unmatched glob stays literal; skip it instead of creating a junk file.
  [ -e "$i" ] || continue
  printf '%s\n' "$i"
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl < "$i" > "$i.coll"
done
#cat  0hrep1filt2_T_F3.csfasta | /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/collapseFA.pl > test.fasta

# 0h and 2h replicates: mapped with SeqTrimMap4Paralyzer, outputs named *F-4p3.
for sample in 0hrep1 0hrep2 0hrep3 2hrep1 2hrep2 2hrep3; do
  ./SeqTrimMap4Paralyzer -m 10 -l 18 -v 2 -p 10 -i -o "${sample}F-4p3" \
    "${sample}filt7_T_F3.csfasta.coll" \
    /data/results/reference/mmu/mm9/bowtie1/mm9c
done

# Controls and 6h replicates: mapped with SeqTrimMap4Paralyzer2, outputs
# named *F-4p10.
for sample in IGG IL4 IFN 6hrep1 6hrep2 6hrep3; do
  ./SeqTrimMap4Paralyzer2 -m 10 -l 18 -v 2 -p 10 -i -o "${sample}F-4p10" \
    "${sample}filt7_T_F3.csfasta.coll" \
    /data/results/reference/mmu/mm9/bowtie1/mm9c
done


# Convert every <sample>F-4p10.bowtie file from the raw directory to SAM in
# the current directory.
# NOTE(review): the 0h/2h mapping commands elsewhere in these notes write
# *F-4p3 outputs -- confirm the *F-4p10.bowtie inputs exist for those samples.
for sample in 0hrep1 0hrep2 0hrep3 2hrep1 2hrep2 2hrep3 \
              6hrep1 6hrep2 6hrep3 IGG IFN IL4
do
  /data/results/tools/samtools/samtools-1.3/misc/bowtie2sam.pl \
    "/data/images/proton/DKlab/mr/parclip/raw/${sample}F-4p10.bowtie" > "${sample}F-4p10.sam"
done


#samtools view -bhS 0hF-4p_PARalyzer_Utilized.sam  | samtools sort -o - - > test.bam

#time ./PARalyzer 50G sample.ini-IGG-b 
#I identified a total of 4237592 reads mapping to 1072388 Unique genomic coordinates
#Making up 58618 groups
#Consisting of 21306 clusters

#@ 1rep IGG, filtered reads, PAPpipe settings

cd /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5
# One PARalyzer run per sample, given as <first argument>:<sample tag>.
# The first argument (5G / 9G) is presumably the memory size -- TODO confirm.
for job in 5G:IGGF 5G:IFNF 5G:IL4F 9G:0hF 9G:2hF 9G:6hF; do
  ./PARalyzer "${job%%:*}" \
    "/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/sample.ini-${job##*:}-4p3-HAFNER_APPROACH.ini"
done
#mod HERE

# Run prepParPipe1.sh for the 0h sample and the IGG control.
for tag in 0hF IGGF; do
  ./prepParPipe1.sh "${tag}-4p3-HAFNER_APPROACH"
done

# cp IGGF-4p2_PARalyzer_Utilized.sam test.sam
# samtools view -bS -t /data/results/reference/mmu/Mus_musculus/UCSC/mm9/Sequence/WholeGenomeFasta/genome.fa.fai test.sam  | samtools sort -o - - > test.bam
# samtools index test.bam
# Stage readAttributes
#/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/readAttributesTC_MR1.pl test.sam > test.attr
# Stage annotateReads
#mod HERE
#cp clusters-IGGF-4p2.txt test.clusters
#cp groups-IGGF-4p2.txt test.groups
#cp test.attr test.clusters test.groups /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/test3
#cd /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/test3
#./anno-mm9.sh
#cp test.clusterbed test.readcsv test.groupbed /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/
# Stage read_bed
# Convert the read table to BED.
/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/reads2bed.py test.readcsv > test.readbed
# Stage annotateClusters
# Stage addInfo
# Intersect reads with clusters (same strand, -wao), keep columns
# 5,7,8,9,11,12 as CSV, drop lines containing ",-1", then run
# editClustersTC.pl with the argument .6.
# The tab separator is spelled '\t' rather than a literal TAB so that editors
# and copy-paste cannot silently turn it into spaces.
intersectBed -a test.readbed -b test.clusterbed -wao -s |
  awk -F '\t' 'BEGIN{OFS=",";} { print $5,$7,$8,$9,$11,$12}' |
  sed '/,\-1/d' |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/editClustersTC.pl .6 > test.clusters.csv
# Stage visbed
/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/visclusterbed.py < test.clusters.csv > test.clusters.bed
# Stage geneLvl
perl /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/geneLevel.pl \
  test.clusters.csv \
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/gencode.v11.annotation.gtf > test.gene_cl.csv
# Stage annotateGroups
#/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/annotate.pl -g /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/gencode.v11.annotation.gtf -p /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/annotationRank.txt -r /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/mm9_rmsk.bed.gz -s /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/mm9_rmsk_info -strict -oi test.groups | /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/PARclusters2bed.py > test.groupbed
# Stage addInfoGroups
# Intersect reads with groups (same strand, -wao), keep columns
# 5,7,8,9,11,12 as CSV, drop lines containing ",-1", then run
# editClustersTC.pl.
# The tab separator is spelled '\t' rather than a literal TAB so that editors
# and copy-paste cannot silently turn it into spaces.
# NOTE(review): the cluster stage passes `.6` to editClustersTC.pl and this
# one passes no argument -- confirm that is intended.
intersectBed -a test.readbed -b test.groupbed -wao -s |
  awk -F '\t' 'BEGIN{OFS=",";} { print $5,$7,$8,$9,$11,$12}' |
  sed '/,\-1/d' |
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/editClustersTC.pl > test.groups.csv
# Stage geneLvlGroups
perl /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/geneLevel.pl \
  test.groups.csv \
  /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/gencode.v11.annotation.gtf > test.gene_gr.csv
# Stage statsTable
/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/extractDataTC_MR1.pl test > test.clusters.txt
# Stage spatialPerl
perl /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/Spatial.pl \
  -g /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/gencode.v11.annotation.gtf \
  -a test -strict \
  -t /data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/files/isoforms.fpkm_tracking

# Stage spatialR
# Unset TMP and TEMP, set TEMPDIR for this one command, and run Rscript under
# setsid with the R program read from stdin ("-").  The here-document
# delimiter is quoted ('!'), so the shell performs no expansion on the R code.
# The R code sets yn, fn and resolution, installs six packages from local
# tarballs into the PARpipe scripts directory, loads them from there together
# with `parallel`, and finally sources Spatial.R.
unset TMP; unset TEMP; TEMPDIR="/tmp/47609700210084528-0"  setsid  Rscript - <<'!'
            
		yn<-'n'
		fn<-'test'
		resolution<-3
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/colorRamps_2.3.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/schoolmath_0.4.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/gtools_3.4.1.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/ellipse_0.3-8.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/RColorBrewer_1.1-2.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		install.packages('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/LSD_2.5.tar.gz', lib='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(colorRamps, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(schoolmath, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(gtools, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(ellipse, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(RColorBrewer, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(LSD, lib.loc='/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/')
		library(parallel)
		source('/data/images/proton/DKlab/mr/parclip/parpipe/PARpipe-master/scripts/Spatial.R')
	
!

# Run clusters2bed.awk on the 6h and IGG 4p10 cluster tables to produce
# matching .bed files.
for tag in 6hF IGGF; do
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk \
    "clusters-${tag}-4p10-HAFNER_APPROACH.txt" > "clusters-${tag}-4p10-HAFNER_APPROACH.bed"
done
# (working directory: /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5)

# Run clusters2bed.awk on every 4p3 group table, writing <table>.bed.
for i in groups-*-4p3-HAFNER_APPROACH.txt; do
  # An unmatched glob stays literal; skip it.
  [ -e "$i" ] || continue
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk "$i" > "$i.bed"
done
# Keep only the groups that do not overlap any IGG group (-v) and report the
# size of each result.
for i in groups-*-4p3-HAFNER_APPROACH.txt.bed; do
  [ -e "$i" ] || continue
  bedtools intersect -a "$i" -b groups-IGGF-4p3-HAFNER_APPROACH.txt.bed -v > "$i.noIGG.bed"
  # Name the file explicitly: `!$` is interactive history expansion and is
  # passed to wc literally in a non-interactive shell.
  wc "$i.noIGG.bed"
done


#mapkap1 chr2:34,287,523-34,480,470
groups-6hF-4p10-HAFNER_APPROACH.bed
*chr2	34303627	34303649	G21172	0	+
chr2	34365162	34365184	G21173	0	+
!chr2	34402254	34402272	G21174	0	+
*chr2	34403898	34403919	G21175	0	+
groups-2hF-4p10-HAFNER_APPROACH.txt
*chr2,+,34303625,34303647,G23737,CACTGCCACTGAGCCTTGGCCAC,39,0,0
chr2,+,34350666,34350687,G23738,CCAGGCACACTACTAGGCATGG,6,0,0
chr2,+,34365162,34365183,G23739,TTGGATGGCTACGTGTACTTGG,7,0,0
!chr2,+,34402252,34402272,G23740,GCTGCAATATGACAGCAGAGC,16,0,0
*chr2,+,34403898,34403917,G23741,GCTATTCACATTATACAGCA,31,0,0
groups-0hF-4p10-HAFNER_APPROACH.txt
*chr2,+,34303625,34303649,G20221,CACTGCCACTGAGCCTTGGCCACTG,58,0,0
!chr2,+,34402253,34402272,G20222,CTGCAATATGACAGCAGAGC,5,0,0
*chr2,+,34403898,34403918,G20223,GCTATTCACATTATACAGCAT,16,0,0
chr2,+,34464385,34464407,G20224,TCAGAACCAATGCCAGCCAAAAG,8,0,0
chr2,+,34562653,34562671,G20225,GGGCATGACAGCAGGGCAA,11,0,0
chr2,+,34681311,34681329,G20226,AGCCTTGGGGCATCAATGT,27,0,0

groups-0hF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
chr2	34292499	34292521	G185927	nr 15 nc 0	+
chr2	34299023	34299046	G185928	nr 8 nc 0	+
chr2	34313489	34313508	G185930	nr 5 nc 0	+
chr2	34337027	34337049	G185931	nr 18 nc 0	+
chr2	34350666	34350687	G185932	nr 6 nc 0	+
chr2	34354381	34354401	G185933	nr 5 nc 0	+
chr2	34364372	34364390	G185934	nr 5 nc 0	+
chr2	34373531	34373551	G185936	nr 8 nc 0	+
chr2	34397462	34397481	G185937	nr 7 nc 0	+
chr2	34401621	34401642	G185938	nr 16 nc 0	+
!chr2	34402253	34402275	G185939	nr 6 nc 0	+
chr2	34415520	34415541	G185941	nr 5 nc 0	+
chr2	34431608	34431627	G185942	nr 7 nc 0	+
chr2	34438845	34438863	G185943	nr 5 nc 0	+
chr2	34440070	34440090	G185944	nr 8 nc 0	+
chr2	34451154	34451178	G185945	nr 5 nc 0	+
chr2	34452604	34452624	G185946	nr 6 nc 0	+
chr2	34456041	34456064	G185947	nr 5 nc 0	+
chr2	34460214	34460239	G185948	nr 6 nc 0	+
chr2	34464385	34464407	G185949	nr 40 nc 0	+


groups-IFNF-4p10-HAFNER_APPROACH.txt
*chr2,+,34303627,34303646,G6995,CTGCCACTGAGCCTTGGCCA,12,0,0
!chr2,+,34402252,34402272,G6996,GCTGCAATATGACAGCAGAGC,237,0,0
*chr2,+,34403898,34403918,G6997,GCTATTCACATTATACAGCAT,14,0,0

groups-IL4F-4p10-HAFNER_APPROACH.txt
*chr2,+,34303625,34303649,G10439,CACTGCCACTGAGCCTTGGCCACTG,23,0,0
*chr2,+,34403898,34403918,G10440,GCTATTCACATTATACAGCAT,19,0,0

groups-IL4F-4p3-HAFNER_APPROACH.txt
chr2,+,34292499,34292518,G95634,CTGCCTTGGCATGGCAGGTG,5,0,0
*chr2,+,34303625,34303651,G95635,CACTGCCACTGAGCCTTGGCCACTGTT,39,0,0
chr2,+,34308820,34308841,G95636,TTTGCAAAATATGCTACTATGC,19,0,0
chr2,+,34308916,34308936,G95637,AGTATTAGTGTCAGATAAAGA,7,0,0
chr2,+,34365162,34365183,G95638,TTGGATGGCTACGTGTACTTGG,18,0,0
chr2,+,34389208,34389231,G95639,TCTCCAAAATATTCACCGGCATTT,6,0,0
*chr2,+,34403898,34403919,G95640,GCTATTCACATTATACAGCATT,406,0,0
chr2,+,34414736,34414754,G95641,GTAGCTTGGAATTTCCTTA,5,0,0
chr2,+,34431606,34431627,G95642,GGTAAGTTTCCTAAAGGTTAAT,5,0,0
chr2,+,34457603,34457622,G95643,TGATCTTGAGCGGCCTTAAT,6,0,0
chr2,+,34479832,34479853,G95644,CTCCTCATAGCAAGTATGATAT,5,0,0

groups-IGGF-4p10-HAFNER_APPROACH.txt
*chr2,+,34303625,34303646,G4981,CACTGCCACTGAGCCTTGGCCA,30,0,0
*chr2,+,34403898,34403916,G4982,GCTATTCACATTATACAGC,9,0,0



# Run clusters2bed.awk on every 4p3 group table, writing <table>.bed.
for i in groups-*-4p3-HAFNER_APPROACH.txt; do
  # An unmatched glob stays literal; skip it.
  [ -e "$i" ] || continue
  awk -f /data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_5/clusters2bed.awk "$i" > "$i.bed"
done

# Keep only the groups that do not overlap any IGG group (-v) and report the
# size of each result.
for i in groups-*-4p3-HAFNER_APPROACH.txt.bed; do
  [ -e "$i" ] || continue
  bedtools intersect -a "$i" -b groups-IGGF-4p3-HAFNER_APPROACH.txt.bed -v > "$i.noIGG.bed"
  # Name the file explicitly: `!$` is interactive history expansion and is
  # passed to wc literally in a non-interactive shell.
  wc "$i.noIGG.bed"
done
321321  1927926 11425398 groups-0hF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
  271739 1630434 9651623 groups-2hF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
329958  1979748 11738606 groups-6hF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
   54661  327966 1897915 groups-IFNF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
0 0 0 groups-IGGF-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed
  151519  909114 5344514 groups-IL4F-4p3-HAFNER_APPROACH.txt.bed.noIGG.bed

# 6h clusters overlapping IGG clusters, written with overlap details (-wo).
bedtools intersect \
  -a clusters-6hF-4p10-HAFNER_APPROACH.bed \
  -b clusters-IGGF-4p10-HAFNER_APPROACH.bed \
  -wo > foo
#not in IGG
# Size (wc) of the 6h clusters with no IGG overlap (-v).
bedtools intersect \
  -a clusters-6hF-4p10-HAFNER_APPROACH.bed \
  -b clusters-IGGF-4p10-HAFNER_APPROACH.bed \
  -v | wc

