library(GenomicRanges)
library(GenomicAlignments)
library(GenomicFeatures)

source("/data/results/tools/r/GenomicFeatures/R/my-coordinate-mapping-methods.R")
setwd("/data/results/reference/mmu")

txdb <- makeTxDbFromGFF(file ="Mus_musculus.NCBIM37.64.gtf", format= "gtf", dataSource="Mus_musculus.NCBIM37.64", organism= "Mus musculus")
read_csv<-read.csv(file="/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2/sh-clusters-IFN.txt2.csv.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv", sep="", header = FALSE)

m_cols<-data.frame(GeneName=read_csv[,1], CDS_start=read_csv[,2],CDS_end=read_csv[,3])
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6], mcols=m_cols )
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6])
gr_ranges<-makeGRangesFromDataFrame(df)

mapTrRegion2Genome=function(ts,te,trid){
 ex=exons(txdb, filter=list(tx_name = trid ));
 r=ranges(ex);
 s=start(r);
 e=end(r);
 st=as.logical(strand(ex)[1]=="-");
 sn=as.character(seqnames(ex)[1])
 if (sn=="MT"){sn="M";}
 sn=paste("chr",sn,sep="");
 ne=length(ex);
 sok=0;
 if (st) {
  for (j in (ne):1) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (ts <= width) {
      if (sok==0){
        mts=end-ts;
	mtsa=start;
	sok=1;sei=j;
      }
   } else{
     ts = ts- width;
   }
   if (te <= width)  break;
   te=te-width;
   }   
   return(c(st,end-te+1,mts+1,mtsa,end,sei,j));
} else {
   for (j in 1:ne) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
#   print (c(j,ts,te,start,end,sok))
   if (ts <= width) {
      if (sok==0){
        mts=start+ts;
	mtsa=end;
	sok=1;sei=j;
      }
   } else{
    ts = ts - width;
   }
   if (te <= width)  break;
   te=te-width;
  }   
  return(c(st,start+te-1,mts-1,mtsa,start,sei,j));
 }
}

mapAllTrRegions2Genome=function(gr_ranges){
 all_s=NULL;
 all_e=NULL;
 all_st=NULL;
 all_id=NULL;
 all_i=NULL;
 ids=as.vector(gr_ranges@seqnames)
 for (i in 1:length(ids)){
    m=mapTrRegion2Genome(start(gr_ranges)[i],end(gr_ranges)[i],ids[i]);
    d=m[6]-m[7];d=d*d;
    if (m[1]==0){st="-";}else{st="+";}
    all_id=append(all_id,ids[i]);
    all_st=append(all_st,st);
    all_i=append(all_i,i);
    if (d==0){
     all_s=append(all_s,m[3]);
     all_e=append(all_e,m[2]);
    } else {
      if (1){#span >=1 intron
       all_s=append(all_s,m[3]);
       all_e=append(all_e,m[4]);
       all_id=append(all_id,ids[i]);
       all_s=append(all_s,m[5]);
       all_e=append(all_e,m[2]);   
       all_st=append(all_st,st);
       all_i=append(all_i,i);
      }else{
       print (c("Err cluster spans 2 introns",i))
      }
    }
 }
 return(cbind(all_id,all_s,all_e,all_st,all_i))
}


mm=mapAllTrRegions2Genome(gr_ranges)
write.table(mm,"IFN-mapped2.csv", sep="\t", row.names=FALSE)
