library(GenomicRanges)
library(GenomicAlignments)
library(GenomicFeatures)

source("/data/results/tools/r/GenomicFeatures/R/my-coordinate-mapping-methods.R")
setwd("/data/results/reference/mmu")

txdb <- makeTxDbFromGFF(file ="Mus_musculus.NCBIM37.64.gtf", format= "gtf", dataSource="Mus_musculus.NCBIM37.64", organism= "Mus musculus")
read_csv<-read.csv(file="/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2/sh-clusters-IFN.txt2.csv.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv", sep="", header = FALSE)

m_cols<-data.frame(GeneName=read_csv[,1], CDS_start=read_csv[,2],CDS_end=read_csv[,3])
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6], mcols=m_cols )
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6])
gr_ranges<-makeGRangesFromDataFrame(df)

tr <- transcripts(txdb)

names(tr)=as.list(tr$tx_name)
names(gr_ranges)=as.list(gr_ranges@seqnames)


mapAllTr2Genome=function(gr_ranges){
 all_s=NULL;
 all_e=NULL;
 all_st=NULL;
 ids=as.vector(gr_ranges@seqnames)
 for (i in 1:length(ids)){
    m=mapTr2Genome(start(gr_ranges)[i],ids[i]);
    all_s=append(all_s,m[2]);
    m=mapTr2Genome(end(gr_ranges)[i],ids[i]);
    all_e=append(all_e,m[2]);
    all_st=append(all_st,m[1]);
 }
 return(cbind(ids,all_c))
}


mapTr2Genome=function(ts,trid){
 ex=exons(txdb, filter=list(tx_name = trid ));
 r=ranges(ex);
 s=start(r);
 e=end(r);
 st=as.logical(strand(ex)[1]=="-");
 ne=length(ex);
 if (st) {
  for (j in (ne):1) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (ts <= width)  break;
   ts = ts- width;
   }
  } else {
   for (j in 1:ne) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (ts <= width)  break;
    ts = ts - width;
   }
  }
  ts=ts-1;
 if (st=="-") {
   return(c(st,end-ts));
 }  else{
    return (c(st,start+ts));
 }
}


mapTrRegion2Genome=function(ts,te,trid){
 ex=exons(txdb, filter=list(tx_name = trid ));
 r=ranges(ex);
 s=start(r);
 e=end(r);
 st=as.logical(strand(ex)[1]=="-");
 ne=length(ex);
 sok=0;
 if (st) {
  for (j in (ne):1) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (ts <= width) {
      if (sok==0){
        mts=end-ts;
	mtsa=start;
	sok=1;sei=j;
      }
   } else{
     ts = ts- width;
   }
   if (te <= width)  break;
   te=te-width;
   }   
   return(c(st,end-te+1,mts+1,mtsa,end,sei,j));
} else {
   for (j in 1:ne) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
#   print (c(j,ts,te,start,end,sok))
   if (ts <= width) {
      if (sok==0){
        mts=start+ts;
	mtsa=end;
	sok=1;sei=j;
      }
   } else{
    ts = ts - width;
   }
   if (te <= width)  break;
   te=te-width;
  }   
  return(c(st,start+te-1,mts-1,mtsa,start,sei,j));
 }
}

mapAllTrRegions2Genome=function(gr_ranges){
 all_s=NULL;
 all_e=NULL;
 all_st=NULL;
 all_id=NULL;
 all_i=NULL;
 ids=as.vector(gr_ranges@seqnames)
 for (i in 1:length(ids)){
 print (i);
    m=mapTrRegion2Genome(start(gr_ranges)[i],end(gr_ranges)[i],ids[i]);
    d=m[6]-m[7];d=d*d;
    if (m[1]==0){st="-";}else{st="+";}
    all_id=append(all_id,ids[i]);
    all_st=append(all_st,st);
    all_i=append(all_i,i);
    if (d==0){
     all_s=append(all_s,m[3]);
     all_e=append(all_e,m[2]);
    } else {
      if (d==1){#span 1 intron
       all_s=append(all_s,m[3]);
       all_e=append(all_e,m[4]);
       all_id=append(all_id,ids[i]);
       all_s=append(all_s,m[5]);
       all_e=append(all_e,m[2]);   
       all_st=append(all_st,st);
       all_i=append(all_i,i);
      }else{
       print (c("Err cluster spans 2 introns",i))
      }
    }
 }
 return(cbind(all_id,all_s,all_e,all_st,all_i))
}
write.table(mm,"IFN-mapped.csv", sep="\t", row.names=FALSE)


mapTrRegion2Genome(803,832,"ENSMUST00000013766");

r=mapFromTranscripts(gr_ranges, tr)

df2 <- data.frame(seqnames=seqnames(r),
                 starts=start(r)-1,
                 ends=end(r),
                 names=c(rep(".", length(r))),
                 scores=c(rep(".", length(r))),
                 strands=strand(r))

write.table(df2, file="~/foo.bed", quote=F, sep="\t", row.names=F, col.names=F)


GR <- transcripts(txdb, filter=list(tx_name = "ENSMUST00000013766"))
GR <- exons(txdb, filter=list(tx_name = "ENSMUST00000013766"))
> GR
GRanges object with 7 ranges and 1 metadata column:
      seqnames               ranges strand |   exon_id
         <Rle>            <IRanges>  <Rle> | <integer>
  [1]        5 [31351008, 31351129]      + |     98246
  [2]        5 [31351835, 31351953]      + |     98252
  [3]        5 [31354570, 31354641]      + |     98254
  [4]        5 [31354835, 31354906]      + |     98256
  [5]        5 [31355136, 31355257]      + |     98258
  [6]        5 [31356334, 31356431]      + |     98261
  [7]        5 [31356636, 31357006]      + |     98267
  -------


GRList <- exonsBy(txdb, by = "tx")
tx_ids <- names(GRList)
head(select(txdb, keys=tx_ids, columns="TXNAME", keytype="TXID"))