library(doParallel)

# create a cluster with cl nodes
nthr=10
cl <- makeCluster(nthr,outfile="par.log")
registerDoParallel(cl)

# load the specific libraries to all cluster nodes
clusterEvalQ(cl, {library(GenomicRanges);library(GenomicAlignments);library(GenomicFeatures)})
nthr=10



 
mapTrRegion2Genome=function(ts,te,trid,txdb){
#print(txdb)
#print(trid)
ex=exons(txdb, filter=list(tx_name = trid ));
# print("A");
 r=ranges(ex);
 s=start(r);
 e=end(r);rl=NULL;
 st=as.logical(strand(ex)[1]=="-");
 st2=as.character(strand(ex)[1]);
 sn=as.character(seqnames(ex)[1]);
 if (sn=="MT"){sn="M";}
 sn=paste("chr",sn,sep="");
 ne=length(ex);
 sok=0;
# print(c("B",s,e,st2)); 
 if (st) {
  for (j in (ne):1) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (sok==1){
    rl=append(rl,end);
   }
   if (ts <= width) {
     if (sok==0){
       mts=end-ts;
       mtsa=start;
       rl=c(mts+1);
       sok=1;sei=j;
     }
   } else{
     ts = ts- width;
   }
   if (te <= width)  {
     rl=append(rl,end-te+1);	
     break;
   } else {
     te=te-width;
     if (sok==1){
      rl=append(rl,start);
     }
   }
  }   
  return(c(st2,sn,rl));
} else {
   for (j in 1:ne) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (sok==1){
    rl=append(rl,start);
   }
#   print (c(j,ts,te,start,end,sok))
   if (ts <= width) {
      if (sok==0){
        mts=start+ts;
        rl=c(mts-1);
	mtsa=end;
	sok=1;sei=j;
      }
   } else{
    ts = ts - width;
   }
   if (te <= width) {
     rl=append(rl,start+te-1);	   
     break;
   } else {     
     te=te-width;
     if (sok==1){
      rl=append(rl,end);
     }
   }
  }   
  return(c(st2,sn,rl));
 }
}

mapAllTrRegions2Genome=function(gr_ranges,txdb){
 all_s=NULL;
 all_e=NULL;
 all_st=NULL;
 all_id=NULL;
 all_sn=NULL;
 all_i=NULL;
 ids=as.vector(gr_ranges@seqnames)
 for (i in 1:length(ids)){
    m=mapTrRegion2Genome(start(gr_ranges)[i],end(gr_ranges)[i],ids[i],txdb);
    for (r in seq(3,length(m),2)){
      all_id=append(all_id,ids[i]);
      all_st=append(all_st,m[1]);
      all_i=append(all_i,i);
      all_sn=append(all_sn,m[2]);
      all_s=append(all_s,m[r]);
      all_e=append(all_e,m[r+1]);
    }
 }
 return(cbind(all_sn,all_s,all_e,all_st,all_id,all_i))
}


#list=foreach(i=1:nthr, .combine=rbind,.packages=c("mapAllTrRegions2Genome","mapTrRegion2Genome","GenomicRanges","GenomicAlignments","GenomicFeatures")) %dopar%
list=foreach(i=1:nthr, .combine=rbind,.packages=c("GenomicRanges","GenomicAlignments","GenomicFeatures")) %dopar%
{
setwd("/data/results/reference/mmu")

txdb <- makeTxDbFromGFF(file ="Mus_musculus.NCBIM37.64.gtf", format= "gtf", dataSource="Mus_musculus.NCBIM37.64", organism= "Mus musculus")
read_csv<-read.csv(file="/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings_gt5tc_gt0.25TtoC2/sh-clusters-IFN.txt2.csv.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv", sep="", header = FALSE)

m_cols<-data.frame(GeneName=read_csv[,1], CDS_start=read_csv[,2],CDS_end=read_csv[,3])
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6], mcols=m_cols )
df<-data.frame(seqnames=read_csv[,4], start=read_csv[,5], end=read_csv[,6])
gr_ranges<-makeGRangesFromDataFrame(df)
lra=length(gr_ranges)
nch=round(lra/nthr)

s=(i-1)*nch+1;
  if(i<nthr) {
   e=i*nch;
  }else{
   e=lra;
  }
  print (c(i,s,e))
  g=gr_ranges[s:e];
#  c(i,s,e)
#  mapAllTrRegions2Genome(g,txdb)
 all_s=NULL;
 all_e=NULL;
 all_st=NULL;
 all_id=NULL;
 all_sn=NULL;
 all_i=NULL;
 ids=as.vector(g@seqnames)
 for (x in 1:length(ids)){
     ts=start(g)[x];te=end(g)[x];trid=ids[x];
ex=exons(txdb, filter=list(tx_name = trid ));
# print("A");
 r=ranges(ex);
 s=start(r);
 e=end(r);rl=NULL;
 st=as.logical(strand(ex)[1]=="-");
 st2=as.character(strand(ex)[1]);
 sn=as.character(seqnames(ex)[1]);
 if (sn=="MT"){sn="M";}
 sn=paste("chr",sn,sep="");
 ne=length(ex);
 sok=0;
# print(c("B",s,e,st2)); 
 if (st) {
  for (j in (ne):1) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (sok==1){
    rl=append(rl,end);
   }
   if (ts <= width) {
     if (sok==0){
       mts=end-ts;
       mtsa=start;
       rl=c(mts+1);
       sok=1;sei=j;
     }
   } else{
     ts = ts- width;
   }
   if (te <= width)  {
     rl=append(rl,end-te+1);	
     break;
   } else {
     te=te-width;
     if (sok==1){
      rl=append(rl,start);
     }
   }
  }   
  m=(c(st2,sn,rl));
} else {
   for (j in 1:ne) {
   start = s[j]
   end = e[j];
   width = end - start + 1;
   if (sok==1){
    rl=append(rl,start);
   }
#   print (c(j,ts,te,start,end,sok))
   if (ts <= width) {
      if (sok==0){
        mts=start+ts;
        rl=c(mts-1);
	mtsa=end;
	sok=1;sei=j;
      }
   } else{
    ts = ts - width;
   }
   if (te <= width) {
     rl=append(rl,start+te-1);	   
     break;
   } else {     
     te=te-width;
     if (sok==1){
      rl=append(rl,end);
     }
   }
  }   
       m=(c(st2,sn,rl));
 }

    for (r in seq(3,length(m),2)){
      all_id=append(all_id,ids[x]);
      all_st=append(all_st,m[1]);
      all_i=append(all_i,x);
      all_sn=append(all_sn,m[2]);
      all_s=append(all_s,m[r]);
      all_e=append(all_e,m[r+1]);
    }
 }
 cbind(all_sn,all_s,all_e,all_st,all_id,all_i)
}

# stop the cluster
stopCluster(cl)

write.table(list,"IFN-mapped3.csv", sep="\t", row.names=FALSE)





mm=mapAllTrRegions2Genome(gr_ranges)
write.table(mm,"IFN-mapped2.csv", sep="\t", row.names=FALSE)
