setwd("~/Documents/HuR_PARclip_withHeaders/reference_gtf/")
library(GenomicFeatures)
library(GenomicRanges)
library(rtracklayer)
GTFfile = "Mus_musculus.NCBIM37.64-toMM9.gtf"
txdb <- makeTxDbFromGFF(GTFfile, format="gtf") 
utr5=fiveUTRsByTranscript(txdb, use.names=T)
utr3=threeUTRsByTranscript(txdb, use.names=T)

d1=read.csv("IL4-q20.peaks",header=T,sep="\t")
colnames(d1)[1]="Chr"
I0=IRanges(d1$Start,d1$End)
R0=Rle(d1$Chr)
S0=Rle(d1$Strand)
meta0=subset(d1, select=c("GeneName", "TranscriptID","customID"))
gr0=GRanges(R0,I0,S0, meta0)

t1=subsetByOverlaps(gr0, utr3)
n_occur <- data.frame(table(elementMetadata(t1)$TranscriptID))
n_occur[n_occur$Freq> 1,]

t2=subsetByOverlaps(gr0, utr5)
n_occur <- data.frame(table(elementMetadata(t2)$TranscriptID))
n_occur[n_occur$Freq> 1,]


t3=subsetByOverlaps(gr0,cds(txdb))
n_occur <- data.frame(table(elementMetadata(t3)$TranscriptID))
n_occur[n_occur$Freq> 1,]

dd1=as.data.frame(elementMetadata(t1)$customID)
colnames(dd1)="customID"
dd2=as.data.frame(elementMetadata(t2)$customID)
colnames(dd2)="customID"
dd3=as.data.frame(elementMetadata(t3)$customID)
colnames(dd3)="customID"

dd=d1
dd$utr3=d1$customID%in%dd1$customID
summary(dd$utr3)
dd$utr5=dd$customID%in%dd2$customID
summary(dd$utr5)
dd$cds=dd$customID%in%dd3$customID
summary(dd$cds)
setwd("~/giagkas/ASPEAK/IL4_v2/")
write.table(dd, file = "IL4-q20.peaks_annot_v2.txt", quote=F, row.names = F, sep = "\t" )