setwd("~/giagkas/RNAseq_POLYseq/")
rp1=read.table("RNA-Poly_0htotal.txt", header = T, sep = "\t")
rp2=read.table("RNA-Poly_2htotal.txt", header = T, sep = "\t")
rp3=read.table("RNA-Poly_6htotal.txt", header = T, sep = "\t")
rp4=read.table("RNA-Poly_IFNtotal.txt", header = T, sep = "\t")
rp5=read.table("RNA-Poly_IL4total.txt", header = T, sep = "\t")

#DEGS
r1=subset(rp1, (rp1$wtRPKM.RNA>=1)&(rp1$PValue.RNA<0.05)&(abs(rp1$logFC.RNA)>log2(1.5)))
write.table(r1, file = "0hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r2=subset(rp2, (rp2$wtRPKM.RNA>=1)&(rp2$PValue.RNA<0.05)&(abs(rp2$logFC.RNA)>log2(1.5)))
write.table(r2, file = "2hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r3=subset(rp3, (rp3$wtRPKM.RNA>=1)&(rp3$PValue.RNA<0.05)&(abs(rp3$logFC.RNA)>log2(1.5)))
write.table(r3, file = "6hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r4=subset(rp4, (rp4$wtRPKM.RNA>=1)&(rp4$PValue.RNA<0.05)&(abs(rp4$logFC.RNA)>log2(1.5)))
write.table(r4, file = "IFNRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r5=subset(rp5, (rp5$wtRPKM.RNA>=1)&(rp5$PValue.RNA<0.05)&(abs(rp5$logFC.RNA)>log2(1.5)))
write.table(r5, file = "IL4RNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")

p1=subset(rp1, (rp1$wtRPKM.POLY>=1)&(rp1$wtRPKM.RNA>=1)&(rp1$PValue.POLY<0.05)&(abs(rp1$logFC.POLY)>log2(1.5)))
write.table(p1, file = "0hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p2=subset(rp2, (rp2$wtRPKM.POLY>=1)&(rp2$wtRPKM.RNA>=1)&(rp2$PValue.POLY<0.05)&(abs(rp2$logFC.POLY)>log2(1.5)))
write.table(p2, file = "2hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p3=subset(rp3, (rp3$wtRPKM.POLY>=1)&(rp3$wtRPKM.RNA>=1)&(rp3$PValue.POLY<0.05)&(abs(rp3$logFC.POLY)>log2(1.5)))
write.table(p3, file = "6hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p4=subset(rp4, (rp4$wtRPKM.POLY>=1)&(rp4$wtRPKM.RNA>=1)&(rp4$PValue.POLY<0.05)&(abs(rp4$logFC.POLY)>log2(1.5)))
write.table(p4, file = "IFNPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p5=subset(rp5, (rp5$wtRPKM.POLY>=1)&(rp5$wtRPKM.RNA>=1)&(rp5$PValue.POLY<0.05)&(abs(rp5$logFC.POLY)>log2(1.5)))
write.table(p5, file = "IL4POLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")

r1g=as.data.frame(r1$GeneName)
colnames(r1g)="GeneName"
r2g=as.data.frame(r2$GeneName)
colnames(r2g)="GeneName"
r3g=as.data.frame(r3$GeneName)
colnames(r3g)="GeneName"
r4g=as.data.frame(r4$GeneName)
colnames(r4g)="GeneName"
r5g=as.data.frame(r5$GeneName)
colnames(r5g)="GeneName"

p1g=as.data.frame(p1$GeneName)
colnames(p1g)="GeneName"
p2g=as.data.frame(p2$GeneName)
colnames(p2g)="GeneName"
p3g=as.data.frame(p3$GeneName)
colnames(p3g)="GeneName"
p4g=as.data.frame(p4$GeneName)
colnames(p4g)="GeneName"
p5g=as.data.frame(p5$GeneName)
colnames(p5g)="GeneName"


gn=rbind(r1g,r2g,r3g,r4g,r5g,p1g,p2g,p3g,p4g,p5g)
gn=subset(gn , !duplicated(gn$GeneName))

r1s=subset(rp1, select = c(1,2,4,21))
colnames(r1s)=c("GeneName", "logFC_RNA_0", "pval_RNA_0", "wtRPKM_RNA_0")
r2s=subset(rp2, select = c(1,2,4,21))
colnames(r2s)=c("GeneName", "logFC_RNA_2", "pval_RNA_2", "wtRPKM_RNA_2")
r3s=subset(rp3, select = c(1,2,4,21))
colnames(r3s)=c("GeneName", "logFC_RNA_6", "pval_RNA_6", "wtRPKM_RNA_6")
r4s=subset(rp4, select = c(1,2,4,21))
colnames(r4s)=c("GeneName", "logFC_RNA_IFN", "pval_RNA_IFN", "wtRPKM_RNA_IFN")
r5s=subset(rp5, select = c(1,2,4,21))
colnames(r5s)=c("GeneName", "logFC_RNA_IL4", "pval_RNA_IL4", "wtRPKM_RNA_IL4")


p1s=subset(rp1, select = c(1,10,12,23))
colnames(p1s)=c("GeneName", "logFC_POLY_0", "pval_POLY_0", "wtRPKM_POLY_0")
p2s=subset(rp2, select = c(1,10,12,23))
colnames(p2s)=c("GeneName", "logFC_POLY_2", "pval_POLY_2", "wtRPKM_POLY_2")
p3s=subset(rp3, select = c(1,10,12,23))
colnames(p3s)=c("GeneName", "logFC_POLY_6", "pval_POLY_6", "wtRPKM_POLY_6")
p4s=subset(rp4, select = c(1,10,12,23))
colnames(p4s)=c("GeneName", "logFC_POLY_IFN", "pval_POLY_IFN", "wtRPKM_POLY_IFN")
p5s=subset(rp5, select = c(1,10,12,23))
colnames(p5s)=c("GeneName", "logFC_POLY_IL4", "pval_POLY_IL4", "wtRPKM_POLY_IL4")



mr1=merge(gn, r1s, by="GeneName", all.x = T)
mr2=merge(mr1, r2s, by="GeneName", all.x = T)
mr3=merge(mr2, r3s, by="GeneName", all.x = T)
mr4=merge(mr3, r4s, by="GeneName", all.x = T)
mr5=merge(mr4, r5s, by="GeneName", all.x = T)

mr6=merge(mr5, p1s, by="GeneName", all.x = T)
mr7=merge(mr6, p2s, by="GeneName", all.x = T)
mr8=merge(mr7, p3s, by="GeneName", all.x = T)
mr9=merge(mr8, p4s, by="GeneName", all.x = T)
mr10=merge(mr9, p5s, by="GeneName", all.x = T)

#read gene biotype annotation
setwd("~/Documents/HuR_PARclip_withHeaders/reference_gtf/")
library(GenomicRanges)
library(rtracklayer)
GTFfile = "Mus_musculus.NCBIM37.64-toMM9.gtf"
GTF <- import.gff(GTFfile, format="gtf")
gna=elementMetadata(GTF)$gene_name
gbt=elementMetadata(GTF)$gene_biotype
z=cbind(as.data.frame(gna),as.data.frame(gbt))
z = z[!duplicated(z$gna),]
colnames(z)=c("GeneName", "Biotype")

####
setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
tt=merge(z,mr10, by="GeneName", all.y=T)
write.table(tt, file = "total_significant.txt", quote = F, row.names = F, sep="\t")


r02=nrow(subset(r1, r1$GeneName%in%r2$GeneName))
r06=nrow(subset(r1, r1$GeneName%in%r3$GeneName))
r0IF=nrow(subset(r1, r1$GeneName%in%r4$GeneName))
r0IL=nrow(subset(r1, r1$GeneName%in%r5$GeneName))
r26=nrow(subset(r2, r2$GeneName%in%r3$GeneName))
r2IF=nrow(subset(r2, r2$GeneName%in%r4$GeneName))
r2IL=nrow(subset(r2, r2$GeneName%in%r5$GeneName))
r6IF=nrow(subset(r3, r3$GeneName%in%r4$GeneName))
r6IL=nrow(subset(r3, r3$GeneName%in%r5$GeneName))
rII=nrow(subset(r4, r4$GeneName%in%r5$GeneName))

p02=nrow(subset(p1, p1$GeneName%in%p2$GeneName))
p06=nrow(subset(p1, p1$GeneName%in%p3$GeneName))
p0IF=nrow(subset(p1, p1$GeneName%in%p4$GeneName))
p0IL=nrow(subset(p1, p1$GeneName%in%p5$GeneName))
p26=nrow(subset(p2, p2$GeneName%in%p3$GeneName))
p2IF=nrow(subset(p2, p2$GeneName%in%p4$GeneName))
p2IL=nrow(subset(p2, p2$GeneName%in%p5$GeneName))
p6IF=nrow(subset(p3, p3$GeneName%in%p4$GeneName))
p6IL=nrow(subset(p3, p3$GeneName%in%p5$GeneName))
pII=nrow(subset(p4, p4$GeneName%in%p5$GeneName))

#RNA DEGs overlaps total
fr1=data.frame(nrow(r1),r02,r06,r0IF,r0IL)
colnames(fr1)=c("0h", "2h", "6h", "IFN", "IL4") 
fr2=data.frame(NA,nrow(r2),r26,r2IF,r2IL)
colnames(fr2)=c("0h", "2h", "6h", "IFN", "IL4") 
fr3=data.frame(NA,NA,nrow(r3),r6IF,r6IL)
colnames(fr3)=c("0h", "2h", "6h", "IFN", "IL4") 
fr4=data.frame(NA,NA,NA,nrow(r4),rII)
colnames(fr4)=c("0h", "2h", "6h", "IFN", "IL4") 
fr5=data.frame(NA,NA,NA,NA,nrow(r5))
colnames(fr5)=c("0h", "2h", "6h", "IFN", "IL4") 

df=rbind(fr1,fr2,fr3,fr4,fr5)
rownames(df)=c("0h", "2h", "6h", "IFN", "IL4") 

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(df, file = "overlaps_DEGs_RNAseqtotal.txt", quote = F, sep = "\t")

#RNA DEGs overlaps total percent
fr1=data.frame(nrow(r1)/nrow(r1),r02/nrow(r1),r06/nrow(r1),r0IF/nrow(r1),r0IL/nrow(r1))
colnames(fr1)=c("0h", "2h", "6h", "IFN", "IL4") 
fr2=data.frame(r02/nrow(r2),nrow(r2)/nrow(r2),r26/nrow(r2),r2IF/nrow(r2),r2IL/nrow(r2))
colnames(fr2)=c("0h", "2h", "6h", "IFN", "IL4") 
fr3=data.frame(r06/nrow(r3),r26/nrow(r3),nrow(r3)/nrow(r3),r6IF/nrow(r3),r6IL/nrow(r3))
colnames(fr3)=c("0h", "2h", "6h", "IFN", "IL4") 
fr4=data.frame(r0IF/nrow(r4),r2IF/nrow(r4),r6IF/nrow(r4),nrow(r4)/nrow(r4),rII/nrow(r4))
colnames(fr4)=c("0h", "2h", "6h", "IFN", "IL4") 
fr5=data.frame(r0IL/nrow(r5),r2IL/nrow(r5),r6IL/nrow(r5),rII/nrow(r5),nrow(r5)/nrow(r5))
colnames(fr5)=c("0h", "2h", "6h", "IFN", "IL4") 

dfp=rbind(fr1,fr2,fr3,fr4,fr5)
rownames(dfp)=c("0h", "2h", "6h", "IFN", "IL4") 
dfp=round(dfp,3)

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(dfp, file = "overlaps_DEGs_RNAseqtotalpercent.txt", quote = F, sep = "\t")


#Polysomes DEGs overlaps total
fp1=data.frame(nrow(p1),p02,p06,p0IF,p0IL)
colnames(fp1)=c("0h", "2h", "6h", "IFN", "IL4") 
fp2=data.frame(NA,nrow(p2),p26,p2IF,p2IL)
colnames(fp2)=c("0h", "2h", "6h", "IFN", "IL4") 
fp3=data.frame(NA,NA,nrow(p3),p6IF,p6IL)
colnames(fp3)=c("0h", "2h", "6h", "IFN", "IL4") 
fp4=data.frame(NA,NA,NA,nrow(p4),pII)
colnames(fp4)=c("0h", "2h", "6h", "IFN", "IL4") 
fp5=data.frame(NA,NA,NA,NA,nrow(p5))
colnames(fp5)=c("0h", "2h", "6h", "IFN", "IL4") 

df=rbind(fp1,fp2,fp3,fp4,fp5)
rownames(df)=c("0h", "2h", "6h", "IFN", "IL4") 

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(df, file = "overlaps_DEGs_POLYseqtotal.txt", quote = F, sep = "\t")


#poly DEGs overlaps total percent
fp1=data.frame(nrow(p1)/nrow(p1),p02/nrow(p1),p06/nrow(p1),p0IF/nrow(p1),p0IL/nrow(p1))
colnames(fp1)=c("0h", "2h", "6h", "IFN", "IL4") 
fp2=data.frame(p02/nrow(p2),nrow(p2)/nrow(p2),p26/nrow(p2),p2IF/nrow(p2),p2IL/nrow(p2))
colnames(fp2)=c("0h", "2h", "6h", "IFN", "IL4") 
fp3=data.frame(p06/nrow(p3),p26/nrow(p3),nrow(p3)/nrow(p3),p6IF/nrow(p3),p6IL/nrow(p3))
colnames(fp3)=c("0h", "2h", "6h", "IFN", "IL4") 
fp4=data.frame(p0IF/nrow(p4),p2IF/nrow(p4),p6IF/nrow(p4),nrow(p4)/nrow(p4),pII/nrow(p4))
colnames(fp4)=c("0h", "2h", "6h", "IFN", "IL4") 
fp5=data.frame(p0IL/nrow(p5),p2IL/nrow(p5),p6IL/nrow(p5),pII/nrow(p5),nrow(p5)/nrow(p5))
colnames(fp5)=c("0h", "2h", "6h", "IFN", "IL4") 

dfp=rbind(fp1,fp2,fp3,fp4,fp5)
rownames(dfp)=c("0h", "2h", "6h", "IFN", "IL4") 
dfp=round(dfp,3)

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(dfp, file = "overlaps_DEGs_POLYseqtotalpercent.txt", quote = F, sep = "\t")





#### protein coding
setwd("~/giagkas/RNAseq_POLYseq/")
rp1=read.table("RNA-Poly_0htotal.txt", header = T, sep = "\t")
rp2=read.table("RNA-Poly_2htotal.txt", header = T, sep = "\t")
rp3=read.table("RNA-Poly_6htotal.txt", header = T, sep = "\t")
rp4=read.table("RNA-Poly_IFNtotal.txt", header = T, sep = "\t")
rp5=read.table("RNA-Poly_IL4total.txt", header = T, sep = "\t")

#DEGS protein coding
r1=subset(rp1, (rp1$wtRPKM.RNA>=1)&(rp1$PValue.RNA<0.05)&(abs(rp1$logFC.RNA)>log2(1.5))&(rp1$Biotype=="protein_coding"))
r2=subset(rp2, (rp2$wtRPKM.RNA>=1)&(rp2$PValue.RNA<0.05)&(abs(rp2$logFC.RNA)>log2(1.5))&(rp2$Biotype=="protein_coding"))
r3=subset(rp3, (rp3$wtRPKM.RNA>=1)&(rp3$PValue.RNA<0.05)&(abs(rp3$logFC.RNA)>log2(1.5))&(rp3$Biotype=="protein_coding"))
r4=subset(rp4, (rp4$wtRPKM.RNA>=1)&(rp4$PValue.RNA<0.05)&(abs(rp4$logFC.RNA)>log2(1.5))&(rp4$Biotype=="protein_coding"))
r5=subset(rp5, (rp5$wtRPKM.RNA>=1)&(rp5$PValue.RNA<0.05)&(abs(rp5$logFC.RNA)>log2(1.5))&(rp5$Biotype=="protein_coding"))

p1=subset(rp1, (rp1$wtRPKM.POLY>=1)&(rp1$wtRPKM.RNA>=1)&(rp1$PValue.POLY<0.05)&(abs(rp1$logFC.POLY)>log2(1.5))&(rp1$Biotype=="protein_coding"))
p2=subset(rp2, (rp2$wtRPKM.POLY>=1)&(rp2$wtRPKM.RNA>=1)&(rp2$PValue.POLY<0.05)&(abs(rp2$logFC.POLY)>log2(1.5))&(rp2$Biotype=="protein_coding"))
p3=subset(rp3, (rp3$wtRPKM.POLY>=1)&(rp3$wtRPKM.RNA>=1)&(rp3$PValue.POLY<0.05)&(abs(rp3$logFC.POLY)>log2(1.5))&(rp3$Biotype=="protein_coding"))
p4=subset(rp4, (rp4$wtRPKM.POLY>=1)&(rp4$wtRPKM.RNA>=1)&(rp4$PValue.POLY<0.05)&(abs(rp4$logFC.POLY)>log2(1.5))&(rp4$Biotype=="protein_coding"))
p5=subset(rp5, (rp5$wtRPKM.POLY>=1)&(rp5$wtRPKM.RNA>=1)&(rp5$PValue.POLY<0.05)&(abs(rp5$logFC.POLY)>log2(1.5))&(rp5$Biotype=="protein_coding"))



r02=nrow(subset(r1, r1$GeneName%in%r2$GeneName))
r06=nrow(subset(r1, r1$GeneName%in%r3$GeneName))
r0IF=nrow(subset(r1, r1$GeneName%in%r4$GeneName))
r0IL=nrow(subset(r1, r1$GeneName%in%r5$GeneName))
r26=nrow(subset(r2, r2$GeneName%in%r3$GeneName))
r2IF=nrow(subset(r2, r2$GeneName%in%r4$GeneName))
r2IL=nrow(subset(r2, r2$GeneName%in%r5$GeneName))
r6IF=nrow(subset(r3, r3$GeneName%in%r4$GeneName))
r6IL=nrow(subset(r3, r3$GeneName%in%r5$GeneName))
rII=nrow(subset(r4, r4$GeneName%in%r5$GeneName))

p02=nrow(subset(p1, p1$GeneName%in%p2$GeneName))
p06=nrow(subset(p1, p1$GeneName%in%p3$GeneName))
p0IF=nrow(subset(p1, p1$GeneName%in%p4$GeneName))
p0IL=nrow(subset(p1, p1$GeneName%in%p5$GeneName))
p26=nrow(subset(p2, p2$GeneName%in%p3$GeneName))
p2IF=nrow(subset(p2, p2$GeneName%in%p4$GeneName))
p2IL=nrow(subset(p2, p2$GeneName%in%p5$GeneName))
p6IF=nrow(subset(p3, p3$GeneName%in%p4$GeneName))
p6IL=nrow(subset(p3, p3$GeneName%in%p5$GeneName))
pII=nrow(subset(p4, p4$GeneName%in%p5$GeneName))

#RNA DEGs overlaps 
fr1=data.frame(nrow(r1),r02,r06,r0IF,r0IL)
colnames(fr1)=c("0h", "2h", "6h", "IFN", "IL4") 
fr2=data.frame(NA,nrow(r2),r26,r2IF,r2IL)
colnames(fr2)=c("0h", "2h", "6h", "IFN", "IL4") 
fr3=data.frame(NA,NA,nrow(r3),r6IF,r6IL)
colnames(fr3)=c("0h", "2h", "6h", "IFN", "IL4") 
fr4=data.frame(NA,NA,NA,nrow(r4),rII)
colnames(fr4)=c("0h", "2h", "6h", "IFN", "IL4") 
fr5=data.frame(NA,NA,NA,NA,nrow(r5))
colnames(fr5)=c("0h", "2h", "6h", "IFN", "IL4") 

df=rbind(fr1,fr2,fr3,fr4,fr5)
rownames(df)=c("0h", "2h", "6h", "IFN", "IL4") 

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(df, file = "overlaps_DEGs_RNAseqPC.txt", quote = F, sep = "\t")

#RNA DEGs overlaps  percent
fr1=data.frame(nrow(r1)/nrow(r1),r02/nrow(r1),r06/nrow(r1),r0IF/nrow(r1),r0IL/nrow(r1))
colnames(fr1)=c("0h", "2h", "6h", "IFN", "IL4") 
fr2=data.frame(r02/nrow(r2),nrow(r2)/nrow(r2),r26/nrow(r2),r2IF/nrow(r2),r2IL/nrow(r2))
colnames(fr2)=c("0h", "2h", "6h", "IFN", "IL4") 
fr3=data.frame(r06/nrow(r3),r26/nrow(r3),nrow(r3)/nrow(r3),r6IF/nrow(r3),r6IL/nrow(r3))
colnames(fr3)=c("0h", "2h", "6h", "IFN", "IL4") 
fr4=data.frame(r0IF/nrow(r4),r2IF/nrow(r4),r6IF/nrow(r4),nrow(r4)/nrow(r4),rII/nrow(r4))
colnames(fr4)=c("0h", "2h", "6h", "IFN", "IL4") 
fr5=data.frame(r0IL/nrow(r5),r2IL/nrow(r5),r6IL/nrow(r5),rII/nrow(r5),nrow(r5)/nrow(r5))
colnames(fr5)=c("0h", "2h", "6h", "IFN", "IL4") 

dfp=rbind(fr1,fr2,fr3,fr4,fr5)
rownames(dfp)=c("0h", "2h", "6h", "IFN", "IL4") 
dfp=round(dfp,3)

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(dfp, file = "overlaps_DEGs_RNAseqPCpercent.txt", quote = F, sep = "\t")


#Polysomes DEGs overlaps 
fp1=data.frame(nrow(p1),p02,p06,p0IF,p0IL)
colnames(fp1)=c("0h", "2h", "6h", "IFN", "IL4") 
fp2=data.frame(NA,nrow(p2),p26,p2IF,p2IL)
colnames(fp2)=c("0h", "2h", "6h", "IFN", "IL4") 
fp3=data.frame(NA,NA,nrow(p3),p6IF,p6IL)
colnames(fp3)=c("0h", "2h", "6h", "IFN", "IL4") 
fp4=data.frame(NA,NA,NA,nrow(p4),pII)
colnames(fp4)=c("0h", "2h", "6h", "IFN", "IL4") 
fp5=data.frame(NA,NA,NA,NA,nrow(p5))
colnames(fp5)=c("0h", "2h", "6h", "IFN", "IL4") 

df=rbind(fp1,fp2,fp3,fp4,fp5)
rownames(df)=c("0h", "2h", "6h", "IFN", "IL4") 


setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(df, file = "overlaps_DEGs_POLYseqPC.txt", quote = F, sep = "\t")

#poly DEGs overlaps percent
fp1=data.frame(nrow(p1)/nrow(p1),p02/nrow(p1),p06/nrow(p1),p0IF/nrow(p1),p0IL/nrow(p1))
colnames(fp1)=c("0h", "2h", "6h", "IFN", "IL4") 
fp2=data.frame(p02/nrow(p2),nrow(p2)/nrow(p2),p26/nrow(p2),p2IF/nrow(p2),p2IL/nrow(p2))
colnames(fp2)=c("0h", "2h", "6h", "IFN", "IL4") 
fp3=data.frame(p06/nrow(p3),p26/nrow(p3),nrow(p3)/nrow(p3),p6IF/nrow(p3),p6IL/nrow(p3))
colnames(fp3)=c("0h", "2h", "6h", "IFN", "IL4") 
fp4=data.frame(p0IF/nrow(p4),p2IF/nrow(p4),p6IF/nrow(p4),nrow(p4)/nrow(p4),pII/nrow(p4))
colnames(fp4)=c("0h", "2h", "6h", "IFN", "IL4") 
fp5=data.frame(p0IL/nrow(p5),p2IL/nrow(p5),p6IL/nrow(p5),pII/nrow(p5),nrow(p5)/nrow(p5))
colnames(fp5)=c("0h", "2h", "6h", "IFN", "IL4") 

dfp=rbind(fp1,fp2,fp3,fp4,fp5)
rownames(dfp)=c("0h", "2h", "6h", "IFN", "IL4") 
dfp=round(dfp,3)

setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
write.table(dfp, file = "overlaps_DEGs_POLYseqPCpercent.txt", quote = F, sep = "\t")


#### overlaps in expression (WT)
r1=subset(rp1, (rp1$wtRPKM.RNA>=1)&(rp1$Biotype=="protein_coding"))
r2=subset(rp2, (rp2$wtRPKM.RNA>=1)&(rp2$Biotype=="protein_coding"))
r3=subset(rp3, (rp3$wtRPKM.RNA>=1)&(rp3$Biotype=="protein_coding"))
r4=subset(rp4, (rp4$wtRPKM.RNA>=1)&(rp4$Biotype=="protein_coding"))
r5=subset(rp5, (rp5$wtRPKM.RNA>=1)&(rp5$Biotype=="protein_coding"))

p1=subset(rp1, (rp1$wtRPKM.POLY>=1)&(rp1$Biotype=="protein_coding"))
p2=subset(rp2, (rp2$wtRPKM.POLY>=1)&(rp2$Biotype=="protein_coding"))
p3=subset(rp3, (rp3$wtRPKM.POLY>=1)&(rp3$Biotype=="protein_coding"))
p4=subset(rp4, (rp4$wtRPKM.POLY>=1)&(rp4$Biotype=="protein_coding"))
p5=subset(rp5, (rp5$wtRPKM.POLY>=1)&(rp5$Biotype=="protein_coding"))


m1=nrow(subset(r1, p1$GeneName%in%r1$GeneName))
m2=nrow(subset(r2, p2$GeneName%in%r2$GeneName))
m3=nrow(subset(r3, p3$GeneName%in%r3$GeneName))
m4=nrow(subset(r4, p4$GeneName%in%r4$GeneName))
m5=nrow(subset(r5, p5$GeneName%in%r5$GeneName))

#install.packages('VennDiagram')
library(VennDiagram)

png(filename = "0h_AbundVsTranslation_WT_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r1), area2 = nrow(p1), 
                   cross.area  = m1, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "2h_AbundVsTranslation_WT_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r2), area2 = nrow(p2), 
                   cross.area  = m2, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()


png(filename = "6h_AbundVsTranslation_WT_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r3), area2 = nrow(p3), 
                   cross.area  = m3, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "IFN_AbundVsTranslation_WT_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r4), area2 = nrow(p4), 
                   cross.area  = m4, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "IL4_AbundVsTranslation_WT_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r5), area2 = nrow(p5), 
                   cross.area  = m5, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()





#### overlaps in expression (KO)
r1=subset(rp1, (rp1$koRPKM.RNA>=1)&(rp1$Biotype=="protein_coding"))
r2=subset(rp2, (rp2$koRPKM.RNA>=1)&(rp2$Biotype=="protein_coding"))
r3=subset(rp3, (rp3$koRPKM.RNA>=1)&(rp3$Biotype=="protein_coding"))
r4=subset(rp4, (rp4$koRPKM.RNA>=1)&(rp4$Biotype=="protein_coding"))
r5=subset(rp5, (rp5$koRPKM.RNA>=1)&(rp5$Biotype=="protein_coding"))

p1=subset(rp1, (rp1$koRPKM.POLY>=1)&(rp1$Biotype=="protein_coding"))
p2=subset(rp2, (rp2$koRPKM.POLY>=1)&(rp2$Biotype=="protein_coding"))
p3=subset(rp3, (rp3$koRPKM.POLY>=1)&(rp3$Biotype=="protein_coding"))
p4=subset(rp4, (rp4$koRPKM.POLY>=1)&(rp4$Biotype=="protein_coding"))
p5=subset(rp5, (rp5$koRPKM.POLY>=1)&(rp5$Biotype=="protein_coding"))


m1=nrow(subset(r1, p1$GeneName%in%r1$GeneName))
m2=nrow(subset(r2, p2$GeneName%in%r2$GeneName))
m3=nrow(subset(r3, p3$GeneName%in%r3$GeneName))
m4=nrow(subset(r4, p4$GeneName%in%r4$GeneName))
m5=nrow(subset(r5, p5$GeneName%in%r5$GeneName))

#install.packages('VennDiagram')
library(VennDiagram)

png(filename = "0h_AbundVsTranslation_KO_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r1), area2 = nrow(p1), 
                   cross.area  = m1, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "2h_AbundVsTranslation_KO_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r2), area2 = nrow(p2), 
                   cross.area  = m2, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()


png(filename = "6h_AbundVsTranslation_KO_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r3), area2 = nrow(p3), 
                   cross.area  = m3, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "IFN_AbundVsTranslation_KO_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r4), area2 = nrow(p4), 
                   cross.area  = m4, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()

png(filename = "IL4_AbundVsTranslation_KO_PC.png", width = 800, height = 800)
grid.newpage()
draw.pairwise.venn(area1 = nrow(r5), area2 = nrow(p5), 
                   cross.area  = m5, category = c("Abundance", "Translation"), lty = "blank", 
                   fill = c("skyblue", "pink1"))

dev.off()


