setwd("~/giagkas/RNAseq_POLYseq/")
rp1=read.table("RNA-Poly_0htotal.txt", header = T, sep = "\t")
rp2=read.table("RNA-Poly_2htotal.txt", header = T, sep = "\t")
rp3=read.table("RNA-Poly_6htotal.txt", header = T, sep = "\t")
rp4=read.table("RNA-Poly_IFNtotal.txt", header = T, sep = "\t")
rp5=read.table("RNA-Poly_IL4total.txt", header = T, sep = "\t")

#DEGS
r1=subset(rp1, (rp1$wtRPKM.RNA>=1)&(rp1$PValue.RNA<0.05)&(abs(rp1$logFC.RNA)>log2(1.5)))
write.table(r1, file = "0hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r2=subset(rp2, (rp2$wtRPKM.RNA>=1)&(rp2$PValue.RNA<0.05)&(abs(rp2$logFC.RNA)>log2(1.5)))
write.table(r2, file = "2hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r3=subset(rp3, (rp3$wtRPKM.RNA>=1)&(rp3$PValue.RNA<0.05)&(abs(rp3$logFC.RNA)>log2(1.5)))
write.table(r3, file = "6hRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r4=subset(rp4, (rp4$wtRPKM.RNA>=1)&(rp4$PValue.RNA<0.05)&(abs(rp4$logFC.RNA)>log2(1.5)))
write.table(r4, file = "IFNRNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
r5=subset(rp5, (rp5$wtRPKM.RNA>=1)&(rp5$PValue.RNA<0.05)&(abs(rp5$logFC.RNA)>log2(1.5)))
write.table(r5, file = "IL4RNAseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")

p1=subset(rp1, (rp1$wtRPKM.POLY>=1)&(rp1$wtRPKM.RNA>=1)&(rp1$PValue.POLY<0.05)&(abs(rp1$logFC.POLY)>log2(1.5)))
write.table(p1, file = "0hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p2=subset(rp2, (rp2$wtRPKM.POLY>=1)&(rp2$wtRPKM.RNA>=1)&(rp2$PValue.POLY<0.05)&(abs(rp2$logFC.POLY)>log2(1.5)))
write.table(p2, file = "2hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p3=subset(rp3, (rp3$wtRPKM.POLY>=1)&(rp3$wtRPKM.RNA>=1)&(rp3$PValue.POLY<0.05)&(abs(rp3$logFC.POLY)>log2(1.5)))
write.table(p3, file = "6hPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p4=subset(rp4, (rp4$wtRPKM.POLY>=1)&(rp4$wtRPKM.RNA>=1)&(rp4$PValue.POLY<0.05)&(abs(rp4$logFC.POLY)>log2(1.5)))
write.table(p4, file = "IFNPOLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")
p5=subset(rp5, (rp5$wtRPKM.POLY>=1)&(rp5$wtRPKM.RNA>=1)&(rp5$PValue.POLY<0.05)&(abs(rp5$logFC.POLY)>log2(1.5)))
write.table(p5, file = "IL4POLYseq_RPKM_pval_logFC_filtered.txt", row.names = F,quote = F, sep = "\t")

r1g=as.data.frame(r1$GeneName)
colnames(r1g)="GeneName"
r2g=as.data.frame(r2$GeneName)
colnames(r2g)="GeneName"
r3g=as.data.frame(r3$GeneName)
colnames(r3g)="GeneName"
r4g=as.data.frame(r4$GeneName)
colnames(r4g)="GeneName"
r5g=as.data.frame(r5$GeneName)
colnames(r5g)="GeneName"

p1g=as.data.frame(p1$GeneName)
colnames(p1g)="GeneName"
p2g=as.data.frame(p2$GeneName)
colnames(p2g)="GeneName"
p3g=as.data.frame(p3$GeneName)
colnames(p3g)="GeneName"
p4g=as.data.frame(p4$GeneName)
colnames(p4g)="GeneName"
p5g=as.data.frame(p5$GeneName)
colnames(p5g)="GeneName"


gn=rbind(r1g,r2g,r3g,r4g,r5g,p1g,p2g,p3g,p4g,p5g)
gn=subset(gn , !duplicated(gn$GeneName))

r1s=subset(rp1, select = c(1,2,4,21,20))
colnames(r1s)=c("GeneName", "logFC_RNA_0", "pval_RNA_0", "wtRPKM_RNA_0","koRPKM_RNA_0")
r2s=subset(rp2, select = c(1,2,4,21,20))
colnames(r2s)=c("GeneName", "logFC_RNA_2", "pval_RNA_2", "wtRPKM_RNA_2","koRPKM_RNA_2")
r3s=subset(rp3, select = c(1,2,4,21,20))
colnames(r3s)=c("GeneName", "logFC_RNA_6", "pval_RNA_6", "wtRPKM_RNA_6","koRPKM_RNA_6")
r4s=subset(rp4, select = c(1,2,4,21,20))
colnames(r4s)=c("GeneName", "logFC_RNA_IFN", "pval_RNA_IFN", "wtRPKM_RNA_IFN","koRPKM_RNA_IFN")
r5s=subset(rp5, select = c(1,2,4,21,20))
colnames(r5s)=c("GeneName", "logFC_RNA_IL4", "pval_RNA_IL4", "wtRPKM_RNA_IL4","koRPKM_RNA_IL4")


p1s=subset(rp1, select = c(1,10,12,23,22))
colnames(p1s)=c("GeneName", "logFC_POLY_0", "pval_POLY_0", "wtRPKM_POLY_0","koRPKM_POLY_0")
p2s=subset(rp2, select = c(1,10,12,23,22))
colnames(p2s)=c("GeneName", "logFC_POLY_2", "pval_POLY_2", "wtRPKM_POLY_2","koRPKM_POLY_2")
p3s=subset(rp3, select = c(1,10,12,23,22))
colnames(p3s)=c("GeneName", "logFC_POLY_6", "pval_POLY_6", "wtRPKM_POLY_6","koRPKM_POLY_6")
p4s=subset(rp4, select = c(1,10,12,23,22))
colnames(p4s)=c("GeneName", "logFC_POLY_IFN", "pval_POLY_IFN", "wtRPKM_POLY_IFN","koRPKM_POLY_IFN")
p5s=subset(rp5, select = c(1,10,12,23,22))
colnames(p5s)=c("GeneName", "logFC_POLY_IL4", "pval_POLY_IL4", "wtRPKM_POLY_IL4","koRPKM_POLY_IL4")



mr1=merge(gn, r1s, by="GeneName", all.x = T)
mr2=merge(mr1, r2s, by="GeneName", all.x = T)
mr3=merge(mr2, r3s, by="GeneName", all.x = T)
mr4=merge(mr3, r4s, by="GeneName", all.x = T)
mr5=merge(mr4, r5s, by="GeneName", all.x = T)

mr6=merge(mr5, p1s, by="GeneName", all.x = T)
mr7=merge(mr6, p2s, by="GeneName", all.x = T)
mr8=merge(mr7, p3s, by="GeneName", all.x = T)
mr9=merge(mr8, p4s, by="GeneName", all.x = T)
mr10=merge(mr9, p5s, by="GeneName", all.x = T)

#read gene biotype annotation
setwd("~/Documents/HuR_PARclip_withHeaders/reference_gtf/")
library(GenomicRanges)
library(rtracklayer)
GTFfile = "Mus_musculus.NCBIM37.64-toMM9.gtf"
GTF <- import.gff(GTFfile, format="gtf")
gna=elementMetadata(GTF)$gene_name
gbt=elementMetadata(GTF)$gene_biotype
z=cbind(as.data.frame(gna),as.data.frame(gbt))
z = z[!duplicated(z$gna),]
colnames(z)=c("GeneName", "Biotype")

####
setwd("~/giagkas/RNAseq_POLYseq/overlaps/")
tt=merge(z,mr10, by="GeneName", all.y=T)
write.table(tt, file = "total_significant_v2.txt", quote = F, row.names = F, sep="\t")



