#ENSMUSG00000079102	Dab2	1797	protein_coding
#ENSMUSG00000022150	Dab2	2776.27	protein_coding
setwd("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq-vs-polysomeseq/")
require(edgeR)
bt=read.table("/data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.transcript-gene_name.txt",sep="\t")
rownames(bt)=bt$V2;
%bt=bt[,(names(x) %in% c("V2","V4")]
bt2=cbind(substring(bt[,2],2),as.character(bt[,4]))
colnames(bt2)=c("EnsemblID","biotype")
rownames(bt2)=bt2[,1]

gl=read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/deg/Mus_musculus.NCBIM37.64-toMM9.gene-lenghts.txt",sep="\t")
gl.names=gl$V1;
rownames(gl)=gl$V1
colnames(gl)=c("ID","geneID","genelength");

#poly1= read.table("gene_counts_merged_k20_NCBIM37.64.txt",header=T,sep="\t",skip=1)
poly1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/Polysomes/deg/gene_counts_merged_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
poly0 =poly1[,7:dim(poly1)[2]];
rownames(poly0)=poly1$Geneid;

col_ordering = c(1,2,11,12)#0h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_0h_replicate_I",      
"CPM_KO_0h_replicate_II",    "CPM_WT_0h_replicate_I",      
"CPM_WT_0h_replicate_II",    "RPKM_KO_0h_replicate_I",      
"RPKM_KO_0h_replicate_II",    "RPKM_WT_0h_replicate_I",      
"RPKM_WT_0h_replicate_II",    "RPKM_KO_0h",  
"RPKM_WT_0h")
#x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(x, file="edgeRgenesPolyExpr-0h.csv",quote=FALSE,row.names=FALSE,sep="\t")
x_m0=x
print ("P-m0")
#-1 means WT > KO # 1  means KO > WT
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))

for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }

col_ordering = c(3,4,13,14)#2h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_2h_replicate_I",      
"CPM_KO_2h_replicate_II",    "CPM_WT_2h_replicate_I",      
"CPM_WT_2h_replicate_II",    "RPKM_KO_2h_replicate_I",      
"RPKM_KO_2h_replicate_II",    "RPKM_WT_2h_replicate_I",      
"RPKM_WT_2h_replicate_II",    "RPKM_KO_2h",  
"RPKM_WT_2h")#2h
#x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(x, file="edgeRgenesPolyExpr-2h.csv",quote=FALSE,row.names=FALSE)
x_2h=x
print ("P-2h")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }

col_ordering = c(5,6,15,16)#6h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_6h_replicate_I",      
"CPM_KO_6h_replicate_II",    "CPM_WT_6h_replicate_I",      
"CPM_WT_6h_replicate_II",    "RPKM_KO_6h_replicate_I",      
"RPKM_KO_6h_replicate_II",    "RPKM_WT_6h_replicate_I",      
"RPKM_WT_6h_replicate_II",    "RPKM_KO_6h",  
"RPKM_WT_6h")#6h
#x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(x, file="edgeRgenesPolyExpr-6h.csv",quote=FALSE,row.names=FALSE)
x_6h=x
print ("P-6h")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))

for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }


col_ordering = c(7,8,17,18)#IFN
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m1_replicate_I",      
"CPM_KO_m1_replicate_II",    "CPM_WT_m1_replicate_I",      
"CPM_WT_m1_replicate_II",    "RPKM_KO_m1_replicate_I",      
"RPKM_KO_m1_replicate_II",    "RPKM_WT_m1_replicate_I",      
"RPKM_WT_m1_replicate_II",    "RPKM_KO_m1",  
"RPKM_WT_m1")#m1
#x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(x, file="edgeRgenesPolyExpr-IFN.csv")
print ("P-m1")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
x_m1=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }
summary(de <- decideTestsDGE(et, p=0.05))
#-1    24
#0  19748
#1     34

col_ordering = c(9,10,19,20)#IL4
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m2_replicate_I",      
"CPM_KO_m2_replicate_II",    "CPM_WT_m2_replicate_I",      
"CPM_WT_m2_replicate_II",    "RPKM_KO_m2_replicate_I",      
"RPKM_KO_m2_replicate_II",    "RPKM_WT_m2_replicate_I",      
"RPKM_WT_m2_replicate_II",    "RPKM_KO_m2",  
"RPKM_WT_m2")#m2
#x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE)
#x=x[!duplicated(x),]
write.table(x, file="edgeRgenesPolyExpr-IL4.csv")
x_m2=x
print ("P-m2")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
#dim(x_m2)
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }
#write.table(x, file="edgeRgenesExpr-IL4.csv")
summary(de <- decideTestsDGE(et, p=0.05))
#-1   215
#0  18093
#1    106
#summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))




#rnaseq data
rnaseqMatrix1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]];
rownames(rnaseqMatrix0)=rnaseqMatrix1$Geneid;

col_ordering = c(11,12,1,2)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
#exp2=vst(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#rx=cbind(as.data.frame(tTags), gl[rownames(tTags),2],gl[rownames(tTags),3],c,1000*c/gl[rownames(tTags),3])
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_0h_replicate_I",      
"CPM_KO_0h_replicate_II",    "CPM_WT_0h_replicate_I",      
"CPM_WT_0h_replicate_II",    "RPKM_KO_0h_replicate_I",      
"RPKM_KO_0h_replicate_II",    "RPKM_WT_0h_replicate_I",      
"RPKM_WT_0h_replicate_II",    "RPKM_KO_0h",  
"RPKM_WT_0h")
#rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(rx, file="edgeRgenesExpr-0h.csv",quote=FALSE,row.names=FALSE)
rx_m0=rx

print ("R-m0")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }


col_ordering = c(13,14,3,4)#2h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_2h_replicate_I",      
"CPM_KO_2h_replicate_II",    "CPM_WT_2h_replicate_I",      
"CPM_WT_2h_replicate_II",    "RPKM_KO_2h_replicate_I",      
"RPKM_KO_2h_replicate_II",    "RPKM_WT_2h_replicate_I",      
"RPKM_WT_2h_replicate_II",    "RPKM_KO_2h",  
"RPKM_WT_2h")#2h
#rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(rx, file="edgeRgenesExpr-2h.csv",quote=FALSE,row.names=FALSE)
rx_2h=rx
print ("R-2h")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }


col_ordering = c(15,16,5,6)#6h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_6h_replicate_I",      
"CPM_KO_6h_replicate_II",    "CPM_WT_6h_replicate_I",      
"CPM_WT_6h_replicate_II",    "RPKM_KO_6h_replicate_I",      
"RPKM_KO_6h_replicate_II",    "RPKM_WT_6h_replicate_I",      
"RPKM_WT_6h_replicate_II",    "RPKM_KO_6h",  
"RPKM_WT_6h")#6h
#rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(rx, file="edgeRgenesExpr-6h.csv",quote=FALSE,row.names=FALSE)
rx_6h=rx
print ("R-6h")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }

col_ordering = c(17,18,7,8)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m1_replicate_I",      
"CPM_KO_m1_replicate_II",    "CPM_WT_m1_replicate_I",      
"CPM_WT_m1_replicate_II",    "RPKM_KO_m1_replicate_I",      
"RPKM_KO_m1_replicate_II",    "RPKM_WT_m1_replicate_I",      
"RPKM_WT_m1_replicate_II",    "RPKM_KO_m1",  
"RPKM_WT_m1")#m1
#rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(rx, file="edgeRgenesExpr-IFN.csv",quote=FALSE,row.names=FALSE)
rx_m1=rx
print ("R-m1")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }

col_ordering = c(19,20,9,10)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m2_replicate_I",      
"CPM_KO_m2_replicate_II",    "CPM_WT_m2_replicate_I",      
"CPM_WT_m2_replicate_II",    "RPKM_KO_m2_replicate_I",      
"RPKM_KO_m2_replicate_II",    "RPKM_WT_m2_replicate_I",      
"RPKM_WT_m2_replicate_II",    "RPKM_KO_m2",  
"RPKM_WT_m2")#m2
#rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE)
write.table(rx, file="edgeRgenesExpr-IL4.csv",quote=FALSE,row.names=FALSE)
rx_m2=rx
print ("R-m2")
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none"))
summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH"))
summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH"))

all_m0=merge(x=rx_m0,y=x_m0,by="EnsemblID",all.x=T,all.y=T)
write.table(all_m0, file="rna_poly_m0.csv",quote=FALSE,row.names=FALSE)
all_m1=merge(x=rx_m1,y=x_m1,by="EnsemblID",all.x=T,all.y=T)
write.table(all_m1, file="rna_poly_m1.csv",quote=FALSE,row.names=FALSE)
all_m2=merge(x=rx_m2,y=x_m2,by="EnsemblID",all.x=T,all.y=T)
write.table(all_m2, file="rna_poly_m2.csv",quote=FALSE,row.names=FALSE)
all_2h=merge(x=rx_2h,y=x_2h,by="EnsemblID",all.x=T,all.y=T)
write.table(all_2h, file="rna_poly_2h.csv",quote=FALSE,row.names=FALSE)
all_6h=merge(x=rx_6h,y=x_6h,by="EnsemblID",all.x=T,all.y=T)
write.table(all_6h, file="rna_poly_6h.csv",quote=FALSE,row.names=FALSE)

#write DEG tables
deg=all_m0[(all_m0$PValue.x<=0.05)&(!is.na(all_m0$PValue.x)),]
write.table(deg, file="rna_poly_m0_rna_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m0[(all_m0$PValue.y<=0.05)&(!is.na(all_m0$PValue.y)),]
write.table(deg, file="rna_poly_m0_poly_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),]
write.table(deg, file="rna_poly_m0_rna_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m0[(all_m0$FDR.y<=0.05)&(!is.na(all_m0$FDR.y)),]
write.table(deg, file="rna_poly_m0_poly_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m0[(all_m0$FDR.x<=0.1)&(!is.na(all_m0$FDR.x)),]
write.table(deg, file="rna_poly_m0_rna_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)
deg=all_m0[(all_m0$FDR.y<=0.1)&(!is.na(all_m0$FDR.y)),]
write.table(deg, file="rna_poly_m0_poly_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)

deg=all_m1[(all_m1$PValue.x<=0.05)&(!is.na(all_m1$PValue.x)),]
write.table(deg, file="rna_poly_m1_rna_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m1[(all_m1$PValue.y<=0.05)&(!is.na(all_m1$PValue.y)),]
write.table(deg, file="rna_poly_m1_poly_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m1[(all_m1$FDR.x<=0.05)&(!is.na(all_m1$FDR.x)),]
write.table(deg, file="rna_poly_m1_rna_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m1[(all_m1$FDR.y<=0.05)&(!is.na(all_m1$FDR.y)),]
write.table(deg, file="rna_poly_m1_poly_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m1[(all_m1$FDR.x<=0.1)&(!is.na(all_m1$FDR.x)),]
write.table(deg, file="rna_poly_m1_rna_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)
deg=all_m1[(all_m1$FDR.y<=0.1)&(!is.na(all_m1$FDR.y)),]
write.table(deg, file="rna_poly_m1_poly_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)

deg=all_m2[(all_m2$PValue.x<=0.05)&(!is.na(all_m2$PValue.x)),]
write.table(deg, file="rna_poly_m2_rna_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m2[(all_m2$PValue.y<=0.05)&(!is.na(all_m2$PValue.y)),]
write.table(deg, file="rna_poly_m2_poly_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),]
write.table(deg, file="rna_poly_m2_rna_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m2[(all_m2$FDR.y<=0.05)&(!is.na(all_m2$FDR.y)),]
write.table(deg, file="rna_poly_m2_poly_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_m2[(all_m2$FDR.x<=0.1)&(!is.na(all_m2$FDR.x)),]
write.table(deg, file="rna_poly_m2_rna_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)
deg=all_m2[(all_m2$FDR.y<=0.1)&(!is.na(all_m2$FDR.y)),]
write.table(deg, file="rna_poly_m2_poly_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)

deg=all_2h[(all_2h$PValue.x<=0.05)&(!is.na(all_2h$PValue.x)),]
write.table(deg, file="rna_poly_2h_rna_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_2h[(all_2h$PValue.y<=0.05)&(!is.na(all_2h$PValue.y)),]
write.table(deg, file="rna_poly_2h_poly_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_2h[(all_2h$FDR.x<=0.05)&(!is.na(all_2h$FDR.x)),]
write.table(deg, file="rna_poly_2h_rna_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_2h[(all_2h$FDR.y<=0.05)&(!is.na(all_2h$FDR.y)),]
write.table(deg, file="rna_poly_2h_poly_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_2h[(all_2h$FDR.x<=0.1)&(!is.na(all_2h$FDR.x)),]
write.table(deg, file="rna_poly_2h_rna_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)
deg=all_2h[(all_2h$FDR.y<=0.1)&(!is.na(all_2h$FDR.y)),]
write.table(deg, file="rna_poly_2h_poly_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)

deg=all_6h[(all_6h$PValue.x<=0.05)&(!is.na(all_6h$PValue.x)),]
write.table(deg, file="rna_poly_6h_rna_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_6h[(all_6h$PValue.y<=0.05)&(!is.na(all_6h$PValue.y)),]
write.table(deg, file="rna_poly_6h_poly_deg_pval.csv",quote=FALSE,row.names=FALSE)
deg=all_6h[(all_6h$FDR.x<=0.05)&(!is.na(all_6h$FDR.x)),]
write.table(deg, file="rna_poly_6h_rna_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_6h[(all_6h$FDR.y<=0.05)&(!is.na(all_6h$FDR.y)),]
write.table(deg, file="rna_poly_6h_poly_deg_fdr_0.05.csv",quote=FALSE,row.names=FALSE)
deg=all_6h[(all_6h$FDR.x<=0.1)&(!is.na(all_6h$FDR.x)),]
write.table(deg, file="rna_poly_6h_rna_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)
deg=all_6h[(all_6h$FDR.y<=0.1)&(!is.na(all_6h$FDR.y)),]
write.table(deg, file="rna_poly_6h_poly_deg_fdr_0.1.csv",quote=FALSE,row.names=FALSE)


#all_m2=merge(x=rx_m2,y=x_m2,by="EnsemblID",all.y=T)
#write.table(all_m2, file="rna_poly_m2.csv",quote=FALSE,row.names=FALSE)

plot (all_m0$logFC.x,all_m0$logFC.y,pch=".")
fit <- lm(logFC.x ~ logFC.y,data=all_m0)
summary(fit)
abline(fit,col="red")

cor.test(all_m0$logFC.x,all_m0$logFC.y)
t = 13.899, df = 14786, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.09762432 0.12944386
sample estimates:
      cor 
0.1135632 
#@a slight but significant correlation for m0

plot (all_m0$logFC.y,all_m0$logFC.x,pch=".")
fit <- lm(logFC.y ~ logFC.x,data=all_m0)
summary(fit)
abline(fit,col="red")
cor.test(all_m0$logFC.y,all_m0$logFC.x)

plot (all_m1$logFC.y,all_m1$logFC.x,pch=".")
fit <- lm(logFC.y ~ logFC.x,data=all_m1)
summary(fit)
abline(fit,col="red")
cor.test(all_m1$logFC.y,all_m1$logFC.x)

plot (all_m2$logFC.y,all_m2$logFC.x,pch=".")
fit <- lm(logFC.y ~ logFC.x,data=all_m2)
summary(fit)
abline(fit,col="red")
cor.test(all_m2$logFC.y,all_m2$logFC.x)

cor.test(all_2h$logFC.y,all_2h$logFC.x)
t = -3.0899, df = 14628, p-value = 0.002006
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.041726743 -0.009339115
sample estimates:
        cor 
-0.02553963 
#@a very slight but significant ANTIcorrelation for 2h

cor.test(all_6h$logFC.y,all_6h$logFC.x)
t = -2.0075, df = 14619, p-value = 0.04472
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.0328015800 -0.0003917769
sample estimates:
        cor 
-0.01660104 
#@a very slight but significant ANTIcorrelation for 6h


lim_m1=all_m1[ (all_m1$logFC.x>-6)&(all_m1$logFC.x<6)&(all_m1$logFC.y>-6)&(all_m1$logFC.y<6),]
cor.test(lim_m1$logFC.y,lim_m1$logFC.x)
t = -0.5662, df = 14159, p-value = 0.5713
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.02122711  0.01171323
sample estimates:
         cor 
-0.004758232 

lim_m1=all_m1[ (all_m1$logFC.x>-4)&(all_m1$logFC.x<4)&(all_m1$logFC.y>-4)&(all_m1$logFC.y<4),]
cor.test(lim_m1$logFC.y,lim_m1$logFC.x)
t = -0.6534, df = 14150, p-value = 0.5135
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.02196657  0.01098400
sample estimates:
         cor 
-0.005492776 

lim_m1=all_m1[ (all_m1$logFC.x>-2)&(all_m1$logFC.x<2)&(all_m1$logFC.y>-2)&(all_m1$logFC.y<2),]
cor.test(lim_m1$logFC.y,lim_m1$logFC.x)
t = -1.9938, df = 13877, p-value = 0.0462
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.0335499580 -0.0002854185
sample estimates:
        cor 
-0.01692237 

#@a very slight but significant ANTIcorrelation for m1 at abs(logFC) <2

lim_m2=all_m2[ (all_m2$logFC.x>-6)&(all_m2$logFC.x<6)&(all_m2$logFC.y>-6)&(all_m2$logFC.y<6),]
cor.test(lim_m2$logFC.y,lim_m2$logFC.x)
t = -0.5834, df = 14380, p-value = 0.5596
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.02120682  0.01147941
sample estimates:
         cor 
-0.004865003 

lim_m2=all_m2[ (all_m2$logFC.x>-4)&(all_m2$logFC.x<4)&(all_m2$logFC.y>-4)&(all_m2$logFC.y<4),]
cor.test(lim_m2$logFC.y,lim_m2$logFC.x)
t = -0.70469, df = 14372, p-value = 0.481
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.02222392  0.01047104
sample estimates:
         cor 
-0.005878011 

lim_m2=all_m2[ (all_m2$logFC.x>-2)&(all_m2$logFC.x<2)&(all_m2$logFC.y>-2)&(all_m2$logFC.y<2),]
cor.test(lim_m2$logFC.y,lim_m2$logFC.x)
t = -0.055904, df = 13870, p-value = 0.9554
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.01711578  0.01616667
sample estimates:
          cor 
-0.0004746831 

lim_m2=all_m2[ (all_m2$logFC.x>-1)&(all_m2$logFC.x<1)&(all_m2$logFC.y>-1)&(all_m2$logFC.y<1),]
cor.test(lim_m2$logFC.y,lim_m2$logFC.x)
t = -2.3985, df = 10747, p-value = 0.01648
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.04201649 -0.00422704
sample estimates:
        cor 
-0.02313003 
#@a very slight but significant ANTIcorrelation for m1 at abs(logFC) <1

#low Poly expression, test RNA
lpWT_m2=all_m2[ (abs(log2(1+all_m2$RPKM_WT_m2.x))<1),]
lpKO_m2=all_m2[ (abs(log2(1+all_m2$RPKM_KO_m2.x))<1),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F)


#low Poly expression, test RNA
lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<2),]
lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<2),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F)

#low Poly expression, test RNA
lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<5),]
lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<5),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F)

lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<1)&((log2(1+all_m2$RPKM_WT_m2.x))>0.5),]
lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<1)&((log2(1+all_m2$RPKM_WT_m2.x))>0.5),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F)


lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<4)&((log2(1+all_m2$RPKM_WT_m2.y))<4),]
lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<4)&((log2(1+all_m2$RPKM_WT_m2.y))<4),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F,range=1)

lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))>4)|((log2(1+all_m2$RPKM_WT_m2.y))>4),]
lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))>4)|((log2(1+all_m2$RPKM_WT_m2.y))>4),]
lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y)
lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y)
summary(lpWT_m2_rna)
summary(lpKO_m2_rna)
wilcox.test(lpWT_m2_rna,lpKO_m2_rna)
boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F,range=1)



#Translational efficiency ( see: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5388891/ Fig. 3)
#paired RNA-seq and Ribo-seq datasets from different human cell lines were examined for relationship between mRNA expression level and TE (calculated as the (log2) ratio between densities of ribosome footprint and RNA-seq reads). Lower panels: comparisons between the 10% of genes with lowest and highest expression levels are presented; p values calculated using Wilcoxon’s test.

all_m2$TranslEff_WT=log2((1+all_m2$RPKM_WT_m2.x)/(1+all_m2$RPKM_WT_m2.y))
plot(all_m2$TE_WT,log2(1+all_m2$RPKM_WT_m2.y),pch=".")
cor.test(log2(1+all_m2$RPKM_WT_m2.y),all_m2$TE_WT)

all_m2$TranslEff_KO=log2(1+all_m2$RPKM_KO_m2.x)-log2(1+all_m2$RPKM_KO_m2.y)
plot(all_m2$TE_KO,log2(1+all_m2$RPKM_KO_m2.y),pch=".")
cor.test(log2(1+all_m2$RPKM_KO_m2.y),all_m2$TE_KO)

#@ volcano plots
deg=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),]
deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),]

a=all_m2;
plot(a$logFC.x,-log10(a$PValue.x))

# FC histogram
deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),]
hdata=hist(deg$logFC.x,breaks=30,plot=F)
hdata$counts[hdata$counts>0] <- log(hdata$counts[hdata$counts>0], 2)
plot(hdata)

deg2=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),]
hdata=hist(deg2$logFC.x,breaks=30,plot=F)
hdata$counts[hdata$counts>0] <- log(hdata$counts[hdata$counts>0], 2)
plot(hdata)
