setwd("/home/tzanos/Desktop/HuR/KOvsWT/e")
require(edgeR)

gl=read.table("/home/tzanos/Desktop/HuR/KOvsWT/e/mm10e.gene-lengths.txt",sep="\t")
gl.names=gl$V1;
rownames(gl)=gl$V1
colnames(gl)=c("ID","geneID","genelength");

##rnaseq data
rnaseqMatrix1= read.table("/home/tzanos/Desktop/HuR/KOvsWT/e/RNAseq/gene_counts_merged_k20_mm10e.txt",header=T,sep="\t",skip=1)
rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]];
rownames(rnaseqMatrix0)=as.character(rnaseqMatrix1$Geneid);
colnames(rnaseqMatrix0)
# [1] "wt01_trm_k20.bam"    "wt02_trm_k20.bam"    "wt21_trm_k20.bam"   
# [4] "wt22_trm_k20.bam"    "wt61_trm_k20.bam"    "wt62_trm_k20.bam"   
# [7] "wtIFN_1_trm_k20.bam" "wtIFN_2_trm_k20.bam" "wtIL4_1_trm_k20.bam"
#[10] "wtIL4_2_trm_k20.bam" "KO01_trm_k20.bam"    "KO02_trm_k20.bam"   
#[13] "KO21_trm_k20.bam"    "KO22_trm_k20.bam"    "KO61_trm_k20.bam"   
#[16] "KO62_trm_k20.bam"    "KOIFN_1_trm_k20.bam" "KOIFN_2_trm_k20.bam"
#[19] "KOIL4_1_trm_k20.bam" "KOIL4_2_trm_k20.bam"

col_ordering = c(11,12,1,2)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)

rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))

write.table(rx, file="edgeRgenesExpr-0h.csv",quote=FALSE,row.names=FALSE)
rx_m0=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}
#[1] "wt01_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     499   19560 
#[1] "wt02_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical    1021   19038 
#[1] "KO01_trm_k20.bam"
#Mode   FALSE    TRUE 
#   Mode   FALSE    TRUE 
#logical     875   19184 
#   Mode   FALSE    TRUE 
#logical     606   19453 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}
#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7512   12547 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7697   12362 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7538   12521 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7856   12203 

col_ordering = c(13,14,3,4)#2h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-2h.csv",quote=FALSE,row.names=FALSE)
rx_2h=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}
#[1] "wt21_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     801   18544 
#[1] "wt22_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     889   18456
#[1] "KO21_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     535   18810 
#[1] "KO22_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     394   18951  

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}

#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7668   11677 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7664   11681 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7539   11806 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7528   11817 

col_ordering = c(15,16,5,6)#6h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-6h.csv",quote=FALSE,row.names=FALSE)
rx_6h=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}

#[1] "wt61_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     521   18884  
#[1] "wt62_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical    1000   18405 
#[1] "KO61_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     650   18755 
#[1] "KO62_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     574   18831 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}

#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7789   11616 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7977   11428 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7940   11465 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7966   11439 

col_ordering = c(17,18,7,8)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-IFN.csv",quote=FALSE,row.names=FALSE)
rx_m1=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}
#[1] "wtIFN_1_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     700   17537 
#[1] "wtIFN_2_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     460   17777 
#[1] "KOIFN_1_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     520   17717 
#[1] "KOIFN_2_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     610   17627 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}
#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6977   11260 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6969   11268 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7093   11144 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7063   11174 

col_ordering = c(19,20,9,10)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-IL4.csv",quote=FALSE,row.names=FALSE)
rx_m2=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}
#[1] "wtIL4_1_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     475   18774 
#[1] "wtIL4_2_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     732   18517 
#[1] "KOIL4_1_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     518   18731 
#[1] "KOIL4_2_trm_k20.bam"
#   Mode   FALSE    TRUE 
#logical     526   18723 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}
#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6807   12442 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6790   12459 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6861   12388 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    6844   12405 


##polysomeseq
poly1= read.table("/home/tzanos/Desktop/HuR/KOvsWT/e/PolysomeSeq/gene_counts_merged_mm10e.txt",header=T,sep="\t",skip=1)
poly0 =poly1[,7:dim(poly1)[2]];
rownames(poly0)=poly1$Geneid;
colnames(poly0)
#  [1] "KO_0h_replicate_IIb"       
#[2] "KO_0h_replicate_Ib"        
#[3] "KO_2h_replicate_IIb"       
#[4] "KO_2h_replicate_Ib"        
#[5] "KO_6h_replicate_IIb"       
#[6] "KO_6h_replicate_Ib"        
#[7] "KO_IFN.gamma_replicate_IIb"
#[8] "KO_IFN_replicate_Ib"       
#[9] "KO_IL4_replicate_IIb"      
#[10] "KO_IL4_replicate_Ib"       
#[11] "WT_0h_replicate_IIb"       
#[12] "WT_0h_replicate_Ib"        
#[13] "WT_2h_replicate_IIb"       
#[14] "WT_2h_replicate_Ib"        
#[15] "WT_6h_replicate_IIb"       
#[16] "WT_6h_replicate_Ib"        
#[17] "WT_IFN.gamma_replicate_IIb"
#[18] "WT_IFN.gamma_replicate_Ib" 
#[19] "WT_IL4_replicate_IIb"      
#[20] "WT_IL4_replicate_Ib"      


col_ordering = c(1,2,11,12)#0h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-0h.csv",quote=FALSE,row.names=FALSE)
x_m0=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}
#[1] "WT_0h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical    1739   17307 
#[1] "WT_0h_replicate_Ib
#   Mode   FALSE    TRUE 
#logical     426   18620 
#[1] "KO_0h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     604   18442 
#[1] "KO_0h_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     754   18292  

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}
#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3092   15954 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    1976   17070 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3186   15860 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3268   15778 

col_ordering = c(3,4,13,14)#2h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-2h.csv",quote=FALSE,row.names=FALSE)
x_2h=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}
#[1] "WT_2h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical    1333   18721 
#[1] "WT_2h_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     692   19362 
#[1] "KO_2h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     800   19254 
#[1] "KO_2h_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     668   19386 

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}
#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2873   17181 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2464   17590 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2646   17408 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2712   17342 

col_ordering = c(5,6,15,16)#6h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-6h.csv",quote=FALSE,row.names=FALSE)
x_6h=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}
#[1] "WT_6h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     494   19201 
#[1] "WT_6h_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical    1943   17752 
#[1] "KO_6h_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     575   19120 
#[1] "KO_6h_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     364   19331  

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}
#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2741   16954 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3202   16493 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2209   17486 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2628   17067 

col_ordering = c(7,8,17,18)#IFN
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-IFN.csv")
x_m1=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}
#[1] "WT_IFN.gamma_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     719   19565
#[1] "WT_IFN.gamma_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical    1551   18733 
#[1] "KO_IFN.gamma_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     813   19471  
#[1] "KO_IFN_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     226   20058 

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}

#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2467   17817 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    4525   15759 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3190   17094 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2625   17659 

summary(de <- decideTestsDGE(et, p=0.05))
#        WT-KO
# Down      40
# NotSig 20219
# Up        25


col_ordering = c(9,10,19,20)#IL4
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-IL4.csv")
x_m2=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}
#[1] "WT_IL4_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical    1267   17387 
#[1] "WT_IL4_replicate_Ib
#   Mode   FALSE    TRUE 
#logical     494   18160 
#[1] "KO_IL4_replicate_IIb"
#   Mode   FALSE    TRUE 
#logical     884   17770 
#[1] "KO_IL4_replicate_Ib"
#   Mode   FALSE    TRUE 
#logical     475   18179 

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}
#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3497   15157 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3240   15414 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2407   16247 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2376   16278 

summary(de <- decideTestsDGE(et, p=0.05))
#        WT-KO
# Down     187
# NotSig 18257
# Up       210


# merge rnaseq and polysomeseq
library(psych)
all_m0=merge(rx_m0,x_m0,by="Row.names")
all_m1=merge(rx_m1,x_m1,by="Row.names")
all_m2=merge(rx_m2,x_m2,by="Row.names")
all_2h=merge(rx_2h,x_2h,by="Row.names")
all_6h=merge(rx_6h,x_6h,by="Row.names")

write.csv(all_m0, file="all_m0.csv",row.names=F)
write.csv(all_m1, file="all_m1.csv",row.names=F)
write.csv(all_m2, file="all_m2.csv",row.names=F)
write.csv(all_2h, file="all_2h.csv",row.names=F)
write.csv(all_6h, file="all_6h.csv",row.names=F)



# get 95% CI for terciles at cutoffs
btercile1 <- function(x,B){
  bstrap <- c()
  for (i in 1:B){
    q=quantile(sample(x,length(x),replace=T),seq(1,2)/3,na.rm=T);
    bstrap <- c(bstrap,q[1])
  }
  return (bstrap);
}

l1={};l2={};l3={};
l1l={};l2l={};l3l={};
l1u={};l2u={};l3u={};
x=seq(0.005,2,0.005);
for (th in x){
  #m2
  lpWT_m2=all_m2[ abs(all_m2$logFC.x)<th,] #get subset=all with abs(RNAseq.logFC) < th 
  lpWT_m2_rna=lpWT_m2$logFC.y # get PolysomeSeq.logFC for subset
  q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
  l1=c(l1,q[1]);
  o <- btercile1(lpWT_m2_rna,1000)
  l1l=c(l1l,quantile(o,.025))
  l1u=c(l1u,quantile(o,.975))
  #m1
  lpWT_m2=all_m1[ abs(all_m1$logFC.x)<th,]
  lpWT_m2_rna=lpWT_m2$logFC.y
  q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
  l2=c(l2,q[1]);
  o <- btercile1(lpWT_m2_rna,1000)
  l2l=c(l2l,quantile(o,.025))
  l2u=c(l2u,quantile(o,.975))
  #m0
  lpWT_m2=all_m0[ abs(all_m0$logFC.x)<th,]
  lpWT_m2_rna=lpWT_m2$logFC.y
  q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
  l3=c(l3,q[1]);
  o <- btercile1(lpWT_m2_rna,1000)
  l3l=c(l3l,quantile(o,.025))
  l3u=c(l3u,quantile(o,.975));
  print (c(th,length(lpWT_m2_rna)))
}

max_rna_vs_poly=rbind(l1,l2,l3,l1l,l2l,l3l,l1u,l2u,l3u);
write.table(max_rna_vs_poly, file="max_rna_vs_poly.csv",quote=FALSE,col.names=FALSE)

#plot tercile figure, m0,m1,m2
max_rna_vs_poly=read.table("max_rna_vs_poly.csv",header=F)
l1=max_rna_vs_poly[1,-1]; #m2
l2=max_rna_vs_poly[2,-1]; #m1
l3=max_rna_vs_poly[3,-1]; #m0
l1l=max_rna_vs_poly[4,-1];
l2l=max_rna_vs_poly[5,-1];
l3l=max_rna_vs_poly[6,-1];
l1u=max_rna_vs_poly[7,-1];
l2u=max_rna_vs_poly[8,-1];
l3u=max_rna_vs_poly[9,-1];
png("Max_abundance_log2FC_vs__translation_log2FC_tercile_v3.png", width = 8, height = 8, units = 'in', res = 300)
plot(x,l3,ylim=c(-0.57,.009),xlab="cutoff for log2 abundance foldchange",ylab="First tercile( log2 translation foldchange)",pch=".",col="blue",xlim=c(0.00,2), xaxs="i",axes=F,yaxs="i")
axis(1, at = seq(0,2,0.25), labels =  seq(0,2,0.25))
axis(2, at = seq(-0.75,0,0.1))

grid(nx=NULL, ny=NULL) #grid over boxplot
lines(x,l3,col="blue")
lines(x,l2,col="green")
lines(x,l1,col="red")
legend("topright", pch=c("-","-","-"), col=c("blue","green","red"), c("M0","M1","M2"), bty="o", cex=.8, box.col="black")
polygon.x <- c(x, rev(x))
polygon.y <- c(l1l, rev(l1u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("red", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

polygon.y <- c(l2l, rev(l2u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("green", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

polygon.y <- c(l3l, rev(l3u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("blue", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

dev.off()

#plot tercile figure, m1,m2
png("Max_abundance_log2FC_vs__translation_log2FC_tercile_m1_m2.png", width = 8, height = 8, units = 'in', res = 300)
plot(x,l1,ylim=c(-0.55,-.15),xlab="cutoff for log2 abundance foldchange",ylab="First tercile( log2 translation foldchange)",pch=".",col="blue",xlim=c(0.00,2), xaxs="i",axes=F,yaxs="i")
axis(1, at = seq(0,2,0.25), labels =  seq(0,2,0.25))
axis(2, at = seq(-0.75,0,0.1))

grid(nx=NULL, ny=NULL) #grid over boxplot
#lines(x,l3,col="blue")
lines(x,l2,col="red")
lines(x,l1,col="blue")
legend("topright", pch=c("-","-","-"), col=c("blue","red"), c("M1","M2"), bty="o", cex=.8, box.col="black")
polygon.x <- c(x, rev(x))
polygon.y <- c(l1l, rev(l1u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("blue", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

polygon.y <- c(l2l, rev(l2u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("red", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

dev.off()

