> require(edgeR)
 
> > bt=read.table("/data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.transcript-gene_name.txt",sep="\t")
rownames(bt)=bt$V2;
%bt=bt[,(names(x) %in% c("V2","V4")]
bt2=cbind(substring(bt[,2],2),as.character(bt[,4]))
colnames(bt2)=c("EnsemblID","biotype")
rownames(bt2)=bt2[,1]
bt3=bt2[!duplicated(rownames(bt2)),]
rownames(bt3)=bt3[,1]
bt4=bt2[bt2[,2]=="protein_coding",]

> gl=read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/deg/Mus_musculus.NCBIM37.64-toMM9.gene-lenghts.txt",sep="\t")
gl.names=gl$V1;
rownames(gl)=gl$V1
colnames(gl)=c("ID","geneID","genelength");

> poly1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/Polysomes/deg/gene_counts_merged_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
poly0 =poly1[,7:dim(poly1)[2]];
rownames(poly0)=poly1$Geneid;

col_ordering = c(1,2,11,12)#0h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
#poly2=merge(x=poly,y=bt3,by="row.names",all.x=TRUE)
poly=poly[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_0h_replicate_I",      
"CPM_KO_0h_replicate_II",    "CPM_WT_0h_replicate_I",      
"CPM_WT_0h_replicate_II",    "RPKM_KO_0h_replicate_I",      
"RPKM_KO_0h_replicate_II",    "RPKM_WT_0h_replicate_I",      
"RPKM_WT_0h_replicate_II",    "RPKM_KO_0h",  
"RPKM_WT_0h")

> + + + + + + + + > > x_m0=x

> > col_ordering = c(7,8,17,18)#IFN
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
poly=poly[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m1_replicate_I",      
"CPM_KO_m1_replicate_II",    "CPM_WT_m1_replicate_I",      
"CPM_WT_m1_replicate_II",    "RPKM_KO_m1_replicate_I",      
"RPKM_KO_m1_replicate_II",    "RPKM_WT_m1_replicate_I",      
"RPKM_WT_m1_replicate_II",    "RPKM_KO_m1",  
"RPKM_WT_m1")#m1

> > > > > > > > > > > > > > > > > > > + + + + + + + + > > x_m1=x

> > col_ordering = c(9,10,19,20)#IL4
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
poly=poly[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(x)=x$Row.names
x=x[,!(names(x) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=x,y=i,by="row.names",all.x=TRUE)
x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
colnames(x)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m2_replicate_I",      
"CPM_KO_m2_replicate_II",    "CPM_WT_m2_replicate_I",      
"CPM_WT_m2_replicate_II",    "RPKM_KO_m2_replicate_I",      
"RPKM_KO_m2_replicate_II",    "RPKM_WT_m2_replicate_I",      
"RPKM_WT_m2_replicate_II",    "RPKM_KO_m2",  
"RPKM_WT_m2")#m2

> > > > > > > > > > > > > > > > > > > + + + + + + + + > > x_m2=x

> rnaseqMatrix1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]];
rownames(rnaseqMatrix0)=rnaseqMatrix1$Geneid;

col_ordering = c(11,12,1,2)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
#exp2=vst(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#rx=cbind(as.data.frame(tTags), gl[rownames(tTags),2],gl[rownames(tTags),3],c,1000*c/gl[rownames(tTags),3])
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_0h_replicate_I",      
"CPM_KO_0h_replicate_II",    "CPM_WT_0h_replicate_I",      
"CPM_WT_0h_replicate_II",    "RPKM_KO_0h_replicate_I",      
"RPKM_KO_0h_replicate_II",    "RPKM_WT_0h_replicate_I",      
"RPKM_WT_0h_replicate_II",    "RPKM_KO_0h",  
"RPKM_WT_0h")

> rx_m0=rx

> + + + + + + + + > > > > col_ordering = c(17,18,7,8)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m1_replicate_I",      
"CPM_KO_m1_replicate_II",    "CPM_WT_m1_replicate_I",      
"CPM_WT_m1_replicate_II",    "RPKM_KO_m1_replicate_I",      
"RPKM_KO_m1_replicate_II",    "RPKM_WT_m1_replicate_I",      
"RPKM_WT_m1_replicate_II",    "RPKM_KO_m1",  
"RPKM_WT_m1")#m1

> > > > > > > > > > rx_m1=rx

> + + + + + + + + > > > > col_ordering = c(19,20,9,10)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rownames(rx)=rx$Row.names
rx=rx[,!(names(rx) %in% c("Row.names","ID"))]
i=as.data.frame(c)
x1=merge(x=rx,y=i,by="row.names",all.x=TRUE)
rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
colnames(rx)=c("EnsemblID",                 "logFC",                      
"logCPM",                    "PValue",                     
"FDR",                       "geneID",                     
"genelength",                "CPM_KO_m2_replicate_I",      
"CPM_KO_m2_replicate_II",    "CPM_WT_m2_replicate_I",      
"CPM_WT_m2_replicate_II",    "RPKM_KO_m2_replicate_I",      
"RPKM_KO_m2_replicate_II",    "RPKM_WT_m2_replicate_I",      
"RPKM_WT_m2_replicate_II",    "RPKM_KO_m2",  
"RPKM_WT_m2")#m2

> > > > > > > > > > > > > > > > > + + + + + + + + > > rx_m2=rx

> > all_m0=merge(x=rx_m0,y=x_m0,by="EnsemblID",all.x=T,all.y=T)

all_m1=merge(x=rx_m1,y=x_m1,by="EnsemblID",all.x=T,all.y=T)

all_m2=merge(x=rx_m2,y=x_m2,by="EnsemblID",all.x=T,all.y=T)

> 
> l1={};l2={};l3={};
l1l={};l2l={};l3l={};
l1u={};l2u={};l3u={};
x=seq(0.005,2,0.005);
for (th in x){
lpWT_m2=all_m2[ abs(all_m2$logFC.x)<th,]
lpWT_m2_rna=lpWT_m2$logFC.y
q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
l1=c(l1,q[1]);
o <- btercile1(lpWT_m2_rna,1000)
l1l=c(l1l,quantile(o,.025))
l1u=c(l1u,quantile(o,.975))
lpWT_m2=all_m1[ abs(all_m1$logFC.x)<th,]
lpWT_m2_rna=lpWT_m2$logFC.y
q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
l2=c(l2,q[1]);
o <- btercile1(lpWT_m2_rna,1000)
l2l=c(l2l,quantile(o,.025))
l2u=c(l2u,quantile(o,.975))
lpWT_m2=all_m0[ abs(all_m0$logFC.x)<th,]
lpWT_m2_rna=lpWT_m2$logFC.y
q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
l3=c(l3,q[1]);
o <- btercile1(lpWT_m2_rna,1000)
l3l=c(l3l,quantile(o,.025))
l3u=c(l3u,quantile(o,.975));
print (c(th,length(lpWT_m2_rna)))
}

max_rna_vs_poly=rbind(l1,l2,l3,l1l,l2l,l3l,l1u,l2u,l3u);

> > > > + + + + + + + + + + + + + + + + + + + + + + + Error: could not find function "btercile1"
> > > > # with 95%CI:
btercile1 <- function(x,B){
 bstrap <- c()
 for (i in 1:B){
  q=quantile(sample(x,length(x),replace=T),seq(1,2)/3,na.rm=T)
  bstrap <- c(bstrap,q[1])
 }
}

> + + + + + + > > l1={};l2={};l3={};
l1l={};l2l={};l3l={};
l1u={};l2u={};l3u={};
x=seq(0.005,2,0.005);
for (th in x){
lpWT_m2=all_m2[ abs(all_m2$logFC.x)<th,]
lpWT_m2_rna=lpWT_m2$logFC.y
q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
l1=c(l1,q[1]);
o <- btercile1(lpWT_m2_rna,1000)
l1l=c(l1l,quantile(o,.025))
l1u=c(l1u,quantile(o,.975))
lpWT_m2=all_m1[ abs(all_m1$logFC.x)<th,]
lpWT_m2_rna=lpWT_m2$logFC.y
q=quantile(lpWT_m2_rna,seq(1,2)/3,na.rm=T)
)))
}

max_rna_vs_poly=rbind(l1,l2,l3,l1l,l2l,l3l,l1u,l2u,l3u);
write.table(max_rna_vs_poly, file="max_rna_vs_poly.csv",quote=FALSE,col.names=FALSE)
max_rna_vs_poly=read.table("max_rna_vs_poly.csv",header=F,row.names=T)
l1=max_rna_vs_poly[1,];
l2=max_rna_vs_poly[2,];
l3=max_rna_vs_poly[3,];
l1l=max_rna_vs_poly[4,];
l2l=max_rna_vs_poly[5,];
l3l=max_rna_vs_poly[6,];
l1u=max_rna_vs_poly[7,];
l2u=max_rna_vs_poly[8,];
l3u=max_rna_vs_poly[9,];

temp=l3u[1]
l3u[1]=l3u[2]
#png("Max_abundance_log2FC_vs__translation_log2FC_tercile_v2.png",width=1024,height=1024,pointsize = 22)
#png("Max_abundance_log2FC_vs__translation_log2FC_tercile_v2.png",width=2048,height=2048,pointsize = 32)
png("Max_abundance_log2FC_vs__translation_log2FC_tercile_v3.png", width = 8, height = 8, units = 'in', res = 300)
plot(x,l3,ylim=c(-0.505,.005),xlab="max(abs(log2(foldchange abundance)))",ylab="First tercile(log2(foldchange translation))",pch=".",col="blue",xlim=c(0.00,2), xaxs="i",axes=F,yaxs="i")
axis(1, at = seq(0,2,0.25), labels =  seq(0,2,0.25))
axis(2, at = seq(-0.5,0,0.1))

grid(nx=NULL, ny=NULL) #grid over boxplot
lines(x,l3,col="blue")
lines(x,l2,col="green")
lines(x,l1,col="red")
legend("topright", pch=c("-","-","-"), col=c("blue","green","red"), c("M0","M1","M2"), bty="o", cex=.8, box.col="black")
polygon.x <- c(x, rev(x))
polygon.y <- c(l1l, rev(l1u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("red", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

polygon.y <- c(l2l, rev(l2u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("green", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

polygon.y <- c(l3l, rev(l3u))
# By default, R won't fill in the polygon
#polygon(x=polygon.x, y=polygon.y)
# But we also may not want an opaque polygon
#polygon(x=polygon.x, y=polygon.y, col='darkgrey')
polygon(x=polygon.x, y=polygon.y, col=adjustcolor("blue", alpha.f=0.4), border=NA);#adjustcolor("red", alpha.f=0.0))

dev.off()
