setwd("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq-vs-polysomeseq/")
require(edgeR)

gl=read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/deg/Mus_musculus.NCBIM37.64-toMM9.gene-lenghts.txt",sep="\t")
gl.names=gl$V1;
rownames(gl)=gl$V1
colnames(gl)=c("ID","geneID","genelength");

#poly1= read.table("gene_counts_merged_k20_NCBIM37.64.txt",header=T,sep="\t",skip=1)
poly1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/Polysomes/deg/gene_counts_merged_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
poly0 =poly1[,7:dim(poly1)[2]];
rownames(poly0)=poly1$Geneid;
colnames(poly0)
# [1] "KO_0h_replicate_I.bam"         "KO_0h_replicate_II.bam"       
# [3] "KO_2h_replicate_I.bam"         "KO_2h_replicate_II.bam"       
# [5] "KO_6h_replicate_I.bam"         "KO_6h_replicate_II.bam"       
# [7] "KO_IFN.gamma_replicate_II.bam" "KO_IFN_replicate_I.bam"       
# [9] "KO_IL4_replicate_I.bam"        "KO_IL4_replicate_II.bam"      
#[11] "WT_0h_replicate_I.bam"         "WT_0h_replicate_II.bam"       
#[13] "WT_2h_replicate_I.bam"         "WT_2h_replicate_II.bam"       
#[15] "WT_6h_replicate_I.bam"         "WT_6h_replicate_II.bam"       
#[17] "WT_IFN.gamma_replicate_I.bam"  "WT_IFN.gamma_replicate_II.bam"
#[19] "WT_IL4_replicate_I.bam"        "WT_IL4_replicate_II.bam"      


col_ordering = c(1,2,11,12)#0h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
x=cbind(x,c,1000*c/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-0h.csv",quote=FALSE,row.names=FALSE)
x_m0=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }

col_ordering = c(3,4,13,14)#2h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
x=cbind(x,c,1000*c/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-2h.csv",quote=FALSE,row.names=FALSE)
x_2h=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }

col_ordering = c(5,6,15,16)#6h
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
x=cbind(x,c,1000*c/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-6h.csv",quote=FALSE,row.names=FALSE)
x_6h=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }


col_ordering = c(7,8,17,18)#IFN
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
x=cbind(x,c,1000*c/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-IFN.csv")
x_m1=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }
summary(de <- decideTestsDGE(et, p=0.05))
#-1    24
#0  19748
#1     34

col_ordering = c(9,10,19,20)#IL4
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#x=cbind(as.data.frame(tTags),r)
#x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3])
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
x=cbind(x,c,1000*c/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-IL4.csv")
x_m2=x
for (i in 8:11){
 print (colnames(x)[i])
 tt=summary(x[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(x)[i])
 tt=summary(x[,i]>1)
 print(tt)
 }
write.table(x, file="edgeRgenesExpr-IL4.csv")
summary(de <- decideTestsDGE(et, p=0.05))
#-1   215
#0  18093
#1    106

#rnaseq data
rnaseqMatrix1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt",header=T,sep="\t",skip=1)
rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]];
rownames(rnaseqMatrix0)=rnaseqMatrix1$Geneid;
colnames(rnaseqMatrix0)
# [1] "wt01_trm_k20.bam"    "wt02_trm_k20.bam"    "wt21_trm_k20.bam"   
# [4] "wt22_trm_k20.bam"    "wt61_trm_k20.bam"    "wt62_trm_k20.bam"   
# [7] "wtIFN_1_trm_k20.bam" "wtIFN_2_trm_k20.bam" "wtIL4_1_trm_k20.bam"
#[10] "wtIL4_2_trm_k20.bam" "KO01_trm_k20.bam"    "KO02_trm_k20.bam"   
#[13] "KO21_trm_k20.bam"    "KO22_trm_k20.bam"    "KO61_trm_k20.bam"   
#[16] "KO62_trm_k20.bam"    "KOIFN_1_trm_k20.bam" "KOIFN_2_trm_k20.bam"
#[19] "KOIL4_1_trm_k20.bam" "KOIL4_2_trm_k20.bam"

col_ordering = c(11,12,1,2)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
#exp2=vst(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
#rx=cbind(as.data.frame(tTags), gl[rownames(tTags),2],gl[rownames(tTags),3],c,1000*c/gl[rownames(tTags),3])
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)

rx=rx[,!(names(rx) %in% "ID")]
rx=cbind(rx,c,1000*c/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-0h.csv",quote=FALSE,row.names=FALSE)
rx_m0=rx
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }


col_ordering = c(13,14,3,4)#2h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rx=cbind(rx,c,1000*c/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-2h.csv",quote=FALSE,row.names=FALSE)
rx_2h=rx
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }


col_ordering = c(15,16,5,6)#6h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rx=cbind(rx,c,1000*c/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-6h.csv",quote=FALSE,row.names=FALSE)
rx_6h=rx
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }

col_ordering = c(17,18,7,8)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rx=cbind(rx,c,1000*c/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-IFN.csv",quote=FALSE,row.names=FALSE)
rx_m1=rx
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }

col_ordering = c(19,20,9,10)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
#rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,]
conditions = factor(c("KO","KO","WT","WT"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rx=cbind(rx,c,1000*c/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-IL4.csv",quote=FALSE,row.names=FALSE)
rx_m2=rx
for (i in 8:11){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>0)
 print(tt)
 }
for (i in 12:15){
 print (colnames(rx)[i])
 tt=summary(rx[,i]>1)
 print(tt)
 }


# merge rnaseq and polysomeseq
require(psych)
all_m0=merge(rx_m0,x_m0,by="Row.names")
all_m1=merge(rx_m1,x_m1,by="Row.names")
all_m2=merge(rx_m2,x_m2,by="Row.names")
all_2h=merge(rx_2h,x_2h,by="Row.names")
all_6h=merge(rx_6h,x_6h,by="Row.names")


c1=cor.test(log2(all_m0[,17]),log2(all_m0[,33]))
c2=cor.test(log2(all_m0[,16]),log2(all_m0[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all_m0)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.152668961 0.119065879 2.943821880 0.003241865 

c1=cor.test(log2(all_m1[,17]),log2(all_m1[,33]))
c2=cor.test(log2(all_m1[,16]),log2(all_m1[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all_m1)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1229041 0.1141721 0.7452367 0.4561286

c1=cor.test(log2(all_m2[,17]),log2(all_m2[,33]))
c2=cor.test(log2(all_m2[,16]),log2(all_m2[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all_m2)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.09780989 0.11561807 1.52762039 0.12660682

c1=cor.test(log2(all_2h[,17]),log2(all_2h[,33]))
c2=cor.test(log2(all_2h[,16]),log2(all_2h[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all_2h)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1058291 0.1092332 0.2945222 0.7683589 

c1=cor.test(log2(all_6h[,17]),log2(all_6h[,33]))
c2=cor.test(log2(all_6h[,16]),log2(all_6h[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all_6h)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1342746 0.1221058 1.0577373 0.2901752 

#=> only m0 has sig. higher cor(rna,poly) in WT than cor in KO


#
> summary(all_m0$logFC.x)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-11.24000  -0.17960   0.01064   0.10260   0.23900  11.11000 
> summary(all_m1$logFC.x)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-8.995000 -0.103400  0.004837  0.032350  0.142500 10.740000 
> summary(all_m2$logFC.x)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-8.444000 -0.097750  0.001712  0.011950  0.115900  9.853000 
> summary(all_2h$logFC.x)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-9.05600 -0.24220 -0.01712 -0.06133  0.18900  5.65600 
> summary(all_6h$logFC.x)
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-10.400000  -0.121500   0.002427   0.065060   0.173100  11.270000

# rna changed: abs(logFC.x)> 0.2

abs(logFC.x)>0.2
all2_m0=all_m0[abs(all_m0$logFC.x)>0.2,]
all2_m1=all_m1[abs(all_m1$logFC.x)>0.2,]
all2_m2=all_m2[abs(all_m2$logFC.x)>0.2,]
all2_2h=all_2h[abs(all_2h$logFC.x)>0.2,]
all2_6h=all_6h[abs(all_6h$logFC.x)>0.2,]


c1=cor.test(log2(all2_m0[,17]),log2(all2_m0[,33]))
c2=cor.test(log2(all2_m0[,16]),log2(all2_m0[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all2_m0)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.16058763 0.12220662 2.40133835 0.01633522 

c1=cor.test(log2(all2_m1[,17]),log2(all2_m1[,33]))
c2=cor.test(log2(all2_m1[,16]),log2(all2_m1[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all2_m1)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1404625 0.1317778 0.4299130 0.6672589 

c1=cor.test(log2(all2_m2[,17]),log2(all2_m2[,33]))
c2=cor.test(log2(all2_m2[,16]),log2(all2_m2[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all2_m2)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1076119 0.1201364 0.5906979 0.5547228 

c1=cor.test(log2(all2_2h[,17]),log2(all2_2h[,33]))
c2=cor.test(log2(all2_2h[,16]),log2(all2_2h[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all2_2h)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1021293 0.1101030 0.4987349 0.6179661 

c1=cor.test(log2(all2_6h[,17]),log2(all2_6h[,33]))
c2=cor.test(log2(all2_6h[,16]),log2(all2_6h[,32]))
tt=paired.r(c1$estimate,c2$estimate,NULL,dim(all2_6h)[1])
print (c(c1$estimate,c2$estimate,tt$z,tt$p))
#0.1618487 0.1592575 0.1420221 0.8870625 

# compare cor's only for changed RNA => worse


#@
#sd of ratio of log2 RPKMs

c1=(  (1+log2(all_m0[,33]))/(1+log2(all_m0[,17]) ) )
c2=(  (1+log2(all_m0[,32]))/(1+log2(all_m0[,16]) ) )
sd(  (1+log2(all_m0[,33]))/(1+log2(all_m0[,17]) ) )
[1] 42.44673
sd(  (1+log2(all_m0[,32]))/(1+log2(all_m0[,16]) ) )
[1] 111.5453
var.test(c1,c2)
	F test to compare two variances

data:  c1 and c2
F = 0.14481, num df = 14787, denom df = 14787, p-value < 2.2e-16
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.1402121 0.1495500
sample estimates:
ratio of variances 
         0.1448058 

c1=(  (1+log2(all_m1[,33]))/(1+log2(all_m1[,17]) ) )
c2=(  (1+log2(all_m1[,32]))/(1+log2(all_m1[,16]) ) )
sd(  (1+log2(all_m1[,33]))/(1+log2(all_m1[,17]) ) )
[1] 77.41006
sd(  (1+log2(all_m1[,32]))/(1+log2(all_m1[,16]) ) )
[1] 137.1239
var.test(c1,c2)
	F test to compare two variances

data:  c1 and c2
F = 0.31869, num df = 14163, denom df = 14163, p-value < 2.2e-16
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.3083635 0.3293623
sample estimates:
ratio of variances 
           0.31869 

c1=(  (1+log2(all_m2[,33]))/(1+log2(all_m2[,17]) ) )
c2=(  (1+log2(all_m2[,32]))/(1+log2(all_m2[,16]) ) )
sd(  (1+log2(all_m2[,33]))/(1+log2(all_m2[,17]) ) )
[1] 158.4641
sd(  (1+log2(all_m2[,32]))/(1+log2(all_m2[,16]) ) )
[1] 415.9155
var.test(c1,c2)
	F test to compare two variances

data:  c1 and c2
F = 0.14516, num df = 14385, denom df = 14385, p-value < 2.2e-16
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.1404938 0.1499845
sample estimates:
ratio of variances 
         0.1451616 


c1=(  (1+log2(all_2h[,33]))/(1+log2(all_2h[,17]) ) )
c2=(  (1+log2(all_2h[,32]))/(1+log2(all_2h[,16]) ) )
sd(  (1+log2(all_2h[,33]))/(1+log2(all_2h[,17]) ) )
[1] 99.89885
sd(  (1+log2(all_2h[,32]))/(1+log2(all_2h[,16]) ) )
[1] 103.1915
var.test(c1,c2)
	F test to compare two variances

data:  c1 and c2
F = 0.9372, num df = 14629, denom df = 14629, p-value = 8.79e-05
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.9073140 0.9680753
sample estimates:
ratio of variances 
         0.9372024 


c1=(  (1+log2(all_6h[,33]))/(1+log2(all_6h[,17]) ) )
c2=(  (1+log2(all_6h[,32]))/(1+log2(all_6h[,16]) ) )
sd(  (1+log2(all_6h[,33]))/(1+log2(all_6h[,17]) ) )
[1] 242.6754
sd(  (1+log2(all_6h[,32]))/(1+log2(all_6h[,16]) ) )
[1] 42.21529
var.test(c1,c2)
	F test to compare two variances

data:  c1 and c2
F = 33.045, num df = 14620, denom df = 14620, p-value < 2.2e-16
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 31.99130 34.13438
sample estimates:
ratio of variances 
          33.04547 

#=> all sig, but sd(WT)>sd(KO) in 6h


#@
#same with log2FC
c1=log2(  (1+(all_m0[,33]))/(1+(all_m0[,17]) ) )
c2=log2(  (1+(all_m0[,32]))/(1+(all_m0[,16]) ) )
sd(c1)
sd(c2)
var.test(c1,c2)
[1] 2.598676
[1] 2.715029
	F test to compare two variances

data:  c1 and c2
F = 0.91613, num df = 14787, denom df = 14787, p-value = 1.008e-07
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.8870637 0.9461403
sample estimates:
ratio of variances 
         0.9161259 
ks.test(c1,c2)
	Two-sample Kolmogorov-Smirnov test

data:  c1 and c2
D = 0.01677, p-value = 0.03124
alternative hypothesis: two-sided


c1=log2(  (1+(all_m1[,33]))/(1+(all_m1[,17]) ) )
c2=log2(  (1+(all_m1[,32]))/(1+(all_m1[,16]) ) )
sd(c1)
sd(c2)
var.test(c1,c2)
[1] 2.726441
[1] 2.727541

	F test to compare two variances

data:  c1 and c2
F = 0.99919, num df = 14163, denom df = 14163, p-value = 0.9617
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.9668174 1.0326553
sample estimates:
ratio of variances 
         0.9991942
ks.test(c1,c2)

	Two-sample Kolmogorov-Smirnov test

data:  c1 and c2
D = 0.0091076, p-value = 0.5996
alternative hypothesis: two-sided
	 

c1=log2(  (1+(all_m2[,33]))/(1+(all_m2[,17]) ) )
c2=log2(  (1+(all_m2[,32]))/(1+(all_m2[,16]) ) )
sd(c1)
sd(c2)
var.test(c1,c2)
[1] 2.794872
[1] 2.698236
	F test to compare two variances

data:  c1 and c2
F = 1.0729, num df = 14385, denom df = 14385, p-value = 2.445e-05
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 1.038411 1.108558
sample estimates:
ratio of variances 
          1.072911 
 ks.test(c1,c2)

	Two-sample Kolmogorov-Smirnov test

data:  c1 and c2
D = 0.024121, p-value = 0.0004635
alternative hypothesis: two-sided
# small p-val means: not from the same dist


c1=log2(  (1+(all_2h[,33]))/(1+(all_2h[,17]) ) )
c2=log2(  (1+(all_2h[,32]))/(1+(all_2h[,16]) ) )
sd(c1)
sd(c2)
var.test(c1,c2)
[1] 2.63588
[1] 2.64525
	F test to compare two variances

data:  c1 and c2
F = 0.99293, num df = 14629, denom df = 14629, p-value = 0.6678
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.9612629 1.0256371
sample estimates:
ratio of variances 
         0.9929285 
ks.test(c1,c2)

	Two-sample Kolmogorov-Smirnov test

data:  c1 and c2
D = 0.012509, p-value = 0.2025
alternative hypothesis: two-sided


c1=log2(  (1+(all_6h[,33]))/(1+(all_6h[,17]) ) )
c2=log2(  (1+(all_6h[,32]))/(1+(all_6h[,16]) ) )
sd(c1)
sd(c2)
var.test(c1,c2)
[1] 2.666194
[1] 2.695197
	F test to compare two variances

data:  c1 and c2
F = 0.97859, num df = 14620, denom df = 14620, p-value = 0.1908
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.9473761 1.0108405
sample estimates:
ratio of variances 
          0.978594 
ks.test(c1,c2)

	Two-sample Kolmogorov-Smirnov test

data:  c1 and c2
D = 0.020724, p-value = 0.00375
alternative hypothesis: two-sided
qqplot(c1,c2)
summary(c1)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-10.3300  -1.0730   0.8213   0.6244   2.3260  15.7700 
summary(c2)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-11.0800  -1.0370   0.9093   0.6892   2.4380  14.1200 


# only m0 sig. higher sd in KO
# only m2 sig. lower  sd in KO
# 6h: no sig. sd diff, but sig diff. distribution (KStest)


#@
# omit -1<log2FC<1
c1=log2(  (1+(all_m0[,33]))/(1+(all_m0[,17]) ) )
c2=log2(  (1+(all_m0[,32]))/(1+(all_m0[,16]) ) )
all_m0$polyVSrnaWT=c1
all_m0$polyVSrnaKO=c2
all2_m0=all_m0[ (abs(all_m0$polyVSrnaWT)>1)&(abs(all_m0$polyVSrnaKO)>1),]

c1=log2(  (1+(all_m1[,33]))/(1+(all_m1[,17]) ) )
c2=log2(  (1+(all_m1[,32]))/(1+(all_m1[,16]) ) )
all_m1$polyVSrnaWT=c1
all_m1$polyVSrnaKO=c2
all2_m1=all_m1[ (abs(all_m1$polyVSrnaWT)>1)|(abs(all_m1$polyVSrnaKO)>1),]

c1=log2(  (1+(all_m2[,33]))/(1+(all_m2[,17]) ) )
c2=log2(  (1+(all_m2[,32]))/(1+(all_m2[,16]) ) )
all_m2$polyVSrnaWT=c1
all_m2$polyVSrnaKO=c2
all2_m2=all_m2[ (abs(all_m2$polyVSrnaWT)>1)&(abs(all_m2$polyVSrnaKO)>1),]

c1=log2(  (1+(all_2h[,33]))/(1+(all_2h[,17]) ) )
c2=log2(  (1+(all_2h[,32]))/(1+(all_2h[,16]) ) )
all_2h$polyVSrnaWT=c1
all_2h$polyVSrnaKO=c2
all2_2h=all_2h[ (abs(all_2h$polyVSrnaWT)>1)&(abs(all_2h$polyVSrnaKO)>1),]

c1=log2(  (1+(all_6h[,33]))/(1+(all_6h[,17]) ) )
c2=log2(  (1+(all_6h[,32]))/(1+(all_6h[,16]) ) )
all_6h$polyVSrnaWT=c1
all_6h$polyVSrnaKO=c2
all2_6h=all_6h[ (abs(all_6h$polyVSrnaWT)>1)&(abs(all_6h$polyVSrnaKO)>1),]

> t.test(all_m0$polyVSrnaWT,all_m0$polyVSrnaKO,paired=T)

	Paired t-test

data:  all_m0$polyVSrnaWT and all_m0$polyVSrnaKO
t = 1.7501, df = 14787, p-value = 0.08012
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.001510982  0.026690399
sample estimates:
mean of the differences 
             0.01258971 
> t.test(all_m1$polyVSrnaWT,all_m1$polyVSrnaKO,paired=T)

	Paired t-test

data:  all_m1$polyVSrnaWT and all_m1$polyVSrnaKO
t = 4.6948, df = 14163, p-value = 2.694e-06
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.01598026 0.03888882
sample estimates:
mean of the differences 
             0.02743454 

> t.test(all_m2$polyVSrnaWT,all_m2$polyVSrnaKO,paired=T)

	Paired t-test

data:  all_m2$polyVSrnaWT and all_m2$polyVSrnaKO
t = -11.889, df = 14385, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.09013647 -0.06462082
sample estimates:
mean of the differences 
            -0.07737865 

> t.test(all_2h$polyVSrnaWT,all_2h$polyVSrnaKO,paired=T)

	Paired t-test

data:  all_2h$polyVSrnaWT and all_2h$polyVSrnaKO
t = -8.5666, df = 14629, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.07777672 -0.04881197
sample estimates:
mean of the differences 
            -0.06329435 

> t.test(all_6h$polyVSrnaWT,all_6h$polyVSrnaKO,paired=T)

	Paired t-test

data:  all_6h$polyVSrnaWT and all_6h$polyVSrnaKO
t = -9.9729, df = 14620, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.07762764 -0.05212534
sample estimates:
mean of the differences 
            -0.06487649 


#@ cluster
#m0
#wt
wt=cbind(log2(1+(all_m0[,33])),log2(1+(all_m0[,17])))
ko=cbind(log2(1+(all_m0[,32])),log2(1+(all_m0[,16])))

wiss={};  for (i in 2:20){k=kmeans(wt, centers=i, iter.max=200);wiss=c(wiss,k$tot.withinss);print(c(i, k$tot.withinss)) }
plot(wiss)
#opt
i=12;
k=kmeans(wt, centers=i, iter.max=200);
pdf("m0_wt_rna_vs_poly.pdf")
plot (wt[,1],wt[,2])
points(k$centers, col="red", pch="*")
dev.off()
i=12;
k2=kmeans(ko, centers=i, iter.max=200);
pdf("m0_ko_rna_vs_poly.pdf")
plot (ko[,1],ko[,2])
points(k2$centers, col="red", pch="*")
dev.off()

all2_m0=all_m0[(log2(1+all_m0[,17])<1)&(log2(1+all_m0[,16])<1),]
ks.test(log2(1+(all2_m0[,33])),log2(1+(all2_m0[,32])))
	Two-sample Kolmogorov-Smirnov test

data:  log2(1 + (all2_m0[, 33])) and log2(1 + (all2_m0[, 32]))
D = 0.042986, p-value = 5.815e-05
 wilcox.test(log2(1+(all2_m0[,33])),log2(1+(all2_m0[,32])))
	Wilcoxon rank sum test with continuity correction
data:  log2(1 + (all2_m0[, 33])) and log2(1 + (all2_m0[, 32]))
W = 16337000, p-value = 0.0386

all2_m0=all_m0[(log2(1+all_m0[,17])<2)&(log2(1+all_m0[,16])<2),]
wilcox.test(log2(1+(all2_m0[,33])),log2(1+(all2_m0[,32])))
	Wilcoxon rank sum test with continuity correction
data:  log2(1 + (all2_m0[, 33])) and log2(1 + (all2_m0[, 32]))
W = 28052000, p-value = 0.009731
alternative hypothesis: true location shift is not equal to 0

all2_m0=all_m0[(log2(1+all_m0[,32])<2)&(log2(1+all_m0[,33])<2),]
wilcox.test(log2(1+(all2_m0[,16])),log2(1+(all2_m0[,17])))
W = 9671800, p-value = 0.8287
all2_m0=all_m0[(log2(1+all_m0[,32])<1)&(log2(1+all_m0[,33])<1),]
wilcox.test(log2(1+(all2_m0[,16])),log2(1+(all2_m0[,17])))
W = 394130, p-value = 0.9896


all2_m1=all_m1[(log2(1+all_m1[,17])<2)&(log2(1+all_m1[,16])<2),]
wilcox.test(log2(1+(all2_m1[,33])),log2(1+(all2_m1[,32])))
W = 28448000, p-value = 0.8214

all2_m2=all_m2[(log2(1+all_m2[,17])<2)&(log2(1+all_m2[,16])<2),]
wilcox.test(log2(1+(all2_m2[,33])),log2(1+(all2_m2[,32])))
W = 23757000, p-value = 7.789e-60

all2_2h=all_2h[(log2(1+all_2h[,17])<2)&(log2(1+all_2h[,16])<2),]
wilcox.test(log2(1+(all2_2h[,33])),log2(1+(all2_2h[,32])))
W = 29820000, p-value = 0.3855

all2_6h=all_6h[(log2(1+all_6h[,17])<2)&(log2(1+all_6h[,16])<2),]
wilcox.test(log2(1+(all2_6h[,33])),log2(1+(all2_6h[,32])))
W = 32859000, p-value = 0.005982


wt=cbind(log2(1+(all_m1[,33])),log2(1+(all_m1[,17])))
ko=cbind(log2(1+(all_m1[,32])),log2(1+(all_m1[,16])))
k=kmeans(wt, centers=i, iter.max=200);
pdf("m1_wt_rna_vs_poly.pdf")
plot (wt[,1],wt[,2])
points(k$centers, col="red", pch="*")
dev.off()
i=12;
k2=kmeans(ko, centers=i, iter.max=200);
pdf("m1_ko_rna_vs_poly.pdf")
plot (ko[,1],ko[,2])
points(k2$centers, col="red", pch="*")
dev.off()

wt=cbind(log2(1+(all_m2[,33])),log2(1+(all_m2[,17])))
ko=cbind(log2(1+(all_m2[,32])),log2(1+(all_m2[,16])))
k=kmeans(wt, centers=i, iter.max=200);
pdf("m2_wt_rna_vs_poly.pdf")
plot (wt[,1],wt[,2])
points(k$centers, col="red", pch="*")
dev.off()
i=12;
k2=kmeans(ko, centers=i, iter.max=200);
pdf("m2_ko_rna_vs_poly.pdf")
plot (ko[,1],ko[,2])
points(k2$centers, col="red", pch="*")
dev.off()

wt=cbind(log2(1+(all_2h[,33])),log2(1+(all_2h[,17])))
ko=cbind(log2(1+(all_2h[,32])),log2(1+(all_2h[,16])))
k=kmeans(wt, centers=i, iter.max=200);
pdf("2h_wt_rna_vs_poly.pdf")
plot (wt[,1],wt[,2])
points(k$centers, col="red", pch="*")
dev.off()
i=12;
k2=kmeans(ko, centers=i, iter.max=200);
pdf("2h_ko_rna_vs_poly.pdf")
plot (ko[,1],ko[,2])
points(k2$centers, col="red", pch="*")
dev.off()

wt=cbind(log2(1+(all_6h[,33])),log2(1+(all_6h[,17])))
ko=cbind(log2(1+(all_6h[,32])),log2(1+(all_6h[,16])))
k=kmeans(wt, centers=i, iter.max=200);
pdf("6h_wt_rna_vs_poly.pdf")
plot (wt[,1],wt[,2])
points(k$centers, col="red", pch="*")
dev.off()
i=12;
k2=kmeans(ko, centers=i, iter.max=200);
pdf("6h_ko_rna_vs_poly.pdf")
plot (ko[,1],ko[,2])
points(k2$centers, col="red", pch="*")
dev.off()


changed<-subset(rx, (rx$PValue <0.05)&((rx$logFC<=-1.5)|(rx$logFC>=1.5)))
change_up<-subset(rx, (rx$PValue <0.05)&(rx$logFC>=1.5))
change_down<-subset(rx, (rx$PValue <0.05)&(rx$logFC<=-1.5))


qq=qqplot(rx$logFC,x$logFC)

fit <- lm(y ~ x,data=qq)
summary(fit)
abline(fit)

    qq2 <- list(x = qq$x[6000:12000], y = qq$y[6000:12000]) 
fit <- lm(y ~ x,data=qq2)
summary(fit)
abline(fit)

lm(formula = y ~ x, data = qq2)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.041206 -0.007692  0.002405  0.011306  0.022819 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.0377079  0.0001847  -204.1   <2e-16 ***
x            3.1979062  0.0025021  1278.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.01428 on 5999 degrees of freedom
Multiple R-squared:  0.9963,	Adjusted R-squared:  0.9963 
F-statistic: 1.633e+06 on 1 and 5999 DF,  p-value: < 2.2e-16

ks.test(rx$logFC,x$logFC)

	Two-sample Kolmogorov-Smirnov test

data:  rx$logFC and x$logFC
D = 0.21546, p-value < 2.2e-16
alternative hypothesis: two-sided

#@ omit rnaseq-DEGs from polysomeseq:

changed<-subset(rx, (rx$PValue <0.05)&((rx$logFC<=-1.5)|(rx$logFC>=1.5)))

plot(log2(y[,15]),log2(y[,16]), col= "grey", ylab="log2(RPKM_m0_wt_translation)", xlab = "log2(RPKM_m0_wt_translation)", main="m0 translation", pch=20,cex=0.8)
points(log2(change_up[,15]),log2(change_up[,16]), col= "green", pch=20,cex=0.8)
points(log2(change_down[,15]),log2(change_down[,16]), col= "red", pch=20,cex=0.8)

l={}
for (i in rownames(changed)){
 ii=as.numeric(which(rownames(x2)==i));
 print (c(i,ii,is.na(ii)))
 l=c(l,x[i])
 }
#xo=x[rownames
#An inner join of df1 and df2:
#Return only the rows in which the left table have matching keys in the right table.
#total <- merge(data frameA,data frameB,by="ID")
all=merge(rx,x,by="Row.names")
#merge(tt,z["symbol"],by="row.names",all.x=TRUE)
plot (all$logFC.x,all$logFC.y)

log2(1.2)= 0.263
log2(1.5)= 0.585

summary( ((all$FDR.x<=0.05)) | ((all$FDR.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   14616     172       0 
> summary( ((all$logFC.x>0.5)&(all$FDR.x<=0.05)) | ((all$logFC.x<(-0.5))&(all$FDR.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   14616     172       0 
> summary( ((all$logFC.x>0.5)&(all$FDR.x<=0.05)) | ((all$logFC.x<(-0.5))&(all$FDR.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   14616     172       0 
> summary( ((all$logFC.x>0.263)&(all$FDR.x<=0.05)) | ((all$logFC.x<(-0.263))&(all$FDR.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   14616     172       0 
> summary( ((all$logFC.x>0.263)&(all$PValue.x<=0.05)) | ((all$logFC.x<(-0.263))&(all$PValue.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   13740    1048       0 
> summary( ((all$logFC.x>0.55)&(all$PValue.x<=0.05)) | ((all$logFC.x<(-0.55))&(all$PValue.x<=0.05)) )
   Mode   FALSE    TRUE    NA's 
logical   13852     936       0 

rch=all[ ((all$logFC.x>0.263)&(all$PValue.x<=0.05)) | ((all$logFC.x<(-0.263))&(all$PValue.x<=0.05)) ,]
ruch=all[ !( ((all$logFC.x>0.263)&(all$PValue.x<=0.05)) | ((all$logFC.x<(-0.263))&(all$PValue.x<=0.05)) ) ,]

summary(rch$logFC.y)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-3.19300 -0.62170  0.07457 -0.03481  0.62480  3.37000
summary(ruch$logFC.y)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-5.06900 -0.54930  0.11920  0.03632  0.66070  4.68200 
> ks.test(ruch$logFC.y,rch$logFC.y)

	Two-sample Kolmogorov-Smirnov test

data:  ruch$logFC.y and rch$logFC.y
D = 0.046174, p-value = 0.03146
alternative hypothesis: two-sided

Warning message:
In ks.test(ruch$logFC.y, rch$logFC.y) :
  p-value will be approximate in the presence of ties
> qqplot(ruch$logFC.y,rch$logFC.y)

subrch=rch[abs(rch$logFC.y)<=2,]
summary(subrch$logFC.y)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-1.99500 -0.59890  0.08463  0.01127  0.61960  1.99600 
length(subrch$logFC.y)
[1] 1007

subruch=ruch[abs(ruch$logFC.y)<=2,]
summary(subruch$logFC.y)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-1.99800 -0.49790  0.13350  0.07073  0.65400  1.99900 
> length(subruch$logFC.y)
[1] 13210


ks.test(subruch$logFC.y,subrch$logFC.y)

	Two-sample Kolmogorov-Smirnov test

data:  subruch$logFC.y and subrch$logFC.y
D = 0.044891, p-value = 0.04605
alternative hypothesis: two-sided

Warning message:
In ks.test(subruch$logFC.y, subrch$logFC.y) :
  p-value will be approximate in the presence of ties


# 1st to 3rd quartile of locFC (to omit extremes):
subruch=ruch[abs(ruch$logFC.y)<=0.7,]
subruch=ruch[abs(ruch$logFC.y)<=0.7,]
ks.test(subruch$logFC.y,subrch$logFC.y)

	Two-sample Kolmogorov-Smirnov test

data:  subruch$logFC.y and subrch$logFC.y
D = 0.039111, p-value = 0.3835   # => same distribution
alternative hypothesis: two-sided

Warning message:
In ks.test(subruch$logFC.y, subrch$logFC.y) :
  p-value will be approximate in the presence of ties


plot(log2(all[,16]),log2(all[,32]))
#KO
cor.test(log2(all[,16]),log2(all[,33]))


	Pearson's product-moment correlation

data:  log2(all[, 16]) and log2(all[, 33])
t = 15.131, df = 14786, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.1075825 0.1393262
sample estimates:
      cor 
0.1234859 

#WT
 cor.test(log2(all[,17]),log2(all[,32]))
	Pearson's product-moment correlation

data:  log2(all[, 17]) and log2(all[, 32])
t = 19.021, df = 14786, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.1387725 0.1702379
sample estimates:
      cor 
0.1545444 

http://vassarstats.net/rdiff.html
Fisher r-to-z transformation
Sample A 	Sample B 	
Q
ra =
 na =
	
rb =
 nb =
z =
P   	one-tailed 0.0033	
two-tailed 	0.0065  # 

#on platia
require(psych)
tt=paired.r(0.1545444,0.1234859,NULL,14788)
> str(tt)
List of 4
 $ test: chr "test of difference between two independent correlations"
 $ z   : num 2.72
 $ p   : num 0.00646
 $ Call: language paired.r(xy = 0.1545444, xz = 0.1234859, yz = NULL, n = 14788)
 - attr(*, "class")= chr [1:2] "psych" "paired.r"

wt_abun2transl=log2((1+all[,17])/(1+all[,32]) )
ko_abun2transl=log2((1+all[,16])/(1+all[,33]) )
qqplot(wt_abun2transl,ko_abun2transl)
summary(ko_abun2transl-wt_abun2transl)
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-1268.0000    -0.3043     0.0019     1.1320     0.4902  1477.0000 


library(ellipse)

#@
EnsemblID logFC.x logCPM.x PValue.x FDR.x geneID.x genelength.x CPM_KO_m2_replicate_I.x CPM_KO_m2_replicate_II.x CPM_WT_m2_replicate_I.x CPM_WT_m2_replicate_II.x RPKM_KO_m2_replicate_I.x RPKM_KO_m2_replicate_II.x RPKM_WT_m2_replicate_I.x RPKM_WT_m2_replicate_II.x RPKM_KO_m2.x RPKM_WT_m2.x logFC.y logCPM.y PValue.y FDR.y geneID.y genelength.y CPM_KO_m2_replicate_I.y CPM_KO_m2_replicate_II.y CPM_WT_m2_replicate_I.y CPM_WT_m2_replicate_II.y RPKM_KO_m2_replicate_I.y RPKM_KO_m2_replicate_II.y RPKM_WT_m2_replicate_I.y RPKM_WT_m2_replicate_II.y RPKM_KO_m2.y RPKM_WT_m2.y TE_WT TE_KO
ENSMUSG00000022150 -0.132107113100571 10.2892120441771 0.331913609425355 1 Dab2 2776.27 133.116419612598 136.764913696411 120.019380338259 120.84081756306 47.9479372008481 49.2621084031491 43.2304424059111 43.5263204094197 48.6050228019986 43.3783814076654 1.18653374049147 9.28065118430811 0.00230156674087398 0.0943898662949966 Dab2 2776.27 13.6086794794468 13.504520740025 1.41953630806048 8.93584890025314 4.9017853016626 4.86426779096593 0.511310610301044 3.2186526887706 4.88302654631427 1.86498164953582 3.95325924620707 3.07585575808247