setwd("/home/tzanos/Desktop/HuR/M1vsM2")
require(edgeR)

gl=read.table("/mnt/max/b/genomics_facility/DKlab/mr/tzanos/mm10-GRCm38.p6/genome/mm10b2.gene-lenghts.txt",sep="\t")
gl.names=gl$V1;
rownames(gl)=gl$V1
colnames(gl)=c("ID","geneID","genelength");

#rnaseq data
rnaseqMatrix1= read.table("/mnt/max/b/genomics_facility/DKlab/mr/tzanos/mm10-GRCm38.p6/rnaseq/featurecounts/gene_counts_merged_k20_mm10.txt",header=T,sep="\t",skip=1)
rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]];
rownames(rnaseqMatrix0)=as.character(rnaseqMatrix1$Geneid);
colnames(rnaseqMatrix0)
# [1] "wt01_trm_k20.bam"    "wt02_trm_k20.bam"    "wt21_trm_k20.bam"   
# [4] "wt22_trm_k20.bam"    "wt61_trm_k20.bam"    "wt62_trm_k20.bam"   
# [7] "wtIFN_1_trm_k20.bam" "wtIFN_2_trm_k20.bam" "wtIL4_1_trm_k20.bam"
#[10] "wtIL4_2_trm_k20.bam" "KO01_trm_k20.bam"    "KO02_trm_k20.bam"   
#[13] "KO21_trm_k20.bam"    "KO22_trm_k20.bam"    "KO61_trm_k20.bam"   
#[16] "KO62_trm_k20.bam"    "KOIFN_1_trm_k20.bam" "KOIFN_2_trm_k20.bam"
#[19] "KOIL4_1_trm_k20.bam" "KOIL4_2_trm_k20.bam"


col_ordering = c(9,10,7,8)#WT_M1vsM2
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("IFN","IFN","IL4","IL4"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-WT_M1vsM2.csv",quote=FALSE,row.names=FALSE)
rx_m1=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}

#[1] "wtIL4_1_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     469   20510 
#[1] "wtIL4_2_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     709   20270 
#[1] "wtIFN_1_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical    1037   19942 
#[1] "wtIFN_2_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     668   20311 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}

#[1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7948   13031 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    7906   13073 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8472   12507 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8487   12492 


col_ordering = c(19,20,17,18)#KO_M1vsM2
rnaseqMatrix =rnaseqMatrix0[,col_ordering];
rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),]
conditions = factor(c("IFN","IFN","IL4","IL4"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
rx=rx[,!(names(rx) %in% "ID")]
rownames(rx)=rx$Row.names;
rx=rx[,!(names(rx) %in% "Row.names")]
rx2=merge(x=rx,y=c,by="row.names",all.x=TRUE)
rx=rx2[,!(names(rx2) %in% "ID")]
rx=cbind(rx,1000*rx[,8]/rx$genelength,1000*rx[,9]/rx$genelength,1000*rx[,10]/rx$genelength,1000*rx[,11]/rx$genelength)
rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15]))
write.table(rx, file="edgeRgenesExpr-KO_M1vsM2.csv",quote=FALSE,row.names=FALSE)
rx_m2=rx
for (i in 8:11){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>0)
  print(tt)
}

#[1] "KOIL4_1_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     514   20577 
#[1] "KOIL4_2_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     518   20573 
#[1] "KOIFN_1_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     757   20334 
#[1] "KOIFN_2_trm_k20.bam"
#Mode   FALSE    TRUE 
#logical     922   20169 

for (i in 12:15){
  print (colnames(rx)[i])
  tt=summary(rx[,i]>1)
  print(tt)
}

#1] "1000 * rx[, 8]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8092   12999 
#[1] "1000 * rx[, 9]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8086   13005 
#[1] "1000 * rx[, 10]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8706   12385 
#[1] "1000 * rx[, 11]/rx$genelength"
#Mode   FALSE    TRUE 
#logical    8690   12401


#polysomeseq
poly1= read.table("/mnt/max/b/genomics_facility/DKlab/mr/tzanos/mm10-GRCm38.p6/polysomeseq/featurecounts/gene_counts_merged_mm10.txt",header=T,sep="\t",skip=1)
poly0 =poly1[,7:dim(poly1)[2]];
rownames(poly0)=poly1$Geneid;
colnames(poly0)
#[1] "KO_0h_replicate_IIb"        "KO_0h_replicate_Ib"         "KO_2h_replicate_IIb"        "KO_2h_replicate_Ib"        
#[5] "KO_6h_replicate_IIb"        "KO_6h_replicate_Ib"         "KO_IFN.gamma_replicate_IIb" "KO_IFN_replicate_Ib"       
#[9] "KO_IL4_replicate_IIb"       "KO_IL4_replicate_Ib"        "WT_0h_replicate_IIb"        "WT_0h_replicate_Ib"        
#[13] "WT_2h_replicate_IIb"        "WT_2h_replicate_Ib"         "WT_6h_replicate_IIb"        "WT_6h_replicate_Ib"        
#[17] "WT_IFN.gamma_replicate_IIb" "WT_IFN.gamma_replicate_Ib"  "WT_IL4_replicate_IIb"       "WT_IL4_replicate_Ib"    


col_ordering = c(19,20,17,18)#WT_M1vsM2
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("IFN","IFN","IL4","IL4"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-WT_M1vsM2.csv")
x_m1=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}

#[1] "WT_IL4_replicate_IIb"
#Mode   FALSE    TRUE 
#logical    1379   17851 
#[1] "WT_IL4_replicate_Ib"
#Mode   FALSE    TRUE 
#logical     514   18716 
#[1] "WT_IFN.gamma_replicate_IIb"
#Mode   FALSE    TRUE 
#logical     616   18614 
#[1] "WT_IFN.gamma_replicate_Ib"
#Mode   FALSE    TRUE 
#logical    1220   18010 

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}

#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3895   15335 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3604   15626 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2355   16875 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3687   15543 

summary(de <- decideTestsDGE(et, p=0.05))
#        IL4-IFN
#Down       113
#NotSig   18957
#Up         160

col_ordering = c(9,10,7,8)#KO_M1vsM2
poly =poly0[,col_ordering];
poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),]
conditions = factor(c("IFN","IFN","IL4","IL4"));
exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE)
x=x[,!(names(x) %in% "ID")]
rownames(x)=x$Row.names;
x=x[,!(names(x) %in% "Row.names")]
x2=merge(x=x,y=c,by="row.names",all.x=TRUE)
x=x2[,!(names(x2) %in% "ID")]
x=cbind(x,1000*x[,8]/x$genelength,1000*x[,9]/x$genelength,1000*x[,10]/x$genelength,1000*x[,11]/x$genelength)
x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15]))
write.table(x, file="edgeRgenesPolyExpr-KO_M1vsM2.csv")
x_m2=x
for (i in 8:11){
  print (colnames(x)[i])
  tt=summary(x[,i]>0)
  print(tt)
}

#[1] "KO_IL4_replicate_IIb"
#Mode   FALSE    TRUE 
#logical    1415   20451 
#[1] "KO_IL4_replicate_Ib"
#Mode   FALSE    TRUE 
#logical     800   21066 
#[1] "KO_IFN.gamma_replicate_IIb"
#Mode   FALSE    TRUE 
#logical    1012   20854 
#[1] "KO_IFN_replicate_Ib"
#Mode   FALSE    TRUE 
#logical     299   21567 

for (i in 12:15){
  print (colnames(x)[i])
  tt=summary(x[,i]>1)
  print(tt)
}

#[1] "1000 * x[, 8]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3464   18402 
#[1] "1000 * x[, 9]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3370   18496 
#[1] "1000 * x[, 10]/x$genelength"
#Mode   FALSE    TRUE 
#logical    3353   18513 
#[1] "1000 * x[, 11]/x$genelength"
#Mode   FALSE    TRUE 
#logical    2615   19251 

#write.table(x, file="edgeRgenesExpr-IL4.csv")
summary(de <- decideTestsDGE(et, p=0.05))
#       IL4-IFN
#Down       377
#NotSig   20981
#Up         508


# merge rnaseq and polysomeseq
require(psych)
all_m1=merge(rx_m1,x_m1,by="Row.names")
all_m2=merge(rx_m2,x_m2,by="Row.names")

write.csv(all_m1, file="all_WT.csv",row.names=F)
write.csv(all_m2, file="all_KO.csv",row.names=F)