#only PCG #ENSMUSG00000079102 Dab2 1797 protein_coding #ENSMUSG00000022150 Dab2 2776.27 protein_coding setwd("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq-vs-polysomeseq/") require(edgeR) bt=read.table("/data/results/reference/mmu/Mus_musculus.NCBIM37.64-toMM9.transcript-gene_name.txt",sep="\t") rownames(bt)=bt$V2; %bt=bt[,(names(x) %in% c("V2","V4")] bt2=cbind(substring(bt[,2],2),as.character(bt[,4])) colnames(bt2)=c("EnsemblID","biotype") rownames(bt2)=bt2[,1] bt4=bt2[bt2[,2]=="protein_coding",] #bt4=bt2[bt2$biotype=="protein_coding",] bt3=bt2[!duplicated(rownames(bt2)),] rownames(bt3)=bt3[,1] gl=read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/deg/Mus_musculus.NCBIM37.64-toMM9.gene-lenghts.txt",sep="\t") gl.names=gl$V1; rownames(gl)=gl$V1 colnames(gl)=c("ID","geneID","genelength"); #poly1= read.table("gene_counts_merged_k20_NCBIM37.64.txt",header=T,sep="\t",skip=1) poly1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/Polysomes/deg/gene_counts_merged_NCBIM37.64_id.txt",header=T,sep="\t",skip=1) poly0 =poly1[,7:dim(poly1)[2]]; rownames(poly0)=poly1$Geneid; col_ordering = c(1,2,11,12)#0h poly =poly0[,col_ordering]; poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),] #poly2=merge(x=poly,y=bt3,by="row.names",all.x=TRUE) poly=poly[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #x=cbind(as.data.frame(tTags),r) #x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3]) x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(x)=x$Row.names x=x[,!(names(x) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=x,y=i,by="row.names",all.x=TRUE) x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength) x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15])) colnames(x)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_0h_replicate_I", "CPM_KO_0h_replicate_II", "CPM_WT_0h_replicate_I", "CPM_WT_0h_replicate_II", "RPKM_KO_0h_replicate_I", "RPKM_KO_0h_replicate_II", "RPKM_WT_0h_replicate_I", "RPKM_WT_0h_replicate_II", "RPKM_KO_0h", "RPKM_WT_0h") #x1=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE) write.table(x, file="edgeRgenesPolyExpr-0h_pcg.csv",quote=FALSE,row.names=FALSE,sep="\t") x_m0=x print ("P-m0") #-1 means WT > KO # 1 means KO > WT summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(x)[i]) tt=summary(x[,i]>0) print(tt) } for (i in 12:15){ print (colnames(x)[i]) tt=summary(x[,i]>1) print(tt) } col_ordering = c(3,4,13,14)#2h poly =poly0[,col_ordering]; poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),] poly=poly[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #x=cbind(as.data.frame(tTags),r) #x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3]) x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(x)=x$Row.names x=x[,!(names(x) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=x,y=i,by="row.names",all.x=TRUE) x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength) x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15])) colnames(x)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_2h_replicate_I", "CPM_KO_2h_replicate_II", "CPM_WT_2h_replicate_I", "CPM_WT_2h_replicate_II", "RPKM_KO_2h_replicate_I", "RPKM_KO_2h_replicate_II", "RPKM_WT_2h_replicate_I", "RPKM_WT_2h_replicate_II", "RPKM_KO_2h", "RPKM_WT_2h")#2h #x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE) write.table(x, file="edgeRgenesPolyExpr-2h_pcg.csv",quote=FALSE,row.names=FALSE) x_2h=x print ("P-2h") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(x)[i]) tt=summary(x[,i]>0) print(tt) } for (i in 12:15){ print (colnames(x)[i]) tt=summary(x[,i]>1) print(tt) } col_ordering = c(5,6,15,16)#6h poly =poly0[,col_ordering]; poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),] poly=poly[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #x=cbind(as.data.frame(tTags),r) #x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3]) x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(x)=x$Row.names x=x[,!(names(x) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=x,y=i,by="row.names",all.x=TRUE) x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength) x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15])) colnames(x)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_6h_replicate_I", "CPM_KO_6h_replicate_II", "CPM_WT_6h_replicate_I", "CPM_WT_6h_replicate_II", "RPKM_KO_6h_replicate_I", "RPKM_KO_6h_replicate_II", "RPKM_WT_6h_replicate_I", "RPKM_WT_6h_replicate_II", "RPKM_KO_6h", "RPKM_WT_6h")#6h #x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE) write.table(x, file="edgeRgenesPolyExpr-6h_pcg.csv",quote=FALSE,row.names=FALSE) x_6h=x print ("P-6h") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(x)[i]) tt=summary(x[,i]>0) print(tt) } for (i in 12:15){ print (colnames(x)[i]) tt=summary(x[,i]>1) print(tt) } col_ordering = c(7,8,17,18)#IFN poly =poly0[,col_ordering]; poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),] poly=poly[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #x=cbind(as.data.frame(tTags),r) #x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3]) x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(x)=x$Row.names x=x[,!(names(x) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=x,y=i,by="row.names",all.x=TRUE) x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength) x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15])) colnames(x)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_m1_replicate_I", "CPM_KO_m1_replicate_II", "CPM_WT_m1_replicate_I", "CPM_WT_m1_replicate_II", "RPKM_KO_m1_replicate_I", "RPKM_KO_m1_replicate_II", "RPKM_WT_m1_replicate_I", "RPKM_WT_m1_replicate_II", "RPKM_KO_m1", "RPKM_WT_m1")#m1 #x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE) write.table(x, file="edgeRgenesPolyExpr-IFN_pcg.csv") print ("P-m1") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) x_m1=x for (i in 8:11){ print (colnames(x)[i]) tt=summary(x[,i]>0) print(tt) } for (i in 12:15){ print (colnames(x)[i]) tt=summary(x[,i]>1) print(tt) } summary(de <- decideTestsDGE(et, p=0.05)) #-1 24 #0 19748 #1 34 col_ordering = c(9,10,19,20)#IL4 poly =poly0[,col_ordering]; poly = poly[(rowSums(poly[,1:2])>=2)&(rowSums(poly[,3:4])>=2),] poly=poly[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=poly, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #x=cbind(as.data.frame(tTags),r) #x=cbind(as.data.frame(tTags), gl[r,2],gl[r,3]) x=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(x)=x$Row.names x=x[,!(names(x) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=x,y=i,by="row.names",all.x=TRUE) x=cbind(x1,1000*x1[,8]/x$genelength,1000*x1[,9]/x$genelength,1000*x1[,10]/x$genelength,1000*x1[,11]/x$genelength) x=cbind(x,0.5*(x[,12]+x[,13]),0.5*(x[,14]+x[,15])) colnames(x)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_m2_replicate_I", "CPM_KO_m2_replicate_II", "CPM_WT_m2_replicate_I", "CPM_WT_m2_replicate_II", "RPKM_KO_m2_replicate_I", "RPKM_KO_m2_replicate_II", "RPKM_WT_m2_replicate_I", "RPKM_WT_m2_replicate_II", "RPKM_KO_m2", "RPKM_WT_m2")#m2 #x=merge(x=x,y=bt2,by="EnsemblID",all.x=TRUE) #x=x[!duplicated(x),] write.table(x, file="edgeRgenesPolyExpr-IL4_pcg.csv") x_m2=x print ("P-m2") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) #dim(x_m2) for (i in 8:11){ print (colnames(x)[i]) tt=summary(x[,i]>0) print(tt) } for (i in 12:15){ print (colnames(x)[i]) tt=summary(x[,i]>1) print(tt) } #write.table(x, file="edgeRgenesExpr-IL4_pcg.csv") summary(de <- decideTestsDGE(et, p=0.05)) #-1 215 #0 18093 #1 106 #summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) #rnaseq data rnaseqMatrix1= read.table("~/bak/doc/fleming/kafasla/DKlab/mr/rnaseq/stringtie2/gene_counts_merged_k20_NCBIM37.64_id.txt",header=T,sep="\t",skip=1) rnaseqMatrix0 =rnaseqMatrix1[,7:dim(rnaseqMatrix1)[2]]; rownames(rnaseqMatrix0)=rnaseqMatrix1$Geneid; col_ordering = c(11,12,1,2)#0h rnaseqMatrix =rnaseqMatrix0[,col_ordering]; rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),] #rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,] rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) #exp2=vst(exp_study) tTags = topTags(et,n=NULL,adjust.method = "fdr") #r <- rownames(topTags(et)$table) r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] #rx=cbind(as.data.frame(tTags), gl[rownames(tTags),2],gl[rownames(tTags),3],c,1000*c/gl[rownames(tTags),3]) rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(rx)=rx$Row.names rx=rx[,!(names(rx) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=rx,y=i,by="row.names",all.x=TRUE) rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength) rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15])) colnames(rx)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_0h_replicate_I", "CPM_KO_0h_replicate_II", "CPM_WT_0h_replicate_I", "CPM_WT_0h_replicate_II", "RPKM_KO_0h_replicate_I", "RPKM_KO_0h_replicate_II", "RPKM_WT_0h_replicate_I", "RPKM_WT_0h_replicate_II", "RPKM_KO_0h", "RPKM_WT_0h") #rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE) write.table(rx, file="edgeRgenesExpr-0h_pcg.csv",quote=FALSE,row.names=FALSE) rx_m0=rx print ("R-m0") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(rx)[i]) tt=summary(rx[,i]>0) print(tt) } for (i in 12:15){ print (colnames(rx)[i]) tt=summary(rx[,i]>1) print(tt) } col_ordering = c(13,14,3,4)#2h rnaseqMatrix =rnaseqMatrix0[,col_ordering]; rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),] #rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,] rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(rx)=rx$Row.names rx=rx[,!(names(rx) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=rx,y=i,by="row.names",all.x=TRUE) rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength) rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15])) colnames(rx)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_2h_replicate_I", "CPM_KO_2h_replicate_II", "CPM_WT_2h_replicate_I", "CPM_WT_2h_replicate_II", "RPKM_KO_2h_replicate_I", "RPKM_KO_2h_replicate_II", "RPKM_WT_2h_replicate_I", "RPKM_WT_2h_replicate_II", "RPKM_KO_2h", "RPKM_WT_2h")#2h #rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE) write.table(rx, file="edgeRgenesExpr-2h_pcg.csv",quote=FALSE,row.names=FALSE) rx_2h=rx print ("R-2h") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(rx)[i]) tt=summary(rx[,i]>0) print(tt) } for (i in 12:15){ print (colnames(rx)[i]) tt=summary(rx[,i]>1) print(tt) } col_ordering = c(15,16,5,6)#6h rnaseqMatrix =rnaseqMatrix0[,col_ordering]; rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),] #rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,] rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(rx)=rx$Row.names rx=rx[,!(names(rx) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=rx,y=i,by="row.names",all.x=TRUE) rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength) rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15])) colnames(rx)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_6h_replicate_I", "CPM_KO_6h_replicate_II", "CPM_WT_6h_replicate_I", "CPM_WT_6h_replicate_II", "RPKM_KO_6h_replicate_I", "RPKM_KO_6h_replicate_II", "RPKM_WT_6h_replicate_I", "RPKM_WT_6h_replicate_II", "RPKM_KO_6h", "RPKM_WT_6h")#6h #rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE) write.table(rx, file="edgeRgenesExpr-6h_pcg.csv",quote=FALSE,row.names=FALSE) rx_6h=rx print ("R-6h") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(rx)[i]) tt=summary(rx[,i]>0) print(tt) } for (i in 12:15){ print (colnames(rx)[i]) tt=summary(rx[,i]>1) print(tt) } col_ordering = c(17,18,7,8)#IFN rnaseqMatrix =rnaseqMatrix0[,col_ordering]; rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),] #rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,] rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(rx)=rx$Row.names rx=rx[,!(names(rx) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=rx,y=i,by="row.names",all.x=TRUE) rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength) rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15])) colnames(rx)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_m1_replicate_I", "CPM_KO_m1_replicate_II", "CPM_WT_m1_replicate_I", "CPM_WT_m1_replicate_II", "RPKM_KO_m1_replicate_I", "RPKM_KO_m1_replicate_II", "RPKM_WT_m1_replicate_I", "RPKM_WT_m1_replicate_II", "RPKM_KO_m1", "RPKM_WT_m1")#m1 #rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE) write.table(rx, file="edgeRgenesExpr-IFN_pcg.csv",quote=FALSE,row.names=FALSE) rx_m1=rx print ("R-m1") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) for (i in 8:11){ print (colnames(rx)[i]) tt=summary(rx[,i]>0) print(tt) } for (i in 12:15){ print (colnames(rx)[i]) tt=summary(rx[,i]>1) print(tt) } col_ordering = c(19,20,9,10)#IL4 rnaseqMatrix =rnaseqMatrix0[,col_ordering]; rnaseqMatrix = rnaseqMatrix[(rowSums(rnaseqMatrix[,1:2])>=2)&(rowSums(rnaseqMatrix[,3:4])>=2),] #rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,] rnaseqMatrix=rnaseqMatrix[rownames(poly) %in% bt4[,1],] conditions = factor(c("KO","KO","WT","WT")); exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO")) tTags = topTags(et,n=NULL,adjust.method = "fdr") r <- rownames(tTags) c=cpm(exp_study)[r, order(exp_study$samples$group)] rx=merge(x=as.data.frame(tTags),y=gl,by="row.names",all.x=TRUE) rownames(rx)=rx$Row.names rx=rx[,!(names(rx) %in% c("Row.names","ID"))] i=as.data.frame(c) x1=merge(x=rx,y=i,by="row.names",all.x=TRUE) rx=cbind(x1,1000*x1[,8]/rx$genelength,1000*x1[,9]/rx$genelength,1000*x1[,10]/rx$genelength,1000*x1[,11]/rx$genelength) rx=cbind(rx,0.5*(rx[,12]+rx[,13]),0.5*(rx[,14]+rx[,15])) colnames(rx)=c("EnsemblID", "logFC", "logCPM", "PValue", "FDR", "geneID", "genelength", "CPM_KO_m2_replicate_I", "CPM_KO_m2_replicate_II", "CPM_WT_m2_replicate_I", "CPM_WT_m2_replicate_II", "RPKM_KO_m2_replicate_I", "RPKM_KO_m2_replicate_II", "RPKM_WT_m2_replicate_I", "RPKM_WT_m2_replicate_II", "RPKM_KO_m2", "RPKM_WT_m2")#m2 #rx=merge(x=rx,y=bt2,by="EnsemblID",all.x=TRUE) write.table(rx, file="edgeRgenesExpr-IL4_pcg.csv",quote=FALSE,row.names=FALSE) rx_m2=rx print ("R-m2") summary(de <- decideTestsDGE(et, p=0.05,adjust.method="none")) summary(de <- decideTestsDGE(et, p=0.05,adjust.method="BH")) summary(de <- decideTestsDGE(et, p=0.1,adjust.method="BH")) all_m0=merge(x=rx_m0,y=x_m0,by="EnsemblID",all.x=T,all.y=T) write.table(all_m0, file="rna_poly_m0_pcg.csv",quote=FALSE,row.names=FALSE) all_m1=merge(x=rx_m1,y=x_m1,by="EnsemblID",all.x=T,all.y=T) write.table(all_m1, file="rna_poly_m1_pcg.csv",quote=FALSE,row.names=FALSE) all_m2=merge(x=rx_m2,y=x_m2,by="EnsemblID",all.x=T,all.y=T) write.table(all_m2, file="rna_poly_m2_pcg.csv",quote=FALSE,row.names=FALSE) all_2h=merge(x=rx_2h,y=x_2h,by="EnsemblID",all.x=T,all.y=T) write.table(all_2h, file="rna_poly_2h_pcg.csv",quote=FALSE,row.names=FALSE) all_6h=merge(x=rx_6h,y=x_6h,by="EnsemblID",all.x=T,all.y=T) write.table(all_6h, file="rna_poly_6h_pcg.csv",quote=FALSE,row.names=FALSE) #write DEG tables deg=all_m0[(all_m0$PValue.x<=0.05)&(!is.na(all_m0$PValue.x)),] write.table(deg, file="rna_poly_m0_rna_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m0[(all_m0$PValue.y<=0.05)&(!is.na(all_m0$PValue.y)),] write.table(deg, file="rna_poly_m0_poly_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),] write.table(deg, file="rna_poly_m0_rna_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m0[(all_m0$FDR.y<=0.05)&(!is.na(all_m0$FDR.y)),] write.table(deg, file="rna_poly_m0_poly_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m0[(all_m0$FDR.x<=0.1)&(!is.na(all_m0$FDR.x)),] write.table(deg, file="rna_poly_m0_rna_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m0[(all_m0$FDR.y<=0.1)&(!is.na(all_m0$FDR.y)),] write.table(deg, file="rna_poly_m0_poly_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$PValue.x<=0.05)&(!is.na(all_m1$PValue.x)),] write.table(deg, file="rna_poly_m1_rna_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$PValue.y<=0.05)&(!is.na(all_m1$PValue.y)),] write.table(deg, file="rna_poly_m1_poly_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$FDR.x<=0.05)&(!is.na(all_m1$FDR.x)),] write.table(deg, file="rna_poly_m1_rna_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$FDR.y<=0.05)&(!is.na(all_m1$FDR.y)),] write.table(deg, file="rna_poly_m1_poly_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$FDR.x<=0.1)&(!is.na(all_m1$FDR.x)),] write.table(deg, file="rna_poly_m1_rna_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m1[(all_m1$FDR.y<=0.1)&(!is.na(all_m1$FDR.y)),] write.table(deg, file="rna_poly_m1_poly_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$PValue.x<=0.05)&(!is.na(all_m2$PValue.x)),] write.table(deg, file="rna_poly_m2_rna_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$PValue.y<=0.05)&(!is.na(all_m2$PValue.y)),] write.table(deg, file="rna_poly_m2_poly_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),] write.table(deg, file="rna_poly_m2_rna_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$FDR.y<=0.05)&(!is.na(all_m2$FDR.y)),] write.table(deg, file="rna_poly_m2_poly_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$FDR.x<=0.1)&(!is.na(all_m2$FDR.x)),] write.table(deg, file="rna_poly_m2_rna_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_m2[(all_m2$FDR.y<=0.1)&(!is.na(all_m2$FDR.y)),] write.table(deg, file="rna_poly_m2_poly_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$PValue.x<=0.05)&(!is.na(all_2h$PValue.x)),] write.table(deg, file="rna_poly_2h_rna_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$PValue.y<=0.05)&(!is.na(all_2h$PValue.y)),] write.table(deg, file="rna_poly_2h_poly_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$FDR.x<=0.05)&(!is.na(all_2h$FDR.x)),] write.table(deg, file="rna_poly_2h_rna_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$FDR.y<=0.05)&(!is.na(all_2h$FDR.y)),] write.table(deg, file="rna_poly_2h_poly_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$FDR.x<=0.1)&(!is.na(all_2h$FDR.x)),] write.table(deg, file="rna_poly_2h_rna_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_2h[(all_2h$FDR.y<=0.1)&(!is.na(all_2h$FDR.y)),] write.table(deg, file="rna_poly_2h_poly_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$PValue.x<=0.05)&(!is.na(all_6h$PValue.x)),] write.table(deg, file="rna_poly_6h_rna_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$PValue.y<=0.05)&(!is.na(all_6h$PValue.y)),] write.table(deg, file="rna_poly_6h_poly_deg_pval_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$FDR.x<=0.05)&(!is.na(all_6h$FDR.x)),] write.table(deg, file="rna_poly_6h_rna_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$FDR.y<=0.05)&(!is.na(all_6h$FDR.y)),] write.table(deg, file="rna_poly_6h_poly_deg_fdr_0.05_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$FDR.x<=0.1)&(!is.na(all_6h$FDR.x)),] write.table(deg, file="rna_poly_6h_rna_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) deg=all_6h[(all_6h$FDR.y<=0.1)&(!is.na(all_6h$FDR.y)),] write.table(deg, file="rna_poly_6h_poly_deg_fdr_0.1_pcg.csv",quote=FALSE,row.names=FALSE) # t-tests th=2 lpWT_m2=all_m2[ abs(all_m2$logFC.x) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.443 -0.504 -0.019 0.045 0.535 4.545 8977 > "m2" "2" "0.0494822664366724" mean of x "1.17288624805893e-09" "0.0452957984838306" > > > > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.774 -0.555 0.096 0.002 0.617 4.256 8587 > "m0" "2" "3.19818151415381e-08" mean of x "0.810309934175411" "0.0019136835382783" > > > > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -6.770 -0.448 -0.035 -0.027 0.382 3.916 9357 > "m1" "2" "8.79425760596691e-07" mean of x "3.09697740165175e-05" "-0.0273096781715256" > > > > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.598 -0.452 0.091 0.072 0.623 7.008 8733 > > > "2h" "2" "6.23729881313274e-27" mean of x "1.38760985581717e-20" "0.0723421917547784" > > > > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.640 -0.496 0.040 0.038 0.568 3.940 8787 > "6h" "2" "1.18482827467189e-07" mean of x "1.1678226675821e-07" "0.0381097926498994" th=1 lpWT_m2=all_m2[ abs(all_m2$logFC.x) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.443 -0.509 -0.029 0.031 0.509 4.545 8431 > > 33.33333% 66.66667% -0.3348161 0.2960241 > > > > > > "m0" "1" "7.509499334158e-26" mean of x "4.82640811533809e-07" "0.0412198490545182" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.774 -0.490 0.141 0.041 0.642 4.256 7767 > > 33.33333% 66.66667% -0.2444547 0.4717079 > > > > > > "m1" "1" "2.84266304567023e-07" mean of x "1.48084055374561e-05" "-0.0286993232161985" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -6.770 -0.447 -0.036 -0.029 0.377 3.916 8904 > > 33.33333% 66.66667% -0.2853840 0.2222257 > > > > > > "2h" "1" "1.69358944273302e-34" mean of x "2.19144725032914e-26" "0.0852285137378491" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.598 -0.427 0.106 0.085 0.629 7.008 8081 > > 33.33333% 66.66667% -0.2601956 0.4386564 > > > > > > "6h" "1" "3.29785605186336e-06" mean of x "2.03969438430146e-06" "0.0347666362617721" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.640 -0.496 0.034 0.035 0.562 3.940 8215 > > 33.33333% 66.66667% -0.3053082 0.3658209 # th=2 "m2" "2" "0.0494822664366724" mean of x "1.17288624805893e-09" "0.0452957984838306" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.443 -0.504 -0.019 0.045 0.535 4.545 8977 > > 33.33333% 66.66667% -0.3256543 0.3159896 > > > > > > "m0" "2" "3.19818151415381e-08" mean of x "0.810309934175411" "0.0019136835382783" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.774 -0.555 0.096 0.002 0.617 4.256 8587 > > 33.33333% 66.66667% -0.2912019 0.4424496 > > > > > > "m1" "2" "8.79425760596691e-07" mean of x "3.09697740165175e-05" "-0.0273096781715256" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -6.770 -0.448 -0.035 -0.027 0.382 3.916 9357 > > 33.33333% 66.66667% -0.2856956 0.2253795 > > > > > > "2h" "2" "6.23729881313274e-27" mean of x "1.38760985581717e-20" "0.0723421917547784" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.598 -0.452 0.091 0.072 0.623 7.008 8733 > > 33.33333% 66.66667% -0.2835306 0.4265741 > > > > > > "6h" "2" "1.18482827467189e-07" mean of x "1.1678226675821e-07" "0.0381097926498994" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.640 -0.496 0.040 0.038 0.568 3.940 8787 > > 33.33333% 66.66667% -0.3000163 0.3705821 #th=3 "m2" "3" "0.031224031575992" mean of x "3.10055276239341e-10" "0.0468285628683791" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.443 -0.503 -0.018 0.047 0.536 4.545 9033 > > 33.33333% 66.66667% -0.3250144 0.3188448 > > > > > > "m0" "3" "9.6085567615028e-06" mean of x "0.386987446505878" "-0.00687208460571757" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.774 -0.566 0.088 -0.007 0.613 4.256 8706 > > 33.33333% 66.66667% -0.3042622 0.4361162 > > > > > > "m1" "3" "1.62965905062589e-06" mean of x "5.01368089871722e-05" "-0.0265688458369084" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -6.770 -0.448 -0.034 -0.027 0.384 3.916 9396 > > 33.33333% 66.66667% -0.2852705 0.2259270 > > > > > > "2h" "3" "3.65868329000971e-26" mean of x "5.50459016130191e-20" "0.0707860096243342" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.598 -0.455 0.090 0.071 0.623 7.008 8829 > > 33.33333% 66.66667% -0.2854987 0.4247847 > > > > > > "6h" "3" "1.3825302516371e-07" mean of x "1.20901415891605e-07" "0.0379636026611981" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.640 -0.497 0.040 0.038 0.568 3.940 8853 > > 33.33333% 66.66667% -0.3009515 0.3704489 # "m2" "9999999999" "0.0293507640900841" mean of x "2.65960199510844e-10" "0.0469971782428288" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.443 -0.502 -0.018 0.047 0.536 4.545 9042 > > 33.33333% 66.66667% -0.3250011 0.3190915 > > > > > > "m0" "9999999999" "2.83098838378808e-05" mean of x "0.280945917931849" "-0.00854888605770309" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.774 -0.568 0.086 -0.009 0.612 4.256 8766 > > 33.33333% 66.66667% -0.3079590 0.4349307 > > > > > > "m1" "9999999999" "1.987077242163e-06" mean of x "5.83334975016764e-05" "-0.0263353460365235" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -6.770 -0.448 -0.033 -0.026 0.384 3.916 9402 > > 33.33333% 66.66667% -0.2851090 0.2259498 > > > > > > "2h" "9999999999" "4.20407148848072e-26" mean of x "5.23654407347374e-20" "0.070794213112866" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -4.598 -0.455 0.090 0.071 0.623 7.008 8863 > > 33.33333% 66.66667% -0.2857200 0.4247101 > > > > > > "6h" "9999999999" "1.16526527878564e-07" mean of x "1.02443985236801e-07" "0.0381501042039266" > Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -3.640 -0.497 0.040 0.038 0.568 3.940 8901 > > 33.33333% 66.66667% -0.3008691 0.3706407 l1={};l2={};l3={} x=seq(0.01,1,0.01); for (th in x){ lpWT_m2=all_m2[ abs(all_m2$logFC.x)-6)&(all_m1$logFC.x<6)&(all_m1$logFC.y>-6)&(all_m1$logFC.y<6),] cor.test(lim_m1$logFC.y,lim_m1$logFC.x) t = -0.5662, df = 14159, p-value = 0.5713 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.02122711 0.01171323 sample estimates: cor -0.004758232 lim_m1=all_m1[ (all_m1$logFC.x>-4)&(all_m1$logFC.x<4)&(all_m1$logFC.y>-4)&(all_m1$logFC.y<4),] cor.test(lim_m1$logFC.y,lim_m1$logFC.x) t = -0.6534, df = 14150, p-value = 0.5135 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.02196657 0.01098400 sample estimates: cor -0.005492776 lim_m1=all_m1[ (all_m1$logFC.x>-2)&(all_m1$logFC.x<2)&(all_m1$logFC.y>-2)&(all_m1$logFC.y<2),] cor.test(lim_m1$logFC.y,lim_m1$logFC.x) t = -1.9938, df = 13877, p-value = 0.0462 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.0335499580 -0.0002854185 sample estimates: cor -0.01692237 #@a very slight but significant ANTIcorrelation for m1 at abs(logFC) <2 lim_m2=all_m2[ (all_m2$logFC.x>-6)&(all_m2$logFC.x<6)&(all_m2$logFC.y>-6)&(all_m2$logFC.y<6),] cor.test(lim_m2$logFC.y,lim_m2$logFC.x) t = -0.5834, df = 14380, p-value = 0.5596 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.02120682 0.01147941 sample estimates: cor -0.004865003 lim_m2=all_m2[ (all_m2$logFC.x>-4)&(all_m2$logFC.x<4)&(all_m2$logFC.y>-4)&(all_m2$logFC.y<4),] cor.test(lim_m2$logFC.y,lim_m2$logFC.x) t = -0.70469, df = 14372, p-value = 0.481 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.02222392 0.01047104 sample estimates: cor -0.005878011 lim_m2=all_m2[ (all_m2$logFC.x>-2)&(all_m2$logFC.x<2)&(all_m2$logFC.y>-2)&(all_m2$logFC.y<2),] cor.test(lim_m2$logFC.y,lim_m2$logFC.x) t = -0.055904, df = 13870, p-value = 0.9554 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.01711578 0.01616667 sample estimates: cor -0.0004746831 lim_m2=all_m2[ (all_m2$logFC.x>-1)&(all_m2$logFC.x<1)&(all_m2$logFC.y>-1)&(all_m2$logFC.y<1),] cor.test(lim_m2$logFC.y,lim_m2$logFC.x) t = -2.3985, df = 10747, p-value = 0.01648 alternative hypothesis: true correlation is not equal to 0 95 percent confidence interval: -0.04201649 -0.00422704 sample estimates: cor -0.02313003 #@a very slight but significant ANTIcorrelation for m1 at abs(logFC) <1 #low Poly expression, test RNA lpWT_m2=all_m2[ (abs(log2(1+all_m2$RPKM_WT_m2.x))<1),] lpKO_m2=all_m2[ (abs(log2(1+all_m2$RPKM_KO_m2.x))<1),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F) #low Poly expression, test RNA lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<2),] lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<2),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F) #low Poly expression, test RNA lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<5),] lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<5),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F) lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<1)&((log2(1+all_m2$RPKM_WT_m2.x))>0.5),] lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<1)&((log2(1+all_m2$RPKM_WT_m2.x))>0.5),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F) lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))<4)&((log2(1+all_m2$RPKM_WT_m2.y))<4),] lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))<4)&((log2(1+all_m2$RPKM_WT_m2.y))<4),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F,range=1) lpWT_m2=all_m2[ ((log2(1+all_m2$RPKM_WT_m2.x))>4)|((log2(1+all_m2$RPKM_WT_m2.y))>4),] lpKO_m2=all_m2[ ((log2(1+all_m2$RPKM_KO_m2.x))>4)|((log2(1+all_m2$RPKM_WT_m2.y))>4),] lpWT_m2_rna=log2(1+lpWT_m2$RPKM_WT_m2.y) lpKO_m2_rna=log2(1+lpKO_m2$RPKM_KO_m2.y) summary(lpWT_m2_rna) summary(lpKO_m2_rna) wilcox.test(lpWT_m2_rna,lpKO_m2_rna) boxplot(lpWT_m2_rna,lpKO_m2_rna,names=c("WT_m2","KO_m2"),ylab="log2(RPKM_rna)",outline=F,range=1) #Translational efficiency ( see: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5388891/ Fig. 3) #paired RNA-seq and Ribo-seq datasets from different human cell lines were examined for relationship between mRNA expression level and TE (calculated as the (log2) ratio between densities of ribosome footprint and RNA-seq reads). Lower panels: comparisons between the 10% of genes with lowest and highest expression levels are presented; p values calculated using Wilcoxon’s test. all_m2$TranslEff_WT=log2((1+all_m2$RPKM_WT_m2.x)/(1+all_m2$RPKM_WT_m2.y)) plot(all_m2$TE_WT,log2(1+all_m2$RPKM_WT_m2.y),pch=".") cor.test(log2(1+all_m2$RPKM_WT_m2.y),all_m2$TE_WT) all_m2$TranslEff_KO=log2(1+all_m2$RPKM_KO_m2.x)-log2(1+all_m2$RPKM_KO_m2.y) plot(all_m2$TE_KO,log2(1+all_m2$RPKM_KO_m2.y),pch=".") cor.test(log2(1+all_m2$RPKM_KO_m2.y),all_m2$TE_KO) #@ volcano plots deg=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),] deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),] a=all_m2; plot(a$logFC.x,-log10(a$PValue.x)) # FC histogram deg=all_m2[(all_m2$FDR.x<=0.05)&(!is.na(all_m2$FDR.x)),] hdata=hist(deg$logFC.x,breaks=30,plot=F) hdata$counts[hdata$counts>0] <- log(hdata$counts[hdata$counts>0], 2) plot(hdata) deg2=all_m0[(all_m0$FDR.x<=0.05)&(!is.na(all_m0$FDR.x)),] hdata=hist(deg2$logFC.x,breaks=30,plot=F) hdata$counts[hdata$counts>0] <- log(hdata$counts[hdata$counts>0], 2) plot(hdata)