t=read.table("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
> summary(log2(na.omit(t$fc_vs_PHR24r.IgG.bam)))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-8.2500 -2.0410 -0.7585 -0.7537  0.4825  8.1740 
> hist(log2((t$fc_vs_PHR24r.IgG.bam)))
> hist(log2((t$fc_vs_PHR19r.Input.bam)))
> summary(log2((t$fc_vs_PHR19r.Input.bam)))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  -7.43   -1.58   -0.31   -0.25    1.01    8.32   46157 
plot(ecdf(log2((t$fc_vs_PHR19r.Input.bam))),col="green")
plot(ecdf(log2((t$fc_vs_PHR24r.IgG.bam))),add=T,col="blue")
my.grid <- seq(0.05,0.95, 0.05) 
my.xgrid <- seq(0.5,4, 0.5) 
abline(h=my.grid,v=my.xgrid, col="grey", lty=2) 
#boxplot(a, col="red", add = TRUE) 

vsIgG:   log2fc>2.5: top5%
vsInput: log2fc>2.8: top5%

png("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG_blue_PHR19r-Input_green.png")
 plot(ecdf(log2((t$fc_vs_PHR19r.Input.bam))),col="green")
plot(ecdf(log2((t$fc_vs_PHR24r.IgG.bam))),add=T,col="blue")
my.grid <- seq(0.05,0.95, 0.05) 
my.xgrid <- seq(0.5,4, 0.5) 
abline(h=my.grid,v=my.xgrid ,col="grey", lty=2) 
dev.off()

t=read.table("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
s=na.omit(t[(log2(t$fc_vs_PHR24r.IgG.bam)>=1)&(t$PHR23r.FUBP2.bam>=10),])
dim(s)
#[1] 2531   19
selectDE=order(s$fc_vs_PHR24r.IgG.bam, decreasing=T)#[1:100]
write.table(s[selectDE,],"RPKM_PHR23r-FUBP2.bam_vs_top5pct-PHR24r-IgG.csv",row.names=FALSE,col.names=T,quote=FALSE,sep="\t")

t=read.table("RPKM_PHR20r-bcatPoly.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
s=na.omit(t[(log2(t$fc_vs_PHR24r.IgG.bam)>=1)&(t$PHR20r.bcatPoly.bam>=10),])
dim(s)
#[1] 3293   19
selectDE=order(s$fc_vs_PHR24r.IgG.bam, decreasing=T)#[1:100]
write.table(s[selectDE,],"RPKM_PHR20r-bcatPoly.bam_vs_top5pct-PHR24r-IgG.csv",row.names=FALSE,col.names=T,quote=FALSE,sep="\t")

t=read.table("RPKM_PHR21r-bcatMono.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
s=na.omit(t[(log2(t$fc_vs_PHR24r.IgG.bam)>=1)&(t$PHR21r.bcatMono.bam>=10),])
dim(s)
#[1] 3632   19
selectDE=order(s$fc_vs_PHR24r.IgG.bam, decreasing=T)#[1:100]
write.table(s[selectDE,],"RPKM_PHR21r-bcatMono.bam_vs_top5pct-PHR24r-IgG.csv",row.names=FALSE,col.names=T,quote=FALSE,sep="\t")

t=read.table("RPKM_PHR22r-TCF4.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
s=na.omit(t[(log2(t$fc_vs_PHR24r.IgG.bam)>=1)&(t$PHR22r.TCF4.bam>=10),])
dim(s)

 selectDE=order(s$fc_vs_PHR24r.IgG.bam, decreasing=T)#[1:100]
write.table(s[selectDE,],"RPKM_PHR22r-TCF4.bam_vs_top5pct-PHR24r-IgG.csv",row.names=FALSE,col.names=T,quote=FALSE,sep="\t")



#@ clustering
require(mclust)
 setwd("/data/images/proton2/run341/rpkm/")
 t=read.table("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T)
r1=t[(t$RPKM_PHR23r.FUBP2.bam>=1.0),]
 x=(log2((r1$fc_vs_PHR24r.IgG.bam)))
 mod4 = densityMclust(x)
summary(mod4)
plot(mod4, what = "BIC")

png("FUBP2_vs_IgG_fc_density.png")
 plot(mod4, what = "density", data = x, breaks = 15,xlab="log2 foldchange FUBP2_vs_IgG")
 dev.off()
png("FUBP2_vs_IgG_fc_density.png")
 plot(mod4, what = "density", data = x, breaks = 80,xlab="log2 foldchange IgG_vs_Input")
  dev.off()
  

 c=read.table("RPKM_PHR24r-IgG.bam_vs_PHR19r-Input.bam",header=T)
r1=c[(c$RPKM_PHR24r.IgG.bam>=1.0),]
x=(log2((r1$fc_vs_PHR19r.Input.bam)))
 mod4 = densityMclust(x)
summary(mod4)
plot(mod4, what = "BIC")

png("FUBP2_vs_IgG_fc_density.png")
 plot(mod4, what = "density", data = x, breaks = 15,xlab="log2 foldchange FUBP2_vs_IgG")
 dev.off()
