
modscore_analyis=function(ms2,id){
m=ms2
m[m>1]=1
#o=0.0005
o=0.001
st=0.02
#st=0.01
l8=NULL;
lp8=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l8=c(l8,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp8=c(lp8,te$p.value);
}

topuniform=max(which(lp8>0.05)); #highest modscore that is not uniformly distributed
print (paste("modscore_cutoff for",id,l8[topuniform],lp8[topuniform],sep=" "));
x=cbind(l8,lp8)
colnames(x)=c("modscore","pvalue_AD_uniform_test");
write.table(x,paste("pvalue_AD_uniform_test_",id,".txt",sep=""),row.names=F)
png(paste("modscore-hist-",id,".png",sep=""))
hist(m,breaks=70,main=id,xlab="modscore")
dev.off()

png(paste("pvalue_AD_uniform_test_",id,".png",sep=""))
plot(l8,-log10(lp8),col="red",pch=20,cex=1,main=id,xlab="modscore",ylab="-log10(pvalue_AD_uniform_test)",xaxt="n")
axis(1,at=seq(0.5,1,by=0.05))
abline(h=-log10(0.05))
text(0.52,-log10(0.05)+0.14,"-log10(0.05)",cex=0.6)
dev.off()
}

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv",sep="\t")
ms=unlist(strsplit(as.character(t[,7]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,7]),"_"))[seq(8,length(ms),by=10)])
summary((ms))
#    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#  0.5000   0.8259   0.9443   0.9060   0.9979 116.8000
modscore_analyis(ms,"IFN")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-IL4.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.csv",sep="\t")
ms=unlist(strsplit(as.character(t[,7]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,7]),"_"))[seq(8,length(ms),by=10)])
summary((ms))
modscore_analyis(ms,"IL4")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"0hrep1")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"0hrep2")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-0hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"0hrep3")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"2hrep1")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"2hrep2")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-2hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"2hrep3")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep1.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"6hrep1")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep2.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"6hrep2")

t=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/sh-clusters-6hrep3.txt2.csv3.bed.gt5tc.gt0.25TtoC2.plus.noIGG",sep="\t")
ms=unlist(strsplit(as.character(t[,4]),"_"))
ms=as.numeric(unlist(strsplit(as.character(t[,4]),"_"))[seq(8,length(ms),by=11)])
summary((ms))
modscore_analyis(ms,"6hrep3")





t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])
modscore_analyis(ms2,"0h")

t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-2h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])
modscore_analyis(ms2,"2h")

t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-0h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])
modscore_analyis(ms2,"0h")


m=ms
m[m>1]=1
png("IFN-modscore-hist.png")
hist(m,breaks=70,main="IFN",xlab="modscore")
dev.off()
st=0.025
l=NULL;
lp=NULL;
for (x in seq(0.5,1-st,by=st)){
y=m[(m>x)&(m<=x+st)];
l=c(l,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp=c(lp,te$p.value);
}
plot(l,(lp))

st=0.05
l2=NULL;
lp2=NULL;
ll2=NULL;
for (x in seq(0.5,1-st,by=st)){
y=m[(m>x)&(m<=x+st)];
l2=c(l2,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp2=c(lp2,te$p.value);
ll2=c(ll2,length(y));
}
points(l2,(lp2),add=T,col="green")

st=0.01
l3=NULL;
lp3=NULL;
ll3=NULL;
for (x in seq(0.5,1-st,by=st)){
y=m[(m>x)&(m<=x+st)];
l3=c(l3,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp3=c(lp3,te$p.value);
ll3=c(ll3,length(y));
}
points(l3,lp3,,col="blue")


o=0.01
st=0.025
l2=NULL;
lp2=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l2=c(l2,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp2=c(lp2,te$p.value);
}
points(l2,(lp2),col="black")

o=0.005
st=0.025
l3=NULL;
lp3=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l3=c(l3,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp3=c(lp3,te$p.value);
}
points(l3,lp3,col="blue")
o=0.0025
st=0.025
l4=NULL;
lp4=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l4=c(l4,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp4=c(lp4,te$p.value);
}
points(l4,lp4,col="green")

o=0.001
st=0.025
l5=NULL;
lp5=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l5=c(l5,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp5=c(lp5,te$p.value);
}
points(l5,lp5,col="red")

o=0.0001
st=0.025
l6=NULL;
lp6=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l6=c(l6,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp6=c(lp6,te$p.value);
}
points(l6,lp6,col="blue",pch=".")

o=0.0001
st=0.02
l7=NULL;
lp7=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l7=c(l7,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp7=c(lp7,te$p.value);
}
points(l7,lp7,col="green",pch=".")

o=0.0005
st=0.02
l8=NULL;
lp8=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l8=c(l8,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp8=c(lp8,te$p.value);
}

png("pvalue_AD_uniform_test_IFN.png")
plot(l8,-log10(lp8),col="red",pch=20,cex=1,main="IFN",xlab="modscore",ylab="-log10(pvalue_AD_uniform_test)",xaxt="n")
axis(1,at=seq(0.5,1,by=0.05))
abline(h=-log10(0.05))
text(0.52,-log10(0.05)+0.14,"-log10(0.05)",cex=0.6)
dev.off()

t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])

m=ms2
m[m>1]=1
o=0.0005
st=0.02
l8=NULL;
lp8=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l8=c(l8,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp8=c(lp8,te$p.value);
}
png("modscore-hist-6h.png")
hist(m,breaks=70,main="6h",xlab="modscore")
dev.off()

png("pvalue_AD_uniform_test_6h.png")
plot(l8,-log10(lp8),col="red",pch=20,cex=1,main="6h",xlab="modscore",ylab="-log10(pvalue_AD_uniform_test)",xaxt="n")
axis(1,at=seq(0.5,1,by=0.05))
abline(h=-log10(0.05))
text(0.52,-log10(0.05)+0.14,"-log10(0.05)",cex=0.6)
dev.off()


t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])

m=ms2
m[m>1]=1
o=0.0005
st=0.02
l8=NULL;
lp8=NULL;
for (x in seq(0.5,1-st,by=o)){
y=m[(m>x)&(m<=x+st)];
l8=c(l8,x+0.5*st)
#print (c(x,x+st))
te=ad.test(y,"punif",x,x+st);
lp8=c(lp8,te$p.value);
}

png("pvalue_AD_uniform_test_6h.png")
plot(l8,-log10(lp8),col="red",pch=20,cex=1,main="6h",xlab="modscore",ylab="-log10(pvalue_AD_uniform_test)",xaxt="n")
axis(1,at=seq(0.5,1,by=0.05))
abline(h=-log10(0.05))
text(0.52,-log10(0.05)+0.14,"-log10(0.05)",cex=0.6)
dev.off()




ce=as.numeric(unlist(strsplit(as.character(t[,7]),"_"))[seq(6,length(ms2),by=10)])
summary ((ce))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   6.00    8.00   10.00   22.11   18.00  909.00 
quants=quantile(ms,seq(0,1,by=0.25))
t$qual= as.numeric(cut(ms,              quants,             include.lowest=TRUE))
write.table(t,"/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-IFN.txt2.csv2.bed.gt5tc.gt0.25TtoC2.plus.noIGG.anno.qual.csv",sep="\t")

t2=read.table("/data/images/proton/DKlab/mr/parclip/paralyzer/PARalyzer_v1_1b/res/recommended_settings2/sh-clusters-6h.gt0.25TtoC2.plus.noIGG.intersection-all.bed2.gt5tc.details.merged.maxAvg.bed.anno.csv",sep="\t")
ms2=unlist(strsplit(as.character(t2[,7]),"_"))
ms2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(8,length(ms2),by=11)])
summary(ms2)
    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.5000  0.8561  0.9601  0.9184  0.9986  1.9980 
ce2=as.numeric(unlist(strsplit(as.character(t2[,7]),"_"))[seq(6,length(ms2),by=11)])
summary ((ce2))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   6.00    8.00   12.00   28.81   21.00 1754.00




curve(x/0.7, add = TRUE, col = "violet")


library(ggplot2)
    set.seed(1)
    x<-runif(100,-1,1)
    dd<-data.frame(x)
    ks.test(x,"punif",-1,1)

    ed <- ecdf(dd$x)
    maxdiffidx <- which.max(abs(ed(dd$x)-punif(dd$x,-1,1)))
    maxdiffat <- dd$x[maxdiffidx]

    p<-ggplot(aes(x),data=dd)+stat_ecdf()+theme_bw()+stat_function(fun=punif,args=list(-1,1))
    p<-p+labs(title="ECDF and theoretical CDF")+geom_vline(xintercept=maxdiffat, lty=2)
    p

install.packages("nortest")
library(nortest)
ad.test(MYDATA)

install.packages("goftest")
library(goftest)
 x <- rnorm(10, mean=2, sd=1)
 ad.test(x, "pnorm", mean=2, sd=1)

	Anderson-Darling test of goodness-of-fit
	Null hypothesis: Normal distribution
	with parameters mean = 2, sd = 1

data:  x
An = 0.30794, p-value = 0.9308

> ad.test(x, "pnorm", mean=2, sd=2)

	Anderson-Darling test of goodness-of-fit
	Null hypothesis: Normal distribution
	with parameters mean = 2, sd = 2

data:  x
An = 1.08, p-value = 0.3162

> ad.test(x, "pnorm", mean=1, sd=2)

	Anderson-Darling test of goodness-of-fit
	Null hypothesis: Normal distribution
	with parameters mean = 1, sd = 2

data:  x
An = 2.5385, p-value = 0.04849

t$p.value