require(edgeR)
rnaseqMatrix0= read.table("transcript_count_matrix.csv",header=T,sep=",")
#all wt VS all ko
col_ordering = c(11,13,15,16,17,18,19,20,12,14,5,6,1,2,9,10,3,4,7,8)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","WT","WT","WT","WT","WT","WT","WT","WT","KO","KO","KO","KO","KO","KO","KO","KO","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
-1   285
0  73540
1    293
write.table(tTags, file="edgeRtranscripts-wtVSko3.csv")
write.table(x, file="edgeRtranscriptsExpr-wtVSko3.csv")

reczko@max:/data/images/proton/DKlab/orsalia/stringtie$ awk -f addGeneToEdger.awk
edgeRtranscriptsExpr-wtVSko3.csv
ref transcripts 936109
transcripts in edgeRtranscriptsExpr-wtVSko to edgeRtranscriptsExpr-wtVSko.withGID.csv : 74118


(# with several points per group, the anticorrelation for all points should not be as strict (cor < -0.1 instead of cor < -0.5)
# otherwise we seek transcipts that is alt-spliced in *ALL* points.
awk -f getEdgerDEresultsAndGetAltSplicedGroups2.awk edgeRtranscriptsExpr-wtVSko3.withGID.csv > edgeRtranscriptsExpr-wtVSko3.withGID.csv.alt_spliced.csv
ref transcripts 
reczko@max:/data/images/proton/DKlab/orsalia/stringtie$ wc !$
wc edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced.csv
21  3463 36090 edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced.csv
)

# test anticorr<=-0.5 in each condition:
awk -v verb=0 -f getEdgerDEresultsAndGetAltSplicedGroupsInAtLeastOneCondition1.awk edgeRtranscriptsExpr-wtVSko.withGID.csv > edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
wc edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
   20  1200 15717 edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
transcripts_wt_vs_ko_alt_spliced_in_at_least_one_condition.xlsx
   
library(ballgown)
library(RSkittleBrewer)
library(genefilter)
library(dplyr)
library(devtools)
pheno_data = read.csv("DKphenodata.csv") #pheno_data = read.csv("geuvadis_phenodata.csv")
samples=c("./ballgown/Ko01","./ballgown/Ko02","./ballgown/KO21","./ballgown/KO22","./ballgown/Ko61","./ballgown/Ko62","./ballgown/KOIFN1","./ballgown/KOIFN2","./ballgown/Ko1IL4","./ballgown/Ko2IL4","./ballgown/wt01","./ballgown/wt02","./ballgown/wt21","./ballgown/wt22","./ballgown/wt61","./ballgown/wt62","./ballgown/wtIFN1","./ballgown/wtIFN2","./ballgown/wt01IL4","./ballgown/wt02IL4"	 )
#@ save all:
bg_chrX = ballgown(samples=samples,  pData=pheno_data) #bg_chrX = ballgown(dataDir = "ballgown", samplePattern = "ERR", pData=pheno_data)
sampleNames(bg_chrX)
bg_chrX_filt = subset(bg_chrX,"rowVars(texpr(bg_chrX)) >1",genomesubset=TRUE)
full_table <- texpr(bg_chrX , 'all')
png("EdgeR all-wtVSko-Isoform expression in Angel2.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.961" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()
png("EdgeR all-wtVSko-Isoform expression in Mettl2.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.3064" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()
png("EdgeR all-wtVSko-Isoform expression in Mitf.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.13319" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()




col_ordering = c(11,13,5,6)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
#detags <- rownames(exp_study)[as.logical(de)]
#plotSmear(et, de.tags=detags)
#abline(h = c(-1, 1), col = "blue")
write.table(tTags, file="edgeRtranscripts-0h.csv")
write.table(x, file="edgeRtranscriptsExpr-0h.csv")


col_ordering = c(15,16,1,2)#2h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-2h.csv")
write.table(x, file="edgeRtranscriptsExpr-2h.csv")

col_ordering = c(17,18,9,10)#6h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-6h.csv")
write.table(x, file="edgeRtranscriptsExpr-6h.csv")

col_ordering = c(19,20,3,4)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-IFN.csv")
write.table(x, file="edgeRtranscriptsExpr-IFN.csv")

col_ordering = c(12,14,7,8)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study)
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-IL4.csv")
write.table(x, file="edgeRtranscriptsExpr-IL4.csv")

awk -f addGeneToEdger.awk all_pairs_edger.txt
edgeRtranscriptsExpr-0h.csv

transcripts in edgeRtranscriptsExpr-0h to edgeRtranscriptsExpr-0h.withGID.csv : 65358

awk -f getEdgerDEresultsAndGetAltSplicedNsig.awk edgeRtranscriptsExpr-0h.withGID.csv > foo
ref transcripts 
ngenes ok 139 avg#okIsoforms 2.45324

(for i in edgeRtr*.withGID.csv
> do
> echo $i
> awk -f getEdgerDEresultsAndGetAltSplicedBest1.awk $i > $i".alt_spliced.csv"
> wc $i".alt_spliced.csv"
> done
)

rm isoCorr-hist.csv
for i in edgeRtranscriptsExpr*.withGID.csv
 do
 echo $i
 awk -f /data/images/proton/DKlab/orsalia/stringtie/getEdgerDEresultsAndGetAltSplicedGroups-AntiCorr-Histogram1.awk $i > $i".hist"
done

h=read.table("/data/images/proton/DKlab/orsalia/stringtie/edgeRtranscriptsExpr-0h.withGID.csv.hist")
png("EdgeR-isoform expression-correlation-0h.png",width=1024,height=1024,pointsize = 22)
hist(h$V3,br=20,xlab="Isoform expression correlation",main="0h")
dev.off()

h=read.table("/data/images/proton/DKlab/orsalia/stringtie/edgeRtranscriptsExpr-2h.withGID.csv.hist")
png("EdgeR-isoform expression-correlation-2h.png",width=1024,height=1024,pointsize = 22)
hist(h$V3,br=20,xlab="Isoform expression correlation",main="0h")
dev.off()

h=read.table("/data/images/proton/DKlab/orsalia/stringtie/edgeRtranscriptsExpr-6h.withGID.csv.hist")
png("EdgeR-isoform expression-correlation-6h.png",width=1024,height=1024,pointsize = 22)
hist(h$V3,br=20,xlab="Isoform expression correlation",main="0h")
dev.off()

h=read.table("/data/images/proton/DKlab/orsalia/stringtie/edgeRtranscriptsExpr-IFN.withGID.csv.hist")
png("EdgeR-isoform expression-correlation-IFN.png",width=1024,height=1024,pointsize = 22)
hist(h$V3,br=20,xlab="Isoform expression correlation",main="0h")
dev.off()

h=read.table("/data/images/proton/DKlab/orsalia/stringtie/edgeRtranscriptsExpr-IL4.withGID.csv.hist")
png("EdgeR-isoform expression-correlation-IL4.png",width=1024,height=1024,pointsize = 22)
hist(h$V3,br=20,xlab="Isoform expression correlation",main="0h")
dev.off()



for i in edgeRtranscriptsExpr*.withGID.csv
 do
 echo $i
 awk -f getEdgerDEresultsAndGetAltSplicedGroups1.awk $i > $i".alt_spliced.csv"
  wc $i".alt_spliced.csv"
done

> > > > > edgeRtranscriptsExpr-0h.withGID.csv
   56   896 11521 edgeRtranscriptsExpr-0h.withGID.csv.alt_spliced.csv
edgeRtranscriptsExpr-2h.withGID.csv
   65  1040 13508 edgeRtranscriptsExpr-2h.withGID.csv.alt_spliced.csv
edgeRtranscriptsExpr-6h.withGID.csv
  42  672 8620 edgeRtranscriptsExpr-6h.withGID.csv.alt_spliced.csv
edgeRtranscriptsExpr-IFN.withGID.csv
   49   784 10014 edgeRtranscriptsExpr-IFN.withGID.csv.alt_spliced.csv
edgeRtranscriptsExpr-IL4.withGID.csv
  48  768 9963 edgeRtranscriptsExpr-IL4.withGID.csv.alt_spliced.csv



#to get MSTG.x : lookup genenames in /data/images/proton/DKlab/orsalia/stringtie/merged.gtf
  
png("EdgeR top1-0h-Isoform expression-0h in Zfp143.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.14498" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" , "Ko01" ,   "Ko02" ))
dev.off()  
png("EdgeR top1-0h-Isoform expression in Zfp143.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.14498" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  


png("EdgeR top1-2h-Isoform expression-2h in Kif5c.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.8743" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt21" ,   "wt22" , "KO21" ,   "KO22" ))
dev.off()  
png("EdgeR top1-2h-Isoform expression in Kif5c.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.8743" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

png("EdgeR top1-6h-Isoform expression in Katna1.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.1028" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  


png("EdgeR top1-IFN-Isoform expression in Gm12250.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.2216" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

png("EdgeR top1-IL4-Isoform expression in Dyrk2.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.1692" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

png("EdgeR Isoform expression in Ddx41.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.4205" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

png("EdgeR Isoform expression in Stk11ip.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.312" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

png("EdgeR Isoform expression in Mapkap1.png",width=4000,height=2000,pointsize = 22)
plotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.8671" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ))
dev.off()  

#add transcriptNames for lables: MR20072017
source("/data/results/tools/rnaseq/ballgown/ballgown/R/myplotTranscripts.R")

png("EdgeR Isoform expressionV2 in Mapkap1.png",width=4000,height=2000,pointsize = 22)
myplotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.8671" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ),blackBorders=F,labelTranscripts=T)
dev.off()


png("EdgeR Isoform expression in Mitf.png",width=4000,height=2000,pointsize = 22)
myplotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.13319" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ),blackBorders=F,labelTranscripts=T)
dev.off()

png("EdgeR Isoform expression in Abr.png",width=4000,height=2000,pointsize = 22)
myplotTranscripts(ballgown::geneIDs(bg_chrX)[match("MSTRG.2544" ,ballgown::geneIDs(bg_chrX))], bg_chrX,		samples=c("wt01" ,   "wt02" ,   "wt21" ,   "wt22" ,   "wt61" ,   "wt62" ,   "wtIFN1" , "wtIFN2" , "wt01IL4" ,"wt02IL4", "Ko01" ,   "Ko02" ,   "KO21" ,   "KO22" ,   "Ko61" ,   "Ko62" ,   "KOIFN1" , "KOIFN2" , "Ko1IL4" , "Ko2IL4" ),blackBorders=F,labelTranscripts=T)
dev.off()




