require(edgeR)
rnaseqMatrix0= read.table("transcript_count_matrix.csv",header=T,sep=",")
colnames(rnaseqMatrix0)
#        1      2         3         4         5         6          7
# [1] "KO21"    "KO22"    "KOIFN1"  "KOIFN2"  "Ko01"    "Ko02"    "Ko1IL4"
#        8       9        10          11       12       13         14
#[8] "Ko2IL4"  "Ko61"    "Ko62"    "wt01"    "wt01IL4" "wt02"    "wt02IL4"
#     15       16         17        18        19         20
#[15] "wt21"    "wt22"    "wt61"    "wt62"    "wtIFN1"  "wtIFN2"
#all wt VS all ko
col_ordering = c(11,13,15,16,17,18,19,20,12,14,5,6,1,2,9,10,3,4,7,8)
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","WT","WT","WT","WT","WT","WT","WT","WT","KO","KO","KO","KO","KO","KO","KO","KO","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
-1   285
0  73540
1    293
write.table(tTags, file="edgeRtranscripts-wtVSko3.csv")
write.table(x, file="edgeRtranscriptsExpr-wtVSko3.csv")
# with several points per group, the anticorrelation for all points should not be as strict (cor < -0.1 instead of cor < -0.5)
# otherwise we seek transcipts that is alt-spliced in *ALL* points.
#awk -f getEdgerDEresultsAndGetAltSplicedGroups2.awk edgeRtranscriptsExpr-wtVSko3.withGID.csv > edgeRtranscriptsExpr-wtVSko3.withGID.csv.alt_spliced.csv
#ref transcripts 
#reczko@max:/data/images/proton/DKlab/orsalia/stringtie$ wc !$
#wc edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced.csv
#21  3463 36090 edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced.csv


# test anticorr<=-0.5 in each condition:
#awk -v verb=0 -f getEdgerDEresultsAndGetAltSplicedGroupsInAtLeastOneCondition1.awk edgeRtranscriptsExpr-wtVSko.withGID.csv > edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
#wc edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
#   20  1200 15717 edgeRtranscriptsExpr-wtVSko.withGID.csv.alt_spliced_in_GE_one_condition.csv
#transcripts_wt_vs_ko_alt_spliced_in_at_least_one_condition.xlsx



col_ordering = c(11,13,5,6)#0h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
#r <- rownames(topTags(et)$table)
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
#detags <- rownames(exp_study)[as.logical(de)]
#plotSmear(et, de.tags=detags)
#abline(h = c(-1, 1), col = "blue")
write.table(tTags, file="edgeRtranscripts-0h.csv")
write.table(x, file="edgeRtranscriptsExpr-0h.csv")


col_ordering = c(15,16,1,2)#2h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-2h.csv")
write.table(x, file="edgeRtranscriptsExpr-2h.csv")

col_ordering = c(17,18,9,10)#6h
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-6h.csv")
write.table(x, file="edgeRtranscriptsExpr-6h.csv")

col_ordering = c(19,20,3,4)#IFN
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-IFN.csv")
write.table(x, file="edgeRtranscriptsExpr-IFN.csv")

col_ordering = c(12,14,7,8)#IL4
rnaseqMatrix =rnaseqMatrix0[,col_ordering];rnaseqMatrix = rnaseqMatrix[rowSums(rnaseqMatrix)>=2,];conditions = factor(c("WT","WT","KO","KO"));
exp_study = DGEList(counts=rnaseqMatrix, group=conditions);exp_study = calcNormFactors(exp_study);exp_study = estimateCommonDisp(exp_study);exp_study = estimateTagwiseDisp(exp_study);et = exactTest(exp_study,pair=c("WT","KO"))
tTags = topTags(et,n=NULL,adjust.method = "fdr")
r <- rownames(tTags)
c=cpm(exp_study)[r, order(exp_study$samples$group)]
x=cbind(as.data.frame(tTags),c)
summary(de <- decideTestsDGE(et, p=0.05))
write.table(tTags, file="edgeRtranscripts-IL4.csv")
write.table(x, file="edgeRtranscriptsExpr-IL4.csv")
