require(metaseqR)

the.contrasts=c(
"wt_vs_tg")

the.path="/data/images/proton2/run423/wwwDouniLab"

transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz")
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # If wish to use multiple cores
assign("VERBOSE",TRUE,envir=metaseqR:::meta.env)

# Read targets files
message("Reading targets file...")
targets <- read.targets("targets.txt4")

# Do the counting based with bam files in the targets file
r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann,r2c.out$counts)

metaseqr(
	counts=the.counts,
    sample.list=targets$samples,
    contrast=the.contrasts,
    annotation="embedded",
	gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
	id.col=4, # required with embedded annotation
	gc.col=5, # required with embedded annotation
	bt.col=8, # required with embedded annotation
	name.col=7, # required with embedded annotation
    org="custom",
    count.type="utr",
    normalization="deseq", # or whatever supported
    statistics="deseq", # or whatever supported, more than one also
    fig.format=c("png","pdf"),
    export.where=file.path(the.path,"metaseqr_run423d"),
    restrict.cores=0.5, # fraction of available cores to use
    qc.plots=c(
        "mds","biodetection","countsbio","saturation","readnoise","filtered",
        "correl","pairwise","boxplot","gcbias","lengthbias","meandiff",
        "meanvar","biodist","volcano","deheatmap"
    ),
    exon.filters=NULL,
    gene.filters=list(
       length=list(
            length=500
        ),
        avg.reads=list(
            average.per.bp=100,
            quantile=0.25
        ),
        expression=list(
            median=TRUE,
            mean=FALSE,
            quantile=NA,
            known=NA,
            custom=NA
        ),  # it's the default anyway
        biotype=get.defaults("biotype.filter","mm10")
    ),
    pcut=0.05, # only for the truncated significant list output, all results are exported anyway
    export.what=c("annotation","p.value","adj.p.value","fold.change","stats",
        "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added
    export.scale=c("log2","rpgm"),
    export.values="normalized",
    export.stats="mean"
)


#@
]0;/data/images/proton2/run423/wwwDouniLabreczko@max:/data/images/proton2/run423/wwwDouniLab$ R

R version 3.3.3 (2017-03-06) -- "Another Canoe"
Copyright (C) 2017 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
require(metaseqR)

the.contrasts=c(
"wt_vs_tg")

the.path="/data/images/proton2/run423/wwwDouniLab"

transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz")
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # If wish to use multiple cores
assign("VERBOSE",TRUE,envir=metaseqR:::meta.env)

# Read targets files
message("Reading targets file...")
targets <- read.targets("targets.txt2")

# Do the counting based with bam files in the targets file
r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann,r2c.out$counts)
metaseqr(
	counts=the.counts,
    sample.list=targets$samples,
    contrast=the.contrasts,
    annotation="embedded",
	gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
	id.col=4, # required with embedded annotation
	gc.col=5, # required with embedded annotation
	bt.col=8, # required with embedded annotation
	name.col=7, # required with embedded annotation
    org="custom",
    count.type="utr",
    normalization="deseq", # or whatever supported
    statistics="deseq", # or whatever supported, more than one also
    fig.format=c("png","pdf"),
    export.where=file.path(the.path,"metaseqr_run423b"),
    restrict.cores=0.5, # fraction of available cores to use
    qc.plots=c(
        "mds","biodetection","countsbio","saturation","readnoise","filtered",
        "correl","pairwise","boxplot","gcbias","lengthbias","meandiff",
        "meanvar","biodist","volcano","deheatmap"
    ),
    exon.filters=NULL,
    gene.filters=list(
       length=list(
            length=500
        ),
        avg.reads=list(
            average.per.bp=100,
            quantile=0.25
        ),
        expression=list(
            median=TRUE,
            mean=FALSE,
            quantile=NA,
            known=NA,
            custom=NA
        ),  # it's the default anyway
        biotype=get.defaults("biotype.filter","mm10")
    ),
    pcut=0.05, # only for the truncated significant list output, all results are exported anyway
    export.what=c("annotation","p.value","adj.p.value","fold.change","stats",
        "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added
    export.scale=c("log2","rpgm"),
    export.values="normalized",
    export.stats="mean"
)


require(metaseqR)

the.contrasts=c(
"wt_vs_tg")

the.path="/data/images/proton2/run423/wwwDouniLab"

transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz")
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # If wish to use multiple cores
assign("VERBOSE",TRUE,envir=metaseqR:::meta.env)

# Read targets files
message("Reading targets file...")
targets <- read.targets("targets.txt3")

# Do the counting based with bam files in the targets file
r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann,r2c.out$counts)

metaseqr(
	counts=the.counts,
    sample.list=targets$samples,
    contrast=the.contrasts,
    annotation="embedded",
	gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
	id.col=4, # required with embedded annotation
	gc.col=5, # required with embedded annotation
	bt.col=8, # required with embedded annotation
	name.col=7, # required with embedded annotation
    org="custom",
    count.type="utr",
    normalization="deseq", # or whatever supported
    statistics="deseq", # or whatever supported, more than one also
    fig.format=c("png","pdf"),
    export.where=file.path(the.path,"metaseqr_run423c"),
    restrict.cores=0.5, # fraction of available cores to use
    qc.plots=c(
        "mds","biodetection","countsbio","saturation","readnoise","filtered",
        "correl","pairwise","boxplot","gcbias","lengthbias","meandiff",
        "meanvar","biodist","volcano","deheatmap"
    ),
    exon.filters=NULL,
    gene.filters=list(
       length=list(
            length=500
        ),
        avg.reads=list(
            average.per.bp=100,
            quantile=0.25
        ),
        expression=list(
            median=TRUE,
            mean=FALSE,
            quantile=NA,
            known=NA,
            custom=NA
        ),  # it's the default anyway
        biotype=get.defaults("biotype.filter","mm10")
    ),
    pcut=0.05, # only for the truncated significant list output, all results are exported anyway
    export.what=c("annotation","p.value","adj.p.value","fold.change","stats",
        "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added
    export.scale=c("log2","rpgm"),
    export.values="normalized",
    export.stats="mean"
)

cd /data/images/proton2/run423/wwwDouniLab/hg19
ln -s /data/images/proton2/run423/tophat_hg19_033/sort_uniq.bam ED3R1_K471-wt1.bam
ln -s /data/images/proton2/run423/tophat_hg19_034/sort_uniq.bam ED3R8_K517-tg4.bam
ln -s /data/images/proton2/run423/tophat_hg19_042/sort_uniq.bam ED3R2_K513-wt2.bam
ln -s /data/images/proton2/run423/tophat_hg19_043/sort_uniq.bam ED3R3_K514-wt3.bam
ln -s /data/images/proton2/run423/tophat_hg19_044/sort_uniq.bam ED3R4_K518-wt4.bam
ln -s /data/images/proton2/run423/tophat_hg19_045/sort_uniq.bam ED3R5_K473-tg1.bam
ln -s /data/images/proton2/run423/tophat_hg19_046/sort_uniq.bam ED3R6_K483-tg2.bam
ln -s /data/images/proton2/run423/tophat_hg19_047/sort_uniq.bam ED3R7_K485-tg3.bam
cp ../reads.txt .
awk -f /data/images/proton/make-stranded-sampled-hg19-tracks1.awk reads.txt > make-stranded-sampled-hg19-tracks.sh
source make-stranded-sampled-hg19-tracks.sh


#@ 25072019
 require(metaseqR)

the.contrasts=c(
"wt_vs_tg")

the.path="/data/images/proton2/run423/wwwDouniLab"

transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz")
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # If wish to use multiple cores
assign("VERBOSE",TRUE,envir=metaseqR:::meta.env)

# Read targets files
message("Reading targets file...")
targets <- read.targets("targets.txt4")

# Do the counting based with bam files in the targets file
r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann,r2c.out$counts)

metaseqr(
	counts=the.counts,
    sample.list=targets$samples,
    contrast=the.contrasts,
    annotation="embedded",
	gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
	id.col=4, # required with embedded annotation
	gc.col=5, # required with embedded annotation
	bt.col=8, # required with embedded annotation
	name.col=7, # required with embedded annotation
    org="custom",
    count.type="utr",
    normalization="deseq", # or whatever supported
    statistics="deseq", # or whatever supported, more than one also
    fig.format=c("png","pdf"),
    export.where=file.path(the.path,"metaseqr_run423e"),
    restrict.cores=0.5, # fraction of available cores to use
    qc.plots=c(
        "mds","biodetection","countsbio","saturation","readnoise","filtered",
        "correl","pairwise","boxplot","gcbias","lengthbias","meandiff",
        "meanvar","biodist","volcano","deheatmap"
    ),
    exon.filters=NULL,
    gene.filters=list(
       length=list(
            length=500
        ),
        avg.reads=list(
            average.per.bp=100,
            quantile=0.25
        ),
        expression=list(
            median=TRUE,
            mean=FALSE,
            quantile=NA,
            known=NA,
            custom=NA
        ),  # it's the default anyway
        biotype=get.defaults("biotype.filter","mm10")
    ),
    pcut=0.05, # only for the truncated significant list output, all results are exported anyway
    export.what=c("annotation","p.value","adj.p.value","fold.change","stats",
        "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added
    export.scale=c("log2","rpgm"),
    export.values="normalized",
    export.stats="mean"
)

#@
#custom metaseqr heatmap
require(made4)
zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423d/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt')
dat=read.csv(zz,header=T,sep='\t')
dat2=dat
dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),]
cl=cbind(dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R8_K517,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485)
rownames(cl)=dat$gene_name
colnames(cl)=c("ED3R1_K471","ED3R2_K513","ED3R3_K514","ED3R8_K517","ED3R5_K473","ED3R6_K483","ED3R7_K485")

png("run423-DEG-heatmap-ave-dualScale.png",width=800,height=2600)
heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45)
dev.off()


#custom metaseqr heatmap
zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423e/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt')
dat=read.csv(zz,header=T,sep='\t')
dat2=dat
dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),]
cl=cbind(dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485)
rownames(cl)=dat$gene_name
colnames(cl)=c("ED3R1_K471_wt","ED3R2_K513_wt","ED3R3_K514_wt","ED3R5_K473_tg","ED3R6_K483_tg","ED3R7_K485_tg")

png("run423-DEG-heatmap-ave-dualScale-e.png",width=800,height=3200)
heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45)
dev.off()

zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt')
dat=read.csv(zz,header=T,sep='\t')
dat2=dat
dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),]
cl=cbind(dat$log2_normalized_counts_ED3R4_K518,dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R8_K517,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485)
rownames(cl)=dat$gene_name
colnames(cl)=c("ED3R4_K518_wt","ED3R1_K471_wt","ED3R2_K513_wt","ED3R3_K514_wt","ED3R8_K517_tg","ED3R5_K473_tg","ED3R6_K483_tg","ED3R7_K485_tg")

png("run423-DEG-heatmap-ave-dualScale-a.png",width=800,height=2600)
heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45)
dev.off()