#@ only run 347
# Differential expression run for run 347 only: reads are counted over the
# mm10 transcript annotation and the resulting table is handed to metaseqr()
# as an "embedded" annotation.

# Attach metaseqR before any of its functions are called; without this the
# section only works in a session where the package is already loaded.
library(metaseqR)

# Root directory under which the metaseqr() export directory is created
the.path <- "/data/images/proton2/run347/wwwDKlab"

# Contrasts passed to metaseqr()
# NOTE(review): condition names presumably match those in the targets file --
# confirm against targets.run347.txt
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_CL4",
  "K_vs_KL4",
  "CL4_vs_KL4"
)

# Read transcript data from external file and index it by transcript id
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # if wish to use multiple cores
# Set the VERBOSE flag inside metaseqR's internal meta.env environment
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.run347.txt")

# Do the counting based on the bam files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  # The four *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run347"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    # it's the default anyway
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut only affects the truncated significant list output; all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@ merged run 343+347
# Counting plus metaseqr() run driven by "targets.txt"; output goes to the
# "metaseqr_quantseq_run347merged343" directory below the.path.
# NOTE(review): the.path and the.contrasts.1 are not assigned in this section
# and must already exist in the session.

# Transcript annotation (mm10), with transcript ids as row names
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# Settings needed because read2count() is called outside the pipeline
multic <- check.parallel(0.5)
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Sample sheet
message("Reading targets file...")
targets <- read.targets("targets.txt")

# Count reads for the bam files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Annotation columns followed by the count columns ("embedded" annotation)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Statistical analysis and report
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  # The *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run347merged343"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # Cutoff for the truncated significant list only; everything is exported
  pcut = 0.05,
  # With pandora, add "meta.p.value" and "adj.meta.p.value" here
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@ merged run 343+347 only on GE3AREsite-geneset
# Re-runs metaseqr() with the gene file from the "mm10_more_than_2AREs"
# directory. No counting is done here: the.counts, targets, the.path and
# the.contrasts.1 are taken from the session as they are.

metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  # The *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343GE3AREsite-geneset"
  ),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # Cutoff for the truncated significant list only; everything is exported
  pcut = 0.05,
  # With pandora, add "meta.p.value" and "adj.meta.p.value" here
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)


#run343...

# AREsites only
# Counting from "targets.txt2" followed by a metaseqr() run against the gene
# file in the "mm10_more_than_2AREs" directory.
# NOTE(review): transcript.data, multic, the.path and the.contrasts.1 are not
# assigned here; confirm the.path points at the intended run directory before
# running, since the export directory is named after run 343.

message("Reading targets file...")
targets <- read.targets("targets.txt2")

# Count reads for the bam files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Annotation columns followed by the count columns ("embedded" annotation)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Statistical analysis and report
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  # The *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343cV2"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # Cutoff for the truncated significant list only; everything is exported
  pcut = 0.05,
  # With pandora, add "meta.p.value" and "adj.meta.p.value" here
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)


#@
# Run 343 with annotation="download" (org "mm10", refdb "ensembl"): metaseqr()
# is given the path to the targets file "targets.txt2" directly instead of a
# precomputed counts table.
library(metaseqR)

the.path <- "/data/images/proton2/run343/www"
the.contrasts.1 <- c("C_vs_K", "C_vs_CL4", "K_vs_KL4", "CL4_vs_KL4")

metaseqr(
  sample.list = file.path(the.path, "targets.txt2"),
  contrast = the.contrasts.1,
  annotation = "download",
  org = "mm10",
  refdb = "ensembl",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343b"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  pcut = 0.05,
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.counts.table = TRUE,
  report.top = 0.05
)

#@
# Run 343 with annotation="download" and the six-contrast set (condition
# names containing "_L4"); samples come from "targets.txt" under the.path.
# This call additionally requests the "meandiff" and "meanvar" QC plots.
library(metaseqR)

the.path <- "/data/images/proton2/run343/www"

the.contrasts.1 <- c(
  "C_vs_K", "C_vs_C_L4", "C_vs_K_L4",
  "K_vs_C_L4", "K_vs_K_L4", "C_L4_vs_K_L4"
)

metaseqr(
  sample.list = file.path(the.path, "targets.txt"),
  contrast = the.contrasts.1,
  annotation = "download",
  org = "mm10",
  refdb = "ensembl",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "meandiff",
    "meanvar", "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  pcut = 0.05,
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.counts.table = TRUE,
  report.top = 0.05
)


# Shell transcript (not R code), kept as a record of how the ARE gene set was built:
# reczko@max:/data/images/proton2/run343/www/are$ awk -f getGenesWith2AREs1.awk Mus_musculus_ATTTA.gtf > Mus_musculus_ATTTA_more_than_one_AREs.txt
#   42469  254814 3822693 Mus_musculus_ATTTA_more_than_once_AREs.txt
#    5856    5856  111264 Mus_musculus_ATTTA_more_than_once.txt

# reczko@max:/data/images/proton2/run343/www/metaseqr_quantseq_run343b$ zcat /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt.gz | awk -f ../intersect_with_GE2AREs1.awk > /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt
# gzip -9 /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt

#@
# Run 343 on the gene file from the "mm10_more_than_2AREs" directory: reads
# are counted from "targets.txt2" and passed to metaseqr() as an "embedded"
# annotation; output goes to "metaseqr_quantseq_run343c" below the.path.
library(metaseqR)

# NOTE(review): this path uses "/data/images/proton/" while the other
# sections of this file use "/data/images/proton2/" -- confirm which is meant.
the.path <- "/data/images/proton/run343/www"

# Earlier six-contrast set, kept for reference only. It used to be assigned
# here and was immediately overwritten by the four-contrast set below, so the
# assignment was dead code.
# the.contrasts.1 <- c(
#   "C_vs_K",
#   "C_vs_C_L4",
#   "C_vs_K_L4",
#   "K_vs_C_L4",
#   "K_vs_K_L4",
#   "C_L4_vs_K_L4"
# )

# Contrasts actually used by this run
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_CL4",
  "K_vs_KL4",
  "CL4_vs_KL4"
)

# Read transcript data from external file and index it by transcript id
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # if wish to use multiple cores
# Set the VERBOSE flag inside metaseqR's internal meta.env environment
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.txt2")

# Do the counting based on the bam files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  # The four *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343c"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    # it's the default anyway
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut only affects the truncated significant list output; all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)


# Pasted result rows (not R code): header line, then the same gene as reported in outputs "b" and "c"
# chromosome	start	end	gene_id	gc_content	strand	gene_name	biotype	p-value_deseq	FDR_deseq	log2_normalized_fold_change_CL4_vs_KL4	log2_normalized_counts_DK3R7-C1L4	log2_normalized_counts_DK3R8-C2L4	rpgm_normalized_counts_DK3R7-C1L4	rpgm_normalized_counts_DK3R8-C2L4	log2_normalized_counts_DK3R10b-K23L4	log2_normalized_counts_DK3R11-K2L4	log2_normalized_counts_DK3R12-K3L4	rpgm_normalized_counts_DK3R10b-K23L4	rpgm_normalized_counts_DK3R11-K2L4	rpgm_normalized_counts_DK3R12-K3L4	LN	AR	MD	MN	QN	KN	CM	BT
# b:
# chrY	1096861	1245759	ENSMUSG00000068457	0.3693	-	Uty	protein_coding	0.00993922428272647	1	2.45066140900957	1	1.58496250072116	2.54731640217031e-05	5.09463280434063e-05	3.16992500144231	3.58496250072116	4.32192809488736	0.000203785312173625	0.000280204804238735	0.00048399011641236	0	0	0	0	0	0	0	0
# c:
# chrY	1096861	1245759	ENSMUSG00000068457	0.3693	-	Uty	protein_coding	0.020947931980266	1	2.15200309344505	1.5	5.09463280434063e-05	3.68009657367524	0.000314169022934339	1	2	2.54731640217031e-05	7.64194920651094e-05	3.16992500144231	3.70043971814109	4.16992500144231	0.000203785312173625	0.000305677968260438	0.000433043788368953	0	0	0	0	0	0	0	0

#@31082017 4 omissions:
# Counting and metaseqr() run driven by "targets-wo-C3L4-C3-K23-K23L4.txt".
# NOTE(review): presumably that file leaves out samples C3L4, C3, K23 and
# K23L4 -- confirm against the targets file itself.
# transcript.data, multic, the.path and the.contrasts.1 are taken from the
# session as they are.
targets <- read.targets("targets-wo-C3L4-C3-K23-K23L4.txt")
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  # The *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343_wo_C3L4_C3-K23-K23L4"
  ),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # Cutoff for the truncated significant list only; everything is exported
  pcut = 0.05,
  # With pandora, add "meta.p.value" and "adj.meta.p.value" here
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)


#@01092017 re-computable xls:
# Same targets file as the "wo_C3L4_C3-K23-K23L4" run, but export.values asks
# for both "normalized" and "raw" values and the output directory carries a
# trailing "b".
# transcript.data, multic, the.path and the.contrasts.1 are taken from the
# session as they are.
targets <- read.targets("targets-wo-C3L4-C3-K23-K23L4.txt")
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  # The *.col arguments are required with embedded annotation
  id.col = 4,
  gc.col = 5,
  bt.col = 8,
  name.col = 7,
  org = "custom",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343_wo_C3L4_C3-K23-K23L4b"
  ),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(length = 500),
    avg.reads = list(average.per.bp = 100, quantile = 0.25),
    expression = list(
      median = TRUE, mean = FALSE, quantile = NA, known = NA, custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # Cutoff for the truncated significant list only; everything is exported
  pcut = 0.05,
  # With pandora, add "meta.p.value" and "adj.meta.p.value" here
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = c("normalized", "raw"),
  export.stats = "mean"
)