# QuantSeq differential-expression runs with metaseqR (runs 343 and 347).
#
# This file is a flat log of analysis sections separated by `#@` markers.
# Sections share global state (the.path, the.contrasts.1, transcript.data,
# multic, targets, r2c.out, the.counts): several sections reuse objects
# created by an earlier one, so statement order matters.
# Pasted shell commands and result-table excerpts are kept as comments so the
# file parses as R.

# Loaded up front: the first section already calls check.parallel(),
# read.targets(), read2count() and metaseqr().
library(metaseqR)

#@ only run 347
the.path <- "/data/images/proton2/run347/wwwDKlab"
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_CL4",
  "K_vs_KL4",
  "CL4_vs_KL4"
)

# Read transcript data from external file
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # if you wish to use multiple cores
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.run347.txt")

# Count reads from the BAM files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run347"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@ merged run 343+347
# Read transcript data from external file
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # if you wish to use multiple cores
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.txt")

# Count reads from the BAM files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run347merged343"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@ merged run 343+347 only on GE3AREsite-geneset
# Reuses the.counts / targets from the merged section above; only the gene
# file and the export directory differ.
# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343GE3AREsite-geneset"
  ),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#run343...
# AREsites only
# Reuses transcript.data, multic, the.path and the.contrasts.1 from above.
# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.txt2")

# Count reads from the BAM files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343cV2"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@
# Run 343 with downloaded Ensembl annotation, 4 contrasts (targets.txt2).
library(metaseqR)

the.path <- "/data/images/proton2/run343/www"
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_CL4",
  "K_vs_KL4",
  "CL4_vs_KL4"
)

metaseqr(
  sample.list = file.path(the.path, "targets.txt2"),
  contrast = the.contrasts.1,
  annotation = "download",
  org = "mm10",
  refdb = "ensembl",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343b"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  pcut = 0.05,
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.counts.table = TRUE,
  report.top = 0.05
)

#@
# Run 343 with downloaded Ensembl annotation, 6 contrasts (targets.txt).
library(metaseqR)

the.path <- "/data/images/proton2/run343/www"
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_C_L4",
  "C_vs_K_L4",
  "K_vs_C_L4",
  "K_vs_K_L4",
  "C_L4_vs_K_L4"
)

metaseqr(
  sample.list = file.path(the.path, "targets.txt"),
  contrast = the.contrasts.1,
  annotation = "download",
  org = "mm10",
  refdb = "ensembl",
  count.type = "utr",
  normalization = "deseq",
  statistics = "deseq",
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343"),
  restrict.cores = 0.5,
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "meandiff",
    "meanvar", "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ),
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  pcut = 0.05,
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.counts.table = TRUE,
  report.top = 0.05
)

# ---- Shell session notes (not R; kept verbatim as comments) ----
# NOTE(review): the commands below write to ".../mm10_more_2AREs/", while the
# R sections read gene.file from ".../mm10_more_than_2AREs/" - confirm which
# directory name is the real one.
# NOTE(review): the first command writes "..._more_than_one_AREs.txt" but the
# count lines that follow name "..._more_than_once_AREs.txt" - confirm.
#
# reczko@max:/data/images/proton2/run343/www/are$ awk -f getGenesWith2AREs1.awk Mus_musculus_ATTTA.gtf > Mus_musculus_ATTTA_more_than_one_AREs.txt
# 42469 254814 3822693 Mus_musculus_ATTTA_more_than_once_AREs.txt
# 5856 5856 111264 Mus_musculus_ATTTA_more_than_once.txt
# reczko@max:/data/images/proton2/run343/www/metaseqr_quantseq_run343b$ zcat /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt.gz | awk -f ../intersect_with_GE2AREs1.awk > /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt
# gzip -9 /data/results/tools/rnaseq/metaseqr/mm10_more_2AREs/gene_data_mm10.txt

#@
# Run 343 on the ARE-restricted gene file, embedded annotation (targets.txt2).
library(metaseqR)

# NOTE(review): this path uses "proton" whereas every other section uses
# "proton2" - possibly a typo; left unchanged, confirm before running.
the.path <- "/data/images/proton/run343/www"
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_C_L4",
  "C_vs_K_L4",
  "K_vs_C_L4",
  "K_vs_K_L4",
  "C_L4_vs_K_L4"
)
# This second assignment overrides the six-contrast vector just above.
the.contrasts.1 <- c(
  "C_vs_K",
  "C_vs_CL4",
  "K_vs_KL4",
  "CL4_vs_KL4"
)

# Read transcript data from external file
transcript.data <- read.delim(
  "/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz"
)
rownames(transcript.data) <- as.character(transcript.data$transcript_id)

# metaseqR related variables outside the pipeline
multic <- check.parallel(0.5) # if you wish to use multiple cores
assign("VERBOSE", TRUE, envir = metaseqR:::meta.env)

# Read targets file
message("Reading targets file...")
targets <- read.targets("targets.txt2")

# Count reads from the BAM files listed in the targets file
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)

# Create a counts table to be passed as "embedded" annotation
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

# Run metaseqR
metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10_more_than_2AREs/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(the.path, "metaseqr_quantseq_run343c"),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias", "biodist",
    "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

# ---- Pasted result excerpt (not R; kept verbatim as comments) ----
# Header row followed by the same gene (Uty) in two outputs labelled "b:" and
# "c:" (presumably the run343b and run343c exports - TODO confirm).
#
# chromosome start end gene_id gc_content strand gene_name biotype p-value_deseq FDR_deseq log2_normalized_fold_change_CL4_vs_KL4 log2_normalized_counts_DK3R7-C1L4 log2_normalized_counts_DK3R8-C2L4 rpgm_normalized_counts_DK3R7-C1L4 rpgm_normalized_counts_DK3R8-C2L4 log2_normalized_counts_DK3R10b-K23L4 log2_normalized_counts_DK3R11-K2L4 log2_normalized_counts_DK3R12-K3L4 rpgm_normalized_counts_DK3R10b-K23L4 rpgm_normalized_counts_DK3R11-K2L4 rpgm_normalized_counts_DK3R12-K3L4 LN AR MD MN QN KN CM BT
# b: chrY 1096861 1245759 ENSMUSG00000068457 0.3693 - Uty protein_coding 0.00993922428272647 1 2.45066140900957 1 1.58496250072116 2.54731640217031e-05 5.09463280434063e-05 3.16992500144231 3.58496250072116 4.32192809488736 0.000203785312173625 0.000280204804238735 0.00048399011641236 0 0 0 0 0 0 0 0
# c: chrY 1096861 1245759 ENSMUSG00000068457 0.3693 - Uty protein_coding 0.020947931980266 1 2.15200309344505 1.5 5.09463280434063e-05 3.68009657367524 0.000314169022934339 1 2 2.54731640217031e-05 7.64194920651094e-05 3.16992500144231 3.70043971814109 4.16992500144231 0.000203785312173625 0.000305677968260438 0.000433043788368953 0 0 0 0 0 0 0 0

#@31082017 4 omissions:
# Reuses the.path, the.contrasts.1, transcript.data and multic from whichever
# section was run before this one.
targets <- read.targets("targets-wo-C3L4-C3-K23-K23L4.txt")
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343_wo_C3L4_C3-K23-K23L4"
  ),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = "normalized",
  export.stats = "mean"
)

#@01092017 re-computable xls:
# Same inputs as the previous section; additionally exports raw values and
# writes to a separate directory (suffix "b").
targets <- read.targets("targets-wo-C3L4-C3-K23-K23L4.txt")
r2c.out <- read2count(
  targets, transcript.data,
  has.all.fields = TRUE, multic = multic
)
the.counts <- cbind(r2c.out$mergedann, r2c.out$counts)

metaseqr(
  counts = the.counts,
  sample.list = targets$samples,
  contrast = the.contrasts.1,
  annotation = "embedded",
  gene.file = "/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz",
  id.col = 4, # required with embedded annotation
  gc.col = 5, # required with embedded annotation
  bt.col = 8, # required with embedded annotation
  name.col = 7, # required with embedded annotation
  org = "custom",
  count.type = "utr",
  normalization = "deseq", # or whatever supported
  statistics = "deseq", # or whatever supported, more than one also
  fig.format = c("png", "pdf"),
  export.where = file.path(
    the.path, "metaseqr_quantseq_run347merged343_wo_C3L4_C3-K23-K23L4b"
  ),
  restrict.cores = 0.5, # fraction of available cores to use
  qc.plots = c(
    "mds", "biodetection", "countsbio", "saturation", "readnoise", "filtered",
    "correl", "pairwise", "boxplot", "gcbias", "lengthbias",
    "biodist", "volcano", "deheatmap"
  ),
  exon.filters = NULL,
  gene.filters = list(
    length = list(
      length = 500
    ),
    avg.reads = list(
      average.per.bp = 100,
      quantile = 0.25
    ),
    expression = list(
      median = TRUE,
      mean = FALSE,
      quantile = NA,
      known = NA,
      custom = NA
    ), # it's the default anyway
    biotype = get.defaults("biotype.filter", "mm10")
  ),
  # pcut is only for the truncated significant list output, all results are
  # exported anyway
  pcut = 0.05,
  # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value"
  # should be added to export.what
  export.what = c(
    "annotation", "p.value", "adj.p.value", "fold.change", "stats",
    "counts", "flags"
  ),
  export.scale = c("log2", "rpgm"),
  export.values = c("normalized", "raw"),
  export.stats = "mean"
)