require(metaseqR) the.contrasts=c( "wt_vs_tg") the.path="/data/images/proton2/run423/wwwDouniLab" transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz") rownames(transcript.data) <- as.character(transcript.data$transcript_id) # metaseqR related variables outside the pipeline multic <- check.parallel(0.5) # If wish to use multiple cores assign("VERBOSE",TRUE,envir=metaseqR:::meta.env) # Read targets files message("Reading targets file...") targets <- read.targets("targets.txt4") # Do the counting based with bam files in the targets file r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic) # Create a counts table to be passed as "embedded" annotation the.counts <- cbind(r2c.out$mergedann,r2c.out$counts) metaseqr( counts=the.counts, sample.list=targets$samples, contrast=the.contrasts, annotation="embedded", gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz", id.col=4, # required with embedded annotation gc.col=5, # required with embedded annotation bt.col=8, # required with embedded annotation name.col=7, # required with embedded annotation org="custom", count.type="utr", normalization="deseq", # or whatever supported statistics="deseq", # or whatever supported, more than one also fig.format=c("png","pdf"), export.where=file.path(the.path,"metaseqr_run423d"), restrict.cores=0.5, # fraction of available cores to use qc.plots=c( "mds","biodetection","countsbio","saturation","readnoise","filtered", "correl","pairwise","boxplot","gcbias","lengthbias","meandiff", "meanvar","biodist","volcano","deheatmap" ), exon.filters=NULL, gene.filters=list( length=list( length=500 ), avg.reads=list( average.per.bp=100, quantile=0.25 ), expression=list( median=TRUE, mean=FALSE, quantile=NA, known=NA, custom=NA ), # it's the default anyway biotype=get.defaults("biotype.filter","mm10") ), pcut=0.05, # only for the truncated significant list output, all results are exported anyway export.what=c("annotation","p.value","adj.p.value","fold.change","stats", "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added export.scale=c("log2","rpgm"), export.values="normalized", export.stats="mean" ) #@ ]0;/data/images/proton2/run423/wwwDouniLabreczko@max:/data/images/proton2/run423/wwwDouniLab$ R R version 3.3.3 (2017-03-06) -- "Another Canoe" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > require(metaseqR) the.contrasts=c( "wt_vs_tg") the.path="/data/images/proton2/run423/wwwDouniLab" transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz") rownames(transcript.data) <- as.character(transcript.data$transcript_id) # metaseqR related variables outside the pipeline multic <- check.parallel(0.5) # If wish to use multiple cores assign("VERBOSE",TRUE,envir=metaseqR:::meta.env) # Read targets files message("Reading targets file...") targets <- read.targets("targets.txt2") # Do the counting based with bam files in the targets file r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic) # Create a counts table to be passed as "embedded" annotation the.counts <- cbind(r2c.out$mergedann,r2c.out$counts) metaseqr( counts=the.counts, sample.list=targets$samples, contrast=the.contrasts, annotation="embedded", gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz", id.col=4, # required with embedded annotation gc.col=5, # required with embedded annotation bt.col=8, # required with embedded annotation name.col=7, # required with embedded annotation org="custom", count.type="utr", normalization="deseq", # or whatever supported statistics="deseq", # or whatever supported, more than one also fig.format=c("png","pdf"), export.where=file.path(the.path,"metaseqr_run423b"), restrict.cores=0.5, # fraction of available cores to use qc.plots=c( "mds","biodetection","countsbio","saturation","readnoise","filtered", "correl","pairwise","boxplot","gcbias","lengthbias","meandiff", "meanvar","biodist","volcano","deheatmap" ), exon.filters=NULL, gene.filters=list( length=list( length=500 ), avg.reads=list( average.per.bp=100, quantile=0.25 ), expression=list( median=TRUE, mean=FALSE, quantile=NA, known=NA, custom=NA ), # it's the default anyway biotype=get.defaults("biotype.filter","mm10") ), pcut=0.05, # only for the truncated significant list output, all results are exported anyway export.what=c("annotation","p.value","adj.p.value","fold.change","stats", "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added export.scale=c("log2","rpgm"), export.values="normalized", export.stats="mean" ) require(metaseqR) the.contrasts=c( "wt_vs_tg") the.path="/data/images/proton2/run423/wwwDouniLab" transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz") rownames(transcript.data) <- as.character(transcript.data$transcript_id) # metaseqR related variables outside the pipeline multic <- check.parallel(0.5) # If wish to use multiple cores assign("VERBOSE",TRUE,envir=metaseqR:::meta.env) # Read targets files message("Reading targets file...") targets <- read.targets("targets.txt3") # Do the counting based with bam files in the targets file r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic) # Create a counts table to be passed as "embedded" annotation the.counts <- cbind(r2c.out$mergedann,r2c.out$counts) metaseqr( counts=the.counts, sample.list=targets$samples, contrast=the.contrasts, annotation="embedded", gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz", id.col=4, # required with embedded annotation gc.col=5, # required with embedded annotation bt.col=8, # required with embedded annotation name.col=7, # required with embedded annotation org="custom", count.type="utr", normalization="deseq", # or whatever supported statistics="deseq", # or whatever supported, more than one also fig.format=c("png","pdf"), export.where=file.path(the.path,"metaseqr_run423c"), restrict.cores=0.5, # fraction of available cores to use qc.plots=c( "mds","biodetection","countsbio","saturation","readnoise","filtered", "correl","pairwise","boxplot","gcbias","lengthbias","meandiff", "meanvar","biodist","volcano","deheatmap" ), exon.filters=NULL, gene.filters=list( length=list( length=500 ), avg.reads=list( average.per.bp=100, quantile=0.25 ), expression=list( median=TRUE, mean=FALSE, quantile=NA, known=NA, custom=NA ), # it's the default anyway biotype=get.defaults("biotype.filter","mm10") ), pcut=0.05, # only for the truncated significant list output, all results are exported anyway export.what=c("annotation","p.value","adj.p.value","fold.change","stats", "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added export.scale=c("log2","rpgm"), export.values="normalized", export.stats="mean" ) cd /data/images/proton2/run423/wwwDouniLab/hg19 ln -s /data/images/proton2/run423/tophat_hg19_033/sort_uniq.bam ED3R1_K471-wt1.bam ln -s /data/images/proton2/run423/tophat_hg19_034/sort_uniq.bam ED3R8_K517-tg4.bam ln -s /data/images/proton2/run423/tophat_hg19_042/sort_uniq.bam ED3R2_K513-wt2.bam ln -s /data/images/proton2/run423/tophat_hg19_043/sort_uniq.bam ED3R3_K514-wt3.bam ln -s /data/images/proton2/run423/tophat_hg19_044/sort_uniq.bam ED3R4_K518-wt4.bam ln -s /data/images/proton2/run423/tophat_hg19_045/sort_uniq.bam ED3R5_K473-tg1.bam ln -s /data/images/proton2/run423/tophat_hg19_046/sort_uniq.bam ED3R6_K483-tg2.bam ln -s /data/images/proton2/run423/tophat_hg19_047/sort_uniq.bam ED3R7_K485-tg3.bam cp ../reads.txt . awk -f /data/images/proton/make-stranded-sampled-hg19-tracks1.awk reads.txt > make-stranded-sampled-hg19-tracks.sh source make-stranded-sampled-hg19-tracks.sh #@ 25072019 require(metaseqR) the.contrasts=c( "wt_vs_tg") the.path="/data/images/proton2/run423/wwwDouniLab" transcript.data <- read.delim("/data/results/tools/rnaseq/metaseqr/mm10/transcript_data_mm10.txt.gz") rownames(transcript.data) <- as.character(transcript.data$transcript_id) # metaseqR related variables outside the pipeline multic <- check.parallel(0.5) # If wish to use multiple cores assign("VERBOSE",TRUE,envir=metaseqR:::meta.env) # Read targets files message("Reading targets file...") targets <- read.targets("targets.txt4") # Do the counting based with bam files in the targets file r2c.out <- read2count(targets,transcript.data,has.all.fields=TRUE,multic=multic) # Create a counts table to be passed as "embedded" annotation the.counts <- cbind(r2c.out$mergedann,r2c.out$counts) metaseqr( counts=the.counts, sample.list=targets$samples, contrast=the.contrasts, annotation="embedded", gene.file="/data/results/tools/rnaseq/metaseqr/mm10/gene_data_mm10.txt.gz", id.col=4, # required with embedded annotation gc.col=5, # required with embedded annotation bt.col=8, # required with embedded annotation name.col=7, # required with embedded annotation org="custom", count.type="utr", normalization="deseq", # or whatever supported statistics="deseq", # or whatever supported, more than one also fig.format=c("png","pdf"), export.where=file.path(the.path,"metaseqr_run423e"), restrict.cores=0.5, # fraction of available cores to use qc.plots=c( "mds","biodetection","countsbio","saturation","readnoise","filtered", "correl","pairwise","boxplot","gcbias","lengthbias","meandiff", "meanvar","biodist","volcano","deheatmap" ), exon.filters=NULL, gene.filters=list( length=list( length=500 ), avg.reads=list( average.per.bp=100, quantile=0.25 ), expression=list( median=TRUE, mean=FALSE, quantile=NA, known=NA, custom=NA ), # it's the default anyway biotype=get.defaults("biotype.filter","mm10") ), pcut=0.05, # only for the truncated significant list output, all results are exported anyway export.what=c("annotation","p.value","adj.p.value","fold.change","stats", "counts","flags"), # if you use pandora, the fields "meta.p.value" and "adj.meta.p.value" should be added export.scale=c("log2","rpgm"), export.values="normalized", export.stats="mean" ) #@ #custom metaseqr heatmap require(made4) zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423d/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt') dat=read.csv(zz,header=T,sep='\t') dat2=dat dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),] cl=cbind(dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R8_K517,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485) rownames(cl)=dat$gene_name colnames(cl)=c("ED3R1_K471","ED3R2_K513","ED3R3_K514","ED3R8_K517","ED3R5_K473","ED3R6_K483","ED3R7_K485") png("run423-DEG-heatmap-ave-dualScale.png",width=800,height=2600) heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45) dev.off() #custom metaseqr heatmap zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423e/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt') dat=read.csv(zz,header=T,sep='\t') dat2=dat dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),] cl=cbind(dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485) rownames(cl)=dat$gene_name colnames(cl)=c("ED3R1_K471_wt","ED3R2_K513_wt","ED3R3_K514_wt","ED3R5_K473_tg","ED3R6_K483_tg","ED3R7_K485_tg") png("run423-DEG-heatmap-ave-dualScale-e.png",width=800,height=3200) heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45) dev.off() zz=gzfile('/data/images/proton2/run423/wwwDouniLab/metaseqr_run423/lists/metaseqr_all_out_wt_vs_tg.txt.gz','rt') dat=read.csv(zz,header=T,sep='\t') dat2=dat dat=dat[(dat$FDR_deseq<=0.05)&(abs(dat$log2_normalized_fold_change_wt_vs_tg)>=1)&(!is.na(dat$FDR_deseq)),] cl=cbind(dat$log2_normalized_counts_ED3R4_K518,dat$log2_normalized_counts_ED3R1_K471,dat$log2_normalized_counts_ED3R2_K513,dat$log2_normalized_counts_ED3R3_K514,dat$log2_normalized_counts_ED3R8_K517,dat$log2_normalized_counts_ED3R5_K473,dat$log2_normalized_counts_ED3R6_K483,dat$log2_normalized_counts_ED3R7_K485) rownames(cl)=dat$gene_name colnames(cl)=c("ED3R4_K518_wt","ED3R1_K471_wt","ED3R2_K513_wt","ED3R3_K514_wt","ED3R8_K517_tg","ED3R5_K473_tg","ED3R6_K483_tg","ED3R7_K485_tg") png("run423-DEG-heatmap-ave-dualScale-a.png",width=800,height=2600) heatplot(cl,scale="none", returnSampleTree=TRUE,method="ave",dualScale=T,lhei = c(1,19),cexRow=1,cexCol=1,margins=c(12,8),srtCol=45) dev.off()