library(metaseqR) the.path="/data/images/proton/run341/www" metaseqr( sample.list=file.path(the.path,"targets.txt"), contrast=c("Input_vs_Test"), org="hg19", refdb="refseq", normalization="edger", statistics="edger", export.where=file.path(the.path,"metaseqr_run341"), qc.plots=c( "mds","biodetection","countsbio","saturation","readnoise", "correl","pairwise","boxplot","volcano","biodist","deheatmap" ), pcut=0.05, export.what=c("annotation","p.value","adj.p.value","fold.change","stats", "counts"), export.scale=c("log2"), export.values="normalized", export.counts.table=TRUE, restrict.cores=0.3, fig.format=c("png","pdf") ) #@ /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -M -s 1 -J -f -t exon -T 4 -a /data/results/reference/hg19/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf -o /data/images/proton2/run341/rpkm/PHR23r-FUBP2.cnt PHR23r-FUBP2.bam /data/images/proton2/run341/rpkm/PHR23r-FUBP2.cnt Geneid Chr Start End Strand Length PHR23r-FUBP2.bam NEAT1 chr11 65190269 65194003 + 3735 2016 MIR612 chr11 65211929 65212028 + 100 8 MALAT1 chr11 65265233 65273939 + 8707 42741 /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -M -s 1 -J -f -t exon -T 4 -a /data/results/reference/hg19/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf -o /data/images/proton2/run341/rpkm/PHR23r-FUBP2.cnt PHR23r-FUBP2.bam PHR22r-TCF4.bam PHR21r-bcatMono.bam PHR20r-bcatPoly.bam PHR24r-IgG.bam PHR19r-Input.bam /data/images/proton/run15/www/con_b.bam /data/images/proton/run15/www/con_a.bam NEAT1 chr11 65190269 65194003 + 3735 2016 191 155 98 34 169 30800 26403 MIR612 chr11 65211929 65212028 + 100 8 3 4 2 0 4 0 0 MALAT1 chr11 65265233 65273939 + 8707 42741 3462 1615 710 243 1016 6041 5954 By default, featureCounts does not count multi-overlapping reads. Users can specify the ‘-O’ option (set allowMultiOverlap to TRUE in R) to fully count them for each overlapping meta- feature/feature (each overlapping meta-feature/feature receives a count of 1 from a read), or specify both ‘-O’ and ‘–fraction’ options (set both allowMultiOverlap and fraction to TRUE in R) to assign a fractional count to each overlapping meta-feature/feature (each overlapping meta-feature/feature receives a count of 1/y from a read where y is the total number of meta-features/features overlapping with the read). /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -t exon -T 4 -a /data/results/reference/hg19/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf -o /data/images/proton2/run341/rpkm/exon.cnt PHR23r-FUBP2.bam PHR22r-TCF4.bam PHR21r-bcatMono.bam PHR20r-bcatPoly.bam PHR24r-IgG.bam PHR19r-Input.bam /data/images/proton/run15/www/con_b.bam /data/images/proton/run15/www/con_a.bam NEAT1 chr11 65190269 65194003 + 3735 2016 191 155 98 34 169 30488.33 26399.50 MIR612 chr11 65211929 65212028 + 100 8 3 4 2 0 4 0 0 MALAT1 chr11 65265233 65273939 + 8707 42740.50 3461.50 1615 710 243 1015.33 6040 5953.50 reczko@max:/data/images/proton2/run341/rpkm$ awk -f getRPKM1.awk exon.cnt > getRPKM1.log t=read.table("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG.bam_PHR19r-Input.bam",header=T) > summary(log2(na.omit(t$fc_vs_PHR24r.IgG.bam))) Min. 1st Qu. Median Mean 3rd Qu. Max. -8.2500 -2.0410 -0.7585 -0.7537 0.4825 8.1740 > hist(log2((t$fc_vs_PHR24r.IgG.bam))) > hist(log2((t$fc_vs_PHR19r.Input.bam))) > summary(log2((t$fc_vs_PHR19r.Input.bam))) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's -7.43 -1.58 -0.31 -0.25 1.01 8.32 46157 plot(ecdf(log2((t$fc_vs_PHR19r.Input.bam))),col="green") plot(ecdf(log2((t$fc_vs_PHR24r.IgG.bam))),add=T,col="blue") my.grid <- seq(0.05,0.95, 0.05) my.xgrid <- seq(0.5,4, 0.5) abline(h=my.grid,v=my.xgrid, col="grey", lty=2) #boxplot(a, col="red", add = TRUE) vsIgG: log2fc>2.5: top5% vsInput: log2fc>2.8: top5% png("RPKM_PHR23r-FUBP2.bam_vs_PHR24r-IgG_blue_PHR19r-Input_green.png") plot(ecdf(log2((t$fc_vs_PHR19r.Input.bam))),col="green") plot(ecdf(log2((t$fc_vs_PHR24r.IgG.bam))),add=T,col="blue") my.grid <- seq(0.05,0.95, 0.05) my.xgrid <- seq(0.5,4, 0.5) abline(h=my.grid,v=my.xgrid ,col="grey", lty=2) dev.off() #@ MALAT /data/images/proton2/run341/www/aspeak/ASPeak_FUBP2_vs_IgG/exons/rpkm/all/chr11.csv chr11 NR_002819_exon_0_0_chr11_65265209_f 16.2196188647353 /data/images/proton2/run341/www/aspeak/ASPeak_FUBP2_vs_IgG/exons/inputBed/PHR23r-FUBP2_exons.bed chr11 65270610 65270737 NR_002819_exon_0_0_chr11_65265209_f 50 + /data/images/proton2/run341/www/aspeak/ASPeak_FUBP2_vs_IgG/exons/inputBed/PHR24r-IgG_exons.bed chr11 65270091 65270186 NR_002819_exon_0_0_chr11_65265209_f 50 + /data/images/proton2/run341/www/aspeak/ASPeak_FUBP2_vs_IgG/exons/inputBed/con_b/chr11.bed chr11 65265397 65265414 NR_002819_exon_0_0_chr11_65265209_f 50 + reczko@max:/data/images/proton2/run341/rpkm/all$ awk -f ../getRPKM_with_control.awk ../exon.cnt > getRPKM_with_control.log #@ full lncRNA annotation at (http://www.gencodegenes.org/releases/26lift37.html ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_26/GRCh37_mapping/gencode.v26lift37.long_noncoding_RNAs.gtf.gz /data/results/reference/hsa/gencode/gencode.v26lift37.long_noncoding_RNAs.gtf awk 'NR>5 {print $1,$4,$5,$10}' gencode.v19.long_noncoding_RNAs.gtf > lncRNA.hg19.bed perl -p -i -e "s/[\";]//g" lncRNA.hg19.bed #awk -v f1="/data/results/reference/hg19/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" -f /data/results/tools/formats/remove-exact-matching-regions-from-gtf.awk /data/results/reference/hsa/gencode/gencode.v26lift37.long_noncoding_RNAs.gtf > /data/results/reference/hg19/ucsc_plus_gencode ) is subset in: grep exon /data/results/reference/hsa/gencode/gencode.v26lift37.annotation.gtf > /data/results/reference/hsa/gencode/gencode.v26lift37.annotation_exons.gtf #/data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -t exon -T 4 -a /data/results/reference/hsa/gencode/gencode.v26lift37.annotation_exons.gtf -o /data/images/proton2/run341/rpkm/exonGencode.cnt PHR23r-FUBP2.bam PHR22r-TCF4.bam PHR21r-bcatMono.bam PHR20r-bcatPoly.bam PHR24r-IgG.bam PHR19r-Input.bam /data/images/proton/run15/www/con_b.bam /data/images/proton/run15/www/con_a.bam /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -g gene_name -t exon -T 4 -a /data/results/reference/hsa/gencode/gencode.v26lift37.annotation_exons.gtf -o /data/images/proton2/run341/rpkm/exonGencode.cnt PHR23r-FUBP2.bam PHR22r-TCF4.bam PHR21r-bcatMono.bam PHR20r-bcatPoly.bam PHR24r-IgG.bam PHR19r-Input.bam /data/images/proton/run15/www/con_b.bam /data/images/proton/run15/www/con_a.bam cd /data/images/proton2/run341/rpkm/all /data/images/proton2/run341/rpkm/getRPKM_with_control.awk awk -f ../getRPKM_with_control.awk ../exonGencode.cnt > getRPKM_with_control.log #edit header /data/images/proton2/run341/rpkm/all/process-RPKM2.r