@@ after call with mth, omit FAGsB1 MAGsB2 t=read.table("genes2.counts.matrix2") sel=c(1,2,3,4,5,7,8) mydata2=t[,sel] mydata=t(mydata2) d <- dist(mydata) # euclidean distances between the rows fit <- cmdscale(d,eig=TRUE, k=2) # k is the number of dim pdf("mds3.pdf") x <- fit$points[,1] y <- fit$points[,2] plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Multidimensional Scaling", type="n") text(x, y, labels = row.names(mydata), cex=.7) dev.off() write.table(mydata2,"genes2.counts.matrix4",quote=F,sep="\t") /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix4 --method edgeR --samples_file samples_described4 --output genes4.edgeR @@ /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix3 --method edgeR --samples_file samples_described3 --output genes3.edgeR $TRINITY_HOME/Analysis/DifferentialExpression/run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix3 --method edgeR --samples_file samples_described3 --output genes3.edgeR @@ MDS plot mydata=t(read.table("genes2.counts.matrix2")) d <- dist(mydata) # euclidean distances between the rows fit <- cmdscale(d,eig=TRUE, k=2) # k is the number of dim pdf("mds2.pdf") x <- fit$points[,1] y <- fit$points[,2] plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2", main="Multidimensional Scaling", type="n") text(x, y, labels = row.names(mydata), cex=.7) dev.off() FAGsA2 FAGsB1 are very similar, MAGsB1 is in FAG cluster sel=c(1,3,4,5,7,9) t=t(mydata[sel,]) summary(t) FAGsA1 MAGsA1 GUT1 MAGsA2 Min. : 0 Min. : 0.0 Min. : 0 Min. : 0.0 1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0.0 Median : 0 Median : 0.0 Median : 0 Median : 0.0 Mean : 576 Mean : 394.9 Mean : 412 Mean : 186.6 3rd Qu.: 40 3rd Qu.: 39.0 3rd Qu.: 8 3rd Qu.: 21.0 Max. :4388042 Max. :2112586.7 Max. :4146223 Max. :574876.1 FAGsB2 MAGsB1 MAGsB2 Min. : 0.00 Min. : 0.0 Min. : 0.0 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.0 Median : 0.00 Median : 0.0 Median : 0.0 Mean : 5.08 Mean : 140.1 Mean : 338.9 3rd Qu.: 0.00 3rd Qu.: 0.0 3rd Qu.: 46.0 Max. :30894.76 Max. :1091155.2 Max. :1453594.6 write.table(t,"genes2.counts.matrix3",quote=F,sep="\t") @@ > t=read.table("genes2.counts.matrix2") > summary(t) FAGsA1 FAGsA2 MAGsA1 GUT1 Min. : 0 Min. : 0 Min. : 0.0 Min. : 0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 0 Median : 0 Median : 0 Median : 0.0 Median : 0 Mean : 576 Mean : 439 Mean : 394.9 Mean : 412 3rd Qu.: 40 3rd Qu.: 39 3rd Qu.: 39.0 3rd Qu.: 8 Max. :4388042 Max. :3331319 Max. :2112586.7 Max. :4146223 MAGsA2 FAGsB1 FAGsB2 MAGsB1 Min. : 0.0 Min. : 0 Min. : 0.00 Min. : 0.0 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 0.0 Median : 0.0 Median : 0 Median : 0.00 Median : 0.0 Mean : 186.6 Mean : 406 Mean : 5.08 Mean : 140.1 3rd Qu.: 21.0 3rd Qu.: 21 3rd Qu.: 0.00 3rd Qu.: 0.0 Max. :574876.1 Max. :3306722 Max. :30894.76 Max. :1091155.2 MAGsB2 Min. : 0.0 1st Qu.: 0.0 Median : 0.0 Mean : 338.9 3rd Qu.: 46.0 Max. :1453594.6 sel=t$GUT1<=8 summary(sel) Mode FALSE TRUE NA's logical 8361 2835 0 t2=t[sel,] write.table(t2,"genes2.counts.matrix2.noGUT1",quote=F,sep="\t") $TRINITY_HOME/Analysis/DifferentialExpression/run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix2.noGUT1 --method edgeR --samples_file samples_described --output genes2.noGUT1.edgeR @@ ln -s ../008_v2/RSEM.genes.results 008.genes2 ln -s ../011_v2/RSEM.genes.results 011.genes2 ln -s ../012_v2/RSEM.genes.results 012.genes2 ln -s ../013_v2/RSEM.genes.results 013.genes2 ln -s ../016_v2/RSEM.genes.results 016.genes2 ln -s ../../run124/001_v2/RSEM.genes.results 001.genes2 ln -s ../../run124/003_v2/RSEM.genes.results 003.genes2 ln -s ../../run124/004_v2/RSEM.genes.results 004.genes2 ln -s ../../run124/009_v2/RSEM.genes.results 009.genes2 export TRINITY_HOME=/data/results/tools/align/trinity/trinityrnaseq_r20140413p1 export PATH=/data/results/tools/align/bowtie2-2.2.3:/data/results/tools/align/tophat-2.0.12.Linux_x86_64:/data/results/tools/align/rsem/rsem-1.2.19:/home/reczko/ocamlbrew/ocaml-4.01.0/.opam/system/bin:/home/reczko/ocamlbrew/ocaml-4.01.0/.opam/system/bin:/home/reczko/ocamlbrew/ocaml-4.01.0/bin:/data/results/tools/chipseq/macs/bin:/data/results/tools/bedtools/BEDTools-Version-2.14.3/bin:/data/results/tools/chipseq/ngsplot/bin:/data/results/tools/r/R-3.1.2/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games; export PATH $TRINITY_HOME/util/abundance_estimates_to_matrix.pl --est_method RSEM --out_prefix genes2 001.genes2 003.genes2 004.genes2 008.genes2 009.genes2 011.genes2 012.genes2 013.genes2 016.genes2 $TRINITY_HOME/Analysis/DifferentialExpression/run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix2 --method edgeR --samples_file samples_described --output genes2.edgeR # annotate with /data/results/tools/denovo/annotate-trinity-DE1.awk , loads /data/results/reference/olfly/tr2/fa/trimmed/tri1/trinity_out_dir/blast/trinotate_annotation_report_tr2_v2.csv /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix --method edgeR --samples_file samples_described.txt --output genes2.edgeR # for all with pval<=0.01 /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis_pval_0.01.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix --method edgeR --samples_file samples_described.txt --output genes2.edgeR /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis2_pval_0.01.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix2 --method edgeR --samples_file samples_described.txt --output genes2.edgeR /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis2_pval_0.05.pl --matrix /data/images/proton/run125/www/genes2.counts.matrix --method edgeR --samples_file samples_described.txt --output genes2.edgeR.pval0.05 @@ # get mrna's with blastx hits: cd /data/images/bak/reczko/doc/mth/mcgill/tr2 awk -f /data/results/tools/trinity/get-annotated-mrna1.awk trinotate_annotation_report_tr2_v2.csv > trinotate_annotation_report_tr2_v2-with-blast-hit.csv awk -f /data/results/tools/trinity/filter-annotated-mrna1.awk Trinity.fasta2 > Trinity.fasta2-with-blastx-hit @@ edgeR on estia with: reczko@estia:/data/images/proton/run125/www/genes.edgeR.pval0.05$ R R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows" # qunatile missing in R >=3.0 @@ 16022015 reczko@n2:/data/images/proton/run125/www$ /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis2_pval_0.05_no_replicates.pl --matrix /data/images/proton/run125/www/genes.counts.matrix2 --method edgeR --samples_file /data/images/proton/run125/www/samples_described-MAG.txt --output genes.edgeR.MAG @@ up to 03022015 reczko@n1:/data/images/proton/run125/www$ ls -l ../???/RSEM.genes.results ln -s ../008/RSEM.genes.results 008.genes ln -s ../011/RSEM.genes.results 011.genes ln -s ../012/RSEM.genes.results 012.genes ln -s ../013/RSEM.genes.results 013.genes ln -s ../016/RSEM.genes.results 016.genes ln -s ../../run124/001/RSEM.genes.results 001.genes ln -s ../../run124/003/RSEM.genes.results 003.genes ln -s ../../run124/004/RSEM.genes.results 004.genes ln -s ../../run124/009/RSEM.genes.results 009.genes export TRINITY_HOME=/data/results/tools/align/trinity/trinityrnaseq_r20140413p1 export PATH=/data/results/tools/align/bowtie2-2.2.3:/data/results/tools/align/tophat-2.0.12.Linux_x86_64:/data/results/tools/align/rsem/rsem-1.2.19:/home/reczko/ocamlbrew/ocaml-4.01.0/.opam/system/bin:/home/reczko/ocamlbrew/ocaml-4.01.0/.opam/system/bin:/home/reczko/ocamlbrew/ocaml-4.01.0/bin:/data/results/tools/chipseq/macs/bin:/data/results/tools/bedtools/BEDTools-Version-2.14.3/bin:/data/results/tools/chipseq/ngsplot/bin:/data/results/tools/r/R-3.1.2/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games; export PATH $TRINITY_HOME/util/abundance_estimates_to_matrix.pl --est_method RSEM --out_prefix genes 001.genes 003.genes 004.genes 008.genes 009.genes 011.genes 012.genes 013.genes 016.genes $TRINITY_HOME/Analysis/DifferentialExpression/run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes.counts.matrix --method edgeR --samples_file samples_described.txt --output genes.edgeR # annotate with /data/results/tools/denovo/annotate-trinity-DE1.awk , loads /data/results/reference/olfly/tr2/fa/trimmed/tri1/trinity_out_dir/blast/trinotate_annotation_report_tr2_v2.csv /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes.counts.matrix --method edgeR --samples_file samples_described.txt --output genes.edgeR # for all with pval<=0.01 /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis_pval_0.01.pl --matrix /data/images/proton/run125/www/genes.counts.matrix --method edgeR --samples_file samples_described.txt --output genes.edgeR /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis2_pval_0.01.pl --matrix /data/images/proton/run125/www/genes.counts.matrix2 --method edgeR --samples_file samples_described.txt --output genes.edgeR /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis2_pval_0.05.pl --matrix /data/images/proton/run125/www/genes.counts.matrix --method edgeR --samples_file samples_described.txt --output genes.edgeR.pval0.05 http://trinityrnaseq.github.io/analysis/diff_expression_analysis.html#with_bio_replicates Be sure to have a samples_described.txt file that describes the relationship between samples and replicates. For example: conditionA condA-rep1 conditionA condA-rep2 conditionB condB-rep1 conditionB condB-rep2 conditionC condC-rep1 conditionC condC-rep2 where condA-rep1, condA-rep2, condB-rep1, etc…, are all column names in the counts.matrix generated earlier (see top of page). Your sample names that group the replicates are user-defined here. Using edgeR with replicates: $TRINITY_HOME/Analysis/DifferentialExpression/run_DE_analysis.pl --matrix SP2.rnaseq.counts.matrix --method edgeR --samples_file samples_described.txt reczko@n1:/data/images/proton/run125/www$ /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl "my" variable $cmd masks earlier declaration in same scope at /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl line 374. ################################################################################################# # # Required: # # --matrix matrix of raw read counts (not normalized!) # # --method edgeR|DESeq (DESeq only supported here w/ bio replicates) # # # Optional: # # --samples_file tab-delimited text file indicating biological replicate relationships. # ex. # cond_A cond_A_rep1 # cond_A cond_A_rep2 # cond_B cond_B_rep1 # cond_B cond_B_rep2 # # # General options: # # --min_rowSum_counts default: 2 (only those rows of matrix meeting requirement will be tested) # # --output|o name of directory to place outputs (default: $method.$pid.dir) # # --reference_sample name of a sample to which all other samples should be compared. # (default is doing all pairwise-comparisons among samples) # ############################################################################################### # # ## EdgeR-related parameters # ## (no biological replicates) # # --dispersion edgeR dispersion value (default: 0.1) set to 0 for poisson (sometimes breaks...) # # http://www.bioconductor.org/packages/release/bioc/html/edgeR.html # ############################################################################################### # # ## DE-Seq related parameters # # --DESEQ_method "pooled", "pooled-CR", "per-condition", "blind" # --DESEQ_sharingMode "maximum", "fit-only", "gene-est-only" # --DESEQ_fitType fitType = c("parametric", "local") # # ## (no biological replicates) # note: FIXED as: method=blind, sharingMode=fit-only # # http://www.bioconductor.org/packages/release/bioc/html/DESeq.html # ################################################################################################ /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/sample_data/test_full_edgeR_pipeline/ @@ /data/results/tools/denovo/trinity/Trinotate_r20131110/sample_data/runMe.sh @@ /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix Trinity5to10_genes.counts.matrix --method edgeR --output Trinity5to10-pairwiseDE-edgeR prepare as in /data/results/tools/denovo/trinity/Trinotate_r20131110/sample_data/samples_n_reads_described.txt.gz Sp_hs hs_rep1 1M_READS_sample/Sp.hs.1M.left.fq 1M_READS_sample/Sp.hs.1M.right.fq Sp_log log_rep1 1M_READS_sample/Sp.log.1M.left.fq 1M_READS_sample/Sp.log.1M.right.fq Sp_ds ds_rep1 1M_READS_sample/Sp.ds.1M.left.fq 1M_READS_sample/Sp.ds.1M.right.fq Sp_plat plat_rep1 1M_READS_sample/Sp.plat.1M.left.fq 1M_READS_sample/Sp.plat.1M.right.fq ## Trinity.pl configuration settings: --CPU=6 --JM=10G --seqType=fq --SS_lib_type=RF ## Load Expression info and DE analysis results for Trinotate-web # import the expression data (counts, fpkms, and samples) echo "###################################" echo Loading Component Expression Matrix echo "###################################" #$TRINOTATE_HOME/util/transcript_expression/import_samples_n_expression_matrix.pl --sqlite Trinotate.sqlite --component_mode --samples_file Trinity5-6-7-samples.txt --count_matrix Trinity567_genes.counts.matrix --fpkm_matrix Trinity567_genes.TMM.fpkm.matrix --bulk_load $TRINOTATE_HOME/util/transcript_expression/import_samples_n_expression_matrix.pl --sqlite Trinotate.sqlite --component_mode --samples_file Trinity5-6-7-samples.txt --count_matrix Trinity567_genes.counts.matrix --fpkm_matrix Trinity567_genes.TMM.fpkm.matrix --bulk_load # import the fpkm and DE analysis stuff $TRINOTATE_HOME/util/transcript_expression/import_expression_and_DE_results.pl --sqlite Trinotate.sqlite --component_mode --samples_file Trinity5-6-7-samples.txt --count_matrix Trinity567_genes.counts.matrix --fpkm_matrix Trinity567_genes.TMM.fpkm.matrix --DE_dir Trinity5-6-7-pairwiseDE-edgeR echo "###################################" echo Loading Transcript Expression Matrix echo "###################################" $TRINOTATE_HOME/util/transcript_expression/import_samples_n_expression_matrix.pl --sqlite Trinotate.sqlite --transcript_mode --samples_file samples_n_reads_described.txt --count_matrix Trinity_trans.counts.matrix --fpkm_matrix Trinity_trans.counts.matrix.TMM_normalized.FPKM --bulk_load # import the DE analysis results: echo "###########################" echo Loading DE results for transcripts echo "###########################" $TRINOTATE_HOME/util/transcript_expression/import_DE_results.pl --sqlite Trinotate.sqlite --DE_dir edgeR_trans --transcript_mode --bulk_load echo "###########################" echo Loading DE results for components echo "###########################" $TRINOTATE_HOME/util/transcript_expression/import_DE_results.pl --sqlite Trinotate.sqlite --DE_dir edgeR_components --component_mode --bulk_load echo "######################################################" echo Loading transcription profile clusters for transcripts echo "######################################################" # import the transcription profile cluster stuff $TRINOTATE_HOME/util/transcript_expression/import_transcript_clusters.pl --group_name DE_all_vs_all --analysis_name edgeR_trans/diffExpr.P0.001_C2.matrix.R.all.RData.clusters_fixed_P_20 --sqlite Trinotate.sqlite edgeR_trans/diffExpr.P0.001_C2.matrix.R.all.RData.clusters_fixed_P_20/*matrix echo "###########################" echo Generating report table echo "###########################" $TRINOTATE_HOME/Trinotate Trinotate.sqlite report > Trinotate_report.xls # Load annotations $TRINOTATE_HOME/util/annotation_importer/import_transcript_names.pl Trinotate.sqlite Trinotate_report.xls @@ 1850 /data/results/tools/align/trinity/trinityrnaseq_r20140413p1/Analysis/DifferentialExpression/my_run_DE_analysis.pl --matrix /data/images/proton/run125/www/genes.counts.matrix --method edgeR --samples_file samples_described.txt --output genes.edgeR 1851 cd genes.edgeR-pval0.01 1852 ls *csv 1853 cd ../../ 1854 cd ../KMlab/ 1855 ls -l 1856 cd olfly/ 1857 ls -l 1858 ls tr2/ 1859 ls -l tr2/ 1860 ln -s /data/images/proton/run125/www/genes.edgeR-pval0.01 run125 1861 cd run125/ 1862 ls 1863 ln -s /data/images/proton/run125/www/samples_described.txt 1864 cat samples_described.txt 1865 ls 1866 ln -s /data/images/proton/run125/www/Auto_user_ION-125-KMathiopoulos_RNAseq_150121_KMR27-28-31-32-33_189.pdf 1867 ln -s /data/images/proton/run124/www/Auto_user_ION-124-KMathiopoulos_RNAseq_150120_KMR25-26-29-30_188.pdf