for i in DKR18*bam do echo $i /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -M -s 1 -J -f -t exon -T 4 -a /data/results/reference/mmu/Mus_musculus.GRCm38.92.exons.gtf -o $i.cnt $i done /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -g gene_name -t exon -T 4 -a /data/results/reference/hsa/gencode/gencode.v26lift37.annotation_exons.gtf -o /data/images/proton2/run341/rpkm/exonGencode.cnt PHR23r-FUBP2.bam PHR22r-TCF4.bam PHR21r-bcatMono.bam PHR20r-bcatPoly.bam PHR24r-IgG.bam PHR19r-Input.bam /data/images/proton/run15/www/con_b.bam /data/images/proton/run15/www/con_a.bam cd /data/images/proton2/run341/rpkm/all /data/images/proton2/run341/rpkm/getRPKM_with_control.awk awk -f ../getRPKM_with_control.awk ../exonGencode.cnt > getRPKM_with_control.log #edit header /data/images/proton2/run341/rpkm/all/process-RPKM2.r #@DKlab #run392 only /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -g gene_name -t exon -T 4 -a /data/results/reference/mmu/Mus_musculus.GRCm38.92.gtf -o exonGencode.cnt /data/images/proton/DKlab/run392/DKR20-HURRIPC57-2M.bam /data/images/proton/DKlab/run392/DKR4b-WT1M.bam /data/images/proton/DKlab/run392/DKR4a-WT1M.bam /data/images/proton/DKlab/run392/DKR4c-WT1M.bam || Total reads : 5940985 || || Successfully assigned reads : 2023947 (34.1%) || #all Liver samples /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 1 -J -f -g gene_name -t exon -T 4 -a /data/results/reference/mmu/Mus_musculus.GRCm38.92.gtf -o exonGencode.cnt /data/images/proton/DKlab/run392/DKR4b-WT1M.bam DKR5_InputWt-2M.bam DKR6_InputWt-3F.bam DKR7_InputTg6105-1M.bam DKR8_InputTg6105-2M.bam DKR9-InputTg6105-3F.bam DKR13r_M2RiPWt-1M.bam DKR14r_M2RiPWt-2M.bam DKR15r_M2RiPWt-3F.bam DKR16_M2RiPTg6105-1M.bam DKR17r_M2RiPTg6105-2M.bam DKR18_M2RiPTg6105-3F.bam DKR19-HuRRiPC57-1M.bam DKR21-HuRRiPC57-3F.bam 
/data/images/proton/DKlab/run392/DKR20-HURRIPC57-2M.bam DKR22-IgGRiPC57-1M.bam DKR23r-IgGRiPC57-2M.bam DKR24r-IgGRiPC57-3F.bam DKR10-InputC57-1M.bam DKR11-InputC57-2M.bam DKR12-InputC57-3F.bam /data/results/tools/rnaseq/subread/subread-1.5.2-source/bin/featureCounts -O --fraction -M -s 0 -J -f -g gene_name -t exon -T 4 -a /data/results/reference/mmu/Mus_musculus.GRCm38.92.gtf -o exonGencodeU.cnt /data/images/proton/DKlab/run392/DKR4b-WT1M.bam DKR5_InputWt-2M.bam DKR6_InputWt-3F.bam DKR7_InputTg6105-1M.bam DKR8_InputTg6105-2M.bam DKR9-InputTg6105-3F.bam DKR13r_M2RiPWt-1M.bam DKR14r_M2RiPWt-2M.bam DKR15r_M2RiPWt-3F.bam DKR16_M2RiPTg6105-1M.bam DKR17r_M2RiPTg6105-2M.bam DKR18_M2RiPTg6105-3F.bam DKR19-HuRRiPC57-1M.bam DKR21-HuRRiPC57-3F.bam /data/images/proton/DKlab/run392/DKR20-HURRIPC57-2M.bam DKR22-IgGRiPC57-1M.bam DKR23r-IgGRiPC57-2M.bam DKR24r-IgGRiPC57-3F.bam DKR10-InputC57-1M.bam DKR11-InputC57-2M.bam DKR12-InputC57-3F.bam log into featurecount-log.txt #reczko@fix:/data/images/proton/DKlab/run392/run402$ awk -f /data/images/proton/DKlab/run392/run402/getRPKMs.awk exonGencode.cnt > exonGencode.rpkm awk -f /data/images/proton/DKlab/run392/run402/getRPKMs_groups.awk exonGencodeU.cnt > exonGencode.groups.rpkm r=read.table("/data/images/proton/DKlab/run392/run402/exonGencode.groups.rpkm",header=T) str(r) > 'data.frame': 801623 obs. of 21 variables: $ Geneid : Factor w/ 53672 levels "0610005C13Rik",..: 1709 22130 52662 52662 52662 52662 52662 52662 52662 17021 ... $ Chr : Factor w/ 45 levels "1","10","11",..: 1 1 1 1 1 1 1 1 1 1 ... $ Start : int 3073253 3102016 3213609 3205901 3213439 3206523 3670552 3421702 3214482 3252757 ... $ End : int 3074322 3102125 3216344 3207317 3215632 3207317 3671498 3421901 3216968 3253236 ... $ Strand : Factor w/ 2 levels "+","-": 1 1 2 2 2 2 2 2 2 1 ... $ Length : int 1070 110 2736 1417 2194 795 947 200 2487 480 ... $ DKR13r_M2RiPWt_1M : num 0 0 0 0 0 ... $ DKR14r_M2RiPWt_2M : num 0 0 0 0 0 0 0 0 0 0 ... 
$ DKR15r_M2RiPWt_3F : num 0 0 0 0 0 0 0 0 0 0 ... $ DKR16_M2RiPTg6105_1M : num 0 0 0 0 0 0 0 0 0 0 ... $ DKR17r_M2RiPTg6105_2M: num 0 0 0 0 0 0 0 0 0 0 ... $ DKR18_M2RiPTg6105_3F : num 0 0 0 0 0 ... $ DKR19_HuRRiPC57_1M : num 0 0 0.00687 0 0.00856 ... $ DKR21_HuRRiPC57_3F : num 0 0 0 0 0 ... $ DKR20_HURRIPC57_2M : num 0 0 0 0 0 0 0 0 0 0 ... $ DKR22_IgGRiPC57_1M : num 0 0 0 0 0 0 0 0 0 0 ... $ DKR23r_IgGRiPC57_2M : num 0 0 0 0 0 0 0 0 0 0 ... $ DKR24r_IgGRiPC57_3F : num 0 0 0 0 0 ... $ iWT : num 0 0 0 0 0 0 0 0 0 0 ... $ iTg6105 : num 0 0 0 0 0 0 0 0 0 0 ... $ iC57 : num 0 0 0 0 0 ... Version 1.5.2 Usage: featureCounts [options] -a <annotation_file> -o <output_file> input_file1 [input_file2] ... ## Mandatory arguments: -a Name of an annotation file. GTF/GFF format by default. See -F option for more format information. Inbuilt annotations (SAF format) is available in 'annotation' directory of the package. -o Name of the output file including read counts. A separate file including summary statistics of counting results is also included in the output ('<output_file>.summary') input_file1 [input_file2] ... A list of SAM or BAM format files. They can be either name or location sorted. If no files provided, <stdin> input is expected. ## Optional arguments: # Annotation -F Specify format of the provided annotation file. Acceptable formats include 'GTF' (or compatible GFF format) and 'SAF'. 'GTF' by default. For SAF format, please refer to Users Guide. -t Specify feature type in GTF annotation. 'exon' by default. Features used for read counting will be extracted from annotation using the provided value. -g Specify attribute type in GTF annotation. 'gene_id' by default. Meta-features used for read counting will be extracted from annotation using the provided value. -A Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. 
Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file. # Level of summarization -f Perform read counting at feature level (eg. counting reads for exons rather than genes). # Overlap between reads and features -O Assign reads to all their overlapping meta-features (or features if -f is specified). --minOverlap Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. Number of overlapping bases is counted from both reads if paired end. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to. --fracOverlap Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be within range [0,1]. 0 by default. Number of overlapping bases is counted from both reads if paired end. Both this option and '--minOverlap' option need to be satisfied for read assignment. --largestOverlap Assign reads to a meta-feature/feature that has the largest number of overlapping bases. --readExtension5 Reads are extended upstream by <int> bases from their 5' end. --readExtension3 Reads are extended downstream by <int> bases from their 3' end. --read2pos <5:3> Reduce reads to their 5' most base or 3' most base. Read counting is then performed based on the single base the read is reduced to. # Multi-mapping reads -M Multi-mapping reads will also be counted. For a multi-mapping read, all its reported alignments will be counted. The 'NH' tag in BAM/SAM input is used to detect multi-mapping reads. # Fractional counting --fraction Assign fractional counts to features. This option must be used together with '-M' or '-O' or both. 
When '-M' is specified, each reported alignment from a multi-mapping read (identified via 'NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where x is the total number of alignments reported for the same read. When '-O' is specified, each overlapping feature will receive a fractional count of 1/y, where y is the total number of features overlapping with the read. When both '-M' and '-O' are specified, each alignment will carry a fractional count of 1/(x*y). # Read filtering -Q The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default. --splitOnly Count split alignments only (ie. alignments with CIGAR string containing 'N'). An example of split alignments is exon-spanning reads in RNA-seq data. --nonSplitOnly If specified, only non-split alignments (CIGAR strings do not contain letter 'N') will be counted. All the other alignments will be ignored. --primary Count primary alignments only. Primary alignments are identified using bit 0x100 in SAM/BAM FLAG field. --ignoreDup Ignore duplicate reads in read counting. Duplicate reads are identified using bit 0x400 in BAM/SAM FLAG field. The whole read pair is ignored if one of the reads is a duplicate read for paired end data. # Strandness -s Perform strand-specific read counting. Acceptable values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default. # Exon-exon junctions -J Count number of reads supporting each exon-exon junction. Junctions were identified from those exon-spanning reads in the input (containing 'N' in CIGAR string). Counting results are saved to a file named '<output_file>.jcounts' -G Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files. This optional argument can be used with '-J' option to improve read counting for junctions. 
# Parameters specific to paired end reads -p If specified, fragments (or templates) will be counted instead of reads. This option is only applicable for paired-end reads. -B Only count read pairs that have both ends aligned. -P Check validity of paired-end distance when counting read pairs. Use -d and -D to set thresholds. -d Minimum fragment/template length, 50 by default. -D Maximum fragment/template length, 600 by default. -C Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands. --donotsort Do not sort reads in BAM/SAM input. Note that reads from the same pair are required to be located next to each other in the input. # Number of CPU threads -T Number of the threads. 1 by default. # Miscellaneous -R Output detailed assignment result for each read. A text file will be generated for each input file, including names of reads and meta-features/features reads were assigned to. See Users Guide for more details. --tmpDir Directory under which intermediate files are saved (later removed). By default, intermediate files will be saved to the directory specified in '-o' argument. --maxMOp Maximum number of 'M' operations allowed in a CIGAR string. 10 by default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are merged in the CIGAR string. -v Output version of the program.