#NOTE(review): parameter file, presumably for PARalyzer (inferred from the OUTPUT_READS_FILE name) - confirm
#Layout: one parameter per line; lines starting with '#' are documentation or disabled parameters
#BANDWIDTH=3
#integer# = Size of bandwidth for KDE calculation (default 3)
#CONVERSION=#character from#>#character to#
#character from# = Character representing the modified ribonucleotide (default 'T')
#character to# = Character representing what the modified ribonucleotide is read as by rTranscriptase (default 'C')
#note: only 1 conversion is possible at this time; in the future we may implement the ability to have 2 modified ribonucleotides
#MINIMUM_READ_COUNT_PER_GROUP=#integer#
#MINIMUM_READ_COUNT_PER_GROUP=5
#integer# = Minimum number of reads required to call a group (default 10)
#MINIMUM_READ_COUNT_PER_CLUSTER=#integer#
#integer# = Minimum number of reads required to call a cluster (default 1)
MINIMUM_READ_COUNT_FOR_KDE=5
#integer# = Minimum read depth at a location to make a KDE estimate (default 1) => (recommended: 5)
#MINIMUM_CLUSTER_SIZE=#integer#
#integer# = Minimum length required for a cluster to be reported (default 1)
MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=2
#integer# = Minimum number of separate locations to have a reported conversion for a cluster to be reported (default 1) => (recommended: 2)
#note: setting this to 0 will cause errors, if you are looking for sites that may have no conversions, I recommend analyzing the 'groups' output file (see below)
#MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=#integer#
#integer# = Minimum number of conversion events within a region to report a cluster (default 1)
#note: setting this to 0 will cause errors, if you are looking for sites that may have no conversions, I recommend analyzing the 'groups' output file (see below)
#MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=#integer#
#integer# = Minimum read depth for a location to be included within a cluster (default 1)
#MINIMUM_READ_LENGTH=#integer#
#integer# = Minimum length of mapped read to be included in the analysis (default 1)
#MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=#integer#
#integer# = Maximum number of non-conversion mismatches of a mapped read to be included in the analysis (default 5)
#SAM_FILE=/mnt/max/b/genomics_facility/DKlab/parclip/HuR_CLIPs_Sweden/mRNAnew/genome/0hrep1/0hrep1.bam
#BOWTIE_FILE=#filepath/filename#
#filepath/filename# = Location and name of a bowtie output file to be analyzed
#note: there can be multiple BOWTIE alignment files used as input, just create a new line with a new 'BOWTIE_FILE=' parameter
GENOME_2BIT_FILE=/data/results/reference/mmu/mm9/shrimp/shrimp-15mMm/mm9-stranded-mRNA.2bit
#filepath/filename# = Location of the UCSC .2bit file of the genome against which the reads were aligned
#*****1 of the following*****
EXTEND_BY_READ
#HAFNER_APPROACH
#ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=#integer#
#****************************
#EXTEND_BY_READ
#Including this line means that the cluster will be extended beyond the signal to include a region such that it extends to the end of any read that falls within the cluster and contained a conversion, or until the minimum read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
#HAFNER_APPROACH
#Identifies the location with the largest number of conversion events and extends the cluster up to (parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL)nt in each direction from that point, or until the minimum read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
#ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=#integer#
#integer# = The maximum number of nucleotides to extend beyond the positive signal in each direction (default 0)
#the cluster is defined as the region where the conversion KDE is above the background KDE and then extended up to #integer#, or until the minimum read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
OUTPUT_CLUSTERS_FILE=sh-clusters-IGG.txt2
#filepath/filename# = Location and name of the resulting clusters file
#*****Other Options:
#FILTER_FILE=#filepath/filename#=#flag#
#filepath/filename# = Location of the UCSC .bed file (http://genome.ucsc.edu/FAQ/FAQformat.html#format1) containing genomic coordinates for regions you would like to filter
#flag# = Text that will be added to clusters / groups if they overlap one of these regions
#note: multiple filter files may be used; just add additional lines; if multiple filter files are used and a cluster overlaps multiple regions, only one will be reported
OUTPUT_GROUPS_FILE=sh-groups-IGG.txt2
#filepath/filename# = Location and name of the resulting groups file; contains the information of the groups prior to cluster generation
OUTPUT_DISTRIBUTIONS_FILE=sh-distributions-IGG.txt2
#filepath/filename# = Location and name of the resulting distributions file; contains the signal KDE, background KDE, read count & conversion % for all locations within each group
OUTPUT_READS_FILE=sh-IGG_PARalyzer_Utilized.sam
BOWTIE_FILE=/data/images/proton/DKlab/mr/parclip/shrimp/IGG-15mMm.bam.md.bam.bt
#filepath/filename# = Location and name of a bowtie output file to be analyzed
#note: adding the '=COLLAPSED' flag means that the FASTA file aligned to the genome(s) was first collapsed by the 'fastx_collapser' program from the FASTX toolkit
#SPECIAL_CHROMOSOME=#chromosome#=#filepath/filename#
#note: this is to be used if not all of the chromosomes that you aligned to are in the Genome.2bit file
#chromosome# = chromosome name (i.e. chrX)
#filepath/filename# = Location of the ucsc .2bit file that contains this particular chromosome
#FIND_MIRNA_SEEDMATCHES=#filepath/filename#
#filepath/filename# = Location of the file that contains mature miRNA name and sequences
#MAXIMUM_SEED_MATCH_LENGTH=#integer#
#integer# = maximum length of seed match; must be greater than or equal to 6 (not recommended to go above 12)
#note: this will search all clusters for sites that match all seeds of #integer#-1m/A through 6mer
#note: this still needs more work to include all different seed-match types
#OUTPUT_MIRNA_TARGETS_FILE=#filepath/filename#
#filepath/filename# = Location and filename of a file displaying all miRNA-cluster targets