##
###################################################
#
# This configuration file contains all settings for a run
# of SpliceMap.
#
# lines beginning with '#' are comments
# lists begin with '> tag' and end with '<' on separate lines
#
###################################################
##

#########################
## Required Settings ##

##
# Directory of the chromosome files in FASTA format
# Each chromosome should be in a separate file (can be concatenated)
# i.e. chr1.fa, chr2.fa, ...
# (single value)

genome_dir = ~/genome/hg18/

##
# These are the two lists of sequencer reads files.
# "reads_list2" can be commented out if reads are not paired-end.
# Make sure the order of both lists is the same!
# Also, "reads_list1" must be the first pair.
# Note: pair-reads should be in the "forward-reverse" format.
# (multiple values)

> reads_list1
fastq_100k_1_1.txt
fastq_100k_3_1.txt
<

> reads_list2
fastq_100k_1_2.txt
fastq_100k_3_2.txt
<

##
# Format of the sequencer reads, also make sure reads are
# not split over multiple lines.
# Choices are: FASTA, FASTQ, RAW
# (single value)

read_format = FASTQ

##
# Format of the quality string if FASTQ is used
# Choices are:
# phred-33 -- Phred base 33 (same as Sanger format) [default]
# phred-64 -- Phred base 64 (same as Illumina 1.3+)
# solexa -- Format used by solexa machines
# (single value)

quality_format = phred-33

##
# The short reads mapper used
# choices are "bowtie", "eland" or "seqmap"
# Bowtie is recommended
# this can be commented out if the mapping has already been done and the
# appropriate ".t" files exist in the temp directory [advanced].
# (single value)

mapper = bowtie

#########################
## Optional Settings ##

##
# these are annotations used to find novel junctions, in ref or bed format.
# (optional)
# (single value)

#annotations = all.gene.refFlat.txt

##
# Directory name of the directory that stores temporary files
# (optional) Default = temp
# (single value)

temp_path = temp

##
# Directory name of the directory that stores the output files
# (optional) Default = output
# (single value)

out_path = output

##
# Maximum intron size, this is the absolute 99th-percentile maximum.
# Introns beyond this size will be ignored.
# (optional) If you don't set this, we will assume a mammalian genome (400,000)
# (single value)

max_intron = 400000

##
# 25th-percentile intron size, this is the lower 25th-percentile intron size
# This is not the smallest size that SpliceMap will search. That is about 25bp.
# (optional) If you don't set this, we will assume a mammalian genome (20,000)
# (single value)

min_intron = 20000

##
# Maximum number of multi-hits
# If a 25-mer seed has more than this many multi-hits, it will be discarded.
# (optional) Default is 10
# (single value)

max_multi_hit = 10

##
# Full read length
# SpliceMap will only use the first "full_read_length" bp for mapping.
# If the read is shorter than "full_read_length", the full read will be used before head clip.
# If you comment this out SpliceMap will use as many as possible.
# This is for the case where the reads might have N's at the end.
# It is always desirable to cut off the N's
# (optional)
# (single value)

# full_read_length = 70

##
# Number of bases to clip off the head of the read
# This clipping is applied after "full_read_length"
# (optional)
# (single value)

# head_clip_length = 0

##
# Maximum number of mismatches allowed in seeding for junction search
# Choices are 0, 1 (default) or 2
# (optional)
# (single value)

seed_mismatch = 1

##
# Maximum number of mismatches allowed in entire read
# No limit on value, however SpliceMap can only identify reads with
# a maximum of 2 mismatches per 25bp.
# Default is 2.
# (optional)
# (single value)

read_mismatch = 2

##
# Maximum number of bases allowed to be soft clipped from the ends of reads during
# alignment. This is required as mismatches near junctions could cause parts of
# a read to not map.
# Default is 40.
# (optional)
# (single value)

#max_clip_allowed = 40

##
# Generate a SAM file
# choices are
# "cuff" -- Generate Cufflinks compatible SAM file
# "sam" -- Generate regular SAM file
#
# If this is commented out, no SAM file will be generated
# (optional)
# (single value)

sam_file = cuff

##
# Name of the chromosome file with wildcards
# If none is given the default of "chr*.fa" will be used [this is compatible with the UCSC genome files]
# If your genome file is concatenated, just type the file name
# (optional)
# (single value)

chromosome_wildcard = chr*.fa

##
# Number of chromosomes to process at once, to take advantage of multi-core systems.
# This is not threading, so it will take extra memory. However, running 2 at a time should be fine.
# (optional) Default = 1
# (single value)

num_chromosome_together = 2

##
###################################################
#
# Bowtie specific options
# these have no meaning if another mapper is used
#
###################################################
##

#########################
## Required Settings ##

##
# Base of bowtie index, this should be the same genome as the
# chromosome files
# e.g. if your bowtie files are "genome/hg18/genome.1.ebwt", ...
# then your base dir is "genome/hg18/genome"
# (single value)

bowtie_base_dir = ~/genome/hg18/genome

#########################
## Optional Settings ##

##
# Number of threads to use for mapping (SpliceMap may use this option in future)
# Default value is 2
# (optional)
# (single value)

num_threads = 2

##
# Try hard?
# choices are "yes" or "no"
# Default value is yes. (it is not much slower, about 15%)
# I'm unsure if this is required, feel free to try with
# this option off and let me know your results.
# (optional)
# (single value)

try_hard = yes