SHORE API
|
On disk DNA suffix array and hash. More...
Classes | |
class | incremental_hash |
Update the hash value of a sequence incrementally. More... | |
class | incremental_query |
Incrementally elongate a query while keeping track of the suffix array range. More... | |
class | incremental_reverse_query |
Incrementally elongate a query while keeping track of the suffix array range (queries the reverse index). More... | |
class | iterator |
Iterator that decodes raw suffix array coordinates into chromosome-position pairs. More... | |
Public Types | |
enum | SeqEnc { ENC_DNA =0, ENC_CHAR =1, ENC_UNKNOWN } |
Enum of possible encodings of the DNA sequence. | |
typedef intpack::const_iterator | raw_iterator |
typedef std::pair < raw_iterator, raw_iterator > | rawrange |
typedef std::pair< iterator, iterator > | range |
Public Member Functions | |
suffix_index (const std::string &indexfile, const bool map_lcp=false, const bool map_rev=false) | |
Constructor: mmap the index file. | |
void | decode_position (shore::refseq_coor &res, const size_t raw, const int reverse_offset) |
Decode a raw suffix array coordinate to a chromosome-position pair. More... | |
raw_iterator | array_begin () const |
Direct access to the start of the suffix array data. | |
raw_iterator | array_end () const |
Direct access to the end of the suffix array data. | |
raw_iterator | lcp_begin () const |
Direct access to the start of the LCP array data. | |
raw_iterator | lcp_end () const |
Direct access to the end of the LCP array data. | |
packeddna_iterator | sequence_begin () const |
Direct access to the start of the sequence data. | |
packeddna_iterator | sequence_end () const |
Direct access to the end of the sequence data. | |
iterator | coor_begin () const |
iterator | coor_end () const |
template<typename Iter > | |
rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy=0) const |
Find all perfect matches for the given sequence (version for nuc::packed_base iterators). | |
template<typename Iter > | |
rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::base > >::type *dummy=0) const |
Find all perfect matches for the given sequence (version for nuc::base iterators). | |
template<typename Iter > | |
rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, char > >::type *dummy=0) const |
Find all perfect matches for the given sequence (version for char iterators). | |
template<typename Iter > | |
range | match (Iter f, Iter t) const |
Find all perfect matches for the given sequence. | |
range | match (const std::string &str) const |
Find all perfect matches for the given sequence. | |
template<typename Iter > | |
std::pair< rawrange, Iter > | match5_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy=0) const |
Find the longest perfect match for the 5' end of the given sequence. | |
template<typename Iter > | |
std::pair< rawrange, Iter > | match5_reverse_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy=0) const |
Find the longest perfect match for the 5' end of the given sequence. | |
template<typename Iter > | |
std::pair< range, Iter > | match5 (Iter f, Iter t) const |
Find the longest perfect match for the 5' end of the given sequence. | |
std::pair< range, std::string::const_iterator > | match5 (const std::string &query) const |
Find the longest perfect match for the 5' end of the given sequence. | |
template<typename Iter > | |
std::pair< range, Iter > | match5_reverse (Iter f, Iter t) const |
Find the longest perfect match for the 5' end of the given sequence. | |
std::pair< range, std::string::const_iterator > | match5_reverse (const std::string &query) const |
Find the longest perfect match for the 5' end of the given sequence. | |
template<typename Iter > | |
size_t | count (Iter f, Iter t) const |
Count the number of occurrences of a kmer. | |
size_t | count (const std::string &str) const |
Count the number of occurrences of a kmer. | |
template<typename Iter > | |
double | calc_freq (Iter f, Iter t) const |
Calculate the frequency of a kmer. | |
double | calc_freq (const std::string &str) const |
Calculate the frequency of a kmer. | |
const idxinfo | get_header () const |
Get the index header. | |
template<typename Iter > | |
suffix_index::rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy) const |
template<typename Iter > | |
suffix_index::rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::base > >::type *dummy) const |
template<typename Iter > | |
suffix_index::rawrange | match_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, char > >::type *dummy) const |
template<typename Iter > | |
suffix_index::range | match (Iter f, Iter t) const |
template<typename Iter > | |
std::pair < suffix_index::rawrange, Iter > | match5_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy) const |
template<typename Iter > | |
std::pair< suffix_index::range, Iter > | match5 (Iter f, Iter t) const |
template<typename Iter > | |
std::pair < suffix_index::rawrange, Iter > | match5_reverse_raw (Iter f, Iter t, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy) const |
template<typename Iter > | |
std::pair< suffix_index::range, Iter > | match5_reverse (Iter f, Iter t) const |
Static Public Member Functions | |
static void | build (const std::string &fasta_fn, const std::string &out_fn, std::ostream *const log=0, const int kmersize=10, const bool with_lcp=false, const bool with_rev=false) |
Create index file from fasta input. | |
static void | build (const std::vector< std::string > &fasta_fn, const std::string &out_fn, std::ostream *const log=0, const int kmersize=10, const bool with_lcp=false, const bool with_rev=false) |
Create index file from fasta input. | |
static void | build (const std::vector< std::string > &fasta_fn, std::ostream &out, std::ostream *const log=0, const int kmersize=10, const bool with_lcp=false, const bool with_rev=false) |
Create index file from fasta input. | |
template<typename Iter > | |
static size_t | lhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy=0) |
Calculate the hash index for a DNA sequence. More... | |
template<typename Iter > | |
static size_t | rhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::packed_base > >::type *dummy=0) |
Calculate the hash index for a DNA sequence. More... | |
template<typename Iter > | |
static size_t | lhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::base > >::type *dummy=0) |
template<typename Iter > | |
static size_t | lhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, char > >::type *dummy=0) |
template<typename Iter > | |
static size_t | rhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, nuc::base > >::type *dummy=0) |
template<typename Iter > | |
static size_t | rhash (Iter f, Iter t, const size_t n, typename boost::enable_if< boost::is_same< typename std::iterator_traits< Iter >::value_type, char > >::type *dummy=0) |
static const std::string | unhash (const size_t hash, const size_t len) |
Reconstructs the DNA sequence from a hash (inverse of lhash()). | |
On disk DNA suffix array and hash.
void shore::suffix_index::decode_position (shore::refseq_coor &res, const size_t raw, const int reverse_offset)
Decode a raw suffix array coordinate to a chromosome-position pair.
reverse_offset | Length of the match for reverse index matches, zero for forward index matches. |
|
inline static |
Calculate the hash index for a DNA sequence.
f | Start of the DNA. |
t | End of the DNA. |
n | Length of the sequence to be hashed; need not be equal to t-f. |
The hash returned will point to the first suffix array entry that matches the DNA.
|
inline static |
Calculate the hash index for a DNA sequence.
f | Start of the DNA. |
t | End of the DNA. |
n | Length of the sequence to be hashed; need not be equal to t-f. |
The hash returned will point to the end of the suffix array entries that match the DNA.