SHORE API
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
suffix_index.hpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright 2008,2009,2010,2011,2012 Stephan Ossowski, Korbinian Schneeberger,
4  * Felix Ott, Joerg Hagmann, Alf Scotland, Sebastian Bender
5  *
6  * This file is part of SHORE.
7  *
8  * SHORE is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * SHORE is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with SHORE. If not, see <http://www.gnu.org/licenses/>.
20  */
21 
25 
26 #ifndef SHORE_ALGO_SUFFIX_INDEX_HPP__
27 #define SHORE_ALGO_SUFFIX_INDEX_HPP__
28 
29 #include <iostream>
30 #include <map>
31 #include <string>
32 #include <vector>
33 
34 #include <boost/utility/enable_if.hpp>
35 #include <boost/type_traits.hpp>
36 
38 #include "shore/datatype/coor.hpp"
40 #include "shore/base/memops.hpp"
41 #include "shore/algo/suffix.hpp"
42 
43 namespace shore {
44 
47 {
48  public:
49 
51  enum SeqEnc
52  {
53  ENC_DNA=0,
54  ENC_CHAR=1,
55  ENC_UNKNOWN
56  };
57 
58  class iterator;
59  typedef intpack::const_iterator raw_iterator;
60  typedef std::pair<raw_iterator,raw_iterator> rawrange;
61  typedef std::pair<iterator,iterator> range;
62 
63  private:
64 
65  enum HeaderKeys
66  {
67  HK_FMT=0,
68  HK_SEQTOTAL=1,
69  HK_ENCODING=2,
70  HK_BITSPERBASE=3,
71  HK_SEQBYTES=4,
72  HK_BITSPERINDEX=5,
73  HK_INDEXBYTES=6,
74  HK_KMERSIZE=7,
75  HK_BITSPERHASHENT=8,
76  HK_HASHBYTES=9,
77  HK_BITSPERLCP=10,
78  HK_LCPBYTES=11,
79  HK_NSEQ=12,
80  HK_SEQ=13,
81  HK_REVINDEX=14,
82  HK_ENDMETASECTION=15
83  };
84 
85  static const std::string HK[16];
86  static const std::string SE[3];
87 
88  struct idxinfo
89  {
90  std::string version;
91  size_t seqtotal;
92  SeqEnc enc;
93  int bitsperbase;
94  size_t seqbytes;
96  int bitsperindex;
97  size_t indexbytes;
98  int kmersize;
100  int bitsperhashent;
101  size_t hashbytes;
102 
104  int bitsperlcp;
105  size_t lcpbytes;
106 
107  size_t nseq;
108 
109  bool have_reverse_index;
110 
112  std::map<size_t,size_t> offsets;
113  std::vector<std::string> labels;
114  std::vector<std::string> md5sums;
115 
116  idxinfo();
117  void parse(std::istream& in);
118  void print(std::ostream& out) const;
119  };
120 
121  idxinfo m_idxinfo;
122 
123  shore::mmapping m_mmapping_seq;
124  shore::mmapping m_mmapping_idx;
125  shore::mmapping m_mmapping_hash;
126  shore::mmapping m_mmapping_lcp;
127  shore::mmapping m_mmapping_rev_idx;
128  shore::mmapping m_mmapping_rev_hash;
129  shore::mmapping m_mmapping_rev_lcp;
130 
131 
132  packeddna_iterator m_seqbeg;
133  packeddna_iterator m_seqend;
134  std::reverse_iterator<packeddna_iterator> m_rev_seqbeg;
135  std::reverse_iterator<packeddna_iterator> m_rev_seqend;
136 
137  raw_iterator m_idxbeg;
138  raw_iterator m_idxend;
139 
140  raw_iterator m_hashbeg;
141  raw_iterator m_hashend;
142 
143  raw_iterator m_lcpbeg;
144  raw_iterator m_lcpend;
145 
146  raw_iterator m_rev_idxbeg;
147  raw_iterator m_rev_idxend;
148 
149  raw_iterator m_rev_hashbeg;
150  raw_iterator m_rev_hashend;
151 
152  raw_iterator m_rev_lcpbeg;
153  raw_iterator m_rev_lcpend;
154 
155 
156  static void decode_position(shore::refseq_coor &res,
157  const size_t raw,
158  const std::map<size_t,size_t> &offsets,
159  const uint64_t seqtotal,
160  const int reverse_offset);
161 
162  template<typename SeqIterator>
163  static void build_hash_recursive(shore::intpack &hash,
164  SeqIterator seq_beg,SeqIterator seq_end,
165  const raw_iterator arr_beg,
166  const rawrange r,
167  std::string &seed,const int kmersize)
168  {
169  for(int i=0;i<4;++i)
170  {
171  const shore::nuc::packed_base b=
172  static_cast<shore::nuc::packed_base>(i);
173 
174  seed+=shore::nuc::enc(shore::nuc::unpack(b));
175 
176  const packeddna_iterator f=packeddna_iterator::begin(seed);
177  packeddna_iterator t=packeddna_iterator::end(seed);
178 
179  // trim trailing zeroes, i.e. adenines
180  // (these do not change the hash, but may advance
181  // the suffix array position)
182  while(((t-1)!=f)&&((*(t-1))==0))
183  --t;
184 
185  const rawrange r_new=suffix_query(seq_beg,seq_end,r.first,r.second,f,t);
186 
187  if(seed.size()==size_t(kmersize))
188  {
189  hash[lhash(f,t,kmersize)]=r_new.first-arr_beg;
190  }
191  else
192  {
193  build_hash_recursive(hash,seq_beg,seq_end,arr_beg,r_new,seed,kmersize);
194  }
195  seed.resize(seed.size()-1);
196  }
197  }
198 
199  public:
200 
202  suffix_index(const std::string& indexfile,
203  const bool map_lcp=false,const bool map_rev=false);
204 
209  void decode_position(shore::refseq_coor &res,const size_t raw,const int reverse_offset);
210 
212  raw_iterator array_begin() const;
214  raw_iterator array_end() const;
216  raw_iterator lcp_begin() const;
218  raw_iterator lcp_end() const;
220  packeddna_iterator sequence_begin() const;
222  packeddna_iterator sequence_end() const;
223 
224  iterator coor_begin() const;
225  iterator coor_end() const;
226 
229  template<typename Iter>
230  rawrange match_raw(Iter f,Iter t,
231  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,nuc::packed_base> >::type *dummy=0) const;
234  template<typename Iter>
235  rawrange match_raw(Iter f,Iter t,
236  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,nuc::base> >::type *dummy=0) const;
239  template<typename Iter>
240  rawrange match_raw(Iter f,Iter t,
241  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,char> >::type *dummy=0) const;
242 
244  template<typename Iter>
245  range match(Iter f,Iter t) const;
247  range match(const std::string& str) const;
248 
251  template<typename Iter>
252  std::pair<rawrange,Iter> match5_raw(Iter f,Iter t,
253  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,nuc::packed_base> >::type *dummy=0) const;
254 
257  template<typename Iter>
258  std::pair<rawrange,Iter> match5_reverse_raw(Iter f,Iter t,
259  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,nuc::packed_base> >::type *dummy=0) const;
260 
263  template<typename Iter>
264  std::pair<range,Iter> match5(Iter f,Iter t) const;
265 
268  std::pair<range,std::string::const_iterator> match5(const std::string &query) const;
269 
272  template<typename Iter>
273  std::pair<range,Iter> match5_reverse(Iter f,Iter t) const;
274 
277  std::pair<range,std::string::const_iterator> match5_reverse(const std::string &query) const;
278 
280  template<typename Iter>
281  size_t count(Iter f,Iter t) const;
283  size_t count(const std::string &str) const;
285  template<typename Iter>
286  double calc_freq(Iter f,Iter t) const;
288  double calc_freq(const std::string &str) const;
289 
291  const idxinfo get_header() const;
292 
294  static void build(const std::string& fasta_fn,const std::string& out_fn,
295  std::ostream*const log=0,const int kmersize=10,
296  const bool with_lcp=false,const bool with_rev=false);
297 
299  static void build(const std::vector<std::string>& fasta_fn,
300  const std::string& out_fn,
301  std::ostream*const log=0,const int kmersize=10,
302  const bool with_lcp=false,const bool with_rev=false);
303 
305  static void build(const std::vector<std::string>& fasta_fn,
306  std::ostream& out,
307  std::ostream*const log=0,const int kmersize=10,
308  const bool with_lcp=false,const bool with_rev=false);
309 
317  template<typename Iter>
318  static size_t lhash(Iter f,Iter t,const size_t n,
319  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
320  nuc::packed_base> >::type *dummy=0)
321  {
322  size_t ret=0;
323  size_t j=0;
324  size_t b=0;
325  for(Iter i=f;(i!=t)&&(j<n);++i,++j)
326  {
327  b=*i;
328  if(b>3)
329  {
330  ++ret;
331  break;
332  }
333 
334  ret=(ret<<2);
335  ret|=b;
336  }
337  ret=(ret<<((n-j)<<1));
338 
339  if(b>3)
340  --ret;
341 
342  return ret;
343  }
344 
352  template<typename Iter>
353  static size_t rhash(Iter f,Iter t,const size_t n,
354  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
355  nuc::packed_base> >::type *dummy=0)
356  {
357  size_t ret=0;
358  size_t j=0;
359  for(Iter i=f;(i!=t)&&(j<n);++i,++j)
360  {
361  const size_t b=*i;
362  if(b>3)
363  break;
364 
365  ret=(ret<<2);
366  ret|=b;
367  }
368  ++ret;
369  ret=(ret<<((n-j)<<1));
370  return ret;
371  }
372 
373  template<typename Iter>
374  static size_t lhash(Iter f,Iter t,const size_t n,
375  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
376  nuc::base> >::type *dummy=0)
377  {
378  const base2packed_iterator<Iter> ff(f);
379  const base2packed_iterator<Iter> tt(t);
380  return lhash<typename base2packed_iterator<Iter>::proxy_type>(ff,tt,n);
381  }
382 
383  template<typename Iter>
384  static size_t lhash(Iter f,Iter t,const size_t n,
385  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
386  char> >::type *dummy=0)
387  {
388  const char2packed_iterator<Iter> ff(f);
389  const char2packed_iterator<Iter> tt(t);
390  return lhash<typename char2packed_iterator<Iter>::proxy_type>(ff,tt,n);
391  }
392 
393  template<typename Iter>
394  static size_t rhash(Iter f,Iter t,const size_t n,
395  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
396  nuc::base> >::type *dummy=0)
397  {
398  const base2packed_iterator<Iter> ff(f);
399  const base2packed_iterator<Iter> tt(t);
400  return rhash<typename base2packed_iterator<Iter>::proxy_type>(ff,tt,n);
401  }
402 
403  template<typename Iter>
404  static size_t rhash(Iter f,Iter t,const size_t n,
405  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
406  char> >::type *dummy=0)
407  {
408  const char2packed_iterator<Iter> ff(f);
409  const char2packed_iterator<Iter> tt(t);
410  return rhash<typename char2packed_iterator<Iter>::proxy_type>(ff,tt,n);
411  }
412 
415  static const std::string unhash(const size_t hash,const size_t len);
416 
419  class iterator
420  :public boost::iterator_facade<
421  iterator,const shore::refseq_coor,
422  boost::random_access_traversal_tag,const shore::refseq_coor>
423  {
424  private:
425 
426  friend class boost::iterator_core_access;
427  friend class suffix_index;
428 
429  raw_iterator m_it;
430  const std::map<size_t,size_t>* m_offsets;
431  const uint64_t m_seqtotal;
432  const int m_reverse_offset;
433 
434 
435  iterator(const raw_iterator& it,
436  const std::map<size_t,size_t>*const s,
437  const uint64_t seqtotal,const int reverse_offset);
438 
439  void increment();
440  void decrement();
441  void advance(difference_type n);
442  difference_type distance_to(const iterator& other) const;
443  bool equal(const iterator& other) const;
444 
445  reference dereference() const;
446 
447  public:
448 
449  iterator();
450  };
451 
454  {
455  private:
456 
457  size_t m_lhash;
458  size_t m_rhash;
459 
460  size_t m_hashshift;
461 
462  int m_left_updated;
463  int m_right_updated;
464 
465  public:
466 
467  incremental_hash(const size_t kmerlen);
468  incremental_hash(const incremental_hash &other);
469  incremental_hash &operator=(const incremental_hash &other);
470 
472  void hash(const nuc::packed_base b);
473 
475  template<typename Iter>
476  void hash(Iter f,Iter t,
477  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,nuc::packed_base> >::type *dummy=0)
478  {
479  int a_up=0;
480  m_left_updated=m_right_updated=0;
481  while((m_hashshift>0)&&(f!=t))
482  {
483  if(nuc::amb(nuc::unpack(*f))!=1)
484  {
485  // ambiguous base symbol: fill with thymine
486  m_lhash+=((1<<m_hashshift)-1);
487  m_left_updated+=(a_up+1);
488  m_hashshift=0;
489  }
490  else
491  {
492  m_hashshift-=2;
493 
494  if((*f)!=0)
495  {
496  const size_t bval=*f;
497  m_lhash|=(bval<<m_hashshift);
498  m_left_updated+=(a_up+1);
499  a_up=0;
500  }
501  else
502  ++a_up;
503 
504  m_rhash=m_lhash;
505  m_rhash+=(1<<m_hashshift);
506 
507  ++m_right_updated;
508  }
509  ++f;
510  }
511  }
512 
513  size_t left() const;
514  size_t right() const;
515  int left_updated() const;
516  int right_updated() const;
517  };
518 
522  {
523  private:
524 
525  const suffix_index *m_parent;
526 
527  size_t m_query_offset;
528  rawrange m_range;
529 
530  incremental_hash m_hash;
531 
532  public:
533 
534  incremental_query(const suffix_index &parent);
535  incremental_query(const incremental_query &other);
536 
537  incremental_query &operator=(const incremental_query &other);
538 
540  void match(const nuc::packed_base b);
542  void match(const nuc::base b);
544  void match(const char b);
545 
549  template<typename Iter>
550  void match(Iter f,Iter t,
551  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
552  nuc::packed_base> >::type *dummy=0)
553  {
554  m_hash.hash(f,t);
555 
556  if(m_hash.left_updated()>0)
557  m_range.first=m_parent->m_idxbeg+m_parent->m_hashbeg[m_hash.left()];
558 
559  if(m_hash.right_updated()>0)
560  m_range.second=m_parent->m_idxbeg+m_parent->m_hashbeg[m_hash.right()];
561 
562  const int hashed=std::min(m_hash.left_updated(),m_hash.right_updated());
563 
564  if(hashed>1)
565  {
566  f+=hashed-1;
567  m_query_offset+=hashed-1;
568  }
569 
570  while(f!=t)
571  {
572  m_range=suffix_query_incremental(m_parent->m_seqbeg,
573  m_parent->m_seqend,
574  m_range.first,
575  m_range.second,
576  *f,m_query_offset);
577 
578  ++m_query_offset;
579  ++f;
580  }
581  }
582 
585  template<typename Iter>
586  void match(Iter f,Iter t,
587  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
588  nuc::base> >::type *dummy=0)
589  {
590  const base2packed_iterator<Iter> ff(f);
591  const base2packed_iterator<Iter> tt(t);
592  match<typename base2packed_iterator<Iter>::proxy_type>(ff,tt);
593  }
594 
597  template<typename Iter>
598  void match(Iter f,Iter t,
599  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
600  char> >::type *dummy=0)
601  {
602  const char2packed_iterator<Iter> ff(f);
603  const char2packed_iterator<Iter> tt(t);
604  match<typename char2packed_iterator<Iter>::proxy_type>(ff,tt);
605  }
606 
607  const rawrange &get_raw_range() const;
608  range get_range() const;
609  size_t get_count() const;
610  };
611 
615  {
616  private:
617 
618  const suffix_index *m_parent;
619 
620  size_t m_query_offset;
621  rawrange m_range;
622 
623  incremental_hash m_hash;
624 
625  public:
626 
629 
630  incremental_reverse_query &operator=(const incremental_reverse_query &other);
631 
632  void match(const nuc::packed_base b);
633  void match(const nuc::base b);
634  void match(const char b);
635 
638  template<typename Iter>
639  void match(Iter f,Iter t,
640  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
641  nuc::packed_base> >::type *dummy=0)
642  {
643  m_hash.hash(f,t);
644 
645  if(m_hash.left_updated()>0)
646  m_range.first=m_parent->m_idxbeg+m_parent->m_rev_hashbeg[m_hash.left()];
647 
648  if(m_hash.right_updated()>0)
649  m_range.second=m_parent->m_idxbeg+m_parent->m_rev_hashbeg[m_hash.right()];
650 
651  const int hashed=std::min(m_hash.left_updated(),m_hash.right_updated());
652 
653  if(hashed>1)
654  {
655  f+=hashed-1;
656  m_query_offset+=hashed-1;
657  }
658 
659  while(f!=t)
660  {
661  m_range=suffix_query_incremental(m_parent->m_rev_seqbeg,
662  m_parent->m_rev_seqend,
663  m_range.first,
664  m_range.second,
665  *f,m_query_offset);
666 
667  ++m_query_offset;
668  ++f;
669  }
670  }
671 
674  template<typename Iter>
675  void match(Iter f,Iter t,
676  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
677  nuc::base> >::type *dummy=0)
678  {
679  const base2packed_iterator<Iter> ff(f);
680  const base2packed_iterator<Iter> tt(t);
681  match<typename base2packed_iterator<Iter>::proxy_type>(ff,tt);
682  }
683 
686  template<typename Iter>
687  void match(Iter f,Iter t,
688  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
689  char> >::type *dummy=0)
690  {
691  const char2packed_iterator<Iter> ff(f);
692  const char2packed_iterator<Iter> tt(t);
693  match<typename char2packed_iterator<Iter>::proxy_type>(ff,tt);
694  }
695 
696  const rawrange &get_raw_range() const;
697  range get_range() const;
698  size_t get_count() const;
699  };
700 };
701 
702 // template member function definitions ///////////////////////////////////////////////////////////////
703 
704 template<typename Iter>
705 suffix_index::rawrange suffix_index::match_raw(Iter f,Iter t,
706  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
707  nuc::packed_base> >::type *dummy) const
708 {
709  /*
710  if(m_idxinfo.kmersize==0)
711  return suffix_query(m_seqbeg,m_seqend,m_idxbeg,f,t);
712 
713  const size_t x=m_hashbeg[lhash(f,t,m_idxinfo.kmersize)];
714  const size_t y=m_hashbeg[rhash(f,t,m_idxinfo.kmersize)];
715 
716  return suffix_query(m_seqbeg,m_seqend,m_idxbeg+x,m_idxbeg+y,f,t);
717  */
718 
719  incremental_query iq(*this);
720  iq.match(f,t);
721  return iq.get_raw_range();
722 }
723 
724 template<typename Iter>
725 suffix_index::rawrange suffix_index::match_raw(Iter f,Iter t,
726  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
727  nuc::base> >::type *dummy) const
728 {
729  const base2packed_iterator<Iter> ff(f);
730  const base2packed_iterator<Iter> tt(t);
731  return match_raw<typename base2packed_iterator<Iter>::proxy_type>(ff,tt);
732 }
733 
734 template<typename Iter>
735 suffix_index::rawrange suffix_index::match_raw(Iter f,Iter t,
736  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
737  char> >::type *dummy) const
738 {
739  const char2packed_iterator<Iter> ff(f);
740  const char2packed_iterator<Iter> tt(t);
741  return match_raw<typename char2packed_iterator<Iter>::proxy_type>(ff,tt);
742 }
743 
744 template<typename Iter>
745 suffix_index::range suffix_index::match(Iter f,Iter t) const
746 {
747  rawrange r=match_raw(f,t);
748  iterator df(r.first,&m_idxinfo.offsets,m_idxinfo.seqtotal,0);
749  iterator dt(r.second,&m_idxinfo.offsets,m_idxinfo.seqtotal,0);
750  return range(df,dt);
751 }
752 
753 template<typename Iter>
754 size_t suffix_index::count(Iter f,Iter t) const
755 {
756  const suffix_index::rawrange r=match_raw(f,t);
757  return r.second-r.first;
758 }
759 
760 template<typename Iter>
761 double suffix_index::calc_freq(Iter f,Iter t) const
762 {
763  const size_t size=t-f;
764  // assuming kmer is shorter that the shortest sequence in the index
765  const double kmer_total=m_idxinfo.seqtotal-(size*m_idxinfo.nseq);
766  if(kmer_total<=0) return 0.0;
767  return static_cast<double>(count(f,t))/kmer_total;
768 }
769 
770 template<typename Iter>
771 std::pair<suffix_index::rawrange,Iter>
772 suffix_index::match5_raw(Iter f,Iter t,
773  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
774  nuc::packed_base> >::type *dummy) const
775 {
776  if(m_idxinfo.kmersize==0)
777  {
778  const suffix_query_result<raw_iterator,Iter> q=
779  suffix_query_prefix(m_seqbeg,m_seqend,m_idxbeg,f,t);
780  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
781  }
782 
783  const size_t x=m_hashbeg[lhash(f,t,m_idxinfo.kmersize)];
784  const size_t y=m_hashbeg[rhash(f,t,m_idxinfo.kmersize)];
785 
786  if(x<y)
787  {
788  const suffix_query_result<raw_iterator,Iter> q=
789  suffix_query_prefix(m_seqbeg,m_seqend,m_idxbeg+x,m_idxbeg+y,f,t);
790 
791  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
792  }
793 
794  const suffix_query_result<raw_iterator,Iter> q=
795  suffix_query_prefix(m_seqbeg,m_seqend,m_idxbeg,m_idxend,f,t);
796 
797  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
798 }
799 
800 template<typename Iter>
801 std::pair<suffix_index::range,Iter> suffix_index::match5(Iter f,Iter t) const
802 {
803  const std::pair<rawrange,Iter> r=match5_raw(f,t);
804  iterator df(r.first.first,&m_idxinfo.offsets,m_idxinfo.seqtotal,0);
805  iterator dt(r.first.second,&m_idxinfo.offsets,m_idxinfo.seqtotal,0);
806  return std::make_pair(range(df,dt),r.second);
807 }
808 
809 template<typename Iter>
810 std::pair<suffix_index::rawrange,Iter>
812  typename boost::enable_if<boost::is_same<typename std::iterator_traits<Iter>::value_type,
813  nuc::packed_base> >::type *dummy) const
814 {
815  if(!m_mmapping_rev_idx.is_mapped())
816  throw std::runtime_error("suffix_index::match5_reverse_raw: "
817  "reverse index must be present and "
818  "mmapped for this method");
819 
820  // reverse complement prefix match:
821  // find the forward complement in the reverse index
822  if(m_idxinfo.kmersize==0)
823  {
824  const suffix_query_result<raw_iterator,Iter> q=
825  suffix_query_prefix(m_rev_seqbeg,m_rev_seqend,m_rev_idxbeg,f,t);
826  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
827  }
828 
829  const size_t x=m_rev_hashbeg[lhash(f,t,m_idxinfo.kmersize)];
830  const size_t y=m_rev_hashbeg[rhash(f,t,m_idxinfo.kmersize)];
831 
832  if(x<y)
833  {
834  const suffix_query_result<raw_iterator,Iter> q=
835  suffix_query_prefix(m_rev_seqbeg,m_rev_seqend,m_rev_idxbeg+x,m_rev_idxbeg+y,f,t);
836 
837  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
838  }
839 
840  const suffix_query_result<raw_iterator,Iter> q=
841  suffix_query_prefix(m_rev_seqbeg,m_rev_seqend,m_rev_idxbeg,m_rev_idxend,f,t);
842 
843  return std::make_pair(rawrange(q.array_beg,q.array_end),q.query_end);
844 }
845 
846 template<typename Iter>
847 std::pair<suffix_index::range,Iter> suffix_index::match5_reverse(Iter f,Iter t) const
848 {
849  const std::pair<rawrange,Iter> r=match5_reverse_raw(f,t);
850  const int reverse_offset=f-r.second;
851  iterator df(r.first.first,&m_idxinfo.offsets,m_idxinfo.seqtotal,reverse_offset);
852  iterator dt(r.first.second,&m_idxinfo.offsets,m_idxinfo.seqtotal,reverse_offset);
853  return std::make_pair(range(df,dt),r.second);
854 }
855 
856 } // namespace
857 
858 #endif // SHORE_ALGO_SUFFIX_INDEX_HPP__
859