SHORE API
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
align.hpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright 2008,2009,2010,2011,2012 Stephan Ossowski, Korbinian Schneeberger,
4  * Felix Ott, Joerg Hagmann, Alf Scotland, Sebastian Bender
5  *
6  * This file is part of SHORE.
7  *
8  * SHORE is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * SHORE is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with SHORE. If not, see <http://www.gnu.org/licenses/>.
20  */
21 
22 
26 
87 #ifndef SHORE_ALGO_ALIGN_HPP__
88 #define SHORE_ALGO_ALIGN_HPP__
89 
90 #include <iterator>
91 #include <string>
92 #include <vector>
93 
97 #include "shore/stream/streams.hpp"
101 #include "shore/processing/filter.hpp" // conversion
102 #include "shore/processing/feed.hpp"
104 
105 #include "shore/base/util.hpp"
106 #include "shore/base/stringops.hpp"
107 
108 namespace shore {
109 
112 {
113  ALN_NONE=-1,
114  ALN_GLOBAL=0,
115  ALN_DANGLING_REF=1,
116  ALN_DANGLING_QRY=2,
117  ALN_DANGLING_ANY=3,
118  ALN_LOCAL=7
119 };
120 
121 S_MAKE_ENUM_TRAITS(AlignmentMode)
122 
123 
125 class dp_nuc_scoring
126 {
127  private:
128 
130  int m_scoringmatrix[256];
131 
132  public:
133 
134  dp_nuc_scoring();
135  dp_nuc_scoring(const dp_nuc_scoring &other);
136  dp_nuc_scoring & operator=(const dp_nuc_scoring &other);
137 
139  int get_pair_score(const shore::nuc::base ref,
140  const shore::nuc::base qry) const;
141 
144  int get_ref_score(const shore::nuc::base ref) const;
145 
148  int get_qry_score(const shore::nuc::base qry) const;
149 
151 
153  void set_scores(const int match,const int mismatch,const int gap);
155  void set_scores(const int match,const int mismatch,const int gap,const int ambmatch);
159  void read_scores(const std::string &fn,const int defaultscore=-1);
160 
162  int get_best_score() const;
163 
165  void print(std::ostream &out) const;
166 };
167 
169 
172 {
173  private:
174 
176  std::vector<int> m_matrix;
177 
178  size_t m_size;
179 
181  int m_ncol;
182 
184  int m_nrow;
185 
187  int m_globalmax;
189  int m_lastrowmax;
191  int m_lastcolmax;
192 
195  std::vector<size_t> m_globalmax_pos;
198  std::vector<size_t> m_lastrowmax_pos;
201  std::vector<size_t> m_lastcolmax_pos;
202 
203  public:
204 
205  class const_iterator
206  {
207  protected:
208 
209  friend class dp_alignment_matrix;
210 
212  const dp_alignment_matrix *m_const_that;
213 
215  int m_index;
216 
218  const_iterator(const dp_alignment_matrix *matrix);
219 
220  public:
221 
223  const_iterator();
224 
226  const_iterator(const const_iterator &other);
227 
229  const_iterator & operator=(const const_iterator &other);
230 
232  int get() const;
233 
236  int get_prev() const;
237 
240  int get_ref_prev() const;
241 
244  int get_qry_prev() const;
245 
247  void next();
248 
250  void ref_next();
251 
253  void qry_next();
254 
256  void ref_return();
257 
259  void qry_return();
260 
262  void prev();
263 
265  void ref_prev();
266 
268  void qry_prev();
269 
271  bool is_begin() const;
272 
274  bool is_end() const;
275 
279  size_t get_ref_pos() const;
280 
284  size_t get_qry_pos() const;
285 
287  size_t get_field_index() const;
288 
290  template<typename RefValue,typename QryValue,typename Scoring>
291  int get_best_score(const RefValue & rv,const QryValue & qv,
292  const Scoring & scoring,
293  const bool local);
294  };
295 
296  class iterator
297  :public const_iterator
298  {
299  private:
300 
301  friend class dp_alignment_matrix;
302 
304  dp_alignment_matrix *m_that;
305 
306 
308  iterator(dp_alignment_matrix *matrix);
309 
310  public:
311 
313  iterator();
314 
316  iterator(const iterator &other);
317 
319  iterator & operator=(const iterator &other);
320 
323  void set(const int value);
324  void set_ref_last(const int value);
325  void set_qry_last(const int value);
326  void set_last(const int value);
327  };
328 
331 
333  void reset_maxima();
334 
336  void resize(const size_t ref_size,const size_t qry_size);
337 
339  int get_score(const AlignmentMode right_ends) const;
340 
343  size_t num_global_max() const;
344 
347  size_t num_ref_max() const;
348 
351  size_t num_qry_max() const;
352 
354  int global_max() const;
355 
357  int ref_max() const;
358 
360  int qry_max() const;
361 
363  iterator begin();
364 
366  const_iterator last() const;
367 
369  const_iterator global_max_pos(size_t index) const;
370 
372  const_iterator ref_max_pos(size_t index) const;
373 
375  const_iterator qry_max_pos(size_t index) const;
376 
378  template<typename RefIter,typename QryIter>
379  void print(std::ostream &os,
380  RefIter ir,QryIter iq) const;
381 };
382 
384 
386 template<typename RefType,typename QryType>
387 struct dp_traits
388 {
394  typedef void scoring_type;
406 };
407 
409 template<>
411 {
412  typedef dp_nuc_scoring scoring_type;
414 };
415 
417 
419 template<typename RefIter,typename QryIter,typename Scoring>
420 void dp_align(dp_alignment_matrix & matrix,
421  RefIter rbeg,const size_t rsize,
422  QryIter qbeg,const size_t qsize,
423  const Scoring scoring,const AlignmentMode left_ends,
424  const AlignmentMode exclude_left=ALN_NONE,
425  const AlignmentMode exclude_right=ALN_NONE);
426 
428 
429 template<typename SeqIter>
430 struct dp_sequence
431 {
433  SeqIter beg;
435  SeqIter end;
437  size_t ofs;
439  size_t size;
440 
441  dp_sequence()
442  :ofs(0),size(0)
443  {}
444 
445  dp_sequence(const SeqIter & b,
446  const SeqIter & e,
447  const int of=0,const size_t sz=0)
448  :beg(b),end(e),ofs(of),size(sz)
449  {
450  if(size==0)
451  size=e-b;
452  else
453  size=std::min(size_t(e-b),size);
454  }
455 };
456 
458 
460 template<typename RefIter,typename QryIter=RefIter,
461  typename Scoring=typename dp_traits<typename std::iterator_traits<RefIter>::value_type,
462  typename std::iterator_traits<QryIter>::value_type>::scoring_type>
464 {
467 
470 
472  const Scoring *scoring;
473 
475  dp_sequence<RefIter> ref;
476 
478  dp_sequence<QryIter> qry;
479 
482  int ref_id;
483 
486  int ref_hits;
487 
490 
491 
492  dp_alignment()
493  :left_ends(ALN_DANGLING_QRY),
494  right_ends(ALN_LOCAL),
495  scoring(0),
496  ref_id(0),
497  ref_hits(0)
498  {}
499 };
500 
502 
505 {
509 
512  std::vector<AlignmentMode> left_ends;
513 
516  std::vector<AlignmentMode> right_ends;
517 
518 
520  :report_all_best(false)
521  {}
522 };
523 
525 template<typename RefIter,typename QryIter=RefIter,
526  typename Scoring=typename dp_traits<typename std::iterator_traits<RefIter>::value_type,
527  typename std::iterator_traits<QryIter>::value_type>::scoring_type>
529 :public shore::pipe_facade<dp_aligner_pipe<RefIter,QryIter,Scoring>,
530  dp_sequence<QryIter>,
531  dp_alignment<RefIter,QryIter,Scoring> >,
532  public buffer_chain<dp_alignment<RefIter,QryIter,Scoring> >
533 {
534  public:
535 
536  typedef dp_sequence<QryIter> append_type;
538 
540 
541  private:
542 
543  friend class pipeline_core_access;
544 
545  dp_aligner_config m_conf;
546 
548  AlignmentMode m_end_modes[8];
549 
551  Scoring m_scoring;
552 
554  dp_sequence<RefIter> m_ref;
555 
557  dp_sequence<QryIter> m_qry;
558 
560  dp_alignment_matrix *m_alignment_matrix;
561 
562  int m_score;
563 
564  AlignmentMode m_left_ends;
565  AlignmentMode m_right_ends;
566 
569 
570  std::vector<AlignmentMode> m_active_modes;
571 
572 
573  void add_mode(const AlignmentMode left,const AlignmentMode right);
574 
575 
576  bool has_weaker_constraint(const AlignmentMode left_a,
577  const AlignmentMode left_b) const;
578 
579  void add_to_chain();
580 
581  void run();
582 
584  void next();
585 
586  void append(const dp_sequence<QryIter> & query);
587 
588  void flush();
589 
590  public:
591 
594 
595  dp_aligner_pipe(const dp_aligner_pipe & other);
596 
599 
601  void set_ref(RefIter rb,RefIter re);
602 
604  const dp_sequence<RefIter> get_ref() const;
605 
607  const Scoring & get_scoring() const;
608 
610  void set_scoring(const Scoring & matrix);
611 };
612 
614 
616 template<typename RefIter,typename QryIter=RefIter,
618  typename std::iterator_traits<QryIter>::value_type>::scoring_type>
620 :public shore::pipe_facade<dp_multialigner_pipe<RefIter,QryIter,Scoring>,
621  dp_sequence<QryIter>,
622  dp_alignment<RefIter,QryIter,Scoring> >,
623  public buffer_chain<dp_alignment<RefIter,QryIter,Scoring> >
624 {
625  public:
626 
627  typedef dp_sequence<QryIter> append_type;
629 
631 
632  private:
633 
634  friend class pipeline_core_access;
635 
636  struct alignment_grabber
637  :public shore::plugin<current_type>
638  {
640  int id;
641 
643  int i)
644  :that(t),id(i)
645  {}
646 
647  virtual int apply(std::deque<current_type *> & used,
648  std::vector<current_type *> & unused,
649  bool flush)
650  {
651  if(used.empty())
653 
654  const int score=
655  used.front()->alignment_matrix.get_score(used.front()->right_ends);
656 
657  // Steal a buffer.
658  if(score>=that->m_score)
659  {
660  if(score>that->m_score)
661  {
662  that->m_unused.insert(that->m_unused.end(),
663  that->m_used.begin(),
664  that->m_used.end());
665  that->m_used.clear();
666  that->m_score=score;
667  }
668  if(that->m_conf.report_all_best
669  ||that->m_used.empty())
670  {
671  used.front()->ref_id=id;
672  that->m_used.push_back(used.front());
673  }
674  else
675  that->m_unused.push_back(used.front());
676  }
677  else
678  that->m_unused.push_back(used.front());
679 
680  used.pop_front();
681 
682  // Return a buffer.
683  if(!that->m_unused.empty())
684  {
685  unused.push_back(that->m_unused.back());
686  that->m_unused.pop_back();
687  }
688 
690  }
691  };
692 
693  dp_aligner_config m_conf;
694 
696  Scoring m_scoring;
697 
699  std::vector<dp_aligner_pipe<RefIter,QryIter,Scoring> *> m_aligners;
700 
701  std::deque<current_type *> m_used;
702  std::vector<current_type *> m_unused;
703 
704  std::vector<alignment_grabber *> m_grabbers;
705 
706  int m_score;
707  size_t m_ref_hits;
708 
710 
711 
712  void push_activated();
713 
715  void next();
716 
717  void append(const dp_sequence<QryIter> & query);
718 
719  void flush();
720 
721  public:
722 
725 
727 
730 
732  void add_ref(RefIter rb,RefIter re,const int id);
733 
735  const Scoring & get_scoring() const;
736 
738  void set_scoring(const Scoring & matrix);
739 };
740 
742 
744 struct dp_trace
745 {
748 
751 
754 
756  std::string alignment_string;
757 
759  int score;
760 
761  int ref_id;
762 
763  int ref_hits;
764 
767 
770 
772  size_t ref_pos;
773 
775  size_t ref_end;
776 
778  size_t qry_pos;
779 
781  size_t qry_end;
782 
786 
790 
794 
795 
797  dp_trace()
798  :alignment_id(0),
799  left_ends(ALN_DANGLING_QRY),
800  right_ends(ALN_LOCAL),
801  score(0),
802  ref_id(0),
803  ref_hits(0),
804  ref_totalsize(0),
805  qry_totalsize(0),
806  ref_pos(0),ref_end(0),
807  qry_pos(0),qry_end(0),
808  soft_clipped(false),
809  unique_alignment(false),
810  unique_mapping(XFLAG_NA)
811  {}
812 };
813 
816 {
819 
822 
825 
828  bool soft_clip;
829 
832 
833 
836  :trace_all_alignments(false),
837  trace_all_mappings(false),
838  check_unique_mapping(false),
839  soft_clip(false),
840  build_alignment(true)
841  {}
842 };
843 
845 template<typename RefIter,typename QryIter=RefIter,
846  typename Scoring=typename dp_traits<typename std::iterator_traits<RefIter>::value_type,
847  typename std::iterator_traits<QryIter>::value_type>::scoring_type,
848  typename AlnBuilder=typename dp_traits<typename std::iterator_traits<RefIter>::value_type,
849  typename std::iterator_traits<QryIter>::value_type>::alignment_builder_type>
851 :public pipe_facade<dp_backtracer_pipe<RefIter,QryIter,Scoring,AlnBuilder>,
852  dp_alignment<RefIter,QryIter,Scoring>,
853  dp_trace>,
854  public buffer_chain<dp_trace>
855 {
856  private:
857 
858  friend class pipeline_core_access;
859 
860  enum TraceOp
861  {
862  TRACE_MATCH=1,
863  TRACE_REFGAP=2,
864  TRACE_QRYGAP=4
865  };
866 
867  struct trace_state
868  {
870  dp_alignment_matrix::const_iterator iter;
871 
873  TraceOp trace_op;
874 
875  trace_state(const dp_alignment_matrix::const_iterator &it,
876  const TraceOp tr)
877  :iter(it),trace_op(tr)
878  {}
879  };
880 
881  dp_backtracer_config m_conf;
882 
884  const dp_alignment<RefIter,QryIter,Scoring> *m_alignment;
885 
887  int m_alignment_id;
888 
890  size_t m_trace_index;
891 
893  size_t m_trace_size;
894 
896  size_t m_left_count;
897 
898  int m_trace_score;
899  int m_trace_ref_end;
900  int m_trace_qry_end;
901  bool m_trace_aln_unique;
902  ExtendedFlag m_trace_map_unique;
903 
905  AlnBuilder m_ws;
906 
907  std::vector<trace_state> m_trace_stack;
908  std::vector<trace_state> m_trace_stack_save;
909 
911  bool m_assess_leftends_mode;
912 
914  std::vector<bool> m_visited;
915  std::vector<bool> m_visited_save;
916 
918  bool m_trace_all_branches;
919 
920  void set_trace_size(const size_t size);
921 
924  ExtendedFlag mark_visited(const dp_alignment_matrix::const_iterator &it);
925 
926  void init_trace(const dp_alignment_matrix::const_iterator &it);
927 
928  // \brief Start a backtrace from the next end position.
929  // \return Whether a new trace is ready to emit.
930  bool start_trace();
931 
932  // \brief Collect all alignments, put alternative paths on the stack.
933  // \return Whether a new trace is ready to emit.
934  bool continue_trace();
935 
937  void assess_leftends();
938 
939 
940  void next();
941 
942  void append(const dp_alignment<RefIter,QryIter,Scoring> & a);
943 
944  void flush();
945 
946  public:
947 
950 };
951 
953 
954 class dp_trace_filter
955 :public plugin<dp_trace>
956 {
957  public:
958 
959  struct config
960  {
961  double threshold_rel;
962  double threshold_abs;
963  bool threshold_global;
964  bool reject_ambiguous;
965 
966  config();
967  };
968 
969  private:
970 
971  friend class pipeline_core_access;
972 
973  config m_conf;
974 
975  int m_best_score;
976 
977  virtual int apply(std::deque<dp_trace *> & used,
978  std::vector<dp_trace *> & unused,
979  const bool flushed);
980 
981  public:
982 
983  dp_trace_filter(const int best_score,const config & c);
984 };
985 
986 struct dp_mapping
987 {
989  const shore::read *read;
991  const dp_trace *trace;
993 
994  dp_mapping()
995  :read(0),
996  trace(0)
997  {}
998 };
999 
1000 class dp_mapper_pipe
1001 :public shore::pipe_facade<dp_mapper_pipe,shore::read,dp_mapping>
1002 {
1003  public:
1004 
1005  struct config
1006  {
1008  dp_aligner_config align_conf;
1009 
1011  dp_backtracer_config trace_conf;
1012 
1013  dp_trace_filter::config filter_conf;
1014  };
1015 
1016  private:
1017 
1018  friend class shore::pipeline_core_access;
1019 
1020  config m_conf;
1021 
1022  dp_nuc_scoring m_scoring;
1023 
1025  dp_multialigner_pipe<shore::dna_iterator> m_aligner;
1026  dp_backtracer_pipe<shore::dna_iterator> m_tracer;
1027  dp_trace_filter m_filter;
1028  shore::extractor<dp_trace> m_extractor;
1029 
1030  int m_ref_id;
1031 
1032  dp_mapping m_current;
1033 
1034  void append(const shore::read &r);
1035  void next();
1036 
1037  public:
1038 
1039  dp_mapper_pipe(const int best_score,const config &c);
1040  dp_mapper_pipe(const dp_mapper_pipe & other);
1041  ~dp_mapper_pipe();
1042 
1043  void add_ref(shore::dna_iterator rb,shore::dna_iterator re);
1044  void add_ref(shore::dna_iterator rb_fwd,shore::dna_iterator re_fwd,
1046 
1048  void set_scoring(const dp_nuc_scoring & matrix);
1049 };
1050 
1051 typedef shore::pipe_box<dp_mapper_pipe> dp_mapper;
1052 
1054 
1055 class dp_mapping_writer
1056 {
1057  private:
1058 
1059  shore::ostreams m_os;
1060 
1061  public:
1062 
1063  typedef dp_mapping append_type;
1064 
1065  dp_mapping_writer(const std::string & outfile);
1066 
1067  void append(const dp_mapping & m);
1068  void flush();
1069 };
1070 
1071 typedef shore::sink<dp_mapping_writer> dp_mapping_sink;
1072 
1073 } // namespace shore
1074 
1075 // Include inline and template implementation.
1076 #include "align.i.hpp"
1077 #include "align.t.hpp"
1078 
1079 #endif // SHORE_ALGO_ALIGN_HPP__
1080