SHORE API
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
alignment_string_ops.hpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright 2008,2009,2010,2011,2012 Stephan Ossowski, Korbinian Schneeberger,
4  * Felix Ott, Joerg Hagmann, Alf Scotland, Sebastian Bender
5  *
6  * This file is part of SHORE.
7  *
8  * SHORE is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * SHORE is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with SHORE. If not, see <http://www.gnu.org/licenses/>.
20  */
21 
25 
26 #ifndef SHORE_ALIGNMENT_STRING_OPS_HPP__
27 #define SHORE_ALIGNMENT_STRING_OPS_HPP__
28 
29 #include <iterator>
30 #include <limits>
31 #include <string>
32 
34 
35 namespace shore {
36 
// Enumeration of alignment operation codes.
// NOTE(review): the declaration line (listing line 38) is missing from this
// extract; the type is presumably `AlignmentOperation`, the name used for the
// m_ops / m_ws_op members further down -- confirm against the original header.
// NOTE(review): listing lines 41-59 and 61-62 are also absent here, so more
// enumerators may exist between and after the two that are shown.
39 {
40  ALN_NOOP=0, // first enumerator shown; presumably "no operation" -- confirm
60  ALN_GRAPH=256, // last enumerator shown; meaning not visible in this extract
63 };
64 
// Forward declarations. Both names are used below before any definition is
// seen: as friends of the token class, and alignment_tokenizer additionally
// as a member type and as a parameter type of compose().
65 class alignment_tokenizer;
66 class alignment_helper;
67 
// Describes one alignment token: an operation code, several size counters,
// and begin/end iterators for a reference range and a query range.
// NOTE(review): the declaration line (listing line 70) is missing from this
// extract; presumably `class alignment_token`, matching the constructor
// declared below -- confirm against the original header.
// Only declarations appear here; the member definitions are not in this file.
71 {
72  public:
73 
74  typedef std::string::const_iterator iterator; // read-only std::string iterator
75  typedef std::reverse_iterator<iterator> reverse_iterator; // reverse adaptor over `iterator`
76 
77  private:
78 
// Classes granted access to the private members below.
79  friend class alignment_tokenizer;
80  friend class alignment_builder;
81  friend class alignment_helper;
82 
83  static const std::string CIGAR_TRF; // declared only; presumably a lookup table used by to_cigar() -- confirm
84 
87  bool m_sequence_omitted; // presumably the value reported by sequence_omitted()
88 
// Iterator pairs; presumably the values reported by ref_begin()/ref_end()
// and query_begin()/query_end().
90  iterator m_ref_beg;
92  iterator m_ref_end;
94  iterator m_qry_beg;
96  iterator m_qry_end;
97 
99  int m_operation; // presumably an AlignmentOperation value held as int -- confirm
// Size counters; presumably the values reported by opsize(), refsize(),
// querysize() and nchars(). Their units are not visible in this extract.
101  long m_opsize;
103  long m_refsize;
105  long m_qrysize;
107  long m_nchars;
108 
109  public:
110 
111  alignment_token(); // default constructor
112  void clear(); // presumably resets the token to its initial state -- confirm
113 
// Read accessors (all const). Each presumably returns the like-named private
// member; the definitions are not visible here.
116  bool sequence_omitted() const;
117 
119  int operation() const;
120 
122  long opsize() const;
123 
125  long refsize() const;
126 
128  long querysize() const;
129 
131  long nchars() const;
132 
136  iterator ref_begin() const;
137 
141  iterator ref_end() const;
142 
146  iterator query_begin() const;
147 
151  iterator query_end() const;
152 
// Output helpers taking a caller-supplied stream or string.
// NOTE(review): std::ostream is named here, but none of the includes visible
// in this extract (<iterator>, <limits>, <string>) is guaranteed to declare
// it -- confirm that <iosfwd>/<ostream> arrives via another include.
153  void print(std::ostream &os) const;
154 
// to_string / to_cigar: pos defaults to 0, size defaults to the largest
// size_t value, omit_refbases defaults to false.
// NOTE(review): what pos/size select (presumably a sub-range of the token)
// and whether res is overwritten or appended to cannot be told from here.
159  void to_string(std::string &res,const size_t pos=0,
160  const size_t size=std::numeric_limits<size_t>::max(),
161  const bool omit_refbases=false) const;
162  void to_cigar(std::string &res,const size_t pos=0,
163  const size_t size=std::numeric_limits<size_t>::max()) const;
164 };
165 
// Forward declaration. Within this header the type is only used by reference
// (parameters of calc_size_on_ref, calc_end_excl and resize), so a complete
// definition is not needed here.
166 struct alignment;
167 
// Tokenizer over an alignment string: takes a string via set_alignment() and
// exposes an alignment_token through current(), with has_data()/next()
// presumably forming the iteration protocol.
// NOTE(review): the declaration line (listing line 179) is missing from this
// extract; presumably `class alignment_tokenizer` -- confirm against the
// original header. Listing line 200 is also missing and 201 shows as empty,
// so a constructor declaration may have been lost in extraction.
// Only declarations appear here; the member definitions are not in this file.
180 {
181  private:
182 
183  bool m_reverse; // presumably set by set_reverse()
184 
// NOTE(review): the alignment is held as a pointer, so the string handed to
// set_alignment() presumably must outlive its use by this object -- confirm.
185  const std::string *m_aln;
186  long m_offset; // presumably the current position within *m_aln -- confirm
187  bool m_hasdata; // presumably the value reported by has_data()
188 
189  alignment_token m_current; // presumably the token returned by current()
190 
// String members; presumably backing storage for the reference/query
// iterators of m_current -- confirm against the implementation.
191  std::string m_ref;
192  std::string m_qry;
193 
194 
// Private helpers; judging by the names, one per tokenizing direction.
195  void tokenize_fwd();
196  void tokenize_rev();
197 
198  public:
199 
201 
203  void set_alignment(const std::string &aln); // takes the string to tokenize by const reference
204 
206  const std::string &get_alignment() const; // returns a const reference; presumably the string last set
207 
209  void set_reverse(const bool rev); // presumably selects the tokenizing direction -- confirm
210 
212  const alignment_token &current() const; // returns a reference, not a copy
213 
215  bool has_data(); // note: not declared const, unlike the getters above
216 
218  void next(); // presumably advances to the following token -- confirm
219 };
220 
// Builder that collects reference bases, query bases and operations in three
// vectors and can render them into a string via to_string().
// NOTE(review): the declaration line (listing line 222) is missing from this
// extract; presumably `class alignment_builder`, the name befriended by the
// token class above -- confirm against the original header.
// NOTE(review): std::vector and shore::nuc::base are used here, but neither
// <vector> nor a declaration of shore::nuc::base is among the includes
// visible in this extract (listing line 33 is missing) -- confirm both are
// brought in by the original header.
// Only declarations appear here; the member definitions are not in this file.
223 {
224  private:
225 
// Collected data; presumably kept index-aligned with each other -- confirm.
227  std::vector<shore::nuc::base> m_ref;
229  std::vector<shore::nuc::base> m_qry;
231  std::vector<AlignmentOperation> m_ops;
232 
// "m_ws_" members; presumably scratch ("workspace") state reused while
// building the output string -- confirm.
234  std::string m_ws_ref;
236  std::string m_ws_qry;
238  alignment_token m_ws_tok;
239 
241  AlignmentOperation m_ws_op;
242 
243 
245  void build_at(std::string & buf,size_t i); // private helper; role of index i not visible here
246 
247  public:
248 
250  void to_string(std::string & buf,const bool reverse); // not const; takes the output buffer by reference
251 
// Append operations. Each takes its base(s) by value; the effect on the
// three vectors is defined outside this file.
253  void add_softclip(const shore::nuc::base b);
254 
256  void add_qry(const shore::nuc::base b);
257 
259  void add_ref(const shore::nuc::base b);
260 
262  void add_paired(const shore::nuc::base r,const shore::nuc::base q);
263 
// Container-style interface.
265  size_t size() const;
266 
268  bool empty() const;
269 
271  void resize(const size_t size);
272 
274  void clear();
275 };
276 
// Helper bundling operations on alignment strings. Judging by the member
// names these cover sequence extraction, size/edit statistics, trimming at
// either end, and whole-string transforms.
// NOTE(review): the declaration line (listing line 278) is missing from this
// extract; presumably `class alignment_helper` -- confirm against the
// original header.
// Only declarations appear here; the member definitions are not in this file.
// Apart from compose(), the members are non-static and the object carries a
// tokenizer and a workspace string, so presumably the calls reuse that state.
279 {
280  private:
281 
282  alignment_tokenizer m_alntok; // tokenizer instance owned by value
283  std::string m_workspace; // presumably a scratch buffer -- confirm
284 
285  public:
286 
// Extraction: the alignment is read-only input, ref/qry are output strings.
// include_softclipped defaults to false.
// NOTE(review): the meaning of the size_t return values (presumably the
// extracted length) is not visible here.
289  size_t extract_ref(const std::string &alignment,std::string &ref);
290 
295  size_t extract_qry(const std::string &alignment,std::string &qry,
296  const bool include_softclipped=false);
297 
299  void extract_seq(const std::string &alignment,
300  std::string &ref,std::string &qry,
301  const bool include_softclipped=false);
302 
// Statistics computed from a read-only alignment string.
304  size_t calc_size_of_qry(const std::string &alignment,
305  const bool include_softclipped=false);
306 
308  size_t calc_size_on_ref(const std::string &alignment);
309 
311  int calc_editdistance(const std::string &alignment);
312 
314  std::pair<int,int> calc_indels(const std::string &alignment); // order of the two counts not visible here
315 
// End-point adjustment: these take the alignment by non-const reference, so
// they can modify it in place.
// NOTE(review): the meaning of the int return value and of negative nbases
// cannot be told from this extract.
320  int move_start_by_qrybases(std::string &alignment,int nbases);
321 
325  int move_start_by_refbases(std::string &alignment,int nbases);
326 
331  int move_end_by_qrybases(std::string &alignment,int nbases);
332 
336  int move_end_by_refbases(std::string &alignment,int nbases);
337 
// Whole-string transforms, again taking the alignment by non-const reference.
339  void rebuild(std::string &alignment);
340 
342  void invert(std::string &alignment);
343 
345  void revcomp(std::string &alignment);
346 
// paste: defaults are distance=0, inv=false, forcefrag=true, nomove=false.
// NOTE(review): presumably joins `other` onto `alignment`; the effect of each
// flag is defined outside this file.
356  void paste(std::string &alignment,const std::string &other,
357  int distance=0,bool inv=false,bool forcefrag=true,
358  bool nomove=false);
359 
// cigar: output goes into the `cigar` string; sep defaults to the character
// value 0. NOTE(review): presumably 0 means "no separator" -- confirm.
364  void cigar(const std::string &alignment,
365  std::string &cigar,const char sep=0);
366 
368 
// Overloads operating on shore::alignment (only forward-declared in this
// header) rather than on a bare alignment string.
370  size_t calc_size_on_ref(const shore::alignment &a);
371 
373  long calc_end_excl(const shore::alignment &a); // per the name, presumably an exclusive end coordinate -- confirm
374 
377 
379  void resize(shore::alignment &f,const size_t newsize);
380 
382 
// The only static member. Both tokenizers are taken by non-const reference,
// so the call may advance or otherwise modify them.
384  static std::string compose(alignment_tokenizer &ref,
385  alignment_tokenizer &qry);
386 };
387 
388 } // namespace
389 
390 #endif // SHORE_ALIGNMENT_STRING_OPS_HPP__
391