SHORE API
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
text.hpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright 2008,2009,2010,2011,2012 Stephan Ossowski, Korbinian Schneeberger,
4  * Felix Ott, Joerg Hagmann, Alf Scotland, Sebastian Bender
5  *
6  * This file is part of SHORE.
7  *
8  * SHORE is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * SHORE is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with SHORE. If not, see <http://www.gnu.org/licenses/>.
20  */
21 
25 
26 #ifndef IO_TEXT_HPP__
27 #define IO_TEXT_HPP__
28 
29 #include <iostream>
30 #include <vector>
31 #include <set>
32 #include <map>
33 
36 #include "shore/stream/streams.hpp"
37 #include "shore/algo/sort_file.hpp"
38 
39 namespace shore {
40 
43 {
44  private:
45 
46  shore::istreams m_in;
47 
48  std::string m_buf;
49 
51  char m_comment;
53  bool m_skipempty;
54 
56 
57  size_t m_blocksize;
58  std::set<size_t> m_blocks;
59  off_t m_streampos;
60  off_t m_endofblock;
61 
62 
63  bool next_line(std::string &line);
64 
65  public:
66 
67  typedef std::string current_type;
68 
69  basic_line_reader(const std::string& fn);
70  basic_line_reader(std::istream*const in);
71 
73  void set_commentchar(const char c);
75  void set_skipempty(const bool s);
76 
79 
80  bool next(std::string &line);
81 
82  const std::string& get_name() const;
83 
85  void seek(const std::streampos position);
86 
88  template<typename Cmp>
89  void seek_to(Cmp cmp,const std::string& key)
90  {
91  if(m_in[0].eof())
92  m_in[0].clear();
93 
94  if(m_in[0].tellg()==std::streampos(-1))
95  throw std::runtime_error("file "+m_in.get_streamname(0)
96  +" is not seekable");
97 
98  m_streampos=shore::line_sorter<Cmp>(cmp).upper_bound(m_in[0],key).first;
99  }
100 
102  template<typename Cmp>
103  void seek_to_lower(Cmp cmp,const std::string& key)
104  {
105  if(m_in[0].eof())
106  m_in[0].clear();
107 
108  if(m_in[0].tellg()==std::streampos(-1))
109  throw std::runtime_error("file "+m_in.get_streamname(0)
110  +" is not seekable");
111 
112  m_streampos=shore::line_sorter<Cmp>(cmp).lower_bound(m_in[0],key).first;
113  }
114 
119  void set_blocks(const size_t blocksize,const std::set<size_t>& blocks);
120 };
121 
124 :public shore::monolithic<basic_line_reader>
125 {
126  private:
127 
129 
130  public:
131 
132  line_reader(const std::string& fn,compare_type cmp=compare_type());
133 
134  line_reader(std::istream*const in,compare_type cmp=compare_type());
135 
136  line_reader &set_commentchar(const char c);
137 
138  line_reader &set_skipempty(const bool s);
139 
140  shore::signal<const std::string&>& sigmetadata();
141 
142  const std::string& get_name() const;
143 
145  void seek(const std::streampos position);
146 
147  void set_blocks(const size_t blocksize,const std::set<size_t>& blocks);
148 
150  void seek_to(const std::string& key);
151 
153  void seek_to_lower(const std::string& key);
154 };
155 
158 
161 {
162  private:
163 
164  shore::ostreams m_out;
165 
166  public:
167 
168  typedef std::string append_type;
169 
170  line_writer(const std::string& fn);
171 
172  line_writer(std::ostream*const out);
173 
174  void append(const std::string& s);
175 
176  void flush();
177 };
178 
181 
190 {
191  private:
192 
193  std::string m_header;
194 
196  std::multimap<size_t,std::string*> m_colmap;
197 
/// Record holding a begin/end position pair, a pointer to a string and an
/// indent value.
/// NOTE(review): what beg/end index into and how indent is used is not
/// visible in this header - confirm against tokenize()/edit().
struct section
{
    /// Creates an empty record: positions and indent are zero, no string.
    section():beg(0),end(0),str(0),indent(0) {}

    /// Creates a record for the positions \p b and \p e that points at \p s.
    /// indent starts at zero, exactly as in the default constructor, so that
    /// reading it is always well defined.
    section(size_t b,size_t e,std::string* s)
    :beg(b),end(e),str(s),indent(0)
    {}

    size_t beg;        ///< begin position
    size_t end;        ///< end position
    std::string* str;  ///< string this record points at; never deleted by this struct
    size_t indent;     ///< indent value, zero after construction
};
211 
212  line_reader m_linereader; ///< line source; the inline constructors build it from the file name or stream plus the comparator
213 
214  char m_comment; ///< comment character; '#' in the inline constructors, changed through set_commentchar()
215  char m_delim; ///< delimiter character; '\t' in the inline constructors (presumably the column separator - confirm in tokenize())
216 
217  std::string m_filename; ///< file name given to the constructor, or "<stream>" when constructed from a std::istream
218  std::vector<std::string> m_colspec; ///< column specification copied from the constructor's iterator range
219  std::set<std::string> m_optionalcolumns; ///< presumably the column names registered via set_optional() - confirm
220 
221  std::vector<std::string> m_current; ///< sized to m_colspec.size() on construction; presumably what current() returns - confirm
222 
223  std::vector<section> m_sections; ///< sized to m_colspec.size() on construction
224 
225  std::string m_editbuf; ///< NOTE(review): looks like the buffer behind edit()/current_edit() - confirm
226 
227  bool m_hasdata; ///< false after construction; presumably reported by has_data() - confirm
228 
229 
230  void tokenize();
231 
232  void read_header(const std::string& qcomm);
233 
234  public:
235 
236  typedef std::vector<std::string> current_type;
237 
238  row_reader(const std::string& fn,const std::vector<std::string>& colspec,
239  line_reader::compare_type cmp=line_reader::compare_type());
240 
242  row_reader(const std::string& fn,const size_t ncol,
243  line_reader::compare_type cmp=line_reader::compare_type());
244 
245  template<typename Iterator>
246  row_reader(const std::string& fn,Iterator bcolspec,Iterator ecolspec,
247  line_reader::compare_type cmp=line_reader::compare_type())
248  :m_linereader(fn,cmp),
249  m_comment('#'),
250  m_delim('\t'),
251  m_filename(fn),
252  m_colspec(bcolspec,ecolspec),
253  m_current(m_colspec.size()),
254  m_sections(m_colspec.size()),
255  m_hasdata(false)
256  {}
257 
258  row_reader(const std::string& fn,const std::vector<std::string>& colspec,
260  line_reader::compare_type cmp=line_reader::compare_type());
261 
262  template<typename Iterator>
263  row_reader(const std::string& fn,Iterator bcolspec,Iterator ecolspec,
265  line_reader::compare_type cmp=line_reader::compare_type())
266  :m_linereader(fn,cmp),
267  m_comment('#'),
268  m_delim('\t'),
269  m_filename(fn),
270  m_colspec(bcolspec,ecolspec),
271  m_current(m_colspec.size()),
272  m_sections(m_colspec.size()),
273  m_hasdata(false)
274  {
275  sigmetadata().connect(mdslot);
276  }
277 
278  row_reader(std::istream*const in,const std::vector<std::string>& colspec,
279  line_reader::compare_type cmp=line_reader::compare_type());
280 
281  template<typename Iterator>
282  row_reader(std::istream*const in,Iterator bcolspec,Iterator ecolspec,
283  line_reader::compare_type cmp=line_reader::compare_type())
284  :m_linereader(in,cmp),
285  m_comment('#'),
286  m_delim('\t'),
287  m_filename("<stream>"),
288  m_colspec(bcolspec,ecolspec),
289  m_current(m_colspec.size()),
290  m_sections(m_colspec.size()),
291  m_hasdata(false)
292  {}
293 
294  row_reader(std::istream*const in,const std::vector<std::string>& colspec,
296  line_reader::compare_type cmp=line_reader::compare_type());
297 
298  template<typename Iterator>
299  row_reader(std::istream*const in,Iterator bcolspec,Iterator ecolspec,
301  line_reader::compare_type cmp=line_reader::compare_type())
302  :m_linereader(in,cmp),
303  m_comment('#'),
304  m_delim('\t'),
305  m_filename("<stream>"),
306  m_colspec(bcolspec,ecolspec),
307  m_current(m_colspec.size()),
308  m_sections(m_colspec.size()),
309  m_hasdata(false)
310  {
311  sigmetadata().connect(mdslot);
312  }
313 
315 
316  row_reader(const std::string& fn,const std::string& colspec,
317  line_reader::compare_type cmp=line_reader::compare_type());
318 
319  row_reader(const std::string& fn,const std::string& colspec,
321  line_reader::compare_type cmp=line_reader::compare_type());
322 
323  row_reader(std::istream*const in,const std::string& colspec,
324  line_reader::compare_type cmp=line_reader::compare_type());
325 
326  row_reader(std::istream*const in,const std::string& colspec,
328  line_reader::compare_type cmp=line_reader::compare_type());
329 
330  shore::signal<const std::string&>& sigmetadata();
331 
332  row_reader &set_commentchar(const char c)
333  {
334  m_comment=c;
335  m_linereader.set_commentchar(c);
336  return *this;
337  }
338 
345  void set_optional(const std::string &columnname);
346 
347  const std::string& get_header() const;
348 
349  bool has_data();
350 
351  const std::vector<std::string>& current() const;
352 
353  void next();
354 
355  const std::string& current_line() const;
356 
357  void edit(const size_t col,const std::string& s);
358 
359  const std::string& current_edit();
360 
361  std::string spec_str();
362 
363  /// \brief Find the file column number for a reader column.
364  size_t get_filecol(const size_t readercol);
365 
366  void seek(const std::streampos position);
367 };
368 
371 
372 } // namespace
373 
374 #endif // IO_TEXT_HPP__
375