26 #ifndef SHORE_ALGO_SUFFIX_HPP__
27 #define SHORE_ALGO_SUFFIX_HPP__
44 template<
typename Iter,
typename Oter>
46 bool initialize_counts=
true,
int shift=0)
49 std::fill(obeg,oend,0);
51 for(Iter i=ibeg;i!=iend;++i)
57 template<
typename Iter>
58 void counts_to_intervals(Iter beg,Iter
end)
60 typedef typename std::iterator_traits<Iter>::value_type value_type;
83 template<
typename Iter,
typename Oter>
84 void suffix_array(Iter ibeg,Iter iend,Oter obeg,
bool initialize_sa=
true,
85 typename std::iterator_traits<Iter>::value_type imax=
86 std::numeric_limits<
typename std::iterator_traits<Iter>::value_type>::max())
90 typedef typename std::iterator_traits<Iter>::value_type ivalue_type;
91 typedef typename std::iterator_traits<Oter>::value_type ovalue_type;
93 typedef typename std::reverse_iterator<Oter> ROter;
95 const size_t n=iend-ibeg;
96 const Oter oend=obeg+n;
99 std::fill(obeg,oend,0);
104 const ROter r_obeg(oend);
105 const ROter r_oend(obeg);
111 ivalue_type prev=*(iend-1);
112 ivalue_type s_type=ivalue_type(0);
116 throw std::runtime_error(
"suffix_array: negative value "
119 for(
size_t i=n-2;;--i)
121 const ivalue_type curr=ibeg[i];
124 throw std::runtime_error(
"suffix_array: negative value "
127 if(curr<(prev+s_type))
129 types[i]=s_type=ivalue_type(1);
133 s_type=ivalue_type(0);
147 counts_to_intervals(buckets.
begin(),buckets.
end());
151 const Oter lms_end=obeg+nlms;
155 for(
size_t i=1;i<n-1;++i)
160 obeg[--buckets[ibeg[i]]]=i;
167 counts_to_intervals(buckets.
begin(),buckets.
end());
170 obeg[buckets[*ilast]]=n-1;
173 for(Oter i=obeg;i!=oend;++i)
175 const ovalue_type x=*i;
176 if((x!=0)&&(types[x-1]==0))
178 const ivalue_type c=ibeg[x-1];
179 obeg[buckets[c]]=x-1;
185 counts_to_intervals(buckets.begin(),buckets.end());
187 for(ROter i=r_obeg;i!=r_oend;++i)
189 const ovalue_type x=*i;
190 if((x!=0)&&(types[x-1]==1))
192 const ivalue_type c=ibeg[x-1];
194 obeg[buckets[c]]=x-1;
199 for(Oter i=obeg,j=obeg;i!=oend;++i)
201 const ovalue_type idx=*i;
202 if((idx>0)&&(types[idx]>types[idx-1]))
213 Oter nam_beg=obeg+(n>>1);
215 std::fill(nam_beg,oend,n-1);
217 ovalue_type pprev=*obeg;
219 nam_beg[pprev>>1]=nnam;
220 for(Oter i=(obeg+1);i!=lms_end;++i)
222 const ovalue_type p=*i;
224 const size_t smax=n-std::max(p,pprev);
225 for(
size_t j=0;j<smax;++j)
227 if((ibeg[p+j]!=ibeg[pprev+j])||(types[p+j]!=types[pprev+j]))
232 if((j>0)&&(types[p+j]>types[p+j-1]))
242 for(Oter i=oend-1,j=oend-1;i!=nam_beg-1;--i)
244 const ovalue_type v=*i;
256 else for(
size_t i=0;i<nlms;++i)
261 counts_to_intervals(buckets.begin(),buckets.end());
267 for(
size_t i=1;i<n-1;++i)
279 for(Oter i=obeg;i!=lms_end;++i)
284 std::fill(lms_end,oend,0);
288 for(ROter i(lms_end);i!=r_oend;++i)
292 obeg[--buckets[ibeg[j]]]=j;
299 counts_to_intervals(buckets.
begin(),buckets.
end());
302 obeg[buckets[*ilast]]=n-1;
305 for(Oter i=obeg;i!=oend;++i)
307 const ovalue_type x=*i;
308 if((x!=0)&&(types[x-1]==0))
310 const ivalue_type c=ibeg[x-1];
311 obeg[buckets[c]]=x-1;
317 counts_to_intervals(buckets.begin(),buckets.end());
319 for(ROter i=r_obeg;i!=r_oend;++i)
321 const ovalue_type x=*i;
322 if((x!=0)&&(types[x-1]==1))
324 const ivalue_type c=ibeg[x-1];
326 obeg[buckets[c]]=x-1;
331 template<
typename Ater,
typename Oter>
332 void suffix_ranks(Ater abeg,Ater aend,Oter rbeg)
334 typedef typename std::iterator_traits<Oter>::value_type value_type;
336 for(Ater i=abeg;i!=aend;++i)
341 template<
typename Ster,
typename Ater>
344 std::pair<size_t,size_t> ret(0,0);
346 const size_t size=send-sbeg;
347 const Ater aend=abeg+
size;
351 suffix_ranks(abeg,aend,rnk.
begin());
361 Ster k=sbeg+(*(abeg+r-1))+h;
362 Ster j=sbeg+(*(abeg+r))+h;
363 while((j!=send)&&(k!=send)&&((*j)==(*k)))
383 template<
typename Ster,
typename Ater,
typename Oter>
386 typedef typename std::iterator_traits<Oter>::value_type value_type;
388 const size_t size=send-sbeg;
389 const Ater aend=abeg+
size;
393 suffix_ranks(abeg,aend,rnk.
begin());
402 Ster k=sbeg+(*(abeg+r-1))+h;
403 Ster j=sbeg+(*(abeg+r))+h;
404 while((j!=send)&&(k!=send)&&((*j)==(*k)))
418 template<
typename Ster,
typename Ater,
typename Qter>
423 typedef typename std::pair<Ater,Ater> range;
424 typedef typename std::iterator_traits<Ster>::value_type svalue_t;
425 typedef typename std::iterator_traits<Ater>::value_type avalue_t;
426 typedef typename std::iterator_traits<Qter>::value_type qvalue_t;
429 return range(abeg,aend);
431 return range(abeg,abeg);
433 range ret(abeg,aend);
435 const size_t k=send-sbeg;
441 for(Qter q=qbeg;q!=qend;++q)
443 idxL=(*ret.first)+ofs;
444 idxR=(*(ret.second-1))+ofs;
446 Ater bound=ret.second;
447 size_t step=(bound-ret.first)>>1;
448 while((idxL>=k)||(sbeg[idxL]<
static_cast<svalue_t
>(*q)))
450 const Ater itmp=ret.first+step;
451 const size_t idxM=(*itmp)+ofs;
452 if((idxM>=k)||(sbeg[idxM]<
static_cast<svalue_t
>(*q)))
462 step=(bound-ret.first)>>1;
464 if(ret.first==ret.second)
467 idxL=(*ret.first)+ofs;
471 step=(ret.second-bound)>>1;
473 while((idxR<k)&&(sbeg[idxR]>
static_cast<svalue_t
>(*q)))
475 const Ater itmp=ret.second-step;
476 const size_t idxM=(*(itmp-1))+ofs;
477 if((idxM<k)&&(sbeg[idxM]>
static_cast<svalue_t
>(*q)))
487 step=(ret.second-bound)>>1;
489 if(ret.first==ret.second)
492 idxR=(*(ret.second-1))+ofs;
501 template<
typename Ater,
typename Qter>
502 struct suffix_query_result
513 suffix_query_result(Ater b,Ater e,Qter q)
514 :array_beg(b),array_end(e),array_ins(b),query_end(q)
519 template<
typename Ster,
typename Ater,
typename Qter>
524 typedef typename std::pair<Ater,Ater> range;
525 typedef typename std::iterator_traits<Ster>::value_type svalue_t;
526 typedef typename std::iterator_traits<Ater>::value_type avalue_t;
527 typedef typename std::iterator_traits<Qter>::value_type qvalue_t;
529 suffix_query_result<Ater,Qter> ret(abeg,aend,qbeg);
536 const size_t k=send-sbeg;
541 Ater cur_array_end=aend;
543 for(
size_t ofs=0;ret.query_end!=qend;++ret.query_end,++ofs)
545 idxL=(*ret.array_ins)+ofs;
546 idxR=(*(cur_array_end-1))+ofs;
548 Ater bound=cur_array_end;
549 size_t step=(bound-ret.array_ins)>>1;
550 while((idxL>=k)||(sbeg[idxL]<
static_cast<svalue_t
>(*ret.query_end)))
552 const Ater itmp=ret.array_ins+step;
553 const size_t idxM=(*itmp)+ofs;
554 if((idxM>=k)||(sbeg[idxM]<
static_cast<svalue_t
>(*ret.query_end)))
564 step=(bound-ret.array_ins)>>1;
566 if(ret.array_ins==cur_array_end)
569 idxL=(*ret.array_ins)+ofs;
573 step=(cur_array_end-bound)>>1;
575 while((idxR<k)&&(sbeg[idxR]>
static_cast<svalue_t
>(*ret.query_end)))
577 const Ater itmp=cur_array_end-step;
578 const size_t idxM=(*(itmp-1))+ofs;
579 if((idxM<k)&&(sbeg[idxM]>
static_cast<svalue_t
>(*ret.query_end)))
589 step=(cur_array_end-bound)>>1;
591 if(ret.array_ins==cur_array_end)
594 idxR=(*(cur_array_end-1))+ofs;
597 ret.array_beg=ret.array_ins;
598 ret.array_end=cur_array_end;
611 template<
typename Ster,
typename Ater,
typename Query>
617 typedef typename std::pair<Ater,Ater> range;
618 typedef typename std::iterator_traits<Ster>::value_type svalue_t;
619 typedef typename std::iterator_traits<Ater>::value_type avalue_t;
620 typedef Query qvalue_t;
623 return range(abeg,aend);
625 range ret(abeg,aend);
627 const size_t k=send-sbeg;
632 idxL=(*ret.first)+ofs;
633 idxR=(*(ret.second-1))+ofs;
635 Ater bound=ret.second;
636 size_t step=(bound-ret.first)>>1;
637 while((idxL>=k)||(sbeg[idxL]<
static_cast<svalue_t
>(q)))
639 const Ater itmp=ret.first+step;
640 const size_t idxM=(*itmp)+ofs;
641 if((idxM>=k)||(sbeg[idxM]<
static_cast<svalue_t
>(q)))
651 step=(bound-ret.first)>>1;
653 if(ret.first==ret.second)
656 idxL=(*ret.first)+ofs;
660 step=(ret.second-bound)>>1;
662 while((idxR<k)&&(sbeg[idxR]>
static_cast<svalue_t
>(q)))
664 const Ater itmp=ret.second-step;
665 const size_t idxM=(*(itmp-1))+ofs;
666 if((idxM<k)&&(sbeg[idxM]>
static_cast<svalue_t
>(q)))
676 step=(ret.second-bound)>>1;
678 if(ret.first==ret.second)
681 idxR=(*(ret.second-1))+ofs;
688 template<
typename Ster,
typename Ater,
typename Qter>
690 Ster sbeg,Ster send,Ater abeg,Qter qbeg,Qter qend)
692 return suffix_query(sbeg,send,abeg,abeg+(send-sbeg),qbeg,qend);
696 template<
typename Ster,
typename Ater,
typename Qter>
698 Ster sbeg,Ster send,Ater abeg,Qter qbeg,Qter qend)
703 template<
typename Ster,
typename Ater>
704 std::ostream& suffix_dump(std::ostream& os,Ster sbeg,Ster send,Ater abeg)
706 const Ater aend=abeg+(send-sbeg);
707 for(Ater i=abeg;i!=aend;++i)
710 for(Ster j=sbeg+(*i);j!=send;++j)
718 template<
typename Ster,
typename Ater>
722 const size_t size=send-sbeg;
723 const Ater aend=abeg+
size;
728 std::cerr<<
"sort"<<std::endl;
729 std::vector<typename std::iterator_traits<Ater>::value_type> v(abeg,aend);
730 std::sort(v.begin(),v.end());
732 std::cerr<<
"test uniqueness"<<std::endl;
733 if(std::unique(v.begin(),v.end())!=v.end())
735 std::cerr<<
"sarr uniqueness FAIL"<<std::endl;
739 std::cerr<<
"test min"<<std::endl;
740 if((*std::min_element(v.begin(),v.end()))!=0)
742 std::cerr<<
"sarr min FAIL"<<std::endl;
746 std::cerr<<
"test max"<<std::endl;
747 if((*std::max_element(v.begin(),v.end()))!=(size-1))
749 std::cerr<<
"sarr max FAIL"<<std::endl;
754 if(pass&&(abeg!=aend))
756 std::cerr<<
"test ordering"<<std::endl;
758 for(Ater i=(abeg+1);i!=aend;++i)
760 Ster j=sbeg+std::max(*i,*last);
761 Ster k=sbeg+std::min(*i,*last);
762 if(std::lexicographical_compare(j,send,k,send)!=((*last)>(*i)))
764 std::cerr<<
"sarr sort FAIL"<<std::endl;
781 std::cerr<<
"sarr OK"<<std::endl;
788 #endif // SHORE_ALGO_SUFFIX_HPP__