Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/suffix_array_log2/manber_myers_log2.h
Go to the documentation of this file.
00001 #ifndef H_SUFFIX_ARRAY_MAMBER_MEYERS_OPT
00002 #define H_SUFFIX_ARRAY_MAMBER_MEYERS_OPT
00003 
00010 #include "utils/preconditions/preconditions.h"
00011 #include <vector>
00012 #include <algorithm>
00013 #include "debug/ppdebug.h"
00014 #include "utils/static_assert/static_assert.h"
00015 #include <limits>
00016 #include "debug/ppdebug.h"
00017 
00018 namespace strings {
00019 namespace suffix_array {
00020 
00029 template <typename IndexType>
00030 class ManberMyersLog2_ {
00031  private:
00032 
00039   struct Suffix {
00040     IndexType index;
00041     IndexType pos_n;
00042     IndexType pos_2n;
00043 
00044     bool operator<(const Suffix& other) const {
00045       return (pos_n < other.pos_n) ||
00046              ((pos_n == other.pos_n) && (pos_2n < other.pos_2n));
00047     }
00048   };
00049 
00050  public:
00051 
00071   template<typename _Iterator>
00072    static void buildSuffixArray(_Iterator first,
00073        _Iterator last, std::vector<IndexType>* out)
00074    {
00075      typedef typename iterator_traits<_Iterator>::difference_type DifferenceType;
00076      typedef typename iterator_traits<_Iterator>::value_type ValueType;
00077 
00078      STATIC_ASSERT(std::numeric_limits<ValueType>::is_integer,
00079         "Value type of iterator should be integer");
00080 
00081      STATIC_ASSERT(std::numeric_limits<IndexType>::digits >=
00082                    std::numeric_limits<ValueType>::digits,
00083                    "IndexType is unable to hold iterator value type!");
00084 
00085      // This is very special case -- check that if IndexType does not have
00086      // additional bits over ValueType, we can't use maxvalue
00087      if (std::numeric_limits<IndexType>::digits ==
00088          std::numeric_limits<ValueType>::digits) {
00089        for (_Iterator it = first; it != last; ++it) {
00090          if ((*it) == std::numeric_limits<ValueType>::max()) {
00091            throw std::invalid_argument("Input sequence can't contain maxvalue."
00092                "Try to reduce size of the alphabet.");
00093          }
00094        }
00095      }
00096 
00098      DifferenceType n = last - first;
00099 
00100      if (n < 0 || n >= std::numeric_limits<IndexType>::max() ) {
00101         throw std::overflow_error("The length of the sequence is too big "
00102             "to process. Have you passed the [first,last) in correct order?");
00103      }
00104 
00105 
00107      std::vector<IndexType>& result = *out;
00108      result.resize(n);
00109 
00111      std::vector<Suffix> suffixes(n);
00112 
00113      _Iterator it = first;
00114      for (IndexType i = 0; i < n; i++) {
00115         suffixes[i].index = i;
00116         // Move the range [ValueType::min, ValueType::max] to
00117         // [IndexType::min+1, IndexType::min+ValueType::size]
00118         suffixes[i].pos_n = (std::numeric_limits<IndexType>::min() + 1) + 
00119                 ((*it) - std::numeric_limits<ValueType>::min());
00120         // note that suffixes[i].pos_2n does not need to be initialized here
00121         ++it;
00122      }
00123      assert(it == last);
00124 
00125      IndexType current_size = 1; // the processed size
00126      do {
00127        for (IndexType i = 0; i < n; i++) {
00128           result[suffixes[i].index] = suffixes[i].pos_n;
00129        }
00130 
00131        // Compute the pos_2n
00132        for (IndexType i = 0; i < n; i++) {
00133          IndexType tail_start = suffixes[i].index + current_size;
00134          // short suffixes (already sorted at this stage) goes before
00135          // other, we reserved value 0 for them
00136          suffixes[i].pos_2n = (tail_start >= n) ? 
00137            std::numeric_limits<IndexType>::min() : 1 + result[tail_start];
00138        }
00139 
00140        sort(suffixes.begin(), suffixes.end());
00141        result[0] = 0;
00142        for (IndexType i = 1; i < n; i++) {
00143          // check for equality using two operators less
00144          if (!(suffixes[i-1] < suffixes[i]) &&
00145              !(suffixes[i] < suffixes[i-1])) {
00146             result[i] = result[i-1];
00147          } else {
00148             result[i] = i;
00149          }
00150        }
00151 
00152        for (IndexType i = 0; i < n; i++) {
00153           suffixes[i].pos_n = result[i];
00154        }
00155 
00156        current_size *= 2;
00157      } while (current_size < n);
00158 
00159      for (IndexType i = 0; i < n; i++) {
00160         result[i] = suffixes[i].index;
00161      }
00162    }
00163 };
00164 
00165 typedef ManberMyersLog2_<int> ManberMyersLog2;
00166 
00167 } // namespace suffix_array
00168 } // namespace strings
00169 
00170 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines