Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_SUFFIX_ARRAY_MAMBER_MEYERS_OPT 00002 #define H_SUFFIX_ARRAY_MAMBER_MEYERS_OPT 00003 00010 #include "utils/preconditions/preconditions.h" 00011 #include <vector> 00012 #include <algorithm> 00013 #include "debug/ppdebug.h" 00014 #include "utils/static_assert/static_assert.h" 00015 #include <limits> 00016 #include "debug/ppdebug.h" 00017 00018 namespace strings { 00019 namespace suffix_array { 00020 00029 template <typename IndexType> 00030 class ManberMyersLog2_ { 00031 private: 00032 00039 struct Suffix { 00040 IndexType index; 00041 IndexType pos_n; 00042 IndexType pos_2n; 00043 00044 bool operator<(const Suffix& other) const { 00045 return (pos_n < other.pos_n) || 00046 ((pos_n == other.pos_n) && (pos_2n < other.pos_2n)); 00047 } 00048 }; 00049 00050 public: 00051 00071 template<typename _Iterator> 00072 static void buildSuffixArray(_Iterator first, 00073 _Iterator last, std::vector<IndexType>* out) 00074 { 00075 typedef typename iterator_traits<_Iterator>::difference_type DifferenceType; 00076 typedef typename iterator_traits<_Iterator>::value_type ValueType; 00077 00078 STATIC_ASSERT(std::numeric_limits<ValueType>::is_integer, 00079 "Value type of iterator should be integer"); 00080 00081 STATIC_ASSERT(std::numeric_limits<IndexType>::digits >= 00082 std::numeric_limits<ValueType>::digits, 00083 "IndexType is unable to hold iterator value type!"); 00084 00085 // This is very special case -- check that if IndexType does not have 00086 // additional bits over ValueType, we can't use maxvalue 00087 if (std::numeric_limits<IndexType>::digits == 00088 std::numeric_limits<ValueType>::digits) { 00089 for (_Iterator it = first; it != last; ++it) { 00090 if ((*it) == std::numeric_limits<ValueType>::max()) { 00091 throw std::invalid_argument("Input sequence can't contain maxvalue." 00092 "Try to reduce size of the alphabet."); 00093 } 00094 } 00095 } 00096 00098 DifferenceType n = last - first; 00099 00100 if (n < 0 || n >= std::numeric_limits<IndexType>::max() ) { 00101 throw std::overflow_error("The length of the sequence is too big " 00102 "to process. Have you passed the [first,last) in correct order?"); 00103 } 00104 00105 00107 std::vector<IndexType>& result = *out; 00108 result.resize(n); 00109 00111 std::vector<Suffix> suffixes(n); 00112 00113 _Iterator it = first; 00114 for (IndexType i = 0; i < n; i++) { 00115 suffixes[i].index = i; 00116 // Move the range [ValueType::min, ValueType::max] to 00117 // [IndexType::min+1, IndexType::min+ValueType::size] 00118 suffixes[i].pos_n = (std::numeric_limits<IndexType>::min() + 1) + 00119 ((*it) - std::numeric_limits<ValueType>::min()); 00120 // note that suffixes[i].pos_2n does not need to be initialized here 00121 ++it; 00122 } 00123 assert(it == last); 00124 00125 IndexType current_size = 1; // the processed size 00126 do { 00127 for (IndexType i = 0; i < n; i++) { 00128 result[suffixes[i].index] = suffixes[i].pos_n; 00129 } 00130 00131 // Compute the pos_2n 00132 for (IndexType i = 0; i < n; i++) { 00133 IndexType tail_start = suffixes[i].index + current_size; 00134 // short suffixes (already sorted at this stage) goes before 00135 // other, we reserved value 0 for them 00136 suffixes[i].pos_2n = (tail_start >= n) ? 00137 std::numeric_limits<IndexType>::min() : 1 + result[tail_start]; 00138 } 00139 00140 sort(suffixes.begin(), suffixes.end()); 00141 result[0] = 0; 00142 for (IndexType i = 1; i < n; i++) { 00143 // check for equality using two operators less 00144 if (!(suffixes[i-1] < suffixes[i]) && 00145 !(suffixes[i] < suffixes[i-1])) { 00146 result[i] = result[i-1]; 00147 } else { 00148 result[i] = i; 00149 } 00150 } 00151 00152 for (IndexType i = 0; i < n; i++) { 00153 suffixes[i].pos_n = result[i]; 00154 } 00155 00156 current_size *= 2; 00157 } while (current_size < n); 00158 00159 for (IndexType i = 0; i < n; i++) { 00160 result[i] = suffixes[i].index; 00161 } 00162 } 00163 }; 00164 00165 typedef ManberMyersLog2_<int> ManberMyersLog2; 00166 00167 } // namespace suffix_array 00168 } // namespace strings 00169 00170 #endif