Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_DUVAL 00002 #define H_DUVAL 00003 00015 #include "debug/ppdebug.h" 00016 #include "utils/preconditions/preconditions.h" 00017 00018 namespace strings { 00019 namespace cyclic { 00020 00021 template<typename T> 00022 class Duval { 00023 public: 00024 typedef int SizeType; 00039 static void minimumSuffixes(T* sequence, SizeType length, 00040 std::vector<int>* out) { 00041 Preconditions::check(length > 0); 00042 Preconditions::check(length < std::numeric_limits<SizeType>::max() - 1); 00043 00044 int start = 0; 00045 int i; 00046 int j = 2; 00047 // Warning: original Duval's pseudocode works by indexing from 1 00048 // We will shift sequence and index original minus one in the out array 00049 sequence--; // simple trick how to shift sequence 00050 00051 out->clear(); 00052 out->resize(length); 00053 std::vector<int> f(length + 1); 00054 00055 out->at(0) = 0; 00056 f[2] = 1; 00057 while (j <= length) { 00058 i = start + f[j - start]; 00059 while ((j <= length) && sequence[i] <= sequence[j]) { 00060 if (sequence[i] == sequence[j]) { 00061 out->at(j - 1) = out->at(i - 1) + j - i; 00062 i++; 00063 } else { 00064 out->at(j - 1) = start; 00065 i = start + 1; 00066 } 00067 00068 j++; 00069 assert(j >= start); 00070 if (j - start <= length) { 00071 f[j - start] = i - start; 00072 } 00073 }; 00074 start = out->at(i - 1) + j - i; 00075 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00076 // ! BIG WARNING: original pseudocode omits "j <= length" check ! 00077 // ! and the result may be out of bounds ! 00078 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00079 if (j <= length && start == j - 1) { 00080 out->at(j - 1) = start; 00081 j++; 00082 } 00083 } 00084 } 00085 00102 static int leastCyclicShift(T* sequence, int length) { 00103 Preconditions::check(length > 0); 00104 // Duval's original article uses indexing from 1, 00105 // we need to adjust only "start" and "return" values 00106 int start = -1; 00107 while (start < 2 * length - 1) { 00108 int i = start + 1; 00109 int j = start + 2; 00110 // Invariant is such that 00111 // a_{start+1} ... a_{i-1} is Lyndon word 00112 // a_{start+1} ... a_{i-1} == a_{j-i+1} ... a_{j-1} 00113 while (j < 2 * length && 00114 sequence[i % length] <= sequence[j % length]) { 00115 if ((j == start + 1 + length) && 00116 (length % (j - i) == 0)) { 00117 return start + 1; // we have found the cyclic shift we needed 00118 } 00119 if (sequence[i % length] == sequence[j % length]) { 00120 // the same, we can increment both i and j 00121 i++; 00122 } else { 00123 // we need to reset i 00124 i = start + 1; 00125 } 00126 ++j; 00127 } 00128 // now we have sequence[i] > sequence[j] 00129 // and we can start outputting lydon factors 00130 while (start < i) { 00131 start += j - i; 00132 }; 00133 } 00134 // oops, we should never reach this point. 00135 assert(false); 00136 } 00137 00150 static int leastCyclicShiftEmaxx(T* sequence, int length) { 00151 Preconditions::check(length > 0); 00152 // Duval's original article uses indexing from 1, 00153 // we need to adjust only "start" and "return" values 00154 int start = 0; 00155 int answer = 0; 00156 while (start < length) { 00157 answer = start; 00158 int i = start; 00159 int j = start + 1; 00160 while ((j < 2 * length) && 00161 (sequence[i % length] <= sequence[j % length])) { 00162 if (sequence[ i % length] < sequence[ j % length]) { 00163 i = start; 00164 } else { 00165 ++j; 00166 } 00167 ++start; 00168 } 00169 while (start <= i) { 00170 start += j - i; 00171 } 00172 } 00173 return answer; 00174 } 00175 }; 00176 00177 } // namespace cyclic 00178 } // namespace strings 00179 #endif