Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/cyclic/duval.h
Go to the documentation of this file.
00001 #ifndef H_DUVAL
00002 #define H_DUVAL
00003 
00015 #include "debug/ppdebug.h"
00016 #include "utils/preconditions/preconditions.h"
00017 
00018 namespace strings {
00019 namespace cyclic {
00020 
00021 template<typename T>
00022 class Duval {
00023  public:
00024   typedef int SizeType;
00039    static void minimumSuffixes(T* sequence, SizeType length, 
00040        std::vector<int>* out) {
00041      Preconditions::check(length > 0);
00042      Preconditions::check(length < std::numeric_limits<SizeType>::max() - 1);
00043 
00044      int start = 0;
00045      int i;
00046      int j = 2;
00047      // Warning: original Duval's pseudocode works by indexing from 1
00048      // We will shift sequence and index original minus one in the out array
00049      sequence--; // simple trick how to shift sequence
00050 
00051      out->clear();
00052      out->resize(length);
00053      std::vector<int> f(length + 1);
00054 
00055      out->at(0) = 0;
00056      f[2] = 1;
00057      while (j <= length) {
00058        i = start + f[j - start];
00059        while ((j <= length) && sequence[i] <= sequence[j]) {
00060          if (sequence[i] == sequence[j]) {
00061            out->at(j - 1) = out->at(i - 1) + j - i;
00062            i++;
00063          } else {
00064            out->at(j - 1) = start;
00065            i = start + 1;
00066          }
00067 
00068          j++;
00069          assert(j >= start);
00070          if (j - start <= length) {
00071            f[j - start] = i - start;
00072          }
00073        };
00074        start = out->at(i - 1) + j - i;
00075        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
00076        // ! BIG WARNING: original pseudocode omits "j <= length" check !
00077        // ! and the result may be out of bounds                        !
00078        // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00079        if (j <= length && start == j - 1) {
00080          out->at(j - 1) = start;
00081          j++;
00082        }
00083      }
00084    }
00085 
00102    static int leastCyclicShift(T* sequence, int length) {
00103      Preconditions::check(length > 0);
00104      // Duval's original article uses indexing from 1,
00105      // we need to adjust only "start" and "return" values
00106      int start = -1;
00107      while (start < 2 * length - 1) {
00108        int i = start + 1;
00109        int j = start + 2;
00110        // Invariant is such that
00111        // a_{start+1} ... a_{i-1} is Lyndon word
00112        // a_{start+1} ... a_{i-1} == a_{j-i+1} ... a_{j-1}
00113        while (j < 2 * length && 
00114               sequence[i % length] <= sequence[j % length]) {
00115          if ((j  == start + 1 + length) && 
00116              (length % (j - i) == 0)) {
00117            return start + 1; // we have found the cyclic shift we needed
00118          }
00119          if (sequence[i % length] == sequence[j % length]) {
00120            // the same, we can increment both i and j
00121            i++;
00122          } else {
00123            // we need to reset i
00124            i = start + 1;
00125          }
00126          ++j;
00127        }
00128        // now we have sequence[i] > sequence[j]
00129        // and we can start outputting lydon factors
00130        while (start < i) {
00131          start += j - i;
00132        };
00133      }
00134      // oops, we should never reach this point.
00135      assert(false);
00136    }
00137 
00150    static int leastCyclicShiftEmaxx(T* sequence, int length) {
00151      Preconditions::check(length > 0);
00152      // Duval's original article uses indexing from 1,
00153      // we need to adjust only "start" and "return" values
00154      int start = 0;
00155      int answer = 0;
00156      while (start < length) {
00157        answer = start;
00158        int i = start;
00159        int j = start + 1;
00160        while ((j < 2 * length) &&
00161            (sequence[i % length] <= sequence[j % length])) {
00162          if (sequence[ i % length] < sequence[ j % length]) {
00163            i = start;
00164          } else {
00165            ++j;
00166          }
00167          ++start;
00168        }
00169        while (start <= i) {
00170          start += j - i;
00171        }
00172      }
00173      return answer;
00174    }
00175 };
00176 
00177 } // namespace cyclic
00178 } // namespace strings
00179 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines