Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/suffix_array_check/suffix_array_check.h
Go to the documentation of this file.
00001 #ifndef H_STRINGS_SUFFIX_ARRAY_CHECK
00002 #define H_STRINGS_SUFFIX_ARRAY_CHECK
00003 
00011 #include "utils/preconditions/preconditions.h"
00012 #include <vector>
00013 #include <map>
00014 
00015 namespace strings {
00016 namespace suffix_array {
00017 
00018 /*
00019 Quick intro:
00020 
00021 Theorem 2 from the paper
00022 An array SA[0,n) of integers is the suffixx array of a string s[0,n)
00023 if and only if the following conditions are satisfied:
00024 1. For all i \in [0,n), SA[i] \in [0,n).
00025 2. For all i \in [1,n), s[SA[i - 1]] <= s[SA[i]].
00026 3. For all i \in [1,n) such that s[SA[i - 1]] = s[SA[i]] and
00027   SA[i - 1] = n - 1, there exists j, k \in [0,n) such that
00028   SA[j] = SA[i - 1]+1, SA[k] = SA[i]+1 and j<k.
00029 
00030 Authors of the papers suggest this pseudocode for (rephrased) 3rd condition
00031   for i=n-1 downto 0:
00032     A[s[SA[i]]] = i
00033   A[s[n-1]]++ // skip SA[i]=n-1
00034   for i=0 to n-1:
00035     if SA[i]>1:
00036       c = s[SA[i] - 1]
00037       check SA[A[c]] + 1 == SA[i]
00038       A[c]++
00039 
00040 Where A is map from alphabet to integers.
00041 */
00042 
00043 
00047 template <typename T>
00048 class SuffixArrayChecker {
00049  FRIEND_TEST(SuffixArrayCheck, condition1);
00050  protected:
00057   static bool checkCondition1Holds(const int SA[], int length) {
00058     for (int i = 0; i < length; i++) {
00059       if (SA[i] < 0 || SA[i] >= length) {
00060         return false;
00061       }
00062     }
00063     return true;
00064   }
00065 
00074   static bool checkCondition2Holds(const T s[], const int SA[], int length) {
00075     for (int i = 1; i < length; i++) {
00076       if (s[SA[i - 1]] > s[SA[i]]) {
00077         return false;
00078       }
00079     }
00080     return true;
00081   }
00082   
00108   static bool checkCondition3HoldsKarkkainen(
00109       const T s[], const int SA[], int length) {
00110     std::map<T, int> A;
00111     for (int i = length - 1; i >= 0; i--) {
00112       A[s[SA[i]]] = i; 
00113     }
00114     A[s[length - 1]]++; // skip SA[i]=n-1
00115     for (int i = 0; i < length; i++) {
00116       // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00117       // ! BIG WARNING: In original pseudocode, there is SA[i] > 1 !
00118       // ! hovewer correct version is SA[i] >= 1                   !
00119       // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00120       if (SA[i] >= 1) {
00121         T c = s[SA[i] - 1];
00122         if (SA[A[c]] + 1 != SA[i]) {
00123           return false;
00124         }
00125         A[c]++;
00126       }
00127     }
00128     return true;
00129   }
00130 
00145   static bool checkCondition3HoldsInverses(
00146       const T s[], const int SA[], int length) {
00147     // Note, we need length+1, see below
00148     std::vector<int> ISA(length + 1, -1); // inverse array
00149     for (int i = 0; i < length; i++) ISA[SA[i]] = i;
00150 
00151     for (int i = 1; i < length; i++) {
00152       if ((s[SA[i - 1]] == s[SA[i]]) && (SA[i-1] != length - 1)) {
00153         // No need to worry about bounds, we assume 0 < SA[] < length
00154         // and ISA's size is length+1 so we do not need to check for
00155         // overflow in ISA[SA[]+1]
00156         int j = ISA[SA[i-1] + 1];
00157         int k = ISA[SA[i] + 1];
00158         if (j == -1 || k == -1 || j >= k) {
00159           return false;
00160         }
00161       }
00162     }
00163     return true;
00164   }
00165 
00166  public:
00177   static bool isValidSuffixArray(const T s[], const int SA[], int
00178       length) {
00179     // Note: we need to first check out-of-bounds
00180     if (!checkCondition1Holds(SA, length)) {
00181       return false;
00182     }
00183     return checkCondition2Holds(s, SA, length) && 
00184            checkCondition3HoldsKarkkainen(s, SA, length);
00185   }
00186 
00197   static bool isValidSuffixArrayInverses(const T s[], const int SA[], int
00198       length) {
00199     // Note: we need to first check out-of-bounds
00200     if (!checkCondition1Holds(SA, length)) {
00201       return false;
00202     }
00203     return checkCondition2Holds(s, SA, length) && 
00204            checkCondition3HoldsInverses(s, SA, length);
00205   }
00206 
00207 };
00208 
00209 } // namespace suffix_array
00210 } // namespace strings
00211 
00212 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines