Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_STRINGS_SUFFIX_ARRAY_CHECK 00002 #define H_STRINGS_SUFFIX_ARRAY_CHECK 00003 00011 #include "utils/preconditions/preconditions.h" 00012 #include <vector> 00013 #include <map> 00014 00015 namespace strings { 00016 namespace suffix_array { 00017 00018 /* 00019 Quick intro: 00020 00021 Theorem 2 from the paper 00022 An array SA[0,n) of integers is the suffixx array of a string s[0,n) 00023 if and only if the following conditions are satisfied: 00024 1. For all i \in [0,n), SA[i] \in [0,n). 00025 2. For all i \in [1,n), s[SA[i - 1]] <= s[SA[i]]. 00026 3. For all i \in [1,n) such that s[SA[i - 1]] = s[SA[i]] and 00027 SA[i - 1] = n - 1, there exists j, k \in [0,n) such that 00028 SA[j] = SA[i - 1]+1, SA[k] = SA[i]+1 and j<k. 00029 00030 Authors of the papers suggest this pseudocode for (rephrased) 3rd condition 00031 for i=n-1 downto 0: 00032 A[s[SA[i]]] = i 00033 A[s[n-1]]++ // skip SA[i]=n-1 00034 for i=0 to n-1: 00035 if SA[i]>1: 00036 c = s[SA[i] - 1] 00037 check SA[A[c]] + 1 == SA[i] 00038 A[c]++ 00039 00040 Where A is map from alphabet to integers. 00041 */ 00042 00043 00047 template <typename T> 00048 class SuffixArrayChecker { 00049 FRIEND_TEST(SuffixArrayCheck, condition1); 00050 protected: 00057 static bool checkCondition1Holds(const int SA[], int length) { 00058 for (int i = 0; i < length; i++) { 00059 if (SA[i] < 0 || SA[i] >= length) { 00060 return false; 00061 } 00062 } 00063 return true; 00064 } 00065 00074 static bool checkCondition2Holds(const T s[], const int SA[], int length) { 00075 for (int i = 1; i < length; i++) { 00076 if (s[SA[i - 1]] > s[SA[i]]) { 00077 return false; 00078 } 00079 } 00080 return true; 00081 } 00082 00108 static bool checkCondition3HoldsKarkkainen( 00109 const T s[], const int SA[], int length) { 00110 std::map<T, int> A; 00111 for (int i = length - 1; i >= 0; i--) { 00112 A[s[SA[i]]] = i; 00113 } 00114 A[s[length - 1]]++; // skip SA[i]=n-1 00115 for (int i = 0; i < length; i++) { 00116 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00117 // ! BIG WARNING: In original pseudocode, there is SA[i] > 1 ! 00118 // ! hovewer correct version is SA[i] >= 1 ! 00119 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00120 if (SA[i] >= 1) { 00121 T c = s[SA[i] - 1]; 00122 if (SA[A[c]] + 1 != SA[i]) { 00123 return false; 00124 } 00125 A[c]++; 00126 } 00127 } 00128 return true; 00129 } 00130 00145 static bool checkCondition3HoldsInverses( 00146 const T s[], const int SA[], int length) { 00147 // Note, we need length+1, see below 00148 std::vector<int> ISA(length + 1, -1); // inverse array 00149 for (int i = 0; i < length; i++) ISA[SA[i]] = i; 00150 00151 for (int i = 1; i < length; i++) { 00152 if ((s[SA[i - 1]] == s[SA[i]]) && (SA[i-1] != length - 1)) { 00153 // No need to worry about bounds, we assume 0 < SA[] < length 00154 // and ISA's size is length+1 so we do not need to check for 00155 // overflow in ISA[SA[]+1] 00156 int j = ISA[SA[i-1] + 1]; 00157 int k = ISA[SA[i] + 1]; 00158 if (j == -1 || k == -1 || j >= k) { 00159 return false; 00160 } 00161 } 00162 } 00163 return true; 00164 } 00165 00166 public: 00177 static bool isValidSuffixArray(const T s[], const int SA[], int 00178 length) { 00179 // Note: we need to first check out-of-bounds 00180 if (!checkCondition1Holds(SA, length)) { 00181 return false; 00182 } 00183 return checkCondition2Holds(s, SA, length) && 00184 checkCondition3HoldsKarkkainen(s, SA, length); 00185 } 00186 00197 static bool isValidSuffixArrayInverses(const T s[], const int SA[], int 00198 length) { 00199 // Note: we need to first check out-of-bounds 00200 if (!checkCondition1Holds(SA, length)) { 00201 return false; 00202 } 00203 return checkCondition2Holds(s, SA, length) && 00204 checkCondition3HoldsInverses(s, SA, length); 00205 } 00206 00207 }; 00208 00209 } // namespace suffix_array 00210 } // namespace strings 00211 00212 #endif