Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_STRINGS_SUFFIX_ARRAY_LCP 00002 #define H_STRINGS_SUFFIX_ARRAY_LCP 00003 #include "utils/preconditions/preconditions.h" 00004 00005 namespace strings { 00006 namespace suffix_array { 00007 00008 00009 class LCPKasai { 00010 public: 00018 template<typename _Iterator, typename _SAIterator> 00019 static void getHeightArray(_Iterator seq_first, _Iterator seq_last, 00020 _SAIterator sa_first, _SAIterator sa_last, 00021 std::vector<int> * out) { 00022 Preconditions::checkNotNull(out); 00023 Preconditions::check(seq_last - seq_first > 0, 00024 "sequence must contain at least one character"); 00025 Preconditions::check(seq_last - seq_first == sa_last - sa_first, 00026 "sequence and it's corresponding suffix array must have " 00027 "same length"); 00028 00029 unsigned int n = seq_last - seq_first; 00036 std::vector<int> rank(n); 00037 out->clear(); 00038 out->resize(n); 00039 00040 00041 for (unsigned int i = 0; i < n; i++) { 00042 rank[*(sa_first + i)] = i; 00043 } 00044 00045 const int undefined = -1; 00046 (*out)[0] = undefined; 00047 00048 int h = 0; 00049 // Basic idea of the algorithm: 00050 // let j-1 = sa[rank[i]] 00051 // 00052 // Theorem 1 from paper: 00053 // Let Height[Rank[i-1]] = lcp(A_{j-1}, A_{i-1}) > 1. Then 00054 // Height[Rank[i]] = lcp(A_{k}, A_{i}) >= Height[Rank[i-1]] - 1 00055 // where k = sa[Rank[i] - 1]. 00056 // 00057 // Therefore, we compute Height[Rank[i]] iterating over i. 00058 // in each step, new Height is at least old minus one. 00059 00060 for (unsigned int i = 0; i < n; i++) { 00061 assert(rank[i] >= 0); 00062 if (rank[i] == 0) { 00063 // we set the pos[0] to undefined already, nothing to do 00064 continue; 00065 } 00066 00067 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00068 // ! Warning: original pseudocode is k:=Pos[Rank[i]-1] ! 00069 // ! but the correct variable is not k but j ! 00070 // ! @see Two Space Saving Tricks ! 00071 // ! for Linear Time LCP Array Computation ! 00072 // ! by Manzini, Giovanni ! 00073 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 00074 int j = *(sa_first + (rank[i] - 1)); 00075 // we know, that the height is at least h, 00076 // compute the height by finding first non-match 00077 _Iterator ita = seq_first + (i + h); 00078 _Iterator itb = seq_first + (j + h); 00079 while ((ita != seq_last) && (itb != seq_last) && 00080 (*ita == *itb)) { 00081 // Note: this will be called at most 2n 00082 // times because maximum value of h is n and it 00083 // can decrease by one at most n times 00084 ++ita; 00085 ++itb; 00086 ++h; 00087 } 00088 (*out)[rank[i]] = h; 00089 // decrease h for next iteration 00090 if (h > 0) { 00091 h--; 00092 } 00093 } 00094 } 00095 }; 00096 00097 } // namespace suffix_array 00098 } // namespace strings 00099 #endif