Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/suffix_array_lcp_kasai/lcp_kasai.h
Go to the documentation of this file.
00001 #ifndef H_STRINGS_SUFFIX_ARRAY_LCP
00002 #define H_STRINGS_SUFFIX_ARRAY_LCP
00003 #include "utils/preconditions/preconditions.h"
00004 
00005 namespace strings {
00006 namespace suffix_array {
00007 
00008 
00009 class LCPKasai {
00010  public:
00018   template<typename _Iterator, typename _SAIterator>
00019   static void getHeightArray(_Iterator seq_first, _Iterator seq_last,
00020       _SAIterator sa_first, _SAIterator sa_last,
00021       std::vector<int> * out) {
00022     Preconditions::checkNotNull(out);
00023     Preconditions::check(seq_last - seq_first > 0,
00024         "sequence must contain at least one character");
00025     Preconditions::check(seq_last - seq_first == sa_last - sa_first,
00026         "sequence and it's corresponding suffix array must have "
00027         "same length");
00028 
00029     unsigned int n = seq_last - seq_first;
00036     std::vector<int> rank(n);
00037     out->clear();
00038     out->resize(n);
00039 
00040 
00041     for (unsigned int i = 0; i < n; i++) {
00042       rank[*(sa_first + i)] = i;
00043     }
00044 
00045     const int undefined = -1;
00046     (*out)[0] = undefined;
00047 
00048     int h = 0;
00049     // Basic idea of the algorithm:
00050     // let j-1 = sa[rank[i]]
00051     //
00052     // Theorem 1 from paper:
00053     // Let Height[Rank[i-1]] = lcp(A_{j-1}, A_{i-1}) > 1. Then
00054     // Height[Rank[i]] = lcp(A_{k}, A_{i}) >= Height[Rank[i-1]] - 1
00055     // where k = sa[Rank[i] - 1].
00056     // 
00057     // Therefore, we compute Height[Rank[i]] iterating over i.
00058     // in each step, new Height is at least old minus one.
00059 
00060     for (unsigned int i = 0; i < n; i++) {
00061       assert(rank[i] >= 0);
00062       if (rank[i] == 0) {
00063         // we set the pos[0] to undefined already, nothing to do
00064         continue;
00065       }
00066 
00067       // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00068       // ! Warning: original pseudocode is k:=Pos[Rank[i]-1] !
00069       // ! but the correct variable is not k but j           !
00070       // ! @see Two Space Saving Tricks                      !
00071       // ! for Linear Time LCP Array Computation             !
00072       // ! by Manzini, Giovanni                              !
00073       // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
00074       int j = *(sa_first + (rank[i] - 1));
00075       // we know, that the height is at least h,
00076       // compute the height by finding first non-match
00077       _Iterator ita = seq_first + (i + h);
00078       _Iterator itb = seq_first + (j + h);
00079       while ((ita != seq_last) && (itb != seq_last) &&
00080              (*ita == *itb)) {
00081         // Note: this will be called at most 2n
00082         // times because maximum value of h is n and it
00083         // can decrease by one at most n times
00084         ++ita;
00085         ++itb;
00086         ++h;
00087       }
00088       (*out)[rank[i]] = h;
00089       // decrease h for next iteration
00090       if (h > 0) {
00091         h--;
00092       }
00093     }
00094   }
00095 };
00096 
00097 } // namespace suffix_array
00098 } // namespace strings
00099 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines