Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_STRINGS_SUFFIX_ARRAY_LCP_MANZINI 00002 #define H_STRINGS_SUFFIX_ARRAY_LCP_MANZINI 00003 00010 #include "utils/branch_predict/branch_predict.h" 00011 #include "utils/preconditions/preconditions.h" 00012 #include "utils/macros/evil_constructors.h" 00013 #include "debug/ppdebug.h" 00014 00015 namespace strings { 00016 namespace suffix_array { 00017 00018 class LCPManzini { 00019 00020 template<typename _Iterator> 00021 static void compute_counts(_Iterator first, _Iterator last, int alphabet_size, 00022 std::vector<int>* out) 00023 { 00024 // Check preconditions 00025 Preconditions::check(alphabet_size > 0); 00026 _Iterator it = first; 00027 while (it != last) { 00028 Preconditions::checkRange((int)*it, alphabet_size); 00029 ++it; 00030 } 00031 // Do the real work 00032 out->clear(); 00033 out->resize(alphabet_size + 1); // Note: +1 is mandatory! 00034 it = first; 00035 while (it != last) { 00036 (*out)[1 + (*it)]++; 00037 ++it; 00038 } 00039 00040 for (int i = 1; i <= alphabet_size; i++) { 00041 (*out)[i] += (*out)[i - 1]; 00042 } 00043 out->pop_back(); // remove last element, which is not needed 00044 // and we stored it only for convenience 00045 00046 } 00047 00061 // Pseudocode taken from [ltLCP] 00062 // Procedure Sa2RankNext(rank next) 00063 // 1. j = count[t[n]]++; 00064 // 2. rank_next[j]=0; 00065 // 3. for(i=1;i<=n;i++) { 00066 // 4. if(sa[i]==1) 00067 // 5. eos_pos = i; 00068 // 6. else { 00069 // 7. c = t[sa[i]-1]; 00070 // 8. j = count[c]++; 00071 // 9. rank_next[j]=i; 00072 // 10. } 00073 // 11. } 00074 // 12. return eos_pos; 00075 template<typename _Iterator, typename _SAIterator> 00076 static int compute_rank_next(_Iterator seq_first, _Iterator seq_last, 00077 _SAIterator sa_first, _SAIterator sa_last, int alphabet_size, 00078 std::vector<int> *rank_next) 00079 { 00080 Preconditions::check((seq_last - seq_first) == (sa_last - sa_first), 00081 "Sequence and suffix array lengths does not match"); 00082 Preconditions::check(seq_last - seq_first > 0, 00083 "Can't compute rank_next on empty sequence"); 00084 00085 std::vector<int> count; 00086 // Warning: this step is not in the pseudocode, but 00087 // if you read paper carefully, you will know that 00088 // count should be initialized 00089 compute_counts(seq_first, seq_last, alphabet_size, &count); 00090 int n = seq_last - seq_first; 00091 int j = count[*(seq_last - 1)]++; 00092 int eos_pos = - 1; // undefined 00093 rank_next->clear(); 00094 rank_next->resize(seq_last - seq_first); 00095 00096 (*rank_next)[j] = -1; // marker 00097 for (int i = 0; i < n; i++) { 00098 if (UNLIKELY(*(sa_first + i) == 0)) { 00099 eos_pos = i; 00100 } else { 00101 int tmp = *(sa_first + i) - 1; 00102 j = count[*(seq_first + tmp)]++; 00103 (*rank_next)[j] = i; 00104 } 00105 } 00106 assert(eos_pos != -1); 00107 return eos_pos; 00108 } 00109 00110 // Procedure Lcp9 00111 // 1. k = Sa2RankNext(lcp); 00112 // 2. h=0; 00113 // 3. for(i=1;i<=n;i++) { 00114 // 4. nextk = lcp[k]; 00115 // 5. if(k==1) lcp[k]=-1; 00116 // 6. else { 00117 // 7. j = sa[k-1]; 00118 // 8. while(i+h<=n && j+h<=n && t[i+h]==t[j+h]) 00119 // 9. h++; 00120 // 10. lcp[k] = h; 00121 // 11. } 00122 // 12. if(h>0) h--; 00123 // 13. k=nextk; 00124 // 14. } 00125 public: 00126 template<typename _Iterator, typename _SAIterator> 00127 static void getHeightArray(_Iterator seq_first, _Iterator seq_last, 00128 _SAIterator sa_first, _SAIterator sa_last, int alphabet_size, 00129 std::vector<int> *out) 00130 { 00131 Preconditions::check((seq_last - seq_first) == (sa_last - sa_first)); 00132 00133 std::vector<int>& lcp(*out); // shorthand 00134 int current = compute_rank_next(seq_first, seq_last, 00135 sa_first, sa_last, alphabet_size, out); 00136 int h = 0; 00137 int n = seq_last - seq_first; 00138 for (int i = 0; i < n; i++) { 00139 assert(current >= 0); // current is not a marker 00140 assert(i == *(sa_first + current)); // current == rank[i] 00141 int next = lcp[current]; 00142 // skip the rank[i]==0 00143 if (current != 0) { 00144 int j = *(sa_first + (current - 1)); 00145 _Iterator ita = seq_first + (i + h); 00146 _Iterator itb = seq_first + (j + h); 00147 while ((ita != seq_last) && (itb != seq_last) && 00148 (*ita == *itb)) { 00149 ++ita; 00150 ++itb; 00151 ++h; 00152 } 00153 lcp[current] = h; 00154 if (h > 0) { 00155 h--; 00156 } 00157 } 00158 // rank[i+1] = rankNext[rank[i]] 00159 current = next; 00160 } 00161 assert(current == -1); // marker 00162 } 00163 00164 private: 00165 DISALLOW_EVIL_CONSTRUCTORS(LCPManzini); 00166 }; 00167 00168 } // namespace suffix_array 00169 } // namespace strings 00170 00171 #endif