Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_STRINGS_SEARCH_KMP_KMP 00002 #define H_STRINGS_SEARCH_KMP_KMP 00003 00004 #pragma GCC diagnostic warning "-Wsign-compare" 00005 00006 #include "utils/preconditions/preconditions.h" 00007 #include <vector> 00008 #include <iterator> 00009 #include <algorithm> 00010 #include "strings/search_callback/search_callback.h" 00011 #include "utils/macros/unused.h" 00012 #include <stdio.h> 00019 namespace strings { 00020 namespace search { 00021 00022 class KMP { 00023 private: 00024 template <typename _PatternIterator, typename IndexType> 00025 static void prepare(_PatternIterator pattern_first, _PatternIterator pattern_last, 00026 std::vector<IndexType > *out_) 00027 { 00028 // typedefs 00029 typedef typename std::iterator_traits<_PatternIterator>::difference_type LengthType; 00030 typedef typename std::vector<IndexType>::size_type VectorSizeType; 00031 // preconditions 00032 LengthType pattern_length = pattern_last - pattern_first; 00033 Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?"); 00034 Preconditions::check(std::numeric_limits<VectorSizeType>::max() >= pattern_length); 00035 // for unsigned types, we want to reserve max() as "-1" 00036 Preconditions::check(std::numeric_limits<IndexType>::max() > pattern_length); 00037 Preconditions::checkNotNull(out_); 00038 // algorithm 00039 std::vector<IndexType> &out = *out_; 00040 out.clear(); 00041 out.resize(pattern_length); 00042 const IndexType MINUS_ONE = (IndexType) -1; 00043 out[0] = MINUS_ONE; 00044 if (pattern_length == 1) { 00045 return; // special case! 00046 } 00047 00048 IndexType pos = MINUS_ONE; 00049 for (LengthType i = 1; i < pattern_length; i++) { 00050 _PatternIterator it_current = pattern_first + i; 00051 while ((pos != MINUS_ONE) && 00052 (*(pattern_first + (pos + 1)) != *it_current)) { 00053 pos = out[pos]; // go with backlink 00054 } 00055 if (*(pattern_first + (pos +1) ) == *it_current) { 00056 pos++; 00057 } 00058 out[i] = pos; 00059 } 00060 } 00061 00062 template <typename _Iterator, typename _PatternIterator, typename IndexType> 00063 static void search( 00064 _Iterator first, 00065 _Iterator last, 00066 _PatternIterator pattern_first, 00067 _PatternIterator pattern_last, 00068 std::vector<IndexType>& data, 00069 strings::search_callback::SearchCallback<_Iterator>* callback 00070 ) { 00071 // typedefs 00072 typedef typename std::iterator_traits<_PatternIterator>::difference_type PatternLengthType; 00073 typedef typename std::iterator_traits<_Iterator>::difference_type LengthType; 00074 typedef typename std::vector<IndexType>::size_type VectorSizeType; 00075 // lengths 00076 PatternLengthType pattern_length = pattern_last - pattern_first; 00077 LengthType length = last - first; 00078 // preconditions 00079 Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?"); 00080 Preconditions::check(length > 0, "wrong order of [first, last) ?"); 00081 Preconditions::check(std::numeric_limits<VectorSizeType>::max() >= pattern_length); 00082 Preconditions::check(std::numeric_limits<IndexType>::max() > pattern_length); 00083 // algorithm 00084 const IndexType MINUS_ONE = (IndexType) -1; 00085 00086 IndexType pos = MINUS_ONE; 00087 for (LengthType i = 0; i < length; i++) { 00088 _Iterator it_current = first + i; 00089 while ((pos != MINUS_ONE) && 00090 (*(pattern_first + (pos + 1)) != *it_current)) { 00091 pos = data[pos]; // go with backlink 00092 } 00093 if (*(pattern_first + (pos + 1) ) == *it_current) { 00094 pos++; 00095 } 00096 if (pos == pattern_length - 1) { 00097 callback->foundMatch(first + (i - pattern_length + 1)); 00098 pos = data[pos]; // go with backlink 00099 } 00100 } 00101 00102 } 00103 00104 public: 00105 template <typename _Iterator, typename _PatternIterator> 00106 static void search( 00107 _Iterator first, 00108 _Iterator last, 00109 _PatternIterator pattern_first, 00110 _PatternIterator pattern_last, 00111 strings::search_callback::SearchCallback<_Iterator>* callback 00112 ) { 00113 Preconditions::check(last - first >= 0); 00114 Preconditions::check(pattern_last - pattern_first >= 0); 00115 // nothing to do 00116 // note that both sides are positive and thus there is no problem 00117 // if automatic cast will occur 00118 if (last - first < pattern_last - pattern_first) { 00119 return; 00120 } 00121 vector<int> data; 00122 prepare(pattern_first, pattern_last, &data); 00123 search(first, last, pattern_first, pattern_last, data, callback); 00124 } 00125 }; 00126 00127 } // namespace suffix_array 00128 } // namespace strings 00129 #endif