Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/search_kmp/kmp.h
Go to the documentation of this file.
00001 #ifndef H_STRINGS_SEARCH_KMP_KMP
00002 #define H_STRINGS_SEARCH_KMP_KMP
00003 
00004 #pragma GCC diagnostic warning "-Wsign-compare"
00005 
00006 #include "utils/preconditions/preconditions.h"
00007 #include <vector>
00008 #include <iterator>
00009 #include <algorithm>
00010 #include "strings/search_callback/search_callback.h"
00011 #include "utils/macros/unused.h"
00012 #include <stdio.h>
00019 namespace strings {
00020 namespace search {
00021 
00022 class KMP {
00023  private:
00024   template <typename _PatternIterator, typename IndexType>
00025   static void prepare(_PatternIterator pattern_first, _PatternIterator pattern_last,
00026       std::vector<IndexType > *out_)
00027   {
00028     // typedefs
00029     typedef typename std::iterator_traits<_PatternIterator>::difference_type LengthType;
00030     typedef typename std::vector<IndexType>::size_type VectorSizeType;
00031     // preconditions
00032     LengthType pattern_length = pattern_last - pattern_first;
00033     Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?");
00034     Preconditions::check(std::numeric_limits<VectorSizeType>::max() >= pattern_length);
00035     // for unsigned types, we want to reserve max() as "-1"
00036     Preconditions::check(std::numeric_limits<IndexType>::max() > pattern_length);
00037     Preconditions::checkNotNull(out_);
00038     // algorithm
00039     std::vector<IndexType> &out = *out_;
00040     out.clear();
00041     out.resize(pattern_length);
00042     const IndexType MINUS_ONE = (IndexType) -1;
00043     out[0] = MINUS_ONE;
00044     if (pattern_length == 1) {
00045       return; // special case!
00046     }
00047 
00048     IndexType pos = MINUS_ONE;
00049     for (LengthType i = 1; i < pattern_length; i++) {
00050       _PatternIterator it_current = pattern_first + i;
00051       while ((pos != MINUS_ONE) && 
00052              (*(pattern_first + (pos + 1)) != *it_current)) {
00053         pos = out[pos]; // go with backlink
00054       }
00055       if (*(pattern_first + (pos +1) ) == *it_current) {
00056         pos++;
00057       }
00058       out[i] = pos;
00059     }
00060   }
00061 
00062   template <typename _Iterator, typename _PatternIterator, typename IndexType>
00063   static void search(
00064       _Iterator first,
00065       _Iterator last, 
00066       _PatternIterator pattern_first,
00067       _PatternIterator pattern_last,
00068       std::vector<IndexType>& data,
00069       strings::search_callback::SearchCallback<_Iterator>* callback
00070       ) {
00071     // typedefs
00072     typedef typename std::iterator_traits<_PatternIterator>::difference_type PatternLengthType;
00073     typedef typename std::iterator_traits<_Iterator>::difference_type LengthType;
00074     typedef typename std::vector<IndexType>::size_type VectorSizeType;
00075     // lengths
00076     PatternLengthType pattern_length = pattern_last - pattern_first;
00077     LengthType length = last - first;
00078     // preconditions
00079     Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?");
00080     Preconditions::check(length > 0, "wrong order of [first, last) ?");
00081     Preconditions::check(std::numeric_limits<VectorSizeType>::max() >= pattern_length);
00082     Preconditions::check(std::numeric_limits<IndexType>::max() > pattern_length);
00083     // algorithm
00084     const IndexType MINUS_ONE = (IndexType) -1;
00085 
00086     IndexType pos = MINUS_ONE;
00087     for (LengthType i = 0; i < length; i++) {
00088       _Iterator it_current = first + i;
00089       while ((pos != MINUS_ONE) && 
00090              (*(pattern_first + (pos + 1)) != *it_current)) {
00091         pos = data[pos]; // go with backlink
00092       }
00093       if (*(pattern_first + (pos + 1) ) == *it_current) {
00094         pos++;
00095       }
00096       if (pos == pattern_length - 1) {
00097         callback->foundMatch(first + (i - pattern_length + 1));
00098         pos = data[pos]; // go with backlink
00099       }
00100     }
00101 
00102   }
00103 
00104  public:
00105   template <typename _Iterator, typename _PatternIterator>
00106   static void search(
00107       _Iterator first,
00108       _Iterator last, 
00109       _PatternIterator pattern_first,
00110       _PatternIterator pattern_last,
00111       strings::search_callback::SearchCallback<_Iterator>* callback 
00112       ) {
00113     Preconditions::check(last - first >= 0);
00114     Preconditions::check(pattern_last - pattern_first >= 0);
00115     // nothing to do
00116     // note that both sides are positive and thus there is no problem
00117     // if automatic cast will occur
00118     if (last - first < pattern_last - pattern_first) {
00119       return;
00120     }
00121     vector<int> data;
00122     prepare(pattern_first, pattern_last, &data);
00123     search(first, last, pattern_first, pattern_last, data, callback);
00124   }
00125 };
00126 
00127 } // namespace suffix_array
00128 } // namespace strings
00129 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines