Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
src/strings/search_rabin_karp/rabin_karp.h
Go to the documentation of this file.
00001 #ifndef H_STRINGS_SEARCH_RABIN_KARP_RABIN_KARP
00002 #define H_STRINGS_SEARCH_RABIN_KARP_RABIN_KARP
00003 
00006 #include "utils/preconditions/preconditions.h"
00007 #include <vector>
00008 #include <iterator>
00009 #include <algorithm>
00010 #include "strings/suffix_array_naive/sort_helper.h"
00011 #include "strings/search_callback/search_callback.h"
00012 #include "utils/macros/unused.h"
00013 #include <stdio.h>
00014 #include "rolling_hash.h"
00022 namespace strings {
00023 namespace search {
00024 
00025 class RabinKarp {
00026  private:
00027    template<typename _Iterator, typename _PatternIterator>
00028    static bool checkMatch(_Iterator start, _PatternIterator pattern_first, _PatternIterator
00029        pattern_last)
00030    {
00031      _Iterator it_text = start;
00032      _PatternIterator it_pat = pattern_first;
00033      while (it_pat != pattern_last) {
00034        if (*it_text != *it_pat) {
00035          return false;
00036        }
00037        ++it_text;
00038        ++it_pat;
00039      }
00040     return true;
00041    }
00042 
00043  public:
00044   template <typename _Iterator, typename _PatternIterator>
00045   static void search(
00046       _Iterator first,
00047       _Iterator last, 
00048       _PatternIterator pattern_first,
00049       _PatternIterator pattern_last,
00050       bool checkFalsePositives,
00051       strings::search_callback::SearchCallback<_Iterator>* callback 
00052       ) {
00053     /*
00054     STATIC_ASSERT(std::iterator_traits<_Iterator>::value_type ==
00055                   std::iterator_traits<_PatternIterator>::value_type,
00056                   "text and pattern should be of same type");
00057     STATIC_ASSERT(std::iterator_traits<_Iterator>::difference_type ==
00058                   std::iterator_traits<_PatternIterator>::value_type,
00059       "Iterator difference types are different!");
00060     */
00061     typedef typename std::iterator_traits<_PatternIterator>::value_type ValueType;
00062     typedef typename std::iterator_traits<_PatternIterator>::difference_type SizeType;
00063     SizeType pattern_length = pattern_last - pattern_first;
00064     SizeType text_length = last - first;
00065     Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?");
00066     Preconditions::check(text_length > 0, "wrong order of [first, last) ?");
00067     if (text_length < pattern_length) {
00068       return; // nothing to do
00069     }
00070 
00071     typedef long long int HashValue;
00072     // Empirically determined good values causing no collisions on real testdata
00073     const HashValue GOOD_PRIME = 2038074743;
00074     const HashValue GOOD_C = 101; // is a group generator
00075     RollingHash<HashValue> hashFunction = RollingHash<HashValue>(pattern_length, GOOD_PRIME, GOOD_C);
00076     
00077     // compute hash value of the pattern
00078     _PatternIterator pattern_it = pattern_first;
00079     while (pattern_it != pattern_last) {
00080       hashFunction.roll(*pattern_it, (ValueType) 0);
00081       ++pattern_it;
00082     }
00083 
00084     const HashValue patternHash = hashFunction.getHash();
00085 
00086     hashFunction = RollingHash<HashValue>(pattern_length, GOOD_PRIME, GOOD_C);
00087     _Iterator it_new = first;
00088     for (SizeType i = 0; i < pattern_length; i++) {
00089       hashFunction.roll(*it_new, (ValueType) 0);
00090       ++it_new;
00091     }
00092 
00093     _Iterator it_old = first;
00094     while (it_new != last) {
00095       if (patternHash == hashFunction.getHash()) {
00096         if (checkFalsePositives && !checkMatch(it_old, pattern_first,
00097               pattern_last)) {
00098           // report an mismatch
00099         } else {
00100           callback->foundMatch(it_old);
00101         }
00102       }
00103 
00104       hashFunction.roll(*it_new, *it_old);
00105       ++it_new;
00106       ++it_old;
00107     }
00108 
00109   }
00110 };
00111 
00112 } // namespace suffix_array
00113 } // namespace strings
00114 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines