Effective implementation of algorithms (Master Thesis)
Effective and error-free implementation of algorithms
|
00001 #ifndef H_STRINGS_SEARCH_RABIN_KARP_RABIN_KARP 00002 #define H_STRINGS_SEARCH_RABIN_KARP_RABIN_KARP 00003 00006 #include "utils/preconditions/preconditions.h" 00007 #include <vector> 00008 #include <iterator> 00009 #include <algorithm> 00010 #include "strings/suffix_array_naive/sort_helper.h" 00011 #include "strings/search_callback/search_callback.h" 00012 #include "utils/macros/unused.h" 00013 #include <stdio.h> 00014 #include "rolling_hash.h" 00022 namespace strings { 00023 namespace search { 00024 00025 class RabinKarp { 00026 private: 00027 template<typename _Iterator, typename _PatternIterator> 00028 static bool checkMatch(_Iterator start, _PatternIterator pattern_first, _PatternIterator 00029 pattern_last) 00030 { 00031 _Iterator it_text = start; 00032 _PatternIterator it_pat = pattern_first; 00033 while (it_pat != pattern_last) { 00034 if (*it_text != *it_pat) { 00035 return false; 00036 } 00037 ++it_text; 00038 ++it_pat; 00039 } 00040 return true; 00041 } 00042 00043 public: 00044 template <typename _Iterator, typename _PatternIterator> 00045 static void search( 00046 _Iterator first, 00047 _Iterator last, 00048 _PatternIterator pattern_first, 00049 _PatternIterator pattern_last, 00050 bool checkFalsePositives, 00051 strings::search_callback::SearchCallback<_Iterator>* callback 00052 ) { 00053 /* 00054 STATIC_ASSERT(std::iterator_traits<_Iterator>::value_type == 00055 std::iterator_traits<_PatternIterator>::value_type, 00056 "text and pattern should be of same type"); 00057 STATIC_ASSERT(std::iterator_traits<_Iterator>::difference_type == 00058 std::iterator_traits<_PatternIterator>::value_type, 00059 "Iterator difference types are different!"); 00060 */ 00061 typedef typename std::iterator_traits<_PatternIterator>::value_type ValueType; 00062 typedef typename std::iterator_traits<_PatternIterator>::difference_type SizeType; 00063 SizeType pattern_length = pattern_last - pattern_first; 00064 SizeType text_length = last - first; 00065 Preconditions::check(pattern_length > 0, "wrong order of [first, last) ?"); 00066 Preconditions::check(text_length > 0, "wrong order of [first, last) ?"); 00067 if (text_length < pattern_length) { 00068 return; // nothing to do 00069 } 00070 00071 typedef long long int HashValue; 00072 // Empirically determined good values causing no collisions on real testdata 00073 const HashValue GOOD_PRIME = 2038074743; 00074 const HashValue GOOD_C = 101; // is a group generator 00075 RollingHash<HashValue> hashFunction = RollingHash<HashValue>(pattern_length, GOOD_PRIME, GOOD_C); 00076 00077 // compute hash value of the pattern 00078 _PatternIterator pattern_it = pattern_first; 00079 while (pattern_it != pattern_last) { 00080 hashFunction.roll(*pattern_it, (ValueType) 0); 00081 ++pattern_it; 00082 } 00083 00084 const HashValue patternHash = hashFunction.getHash(); 00085 00086 hashFunction = RollingHash<HashValue>(pattern_length, GOOD_PRIME, GOOD_C); 00087 _Iterator it_new = first; 00088 for (SizeType i = 0; i < pattern_length; i++) { 00089 hashFunction.roll(*it_new, (ValueType) 0); 00090 ++it_new; 00091 } 00092 00093 _Iterator it_old = first; 00094 while (it_new != last) { 00095 if (patternHash == hashFunction.getHash()) { 00096 if (checkFalsePositives && !checkMatch(it_old, pattern_first, 00097 pattern_last)) { 00098 // report an mismatch 00099 } else { 00100 callback->foundMatch(it_old); 00101 } 00102 } 00103 00104 hashFunction.roll(*it_new, *it_old); 00105 ++it_new; 00106 ++it_old; 00107 } 00108 00109 } 00110 }; 00111 00112 } // namespace suffix_array 00113 } // namespace strings 00114 #endif