rabin karp algorithm
class Encoder:
def __init__(self, string, to_show, warning: int = None, mod_value=11):
self.control = warning if warning is not None else to_show
self.range = [0, to_show]
self.hashed = 0
self.container = []
"""to avoid large value not needed in python (but consumes large space if ignored)"""
self.mod_value = mod_value
self.encode_them(string)
def encode_them(self, string):
self.container = [ord(_) for _ in string]
length = self.range[1] - self.range[0]
for i in range(length):
self.hashed += self.container[i] * (10 ** ((length - i - 1) % self.mod_value))
def shift(self):
if self.range[1] >= self.control:
return ""
length = self.range[1] - self.range[0]
self.hashed -= self.container[self.range[0]] * (10 ** ((length - 1) % self.mod_value))
self.hashed *= 10
self.hashed += self.container[self.range[1]]
self.range[0], self.range[1] = self.range[0] + 1, self.range[1] + 1
def rabin_karp_match(main_string, sub_string, mod_value=11):
main_length, sub_length = len(main_string), len(sub_string)
if sub_string == 0:
return 0
if sub_length > main_length:return -1
main, sub = Encoder(main_string, sub_length, main_length, mod_value), Encoder(sub_string, sub_length, warning=None,
mod_value=mod_value)
matches = [0]
for i in range(main_length - sub_length + 1):
if main.hashed == sub.hashed:
if main_string[i: i + sub_length] == sub_string:
matches[0] += 1
matches.append(i)
main.shift()
return tuple(matches)
print(rabin_karp_match("Rem-Rem", "Rem", 2))
print(rabin_karp_match("AYA-AYA", "AYA"))
print(rabin_karp_match("JOJO", "JO"))
print(rabin_karp_match("aaaa", "b"))
print(rabin_karp_match("sd",""))