Skip to content

Commit

Permalink
added all pattern algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
AKSHITHA-CHILUKA committed Oct 7, 2024
1 parent 5b79f2f commit 9849381
Show file tree
Hide file tree
Showing 7 changed files with 346 additions and 0 deletions.
78 changes: 78 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# aho_corasick.py

class AhoCorasick:
    """
    Multi-pattern matcher: a Trie of patterns augmented with failure links.

    Intended call order: ``add_word`` for every pattern, then ``build`` once,
    then ``search``. Calling ``build`` more than once appends the inherited
    outputs again, so matches would be reported repeatedly.

    Nodes are integer ids (0 is the root); per-node data is kept in parallel
    lists indexed by node id:
    - ``edges[node]``: dict mapping a character to the child node id.
    - ``fail[node]``: node to fall back to on a mismatch (-1 for the root).
    - ``output[node]``: indices of the patterns that end at this node.
    """

    def __init__(self):
        self.num_nodes = 1
        self.edges = [{}]
        self.fail = [-1]
        self.output = [[]]
        # Parallel to ``output``: the length of each listed pattern, needed to
        # turn the end position of a match into its starting position.
        self._output_lengths = [[]]

    def add_word(self, word, index):
        """
        Adds a word to the Trie structure.
        Parameters:
        word (str): The word to add.
        index (int): The index of the word for output.
        """
        current_node = 0
        for char in word:
            if char not in self.edges[current_node]:
                self.edges[current_node][char] = self.num_nodes
                self.edges.append({})
                self.fail.append(-1)
                self.output.append([])
                self._output_lengths.append([])
                self.num_nodes += 1
            current_node = self.edges[current_node][char]
        self.output[current_node].append(index)
        self._output_lengths[current_node].append(len(word))

    def build(self):
        """
        Constructs the failure links for the Trie structure.

        Nodes are visited breadth-first so that the failure target of a node
        (always a shallower node) is complete before the node inherits its
        outputs from it.
        """
        from collections import deque

        queue = deque()
        # Children of the root always fall back to the root itself.
        for child_node in self.edges[0].values():
            self.fail[child_node] = 0
            queue.append(child_node)

        while queue:
            current_node = queue.popleft()
            for char, child_node in self.edges[current_node].items():
                queue.append(child_node)
                fallback_node = self.fail[current_node]
                while fallback_node != -1 and char not in self.edges[fallback_node]:
                    fallback_node = self.fail[fallback_node]
                if fallback_node == -1:
                    # Ran off the root: no proper suffix continues with char.
                    # Handled explicitly instead of indexing edges[-1], which
                    # would silently read the last node's edges.
                    self.fail[child_node] = 0
                else:
                    self.fail[child_node] = self.edges[fallback_node][char]
                link = self.fail[child_node]
                # Every pattern ending at the failure target also ends here.
                self.output[child_node].extend(self.output[link])
                self._output_lengths[child_node].extend(self._output_lengths[link])

    def search(self, text):
        """
        Searches for patterns in the given text using the Aho-Corasick algorithm.
        Parameters:
        text (str): The text to search for patterns.
        Prints the starting index of each found pattern.
        Returns:
        list: (start_index, pattern_index) tuples in the order the matches
        are found while scanning the text from left to right.
        """
        matches = []
        current_node = 0
        for i, char in enumerate(text):
            while current_node != -1 and char not in self.edges[current_node]:
                current_node = self.fail[current_node]
            if current_node == -1:
                # Not even the root has an edge for char: restart at the root.
                current_node = 0
                continue
            current_node = self.edges[current_node][char]
            hits = zip(self.output[current_node], self._output_lengths[current_node])
            for pattern_index, length in hits:
                # i is the position of the match's last character.
                start = i - length + 1
                print(f"Pattern found at index {start}")
                matches.append((start, pattern_index))
        return matches

# Example usage
if __name__ == "__main__":
    # Demo: index four keywords, build the automaton, scan a sample text.
    automaton = AhoCorasick()
    for keyword_id, keyword in enumerate(["he", "she", "his", "hers"]):
        automaton.add_word(keyword, keyword_id)
    automaton.build()
    automaton.search("ushers")
33 changes: 33 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# bitap_algorithm.py

def bitap_search(text, pattern):
    """
    Bitap algorithm (Shift-And formulation) for exact pattern searching.
    This function finds all occurrences of 'pattern' in 'text' using bitwise operations.

    A single integer holds the match state: bit j is set when pattern[:j + 1]
    matches the text ending at the current position. Python integers are
    unbounded, so the pattern length is not limited by a machine word.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Returns:
    list: The starting indices of all occurrences, in increasing order.
    An empty pattern yields no matches.
    """
    m = len(pattern)
    if m == 0:
        return []

    # char_masks[c] has bit j set exactly when pattern[j] == c.
    char_masks = {}
    for j, char in enumerate(pattern):
        char_masks[char] = char_masks.get(char, 0) | (1 << j)

    accept_bit = 1 << (m - 1)  # set when the whole pattern has matched
    state = 0
    matches = []
    for i, char in enumerate(text):
        # Extend every partial match by one character (shift), allow a new
        # match to start here (| 1), then keep only those char agrees with.
        state = ((state << 1) | 1) & char_masks.get(char, 0)
        if state & accept_bit:
            start = i - m + 1
            print(f"Pattern found at index {start}")
            matches.append(start)
    return matches

# Example usage
if __name__ == "__main__":
    # Demo run on a small sample input.
    sample_text = "abcabcabc"
    sample_pattern = "abc"
    bitap_search(sample_text, sample_pattern)
49 changes: 49 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# boyer_moore.py

def bad_character_heuristic(pattern):
    """
    Builds the bad character table used by Boyer-Moore.
    Parameters:
    pattern (str): The pattern to preprocess.
    Returns:
    dict: Maps each character of the pattern to the index of its last
    (rightmost) occurrence; later positions overwrite earlier ones.
    """
    return {char: position for position, char in enumerate(pattern)}

def boyer_moore(text, pattern):
    """
    Boyer-Moore pattern search using the bad character rule.
    Each alignment is compared right to left; on a mismatch the pattern
    is shifted according to the last occurrence of the offending text
    character in the pattern, which lets it skip sections of the text.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    """
    last_seen = bad_character_heuristic(pattern)
    pattern_len = len(pattern)
    text_len = len(text)
    shift = 0  # Current alignment of the pattern against the text

    while shift <= text_len - pattern_len:
        # Compare from the right end of the pattern towards the left.
        pos = pattern_len - 1
        while pos >= 0 and pattern[pos] == text[shift + pos]:
            pos -= 1

        if pos >= 0:
            # Mismatch at pos: line up the last occurrence of the text
            # character with it, always moving forward by at least one.
            shift += max(1, pos - last_seen.get(text[shift + pos], -1))
            continue

        print(f"Pattern found at index {shift}")
        if shift + pattern_len < text_len:
            # Align the character just past the match with its last
            # occurrence in the pattern.
            shift += pattern_len - last_seen.get(text[shift + pattern_len], -1)
        else:
            shift += 1

# Example usage
if __name__ == "__main__":
    # Demo run on a small sample input.
    sample_text = "ababcabcab"
    sample_pattern = "abc"
    boyer_moore(sample_text, sample_pattern)
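59 changes: 59 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py
Original file line number Diff line number Diff line change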
@@ -0,0 +1,59 @@
# kmp_pattern_search.py

def kmp_pattern_search(text, pattern):
    """
    Knuth-Morris-Pratt (KMP) algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using the KMP algorithm, which preprocesses the pattern for efficient searching.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Returns:
    list: The starting indices of all occurrences, in increasing order.
    An empty pattern yields no matches.
    """
    def compute_lps(pattern):
        """
        Computes the Longest Prefix Suffix (LPS) array for the pattern.
        Parameters:
        pattern (str): The pattern to preprocess.
        Returns:
        list: The LPS array; lps[i] is the length of the longest proper
        prefix of pattern[:i + 1] that is also a suffix of it.
        """
        lps = [0] * len(pattern)
        length = 0  # length of the currently matched prefix
        for i in range(1, len(pattern)):
            # Fall back to shorter borders until one can be extended.
            while length and pattern[i] != pattern[length]:
                length = lps[length - 1]
            if pattern[i] == pattern[length]:
                length += 1
            lps[i] = length
        return lps

    m = len(pattern)
    matches = []
    if m == 0:
        # Without this guard pattern[0] below raises IndexError.
        return matches

    lps = compute_lps(pattern)
    j = 0  # number of pattern characters currently matched
    for i, char in enumerate(text):
        # On a mismatch reuse the longest border instead of re-reading text.
        while j and char != pattern[j]:
            j = lps[j - 1]
        if char == pattern[j]:
            j += 1
        if j == m:
            start = i - m + 1
            print(f"Pattern found at index {start}")
            matches.append(start)
            # Continue from the longest border so overlapping matches are found.
            j = lps[j - 1]
    return matches

# Example usage
if __name__ == "__main__":
    # Demo run on a small sample input.
    sample_text = "ababcabcab"
    sample_pattern = "abc"
    kmp_pattern_search(sample_text, sample_pattern)
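29 changes: 29 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py
Original file line number Diff line number Diff line change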
@@ -0,0 +1,29 @@
# naive_pattern_search.py

def naive_pattern_search(text, pattern):
    """
    Brute-force pattern search.
    Every alignment of 'pattern' inside 'text' is tried in turn and
    compared character by character, stopping at the first difference.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    """
    text_len, pattern_len = len(text), len(pattern)

    # Only alignments where the whole pattern fits inside the text.
    for start in range(text_len - pattern_len + 1):
        if all(text[start + k] == pattern[k] for k in range(pattern_len)):
            print(f"Pattern found at index {start}")

# Example usage
if __name__ == "__main__":
    # Demo run on a small sample input.
    sample_text = "ababcabcab"
    sample_pattern = "abc"
    naive_pattern_search(sample_text, sample_pattern)
46 changes: 46 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# rabin_karp.py

def rabin_karp(text, pattern):
    """
    Rabin-Karp algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using a rolling hash: windows whose hash equals the pattern's hash are
    verified by a direct comparison, so hash collisions never produce
    false matches.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Returns:
    list: The starting indices of all occurrences, in increasing order.
    An empty pattern, or a pattern longer than the text, yields no matches.
    """
    d = 256  # Number of characters in the input alphabet
    q = 101  # A prime number for hashing
    m = len(pattern)
    n = len(text)
    matches = []

    # Guard: the initial hash below reads text[0:m], which would raise
    # IndexError for m > n; an empty pattern has no meaningful window.
    if m == 0 or m > n:
        return matches

    # Weight of the leading character of a window: d^(m-1) mod q.
    h = pow(d, m - 1, q)

    # Calculate the initial hash values for pattern and first text window
    p = 0  # Hash value for pattern
    t = 0  # Hash value for the current text window
    for i in range(m):
        p = (d * p + ord(pattern[i])) % q
        t = (d * t + ord(text[i])) % q

    # Slide the pattern over text one by one
    for i in range(n - m + 1):
        # Compare the actual characters only when the hashes agree.
        if p == t and text[i:i + m] == pattern:
            print(f"Pattern found at index {i}")
            matches.append(i)

        if i < n - m:
            # Drop text[i], append text[i + m]. Python's % with a positive
            # modulus is never negative, so no sign correction is needed.
            t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
    return matches

# Example usage
if __name__ == "__main__":
    # Demo run on a small sample input.
    sample_text = "ababcabcab"
    sample_pattern = "abc"
    rabin_karp(sample_text, sample_pattern)
52 changes: 52 additions & 0 deletions Algorithms_and_Data_Structures/Pattern Search/suffix_array.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# suffix_array.py

def build_suffix_array(s):
    """
    Builds the suffix array of a string by sorting its suffixes.
    Parameters:
    s (str): The input string.
    Returns:
    list: The starting positions of all suffixes of s, ordered so that
    the suffixes they denote are in ascending lexicographic order.
    """
    # Order the start positions by the suffix each one begins.
    return sorted(range(len(s)), key=lambda start: s[start:])

def kasai_lcp_array(s, suffix_array):
    """
    Builds the LCP (Longest Common Prefix) array with Kasai's method.
    Parameters:
    s (str): The input string.
    suffix_array (list): The suffix array of s.
    Returns:
    list: lcp[k] is the length of the common prefix of the suffixes at
    suffix_array[k - 1] and suffix_array[k]; lcp[0] is 0.
    """
    length = len(s)
    lcp = [0] * length

    # rank is the inverse of the suffix array: text position -> sorted position.
    rank = [0] * length
    for sorted_pos, start in enumerate(suffix_array):
        rank[start] = sorted_pos

    common = 0  # prefix length carried over from the previous text position
    for start in range(length):
        sorted_pos = rank[start]
        if sorted_pos <= 0:
            # The first suffix in sorted order has no predecessor.
            continue
        neighbour = suffix_array[sorted_pos - 1]
        while (
            start + common < length
            and neighbour + common < length
            and s[start + common] == s[neighbour + common]
        ):
            common += 1
        lcp[sorted_pos] = common
        # The next position's suffix drops one leading character, so it
        # can lose at most one character of the common prefix.
        common = max(common - 1, 0)
    return lcp

# Example usage
if __name__ == "__main__":
    # Demo: build both arrays for a sample word and show them.
    sample = "banana"
    sample_sa = build_suffix_array(sample)
    sample_lcp = kasai_lcp_array(sample, sample_sa)

    print("Suffix Array:", sample_sa)
    print("LCP Array:", sample_lcp)

0 comments on commit 9849381

Please sign in to comment.