-
Notifications
You must be signed in to change notification settings - Fork 213
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5b79f2f
commit 9849381
Showing
7 changed files
with
346 additions
and
0 deletions.
There are no files selected for viewing
78 changes: 78 additions & 0 deletions
78
Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# aho_corasick.py | ||
|
||
class AhoCorasick:
    """
    Aho-Corasick automaton for finding several patterns in one pass over a text.
    Usage: call add_word() for every pattern, call build() once, then search().
    Nodes are identified by integer ids; node 0 is the root of the Trie.
    """

    def __init__(self):
        self.num_nodes = 1
        self.edges = [{}]  # edges[node][char] -> id of the child node
        self.fail = [-1]  # failure link of each node; -1 is used for the root
        self.output = [[]]  # pattern indices recognised when a node is reached
        # Lengths of the patterns in `output`, kept in the same order, so that
        # search() can turn the end position of a match into its start.
        self.output_lengths = [[]]

    def add_word(self, word, index):
        """
        Adds a word to the Trie structure.
        Empty words are ignored: they have no characters to match.
        Parameters:
        word (str): The word to add.
        index (int): The index of the word for output.
        """
        if not word:
            return
        current_node = 0
        for char in word:
            next_node = self.edges[current_node].get(char)
            if next_node is None:
                # Allocate a fresh node and grow every per-node table with it.
                next_node = self.num_nodes
                self.edges[current_node][char] = next_node
                self.edges.append({})
                self.fail.append(-1)
                self.output.append([])
                self.output_lengths.append([])
                self.num_nodes += 1
            current_node = next_node
        self.output[current_node].append(index)
        self.output_lengths[current_node].append(len(word))

    def build(self):
        """
        Constructs the failure links for the Trie structure.
        Call this once, after all words have been added: the outputs of each
        node are extended in place, so a second call would duplicate them.
        """
        from collections import deque

        queue = deque()
        # Children of the root always fall back to the root.
        for child_node in self.edges[0].values():
            self.fail[child_node] = 0
            queue.append(child_node)

        # Breadth-first order: the failure target of a node is always
        # shallower, so it has been fully processed before it is read here.
        while queue:
            current_node = queue.popleft()
            for char, child_node in self.edges[current_node].items():
                queue.append(child_node)
                fallback_node = self.fail[current_node]
                while fallback_node != -1 and char not in self.edges[fallback_node]:
                    fallback_node = self.fail[fallback_node]
                if fallback_node == -1:
                    # Walked past the root: no proper suffix continues with char.
                    target_node = 0
                else:
                    target_node = self.edges[fallback_node][char]
                self.fail[child_node] = target_node
                # Every pattern ending at the failure target also ends here.
                self.output[child_node].extend(self.output[target_node])
                self.output_lengths[child_node].extend(
                    self.output_lengths[target_node]
                )

    def search(self, text):
        """
        Searches for patterns in the given text using the Aho-Corasick algorithm.
        Parameters:
        text (str): The text to search for patterns.
        Prints the starting index of each found pattern.
        """
        current_node = 0
        for i, char in enumerate(text):
            while current_node != -1 and char not in self.edges[current_node]:
                current_node = self.fail[current_node]
            if current_node == -1:
                # Not even the root has an edge for char: restart at the root.
                current_node = 0
                continue
            current_node = self.edges[current_node][char]
            for length in self.output_lengths[current_node]:
                # i is the last character of the match; convert to its start.
                print(f"Pattern found at index {i - length + 1}")
|
||
# Example usage: build an automaton for four words and scan a sample text.
if __name__ == "__main__":
    automaton = AhoCorasick()
    for position, word in enumerate(["he", "she", "his", "hers"]):
        automaton.add_word(word, position)
    automaton.build()
    automaton.search("ushers")
33 changes: 33 additions & 0 deletions
33
Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# bitap_algorithm.py | ||
|
||
def bitap_search(text, pattern):
    """
    Bitap algorithm (Shift-And form of the Shift-Or algorithm) for pattern searching.
    This function finds all occurrences of 'pattern' in 'text' using bitwise operations.
    Bit k of the running state is set when pattern[:k + 1] matches the text
    ending at the current character.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Nothing is printed for an empty pattern.
    """
    m = len(pattern)
    if m == 0:
        return

    # char_masks[c] has bit k set for every position k where pattern[k] == c.
    char_masks = {}
    for k, char in enumerate(pattern):
        char_masks[char] = char_masks.get(char, 0) | (1 << k)

    accept_bit = 1 << (m - 1)  # set once the whole pattern has been matched
    state = 0
    for i, char in enumerate(text):
        # Extend every partial match by one character and start a new one
        # (the "| 1"); the mask then drops those that char does not continue.
        state = ((state << 1) | 1) & char_masks.get(char, 0)
        if state & accept_bit:
            print(f"Pattern found at index {i - m + 1}")
|
||
# Example usage: search a short sample text when run as a script.
if __name__ == "__main__":
    sample_text, sample_pattern = "abcabcabc", "abc"
    bitap_search(sample_text, sample_pattern)
49 changes: 49 additions & 0 deletions
49
Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# boyer_moore.py | ||
|
||
def bad_character_heuristic(pattern):
    """
    Builds the bad character table used by the Boyer-Moore search.
    Parameters:
    pattern (str): The pattern to preprocess.
    Returns:
    dict: Maps each character of the pattern to the index of its last occurrence.
    """
    # Later positions overwrite earlier ones, leaving the last occurrence.
    return {char: position for position, char in enumerate(pattern)}
|
||
def boyer_moore(text, pattern):
    """
    Boyer-Moore pattern search using the bad character rule.
    The pattern is compared against the text from right to left, and the
    table from bad_character_heuristic() decides how far to move it.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    """
    last_seen = bad_character_heuristic(pattern)
    pattern_len = len(pattern)
    text_len = len(text)
    shift = 0  # Current alignment of the pattern against the text

    while shift <= text_len - pattern_len:
        # Compare right to left until a mismatch or the pattern is exhausted.
        pos = pattern_len - 1
        while pos >= 0 and pattern[pos] == text[shift + pos]:
            pos -= 1

        if pos >= 0:
            # Mismatch: line up the last occurrence of the offending text
            # character, always moving at least one position forward.
            shift += max(1, pos - last_seen.get(text[shift + pos], -1))
            continue

        print(f"Pattern found at index {shift}")
        after_match = shift + pattern_len
        if after_match < text_len:
            # Align the character following the match with its last occurrence.
            shift += pattern_len - last_seen.get(text[after_match], -1)
        else:
            shift += 1
|
||
# Example usage: search a short sample text when run as a script.
if __name__ == "__main__":
    sample_text, sample_pattern = "ababcabcab", "abc"
    boyer_moore(sample_text, sample_pattern)
59 changes: 59 additions & 0 deletions
59
Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# kmp_pattern_search.py | ||
|
||
def kmp_pattern_search(text, pattern):
    """
    Knuth-Morris-Pratt (KMP) algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using the KMP algorithm, which preprocesses the pattern for efficient searching.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Nothing is printed for an empty pattern.
    """
    def compute_lps(pattern):
        """
        Computes the Longest Prefix Suffix (LPS) array for the pattern.
        lps[i] is the length of the longest proper prefix of pattern[:i + 1]
        that is also a suffix of it.
        Parameters:
        pattern (str): The pattern to preprocess.
        Returns:
        list: The LPS array.
        """
        lps = [0] * len(pattern)
        length = 0  # Length of the prefix currently being extended
        for i in range(1, len(pattern)):
            # Fall back to shorter borders until one can be extended.
            while length and pattern[i] != pattern[length]:
                length = lps[length - 1]
            if pattern[i] == pattern[length]:
                length += 1
            lps[i] = length
        return lps

    # An empty pattern has no first character to compare against.
    if not pattern:
        return

    lps = compute_lps(pattern)
    m = len(pattern)
    j = 0  # Number of pattern characters currently matched
    for i, char in enumerate(text):
        # On a mismatch reuse the longest border instead of rescanning text.
        while j and char != pattern[j]:
            j = lps[j - 1]
        if char == pattern[j]:
            j += 1
        if j == m:
            print(f"Pattern found at index {i - m + 1}")
            # Keep the longest border so overlapping matches are found too.
            j = lps[j - 1]
|
||
# Example usage: search a short sample text when run as a script.
if __name__ == "__main__":
    sample_text, sample_pattern = "ababcabcab", "abc"
    kmp_pattern_search(sample_text, sample_pattern)
29 changes: 29 additions & 0 deletions
29
Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# naive_pattern_search.py | ||
|
||
def naive_pattern_search(text, pattern):
    """
    Brute-force pattern search.
    Every alignment of 'pattern' inside 'text' is tried in turn and compared
    character by character, stopping at the first mismatch.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    """
    pattern_len = len(pattern)
    last_start = len(text) - pattern_len

    for start in range(last_start + 1):
        # all() stops at the first differing character, like an early break.
        if all(text[start + k] == pattern[k] for k in range(pattern_len)):
            print(f"Pattern found at index {start}")
|
||
# Example usage: search a short sample text when run as a script.
if __name__ == "__main__":
    sample_text, sample_pattern = "ababcabcab", "abc"
    naive_pattern_search(sample_text, sample_pattern)
46 changes: 46 additions & 0 deletions
46
Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# rabin_karp.py | ||
|
||
def rabin_karp(text, pattern):
    """
    Rabin-Karp algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using a rolling hash; every hash hit is confirmed by comparing the text.
    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    Prints the starting index of each occurrence of the pattern.
    Nothing is printed for an empty pattern or one longer than the text.
    """
    d = 256  # Number of characters in the input alphabet
    q = 101  # A prime number for hashing
    m = len(pattern)
    n = len(text)

    # Without a full window in the text there is nothing to hash or match.
    if m == 0 or m > n:
        return

    # Weight of the leading character of a window: d^(m-1) mod q.
    h = pow(d, m - 1, q)

    # Calculate the initial hash values for pattern and text
    p = 0  # Hash value for pattern
    t = 0  # Hash value for the current window of text
    for i in range(m):
        p = (d * p + ord(pattern[i])) % q
        t = (d * t + ord(text[i])) % q

    # Slide the pattern over text one by one
    for i in range(n - m + 1):
        # Equal hashes can be a collision, so confirm with a real comparison.
        if p == t and text[i:i + m] == pattern:
            print(f"Pattern found at index {i}")

        if i < n - m:
            # Drop text[i], append text[i + m]. Python's % with a positive
            # modulus is never negative, so no sign correction is needed.
            t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
|
||
# Example usage: search a short sample text when run as a script.
if __name__ == "__main__":
    sample_text, sample_pattern = "ababcabcab", "abc"
    rabin_karp(sample_text, sample_pattern)
52 changes: 52 additions & 0 deletions
52
Algorithms_and_Data_Structures/Pattern Search/suffix_array.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# suffix_array.py | ||
|
||
def build_suffix_array(s):
    """
    Builds the suffix array of a string by sorting its suffixes directly.
    Parameters:
    s (str): The input string.
    Returns:
    list: Starting indices of the suffixes of 's', in sorted suffix order.
    """
    # Suffixes all have different lengths, so no two of them compare equal
    # and ordering the start positions by suffix text is unambiguous.
    return sorted(range(len(s)), key=lambda start: s[start:])
|
||
def kasai_lcp_array(s, suffix_array):
    """
    Computes the LCP (Longest Common Prefix) array with Kasai's algorithm.
    Entry k is the length of the common prefix of the suffixes starting at
    suffix_array[k - 1] and suffix_array[k]; entry 0 is always 0.
    Parameters:
    s (str): The input string.
    suffix_array (list): The suffix array.
    Returns:
    list: The LCP array.
    """
    size = len(s)

    # position[start] is where the suffix beginning at 'start' sits in the
    # suffix array (the inverse of suffix_array).
    position = [0] * size
    for order, start in enumerate(suffix_array):
        position[start] = order

    lcp = [0] * size
    matched = 0  # Characters already known to match, carried between suffixes
    for start in range(size):
        order = position[start]
        if order > 0:
            neighbour = suffix_array[order - 1]
            while (
                start + matched < size
                and neighbour + matched < size
                and s[start + matched] == s[neighbour + matched]
            ):
                matched += 1
            lcp[order] = matched
        # Dropping the first character loses at most one matched character.
        matched = max(matched - 1, 0)
    return lcp
|
||
# Example usage: build both arrays for a sample word and show them.
if __name__ == "__main__":
    text = "banana"
    suffix_array = build_suffix_array(text)
    lcp = kasai_lcp_array(text, suffix_array)

    for label, values in (("Suffix Array:", suffix_array), ("LCP Array:", lcp)):
        print(label, values)