# Pattern-search algorithms from the patch "added all pattern algorithms"
# (Algorithms_and_Data_Structures/Pattern Search/). Each section below
# corresponds to one file of that patch.
#
# Conventions shared by every search routine in this module:
#   * each match is printed as "Pattern found at index <start>";
#   * the matches are also returned as a list, so callers can use the
#     result programmatically;
#   * an empty pattern yields no matches.


# ---------------------------------------------------------------------------
# aho_corasick.py
# ---------------------------------------------------------------------------

class AhoCorasick:
    """
    Multi-pattern matcher: a trie of the patterns plus failure links.

    Typical use: call add_word() for every pattern, call build() once,
    then call search() on any number of texts.

    Attributes:
        num_nodes (int): Number of trie nodes; node 0 is the root.
        edges (list): edges[node] maps a character to the child node id.
        fail (list): fail[node] is the failure link of the node; -1 for the
            root and for nodes that build() has not processed yet.
        output (list): output[node] holds the indices of the patterns that
            end at this node (after build(), also of the patterns ending
            at nodes reachable through failure links).
    """

    def __init__(self):
        self.num_nodes = 1
        self.edges = [{}]
        self.fail = [-1]
        self.output = [[]]
        # Parallel to self.output: the length of each recorded pattern, so
        # that search() can turn the end position of a match into its
        # starting position.
        self._lengths = [[]]

    def add_word(self, word, index):
        """
        Adds a word to the Trie structure.

        Empty words are ignored: they have no character to anchor a match.

        Parameters:
        word (str): The word to add.
        index (int): The index of the word for output.
        """
        if not word:
            return
        current_node = 0
        for char in word:
            if char not in self.edges[current_node]:
                self.edges[current_node][char] = self.num_nodes
                self.edges.append({})
                self.fail.append(-1)
                self.output.append([])
                self._lengths.append([])
                self.num_nodes += 1
            current_node = self.edges[current_node][char]
        self.output[current_node].append(index)
        self._lengths[current_node].append(len(word))

    def build(self):
        """
        Constructs the failure links for the Trie structure.

        Call this once, after all words have been added: every call merges
        the outputs of the failure target into each node again, so a
        second call would duplicate the inherited entries.
        """
        from collections import deque

        queue = deque()
        # Depth-1 nodes always fall back to the root.
        for child_node in self.edges[0].values():
            self.fail[child_node] = 0
            queue.append(child_node)

        # Breadth-first order guarantees that the failure target of a node
        # (which is strictly shallower) is complete before it is read.
        while queue:
            current_node = queue.popleft()
            for char, child_node in self.edges[current_node].items():
                queue.append(child_node)
                fallback_node = self.fail[current_node]
                while fallback_node != -1 and char not in self.edges[fallback_node]:
                    fallback_node = self.fail[fallback_node]
                # -1 means the walk fell off the root. Handle it explicitly
                # instead of letting edges[-1] index the last trie node.
                if fallback_node == -1:
                    target = 0
                else:
                    target = self.edges[fallback_node][char]
                self.fail[child_node] = target
                self.output[child_node].extend(self.output[target])
                self._lengths[child_node].extend(self._lengths[target])

    def search(self, text):
        """
        Searches for patterns in the given text using the Aho-Corasick algorithm.

        Parameters:
        text (str): The text to search for patterns.

        Prints the starting index of each found pattern.

        Returns:
        list: One (start_index, pattern_index) tuple per match, in the
        order in which the matches are printed.
        """
        matches = []
        current_node = 0
        for i, char in enumerate(text):
            while current_node != -1 and char not in self.edges[current_node]:
                current_node = self.fail[current_node]
            if current_node == -1:
                # No pattern can continue with this character; restart.
                current_node = 0
                continue
            current_node = self.edges[current_node][char]
            recorded = zip(self.output[current_node], self._lengths[current_node])
            for pattern_index, length in recorded:
                # i is the position of the LAST character of the match.
                start = i - length + 1
                print(f"Pattern found at index {start}")
                matches.append((start, pattern_index))
        return matches


# Example usage
if __name__ == "__main__":
    ac = AhoCorasick()
    patterns = ["he", "she", "his", "hers"]
    for index, pattern in enumerate(patterns):
        ac.add_word(pattern, index)
    ac.build()
    ac.search("ushers")


# ---------------------------------------------------------------------------
# bitap_algorithm.py
# ---------------------------------------------------------------------------

def bitap_search(text, pattern):
    """
    Bitap algorithm (also known as Shift-Or algorithm) for pattern searching.
    This function finds all occurrences of 'pattern' in 'text' using bitwise operations.

    This is the Shift-And formulation: bit j of the running state is set
    when pattern[0..j] matches the text ending at the current character.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.

    Prints the starting index of each occurrence of the pattern.

    Returns:
    list: The starting indices of all occurrences, in increasing order.
    """
    m = len(pattern)
    if m == 0:
        return []

    # masks[c] has bit j set for every position j with pattern[j] == c.
    masks = {}
    for j, char in enumerate(pattern):
        masks[char] = masks.get(char, 0) | (1 << j)

    accept = 1 << (m - 1)  # set in the state when the whole pattern matched
    state = 0
    matches = []
    for i, char in enumerate(text):
        # Extend every partial match by one character (shift), allow a new
        # match to start here (| 1), and keep only those that agree with
        # the current character (& mask).
        state = ((state << 1) | 1) & masks.get(char, 0)
        if state & accept:
            start = i - m + 1
            print(f"Pattern found at index {start}")
            matches.append(start)
    return matches


# Example usage
if __name__ == "__main__":
    bitap_search("abcabcabc", "abc")


# ---------------------------------------------------------------------------
# boyer_moore.py
# ---------------------------------------------------------------------------

def bad_character_heuristic(pattern):
    """
    Preprocesses the pattern to create the bad character table.

    Parameters:
    pattern (str): The pattern to preprocess.

    Returns:
    dict: A dictionary mapping characters to their last occurrence index.
    """
    # Later positions overwrite earlier ones, leaving the last occurrence.
    return {char: i for i, char in enumerate(pattern)}


def boyer_moore(text, pattern):
    """
    Boyer-Moore algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using the Boyer-Moore algorithm, which skips sections of the text.

    Only the bad character rule is used to compute shifts.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.

    Prints the starting index of each occurrence of the pattern.

    Returns:
    list: The starting indices of all occurrences, in increasing order.
    """
    m = len(pattern)
    n = len(text)
    matches = []
    if m == 0:
        return matches

    bad_char = bad_character_heuristic(pattern)
    s = 0  # Shift of the pattern with respect to text

    while s <= n - m:
        # Compare right to left.
        j = m - 1
        while j >= 0 and pattern[j] == text[s + j]:
            j -= 1

        if j < 0:
            print(f"Pattern found at index {s}")
            matches.append(s)
            # Align the character just past the window with its last
            # occurrence in the pattern; at the end of the text step by 1.
            s += (m - bad_char.get(text[s + m], -1)) if s + m < n else 1
        else:
            # Align the mismatching text character with its last
            # occurrence in the pattern, but always advance at least 1.
            s += max(1, j - bad_char.get(text[s + j], -1))
    return matches


# Example usage
if __name__ == "__main__":
    boyer_moore("ababcabcab", "abc")


# ---------------------------------------------------------------------------
# kmp_pattern_search.py
# ---------------------------------------------------------------------------

def _compute_lps(pattern):
    """
    Computes the Longest Prefix Suffix (LPS) array for the pattern.

    lps[i] is the length of the longest proper prefix of pattern[:i + 1]
    that is also a suffix of it.

    Parameters:
    pattern (str): The pattern to preprocess.

    Returns:
    list: The LPS array.
    """
    lps = [0] * len(pattern)
    length = 0  # length of the border matched so far
    for i in range(1, len(pattern)):
        while length and pattern[i] != pattern[length]:
            length = lps[length - 1]
        if pattern[i] == pattern[length]:
            length += 1
        lps[i] = length
    return lps


def kmp_pattern_search(text, pattern):
    """
    Knuth-Morris-Pratt (KMP) algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using the KMP algorithm, which preprocesses the pattern for efficient searching.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.

    Prints the starting index of each occurrence of the pattern.

    Returns:
    list: The starting indices of all occurrences, in increasing order.
    """
    m = len(pattern)
    matches = []
    if m == 0:
        # Without this guard pattern[j] below would raise IndexError.
        return matches

    lps = _compute_lps(pattern)
    j = 0  # number of pattern characters currently matched
    for i, char in enumerate(text):
        # On a mismatch fall back to the next shorter border instead of
        # re-reading text characters.
        while j and char != pattern[j]:
            j = lps[j - 1]
        if char == pattern[j]:
            j += 1
        if j == m:
            start = i - m + 1
            print(f"Pattern found at index {start}")
            matches.append(start)
            j = lps[j - 1]  # keep going to find overlapping occurrences
    return matches


# Example usage
if __name__ == "__main__":
    kmp_pattern_search("ababcabcab", "abc")


# ---------------------------------------------------------------------------
# naive_pattern_search.py
# ---------------------------------------------------------------------------

def naive_pattern_search(text, pattern):
    """
    Naive pattern search algorithm.
    This function searches for all occurrences of 'pattern' in 'text'
    by checking each position.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.

    Prints the starting index of each occurrence of the pattern.

    Returns:
    list: The starting indices of all occurrences, in increasing order.
    """
    n = len(text)
    m = len(pattern)
    matches = []
    if m == 0:
        return matches

    # range() is empty when the pattern is longer than the text.
    for i in range(n - m + 1):
        # all() stops at the first mismatching character.
        if all(text[i + j] == pattern[j] for j in range(m)):
            print(f"Pattern found at index {i}")
            matches.append(i)
    return matches


# Example usage
if __name__ == "__main__":
    naive_pattern_search("ababcabcab", "abc")


# ---------------------------------------------------------------------------
# rabin_karp.py
# ---------------------------------------------------------------------------

def rabin_karp(text, pattern, *, base=256, modulus=101):
    """
    Rabin-Karp algorithm for pattern searching.
    This function finds all occurrences of 'pattern' in 'text'
    using a hashing technique.

    Parameters:
    text (str): The text in which to search for the pattern.
    pattern (str): The pattern to search for.
    base (int): Radix of the polynomial hash (number of characters in the
        input alphabet). Keyword-only.
    modulus (int): A prime number for hashing. Keyword-only.

    Prints the starting index of each occurrence of the pattern.

    Returns:
    list: The starting indices of all occurrences, in increasing order.
    """
    m = len(pattern)
    n = len(text)
    matches = []
    if m == 0 or m > n:
        # No window of the text can hold the pattern; returning here also
        # keeps the initial hashing loop from reading past the text.
        return matches

    # Weight of the leading character of a window: base**(m - 1) % modulus.
    h = pow(base, m - 1, modulus)

    # Calculate the initial hash values for pattern and text
    p = 0  # Hash value for pattern
    t = 0  # Hash value for the current text window
    for i in range(m):
        p = (base * p + ord(pattern[i])) % modulus
        t = (base * t + ord(text[i])) % modulus

    # Slide the pattern over text one by one
    for i in range(n - m + 1):
        # Equal hashes may be a collision, so confirm with a comparison.
        if p == t and text[i:i + m] == pattern:
            print(f"Pattern found at index {i}")
            matches.append(i)

        if i < n - m:
            # Drop text[i], append text[i + m]. Python's % already yields
            # a non-negative result for a positive modulus.
            t = (base * (t - ord(text[i]) * h) + ord(text[i + m])) % modulus
    return matches


# Example usage
if __name__ == "__main__":
    rabin_karp("ababcabcab", "abc")
# suffix_array.py

def build_suffix_array(s):
    """
    Builds the suffix array for the given string.

    The start positions of all suffixes are sorted by comparing the
    suffixes themselves.

    Parameters:
    s (str): The input string.

    Returns:
    list: The suffix array (start positions in sorted suffix order).
    """
    # Suffixes of one string all have different lengths, so no two keys
    # are ever equal and the order is fully determined by the suffix text.
    return sorted(range(len(s)), key=lambda start: s[start:])


def kasai_lcp_array(s, suffix_array):
    """
    Constructs the LCP (Longest Common Prefix) array.

    Entry k of the result is the length of the common prefix of the
    suffixes at positions k - 1 and k of the suffix array; entry 0 is 0.

    Parameters:
    s (str): The input string.
    suffix_array (list): The suffix array.

    Returns:
    list: The LCP array.
    """
    size = len(s)

    # position[start] = place of the suffix beginning at `start` in the
    # suffix array (the inverse permutation).
    position = [0] * size
    for order, start in enumerate(suffix_array):
        position[start] = order

    lcp = [0] * size
    matched = 0  # characters already known to match, carried between suffixes
    for start in range(size):
        order = position[start]
        if order == 0:
            # The first suffix in sorted order has no predecessor.
            continue
        previous = suffix_array[order - 1]
        while (
            start + matched < size
            and previous + matched < size
            and s[start + matched] == s[previous + matched]
        ):
            matched += 1
        lcp[order] = matched
        # Dropping the first character shortens the common prefix by at
        # most one, so the next suffix can resume from matched - 1.
        if matched > 0:
            matched -= 1
    return lcp


# Example usage
if __name__ == "__main__":
    text = "banana"
    suffix_array = build_suffix_array(text)
    lcp = kasai_lcp_array(text, suffix_array)

    print("Suffix Array:", suffix_array)
    print("LCP Array:", lcp)