From 98493816255fa8f83be7fb082980608af7e3a19b Mon Sep 17 00:00:00 2001
From: ~Chiluka Akshitha <22wh1a12b5@bvrithyderabad.edu.in>
Date: Mon, 7 Oct 2024 19:20:51 +0530
Subject: [PATCH] added all pattern algorithms

---
 .../Pattern Search/aho_corasick.py            | 78 +++++++++++++++++++
 .../Pattern Search/bitap_algorithm.py         | 33 ++++++++
 .../Pattern Search/boyer_moore.py             | 49 ++++++++++++
 .../Pattern Search/kmp_pattern_search.py      | 59 ++++++++++++++
 .../Pattern Search/naive_pattern_search.py    | 29 +++++++
 .../Pattern Search/rabin_karp.py              | 46 +++++++++++
 .../Pattern Search/suffix_array.py            | 52 +++++++++++++
 7 files changed, 346 insertions(+)
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py
 create mode 100644 Algorithms_and_Data_Structures/Pattern Search/suffix_array.py

diff --git a/Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py b/Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py
new file mode 100644
index 0000000000..b358a57af7
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/aho_corasick.py	
@@ -0,0 +1,110 @@
+# aho_corasick.py
+
+from collections import deque
+
+
+class AhoCorasick:
+    """
+    Aho-Corasick automaton for finding several patterns in one pass.
+
+    Trie nodes are identified by integers; node 0 is the root.
+    Call add_word() for every pattern, then build() once, then search().
+    """
+
+    def __init__(self):
+        self.num_nodes = 1
+        # edges[node]: maps a character to the child node it leads to.
+        self.edges = [{}]
+        # fail[node]: failure link of the node; -1 means "no link" (root).
+        self.fail = [-1]
+        # output[node]: indices of the patterns recognised at the node.
+        self.output = [[]]
+        # output_lengths[node]: lengths of those patterns, kept parallel to
+        # output[node] so a match end can be converted into a match start.
+        self.output_lengths = [[]]
+
+    def add_word(self, word, index):
+        """
+        Adds a word to the Trie structure.
+
+        Parameters:
+        word (str): The word to add. An empty word is ignored.
+        index (int): The index of the word for output.
+        """
+        if not word:
+            return
+        current_node = 0
+        for char in word:
+            if char not in self.edges[current_node]:
+                self.edges[current_node][char] = self.num_nodes
+                self.edges.append({})
+                self.fail.append(-1)
+                self.output.append([])
+                self.output_lengths.append([])
+                self.num_nodes += 1
+            current_node = self.edges[current_node][char]
+        self.output[current_node].append(index)
+        self.output_lengths[current_node].append(len(word))
+
+    def build(self):
+        """
+        Constructs the failure links for the Trie structure.
+
+        Must be called once, after every word was added: the pattern lists
+        are merged in place, so a second call would duplicate entries.
+        """
+        queue = deque()
+        for child_node in self.edges[0].values():
+            self.fail[child_node] = 0
+            queue.append(child_node)
+
+        # Breadth-first, so the link of a node is known before its children.
+        while queue:
+            current_node = queue.popleft()
+            for char, child_node in self.edges[current_node].items():
+                queue.append(child_node)
+                fallback_node = self.fail[current_node]
+                while fallback_node != -1 and char not in self.edges[fallback_node]:
+                    fallback_node = self.fail[fallback_node]
+                if fallback_node == -1:
+                    # Walked past the root: do not index edges[-1], which
+                    # would be the last trie node rather than "nothing".
+                    link = 0
+                else:
+                    link = self.edges[fallback_node][char]
+                self.fail[child_node] = link
+                # Patterns ending at the link target also end here.
+                self.output[child_node].extend(self.output[link])
+                self.output_lengths[child_node].extend(self.output_lengths[link])
+
+    def search(self, text):
+        """
+        Searches for patterns in the given text using the Aho-Corasick algorithm.
+
+        Parameters:
+        text (str): The text to search for patterns.
+
+        Prints the starting index of each found pattern.
+        """
+        current_node = 0
+        for i, char in enumerate(text):
+            while current_node != -1 and char not in self.edges[current_node]:
+                current_node = self.fail[current_node]
+            if current_node == -1:
+                # No pattern starts with this character: restart at the root.
+                current_node = 0
+                continue
+            current_node = self.edges[current_node][char]
+            # i is the last character of each match; report where it starts.
+            for length in self.output_lengths[current_node]:
+                print(f"Pattern found at index {i - length + 1}")
+
+
+# Example usage
+if __name__ == "__main__":
+    ac = AhoCorasick()
+    patterns = ["he", "she", "his", "hers"]
+    for index, pattern in enumerate(patterns):
+        ac.add_word(pattern, index)
+    ac.build()
+    ac.search("ushers")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py b/Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py
new file mode 100644
index 0000000000..9deb4f5850
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/bitap_algorithm.py	
@@ -0,0 +1,38 @@
+# bitap_algorithm.py
+
+def bitap_search(text, pattern):
+    """
+    Bitap algorithm (also known as Shift-Or / Shift-And algorithm) for pattern searching.
+    This function finds all occurrences of 'pattern' in 'text' using bitwise operations.
+
+    Parameters:
+    text (str): The text in which to search for the pattern.
+    pattern (str): The pattern to search for.
+
+    Prints the starting index of each occurrence of the pattern.
+    An empty pattern produces no output.
+    """
+    m = len(pattern)
+    if m == 0:
+        return
+
+    # masks[c] has bit j set exactly when pattern[j] == c.
+    masks = {}
+    for j, char in enumerate(pattern):
+        masks[char] = masks.get(char, 0) | (1 << j)
+
+    # Bit j of state is set when pattern[:j + 1] matches the text ending at
+    # the current character (this is the Shift-And formulation).
+    state = 0
+    accept = 1 << (m - 1)  # Set once the whole pattern has matched
+    for i, char in enumerate(text):
+        # Extend every partial match by one character and start a new one;
+        # the mask keeps only the positions whose pattern character agrees.
+        state = ((state << 1) | 1) & masks.get(char, 0)
+        if state & accept:
+            print(f"Pattern found at index {i - m + 1}")
+
+
+# Example usage
+if __name__ == "__main__":
+    bitap_search("abcabcabc", "abc")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py b/Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py
new file mode 100644
index 0000000000..7276908ff5
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/boyer_moore.py	
@@ -0,0 +1,58 @@
+# boyer_moore.py
+
+def bad_character_heuristic(pattern):
+    """
+    Builds the bad character table used by the Boyer-Moore search.
+
+    Parameters:
+    pattern (str): The pattern to preprocess.
+
+    Returns:
+    dict: Each character of the pattern mapped to its last occurrence index.
+    """
+    # A repeated character overwrites its entry, so the rightmost index wins.
+    return {char: position for position, char in enumerate(pattern)}
+
+
+def boyer_moore(text, pattern):
+    """
+    Boyer-Moore algorithm for pattern searching.
+    Reports every occurrence of 'pattern' in 'text', using the bad character
+    table to jump over alignments that cannot match.
+
+    Parameters:
+    text (str): The text in which to search for the pattern.
+    pattern (str): The pattern to search for.
+
+    Prints the starting index of each occurrence of the pattern.
+    """
+    last_seen = bad_character_heuristic(pattern)
+    pattern_len = len(pattern)
+    last_start = len(text) - pattern_len  # Largest shift that still fits
+    shift = 0  # Position of the pattern relative to the text
+
+    while shift <= last_start:
+        # Compare right to left; k drops below 0 only on a full match.
+        k = pattern_len - 1
+        while k >= 0 and pattern[k] == text[shift + k]:
+            k -= 1
+
+        if k >= 0:
+            # Line up the mismatched text character with its last occurrence
+            # in the pattern, always moving forward by at least one.
+            mismatched = text[shift + k]
+            shift += max(1, k - last_seen.get(mismatched, -1))
+            continue
+
+        print(f"Pattern found at index {shift}")
+        if shift < last_start:
+            # Align the character just past the match with the pattern.
+            following = text[shift + pattern_len]
+            shift += pattern_len - last_seen.get(following, -1)
+        else:
+            shift += 1
+
+
+# Example usage
+if __name__ == "__main__":
+    boyer_moore("ababcabcab", "abc")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py b/Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py
new file mode 100644
index 0000000000..3fb5605302
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/kmp_pattern_search.py	
@@ -0,0 +1,68 @@
+# kmp_pattern_search.py
+
+def kmp_pattern_search(text, pattern):
+    """
+    Knuth-Morris-Pratt (KMP) algorithm for pattern searching.
+    This function finds all occurrences of 'pattern' in 'text'
+    using the KMP algorithm, which preprocesses the pattern for efficient searching.
+
+    Parameters:
+    text (str): The text in which to search for the pattern.
+    pattern (str): The pattern to search for.
+
+    Prints the starting index of each occurrence of the pattern.
+    An empty pattern produces no output.
+    """
+    def compute_lps(pattern):
+        """
+        Computes the Longest Prefix Suffix (LPS) array for the pattern.
+
+        Parameters:
+        pattern (str): The pattern to preprocess.
+
+        Returns:
+        list: The LPS array; entry i is the length of the longest proper
+        prefix of pattern[:i + 1] that is also a suffix of it.
+        """
+        lps = [0] * len(pattern)
+        length = 0  # Length of the prefix matched so far
+        i = 1
+        while i < len(pattern):
+            if pattern[i] == pattern[length]:
+                length += 1
+                lps[i] = length
+                i += 1
+            elif length != 0:
+                # Fall back to the next shorter candidate; i stays put.
+                length = lps[length - 1]
+            else:
+                # lps[i] keeps its initial 0.
+                i += 1
+        return lps
+
+    if not pattern:
+        # There is no pattern[0] to compare against; without this guard
+        # the loop below raises IndexError for any non-empty text.
+        return
+
+    lps = compute_lps(pattern)
+    i = j = 0  # Index for text and pattern
+    while i < len(text):
+        if text[i] == pattern[j]:
+            i += 1
+            j += 1
+
+        if j == len(pattern):
+            print(f"Pattern found at index {i - j}")
+            # Keep the longest border so overlapping matches are found.
+            j = lps[j - 1]
+        elif i < len(text) and text[i] != pattern[j]:
+            if j != 0:
+                j = lps[j - 1]
+            else:
+                i += 1
+
+
+# Example usage
+if __name__ == "__main__":
+    kmp_pattern_search("ababcabcab", "abc")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py b/Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py
new file mode 100644
index 0000000000..aa23603478
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/naive_pattern_search.py	
@@ -0,0 +1,26 @@
+# naive_pattern_search.py
+
+def naive_pattern_search(text, pattern):
+    """
+    Naive pattern search algorithm.
+    Tries every alignment of 'pattern' inside 'text' and compares the two
+    character by character.
+
+    Parameters:
+    text (str): The text in which to search for the pattern.
+    pattern (str): The pattern to search for.
+
+    Prints the starting index of each occurrence of the pattern.
+    """
+    pattern_len = len(pattern)
+    alignments = len(text) - pattern_len + 1
+
+    for start in range(alignments):
+        # all() gives up at the first differing character.
+        if all(text[start + k] == pattern[k] for k in range(pattern_len)):
+            print(f"Pattern found at index {start}")
+
+
+# Example usage
+if __name__ == "__main__":
+    naive_pattern_search("ababcabcab", "abc")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py b/Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py
new file mode 100644
index 0000000000..cbdda45140
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/rabin_karp.py	
@@ -0,0 +1,50 @@
+# rabin_karp.py
+
+def rabin_karp(text, pattern):
+    """
+    Rabin-Karp algorithm for pattern searching.
+    This function finds all occurrences of 'pattern' in 'text'
+    using a hashing technique.
+
+    Parameters:
+    text (str): The text in which to search for the pattern.
+    pattern (str): The pattern to search for.
+
+    Prints the starting index of each occurrence of the pattern.
+    Nothing is printed when the pattern is empty or longer than the text.
+    """
+    d = 256  # Number of characters in the input alphabet
+    q = 101  # A prime number for hashing
+    m = len(pattern)
+    n = len(text)
+
+    # No window of length m exists in these cases; hashing the first m
+    # characters of the text would read past its end when m > n.
+    if m == 0 or m > n:
+        return
+
+    # Weight of the leading character of a window: d^(m - 1) mod q
+    h = pow(d, m - 1, q)
+
+    # Calculate the initial hash values for pattern and text
+    p = 0  # Hash value for pattern
+    t = 0  # Hash value for the current text window
+    for i in range(m):
+        p = (d * p + ord(pattern[i])) % q
+        t = (d * t + ord(text[i])) % q
+
+    # Slide the pattern over text one by one
+    for i in range(n - m + 1):
+        # Equal hashes can be a collision, so confirm with a real comparison.
+        if p == t and text[i:i + m] == pattern:
+            print(f"Pattern found at index {i}")
+
+        if i < n - m:
+            # Drop text[i], append text[i + m]. Python's % with a positive
+            # modulus never yields a negative result, so no fix-up is needed.
+            t = (d * (t - ord(text[i]) * h) + ord(text[i + m])) % q
+
+
+# Example usage
+if __name__ == "__main__":
+    rabin_karp("ababcabcab", "abc")
diff --git a/Algorithms_and_Data_Structures/Pattern Search/suffix_array.py b/Algorithms_and_Data_Structures/Pattern Search/suffix_array.py
new file mode 100644
index 0000000000..1e35414072
--- /dev/null
+++ b/Algorithms_and_Data_Structures/Pattern Search/suffix_array.py	
@@ -0,0 +1,60 @@
+# suffix_array.py
+
+def build_suffix_array(s):
+    """
+    Builds the suffix array for the given string.
+
+    Parameters:
+    s (str): The input string.
+
+    Returns:
+    list: Start positions of the suffixes of s, in sorted suffix order.
+    """
+    # Order the start positions by the suffix that begins at each of them.
+    return sorted(range(len(s)), key=lambda start: s[start:])
+
+
+def kasai_lcp_array(s, suffix_array):
+    """
+    Constructs the LCP (Longest Common Prefix) array.
+
+    Parameters:
+    s (str): The input string.
+    suffix_array (list): The suffix array.
+
+    Returns:
+    list: The LCP array; entry k is the length of the common prefix of the
+    suffixes at suffix_array[k - 1] and suffix_array[k]. Entry 0 is 0.
+    """
+    size = len(s)
+    lcp = [0] * size
+
+    # position[start] is where the suffix beginning at start sits in the array.
+    position = [0] * size
+    for order, start in enumerate(suffix_array):
+        position[start] = order
+
+    common = 0
+    for start in range(size):
+        order = position[start]
+        if order == 0:
+            # The first suffix in the array has no predecessor to compare to.
+            continue
+        neighbour = suffix_array[order - 1]
+        while (max(start, neighbour) + common < size
+               and s[start + common] == s[neighbour + common]):
+            common += 1
+        lcp[order] = common
+        # Carry the match length over to the next start, minus one character.
+        common = max(common - 1, 0)
+    return lcp
+
+
+# Example usage
+if __name__ == "__main__":
+    text = "banana"
+    suffix_array = build_suffix_array(text)
+    lcp = kasai_lcp_array(text, suffix_array)
+
+    print("Suffix Array:", suffix_array)
+    print("LCP Array:", lcp)