diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..9950943 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,22 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +max_line_length = 120 +tab_width = 2 + +[*.py] +profile = black + +[*.{yaml,yml}] +indent_size = 2 + +[*.{md,markdown}] +indent_size = 2 + +[justfile] +indent_size = 2 \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..3a07cb8 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + time: "06:00" + timezone: "America/New_York" + + # Maintain dependencies for Pip + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + time: "06:00" + timezone: "America/New_York" \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c3a1485 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +name: Build and Test +on: + push: + branches: + - main + - develop + pull_request: + branches: + - main + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi + + - uses: psf/black@stable + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide + flake8 . --count --statistics + + - name: Lint with isort + run: | + isort . + + - name: Spell Check with codespell + run: | + codespell --skip="./.mypy_cache,./pytest_cache,.coverage,./htmlcov,./site,./.venv" + + - name: Lint with mypy + run: | + mypy + + - name: Test with pytest include Doc Test + run: | + pytest --cov --doctest-modules --cov-report html diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml new file mode 100644 index 0000000..f27242c --- /dev/null +++ b/.github/workflows/doc.yml @@ -0,0 +1,26 @@ +name: Build and Publish docs via GitHub Pages +on: + push: + branches: + - main + +jobs: + build: + name: Deploy docs + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi + + - name: Deploy docs + run: mkdocs gh-deploy --force diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ac90158 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Jetbrains IDE +.idea/ + +# iPython Notebooks +.ipynb_checkpoints +*/.ipynb_checkpoints/* +.DS_Store + +# Cache +.pytest_cache/ +.mypy_cache/ +__pycache__/ +*.py[cod] +*$py.class +*.so +__pypackages__/ +.hypothesis/ + +# Virtual Env +.venv + +# MkDocs output +site/ + +# Coverage +.coverage +htmlcov/ + diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ebb08ce --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +Initial Commit. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..8963d6b --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# Data Structures and Algorithms in Python +[![Build and Test](https://github.com/bhanutejags/python-dsa/actions/workflows/ci.yml/badge.svg)](https://github.com/bhanutejags/python-dsa/actions/workflows/ci.yml) +* [GitHub Pages Site](https://bhanutejags.github.io/python-dsa/) diff --git a/docs/algorithms/analysis_of_algorithms.md b/docs/algorithms/analysis_of_algorithms.md new file mode 100644 index 0000000..1641555 --- /dev/null +++ b/docs/algorithms/analysis_of_algorithms.md @@ -0,0 +1,102 @@ +--- +tags: + - algorithms +--- +# Analysis of Algorithms + +## Key characteristics of algorithms +* Input +* Output +* Definiteness -> Clear and Unambiguous +* Finiteness -> +* Effectiveness -> Nothing superfluous + +## Important Metrics of Algorithm performance +* Time +* Space +* Network Consumption +* Power Consumption + +???+ note "CPU registers" + For device drivers and other low-level algorithms, another metric of analyses could be the number of CPU registers the algorithm utilizes. + +* Datatypes are decided at Program time, we don’t usually care when we write pseudocode. +* Every simple statement, we assume takes 1 unit of time, + * Of course, this is really shallow, at machine-code this can change + * How deep we would want to go in an analysis is up to us. + +???+ example "Example 1: Calculating Time and Space Complexity" + ```python + { + temp = a; + a = b; + b = temp; + } + ``` + $f(n) = 3$ + +## Frequency Count Method +??? example "Sum of all elements in an array." + ```python + sum (A, n) { + s = 0; + for (i = 0; i < n; i++){ + s = s + A[i] + } + return s; + } + ``` + _Analyses_: + === "Time Complexity" + * `i` changes n+1 times, inside the loop the statements executed for n times. The first and return statement add 2 more the time complexity. + * $f(n) = 2n + 3 = O(n)$ -> Order of n. 
+ === "Space Complexity" + * A, n, s, i + * Just 3 variables, each is one word, and one array of size n. + * $S(n) = n + 3 = O(n)$ -> Order of n. + +??? example "Sum of two matrices" + ```python + add(A, B, n) { + for(i = 0; i < n; i++) { # -> n + 1 + for (j = 0; j < n; j++) { # -> n x (n + 1) + C[i,j] = A[i,j] + B[i,j]; # -> n x n + } + } + } + ``` + _Analyses_ + === "Time Complexity" + * $f(n) = n + 1 + n^2 + n + n^2 = 2*n^2 + 2*n + 1 = O(n^2)$ -> Order of $n^2$. + === "Space Complexity" + * $A$ -> $n^2$, $B$ -> $n^2$, $C$ -> $n^2$, $n$ -> $1$, $i$ -> $1$, $j$ -> $1$ + * Total: $3*n^2 + 3 = O(n^2)$ -> Order of $n^2$. + +## Time Complexity +* Ceil of non-integer count +* Conditional statements -> Worst and Best case statements + +### Class of Time Functions +* $O(1)$ -> Constant +* $O(log n)$ -> Logarithmic +* $O(n)$ -> Linear +* $O(n^2)$ -> Quadratic +* $O(n^3)$ -> Cubic +* $O(2^n)$ -> Exponential + +???+ note "Class of Time Functions" + $1 < log n < \sqrt{n} < n < n * (log n) < n^2 < n^3 ... < 2^n < 3^n < ... < n^n$ + +### Asymptotic Notation +* Comes from Mathematics +* O -> big-oh -> Upper Bound of a Function +* Ω -> big-omega -> Lower Bound of function +* θ -> theta -> Tight Bound (both Upper and Lower), sometimes called Average Bound + +### Best and Average + + +## Recurrence Relations - Recursive Algorithms +### Master Theorem +$T(n) = aT(n/b) + f(n)$ +[Brilliant - Master Theorem](https://brilliant.org/wiki/master-theorem/) diff --git a/docs/algorithms/divide_and_conquer/binary_search.md b/docs/algorithms/divide_and_conquer/binary_search.md new file mode 100644 index 0000000..4392c00 --- /dev/null +++ b/docs/algorithms/divide_and_conquer/binary_search.md @@ -0,0 +1,6 @@ +## Binary Search +* Requires a sorted array. 
+* Time Complexity: $O(log n)$ + +???+ note "Implementation of Binary Search in Python" + ::: dsa.algorithms.divide_and_conquer.binary_search.imperative_binary_search \ No newline at end of file diff --git a/docs/algorithms/divide_and_conquer/heap_sort.md b/docs/algorithms/divide_and_conquer/heap_sort.md new file mode 100644 index 0000000..9e6db6b --- /dev/null +++ b/docs/algorithms/divide_and_conquer/heap_sort.md @@ -0,0 +1,270 @@ +# Heap Sort +Heap Sort utilizes on Binary Tree, more special case of a Binary Tree called a Heap. +So, we have to take a short detour, towards Data Structures, to explain Binary Trees and their characteristics. + +## Binary Tree +A **Binary Tree** is a type of tree data structure in which every node has at most two child nodes, a left node and a right node. + +### Array Representation of Binary Tree +* Representing a Binary Tree as an array would go something like this, + * We would start from the top the root node would be the first element of the array. + * And, then we would go down one level, start from the left and go towards the right, the left-most element in the level would be the next element in the array. + * If certain nodes do not have children then, leave a null value for them in the array representation. + +```mermaid +graph TD + A[60 - index 0] --> B[50 - index 1]; + B --> C[16 - index 2]; + B --> D[15 - index 3]; + C --> E[10 - index 4]; + C --> F[9 - index 5]; + D --> G[8 - index 6]; + D --> H[7 - index 7]; +``` +An array representation of this tree would be, `[60, 50, 16, 15, 10, 9, 8, 7]`. + +```mermaid +graph TD + A[60 - index 0] --> B[50 - index 1]; + B --> C[16 - index 3]; + B --> D[15 - index 4]; + + A --> H[25 - index 2]; + H --> I[32 - index 5]; + H --> J[31 - index 6]; +``` +An array representation of this tree would be, `[60, 50, 25, 16, 15, 32, 31]`. 
+ + +```mermaid +graph TD + A[60 - index 0] --> B[50 - index 1]; + B --left-node--> C[16 - index 3]; + + A --> H[25 - index 2]; + H --> I[32 - index 5]; + H --> J[31 - index 6]; +``` +An array representation of this tree would be, `[60, 50, 25, 16, None, 32, 31]`. +???+ note + I added disambiguation to denote the left node, as [mermaid-js](https://mermaid-js.github.io/mermaid/) automatically centers child nodes when only one exists. + +Formally, this would mean: + +* If a node is at index $i$ (counting from $0$), the left child of the node should be at index $2*i + 1$ and the right child of the node should be at index $2*(i+1)$. +* For a particular node at index $i$ in the array representation its parent Node would be at $\lfloor (i - 1)/2 \rfloor$. + +### Full Binary Tree +* A Binary Tree is **Full**, when all nodes except the leaf nodes have two child nodes. +* Alternative definition would be, a binary tree with maximum number of nodes is called a Full binary tree. +* The maximum number of nodes would be, $2^{h + 1} - 1$ where $h$ is the height of the binary tree. +* The below is an example. +```mermaid +graph TD + A[60] --> B[50]; + B --> C[16]; + A --> H[25]; + B --> D[15]; + H --> I[32]; + H --> J[31]; +``` + +### Complete Binary Tree also called a Heap +* A **Complete Binary Tree** is a Binary Tree which has no gaps when represented as an array, this is also referred to as a **Heap**. +* Heaps are also used as **Priority Queues**. +* **Max Heap** is a Complete Binary Tree where all parent nodes are of greater value than or equal to the values of their child nodes, and the root node has the greatest value. +```mermaid +graph TD + A[60] --> B[50]; + B --> C[32]; + A --> H[25]; + B --> D[31]; + H --> I[17]; + H --> J[16]; +``` +* **Min Heap** is a Complete Binary Tree where all parent nodes have values lesser than or equal to the values of their child nodes, so the root node has the least value. 
+```mermaid +graph TD + A[16] --> B[50]; + B --> C[72]; + A --> H[25]; + B --> D[60]; + H --> I[37]; + H --> J[33]; +``` + +### Inserting and Deleting Elements from a Heap +#### Insertion on a Max Heap +A new element is added at the left-most bottom node, that is the end of the array representation. +Then, if the new node has a value greater than its parent i.e. the binary tree is no longer a Max Heap, the binary tree is then reorganized by shuffling parent and child nodes until it transforms into a Max Heap again. +So, if the new node's value is greater than its immediate parent node, the new node is bubbled upwards in the binary tree until new node's parent has a greater value, it might even become the root node of the tree if its value is greater than all of its parents. + +##### Time Complexity +Worst-case complexity of insertion on a Max Heap is $O(log n)$, happens when the new node has a value greater than all the nodes, as then the new node would have to be bubbled up to the root. +So then we would have to traverse the height of the binary tree, which for a Complete Binary Tree is $log n$, where $n$ is the number of nodes in the binary tree. +The best-case complexity would be $O(1)$, happens when the new node's value is lower than its parent, no reorganization would be required. + +???+ example "Insertion" + For example, if a new node with value $57$ is inserted into this Max Heap. + As the new node's value is greater than its immediate parent $32$, after the insertion binary tree would temporarily cease to be a Max Heap until reorganization. 
+ + Insertion Step 0 + + ```mermaid + graph TD + A[60] --> B[50]; + B --> C[32]; + A --> H[25]; + B --> D[31]; + H --> I[17]; + H --> J[16]; + subgraph insert-node-0 + C --> K[57]; + end + ``` + + Insertion Step 1 + + ```mermaid + graph TD + A[60] --> B[50]; + subgraph insert-node-1 + B --> K[57]; + end + A --> H[25]; + B --> D[31]; + H --> I[17]; + H --> J[16]; + K --> C[32]; + ``` + + Insertion Step 2 + + ```mermaid + graph TD + subgraph insert-node-2 + A[60] --> K[57]; + end + K --> B[50]; + A --> H[25]; + B --> D[31]; + H --> I[17]; + H --> J[16]; + B --> C[32]; + ``` + +#### Deletion on a Max Heap +Only the root element is deleted, the rest of the binary tree is reorganized to form a Max Heap again. +Then, the right most element on the last level of the binary tree is then moved to the root of tree, as moving any other node would make the binary tree **in-complete**. + +In other words, for the purposes of deletion the first element in the array representation of Max Heap is removed, and then the last element is moved to the root, pending reorganization. + +The next deletion, post reorganization would give us the next largest element, as it will be new root delete. + +On a Min Heap, this would be the opposite, as the root element would be the least element. + +???+ example "Deletion" + For example, for deleting an element from this Heap, we would delete the node with value 60. 
+ + Deletion Step 0 + + ```mermaid + graph TD + A[60 to-be-deleted] --> B[50]; + B --> C[32]; + A --> H[25]; + B --> D[31]; + H --> I[17]; + H --> J[16]; + style A fill:red,stroke:#333,stroke-width:4px + ``` + + Deletion Step 1 + + ```mermaid + graph TD + subgraph delete-node-2 + H[25] --> I[17]; + H --> J[16]; + end + + subgraph delete-node-1 + B[50] --> C[32]; + B --> D[31]; + end + ``` + + Deletion Step 2 + + ```mermaid + graph TD + subgraph delete-node-2 + H[25] --left-node--> I[17]; + end + + subgraph delete-node-1 + B[50] --> C[32]; + B --> D[31]; + end + + + subgraph moved-node + J[16] --> B; + J --> H; + end + style J fill:green,stroke:#333,stroke-width:4px + ``` + + Deletion Step 3: Making the Binary Tree a Max Heap again + + ```mermaid + graph TD + H[25] --left-node--> I[17]; + + J[16] --> C[32]; + J --> D[31]; + + + subgraph reorganized-node + B[50] --> J; + end + + B --> H; + + style J fill:green,stroke:#333,stroke-width:4px + ``` + + ```mermaid + graph TD + H[25] --left-node--> I[17]; + + B[50] --> C[32]; + C --> D[31]; + + + subgraph reorganized-node + C[32] --> J[16]; + end + + B --> H; + + style J fill:green,stroke:#333,stroke-width:4px + ``` + +##### Time Complexity +Deletion has a time complexity of $O(log n)$. +All average, best and worst-case time complexities are equal to $O(log n)$. + +### Heapify +#### Naive Creation of Heap +The naive way of creating a Heap, requires to insert each individual element one by one, and let the `insert` method organize the binary tree to a form a complete binary tree. +This would have a time complexity of $O(n * log n)$ + +#### Creation of Heap using Heapify +A more efficient way of creating a Heap, with $O(n)$ time complexity. + +### Priority Queue +Max or Min Heap let us express Priority Queues. That is when we want to the item in a queue that has the highest or lowest priority. + +As, when new items are inserted, they bubbled up the binary tree based on their value, or in this case a priority. 
+The next deletion would return the item with the highest priority. diff --git a/docs/algorithms/divide_and_conquer/index.md b/docs/algorithms/divide_and_conquer/index.md new file mode 100644 index 0000000..692efcf --- /dev/null +++ b/docs/algorithms/divide_and_conquer/index.md @@ -0,0 +1,12 @@ +# Divide and Conquer +A common strategy used to solve problems which can be divided to similar smaller problems. +These smaller problems can then be solved and their outputs aggregated to solve the main problem. + +Some common applications of **Divide and Conquer** are below, they will have implementation as part of this repository. + +* [Binary Search](./binary_search.md) +* [Heap Sort](./heap_sort.md) +* Finding Maximum and Minimum +* [Merge Sort](./merge_sort.md) +* Quick Sort +* Strassen's Matrix Multiplication diff --git a/docs/algorithms/divide_and_conquer/merge_sort.md b/docs/algorithms/divide_and_conquer/merge_sort.md new file mode 100644 index 0000000..c4136ed --- /dev/null +++ b/docs/algorithms/divide_and_conquer/merge_sort.md @@ -0,0 +1,43 @@ +# Merge Sort +## Merge Operation +The most fundamental operation of the Merge Sort, is the `merge` operations, which can take a number of sorted arrays and merge them into another sorted array. +The most common of which is a **Two Way Merge**, which takes two arrays as input and merges them. +The most general form of which is an `m` Way Merge, but these can be modeled with a repetitive 2-way merge. + +???+ example "Two Way Merge" + + `[0, 2, 4, 6]` `[0, 3, 6, 9]` --> `[0, 0, 2, 3, 4, 6, 6, 9]` + ```mermaid + graph TD + D[[0, 2, 4, 6]] --> C{Merge} + E[[0, 3, 6, 9]] --> C + C --> B[[0, 0, 2, 3, 4, 6, 6, 9]] + ``` + +## Two Way Merge Sort +Break down an array to be sorted, till there are only two elements. An array with only one element is sorted, by virtue of having only one element. + +## Merge Sort +A recursive algorithm. 
$\theta(n * log n)$ + +### Time Complexity +Time complexity of Two Way Merge sort is $O(n * log n)$ + +### Applications +* Large Sized List +* Linked List + * It is really easy to perform merge operation on Linked List, we do not need to create temporary 3rd array. +* External Sorting of huge data + * When the data that needs to be sorted exceeds the available RAM, merge sort can be used to sort it piece-wise while storing the intermediate results on disk. +* Stable + * Order of duplicates is maintained. + +#### Cons +* Extra Space + * Not an inplace sort + * This is however not needed in the case of LinkedList +* No small problem, for a small size it is slower. + * Insertion Sort $O(n^2)$ -> Also Stable + * Merge Sort $O(n log n)$ + * Bubble Sort $O(n^2)$ -> Also Stable +* Recursive, all recursive algorithms use the stack, need more memory due to the requirements of many stacks. diff --git a/docs/algorithms/divide_and_conquer/quick_sort.md b/docs/algorithms/divide_and_conquer/quick_sort.md new file mode 100644 index 0000000..1f65a6e --- /dev/null +++ b/docs/algorithms/divide_and_conquer/quick_sort.md @@ -0,0 +1,45 @@ +# Quick Sort +* Quick Sort is another sorting algorithm that uses the divide and conquer strategy. + +## Pivot Element +* A fundamental part of quick sort is something called a **Pivot Element**, it is the element in the array that is in the right place. +* By that we mean, all the elements to the Pivot Element's left are lower than it and all elements to its right are greater than it. + +???+ example "Pivot element" + For example, in this array `[8, 4, 3, 10, 12, 11, 32, 47]`, `10` would be a pivot element. + +## Algorithm +* Pivot elements allow us to apply the divide and conquer technique to the sorting of the array. +* The two new and separate sub-problems here then would be, the sorting of two slices of the arrays to the left and right of the pivot element. +* This process can be applied recursively until all elements are in their right place. 
+ +### Finding a Pivot element +* Partitioning is the process of finding the pivot element, or creating one if none exists. + +## Analysis +### Time Complexity +* The time complexity of this recursive algorithm depends on how evenly each array is partitioned into two arrays at each level of recursion. +* Best Case + * If the Pivot element at every recursion is the median of that array, that is, it appears at the middle of the array. + * Then, the array is divided in half at each recursion. + * The Time Complexity would be $O(n * log n)$. + * But, we do not know the median upfront, and cannot ensure that the median and pivot element are the same. + +???+ example "Pivot element" + For example, here `[1, 2, 3, 4, 5, 6, 7]`, if `4` is chosen as a pivot element, it is also the median of the array. + The array would be partitioned into two equal halves for the next recursion. + +* Worst Case + * Worst case occurs, when the partition occurs at the beginning of the list. + * The Time Complexity would be $O(n^2)$. +???+ example "Pivot element" + For example, here `[2, 4, 8, 10, 16, 18, 17]`, if `2` is chosen as a pivot element. + Then the array would be partitioned, with only one element on one of the partitions. + +#### Improving Worst Case +* Always select the middle element as pivot. +* Select a random element as pivot. + +### Memory Complexity +* Best case is $log n$, as there are $log n$ recursions and stacks. +* Worst case is $n$, as there are $n$ recursions and stacks. diff --git a/docs/algorithms/divide_and_conquer/strassens_matrix_multiplication.md b/docs/algorithms/divide_and_conquer/strassens_matrix_multiplication.md new file mode 100644 index 0000000..99f3509 --- /dev/null +++ b/docs/algorithms/divide_and_conquer/strassens_matrix_multiplication.md @@ -0,0 +1,66 @@ +# Strassen's Matrix Multiplication +Time Complexity of naive way of implementing this would be $O(n^3)$. 
+ +???+ example "Matrix Multiplication" + For example, for two $2 \times 2$ matrices $A$ and $B$, the product $C = A * B$ is also a $2 \times 2$ matrix. + Each element of $C$ is the sum of the products of a row of $A$ with the matching column of $B$. + + \begin{bmatrix} + a_{11} & a_{12} \\ + a_{21} & a_{22} \\ + \end{bmatrix} + + \begin{bmatrix} + b_{11} & b_{12} \\ + b_{21} & b_{22} \\ + \end{bmatrix} + + \begin{bmatrix} + c_{11} & c_{12} \\ + c_{21} & c_{22} \\ + \end{bmatrix} + + $A * B = C$ + + $c_{11} = a_{11}*b_{11} + a_{12}*b_{21}$ + + +## Algorithm +``` +func mm(A, B, n): + if (n <= 2): + perform normal matrix multiplication + else: + mid = n // 2 + mm(A11,B11,mid) + mm(A12,B21,mid) + mm(A11,B12,mid) + mm(A12,B22,mid) + mm(A21,B11,mid) + mm(A22,B21,mid) + mm(A21,B12,mid) + mm(A22,B22,mid) +``` + +### Time Complexity +This is a recursive algorithm. + +T(n) = +\begin{cases} +1, & \text{if $n \le 2$} \\ +8T(n/2) + n^2, & \text{if $n > 2$} +\end{cases} + +\left. +\begin{array}{ll} +\text{if $n \le 2$:}&1\\ +\text{if $n > 2$:}&8T(n/2) + n^2 +\end{array} +\right\} +=T(n) + + +$T(n) = 8*T(n/2) + n^2 $. + +Time Complexity is $\theta(n^3)$. + + +#### Strassen's Matrix Multiplication +Time Complexity is $O(n^{2.81})$. + diff --git a/docs/algorithms/greedy_method/dijkstra_algorithm.md b/docs/algorithms/greedy_method/dijkstra_algorithm.md new file mode 100644 index 0000000..c73fad7 --- /dev/null +++ b/docs/algorithms/greedy_method/dijkstra_algorithm.md @@ -0,0 +1 @@ +# Dijkstra Algorithm diff --git a/docs/algorithms/greedy_method/huffman_coding.md b/docs/algorithms/greedy_method/huffman_coding.md new file mode 100644 index 0000000..5120ebf --- /dev/null +++ b/docs/algorithms/greedy_method/huffman_coding.md @@ -0,0 +1,20 @@ +# Huffman Coding +* A compression algorithm that uses the concepts from Optimal Merge Pattern. +* Huffman coding is a variable size encoding. + +Usually each character in ASCII takes 8 bits. 
+ +Message to transmit: ACCBDAECCDAE + +| Alphabets | ASCII | +|-----------|-------| +| A | 65 | +| B | 66 | + +| Alphabets | Count | Frequency | Code | +|-----------|-------|-----------|--| +| A | 3 | 3/12 | | +| B | 1 | 1/12 | | +| C | 4 | 4/12 | | +| D | 2 | 2/12 | | +| E | 2 | 2/12 | | diff --git a/docs/algorithms/greedy_method/index.md b/docs/algorithms/greedy_method/index.md new file mode 100644 index 0000000..1123050 --- /dev/null +++ b/docs/algorithms/greedy_method/index.md @@ -0,0 +1,37 @@ +# Greedy Method +A common strategy used to solve problems. +Useful in solving optimization problems, the problems which require minimum or maximum result. + +Optimizations problems can be solved using the below approaches: +* Greedy Method +* Dynamic Programming +* Branch and Bound + +## Greedy Method +* While looping over stuff, check if it is feasible solution to the problem. + +``` +n = 5 +a = [a1, a2, a3, a4, a5] +algo greedy(a, n) +{ + for i = 1 to n do + { + x = select(a); + if is_feasible(x) then + { + solution = solution + x; + } + } +} +``` + +```python +n = 5 +a = [a1, a2, a3, a4, a5] +def greedy(a, n): + for i in range(1, n + 1): + x = select(a) + if is_feasible(x): + solution = solution + x; +``` diff --git a/docs/algorithms/greedy_method/job_sequencing_with_deadlines.md b/docs/algorithms/greedy_method/job_sequencing_with_deadlines.md new file mode 100644 index 0000000..1b7df67 --- /dev/null +++ b/docs/algorithms/greedy_method/job_sequencing_with_deadlines.md @@ -0,0 +1,33 @@ +# Job Sequencing with Deadlines +Greedy method can be used to determine the order or priority of execution of jobs, +where each job has an associated deadline and an associated profit, +to maximise the total profit. + +There also might be more jobs than are slots available for their execution, +so only a particular combination of jobs which maximise profits must be run. + +For the purposes of this problem, it is assumed each job takes 1 unit of time. 
+ +```python +from dataclasses import dataclass + +@dataclass +class Job: + job_id: int + profit: int + deadline: int +``` + +??? example "Example 1" + + | Jobs | $J_1$ | $J_2$ | $J_3$ | $J_4$ | $J_5$ | + |-----------|-------|-------|-------|-------|-------| + | Profits | 20 | 15 | 10 | 5 | 1 | + | Deadlines | 2 | 2 | 1 | 3 | 3 | + + If the number of slots is 3, only 3 of the above jobs can be scheduled for execution. + + $0 -J_2-> 1 -J_1-> 2 -J_4-> 3$ + + First select the jobs with the highest profits, and schedule them as late as their deadlines allow. + So the sequence of jobs is $\{J_2, J_1, J_4\}$, and total profit is $15 + 20 + 5 = 40$. diff --git a/docs/algorithms/greedy_method/knapsack_problem.md b/docs/algorithms/greedy_method/knapsack_problem.md new file mode 100644 index 0000000..50a994d --- /dev/null +++ b/docs/algorithms/greedy_method/knapsack_problem.md @@ -0,0 +1,5 @@ +# Knapsack Problem +Maximizing the profit, by placing various objects with different associated profits and weights inside a knapsack of limited capacity. + +We must judge the objects based on profit/weight, that is profit per KG of weight. + diff --git a/docs/algorithms/greedy_method/optimal_merge_pattern.md b/docs/algorithms/greedy_method/optimal_merge_pattern.md new file mode 100644 index 0000000..cd95ab0 --- /dev/null +++ b/docs/algorithms/greedy_method/optimal_merge_pattern.md @@ -0,0 +1,18 @@ +# Optimal Merge Pattern + +* Merging is the process of combining two sorted arrays to form a single large array, this is more precisely called Two-way merge. +* Merge has a time complexity of $\theta(n + m)$. + +* The discussion of optimal merge pattern comes up when more than two arrays have to be merged pairwise using only Two-way merge. +* The Greedy Method that should be followed here is that the smaller lists must always be merged first, and only then should we move on to merging larger lists. + + +??? 
example "Merge" + + | List | A | B | C | D | + |-------|-----|-----|-----|-----| + | Sizes | 6 | 5 | 3 | 2 | + + The best way to merge this would be to start with merging the smaller lists first and then move on towards the larger ones. + +$\sum_{i=1}^n (d_{i} \times x_{i})$, where $d_{i}$ is the distance of the $i^{th}$ array and $x_{i}$ is the length of the $i^{th}$ array. diff --git a/docs/algorithms/greedy_method/prims_and_kruskals.md b/docs/algorithms/greedy_method/prims_and_kruskals.md new file mode 100644 index 0000000..736e858 --- /dev/null +++ b/docs/algorithms/greedy_method/prims_and_kruskals.md @@ -0,0 +1,6 @@ +# Prim's and Kruskal's + +## Minimum Cost Spanning Tree + +Graphs are combinations of vertices and edges. +A spanning tree is a subgraph of a graph, which contains all $n$ vertices but has only $n - 1$ edges. diff --git a/docs/example.md b/docs/example.md new file mode 100644 index 0000000..5270918 --- /dev/null +++ b/docs/example.md @@ -0,0 +1,201 @@ +# Data Structures and Algorithms in Python + +For full documentation visit [mkdocs.org](https://www.mkdocs.org). + +## Project layout + + mkdocs.yml # The configuration file. + docs/ + index.md # The documentation homepage. + ... # Other markdown pages, images and other files. + +## Reference +::: dsa.algorithms.divide_and_conquer.binary_search + + +## Details +### Nested +???+ note "Open styled details" + + ??? danger "Nested details!" + And more content again. + +### Normal +??? success + Content. + +??? warning classes + Content. + + +## MathJax +$p(x|y) = \frac{p(y|x)p(x)}{p(y)}$, \(p(x|y) = \frac{p(y|x)p(x)}{p(y)}\). 
+ +$$ +E(\mathbf{v}, \mathbf{h}) = -\sum_{i,j}w_{ij}v_i h_j - \sum_i b_i v_i - \sum_j c_j h_j +$$ + +\[3 < 4\] + +\begin{align} +p(v_i=1|\mathbf{h}) & = \sigma\left(\sum_j w_{ij}h_j + b_i\right) \\ +p(h_j=1|\mathbf{v}) & = \sigma\left(\sum_i w_{ij}v_i + c_j\right) +\end{align} + +## Superfence +### Flowchart +```mermaid +graph LR + A[Start] --> B{Error?}; + B -->|Yes| C[Hmm...]; + C --> D[Debug]; + D --> B; + B ---->|No| E[Yay!]; +``` +### Sequence diagrams +```mermaid +sequenceDiagram + Alice->>John: Hello John, how are you? + loop Healthcheck + John->>John: Fight against hypochondria + end + Note right of John: Rational thoughts! + John-->>Alice: Great! + John->>Bob: How about you? + Bob-->>John: Jolly good! +``` +### State diagrams +```mermaid +stateDiagram-v2 + [*] --> Active + + state Active { + [*] --> NumLockOff + NumLockOff --> NumLockOn : EvNumLockPressed + NumLockOn --> NumLockOff : EvNumLockPressed + -- + [*] --> CapsLockOff + CapsLockOff --> CapsLockOn : EvCapsLockPressed + CapsLockOn --> CapsLockOff : EvCapsLockPressed + -- + [*] --> ScrollLockOff + ScrollLockOff --> ScrollLockOn : EvScrollLockPressed + ScrollLockOn --> ScrollLockOff : EvScrollLockPressed + } +``` +### Class diagrams +```mermaid +classDiagram + Person <|-- Student + Person <|-- Professor + Person : +String name + Person : +String phoneNumber + Person : +String emailAddress + Person: +purchaseParkingPass() + Address "1" <-- "0..1" Person:lives at + class Student{ + +int studentNumber + +int averageMark + +isEligibleToEnrol() + +getSeminarsTaken() + } + class Professor{ + +int salary + } + class Address{ + +String street + +String city + +String state + +int postalCode + +String country + -validate() + +outputAsLabel() + } +``` +### Entity-Relationship diagram +```mermaid +erDiagram + CUSTOMER ||--o{ ORDER : places + ORDER ||--|{ LINE-ITEM : contains + CUSTOMER }|..|{ DELIVERY-ADDRESS : uses +``` +```mermaid +sequenceDiagram + participant Alice + participant Bob + Alice->>John: Hello 
John, how are you? + loop Healthcheck + John->>John: Fight against hypochondria + end + Note right of John: Rational thoughts
prevail! + John-->>Alice: Great! + John->>Bob: How about you? + Bob-->>John: Jolly good! +``` + +### Large Diagram +* https://mermaid-js.github.io/mermaid/#/examples?id=basic-flowchart +```mermaid +graph TB + sq[Square shape] --> ci((Circle shape)) + + subgraph A + od>Odd shape]-- Two line
edge comment --> ro + di{Diamond with
line break} -.-> ro(Rounded
square
shape) + di==>ro2(Rounded square shape) + end + + %% Notice that no text in shape are added here instead that is appended further down + e --> od3>Really long text with linebreak
in an Odd shape] + + %% Comments after double percent signs + e((Inner / circle
and some odd
special characters)) --> f(,.?!+-*ز) + + cyr[Cyrillic]-->cyr2((Circle shape Начало)); + + classDef green fill:#9f6,stroke:#333,stroke-width:2px; + classDef orange fill:#f96,stroke:#333,stroke-width:4px; + class sq,e green + class di orange +``` + +???+ note "Implementation in various languages" + + === "C" + + ``` c + #include + + int main(void) { + printf("Hello world!\n"); + return 0; + } + ``` + + === "C++" + + ``` c++ + #include + + int main(void) { + std::cout << "Hello world!" << std::endl; + return 0; + } + ``` + + === "Python" + + ``` python + def main() -> int: + print("Hello world!") + return 0 + ``` + +``` yaml +theme: + features: + - content.code.annotate # (1) +``` + +1. :man_raising_hand: I'm a code annotation! I can contain `code`, __formatted + text__, images, ... basically anything that can be expressed in Markdown. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..d50008f --- /dev/null +++ b/docs/index.md @@ -0,0 +1,5 @@ +# Data Structures and Algorithms in Python +Implementation of various Algorithms and Data Structures in Python. + +The material is complied from a couple of sources, all of which are listed below. +Although it primarily follows [Algorithms YouTube playlist](https://www.youtube.com/watch?v=0IAPZzGSbME&list=PLDN4rrl48XKpZkf03iYFl-O29szjTrs_O&index=1), by [Abdul Bari :material-youtube:](https://www.youtube.com/channel/UCZCFT11CWBi3MHNlGf019nw). 
from typing import List


def binary_search() -> int:
    """
    Placeholder for a generic binary search entry point.

    :raises NotImplementedError: Always; use `imperative_binary_search` or
        `recursive_binary_search` instead.
    """
    # Raising keeps the stub consistent with the other unimplemented
    # functions in the package and honours the declared `int` return type.
    raise NotImplementedError()


def imperative_binary_search(input_list: List[int], element_to_search: int) -> int:
    """
    Performs an iterative binary search on the `input_list` to find the
    `element_to_search`, returns its index if found, else returns -1.

    >>> imperative_binary_search([10, 11, 13, 34, 91, 101, 137], 127)
    -1

    >>> imperative_binary_search([10, 11, 13, 34, 91, 127, 138], 127)
    5

    >>> imperative_binary_search([], 127)
    -1

    :param input_list: The list on which binary search is to be performed,
        sorted in ascending order.
    :param element_to_search: Element to search for
    :return: Index of the element if found, otherwise -1.
    """
    # `low` and `high` are inclusive bounds of the range still to be searched;
    # an empty list gives high == -1, so the loop body never runs.
    low = 0
    high = len(input_list) - 1

    while low <= high:
        mid = (low + high) // 2
        candidate = input_list[mid]

        if candidate == element_to_search:
            return mid

        if element_to_search < candidate:
            high = mid - 1
        else:
            low = mid + 1

    return -1


def recursive_binary_search(input_list: List[int], element_to_search: int) -> int:
    """
    Performs a recursive binary search on the `input_list` to find the
    `element_to_search`, returns its index if found, else returns -1.

    >>> recursive_binary_search([10, 11, 13, 34, 91, 101, 137], 127)
    -1

    >>> recursive_binary_search([10, 11, 13, 34, 91, 127, 138], 127)
    5

    >>> recursive_binary_search([], 127)
    -1

    :param input_list: The list on which binary search is to be performed,
        sorted in ascending order.
    :param element_to_search: Element to search for
    :return: Index of the element if found, otherwise -1.
    """

    def _binary_search(low: int, high: int) -> int:
        # `low` and `high` are inclusive; an empty range means "not found".
        # Checking `low > high` (rather than `low == high`) also covers the
        # case where `mid - 1` steps below `low`.
        if low > high:
            return -1

        mid = (low + high) // 2
        if element_to_search == input_list[mid]:
            return mid
        if element_to_search < input_list[mid]:
            return _binary_search(low, mid - 1)
        return _binary_search(mid + 1, high)

    # The upper bound is the last valid index, not the length of the list.
    return _binary_search(0, len(input_list) - 1)
from typing import List


def merge(input_list1: List[int], input_list2: List[int]) -> List[int]:
    """
    Performs a merge on the two sorted lists.

    :param input_list1: First list, sorted in ascending order.
    :param input_list2: Second list, sorted in ascending order.
    :return: A new sorted list holding every element of both inputs.
    """
    i: int = 0
    j: int = 0
    merged_list: List[int] = []
    while (i < len(input_list1)) and (j < len(input_list2)):
        if input_list1[i] > input_list2[j]:
            merged_list.append(input_list2[j])
            j += 1
        else:
            # Ties take from the first list, which keeps the merge stable.
            merged_list.append(input_list1[i])
            i += 1

    # Copy any leftover elements,
    # either input_list1 or input_list2 have leftover elements but not both.
    merged_list.extend(input_list1[i:])
    merged_list.extend(input_list2[j:])

    return merged_list


def two_way_merge_sort(input_list: List[int]) -> List[int]:
    """
    Sorts `input_list` with an iterative (bottom-up) two-way merge sort.

    Runs of width 1, 2, 4, ... are merged pairwise until a single run covers
    the whole list, so lengths that are not a power of two are handled too.

    :param input_list: The list to sort; it is not modified.
    :return: A new list with the elements in ascending order.
    """
    sorted_list: List[int] = list(input_list)
    width = 1

    # Keep doubling the run width until one run spans the list. Looping on
    # the width (instead of a precomputed log2 pass count) also makes empty
    # and single-element lists fall straight through.
    while width < len(sorted_list):
        merged: List[int] = []
        for start in range(0, len(sorted_list), 2 * width):
            middle = start + width
            # Slices clamp at the end of the list, so a short or missing
            # right-hand run is simply merged as-is.
            merged += merge(
                sorted_list[start:middle], sorted_list[middle : middle + width]
            )
        sorted_list = merged
        width *= 2

    return sorted_list


def merge_sort_recursive(input_list: List[int]) -> List[int]:
    """
    Sorts `input_list` with a recursive (top-down) merge sort.

    :param input_list: The list to sort; it is not modified.
    :return: A new list with the elements in ascending order.
    """

    def _recursive_merge_sort(low: int, high: int) -> List[int]:
        # Sorts the half-open range input_list[low:high].
        if high - low <= 1:
            return input_list[low:high]
        mid = (low + high) // 2
        return merge(_recursive_merge_sort(low, mid), _recursive_merge_sort(mid, high))

    return _recursive_merge_sort(0, len(input_list))


def quick_sort(input_list: List[int]) -> List[int]:
    """
    Sorts `input_list` with quick sort using Hoare's partition scheme.

    :param input_list: The list to sort; it is not modified.
    :return: A new list with the elements in ascending order.
    """
    items: List[int] = list(input_list)

    def _partition(low: int, high: int) -> int:
        # Partitions items[low..high] (inclusive) around the middle element
        # and returns j such that every element of items[low..j] is <= every
        # element of items[j + 1..high]. The middle pivot avoids the
        # quadratic worst case on already sorted input.
        pivot: int = items[(low + high) // 2]
        i: int = low - 1
        j: int = high + 1

        while True:
            i += 1
            while items[i] < pivot:
                i += 1

            j -= 1
            while items[j] > pivot:
                j -= 1

            if i >= j:
                return j

            # swap elements at i and j indices
            items[i], items[j] = items[j], items[i]

    def _quick_sort(low: int, high: int) -> None:
        if low < high:
            split = _partition(low, high)
            # With Hoare partitioning the split index belongs to the left part.
            _quick_sort(low, split)
            _quick_sort(split + 1, high)

    _quick_sort(0, len(items) - 1)
    return items
--git a/dsa/algorithms/divide_and_conquer/strassens_matrix_multiplication.py b/dsa/algorithms/divide_and_conquer/strassens_matrix_multiplication.py new file mode 100644 index 0000000..e69de29 diff --git a/dsa/algorithms/dynamic_programming/__init__.py b/dsa/algorithms/dynamic_programming/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dsa/algorithms/greedy_method/__init__.py b/dsa/algorithms/greedy_method/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dsa/algorithms/greedy_method/job_sequencing_with_deadlines.py b/dsa/algorithms/greedy_method/job_sequencing_with_deadlines.py new file mode 100644 index 0000000..ae56b9c --- /dev/null +++ b/dsa/algorithms/greedy_method/job_sequencing_with_deadlines.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import List + + +@dataclass +class Job: + job_id: int + profit: int + deadline: int + + def __eq__(self, other: object) -> bool: + if isinstance(other, Job): + return Job.job_id == Job.job_id + return False + + +def sequencing_job(jobs: List[Job], number_of_slots: int) -> List[Job]: + """Return the exact sequence of jobs which maximise the outputs. + + :param jobs: Input jobs. 
+ :param number_of_slots: Number of execution slots + :return: Sequenced jobs + """ + # It is assumed that all jobs take an equal amount of time to execute, + # and that job_ids of the input jobs start from 1 and are sequential + + jobs.sort(key=lambda element: element.profit) + + execution_timeline = [0] * number_of_slots + + scheduled_jobs: List[Job] = [] + + for job in jobs: + if job.deadline > len(execution_timeline) - 1: + continue + place_in_schedule = job.deadline - 1 + while place_in_schedule > 0: + if execution_timeline[place_in_schedule] == 0: + scheduled_jobs.append(job) + break + else: + place_in_schedule -= 1 + + return scheduled_jobs diff --git a/dsa/algorithms/greedy_method/knapsack_problem.py b/dsa/algorithms/greedy_method/knapsack_problem.py new file mode 100644 index 0000000..9b313b2 --- /dev/null +++ b/dsa/algorithms/greedy_method/knapsack_problem.py @@ -0,0 +1,2 @@ +def knapsack_problem() -> None: + raise NotImplementedError() diff --git a/dsa/data_structures/__init__.py b/dsa/data_structures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dsa/data_structures/trees/__init__.py b/dsa/data_structures/trees/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dsa/data_structures/trees/binary_tree/__init__.py b/dsa/data_structures/trees/binary_tree/__init__.py new file mode 100644 index 0000000..8cf04e6 --- /dev/null +++ b/dsa/data_structures/trees/binary_tree/__init__.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Optional + + +class Node: + def __init__(self, value: int): + self.value: int = value + self.left: Optional[Node] = None + self.right: Optional[Node] = None + + +class BinaryTree: + def __init__(self, root: Node): + self.root: Node = root diff --git a/dsa/data_structures/trees/binary_tree/heap.py b/dsa/data_structures/trees/binary_tree/heap.py new file mode 100644 index 0000000..4b837c8 --- /dev/null +++ b/dsa/data_structures/trees/binary_tree/heap.py @@ -0,0 +1,127 @@ 
import abc
from typing import Iterator, List, NoReturn, Optional, Sized, Union


class Heap(abc.ABC, Sized):
    """Interface shared by heap implementations."""

    @abc.abstractmethod
    def insert(self, element: int) -> None:
        """Add `element` to the heap."""
        ...

    @abc.abstractmethod
    def delete(self) -> int:
        """Remove and return the root element of the heap."""
        ...


class MaxHeap(Heap):
    """
    An implementation of MaxHeap in Python.
    Internally uses an array to represent the Heap: the children of the node
    at index `i` live at indices `2 * i + 1` and `2 * i + 2`, and its parent
    at index `(i - 1) // 2`.
    """

    def __init__(self, input_list: Optional[List[int]] = None) -> None:
        """
        :param input_list: Initial array representation of the heap. It is
            used as given (no heapify is performed) and copied, so the
            caller's list is never modified.
        """
        self._tree: List[int] = [] if input_list is None else list(input_list)

    @property
    def tree(self) -> List[int]:
        """The array representation of the heap."""
        return self._tree

    def insert(self, element: int) -> None:
        """
        Insert a node as the left-most node in the lowest level,
        then move it up until the max-heap property holds again.
        """
        # Insert element at the end of the array
        self._tree.append(element)
        element_index = len(self._tree) - 1

        # Move up the new element up the binary _tree,
        # until its value is no longer greater than its parent nodes
        while element_index > 0:
            # Parent index must mirror the 2i + 1 / 2i + 2 child layout.
            parent_element_index = (element_index - 1) // 2
            if self._tree[element_index] <= self._tree[parent_element_index]:
                break
            self._tree[element_index], self._tree[parent_element_index] = (
                self._tree[parent_element_index],
                self._tree[element_index],
            )
            element_index = parent_element_index

    def delete(self) -> int:
        """
        Delete the Root Node of the MaxHeap.

        :return: The deleted (largest) element.
        :raises IndexError: If the heap is empty.
        """
        if not self._tree:
            raise IndexError("delete from an empty heap")

        # Take the last element in the array, or the right-most element
        # on the last line of the Binary Tree; popping from the end is O(1).
        last_element: int = self._tree.pop()
        if not self._tree:
            # The heap held a single element: it was the root.
            return last_element

        deleted_element: int = self._tree[0]
        self._tree[0] = last_element
        element_index = 0

        # Move down the new root element down the binary _tree,
        # until its value is no longer less than its child nodes
        while True:
            left_index, right_index = self._get_child_indices(element_index)
            if left_index is None:
                break

            # Pick the larger child; on a tie the right child is chosen.
            child_index = left_index
            if (
                right_index is not None
                and self._tree[right_index] >= self._tree[left_index]
            ):
                child_index = right_index

            if self._tree[child_index] <= self._tree[element_index]:
                break

            self._tree[element_index], self._tree[child_index] = (
                self._tree[child_index],
                self._tree[element_index],
            )
            element_index = child_index

        return deleted_element

    def _get_child_indices(
        self, element_index: int
    ) -> Union[tuple[int, int], tuple[int, None], tuple[None, None]]:
        """
        Return the indices of the left and right children of `element_index`,
        with `None` in place of a child that does not exist.
        """
        size = len(self._tree)
        left_child_node_index = (element_index * 2) + 1
        right_child_node_index = (element_index * 2) + 2

        if right_child_node_index < size:
            return left_child_node_index, right_child_node_index
        if left_child_node_index < size:
            return left_child_node_index, None
        return None, None

    @staticmethod
    def heapify(items_to_insert: List[int]) -> NoReturn:
        """
        Not implemented yet.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def __len__(self) -> int:
        return len(self._tree)

    def __iter__(self) -> Iterator[int]:
        # Must return an iterator (not the list itself) for `iter()` to work.
        return iter(self._tree)
git branch | grep '* main' + git push origin + +pr TITLE='' BODY='' PROJECT='Algorithms': + gh pr create --base main --assignee @me --title "{{ TITLE }}" --body "{{ BODY }}" --project "{{ PROJECT }}" --web + +# clean up feature branch BRANCH +done BRANCH=`git rev-parse --abbrev-ref HEAD`: + #!/usr/bin/env bash + set -euxo pipefail + git checkout main + git diff --no-ext-diff --quiet --exit-code + git pull --rebase github main + git diff --no-ext-diff --quiet --exit-code {{BRANCH}} + git branch -D {{BRANCH}} + +# A polyglot recipe +polyglot: + #!/usr/bin/env python3 -v + import sys, platform, time + print(sys.version, platform.version()) + print('Hello from python!') + time.sleep(5) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..c5db447 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,108 @@ +site_name: Data Structures and Algorithms in Python +site_url: https://bhanutejags.github.io/python-dsa/ +site_author: Bhanu Teja Ganneni +site_description: >- + Implementation of various Algorithms and Data Structures in Python. 
+ +repo_name: bhanutejags/python-dsa/ +repo_url: https://github.com/bhanutejags/python-dsa/ + +copyright: Copyright © 2020 Bhanu Teja Ganneni + +nav: + - Home: index.md + - Algorithms: + - Analysis Of Algorithms: algorithms/analysis_of_algorithms.md + - Divide and Conquer: + - Index: algorithms/divide_and_conquer/index.md + - Binary Search: algorithms/divide_and_conquer/binary_search.md + - Heap Sort: algorithms/divide_and_conquer/heap_sort.md + - Merge Sort: algorithms/divide_and_conquer/merge_sort.md + - Quick Sort: algorithms/divide_and_conquer/quick_sort.md + - Strassen's Matrix Multiplication: algorithms/divide_and_conquer/strassens_matrix_multiplication.md + - Greedy Method: + - Index: algorithms/greedy_method/index.md + - Knapsack Problem: algorithms/greedy_method/knapsack_problem.md + - Job Sequencing with Deadline: algorithms/greedy_method/job_sequencing_with_deadlines.md + - Optimal Merge Pattern: algorithms/greedy_method/optimal_merge_pattern.md + - Huffman Coding: algorithms/greedy_method/huffman_coding.md + - Prim's and Kruskal's: algorithms/greedy_method/prims_and_kruskals.md + - Dijkstra Algorithm: algorithms/greedy_method/dijkstra_algorithm.md + - Example: example.md + +# https://squidfunk.github.io/mkdocs-material/ +theme: + name: "material" + font: + code: Source Code Pro + text: Roboto + features: + - navigation.tabs + - navigation.instant + - navigation.tracking + - navigation.sections + - navigation.expand + - navigation.indexes + - toc.integrate + - content.tabs.link + - content.code.annotate + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/weather-sunny + name: Switch to light mode + + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/bhanutejags +# generator: false + +plugins: + - search + - autorefs + - mkdocstrings: + watch: + - dsa + 
+markdown_extensions: + - pymdownx.arithmatex: + generic: true + - pymdownx.saneheaders + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde + - pymdownx.tasklist + - pymdownx.tabbed: + alternate_style: true + - pymdownx.details + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_div_format + # https://github.com/fralau/mkdocs-mermaid2-plugin +# - name: mermaid +# class: mermaid +# format: !!python/name:mermaid2.fence_mermaid + - pymdownx.snippets + - pymdownx.emoji: + emoji_index: !!python/name:materialx.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg + + +extra_javascript: + - javascripts/mathjax.js + - https://polyfill.io/v3/polyfill.min.js?features=es6 + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + - https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4500cc6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[tool.isort] +profile = "black" + +[tool.pytest] +addopts = "--cov dsa/" + +[tool.mypy] +strict = true +files = "dsa" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..dbbb0b6 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,32 @@ +# Formatting and Linting Libraries +black +isort +flake8 +## https://testdriven.io/blog/documenting-python/ +darglint +mypy +## Spell Check: https://github.com/codespell-project/codespell +codespell + + +# Testing Libraries +pytest +pytest-cov +pytest-sugar +## https://github.com/anapaulagomes/pytest-picked +pytest-picked +## https://github.com/HypothesisWorks/hypothesis +## https://hypothesis.readthedocs.io/en/latest/index.html +hypothesis + +# Documentation +## https://www.mkdocs.org +mkdocs +## https://mkdocstrings.github.io +mkdocstrings +## https://squidfunk.github.io/mkdocs-material +mkdocs-material 
+# https://facelessuser.github.io/pymdown-extensions/ +pymdown-extensions +# https://github.com/mkdocstrings/autorefs +mkdocs-autorefs \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/__init__.py b/tests/dsa/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/__init__.py b/tests/dsa/algorithms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/divide_and_conquer/__init__.py b/tests/dsa/algorithms/divide_and_conquer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/divide_and_conquer/binary_search/__init__.py b/tests/dsa/algorithms/divide_and_conquer/binary_search/__init__.py new file mode 100644 index 0000000..77117a2 --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/binary_search/__init__.py @@ -0,0 +1,8 @@ +import pytest + +test_data = [ + pytest.param(list(range(0, 200, 2)), 2, 1, id="test1"), + pytest.param(list(range(0, 200, 3)), 9, 3, id="test2"), + pytest.param(list(range(0, 200, 2)), 9, -1, id="test1-non-existent-element"), + pytest.param(list(range(0, 150, 32)), 42, -1, id="test2-non-existent-element"), +] diff --git a/tests/dsa/algorithms/divide_and_conquer/binary_search/test_imperative_binary_search.py b/tests/dsa/algorithms/divide_and_conquer/binary_search/test_imperative_binary_search.py new file mode 100644 index 0000000..e8d4cac --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/binary_search/test_imperative_binary_search.py @@ -0,0 +1,12 @@ +import pytest + +from dsa.algorithms.divide_and_conquer.binary_search import imperative_binary_search +from tests.dsa.algorithms.divide_and_conquer.binary_search import test_data + + +@pytest.mark.parametrize( + "input_list,element_to_search,expected", + test_data, +) +def 
test_imperative_parametrized(input_list, element_to_search, expected): + assert imperative_binary_search(input_list, element_to_search) == expected diff --git a/tests/dsa/algorithms/divide_and_conquer/binary_search/test_recursive_binary_search.py b/tests/dsa/algorithms/divide_and_conquer/binary_search/test_recursive_binary_search.py new file mode 100644 index 0000000..8f2c0a9 --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/binary_search/test_recursive_binary_search.py @@ -0,0 +1,12 @@ +import pytest + +from dsa.algorithms.divide_and_conquer.binary_search import recursive_binary_search +from tests.dsa.algorithms.divide_and_conquer.binary_search import test_data + + +@pytest.mark.parametrize( + "input_list,element_to_search,expected", + test_data, +) +def test_recursive_parametrized(input_list, element_to_search, expected): + assert recursive_binary_search(input_list, element_to_search) == expected diff --git a/tests/dsa/algorithms/divide_and_conquer/heap_sort/__init__.py b/tests/dsa/algorithms/divide_and_conquer/heap_sort/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/divide_and_conquer/heap_sort/test_max_heap.py b/tests/dsa/algorithms/divide_and_conquer/heap_sort/test_max_heap.py new file mode 100644 index 0000000..d7b4671 --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/heap_sort/test_max_heap.py @@ -0,0 +1,40 @@ +from typing import List + +import pytest + +from dsa.algorithms.divide_and_conquer.heap_sort import MaxHeap + + +@pytest.mark.parametrize( + "elements_to_insert,expected_heap", + [[[2, 3, 4], [[2], [3, 2], [4, 3, 2]]], [[3, 1, 5], [[3], [3, 1], [5, 3, 1]]]], +) +def test_max_heap_insert(elements_to_insert: List[int], expected_heap): + max_heap = MaxHeap() + for index, element in enumerate(elements_to_insert): + max_heap.insert(element) + assert max_heap.tree == expected_heap[index] + + +# @pytest.mark.parametrize("initial_heap,number_of_deletes,expected_heap") +# def 
test_max_heap_delete(initial_heap: List[int], number_of_deletes, expected_heap): +# max_heap = MaxHeap.new(initial_heap) +# +# for i in range(number_of_deletes): +# max_heap.delete() +# assert max_heap.tree == expected_heap[i] + + +@pytest.mark.parametrize( + "initial_heap,number_of_deletes,expected_heap", + [ + ([60, 50, 35, 16, 15, 32, 31], 1, [50, 31, 35, 16, 15, 32]), + ([60, 50, 35, 16, 15, 32, 31], 2, [35, 31, 32, 16, 15]), + ], +) +def test_max_heap_delete(initial_heap, number_of_deletes, expected_heap): + max_heap = MaxHeap(initial_heap) + + for _i in range(number_of_deletes): + max_heap.delete() + assert max_heap.tree == expected_heap diff --git a/tests/dsa/algorithms/divide_and_conquer/merge_sort/__init__.py b/tests/dsa/algorithms/divide_and_conquer/merge_sort/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge.py b/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge.py new file mode 100644 index 0000000..2dc02ec --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge.py @@ -0,0 +1,18 @@ +from typing import List + +import pytest + +from dsa.algorithms.divide_and_conquer.merge_sort import merge + + +@pytest.mark.parametrize( + "input_list1,input_list2,expected_merged_list", + [ + ([0, 2, 4, 6], [0, 3, 6, 9], [0, 0, 2, 3, 4, 6, 6, 9]), + ([1, 2, 4, 7], [0, 3, 6, 8, 9, 10], [0, 1, 2, 3, 4, 6, 7, 8, 9, 10]), + ], +) +def test_merge( + input_list1: List[int], input_list2: List[int], expected_merged_list: List[int] +): + assert merge(input_list1, input_list2) == expected_merged_list diff --git a/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge_sort.py b/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge_sort.py new file mode 100644 index 0000000..d7bdd1b --- /dev/null +++ b/tests/dsa/algorithms/divide_and_conquer/merge_sort/test_merge_sort.py @@ -0,0 +1,50 @@ +import hypothesis.strategies as st +import pytest +from 
hypothesis import given + +from dsa.algorithms.divide_and_conquer.merge_sort import ( + merge_sort_recursive, + two_way_merge_sort, +) + + +@pytest.mark.parametrize( + "input_list,expected_sorted_list", + [ + ([8, 4, 11, 3, 10, 12, 32, 47], [3, 4, 8, 10, 11, 12, 32, 47]), + ([7, 4, 11, 3, 10, 12, 21, 47], [3, 4, 7, 10, 11, 12, 21, 47]), + # ([12, 7, 22, 11, 1, 10, 13], [1, 7, 10, 11, 12, 13, 22]), + ], +) +def test_merge_sort(input_list, expected_sorted_list): + assert two_way_merge_sort(input_list) == expected_sorted_list + + +@pytest.mark.parametrize( + "input_list,expected_sorted_list", + [ + ([8, 4, 11, 3, 10, 12, 32, 47], [3, 4, 8, 10, 11, 12, 32, 47]), + ([12, 7, 22, 11, 1, 10, 13], [1, 7, 10, 11, 12, 13, 22]), + ], +) +def test_merge_sort_recursive(input_list, expected_sorted_list): + assert merge_sort_recursive(input_list) == expected_sorted_list + + +@given(st.lists(st.integers())) +def test_merge_sort_recursive_sum_property(input_list): + assert sum(merge_sort_recursive(input_list)) == sum(input_list) + + +def is_identical(list_a, list_b): + if len(list_a) != len(list_b): + return False + for i in list_a: + if i not in list_b: + return False + return True + + +@given(st.lists(st.integers())) +def test_merge_sort_recursive_same_elements_property(input_list): + assert is_identical(merge_sort_recursive(input_list), input_list) diff --git a/tests/dsa/algorithms/greedy_method/__init__.py b/tests/dsa/algorithms/greedy_method/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dsa/algorithms/greedy_method/test_job_sequencing_with_deadlines.py b/tests/dsa/algorithms/greedy_method/test_job_sequencing_with_deadlines.py new file mode 100644 index 0000000..2de7b64 --- /dev/null +++ b/tests/dsa/algorithms/greedy_method/test_job_sequencing_with_deadlines.py @@ -0,0 +1,14 @@ +from dsa.algorithms.greedy_method.job_sequencing_with_deadlines import ( + Job, + sequencing_job, +) + + +def test_schedule_job(): + jobs = [ + Job(job_id=1, deadline=1, 
profit=10), + Job(job_id=2, deadline=3, profit=40), + Job(job_id=3, deadline=1, profit=5), + Job(job_id=4, deadline=4, profit=34), + ] + sequencing_job(jobs, number_of_slots=3) diff --git a/tests/dsa/algorithms/greedy_method/test_knapsack_problem.py b/tests/dsa/algorithms/greedy_method/test_knapsack_problem.py new file mode 100644 index 0000000..e69de29 diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..79527a2 --- /dev/null +++ b/tox.ini @@ -0,0 +1,9 @@ +[flake8] +ignore = D203,W503 +exclude = .git,.venv,__pycache__,.pytest_cache,.mypy_cache,docs/source/conf.py,old,build,site,dist,.hypothesis/,.coverage,htmlcov/ +max-complexity = 10 +max-length = 127 +max-line-length = 88 +extend-ignore = E203 +strictness=short +docstring_style=sphinx