Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Varunshiyam authored Oct 27, 2024
2 parents 0c0e8ca + 5839f26 commit d38689b
Show file tree
Hide file tree
Showing 20 changed files with 9,237 additions and 0 deletions.
34 changes: 34 additions & 0 deletions Advanced_Projects/Plagiarism-checker/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Every .txt file in the current working directory is treated as a submission.
student_files = [doc for doc in os.listdir() if doc.endswith('.txt')]

# Read each submission inside a ``with`` block so its file handle is closed
# immediately instead of being left open until garbage collection.
student_notes = []
for _file in student_files:
    with open(_file, encoding='utf-8') as _handle:
        student_notes.append(_handle.read())


def vectorize(Text):
    """Return the TF-IDF representation of the documents in ``Text``.

    A new ``TfidfVectorizer`` is fitted on ``Text`` and the matrix returned
    by ``fit_transform`` is converted to a dense array with ``toarray()``.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform(Text)
    return tfidf_matrix.toarray()
def similarity(doc1, doc2):
    """Return the cosine-similarity matrix computed over ``[doc1, doc2]``.

    The two vectors are passed to ``cosine_similarity`` as a two-row input;
    callers read entry ``[0][1]`` for the similarity between the documents.
    """
    stacked = [doc1, doc2]
    return cosine_similarity(stacked)


# Vectorize all submissions in a single call, pair each file name with its
# vector, and start with an empty result set for check_plagiarism() to fill.
vectors = vectorize(student_notes)
s_vectors = [(name, vector) for name, vector in zip(student_files, vectors)]
plagiarism_results = set()


def check_plagiarism():
    """Score every unordered pair of submissions by cosine similarity.

    Reads the module-level ``s_vectors`` list of ``(file name, vector)``
    tuples and adds one ``(name_a, name_b, score)`` tuple per pair to the
    module-level ``plagiarism_results`` set, with the two names in sorted
    order.

    Returns:
        The ``plagiarism_results`` set (the same object that was updated).
    """
    # Imported locally so the function is self-contained.
    from itertools import combinations

    # combinations() yields each unordered pair exactly once, so no pair is
    # scored twice and there is no need to copy the list and search it with
    # index(), which compared tuples that contain arrays.
    for (student_a, text_vector_a), (student_b, text_vector_b) in combinations(s_vectors, 2):
        sim_score = similarity(text_vector_a, text_vector_b)[0][1]
        student_pair = sorted((student_a, student_b))
        plagiarism_results.add((student_pair[0], student_pair[1], sim_score))
    return plagiarism_results


# Run the comparison once, then print one (file_a, file_b, score) tuple per line.
report = check_plagiarism()
for data in report:
    print(data)
3 changes: 3 additions & 0 deletions Advanced_Projects/Plagiarism-checker/fatma.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Life is all about doing your best in trying to
find what works out for you and taking most time in
trying to pursue those skills
Binary file added Advanced_Projects/Plagiarism-checker/image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions Advanced_Projects/Plagiarism-checker/john.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Life is all about finding money and spending on luxury stuffs
Coz this life is kinda short , trust
2 changes: 2 additions & 0 deletions Advanced_Projects/Plagiarism-checker/juma.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Life to me is about finding money and use it on things that makes you happy
coz this life is kinda short
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions Advanced_Projects/Plagiarism-checker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
scikit_learn==0.24.2
36 changes: 36 additions & 0 deletions Algorithms_and_Data_Structures/SearchingAlgorithms/JumpSearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import math

def jumpSearch(arr, x, n):
    """Find ``x`` in the sorted sequence ``arr`` using jump search.

    Args:
        arr: Sequence sorted in ascending order.
        x: Value to look for.
        n: Number of leading elements of ``arr`` to search; expected to be
            ``len(arr)``.

    Returns:
        The integer index of an element equal to ``x``, or ``-1`` when ``x``
        is not present (including when ``n`` is zero or negative).
    """
    if n <= 0:
        return -1

    # Use an integer block size: with a float step the returned "index"
    # carried a fractional part (e.g. 1.732 instead of 1).
    block = max(1, int(math.sqrt(n)))

    # Jump block by block until the last element of the block is >= x.
    prev = 0
    step = block
    while arr[min(step, n) - 1] < x:
        prev = step
        step += block
        if prev >= n:  # Ran past the end: x is larger than every element
            return -1

    # Linear search within the identified block.
    while arr[prev] < x:
        prev += 1
        if prev == min(step, n):  # Reached the next block or the end
            return -1

    if arr[prev] == x:
        return prev
    return -1

# Demo run: search a sorted list of Fibonacci numbers for a known value.
arr = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610]
x, n = 55, len(arr)

# Locate 'x' with Jump Search.
index = jumpSearch(arr, x, n)

# Print the index at which 'x' is located.
print("Number", x, "is at index", "%.0f" % index)
61 changes: 61 additions & 0 deletions Algorithms_and_Data_Structures/TimeComplexity_analyzer/Checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import ast

class TimeComplexityAnalyzer(ast.NodeVisitor):
    """Estimate a polynomial time complexity from how deeply loops are nested.

    The visitor walks an AST and records the deepest nesting of ``for`` and
    ``while`` loops. That depth is reported as the polynomial degree: no
    loops is O(1), one level is O(n), two nested levels is O(n^2), and so on.
    Sequential (non-nested) loops do not increase the degree. This is a
    purely structural heuristic; logarithmic and exponential behaviour
    cannot be detected this way and is never reported.
    """

    # Labels for the degrees that have a conventional name.
    _NAMED_DEGREES = {
        0: "O(1) - Constant Time",
        1: "O(n) - Linear Time",
        2: "O(n^2) - Quadratic Time",
        3: "O(n^3) - Cubic Time",
    }

    def __init__(self):
        # Deepest loop nesting seen so far (the estimated polynomial degree).
        self.complexity = 0
        # Nesting depth of the loop currently being visited.
        self.loop_depth = 0

    def _visit_loop(self, node):
        """Visit a loop node, tracking the current and maximum nesting depth."""
        self.loop_depth += 1
        # Record the maximum depth rather than summing per-loop scores, so
        # sibling loops do not inflate the estimate.
        self.complexity = max(self.complexity, self.loop_depth)
        self.generic_visit(node)
        self.loop_depth -= 1

    def visit_For(self, node):
        """Count a ``for`` loop as one nesting level."""
        self._visit_loop(node)

    def visit_While(self, node):
        """Count a ``while`` loop as one nesting level."""
        self._visit_loop(node)

    def visit_FunctionDef(self, node):
        """Descend into function bodies without changing the depth."""
        self.generic_visit(node)

    def visit_Call(self, node):
        """Descend into call expressions without changing the depth."""
        self.generic_visit(node)

    def visit_If(self, node):
        """Descend into conditionals without changing the depth."""
        self.generic_visit(node)

    def get_complexity(self):
        """Return a human-readable label for the estimated complexity."""
        degree = self.complexity
        if degree in self._NAMED_DEGREES:
            return self._NAMED_DEGREES[degree]
        return f"O(n^{degree}) - Polynomial Time"

def analyze_code(code):
    """Parse ``code`` and return its estimated time-complexity label.

    The source is parsed with ``ast.parse`` and walked by a
    ``TimeComplexityAnalyzer``. Failures are reported as strings rather than
    raised: a syntax error yields a message with its line and column, and
    any other exception yields a generic "unexpected error" message.
    """
    try:
        syntax_tree = ast.parse(code)
        visitor = TimeComplexityAnalyzer()
        visitor.visit(syntax_tree)
        outcome = visitor.get_complexity()
    except SyntaxError as e:
        outcome = f"Syntax Error: {e.msg} at line {e.lineno}, column {e.offset}"
    except Exception as e:
        outcome = f"An unexpected error occurred: {str(e)}"
    return outcome

if __name__ == "__main__":
    # Interactive entry point: greet the user, read one snippet, report it.
    # input() returns a single line, so only one-line snippets can be entered.
    print("Welcome to the Time Complexity Analyzer!")
    complexity = analyze_code(input("Please enter a piece of Python code:\n"))
    print(f"Estimated time complexity: {complexity}")
19 changes: 19 additions & 0 deletions Algorithms_and_Data_Structures/TimeComplexity_analyzer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Time Complexity Analyzer

## Overview
The Time Complexity Analyzer is a Python script designed to analyze the time complexity of user-provided Python code. By parsing the code and evaluating its structure, the program estimates the time complexity and provides a corresponding order of growth. This tool is particularly useful for developers and students looking to understand the efficiency of their algorithms.
## What Have I Done
In this project, I developed a program that leverages Python's Abstract Syntax Tree (AST) module to parse code input from the user. The program identifies loops and function definitions to estimate the time complexity based on common patterns. It provides clear feedback, including error handling for syntax errors, enhancing the user experience.

## What the Program Does
- Accepts a piece of Python code as input from the user.
- Parses the code using the AST module.
- Analyzes the structure of the code to identify loops and function calls.
- Estimates the time complexity and provides an order of growth (e.g., O(1), O(n), O(n^2)).
- Outputs detailed error messages in case of syntax issues.

## Libraries Used
- **ast**: A built-in Python library for parsing Python source code into its Abstract Syntax Tree representation.

## Conclusion
The Time Complexity Analyzer provides a straightforward and user-friendly way to estimate the efficiency of Python code. With its ability to handle various types of growth patterns and robust error handling, it serves as a valuable tool for anyone looking to improve their understanding of algorithmic efficiency. Future enhancements could include support for more complex constructs and deeper semantic analysis of code.
79 changes: 79 additions & 0 deletions Algorithms_and_Data_Structures/sorting_algorithms/Tim_Sort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

# Minimum run size: tim_sort() insertion-sorts the array in chunks of this
# many elements and then merges runs starting from this width.
RUN = 32

# Function - perform insertion sort on subarray
def insertion_sort(arr, left, right):
    """Sort ``arr[left..right]`` (both ends inclusive) in place.

    Elements outside the range are not touched. Equal elements keep their
    relative order because only strictly greater elements are shifted.
    """
    for idx in range(left + 1, right + 1):
        key = arr[idx]
        slot = idx
        # Shift larger elements one place right to open a slot for ``key``.
        while slot > left and arr[slot - 1] > key:
            arr[slot] = arr[slot - 1]
            slot -= 1
        arr[slot] = key

# Function to merge two sorted subarrays
def merge(arr, l, m, r):
    """Merge the sorted runs ``arr[l..m]`` and ``arr[m+1..r]`` in place.

    Both runs are copied out first, then written back into ``arr[l..r]`` in
    ascending order. On ties the element from the left run is taken first.
    """
    left_count = m - l + 1
    right_count = r - m
    left_run = arr[l:l + left_count]
    right_run = arr[m + 1:m + 1 + right_count]

    li = ri = 0
    write = l

    # Take the smaller head element until one run is exhausted.
    while li < left_count and ri < right_count:
        if left_run[li] <= right_run[ri]:
            arr[write] = left_run[li]
            li += 1
        else:
            arr[write] = right_run[ri]
            ri += 1
        write += 1

    # Whatever remains in the left run goes next ...
    for pos in range(li, left_count):
        arr[write] = left_run[pos]
        write += 1

    # ... followed by whatever remains in the right run.
    for pos in range(ri, right_count):
        arr[write] = right_run[pos]
        write += 1

# Function to perform TimSort
def tim_sort(arr):
    """Sort ``arr`` in place: insertion-sort fixed runs, then merge them.

    The array is first split into chunks of ``RUN`` elements, each sorted
    with ``insertion_sort``. Sorted chunks are then merged pairwise with
    ``merge``, doubling the chunk width on every pass until it covers the
    whole array.
    """
    length = len(arr)
    last = length - 1

    # Phase 1: sort each RUN-sized chunk independently.
    for start in range(0, length, RUN):
        insertion_sort(arr, start, min(start + RUN - 1, last))

    # Phase 2: bottom-up merging of neighbouring chunks.
    width = RUN
    while width < length:
        for lo in range(0, length, 2 * width):
            mid = lo + width - 1
            hi = min(lo + 2 * width - 1, last)
            if mid >= hi:
                # No right-hand chunk to merge with.
                continue
            merge(arr, lo, mid, hi)
        width *= 2

# Function to print the array
def print_array(arr):
    """Print the elements of ``arr`` on one line, each followed by a space.

    The line is terminated with a newline; an empty ``arr`` prints only the
    newline.
    """
    # Iterate the elements directly instead of indexing via range(len(arr)).
    for item in arr:
        print(item, end=" ")
    print()

# Driver code
if __name__ == "__main__":
    # Demo: show a small sample list before and after sorting it in place.
    sample = [40, 12, 31, 27, 25, 8, 1, 32, 17]

    print("Before sorting array elements are - ")
    print_array(sample)

    tim_sort(sample)

    print("\nAfter sorting array elements are - ")
    print_array(sample)
44 changes: 44 additions & 0 deletions Cybersecurity_Tools/Web Application Firewall/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## **Web Application Firewall**

### 🎯 **Goal**

The goal of this code is to create a Flask-based web application designed to process user input securely by preventing common web security threats.

### 🧵 **Dataset**

N/A

### 🧾 **Description**

This Python code is a Flask-based web application designed to process user input securely by preventing common web security threats like SQL Injection and Cross-Site Scripting (XSS). It uses pattern matching with regular expressions to detect these threats in the input. Additionally, it has a rate-limiting feature, limiting the number of requests from a single user to prevent abuse (DoS attacks).

### 🧮 **What I have done!**

1. Threat Detection: Implemented pattern matching using regular expressions to detect potential SQL injection and XSS attacks in user input.
2. Input Sanitization: By using the html.escape() function, ensured that any harmful HTML characters are escaped, preventing malicious scripts from being executed in the browser.
3. Rate Limiting: Integrated Flask-Limiter to impose rate limits (e.g., 10 requests per minute) on incoming requests, protecting the app from abuse such as denial of service (DoS) attacks.
4. Logging: Set up logging to record any detected security events, such as SQL injection attempts or XSS attacks. This helps with monitoring and auditing the security of the application.

### 🚀 **Models Implemented**

N/A

### 📚 **Libraries Needed**

1. `Flask` : It is a lightweight web framework used to handle HTTP requests and responses, providing routes like /submit for user interaction.
2. `Flask-Limiter` : Provides rate-limiting to protect the application from abuse, such as a Denial of Service (DoS) attack by limiting the number of requests a user can make.

### 📊 **Exploratory Data Analysis Results**

N/A.

### 📈 **Performance of the Models based on the Accuracy Scores**

N/A.

### 📢 **Conclusion**

This Flask-based web application effectively implements basic cybersecurity protections by detecting and preventing common threats like SQL Injection and Cross-Site Scripting (XSS) through pattern matching and input sanitization. The addition of rate-limiting safeguards the application from excessive requests, helping to mitigate DoS attacks. The app also incorporates robust logging to track security events, making it a simple yet effective solution for securing user input and enhancing web application security.

**Deanne Vaz**
[GitHub](https://github.com/djv554) | [LinkedIn](https://www.linkedin.com/in/deanne-vaz/)
50 changes: 50 additions & 0 deletions Cybersecurity_Tools/Web Application Firewall/waf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import re
import logging
from flask import Flask, request, jsonify
from html import escape
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address

# Flask application and a rate limiter keyed on the client's remote address.
app = Flask(__name__)
# Pass both arguments by keyword: Flask-Limiter 3.x takes ``key_func`` as its
# first positional parameter, so ``Limiter(app, key_func=...)`` fails there.
# The keyword form works with both the old and the new signature.
limiter = Limiter(key_func=get_remote_address, app=app)
# Log records at INFO level and above are written to security.log.
logging.basicConfig(filename='security.log', level=logging.INFO)

# Heuristic SQL-injection signature: quote/terminator/comment characters, the
# /* and */ comment delimiters, and SQL keywords. Keywords are wrapped in \b
# so they only match as whole words; without the boundaries ordinary text
# such as "world" (OR) or "hand" (AND) was flagged.
SQL_INJECTION_PATTERN = re.compile(
    r"(?:--|;|'|\"|#|/\*|\*/"
    r"|\b(?:OR|AND|SELECT|INSERT|DELETE|UPDATE|DROP|UNION|CHAR|HEX)\b)",
    re.IGNORECASE,
)
# Heuristic XSS signature: a <script ...> tag with any attributes, an inline
# on*= event handler inside a tag, or a javascript: / data:text/html URL.
XSS_PATTERN = re.compile(
    r"(<script[^>]*>|<[^>]*\bon[a-zA-Z]+\s*=|javascript:|data:text/html)",
    re.IGNORECASE,
)

def is_safe(input_string):
    """Check ``input_string`` against the SQL-injection and XSS patterns.

    The SQL pattern is tried first, then the XSS pattern; the first match is
    logged as a warning and ends the check.

    Returns:
        A ``(safe, reason)`` tuple: ``(True, "")`` when neither pattern
        matches, otherwise ``(False, <message naming the threat>)``.
    """
    checks = (
        (SQL_INJECTION_PATTERN, "SQL Injection detected: %r",
         "Potential SQL Injection detected."),
        (XSS_PATTERN, "XSS attack detected: %r",
         "Potential XSS detected."),
    )
    for pattern, log_format, reason in checks:
        if pattern.search(input_string):
            # %r logs the repr, so newlines in attacker-controlled input are
            # escaped and cannot forge extra lines in security.log.
            logging.warning(log_format, input_string)
            return False, reason
    return True, ""

@app.route('/submit', methods=['POST'])
@limiter.limit("10 per minute")
def submit():
    """Validate and screen the ``user_input`` field of a JSON POST body.

    Responses:
        400 - the body is not a JSON object, ``user_input`` is missing or is
              not a string, or the input matches a threat pattern.
        413 - ``user_input`` is longer than 1000 characters.
        200 - the input passed every check.
        500 - an unexpected error occurred while processing the request.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body, so
        # bad requests get the 400 below instead of the 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or "user_input" not in data:
            return jsonify({"error": "Invalid input. Please provide valid JSON."}), 400

        user_input = data["user_input"]
        if not isinstance(user_input, str):
            # len() and the regex checks below require a string.
            return jsonify({"error": "Invalid input. Please provide valid JSON."}), 400
        if len(user_input) > 1000:
            return jsonify({"error": "Input too long."}), 413

        # Screen the RAW input. Escaping first turns <, >, &, ' and " into
        # entities such as &lt; and &#x27;, which hides <script> from the XSS
        # pattern and adds ';' / '#' characters that trip the SQL pattern.
        is_safe_input, reason = is_safe(user_input)
        if not is_safe_input:
            return jsonify({"error": reason}), 400

        # Escape only after screening, before the value is written anywhere.
        safe_input = escape(user_input)
        logging.info("Safe input processed: %r", safe_input)
        return jsonify({"message": "Input processed successfully!"}), 200

    except Exception as e:
        # Last-resort boundary: record the traceback, return a generic error.
        logging.exception("Error processing request: %s", e)
        return jsonify({"error": "An error occurred processing the request."}), 500

# The guard must compare against '__main__' (double underscores); with
# '_main_' the condition was never true and the server never started.
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and must not
    # be used for a deployed instance.
    app.run(debug=True)
Loading

0 comments on commit d38689b

Please sign in to comment.