diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..ef5565a1 Binary files /dev/null and b/.DS_Store differ diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..93292e52 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,10 @@ +[run] +omit = + *tests.py + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise NotImplementedError + if __name__ == .__main__.: \ No newline at end of file diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 00000000..869f93fd --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1,2 @@ +repo_token: 5dQu9sYVNc28Qy8pw1CCOL8IN2ck3NIaU +service_name: travis-ci \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..b7353733 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,35 @@ +--- +name: Bug report +about: Create a report to help us improve + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..066b2d92 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest an idea for this project + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.gitignore b/.gitignore index b92e7ba2..c9f0d3cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,28 @@ +# Module Ignores +modules/*.pyc +modules/__init__.py modules/__pycache__/ +modules/.pytest_cache +modules/.ropeproject/ +modules/lib/*.so + +# Tests Ignores +tests/.pytest_cache +tests/*.pyc +tests/__pycache__/ +tests/.ropeproject/ + +# Cache .cache +*.pyc +.pytestc* +.pytest_cache +__pycache* +__pycache__/ + +# Misc +torBot .*.swp .ropeproject/ -modules/.ropeproject/ -tests/__pycache__/ -modules/__init__.py .idea/ -tests/.ropeproject/ +.DS_Store diff --git a/.hound.yml b/.hound.yml new file mode 100644 index 00000000..19b091bf --- /dev/null +++ b/.hound.yml @@ -0,0 +1,2 @@ +flake8: + enabled: true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 0f74ecd8..09664b5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,12 +3,12 @@ dist: trusty language: python cache: pip3 python: - - "3.5" + - "3.6" # command to install dependencies install: - sudo apt-get -y install python3-pip - pip3 install -r requirements.txt - - cd tests + - cd modules/tests script: - pytest notifications: diff --git a/CHANGELOG.md b/CHANGELOG.md index b7e46b8e..fe482586 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,20 +2,46 @@ -------------------- All notable changes to this project will be documented in this file. -## 1.2.0 | Present (Stable) +## 1.3.0 | Present ### Changed + +* Major code improvements +* Updated README.md +* Updated dependencies +* Refactored TorBot + +### Added + +* Visualizer Module +* Download option to save Tree into different formats. +* DB module +* Installation shell script to create torBot binary +* Test for getting links that uses a Mock Object to reproduce tests without touching actual servers. +* BFS algorithm for crawling +* Documentation for functions + +### Removed + +* -l Live argument Removed (Enabled by default) + + +## 1.2.0 | Nov 16, 2017 - Oct 19, 2018 + +### Changed + * Major code improvements * Pep 8 Standard * Tests * Library changes ### Added + * Documentation * Save to JSON * Testcase for Save to JSON -## 1.0.1 - July 6, 2017 - Nov 16 +## 1.0.1 - July 6, 2017 - Nov 16, 2017 ### Added @@ -25,7 +51,7 @@ All notable changes to this project will be documented in this file. * Refactored code to meet Google Style Docstrings for documentation * Fixed error occuring while using -i flag -## 1.0.0 - Jun 28, 2017 - July 5 +## 1.0.0 - Jun 28, 2017 - July 5, 2017 ### Added * Argument parser support @@ -37,7 +63,7 @@ All notable changes to this project will be documented in this file. * Default Live url checker -## 0.0.3 - Jun 18, 2017 - Jun 27 +## 0.0.3 - Jun 18, 2017 - Jun 27, 2017 ### Added @@ -47,7 +73,7 @@ All notable changes to this project will be documented in this file. * CODE_OF_CONDUCT * Travis CI integration -## 0.0.2 - Jun 13, 2017 - Jun 17 +## 0.0.2 - Jun 13, 2017 - Jun 17, 2017 ### Added @@ -60,7 +86,7 @@ All notable changes to this project will be documented in this file. 
* Testcase for get email -## 0.0.1 - May 17, 2017 - Jun 3 +## 0.0.1 - May 17, 2017 - Jun 3, 2017 ### Added diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 00000000..d0511ba4 --- /dev/null +++ b/FAQ.md @@ -0,0 +1 @@ +## FAQ diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..53e5b304 --- /dev/null +++ b/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,12 @@ +Issue # + +### Changes Proposed +- +- +- + +### Explanation of Changes + + + +### Screenshots of new feature/change diff --git a/README.md b/README.md index 9f3ae36a..60608b57 100755 --- a/README.md +++ b/README.md @@ -1,55 +1,33 @@
- - - ████████╗ ██████╗ ██████╗ ██████╗ ██████╗ ████████╗ - ╚══██╔══╝██╔═══██╗██╔══██╗ ██╔══██╗██╔═████╗╚══██╔══╝ - ██║ ██║ ██║██████╔╝ ██████╔╝██║██╔██║ ██║ - ██║ ██║ ██║██╔══██╗ ██╔══██╗████╔╝██║ ██║ - ██║ ╚██████╔╝██║ ██║ ██████╔╝╚██████╔╝ ██║ - ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═╝ - - - - `.` ` - ``.:.--.` - .-+++/-` - `+sso:` - `` /yy+. - -+.oho. - o../+y - -s.-/:y:` - .:o+-`--::oo/-` - `/o+:.```---///oss+- - .+o:.``...`-::-+++++sys- - :y/```....``--::-yooooosh+ - -h-``--.```..-:-::ssssssssd+ - h:``:.``....`--:-++hsssyyyym. - .d.`/.``--.```:--//odyyyyyyym/ - `d.`+``:.```.--/-+/smyyhhhhhm: - os`./`/````/`-/:+oydhhhhhhdh` - `so.-/-:``./`.//osmddddddmd. - /s/-/:/.`/..+/ydmdddddmo` - `:oosso/:+/syNmddmdy/. - `-/++oosyso+/.` - - - ██████╗ ███████╗██████╗ ███████╗██████╗ ██████╗ ██╗███╗ ██╗███████╗██╗██████╗ ███████╗ - ██╔══██╗██╔════╝██╔══██╗██╔════╝╚════██╗██╔════╝ ██║████╗ ██║██╔════╝██║██╔══██╗██╔════╝ - ██║ ██║█████╗ ██║ ██║███████╗ █████╔╝██║ ██║██╔██╗ ██║███████╗██║██║ ██║█████╗ - ██║ ██║██╔══╝ ██║ ██║╚════██║ ╚═══██╗██║ ██║██║╚██╗██║╚════██║██║██║ ██║██╔══╝ - ██████╔╝███████╗██████╔╝███████║██████╔╝╚██████╗ ██║██║ ╚████║███████║██║██████╔╝███████╗ - ╚═════╝ ╚══════╝╚═════╝ ╚══════╝╚═════╝ ╚═════╝ ╚═╝╚═╝ ╚═══╝╚══════╝╚═╝╚═════╝ ╚══════╝ - - + ████████╗ ██████╗ ██████╗ ██████╗ ██████╗ ████████╗ + ╚══██╔══╝██╔═══██╗██╔══██╗ ██╔══██╗██╔═████╗╚══██╔══╝ + ██║ ██║ ██║██████╔╝ ██████╔╝██║██╔██║ ██║ + ██║ ██║ ██║██╔══██╗ ██╔══██╗████╔╝██║ ██║ + ██║ ╚██████╔╝██║ ██║ ██████╔╝╚██████╔╝ ██║ + ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═╝ +- -## A python web crawler for Deep and Dark Web. 
-[![Build Status](https://travis-ci.org/DedSecInside/TorBoT.svg?branch=master)](https://travis-ci.org/DedSecInside/TorBoT) +[![Build Status](https://travis-ci.org/DedSecInside/TorBot.svg?branch=dev)](https://travis-ci.org/DedSecInside/TorBoT) [![](https://img.shields.io/badge/Donate-Bitcoin-blue.svg?style=flat)](https://blockchain.info/address/14st7SzDbQZuu8fpQ74x477WoRJ7gpHFaj) [![](https://img.shields.io/badge/Built%20with-❤-orange.svg?style=flat)]() [![](https://img.shields.io/badge/Made%20with-Python-red.svg?style=flat)]() +## OSINT tool for Deep and Dark Web. + +Open-source intelligence offers value in information security decision making through knowledge of threats and malicious activities that potentially impact business. Open-source intelligence using the internet is common; however, using the darknet is less common for the typical cybersecurity analyst. The challenges to using the darknet for open-source intelligence include using specialized collection, processing, and analysis tools. TorBot is an open source intelligence tool developed in Python. The main objective of this project is to collect open data from the deep web (aka dark web) and with the help of data mining algorithms, collect as much information as possible and produce an interactive tree graph. The interactive tree graph module will be able to display the relations of the collected intelligence data. + +### Motivation + + The idea of developing an open source intelligence tool like TorBot emerged from the deep web itself. Crawling a collection of web pages which has high anonymity and complex data encryption without an index is a tedious task. The crawler in TorBot has to be designed in such a way that the links are identified from a webpage (any webpage) and other links are identified and crawled recursively, then combining all these links to form an index. Each link is then crawled for more links and emails for intelligence information. 
Unlike a surface web discovery tool, a deep web discovery tool is limited in both general and domain-specific search. + + Extensive use of the dark web for communication of terrorism-related information makes it a challenge for Law Enforcement Agencies. TorBot should be able to monitor such illegal activities that are happening in this encrypted network. Therefore, this tool will be able to ease the task of finding such activities by an intelligence group or researchers, thus making this the main objective of TorBot. + + +### Objective + +Though the main objective of TorBot is to identify illegal activities in the deep web and visualize the relations of data collected using a graph, there are several ways this tool can be useful. For example, TorBot will be able to crawl and create an index for the deep web. This index can be stored in a database or a JSON file for future use. Other features like the live checker will be able to check whether a web address is alive or dead. This is important as the deep web links are constantly changed for privacy and security reasons. For researchers and security enthusiasts, TorBot can be used for checking basic vulnerabilities in a dark web page. + ### Working Procedure/Basic Plan The basic procedure executed by the web crawling algorithm takes a list of seed URLs as its input and repeatedly executes @@ -65,34 +43,41 @@ the following steps: 8. After all URLs are processed, return the most relevant page. ### Features -1. Crawls Tor links (.onion).(Completed) -2. Returns Page title and address with a short description about the site.(Not Started) -3. Save links to database.(Not Started) +1. Onion Crawler (.onion).(Completed) +2. Returns Page title and address with a short description about the site.(Partially Completed) +3. Save links to database.(PR to be reviewed) 4. Get emails from site.(Completed) 5. Save crawl info to JSON file.(Completed) 6. Crawl custom domains.(Completed) -7. Check if the link is live.(Not Started) +7. 
Check if the link is live.(Completed) 8. Built-in Updater.(Completed) +9. Visualizer module.(Not started) +10. Social Media integration.(Not started) ...(will be updated) ## Contribute -Contributions to this project are always welcome. -To add a new feature fork the dev branch and give a pull request when your new feature is tested and complete. -If its a new module, it should be put inside the modules directory and imported to the main file. +Contributions to this project are always welcome. +To add a new feature fork the dev branch and give a pull request when your new feature is tested and complete. +If it's a new module, it should be put inside the modules directory. The branch name should be your new feature name in the format
-`usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION] +usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION] [-l] [-i] optional arguments: @@ -127,24 +122,54 @@ optional arguments: Read more about torrc here : [Torrc](https://github.com/DedSecInside/TorBoT/blob/master/Tor.md) ## TO-DO -- [ ] Implement A\* Search for webcrawler +- [ ] Visualization Module +- [x] Implement BFS Search for webcrawler +- [X] Multithreading for Get Links +- [ ] Improve stability (Handle errors gracefully, expand test coverage and etc.) +- [ ] Create a user-friendly GUI +- [ ] Randomize Tor Connection (Random Header and Identity) +- [ ] Keyword/Phrase search +- [ ] Social Media Integration +- [ ] Increase anonymity and efficiency ### Have ideas? If you have new ideas which is worth implementing, mention those by starting a new issue with the title [FEATURE_REQUEST]. If the idea is worth implementing, congratz you are now a contributor. +## Related Works +OSINT and the Dark Web: The Dark Web has proven a very useful and reliable tool in the hands of individuals wishing to be involved in illegal, criminal or terrorist activities, setting sight on getting great economic or political benefits without being identified from government authorities and security agencies world-wide. To this end, LEAs need to become more agile when dealing with criminality on the Dark Web, and in particular on its Hidden Service Markets, and need to invest in new training and technology, if not to get ahead of the criminals, then at least to keep pace[1]. + +Using TOR for Open Source Intelligence: Although the use of Tor for OSINT does not raise specific legal concerns, there are a few interesting arguments that have been raised about using OSINT in general. One of them touches on the Council of Europe’s Convention on Cybercrime. 
Article 32 (a) of the Convention regulates transborder access to stored computer data with respect to ‘publicly available (open source) stored computer data, regardless of where the data is located geographically’[2]. + +OSINT in Social Networks: In summary, they examined and compared the needs of the Open Source Intelligence community with what social media has to offer investigators. They observed that a friends list of a given individual is a useful starting point for launching an investigation but found that several technical limitations (privacy and platform restrictions and data availability and longevity) may prevent investigators from accessing friend list information of a target account. They address privacy restrictions for the particular case of friends by creating a private friend discovery algorithm with hunter-seeker behaviours[3]. + +Data Mining in The Dark: This paper successfully explored an open-source intelligence automation toolset that scanned across the darknet. It described and shared the tools, process, and techniques to build a secure darknet connection, and then collected, processed, stored, and analysed data. This paper showed the viability of darknet open-source intelligence using the completed toolset. In the end, the toolset finds entities and links entities from the darknet thereby showing strong potential to aid the open source intelligence professional[4]. + +### References + + [1] B. Akhgar, P. S. Bayerl, and F. Sampson, Open Source Intelligence Investigation. From strategy to implementation. 2016. + [2] T. Minárik and A.-M. Osula, “Tor does not stink: Use and abuse of the Tor anonymity network from the perspective of law,” Comput. Law Secur. Rev., vol. 32, no. 1, pp. 111–127, Feb. 2016. + [3] Benjamin Robert Holland, “Enabling Open Source Intelligence (OSINT) in private social networks,” 2012. + [4] Brian Nafziger, “Data Mining in the Dark: Darknet Intelligence Automation,” Secur. Home IoT Netw., no. Security 401, pp. 1–32, 2017. 
+ [5] A. R. Behjat, A. Mustapha, H. Nezamabadi-Pour, M. N. Sulaiman, and N. Mustapha, “A New Binary Particle Swarm Optimization for Feature Subset Selection with Support Vector Machine,” in Recent Advances on Soft Computing and Data Mining, 2014, pp. 47–57. + [6] H. Parmar, S. Bhanderi, and G. Shah, “Sentiment Mining of Movie Reviews using Random Forest with Tuned Hyperparameters.” 2014. + + ## License GNU Public License ## CREDITS - [X] [P5N4PPZ](https://github.com/PSNAppz) - Owner +- [X] [KingAkeem](https://github.com/KingAkeem) - Experienced Contributor,Reviewer,Core Member - [X] [agrepravin](https://github.com/agrepravin) - Contributor,Reviewer +- [X] [shivankar-madaan](https://github.com/shivankar-madaan) - Experienced Contributor - [X] [y-mehta](https://github.com/y-mehta) - Contributor - [X] [Manfredi Martorana](https://github.com/Agostinelli) - Contributor -- [X] [KingAkeem](https://github.com/KingAkeem) - Contributor - [X] [Evan Sia Wai Suan](https://github.com/waisuan) - New Contributor - +- [X] [Lean](https://github.com/leaen) - New Contributor +- [X] [Gus](https://github.com/HotPushUpGuy420) - New Contributor +- [X] [SubaruSama](https://github.com/SubaruSama) - New Contributor +- [X] [robly78746](https://github.com/robly78746) - New Contributor ![](https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Opensource.svg/200px-Opensource.svg.png) - diff --git a/install.sh b/install.sh new file mode 100755 index 00000000..93dc2ae1 --- /dev/null +++ b/install.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Makes directory for dependencies and executable to be installed +mkdir -p tmp_build +mkdir -p tmp_dist + +pip install pyinstaller + +# Creates executable file and sends dependences to the recently created directories +pyinstaller --onefile --workpath ./tmp_build --distpath ./tmp_dist torBot.py + +# Puts the executable in the current directory +mv tmp_dist/torBot . 
+ +# Removes both directories and unneeded file +rm -r tmp_build tmp_dist +rm torBot.spec diff --git a/tests/__init__.py b/modules/__init__.py similarity index 100% rename from tests/__init__.py rename to modules/__init__.py diff --git a/modules/analyzer.py b/modules/analyzer.py new file mode 100644 index 00000000..e8d536b9 --- /dev/null +++ b/modules/analyzer.py @@ -0,0 +1,130 @@ +""" +Module is used for analyzing link relationships +""" +from requests.exceptions import HTTPError + +from ete3 import Tree, TreeStyle, TextFace, add_face_to_node +from .link import LinkNode +from .utils import multi_thread + +class LinkTree: + """ + This is a class that represents a tree of links within TorBot. This can be used to build a tree, + examine the number of nodes, if a node exists within a tree, displaying the tree and + downloading the tree, and will be expanded in the future to meet further needs. + + Attributes: + root (str): Represents root link + tld (bool): Decides whether or not to use additional top-level-domains besides .tor + stop_depth (int): Depth of which to stop searching for links + """ + def __init__(self, root_node, *, stop_depth=1): + self._tree = build_tree(root_node, stop=stop_depth) + + def __len__(self): + return len(self._tree) + + def __contains__(self, link): + return self._tree.search_nodes(name=link) + + def save(self, file_name): + """ + Saves LinkTree to file with given file_name + Current file types supported are .png, .pdf, .svg + + Args: + file_name (str): Name of file being saved to + """ + style = TreeStyle() + style.show_leaf_name = False + def my_layout(node): + node_style = TextFace(node.name, tight_text=True) + add_face_to_node(node_style, node, column=0, position='branch-bottom') + style.layout_fn = my_layout + self._tree.render(file_name, tree_style=style) + + def show(self): + """ + Allows user to quickly view LinkTree + """ + style = TreeStyle() + style.show_leaf_name = False + def my_layout(node): + node_style = TextFace(node.name, 
tight_text=True) + add_face_to_node(node_style, node, column=0, position='branch-bottom') + style.layout_fn = my_layout + self._tree.show(tree_style=style) + + +def initialize_tree(root_node): + """ + Creates root of tree + Args: + link (str): link node to be used as root + tld (bool): Additional top-level-domains + Returns: + root (ete3.Tree): root node of tree + to_visit (list): Children of root node + """ + root = Tree(name=root_node.name) + children = root_node.links + return root, children + + +def build_tree(link=None, *, stop=1, rec=0, to_visit=None, tree=None): + """ + Builds tree using Breadth First Search. You can specify stop depth. + Rec & tree arguments are used for recursion. + + *NOTE: This function uses a GET request for each url found, this can + be very expensive so avoid if possible try to acquire the urls to + be traversed and use bfs function. + + Args: + link (str): root node + tld (boolean): specifies if all top-level-domains will be allowed or not + stop (int): stops traversing at this depth if specified + rec (int): used for recursion + tree (ete3.Tree): a tree node used for recursion + + Returns: + tree (ete3.Tree): built tree + """ + if rec == 0: + tree, to_visit = initialize_tree(link) + + sub_tree = Tree(name=tree.name) + + if rec == stop: + # If recursion is 0 then sub_tree will be root + return sub_tree if rec == 0 else tree + + def visit_nodes(link): + children_to_visit = list() + try: + node = LinkNode(link) + except (ValueError, ConnectionError, HTTPError): + return None + + link_node = sub_tree.add_child(name=node.name) + link_children = node.links + # No need to find children if we aren't going to visit them + if stop != rec + 1: + for child in link_children: + link_node.add_child(name=child) + children_to_visit.append(child) + + if stop != rec + 1: + return children_to_visit + + return to_visit + + next_nodes = multi_thread(to_visit, visit_nodes) + rec += 1 + + # If we've reached stop depth then return tree + if stop == rec: + 
return sub_tree + + new_tree = tree.add_child(sub_tree) + return build_tree(to_visit=next_nodes, stop=stop, rec=rec, tree=new_tree) diff --git a/modules/bcolors.py b/modules/bcolors.py deleted file mode 100644 index 78b05842..00000000 --- a/modules/bcolors.py +++ /dev/null @@ -1,14 +0,0 @@ -class Bcolors: - - def __init__(self): - self.HEADER = '\033[95m' - self.OKBLUE = '\033[94m' - self.OKGREEN = '\033[92m' - self.WARNING = '\033[93m' - self.FAIL = '\033[91m' - self.ENDC = '\033[0m' - self.BOLD = '\033[1m' - self.UNDERLINE = '\033[4m' - self.WHITE = '\033[97m' - self.On_Black = '\033[40m' - self.On_Red = '\033[41m' diff --git a/modules/color.py b/modules/color.py new file mode 100644 index 00000000..493712dc --- /dev/null +++ b/modules/color.py @@ -0,0 +1,46 @@ + +""" +Module containing class with colors +""" + +COLORS = { + 'white': "\033[1;37m", + 'yellow': "\033[1;33m", + 'green': "\033[1;32m", + 'blue': "\033[1;34m", + 'cyan': "\033[1;36m", + 'red': "\033[1;31m", + 'magenta': "\033[1;35m", + 'black': "\033[1;30m", + 'darkwhite': "\033[0;37m", + 'darkyellow': "\033[0;33m", + 'darkgreen': "\033[0;32m", + 'darkblue': "\033[0;34m", + 'darkcyan': "\033[0;36m", + 'darkred': "\033[0;31m", + 'darkmagenta':"\033[0;35m", + 'darkblack': "\033[0;30m", + 'end': "\033[0;0m" +} + +class color: + """ + Class that contains colors used for TorBot in terminal and a method + that adds color to a string + + Attributes: + message (string): message to be wrapped in color + selected (string): color to be displayed + """ + def __init__(self, message, selected): + self._msg = message + self._color = COLORS[selected] + + def __str__(self): + return self._color + self._msg + COLORS['end'] + + def __add__(self, other): + return str(self) + other + + def __radd__(self, other): + return other + str(self) diff --git a/modules/getemails.py b/modules/getemails.py deleted file mode 100644 index 4b9c03dc..00000000 --- a/modules/getemails.py +++ /dev/null @@ -1,41 +0,0 @@ -from modules.bcolors 
import Bcolors -from bs4 import BeautifulSoup - - -def getMails(soup): - - """ - Searches for tags for links then checks if link contains the - substring 'mailto' indicating that it's an email. If it is determined - to be an email then the link is split and the username is appeneded to - the list - - Args: - soup: BeautifulSoup isntance that will be used for parsing - - Returns: - emails: list of email IDs - """ - b_colors = Bcolors() - - if isinstance(type(soup), type(BeautifulSoup)): - - emails = [] - links = soup.find_all('a') - for ref in links: - url = ref.get('href') - if url and 'mailto' in url: - """Split email address on""" - email_addr = url.split(':') - if (len(email_addr) > 1): - emails.append(email_addr[1]) - - """Pretty print output as below""" - print ('') - print (b_colors.OKGREEN+'Mails Found - '+b_colors.ENDC+str(len(emails))) - print ('-------------------------------') - - return emails - - else: - raise('Method parameter is not of instance BeautifulSoup') diff --git a/modules/getweblinks.py b/modules/getweblinks.py deleted file mode 100644 index 21471f0e..00000000 --- a/modules/getweblinks.py +++ /dev/null @@ -1,112 +0,0 @@ -import re -import requests -import tldextract - -from bs4 import BeautifulSoup -from modules.bcolors import Bcolors -from requests.exceptions import ConnectionError, HTTPError - - -def valid_url(url, extensions=False): - """Checks for any valid url using regular expression matching - - Matches all possible url patterns with the url that is passed and - returns True if it is a url and returns False if it is not. 
- - Args: - url: string representing url to be checked - - Returns: - bool: True if valid url format and False if not - """ - pattern = r"^https?:\/\/(www\.)?([a-z,A-Z,0-9]*)\.([a-z, A-Z]+)(.*)" - regex = re.compile(pattern) - if not extensions: - if regex.match(url): - return True - return False - - parts = tldextract.extract(url) - valid_sites = list() - for ext in extensions: - if regex.match(url) and '.'+parts.suffix in ext: - valid_sites.append(url) - return valid_sites - - -def valid_onion_url(url): - """Checks for valid onion url using regular expression matching - - Only matches onion urls - - Args: - url: string representing url to be checked - - Returns: - bool: True if valid onion url format, False if not - """ - pattern = r"^https?:\/\/(www\.)?([a-z,A-Z,0-9]*)\.onion/(.*)" - regex = re.compile(pattern) - if regex.match(url): - return True - return False - - -def get_link_status(link, colors): - """Generator that yields links as they come - - Uses head request because it uses less bandwith than get and timeout is - set to 10 seconds and then link is automatically declared as dead. - - Args: - link: link to be tested - colors: object containing colors for link - - Yields: - string: link with either no color or red which indicates failure - """ - - try: - resp = requests.head(link, timeout=10) - resp.raise_for_status() - yield '\t'+link - except (ConnectionError, HTTPError): - yield '\t'+colors.On_Red+link+colors.ENDC - - -def getLinks(soup, ext=False, live=False): - """ - Searches through all (hyperlinks) tags and stores them in a - list then validates if the url is formatted correctly. - - Args: - soup: BeautifulSoup instance currently being used. 
- - Returns: - websites: List of websites that were found - """ - b_colors = Bcolors() - if isinstance(soup, BeautifulSoup): - websites = [] - - links = soup.find_all('a') - for ref in links: - url = ref.get('href') - if ext: - if url and valid_url(url, ext): - websites.append(url) - else: - if url and valid_onion_url(url): - websites.append(url) - - """Pretty print output as below""" - print(''.join((b_colors.OKGREEN, - 'Websites Found - ', b_colors.ENDC, str(len(websites))))) - print('------------------------------------') - - for link in websites: - print(next(get_link_status(link, b_colors))) - return websites - - else: - raise(Exception('Method parameter is not of instance BeautifulSoup')) diff --git a/modules/info.py b/modules/info.py index a861cd54..4cddf494 100644 --- a/modules/info.py +++ b/modules/info.py @@ -1,30 +1,38 @@ -import requests - from urllib.parse import urlsplit +from bs4 import BeautifulSoup from termcolor import cprint +from requests.exceptions import HTTPError +import requests + +from .link_io import LinkIO + -def executeAll(target): - try: - get_robots_txt(target) - except Exception: - cprint("No robots.txt file Found!""blue") - try: - get_dot_git(target) - except Exception: - cprint("Error !""red") - try: - get_dot_svn(target) - except Exception: - cprint("Error""red") - try: - get_dot_htaccess(target) - except Exception: - cprint("Error""red") +def execute_all(link, *, display_status=False): + page, response = LinkIO.read(link, response=True, show_msg=display_status) + soup = BeautifulSoup(page, 'html.parser') + validation_functions = [get_robots_txt, get_dot_git, get_dot_svn, get_dot_git] + for validate_func in validation_functions: + try: + validate_func(link) + except (ConnectionError, HTTPError): + cprint('Error', 'red') + + display_webpage_description(soup) + display_headers(response) + + +def display_headers(response): + print(''' + RESPONSE HEADERS + __________________ + ''') + for key, val in response.headers.items(): + 
print('*', key, ':', val) def get_robots_txt(target): - cprint("[*]Checking for Robots.txt"'yellow') + cprint("[*]Checking for Robots.txt", 'yellow') url = target target = "{0.scheme}://{0.netloc}/".format(urlsplit(url)) requests.get(target+"/robots.txt") @@ -32,42 +40,52 @@ def get_robots_txt(target): def get_dot_git(target): - cprint("[*]Checking for .git folder"'yellow') + cprint("[*]Checking for .git folder", 'yellow') url = target target = "{0.scheme}://{0.netloc}/".format(urlsplit(url)) req = requests.get(target+"/.git/") - r = req.status_code - if r == 200: - cprint("Alert!"'red') - cprint(".git folder exposed publicly"'red') + status = req.status_code + if status == 200: + cprint("Alert!", 'red') + cprint(".git folder exposed publicly", 'red') else: - print("NO .git folder found"'blue') + cprint("NO .git folder found", 'blue') def get_dot_svn(target): - cprint("[*]Checking for .svn folder"'yellow') + cprint("[*]Checking for .svn folder", 'yellow') url = target target = "{0.scheme}://{0.netloc}/".format(urlsplit(url)) req = requests.get(target+"/.svn/entries") - r = req.status_code - if r == 200: - cprint("Alert!"'red') - cprint(".SVN folder exposed publicly"'red') + status = req.status_code + if status == 200: + cprint("Alert!", 'red') + cprint(".SVN folder exposed publicly", 'red') else: - cprint("NO .SVN folder found"'blue') + cprint("NO .SVN folder found", 'blue') def get_dot_htaccess(target): - cprint("[*]Checking for .htaccess"'yellow') + cprint("[*]Checking for .htaccess", 'yellow') url = target target = "{0.scheme}://{0.netloc}/".format(urlsplit(url)) req = requests.get(target+"/.htaccess") statcode = req.status_code if statcode == 403: - cprint("403 Forbidden"'blue') + cprint("403 Forbidden", 'blue') elif statcode == 200: - cprint("Alert!!"'blue') - cprint(".htaccess file found!"'blue') + cprint("Alert!!", 'blue') + cprint(".htaccess file found!", 'blue') else: - cprint("Status code"'blue') + cprint("Status code", 'blue') cprint(statcode) + + +def 
display_webpage_description(soup): + cprint("[*]Checking for description meta tag", 'yellow') + metatags = soup.find_all('meta') + for meta in metatags: + if meta.has_attr('name'): + attributes = meta.attrs + if attributes['name'] == 'description': + cprint("Page description: " + attributes['content']) diff --git a/modules/link.py b/modules/link.py new file mode 100644 index 00000000..b8d6677d --- /dev/null +++ b/modules/link.py @@ -0,0 +1,122 @@ +""" + +This module is used to create a LinkNode that can be consumued by a LinkTree +and contains useful Link methods + +""" +import requests +import requests.exceptions +import validators + +from bs4 import BeautifulSoup +from .utils import multi_thread +from .color import color + +def get_emails(node): + """Finds all emails associated with node + + Args: + node (LinkNode): node used to get emails from + Returns: + emails (list): list of emails + """ + emails = [] + for child in node.children: + link = child.get('href') + if link and 'mailto' in link: + email_addr = link.split(':') + if LinkNode.valid_email(email_addr[1]) and len(email_addr) > 1: + emails.append(email_addr[1]) + return emails + + +def get_links(node): + """Finds all links associated with node + + Args: + node (LinkNode): node used to get links from + Returns: + links (list): list of links + """ + def retrieve_link(child): + link = child.get('href') + if link and LinkNode.valid_link(link): + return link + return None + + return multi_thread(node.children, retrieve_link) + + +class LinkNode: + """Represents link node in a link tree + + Attributes: + link (str): link to be used as node + """ + + def __init__(self, link): + # If link has invalid form, throw an error + if not self.valid_link(link): + raise ValueError("Invalid link format.") + + self._children = [] + self._emails = [] + self._links = [] + + # Attempts to connect to link, throws an error if link is unreachable + try: + self.response = requests.get(link) + except 
(requests.exceptions.ChunkedEncodingError, + requests.exceptions.HTTPError, + requests.exceptions.ConnectionError, + ConnectionError) as err: + raise err + + self._node = BeautifulSoup(self.response.text, 'html.parser') + if not self._node.title: + self.name = "TITLE NOT FOUND" + self.status = color(link, 'yellow') + else: + self.name = self._node.title.string + self.status = color(link, 'green') + + @property + def emails(self): + """ + Getter for node emails + """ + if not self._emails: + self._emails = get_emails(self) + return self._emails + + @property + def links(self): + """ + Getter for node links + """ + if not self._links: + self._links = get_links(self) + return self._links + + @property + def children(self): + """ + Getter for node children + """ + if not self._children: + self._children = self._node.find_all('a') + return self._children + + @staticmethod + def valid_email(email): + """Static method used to validate emails""" + if validators.email(email): + return True + return False + + @staticmethod + def valid_link(link): + """Static method used to validate links""" + if validators.url(link): + return True + return False diff --git a/modules/link_io.py b/modules/link_io.py new file mode 100644 index 00000000..0639ba07 --- /dev/null +++ b/modules/link_io.py @@ -0,0 +1,102 @@ +""" +This module is used for reading HTML pages using either bs4.BeautifulSoup objects or url strings +""" +import requests.exceptions +from bs4 import BeautifulSoup + +from .link import LinkNode +from .utils import multi_thread +from .color import color + +class LinkIO: + """ + This class is only used to interact with links + """ + @staticmethod + def display_children(root): + """ + Static method to display status of child nodes + + Args: + root (LinkNode): root of children to be displayed + """ + sucess_msg = color(f'Links Found - {len(root.links)}', 'green') + print(sucess_msg + '\n' + '---------------------------------') + multi_thread(root.links, LinkIO.display) + + 
class LinkIO:
    """
    This class is only used to interact with links
    """
    @staticmethod
    def display_children(root):
        """
        Static method to display status of child nodes

        Args:
            root (LinkNode): root of children to be displayed
        """
        sucess_msg = color(f'Links Found - {len(root.links)}', 'green')
        print(sucess_msg + '\n' + '---------------------------------')
        multi_thread(root.links, LinkIO.display)

    @staticmethod
    def read(link, *, response=False, show_msg=False, headers=None, schemes=None):
        """
        Attempts to retrieve HTML from link

        Without ``schemes`` the link is first tried as given; if it is not a
        valid url, ``https://`` and then ``http://`` are prepended. Each
        candidate scheme is tried until one connects.

        Args:
            link (str): link to read
            response (bool): determines if response is returned.
            show_msg (bool): determines if message is displayed for connection
            headers (dict): accepted for compatibility, defaults to None.
                NOTE: currently not forwarded, LinkNode performs the request
                without custom headers.
            schemes (list): different schemes to attempt to use
        Returns:
            str: html from page
            requests.Response (optional): response returned from requests
        Raises:
            ConnectionError: if no candidate url is valid; if every valid
                candidate failed to connect, the last connection error is
                re-raised instead
        """
        def result_of(node):
            if response:
                return node.response.text, node.response
            return node.response.text

        # Attempts to connect directly to site if no scheme is passed
        if not schemes:
            if show_msg:
                print(f'Attempting to connect to {link}')
            if LinkNode.valid_link(link):
                return result_of(LinkNode(link))
            schemes = ['https://', 'http://']

        # Attempt to use different schemes until one is successful
        last_error = None
        for scheme in schemes:
            temp_url = scheme + link
            if show_msg:
                # Report the url actually being tried, not the bare link.
                print(f'Attempting to connect to {temp_url}')
            if not LinkNode.valid_link(temp_url):
                continue
            try:
                return result_of(LinkNode(temp_url))
            except (requests.exceptions.HTTPError,
                    requests.exceptions.ConnectionError,
                    ConnectionError) as err:
                # Remember the failure and fall through to the next scheme.
                last_error = err

        if last_error is not None:
            raise last_error
        raise ConnectionError

    @staticmethod
    def display(link):
        """
        Prints the status of a link based on its connection status

        Invalid links are skipped without output. Unreachable links are
        printed in red with the title 'Not Found'.

        Args:
            link (str): link to get status of
        """
        if not LinkNode.valid_link(link):
            return

        try:
            node = LinkNode(link)
            title = node.name
            link_status = node.status
        except (requests.exceptions.ChunkedEncodingError,
                requests.exceptions.HTTPError,
                requests.exceptions.ConnectionError,
                ConnectionError):
            title = 'Not Found'
            link_status = color(link, 'red')

        status_msg = "%-80s %-30s" % (link_status, title)
        print(status_msg)

    @staticmethod
    def display_ip():
        """
        https://check.torproject.org/ tells you if you are using tor and it
        displays your IP address which we scrape and display
        """
        page = LinkIO.read('https://check.torproject.org/', show_msg=True)
        page = BeautifulSoup(page, 'html.parser')
        # presumably the first <strong> element of that page holds the
        # address; verify against the live page
        ip_cont = page.find('strong')
        ip_addr = ip_cont.renderContents()
        ip_string = color(ip_addr.decode("utf-8"), 'yellow')
        print(f'Tor IP Address: {ip_string}')
three attempts.") - - -def get_ip(): - """Returns users tor ip address - - https://check.torproject.org/ tells you if you are using tor and it - displays your IP address which we scape and return - """ - - b_colors = Bcolors() - page = readPage('https://check.torproject.org/') - pg = page.find('strong') - ip_addr = pg.renderContents() - - return b_colors.WARNING+b_colors.BOLD+ip_addr.decode("utf-8")+b_colors.ENDC diff --git a/modules/savefile.py b/modules/savefile.py index 95375a4b..11a09ce8 100644 --- a/modules/savefile.py +++ b/modules/savefile.py @@ -4,11 +4,11 @@ def saveJson(datatype, data): """ - Creates json file and stores json + Creates json file and stores json - Args: - datatype: the type of the object being passed - data = data that is being stored with object + Args: + datatype: the type of the object being passed + data = data that is being stored with object """ timestr = time.strftime("%Y%m%d-%H%M%S") diff --git a/modules/tests/TESTING.md b/modules/tests/TESTING.md new file mode 100755 index 00000000..02791f81 --- /dev/null +++ b/modules/tests/TESTING.md @@ -0,0 +1,3 @@ +# Testing Documentation + +We are currently using [`pytest`](https://docs.pytest.org/en/latest/) as our testing framework. To run the test suite, run `pytest` from the base directory of TorBot or from the `tests` directory. We use mock objects to simulate HTTP requests and HTML webpages via `requests_mock` and `yattag`, which gives us much faster tests that don't rely on network connections. To create a test using these mocks, the general pattern is to build some HTML with [`yattag`](http://www.yattag.org/) and register it to a URL with [`requests_mock`](https://requests-mock.readthedocs.io/en/latest/), which is then used to simulate HTTP requests.
def create_page(name):
    """Build a minimal HTML page whose title is ``name``."""
    doc, tag, _, line = Doc().ttl()
    doc.asis('')
    with tag('html'):
        line('title', name)
        with tag('body'):
            line('h1', 'Something')
    return doc.getvalue()


def create_root_page_with_links(root, links):
    """Build an HTML page titled ``root`` with one anchor per entry of ``links``."""
    doc, tag, _, line = Doc().ttl()
    doc.asis('')
    with tag('html'):
        line('title', root)
        with tag('body'):
            for link in links:
                line('a', 'test', href=link)

    return doc.getvalue()


# A plain test function, not a fixture: pytest does not collect functions
# marked with @pytest.fixture as tests, and calling a fixture directly (as
# test_run does) is rejected by current pytest versions.
def test_links_in_tree():
    """
    Every link placed on the mocked root page must be found in the LinkTree
    """
    links = ['http://dog.onion', 'http://cat.onion', 'http://foo.cnion']
    with requests_mock.Mocker() as mock_connection:
        root_page = create_root_page_with_links('http://root.onion', links)
        for link in links:
            page = create_page(link)
            mock_connection.register_uri('GET', link, text=page)
        mock_connection.register_uri('GET', 'http://root.onion', text=root_page)

        # Built inside the mocker so no real network request is made.
        node = LinkNode('http://root.onion')
        tree = LinkTree(node)

        for link in links:
            assert link in tree


def test_run():
    """
    Executes tests
    """
    test_links_in_tree()


if __name__ == '__main__':
    test_run()
def setup_html(test_links, *, fail=False):
    """
    Sets up test html containing links

    Args:
        test_links (list): list of links to be used for tests
        fail (bool): when True no anchors are written, producing a page
            without links
    Return:
        test HTML value
    """
    doc, tag, _, line = Doc().ttl()
    doc.asis('')
    with tag('html'):
        with tag('body'):
            for data in test_links:
                if not fail:
                    line('a', 'test_anchor', href=data)

    return doc.getvalue()


# The tests below are plain functions rather than @pytest.fixture objects:
# fixtures are not collected as tests and must not be called directly, which
# is exactly what test_run does.
def test_get_links_fail():
    """
    Test links that have incorrect scheme
    """
    test_data = ['ssh://aff.ironsocket.onion',
                 'ftp://aff.ironsocket.onion',
                 'lol://wsrs.onion',
                 'dial://cmsgear.onion']

    mock_html = setup_html(test_data, fail=True)
    with requests_mock.Mocker() as mock_connection:
        for data in test_data:
            mock_connection.register_uri('GET', data, text=mock_html)
            # Constructing the node is what must raise; nothing placed after
            # it inside the context manager could ever run.
            with pytest.raises(ValueError):
                LinkNode(data)


def test_get_links_tor():
    """
    Test links that return sucessfully
    """
    test_data = ['https://aff.ironsocket.onion',
                 'https://aff.ironsocket.onion',
                 'https://wsrs.onion',
                 'https://cmsgear.onion']

    mock_html = setup_html(test_data)
    mock_link = 'http://test.tor'
    with requests_mock.Mocker() as mock_connection:
        mock_connection.register_uri('GET', mock_link, text=mock_html)

        node = LinkNode(mock_link)
        result = node.links
        # Links are gathered by worker threads, so the order of the result
        # is not guaranteed; compare order-insensitively.
        assert sorted(result) == sorted(test_data)


def test_get_links_tld():
    """
    Test links with additional top-level-domains
    """
    test_data = ['https://aff.ironsocket.com/SH7L',
                 'https://aff.ironsocket.gov/SH7L',
                 'https://wsrs.net/',
                 'https://cmsgear.com/']

    mock_html = setup_html(test_data)
    mock_url = 'http://test.tor'
    with requests_mock.Mocker() as mock_connection:
        # One registration is enough; the url does not depend on test_data.
        mock_connection.register_uri('GET', mock_url, text=mock_html)

        node = LinkNode(mock_url)
        links = node.links
        assert sorted(links) == sorted(test_data)


def test_run():
    """
    Executes tests
    """
    test_get_links_fail()
    test_get_links_tor()
    test_get_links_tld()


if __name__ == '__main__':
    test_run()
# A plain test function, not a fixture: pytest does not collect functions
# marked with @pytest.fixture as tests, and test_run calls it directly.
def test_read():
    """
    Tests if read is returning the expected html
    """
    test_data = [
        ('https://www.test.com', 'This is a dot com site.'),
        ('https://www.test.org', 'This is a dot org site.'),
        ('https://www.test.net', 'This is a dot net site.'),
        ('https://www.test.onion', 'This is a dot onion site.')
    ]

    with requests_mock.Mocker() as mock_connection:
        for url, body_text in test_data:
            # A fresh document per site; a shared Doc would accumulate the
            # markup of every previous page.
            doc, tag, text = Doc().tagtext()
            doc.asis('')
            with tag('html'):
                with tag('body'):
                    text(body_text)
            page = doc.getvalue()

            # Register and compare the generated HTML itself so the test
            # checks that read() returns the served page unchanged.
            mock_connection.register_uri('GET', url, text=page)
            result = LinkIO.read(url)
            assert result == page


def test_run():
    """
    Execute tests
    """
    test_read()


if __name__ == '__main__':
    test_run()
-SCRIPT_DIR = os.path.dirname(os.path.realpath( - os.path.join(os.getcwd(), os.path.expanduser(__file__)))) - -sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) - -from modules import savefile +import os +from ..savefile import saveJson -def test_save_links_successful(): +def test_save_json_successful(): + """ + Sucessfully create and dump JSON object of links + """ mock_data = ['http://aff.ironsocket.com/SH7L', 'http://aff.ironsocket.com/SH7L', 'http://wsrs.net/', 'http://cmsgear.com/'] try: - file_name = savefile.saveJson('Links', mock_data) + file_name = saveJson('Links', mock_data) mock_output = {'Links': mock_data} with open('test_file.json', 'w+') as test_file: @@ -36,5 +35,6 @@ def test_save_links_successful(): assert mock_data == test_data + if __name__ == '__main__': - test_save_links_successful() \ No newline at end of file + test_save_json_successful() diff --git a/modules/updater.py b/modules/updater.py index 2b662e8d..df790e97 100644 --- a/modules/updater.py +++ b/modules/updater.py @@ -4,9 +4,8 @@ def updateTor(): """ - Currently updates Tor by calling terminal commands using subprocess - Not a great method and will be replaced in the future. - + Currently updates Tor by calling terminal commands using subprocess + Not a great method and will be replaced in the future. 
# ALGORITHM UTILITY FUNCTIONS

# Private marker queued by multi_thread to tell a worker to stop.
_STOP_WORKER = object()


def process_data(data_queue, data_stack, process, *args):
    """
    Worker loop: takes tasks from the queue, runs ``process`` on each and
    collects truthy results.

    Args:
        data_queue (queue.Queue): contains tasks in FIFO data structure
        data_stack (list): list that truthy results are appended to
        process (function): function to be executed on each task
        args (tuple): extra arguments; when present they are handed to
            ``process`` as ONE tuple, i.e. ``process(task, args)``
    Returns:
        None
    """
    while True:
        data = data_queue.get()
        if data is _STOP_WORKER:
            data_queue.task_done()
            return
        try:
            result = process(data, args) if args else process(data)
            if result:
                data_stack.append(result)
        finally:
            # Always account for the task, even when ``process`` raised;
            # otherwise Queue.join() in multi_thread would block forever.
            data_queue.task_done()


def multi_thread(data, data_function, *args):
    """
    Start one daemon thread per item, feed the items through a FIFO queue
    and wait until all of them have been processed.

    Args:
        data (list): lists of values that you'd like to operate on
        data_function (function): function that you would like to use for processsing
        args (tuple): arguments for function (delivered as a single tuple)
    Returns:
        list: truthy return values of ``data_function``; results are appended
        by the worker threads, so their order is not guaranteed. Items whose
        processing raised contribute nothing.
    """
    data_queue = Queue(len(data)*2)
    ret_stack = []
    workers = []
    for _ in data:
        data_args = (data_queue, ret_stack, data_function, *args)
        thd = Thread(target=process_data, args=data_args)
        thd.daemon = True
        thd.start()
        workers.append(thd)

    for obj in data:
        data_queue.put(obj)

    data_queue.join()

    # Release the workers so they do not stay blocked on the queue for the
    # rest of the process lifetime. The queue is empty after join() and its
    # capacity is twice the worker count, so these puts cannot block.
    for _ in workers:
        data_queue.put(_STOP_WORKER)
    return ret_stack


# Networking functions

def get_url_status(url, headers=False):
    """
    Uses GET request to check if website exists

    *NOTE: May look into changing this to HEAD requests to improve perf

    Args:
        url (str): url to be tested
        headers (dict): optional request headers; a falsy value sends none

    Return:
        requests.Response or int: the response of the GET request if it
        succeeded with a non-error status, 0 upon failure
    """
    try:
        if headers:
            resp = requests.get(url, headers=headers)
        else:
            resp = requests.get(url)
        resp.raise_for_status()
        return resp
    # requests raises its own ConnectionError, which is not a subclass of
    # the builtin one, so both have to be listed.
    except (requests.exceptions.ConnectionError, ConnectionError, HTTPError):
        return 0
-SCRIPT_DIR = os.path.dirname(os.path.realpath( - os.path.join(os.getcwd(), os.path.expanduser(__file__)))) - -sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) - -from modules import pagereader, getemails - - -def test_get_emails_successful(): - soup = pagereader.readPage('https://www.helloaddress.com/') - test_emails = ["hello@helloaddress.com"] - emails = getemails.getMails(soup) - assert emails == test_emails - -if __name__ == '__main__': - test_get_emails_successful() \ No newline at end of file diff --git a/tests/test_getweblinks.py b/tests/test_getweblinks.py deleted file mode 100644 index 2e7d0cff..00000000 --- a/tests/test_getweblinks.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -PACKAGE_PARENT = '..' -SCRIPT_DIR = os.path.dirname(os.path.realpath( - os.path.join(os.getcwd(), os.path.expanduser(__file__)))) - -sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))) -from modules import getweblinks, pagereader - - -def test_get_links_successful(): - soup = pagereader.readPage('http://www.whatsmyip.net/') - data = ['http://aff.ironsocket.com/SH7L', - 'http://aff.ironsocket.com/SH7L', - 'http://wsrs.net/', - 'http://cmsgear.com/'] - - result = getweblinks.getLinks(soup, ext=['.com', '.net']) - assert result == data - - -if __name__ == '__main__': - test_get_links_successful() \ No newline at end of file diff --git a/torBot.py b/torBot.py index 39e4fe44..bcccafff 100644 --- a/torBot.py +++ b/torBot.py @@ -1,14 +1,26 @@ +""" +MAIN MODULE +""" import argparse import socket import socks -from modules import (bcolors, getemails, pagereader, getweblinks, updater, - info, savefile) +from requests.exceptions import HTTPError +from modules.analyzer import LinkTree +from modules.color import color +from modules.link_io import LinkIO +from modules.link import LinkNode +from modules.updater import updateTor +from modules.savefile import saveJson +from modules.info import execute_all + +# 
GLOBAL CONSTS LOCALHOST = "127.0.0.1" -PORT = 9050 +DEFPORT = 9050 + # TorBot VERSION -__VERSION = "1.2" +__VERSION = "1.3" def connect(address, port): @@ -21,7 +33,16 @@ def connect(address, port): address: address for port to bound to port: Establishes connect to this port """ - socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, address, port) + + if address and port: + socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, address, port) + elif address: + socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, address, DEFPORT) + elif port: + socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, LOCALHOST, port) + else: + socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, LOCALHOST, DEFPORT) + socket.socket = socks.socksocket # Monkey Patch our socket to tor socket def getaddrinfo(*args): @@ -36,7 +57,7 @@ def getaddrinfo(*args): Last two arguments should be a tuple containing the address and port """ return [(socket.AF_INET, socket.SOCK_STREAM, 6, - '', (args[0], args[1]))] + '', (args[0], args[1]))] socket.getaddrinfo = getaddrinfo @@ -44,103 +65,68 @@ def header(): """ Prints out header ASCII art """ - - b_color = bcolors.Bcolors() - D3DSEC = b_color.FAIL + " D3DSEC " + b_color.WHITE - INS1DE = b_color.FAIL + " INS1DE " + b_color.WHITE - - header = r""" - {WHITE} - ###################################################### - MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNXNWWWWWMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWX0KXXKKXWMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNKOkOOkOXWMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMMNX0kdodoxKWMMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMW0doccloONMWWMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMMMMKl;;:cxKWMMMMMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMWKOXNx;,,cONMMMMMMMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMXdxKk:',lONMMMM{D3DSEC}MMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMMMOo0NOdxc,kMMMM{INS1DE}MMMMMMMM - 
MMMMMMMMMMMMMMMMMMMMMMMMOcONOxkx;dWMMMMMMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMMMMNkcdXXOkxkd:oXMMMMMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMMMMNOoclONNX00OkOxc:lkXWMMMMMMMMMMMMMM - MMMMMMMMMMMMMMMMN0olld0NWNNX0O00kxkxl:;ckXMWWMMMMMMMMM - MMMMMMMMMMMWMMNxccd0NNNNNXNXOkOK0dodxdo:,;o0WMMMMMMMMM - MMMMMMMMMMMMNk:ckXNNWNXXXXNXOOOOK0oloooooc,'oKMMMMMMMM - MMMMMMMMMMMXc;xXNNNXKXXXNNWKOO0Ok0x:clllllc:.,OWMMMMMM - MMMMMMMMMMX:;0WNNX00XNNNNNNKOO0KkkOc,ccccccc:.'OWMMMMM - MMMMMMMMMNl,ONNN0OXNNNNNXXNKOkOK0xkl':c::::::;.;KMMMMM - MMMMMMMMM0,lNWXO0NNNNXKKXXNXO0Ok0Oxl',:;;;;;;;..dMMMMM - MMMMMMMMMk,xWNOONNNX00XNNNWKOO0OkOxc'';;,,,,,,'.cMMMMM - MMMMMMMMMx,xWKkKWNXOKNWNNNX0xxOKxxx:..,,,,,''''.cMMMMM - MMMMMMMMM0,oWXkOWXOKNNNNN00Xkdx0kdd;..,'''''''..oMMMMM - MMMMMMMMMNl;0W0kKKkKWNNN0ONNOxdOOll,..'''......,0MMMMM - MMMMMMMMMMK::KN0kKOkNNWXk0WX0kdxkc:............xWMMMMM - MMMMMMMMMMMKl:kX0k0kONWNOONX0koxd:,..........'kWMMMMMM - MMMMMMMMMMMMNxccxOkkxkKWKx0NOoooc'..........lKWMMMMMMM - MMMMMMMMMMMMMWNklccclldk0OxOdcc;. .......;oKWWMMMMMMMM - MMMMMMMMMMMMMMMMWXOdl:::;cc;'... 
def get_args(argv=None):
    """
    Parses user flags passed to TorBot

    Args:
        argv (list): argument strings to parse; defaults to None, in which
            case argparse reads the process command line
    Returns:
        argparse.Namespace: parsed flags. ``port`` is an int (or None when
        not given) so it can be handed to the proxy setup as-is.
    """
    parser = argparse.ArgumentParser(prog="TorBot",
                                     usage="Gather and analayze data from Tor sites.")
    parser.add_argument("--version", action="store_true",
                        help="Show current version of TorBot.")
    parser.add_argument("--update", action="store_true",
                        help="Update TorBot to the latest stable version")
    parser.add_argument("-q", "--quiet", action="store_true")
    parser.add_argument("-u", "--url", help="Specifiy a website link to crawl")
    parser.add_argument("--ip", help="Change default ip of tor")
    # Port numbers must be integers for the socket layer; let argparse
    # convert and reject non-numeric input up front.
    parser.add_argument("-p", "--port", type=int,
                        help="Change default port of tor")
    parser.add_argument("-s", "--save", action="store_true",
                        help="Save results in a file")
    parser.add_argument("-m", "--mail", action="store_true",
                        help="Get e-mail addresses from the crawled sites")
    parser.add_argument("-e", "--extension", action='append', dest='extension',
                        default=[],
                        help=' '.join(("Specifiy additional website",
                                       "extensions to the list(.com , .org, .etc)")))
    parser.add_argument("-i", "--info", action="store_true",
                        help=' '.join(("Info displays basic info of the",
                                       "scanned site")))
    parser.add_argument("-v", "--visualize", action="store_true",
                        help="Visualizes tree of data gathered.")
    parser.add_argument("-d", "--download", action="store_true",
                        help="Downloads tree of data gathered.")
    return parser.parse_args(argv)
flag is set then check for accompanying flag set. Only one # additional flag can be set with -u/--url flag if args.url: - print("Tor IP Address :", pagereader.get_ip()) - html_content = pagereader.readPage(link, args.extension) + try: + node = LinkNode(args.url) + except (ValueError, HTTPError, ConnectionError) as err: + raise err + LinkIO.display_ip() # -m/--mail if args.mail: - emails = getemails.getMails(html_content) - print(emails) + print(node.emails) if args.save: - savefile.saveJson('Emails', emails) + saveJson('Emails', node.emails) # -i/--info elif args.info: - info.executeAll(link) + execute_all(node.name) if args.save: print('Nothing to save.\n') + elif args.visualize: + tree = LinkTree(node) + tree.show() + elif args.download: + tree = LinkTree(node) + file_name = str(input("File Name (.pdf/.png/.svg): ")) + tree.save(file_name) else: - links = getweblinks.getLinks(soup=html_content, - live=args.live, - ext=args.extension) + LinkIO.display_children(node) if args.save: - savefile.saveJson("Links", links) + saveJson("Links", node.links) else: - print("usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION] [-l] [-i]") + print("usage: See torBot.py -h for possible arguments.") print("\n\n") @@ -183,7 +176,7 @@ def main(conn=False): if __name__ == '__main__': try: - main(conn=True) + main() except KeyboardInterrupt: print("Interrupt received! Exiting cleanly...")