This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Merge pull request #106 from sethsec/issue-102-extract-urls
Merge Issue 102 extract urls with dev
sethsec authored Jun 13, 2019
2 parents 1df28e6 + a73ff1c commit 1e88bc3
Showing 11 changed files with 350 additions and 234 deletions.
16 changes: 11 additions & 5 deletions celerystalk
@@ -18,6 +18,7 @@ Usage:
celerystalk resume ([all]|[<task_ids>]) [-h]
celerystalk db ([workspaces]|[workspace]|[services]|[ports]|[hosts]|[vhosts]|[paths]|[paths_only]|[tasks]) [-h]
celerystalk db export [-h]
celerystalk db paths_only limit [-h]
celerystalk admin ([start]|[stop]|[restart]|[reset]|[backup]|[restore]) [-f <restore_file>] [-h]
celerystalk interactive [-h]
celerystalk (help | -h | --help)
@@ -89,7 +90,7 @@ import csv

from lib.nmap import nmapcommand

build=str(191)
build=str(200)

def print_banner():

@@ -691,10 +692,15 @@ def main(arguments):
elif arguments["paths_only"]:
#print("[+] Showing paths for the [{0}] workspace\n".format(workspace))
#columns = ["IP", "Port", "Path"]
paths_rows = lib.db.get_all_paths(workspace)
for row in paths_rows:
sys.stdout.write(row[3]+"\n")
print("\n\n")
if arguments["limit"]:
paths = lib.screenshot.get_max_screenshots(workspace,config_file)
for path in paths:
                print(path)
else:
paths_rows = lib.db.get_all_paths(workspace)
for row in paths_rows:
sys.stdout.write(row[3]+"\n")
print("\n")
elif arguments["tasks"]:
print("[+] Showing tasks for the [{0}] workspace\n".format(workspace))
columns = ["ID","PID","Command","Status"]
6 changes: 6 additions & 0 deletions lib/config_parser.py
@@ -34,6 +34,12 @@ def get_simpleserver_port(config_file):
if key == "simple_server_port":
return val

def get_screenshot_max(config_file):
config,supported_services = read_config_ini(config_file)
for (key, val) in config.items("celerystalk-config"):
if key == "max_screenshots_per_vhost":
return val

def get_user_config(config_file):
config,supported_services = read_config_ini(config_file)
return config.items("user-config")
6 changes: 4 additions & 2 deletions lib/csimport.py
@@ -237,7 +237,7 @@ def import_url(url,workspace,output_base_dir):
url_path = ''

url_screenshot_filename = scan_output_base_file_dir + url_path.replace("/", "_") + ".png"
db_path = (vhost, port, url, 0, url_screenshot_filename, workspace)
db_path = (vhost, port, url.rstrip("/"), 0, url_screenshot_filename, workspace)
db.insert_new_path(db_path)
# print("Found Url: " + str(url))
#urls_to_screenshot.append((url, url_screenshot_filename))
@@ -246,7 +246,7 @@
# print(result)


db_path = (vhost, port, url, 0, url_screenshot_filename, workspace)
db_path = (vhost, port, url.rstrip("/"), 0, url_screenshot_filename, workspace)
lib.db.insert_new_path(db_path)
else:
print("[!] {0} is explicitly marked as out of scope. Skipping...".format(vhost))
@@ -523,6 +523,7 @@ def process_nmap_data(nmap_report,workspace, target=None):

if (scanned_service_name == 'https') or (scanned_service_name == 'http'):
path = scanned_service_name + "://" + ip + ":" + str(scanned_service_port)
path = path.rstrip("/")
db_path = db.get_path(path, workspace)
if not db_path:
url_screenshot_filename = scan_output_base_file_dir + ".png"
@@ -553,6 +554,7 @@

if (scanned_service_name == 'https') or (scanned_service_name == 'http'):
path = scanned_service_name + "://" + vhost + ":" + str(scanned_service_port)
path = path.rstrip("/")
db_path = db.get_path(path, workspace)
if not db_path:
url_screenshot_filename = scan_output_base_file_dir + ".png"
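
The url.rstrip("/") calls added throughout this file normalize paths before insertion, so the same URL with and without a trailing slash ends up as a single row in the paths table. A trivial illustration with a made-up URL:

# Both spellings collapse to the same stored path (sample URL is invented):
for u in ("http://10.0.0.5:80", "http://10.0.0.5:80/"):
    print(u.rstrip("/"))   # prints http://10.0.0.5:80 both times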
6 changes: 6 additions & 0 deletions lib/db.py
@@ -662,6 +662,12 @@ def get_all_paths_for_host_path_only(ip,workspace):
CONNECTION.commit()
return all_paths_for_host

def get_x_paths_for_host_path_only(ip,workspace,config_max):
CUR.execute("SELECT path FROM paths WHERE ip = ? AND workspace = ? LIMIT ?", (ip,workspace,config_max))
all_paths_for_host = CUR.fetchall()
CONNECTION.commit()
return all_paths_for_host

def get_path(path,workspace):
CUR.execute("SELECT * FROM paths WHERE workspace = ? AND path = ?", (workspace,path))
path = CUR.fetchall()
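
The new query is get_all_paths_for_host_path_only with a bound LIMIT. Below is a standalone sketch of that behavior against a throwaway in-memory table; the schema is trimmed to the columns the query touches and the rows are invented. In the real call the limit arrives from config_parser as a string, which SQLite should still accept for LIMIT as long as it converts cleanly to an integer; an int is used here for clarity:

import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE TABLE paths (ip TEXT, workspace TEXT, path TEXT)")
rows = [("10.0.0.5", "default", "http://10.0.0.5/page{0}".format(i)) for i in range(10)]
cur.executemany("INSERT INTO paths VALUES (?, ?, ?)", rows)
cur.execute("SELECT path FROM paths WHERE ip = ? AND workspace = ? LIMIT ?", ("10.0.0.5", "default", 3))
print(cur.fetchall())   # only the first three paths come back
conn.close()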
26 changes: 23 additions & 3 deletions lib/report.py
@@ -15,9 +15,29 @@



def summary_paths(workspace):
def summary_paths():
pass

def summary_hosts():
pass

def summary_services():
pass

def summary_tasks():
workspace = lib.db.get_current_workspace()[0][0]
completed_rows = lib.db.get_completed_tasks(workspace)
if completed_rows.__len__() > 0:
for completed_row in completed_rows:
command = completed_row[1]
run_time = completed_row[2]
run_time = time.strftime("%H:%M:%S", time.gmtime(float(run_time)))
ip = completed_row[3]






def paths_report(host,all_paths):
#all_paths = lib.db.get_all_paths_for_host(host)
@@ -29,12 +49,12 @@ def paths_report(host,all_paths):
url_screenshot_filename = urllib.quote(url_screenshot_filename)
url_screenshot_filename_relative = os.path.join("screens/",url_screenshot_filename.split("/screens/")[1])
html_code = html_code + """\n<div id="linkwrap">\n"""
html_code = html_code + """<a class="link" href="#">[Screenshot]<span><img src="{1}" alt="image"/></span></a> <a href="{0}">{0}</a><br>\n""".format(path,url_screenshot_filename_relative)
html_code = html_code + """<a href="{0}">{0}</a><br>\n""".format(path)
html_code = html_code + "\n</div>\n"
except:
#print("Could not find screenshot for " + path)
html_code = html_code + """\n<div id="linkwrap">\n"""
html_code = html_code + "[Screenshot] " + """<a href="{0}">{0}</a><br>\n""".format(path)
html_code = html_code + """<a href="{0}">{0}</a><br>\n""".format(path)
html_code = html_code + "\n</div>\n"
return html_code

31 changes: 28 additions & 3 deletions lib/screenshot.py
@@ -29,12 +29,36 @@ def screenshot_command(arguments):
print("./celerystalk workspace create -o output_dir -w workspace_name -m vapt")
print("./celerystalk workspace create -o output_dir -w workspace_name -m bb\n")
exit()
if arguments["-c"]:
if os.path.exists(arguments["-c"]):
config_file = arguments["-c"]
else:
print("[!] The specified config file does not exist. Try again?")
exit()
else:
config_file = 'config.ini'

# lib.screenshot.screenshot_all_paths(workspace)
#TODO: change this to reflect number of screenshots taken based on config.ini max
paths_len = len(lib.db.get_all_paths(workspace))
print("[+]\n[+] Tasking aquatone to take [{0}] screenshots").format(str(paths_len))
max_paths_len = len(get_max_screenshots(workspace,config_file))
max = lib.config_parser.get_screenshot_max(config_file)
print("[+]\n[+] There are [{0}] paths in the DB").format(str(paths_len))
#print("[+] max_screenshots_per_vhost set to: [{0}]").format(str(max))
print("[+] Tasking aquatone to take [{0}] screenshots per host for a total of [{1}] screenshots\n[+]\n[+]").format(str(max),str(max_paths_len))
lib.screenshot.aquatone_all_paths(workspace)

def get_max_screenshots(workspace,config_file):
screenshot_list = []
max = lib.config_parser.get_screenshot_max(config_file)
vhosts = lib.db.get_unique_hosts_with_paths(workspace)
for vhost in vhosts:
vhost = vhost[0]
paths = lib.db.get_x_paths_for_host_path_only(vhost, workspace,max)
for path in paths:
screenshot_list.append(path[0])
return screenshot_list


def aquatone_all_paths(workspace,simulation=None,config_file=None):
#print("in aquatone all_paths")
@@ -62,7 +86,7 @@ def aquatone_all_paths(workspace,simulation=None,config_file=None):
#print(cmd_name, cmd)
try:
if cmd_name == "aquatone":
populated_command = celery_path + "/celerystalk db paths_only | " + cmd.replace("[OUTPUT]", outdir)
populated_command = celery_path + "/celerystalk db paths_only limit | " + cmd.replace("[OUTPUT]", outdir)
#print(populated_command)
except Exception, e:
print(e)
@@ -71,14 +95,15 @@


task_id = uuid()
utils.create_task(cmd_name, populated_command, workspace, outdir + "/aquatone_report.html", workspace, task_id)
utils.create_task(cmd_name, populated_command, workspace, outdir + "aquatone_report.html", workspace, task_id)
result = chain(
tasks.run_cmd.si(cmd_name, populated_command, celery_path, task_id).set(task_id=task_id),
)()
print("[+]\t\tTo keep an eye on things, run one of these commands: \n[+]")
print("[+]\t\t./celerystalk query [watch]")
print("[+]\t\t./celerystalk query brief [watch]")
print("[+]\t\t./celerystalk query summary [watch]")
print("[+]\t\tor\n[+]\t\ttail -f " + outdir + "aquatone_stdout.txt")
print("[+]")
print("[+] To peak behind the curtain, view log/celeryWorker.log")
print("[+] For a csv compatible record of every command execued, view log/cmdExecutionAudit.log\n")
Empty file added parsers/__init__.py
Empty file.
80 changes: 80 additions & 0 deletions parsers/generic_urlextract.py
@@ -0,0 +1,80 @@
#from urlextract import URLExtract
import re
import lib.db
import lib.utils
import urlparse


#TODO: Add this when i move project to python3
# def extract_urls_urlextractor(tool_output):
# #print(type(tool_output))
# extractor = URLExtract()
# urls = extractor.find_urls(tool_output)
# #print(urls)
# #for url in extractor.find_urls(tool_output):
# #print("* " + urls)
# #print(type(url))
# #print(urls)
# return urls



def extract_urls_regex(tool_output):
    interesting_urls = []
    urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', tool_output)
    not_interesting_extensions = [".png", ".ico", ".js", ".css", ".woff2", ".ttf", ".jpg", ".jpeg", ".svg", ".eot", ".woff",".gif"]
    for url in urls:
        if not url.endswith(tuple(not_interesting_extensions)):
            interesting_urls.append(url)
    return interesting_urls


def extract_urls(tool_output):
#TODO: Add uncomment these three lines and comment out the forth when i move project to python3
#a = extract_urls_urlextractor(tool_output)
#b = extract_urls_regex(tool_output)
#urls = list(set().union(a, b))
urls = extract_urls_regex(tool_output)
return urls


def is_url_in_scope(url):
workspace = lib.db.get_current_workspace()[0][0]
try:
parsed_url = urlparse.urlparse(url)
scheme = parsed_url[0]
if ":" in parsed_url[1]:
vhost, port = parsed_url[1].split(':')
else:
vhost = parsed_url[1]
if scheme == "http":
port = 80
elif scheme == "https":
port = 443
path = parsed_url[2].replace("//", "/")
except:
print("error parsing url")
if not scheme:
pass
in_scope = lib.db.is_vhost_in_db(vhost,workspace)
if in_scope:
return str(True),vhost,port,url.rstrip("/"),workspace
    else:
        return str(False),vhost,port,url.rstrip("/"),workspace

def insert_url_into_db(vhost,port,url,workspace):
db_path = (vhost, port, url, 0, "", workspace)
lib.db.insert_new_path(db_path)
print("Found Url: " + str(url))

def extract_in_scope_urls_from_task_output(tool_output):
urls = extract_urls(tool_output)
for url in urls:
exists,vhost,port,url,workspace = is_url_in_scope(url)
if exists == "True":
insert_url_into_db(vhost,port,url,workspace)
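
To see what the new extractor keeps and drops, here is a small sketch; it assumes it is run from the repository root with the celerystalk database already initialized (the module imports lib.db at load time), and the sample tool output is made up:

from parsers import generic_urlextract

sample_output = """
Spidered: http://10.0.0.5:8080/admin/login
Asset:    http://10.0.0.5:8080/static/app.js
"""
print(generic_urlextract.extract_urls(sample_output))
# The .js asset is dropped by the not_interesting_extensions filter, so only
# the /admin/login URL is returned.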




