Skip to content

Commit

Permalink
update to deep_scan.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayush RoyChowdhury committed Nov 21, 2024
1 parent 4a33d2f commit feb24ec
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions src/powerpwn/copilot_studio/modules/deep_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,69 @@ def run_pup_commands(existing_bots: List[str]):
return sort_unique_values_in_file(open_bots_path)
return []

def query_using_pup(open_bots: List[str]):
    """
    Execute the Puppeteer JavaScript code (query_chat.js) for each bot URL given.

    Any previous ``chat_exists_output.xlsx`` is removed first so the results only
    reflect this run. Failures of individual bots are logged and do not stop the
    remaining bots from being queried.

    :param open_bots: The list of bot URLs needed to check
    :return: The dictionary built by ``parse_chatbot_results`` from the output
        Excel file, or an empty dictionary if no output file exists after the run.
    """
    import logging
    import os
    import subprocess

    pup_path = get_project_file_path("tools/pup_query_webchat", "query_chat.js")
    bots_has_knowledge_path = get_project_file_path("final_results/", "chat_exists_output.xlsx")

    # Delete the existing Excel file to start fresh
    if os.path.exists(bots_has_knowledge_path):
        os.remove(bots_has_knowledge_path)
        logging.debug(f"Deleted existing file: {bots_has_knowledge_path}")

    for bot_url in open_bots:
        # Pass the arguments as a list and bypass the shell: bot URLs routinely
        # contain characters such as `&`, `?` or `;` that a shell would interpret,
        # which both breaks the query and allows command injection.
        command = ["node", str(pup_path), str(bot_url)]
        logging.debug(f"Running command: `{' '.join(command)}`")
        try:
            subprocess.run(command, check=True)  # nosec
        except subprocess.CalledProcessError as e:
            logging.error(f"Error occurred while running Puppeteer: {e}")
        except OSError as e:
            # Without a shell, a missing/unlaunchable `node` binary surfaces as
            # OSError; no later bot can succeed either, so stop trying.
            logging.error(f"Error occurred while running Puppeteer: {e}")
            break

    if os.path.exists(bots_has_knowledge_path):
        # Read the output Excel file and create a dictionary
        return parse_chatbot_results(bots_has_knowledge_path)

    return {}

def parse_chatbot_results(file_path):
    """
    Parses the output Excel file generated by query_chat.js and returns a dictionary.

    The sheet is read with the columns ``URL``, ``Has Knowledge`` and ``Titles``.
    Blank cells are treated as empty strings, rows without a URL are skipped, and
    ``Titles`` is split on semicolons with empty entries dropped.

    :param file_path: Path to the output Excel file.
    :return: Dictionary with bot URL as key and a dictionary with the keys
        ``has_knowledge`` (str) and ``titles`` (list of str) as value.
    """
    # Read the Excel file into a DataFrame
    df = pd.read_excel(file_path)

    bot_results = {}
    for _, row in df.iterrows():
        url = _cell_text(row.get('URL', ''))
        if not url:
            # A row without a URL cannot be keyed meaningfully
            continue

        # Coerce to text first: a purely numeric title cell is not a str and
        # would otherwise fail on .split()
        titles_text = _cell_text(row.get('Titles', ''))
        titles = [title.strip() for title in titles_text.split(';') if title.strip()]

        bot_results[url] = {
            'has_knowledge': _cell_text(row.get('Has Knowledge', '')),
            'titles': titles
        }

    return bot_results


def _cell_text(value):
    """Return a spreadsheet cell as stripped text, mapping blank (None/NaN) cells to ''."""
    if value is None or pd.isna(value):
        return ''
    return str(value).strip()

def camel_case_split(identifier: str):
"""
Expand Down

0 comments on commit feb24ec

Please sign in to comment.