Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] ChatGPT data enrichment #2392

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions mobsf/MobSF/chat_gpt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Chat GPT module for MobSF."""
import logging

import openai

from django.conf import settings

logger = logging.getLogger(__name__)


class ChatGPT:
    """Small wrapper around the OpenAI chat completions client.

    The model used for every request is read once from
    ``settings.OPENAI_GPT_MODEL`` when the wrapper is created.
    """

    def __init__(self, api_key):
        """Create the OpenAI client.

        Args:
            api_key: API key handed to ``openai.Client``.
        """
        self.gpt_client = openai.Client(api_key=api_key)
        self.gpt_model = settings.OPENAI_GPT_MODEL

    def get_available_models(self):
        """Return the set of model ids listed by the API client."""
        return {model.id for model in self.gpt_client.models.list()}

    def _available_models_for_log(self):
        """Return the available model ids for logging, without raising.

        Listing models is a second API request; when it fails inside an
        error handler the failure must not escape, so it is logged and a
        placeholder is returned instead.
        """
        try:
            return sorted(self.get_available_models())
        except Exception:
            logger.exception('Could not list available OpenAI models')
            return 'unknown'

    def chat(self, messages):
        """Chat with GPT.

        Args:
            messages: Chat messages forwarded unchanged as the
                ``messages`` argument of ``chat.completions.create``.

        Returns:
            The message content of the first returned choice, or
            ``None`` when the request failed. Failures are logged and
            never raised to the caller.
        """
        try:
            response = self.gpt_client.chat.completions.create(
                messages=messages,
                temperature=0,
                model=self.gpt_model,
                n=1,
            )
            return response.choices[0].message.content
        except openai.APIConnectionError:
            logger.error('The server could not be reached')
        except openai.RateLimitError:
            logger.error('You\'ve hit the OpenAI API rate limit')
        except openai.NotFoundError:
            # The model lookup goes through a guarded helper so that a
            # failing lookup cannot raise out of this handler.
            logger.error(
                'The requested model %s is not available. '
                'Available models: %s',
                self.gpt_model,
                self._available_models_for_log())
        except openai.APIStatusError as e:
            # Must stay after RateLimitError/NotFoundError so those more
            # specific handlers are tried first.
            logger.error('OpenAI API is returning an error')
            logger.error(e.status_code)
            logger.error(e.response)
        except Exception:
            logger.exception('Chat with GPT failed')
        return None
1 change: 1 addition & 0 deletions mobsf/MobSF/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def get_executable_hashes():
'objdump',
'strings',
'xcrun',
'file',
'BinSkim.exe',
'BinScope.exe',
'nuget.exe',
Expand Down
3 changes: 3 additions & 0 deletions mobsf/MobSF/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@
# SP Configuration
SP_HOST = os.getenv('MOBSF_SP_HOST')
SP_ALLOW_PASSWORD = os.getenv('MOBSF_SP_ALLOW_PASSWORD', '0')
# LLM Integration
# OpenAI API key, read from the MOBSF_OPENAI_API_KEY environment variable
# (no default is supplied here).
OPENAI_API_KEY = os.getenv('MOBSF_OPENAI_API_KEY')
# Chat model name, read from MOBSF_OPENAI_GPT_MODEL; falls back to
# 'gpt-3.5-turbo' when the variable is not set.
OPENAI_GPT_MODEL = os.getenv('MOBSF_OPENAI_GPT_MODEL', 'gpt-3.5-turbo')
# ===================
# USER CONFIGURATION
# ===================
Expand Down
7 changes: 7 additions & 0 deletions mobsf/StaticAnalyzer/views/android/code_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def code_analysis(app_dir, typ, manifest_file, android_permissions):
app_dir = Path(app_dir)
src = get_android_src_dir(app_dir, typ).as_posix() + '/'
skp = settings.SKIP_CLASS_PATH
from mobsf.StaticAnalyzer.views.common.prompts import (
AndroidPrompts,
)
ap = AndroidPrompts()
if ap:
out = ap.package_name_identifier(src)
print(out)
logger.info('Code Analysis Started on - %s',
filename_from_path(src))
# Code Analysis
Expand Down
5 changes: 5 additions & 0 deletions mobsf/StaticAnalyzer/views/android/static_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
quark,
)
from mobsf.MalwareAnalyzer.views.MalwareDomainCheck import MalwareDomainCheck
from mobsf.StaticAnalyzer.views.common.prompts import AndroidPrompts

from django.conf import settings
from django.http import HttpResponseRedirect
Expand Down Expand Up @@ -253,6 +254,10 @@ def static_analyzer(request, checksum, api=False):
app_dic['app_dir'],
app_dic['md5'],
'elf')
ap = AndroidPrompts()
if ap:
out = ap.shared_object_identifier(elf_dict['elf_analysis'])
print(out)
cert_dic = cert_info(
apk,
app_dic,
Expand Down
53 changes: 53 additions & 0 deletions mobsf/StaticAnalyzer/views/common/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Chat GPT prompts for MobSF."""
import logging
from pathlib import Path

from django.conf import settings

from mobsf.MobSF.chat_gpt import ChatGPT

logger = logging.getLogger(__name__)


class AndroidPrompts:
    """Build ChatGPT prompts that enrich Android static analysis data.

    An instance is falsy when no OpenAI API key is configured, so callers
    can guard with ``if ap:``. The prompt methods also return ``None`` in
    that case instead of failing.
    """

    def __init__(self):
        """Create the ChatGPT client when ``settings.OPENAI_API_KEY`` is set."""
        # Always define the attribute: returning early without it made
        # every later method call fail with AttributeError.
        self.gpt = None
        if settings.OPENAI_API_KEY:
            self.gpt = ChatGPT(settings.OPENAI_API_KEY)

    def __bool__(self):
        """Return True only when a ChatGPT client is available."""
        return self.gpt is not None

    def shared_object_identifier(self, shared_objects):
        """Identify shared object.

        Args:
            shared_objects: Iterable of shared object entries to put in
                the prompt.

        Returns:
            The reply of ``self.gpt.chat`` (requested as JSON text), or
            ``None`` when no client is configured or there is nothing
            to identify.
        """
        if self.gpt is None:
            return None
        list_of_shared_objects = list(shared_objects)
        if not list_of_shared_objects:
            return None
        # Adjacent literals are concatenated as-is, so each sentence
        # carries its own trailing space.
        messages = [
            {'role': 'system', 'content': (
                'You are analyzing shared object files for Android '
                'applications as a Static Analyzer. '
                'You must always be truthful. '
                'Your responses should always be in json format.')},
            {'role': 'user', 'content': (
                'Identify the SDK or Company from the shared object files '
                'is used in the following list. '
                f'{list_of_shared_objects}. '
                'The resulting json response should be a list of dicts '
                'with two keys the file_name and company_name. '
                'The output should not be broken, and must be a valid '
                'json.')},
        ]
        return self.gpt.chat(messages)

    def package_name_identifier(self, source_dir):
        """Identify package name.

        Walks ``source_dir`` and derives a dotted package name from the
        directory of every file found, then asks the model to map those
        package names to libraries or SDKs.

        Args:
            source_dir: Root directory of the decompiled sources.

        Returns:
            The reply of ``self.gpt.chat`` (requested as JSON text), or
            ``None`` when no client is configured or no package was
            found.
        """
        if self.gpt is None:
            return None
        src = Path(source_dir)
        # NOTE(review): code scanning (CodeQL, severity High) reports
        # "Uncontrolled data used in path expression" for this glob
        # because the path depends on a user-provided value. Confirm the
        # caller only passes directories created by the analyzer itself.
        packages = {
            file_path.parent.relative_to(src).as_posix().replace('/', '.')
            for file_path in src.glob('**/*')
            if file_path.is_file()
        }
        # Files located directly in the source root have no package and
        # would otherwise show up as '.'.
        packages.discard('.')
        if not packages:
            return None
        # Sorted for a deterministic prompt.
        package_list = sorted(packages)
        messages = [
            {'role': 'system', 'content': (
                'You are analyzing Android application java source code '
                'as a Static Analyzer. '
                'You must always be truthful. '
                'Your responses should always be in json format.')},
            {'role': 'user', 'content': (
                'Identify the library or SDK name from the following '
                f'package names. {package_list}. '
                'Ignore the package name if you cannot identify the '
                'library or SDK name. '
                'The resulting json response should be a list of dict '
                'with the keys library_name and package_name. '
                'The output should not be broken, and must be a valid '
                'json.')},
        ]
        return self.gpt.chat(messages)
Loading
Loading