Skip to content

Commit

Permalink
Fix SSH Task and other required states (#1368)
Browse files Browse the repository at this point in the history
* Fix task required states

* fix path_specs def

* Update mount requirements

* Fix required states
  • Loading branch information
aarontp authored Oct 16, 2023
1 parent dc81ee7 commit 41f3fac
Show file tree
Hide file tree
Showing 11 changed files with 40 additions and 29 deletions.
19 changes: 10 additions & 9 deletions turbinia/evidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def from_dict(cls, dictionary):

def serialize_attribute(self, name: str) -> str:
"""Returns JSON serialized attribute.
Args:
name(str): the name of the attribute that will be serialized.
Returns:
Expand All @@ -381,7 +381,7 @@ def serialize(self, json_values: bool = False):
"""Returns a JSON serialized object. The function will return A string
containing the serialized evidence_dict or a dict of serialized attributes
if json_values is true.
Args:
json_values(bool): Returns only values of the dictionary as json strings
instead of the entire dictionary.
Expand Down Expand Up @@ -600,7 +600,7 @@ def format_state(self):

def _validate(self):
"""Runs additional logic to validate evidence requirements.
Evidence subclasses can override this method to perform custom
validation of evidence objects.
"""
Expand Down Expand Up @@ -840,27 +840,28 @@ def _preprocess(self, _, required_states):

# We need to enumerate partitions in preprocessing so the path_specs match
# the parent evidence location for each task.
path_specs = None
try:
# We should only get one path_spec here since we're specifying the location.
path_specs = partitions.Enumerate(
self.parent_evidence, self.partition_location)
except TurbiniaException as exception:
log.error(exception)

if len(path_specs) > 1:
if not path_specs:
raise TurbiniaException(
f'Could not find path_spec for location {self.partition_location:s}')
elif path_specs and len(path_specs) > 1:
path_specs_dicts = [path_spec.CopyToDict() for path_spec in path_specs]
raise TurbiniaException(
'Found more than one path_spec for {0:s} {1:s}: {2!s}'.format(
self.parent_evidence.name, self.partition_location,
path_specs_dicts))
elif len(path_specs) == 1:
elif path_specs and len(path_specs) == 1:
self.path_spec = path_specs[0]
log.debug(
'Found path_spec {0!s} for parent evidence {1:s}'.format(
self.path_spec.CopyToDict(), self.parent_evidence.name))
else:
raise TurbiniaException(
f'Could not find path_spec for location {self.partition_location:s}')

# In attaching a partition, we create a new loopback device using the
# partition offset and size.
Expand Down Expand Up @@ -1051,7 +1052,7 @@ def __init__(self, plaso_version=None, *args, **kwargs):

def _validate(self):
"""Validates whether the Plaso file contains any events.
Raises:
TurbiniaException: if validation fails.
"""
Expand Down
1 change: 1 addition & 0 deletions turbinia/workers/analysis/jenkins.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
class JenkinsAnalysisTask(TurbiniaTask):
"""Task to analyze a Jenkins install."""

# Does not need to be MOUNTED as this Task uses extract_files()
REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]

TASK_CONFIG = {
Expand Down
3 changes: 2 additions & 1 deletion turbinia/workers/analysis/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
class JupyterAnalysisTask(TurbiniaTask):
"""Task to analyze a Jupyter Notebook config."""

REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]
# Input Evidence is ExportedFileArtifact so does not need to be pre-processed.
REQUIRED_STATES = []

def run(self, evidence, result):
"""Run the Jupyter worker.
Expand Down
1 change: 1 addition & 0 deletions turbinia/workers/analysis/linux_acct.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
class LinuxAccountAnalysisTask(TurbiniaTask):
"""Task to analyze a Linux password file."""

# Does not need to be MOUNTED as this Task uses extract_artifacts()
REQUIRED_STATES = [
state.ATTACHED, state.CONTAINER_MOUNTED, state.DECOMPRESSED
]
Expand Down
31 changes: 16 additions & 15 deletions turbinia/workers/analysis/ssh_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(
def calculate_session_id(self) -> None:
"""Calculates pseudo session_id for SSH login.
    The pseudo session_id is based on date, hostname, username, source_ip,
and source_port.
"""
# TODO(rmaskey): Find a better way to generate pseudo session_id. Current
Expand All @@ -81,7 +81,8 @@ def calculate_session_id(self) -> None:
class LinuxSSHAnalysisTask(TurbiniaTask):
"""Task to analyze Linux SSH authentication."""

REQUIRED_STATES = [state.MOUNTED, state.CONTAINER_MOUNTED]
# Does not need to be MOUNTED as this Task uses extract_artifacts()
REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]

# Log year validation
# The minimum supported log year
Expand Down Expand Up @@ -150,10 +151,10 @@ class LinuxSSHAnalysisTask(TurbiniaTask):

def read_logs(self, log_dir: str) -> pd.DataFrame:
"""Reads SSH logs directory and returns Pandas dataframe.
Args:
log_dir (str): Directory containing SSH authentication log.
Returns:
pd.DataFrame: Returns Pandas dataframe.
"""
Expand Down Expand Up @@ -226,12 +227,12 @@ def read_logs(self, log_dir: str) -> pd.DataFrame:
def parse_message_datetime(
self, message_datetime: List, log_year: int) -> datetime:
"""Parses and returns datetime.
Args:
message_datetime (List[str]): A list containing syslog datetime separated
by spaces e.g. Feb 8 13:30:45 for Debian, and Red Hat, and
2023-02-08T13:30:45.123456+11:00 for OpenSUSE.
      log_year (int): A user provided log year for SSH events. The log year is
not captured by syslog and this is either provided by user or guessed
based on the last SSH event and current date/time.
Expand Down Expand Up @@ -261,14 +262,14 @@ def parse_message_datetime(
def read_log_data(self, data, log_filename: str,
log_year: int = None) -> List[SSHEventData]:
"""Parses SSH log data and returns a list of SSHEventData.
Args:
data (str): Content of authentication log file.
log_filename (str): Name of the log file whose content is read.
log_year (int): SSH authentication log year.
Returns:
      List(SSHEventData): Returns SSH events as list of SSHEventData.
"""
# check valid year is provided
# If valid year isn't provided raise error
Expand Down Expand Up @@ -356,10 +357,10 @@ def read_log_data(self, data, log_filename: str,

def get_priority_value(self, priority_string: str) -> Priority:
"""Returns priority value.
Args:
priority_string (str): Priority values as string e.g. HIGH, MEDIUM, LOW
Returns:
Priority: Returns priority value of priority_string.
"""
Expand All @@ -374,15 +375,15 @@ def get_priority_value(self, priority_string: str) -> Priority:

def brute_force_analysis(self, df: pd.DataFrame) -> Tuple[Priority, str, str]:
"""Runs brute force analysis.
Args:
df (pd.DataFrame): Pandas dataframe of SSH events.
Returns:
Tuple[Priority, str, str]: Returns brute force analysis result as tuple.
Priority: Priority of the findings.
str: Brief summary of the findings.
        str: Detailed information as markdown.
"""
bfa = BruteForceAnalyzer()

Expand Down Expand Up @@ -416,7 +417,7 @@ def run(
Args:
evidence (Evidence object): The evidence being processed by analyzer.
result (TurbiniaTaskResult): The object to place task results into.
Returns:
TurbiniaTaskResult object.
"""
Expand Down
1 change: 1 addition & 0 deletions turbinia/workers/analysis/windows_acct.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
class WindowsAccountAnalysisTask(TurbiniaTask):
"""Task to analyze Windows accounts."""

# Does not need to be MOUNTED as this Task uses extract_artifacts()
REQUIRED_STATES = [
state.ATTACHED, state.CONTAINER_MOUNTED, state.DECOMPRESSED
]
Expand Down
3 changes: 2 additions & 1 deletion turbinia/workers/analysis/wordpress_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
class WordpressAccessLogAnalysisTask(TurbiniaTask):
"""Task to analyze Wordpress access logs."""

REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]
# Input Evidence is ExportedFileArtifact so does not need to be preprocessed.
REQUIRED_STATES = []

timestamp_regex = re.compile(r'\[(?P<timestamp>.+)\]')

Expand Down
1 change: 1 addition & 0 deletions turbinia/workers/analysis/wordpress_creds.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
class WordpressCredsAnalysisTask(TurbiniaTask):
"""Task to analyze the credentials of a Wordpress instance."""

# Does not need to be MOUNTED as uses extract_files()
REQUIRED_STATES = [
state.ATTACHED, state.CONTAINER_MOUNTED, state.DECOMPRESSED
]
Expand Down
3 changes: 2 additions & 1 deletion turbinia/workers/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
class RedisAnalysisTask(TurbiniaTask):
"""Task to analyze a Redis configuration file."""

REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]
# Input Evidence is ExportedFileArtifact so does not need to be pre-processed.
REQUIRED_STATES = []

def run(self, evidence, result):
"""Run the Redis configuration analysis worker.
Expand Down
3 changes: 2 additions & 1 deletion turbinia/workers/sshd.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
class SSHDAnalysisTask(TurbiniaTask):
"""Task to analyze a sshd_config file."""

REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]
# Input Evidence is ExportedFileArtifact so does not need to be pre-processed.
REQUIRED_STATES = []

def run(self, evidence, result):
"""Run the sshd_config analysis worker.
Expand Down
3 changes: 2 additions & 1 deletion turbinia/workers/tomcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
class TomcatAnalysisTask(TurbiniaTask):
"""Task to analyze a Tomcat file."""

REQUIRED_STATES = [state.ATTACHED, state.CONTAINER_MOUNTED]
# Input Evidence is ExportedFileArtifact so does not need to be pre-processed.
REQUIRED_STATES = []

def run(self, evidence, result):
"""Run the Tomcat analysis worker.
Expand Down

0 comments on commit 41f3fac

Please sign in to comment.