From a26ac60058ac9a697bd632a375e8c2f0f1c48131 Mon Sep 17 00:00:00 2001
From: Juan Leaniz <leaniz@google.com>
Date: Fri, 9 Aug 2024 10:54:16 -0500
Subject: [PATCH] Added new API endpoints (logs, evidence) (#1527)

* Added logs api endpoints

* Updates to tail function and tests

* Updates

* Fix type annotations

* Add test log file

* API response fixes, unit tests

* Fix unit test

* Avoid double logging to file handler

* Update logger

* Make num_lines an optional query parameter instead

* Int validation and fix unit test

* Update OpenAPI spec

* Updates per review comments
---
 test_data/turbinia_logs.log     |  10 +++
 turbinia/api/api_server_test.py |  44 +++++++++++-
 turbinia/api/openapi.yaml       | 121 ++++++++++++++++++++++++++++++--
 turbinia/api/routes/download.py |   9 +--
 turbinia/api/routes/evidence.py |  36 +++++++++-
 turbinia/api/routes/logs.py     |  58 +++++++++++++--
 turbinia/api/routes/result.py   |  39 ++++------
 turbinia/api/utils.py           |  33 +++++++++
 turbinia/config/logger.py       |  19 +++--
 9 files changed, 314 insertions(+), 55 deletions(-)
 create mode 100644 test_data/turbinia_logs.log

diff --git a/test_data/turbinia_logs.log b/test_data/turbinia_logs.log
new file mode 100644
index 000000000..56e08da24
--- /dev/null
+++ b/test_data/turbinia_logs.log
@@ -0,0 +1,10 @@
+2024-08-08 16:52:48,110 [INFO] uvicorn.error | Application startup complete.
+2024-08-08 16:52:48,124 [INFO] uvicorn.error | Started server process [1232189]
+2024-08-08 16:52:48,124 [INFO] uvicorn.error | Waiting for application startup.
+2024-08-08 16:52:48,124 [INFO] uvicorn.error | Application startup complete.
+2024-08-08 16:52:48,136 [INFO] uvicorn.error | Started server process [1232191]
+2024-08-08 16:52:48,137 [INFO] uvicorn.error | Waiting for application startup.
+2024-08-08 16:52:48,137 [INFO] uvicorn.error | Application startup complete.
+2024-08-08 16:52:50,468 [INFO] uvicorn.access | 127.0.0.1:39730 - "GET /api/logs/api_server/100 HTTP/1.1" 200
+2024-08-08 16:53:26,287 [INFO] uvicorn.access | 127.0.0.1:53206 - "GET /api/logs/logs/ed6c72f82d42/100 HTTP/1.1" 200
+2024-08-08 16:53:51,856 [INFO] uvicorn.access | 127.0.0.1:33544 - "GET /openapi.yaml HTTP/1.1" 200
\ No newline at end of file
diff --git a/turbinia/api/api_server_test.py b/turbinia/api/api_server_test.py
index d5f484847..73c910788 100644
--- a/turbinia/api/api_server_test.py
+++ b/turbinia/api/api_server_test.py
@@ -14,8 +14,6 @@
 # limitations under the License.
 """Turbinia API server unit tests."""
 
-import importlib
-
 from collections import OrderedDict
 
 import datetime
@@ -639,3 +637,45 @@ def testEvidenceUpload(self, mock_datetime, mock_get_attribute):
       mocked_file.assert_called_with(expected_evidence_2_path, 'wb')
     self.assertEqual(response.status_code, 200)
     self.assertEqual(json.loads(response.content), expected_response)
+
+  def testDownloadEvidenceByIdNotFound(self):
+    """Test downloading non existent evidence by its UUID."""
+    evidence_id = 'invalid_id'
+    response = self.client.get(f'/api/download/output/{evidence_id}')
+    self.assertEqual(response.status_code, 404)
+
+  @mock.patch('turbinia.state_manager.RedisStateManager.get_evidence_data')
+  @mock.patch('turbinia.redis_client.RedisClient.key_exists')
+  def testDownloadEvidenceById(self, testKeyExists, testEvidenceData):
+    """Test downloading evidence by its UUID."""
+    evidence_id = '084d5904f3d2412b99dc29ed34853a16'
+    testKeyExists.return_value = True
+    testEvidenceData.return_value = self._EVIDENCE_TEST_DATA
+    self._EVIDENCE_TEST_DATA['copyable'] = True
+    turbinia_config.OUTPUT_DIR = str(
+        os.path.dirname(os.path.realpath(__file__)))
+    response = self.client.get(f'/api/evidence/download/{evidence_id}')
+    filedir = os.path.dirname(os.path.realpath(__file__))
+    test_data_dir = os.path.join(filedir, '..', '..', 'test_data')
+    with open(f'{test_data_dir}/artifact_disk.dd', 'rb') as f:
+      expected = f.read()
+    self.assertEqual(response.content, expected)
+    self._EVIDENCE_TEST_DATA['copyable'] = False
+    response = self.client.get(f'/api/evidence/download/{evidence_id}')
+    self.assertEqual(response.status_code, 400)
+
+  def testGetTurbiniaLogs(self):
+    """Test the /logs API endpoint."""
+    hostname = 'turbinia_logs'
+    filedir = os.path.dirname(os.path.realpath(__file__))
+    test_data_dir = os.path.join(filedir, '..', '..', 'test_data')
+    turbinia_config.LOG_DIR = test_data_dir
+    with open(f'{test_data_dir}/turbinia_logs.log', 'rb') as f:
+      expected = f.read()
+    response = self.client.get(f'/api/logs/{hostname}?num_lines=10')
+    self.assertEqual(response.content, expected)
+    response = self.client.get(f'/api/logs/{hostname}?num_lines=5')
+    self.assertNotEqual(response.content, expected)
+    hostname = 'invalid_hostname'
+    response = self.client.get(f'/api/logs/{hostname}?num_lines=10')
+    self.assertEqual(response.status_code, 404)
diff --git a/turbinia/api/openapi.yaml b/turbinia/api/openapi.yaml
index e1d32999a..074a5e6a8 100644
--- a/turbinia/api/openapi.yaml
+++ b/turbinia/api/openapi.yaml
@@ -246,6 +246,36 @@ paths:
       summary: Get Task Report
       tags:
       - Turbinia Tasks
+  /api/evidence/download/{evidence_id}:
+    get:
+      description: "Retrieves an evidence in Redis by using its UUID.\n\nArgs:\n \
+        \ evidence_id (str): The UUID of the evidence.\n\nRaises:\n  HTTPException:\
+        \ if the evidence is not found.\n\nReturns:\n  FileResponse: The evidence\
+        \ file."
+      operationId: download_by_evidence_id
+      parameters:
+      - in: path
+        name: evidence_id
+        required: true
+        schema:
+          title: Evidence Id
+      responses:
+        '200':
+          content:
+            application/octet-stream:
+              schema:
+                format: binary
+                type: string
+          description: Successful Response
+        '422':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+          description: Validation Error
+      summary: Download By Evidence Id
+      tags:
+      - Turbinia Evidence
   /api/evidence/query:
     get:
       description: "Queries evidence in Redis that have the specified attribute value.\n\
@@ -491,22 +521,99 @@ paths:
       summary: Read Jobs
       tags:
       - Turbinia Jobs
-  /api/logs/{query}:
+  /api/logs/api_server:
     get:
       description: Retrieve log data.
-      operationId: get_logs
+      operationId: get_api_server_logs
+      parameters:
+      - in: query
+        name: num_lines
+        required: false
+        schema:
+          default: 500
+          exclusiveMinimum: 0.0
+          title: Num Lines
+          type: integer
+      responses:
+        '200':
+          content:
+            application/text:
+              schema:
+                type: string
+          description: Successful Response
+        '422':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+          description: Validation Error
+      summary: Get Api Server Logs
+      tags:
+      - Turbinia Logs
+  /api/logs/server:
+    get:
+      description: Retrieve log data.
+      operationId: get_server_logs
+      parameters:
+      - in: query
+        name: num_lines
+        required: false
+        schema:
+          default: 500
+          exclusiveMinimum: 0.0
+          title: Num Lines
+          type: integer
+      responses:
+        '200':
+          content:
+            application/text:
+              schema:
+                type: string
+          description: Successful Response
+        '422':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+          description: Validation Error
+      summary: Get Server Logs
+      tags:
+      - Turbinia Logs
+  /api/logs/{hostname}:
+    get:
+      description: 'Retrieve log data.
+
+
+        Turbinia currently stores logs on plaintext files. The log files are named
+
+        <hostname>.log for each instance of a worker, server or API server.
+
+
+        In some deployments, the same file can contain all logs (e.g. running all
+
+        services locally in the same container).'
+      operationId: get_turbinia_logs
       parameters:
       - in: path
-        name: query
+        name: hostname
         required: true
         schema:
-          title: Query
+          title: Hostname
           type: string
+      - in: query
+        name: num_lines
+        required: false
+        schema:
+          default: 500
+          exclusiveMinimum: 0.0
+          title: Num Lines
+          type: integer
       responses:
         '200':
           content:
-            application/json:
-              schema: {}
+            application/text:
+              schema:
+                type: string
           description: Successful Response
         '422':
           content:
@@ -514,7 +621,7 @@ paths:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
           description: Validation Error
-      summary: Get Logs
+      summary: Get Turbinia Logs
       tags:
       - Turbinia Logs
   /api/request/:
diff --git a/turbinia/api/routes/download.py b/turbinia/api/routes/download.py
index 3c1c6cf4e..e9644e741 100644
--- a/turbinia/api/routes/download.py
+++ b/turbinia/api/routes/download.py
@@ -23,12 +23,13 @@
 
 from turbinia import config as turbinia_config
 
-#log = logging.getLogger(__name__)
+log = logging.getLogger(__name__)
 router = APIRouter(prefix='/download', tags=['Turbinia Download'])
 
 
-@router.get('/output/{file_path:path}')
-async def download_output_path(request: Request, file_path):
+@router.get('/output/{file_path:path}', response_class=FileResponse)
+async def download_output_path(
+    request: Request, file_path: str) -> FileResponse:
   """Downloads output file path.
   
   Args:
@@ -42,7 +43,7 @@ async def download_output_path(request: Request, file_path):
   requested_file = pathlib.Path(file_path).resolve()
   if requested_file.is_relative_to(
       config_output_dir) and requested_file.is_file():
-    return FileResponse(requested_file)
+    return FileResponse(requested_file, media_type='application/octet-stream')
 
   raise HTTPException(
       status_code=404, detail='Access denied or file not found!')
diff --git a/turbinia/api/routes/evidence.py b/turbinia/api/routes/evidence.py
index 4ea0398e9..ea7352db6 100644
--- a/turbinia/api/routes/evidence.py
+++ b/turbinia/api/routes/evidence.py
@@ -21,7 +21,7 @@
 from datetime import datetime
 from fastapi import HTTPException, APIRouter, UploadFile, Query, Form
 from fastapi.requests import Request
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, FileResponse
 from typing import List, Annotated
 
 from turbinia import evidence
@@ -263,3 +263,37 @@ async def upload_evidence(
     else:
       evidences.append(file_info)
   return JSONResponse(content=evidences, status_code=200)
+
+
+@router.get('/download/{evidence_id}', response_class=FileResponse)
+async def download_by_evidence_id(
+    request: Request, evidence_id) -> FileResponse:
+  """Downloads an evidence file based in its UUID.
+
+  Args:
+    evidence_id (str): The UUID of the evidence.
+  
+  Raises:
+    HTTPException: if the evidence is not found.
+
+  Returns:
+    FileResponse: The evidence file.
+  """
+  evidence_key = redis_manager.redis_client.build_key_name(
+      'evidence', evidence_id)
+  if redis_manager.redis_client.key_exists(evidence_key):
+    data: dict = redis_manager.get_evidence_data(evidence_id)
+    file_path: str = None
+    if not data['copyable']:
+      raise HTTPException(status_code=400, detail='Evidence is not copyable.')
+    if data['source_path']:
+      file_path = data['source_path']
+    elif data['local_path']:
+      file_path = data['local_path']
+
+    if file_path and os.path.exists(file_path):
+      filename = os.path.basename(file_path)
+      return FileResponse(file_path, filename=filename)
+  raise HTTPException(
+      status_code=404,
+      detail=f'UUID {evidence_id} not found or it had no associated evidence.')
diff --git a/turbinia/api/routes/logs.py b/turbinia/api/routes/logs.py
index 00535b0b8..149a0f4fa 100644
--- a/turbinia/api/routes/logs.py
+++ b/turbinia/api/routes/logs.py
@@ -15,18 +15,68 @@
 """Turbinia API - Logs router"""
 
 import logging
+import os
 
-from fastapi import APIRouter
-from fastapi.responses import JSONResponse
+from pathlib import Path
+
+from fastapi import APIRouter, Query
+from fastapi.responses import JSONResponse, PlainTextResponse
 from fastapi.requests import Request
+from turbinia import config
+from turbinia.api import utils
 
 log = logging.getLogger(__name__)
 
 router = APIRouter(prefix='/logs', tags=['Turbinia Logs'])
 
 
-@router.get('/{query}')
-async def get_logs(request: Request, query: str):
+@router.get('/server')
+async def get_server_logs(
+    request: Request, num_lines: int | None = Query(default=500, gt=0)
+) -> PlainTextResponse:
   """Retrieve log data."""
   return JSONResponse(
       content={'detail': 'Not implemented yet.'}, status_code=200)
+
+
+@router.get('/api_server')
+async def get_api_server_logs(
+    request: Request, num_lines: int | None = Query(default=500, gt=0)
+) -> PlainTextResponse:
+  """Retrieve log data."""
+  hostname = os.uname().nodename
+  log_name = f'{hostname}.log'
+  log_path = Path(config.LOG_DIR, log_name)
+  log_lines = utils.tail_log(log_path, num_lines)
+  if log_path:
+    return PlainTextResponse(log_lines)
+  return JSONResponse(
+      content={'detail': f'No logs found for {hostname}'}, status_code=404)
+
+
+@router.get('/{hostname}')
+async def get_turbinia_logs(
+    request: Request, hostname: str, num_lines: int | None = Query(
+        default=500, gt=0)
+) -> PlainTextResponse:
+  """Retrieve log data.
+  
+  Turbinia currently stores logs on plaintext files. The log files are named
+  <hostname>.log for each instance of a worker, server or API server.
+
+  In some deployments, the same file can contain all logs (e.g. running all
+  services locally in the same container).
+  """
+  if not hostname:
+    return JSONResponse(content={'detail': 'Invalid hostname'}, status_code=404)
+
+  if 'NODE_NAME' in os.environ:
+    log_name = f'{hostname}.{os.environ["NODE_NAME"]!s}'
+  else:
+    log_name = f'{hostname}.log'
+  log_path = Path(config.LOG_DIR, log_name)
+  log_lines = utils.tail_log(log_path, num_lines)
+  if log_lines:
+    return PlainTextResponse(log_lines)
+  return JSONResponse(
+      content={'detail': f'No logs found for {hostname}'}, status_code=404)
diff --git a/turbinia/api/routes/result.py b/turbinia/api/routes/result.py
index c727c20ca..928a7949b 100644
--- a/turbinia/api/routes/result.py
+++ b/turbinia/api/routes/result.py
@@ -21,29 +21,15 @@
 from fastapi.responses import StreamingResponse, FileResponse
 from fastapi.requests import Request
 
+from turbinia import TurbiniaException
 from turbinia.api import utils as api_utils
 
 log = logging.getLogger(__name__)
 
 router = APIRouter(prefix='/result', tags=['Turbinia Request Results'])
 
-ATTACHMENT_RESPONSE = {
-    '200': {
-        'content': {
-            'application/octet-stream': {
-                'schema': {
-                    'type': 'string',
-                    'format': 'binary'
-                }
-            }
-        }
-    }
-}
-
-
-@router.get(
-    '/task/{task_id}', response_class=StreamingResponse,
-    responses=ATTACHMENT_RESPONSE)
+
+@router.get('/task/{task_id}', response_class=StreamingResponse)
 async def get_task_output(request: Request, task_id: str) -> StreamingResponse:
   """Retrieves a task's output files."""
   # Get the request_id for the task. This is needed to find the right path.
@@ -59,15 +45,13 @@ async def get_task_output(request: Request, task_id: str) -> StreamingResponse:
         api_utils.create_tarball(output_path), headers={
             'Content-Disposition': f'attachment;filename={task_id}.tgz',
             'Transfer-Encoding': 'chunked'
-        })
+        }, media_type='application/octet-stream')
   else:
     raise HTTPException(
         status_code=404, detail='The requested file was not found.')
 
 
-@router.get(
-    '/request/{request_id}', response_class=StreamingResponse,
-    responses=ATTACHMENT_RESPONSE)
+@router.get('/request/{request_id}', response_class=StreamingResponse)
 async def get_request_output(
     request: Request, request_id: str) -> StreamingResponse:
   """Retrieve request output."""
@@ -81,23 +65,24 @@ async def get_request_output(
         api_utils.create_tarball(request_output_path), headers={
             'Content-Disposition': f'attachment;filename={request_id}.tgz',
             'Transfer-Encoding': 'chunked'
-        })
+        }, media_type='application/octet-stream')
   else:
     raise HTTPException(
         status_code=404, detail='The requested file was not found.')
 
 
-@router.get(
-    '/plasofile/{task_id}', response_class=FileResponse,
-    responses=ATTACHMENT_RESPONSE)
+@router.get('/plasofile/{task_id}', response_class=FileResponse)
 async def get_plaso_file(request: Request, task_id: str) -> FileResponse:
   """Retrieves a task's Plaso file."""
   if not task_id:
     raise HTTPException(status_code=400, detail='Task identifier not provided.')
 
-  plaso_file_path = api_utils.get_plaso_file_path(task_id)
-  filename = f'{task_id}.plaso'
+  try:
+    plaso_file_path = api_utils.get_plaso_file_path(task_id)
+  except TurbiniaException as exception:
+    raise HTTPException(status_code=404, detail=str(exception)) from exception
 
+  filename = f'{task_id}.plaso'
   if os.path.exists(plaso_file_path):
     log.info(f'Sending {plaso_file_path} to client.')
     return FileResponse(plaso_file_path, filename=filename)
diff --git a/turbinia/api/utils.py b/turbinia/api/utils.py
index a90ce4199..30ad0d37a 100644
--- a/turbinia/api/utils.py
+++ b/turbinia/api/utils.py
@@ -189,3 +189,36 @@ async def create_tarball(output_path: str) -> AsyncGenerator[bytes, Any]:
 
     # Yield end-of-archive marker.
     yield stream.pop()
+
+
+def tail_log(log_path, max_lines=500) -> str:
+  """Reads a log file and returns the last max_lines."""
+  if not os.path.exists(log_path) and not os.path.isfile(log_path):
+    return ''
+
+  fsize = os.path.getsize(log_path)
+  buffsize = 8192
+  nlines = 0
+  log_lines = []
+
+  with open(log_path, 'rb') as file:
+    offset = fsize
+    while nlines < max_lines:
+      diff = offset - buffsize
+      if diff > 0:
+        offset = file.seek(offset - buffsize)
+
+      current_lines = file.readlines()
+      nlines += len(current_lines)
+
+      if nlines > max_lines:
+        current_lines = current_lines[-max_lines:]
+
+      log_lines.extend(current_lines)
+      if diff > 0:
+        offset = file.seek(-buffsize, 1)
+
+  for i in range(len(log_lines)):
+    log_lines[i] = log_lines[i].decode()
+
+  return ''.join(log_lines)
diff --git a/turbinia/config/logger.py b/turbinia/config/logger.py
index e4a5bc14f..cbc1e5a39 100644
--- a/turbinia/config/logger.py
+++ b/turbinia/config/logger.py
@@ -40,9 +40,12 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None):
       'ignore', 'Your application has authenticated using end user credentials')
 
   logger = logging.getLogger('turbinia')
+  uvicorn_error = logging.getLogger('uvicorn.error')
+  uvicorn_access = logging.getLogger('uvicorn.access')
   # Eliminate double logging from root logger
   logger.propagate = False
-
+  uvicorn_error.propagate = False
+  uvicorn_access.propagate = False
   # We only need a handler if one of that type doesn't exist already
   if logger.handlers:
     for handler in logger.handlers:
@@ -82,6 +85,8 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None):
     file_handler.setFormatter(formatter)
     file_handler.setLevel(logging.DEBUG)
     logger.addHandler(file_handler)
+    uvicorn_error.addHandler(file_handler)
+    uvicorn_access.addHandler(file_handler)
 
   console_handler = logging.StreamHandler(sys.stdout)
   formatter = logging.Formatter(
@@ -89,7 +94,8 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None):
   console_handler.setFormatter(formatter)
   if need_stream_handler:
     logger.addHandler(console_handler)
-
+    uvicorn_error.addHandler(console_handler)
+    uvicorn_access.addHandler(console_handler)
   # Configure the root logger to use exactly our handlers because other modules
   # like PSQ use this, and we want to see log messages from it when executing
   # from CLI.
@@ -97,16 +103,9 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None):
   for handler in root_log.handlers:
     root_log.removeHandler(handler)
   root_log.addHandler(console_handler)
+
   if need_file_handler:
     root_log.addHandler(file_handler)
 
-  # Set up uvicorn loggers
-  uvicron_error = logging.getLogger('uvicorn.error')
-  uvicorn_access = logging.getLogger('uvicorn.access')
-  for handler in logger.handlers:
-    if isinstance(handler, logging.FileHandler):
-      uvicron_error.addHandler(handler)
-      uvicorn_access.addHandler(handler)
-
   # Set filelock logging to ERROR due to log spam
   logging.getLogger('filelock').setLevel(logging.ERROR)