From a26ac60058ac9a697bd632a375e8c2f0f1c48131 Mon Sep 17 00:00:00 2001 From: Juan Leaniz Date: Fri, 9 Aug 2024 10:54:16 -0500 Subject: [PATCH] Added new API endpoints (logs, evidence) (#1527) * Added logs api endpoints * Updates to tail function and tests * Updates * Fix type annotations * Add test log file * API response fixes, unit tests * Fix unit test * Avoid double logging to file handler * Update logger * Make num_lines an optional query parameter instead * Int validation and fix unit test * Update OpenAPI spec * Updates per review comments --- test_data/turbinia_logs.log | 10 +++ turbinia/api/api_server_test.py | 44 +++++++++++- turbinia/api/openapi.yaml | 121 ++++++++++++++++++++++++++++++-- turbinia/api/routes/download.py | 9 +-- turbinia/api/routes/evidence.py | 36 +++++++++- turbinia/api/routes/logs.py | 58 +++++++++++++-- turbinia/api/routes/result.py | 39 ++++------ turbinia/api/utils.py | 33 +++++++++ turbinia/config/logger.py | 19 +++-- 9 files changed, 314 insertions(+), 55 deletions(-) create mode 100644 test_data/turbinia_logs.log diff --git a/test_data/turbinia_logs.log b/test_data/turbinia_logs.log new file mode 100644 index 000000000..56e08da24 --- /dev/null +++ b/test_data/turbinia_logs.log @@ -0,0 +1,10 @@ +2024-08-08 16:52:48,110 [INFO] uvicorn.error | Application startup complete. +2024-08-08 16:52:48,124 [INFO] uvicorn.error | Started server process [1232189] +2024-08-08 16:52:48,124 [INFO] uvicorn.error | Waiting for application startup. +2024-08-08 16:52:48,124 [INFO] uvicorn.error | Application startup complete. +2024-08-08 16:52:48,136 [INFO] uvicorn.error | Started server process [1232191] +2024-08-08 16:52:48,137 [INFO] uvicorn.error | Waiting for application startup. +2024-08-08 16:52:48,137 [INFO] uvicorn.error | Application startup complete. +2024-08-08 16:52:50,468 [INFO] uvicorn.access | 127.0.0.1:39730 - "GET /api/logs/api_server/100 HTTP/1.1" 200 +2024-08-08 16:53:26,287 [INFO] uvicorn.access | 127.0.0.1:53206 - "GET /api/logs/logs/ed6c72f82d42/100 HTTP/1.1" 200 +2024-08-08 16:53:51,856 [INFO] uvicorn.access | 127.0.0.1:33544 - "GET /openapi.yaml HTTP/1.1" 200 \ No newline at end of file diff --git a/turbinia/api/api_server_test.py b/turbinia/api/api_server_test.py index d5f484847..73c910788 100644 --- a/turbinia/api/api_server_test.py +++ b/turbinia/api/api_server_test.py @@ -14,8 +14,6 @@ # limitations under the License. """Turbinia API server unit tests.""" -import importlib - from collections import OrderedDict import datetime @@ -639,3 +637,45 @@ def testEvidenceUpload(self, mock_datetime, mock_get_attribute): mocked_file.assert_called_with(expected_evidence_2_path, 'wb') self.assertEqual(response.status_code, 200) self.assertEqual(json.loads(response.content), expected_response) + + def testDownloadEvidenceByIdNotFound(self): + """Test downloading non existent evidence by its UUID.""" + evidence_id = 'invalid_id' + response = self.client.get(f'/api/download/output/{evidence_id}') + self.assertEqual(response.status_code, 404) + + @mock.patch('turbinia.state_manager.RedisStateManager.get_evidence_data') + @mock.patch('turbinia.redis_client.RedisClient.key_exists') + def testDownloadEvidenceById(self, testKeyExists, testEvidenceData): + """Test downloading evidence by its UUID.""" + evidence_id = '084d5904f3d2412b99dc29ed34853a16' + testKeyExists.return_value = True + testEvidenceData.return_value = self._EVIDENCE_TEST_DATA + self._EVIDENCE_TEST_DATA['copyable'] = True + turbinia_config.OUTPUT_DIR = str( + os.path.dirname(os.path.realpath(__file__))) + response = self.client.get(f'/api/evidence/download/{evidence_id}') + filedir = os.path.dirname(os.path.realpath(__file__)) + test_data_dir = os.path.join(filedir, '..', '..', 'test_data') + with open(f'{test_data_dir}/artifact_disk.dd', 'rb') as f: + expected = f.read() + self.assertEqual(response.content, expected) + self._EVIDENCE_TEST_DATA['copyable'] = False + response = self.client.get(f'/api/evidence/download/{evidence_id}') + self.assertEqual(response.status_code, 400) + + def testGetTurbiniaLogs(self): + """Test the /logs API endpoint.""" + hostname = 'turbinia_logs' + filedir = os.path.dirname(os.path.realpath(__file__)) + test_data_dir = os.path.join(filedir, '..', '..', 'test_data') + turbinia_config.LOG_DIR = test_data_dir + with open(f'{test_data_dir}/turbinia_logs.log', 'rb') as f: + expected = f.read() + response = self.client.get(f'/api/logs/{hostname}?num_lines=10') + self.assertEqual(response.content, expected) + response = self.client.get(f'/api/logs/{hostname}?num_lines=5') + self.assertNotEqual(response.content, expected) + hostname = 'invalid_hostname' + response = self.client.get(f'/api/logs/{hostname}?num_lines=10') + self.assertEqual(response.status_code, 404) diff --git a/turbinia/api/openapi.yaml b/turbinia/api/openapi.yaml index e1d32999a..074a5e6a8 100644 --- a/turbinia/api/openapi.yaml +++ b/turbinia/api/openapi.yaml @@ -246,6 +246,36 @@ paths: summary: Get Task Report tags: - Turbinia Tasks + /api/evidence/download/{evidence_id}: + get: + description: "Retrieves an evidence in Redis by using its UUID.\n\nArgs:\n \ + \ evidence_id (str): The UUID of the evidence.\n\nRaises:\n HTTPException:\ + \ if the evidence is not found.\n\nReturns:\n FileResponse: The evidence\ + \ file." + operationId: download_by_evidence_id + parameters: + - in: path + name: evidence_id + required: true + schema: + title: Evidence Id + responses: + '200': + content: + application/octet-stream: + schema: + format: binary + type: string + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Download By Evidence Id + tags: + - Turbinia Evidence /api/evidence/query: get: description: "Queries evidence in Redis that have the specified attribute value.\n\ @@ -491,22 +521,99 @@ paths: summary: Read Jobs tags: - Turbinia Jobs - /api/logs/{query}: + /api/logs/api_server: get: description: Retrieve log data. - operationId: get_logs + operationId: get_api_server_logs + parameters: + - in: query + name: num_lines + required: false + schema: + default: 500 + exclusiveMinimum: 0.0 + title: Num Lines + type: integer + responses: + '200': + content: + application/text: + schema: + type: string + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Get Api Server Logs + tags: + - Turbinia Logs + /api/logs/server: + get: + description: Retrieve log data. + operationId: get_server_logs + parameters: + - in: query + name: num_lines + required: false + schema: + default: 500 + exclusiveMinimum: 0.0 + title: Num Lines + type: integer + responses: + '200': + content: + application/text: + schema: + type: string + description: Successful Response + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + description: Validation Error + summary: Get Server Logs + tags: + - Turbinia Logs + /api/logs/{hostname}: + get: + description: 'Retrieve log data. + + + Turbinia currently stores logs on plaintext files. The log files are named + + .log for each instance of a worker, server or API server. + + + In some deployments, the same file can contain all logs (e.g. running all + + services locally in the same container).' + operationId: get_turbinia_logs parameters: - in: path - name: query + name: hostname required: true schema: - title: Query + title: Hostname type: string + - in: query + name: num_lines + required: false + schema: + default: 500 + exclusiveMinimum: 0.0 + title: Num Lines + type: integer responses: '200': content: - application/json: - schema: {} + application/text: + schema: + type: string description: Successful Response '422': content: @@ -514,7 +621,7 @@ paths: schema: $ref: '#/components/schemas/HTTPValidationError' description: Validation Error - summary: Get Logs + summary: Get Turbinia Logs tags: - Turbinia Logs /api/request/: diff --git a/turbinia/api/routes/download.py b/turbinia/api/routes/download.py index 3c1c6cf4e..e9644e741 100644 --- a/turbinia/api/routes/download.py +++ b/turbinia/api/routes/download.py @@ -23,12 +23,13 @@ from turbinia import config as turbinia_config -#log = logging.getLogger(__name__) +log = logging.getLogger(__name__) router = APIRouter(prefix='/download', tags=['Turbinia Download']) -@router.get('/output/{file_path:path}') -async def download_output_path(request: Request, file_path): +@router.get('/output/{file_path:path}', response_class=FileResponse) +async def download_output_path( + request: Request, file_path: str) -> FileResponse: """Downloads output file path. Args: @@ -42,7 +43,7 @@ async def download_output_path(request: Request, file_path): requested_file = pathlib.Path(file_path).resolve() if requested_file.is_relative_to( config_output_dir) and requested_file.is_file(): - return FileResponse(requested_file) + return FileResponse(requested_file, media_type='application/octet-stream') raise HTTPException( status_code=404, detail='Access denied or file not found!') diff --git a/turbinia/api/routes/evidence.py b/turbinia/api/routes/evidence.py index 4ea0398e9..ea7352db6 100644 --- a/turbinia/api/routes/evidence.py +++ b/turbinia/api/routes/evidence.py @@ -21,7 +21,7 @@ from datetime import datetime from fastapi import HTTPException, APIRouter, UploadFile, Query, Form from fastapi.requests import Request -from fastapi.responses import JSONResponse +from fastapi.responses import JSONResponse, FileResponse from typing import List, Annotated from turbinia import evidence @@ -263,3 +263,37 @@ async def upload_evidence( else: evidences.append(file_info) return JSONResponse(content=evidences, status_code=200) + + +@router.get('/download/{evidence_id}', response_class=FileResponse) +async def download_by_evidence_id( + request: Request, evidence_id) -> FileResponse: + """Downloads an evidence file based in its UUID. + + Args: + evidence_id (str): The UUID of the evidence. + + Raises: + HTTPException: if the evidence is not found. + + Returns: + FileResponse: The evidence file. + """ + evidence_key = redis_manager.redis_client.build_key_name( + 'evidence', evidence_id) + if redis_manager.redis_client.key_exists(evidence_key): + data: dict = redis_manager.get_evidence_data(evidence_id) + file_path: str = None + if not data['copyable']: + raise HTTPException(status_code=400, detail='Evidence is not copyable.') + if data['source_path']: + file_path = data['source_path'] + elif data['local_path']: + file_path = data['local_path'] + + if file_path and os.path.exists(file_path): + filename = os.path.basename(file_path) + return FileResponse(file_path, filename=filename) + raise HTTPException( + status_code=404, + detail=f'UUID {evidence_id} not found or it had no associated evidence.') diff --git a/turbinia/api/routes/logs.py b/turbinia/api/routes/logs.py index 00535b0b8..149a0f4fa 100644 --- a/turbinia/api/routes/logs.py +++ b/turbinia/api/routes/logs.py @@ -15,18 +15,68 @@ """Turbinia API - Logs router""" import logging +import os -from fastapi import APIRouter -from fastapi.responses import JSONResponse +from pathlib import Path + +from fastapi import APIRouter, Query +from fastapi.responses import JSONResponse, PlainTextResponse from fastapi.requests import Request +from turbinia import config +from turbinia.api import utils log = logging.getLogger(__name__) router = APIRouter(prefix='/logs', tags=['Turbinia Logs']) -@router.get('/{query}') -async def get_logs(request: Request, query: str): +@router.get('/server') +async def get_server_logs( + request: Request, num_lines: int | None = Query(default=500, gt=0) +) -> PlainTextResponse: """Retrieve log data.""" return JSONResponse( content={'detail': 'Not implemented yet.'}, status_code=200) + + +@router.get('/api_server') +async def get_api_server_logs( + request: Request, num_lines: int | None = Query(default=500, gt=0) +) -> PlainTextResponse: + """Retrieve log data.""" + hostname = os.uname().nodename + log_name = f'{hostname}.log' + log_path = Path(config.LOG_DIR, log_name) + log_lines = utils.tail_log(log_path, num_lines) + if log_path: + return PlainTextResponse(log_lines) + return JSONResponse( + content={'detail': f'No logs found for {hostname}'}, status_code=404) + + +@router.get('/{hostname}') +async def get_turbinia_logs( + request: Request, hostname: str, num_lines: int | None = Query( + default=500, gt=0) +) -> PlainTextResponse: + """Retrieve log data. + + Turbinia currently stores logs on plaintext files. The log files are named + .log for each instance of a worker, server or API server. + + In some deployments, the same file can contain all logs (e.g. running all + services locally in the same container). + """ + if not hostname: + return JSONResponse(content={'detail': 'Invalid hostname'}, status_code=404) + + if 'NODE_NAME' in os.environ: + log_name = f'{hostname}.{os.environ["NODE_NAME"]!s}' + else: + log_name = f'{hostname}.log' + log_path = Path(config.LOG_DIR, log_name) + log_lines = utils.tail_log(log_path, num_lines) + if log_lines: + return PlainTextResponse(log_lines) + return JSONResponse( + content={'detail': f'No logs found for {hostname}'}, status_code=404) diff --git a/turbinia/api/routes/result.py b/turbinia/api/routes/result.py index c727c20ca..928a7949b 100644 --- a/turbinia/api/routes/result.py +++ b/turbinia/api/routes/result.py @@ -21,29 +21,15 @@ from fastapi.responses import StreamingResponse, FileResponse from fastapi.requests import Request +from turbinia import TurbiniaException from turbinia.api import utils as api_utils log = logging.getLogger(__name__) router = APIRouter(prefix='/result', tags=['Turbinia Request Results']) -ATTACHMENT_RESPONSE = { - '200': { - 'content': { - 'application/octet-stream': { - 'schema': { - 'type': 'string', - 'format': 'binary' - } - } - } - } -} - - -@router.get( - '/task/{task_id}', response_class=StreamingResponse, - responses=ATTACHMENT_RESPONSE) + +@router.get('/task/{task_id}', response_class=StreamingResponse) async def get_task_output(request: Request, task_id: str) -> StreamingResponse: """Retrieves a task's output files.""" # Get the request_id for the task. This is needed to find the right path. @@ -59,15 +45,13 @@ async def get_task_output(request: Request, task_id: str) -> StreamingResponse: api_utils.create_tarball(output_path), headers={ 'Content-Disposition': f'attachment;filename={task_id}.tgz', 'Transfer-Encoding': 'chunked' - }) + }, media_type='application/octet-stream') else: raise HTTPException( status_code=404, detail='The requested file was not found.') -@router.get( - '/request/{request_id}', response_class=StreamingResponse, - responses=ATTACHMENT_RESPONSE) +@router.get('/request/{request_id}', response_class=StreamingResponse) async def get_request_output( request: Request, request_id: str) -> StreamingResponse: """Retrieve request output.""" @@ -81,23 +65,24 @@ async def get_request_output( api_utils.create_tarball(request_output_path), headers={ 'Content-Disposition': f'attachment;filename={request_id}.tgz', 'Transfer-Encoding': 'chunked' - }) + }, media_type='application/octet-stream') else: raise HTTPException( status_code=404, detail='The requested file was not found.') -@router.get( - '/plasofile/{task_id}', response_class=FileResponse, - responses=ATTACHMENT_RESPONSE) +@router.get('/plasofile/{task_id}', response_class=FileResponse) async def get_plaso_file(request: Request, task_id: str) -> FileResponse: """Retrieves a task's Plaso file.""" if not task_id: raise HTTPException(status_code=400, detail='Task identifier not provided.') - plaso_file_path = api_utils.get_plaso_file_path(task_id) - filename = f'{task_id}.plaso' + try: + plaso_file_path = api_utils.get_plaso_file_path(task_id) + except TurbiniaException as exception: + raise HTTPException(status_code=404, detail=str(exception)) from exception + filename = f'{task_id}.plaso' if os.path.exists(plaso_file_path): log.info(f'Sending {plaso_file_path} to client.') return FileResponse(plaso_file_path, filename=filename) diff --git a/turbinia/api/utils.py b/turbinia/api/utils.py index a90ce4199..30ad0d37a 100644 --- a/turbinia/api/utils.py +++ b/turbinia/api/utils.py @@ -189,3 +189,36 @@ async def create_tarball(output_path: str) -> AsyncGenerator[bytes, Any]: # Yield end-of-archive marker. yield stream.pop() + + +def tail_log(log_path, max_lines=500) -> str: + """Reads a log file and returns the last max_lines.""" + if not os.path.exists(log_path) and not os.path.isfile(log_path): + return '' + + fsize = os.path.getsize(log_path) + buffsize = 8192 + nlines = 0 + log_lines = [] + + with open(log_path, 'rb') as file: + offset = fsize + while nlines < max_lines: + diff = offset - buffsize + if diff > 0: + offset = file.seek(offset - buffsize) + + current_lines = file.readlines() + nlines += len(current_lines) + + if nlines > max_lines: + current_lines = current_lines[-max_lines:] + + log_lines.extend(current_lines) + if diff > 0: + offset = file.seek(-buffsize, 1) + + for i in range(len(log_lines)): + log_lines[i] = log_lines[i].decode() + + return ''.join(log_lines) diff --git a/turbinia/config/logger.py b/turbinia/config/logger.py index e4a5bc14f..cbc1e5a39 100644 --- a/turbinia/config/logger.py +++ b/turbinia/config/logger.py @@ -40,9 +40,12 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None): 'ignore', 'Your application has authenticated using end user credentials') logger = logging.getLogger('turbinia') + uvicorn_error = logging.getLogger('uvicorn.error') + uvicorn_access = logging.getLogger('uvicorn.access') # Eliminate double logging from root logger logger.propagate = False - + uvicorn_error.propagate = False + uvicorn_access.propagate = False # We only need a handler if one of that type doesn't exist already if logger.handlers: for handler in logger.handlers: @@ -82,6 +85,8 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None): file_handler.setFormatter(formatter) file_handler.setLevel(logging.DEBUG) logger.addHandler(file_handler) + uvicorn_error.addHandler(file_handler) + uvicorn_access.addHandler(file_handler) console_handler = logging.StreamHandler(sys.stdout) formatter = logging.Formatter( @@ -89,7 +94,8 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None): console_handler.setFormatter(formatter) if need_stream_handler: logger.addHandler(console_handler) - + uvicorn_error.addHandler(console_handler) + uvicorn_access.addHandler(console_handler) # Configure the root logger to use exactly our handlers because other modules # like PSQ use this, and we want to see log messages from it when executing # from CLI. @@ -97,16 +103,9 @@ def setup(need_file_handler=True, need_stream_handler=True, log_file_path=None): for handler in root_log.handlers: root_log.removeHandler(handler) root_log.addHandler(console_handler) + if need_file_handler: root_log.addHandler(file_handler) - # Set up uvicorn loggers - uvicron_error = logging.getLogger('uvicorn.error') - uvicorn_access = logging.getLogger('uvicorn.access') - for handler in logger.handlers: - if isinstance(handler, logging.FileHandler): - uvicron_error.addHandler(handler) - uvicorn_access.addHandler(handler) - # Set filelock logging to ERROR due to log spam logging.getLogger('filelock').setLevel(logging.ERROR)