diff --git a/src/cautiousrobot/__init__.py b/src/cautiousrobot/__init__.py index 5340429..b8bbe50 100644 --- a/src/cautiousrobot/__init__.py +++ b/src/cautiousrobot/__init__.py @@ -1,5 +1,6 @@ from cautiousrobot.__main__ import download_images from cautiousrobot.buddy_check import BuddyCheck +from cautiousrobot.utils import downsample_and_save_image # Create instance of the class @@ -9,4 +10,4 @@ buddy_check_instance.validate_download buddy_check_instance.check_alignment -__all__ = ["download_images", "validate_download", "check_alignment"] +__all__ = ["download_images", "validate_download", "check_alignment", "downsample_and_save_image"] diff --git a/src/cautiousrobot/__main__.py b/src/cautiousrobot/__main__.py index 7402dcc..6c9781c 100644 --- a/src/cautiousrobot/__main__.py +++ b/src/cautiousrobot/__main__.py @@ -18,7 +18,7 @@ import time from PIL import Image from sumbuddy import get_checksums -from cautiousrobot.utils import log_response, update_log, process_csv +from cautiousrobot.utils import log_response, update_log, process_csv, downsample_and_save_image from cautiousrobot.buddy_check import BuddyCheck @@ -88,99 +88,104 @@ def download_images(data, img_dir, log_filepath, error_log_filepath, filename = image_name = data[filename][i] if subfolders: image_dir_path = img_dir + "/" + data[subfolders][i] - - # get image from url - url = data[file_url][i] - if not url: - log_errors = log_response(log_errors, - index = i, - image = image_name, - url = url, - response_code = "no url") - update_log(log = log_errors, index = i, filepath = error_log_filepath) - - else: - #download the image - redo = True - max_redos = retry - while redo and max_redos > 0: - try: - response = requests.get(url, stream = True) - except Exception as e: - redo = True - max_redos -= 1 - if max_redos <= 0: - log_errors = log_response(log_errors, - index = i, - image = image_name, - url = url, - response_code = str(e)) - update_log(log = log_errors, index = i, filepath = error_log_filepath) - 
continue - - if response.status_code == 200: - redo = False - # log status - log_data = log_response(log_data, + + if not os.path.exists(image_dir_path + "/" + image_name): + + # get image from url + url = data[file_url][i] + if not url: + log_errors = log_response(log_errors, index = i, image = image_name, - url = url, - response_code = response.status_code - ) - update_log(log = log_data, index = i, filepath = log_filepath) - - #create the appropriate folders if necessary - - if os.path.exists(image_dir_path) != True: - os.makedirs(image_dir_path, exist_ok=False) - - # save full size image to appropriate folder - with open(f"{image_dir_path}/{image_name}", "wb") as out_file: - shutil.copyfileobj(response.raw, out_file) - - if downsample: - downsample_dir_path = downsample_path - if subfolders: - downsample_dir_path = downsample_path + "/" + data[subfolders][i] - if os.path.exists(downsample_dir_path) != True: - os.makedirs(downsample_dir_path, exist_ok=False) - # Downsample & save image - try: - img = Image.open(f"{image_dir_path}/{image_name}") - img.resize((downsample, downsample)).save(downsample_dir_path + "/" + image_name) - except Exception as e: - print(e) + file_path = url, + response_code = "no url") + update_log(log = log_errors, index = i, filepath = error_log_filepath) + + else: + #download the image + redo = True + max_redos = retry + while redo and max_redos > 0: + try: + response = requests.get(url, stream = True) + except Exception as e: + redo = True + max_redos -= 1 + if max_redos <= 0: log_errors = log_response(log_errors, - index = i, - image = "downsized_" + image_name, - url = url, - response_code = str(e)) + index = i, + image = image_name, + file_path = url, + response_code = str(e)) update_log(log = log_errors, index = i, filepath = error_log_filepath) - - # check for too many requests - elif response.status_code in REDO_CODE_LIST: - redo = True - max_redos -= 1 - if max_redos <= 0: - log_errors = log_response(log_errors, - index = i, - 
image = image_name, - url = url, - response_code = response.status_code) - update_log(log = log_errors, index = i, filepath = error_log_filepath) + continue + + if response.status_code == 200: + redo = False + # log status + log_data = log_response(log_data, + index = i, + image = image_name, + file_path = url, + response_code = response.status_code + ) + update_log(log = log_data, index = i, filepath = log_filepath) + + #create the appropriate folders if necessary + + if os.path.exists(image_dir_path) != True: + os.makedirs(image_dir_path, exist_ok=False) + + # save full size image to appropriate folder + with open(f"{image_dir_path}/{image_name}", "wb") as out_file: + shutil.copyfileobj(response.raw, out_file) - else: - time.sleep(wait) - else: #other fail, eg. 404 - redo = False - log_errors = log_response(log_errors, + # check for too many requests + elif response.status_code in REDO_CODE_LIST: + redo = True + max_redos -= 1 + if max_redos <= 0: + log_errors = log_response(log_errors, index = i, image = image_name, - url = url, + file_path= url, response_code = response.status_code) - update_log(log = log_errors, index = i, filepath = error_log_filepath) + update_log(log = log_errors, index = i, filepath = error_log_filepath) + + else: + time.sleep(wait) + else: #other fail, eg. 404 + redo = False + log_errors = log_response(log_errors, + index = i, + image = image_name, + file_path = url, + response_code = response.status_code) + update_log(log = log_errors, index = i, filepath = error_log_filepath) + + del response + + if downsample: + # Since we have image resize within a try and log failure, seems reasonable to not check for the image again. 
def downsample_and_save_image(image_dir_path, image_name, downsample_dir_path, downsample_size, log_errors, image_index, file_path, error_log_filepath):
    """
    Downsample an image and save it to the specified directory.

    The source image is read from ``{image_dir_path}/{image_name}``, resized to a
    ``downsample_size`` x ``downsample_size`` square and written to
    ``{downsample_dir_path}/{image_name}``. The output directory is created when it
    does not exist yet. Any exception raised while opening, resizing or saving the
    image is printed, recorded through ``log_response`` under ``image_index`` with the
    image name prefixed by ``"downsized_"``, and then handed to ``update_log``.

    Parameters:
    - image_dir_path (str): The path to the directory containing the original image.
    - image_name (str): The name of the image to be downsampled.
    - downsample_dir_path (str): The path to the directory where the downsampled image will be saved.
    - downsample_size (int): The new size (both width and height) for the downsampled image.
    - log_errors (dict): A dictionary to store errors encountered during the downsampling process.
    - image_index (int): The index of the current image being processed, used for logging.
    - file_path (str): The file path or URL associated with the image, used for logging errors.
    - error_log_filepath (str): The file path passed to ``update_log`` for the error log.

    Returns:
    None

    Raises:
    OSError: If the output directory cannot be created (raised by ``os.makedirs``,
        which runs outside the logged ``try`` block).
    """
    # exist_ok=True replaces the former "check, then create" sequence, which could
    # raise FileExistsError if the directory appeared between the two calls.
    os.makedirs(downsample_dir_path, exist_ok=True)

    try:
        img = Image.open(f"{image_dir_path}/{image_name}")
        try:
            img.resize((downsample_size, downsample_size)).save(f"{downsample_dir_path}/{image_name}")
        finally:
            # Image.open keeps the source file open; release it even when
            # resize/save fails so repeated calls do not leak file handles.
            img.close()
    except Exception as e:
        # Best effort: a failed downsample is reported and logged, never raised.
        print(e)
        log_errors = log_response(
            log_errors,
            index=image_index,
            image="downsized_" + image_name,
            file_path=file_path,
            response_code=str(e)
        )
        update_log(log=log_errors, index=image_index, filepath=error_log_filepath)
class TestDownsampleAndSaveImage(unittest.TestCase):
    """Test the downsample_and_save_image function.

    Every path used by the tests lives under a per-test temporary directory, so
    directories created by the function under test never land in the current
    working directory and are removed even when they are not empty.
    """

    def setUp(self):
        # Imported locally so the module-level import block stays untouched.
        import tempfile

        scratch = tempfile.TemporaryDirectory()
        # cleanup() removes the whole tree, unlike os.rmdir which fails on
        # non-empty directories.
        self.addCleanup(scratch.cleanup)

        self.image_dir_path = os.path.join(scratch.name, "test_images")
        self.downsample_dir_path = os.path.join(scratch.name, "downsampled_images")
        self.downsample_size = 100
        self.log_errors = {}  # Dictionary to store error logs
        self.error_log_filepath = os.path.join(scratch.name, "error_log.json")
        self.file_path = "file://example.com/image.jpg"

    def mock_log_response_side_effect(self, log_errors, index, image, file_path, response_code):
        """Helper function to mimic the behavior of log_response."""
        log_errors[index] = {'image': image, 'file_path': file_path, 'response_code': response_code}
        return log_errors

    @patch("PIL.Image.open")
    def test_downsample_and_save_image_success(self, mock_open):
        """ Test the successful downsampling and saving of an image. """

        mock_image = MagicMock(spec=Image.Image)
        mock_open.return_value = mock_image

        # The image is resized to a new image
        mock_resized_image = MagicMock(spec=Image.Image)
        mock_image.resize.return_value = mock_resized_image

        downsample_and_save_image(
            self.image_dir_path,
            "test_image.jpg",
            self.downsample_dir_path,
            self.downsample_size,
            self.log_errors,
            0,  # image_index
            self.file_path,
            self.error_log_filepath
        )

        mock_open.assert_called_once_with(f"{self.image_dir_path}/test_image.jpg")
        mock_image.resize.assert_called_once_with((self.downsample_size, self.downsample_size))
        mock_resized_image.save.assert_called_once_with(f"{self.downsample_dir_path}/test_image.jpg")
        # Nothing went wrong, so no error entry may have been recorded.
        self.assertEqual(self.log_errors, {})

    @patch("os.path.exists", return_value=True)
    @patch("PIL.Image.open", side_effect=FileNotFoundError("File not found"))
    @patch("cautiousrobot.utils.log_response")
    @patch("cautiousrobot.utils.update_log")
    def test_downsample_and_save_image_file_not_found(self, mock_update_log, mock_log_response, mock_open, mock_exists):
        """ Test the behavior when the image file is not found. """

        mock_log_response.side_effect = self.mock_log_response_side_effect

        downsample_and_save_image(
            self.image_dir_path,
            "missing_image.jpg",
            self.downsample_dir_path,
            self.downsample_size,
            self.log_errors,
            0,  # image_index
            self.file_path,
            self.error_log_filepath
        )

        mock_open.assert_called_once_with(f"{self.image_dir_path}/missing_image.jpg")
        mock_log_response.assert_called_once_with(
            self.log_errors,
            index=0,
            image="downsized_missing_image.jpg",
            file_path=self.file_path,
            response_code="File not found"
        )
        mock_update_log.assert_called_once_with(
            log=self.log_errors,
            index=0,
            filepath=self.error_log_filepath
        )

        # Check the log error dictionary
        self.assertIn(0, self.log_errors)
        self.assertEqual(self.log_errors[0]['response_code'], "File not found")

    @patch("os.path.exists", return_value=False)
    @patch("PIL.Image.open", side_effect=Exception("Unexpected error"))
    @patch("cautiousrobot.utils.log_response")
    @patch("cautiousrobot.utils.update_log")
    def test_downsample_and_save_image_unexpected_error(self, mock_update_log, mock_log_response, mock_open, mock_exists):
        """ Test the behavior when an unexpected error occurs. """

        mock_log_response.side_effect = self.mock_log_response_side_effect

        downsample_and_save_image(
            self.image_dir_path,
            "test_image.jpg",
            self.downsample_dir_path,
            self.downsample_size,
            self.log_errors,
            1,
            self.file_path,
            self.error_log_filepath
        )

        mock_open.assert_called_once_with(f"{self.image_dir_path}/test_image.jpg")
        mock_log_response.assert_called_once_with(
            self.log_errors,
            index=1,
            image="downsized_test_image.jpg",
            file_path=self.file_path,
            response_code="Unexpected error"
        )
        mock_update_log.assert_called_once_with(
            log=self.log_errors,
            index=1,
            filepath=self.error_log_filepath
        )

        self.assertIn(1, self.log_errors)
        self.assertEqual(self.log_errors[1]['response_code'], "Unexpected error")