From 657db28cc69983b3a57637c8b985c497bce96012 Mon Sep 17 00:00:00 2001 From: Stefan Garlonta Date: Thu, 28 Sep 2023 15:55:35 +0200 Subject: [PATCH 1/6] :bug: Fix CSV reader error when header not first line --- .../loaders/{ => __csv}/__csv_loader.py | 36 ++++------ .../loaders/__csv/__init__.py | 0 pystreamapi/loaders/__init__.py | 6 +- pystreamapi/loaders/__loader_utils.py | 23 ++++++ tests/assets/data.csv | 3 - tests/assets/data2.csv | 2 - tests/test_csv_loader.py | 70 +++++++++++++++++++ tests/test_loaders.py | 53 -------------- 8 files changed, 112 insertions(+), 81 deletions(-) rename pystreamapi/loaders/{ => __csv}/__csv_loader.py (61%) rename tests/assets/empty.csv => pystreamapi/loaders/__csv/__init__.py (100%) create mode 100644 pystreamapi/loaders/__loader_utils.py delete mode 100644 tests/assets/data.csv delete mode 100644 tests/assets/data2.csv create mode 100644 tests/test_csv_loader.py delete mode 100644 tests/test_loaders.py diff --git a/pystreamapi/loaders/__csv_loader.py b/pystreamapi/loaders/__csv/__csv_loader.py similarity index 61% rename from pystreamapi/loaders/__csv_loader.py rename to pystreamapi/loaders/__csv/__csv_loader.py index b585833..e5d6a30 100644 --- a/pystreamapi/loaders/__csv_loader.py +++ b/pystreamapi/loaders/__csv/__csv_loader.py @@ -1,8 +1,7 @@ -import contextlib -import os from collections import namedtuple from csv import reader +from pystreamapi.loaders.__loader_utils import LoaderUtils from pystreamapi.loaders.__lazy_file_iterable import LazyFileIterable @@ -17,7 +16,7 @@ def csv(file_path: str, cast_types=True, delimiter=',', encoding="utf-8") -> Laz :param file_path: The path to the CSV file. :param delimiter: The delimiter used in the CSV file. """ - file_path = __validate_path(file_path) + file_path = LoaderUtils.validate_path(file_path) return LazyFileIterable(lambda: __load_csv(file_path, cast_types, delimiter, encoding)) @@ -28,28 +27,23 @@ def __load_csv(file_path, cast, delimiter, encoding): csvreader = reader(csvfile, delimiter=delimiter) # Create a namedtuple type, casting the header values to int or float if possible - Row = namedtuple('Row', list(next(csvreader, []))) + header = __get_csv_header(csvreader) - mapper = __try_cast if cast else lambda x: x + Row = namedtuple('Row', list(header)) + + mapper = LoaderUtils.try_cast if cast else lambda x: x # Process the data, casting values to int or float if possible data = [Row(*[mapper(value) for value in row]) for row in csvreader] return data -def __validate_path(file_path: str): - """Validate the path to the CSV file""" - if not os.path.exists(file_path): - raise FileNotFoundError("The specified file does not exist.") - if not os.path.isfile(file_path): - raise ValueError("The specified path is not a file.") - return file_path - - -def __try_cast(value): - """Try to cast value to primary data types from python (int, float, bool)""" - for cast in (int, float): - with contextlib.suppress(ValueError): - return cast(value) - # Try to cast to bool - return value.lower() == 'true' if value.lower() in ('true', 'false') else value +def __get_csv_header(csvreader): + while True: + try: + header = next(csvreader) + if header: + break + except StopIteration: + return [] + return header diff --git a/tests/assets/empty.csv b/pystreamapi/loaders/__csv/__init__.py similarity index 100% rename from tests/assets/empty.csv rename to pystreamapi/loaders/__csv/__init__.py diff --git a/pystreamapi/loaders/__init__.py b/pystreamapi/loaders/__init__.py index 51f7ca8..5d25db5 100644 --- a/pystreamapi/loaders/__init__.py +++ b/pystreamapi/loaders/__init__.py @@ -1,5 +1,7 @@ -from pystreamapi.loaders.__csv_loader import csv +from pystreamapi.loaders.__csv.__csv_loader import csv +from pystreamapi.loaders.__json.__json_loader import json __all__ = [ - 'csv' + 'csv', + 'json' ] diff --git a/pystreamapi/loaders/__loader_utils.py b/pystreamapi/loaders/__loader_utils.py new file mode 100644 index 0000000..247e390 --- /dev/null +++ b/pystreamapi/loaders/__loader_utils.py @@ -0,0 +1,23 @@ +import contextlib +import os + + +class LoaderUtils: + + @staticmethod + def try_cast(value): + """Try to cast value to primary data types from python (int, float, bool)""" + for cast in (int, float): + with contextlib.suppress(ValueError): + return cast(value) + # Try to cast to bool + return value.lower() == 'true' if value.lower() in ('true', 'false') else value + + @staticmethod + def validate_path(file_path: str): + """Validate the path to the CSV file""" + if not os.path.exists(file_path): + raise FileNotFoundError("The specified file does not exist.") + if not os.path.isfile(file_path): + raise ValueError("The specified path is not a file.") + return file_path diff --git a/tests/assets/data.csv b/tests/assets/data.csv deleted file mode 100644 index 5fd27af..0000000 --- a/tests/assets/data.csv +++ /dev/null @@ -1,3 +0,0 @@ -attr1,attr2 -1,2.0 -a,b \ No newline at end of file diff --git a/tests/assets/data2.csv b/tests/assets/data2.csv deleted file mode 100644 index 6956ce9..0000000 --- a/tests/assets/data2.csv +++ /dev/null @@ -1,2 +0,0 @@ -attr1;attr2 -1;2 \ No newline at end of file diff --git a/tests/test_csv_loader.py b/tests/test_csv_loader.py new file mode 100644 index 0000000..8872bc1 --- /dev/null +++ b/tests/test_csv_loader.py @@ -0,0 +1,70 @@ +from unittest import TestCase +from unittest.mock import patch, mock_open + +from pystreamapi.loaders import csv + +file_content = """ +attr1,attr2 +1,2.0 +a,b +""" + + +class TestCSVLoader(TestCase): + + def test_csv_loader(self): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) + + def test_csv_loader_with_casting_disabled(self): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = csv('path/to/data.csv', cast_types=False) + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, '1') + self.assertIsInstance(data[0].attr1, str) + self.assertEqual(data[0].attr2, '2.0') + self.assertIsInstance(data[0].attr2, str) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) + + def test_csv_loader_is_iterable(self): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = csv('path/to/data.csv') + self.assertEqual(len(list(iter(data))), 2) + + def test_csv_loader_with_custom_delimiter(self): + with (patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = csv('path/to/data.csv', delimiter=';') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + + def test_csv_loader_with_empty_file(self): + with (patch('builtins.open', mock_open(read_data="")), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 0) + + def test_csv_loader_with_invalid_path(self): + with self.assertRaises(FileNotFoundError): + csv('path/to/invalid.csv') + + def test_csv_loader_with_no_file(self): + with self.assertRaises(ValueError): + csv('./') diff --git a/tests/test_loaders.py b/tests/test_loaders.py deleted file mode 100644 index 32730b1..0000000 --- a/tests/test_loaders.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -from unittest import TestCase - -from pystreamapi.loaders import csv - - -class TestLoaders(TestCase): - - def setUp(self) -> None: - cwd = os.path.dirname(os.path.realpath(__file__)) - self.path = os.path.join(cwd, 'assets') - - def test_csv_loader(self): - data = csv(f'{self.path}/data.csv') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - self.assertEqual(data[0].attr2, 2.0) - self.assertIsInstance(data[0].attr2, float) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) - - def test_csv_loader_with_casting_disabled(self): - data = csv(f'{self.path}/data.csv', cast_types=False) - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, '1') - self.assertIsInstance(data[0].attr1, str) - self.assertEqual(data[0].attr2, '2.0') - self.assertIsInstance(data[0].attr2, str) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) - - def test_csv_loader_is_iterable(self): - data = csv(f'{self.path}/data.csv') - self.assertEqual(len(list(iter(data))), 2) - - def test_csv_loader_with_custom_delimiter(self): - data = csv(f'{self.path}/data2.csv', delimiter=';') - self.assertEqual(len(data), 1) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - - def test_csv_loader_with_empty_file(self): - data = csv(f'{self.path}/empty.csv') - self.assertEqual(len(data), 0) - - def test_csv_loader_with_invalid_path(self): - with self.assertRaises(FileNotFoundError): - csv(f'{self.path}/invalid.csv') - - def test_csv_loader_with_no_file(self): - with self.assertRaises(ValueError): - csv(f'{self.path}/') From 7eb2639a5bd9ea63e9c4da87f8d1c93831723109 Mon Sep 17 00:00:00 2001 From: Stefan Garlonta Date: Thu, 28 Sep 2023 15:56:04 +0200 Subject: [PATCH 2/6] :sparkles: Implement JSON loader --- pystreamapi/loaders/__json/__init__.py | 0 pystreamapi/loaders/__json/__json_loader.py | 46 ++++++++++++++ tests/test_json_loader.py | 68 +++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 pystreamapi/loaders/__json/__init__.py create mode 100644 pystreamapi/loaders/__json/__json_loader.py create mode 100644 tests/test_json_loader.py diff --git a/pystreamapi/loaders/__json/__init__.py b/pystreamapi/loaders/__json/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pystreamapi/loaders/__json/__json_loader.py b/pystreamapi/loaders/__json/__json_loader.py new file mode 100644 index 0000000..cf416ef --- /dev/null +++ b/pystreamapi/loaders/__json/__json_loader.py @@ -0,0 +1,46 @@ +import json as jsonlib +from collections import namedtuple + +from pystreamapi.loaders.__lazy_file_iterable import LazyFileIterable +from pystreamapi.loaders.__loader_utils import LoaderUtils + + +def json(src: str, read_from_src=False) -> LazyFileIterable: + """ + Loads JSON data from either a path or a string and converts it into a list of namedtuples. + + Returns: + list: A list of namedtuples, where each namedtuple represents an object in the JSON. + :param src: Either the path to a JSON file or a JSON string. + :param read_from_src: If True, src is treated as a JSON string. If False, src is treated as + a path to a JSON file. + """ + if read_from_src: + return LazyFileIterable(lambda: __load_json_string(src)) + path = LoaderUtils.validate_path(src) + return LazyFileIterable(lambda: __load_json_file(path)) + + +def __load_json_file(file_path): + """Load a JSON file and convert it into a list of namedtuples""" + # skipcq: PTC-W6004 + with open(file_path, mode='r', encoding='utf-8') as jsonfile: + src = jsonfile.read() + if src == '': + return [] + data = jsonlib.loads(src, object_hook=__dict_to_namedtuple) + return data + + +def __load_json_string(json_string): + """Load JSON data from a string and convert it into a list of namedtuples""" + return jsonlib.loads(json_string, object_hook=__dict_to_namedtuple) + + +def __dict_to_namedtuple(d, name='Item'): + """Convert a dictionary to a namedtuple""" + if isinstance(d, dict): + fields = list(d.keys()) + Item = namedtuple(name, fields) + return Item(**{k: __dict_to_namedtuple(v, k) for k, v in d.items()}) + return d diff --git a/tests/test_json_loader.py b/tests/test_json_loader.py new file mode 100644 index 0000000..ba7ed3d --- /dev/null +++ b/tests/test_json_loader.py @@ -0,0 +1,68 @@ +from unittest import TestCase +from unittest.mock import patch, mock_open + +from pystreamapi.loaders import json + +file_content = """ +[ + { + "attr1": 1, + "attr2": 2.0 + }, + { + "attr1": "a", + "attr2": "b" + } +] +""" + + +class TestJsonLoader(TestCase): + + def test_json_loader_from_file(self): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = json('path/to/data.json') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) + + def test_json_loader_is_iterable(self): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = json('path/to/data.json') + self.assertEqual(len(list(iter(data))), 2) + + def test_json_loader_with_empty_file(self): + with (patch('builtins.open', mock_open(read_data="")), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): + data = json('path/to/data.json') + self.assertEqual(len(data), 0) + + def test_json_loader_with_invalid_path(self): + with self.assertRaises(FileNotFoundError): + json('path/to/invalid.json') + + def test_json_loader_with_no_file(self): + with self.assertRaises(ValueError): + json('./') + + def test_json_loader_from_string(self): + data = json(file_content, read_from_src=True) + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) + + def test_json_loader_from_empty_string(self): + json('', read_from_src=True) From 5af2ac6c5dadceb8a901d6b451530f017349d663 Mon Sep 17 00:00:00 2001 From: Stefan Garlonta Date: Thu, 28 Sep 2023 16:00:13 +0200 Subject: [PATCH 3/6] :memo: Add JSON loader --- README.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b1b9f5f..0e0ee7a 100644 --- a/README.md +++ b/README.md @@ -213,23 +213,33 @@ Stream.concat(Stream.of([1, 2]), Stream.of([3, 4])) Creates a new Stream from multiple Streams. Order doesn't change. -## Use loaders: Load data from CSV files in just one line +## Use loaders: Load data from CSV and JSON files in just one line -PyStreamAPI offers a convenient way to load data from CSV files. Like that you can start processing your CSV right away without having to worry about reading and parsing the file. +PyStreamAPI offers a convenient way to load data from CSV and JSON files. Like that you can start processing your files right away without having to worry about reading and parsing the files. -You can import the loader with: +You can import the loaders with: ```python -from pystreamapi.loaders import csv +from pystreamapi.loaders import csv, json ``` -Now you can use the loader directly when creating your Stream: +Now you can use the loaders directly when creating your Stream: + +For CSV: ```python Stream.of(csv("data.csv", delimiter=";")) \ .map(lambda x: x.attr1) \ .for_each(print) ``` -You can access the attributes of the CSV rows directly like you would with a normal object. + +For JSON: +```python +Stream.of(json("data.json")) \ + .map(lambda x: x.attr1) \ + .for_each(print) +``` + +You can access the attributes of the data structures directly like you would do with a normal object. ## API Reference For a more detailed documentation view the docs on GitBook: [PyStreamAPI Docs](https://pystreamapi.pickwicksoft.org/) From 2b4c3c8feaff0f241d05f937528c46ba66a15192 Mon Sep 17 00:00:00 2001 From: Stefan Garlonta Date: Thu, 28 Sep 2023 16:12:01 +0200 Subject: [PATCH 4/6] :rotating_light: Fix pylint and Deepsource warnings --- pystreamapi/loaders/__csv/__csv_loader.py | 1 + pystreamapi/loaders/__loader_utils.py | 1 + tests/test_csv_loader.py | 78 +++++++++++------------ tests/test_json_loader.py | 46 ++++++------- 4 files changed, 65 insertions(+), 61 deletions(-) diff --git a/pystreamapi/loaders/__csv/__csv_loader.py b/pystreamapi/loaders/__csv/__csv_loader.py index e5d6a30..f009ad4 100644 --- a/pystreamapi/loaders/__csv/__csv_loader.py +++ b/pystreamapi/loaders/__csv/__csv_loader.py @@ -39,6 +39,7 @@ def __load_csv(file_path, cast, delimiter, encoding): def __get_csv_header(csvreader): + """Get the header of a CSV file. If the header is empty, return an empty list""" while True: try: header = next(csvreader) diff --git a/pystreamapi/loaders/__loader_utils.py b/pystreamapi/loaders/__loader_utils.py index 247e390..5f9e784 100644 --- a/pystreamapi/loaders/__loader_utils.py +++ b/pystreamapi/loaders/__loader_utils.py @@ -3,6 +3,7 @@ class LoaderUtils: + """Utility class for loaders to validate paths and cast data""" @staticmethod def try_cast(value): diff --git a/tests/test_csv_loader.py b/tests/test_csv_loader.py index 8872bc1..6463748 100644 --- a/tests/test_csv_loader.py +++ b/tests/test_csv_loader.py @@ -13,53 +13,53 @@ class TestCSVLoader(TestCase): def test_csv_loader(self): - with (patch('builtins.open', mock_open(read_data=file_content)), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = csv('path/to/data.csv') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - self.assertEqual(data[0].attr2, 2.0) - self.assertIsInstance(data[0].attr2, float) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_csv_loader_with_casting_disabled(self): - with (patch('builtins.open', mock_open(read_data=file_content)), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = csv('path/to/data.csv', cast_types=False) - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, '1') - self.assertIsInstance(data[0].attr1, str) - self.assertEqual(data[0].attr2, '2.0') - self.assertIsInstance(data[0].attr2, str) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv', cast_types=False) + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, '1') + self.assertIsInstance(data[0].attr1, str) + self.assertEqual(data[0].attr2, '2.0') + self.assertIsInstance(data[0].attr2, str) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_csv_loader_is_iterable(self): - with (patch('builtins.open', mock_open(read_data=file_content)), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = csv('path/to/data.csv') - self.assertEqual(len(list(iter(data))), 2) + with patch('builtins.open', mock_open(read_data=file_content)): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(list(iter(data))), 2) def test_csv_loader_with_custom_delimiter(self): - with (patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = csv('path/to/data.csv', delimiter=';') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) + with patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv', delimiter=';') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) def test_csv_loader_with_empty_file(self): - with (patch('builtins.open', mock_open(read_data="")), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = csv('path/to/data.csv') - self.assertEqual(len(data), 0) + with patch('builtins.open', mock_open(read_data="")): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 0) def test_csv_loader_with_invalid_path(self): with self.assertRaises(FileNotFoundError): diff --git a/tests/test_json_loader.py b/tests/test_json_loader.py index ba7ed3d..8af0fbf 100644 --- a/tests/test_json_loader.py +++ b/tests/test_json_loader.py @@ -1,3 +1,4 @@ +from json import JSONDecodeError from unittest import TestCase from unittest.mock import patch, mock_open @@ -20,31 +21,31 @@ class TestJsonLoader(TestCase): def test_json_loader_from_file(self): - with (patch('builtins.open', mock_open(read_data=file_content)), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = json('path/to/data.json') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - self.assertEqual(data[0].attr2, 2.0) - self.assertIsInstance(data[0].attr2, float) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_json_loader_is_iterable(self): - with (patch('builtins.open', mock_open(read_data=file_content)), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = json('path/to/data.json') - self.assertEqual(len(list(iter(data))), 2) + with patch('builtins.open', mock_open(read_data=file_content)): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(list(iter(data))), 2) def test_json_loader_with_empty_file(self): - with (patch('builtins.open', mock_open(read_data="")), - patch('os.path.exists', return_value=True), - patch('os.path.isfile', return_value=True)): - data = json('path/to/data.json') - self.assertEqual(len(data), 0) + with patch('builtins.open', mock_open(read_data="")): + with patch('os.path.exists', return_value=True): + with patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(data), 0) def test_json_loader_with_invalid_path(self): with self.assertRaises(FileNotFoundError): @@ -65,4 +66,5 @@ def test_json_loader_from_string(self): self.assertIsInstance(data[1].attr1, str) def test_json_loader_from_empty_string(self): - json('', read_from_src=True) + with self.assertRaises(JSONDecodeError): + self.assertEqual(len(json('', read_from_src=True)), 0) From 828e9e09f1a59632bd660d9ebdf8c7b601f6d9f0 Mon Sep 17 00:00:00 2001 From: "deepsource-autofix[bot]" <62050782+deepsource-autofix[bot]@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:16:05 +0000 Subject: [PATCH 5/6] refactor: autofix issues in 2 files Resolved issues in the following files with DeepSource Autofix: 1. tests/test_csv_loader.py 2. tests/test_json_loader.py --- tests/test_csv_loader.py | 68 +++++++++++++++++---------------------- tests/test_json_loader.py | 36 +++++++++------------ 2 files changed, 44 insertions(+), 60 deletions(-) diff --git a/tests/test_csv_loader.py b/tests/test_csv_loader.py index 6463748..a2ecbb6 100644 --- a/tests/test_csv_loader.py +++ b/tests/test_csv_loader.py @@ -13,53 +13,43 @@ class TestCSVLoader(TestCase): def test_csv_loader(self): - with patch('builtins.open', mock_open(read_data=file_content)): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = csv('path/to/data.csv') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - self.assertEqual(data[0].attr2, 2.0) - self.assertIsInstance(data[0].attr2, float) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_csv_loader_with_casting_disabled(self): - with patch('builtins.open', mock_open(read_data=file_content)): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = csv('path/to/data.csv', cast_types=False) - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, '1') - self.assertIsInstance(data[0].attr1, str) - self.assertEqual(data[0].attr2, '2.0') - self.assertIsInstance(data[0].attr2, str) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv', cast_types=False) + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, '1') + self.assertIsInstance(data[0].attr1, str) + self.assertEqual(data[0].attr2, '2.0') + self.assertIsInstance(data[0].attr2, str) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_csv_loader_is_iterable(self): - with patch('builtins.open', mock_open(read_data=file_content)): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = csv('path/to/data.csv') - self.assertEqual(len(list(iter(data))), 2) + with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(list(iter(data))), 2) def test_csv_loader_with_custom_delimiter(self): - with patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = csv('path/to/data.csv', delimiter=';') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) + with patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv', delimiter=';') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) def test_csv_loader_with_empty_file(self): - with patch('builtins.open', mock_open(read_data="")): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = csv('path/to/data.csv') - self.assertEqual(len(data), 0) + with patch('builtins.open', mock_open(read_data="")), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = csv('path/to/data.csv') + self.assertEqual(len(data), 0) def test_csv_loader_with_invalid_path(self): with self.assertRaises(FileNotFoundError): diff --git a/tests/test_json_loader.py b/tests/test_json_loader.py index 8af0fbf..6f96355 100644 --- a/tests/test_json_loader.py +++ b/tests/test_json_loader.py @@ -21,31 +21,25 @@ class TestJsonLoader(TestCase): def test_json_loader_from_file(self): - with patch('builtins.open', mock_open(read_data=file_content)): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = json('path/to/data.json') - self.assertEqual(len(data), 2) - self.assertEqual(data[0].attr1, 1) - self.assertIsInstance(data[0].attr1, int) - self.assertEqual(data[0].attr2, 2.0) - self.assertIsInstance(data[0].attr2, float) - self.assertEqual(data[1].attr1, 'a') - self.assertIsInstance(data[1].attr1, str) + with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(data), 2) + self.assertEqual(data[0].attr1, 1) + self.assertIsInstance(data[0].attr1, int) + self.assertEqual(data[0].attr2, 2.0) + self.assertIsInstance(data[0].attr2, float) + self.assertEqual(data[1].attr1, 'a') + self.assertIsInstance(data[1].attr1, str) def test_json_loader_is_iterable(self): - with patch('builtins.open', mock_open(read_data=file_content)): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = json('path/to/data.json') - self.assertEqual(len(list(iter(data))), 2) + with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(list(iter(data))), 2) def test_json_loader_with_empty_file(self): - with patch('builtins.open', mock_open(read_data="")): - with patch('os.path.exists', return_value=True): - with patch('os.path.isfile', return_value=True): - data = json('path/to/data.json') - self.assertEqual(len(data), 0) + with patch('builtins.open', mock_open(read_data="")), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + data = json('path/to/data.json') + self.assertEqual(len(data), 0) def test_json_loader_with_invalid_path(self): with self.assertRaises(FileNotFoundError): From 561ac178bb4c2c1e6952d45231850ecac99860fa Mon Sep 17 00:00:00 2001 From: Stefan Garlonta Date: Thu, 28 Sep 2023 16:19:25 +0200 Subject: [PATCH 6/6] :rotating_light: Fix linter warnings and disable pylint rule --- tests/test_csv_loader.py | 21 ++++++++++++++++----- tests/test_json_loader.py | 13 ++++++++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/test_csv_loader.py b/tests/test_csv_loader.py index a2ecbb6..41d983c 100644 --- a/tests/test_csv_loader.py +++ b/tests/test_csv_loader.py @@ -1,3 +1,4 @@ +# pylint: disable=not-context-manager from unittest import TestCase from unittest.mock import patch, mock_open @@ -13,7 +14,9 @@ class TestCSVLoader(TestCase): def test_csv_loader(self): - with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = csv('path/to/data.csv') self.assertEqual(len(data), 2) self.assertEqual(data[0].attr1, 1) @@ -24,7 +27,9 @@ def test_csv_loader(self): self.assertIsInstance(data[1].attr1, str) def test_csv_loader_with_casting_disabled(self): - with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = csv('path/to/data.csv', cast_types=False) self.assertEqual(len(data), 2) self.assertEqual(data[0].attr1, '1') @@ -35,19 +40,25 @@ def test_csv_loader_with_casting_disabled(self): self.assertIsInstance(data[1].attr1, str) def test_csv_loader_is_iterable(self): - with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = csv('path/to/data.csv') self.assertEqual(len(list(iter(data))), 2) def test_csv_loader_with_custom_delimiter(self): - with patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content.replace(",", ";"))), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = csv('path/to/data.csv', delimiter=';') self.assertEqual(len(data), 2) self.assertEqual(data[0].attr1, 1) self.assertIsInstance(data[0].attr1, int) def test_csv_loader_with_empty_file(self): - with patch('builtins.open', mock_open(read_data="")), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data="")), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = csv('path/to/data.csv') self.assertEqual(len(data), 0) diff --git a/tests/test_json_loader.py b/tests/test_json_loader.py index 6f96355..e4d61de 100644 --- a/tests/test_json_loader.py +++ b/tests/test_json_loader.py @@ -1,3 +1,4 @@ +# pylint: disable=not-context-manager from json import JSONDecodeError from unittest import TestCase from unittest.mock import patch, mock_open @@ -21,7 +22,9 @@ class TestJsonLoader(TestCase): def test_json_loader_from_file(self): - with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = json('path/to/data.json') self.assertEqual(len(data), 2) self.assertEqual(data[0].attr1, 1) @@ -32,12 +35,16 @@ def test_json_loader_from_file(self): self.assertIsInstance(data[1].attr1, str) def test_json_loader_is_iterable(self): - with patch('builtins.open', mock_open(read_data=file_content)), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data=file_content)), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = json('path/to/data.json') self.assertEqual(len(list(iter(data))), 2) def test_json_loader_with_empty_file(self): - with patch('builtins.open', mock_open(read_data="")), patch('os.path.exists', return_value=True), patch('os.path.isfile', return_value=True): + with (patch('builtins.open', mock_open(read_data="")), + patch('os.path.exists', return_value=True), + patch('os.path.isfile', return_value=True)): data = json('path/to/data.json') self.assertEqual(len(data), 0)