-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
…json Feature/#70/data loader for json
- Loading branch information
Showing
12 changed files
with
248 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
from pystreamapi.loaders.__csv_loader import csv | ||
from pystreamapi.loaders.__csv.__csv_loader import csv | ||
from pystreamapi.loaders.__json.__json_loader import json | ||
|
||
__all__ = [ | ||
'csv' | ||
'csv', | ||
'json' | ||
] |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import json as jsonlib | ||
from collections import namedtuple | ||
|
||
from pystreamapi.loaders.__lazy_file_iterable import LazyFileIterable | ||
from pystreamapi.loaders.__loader_utils import LoaderUtils | ||
|
||
|
||
def json(src: str, read_from_src=False) -> LazyFileIterable:
    """
    Build a LazyFileIterable over JSON data taken from a file or from a string.

    When reading from a file, the path is validated right away, so a missing
    path or a non-file path raises before anything is returned. The parsing
    itself is handed to LazyFileIterable as a callable (presumably deferred
    until the data is first accessed).

    :param src: Either the path to a JSON file or a JSON string.
    :param read_from_src: If True, src is treated as a JSON string. If False, src is treated as
        a path to a JSON file.
    :return: A LazyFileIterable wrapping the loader for the given source.
    """
    if not read_from_src:
        validated_path = LoaderUtils.validate_path(src)
        return LazyFileIterable(lambda: __load_json_file(validated_path))
    return LazyFileIterable(lambda: __load_json_string(src))
|
||
|
||
def __load_json_file(file_path):
    """
    Read a UTF-8 encoded JSON file and parse it, converting JSON objects to namedtuples.

    :param file_path: Path of the JSON file to read.
    :return: The parsed JSON value with every object converted by
        ``__dict_to_namedtuple``; an empty list when the file is empty or
        contains only whitespace.
    :raises json.JSONDecodeError: If the file content is not valid JSON.
    """
    # skipcq: PTC-W6004
    with open(file_path, mode='r', encoding='utf-8') as jsonfile:
        src = jsonfile.read()
    # A file holding nothing but whitespace (e.g. a lone trailing newline) carries
    # no data either, so treat it like an empty file instead of failing to decode.
    if not src.strip():
        return []
    return jsonlib.loads(src, object_hook=__dict_to_namedtuple)
|
||
|
||
def __load_json_string(json_string):
    """
    Parse a JSON document held in a string, converting JSON objects to namedtuples.

    :param json_string: The JSON text to decode.
    :return: The decoded value; every JSON object is passed through
        ``__dict_to_namedtuple`` via the decoder's ``object_hook``.
    """
    parsed = jsonlib.loads(json_string, object_hook=__dict_to_namedtuple)
    return parsed
|
||
|
||
def __dict_to_namedtuple(d, name='Item'):
    """
    Convert a dictionary (recursively, for nested dictionaries) to a namedtuple.

    Keys that are not usable as namedtuple field names (e.g. ``"my-key"`` or the
    keyword ``"class"``) are replaced by positional names such as ``_0`` instead
    of raising ``ValueError``; their values stay reachable by index.

    :param d: The value to convert. Anything that is not a dict is returned unchanged.
    :param name: Type name for the created namedtuple. Nested dictionaries use
        the key they are stored under; ``'Item'`` is used when the requested
        name is not a valid identifier.
    :return: A namedtuple with one field per key, or ``d`` itself if it is not a dict.
    """
    if not isinstance(d, dict):
        return d
    fields = list(d)
    try:
        item_type = namedtuple(name, fields, rename=True)
    except ValueError:
        # With rename=True only the type name can still be rejected (a nested
        # key such as "my-key" used as name), so fall back to the default.
        item_type = namedtuple('Item', fields, rename=True)
    # Build positionally: renamed fields no longer match the original keys, and
    # the dict iterates in the same order the field list was taken from.
    return item_type(*(__dict_to_namedtuple(v, k) for k, v in d.items()))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import contextlib | ||
import os | ||
|
||
|
||
class LoaderUtils:
    """Utility class for loaders to validate paths and cast data"""

    @staticmethod
    def try_cast(value):
        """
        Try to cast value to a primitive Python type (int, float, bool).

        ``int`` is tried first, then ``float``. If neither accepts the value and
        it is the text ``true`` or ``false`` (case-insensitive), the matching
        bool is returned. Anything else, including non-string values such as
        ``None`` that cannot be converted, is returned unchanged.

        :param value: The value to cast, typically a string.
        :return: The cast value, or ``value`` itself if no cast applies.
        """
        for cast in (int, float):
            # TypeError covers input such as None that int()/float() reject
            # outright; without it such a value would crash the caller.
            with contextlib.suppress(ValueError, TypeError):
                return cast(value)
        # Try to cast to bool; only strings can spell 'true' / 'false'
        if isinstance(value, str):
            lowered = value.lower()
            if lowered in ('true', 'false'):
                return lowered == 'true'
        return value

    @staticmethod
    def validate_path(file_path: str):
        """
        Validate that the given path points to an existing regular file.

        :param file_path: The path to check.
        :return: ``file_path`` unchanged when it is valid.
        :raises FileNotFoundError: If nothing exists at ``file_path``.
        :raises ValueError: If ``file_path`` exists but is not a file.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError("The specified file does not exist.")
        if not os.path.isfile(file_path):
            raise ValueError("The specified path is not a file.")
        return file_path
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# pylint: disable=not-context-manager | ||
from unittest import TestCase | ||
from unittest.mock import patch, mock_open | ||
|
||
from pystreamapi.loaders import csv | ||
|
||
# CSV fixture served by the mocked ``open``: a header row followed by one row of
# numeric cells and one row of text cells.
file_content = """
attr1,attr2
1,2.0
a,b
"""
|
||
|
||
class TestCSVLoader(TestCase):
    """Tests for the ``csv`` loader; file access and path checks are mocked where needed."""

    def test_csv_loader(self):
        """Numeric cells are cast to int/float, other cells stay strings."""
        with patch('builtins.open', mock_open(read_data=file_content)), \
                patch('os.path.exists', return_value=True), \
                patch('os.path.isfile', return_value=True):
            rows = csv('path/to/data.csv')
            self.assertEqual(len(rows), 2)
            self.assertEqual(rows[0].attr1, 1)
            self.assertIsInstance(rows[0].attr1, int)
            self.assertEqual(rows[0].attr2, 2.0)
            self.assertIsInstance(rows[0].attr2, float)
            self.assertEqual(rows[1].attr1, 'a')
            self.assertIsInstance(rows[1].attr1, str)

    def test_csv_loader_with_casting_disabled(self):
        """With ``cast_types=False`` every cell is kept as a string."""
        with patch('builtins.open', mock_open(read_data=file_content)), \
                patch('os.path.exists', return_value=True), \
                patch('os.path.isfile', return_value=True):
            rows = csv('path/to/data.csv', cast_types=False)
            self.assertEqual(len(rows), 2)
            self.assertEqual(rows[0].attr1, '1')
            self.assertIsInstance(rows[0].attr1, str)
            self.assertEqual(rows[0].attr2, '2.0')
            self.assertIsInstance(rows[0].attr2, str)
            self.assertEqual(rows[1].attr1, 'a')
            self.assertIsInstance(rows[1].attr1, str)

    def test_csv_loader_is_iterable(self):
        """The loader result can be iterated and yields both data rows."""
        with patch('builtins.open', mock_open(read_data=file_content)), \
                patch('os.path.exists', return_value=True), \
                patch('os.path.isfile', return_value=True):
            rows = csv('path/to/data.csv')
            self.assertEqual(len(list(iter(rows))), 2)

    def test_csv_loader_with_custom_delimiter(self):
        """A semicolon-separated file is read when ``delimiter=';'`` is passed."""
        semicolon_content = file_content.replace(",", ";")
        with patch('builtins.open', mock_open(read_data=semicolon_content)), \
                patch('os.path.exists', return_value=True), \
                patch('os.path.isfile', return_value=True):
            rows = csv('path/to/data.csv', delimiter=';')
            self.assertEqual(len(rows), 2)
            self.assertEqual(rows[0].attr1, 1)
            self.assertIsInstance(rows[0].attr1, int)

    def test_csv_loader_with_empty_file(self):
        """An empty file produces no rows."""
        with patch('builtins.open', mock_open(read_data="")), \
                patch('os.path.exists', return_value=True), \
                patch('os.path.isfile', return_value=True):
            rows = csv('path/to/data.csv')
            self.assertEqual(len(rows), 0)

    def test_csv_loader_with_invalid_path(self):
        """A path that does not exist raises FileNotFoundError."""
        with self.assertRaises(FileNotFoundError):
            csv('path/to/invalid.csv')

    def test_csv_loader_with_no_file(self):
        """A path that points to a directory raises ValueError."""
        with self.assertRaises(ValueError):
            csv('./')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# pylint: disable=not-context-manager | ||
from json import JSONDecodeError | ||
from unittest import TestCase | ||
from unittest.mock import patch, mock_open | ||
|
||
from pystreamapi.loaders import json | ||
|
||
# JSON fixture used for both the file-based and the string-based loader tests:
# an array of two objects, the first with numeric values, the second with text.
file_content = """
[
{
"attr1": 1,
"attr2": 2.0
},
{
"attr1": "a",
"attr2": "b"
}
]
"""
|
||
|
||
class TestJsonLoader(TestCase):
    """Tests for the ``json`` loader, reading from a mocked file or directly from a string."""

    def test_json_loader_from_file(self):
        """Objects loaded from a file expose their keys as attributes with JSON types."""
        with (patch('builtins.open', mock_open(read_data=file_content)),
              patch('os.path.exists', return_value=True),
              patch('os.path.isfile', return_value=True)):
            data = json('path/to/data.json')
            self.assertEqual(len(data), 2)
            self.assertEqual(data[0].attr1, 1)
            self.assertIsInstance(data[0].attr1, int)
            self.assertEqual(data[0].attr2, 2.0)
            self.assertIsInstance(data[0].attr2, float)
            self.assertEqual(data[1].attr1, 'a')
            self.assertIsInstance(data[1].attr1, str)

    def test_json_loader_is_iterable(self):
        """The loader result can be iterated and yields both objects."""
        with (patch('builtins.open', mock_open(read_data=file_content)),
              patch('os.path.exists', return_value=True),
              patch('os.path.isfile', return_value=True)):
            data = json('path/to/data.json')
            self.assertEqual(len(list(iter(data))), 2)

    def test_json_loader_with_empty_file(self):
        """An empty file produces no items."""
        with (patch('builtins.open', mock_open(read_data="")),
              patch('os.path.exists', return_value=True),
              patch('os.path.isfile', return_value=True)):
            data = json('path/to/data.json')
            self.assertEqual(len(data), 0)

    def test_json_loader_with_invalid_path(self):
        """A path that does not exist raises FileNotFoundError."""
        with self.assertRaises(FileNotFoundError):
            json('path/to/invalid.json')

    def test_json_loader_with_no_file(self):
        """A path that points to a directory raises ValueError."""
        with self.assertRaises(ValueError):
            json('./')

    def test_json_loader_from_string(self):
        """With ``read_from_src=True`` the first argument is parsed as JSON text."""
        data = json(file_content, read_from_src=True)
        self.assertEqual(len(data), 2)
        self.assertEqual(data[0].attr1, 1)
        self.assertIsInstance(data[0].attr1, int)
        self.assertEqual(data[0].attr2, 2.0)
        self.assertIsInstance(data[0].attr2, float)
        self.assertEqual(data[1].attr1, 'a')
        self.assertIsInstance(data[1].attr1, str)

    def test_json_loader_from_empty_string(self):
        """An empty JSON string is rejected with JSONDecodeError."""
        with self.assertRaises(JSONDecodeError):
            # The loader hands parsing to LazyFileIterable as a callable, so the
            # data is accessed via len() to make sure the string is actually
            # decoded. No equality assertion is needed: the call is expected to raise.
            len(json('', read_from_src=True))
This file was deleted.
Oops, something went wrong.