Skip to content

Commit

Permalink
More tests for python and csv plugins, bz2 handling for csv writer
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Aug 23, 2024
1 parent bc21219 commit c4b6ed1
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 5 deletions.
1 change: 0 additions & 1 deletion countess/core/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from .config import read_config


def process_ini(config_filename) -> None:
graph = read_config(config_filename)
graph.run()
Expand Down
5 changes: 5 additions & 0 deletions countess/plugins/csv.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import bz2
import csv
import gzip
import logging
Expand Down Expand Up @@ -140,6 +141,8 @@ def prepare(self, sources: list[str], row_limit: Optional[int] = None):
filename = str(self.filename)
if filename.endswith(".gz"):
self.filehandle = gzip.open(filename, "wb")
elif filename.endswith(".bz2"):
self.filehandle = bz2.open(filename, "wb")
else:
self.filehandle = open(filename, "wb")
else:
Expand Down Expand Up @@ -182,3 +185,5 @@ def process(self, data: pd.DataFrame, source: str):
def finalize(self):
    """Finish the output: yield in-memory text, or close the real file.

    This is a generator function, so nothing below runs until the returned
    generator is iterated.  When ``self.filehandle`` is a ``BytesIO`` its
    contents are decoded as UTF-8 and yielded as one string; any other
    handle is closed and nothing is yielded.
    """
    handle = self.filehandle
    if not isinstance(handle, BytesIO):
        # A real (possibly compressed) file: closing it is all that is done.
        handle.close()
        return
    yield handle.getvalue().decode("utf-8")
Binary file added tests/input1.csv.bz2
Binary file not shown.
Binary file added tests/input1.csv.gz
Binary file not shown.
98 changes: 97 additions & 1 deletion tests/plugins/test_csv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from countess.plugins.csv import LoadCsvPlugin
import bz2
import gzip

import pandas as pd

from countess.plugins.csv import LoadCsvPlugin, SaveCsvPlugin

def test_load_csv():
plugin = LoadCsvPlugin()
Expand All @@ -9,6 +13,37 @@ def test_load_csv():
assert len(output_df) == 4


def test_load_csv_index():
    """A column flagged as index becomes the frame's index, not a column."""
    loader = LoadCsvPlugin()
    # Applied in this exact order: file first, then per-column settings.
    settings = (
        ("files.0.filename", "tests/input1.csv"),
        ("columns.0.name", "whatever"),
        ("columns.0.type", "string"),
        ("columns.0.index", True),
        ("columns.1.name", "stuff"),
        ("columns.1.type", "integer"),
        ("columns.1.index", False),
    )
    for key, value in settings:
        loader.set_parameter(key, value)

    frame = next(loader.load_file(0))
    assert frame.index.name == "whatever"
    assert [*frame.columns] == ["stuff"]
    assert len(frame) == 4


def test_load_csv_gz():
    """Load the ``.csv.gz`` fixture and check its columns and row count."""
    loader = LoadCsvPlugin()
    loader.set_parameter("files.0.filename", "tests/input1.csv.gz")
    frame = next(loader.load_file(0))
    assert [*frame.columns] == ["thing", "count"]
    assert len(frame) == 4


def test_load_csv_bz2():
    """Load the ``.csv.bz2`` fixture and check its columns and row count."""
    loader = LoadCsvPlugin()
    loader.set_parameter("files.0.filename", "tests/input1.csv.bz2")
    frame = next(loader.load_file(0))
    assert [*frame.columns] == ["thing", "count"]
    assert len(frame) == 4


def test_load_tsv():
plugin = LoadCsvPlugin()
plugin.set_parameter("files.0.filename", "tests/input1.tsv")
Expand Down Expand Up @@ -59,3 +94,64 @@ def test_filename_column():
output_df = next(plugin.load_file(0))
assert "filename" in output_df.columns
assert output_df["filename"].iloc[1] == "input1"


# Shared 3x3 integer fixture written out by the save-CSV tests.
df = pd.DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]})

def test_save_csv():
    """Write ``df`` to a plain CSV file and verify the exact text on disk."""
    plugin = SaveCsvPlugin()
    plugin.set_parameter("header", True)
    plugin.set_parameter("filename", "tests/output1.csv")
    plugin.prepare(["test"], None)
    plugin.process(df, "test")
    # finalize() contains a ``yield``, so calling it only creates a generator.
    # It has to be consumed for the file handle to actually be closed (and
    # therefore flushed) before the file is read back below.
    list(plugin.finalize())

    with open("tests/output1.csv", "r", encoding="utf-8") as fh:
        text = fh.read()
    assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"


def test_save_csv_gz():
    """Write ``df`` through the gzip path and read it back as text."""
    writer = SaveCsvPlugin()
    writer.set_parameter("header", True)
    writer.set_parameter("filename", "tests/output1.csv.gz")
    writer.prepare(["test"], None)
    writer.process(df, "test")
    # Drain the finalize() generator so its body actually executes.
    for _ in writer.finalize():
        pass

    with gzip.open("tests/output1.csv.gz", "rt") as fh:
        written = fh.read()
    assert written == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"


def test_save_csv_bz2():
    """Write ``df`` through the bz2 path and read it back as text."""
    writer = SaveCsvPlugin()
    writer.set_parameter("header", True)
    writer.set_parameter("filename", "tests/output1.csv.bz2")
    writer.prepare(["test"], None)
    writer.process(df, "test")
    # Drain the finalize() generator so its body actually executes.
    for _ in writer.finalize():
        pass

    with bz2.open("tests/output1.csv.bz2", "rt") as fh:
        written = fh.read()
    assert written == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"


# Second fixture: one row whose third column is "d" rather than "c".
df2 = pd.DataFrame({"a": [10], "b": [11], "d": [12]})


def test_save_csv_multi():
    """Write two frames with differing columns to one CSV and check the text."""
    plugin = SaveCsvPlugin()
    plugin.set_parameter("header", True)
    plugin.set_parameter("filename", "tests/output2.csv")
    plugin.prepare(["test"], None)
    plugin.process(df, "test")
    plugin.process(df2, "test2")
    # finalize() contains a ``yield``, so calling it only creates a generator.
    # It has to be consumed for the file handle to actually be closed (and
    # therefore flushed) before the file is read back below.
    list(plugin.finalize())

    with open("tests/output2.csv", "r", encoding="utf-8") as fh:
        text = fh.read()
    assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,,12\n"
32 changes: 32 additions & 0 deletions tests/plugins/test_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,35 @@ def test_python_dropna():

assert any(np.isnan(dfo["d"]))
assert not any(np.isnan(dfo["b"]))


# Runs PythonPlugin with code that assigns `__filter`, then asserts that the
# result has no `__filter` column and exactly 2 rows.
# NOTE(review): `dfi` is defined elsewhere in this test module (not visible
# here); presumably `__filter` selects which rows are kept -- confirm.
def test_python_filter():
plugin = PythonPlugin()
plugin.set_parameter(
"code",
"""
__filter = d < 10 and a % 2
""",
)

plugin.prepare(["test"], None)
dfo = plugin.process_dataframe(dfi)

# The filter name must not appear as an output column.
assert "__filter" not in dfo.columns
assert len(dfo) == 2


# Runs PythonPlugin with code that divides by zero, then asserts that
# process_dataframe still returns 5 rows and that the captured log text
# contains "Exception".
# NOTE(review): `caplog` is presumably pytest's log-capture fixture, and `dfi`
# is defined elsewhere in this test module (not visible here) -- confirm.
def test_python_exception(caplog):
plugin = PythonPlugin()
plugin.set_parameter(
"code",
"""
e = 1/0
""",
)

plugin.prepare(["test"], None)
dfo = plugin.process_dataframe(dfi)
# The failing code is expected not to reduce the row count.
assert len(dfo) == 5

assert "Exception" in caplog.text
14 changes: 11 additions & 3 deletions tests/test_cmd.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import csv
from unittest.mock import patch

import pytest

from countess.core.cmd import run as cmd_run
import countess.core.cmd
from countess.core.cmd import run, main

expected_output = """"thing","foo","bar","baz","qux","number","zz"
"bar",10,2,1,4,232,0.08620689655172414
Expand All @@ -13,8 +15,14 @@

@pytest.mark.slow
def test_command_invocation():
cmd_run(["countess_cmd", "tests/simple.ini"])
run(["countess_cmd", "tests/simple.ini"])

with open("tests/output.csv", "r") as fh:
with open("tests/output.csv", "r", encoding="utf-8") as fh:
output = fh.read()
assert output == expected_output


def test_main():
    """``main()`` should call ``countess.core.cmd.run`` exactly once."""
    run_patch = patch.object(countess.core.cmd, "run")
    with run_patch as mock_run:
        main()
        mock_run.assert_called_once()

0 comments on commit c4b6ed1

Please sign in to comment.