From c4b6ed19c80e079b54da9a4ef322b86b9188b1cf Mon Sep 17 00:00:00 2001
From: Nick Moore <nick@zoic.org>
Date: Fri, 23 Aug 2024 18:40:00 +1000
Subject: [PATCH] More tests for python and csv plugins, bz2 handling for csv
 writer

---
 countess/core/cmd.py         |   1 -
 countess/plugins/csv.py      |   5 ++
 tests/input1.csv.bz2         | Bin 0 -> 82 bytes
 tests/input1.csv.gz          | Bin 0 -> 73 bytes
 tests/plugins/test_csv.py    |  98 ++++++++++++++++++++++++++++++++++-
 tests/plugins/test_python.py |  32 ++++++++++++
 tests/test_cmd.py            |  14 +++--
 7 files changed, 145 insertions(+), 5 deletions(-)
 create mode 100644 tests/input1.csv.bz2
 create mode 100644 tests/input1.csv.gz

diff --git a/countess/core/cmd.py b/countess/core/cmd.py
index e934a20..c49242c 100644
--- a/countess/core/cmd.py
+++ b/countess/core/cmd.py
@@ -5,7 +5,6 @@
 
 from .config import read_config
 
-
 def process_ini(config_filename) -> None:
     graph = read_config(config_filename)
     graph.run()
diff --git a/countess/plugins/csv.py b/countess/plugins/csv.py
index 89bec0b..41261b7 100644
--- a/countess/plugins/csv.py
+++ b/countess/plugins/csv.py
@@ -1,3 +1,4 @@
+import bz2
 import csv
 import gzip
 import logging
@@ -140,6 +141,8 @@ def prepare(self, sources: list[str], row_limit: Optional[int] = None):
             filename = str(self.filename)
             if filename.endswith(".gz"):
                 self.filehandle = gzip.open(filename, "wb")
+            elif filename.endswith(".bz2"):
+                self.filehandle = bz2.open(filename, "wb")
             else:
                 self.filehandle = open(filename, "wb")
         else:
@@ -182,3 +185,5 @@ def process(self, data: pd.DataFrame, source: str):
     def finalize(self):
         if isinstance(self.filehandle, BytesIO):
             yield self.filehandle.getvalue().decode("utf-8")
+        else:
+            self.filehandle.close()
diff --git a/tests/input1.csv.bz2 b/tests/input1.csv.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..4ab66d26b09ad49bab8bdd5d17b80e49123fe6a8
GIT binary patch
literal 82
zcmV-Y0ImN*T4*^jL0KkKSvOl*EC2u$S%3f#00cWAIpMZYAOJBy1|v;0XheE}sPq#w
o%1D-{wRhS2mxz`EAq^HtN4j3W8I8OYdsFduBvXY619i2Mz!$Y05&!@I

literal 0
HcmV?d00001

diff --git a/tests/input1.csv.gz b/tests/input1.csv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0088c32e50a757eabc3c29ff5efc94babd496dbb
GIT binary patch
literal 73
zcmV-P0Ji@hiwFpEy>4Uz18Ht>b#yT<V{>)@E6K>rOV>%xFU>3AO3TmJF*GtX;z~*^
f(lIgy(N#L8W~N+)r4>4smX=%qOaectEdT%jGwmEO

literal 0
HcmV?d00001

diff --git a/tests/plugins/test_csv.py b/tests/plugins/test_csv.py
index 2da199d..d2cc699 100644
--- a/tests/plugins/test_csv.py
+++ b/tests/plugins/test_csv.py
@@ -1,5 +1,9 @@
-from countess.plugins.csv import LoadCsvPlugin
+import bz2
+import gzip
 
+import pandas as pd
+
+from countess.plugins.csv import LoadCsvPlugin, SaveCsvPlugin
 
 def test_load_csv():
     plugin = LoadCsvPlugin()
@@ -9,6 +13,37 @@ def test_load_csv():
     assert len(output_df) == 4
 
 
+def test_load_csv_index():
+    plugin = LoadCsvPlugin()
+    plugin.set_parameter("files.0.filename", "tests/input1.csv")
+    plugin.set_parameter("columns.0.name", "whatever")
+    plugin.set_parameter("columns.0.type", "string")
+    plugin.set_parameter("columns.0.index", True)
+    plugin.set_parameter("columns.1.name", "stuff")
+    plugin.set_parameter("columns.1.type", "integer")
+    plugin.set_parameter("columns.1.index", False)
+    output_df = next(plugin.load_file(0))
+    assert output_df.index.name == "whatever"
+    assert list(output_df.columns) == ["stuff"]
+    assert len(output_df) == 4
+
+
+def test_load_csv_gz():
+    plugin = LoadCsvPlugin()
+    plugin.set_parameter("files.0.filename", "tests/input1.csv.gz")
+    output_df = next(plugin.load_file(0))
+    assert list(output_df.columns) == ["thing", "count"]
+    assert len(output_df) == 4
+
+
+def test_load_csv_bz2():
+    plugin = LoadCsvPlugin()
+    plugin.set_parameter("files.0.filename", "tests/input1.csv.bz2")
+    output_df = next(plugin.load_file(0))
+    assert list(output_df.columns) == ["thing", "count"]
+    assert len(output_df) == 4
+
+
 def test_load_tsv():
     plugin = LoadCsvPlugin()
     plugin.set_parameter("files.0.filename", "tests/input1.tsv")
@@ -59,3 +94,64 @@ def test_filename_column():
     output_df = next(plugin.load_file(0))
     assert "filename" in output_df.columns
     assert output_df["filename"].iloc[1] == "input1"
+
+
+df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]], columns=['a','b','c'])
+
+def test_save_csv():
+    """Write a DataFrame to a plain CSV file and check the exact text."""
+    plugin = SaveCsvPlugin()
+    plugin.set_parameter("header", True)
+    plugin.set_parameter("filename", "tests/output1.csv")
+    plugin.prepare(["test"], None)
+    plugin.process(df, 'test')
+    list(plugin.finalize())  # generator: must be consumed to close the file
+
+    with open("tests/output1.csv", "r", encoding="utf-8") as fh:
+        text = fh.read()
+        assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"
+
+
+def test_save_csv_gz():
+
+    plugin = SaveCsvPlugin()
+    plugin.set_parameter("header", True)
+    plugin.set_parameter("filename", "tests/output1.csv.gz")
+    plugin.prepare(["test"], None)
+    plugin.process(df, 'test')
+    list(plugin.finalize())
+
+    with gzip.open("tests/output1.csv.gz", "rt") as fh:
+        text = fh.read()
+        assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"
+
+
+def test_save_csv_bz2():
+
+    plugin = SaveCsvPlugin()
+    plugin.set_parameter("header", True)
+    plugin.set_parameter("filename", "tests/output1.csv.bz2")
+    plugin.prepare(["test"], None)
+    plugin.process(df, 'test')
+    list(plugin.finalize())
+
+    with bz2.open("tests/output1.csv.bz2", "rt") as fh:
+        text = fh.read()
+        assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n"
+
+
+df2 = pd.DataFrame([[10,11,12]], columns=['a','b','d'])
+
+
+def test_save_csv_multi():  # two frames with differing columns, one file
+    plugin = SaveCsvPlugin()
+    plugin.set_parameter("header", True)
+    plugin.set_parameter("filename", "tests/output2.csv")
+    plugin.prepare(["test"], None)
+    plugin.process(df, 'test')
+    plugin.process(df2, 'test2')
+    list(plugin.finalize())  # generator: must be consumed to close the file
+
+    with open("tests/output2.csv", "r", encoding="utf-8") as fh:
+        text = fh.read()
+        assert text == "a,b,c\n1,2,3\n4,5,6\n7,8,9\n10,11,,12\n"
diff --git a/tests/plugins/test_python.py b/tests/plugins/test_python.py
index 28e376a..f63e4c9 100644
--- a/tests/plugins/test_python.py
+++ b/tests/plugins/test_python.py
@@ -53,3 +53,35 @@ def test_python_dropna():
 
     assert any(np.isnan(dfo["d"]))
     assert not any(np.isnan(dfo["b"]))
+
+
+def test_python_filter():
+    plugin = PythonPlugin()
+    plugin.set_parameter(
+        "code",
+        """
+__filter = d < 10 and a % 2
+    """,
+    )
+
+    plugin.prepare(["test"], None)
+    dfo = plugin.process_dataframe(dfi)
+
+    assert "__filter" not in dfo.columns
+    assert len(dfo) == 2
+
+
+def test_python_exception(caplog):
+    plugin = PythonPlugin()
+    plugin.set_parameter(
+        "code",
+        """
+e = 1/0
+    """,
+    )
+
+    plugin.prepare(["test"], None)
+    dfo = plugin.process_dataframe(dfi)
+    assert len(dfo) == 5
+
+    assert "Exception" in caplog.text
diff --git a/tests/test_cmd.py b/tests/test_cmd.py
index 8f71892..381b727 100644
--- a/tests/test_cmd.py
+++ b/tests/test_cmd.py
@@ -1,8 +1,10 @@
 import csv
+from unittest.mock import patch
 
 import pytest
 
-from countess.core.cmd import run as cmd_run
+import countess.core.cmd
+from countess.core.cmd import run, main
 
 expected_output = """"thing","foo","bar","baz","qux","number","zz"
 "bar",10,2,1,4,232,0.08620689655172414
@@ -13,8 +15,14 @@
 
 @pytest.mark.slow
 def test_command_invocation():
-    cmd_run(["countess_cmd", "tests/simple.ini"])
+    run(["countess_cmd", "tests/simple.ini"])
 
-    with open("tests/output.csv", "r") as fh:
+    with open("tests/output.csv", "r", encoding="utf-8") as fh:
         output = fh.read()
         assert output == expected_output
+
+
+def test_main():
+    with patch.object(countess.core.cmd, 'run') as p:
+        main()
+        p.assert_called_once()