Merge pull request #179 from capitalone/develop

Release v0.8.4
capitalone · Jan 4, 2023 · f9e8694 · f9e8694
2 parents 8776719 + 3654b0a
commit f9e8694
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 5 deletions.
diff --git a/datacompy/__init__.py b/datacompy/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.8.3"
+__version__ = "0.8.4"
 
 from datacompy.core import *
 from datacompy.sparkcompare import NUMERIC_SPARK_TYPES, SparkCompare
diff --git a/datacompy/core.py b/datacompy/core.py
@@ -508,8 +508,8 @@ def all_mismatch(self, ignore_matching_cols=False):
                 orig_col_name = col[:-6]
 
                 col_comparison = columns_equal(
-                    self.df1[orig_col_name],
-                    self.df2[orig_col_name],
+                    self.intersect_rows[orig_col_name + "_df1"],
+                    self.intersect_rows[orig_col_name + "_df2"],
                     self.rel_tol,
                     self.abs_tol,
                     self.ignore_spaces,

diff --git a/setup.cfg b/setup.cfg
@@ -33,8 +33,8 @@ zip_safe = False
 include_package_data = True
 packages = find:
 install_requires = 
-	pandas<=1.5.1,>=0.25.0
-	numpy<=1.23.4,>=1.11.3
+	pandas<=1.5.2,>=0.25.0
+	numpy<=1.24.1,>=1.11.3
 	ordered-set<=4.1.0,>=4.0.2
 
 [options.package_data]

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -1022,6 +1022,45 @@ def test_all_mismatch_not_ignore_matching_cols_some_cols_matching():
     assert (~(output.date_fld_df1 != output.date_fld_df2)).values.sum() == 0
 
 
+def test_all_mismatch_ignore_matching_cols_some_cols_matching_diff_rows():
+    # Each dataset has acct_ids missing from the other, so only intersecting rows are compared.
+    # Columns dollar_amt and name match on those rows and must be dropped from the output.
+    data1 = """acct_id,dollar_amt,name,float_fld,date_fld
+    10000001234,123.45,George Maharis,14530.1555,2017-01-01
+    10000001235,0.45,Michael Bluth,1,2017-01-01
+    10000001236,1345,George Bluth,,2017-01-01
+    10000001237,123456,Bob Loblaw,345.12,2017-01-01
+    10000001239,1.05,Lucille Bluth,,2017-01-01
+    10000001240,123.45,George Maharis,14530.1555,2017-01-02
+    10000001241,1111.05,Lucille Bluth,
+    """
+
+    data2 = """acct_id,dollar_amt,name,float_fld,date_fld
+    10000001234,123.45,George Maharis,14530.155,
+    10000001235,0.45,Michael Bluth,,
+    10000001236,1345,George Bluth,1,
+    10000001237,123456,Bob Loblaw,345.12,
+    10000001238,1.05,Lucille Bluth,111,
+    """
+    df1 = pd.read_csv(io.StringIO(data1), sep=",")
+    df2 = pd.read_csv(io.StringIO(data2), sep=",")
+    compare = datacompy.Compare(df1, df2, "acct_id")
+
+    output = compare.all_mismatch(ignore_matching_cols=True)
+
+    assert output.shape[0] == 4
+    assert output.shape[1] == 5
+
+    assert (output.float_fld_df1 != output.float_fld_df2).values.sum() == 3
+    assert (~(output.float_fld_df1 != output.float_fld_df2)).values.sum() == 1
+
+    assert (output.date_fld_df1 != output.date_fld_df2).values.sum() == 4
+    assert (~(output.date_fld_df1 != output.date_fld_df2)).values.sum() == 0
+
+    assert "name_df1" not in output and "name_df2" not in output
+    assert "dollar_amt_df1" not in output and "dollar_amt_df2" not in output
+
+
 def test_all_mismatch_ignore_matching_cols_some_calls_matching():
     # Columns dollar_amt and name are matching
     data1 = """acct_id,dollar_amt,name,float_fld,date_fld