Skip to content

Commit

Permalink
Merge pull request #179 from capitalone/develop
Browse files Browse the repository at this point in the history
Release v0.8.4
  • Loading branch information
Faisal authored Jan 4, 2023
2 parents 8776719 + 3654b0a commit f9e8694
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
2 changes: 1 addition & 1 deletion datacompy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.8.3"
__version__ = "0.8.4"

from datacompy.core import *
from datacompy.sparkcompare import NUMERIC_SPARK_TYPES, SparkCompare
4 changes: 2 additions & 2 deletions datacompy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,8 @@ def all_mismatch(self, ignore_matching_cols=False):
orig_col_name = col[:-6]

col_comparison = columns_equal(
self.df1[orig_col_name],
self.df2[orig_col_name],
self.intersect_rows[orig_col_name + "_df1"],
self.intersect_rows[orig_col_name + "_df2"],
self.rel_tol,
self.abs_tol,
self.ignore_spaces,
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ zip_safe = False
include_package_data = True
packages = find:
install_requires =
pandas<=1.5.1,>=0.25.0
numpy<=1.23.4,>=1.11.3
pandas<=1.5.2,>=0.25.0
numpy<=1.24.1,>=1.11.3
ordered-set<=4.1.0,>=4.0.2

[options.package_data]
Expand Down
39 changes: 39 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,45 @@ def test_all_mismatch_not_ignore_matching_cols_some_cols_matching():
assert (~(output.date_fld_df1 != output.date_fld_df2)).values.sum() == 0


def test_all_mismatch_ignore_matching_cols_some_cols_matching_diff_rows():
    """Check ``all_mismatch(ignore_matching_cols=True)`` when rows differ.

    Each dataframe contains ``acct_id`` values the other lacks, so only the
    four shared accounts can be compared. ``dollar_amt`` and ``name`` agree
    on those rows and must therefore be left out of the output entirely,
    while ``float_fld`` and ``date_fld`` differ and must be reported.
    """
    # Case where there are rows on either dataset which don't match up.
    # Columns dollar_amt and name are matching
    data1 = """acct_id,dollar_amt,name,float_fld,date_fld
10000001234,123.45,George Maharis,14530.1555,2017-01-01
10000001235,0.45,Michael Bluth,1,2017-01-01
10000001236,1345,George Bluth,,2017-01-01
10000001237,123456,Bob Loblaw,345.12,2017-01-01
10000001239,1.05,Lucille Bluth,,2017-01-01
10000001240,123.45,George Maharis,14530.1555,2017-01-02
10000001241,1111.05,Lucille Bluth,
"""

    data2 = """acct_id,dollar_amt,name,float_fld,date_fld
10000001234,123.45,George Maharis,14530.155,
10000001235,0.45,Michael Bluth,,
10000001236,1345,George Bluth,1,
10000001237,123456,Bob Loblaw,345.12,
10000001238,1.05,Lucille Bluth,111,
"""
    df1 = pd.read_csv(io.StringIO(data1), sep=",")
    df2 = pd.read_csv(io.StringIO(data2), sep=",")
    compare = datacompy.Compare(df1, df2, "acct_id")

    output = compare.all_mismatch(ignore_matching_cols=True)

    # Four mismatching rows; five columns = the join key plus the _df1/_df2
    # pair for each of the two columns that differ.
    assert output.shape[0] == 4
    assert output.shape[1] == 5

    assert (output.float_fld_df1 != output.float_fld_df2).values.sum() == 3
    assert (~(output.float_fld_df1 != output.float_fld_df2)).values.sum() == 1

    assert (output.date_fld_df1 != output.date_fld_df2).values.sum() == 4
    assert (~(output.date_fld_df1 != output.date_fld_df2)).values.sum() == 0

    # Fully matching columns must be dropped on BOTH sides. Check every
    # suffixed name individually so a single leaked column fails the test.
    for matching_col in ("name", "dollar_amt"):
        for suffix in ("_df1", "_df2"):
            assert matching_col + suffix not in output


def test_all_mismatch_ignore_matching_cols_some_calls_matching():
# Columns dollar_amt and name are matching
data1 = """acct_id,dollar_amt,name,float_fld,date_fld
Expand Down

0 comments on commit f9e8694

Please sign in to comment.