Skip to content

Commit

Permalink
Merge branch 'release-0.7.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
fdosani committed Jun 9, 2020
2 parents 68db83e + c193b94 commit 1940bb4
Show file tree
Hide file tree
Showing 105 changed files with 303 additions and 31,830 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@ spark-warehouse/
derby.log

*.ipynb*
docs/doctrees
docs/.buildinfo

# Sphinx documentation
docs/_build/
docs/source/api/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
repos:
- repo: https://github.com/ambv/black
- repo: https://github.com/psf/black
rev: stable
hooks:
- id: black
Expand Down
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include README.rst
include LICENSE
include requirements.txt
include test-requirements.txt
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Convenience targets for building and publishing the Sphinx documentation.
# Neither target creates a file with its own name, so both are declared phony;
# otherwise a stray file called "sphinx" or "ghpages" would silently disable them.
.PHONY: sphinx ghpages

# Rebuild the HTML documentation from scratch using docs/Makefile.
# $(MAKE) is used instead of a literal "make" so that flags such as -n and -j
# (and the jobserver) are passed on to the sub-make. Each line runs in its own
# shell, so -C replaces the former "cd docs && ... && cd ..".
sphinx:
	$(MAKE) -C docs -f Makefile clean
	$(MAKE) -C docs -f Makefile html

# Switch to the gh-pages branch, copy the built HTML into the repository root
# and commit it. This target has no prerequisites: docs/build/html must already
# exist (run "make sphinx" first).
# NOTE(review): PRE_COMMIT_ALLOW_NO_CONFIG=1 presumably lets the pre-commit hook
# pass on a branch that has no .pre-commit-config.yaml -- confirm.
ghpages:
	git checkout gh-pages && \
	cp -r docs/build/html/* . && \
	git add -u && \
	git add -A && \
	PRE_COMMIT_ALLOW_NO_CONFIG=1 git commit -m "Updated generated Sphinx documentation"
2 changes: 1 addition & 1 deletion datacompy/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.6.0"
__version__ = "0.7.0"
30 changes: 29 additions & 1 deletion datacompy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import logging
import os


import numpy as np
import pandas as pd

Expand Down Expand Up @@ -61,7 +62,8 @@ class Compare:
df2_name : str, optional
A string name for the second dataframe
ignore_spaces : bool, optional
Flag to strip whitespace (including newlines) from string columns
Flag to strip whitespace (including newlines) from string columns (including any join
columns)
ignore_case : bool, optional
Flag to ignore the case of string columns
Expand Down Expand Up @@ -229,6 +231,13 @@ def _dataframe_merge(self, ignore_spaces):
else:
params = {"on": self.join_columns}

if ignore_spaces:
for column in self.join_columns:
if self.df1[column].dtype.kind == "O":
self.df1[column] = self.df1[column].str.strip()
if self.df2[column].dtype.kind == "O":
self.df2[column] = self.df2[column].str.strip()

outer_join = self.df1.merge(
self.df2, how="outer", suffixes=("_df1", "_df2"), indicator=True, **params
)
Expand Down Expand Up @@ -424,6 +433,25 @@ def sample_mismatch(self, column, sample_count=10, for_display=False):
]
return to_return

def all_mismatch(self):
    """Return every intersecting row that mismatches in at least one compared column.

    A column of ``self.intersect_rows`` is treated as a comparison flag only
    when its name ends with ``_match`` *and* the corresponding ``<name>_df1``
    and ``<name>_df2`` value columns are both present. This keeps an ordinary
    data column whose name merely ends in ``_match`` from being used as a flag
    (which would distort the result or raise ``KeyError`` on the missing
    ``_df1``/``_df2`` columns).

    Returns
    -------
    pandas.DataFrame
        The rows of ``self.intersect_rows`` where at least one comparison flag
        is falsy, restricted to the join columns followed by the ``_df1`` and
        ``_df2`` versions of every compared column.
    """
    suffix = "_match"
    available = set(self.intersect_rows.columns)

    match_list = []
    return_list = []
    for col in self.intersect_rows.columns:
        if not col.endswith(suffix):
            continue
        base = col[: -len(suffix)]
        df1_col = base + "_df1"
        df2_col = base + "_df2"
        # Only a complete <base>_df1 / <base>_df2 / <base>_match triple is a
        # real comparison result; anything else is an unrelated data column.
        if df1_col in available and df2_col in available:
            match_list.append(col)
            return_list.extend([df1_col, df2_col])

    # True for rows where every compared column matched.
    mm_bool = self.intersect_rows[match_list].all(axis="columns")
    return self.intersect_rows[~mm_bool][self.join_columns + return_list]

def report(self, sample_count=10):
"""Returns a string representation of a report. The representation can
then be printed or saved to a file.
Expand Down
1 change: 0 additions & 1 deletion docs/.nojekyll
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@

29 changes: 29 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SPHINXPROJ = datacompy
SOURCEDIR = source
BUILDDIR = build
SPHINXAPI = sphinx-apidoc

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# html and apidoc are commands, not files, so they are phony as well; without
# this a file or directory named "html" or "apidoc" would stop them from running.
.PHONY: help html apidoc Makefile

# Build the HTML docs into $(BUILDDIR)/html. The API stubs are regenerated
# first so the build always reflects the current package sources.
html: apidoc
	$(SPHINXBUILD) -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"

# Regenerate the API reference stubs from the ../datacompy package.
# The output goes under $(SOURCEDIR) so that overriding SOURCEDIR keeps the
# generated stubs inside the tree that the html target actually builds.
apidoc:
	$(SPHINXAPI) -f -o "$(SOURCEDIR)/api" ../datacompy
	@echo "Regenerated API docs in ./$(SOURCEDIR)/api"

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
Binary file removed docs/_images/datacompy.png
Binary file not shown.
7 changes: 0 additions & 7 deletions docs/_sources/api/core.rst.txt

This file was deleted.

8 changes: 0 additions & 8 deletions docs/_sources/api/modules.rst.txt

This file was deleted.

6 changes: 0 additions & 6 deletions docs/_sources/api/sparkcompare.rst.txt

This file was deleted.

54 changes: 0 additions & 54 deletions docs/_sources/developer_instructions.rst.txt

This file was deleted.

21 changes: 0 additions & 21 deletions docs/_sources/index.rst.txt

This file was deleted.

29 changes: 0 additions & 29 deletions docs/_sources/install.rst.txt

This file was deleted.

Loading

0 comments on commit 1940bb4

Please sign in to comment.