From 62ff7e94fba4318d2118e07b460ed10fd8e67cab Mon Sep 17 00:00:00 2001 From: Faisal Date: Wed, 30 Oct 2024 14:40:36 -0300 Subject: [PATCH] fix snowflake formatting issue (#346) * fix snowflake formatting issue * bump version * PR typo fix --- datacompy/__init__.py | 2 +- docs/source/snowflake_usage.rst | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/datacompy/__init__.py b/datacompy/__init__.py index 8ca604ac..e2d150e3 100644 --- a/datacompy/__init__.py +++ b/datacompy/__init__.py @@ -18,7 +18,7 @@ Then extended to carry that functionality over to Spark Dataframes. """ -__version__ = "0.14.2" +__version__ = "0.14.3" import platform from warnings import warn diff --git a/docs/source/snowflake_usage.rst b/docs/source/snowflake_usage.rst index 3c2687e3..9cc2139f 100644 --- a/docs/source/snowflake_usage.rst +++ b/docs/source/snowflake_usage.rst @@ -6,11 +6,12 @@ For ``SnowflakeCompare`` - ``on_index`` is not supported. - Joining is done using ``EQUAL_NULL`` which is the equality test that is safe for null values. - Compares ``snowflake.snowpark.DataFrame``, which can be provided as either raw Snowflake dataframes -or the as the names of full names of valid snowflake tables, which we will process into Snowpark dataframes. + or as the full names of valid snowflake tables, which we will process into Snowpark dataframes. 
-SnowflakeCompare Object Setup ---------------------------------------------------- +SnowflakeCompare setup +---------------------- + There are two ways to specify input dataframes for ``SnowflakeCompare`` Provide Snowpark dataframes @@ -66,11 +67,12 @@ Provide Snowpark dataframes print(compare.report()) -Provide the full name (``{db}.{schema}.{table_name}``) of valid Snowflake tables -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Provide the full name (``db.schema.table_name``) of valid Snowflake tables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Given the dataframes from the prior examples... .. code-block:: python + df_1.write.mode("overwrite").save_as_table("toy_table_1") df_2.write.mode("overwrite").save_as_table("toy_table_2") @@ -210,6 +212,7 @@ There are a few convenience methods and attributes available after the compariso print(compare.df2_unq_columns()) # OrderedSet() + Duplicate rows -------------- @@ -260,9 +263,10 @@ as uniquely in the second. Additional considerations ------------------------- -- It is strongly recommended against joining on float columns (or any column with floating point precision). -Columns joining tables are compared on the basis of an exact comparison, therefore if the values comparing -your float columns are not exact, you will likely get unexpected results. -- Case-sensitive columns are only partially supported. We essentially treat case-sensitive -columns as if they are case-insensitive. Therefore you may use case-sensitive columns as long as -you don't have several columns with the same name differentiated only be case sensitivity. + +- It is strongly recommended against joining on float columns or any column with floating point precision. + Columns joining tables are compared on the basis of an exact comparison, therefore if the values + comparing your float columns are not exact, you will likely get unexpected results. 
+- Case-sensitive columns are only partially supported. We essentially treat case-sensitive columns as + if they are case-insensitive. Therefore you may use case-sensitive columns as long as you don't have several + columns with the same name differentiated only by case sensitivity.