From 07a272825eb00559484b5b6361b87ad23213e898 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Mon, 18 Sep 2023 21:59:17 +0100 Subject: [PATCH 01/14] crosstab_margins --- acro/acro.py | 1 + acro/acro_tables.py | 224 ++++++++++++++++++++++++++++++++++++------- acro/default.yaml | 3 + notebooks/test.ipynb | 48 ++++------ test/test_initial.py | 82 +++++++++++++++- 5 files changed, 293 insertions(+), 65 deletions(-) diff --git a/acro/acro.py b/acro/acro.py index 79005b8..94d81c9 100644 --- a/acro/acro.py +++ b/acro/acro.py @@ -69,6 +69,7 @@ def __init__(self, config: str = "default", suppress: bool = False) -> None: acro_tables.SAFE_NK_N = self.config["safe_nk_n"] acro_tables.SAFE_NK_K = self.config["safe_nk_k"] acro_tables.CHECK_MISSING_VALUES = self.config["check_missing_values"] + acro_tables.ZEROS_ARE_DISCLOSIVE = self.config["zeros_are_disclosive"] # set globals for survival analysis acro_tables.SURVIVAL_THRESHOLD = self.config["survival_safe_threshold"] diff --git a/acro/acro_tables.py b/acro/acro_tables.py index a4e3fe4..9ccaa13 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -19,12 +19,12 @@ logger = logging.getLogger("acro") -AGGFUNC: dict[str, Callable] = { - "mean": np.mean, - "median": np.median, - "sum": np.sum, - "std": np.std, - "freq": np.size, +AGGFUNC: dict[str, str] = { + "mean": "mean", + "median": "median", + "sum": "sum", + "std": "std", + "count": "count", } # aggregation function parameters @@ -33,6 +33,7 @@ SAFE_NK_N: int = 2 SAFE_NK_K: float = 0.9 CHECK_MISSING_VALUES: bool = False +ZEROS_ARE_DISCLOSIVE: bool = True # survival analysis parameters SURVIVAL_THRESHOLD: int = 10 @@ -114,7 +115,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals ) # convert [list of] string to [list of] function - aggfunc = get_aggfuncs(aggfunc) + agg_func = get_aggfuncs(aggfunc) # requested table table: DataFrame = pd.crosstab( # type: ignore @@ -123,7 +124,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals 
values, rownames, colnames, - aggfunc, + agg_func, margins, margins_name, dropna, @@ -133,17 +134,17 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals # suppression masks to apply based on the following checks masks: dict[str, DataFrame] = {} - if aggfunc is not None: + if agg_func is not None: # create lists with single entry for when there is only one aggfunc - freq_funcs: list[Callable] = [AGGFUNC["freq"]] + count_funcs: list[str] = [AGGFUNC["count"]] neg_funcs: list[Callable] = [agg_negative] pperc_funcs: list[Callable] = [agg_p_percent] nk_funcs: list[Callable] = [agg_nk] missing_funcs: list[Callable] = [agg_missing] # then expand them to deal with extra columns as needed - if isinstance(aggfunc, list): - num = len(aggfunc) - freq_funcs.extend([AGGFUNC["freq"] for i in range(1, num)]) + if isinstance(agg_func, list): + num = len(agg_func) + count_funcs.extend([AGGFUNC["count"] for i in range(1, num)]) neg_funcs.extend([agg_negative for i in range(1, num)]) pperc_funcs.extend([agg_p_percent for i in range(1, num)]) nk_funcs.extend([agg_nk for i in range(1, num)]) @@ -156,12 +157,17 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals values=values, rownames=rownames, colnames=colnames, - aggfunc=freq_funcs, + aggfunc=count_funcs, margins=margins, margins_name=margins_name, dropna=dropna, normalize=normalize, ) + + if dropna or margins: + for col in t_values.columns: + if t_values[col].sum() == 0: + t_values = t_values.drop(col, axis=1) t_values = t_values < THRESHOLD masks["threshold"] = t_values # check for negative values -- currently unsupported @@ -172,11 +178,21 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals masks["negative"] = negative # p-percent check masks["p-ratio"] = pd.crosstab( # type: ignore - index, columns, values, aggfunc=pperc_funcs, margins=margins + index, + columns, + values, + aggfunc=pperc_funcs, + margins=margins, + dropna=dropna ) # nk values check masks["nk-rule"] = 
pd.crosstab( # type: ignore - index, columns, values, aggfunc=nk_funcs, margins=margins + index, + columns, + values, + aggfunc=nk_funcs, + margins=margins, + dropna=dropna ) # check for missing values -- currently unsupported if CHECK_MISSING_VALUES: @@ -215,6 +231,131 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals safe_table, outcome = apply_suppression(table, masks) if self.suppress: table = safe_table + if margins: + if aggfunc is None: + table = table.drop(margins_name, axis=1) + rows_total = table.sum(axis=1) + table.loc[:, margins_name] = rows_total + table = table.drop(margins_name, axis=0) + cols_total = table.sum(axis=0) + table.loc[margins_name] = cols_total + if aggfunc == "mean": + count_table = pd.crosstab( # type: ignore + index, + columns, + values=values, + rownames=rownames, + colnames=colnames, + aggfunc="count", + margins=margins, + margins_name=margins_name, + dropna=dropna, + normalize=normalize, + ) + count_table = count_table.where(table.notna(), other=np.nan) + columns_to_keep = table.columns + count_table = count_table[columns_to_keep] + if not isinstance( + count_table.columns, pd.MultiIndex + ) and not isinstance(count_table.index, pd.MultiIndex): + count_table = count_table.drop(margins_name, axis=1) + count_table.loc[:, margins_name] = count_table.sum(axis=1) + count_table = count_table.drop(margins_name, axis=0) + count_table.loc[(margins_name)] = count_table.sum(axis=0) + table[margins_name] = 1 + table.loc[margins_name, :] = 1 + multip_table = count_table * table + table[margins_name] = ( + multip_table.drop(margins_name, axis=1).sum(axis=1) + / multip_table[margins_name] + ) + table.loc[margins_name, :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[margins_name, :] + ) + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name) + .sum() + .sum() + ) / multip_table.loc[margins_name, margins_name] + + if isinstance(count_table.columns, 
pd.MultiIndex) and isinstance( + count_table.index, pd.MultiIndex + ): # multidimensional columns and rows + count_table = count_table.drop(margins_name, axis=1, level=0) + count_table.loc[:, margins_name] = count_table.sum(axis=1) + count_table = count_table.drop(margins_name, axis=0) + count_table.loc[(margins_name, ""), :] = count_table.sum(axis=0) + table[margins_name] = 1 + table.loc[margins_name, :] = 1 + multip_table = count_table * table + table[margins_name] = ( + multip_table.drop(margins_name, axis=1, level=0).sum(axis=1) + / multip_table[margins_name] + ) + table.loc[(margins_name, ""), :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[(margins_name, ""), :] + ) + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name) + .sum() + .sum() + ) / multip_table.loc[margins_name, margins_name][0] + + if isinstance( + count_table.columns, pd.MultiIndex + ) and not isinstance( + count_table.index, pd.MultiIndex + ): # multidimensional columns + count_table = count_table.drop(margins_name, axis=1, level=0) + count_table.loc[:, margins_name] = count_table.sum(axis=1) + count_table = count_table.drop(margins_name, axis=0) + count_table.loc[(margins_name)] = count_table.sum(axis=0) + table[margins_name] = 1 + table.loc[margins_name, :] = 1 + multip_table = count_table * table + table[margins_name] = ( + multip_table.drop(margins_name, axis=1, level=0).sum(axis=1) + / multip_table[margins_name] + ) + table.loc[margins_name, :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[margins_name, :] + ) + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name) + .sum() + .sum() + ) / multip_table.loc[margins_name, margins_name][0] + + if isinstance(count_table.index, pd.MultiIndex) and not isinstance( + count_table.columns, pd.MultiIndex + ): # multidimensional rows + count_table = count_table.where(table.notna(), other=np.nan) + 
columns_to_keep = table.columns + count_table = count_table[columns_to_keep] + count_table = count_table.drop(margins_name, axis=1) + count_table.loc[:, margins_name] = count_table.sum(axis=1) + count_table = count_table.drop(margins_name, axis=0) + count_table.loc[(margins_name, ""), :] = count_table.sum(axis=0) + table[margins_name] = 1 + table.loc[margins_name, :] = 1 + multip_table = count_table * table + table[margins_name] = ( + multip_table.drop(margins_name, axis=1).sum(axis=1) + / multip_table[margins_name] + ) + table.loc[(margins_name, ""), :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[(margins_name, ""), :] + ) + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name) + .sum() + .sum() + ) / multip_table.loc[margins_name, margins_name][0] + # record output self.results.add( status=status, @@ -317,7 +458,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals # threshold check agg = [agg_threshold] * n_agg if n_agg > 1 else agg_threshold t_values = pd.pivot_table( # type: ignore - data, values, index, columns, aggfunc=agg + data, values, index, columns, aggfunc=agg, margins=margins ) masks["threshold"] = t_values @@ -325,27 +466,34 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals # check for negative values -- currently unsupported agg = [agg_negative] * n_agg if n_agg > 1 else agg_negative negative = pd.pivot_table( # type: ignore - data, values, index, columns, aggfunc=agg + data, values, index, columns, aggfunc=agg, margins=margins ) if negative.to_numpy().sum() > 0: masks["negative"] = negative # p-percent check agg = [agg_p_percent] * n_agg if n_agg > 1 else agg_p_percent masks["p-ratio"] = pd.pivot_table( # type: ignore - data, values, index, columns, aggfunc=agg + data, values, index, columns, aggfunc=agg, margins=margins ) # nk values check agg = [agg_nk] * n_agg if n_agg > 1 else agg_nk masks["nk-rule"] = pd.pivot_table( # 
type: ignore - data, values, index, columns, aggfunc=agg + data, values, index, columns, aggfunc=agg, margins=margins ) # check for missing values -- currently unsupported if CHECK_MISSING_VALUES: agg = [agg_missing] * n_agg if n_agg > 1 else agg_missing masks["missing"] = pd.pivot_table( # type: ignore - data, values, index, columns, aggfunc=agg + data, values, index, columns, aggfunc=agg, margins=margins ) + # pd.pivot_table returns nan for an empty cell + for name, mask in masks.items(): + mask.fillna(value=1, inplace=True) + mask = mask.astype(int) + mask.replace({0: False, 1: True}, inplace=True) + masks[name] = mask + # build the sdc dictionary sdc: dict = get_table_sdc(masks, self.suppress) # get the status and summary @@ -504,7 +652,6 @@ def plot( # pylint: disable=too-many-arguments,too-many-locals ) return plot - def rounded_survival_table(survival_table): """Calculates the rounded surival function.""" death_censored = ( @@ -548,8 +695,7 @@ def rounded_survival_table(survival_table): survival_table["rounded_survival_fun"] = rounded_survival_func return survival_table - -def get_aggfunc(aggfunc: str | None) -> Callable | None: +def get_aggfunc(aggfunc: str | None) -> str | None: """Checks whether an aggregation function is allowed and returns the appropriate function. @@ -578,10 +724,9 @@ def get_aggfunc(aggfunc: str | None) -> Callable | None: logger.debug("aggfunc: %s", func) return func - def get_aggfuncs( aggfuncs: str | list[str] | None, -) -> Callable | list[Callable] | None: +) -> str | list[str] | None: """Checks whether a list of aggregation functions is allowed and returns the appropriate functions. 
@@ -604,7 +749,7 @@ def get_aggfuncs( logger.debug("aggfuncs: %s", function) return function if isinstance(aggfuncs, list): - functions: list[Callable] = [] + functions: list[str] = [] for function_name in aggfuncs: function = get_aggfunc(function_name) if function is not None: @@ -615,7 +760,6 @@ def get_aggfuncs( return functions raise ValueError("aggfuncs must be: either str or list[str]") # pragma: no cover - def agg_negative(vals: Series) -> bool: """Aggregation function that returns whether any values are negative. @@ -631,7 +775,6 @@ def agg_negative(vals: Series) -> bool: """ return vals.min() < 0 - def agg_missing(vals: Series) -> bool: """Aggregation function that returns whether any values are missing. @@ -647,7 +790,6 @@ def agg_missing(vals: Series) -> bool: """ return vals.isna().sum() != 0 - def agg_p_percent(vals: Series) -> bool: """Aggregation function that returns whether the p percent rule is violated. @@ -667,13 +809,20 @@ def agg_p_percent(vals: Series) -> bool: bool whether the p percent rule is violated. """ + assert isinstance(vals, Series), "vals is not a pandas series" + logger.debug(f"vals is {vals} with size {vals.size}") sorted_vals = vals.sort_values(ascending=False) total: float = sorted_vals.sum() + if total <= 0.0 or vals.size <= 1: + logger.debug("not calculating ppercent due to small size") + if ZEROS_ARE_DISCLOSIVE: + return True + else: + return False sub_total = total - sorted_vals.iloc[0] - sorted_vals.iloc[1] p_val: float = sub_total / sorted_vals.iloc[0] if total > 0 else 1 return p_val < SAFE_PRATIO_P - def agg_nk(vals: Series) -> bool: """Aggregation function that returns whether the top n items account for more than k percent of the total. @@ -695,7 +844,6 @@ def agg_nk(vals: Series) -> bool: return (n_total / total) > SAFE_NK_K return False - def agg_threshold(vals: Series) -> bool: """Aggregation function that returns whether the number of contributors is below a threshold. 
@@ -712,7 +860,6 @@ def agg_threshold(vals: Series) -> bool: """ return vals.count() < THRESHOLD - def apply_suppression( table: DataFrame, masks: dict[str, DataFrame] ) -> tuple[DataFrame, DataFrame]: @@ -755,11 +902,17 @@ def apply_suppression( outcome_df += tmp_df except TypeError: logger.warning("problem mask %s is not binary", name) + except ValueError: + raise ValueError( + f"name is {name} \n mask is {mask} \n table is {table}", + name, + mask, + safe_df, + ) outcome_df = outcome_df.replace({"": "ok"}) logger.info("outcome_df:\n%s", utils.prettify_table_string(outcome_df)) return safe_df, outcome_df - def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict: """Returns the SDC dictionary using the suppression masks. @@ -778,7 +931,7 @@ def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict: sdc["summary"]["p-ratio"] = 0 sdc["summary"]["nk-rule"] = 0 for name, mask in masks.items(): - sdc["summary"][name] = int(mask.to_numpy().sum()) + sdc["summary"][name] = int(np.nansum(mask.to_numpy())) # positions of cells to be suppressed sdc["cells"]["negative"] = [] sdc["cells"]["missing"] = [] @@ -792,7 +945,6 @@ def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict: sdc["cells"][name].append([int(row_index), int(col_index)]) return sdc - def get_summary(sdc: dict) -> tuple[str, str]: """Returns the status and summary of the suppression masks. diff --git a/acro/default.yaml b/acro/default.yaml index 0fe8d08..d0b3d92 100644 --- a/acro/default.yaml +++ b/acro/default.yaml @@ -26,4 +26,7 @@ check_missing_values: False # frequency threshols for survival tables and plots survival_safe_threshold: 10 + +# consider zeros to be disclosive +zeros_are_disclosive: True ... 
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index b921523..a0b957e 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -722,7 +722,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "298d2b40", + "id": "4ae844a0", "metadata": {}, "outputs": [ { @@ -1085,7 +1085,7 @@ }, { "cell_type": "markdown", - "id": "a521cb83", + "id": "d642ed00", "metadata": {}, "source": [ "### ACRO crosstab with missing values" @@ -1386,7 +1386,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "966c1a9b", + "id": "6d4730c4", "metadata": {}, "outputs": [ { @@ -1622,7 +1622,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "ac4fd993", + "id": "8b603548", "metadata": {}, "outputs": [], "source": [ @@ -1640,7 +1640,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "01152d49", + "id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "outputs": [ { @@ -1762,10 +1762,8 @@ { "cell_type": "code", "execution_count": 19, - "id": "2f462e42", - "metadata": { - "scrolled": true - }, + "id": "a521cb83", + "metadata": {}, "outputs": [ { "name": "stderr", @@ -2106,7 +2104,7 @@ }, { "cell_type": "markdown", - "id": "2816eac7", + "id": "fcc81e98", "metadata": {}, "source": [ "### ACRO Probit" @@ -2261,7 +2259,7 @@ }, { "cell_type": "markdown", - "id": "f38b4334", + "id": "d66e565b", "metadata": {}, "source": [ "### ACRO Logit" @@ -2408,7 +2406,7 @@ }, { "cell_type": "markdown", - "id": "9e554eea", + "id": "dc99fa71", "metadata": {}, "source": [ "### List current ACRO outputs" @@ -2851,7 +2849,7 @@ { "cell_type": "code", "execution_count": 24, - "id": "e4ee985e", + "id": "b1f77749", "metadata": {}, "outputs": [ { @@ -2879,7 +2877,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "b9d0b9ac", + "id": "45ec04ef", "metadata": {}, "outputs": [ { @@ -2905,10 +2903,8 @@ { "cell_type": "code", "execution_count": 26, - "id": "8e21f7b0", - "metadata": { - "scrolled": true - }, + "id": "0c826271", + "metadata": {}, "outputs": [ { "name": "stderr", 
@@ -2935,7 +2931,7 @@ { "cell_type": "code", "execution_count": 27, - "id": "1e8000a1", + "id": "2816eac7", "metadata": {}, "outputs": [ { @@ -2963,7 +2959,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "e7b05fc0", + "id": "f38b4334", "metadata": {}, "outputs": [ { @@ -2995,10 +2991,8 @@ { "cell_type": "code", "execution_count": 29, - "id": "f941aca2", - "metadata": { - "scrolled": true - }, + "id": "9e554eea", + "metadata": {}, "outputs": [ { "name": "stderr", @@ -3186,7 +3180,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "50c59b97", + "id": "f78b5a08", "metadata": {}, "outputs": [ { @@ -3237,7 +3231,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "35096efe", + "id": "df2a02e0", "metadata": {}, "outputs": [ { @@ -3288,7 +3282,7 @@ { "cell_type": "code", "execution_count": 32, - "id": "2cd90463", + "id": "56d2b6a1", "metadata": {}, "outputs": [ { diff --git a/test/test_initial.py b/test/test_initial.py index 5860baa..9cff9d0 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -88,7 +88,7 @@ def test_crosstab_multiple(data, acro): acro.add_exception("output_0", "Let me have it") results: Records = acro.finalise() correct_summary: str = ( - "fail; threshold: 6 cells suppressed; p-ratio: 1 cells suppressed; " + "fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; " "nk-rule: 1 cells suppressed; " ) output = results.get_index(0) @@ -149,7 +149,7 @@ def test_pivot_table_cols(data, acro): results: Records = acro.finalise() correct_summary: str = ( "fail; threshold: 14 cells suppressed; " - "p-ratio: 2 cells suppressed; nk-rule: 2 cells suppressed; " + "p-ratio: 4 cells suppressed; nk-rule: 2 cells suppressed; " ) output_0 = results.get_index(0) assert output_0.summary == correct_summary @@ -545,3 +545,81 @@ def test_surv_func(acro): results: Records = acro.finalise(path=PATH) output_1 = results.get_index(1) assert output_1.output == [filename] + + +def test_zeros_are_not_disclosive(data, acro): + """Test that 
zeros are handled as not disclosive when the parameter () is False.""" + acro_tables.ZEROS_ARE_DISCLOSIVE=False + _ = acro.pivot_table( + data, + index=["grant_type"], + columns=["year"], + values=["inc_grants"], + aggfunc=["mean", "std"], + ) + acro.add_exception("output_0", "Let me have it") + results: Records = acro.finalise() + correct_summary: str = ( + "fail; threshold: 14 cells suppressed; " + "p-ratio: 2 cells suppressed; nk-rule: 2 cells suppressed; " + ) + output_0 = results.get_index(0) + assert output_0.summary == correct_summary + + + +def test_crosstab_with_totals_without_suppression(data, acro): + """Test the crosstab with margins id true and suppression is false.""" + acro.suppress = False + _ = acro.crosstab(data.year, data.grant_type, margins=True) + output = acro.results.get_index(0) + assert 153 == output.output[0]["All"].iat[0] + + total_rows = output.output[0].iloc[-1, 0:4].sum() + total_cols = output.output[0].loc[2010:2015, "All"].sum() + assert 918 == total_rows == total_cols == output.output[0]["All"].iat[6] + + +def test_crosstab_with_totals_with_suppression(data, acro): + """Test the crosstab with both margins and suprression are true.""" + _ = acro.crosstab(data.year, data.grant_type, margins=True) + output = acro.results.get_index(0) + assert 145 == output.output[0]["All"].iat[0] + + total_rows = output.output[0].iloc[-1, 0:4].sum() + total_cols = output.output[0].loc[2010:2015, "All"].sum() + assert 870 == total_cols == total_rows == output.output[0]["All"].iat[6] + + +def test_crosstab_with_totals_with_suppression_herichical(data, acro): + """Test the crosstab with both margins and suprression are true.""" + _ = acro.crosstab( + [data.year, data.survivor], [data.grant_type, data.status], margins=True + ) + output = acro.results.get_index(0) + assert 47 == output.output[0]["All"].iat[0] + + total_rows = (output.output[0].loc["All", :].sum()) - output.output[0]["All"].iat[ + 12 + ] + total_cols = (output.output[0].loc[:, "All"].sum()) 
- output.output[0]["All"].iat[ + 12 + ] + assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + + +def test_crosstab_with_totals_with_suppression_with_mean_aggfunc(data, acro): + """Test the crosstab with both margins and suprression are true and with one aggfunc.""" + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_donations, + aggfunc="mean", + margins=True, + ) + acro.results.get_index(0) + # assert 145 == output.output[0]["All"].iat[0] + + # total_rows = output.output[0].loc["All", "G":"R/G"].sum() + # total_cols = output.output[0].loc[2010:2015, "All"].sum() + # assert total_cols == total_rows == output.output[0]["All"].iat[6] From cb1d2874c104a5651144b7f1fc21af0464a422dc Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Wed, 20 Sep 2023 23:44:45 +0100 Subject: [PATCH 02/14] adding tests --- test/test_initial.py | 60 ++++---------------------------------------- 1 file changed, 5 insertions(+), 55 deletions(-) diff --git a/test/test_initial.py b/test/test_initial.py index 9cff9d0..244d3e0 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -567,59 +567,9 @@ def test_zeros_are_not_disclosive(data, acro): assert output_0.summary == correct_summary - -def test_crosstab_with_totals_without_suppression(data, acro): - """Test the crosstab with margins id true and suppression is false.""" - acro.suppress = False - _ = acro.crosstab(data.year, data.grant_type, margins=True) - output = acro.results.get_index(0) - assert 153 == output.output[0]["All"].iat[0] - - total_rows = output.output[0].iloc[-1, 0:4].sum() - total_cols = output.output[0].loc[2010:2015, "All"].sum() - assert 918 == total_rows == total_cols == output.output[0]["All"].iat[6] - - -def test_crosstab_with_totals_with_suppression(data, acro): - """Test the crosstab with both margins and suprression are true.""" - _ = acro.crosstab(data.year, data.grant_type, margins=True) - output = acro.results.get_index(0) - assert 145 == 
output.output[0]["All"].iat[0] - - total_rows = output.output[0].iloc[-1, 0:4].sum() - total_cols = output.output[0].loc[2010:2015, "All"].sum() - assert 870 == total_cols == total_rows == output.output[0]["All"].iat[6] - - -def test_crosstab_with_totals_with_suppression_herichical(data, acro): - """Test the crosstab with both margins and suprression are true.""" - _ = acro.crosstab( - [data.year, data.survivor], [data.grant_type, data.status], margins=True - ) +def test_crosstab_with_sum(data, acro): + """Test the crosstab with two columns and aggfunc sum""" + acro = ACRO(suppress=False) + _ = acro.crosstab(data.year, [data.grant_type, data.survivor], values=data.inc_grants, aggfunc= "mean") output = acro.results.get_index(0) - assert 47 == output.output[0]["All"].iat[0] - - total_rows = (output.output[0].loc["All", :].sum()) - output.output[0]["All"].iat[ - 12 - ] - total_cols = (output.output[0].loc[:, "All"].sum()) - output.output[0]["All"].iat[ - 12 - ] - assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 - - -def test_crosstab_with_totals_with_suppression_with_mean_aggfunc(data, acro): - """Test the crosstab with both margins and suprression are true and with one aggfunc.""" - _ = acro.crosstab( - data.year, - data.grant_type, - values=data.inc_donations, - aggfunc="mean", - margins=True, - ) - acro.results.get_index(0) - # assert 145 == output.output[0]["All"].iat[0] - - # total_rows = output.output[0].loc["All", "G":"R/G"].sum() - # total_cols = output.output[0].loc[2010:2015, "All"].sum() - # assert total_cols == total_rows == output.output[0]["All"].iat[6] + assert (6,8) == output.output[0].shape From 6ff77e5570bb0f5e38ce20fe3d178817bb27de44 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Fri, 22 Sep 2023 11:40:32 +0100 Subject: [PATCH 03/14] updating the supression when total --- acro/acro_tables.py | 250 +++++----- notebooks/test.ipynb | 1115 ++++++++++++++++-------------------------- test/test_initial.py | 13 +- 3 files 
changed, 568 insertions(+), 810 deletions(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 9ccaa13..7aa3e0a 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -182,17 +182,12 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals columns, values, aggfunc=pperc_funcs, - margins=margins, - dropna=dropna + margins=margins, + dropna=dropna, ) # nk values check masks["nk-rule"] = pd.crosstab( # type: ignore - index, - columns, - values, - aggfunc=nk_funcs, - margins=margins, - dropna=dropna + index, columns, values, aggfunc=nk_funcs, margins=margins, dropna=dropna ) # check for missing values -- currently unsupported if CHECK_MISSING_VALUES: @@ -232,129 +227,127 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals if self.suppress: table = safe_table if margins: - if aggfunc is None: - table = table.drop(margins_name, axis=1) - rows_total = table.sum(axis=1) - table.loc[:, margins_name] = rows_total - table = table.drop(margins_name, axis=0) - cols_total = table.sum(axis=0) - table.loc[margins_name] = cols_total - if aggfunc == "mean": - count_table = pd.crosstab( # type: ignore - index, - columns, + # initialize a list to store queries for true cells + true_cell_queries = [] + for _, mask in masks.items(): + # drop the name of the mask + mask = mask.droplevel(0, axis=1) + # identify level names for rows and columns + index_level_names = mask.index.names + column_level_names = mask.columns.names + + # iterate through the masks to identify the true cells and extract queries + for column_level_values in mask.columns: + for index_level_values in mask.index: + if ( + mask.loc[index_level_values, column_level_values] + # == True + ): + if isinstance(index_level_values, tuple): + index_query = " & ".join( + [ + f"({level} == {val})" + if isinstance(val, (int, float)) + else f'({level} == "{val}")' + for level, val in zip( + index_level_names, index_level_values + ) + ] + ) + else: + index_query = " & ".join( + [ + 
f"({index_level_names} == {index_level_values})" + if isinstance( + index_level_values, (int, float) + ) + else ( + f"({index_level_names}" + f'== "{index_level_values}")' + ) + ] + ) + if isinstance(column_level_values, tuple): + column_query = " & ".join( + [ + f"({level} == {val})" + if isinstance(val, (int, float)) + else f'({level} == "{val}")' + for level, val in zip( + column_level_names, column_level_values + ) + ] + ) + else: + column_query = " & ".join( + [ + f"({column_level_names} == {column_level_values})" + if isinstance( + column_level_values, (int, float) + ) + else ( + f"({column_level_names}" + f'== "{column_level_values}")' + ) + ] + ) + query = f"{index_query} & {column_query}" + true_cell_queries.append(query) + + # delete the duplication + true_cell_queries = list(set(true_cell_queries)) + + # create dataframe from the index and columns parameters + if isinstance(index, list): + index_df = pd.concat(index, axis=1) + elif isinstance(index, pd.Series): + index_df = pd.DataFrame({index.name: index}) + if isinstance(columns, list): + columns_df = pd.concat(columns, axis=1) + elif isinstance(columns, pd.Series): + columns_df = pd.DataFrame({columns.name: columns}) + data = pd.concat([index_df, columns_df], axis=1) + + # apply the queries to the data + for query in true_cell_queries: + query = str(query).replace("['", "").replace("']", "") + data = data.query(f"not ({query})") + + # get the index and columns from the data after the queries are applied + try: + if isinstance(index, list): + index_new = [] + for _, val in enumerate(index): + index_new.append(data[val.name]) + else: + index_new = data[index.name] + + if isinstance(columns, list): + columns_new = [] + for _, val in enumerate(columns): + columns_new.append(data[val.name]) + else: + columns_new = data[columns.name] + + # apply the crosstab with the new index and columns + table = pd.crosstab( # type: ignore + index_new, + columns_new, values=values, rownames=rownames, colnames=colnames, - 
aggfunc="count", + aggfunc=aggfunc, margins=margins, margins_name=margins_name, dropna=dropna, normalize=normalize, ) - count_table = count_table.where(table.notna(), other=np.nan) - columns_to_keep = table.columns - count_table = count_table[columns_to_keep] - if not isinstance( - count_table.columns, pd.MultiIndex - ) and not isinstance(count_table.index, pd.MultiIndex): - count_table = count_table.drop(margins_name, axis=1) - count_table.loc[:, margins_name] = count_table.sum(axis=1) - count_table = count_table.drop(margins_name, axis=0) - count_table.loc[(margins_name)] = count_table.sum(axis=0) - table[margins_name] = 1 - table.loc[margins_name, :] = 1 - multip_table = count_table * table - table[margins_name] = ( - multip_table.drop(margins_name, axis=1).sum(axis=1) - / multip_table[margins_name] - ) - table.loc[margins_name, :] = ( - multip_table.drop(margins_name, axis=0).sum() - / multip_table.loc[margins_name, :] - ) - table.loc[margins_name, margins_name] = ( - multip_table.drop(index=margins_name, columns=margins_name) - .sum() - .sum() - ) / multip_table.loc[margins_name, margins_name] - - if isinstance(count_table.columns, pd.MultiIndex) and isinstance( - count_table.index, pd.MultiIndex - ): # multidimensional columns and rows - count_table = count_table.drop(margins_name, axis=1, level=0) - count_table.loc[:, margins_name] = count_table.sum(axis=1) - count_table = count_table.drop(margins_name, axis=0) - count_table.loc[(margins_name, ""), :] = count_table.sum(axis=0) - table[margins_name] = 1 - table.loc[margins_name, :] = 1 - multip_table = count_table * table - table[margins_name] = ( - multip_table.drop(margins_name, axis=1, level=0).sum(axis=1) - / multip_table[margins_name] - ) - table.loc[(margins_name, ""), :] = ( - multip_table.drop(margins_name, axis=0).sum() - / multip_table.loc[(margins_name, ""), :] - ) - table.loc[margins_name, margins_name] = ( - multip_table.drop(index=margins_name, columns=margins_name) - .sum() - .sum() - ) / 
multip_table.loc[margins_name, margins_name][0] - - if isinstance( - count_table.columns, pd.MultiIndex - ) and not isinstance( - count_table.index, pd.MultiIndex - ): # multidimensional columns - count_table = count_table.drop(margins_name, axis=1, level=0) - count_table.loc[:, margins_name] = count_table.sum(axis=1) - count_table = count_table.drop(margins_name, axis=0) - count_table.loc[(margins_name)] = count_table.sum(axis=0) - table[margins_name] = 1 - table.loc[margins_name, :] = 1 - multip_table = count_table * table - table[margins_name] = ( - multip_table.drop(margins_name, axis=1, level=0).sum(axis=1) - / multip_table[margins_name] - ) - table.loc[margins_name, :] = ( - multip_table.drop(margins_name, axis=0).sum() - / multip_table.loc[margins_name, :] - ) - table.loc[margins_name, margins_name] = ( - multip_table.drop(index=margins_name, columns=margins_name) - .sum() - .sum() - ) / multip_table.loc[margins_name, margins_name][0] - - if isinstance(count_table.index, pd.MultiIndex) and not isinstance( - count_table.columns, pd.MultiIndex - ): # multidimensional rows - count_table = count_table.where(table.notna(), other=np.nan) - columns_to_keep = table.columns - count_table = count_table[columns_to_keep] - count_table = count_table.drop(margins_name, axis=1) - count_table.loc[:, margins_name] = count_table.sum(axis=1) - count_table = count_table.drop(margins_name, axis=0) - count_table.loc[(margins_name, ""), :] = count_table.sum(axis=0) - table[margins_name] = 1 - table.loc[margins_name, :] = 1 - multip_table = count_table * table - table[margins_name] = ( - multip_table.drop(margins_name, axis=1).sum(axis=1) - / multip_table[margins_name] - ) - table.loc[(margins_name, ""), :] = ( - multip_table.drop(margins_name, axis=0).sum() - / multip_table.loc[(margins_name, ""), :] - ) - table.loc[margins_name, margins_name] = ( - multip_table.drop(index=margins_name, columns=margins_name) - .sum() - .sum() - ) / multip_table.loc[margins_name, margins_name][0] + 
except ValueError: + logger.info( + "All the cells in this data are discolsive." + " Thus suppression can not be applied" + ) + return None # record output self.results.add( @@ -652,6 +645,7 @@ def plot( # pylint: disable=too-many-arguments,too-many-locals ) return plot + def rounded_survival_table(survival_table): """Calculates the rounded surival function.""" death_censored = ( @@ -695,6 +689,7 @@ def rounded_survival_table(survival_table): survival_table["rounded_survival_fun"] = rounded_survival_func return survival_table + def get_aggfunc(aggfunc: str | None) -> str | None: """Checks whether an aggregation function is allowed and returns the appropriate function. @@ -724,6 +719,7 @@ def get_aggfunc(aggfunc: str | None) -> str | None: logger.debug("aggfunc: %s", func) return func + def get_aggfuncs( aggfuncs: str | list[str] | None, ) -> str | list[str] | None: @@ -760,6 +756,7 @@ def get_aggfuncs( return functions raise ValueError("aggfuncs must be: either str or list[str]") # pragma: no cover + def agg_negative(vals: Series) -> bool: """Aggregation function that returns whether any values are negative. @@ -775,6 +772,7 @@ def agg_negative(vals: Series) -> bool: """ return vals.min() < 0 + def agg_missing(vals: Series) -> bool: """Aggregation function that returns whether any values are missing. @@ -790,6 +788,7 @@ def agg_missing(vals: Series) -> bool: """ return vals.isna().sum() != 0 + def agg_p_percent(vals: Series) -> bool: """Aggregation function that returns whether the p percent rule is violated. @@ -823,6 +822,7 @@ def agg_p_percent(vals: Series) -> bool: p_val: float = sub_total / sorted_vals.iloc[0] if total > 0 else 1 return p_val < SAFE_PRATIO_P + def agg_nk(vals: Series) -> bool: """Aggregation function that returns whether the top n items account for more than k percent of the total. 
@@ -844,6 +844,7 @@ def agg_nk(vals: Series) -> bool: return (n_total / total) > SAFE_NK_K return False + def agg_threshold(vals: Series) -> bool: """Aggregation function that returns whether the number of contributors is below a threshold. @@ -860,6 +861,7 @@ def agg_threshold(vals: Series) -> bool: """ return vals.count() < THRESHOLD + def apply_suppression( table: DataFrame, masks: dict[str, DataFrame] ) -> tuple[DataFrame, DataFrame]: @@ -913,6 +915,7 @@ def apply_suppression( logger.info("outcome_df:\n%s", utils.prettify_table_string(outcome_df)) return safe_df, outcome_df + def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict: """Returns the SDC dictionary using the suppression masks. @@ -945,6 +948,7 @@ def get_table_sdc(masks: dict[str, DataFrame], suppress: bool) -> dict: sdc["cells"][name].append([int(row_index), int(col_index)]) return sdc + def get_summary(sdc: dict) -> tuple[str, str]: """Returns the status and summary of the suppression masks. diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index a0b957e..6547872 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "e33fd4fb", "metadata": {}, "outputs": [], @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "c01cfe12", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "cc8d993a", "metadata": { "scrolled": true @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "4b8a77e2", "metadata": {}, "outputs": [ @@ -62,7 +62,7 @@ "output_type": "stream", "text": [ "INFO:acro:version: 0.4.3\n", - "INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10}\n", + "INFO:acro:config: 
{'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10, 'zeros_are_disclosive': True}\n", "INFO:acro:automatic suppression: False\n" ] } @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "8722735f", "metadata": { "scrolled": true @@ -296,7 +296,7 @@ "[5 rows x 44 columns]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "961684cb", "metadata": {}, "outputs": [ @@ -413,7 +413,7 @@ "2015 15 59 71 8" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "bb4b2677", "metadata": { "scrolled": true @@ -552,7 +552,7 @@ "2015 15 59 71 8" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -562,6 +562,58 @@ "safe_table" ] }, + { + "cell_type": "markdown", + "id": "ea5587ff", + "metadata": {}, + "source": [ + "### ACRO crosstab with suppression and totals" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dc2de220", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:get_summary(): fail; threshold: 1071 cells suppressed; p-ratio: 918 cells suppressed; nk-rule: 815 cells suppressed; \n", + "INFO:acro:outcome_df:\n", + 
"-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "charity |4||||Children | AD|FAM |||National | | AGE Cymru | |||Ac|t|||ion ||for A-T ||| | Age Exchange | | || | | | | || || |Age ||UK B|l||ack|pool|| and dist|rict Age U|K: |Runnym|||ede a|nd Spelthorn|e| Al M|a|dad Foundation | Alchemy Fo|undati|on | Alzheim|er's Society | Amos |Founda|tio|n | | | | | || ||||| | A|nt|ho|ny||||| N|||||||o||l|an Trust A|pples and Snakes Ar|thritis care As|thma UK Au|tistica | | | Ava| | Bat|tersea |dogs and cats ho||me Be|de||'|s|| Wo||rld | |Bla|con Community Tr|us|t || | British Association fo|r Adoption and Foster|ing British| Bee Keepers Association British |Deer| Society |British| Heart F|oundation ||Brit||is|||h Re|d Cros|s|||| || | || || | Bro|ken Rainbo||w (Dead|) 
Brushmill Li|mited | Bumblebee Con|servation Trus|t | ||| CYF | | | | | | | Cancer R|esearch U|K | |Carers Trust S|wansea | | || Carla Lan|e Animals in N|eed Catholic Institute for interna|tional| relations Change Agents UK Charity| Ch|anges | |Child Brain Injury |Trust | Children and Famili|es Across Borders Children in ne|ed Christian Aid | Civitas Limited | Collection Trust | | Community Music |L||imited Community matters| Cot|swold| Friends | |Cr|isi|s UK | |Crossroads Care East An|glia (D|ead) Cumb|ria Rural Housi|ng Tr|ust | C|ycling Pr|ojects | | Cystic fibrosis |t|rust Debra | |Deer Init|ia|tive |Limited Dogs Trust | | Dogs for the disa|bled Drugscope (Dead) | E.I.L Limited | Eaves (Dead) | | Eden Trust | Edupoor| Limited | | Emmau|s Mossley | English |national ballet Essex Di|sabled Peo|ple's Association LTD |(Dead) Everychild | Fairf|ield Croydon (Dead) | Fair|wood Trust | | | |Fisherbeck Charitable Trust F|ood|cycle Forev|er Houn|ds Trust | Friend o|f the |Animals | Friends of| the Earth Trust | Gardeni|ng |Leave Ltd (Dead) | Hawk|spring (dead) |Headway Dor|set | H|edge Funds| Care UK He|len and Douglas House He|lp for Heroes Hen|ry | | Hi||scox |Foundation | | H|ome Start Bury | Hop|e Now| Limited Hull and E|ast Yorks| Comm|unity fund| Human |Research Trust Hyndbur|n Us|ed Fur|niture Store IdeasTap| (Dea|d) Key house| project |(Dead) Kids Compan|y (Dead) Kidscape Ca|mpaign fo|r Chil|dren La|ndlife | | Learning S|outh West | | Living Spr|ings | Macmillian C|ancer S|upport Ma|rie Curie C|ancer Care Meeting point| trust limited Mencap | Milestones Tru|st | Mind | | NAM Publication|s NHS Charitable |fund|s NSPCC | N|WG Network National Kidney Federation | Natio|nal Trust | | Natio|nal playbus ass|ociation Northern refugee cent|re (Dea|d) OMID Foundation | | Oakley regenera|tion | Ocean| Youth Trust South | Order of St John | PAC|E (Dead) | | Panthe|ra Wi|ldlife Trust Limi|ted Por|tmouth mi|nd ltd (Dead) | Prison Reform Trust | Prospects | RICE | RNLI | RSPB 
| R|SPCA | | Rare Br|eed |Survival Trust | Redeeming Our Communities | Respond | Revere Char|itable Trust |Riders for Health | R|iversi|de Animal C|entre R|oyal Forestry Society S|amaritans San|dra Charit|able Trust Save| the children Scil|l Scop|e Skid|z Somers|et Wildlife trust Space Youth |Project St Monica's Tr|ust | St Peter's |Hospice | S|tarlight ch|ildr|en's foundation Sue Ryder | Tee|nage Canc|e|r|| |T|ru||s|t| The Anchor| Trus|t The Mary Ros|e Trust The Prince's T|rust | The Rowan Arts Project T|he Royal British L|egion | The Salvation Army | The Sobell Foundation | The Vine Project | The Vivat Trust Ltd Tru|st for Sustainable Liv|ing Volunteer| and commu|nity ac|tion WWF | Wal|sall dome|stic viol|ence forum |ltd W|ill Woodlands | Worcestersh|ire Lifestyles (Dea|d) Yorkshire an|d Humbe|r peopl|e unite|d agains|t crime age UK |All|\n", + "year | |||| | | ||| | | | ||| | ||| || ||| | | | || | | | | || || | || | || | || | | | ||| | | | | | | | | | | | | | | | | | | | || ||||| | | | | ||||| ||||||| || | | | | | | | | | | | | || | || | || || | | | | | || | | | | | | | | | || || ||| | | |||| || | || || | | || | | | | | | ||| | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | || | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |\n", + 
"-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "2010 | ||||threshold; p-ratio; nk-rul|e; | ||| threshold; p-r|atio|; threshold; p-r|atio; ||| | ||| || thresho|||ld|; p-ratio; threshold;| |p||-|rat|i|o|;|| || | || | || th|resh||old; p-ra|tio; | | thre|||shold|; p-ratio; | | | | threshold; p-ratio;| |thresh|old; p-ratio|; | threshold; p-ratio|; | t|hre|shol|d|; p|-r|a|t||i|||||o|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; | threshold; p-ratio; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; | th|reshold; p|-rati|o; | |threshold; p-ratio; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | | threshold; p-ratio; | th|reshold; p-ratio; | thresh|old; 
p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; th|reshold|; p-ratio; thr|eshold; p-rati|o;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| | threshold; p-|ratio; threshold; p-ratio; nk-rule; | | threshold; p-r|atio; | | threshold; p-ratio; | threshold|; p-ratio;| threshold|; p-ratio; threshold; p-|ratio; nk-rule; threshold; p-rat|io; threshold; p-ratio|; threshol|d; p-rati|o; thresh|o||ld; p-ratio; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | | |threshold; |p-ratio;| thresh|o|ld; p-ratio; threshold; p-ratio; nk-rul|e; | | t|hresh|old; p-ratio; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold|; p-ratio; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; | threshold; |p-rati|o; | threshold; p-rati|o; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; | | threshold; p-ratio; | th|reshold; p-ratio;| | thre|shold; p-ratio; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; | t|hreshold; p-ratio; | threshold; p-ratio; t|hreshold; p-ratio; nk-rule; | | thre|shold; p-ratio; || | threshol|d|; p-ratio; |threshold; p-rat|io; nk-rule; | | threshold; p-ratio; threshold|; p-ratio|; nk-|rule; | | threshold; p-ratio; thresh|old;| p-rat|io; nk-rule; | thr|eshold; p-ratio; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; t|hreshold;| p-rat|io; | |thresho|ld; p-ratio; threshold|; p-ratio; nk-rul|e;| |thresh|old; p-ratio; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; thr|eshold; p-ratio; threshold; p-|ratio; nk-rule; thre|shol|d; p-ratio; threshold; p-|ratio; 
nk-rule; | thres|hold; p-ratio; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; | threshold; p-ratio; threshold; p-ratio|; thre|shold; p-ratio|; nk-rule|; | threshold;| p-ratio; threshold; p-ratio; |nk-rule|; threshold; |p-rati|o; threshold; p-r|atio; n|k-rule; | threshold; p-r|atio; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; | t|hreshold; p-ratio|; th|reshold; |p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; | t|hreshold; p-ratio|; threshold; p-ratio|; threshold; p-ratio; | t|hreshold; p-ratio; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; | threshold; p-ratio; |threshold; p-ratio; nk-rule; | thr|eshold; p-ratio; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; | threshold; p-ratio; th|reshold; p-ratio; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; | th|resh|old; p-ratio; t|hreshold; p-ratio; th|reshold; |p|-||r|a|ti||o|;| nk-rule; |thres|hold; p-ratio; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; | threshold; p-ra|tio; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | | threshold; p-rati|o; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", + "2011 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| | ||| || thresho|||ld|; p-ratio; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | || | || th|resh||old; p-ra|tio; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| 
p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; | | threshold; p-ratio; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; 
nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; 
threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", + "2012 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | || | || th|resh||old; p-ra|tio; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; | | threshold; p-ratio; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; 
th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; 
nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", + "2013 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; 
t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; 
|threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; 
p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", + "2014 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| |thre|s|hol|d|; p-ratio;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; 
|nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; t|hreshold; p-ratio; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; 
threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | th|reshold|; p-rat|io; | | threshold; p-ratio; nk-rule; | ok|\n", + "2015 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-r|atio; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | | threshold|; |p-r||a|tio; threshold; p|-ratio; | 
thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; | thr||eshold;| p-ratio; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| |thre|s|hol|d|; p-ratio;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-|ratio; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshol|d; p-ratio; threshold; p-ratio|; nk-rule; threshold;| p-rati|o; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | | |threshold; p-ratio;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; |threshold|; p-r|atio; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; t|hreshold; p-ratio; threshold;| p-ratio;| 
nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; th|reshold;| p-ratio; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; |p-ratio|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ra|tio; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; | | threshold; p-ratio|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshol|d; p-ratio; | | threshol|d; p-ratio; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; | threshold; p-ratio; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; thre|shold; p|-ratio; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-r|atio; threshold; p-ratio; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | th|reshold|; p-rat|io; | | threshold; p-ratio; nk-rule; | ok|\n", + "All | |||| 
threshol|d; | ||| thres|hold|; thres|hold; ||| | ||| || ||| |threshold; t|h|r||e|sho|l|d|;|| || | || | || | || thresh|old; | | ||| |threshold; | | | | threshold;| | | threshold|; | threshold|; | | | | |thr|es|h|o||l|||||d|; | | | ||||| ||||||| || | threshold; | threshold; | threshold; | threshold; | | thr|eshol|d; | | threshold; | | thresho||ld; | || | || || th|reshold; | | |th|res||h|old; thr|eshold; | | threshold; | | threshold; | | | threshold; || || ||| | | |||| t||hre|s||ho||l|d; | || t|hreshold; | |threshold; | threshol|d;| ||| | | | | |threshold;| | | | t|hreshold; | | thres|h|old;|| | thre|shold; threshold; | | thres|hold; | | threshold; | |threshold;| |threshold; | threshold; thresho|ld; threshold|; | threshol|d; | || threshold; | threshold; | | thre|shold; | | | thre|shold; | thre|shold; | | |thres|hold; | | | th|reshold;| | | threshold; threshol|d; | | | | threshold; | th|reshold; | threshold; | threshold; |threshold; t|hreshol|d; t|hreshold; | th|reshol|d; | threshol|d; | threshold; | | threshold; | thresh|old; | thresho|ld; | | th|re|shold; | threshold; | | threshold; | | threshold;| | | threshold; | | threshold; | | | threshold;| | threshold; | | |threshold; | | threshold; | threshold; | threshold; | | | threshold; || | | |threshold; | | threshold; | | threshold; | |thres|hold; | | threshold; | | | threshold; | | threshold; | | threshold; | threshold; | t|hresho|ld; | | | threshold; | threshol|d;| | | threshold; | t|hreshold; | | threshold; | threshold; | threshold; | | threshold; | threshold; | | threshold; | |threshold; thresh|old; | threshold; threshold|; | |threshold|; | t|hreshold; th|reshold|; th|reshol|d; | thr|eshold; | thres|hold; thresh|old; | | threshol|d; | | threshold|; | | threshol|d; threshol|d; threshol|d; threshol|d; threshol|d; threshol|d; | | threshold|; | | threshold|; threshold|; threshold; | | threshold; | | threshold; | | | threshold; | threshold; | threshold; | | threshold; | threshold; | threshold; | threshold; | threshold; | 
threshold; | threshold; | thr|eshold; | th|reshold; | | | threshold; | threshold; | | | || | | || |t|hreshold; | | threshold; | threshold; | t|hreshold; threshold; | |threshold;| t|hreshold; thr|eshold; thres|hold; threshold; | thresh|old; | |thresho|ld; thresho|ld; | | th|reshold; | | threshol|d; | threshold; | | |thresho|ld; | | threshold; | ok|\n", + "-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "\n", + "INFO:acro:All the cells in this data are discolsive. 
Thus suppression can not be applied\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "acro.suppress = True\n", + "table = acro.crosstab(\n", + " df.year, df.grant_type, values=df.inc_total, aggfunc=\"mean\", margins=all\n", + ")\n", + "print(table)" + ] + }, { "cell_type": "markdown", "id": "6d4730c4", @@ -572,7 +624,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "37ddb939", "metadata": {}, "outputs": [ @@ -580,21 +632,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 6 cells suppressed; p-ratio: 1 cells suppressed; nk-rule: 1 cells suppressed; \n", + "INFO:acro:get_summary(): fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", "INFO:acro:outcome_df:\n", - "---------------------------------------------------------|\n", - "grant_type |G |N |R |R/G |\n", - "year | | | | |\n", - "---------------------------------------------------------|\n", - "2010 | ok | ok | ok | threshold; p-ratio; nk-rule; |\n", - "2011 | ok | ok | ok | threshold; |\n", - "2012 | ok | ok | ok | threshold; |\n", - "2013 | ok | ok | ok | threshold; |\n", - "2014 | ok | ok | ok | threshold; |\n", - "2015 | ok | ok | ok | threshold; |\n", - "---------------------------------------------------------|\n", + "---------------------------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |\n", + "year | | | | |\n", + "---------------------------------------------------------------------------|\n", + "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", + "2011 | ok | ok | ok | threshold; |\n", + "2012 | ok | ok | ok | threshold; |\n", + "2013 | ok | ok | ok | threshold; |\n", + "2014 | ok | ok | ok | threshold; |\n", + "2015 | ok | ok | ok | threshold; |\n", + "---------------------------------------------------------------------------|\n", "\n", - 
"INFO:acro:records:add(): output_1\n" + "INFO:acro:records:add(): output_2\n" ] }, { @@ -635,7 +687,7 @@ " \n", " 2010\n", " 9921906.0\n", - " 0.000000\n", + " NaN\n", " 8402284.0\n", " NaN\n", " \n", @@ -681,7 +733,7 @@ "text/plain": [ "grant_type G N R R/G\n", "year \n", - "2010 9921906.0 0.000000 8402284.0 NaN\n", + "2010 9921906.0 NaN 8402284.0 NaN\n", "2011 8502247.0 124013.859375 7716880.0 NaN\n", "2012 11458580.0 131859.062500 6958050.5 NaN\n", "2013 13557147.0 147937.796875 7202273.5 NaN\n", @@ -689,7 +741,7 @@ "2015 11133433.0 146572.187500 10812888.0 NaN" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -703,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "506135e0", "metadata": {}, "outputs": [], @@ -721,7 +773,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "4ae844a0", "metadata": {}, "outputs": [ @@ -729,21 +781,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 6 cells may need suppressing; p-ratio: 1 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", + "INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", - "---------------------------------------------------------|\n", - "grant_type |G |N |R |R/G |\n", - "year | | | | |\n", - "---------------------------------------------------------|\n", - "2010 | ok | ok | ok | threshold; p-ratio; nk-rule; |\n", - "2011 | ok | ok | ok | threshold; |\n", - "2012 | ok | ok | ok | threshold; |\n", - "2013 | ok | ok | ok | threshold; |\n", - "2014 | ok | ok | ok | threshold; |\n", - "2015 | ok | ok | ok | threshold; |\n", - "---------------------------------------------------------|\n", + "---------------------------------------------------------------------------|\n", + 
"grant_type |G |N |R |R/G |\n", + "year | | | | |\n", + "---------------------------------------------------------------------------|\n", + "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", + "2011 | ok | ok | ok | threshold; |\n", + "2012 | ok | ok | ok | threshold; |\n", + "2013 | ok | ok | ok | threshold; |\n", + "2014 | ok | ok | ok | threshold; |\n", + "2015 | ok | ok | ok | threshold; |\n", + "---------------------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_2\n" + "INFO:acro:records:add(): output_3\n" ] }, { @@ -838,7 +890,7 @@ "2015 11133433.0 146572.187500 10812888.0 18278624.0" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -858,7 +910,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "fb7abfc9-e428-4b71-9066-01ac9a08d655", "metadata": {}, "outputs": [ @@ -866,214 +918,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 2 cells may need suppressing; \n", - "INFO:acro:outcome_df:\n", - "----------------------------------------------------------------------------------------------------------|\n", - " mean |std |\n", - "grant_type G N R R/G All |G N R R/G All|\n", - "year | |\n", - "----------------------------------------------------------------------------------------------------------|\n", - "2010 ok ok ok threshold; p-ratio; nk-rule; ok | ok ok ok threshold; p-ratio; nk-rule; ok|\n", - "2011 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", - "2012 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", - "2013 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", - "2014 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", - "2015 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", - "All ok ok ok ok ok | ok ok ok ok ok|\n", - 
"----------------------------------------------------------------------------------------------------------|\n", - "\n", - "INFO:acro:records:add(): output_3\n" + "c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:169: PerformanceWarning: indexing past lexsort depth may impact performance.\n", + " if t_values[col].sum() == 0:\n" ] }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.01.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502247.0124013.8593757716880.016047500.05303808.01.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.02.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.517845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.403848e+07
\n", - "
" - ], - "text/plain": [ - " mean \\\n", - "grant_type G N R R/G All \n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.0 \n", - "2011 8502247.0 124013.859375 7716880.0 16047500.0 5303808.0 \n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.0 \n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", - "2014 13748147.0 133198.250000 8277525.5 17845750.0 6117054.5 \n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", - "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", - "\n", - " std \\\n", - "grant_type G N R R/G \n", - "year \n", - "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", - "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", - "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", - "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", - "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", - "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", - "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", - "\n", - " \n", - "grant_type All \n", - "year \n", - "2010 2.727398e+07 \n", - "2011 2.137658e+07 \n", - "2012 2.026400e+07 \n", - "2013 2.251787e+07 \n", - "2014 2.641722e+07 \n", - "2015 2.784636e+07 \n", - "All 2.403848e+07 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "ename": "ValueError", + "evalue": "The truth value of a Series is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 21\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m safe_table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 2\u001b[0m df\u001b[39m.\u001b[39;49myear, df\u001b[39m.\u001b[39;49mgrant_type, values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants, aggfunc\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mmean\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstd\u001b[39;49m\u001b[39m\"\u001b[39;49m], margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m safe_table\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:169\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize)\u001b[0m\n\u001b[0;32m 167\u001b[0m \u001b[39mif\u001b[39;00m dropna \u001b[39mor\u001b[39;00m margins:\n\u001b[0;32m 168\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m t_values\u001b[39m.\u001b[39mcolumns:\n\u001b[1;32m--> 169\u001b[0m \u001b[39mif\u001b[39;00m t_values[col]\u001b[39m.\u001b[39;49msum() \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m:\n\u001b[0;32m 170\u001b[0m t_values \u001b[39m=\u001b[39m t_values\u001b[39m.\u001b[39mdrop(col, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 171\u001b[0m t_values \u001b[39m=\u001b[39m t_values \u001b[39m<\u001b[39m THRESHOLD\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\generic.py:1527\u001b[0m, in 
\u001b[0;36mNDFrame.__nonzero__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1525\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[0;32m 1526\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__nonzero__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1528\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe truth value of a \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m is ambiguous. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1529\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUse a.empty, a.bool(), a.item(), a.any() or a.all().\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1530\u001b[0m )\n", + "\u001b[1;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ] } ], "source": [ @@ -1093,7 +953,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "bf132239", "metadata": {}, "outputs": [ @@ -1101,19 +961,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 6 cells may need suppressing; p-ratio: 1 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", + "INFO:acro:get_summary(): review; missing values found\n", "INFO:acro:outcome_df:\n", - "---------------------------------------------------------|\n", - "grant_type |G |N |R |R/G |\n", - "year | | | | |\n", - "---------------------------------------------------------|\n", - "2010 | ok | ok | ok | threshold; p-ratio; nk-rule; |\n", - "2011 | ok | ok | ok | threshold; |\n", - "2012 | ok | ok | ok | threshold; |\n", - "2013 | ok | ok | ok | threshold; |\n", - "2014 | ok | ok | ok | threshold; |\n", - "2015 | ok | ok | ok | threshold; |\n", - "---------------------------------------------------------|\n", + 
"--------------------------------------------------|\n", + "grant_type |G |N |R |R/G |\n", + "year | | | | |\n", + "--------------------------------------------------|\n", + "2010 | missing | missing | missing | missing|\n", + "2011 | | missing | missing | |\n", + "2012 | | | missing | |\n", + "2013 | | missing | missing | |\n", + "2014 | | missing | missing | |\n", + "2015 | missing | missing | missing | |\n", + "--------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_4\n" ] @@ -1210,7 +1070,7 @@ "2015 11133433.0 149143.625000 10596385.0 18278624.0" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1227,7 +1087,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "7cc417a0", "metadata": {}, "outputs": [], @@ -1245,7 +1105,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "15bcdc7c", "metadata": {}, "outputs": [ @@ -1314,7 +1174,7 @@ " \n", " \n", " 2011\n", - " 8502246.0\n", + " 8502247.0\n", " 123496.445312\n", " 7577703.5\n", " 16047500.0\n", @@ -1330,21 +1190,21 @@ " 2013\n", " 13557147.0\n", " 147937.625000\n", - " 6988263.0\n", + " 6988263.5\n", " 16765625.0\n", " \n", " \n", " 2014\n", " 13748147.0\n", " 133198.078125\n", - " 7997392.0\n", + " 7997392.5\n", " 17845750.0\n", " \n", " \n", " 2015\n", " 11133433.0\n", " 146572.015625\n", - " 10388612.0\n", + " 10388613.0\n", " 18278624.0\n", " \n", " \n", @@ -1355,14 +1215,14 @@ "grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8280032.5 11636000.0\n", - "2011 8502246.0 123496.445312 7577703.5 16047500.0\n", + "2011 8502247.0 123496.445312 7577703.5 16047500.0\n", "2012 11458580.0 131859.062500 6796357.5 16810000.0\n", - "2013 13557147.0 147937.625000 6988263.0 16765625.0\n", - "2014 13748147.0 133198.078125 7997392.0 17845750.0\n", - "2015 11133433.0 146572.015625 10388612.0 18278624.0" + "2013 13557147.0 147937.625000 6988263.5 
16765625.0\n", + "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", + "2015 11133433.0 146572.015625 10388613.0 18278624.0" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1385,7 +1245,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "6d4730c4", "metadata": {}, "outputs": [ @@ -1393,17 +1253,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): pass\n", + "INFO:acro:get_summary(): review; missing values found\n", "INFO:acro:outcome_df:\n", "---------------------------------|\n", " mean |std |\n", " inc_grants |inc_grants|\n", "grant_type | |\n", "---------------------------------|\n", - "G ok | ok |\n", - "N ok | ok |\n", - "R ok | ok |\n", - "R/G ok | ok |\n", + "G missing | missing |\n", + "N missing | missing |\n", + "R missing | missing |\n", + "R/G missing | missing |\n", "---------------------------------|\n", "\n", "INFO:acro:records:add(): output_6\n" @@ -1483,7 +1343,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1505,7 +1365,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "f3a87c20", "metadata": {}, "outputs": [ @@ -1581,7 +1441,7 @@ " \n", " \n", " R\n", - " 8.006361e+06\n", + " 8.006360e+06\n", " 3.228216e+07\n", " \n", " \n", @@ -1599,11 +1459,11 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", - "R 8.006361e+06 3.228216e+07\n", + "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1621,7 +1481,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "8b603548", "metadata": {}, "outputs": [], @@ -1639,7 +1499,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": 
"de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "outputs": [ @@ -1715,7 +1575,7 @@ " \n", " \n", " R\n", - " 7.882230e+06\n", + " 7.882231e+06\n", " 3.204558e+07\n", " \n", " \n", @@ -1733,11 +1593,11 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", - "R 7.882230e+06 3.204558e+07\n", + "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1761,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "a521cb83", "metadata": {}, "outputs": [ @@ -1769,7 +1629,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n", + "INFO:acro:ols() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "INFO:acro:records:add(): output_9\n" ] }, @@ -1788,10 +1654,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.00 \n", + " Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 14:16:00 Log-Likelihood: -14495. \n", + " Time: 10:33:12 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1838,45 +1704,6 @@ "\n", "

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems." ], - "text/latex": [ - "\\begin{center}\n", - "\\begin{tabular}{lclc}\n", - "\\toprule\n", - "\\textbf{Dep. Variable:} & inc\\_activity & \\textbf{ R-squared: } & 0.894 \\\\\n", - "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.893 \\\\\n", - "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 2261. \\\\\n", - "\\textbf{Date:} & Mon, 14 Aug 2023 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n", - "\\textbf{Time:} & 14:16:00 & \\textbf{ Log-Likelihood: } & -14495. \\\\\n", - "\\textbf{No. Observations:} & 811 & \\textbf{ AIC: } & 2.900e+04 \\\\\n", - "\\textbf{Df Residuals:} & 807 & \\textbf{ BIC: } & 2.902e+04 \\\\\n", - "\\textbf{Df Model:} & 3 & \\textbf{ } & \\\\\n", - "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lcccccc}\n", - " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", - "\\midrule\n", - "\\textbf{const} & 3.01e+05 & 5.33e+05 & 0.565 & 0.572 & -7.45e+05 & 1.35e+06 \\\\\n", - "\\textbf{inc\\_grants} & -0.8846 & 0.025 & -35.956 & 0.000 & -0.933 & -0.836 \\\\\n", - "\\textbf{inc\\_donations} & -0.6647 & 0.016 & -40.721 & 0.000 & -0.697 & -0.633 \\\\\n", - "\\textbf{total\\_costs} & 0.8313 & 0.011 & 78.674 & 0.000 & 0.811 & 0.852 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lclc}\n", - "\\textbf{Omnibus:} & 1339.956 & \\textbf{ Durbin-Watson: } & 1.414 \\\\\n", - "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 1253317.706 \\\\\n", - "\\textbf{Skew:} & 9.899 & \\textbf{ Prob(JB): } & 0.00 \\\\\n", - "\\textbf{Kurtosis:} & 194.566 & \\textbf{ Cond. No. 
} & 1.05e+08 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "%\\caption{OLS Regression Results}\n", - "\\end{center}\n", - "\n", - "Notes: \\newline\n", - " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline\n", - " [2] The condition number is large, 1.05e+08. This might indicate that there are \\newline\n", - " strong multicollinearity or other numerical problems." - ], "text/plain": [ "\n", "\"\"\"\n", @@ -1885,8 +1712,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.00\n", - "Time: 14:16:00 Log-Likelihood: -14495.\n", + "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00\n", + "Time: 10:33:12 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -1912,7 +1739,7 @@ "\"\"\"" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1939,7 +1766,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "cc90f7c9", "metadata": {}, "outputs": [ @@ -1947,8 +1774,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", - "INFO:acro:records:add(): output_10\n" + "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_11\n" ] }, { @@ -1966,10 +1799,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.00 \n", + " Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 14:16:09 Log-Likelihood: -14495. \n", + " Time: 10:33:21 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -2016,45 +1849,6 @@ "\n", "

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems." ], - "text/latex": [ - "\\begin{center}\n", - "\\begin{tabular}{lclc}\n", - "\\toprule\n", - "\\textbf{Dep. Variable:} & inc\\_activity & \\textbf{ R-squared: } & 0.894 \\\\\n", - "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.893 \\\\\n", - "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 2261. \\\\\n", - "\\textbf{Date:} & Mon, 14 Aug 2023 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n", - "\\textbf{Time:} & 14:16:09 & \\textbf{ Log-Likelihood: } & -14495. \\\\\n", - "\\textbf{No. Observations:} & 811 & \\textbf{ AIC: } & 2.900e+04 \\\\\n", - "\\textbf{Df Residuals:} & 807 & \\textbf{ BIC: } & 2.902e+04 \\\\\n", - "\\textbf{Df Model:} & 3 & \\textbf{ } & \\\\\n", - "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lcccccc}\n", - " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", - "\\midrule\n", - "\\textbf{Intercept} & 3.01e+05 & 5.33e+05 & 0.565 & 0.572 & -7.45e+05 & 1.35e+06 \\\\\n", - "\\textbf{inc\\_grants} & -0.8846 & 0.025 & -35.956 & 0.000 & -0.933 & -0.836 \\\\\n", - "\\textbf{inc\\_donations} & -0.6647 & 0.016 & -40.721 & 0.000 & -0.697 & -0.633 \\\\\n", - "\\textbf{total\\_costs} & 0.8313 & 0.011 & 78.674 & 0.000 & 0.811 & 0.852 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lclc}\n", - "\\textbf{Omnibus:} & 1339.956 & \\textbf{ Durbin-Watson: } & 1.414 \\\\\n", - "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 1253317.706 \\\\\n", - "\\textbf{Skew:} & 9.899 & \\textbf{ Prob(JB): } & 0.00 \\\\\n", - "\\textbf{Kurtosis:} & 194.566 & \\textbf{ Cond. No. 
} & 1.05e+08 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "%\\caption{OLS Regression Results}\n", - "\\end{center}\n", - "\n", - "Notes: \\newline\n", - " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline\n", - " [2] The condition number is large, 1.05e+08. This might indicate that there are \\newline\n", - " strong multicollinearity or other numerical problems." - ], "text/plain": [ "\n", "\"\"\"\n", @@ -2063,8 +1857,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.00\n", - "Time: 14:16:09 Log-Likelihood: -14495.\n", + "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00\n", + "Time: 10:33:21 Log-Likelihood: -14495.\n", "No. Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -2090,7 +1884,7 @@ "\"\"\"" ] }, - "execution_count": 20, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2112,7 +1906,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "id": "5b1a1611", "metadata": {}, "outputs": [ @@ -2120,8 +1914,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n", - "INFO:acro:records:add(): output_11\n" + "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): output_12\n" ] }, { @@ -2148,10 +1948,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Mon, 14 Aug 2023 Pseudo R-squ.: 0.2140 \n", + " Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2140 \n", "\n", "\n", - " Time: 14:16:16 Log-Likelihood: -400.46 \n", + " Time: 10:33:24 Log-Likelihood: -400.46 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2181,36 +1981,6 @@ "\n", "

Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified." ], - "text/latex": [ - "\\begin{center}\n", - "\\begin{tabular}{lclc}\n", - "\\toprule\n", - "\\textbf{Dep. Variable:} & survivor & \\textbf{ No. Observations: } & 811 \\\\\n", - "\\textbf{Model:} & Probit & \\textbf{ Df Residuals: } & 806 \\\\\n", - "\\textbf{Method:} & MLE & \\textbf{ Df Model: } & 4 \\\\\n", - "\\textbf{Date:} & Mon, 14 Aug 2023 & \\textbf{ Pseudo R-squ.: } & 0.2140 \\\\\n", - "\\textbf{Time:} & 14:16:16 & \\textbf{ Log-Likelihood: } & -400.46 \\\\\n", - "\\textbf{converged:} & True & \\textbf{ LL-Null: } & -509.50 \\\\\n", - "\\textbf{Covariance Type:} & nonrobust & \\textbf{ LLR p-value: } & 4.875e-46 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lcccccc}\n", - " & \\textbf{coef} & \\textbf{std err} & \\textbf{z} & \\textbf{P$> |$z$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", - "\\midrule\n", - "\\textbf{const} & 0.0474 & 0.057 & 0.838 & 0.402 & -0.063 & 0.158 \\\\\n", - "\\textbf{inc\\_activity} & 1.836e-07 & 5.16e-08 & 3.559 & 0.000 & 8.25e-08 & 2.85e-07 \\\\\n", - "\\textbf{inc\\_grants} & 8.576e-08 & 3.9e-08 & 2.197 & 0.028 & 9.25e-09 & 1.62e-07 \\\\\n", - "\\textbf{inc\\_donations} & 2.406e-07 & 4.54e-08 & 5.297 & 0.000 & 1.52e-07 & 3.3e-07 \\\\\n", - "\\textbf{total\\_costs} & -8.644e-08 & 3.68e-08 & -2.351 & 0.019 & -1.59e-07 & -1.44e-08 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "%\\caption{Probit Regression Results}\n", - "\\end{center}\n", - "\n", - "Possibly complete quasi-separation: A fraction 0.18 of observations can be \\newline\n", - " perfectly predicted. This might indicate that there is complete \\newline\n", - " quasi-separation. In this case some parameters will not be identified." - ], "text/plain": [ "\n", "\"\"\"\n", @@ -2219,8 +1989,8 @@ "Dep. Variable: survivor No. 
Observations: 811\n", "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Mon, 14 Aug 2023 Pseudo R-squ.: 0.2140\n", - "Time: 14:16:16 Log-Likelihood: -400.46\n", + "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2140\n", + "Time: 10:33:24 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", @@ -2239,7 +2009,7 @@ "\"\"\"" ] }, - "execution_count": 21, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2267,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "dcf30f8f", "metadata": {}, "outputs": [ @@ -2276,7 +2046,7 @@ "output_type": "stream", "text": [ "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", - "INFO:acro:records:add(): output_12\n" + "INFO:acro:records:add(): output_13\n" ] }, { @@ -2303,10 +2073,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Mon, 14 Aug 2023 Pseudo R-squ.: 0.2187 \n", + " Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2187 \n", "\n", "\n", - " Time: 14:16:20 Log-Likelihood: -398.07 \n", + " Time: 10:33:26 Log-Likelihood: -398.07 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2336,36 +2106,6 @@ "\n", "

Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified." ], - "text/latex": [ - "\\begin{center}\n", - "\\begin{tabular}{lclc}\n", - "\\toprule\n", - "\\textbf{Dep. Variable:} & survivor & \\textbf{ No. Observations: } & 811 \\\\\n", - "\\textbf{Model:} & Logit & \\textbf{ Df Residuals: } & 806 \\\\\n", - "\\textbf{Method:} & MLE & \\textbf{ Df Model: } & 4 \\\\\n", - "\\textbf{Date:} & Mon, 14 Aug 2023 & \\textbf{ Pseudo R-squ.: } & 0.2187 \\\\\n", - "\\textbf{Time:} & 14:16:20 & \\textbf{ Log-Likelihood: } & -398.07 \\\\\n", - "\\textbf{converged:} & True & \\textbf{ LL-Null: } & -509.50 \\\\\n", - "\\textbf{Covariance Type:} & nonrobust & \\textbf{ LLR p-value: } & 4.532e-47 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "\\begin{tabular}{lcccccc}\n", - " & \\textbf{coef} & \\textbf{std err} & \\textbf{z} & \\textbf{P$> |$z$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", - "\\midrule\n", - "\\textbf{const} & 0.0512 & 0.091 & 0.561 & 0.575 & -0.128 & 0.230 \\\\\n", - "\\textbf{inc\\_activity} & 2.981e-07 & 8.95e-08 & 3.330 & 0.001 & 1.23e-07 & 4.74e-07 \\\\\n", - "\\textbf{inc\\_grants} & 1.351e-07 & 6.67e-08 & 2.026 & 0.043 & 4.39e-09 & 2.66e-07 \\\\\n", - "\\textbf{inc\\_donations} & 5.123e-07 & 1.04e-07 & 4.927 & 0.000 & 3.08e-07 & 7.16e-07 \\\\\n", - "\\textbf{total\\_costs} & -1.442e-07 & 6.26e-08 & -2.304 & 0.021 & -2.67e-07 & -2.15e-08 \\\\\n", - "\\bottomrule\n", - "\\end{tabular}\n", - "%\\caption{Logit Regression Results}\n", - "\\end{center}\n", - "\n", - "Possibly complete quasi-separation: A fraction 0.18 of observations can be \\newline\n", - " perfectly predicted. This might indicate that there is complete \\newline\n", - " quasi-separation. In this case some parameters will not be identified." - ], "text/plain": [ "\n", "\"\"\"\n", @@ -2374,8 +2114,8 @@ "Dep. Variable: survivor No. 
Observations: 811\n", "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Mon, 14 Aug 2023 Pseudo R-squ.: 0.2187\n", - "Time: 14:16:20 Log-Likelihood: -398.07\n", + "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2187\n", + "Time: 10:33:26 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", @@ -2394,7 +2134,7 @@ "\"\"\"" ] }, - "execution_count": 22, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -2414,7 +2154,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "id": "ec960039", "metadata": { "scrolled": true @@ -2447,7 +2187,7 @@ "2013 15 59 71 8\n", "2014 15 59 71 8\n", "2015 15 59 71 8]\n", - "timestamp: 2023-08-14T14:14:31.710403\n", + "timestamp: 2023-09-22T10:32:14.193937\n", "comments: []\n", "exception: \n", "\n", @@ -2455,117 +2195,82 @@ "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 6, 'p-ratio': 1, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 3]], 'nk-rule': [[0, 3]]}}\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", + "command: table = acro.crosstab(df.year,df.grant_type, values=df.inc_grants, aggfunc=\"sum\", margins=True)\n", + "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", + "outcome: grant_type G N R R/G All\n", + "year \n", + "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", + "2011 ok ok ok threshold; ok\n", + 
"2012 ok ok ok threshold; ok\n", + "2013 ok ok ok threshold; ok\n", + "2014 ok ok ok threshold; ok\n", + "2015 ok ok ok threshold; ok\n", + "All ok ok ok ok ok\n", + "output: [grant_type G N R All\n", + "year \n", + "2010 138906688.0 NaN 5.041371e+08 6.430438e+08\n", + "2011 127533696.0 7192804.0 5.324647e+08 6.671912e+08\n", + "2012 171878704.0 7779685.0 4.801055e+08 6.597638e+08\n", + "2013 203357200.0 8728330.0 5.113614e+08 7.234470e+08\n", + "2014 206222208.0 7858697.0 5.545942e+08 7.686751e+08\n", + "2015 133601200.0 8501187.0 5.514573e+08 6.935597e+08\n", + "All 981499712.0 40060704.0 3.134120e+09 4.155681e+09]\n", + "timestamp: 2023-09-22T10:32:20.714474\n", + "comments: []\n", + "exception: \n", + "\n", + "uid: output_2\n", + "status: fail\n", + "type: table\n", + "properties: {'method': 'crosstab'}\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", - "summary: fail; threshold: 6 cells suppressed; p-ratio: 1 cells suppressed; nk-rule: 1 cells suppressed; \n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 ok ok ok threshold; p-ratio; nk-rule; \n", - "2011 ok ok ok threshold; \n", - "2012 ok ok ok threshold; \n", - "2013 ok ok ok threshold; \n", - "2014 ok ok ok threshold; \n", - "2015 ok ok ok threshold; \n", + "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", + "outcome: grant_type G N R R/G\n", + "year \n", + "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n", + "2011 ok ok ok threshold; \n", + "2012 ok ok ok threshold; \n", + "2013 ok ok ok threshold; \n", + "2014 ok ok ok threshold; \n", + "2015 ok ok ok threshold; \n", "output: 
[grant_type G N R R/G\n", "year \n", - "2010 9921906.0 0.000000 8402284.0 NaN\n", - "2011 8502246.0 124013.859375 7716880.0 NaN\n", + "2010 9921906.0 NaN 8402284.0 NaN\n", + "2011 8502247.0 124013.859375 7716880.0 NaN\n", "2012 11458580.0 131859.062500 6958050.5 NaN\n", "2013 13557147.0 147937.796875 7202273.5 NaN\n", - "2014 13748147.0 133198.250000 8277525.0 NaN\n", + "2014 13748147.0 133198.250000 8277525.5 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-08-14T14:14:47.594161\n", + "timestamp: 2023-09-22T10:32:34.076384\n", "comments: []\n", "exception: \n", "\n", - "uid: output_2\n", + "uid: output_3\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 6, 'p-ratio': 1, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 3]], 'nk-rule': [[0, 3]]}}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", - "summary: fail; threshold: 6 cells may need suppressing; p-ratio: 1 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 ok ok ok threshold; p-ratio; nk-rule; \n", - "2011 ok ok ok threshold; \n", - "2012 ok ok ok threshold; \n", - "2013 ok ok ok threshold; \n", - "2014 ok ok ok threshold; \n", - "2015 ok ok ok threshold; \n", + "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", + "outcome: grant_type G N R R/G\n", + "year \n", + "2010 ok threshold; 
p-ratio; ok threshold; p-ratio; nk-rule; \n", + "2011 ok ok ok threshold; \n", + "2012 ok ok ok threshold; \n", + "2013 ok ok ok threshold; \n", + "2014 ok ok ok threshold; \n", + "2015 ok ok ok threshold; \n", "output: [grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8402284.0 11636000.0\n", - "2011 8502246.0 124013.859375 7716880.0 16047500.0\n", + "2011 8502247.0 124013.859375 7716880.0 16047500.0\n", "2012 11458580.0 131859.062500 6958050.5 16810000.0\n", "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", - "2014 13748147.0 133198.250000 8277525.0 17845750.0\n", + "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-08-14T14:14:55.245016\n", - "comments: []\n", - "exception: \n", - "\n", - "uid: output_3\n", - "status: fail\n", - "type: table\n", - "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 12, 'p-ratio': 2, 'nk-rule': 2}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 3], [0, 8], [1, 3], [1, 8], [2, 3], [2, 8], [3, 3], [3, 8], [4, 3], [4, 8], [5, 3], [5, 8]], 'p-ratio': [[0, 3], [0, 8]], 'nk-rule': [[0, 3], [0, 8]]}}\n", - "command: safe_table = acro.crosstab(\n", - "summary: fail; threshold: 12 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 2 cells may need suppressing; \n", - "outcome: mean std \\\n", - "grant_type G N R R/G All G N R \n", - "year \n", - "2010 ok ok ok threshold; p-ratio; nk-rule; ok ok ok ok \n", - "2011 ok ok ok threshold; ok ok ok ok \n", - "2012 ok ok ok threshold; ok ok ok ok \n", - "2013 ok ok ok threshold; ok ok ok ok \n", - "2014 ok ok ok threshold; ok ok ok ok \n", - "2015 ok ok ok threshold; ok ok ok ok \n", - "All ok ok ok ok ok ok ok ok \n", - "\n", - " \n", - "grant_type R/G All \n", - "year \n", - "2010 threshold; p-ratio; nk-rule; ok \n", - "2011 threshold; ok \n", - "2012 threshold; ok \n", - "2013 threshold; 
ok \n", - "2014 threshold; ok \n", - "2015 threshold; ok \n", - "All ok ok \n", - "output: [ mean \\\n", - "grant_type G N R R/G All \n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.5 \n", - "2011 8502246.0 124013.859375 7716880.0 16047500.0 5303808.5 \n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.5 \n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", - "2014 13748147.0 133198.250000 8277525.0 17845750.0 6117054.5 \n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", - "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", - "\n", - " std \\\n", - "grant_type G N R R/G \n", - "year \n", - "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", - "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", - "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", - "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", - "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", - "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", - "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", - "\n", - " \n", - "grant_type All \n", - "year \n", - "2010 2.727398e+07 \n", - "2011 2.137658e+07 \n", - "2012 2.026400e+07 \n", - "2013 2.251787e+07 \n", - "2014 2.641722e+07 \n", - "2015 2.784636e+07 \n", - "All 2.403848e+07 ]\n", - "timestamp: 2023-08-14T14:15:05.753875\n", + "timestamp: 2023-09-22T10:32:42.233235\n", "comments: []\n", "exception: \n", "\n", @@ -2573,7 +2278,7 @@ "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 14, 'threshold': 6, 'p-ratio': 1, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 3]], 'nk-rule': [[0, 3]]}}\n", + "sdc: 
{'summary': {'suppressed': False, 'negative': 0, 'missing': 14, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=missing, aggfunc=\"mean\")\n", "summary: review; missing values found\n", "outcome: grant_type G N R R/G\n", @@ -2586,13 +2291,13 @@ "2015 missing missing missing \n", "output: [grant_type G N R R/G\n", "year \n", - "2010 9921906.0 0.000000 8420373.0 11636000.0\n", - "2011 8502246.0 125663.226562 7689140.0 16047500.0\n", + "2010 9921906.0 0.000000 8420372.0 11636000.0\n", + "2011 8502247.0 125663.226562 7689140.5 16047500.0\n", "2012 11458580.0 131859.062500 6896304.0 16810000.0\n", - "2013 13557147.0 150488.453125 7088095.5 16765625.0\n", - "2014 13748147.0 135494.781250 8118565.5 17845750.0\n", + "2013 13557147.0 150488.453125 7088096.0 16765625.0\n", + "2014 13748147.0 135494.781250 8118565.0 17845750.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0]\n", - "timestamp: 2023-08-14T14:15:22.796424\n", + "timestamp: 2023-09-22T10:32:56.949849\n", "comments: []\n", "exception: \n", "\n", @@ -2600,7 +2305,7 @@ "status: review\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 0, 'threshold': 6, 'p-ratio': 1, 'nk-rule': 1}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 3]], 'nk-rule': [[0, 3]]}}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 11, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], 
[4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\"mean\")\n", "summary: review; negative values found\n", "outcome: grant_type G N R R/G\n", @@ -2614,29 +2319,29 @@ "output: [grant_type G N R R/G\n", "year \n", "2010 9921906.0 0.000000 8280032.5 11636000.0\n", - "2011 8502246.0 123496.445312 7577703.5 16047500.0\n", + "2011 8502247.0 123496.445312 7577703.5 16047500.0\n", "2012 11458580.0 131859.062500 6796357.5 16810000.0\n", - "2013 13557147.0 147937.625000 6988263.0 16765625.0\n", - "2014 13748147.0 133198.078125 7997392.0 17845750.0\n", - "2015 11133433.0 146572.015625 10388612.0 18278624.0]\n", - "timestamp: 2023-08-14T14:15:31.575434\n", + "2013 13557147.0 147937.625000 6988263.5 16765625.0\n", + "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", + "2015 11133433.0 146572.015625 10388613.0 18278624.0]\n", + "timestamp: 2023-09-22T10:33:03.111509\n", "comments: []\n", "exception: \n", "\n", "uid: output_6\n", - "status: pass\n", + "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", "command: table = acro.pivot_table(\n", - "summary: pass\n", + "summary: review; missing values found\n", "outcome: mean std\n", " inc_grants inc_grants\n", "grant_type \n", - "G ok 
ok\n", - "N ok ok\n", - "R ok ok\n", - "R/G ok ok\n", + "G missing missing\n", + "N missing missing\n", + "R missing missing\n", + "R/G missing missing\n", "output: [ mean std\n", " inc_grants inc_grants\n", "grant_type \n", @@ -2644,7 +2349,7 @@ "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-08-14T14:15:42.742599\n", + "timestamp: 2023-09-22T10:33:05.216495\n", "comments: []\n", "exception: \n", "\n", @@ -2667,9 +2372,9 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", - "R 8.006361e+06 3.228216e+07\n", + "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-08-14T14:15:48.940003\n", + "timestamp: 2023-09-22T10:33:07.188134\n", "comments: []\n", "exception: \n", "\n", @@ -2677,7 +2382,7 @@ "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", "command: table = acro.pivot_table(\n", "summary: review; negative values found\n", "outcome: mean std\n", @@ -2692,9 +2397,9 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", - "R 7.882230e+06 3.204558e+07\n", + "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-08-14T14:15:56.952843\n", + "timestamp: 2023-09-22T10:33:10.739040\n", "comments: []\n", "exception: \n", "\n", @@ -2712,8 +2417,8 @@ "Dep. Variable: \n", "Model: OLS Adj. 
R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.000\n", - "Time: 14:16:00 Log-Likelihood: -14495.000\n", + "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", + "Time: 10:33:12 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2726,7 +2431,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-08-14T14:16:00.497443\n", + "timestamp: 2023-09-22T10:33:12.893068\n", "comments: []\n", "exception: \n", "\n", @@ -2744,8 +2449,8 @@ "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Mon, 14 Aug 2023 Prob (F-statistic): 0.000\n", - "Time: 14:16:09 Log-Likelihood: -14495.000\n", + "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", + "Time: 10:33:20 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2758,13 +2463,45 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-08-14T14:16:09.552690\n", + "timestamp: 2023-09-22T10:33:20.830086\n", "comments: []\n", "exception: \n", "\n", "uid: output_11\n", "status: pass\n", "type: regression\n", + "properties: {'method': 'olsr', 'dof': 807.0}\n", + "sdc: {}\n", + "command: results = acro.olsr(\n", + "summary: pass; dof=807.0 >= 10\n", + "outcome: Empty DataFrame\n", + "Columns: []\n", + "Index: []\n", + "output: [ inc_activity R-squared: 0.894\n", + "Dep. Variable: \n", + "Model: OLS Adj. R-squared: 0.893\n", + "Method: Least Squares F-statistic: 2261.000\n", + "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", + "Time: 10:33:21 Log-Likelihood: -14495.000\n", + "No. 
Observations: 811 AIC: 29000.000\n", + "Df Residuals: 807 BIC: 29020.000\n", + "Df Model: 3 NaN NaN\n", + "Covariance Type: nonrobust NaN NaN, coef std err t P>|t| [0.025 0.975]\n", + "Intercept 301000.0000 533000.000 0.565 0.572 -745000.000 1350000.000\n", + "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", + "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", + "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852, 1339.956 Durbin-Watson: 1.414\n", + "Omnibus: \n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", + "Skew: 9.899 Prob(JB): 0.000000e+00\n", + "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", + "timestamp: 2023-09-22T10:33:21.826209\n", + "comments: []\n", + "exception: \n", + "\n", + "uid: output_12\n", + "status: pass\n", + "type: regression\n", "properties: {'method': 'probit', 'dof': 806.0}\n", "sdc: {}\n", "command: results = acro.probit(y, x)\n", @@ -2776,8 +2513,8 @@ "Dep. Variable: \n", "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Mon, 14 Aug 2023 Pseudo R-squ.: 2.140000e-01\n", - "Time: 14:16:16 Log-Likelihood: -4.004600e+02\n", + "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 2.140000e-01\n", + "Time: 10:33:24 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", @@ -2792,11 +2529,11 @@ "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", - "timestamp: 2023-08-14T14:16:16.190948\n", + "timestamp: 2023-09-22T10:33:24.118828\n", "comments: []\n", "exception: \n", "\n", - "uid: output_12\n", + "uid: output_13\n", "status: pass\n", "type: regression\n", "properties: {'method': 'logit', 'dof': 806.0}\n", @@ -2810,8 +2547,8 @@ "Dep. 
Variable: \n", "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Mon, 14 Aug 2023 Pseudo R-squ.: 2.187000e-01\n", - "Time: 14:16:20 Log-Likelihood: -3.980700e+02\n", + "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 2.187000e-01\n", + "Time: 10:33:26 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", @@ -2826,7 +2563,7 @@ "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", - "timestamp: 2023-08-14T14:16:20.747689\n", + "timestamp: 2023-09-22T10:33:26.379481\n", "comments: []\n", "exception: \n", "\n", @@ -2848,7 +2585,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "b1f77749", "metadata": {}, "outputs": [ @@ -2876,7 +2613,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "45ec04ef", "metadata": {}, "outputs": [ @@ -2902,7 +2639,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "0c826271", "metadata": {}, "outputs": [ @@ -2930,7 +2667,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "2816eac7", "metadata": {}, "outputs": [ @@ -2938,7 +2675,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:records:add_custom(): output_13\n" + "INFO:acro:records:add_custom(): output_14\n" ] } ], @@ -2958,7 +2695,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "id": "f38b4334", "metadata": {}, "outputs": [ @@ -2990,7 +2727,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "id": "9e554eea", "metadata": {}, "outputs": [ @@ -2998,6 +2735,70 @@ "name": "stderr", "output_type": "stream", "text": [ + "INFO:acro:records:\n", + "uid: output_1\n", + "status: fail\n", + "type: table\n", + "properties: 
{'method': 'crosstab'}\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", + "command: table = acro.crosstab(df.year,df.grant_type, values=df.inc_grants, aggfunc=\"sum\", margins=True)\n", + "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", + "outcome: grant_type G N R R/G All\n", + "year \n", + "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", + "2011 ok ok ok threshold; ok\n", + "2012 ok ok ok threshold; ok\n", + "2013 ok ok ok threshold; ok\n", + "2014 ok ok ok threshold; ok\n", + "2015 ok ok ok threshold; ok\n", + "All ok ok ok ok ok\n", + "output: [grant_type G N R All\n", + "year \n", + "2010 138906688.0 NaN 5.041371e+08 6.430438e+08\n", + "2011 127533696.0 7192804.0 5.324647e+08 6.671912e+08\n", + "2012 171878704.0 7779685.0 4.801055e+08 6.597638e+08\n", + "2013 203357200.0 8728330.0 5.113614e+08 7.234470e+08\n", + "2014 206222208.0 7858697.0 5.545942e+08 7.686751e+08\n", + "2015 133601200.0 8501187.0 5.514573e+08 6.935597e+08\n", + "All 981499712.0 40060704.0 3.134120e+09 4.155681e+09]\n", + "timestamp: 2023-09-22T10:32:20.714474\n", + "comments: []\n", + "exception: \n", + "\n", + "The status of the record above is: fail.\n", + "Please explain why an exception should be granted.\n", + "\n", + "INFO:acro:records:\n", + "uid: output_4\n", + "status: review\n", + "type: table\n", + "properties: {'method': 'crosstab'}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 14, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 
'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", + "command: safe_table = acro.crosstab(df.year, df.grant_type, values=missing, aggfunc=\"mean\")\n", + "summary: review; missing values found\n", + "outcome: grant_type G N R R/G\n", + "year \n", + "2010 missing missing missing missing\n", + "2011 missing missing \n", + "2012 missing \n", + "2013 missing missing \n", + "2014 missing missing \n", + "2015 missing missing missing \n", + "output: [grant_type G N R R/G\n", + "year \n", + "2010 9921906.0 0.000000 8420372.0 11636000.0\n", + "2011 8502247.0 125663.226562 7689140.5 16047500.0\n", + "2012 11458580.0 131859.062500 6896304.0 16810000.0\n", + "2013 13557147.0 150488.453125 7088096.0 16765625.0\n", + "2014 13748147.0 135494.781250 8118565.0 17845750.0\n", + "2015 11133433.0 149143.625000 10596385.0 18278624.0]\n", + "timestamp: 2023-09-22T10:32:56.949849\n", + "comments: []\n", + "exception: \n", + "\n", + "The status of the record above is: review.\n", + "Please explain why an exception should be granted.\n", + "\n", "INFO:acro:records:\n", "uid: output_7\n", "status: review\n", @@ -3018,34 +2819,21 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.364700e+05 1.999335e+05\n", - "R 8.006361e+06 3.228216e+07\n", + "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-08-14T14:15:48.940003\n", + "timestamp: 2023-09-22T10:33:07.188134\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " a message\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "\n", "INFO:acro:records:\n", "uid: output_8\n", "status: review\n", "type: table\n", "properties: {'method': 'pivot_table'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [[1, 0], 
[1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 8, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [[0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1], [3, 0], [3, 1]], 'threshold': [], 'p-ratio': [], 'nk-rule': []}}\n", "command: table = acro.pivot_table(\n", "summary: review; negative values found\n", "outcome: mean std\n", @@ -3060,74 +2848,48 @@ "grant_type \n", "G 1.141279e+07 2.283220e+07\n", "N 1.341800e+05 1.990196e+05\n", - "R 7.882230e+06 3.204558e+07\n", + "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-08-14T14:15:56.952843\n", + "timestamp: 2023-09-22T10:33:10.739040\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " and another\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "\n", "INFO:acro:records:\n", "uid: pivot_table\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 6, 'p-ratio': 1, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 3]], 'nk-rule': [[0, 3]]}}\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", - "summary: fail; threshold: 6 cells may need suppressing; p-ratio: 1 cells may need suppressing; 
nk-rule: 1 cells may need suppressing; \n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 ok ok ok threshold; p-ratio; nk-rule; \n", - "2011 ok ok ok threshold; \n", - "2012 ok ok ok threshold; \n", - "2013 ok ok ok threshold; \n", - "2014 ok ok ok threshold; \n", - "2015 ok ok ok threshold; \n", - "output: [grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0\n", - "2011 8502246.0 124013.859375 7716880.0 16047500.0\n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0\n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", - "2014 13748147.0 133198.250000 8277525.0 17845750.0\n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-08-14T14:14:55.245016\n", + "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", + "outcome: grant_type G N R R/G\n", + "year \n", + "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n", + "2011 ok ok ok threshold; \n", + "2012 ok ok ok threshold; \n", + "2013 ok ok ok threshold; \n", + "2014 ok ok ok threshold; \n", + "2015 ok ok ok threshold; \n", + "output: [grant_type G N R R/G\n", + "year \n", + "2010 9921906.0 NaN 8402284.0 NaN\n", + "2011 8502247.0 124013.859375 7716880.0 NaN\n", + "2012 11458580.0 131859.062500 6958050.5 NaN\n", + "2013 13557147.0 147937.796875 7202273.5 NaN\n", + "2014 13748147.0 133198.250000 8277525.5 NaN\n", + "2015 11133433.0 146572.187500 10812888.0 NaN]\n", + "timestamp: 2023-09-22T10:32:34.076384\n", "comments: []\n", "exception: \n", "\n", "The status of the record above is: fail.\n", "Please explain why an exception should be granted.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " and another\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "\n", "INFO:acro:records:\n", - "uid: output_13\n", + "uid: output_14\n", "status: review\n", "type: custom\n", "properties: {}\n", @@ -3138,26 
+2900,13 @@ "Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", - "timestamp: 2023-08-14T14:16:38.781407\n", + "timestamp: 2023-09-22T10:33:43.655255\n", "comments: ['This output is an image showing the relationship between X and Y']\n", "exception: \n", "\n", "The status of the record above is: review.\n", "Please explain why an exception should be granted.\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " image is not disclosive\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "\n", "INFO:acro:records:outputs written to: ACRO_RES\n" ] } @@ -3179,7 +2928,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "f78b5a08", "metadata": {}, "outputs": [ @@ -3230,7 +2979,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "df2a02e0", "metadata": {}, "outputs": [ @@ -3281,7 +3030,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "56d2b6a1", "metadata": {}, "outputs": [ diff --git a/test/test_initial.py b/test/test_initial.py index 244d3e0..716c6aa 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -549,7 +549,7 @@ def test_surv_func(acro): def test_zeros_are_not_disclosive(data, acro): """Test that zeros are handled as not disclosive when the parameter () is False.""" - acro_tables.ZEROS_ARE_DISCLOSIVE=False + acro_tables.ZEROS_ARE_DISCLOSIVE = False _ = acro.pivot_table( data, index=["grant_type"], @@ -568,8 +568,13 @@ def test_zeros_are_not_disclosive(data, acro): def test_crosstab_with_sum(data, acro): - """Test the crosstab with two columns and aggfunc sum""" + """Test the crosstab with two columns and aggfunc sum.""" acro = ACRO(suppress=False) - _ = acro.crosstab(data.year, [data.grant_type, data.survivor], values=data.inc_grants, aggfunc= "mean") + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="mean", + ) output = 
acro.results.get_index(0) - assert (6,8) == output.output[0].shape + assert (6, 8) == output.output[0].shape From a3080af8c0a30d8ec0af4b1bf528dbd48b3df415 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Fri, 22 Sep 2023 12:03:03 +0100 Subject: [PATCH 04/14] fixing some pylint isssues --- acro/acro_tables.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 7aa3e0a..94c20cc 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -814,10 +814,7 @@ def agg_p_percent(vals: Series) -> bool: total: float = sorted_vals.sum() if total <= 0.0 or vals.size <= 1: logger.debug("not calculating ppercent due to small size") - if ZEROS_ARE_DISCLOSIVE: - return True - else: - return False + return bool(ZEROS_ARE_DISCLOSIVE) sub_total = total - sorted_vals.iloc[0] - sorted_vals.iloc[1] p_val: float = sub_total / sorted_vals.iloc[0] if total > 0 else 1 return p_val < SAFE_PRATIO_P @@ -904,13 +901,13 @@ def apply_suppression( outcome_df += tmp_df except TypeError: logger.warning("problem mask %s is not binary", name) - except ValueError: - raise ValueError( - f"name is {name} \n mask is {mask} \n table is {table}", - name, - mask, - safe_df, + except ValueError as ve: + error_message = ( + f"An error occurred with the following details" + f":\n Name: {name}\n Mask: {mask}\n Table: {table}" ) + raise ValueError(error_message) from ve + outcome_df = outcome_df.replace({"": "ok"}) logger.info("outcome_df:\n%s", utils.prettify_table_string(outcome_df)) return safe_df, outcome_df From 7f6d833e434908ca4f8453aa48f4dee686282942 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Tue, 26 Sep 2023 12:31:59 +0100 Subject: [PATCH 05/14] add th two functions and refactor the code --- CHANGELOG.md | 1 + acro/acro_tables.py | 538 ++++++++++++++---- notebooks/test.ipynb | 1267 ++++++++++++++++++++++++++---------------- test/test_initial.py | 125 ++++- 4 files changed, 1294 insertions(+), 637 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 4bb4105..bcd96b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Changes: * Update pandas version dependency to 1.5 ([#150](https://github.com/AI-SDC/ACRO/pull/150)) * Refactor ACRO class ([#152](https://github.com/AI-SDC/ACRO/pull/152)) * Adding support for table function in R ([#153](https://github.com/AI-SDC/ACRO/pull/153)) +* Update table suppression when totals are true ([#160](https://github.com/AI-SDC/ACRO/pull/160)) ## Version 0.4.2 (Jul 13, 2023) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 94c20cc..83e7f32 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -64,6 +64,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals margins_name: str = "All", dropna: bool = True, normalize=False, + show_suppressed=False, ) -> DataFrame: """Compute a simple cross tabulation of two (or more) factors. By default, computes a frequency table of the factors unless an array of @@ -97,6 +98,8 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals - If passed 'index' will normalize over each row. - If passed 'columns' will normalize over each column. - If margins is `True`, will also normalize margin values. + show_suppressed : bool. 
default False + how the totals are being calculated when the suppression is true Returns ------- @@ -227,127 +230,34 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals if self.suppress: table = safe_table if margins: - # initialize a list to store queries for true cells - true_cell_queries = [] - for _, mask in masks.items(): - # drop the name of the mask - mask = mask.droplevel(0, axis=1) - # identify level names for rows and columns - index_level_names = mask.index.names - column_level_names = mask.columns.names - - # iterate through the masks to identify the true cells and extract queries - for column_level_values in mask.columns: - for index_level_values in mask.index: - if ( - mask.loc[index_level_values, column_level_values] - # == True - ): - if isinstance(index_level_values, tuple): - index_query = " & ".join( - [ - f"({level} == {val})" - if isinstance(val, (int, float)) - else f'({level} == "{val}")' - for level, val in zip( - index_level_names, index_level_values - ) - ] - ) - else: - index_query = " & ".join( - [ - f"({index_level_names} == {index_level_values})" - if isinstance( - index_level_values, (int, float) - ) - else ( - f"({index_level_names}" - f'== "{index_level_values}")' - ) - ] - ) - if isinstance(column_level_values, tuple): - column_query = " & ".join( - [ - f"({level} == {val})" - if isinstance(val, (int, float)) - else f'({level} == "{val}")' - for level, val in zip( - column_level_names, column_level_values - ) - ] - ) - else: - column_query = " & ".join( - [ - f"({column_level_names} == {column_level_values})" - if isinstance( - column_level_values, (int, float) - ) - else ( - f"({column_level_names}" - f'== "{column_level_values}")' - ) - ] - ) - query = f"{index_query} & {column_query}" - true_cell_queries.append(query) - - # delete the duplication - true_cell_queries = list(set(true_cell_queries)) - - # create dataframe from the index and columns parameters - if isinstance(index, list): - index_df = 
pd.concat(index, axis=1) - elif isinstance(index, pd.Series): - index_df = pd.DataFrame({index.name: index}) - if isinstance(columns, list): - columns_df = pd.concat(columns, axis=1) - elif isinstance(columns, pd.Series): - columns_df = pd.DataFrame({columns.name: columns}) - data = pd.concat([index_df, columns_df], axis=1) - - # apply the queries to the data - for query in true_cell_queries: - query = str(query).replace("['", "").replace("']", "") - data = data.query(f"not ({query})") - - # get the index and columns from the data after the queries are applied - try: - if isinstance(index, list): - index_new = [] - for _, val in enumerate(index): - index_new.append(data[val.name]) - else: - index_new = data[index.name] - - if isinstance(columns, list): - columns_new = [] - for _, val in enumerate(columns): - columns_new.append(data[val.name]) - else: - columns_new = data[columns.name] - - # apply the crosstab with the new index and columns - table = pd.crosstab( # type: ignore - index_new, - columns_new, - values=values, - rownames=rownames, - colnames=colnames, - aggfunc=aggfunc, - margins=margins, - margins_name=margins_name, - dropna=dropna, - normalize=normalize, + if show_suppressed: + table = manual_crossstab_with_totals( + table, + aggfunc, + index, + columns, + values, + rownames, + colnames, + margins, + margins_name, + dropna, + normalize, ) - except ValueError: - logger.info( - "All the cells in this data are discolsive." 
- " Thus suppression can not be applied" + else: + table = crosstab_with_totals( + masks, + aggfunc, + index, + columns, + values, + rownames, + colnames, + margins, + margins_name, + dropna, + normalize, ) - return None # record output self.results.add( @@ -901,12 +811,12 @@ def apply_suppression( outcome_df += tmp_df except TypeError: logger.warning("problem mask %s is not binary", name) - except ValueError as ve: + except ValueError as error: error_message = ( f"An error occurred with the following details" f":\n Name: {name}\n Mask: {mask}\n Table: {table}" ) - raise ValueError(error_message) from ve + raise ValueError(error_message) from error outcome_df = outcome_df.replace({"": "ok"}) logger.info("outcome_df:\n%s", utils.prettify_table_string(outcome_df)) @@ -987,3 +897,389 @@ def get_summary(sdc: dict) -> tuple[str, str]: summary = status logger.info("get_summary(): %s", summary) return status, summary + + +def get_queries(masks, aggfunc) -> list[str]: + """Returns a list of the boolean conditions for each true cell in the suppression masks. + + Parameters + ---------- + masks : dict[str, DataFrame] + Dictionary of tables specifying suppression masks for application. + aggfunc : str | None + The aggregation function + + Returns + ------- + str + The boolean conditions for each true cell in the suppression masks. 
+ """ + # initialize a list to store queries for true cells + true_cell_queries = [] + for _, mask in masks.items(): + # drop the name of the mask + if aggfunc is not None: + mask = mask.droplevel(0, axis=1) + # identify level names for rows and columns + index_level_names = mask.index.names + column_level_names = mask.columns.names + + # iterate through the masks to identify the true cells and extract queries + for column_level_values in mask.columns: + for index_level_values in mask.index: + if mask.loc[index_level_values, column_level_values]: + if isinstance(index_level_values, tuple): + index_query = " & ".join( + [ + f"({level} == {val})" + if isinstance(val, (int, float)) + else f'({level} == "{val}")' + for level, val in zip( + index_level_names, index_level_values + ) + ] + ) + else: + index_query = " & ".join( + [ + f"({index_level_names} == {index_level_values})" + if isinstance(index_level_values, (int, float)) + else ( + f"({index_level_names}" + f'== "{index_level_values}")' + ) + ] + ) + if isinstance(column_level_values, tuple): + column_query = " & ".join( + [ + f"({level} == {val})" + if isinstance(val, (int, float)) + else f'({level} == "{val}")' + for level, val in zip( + column_level_names, column_level_values + ) + ] + ) + else: + column_query = " & ".join( + [ + f"({column_level_names} == {column_level_values})" + if isinstance(column_level_values, (int, float)) + else ( + f"({column_level_names}" + f'== "{column_level_values}")' + ) + ] + ) + query = f"{index_query} & {column_query}" + true_cell_queries.append(query) + # delete the duplication + true_cell_queries = list(set(true_cell_queries)) + return true_cell_queries + + +def create_dataframe(index, columns) -> DataFrame: + """Combining the index and columns in a dataframe and return the datframe. + + Parameters + ---------- + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. 
+ columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + + Returns + ------- + Dataframe + Table of the index and columns combined. + """ + if isinstance(index, list): + index_df = pd.concat(index, axis=1) + elif isinstance(index, pd.Series): + index_df = pd.DataFrame({index.name: index}) + if isinstance(columns, list): + columns_df = pd.concat(columns, axis=1) + elif isinstance(columns, pd.Series): + columns_df = pd.DataFrame({columns.name: columns}) + data = pd.concat([index_df, columns_df], axis=1) + return data + + +def get_index_columns(index, columns, data) -> tuple[list | Series, list | Series]: + """Get the index and columns from the data dataframe. + + Parameters + ---------- + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + data : dataframe + Table of the index and columns combined. + + Returns + ------- + List | Series + The index extracted from the data. + List | Series + The columns extracted from the data. + """ + if isinstance(index, list): + index_new = [] + for _, val in enumerate(index): + index_new.append(data[val.name]) + else: + index_new = data[index.name] + + if isinstance(columns, list): + columns_new = [] + for _, val in enumerate(columns): + columns_new.append(data[val.name]) + else: + columns_new = data[columns.name] + return index_new, columns_new + + +def crosstab_with_totals( + masks, + aggfunc, + index, + columns, + values, + rownames, + colnames, + margins, + margins_name, + dropna, + normalize, +) -> DataFrame: + """Recalculate the crosstab table when margins are true and suppression is true. + + Parameters + ---------- + masks : dict[str, DataFrame] + Dictionary of tables specifying suppression masks for application. + aggfunc : str | None + The aggregation function. + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. 
+ columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + values : array-like, optional + Array of values to aggregate according to the factors. + Requires `aggfunc` be specified. + rownames : sequence, default None + If passed, must match number of row arrays passed. + colnames : sequence, default None + If passed, must match number of column arrays passed. + aggfunc : str, optional + If specified, requires `values` be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals + when margins is True. + dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False + Normalize by dividing all values by the sum of values. + - If passed 'all' or `True`, will normalize over all values. + - If passed 'index' will normalize over each row. + - If passed 'columns' will normalize over each column. + - If margins is `True`, will also normalize margin values. 
+ + Returns + ------- + DataFrame + Crosstabulation of data + """ + true_cell_queries = get_queries(masks, aggfunc) + data = create_dataframe(index, columns) + + # apply the queries to the data + for query in true_cell_queries: + query = str(query).replace("['", "").replace("']", "") + data = data.query(f"not ({query})") + + # get the index and columns from the data after the queries are applied + try: + index_new, columns_new = get_index_columns(index, columns, data) + # apply the crosstab with the new index and columns + table = pd.crosstab( # type: ignore + index_new, + columns_new, + values=values, + rownames=rownames, + colnames=colnames, + aggfunc=aggfunc, + margins=margins, + margins_name=margins_name, + dropna=dropna, + normalize=normalize, + ) + except ValueError: + logger.warning( + "All the cells in this data are discolsive." + " Thus suppression can not be applied" + ) + return None + return table + + +def manual_crossstab_with_totals( + table, + aggfunc, + index, + columns, + values, + rownames, + colnames, + margins, + margins_name, + dropna, + normalize, +) -> DataFrame: + """Recalculate the crosstab table when margins are true and suppression is true. + + Parameters + ---------- + table : Dataframe + The suppressed table. + aggfunc : str | None + The aggregation function. + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + values : array-like, optional + Array of values to aggregate according to the factors. + Requires `aggfunc` be specified. + rownames : sequence, default None + If passed, must match number of row arrays passed. + colnames : sequence, default None + If passed, must match number of column arrays passed. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals + when margins is True. 
+ dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False + Normalize by dividing all values by the sum of values. + - If passed 'all' or `True`, will normalize over all values. + - If passed 'index' will normalize over each row. + - If passed 'columns' will normalize over each column. + - If margins is `True`, will also normalize margin values. + + Returns + ------- + DataFrame + Crosstabulation of data + """ + if isinstance(aggfunc, list): + logger.warning( + "We can not calculate the margins with a list of aggregation functions. " + "Please create a table for each aggregation function" + ) + return None + elif aggfunc is None or aggfunc == "sum" or aggfunc == "count": + table = recalculate_margin(table, margins_name) + + elif aggfunc == "mean": + count_table = pd.crosstab( # type: ignore + index, + columns, + values=values, + rownames=rownames, + colnames=colnames, + aggfunc="count", + margins=margins, + margins_name=margins_name, + dropna=dropna, + normalize=normalize, + ) + # suppress the cells in the count by mimicking the suppressed cells in the table + count_table = count_table.where(table.notna(), other=np.nan) + # delete any columns from the count_table that are not in the table + columns_to_keep = table.columns + count_table = count_table[columns_to_keep] + count_table = count_table.sort_index(axis=1) + # recalculate the margins considering the nan values + count_table = recalculate_margin(count_table, margins_name) + # multiply the table by the count table + table[margins_name] = 1 + table.loc[margins_name, :] = 1 + multip_table = count_table * table + multip_table = multip_table.sort_index(axis=1) + # calculate the margins columns + table[margins_name] = ( + multip_table.drop(margins_name, axis=1).sum(axis=1) + / multip_table[margins_name] + ) + # calculate the margins row + if not isinstance(count_table.index, pd.MultiIndex): # single row + 
table.loc[margins_name, :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[margins_name, :] + ) + else: # multiple rows + table.loc[(margins_name, ""), :] = ( + multip_table.drop(margins_name, axis=0).sum() + / multip_table.loc[(margins_name, ""), :] + ) + # calculate the grand margin + if not isinstance(count_table.columns, pd.MultiIndex) and not isinstance( + count_table.index, pd.MultiIndex + ): # single column, single row + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name).sum().sum() + ) / multip_table.loc[margins_name, margins_name] + else: # multiple columns or multiple rows + table.loc[margins_name, margins_name] = ( + multip_table.drop(index=margins_name, columns=margins_name).sum().sum() + ) / multip_table.loc[margins_name, margins_name][0] + + elif aggfunc == "std": + table = table.drop(margins_name, axis=1) + table = table.drop(margins_name, axis=0) + logger.warning( + "The margins with the std agg func can not be calculated. " + "Please set the show_suppressed to false to calculate it." + ) + return table + return table + + +def recalculate_margin(table, margins_name) -> DataFrame: + """Recalculate the margins in a table. + + Parameters + ---------- + table : Dataframe + The suppressed table. 
+ margins_name : str, default 'All' + Name of the row/column that will contain the totals + + Returns + ------- + DataFrame + Table with new calculated margins + """ + table = table.drop(margins_name, axis=1) + rows_total = table.sum(axis=1) + table.loc[:, margins_name] = rows_total + if isinstance(table.index, pd.MultiIndex): + table = table.drop(margins_name, axis=0) + cols_total = table.sum(axis=0) + table.loc[(margins_name, ""), :] = cols_total + else: + table = table.drop(margins_name, axis=0) + cols_total = table.sum(axis=0) + table.loc[margins_name] = cols_total + return table diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index 6547872..1bc6777 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "e33fd4fb", "metadata": {}, "outputs": [], @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "c01cfe12", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "cc8d993a", "metadata": { "scrolled": true @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "4b8a77e2", "metadata": {}, "outputs": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "8722735f", "metadata": { "scrolled": true @@ -296,7 +296,7 @@ "[5 rows x 44 columns]" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "961684cb", "metadata": {}, "outputs": [ @@ -413,7 +413,7 @@ "2015 15 59 71 8" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "bb4b2677", "metadata": { 
"scrolled": true @@ -552,7 +552,7 @@ "2015 15 59 71 8" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -562,58 +562,6 @@ "safe_table" ] }, - { - "cell_type": "markdown", - "id": "ea5587ff", - "metadata": {}, - "source": [ - "### ACRO crosstab with supression and totals" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "dc2de220", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:acro:get_summary(): fail; threshold: 1071 cells suppressed; p-ratio: 918 cells suppressed; nk-rule: 815 cells suppressed; \n", - "INFO:acro:outcome_df:\n", - "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", - "charity |4||||Children | AD|FAM |||National 
| | AGE Cymru | |||Ac|t|||ion ||for A-T ||| | Age Exchange | | || | | | | || || |Age ||UK B|l||ack|pool|| and dist|rict Age U|K: |Runnym|||ede a|nd Spelthorn|e| Al M|a|dad Foundation | Alchemy Fo|undati|on | Alzheim|er's Society | Amos |Founda|tio|n | | | | | || ||||| | A|nt|ho|ny||||| N|||||||o||l|an Trust A|pples and Snakes Ar|thritis care As|thma UK Au|tistica | | | Ava| | Bat|tersea |dogs and cats ho||me Be|de||'|s|| Wo||rld | |Bla|con Community Tr|us|t || | British Association fo|r Adoption and Foster|ing British| Bee Keepers Association British |Deer| Society |British| Heart F|oundation ||Brit||is|||h Re|d Cros|s|||| || | || || | Bro|ken Rainbo||w (Dead|) Brushmill Li|mited | Bumblebee Con|servation Trus|t | ||| CYF | | | | | | | Cancer R|esearch U|K | |Carers Trust S|wansea | | || Carla Lan|e Animals in N|eed Catholic Institute for interna|tional| relations Change Agents UK Charity| Ch|anges | |Child Brain Injury |Trust | Children and Famili|es Across Borders Children in ne|ed Christian Aid | Civitas Limited | Collection Trust | | Community Music |L||imited Community matters| Cot|swold| Friends | |Cr|isi|s UK | |Crossroads Care East An|glia (D|ead) Cumb|ria Rural Housi|ng Tr|ust | C|ycling Pr|ojects | | Cystic fibrosis |t|rust Debra | |Deer Init|ia|tive |Limited Dogs Trust | | Dogs for the disa|bled Drugscope (Dead) | E.I.L Limited | Eaves (Dead) | | Eden Trust | Edupoor| Limited | | Emmau|s Mossley | English |national ballet Essex Di|sabled Peo|ple's Association LTD |(Dead) Everychild | Fairf|ield Croydon (Dead) | Fair|wood Trust | | | |Fisherbeck Charitable Trust F|ood|cycle Forev|er Houn|ds Trust | Friend o|f the |Animals | Friends of| the Earth Trust | Gardeni|ng |Leave Ltd (Dead) | Hawk|spring (dead) |Headway Dor|set | H|edge Funds| Care UK He|len and Douglas House He|lp for Heroes Hen|ry | | Hi||scox |Foundation | | H|ome Start Bury | Hop|e Now| Limited Hull and E|ast Yorks| Comm|unity fund| Human |Research Trust Hyndbur|n Us|ed Fur|niture Store 
IdeasTap| (Dea|d) Key house| project |(Dead) Kids Compan|y (Dead) Kidscape Ca|mpaign fo|r Chil|dren La|ndlife | | Learning S|outh West | | Living Spr|ings | Macmillian C|ancer S|upport Ma|rie Curie C|ancer Care Meeting point| trust limited Mencap | Milestones Tru|st | Mind | | NAM Publication|s NHS Charitable |fund|s NSPCC | N|WG Network National Kidney Federation | Natio|nal Trust | | Natio|nal playbus ass|ociation Northern refugee cent|re (Dea|d) OMID Foundation | | Oakley regenera|tion | Ocean| Youth Trust South | Order of St John | PAC|E (Dead) | | Panthe|ra Wi|ldlife Trust Limi|ted Por|tmouth mi|nd ltd (Dead) | Prison Reform Trust | Prospects | RICE | RNLI | RSPB | R|SPCA | | Rare Br|eed |Survival Trust | Redeeming Our Communities | Respond | Revere Char|itable Trust |Riders for Health | R|iversi|de Animal C|entre R|oyal Forestry Society S|amaritans San|dra Charit|able Trust Save| the children Scil|l Scop|e Skid|z Somers|et Wildlife trust Space Youth |Project St Monica's Tr|ust | St Peter's |Hospice | S|tarlight ch|ildr|en's foundation Sue Ryder | Tee|nage Canc|e|r|| |T|ru||s|t| The Anchor| Trus|t The Mary Ros|e Trust The Prince's T|rust | The Rowan Arts Project T|he Royal British L|egion | The Salvation Army | The Sobell Foundation | The Vine Project | The Vivat Trust Ltd Tru|st for Sustainable Liv|ing Volunteer| and commu|nity ac|tion WWF | Wal|sall dome|stic viol|ence forum |ltd W|ill Woodlands | Worcestersh|ire Lifestyles (Dea|d) Yorkshire an|d Humbe|r peopl|e unite|d agains|t crime age UK |All|\n", - "year | |||| | | ||| | | | ||| | ||| || ||| | | | || | | | | || || | || | || | || | | | ||| | | | | | | | | | | | | | | | | | | | || ||||| | | | | ||||| ||||||| || | | | | | | | | | | | | || | || | || || | | | | | || | | | | | | | | | || || ||| | | |||| || | || || | | || | | | | | | ||| | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
| | | | | | | | | | | | | | | | | | | | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | || | | || | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |\n", - "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n", - "2010 | ||||threshold; p-ratio; nk-rul|e; | ||| threshold; p-r|atio|; threshold; p-r|atio; ||| | ||| || thresho|||ld|; p-ratio; threshold;| |p||-|rat|i|o|;|| || | || | || th|resh||old; p-ra|tio; | | thre|||shold|; p-ratio; | | | | threshold; p-ratio;| |thresh|old; p-ratio|; | threshold; p-ratio|; | t|hre|shol|d|; p|-r|a|t||i|||||o|; |th|re|sh|||||ol|||||||d||;| 
p-ratio; nk-rule; | threshold; p-ratio; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; | th|reshold; p|-rati|o; | |threshold; p-ratio; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | | threshold; p-ratio; | th|reshold; p-ratio; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; th|reshold|; p-ratio; thr|eshold; p-rati|o;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| | threshold; p-|ratio; threshold; p-ratio; nk-rule; | | threshold; p-r|atio; | | threshold; p-ratio; | threshold|; p-ratio;| threshold|; p-ratio; threshold; p-|ratio; nk-rule; threshold; p-rat|io; threshold; p-ratio|; threshol|d; p-rati|o; thresh|o||ld; p-ratio; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | | |threshold; |p-ratio;| thresh|o|ld; p-ratio; threshold; p-ratio; nk-rul|e; | | t|hresh|old; p-ratio; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold|; p-ratio; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; | threshold; |p-rati|o; | threshold; p-rati|o; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; | | threshold; p-ratio; | th|reshold; p-ratio;| | thre|shold; p-ratio; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; | t|hreshold; p-ratio; | threshold; p-ratio; t|hreshold; p-ratio; nk-rule; | | thre|shold; p-ratio; || | threshol|d|; p-ratio; |threshold; p-rat|io; nk-rule; | | threshold; p-ratio; threshold|; p-ratio|; nk-|rule; | | threshold; p-ratio; thresh|old;| p-rat|io; 
nk-rule; | thr|eshold; p-ratio; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; t|hreshold;| p-rat|io; | |thresho|ld; p-ratio; threshold|; p-ratio; nk-rul|e;| |thresh|old; p-ratio; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; thr|eshold; p-ratio; threshold; p-|ratio; nk-rule; thre|shol|d; p-ratio; threshold; p-|ratio; nk-rule; | thres|hold; p-ratio; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; | threshold; p-ratio; threshold; p-ratio|; thre|shold; p-ratio|; nk-rule|; | threshold;| p-ratio; threshold; p-ratio; |nk-rule|; threshold; |p-rati|o; threshold; p-r|atio; n|k-rule; | threshold; p-r|atio; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; | t|hreshold; p-ratio|; th|reshold; |p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; | t|hreshold; p-ratio|; threshold; p-ratio|; threshold; p-ratio; | t|hreshold; p-ratio; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; | threshold; p-ratio; |threshold; p-ratio; nk-rule; | thr|eshold; p-ratio; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; | threshold; p-ratio; th|reshold; p-ratio; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; | th|resh|old; p-ratio; t|hreshold; p-ratio; th|reshold; |p|-||r|a|ti||o|;| nk-rule; |thres|hold; p-ratio; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; | threshold; p-ra|tio; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | | threshold; p-rati|o; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", - "2011 | 
||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| | ||| || thresho|||ld|; p-ratio; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | || | || th|resh||old; p-ra|tio; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; | | threshold; p-ratio; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; 
p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; 
p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", - "2012 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | || | || th|resh||old; p-ra|tio; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| 
threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; | | threshold; p-ratio; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; 
| threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", - "2013 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| 
|| | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| threshold|; p-|r|ati|o|; nk-rule;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; 
p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; threshold;| p-ratio; nk-rule; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; 
nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; nk-rule; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | threshold; |p-ratio|; nk-ru|le; | | threshold; p-ratio; nk-rule; | ok|\n", - "2014 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-ratio; nk-|rule; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | th|reshold; p-ratio|; |nk-||r|ule; threshold; p-ratio; n|k-rule; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; th|reshold; p||-ratio;| nk-rule; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| |thre|s|hol|d|; p-ratio;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| 
threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-ratio; nk|-rule; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshold; p-rati|o; nk-rule; threshold; p-ratio|; nk-rule; threshold; p-ratio;| nk-rul|e; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | thresh|old|; p-ratio; nk-rule;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; threshold|; p-ratio|; nk-|rule; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; t|hreshold; p-ratio; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; threshold; |p-ratio;| nk-rule; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; 
thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; p-ratio; |nk-rule|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ratio; nk-r|ule; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; |thresh|old; p-ratio; nk-rule|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshold; p-rati|o; nk-rule; |thresh|old; p-rati|o; nk-rule; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio; n|k-rule; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; nk-rule; |threshold; p-ratio|; nk-rule;| threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-ratio; nk-|rule; threshold; p-ratio; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | th|reshold|; p-rat|io; | | threshold; p-ratio; nk-rule; | ok|\n", - "2015 | ||||threshold; p-ratio; nk-rul|e; t|hres|||hold; p-ratio; nk-|rule|; threshold; p-r|atio; ||| t|h|||resh||old; p-rat|||io|; nk-rule; threshold; p-ratio;| |n||k|-ru|l|e|;|| || | thr||esho|l||d; |p-ra||tio; nk-r|ule; thre|sho|ld; p-|||ratio|; nk-rule; | | thr|e|shold; p-ratio; nk-rule;| threshold|; p-ra|tio; nk-rule|; thresh|old; p-ratio; nk-rule|; 
thre|shold;| p-|rati|o|; n|k-|r|u||l|||||e|; |th|re|sh|||||ol|||||||d||;| p-ratio; nk-rule; |threshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; t|hreshold; |p-ratio; n|k-rul|e; th|reshold|; p-ratio; nk-rule; th|reshold|; p-ratio; nk-ru||le; t|hr||e|s||hol||d; p-ratio; |nk-rule; | | threshold|; |p-r||a|tio; threshold; p|-ratio; | thresh|old; p-ratio; nk-rule; thresho|ld; |p-ratio; nk-rule; | thresh|old; p-r|atio; nk-rule; || thr||es|||hold|; p-ra|t||||io;|| nk|-||ru||l|e; | thr||eshold;| p-ratio; threshold; |p-ratio|; nk-rule; threshold; p|-ratio; nk-rul|e;| ||| |thre|s|hol|d|; p-ratio;| | thresho|ld; p-rat|io;| nk-rule; | threshold; p-|ratio; nk-|r|ule;|| threshol|d; p-ratio; nk|-rule; threshold; p-ratio; nk-rule; | | threshold; p-ratio; nk-|rule; t|hreshol|d; p-ratio; nk-rule; | threshold; p-ratio|; nk-rule;| threshold; p-ratio|; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-ratio; nk-ru|le; threshold; p-ratio; nk-rule|; threshold; p-rati|o; nk-rul|e; threshold; p-ra|t||io; nk-rule; threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk|-rule; | t|hre|shold; p-ratio; nk|-rule; | threshold; p-|ratio; | thr|eshold; p-ratio|; nk-|rule; | |threshold|; p-ratio; |nk-rule;| threshold; p-ra|t|io; nk-rule; threshold; p-ratio; nk-rul|e; | threshol|d;| p-ra|tio; nk-rule; threshold; p-rat|io; |nk-rule; threshold; p-rat|io; nk-rule; threshol|d; p-ratio; threshold; p-ratio|; nk-rule; threshold;| p-rati|o; threshold; p-ratio;| nk-rule; thresh|old; p-ratio; |nk-rul|e; thre|shold; p-ratio; nk-rul|e; thresho|ld; p-ratio; nk-rule; thresho|ld; p-rati|o; nk-rule; | threshold; p-ratio; nk-r|ule; thre|shold; p-ratio; nk-ru|le; thr|eshold; p-rat|io; |nk|-rule; | threshold; p-ratio; nk-rule; |thr|eshold; p-ratio; nk-rule; thre|shold; |p-ratio; nk-rule;| thresho|ld; p-|ratio; nk-rule; | threshold|; p-ratio; nk-rule; | | |threshold; p-ratio;| thr|eshold; p-ratio; nk-rule; | threshold;| p-ratio|; nk-rule; |threshold;| p-ratio; nk-rule; t|hreshold; 
p-ratio; nk-rule; t|hreshold; p-ratio; nk-rule; th|resho|ld; p-|ratio; nk-rule; t||hresh|old; p-rati|o|; nk-rule; |threshold; p-rat|io; nk-rule; th|resho|ld; p-ratio; nk-rule; |threshold|; p-r|atio; | thres|hold; p-ratio; nk-rule; thresh|old;| p-rat|io; nk-rule; thresho|ld; p|-ratio; nk-rule; threshol|d; p-rati|o; nk-rule; t|hreshold; p-ratio; threshold;| p-ratio;| nk-ru|le; t|hreshold|; p-rat|io; nk-rule; threshold|; p-ratio; nk-rul|e;| threshold|; p-ra|tio; nk-rule; th|reshold;| p-ratio; t|hreshold; p|-ratio; nk-rule; threshold; p|-ratio; nk-rule; threshold; p-|ratio; nk-rule; threshold; p-|rati|o; nk-rule; threshold; p-|ratio; nk-rule; | threshold; p-r|atio; nk-rule; threshold; p-r|atio|; nk-rule; threshold; p-ratio; nk-r|ule; |threshold; p-ratio; nk-rule; threshold; p-ratio; nk-rule|; thre|shold; p-ratio|; nk-rule|; thre|shold; p-ratio;| nk-rule; threshold; |p-ratio|; threshold; p-ratio; |nk-rul|e; threshold; p-r|atio; n|k-rule; thre|shold; p-ratio; nk-|rule; threshold; p-ra|tio; th|reshold; p-ra|tio; nk-rul|e; thres|hold;| p-ratio; nk-rule|; th|reshold; |p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-ratio; nk-rul|e; threshold; p-rati|o; threshold; p-ratio; nk-rul|e; | | threshold; p-ratio|; thresh|old;| p-ratio; nk-rule|; threshold; p-ratio; nk-rule|; threshold; p-ratio; nk-rule; | threshold;| p-ratio; nk-rule; | threshol|d; p-ratio; | | threshol|d; p-ratio; |threshold; p-ratio; nk-rule; |threshold; p-ratio; nk-rule; th|reshold; p|-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; thr|eshold; p-ratio; nk-rule; | threshold; p-ratio; thres|hold; p-ratio; nk-rule; threshold; |p-ratio; nk-rule; thre|shold; p|-ratio; threshold;| p-ratio; |nk-rule; |threshold; |p-ra|tio; nk-rule; threshold;| p-ratio; nk-rule; th|reshold; |p|-||r|a|ti||o|;| nk-rule; threshold|; p-r|atio; nk-rule; threshold; |p-ratio; nk-rule; threshold; p-|ratio;| nk-rule; threshold; p-ratio; |threshold; p-ratio|; nk-rule;| 
threshold; p-ratio;| nk-rule; threshold; p-ratio; n|k-rule; threshold; p-r|atio; threshold; p-ratio; th|reshold; p-ratio; nk-r|ule; threshol|d; p-ratio|; nk-ru|le; threshold; p-ratio; nk-ru|le; th|reshold; |p-ratio; |nk-rule; | |threshold; p-ratio; nk-rul|e; threshold;| p-ratio; nk-rule; | th|reshold|; p-rat|io; | | threshold; p-ratio; nk-rule; | ok|\n", - "All | |||| threshol|d; | ||| thres|hold|; thres|hold; ||| | ||| || ||| |threshold; t|h|r||e|sho|l|d|;|| || | || | || | || thresh|old; | | ||| |threshold; | | | | threshold;| | | threshold|; | threshold|; | | | | |thr|es|h|o||l|||||d|; | | | ||||| ||||||| || | threshold; | threshold; | threshold; | threshold; | | thr|eshol|d; | | threshold; | | thresho||ld; | || | || || th|reshold; | | |th|res||h|old; thr|eshold; | | threshold; | | threshold; | | | threshold; || || ||| | | |||| t||hre|s||ho||l|d; | || t|hreshold; | |threshold; | threshol|d;| ||| | | | | |threshold;| | | | t|hreshold; | | thres|h|old;|| | thre|shold; threshold; | | thres|hold; | | threshold; | |threshold;| |threshold; | threshold; thresho|ld; threshold|; | threshol|d; | || threshold; | threshold; | | thre|shold; | | | thre|shold; | thre|shold; | | |thres|hold; | | | th|reshold;| | | threshold; threshol|d; | | | | threshold; | th|reshold; | threshold; | threshold; |threshold; t|hreshol|d; t|hreshold; | th|reshol|d; | threshol|d; | threshold; | | threshold; | thresh|old; | thresho|ld; | | th|re|shold; | threshold; | | threshold; | | threshold;| | | threshold; | | threshold; | | | threshold;| | threshold; | | |threshold; | | threshold; | threshold; | threshold; | | | threshold; || | | |threshold; | | threshold; | | threshold; | |thres|hold; | | threshold; | | | threshold; | | threshold; | | threshold; | threshold; | t|hresho|ld; | | | threshold; | threshol|d;| | | threshold; | t|hreshold; | | threshold; | threshold; | threshold; | | threshold; | threshold; | | threshold; | |threshold; thresh|old; | threshold; threshold|; | |threshold|; | t|hreshold; 
th|reshold|; th|reshol|d; | thr|eshold; | thres|hold; thresh|old; | | threshol|d; | | threshold|; | | threshol|d; threshol|d; threshol|d; threshol|d; threshol|d; threshol|d; | | threshold|; | | threshold|; threshold|; threshold; | | threshold; | | threshold; | | | threshold; | threshold; | threshold; | | threshold; | threshold; | threshold; | threshold; | threshold; | threshold; | threshold; | thr|eshold; | th|reshold; | | | threshold; | threshold; | | | || | | || |t|hreshold; | | threshold; | threshold; | t|hreshold; threshold; | |threshold;| t|hreshold; thr|eshold; thres|hold; threshold; | thresh|old; | |thresho|ld; thresho|ld; | | th|reshold; | | threshol|d; | threshold; | | |thresho|ld; | | threshold; | ok|\n", - "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---|\n", - "\n", - "INFO:acro:All the cells in this data are discolsive. Thus suppression can not be applied\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "None\n" - ] - } - ], - "source": [ - "acro.suppress = True\n", - "table = acro.crosstab(\n", - " df.year, df.grant_type, values=df.inc_total, aggfunc=\"mean\", margins=all\n", - ")\n", - "print(table)" - ] - }, { "cell_type": "markdown", "id": "6d4730c4", @@ -624,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "37ddb939", "metadata": {}, "outputs": [ @@ -646,7 +594,7 @@ "2015 | ok | ok | ok | threshold; |\n", "---------------------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_2\n" + "INFO:acro:records:add(): output_1\n" ] }, { @@ -741,7 +689,7 @@ "2015 11133433.0 146572.187500 10812888.0 NaN" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -753,6 +701,72 @@ "safe_table" ] }, + { + "cell_type": "markdown", + "id": "0c695e09", + "metadata": {}, + "source": [ + "### ACRO crosstab with supression and totals" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ef42beb6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:get_summary(): fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \n", + "INFO:acro:outcome_df:\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |All|\n", + "survivor |Dead in 2015 Alive in 2015 |Alive in 2015 |Dead in 2015 Alive in 2015 |Alive in 2015 | |\n", + "year | | | | | |\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "2010 | threshold; p-ratio; 
nk-rule; ok | threshold; p-ratio; | ok ok | threshold; p-ratio; nk-rule; | ok|\n", + "2011 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2012 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2013 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2014 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", + "2015 | threshold; p-ratio; nk-rule; threshold; | ok | ok ok | threshold; | ok|\n", + "All | ok ok | ok | ok ok | ok | ok|\n", + "------------------------------------------------------------------------------------------------------------------------------------------------|\n", + "\n", + "INFO:acro:records:add(): output_2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "grant_type G N R All\n", + "survivor Alive in 2015 Alive in 2015 Dead in 2015 Alive in 2015 \n", + "year \n", + "2010 11571583.0 NaN 1385162.500 22436528.0 8930501.0\n", + "2011 10624083.0 124013.859375 1380839.250 19596956.0 4799615.5\n", + "2012 14319667.0 131859.062500 1354274.750 17465130.0 4712294.0\n", + "2013 16943250.0 147937.796875 1409097.250 18547244.0 5094428.0\n", + "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", + "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", + "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5\n" + ] + } + ], + "source": [ + "acro.suppress = True\n", + "table = acro.crosstab(\n", + " df.year,\n", + " [df.grant_type, df.survivor],\n", + " values=df.inc_grants,\n", + " aggfunc=\"mean\",\n", + " margins=True,\n", + ")\n", + "print(table)" + ] + }, { "cell_type": "code", "execution_count": 10, @@ -915,29 +929,198 @@ "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:169: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " if t_values[col].sum() == 0:\n" - ] - }, - { - 
"ename": "ValueError", - "evalue": "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 21\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m safe_table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 2\u001b[0m df\u001b[39m.\u001b[39;49myear, df\u001b[39m.\u001b[39;49mgrant_type, values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants, aggfunc\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mmean\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstd\u001b[39;49m\u001b[39m\"\u001b[39;49m], margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m safe_table\n", - "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:169\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize)\u001b[0m\n\u001b[0;32m 167\u001b[0m \u001b[39mif\u001b[39;00m dropna \u001b[39mor\u001b[39;00m margins:\n\u001b[0;32m 168\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m t_values\u001b[39m.\u001b[39mcolumns:\n\u001b[1;32m--> 169\u001b[0m \u001b[39mif\u001b[39;00m t_values[col]\u001b[39m.\u001b[39;49msum() \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m:\n\u001b[0;32m 170\u001b[0m t_values \u001b[39m=\u001b[39m t_values\u001b[39m.\u001b[39mdrop(col, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 171\u001b[0m t_values \u001b[39m=\u001b[39m t_values \u001b[39m<\u001b[39m THRESHOLD\n", - "File 
\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\generic.py:1527\u001b[0m, in \u001b[0;36mNDFrame.__nonzero__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1525\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[0;32m 1526\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__nonzero__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1528\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe truth value of a \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m is ambiguous. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1529\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUse a.empty, a.bool(), a.item(), a.any() or a.all().\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1530\u001b[0m )\n", - "\u001b[1;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.01.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502247.0124013.8593757716880.016047500.05303808.01.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.02.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.517845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.405324e+07
\n", + "
" + ], + "text/plain": [ + " mean \\\n", + "grant_type G N R R/G All \n", + "year \n", + "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.0 \n", + "2011 8502247.0 124013.859375 7716880.0 16047500.0 5303808.0 \n", + "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.0 \n", + "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", + "2014 13748147.0 133198.250000 8277525.5 17845750.0 6117054.5 \n", + "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", + "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", + "\n", + " std \\\n", + "grant_type G N R R/G \n", + "year \n", + "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", + "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", + "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", + "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", + "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", + "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", + "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", + "\n", + " \n", + "grant_type All \n", + "year \n", + "2010 2.727398e+07 \n", + "2011 2.137658e+07 \n", + "2012 2.026400e+07 \n", + "2013 2.251787e+07 \n", + "2014 2.641722e+07 \n", + "2015 2.784636e+07 \n", + "All 2.405324e+07 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "safe_table = acro.crosstab(\n", + "safe_table = pd.crosstab(\n", " df.year, df.grant_type, values=df.inc_grants, aggfunc=[\"mean\", \"std\"], margins=True\n", ")\n", "safe_table" @@ -963,17 +1146,18 @@ "text": [ "INFO:acro:get_summary(): review; missing values found\n", "INFO:acro:outcome_df:\n", - "--------------------------------------------------|\n", - "grant_type |G |N |R |R/G |\n", - "year | | | | |\n", - "--------------------------------------------------|\n", - "2010 | missing | missing | missing | missing|\n", - "2011 | | missing | missing | |\n", - "2012 | | | 
missing | |\n", - "2013 | | missing | missing | |\n", - "2014 | | missing | missing | |\n", - "2015 | missing | missing | missing | |\n", - "--------------------------------------------------|\n", + "-------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |All|\n", + "year | | | | | |\n", + "-------------------------------------------------------|\n", + "2010 | missing | missing | missing | missing | |\n", + "2011 | | missing | missing | | |\n", + "2012 | | | missing | | |\n", + "2013 | | missing | missing | | |\n", + "2014 | | missing | missing | | |\n", + "2015 | missing | missing | missing | | |\n", + "All | | | | | |\n", + "-------------------------------------------------------|\n", "\n", "INFO:acro:records:add(): output_4\n" ] @@ -1003,6 +1187,7 @@ " N\n", " R\n", " R/G\n", + " All\n", " \n", " \n", " year\n", @@ -1010,6 +1195,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -1019,6 +1205,7 @@ " 0.000000\n", " 8420372.0\n", " 11636000.0\n", + " 8320154.5\n", " \n", " \n", " 2011\n", @@ -1026,6 +1213,7 @@ " 125663.226562\n", " 7689140.5\n", " 16047500.0\n", + " 5310392.0\n", " \n", " \n", " 2012\n", @@ -1033,6 +1221,7 @@ " 131859.062500\n", " 6896304.0\n", " 16810000.0\n", + " 5220580.5\n", " \n", " \n", " 2013\n", @@ -1040,6 +1229,7 @@ " 150488.453125\n", " 7088096.0\n", " 16765625.0\n", + " 5578657.0\n", " \n", " \n", " 2014\n", @@ -1047,6 +1237,7 @@ " 135494.781250\n", " 8118565.0\n", " 17845750.0\n", + " 6072600.0\n", " \n", " \n", " 2015\n", @@ -1054,20 +1245,30 @@ " 149143.625000\n", " 10596385.0\n", " 18278624.0\n", + " 6442131.0\n", + " \n", + " \n", + " All\n", + " 11412787.0\n", + " 136158.859375\n", + " 8006360.5\n", + " 16648273.0\n", + " 5968295.5\n", " \n", " \n", "\n", "" ], "text/plain": [ - "grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 0.000000 8420372.0 11636000.0\n", - "2011 8502247.0 125663.226562 7689140.5 16047500.0\n", - "2012 11458580.0 131859.062500 6896304.0 16810000.0\n", - "2013 
13557147.0 150488.453125 7088096.0 16765625.0\n", - "2014 13748147.0 135494.781250 8118565.0 17845750.0\n", - "2015 11133433.0 149143.625000 10596385.0 18278624.0" + "grant_type G N R R/G All\n", + "year \n", + "2010 9921906.0 0.000000 8420372.0 11636000.0 8320154.5\n", + "2011 8502247.0 125663.226562 7689140.5 16047500.0 5310392.0\n", + "2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5\n", + "2013 13557147.0 150488.453125 7088096.0 16765625.0 5578657.0\n", + "2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n", + "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", + "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5" ] }, "execution_count": 13, @@ -1081,7 +1282,9 @@ "missing = df.inc_grants.copy()\n", "missing[0:10] = np.NaN\n", "\n", - "safe_table = acro.crosstab(df.year, df.grant_type, values=missing, aggfunc=\"mean\")\n", + "safe_table = acro.crosstab(\n", + " df.year, df.grant_type, values=missing, aggfunc=\"mean\", margins=True\n", + ")\n", "safe_table" ] }, @@ -1654,10 +1857,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00 \n", + " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 10:33:12 Log-Likelihood: -14495. \n", + " Time: 12:25:20 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1712,8 +1915,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 10:33:12 Log-Likelihood: -14495.\n", + "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", + "Time: 12:25:20 Log-Likelihood: -14495.\n", "No. 
Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -1766,7 +1969,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "cc90f7c9", "metadata": {}, "outputs": [ @@ -1774,14 +1977,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:acro:records:add(): output_11\n" + "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", + "INFO:acro:records:add(): output_10\n" ] }, { @@ -1799,10 +1996,10 @@ " Method: Least Squares F-statistic: 2261. \n", "\n", "\n", - " Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00 \n", + " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 10:33:21 Log-Likelihood: -14495. \n", + " Time: 12:25:20 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1857,8 +2054,8 @@ "Dep. Variable: inc_activity R-squared: 0.894\n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", - "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 10:33:21 Log-Likelihood: -14495.\n", + "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", + "Time: 12:25:20 Log-Likelihood: -14495.\n", "No. 
Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -1884,7 +2081,7 @@ "\"\"\"" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1906,7 +2103,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "5b1a1611", "metadata": {}, "outputs": [ @@ -1921,7 +2118,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:records:add(): output_12\n" + "INFO:acro:records:add(): output_11\n" ] }, { @@ -1948,10 +2145,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2140 \n", + " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140 \n", "\n", "\n", - " Time: 10:33:24 Log-Likelihood: -400.46 \n", + " Time: 12:25:20 Log-Likelihood: -400.46 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -1989,8 +2186,8 @@ "Dep. Variable: survivor No. Observations: 811\n", "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2140\n", - "Time: 10:33:24 Log-Likelihood: -400.46\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140\n", + "Time: 12:25:20 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", @@ -2009,7 +2206,7 @@ "\"\"\"" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2037,7 +2234,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "dcf30f8f", "metadata": {}, "outputs": [ @@ -2045,8 +2242,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", - "INFO:acro:records:add(): output_13\n" + "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:acro:records:add(): 
output_12\n" ] }, { @@ -2073,10 +2276,10 @@ " Method: MLE Df Model: 4 \n", "\n", "\n", - " Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2187 \n", + " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187 \n", "\n", "\n", - " Time: 10:33:26 Log-Likelihood: -398.07 \n", + " Time: 12:25:20 Log-Likelihood: -398.07 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2114,8 +2317,8 @@ "Dep. Variable: survivor No. Observations: 811\n", "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", - "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 0.2187\n", - "Time: 10:33:26 Log-Likelihood: -398.07\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187\n", + "Time: 12:25:20 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", @@ -2134,7 +2337,7 @@ "\"\"\"" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -2154,7 +2357,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "ec960039", "metadata": { "scrolled": true @@ -2187,7 +2390,7 @@ "2013 15 59 71 8\n", "2014 15 59 71 8\n", "2015 15 59 71 8]\n", - "timestamp: 2023-09-22T10:32:14.193937\n", + "timestamp: 2023-09-26T12:25:19.155653\n", "comments: []\n", "exception: \n", "\n", @@ -2196,35 +2399,6 @@ "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: table = acro.crosstab(df.year,df.grant_type, values=df.inc_grants, aggfunc=\"sum\", margins=True)\n", - "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", - "outcome: grant_type G N R R/G All\n", - 
"year \n", - "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", - "2011 ok ok ok threshold; ok\n", - "2012 ok ok ok threshold; ok\n", - "2013 ok ok ok threshold; ok\n", - "2014 ok ok ok threshold; ok\n", - "2015 ok ok ok threshold; ok\n", - "All ok ok ok ok ok\n", - "output: [grant_type G N R All\n", - "year \n", - "2010 138906688.0 NaN 5.041371e+08 6.430438e+08\n", - "2011 127533696.0 7192804.0 5.324647e+08 6.671912e+08\n", - "2012 171878704.0 7779685.0 4.801055e+08 6.597638e+08\n", - "2013 203357200.0 8728330.0 5.113614e+08 7.234470e+08\n", - "2014 206222208.0 7858697.0 5.545942e+08 7.686751e+08\n", - "2015 133601200.0 8501187.0 5.514573e+08 6.935597e+08\n", - "All 981499712.0 40060704.0 3.134120e+09 4.155681e+09]\n", - "timestamp: 2023-09-22T10:32:20.714474\n", - "comments: []\n", - "exception: \n", - "\n", - "uid: output_2\n", - "status: fail\n", - "type: table\n", - "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", "outcome: grant_type G N R R/G\n", @@ -2243,16 +2417,58 @@ "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.5 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-09-22T10:32:34.076384\n", + "timestamp: 2023-09-26T12:25:19.230978\n", "comments: []\n", "exception: \n", "\n", - "uid: output_3\n", + "uid: output_2\n", "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 
'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n", + "command: table = acro.crosstab(\n", + "summary: fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \n", + "outcome: grant_type G N \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n", + "2011 threshold; p-ratio; nk-rule; ok ok \n", + "2012 threshold; p-ratio; nk-rule; ok ok \n", + "2013 threshold; p-ratio; nk-rule; ok ok \n", + "2014 threshold; p-ratio; nk-rule; ok ok \n", + "2015 threshold; p-ratio; nk-rule; threshold; ok \n", + "All ok ok ok \n", + "\n", + "grant_type R R/G All \n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 ok ok threshold; p-ratio; nk-rule; ok \n", + "2011 ok ok threshold; ok \n", + "2012 ok ok threshold; ok \n", + "2013 ok ok threshold; ok \n", + "2014 ok ok threshold; ok \n", + "2015 ok ok threshold; ok \n", + "All ok ok ok ok \n", + "output: [grant_type G N R All\n", + "survivor Alive in 2015 Alive in 2015 Dead in 2015 Alive in 2015 \n", + "year \n", + "2010 11571583.0 NaN 1385162.500 22436528.0 8930501.0\n", + "2011 10624083.0 124013.859375 1380839.250 19596956.0 4799615.5\n", + "2012 14319667.0 131859.062500 1354274.750 17465130.0 4712294.0\n", + "2013 
16943250.0 147937.796875 1409097.250 18547244.0 5094428.0\n", + "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", + "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", + "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", + "timestamp: 2023-09-26T12:25:19.445330\n", + "comments: []\n", + "exception: \n", + "\n", + "uid: output_3\n", + "status: fail\n", + "type: table\n", + "properties: {'method': 'crosstab'}\n", + "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", + "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", "summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "outcome: grant_type G N R R/G\n", "year \n", @@ -2270,7 +2486,7 @@ "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-09-22T10:32:42.233235\n", + "timestamp: 2023-09-26T12:25:19.553369\n", "comments: []\n", "exception: \n", "\n", @@ -2279,25 +2495,27 @@ "type: table\n", "properties: {'method': 'crosstab'}\n", "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 14, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: safe_table = acro.crosstab(df.year, df.grant_type, values=missing, aggfunc=\"mean\")\n", + "command: safe_table = acro.crosstab(\n", "summary: review; 
missing values found\n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 missing missing missing missing\n", - "2011 missing missing \n", - "2012 missing \n", - "2013 missing missing \n", - "2014 missing missing \n", - "2015 missing missing missing \n", - "output: [grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 0.000000 8420372.0 11636000.0\n", - "2011 8502247.0 125663.226562 7689140.5 16047500.0\n", - "2012 11458580.0 131859.062500 6896304.0 16810000.0\n", - "2013 13557147.0 150488.453125 7088096.0 16765625.0\n", - "2014 13748147.0 135494.781250 8118565.0 17845750.0\n", - "2015 11133433.0 149143.625000 10596385.0 18278624.0]\n", - "timestamp: 2023-09-22T10:32:56.949849\n", + "outcome: grant_type G N R R/G All\n", + "year \n", + "2010 missing missing missing missing \n", + "2011 missing missing \n", + "2012 missing \n", + "2013 missing missing \n", + "2014 missing missing \n", + "2015 missing missing missing \n", + "All \n", + "output: [grant_type G N R R/G All\n", + "year \n", + "2010 9921906.0 0.000000 8420372.0 11636000.0 8320154.5\n", + "2011 8502247.0 125663.226562 7689140.5 16047500.0 5310392.0\n", + "2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5\n", + "2013 13557147.0 150488.453125 7088096.0 16765625.0 5578657.0\n", + "2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n", + "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", + "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5]\n", + "timestamp: 2023-09-26T12:25:19.750575\n", "comments: []\n", "exception: \n", "\n", @@ -2324,7 +2542,7 @@ "2013 13557147.0 147937.625000 6988263.5 16765625.0\n", "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", "2015 11133433.0 146572.015625 10388613.0 18278624.0]\n", - "timestamp: 2023-09-22T10:33:03.111509\n", + "timestamp: 2023-09-26T12:25:19.848126\n", "comments: []\n", "exception: \n", "\n", @@ -2349,7 +2567,7 @@ "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 
1.583532e+07]\n", - "timestamp: 2023-09-22T10:33:05.216495\n", + "timestamp: 2023-09-26T12:25:19.915731\n", "comments: []\n", "exception: \n", "\n", @@ -2374,7 +2592,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-22T10:33:07.188134\n", + "timestamp: 2023-09-26T12:25:19.994753\n", "comments: []\n", "exception: \n", "\n", @@ -2399,7 +2617,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-22T10:33:10.739040\n", + "timestamp: 2023-09-26T12:25:20.122274\n", "comments: []\n", "exception: \n", "\n", @@ -2417,8 +2635,8 @@ "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 10:33:12 Log-Likelihood: -14495.000\n", + "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", + "Time: 12:25:20 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2431,7 +2649,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-09-22T10:33:12.893068\n", + "timestamp: 2023-09-26T12:25:20.194182\n", "comments: []\n", "exception: \n", "\n", @@ -2449,8 +2667,8 @@ "Dep. Variable: \n", "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", - "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 10:33:20 Log-Likelihood: -14495.000\n", + "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", + "Time: 12:25:20 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2463,45 +2681,13 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 
1.050000e+08]\n", - "timestamp: 2023-09-22T10:33:20.830086\n", + "timestamp: 2023-09-26T12:25:20.256095\n", "comments: []\n", "exception: \n", "\n", "uid: output_11\n", "status: pass\n", "type: regression\n", - "properties: {'method': 'olsr', 'dof': 807.0}\n", - "sdc: {}\n", - "command: results = acro.olsr(\n", - "summary: pass; dof=807.0 >= 10\n", - "outcome: Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "output: [ inc_activity R-squared: 0.894\n", - "Dep. Variable: \n", - "Model: OLS Adj. R-squared: 0.893\n", - "Method: Least Squares F-statistic: 2261.000\n", - "Date: Fri, 22 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 10:33:21 Log-Likelihood: -14495.000\n", - "No. Observations: 811 AIC: 29000.000\n", - "Df Residuals: 807 BIC: 29020.000\n", - "Df Model: 3 NaN NaN\n", - "Covariance Type: nonrobust NaN NaN, coef std err t P>|t| [0.025 0.975]\n", - "Intercept 301000.0000 533000.000 0.565 0.572 -745000.000 1350000.000\n", - "inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836\n", - "inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633\n", - "total_costs 0.8313 0.011 78.674 0.000 0.811 0.852, 1339.956 Durbin-Watson: 1.414\n", - "Omnibus: \n", - "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", - "Skew: 9.899 Prob(JB): 0.000000e+00\n", - "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-09-22T10:33:21.826209\n", - "comments: []\n", - "exception: \n", - "\n", - "uid: output_12\n", - "status: pass\n", - "type: regression\n", "properties: {'method': 'probit', 'dof': 806.0}\n", "sdc: {}\n", "command: results = acro.probit(y, x)\n", @@ -2513,8 +2699,8 @@ "Dep. 
Variable: \n", "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 2.140000e-01\n", - "Time: 10:33:24 Log-Likelihood: -4.004600e+02\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.140000e-01\n", + "Time: 12:25:20 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", @@ -2529,11 +2715,11 @@ "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", - "timestamp: 2023-09-22T10:33:24.118828\n", + "timestamp: 2023-09-26T12:25:20.316852\n", "comments: []\n", "exception: \n", "\n", - "uid: output_13\n", + "uid: output_12\n", "status: pass\n", "type: regression\n", "properties: {'method': 'logit', 'dof': 806.0}\n", @@ -2547,8 +2733,8 @@ "Dep. Variable: \n", "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", - "Date: Fri, 22 Sep 2023 Pseudo R-squ.: 2.187000e-01\n", - "Time: 10:33:26 Log-Likelihood: -3.980700e+02\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.187000e-01\n", + "Time: 12:25:20 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", @@ -2563,7 +2749,7 @@ "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", - "timestamp: 2023-09-22T10:33:26.379481\n", + "timestamp: 2023-09-26T12:25:20.356849\n", "comments: []\n", "exception: \n", "\n", @@ -2585,7 +2771,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "b1f77749", "metadata": {}, "outputs": [ @@ -2675,7 +2861,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:records:add_custom(): output_14\n" + 
"INFO:acro:records:add_custom(): output_13\n" ] } ], @@ -2735,70 +2921,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:records:\n", - "uid: output_1\n", - "status: fail\n", - "type: table\n", - "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: table = acro.crosstab(df.year,df.grant_type, values=df.inc_grants, aggfunc=\"sum\", margins=True)\n", - "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", - "outcome: grant_type G N R R/G All\n", - "year \n", - "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; ok\n", - "2011 ok ok ok threshold; ok\n", - "2012 ok ok ok threshold; ok\n", - "2013 ok ok ok threshold; ok\n", - "2014 ok ok ok threshold; ok\n", - "2015 ok ok ok threshold; ok\n", - "All ok ok ok ok ok\n", - "output: [grant_type G N R All\n", - "year \n", - "2010 138906688.0 NaN 5.041371e+08 6.430438e+08\n", - "2011 127533696.0 7192804.0 5.324647e+08 6.671912e+08\n", - "2012 171878704.0 7779685.0 4.801055e+08 6.597638e+08\n", - "2013 203357200.0 8728330.0 5.113614e+08 7.234470e+08\n", - "2014 206222208.0 7858697.0 5.545942e+08 7.686751e+08\n", - "2015 133601200.0 8501187.0 5.514573e+08 6.935597e+08\n", - "All 981499712.0 40060704.0 3.134120e+09 4.155681e+09]\n", - "timestamp: 2023-09-22T10:32:20.714474\n", - "comments: []\n", - "exception: \n", - "\n", - "The status of the record above is: fail.\n", - "Please explain why an exception should be granted.\n", - "\n", - "INFO:acro:records:\n", - "uid: output_4\n", - "status: review\n", - "type: table\n", - "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 14, 'threshold': 7, 'p-ratio': 2, 
'nk-rule': 1}, 'cells': {'negative': [], 'missing': [[0, 0], [0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 0], [5, 1], [5, 2]], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: safe_table = acro.crosstab(df.year, df.grant_type, values=missing, aggfunc=\"mean\")\n", - "summary: review; missing values found\n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 missing missing missing missing\n", - "2011 missing missing \n", - "2012 missing \n", - "2013 missing missing \n", - "2014 missing missing \n", - "2015 missing missing missing \n", - "output: [grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 0.000000 8420372.0 11636000.0\n", - "2011 8502247.0 125663.226562 7689140.5 16047500.0\n", - "2012 11458580.0 131859.062500 6896304.0 16810000.0\n", - "2013 13557147.0 150488.453125 7088096.0 16765625.0\n", - "2014 13748147.0 135494.781250 8118565.0 17845750.0\n", - "2015 11133433.0 149143.625000 10596385.0 18278624.0]\n", - "timestamp: 2023-09-22T10:32:56.949849\n", - "comments: []\n", - "exception: \n", - "\n", - "The status of the record above is: review.\n", - "Please explain why an exception should be granted.\n", - "\n", "INFO:acro:records:\n", "uid: output_7\n", "status: review\n", @@ -2821,7 +2943,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-22T10:33:07.188134\n", + "timestamp: 2023-09-26T12:25:19.994753\n", "comments: []\n", "exception: \n", "\n", @@ -2850,7 +2972,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-22T10:33:10.739040\n", + "timestamp: 2023-09-26T12:25:20.122274\n", "comments: []\n", "exception: \n", "\n", @@ -2862,26 +2984,41 @@ "status: fail\n", "type: table\n", "properties: {'method': 'crosstab'}\n", - "sdc: {'summary': {'suppressed': True, 
'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]]}}\n", - "command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\"mean\")\n", - "summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", - "outcome: grant_type G N R R/G\n", - "year \n", - "2010 ok threshold; p-ratio; ok threshold; p-ratio; nk-rule; \n", - "2011 ok ok ok threshold; \n", - "2012 ok ok ok threshold; \n", - "2013 ok ok ok threshold; \n", - "2014 ok ok ok threshold; \n", - "2015 ok ok ok threshold; \n", - "output: [grant_type G N R R/G\n", - "year \n", - "2010 9921906.0 NaN 8402284.0 NaN\n", - "2011 8502247.0 124013.859375 7716880.0 NaN\n", - "2012 11458580.0 131859.062500 6958050.5 NaN\n", - "2013 13557147.0 147937.796875 7202273.5 NaN\n", - "2014 13748147.0 133198.250000 8277525.5 NaN\n", - "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-09-22T10:32:34.076384\n", + "sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]]}}\n", + "command: table = acro.crosstab(\n", + "summary: fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \n", + "outcome: grant_type G N \\\n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 threshold; p-ratio; nk-rule; ok threshold; p-ratio; \n", + "2011 threshold; p-ratio; nk-rule; ok ok \n", + "2012 threshold; p-ratio; nk-rule; ok ok \n", + "2013 
threshold; p-ratio; nk-rule; ok ok \n", + "2014 threshold; p-ratio; nk-rule; ok ok \n", + "2015 threshold; p-ratio; nk-rule; threshold; ok \n", + "All ok ok ok \n", + "\n", + "grant_type R R/G All \n", + "survivor Dead in 2015 Alive in 2015 Alive in 2015 \n", + "year \n", + "2010 ok ok threshold; p-ratio; nk-rule; ok \n", + "2011 ok ok threshold; ok \n", + "2012 ok ok threshold; ok \n", + "2013 ok ok threshold; ok \n", + "2014 ok ok threshold; ok \n", + "2015 ok ok threshold; ok \n", + "All ok ok ok ok \n", + "output: [grant_type G N R All\n", + "survivor Alive in 2015 Alive in 2015 Dead in 2015 Alive in 2015 \n", + "year \n", + "2010 11571583.0 NaN 1385162.500 22436528.0 8930501.0\n", + "2011 10624083.0 124013.859375 1380839.250 19596956.0 4799615.5\n", + "2012 14319667.0 131859.062500 1354274.750 17465130.0 4712294.0\n", + "2013 16943250.0 147937.796875 1409097.250 18547244.0 5094428.0\n", + "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", + "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", + "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", + "timestamp: 2023-09-26T12:25:19.445330\n", "comments: []\n", "exception: \n", "\n", @@ -2889,7 +3026,7 @@ "Please explain why an exception should be granted.\n", "\n", "INFO:acro:records:\n", - "uid: output_14\n", + "uid: output_13\n", "status: review\n", "type: custom\n", "properties: {}\n", @@ -2900,7 +3037,7 @@ "Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", - "timestamp: 2023-09-22T10:33:43.655255\n", + "timestamp: 2023-09-26T12:25:20.472783\n", "comments: ['This output is an image showing the relationship between X and Y']\n", "exception: \n", "\n", @@ -2928,7 +3065,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "f78b5a08", "metadata": {}, "outputs": [ @@ -2979,7 +3116,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "df2a02e0", "metadata": {}, "outputs": [ @@ -3030,7 +3167,7 @@ 
}, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "56d2b6a1", "metadata": {}, "outputs": [ @@ -3131,7 +3268,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type)\",\n", " \"summary\": \"fail; threshold: 6 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-08-14T14:14:31.710403\",\n", + " \"timestamp\": \"2023-09-26T12:25:19.155653\",\n", " \"comments\": [\n", " \"This is a cross table between year and grant_type\",\n", " \"6 cells were suppressed in this table\"\n", @@ -3153,9 +3290,9 @@ " \"suppressed\": false,\n", " \"negative\": 0,\n", " \"missing\": 0,\n", - " \"threshold\": 12,\n", + " \"threshold\": 7,\n", " \"p-ratio\": 2,\n", - " \"nk-rule\": 2\n", + " \"nk-rule\": 1\n", " },\n", " \"cells\": {\n", " \"negative\": [],\n", @@ -3163,71 +3300,47 @@ " \"threshold\": [\n", " [\n", " 0,\n", - " 3\n", + " 1\n", " ],\n", " [\n", " 0,\n", - " 8\n", - " ],\n", - " [\n", - " 1,\n", " 3\n", " ],\n", " [\n", " 1,\n", - " 8\n", - " ],\n", - " [\n", - " 2,\n", " 3\n", " ],\n", " [\n", " 2,\n", - " 8\n", - " ],\n", - " [\n", - " 3,\n", " 3\n", " ],\n", " [\n", " 3,\n", - " 8\n", - " ],\n", - " [\n", - " 4,\n", " 3\n", " ],\n", " [\n", " 4,\n", - " 8\n", - " ],\n", - " [\n", - " 5,\n", " 3\n", " ],\n", " [\n", " 5,\n", - " 8\n", + " 3\n", " ]\n", " ],\n", " \"p-ratio\": [\n", " [\n", " 0,\n", - " 3\n", + " 1\n", " ],\n", " [\n", " 0,\n", - " 8\n", + " 3\n", " ]\n", " ],\n", " \"nk-rule\": [\n", " [\n", " 0,\n", " 3\n", - " ],\n", - " [\n", - " 0,\n", - " 8\n", " ]\n", " ]\n", " }\n", @@ -3235,100 +3348,42 @@ " }\n", " ],\n", " \"outcome\": {\n", - " \"('mean', 'G')\": {\n", - " \"2010\": \"ok\",\n", - " \"2011\": \"ok\",\n", - " \"2012\": \"ok\",\n", - " \"2013\": \"ok\",\n", - " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('mean', 'N')\": {\n", - " \"2010\": \"ok\",\n", - " \"2011\": \"ok\",\n", - " \"2012\": \"ok\",\n", - " \"2013\": \"ok\",\n", - " 
\"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('mean', 'R')\": {\n", - " \"2010\": \"ok\",\n", - " \"2011\": \"ok\",\n", - " \"2012\": \"ok\",\n", - " \"2013\": \"ok\",\n", - " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('mean', 'R/G')\": {\n", - " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", - " \"2011\": \"threshold; \",\n", - " \"2012\": \"threshold; \",\n", - " \"2013\": \"threshold; \",\n", - " \"2014\": \"threshold; \",\n", - " \"2015\": \"threshold; \",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('mean', 'All')\": {\n", - " \"2010\": \"ok\",\n", - " \"2011\": \"ok\",\n", - " \"2012\": \"ok\",\n", - " \"2013\": \"ok\",\n", - " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('std', 'G')\": {\n", + " \"G\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", + " \"2015\": \"ok\"\n", " },\n", - " \"('std', 'N')\": {\n", - " \"2010\": \"ok\",\n", + " \"N\": {\n", + " \"2010\": \"threshold; p-ratio; \",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", + " \"2015\": \"ok\"\n", " },\n", - " \"('std', 'R')\": {\n", + " \"R\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", + " \"2015\": \"ok\"\n", " },\n", - " \"('std', 'R/G')\": {\n", + " \"R/G\": {\n", " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", " \"2011\": \"threshold; \",\n", " \"2012\": \"threshold; \",\n", " \"2013\": \"threshold; \",\n", " \"2014\": \"threshold; \",\n", - " \"2015\": \"threshold; \",\n", - " \"All\": \"ok\"\n", - " },\n", - " \"('std', 'All')\": {\n", - " \"2010\": \"ok\",\n", - " \"2011\": \"ok\",\n", - " 
\"2012\": \"ok\",\n", - " \"2013\": \"ok\",\n", - " \"2014\": \"ok\",\n", - " \"2015\": \"ok\",\n", - " \"All\": \"ok\"\n", + " \"2015\": \"threshold; \"\n", " }\n", " },\n", - " \"command\": \"safe_table = acro.crosstab(\",\n", - " \"summary\": \"fail; threshold: 12 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 2 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-08-14T14:15:05.753875\",\n", + " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", + " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", + " \"timestamp\": \"2023-09-26T12:25:19.553369\",\n", " \"comments\": [],\n", " \"exception\": \"This one is safe. Trust me, I'm a professor.\"\n", " },\n", @@ -3346,9 +3401,9 @@ " \"summary\": {\n", " \"suppressed\": false,\n", " \"negative\": 10,\n", - " \"missing\": 0,\n", - " \"threshold\": 6,\n", - " \"p-ratio\": 1,\n", + " \"missing\": 11,\n", + " \"threshold\": 7,\n", + " \"p-ratio\": 2,\n", " \"nk-rule\": 1\n", " },\n", " \"cells\": {\n", @@ -3394,10 +3449,59 @@ " 2\n", " ]\n", " ],\n", - " \"missing\": [],\n", + " \"missing\": [\n", + " [\n", + " 0,\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 0,\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 3\n", + " ],\n", + " [\n", + " 1,\n", + " 1\n", + " ],\n", + " [\n", + " 1,\n", + " 2\n", + " ],\n", + " [\n", + " 2,\n", + " 2\n", + " ],\n", + " [\n", + " 4,\n", + " 2\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 1\n", + " ],\n", + " [\n", + " 5,\n", + " 2\n", + " ]\n", + " ],\n", " \"threshold\": [\n", " [\n", " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 0,\n", " 3\n", " ],\n", " [\n", @@ -3424,6 +3528,10 @@ " \"p-ratio\": [\n", " [\n", " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 0,\n", " 3\n", " ]\n", " ],\n", @@ -3473,13 +3581,13 @@ " },\n", 
" \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\\\"mean\\\")\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-08-14T14:15:31.575434\",\n", + " \"timestamp\": \"2023-09-26T12:25:19.848126\",\n", " \"comments\": [],\n", " \"exception\": \"It's not disclosive, I promise.\"\n", " },\n", " \"output_6\": {\n", " \"uid\": \"output_6\",\n", - " \"status\": \"pass\",\n", + " \"status\": \"review\",\n", " \"type\": \"table\",\n", " \"properties\": {\n", " \"method\": \"pivot_table\"\n", @@ -3491,14 +3599,47 @@ " \"summary\": {\n", " \"suppressed\": false,\n", " \"negative\": 0,\n", - " \"missing\": 0,\n", + " \"missing\": 8,\n", " \"threshold\": 0,\n", " \"p-ratio\": 0,\n", " \"nk-rule\": 0\n", " },\n", " \"cells\": {\n", " \"negative\": [],\n", - " \"missing\": [],\n", + " \"missing\": [\n", + " [\n", + " 0,\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 1,\n", + " 1\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 1\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 1\n", + " ]\n", + " ],\n", " \"threshold\": [],\n", " \"p-ratio\": [],\n", " \"nk-rule\": []\n", @@ -3508,21 +3649,21 @@ " ],\n", " \"outcome\": {\n", " \"('mean', 'inc_grants')\": {\n", - " \"G\": \"ok\",\n", - " \"N\": \"ok\",\n", - " \"R\": \"ok\",\n", - " \"R/G\": \"ok\"\n", + " \"G\": \"missing\",\n", + " \"N\": \"missing\",\n", + " \"R\": \"missing\",\n", + " \"R/G\": \"missing\"\n", " },\n", " \"('std', 'inc_grants')\": {\n", - " \"G\": \"ok\",\n", - " \"N\": \"ok\",\n", - " \"R\": \"ok\",\n", - " \"R/G\": \"ok\"\n", + " \"G\": \"missing\",\n", + " \"N\": \"missing\",\n", + " \"R\": \"missing\",\n", + " \"R/G\": \"missing\"\n", " }\n", " },\n", " \"command\": \"table = acro.pivot_table(\",\n", - " \"summary\": \"pass\",\n", - " \"timestamp\": \"2023-08-14T14:15:42.742599\",\n", + " 
\"summary\": \"review; missing values found\",\n", + " \"timestamp\": \"2023-09-26T12:25:19.915731\",\n", " \"comments\": [],\n", " \"exception\": \"I need this one too\"\n", " },\n", @@ -3604,9 +3745,9 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-08-14T14:15:48.940003\",\n", + " \"timestamp\": \"2023-09-26T12:25:19.994753\",\n", " \"comments\": [],\n", - " \"exception\": \"a message\"\n", + " \"exception\": \"yes\"\n", " },\n", " \"output_8\": {\n", " \"uid\": \"output_8\",\n", @@ -3622,7 +3763,7 @@ " \"summary\": {\n", " \"suppressed\": false,\n", " \"negative\": 4,\n", - " \"missing\": 0,\n", + " \"missing\": 8,\n", " \"threshold\": 0,\n", " \"p-ratio\": 0,\n", " \"nk-rule\": 0\n", @@ -3646,7 +3787,40 @@ " 1\n", " ]\n", " ],\n", - " \"missing\": [],\n", + " \"missing\": [\n", + " [\n", + " 0,\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 1\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 1,\n", + " 1\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 1\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 1\n", + " ]\n", + " ],\n", " \"threshold\": [],\n", " \"p-ratio\": [],\n", " \"nk-rule\": []\n", @@ -3670,9 +3844,9 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-08-14T14:15:56.952843\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.122274\",\n", " \"comments\": [],\n", - " \"exception\": \"and another\"\n", + " \"exception\": \"yes\"\n", " },\n", " \"output_9\": {\n", " \"uid\": \"output_9\",\n", @@ -3699,7 +3873,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.ols(y, x)\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-08-14T14:16:00.497443\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.194182\",\n", " \"comments\": [],\n", " \"exception\": 
\"\"\n", " },\n", @@ -3728,7 +3902,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.olsr(\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-08-14T14:16:09.552690\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.256095\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3753,7 +3927,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.probit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-08-14T14:16:16.190948\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.316852\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3778,7 +3952,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.logit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-08-14T14:16:20.747689\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.356849\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3794,12 +3968,12 @@ " \"name\": \"pivot_table_0.csv\",\n", " \"sdc\": {\n", " \"summary\": {\n", - " \"suppressed\": false,\n", + " \"suppressed\": true,\n", " \"negative\": 0,\n", " \"missing\": 0,\n", - " \"threshold\": 6,\n", - " \"p-ratio\": 1,\n", - " \"nk-rule\": 1\n", + " \"threshold\": 14,\n", + " \"p-ratio\": 8,\n", + " \"nk-rule\": 7\n", " },\n", " \"cells\": {\n", " \"negative\": [],\n", @@ -3807,39 +3981,123 @@ " \"threshold\": [\n", " [\n", " 0,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", " ],\n", " [\n", " 1,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 1,\n", + " 5\n", " ],\n", " [\n", " 2,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 5\n", " ],\n", " [\n", " 3,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 5\n", " ],\n", " [\n", " 4,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 5\n", " ],\n", " [\n", " 5,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 1\n", + " ],\n", + " [\n", + " 5,\n", + " 
5\n", " ]\n", " ],\n", " \"p-ratio\": [\n", " [\n", " 0,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 2\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", " ]\n", " ],\n", " \"nk-rule\": [\n", " [\n", " 0,\n", - " 3\n", + " 0\n", + " ],\n", + " [\n", + " 0,\n", + " 5\n", + " ],\n", + " [\n", + " 1,\n", + " 0\n", + " ],\n", + " [\n", + " 2,\n", + " 0\n", + " ],\n", + " [\n", + " 3,\n", + " 0\n", + " ],\n", + " [\n", + " 4,\n", + " 0\n", + " ],\n", + " [\n", + " 5,\n", + " 0\n", " ]\n", " ]\n", " }\n", @@ -3847,44 +4105,75 @@ " }\n", " ],\n", " \"outcome\": {\n", - " \"G\": {\n", + " \"('G', 'Dead in 2015')\": {\n", + " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2011\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2012\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2013\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2014\": \"threshold; p-ratio; nk-rule; \",\n", + " \"2015\": \"threshold; p-ratio; nk-rule; \",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('G', 'Alive in 2015')\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"threshold; \",\n", + " \"All\": \"ok\"\n", " },\n", - " \"N\": {\n", + " \"('N', 'Alive in 2015')\": {\n", + " \"2010\": \"threshold; p-ratio; \",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('R', 'Dead in 2015')\": {\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " },\n", - " \"R\": {\n", + " \"('R', 'Alive in 2015')\": 
{\n", " \"2010\": \"ok\",\n", " \"2011\": \"ok\",\n", " \"2012\": \"ok\",\n", " \"2013\": \"ok\",\n", " \"2014\": \"ok\",\n", - " \"2015\": \"ok\"\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " },\n", - " \"R/G\": {\n", + " \"('R/G', 'Alive in 2015')\": {\n", " \"2010\": \"threshold; p-ratio; nk-rule; \",\n", " \"2011\": \"threshold; \",\n", " \"2012\": \"threshold; \",\n", " \"2013\": \"threshold; \",\n", " \"2014\": \"threshold; \",\n", - " \"2015\": \"threshold; \"\n", + " \"2015\": \"threshold; \",\n", + " \"All\": \"ok\"\n", + " },\n", + " \"('All', '')\": {\n", + " \"2010\": \"ok\",\n", + " \"2011\": \"ok\",\n", + " \"2012\": \"ok\",\n", + " \"2013\": \"ok\",\n", + " \"2014\": \"ok\",\n", + " \"2015\": \"ok\",\n", + " \"All\": \"ok\"\n", " }\n", " },\n", - " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", - " \"summary\": \"fail; threshold: 6 cells may need suppressing; p-ratio: 1 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-08-14T14:14:55.245016\",\n", + " \"command\": \"table = acro.crosstab(\",\n", + " \"summary\": \"fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \",\n", + " \"timestamp\": \"2023-09-26T12:25:19.445330\",\n", " \"comments\": [],\n", - " \"exception\": \"and another\"\n", + " \"exception\": \"yes\"\n", " },\n", " \"output_13\": {\n", " \"uid\": \"output_13\",\n", @@ -3900,11 +4189,11 @@ " \"outcome\": {},\n", " \"command\": \"custom\",\n", " \"summary\": \"review\",\n", - " \"timestamp\": \"2023-08-14T14:16:38.781407\",\n", + " \"timestamp\": \"2023-09-26T12:25:20.472783\",\n", " \"comments\": [\n", " \"This output is an image showing the relationship between X and Y\"\n", " ],\n", - " \"exception\": \"image is not disclosive\"\n", + " \"exception\": \"d\"\n", " }\n", " }\n", "}\n" diff --git a/test/test_initial.py b/test/test_initial.py index 716c6aa..aa7430e 
100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -15,6 +15,7 @@ # pylint: disable=redefined-outer-name PATH: str = "RES_PYTEST" +RUN_TEST = False @pytest.fixture @@ -41,28 +42,6 @@ def test_crosstab_without_suppression(data): assert 48 == output.output[0]["R/G"].sum() -def test_crosstab_multiple_aggregate_function(data, acro): - """Crosstab with multiple agg funcs.""" - acro = ACRO(suppress=False) - - _ = acro.crosstab( - data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"] - ) - output = acro.results.get_index(0) - correct_summary: str = ( - "fail; threshold: 12 cells may need suppressing;" - " p-ratio: 2 cells may need suppressing; " - "nk-rule: 2 cells may need suppressing; " - ) - assert ( - output.summary == correct_summary - ), f"\n{output.summary}\n should be \n{correct_summary}\n" - print(f"{output.output[0]['mean'][ 'R/G'].sum()}") - correctval = 97383496.0 - errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" - assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg - - def test_crosstab_threshold(data, acro): """Crosstab threshold test.""" _ = acro.crosstab(data.year, data.grant_type) @@ -567,14 +546,106 @@ def test_zeros_are_not_disclosive(data, acro): assert output_0.summary == correct_summary -def test_crosstab_with_sum(data, acro): - """Test the crosstab with two columns and aggfunc sum.""" - acro = ACRO(suppress=False) +def test_crosstab_with_totals_without_suppression(data, acro): + """Test the crosstab with margins id true and suppression is false.""" + acro.suppress = False + _ = acro.crosstab(data.year, data.grant_type, margins=True) + output = acro.results.get_index(0) + assert 153 == output.output[0]["All"].iat[0] + + total_rows = output.output[0].iloc[-1, 0:4].sum() + total_cols = output.output[0].loc[2010:2015, "All"].sum() + assert 918 == total_rows == total_cols == output.output[0]["All"].iat[6] + + +def test_crosstab_with_totals_with_suppression(data, acro): + """Test 
the crosstab with both margins and suprression are true.""" + _ = acro.crosstab(data.year, data.grant_type, margins=True) + output = acro.results.get_index(0) + assert 145 == output.output[0]["All"].iat[0] + + total_rows = output.output[0].iloc[-1, 0:3].sum() + total_cols = output.output[0].loc[2010:2015, "All"].sum() + assert 870 == total_cols == total_rows == output.output[0]["All"].iat[6] + assert "R/G" not in output.output[0].columns + + +def test_crosstab_with_totals_with_suppression_herichical(data, acro): + """Test the crosstab with both margins and suprression are true.""" + _ = acro.crosstab( + [data.year, data.survivor], [data.grant_type, data.status], margins=True + ) + output = acro.results.get_index(0) + assert 47 == output.output[0]["All"].iat[0] + + total_rows = (output.output[0].loc[("All", ""), :].sum()) - output.output[0][ + "All" + ].iat[12] + total_cols = (output.output[0].loc[:, "All"].sum()) - output.output[0]["All"].iat[ + 12 + ] + assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + + +def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): + """Test the crosstab with both margins and suprression are true and with one aggfunc.""" _ = acro.crosstab( data.year, - [data.grant_type, data.survivor], + data.grant_type, values=data.inc_grants, aggfunc="mean", + margins=True, ) output = acro.results.get_index(0) - assert (6, 8) == output.output[0].shape + assert 8689781 == output.output[0]["All"].iat[0] + assert 5425170.5 == output.output[0]["All"].iat[6] + + +def test_crosstab_with_manual_totals_with_suppression(data, acro): + """Test the crosstab with both margins and + suprression are true while using the total manual function. 
+ """ + _ = acro.crosstab(data.year, data.grant_type, margins=True, show_suppressed=True) + output = acro.results.get_index(0) + assert 145 == output.output[0]["All"].iat[0] + + total_rows = output.output[0].iloc[-1, 0:4].sum() + total_cols = output.output[0].loc[2010:2015, "All"].sum() + assert 870 == total_cols == total_rows == output.output[0]["All"].iat[6] + assert "R/G" in output.output[0].columns + + +if RUN_TEST: + + def test_crosstab_with_sum(data, acro): + """Test the crosstab with two columns and aggfunc sum.""" + acro = ACRO(suppress=False) + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="mean", + ) + output = acro.results.get_index(0) + assert (6, 8) == output.output[0].shape + + def test_crosstab_multiple_aggregate_function(data, acro): + """Crosstab with multiple agg funcs.""" + acro = ACRO(suppress=False) + + _ = acro.crosstab( + data.year, data.grant_type, values=data.inc_grants, aggfunc=["mean", "std"] + ) + output = acro.results.get_index(0) + correct_summary: str = ( + "fail; threshold: 12 cells may need suppressing;" + " p-ratio: 2 cells may need suppressing; " + "nk-rule: 2 cells may need suppressing; " + ) + assert ( + output.summary == correct_summary + ), f"\n{output.summary}\n should be \n{correct_summary}\n" + print(f"{output.output[0]['mean'][ 'R/G'].sum()}") + correctval = 97383496.0 + errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" + assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg From 39b845c0d0f9f655fdfbde591c1936827eff1759 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Tue, 26 Sep 2023 16:51:11 +0100 Subject: [PATCH 06/14] fixing pylint errors --- acro/acro_tables.py | 207 +++++++++++++++++++++++++------------------- 1 file changed, 116 insertions(+), 91 deletions(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 83e7f32..54ba7fa 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -134,93 +134,18 @@ def 
crosstab( # pylint: disable=too-many-arguments,too-many-locals normalize, ) - # suppression masks to apply based on the following checks - masks: dict[str, DataFrame] = {} - - if agg_func is not None: - # create lists with single entry for when there is only one aggfunc - count_funcs: list[str] = [AGGFUNC["count"]] - neg_funcs: list[Callable] = [agg_negative] - pperc_funcs: list[Callable] = [agg_p_percent] - nk_funcs: list[Callable] = [agg_nk] - missing_funcs: list[Callable] = [agg_missing] - # then expand them to deal with extra columns as needed - if isinstance(agg_func, list): - num = len(agg_func) - count_funcs.extend([AGGFUNC["count"] for i in range(1, num)]) - neg_funcs.extend([agg_negative for i in range(1, num)]) - pperc_funcs.extend([agg_p_percent for i in range(1, num)]) - nk_funcs.extend([agg_nk for i in range(1, num)]) - missing_funcs.extend([agg_missing for i in range(1, num)]) - # threshold check- doesn't matter what we pass for value - - t_values = pd.crosstab( # type: ignore - index, - columns, - values=values, - rownames=rownames, - colnames=colnames, - aggfunc=count_funcs, - margins=margins, - margins_name=margins_name, - dropna=dropna, - normalize=normalize, - ) - - if dropna or margins: - for col in t_values.columns: - if t_values[col].sum() == 0: - t_values = t_values.drop(col, axis=1) - t_values = t_values < THRESHOLD - masks["threshold"] = t_values - # check for negative values -- currently unsupported - negative = pd.crosstab( # type: ignore - index, columns, values, aggfunc=neg_funcs, margins=margins - ) - if negative.to_numpy().sum() > 0: - masks["negative"] = negative - # p-percent check - masks["p-ratio"] = pd.crosstab( # type: ignore - index, - columns, - values, - aggfunc=pperc_funcs, - margins=margins, - dropna=dropna, - ) - # nk values check - masks["nk-rule"] = pd.crosstab( # type: ignore - index, columns, values, aggfunc=nk_funcs, margins=margins, dropna=dropna - ) - # check for missing values -- currently unsupported - if 
CHECK_MISSING_VALUES: - masks["missing"] = pd.crosstab( # type: ignore - index, columns, values, aggfunc=missing_funcs, margins=margins - ) - else: - # threshold check- doesn't matter what we pass for value - t_values = pd.crosstab( # type: ignore - index, - columns, - values=None, - rownames=rownames, - colnames=colnames, - aggfunc=None, - margins=margins, - margins_name=margins_name, - dropna=dropna, - normalize=normalize, - ) - t_values = t_values < THRESHOLD - masks["threshold"] = t_values - - # pd.crosstab returns nan for an empty cell - for name, mask in masks.items(): - mask.fillna(value=1, inplace=True) - mask = mask.astype(int) - mask.replace({0: False, 1: True}, inplace=True) - masks[name] = mask - + masks = create_crosstab_masks( + index, + columns, + values, + rownames, + colnames, + agg_func, + margins, + margins_name, + dropna, + normalize, + ) # build the sdc dictionary sdc: dict = get_table_sdc(masks, self.suppress) # get the status and summary @@ -556,6 +481,107 @@ def plot( # pylint: disable=too-many-arguments,too-many-locals return plot +def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals + index, + columns, + values, + rownames, + colnames, + agg_func, + margins, + margins_name, + dropna, + normalize, +): + # suppression masks to apply based on the following checks + masks: dict[str, DataFrame] = {} + + if agg_func is not None: + # create lists with single entry for when there is only one aggfunc + count_funcs: list[str] = [AGGFUNC["count"]] + neg_funcs: list[Callable] = [agg_negative] + pperc_funcs: list[Callable] = [agg_p_percent] + nk_funcs: list[Callable] = [agg_nk] + missing_funcs: list[Callable] = [agg_missing] + # then expand them to deal with extra columns as needed + if isinstance(agg_func, list): + num = len(agg_func) + count_funcs.extend([AGGFUNC["count"] for i in range(1, num)]) + neg_funcs.extend([agg_negative for i in range(1, num)]) + pperc_funcs.extend([agg_p_percent for i in range(1, num)]) + 
nk_funcs.extend([agg_nk for i in range(1, num)]) + missing_funcs.extend([agg_missing for i in range(1, num)]) + # threshold check- doesn't matter what we pass for value + + t_values = pd.crosstab( # type: ignore + index, + columns, + values=values, + rownames=rownames, + colnames=colnames, + aggfunc=count_funcs, + margins=margins, + margins_name=margins_name, + dropna=dropna, + normalize=normalize, + ) + + if dropna or margins: + for col in t_values.columns: + if t_values[col].sum() == 0: + t_values = t_values.drop(col, axis=1) + t_values = t_values < THRESHOLD + masks["threshold"] = t_values + # check for negative values -- currently unsupported + negative = pd.crosstab( # type: ignore + index, columns, values, aggfunc=neg_funcs, margins=margins + ) + if negative.to_numpy().sum() > 0: + masks["negative"] = negative + # p-percent check + masks["p-ratio"] = pd.crosstab( # type: ignore + index, + columns, + values, + aggfunc=pperc_funcs, + margins=margins, + dropna=dropna, + ) + # nk values check + masks["nk-rule"] = pd.crosstab( # type: ignore + index, columns, values, aggfunc=nk_funcs, margins=margins, dropna=dropna + ) + # check for missing values -- currently unsupported + if CHECK_MISSING_VALUES: + masks["missing"] = pd.crosstab( # type: ignore + index, columns, values, aggfunc=missing_funcs, margins=margins + ) + else: + # threshold check- doesn't matter what we pass for value + t_values = pd.crosstab( # type: ignore + index, + columns, + values=None, + rownames=rownames, + colnames=colnames, + aggfunc=None, + margins=margins, + margins_name=margins_name, + dropna=dropna, + normalize=normalize, + ) + t_values = t_values < THRESHOLD + masks["threshold"] = t_values + + # pd.crosstab returns nan for an empty cell + for name, mask in masks.items(): + mask.fillna(value=1, inplace=True) + mask = mask.astype(int) + mask.replace({0: False, 1: True}, inplace=True) + masks[name] = mask + return masks + + def rounded_survival_table(survival_table): """Calculates the 
rounded surival function.""" death_censored = ( @@ -719,7 +745,6 @@ def agg_p_percent(vals: Series) -> bool: whether the p percent rule is violated. """ assert isinstance(vals, Series), "vals is not a pandas series" - logger.debug(f"vals is {vals} with size {vals.size}") sorted_vals = vals.sort_values(ascending=False) total: float = sorted_vals.sum() if total <= 0.0 or vals.size <= 1: @@ -1041,7 +1066,7 @@ def get_index_columns(index, columns, data) -> tuple[list | Series, list | Serie return index_new, columns_new -def crosstab_with_totals( +def crosstab_with_totals( # pylint: disable=too-many-arguments,too-many-locals masks, aggfunc, index, @@ -1131,7 +1156,7 @@ def crosstab_with_totals( return table -def manual_crossstab_with_totals( +def manual_crossstab_with_totals( # pylint: disable=too-many-arguments,too-many-locals table, aggfunc, index, @@ -1188,7 +1213,7 @@ def manual_crossstab_with_totals( "Please create a table for each aggregation function" ) return None - elif aggfunc is None or aggfunc == "sum" or aggfunc == "count": + if aggfunc is None or aggfunc == "sum" or aggfunc == "count": table = recalculate_margin(table, margins_name) elif aggfunc == "mean": From 9615417f1cc7b00fad1412f4f9c21633214d95b5 Mon Sep 17 00:00:00 2001 From: Maha Albashir Date: Tue, 26 Sep 2023 16:56:10 +0100 Subject: [PATCH 07/14] adding docstring --- acro/acro_tables.py | 1 + 1 file changed, 1 insertion(+) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 54ba7fa..700dd51 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -493,6 +493,7 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals dropna, normalize, ): + """Creates masks to specify the cells to suppress""" # suppression masks to apply based on the following checks masks: dict[str, DataFrame] = {} From 09b56d6b70c64d77e6aa1b7b72a4064684610633 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 
15:57:33 +0000 Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- acro/acro_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 700dd51..9d0f328 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -493,7 +493,7 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals dropna, normalize, ): - """Creates masks to specify the cells to suppress""" + """Creates masks to specify the cells to suppress.""" # suppression masks to apply based on the following checks masks: dict[str, DataFrame] = {} From d1b4d05a62fbca440317d92ab26813d8473267ea Mon Sep 17 00:00:00 2001 From: Jim-smith Date: Tue, 26 Sep 2023 19:57:08 +0100 Subject: [PATCH 09/14] linting over-rode too-many-lines while see which version the community prefers Signed-off-by: Jim-smith --- acro/acro_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 9d0f328..2702be3 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -1,5 +1,5 @@ """ACRO: Tables functions.""" - +# pylint: disable=too-many-lines from __future__ import annotations import logging From b4514f84baa0d8f67aca27659a1fe454a2c00ede Mon Sep 17 00:00:00 2001 From: mahaalbashir Date: Wed, 27 Sep 2023 15:43:23 +0100 Subject: [PATCH 10/14] adding more tests --- notebooks/test.ipynb | 377 ++++++++++++------------------------------- test/test_initial.py | 102 ++++++++++++ 2 files changed, 203 insertions(+), 276 deletions(-) diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index 1bc6777..a82dd96 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -929,198 +929,29 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.01.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502247.0124013.8593757716880.016047500.05303808.01.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.02.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.517845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.405324e+07
\n", - "
" - ], - "text/plain": [ - " mean \\\n", - "grant_type G N R R/G All \n", - "year \n", - "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.0 \n", - "2011 8502247.0 124013.859375 7716880.0 16047500.0 5303808.0 \n", - "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.0 \n", - "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", - "2014 13748147.0 133198.250000 8277525.5 17845750.0 6117054.5 \n", - "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", - "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", - "\n", - " std \\\n", - "grant_type G N R R/G \n", - "year \n", - "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", - "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", - "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", - "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", - "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", - "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", - "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", - "\n", - " \n", - "grant_type All \n", - "year \n", - "2010 2.727398e+07 \n", - "2011 2.137658e+07 \n", - "2012 2.026400e+07 \n", - "2013 2.251787e+07 \n", - "2014 2.641722e+07 \n", - "2015 2.784636e+07 \n", - "All 2.405324e+07 " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172: PerformanceWarning: indexing past lexsort depth may impact performance.\n", + " if t_values[col].sum() == 0:\n" + ] + }, + { + "ename": "ValueError", + "evalue": "The truth value of a Series is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 21\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m safe_table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 2\u001b[0m df\u001b[39m.\u001b[39;49myear, df\u001b[39m.\u001b[39;49mgrant_type, values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants, aggfunc\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mmean\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstd\u001b[39;49m\u001b[39m\"\u001b[39;49m], margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m safe_table\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize, show_suppressed)\u001b[0m\n\u001b[0;32m 170\u001b[0m \u001b[39mif\u001b[39;00m dropna \u001b[39mor\u001b[39;00m margins:\n\u001b[0;32m 171\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m t_values\u001b[39m.\u001b[39mcolumns:\n\u001b[1;32m--> 172\u001b[0m \u001b[39mif\u001b[39;00m t_values[col]\u001b[39m.\u001b[39;49msum() \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m:\n\u001b[0;32m 173\u001b[0m t_values \u001b[39m=\u001b[39m t_values\u001b[39m.\u001b[39mdrop(col, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 174\u001b[0m t_values \u001b[39m=\u001b[39m t_values \u001b[39m<\u001b[39m THRESHOLD\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\generic.py:1527\u001b[0m, in 
\u001b[0;36mNDFrame.__nonzero__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1525\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[0;32m 1526\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__nonzero__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1528\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe truth value of a \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m is ambiguous. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1529\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUse a.empty, a.bool(), a.item(), a.any() or a.all().\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1530\u001b[0m )\n", + "\u001b[1;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ] } ], "source": [ - "safe_table = pd.crosstab(\n", + "safe_table = acro.crosstab(\n", " df.year, df.grant_type, values=df.inc_grants, aggfunc=[\"mean\", \"std\"], margins=True\n", ")\n", "safe_table" @@ -1136,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "bf132239", "metadata": {}, "outputs": [ @@ -1271,7 +1102,7 @@ "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1290,7 +1121,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "7cc417a0", "metadata": {}, "outputs": [], @@ -1308,7 +1139,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "15bcdc7c", "metadata": {}, "outputs": [ @@ -1425,7 +1256,7 @@ "2015 11133433.0 146572.015625 10388613.0 18278624.0" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1448,7 +1279,7 @@ }, { "cell_type": 
"code", - "execution_count": 16, + "execution_count": 15, "id": "6d4730c4", "metadata": {}, "outputs": [ @@ -1546,7 +1377,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1568,7 +1399,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "f3a87c20", "metadata": {}, "outputs": [ @@ -1666,7 +1497,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1684,7 +1515,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "8b603548", "metadata": {}, "outputs": [], @@ -1702,7 +1533,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "de4266cd-b4d4-417b-ae44-5d972e8bfdde", "metadata": {}, "outputs": [ @@ -1800,7 +1631,7 @@ "R/G 1.664827e+07 1.583532e+07" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1824,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "a521cb83", "metadata": {}, "outputs": [ @@ -1860,7 +1691,7 @@ " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -14495. \n", + " Time: 16:44:14 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -1916,7 +1747,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 12:25:20 Log-Likelihood: -14495.\n", + "Time: 16:44:14 Log-Likelihood: -14495.\n", "No. 
Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -1942,7 +1773,7 @@ "\"\"\"" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1969,7 +1800,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "cc90f7c9", "metadata": {}, "outputs": [ @@ -1977,7 +1808,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n", + "INFO:acro:olsr() outcome: pass; dof=807.0 >= 10\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "INFO:acro:records:add(): output_10\n" ] }, @@ -1999,7 +1836,7 @@ " Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -14495. \n", + " Time: 16:44:14 Log-Likelihood: -14495. \n", "\n", "\n", " No. Observations: 811 AIC: 2.900e+04\n", @@ -2055,7 +1892,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.00\n", - "Time: 12:25:20 Log-Likelihood: -14495.\n", + "Time: 16:44:14 Log-Likelihood: -14495.\n", "No. 
Observations: 811 AIC: 2.900e+04\n", "Df Residuals: 807 BIC: 2.902e+04\n", "Df Model: 3 \n", @@ -2081,7 +1918,7 @@ "\"\"\"" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2103,7 +1940,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "5b1a1611", "metadata": {}, "outputs": [ @@ -2111,13 +1948,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "INFO:acro:probit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_11\n" ] }, @@ -2148,7 +1979,7 @@ " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140 \n", "\n", "\n", - " Time: 12:25:20 Log-Likelihood: -400.46 \n", + " Time: 16:44:15 Log-Likelihood: -400.46 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2187,7 +2018,7 @@ "Model: Probit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2140\n", - "Time: 12:25:20 Log-Likelihood: -400.46\n", + "Time: 16:44:15 Log-Likelihood: -400.46\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.875e-46\n", "=================================================================================\n", @@ -2206,7 +2037,7 @@ "\"\"\"" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2234,7 +2065,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "dcf30f8f", "metadata": {}, "outputs": [ @@ -2242,13 +2073,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "INFO:acro:logit() outcome: pass; dof=806.0 >= 10\n", "INFO:acro:records:add(): output_12\n" ] }, @@ -2279,7 +2104,7 @@ " Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187 \n", "\n", "\n", - " 
Time: 12:25:20 Log-Likelihood: -398.07 \n", + " Time: 16:44:15 Log-Likelihood: -398.07 \n", "\n", "\n", " converged: True LL-Null: -509.50 \n", @@ -2318,7 +2143,7 @@ "Model: Logit Df Residuals: 806\n", "Method: MLE Df Model: 4\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 0.2187\n", - "Time: 12:25:20 Log-Likelihood: -398.07\n", + "Time: 16:44:15 Log-Likelihood: -398.07\n", "converged: True LL-Null: -509.50\n", "Covariance Type: nonrobust LLR p-value: 4.532e-47\n", "=================================================================================\n", @@ -2337,7 +2162,7 @@ "\"\"\"" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2357,7 +2182,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "ec960039", "metadata": { "scrolled": true @@ -2390,7 +2215,7 @@ "2013 15 59 71 8\n", "2014 15 59 71 8\n", "2015 15 59 71 8]\n", - "timestamp: 2023-09-26T12:25:19.155653\n", + "timestamp: 2023-09-26T16:43:37.007348\n", "comments: []\n", "exception: \n", "\n", @@ -2417,7 +2242,7 @@ "2013 13557147.0 147937.796875 7202273.5 NaN\n", "2014 13748147.0 133198.250000 8277525.5 NaN\n", "2015 11133433.0 146572.187500 10812888.0 NaN]\n", - "timestamp: 2023-09-26T12:25:19.230978\n", + "timestamp: 2023-09-26T16:43:37.137368\n", "comments: []\n", "exception: \n", "\n", @@ -2459,7 +2284,7 @@ "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", - "timestamp: 2023-09-26T12:25:19.445330\n", + "timestamp: 2023-09-26T16:43:37.469934\n", "comments: []\n", "exception: \n", "\n", @@ -2486,7 +2311,7 @@ "2013 13557147.0 147937.796875 7202273.5 16765625.0\n", "2014 13748147.0 133198.250000 8277525.5 17845750.0\n", "2015 11133433.0 146572.187500 10812888.0 18278624.0]\n", - "timestamp: 2023-09-26T12:25:19.553369\n", + "timestamp: 2023-09-26T16:43:37.654102\n", "comments: 
[]\n", "exception: \n", "\n", @@ -2515,7 +2340,7 @@ "2014 13748147.0 135494.781250 8118565.0 17845750.0 6072600.0\n", "2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0\n", "All 11412787.0 136158.859375 8006360.5 16648273.0 5968295.5]\n", - "timestamp: 2023-09-26T12:25:19.750575\n", + "timestamp: 2023-09-26T16:44:04.817429\n", "comments: []\n", "exception: \n", "\n", @@ -2542,7 +2367,7 @@ "2013 13557147.0 147937.625000 6988263.5 16765625.0\n", "2014 13748147.0 133198.078125 7997392.5 17845750.0\n", "2015 11133433.0 146572.015625 10388613.0 18278624.0]\n", - "timestamp: 2023-09-26T12:25:19.848126\n", + "timestamp: 2023-09-26T16:44:14.222443\n", "comments: []\n", "exception: \n", "\n", @@ -2567,7 +2392,7 @@ "N 1.344319e+05 1.988737e+05\n", "R 8.098502e+06 3.204495e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.915731\n", + "timestamp: 2023-09-26T16:44:14.374409\n", "comments: []\n", "exception: \n", "\n", @@ -2592,7 +2417,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.994753\n", + "timestamp: 2023-09-26T16:44:14.514747\n", "comments: []\n", "exception: \n", "\n", @@ -2617,7 +2442,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:20.122274\n", + "timestamp: 2023-09-26T16:44:14.676251\n", "comments: []\n", "exception: \n", "\n", @@ -2636,7 +2461,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 12:25:20 Log-Likelihood: -14495.000\n", + "Time: 16:44:14 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2649,7 +2474,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 
1.050000e+08]\n", - "timestamp: 2023-09-26T12:25:20.194182\n", + "timestamp: 2023-09-26T16:44:14.841394\n", "comments: []\n", "exception: \n", "\n", @@ -2668,7 +2493,7 @@ "Model: OLS Adj. R-squared: 0.893\n", "Method: Least Squares F-statistic: 2261.000\n", "Date: Tue, 26 Sep 2023 Prob (F-statistic): 0.000\n", - "Time: 12:25:20 Log-Likelihood: -14495.000\n", + "Time: 16:44:14 Log-Likelihood: -14495.000\n", "No. Observations: 811 AIC: 29000.000\n", "Df Residuals: 807 BIC: 29020.000\n", "Df Model: 3 NaN NaN\n", @@ -2681,7 +2506,7 @@ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1.253318e+06\n", "Skew: 9.899 Prob(JB): 0.000000e+00\n", "Kurtosis: 194.566 Cond. No. 1.050000e+08]\n", - "timestamp: 2023-09-26T12:25:20.256095\n", + "timestamp: 2023-09-26T16:44:14.939403\n", "comments: []\n", "exception: \n", "\n", @@ -2700,7 +2525,7 @@ "Model: Probit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.140000e-01\n", - "Time: 12:25:20 Log-Likelihood: -4.004600e+02\n", + "Time: 16:44:15 Log-Likelihood: -4.004600e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.875000e-46, coef std err z P>|z| [0.025 \\\n", "const 4.740000e-02 5.700000e-02 0.838 0.402 -6.300000e-02 \n", @@ -2715,7 +2540,7 @@ "inc_grants 1.620000e-07 \n", "inc_donations 3.300000e-07 \n", "total_costs -1.440000e-08 ]\n", - "timestamp: 2023-09-26T12:25:20.316852\n", + "timestamp: 2023-09-26T16:44:15.025370\n", "comments: []\n", "exception: \n", "\n", @@ -2734,7 +2559,7 @@ "Model: Logit Df Residuals: 8.060000e+02\n", "Method: MLE Df Model: 4.000000e+00\n", "Date: Tue, 26 Sep 2023 Pseudo R-squ.: 2.187000e-01\n", - "Time: 12:25:20 Log-Likelihood: -3.980700e+02\n", + "Time: 16:44:15 Log-Likelihood: -3.980700e+02\n", "converged: True LL-Null: -5.095000e+02\n", "Covariance Type: nonrobust LLR p-value: 4.532000e-47, coef std err z P>|z| [0.025 \\\n", "const 5.120000e-02 9.100000e-02 0.561 0.575 -1.280000e-01 \n", @@ 
-2749,7 +2574,7 @@ "inc_grants 2.660000e-07 \n", "inc_donations 7.160000e-07 \n", "total_costs -2.150000e-08 ]\n", - "timestamp: 2023-09-26T12:25:20.356849\n", + "timestamp: 2023-09-26T16:44:15.133359\n", "comments: []\n", "exception: \n", "\n", @@ -2771,7 +2596,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "b1f77749", "metadata": {}, "outputs": [ @@ -2799,7 +2624,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "45ec04ef", "metadata": {}, "outputs": [ @@ -2825,7 +2650,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "0c826271", "metadata": {}, "outputs": [ @@ -2853,7 +2678,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "2816eac7", "metadata": {}, "outputs": [ @@ -2881,7 +2706,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "f38b4334", "metadata": {}, "outputs": [ @@ -2913,7 +2738,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "9e554eea", "metadata": {}, "outputs": [ @@ -2943,7 +2768,7 @@ "N 1.364700e+05 1.999335e+05\n", "R 8.006360e+06 3.228216e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:19.994753\n", + "timestamp: 2023-09-26T16:44:14.514747\n", "comments: []\n", "exception: \n", "\n", @@ -2972,7 +2797,7 @@ "N 1.341800e+05 1.990196e+05\n", "R 7.882231e+06 3.204558e+07\n", "R/G 1.664827e+07 1.583532e+07]\n", - "timestamp: 2023-09-26T12:25:20.122274\n", + "timestamp: 2023-09-26T16:44:14.676251\n", "comments: []\n", "exception: \n", "\n", @@ -3018,7 +2843,7 @@ "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5]\n", - "timestamp: 2023-09-26T12:25:19.445330\n", + "timestamp: 2023-09-26T16:43:37.469934\n", "comments: []\n", "exception: \n", "\n", @@ -3037,7 +2862,7 @@ 
"Columns: []\n", "Index: []\n", "output: ['XandY.jpeg']\n", - "timestamp: 2023-09-26T12:25:20.472783\n", + "timestamp: 2023-09-26T16:44:15.336345\n", "comments: ['This output is an image showing the relationship between X and Y']\n", "exception: \n", "\n", @@ -3065,7 +2890,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "f78b5a08", "metadata": {}, "outputs": [ @@ -3116,7 +2941,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "df2a02e0", "metadata": {}, "outputs": [ @@ -3167,7 +2992,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "id": "56d2b6a1", "metadata": {}, "outputs": [ @@ -3268,7 +3093,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type)\",\n", " \"summary\": \"fail; threshold: 6 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.155653\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.007348\",\n", " \"comments\": [\n", " \"This is a cross table between year and grant_type\",\n", " \"6 cells were suppressed in this table\"\n", @@ -3383,7 +3208,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc=\\\"mean\\\")\",\n", " \"summary\": \"fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.553369\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.654102\",\n", " \"comments\": [],\n", " \"exception\": \"This one is safe. 
Trust me, I'm a professor.\"\n", " },\n", @@ -3581,7 +3406,7 @@ " },\n", " \"command\": \"safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc=\\\"mean\\\")\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.848126\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.222443\",\n", " \"comments\": [],\n", " \"exception\": \"It's not disclosive, I promise.\"\n", " },\n", @@ -3663,7 +3488,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.915731\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.374409\",\n", " \"comments\": [],\n", " \"exception\": \"I need this one too\"\n", " },\n", @@ -3745,7 +3570,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; missing values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:19.994753\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.514747\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -3844,7 +3669,7 @@ " },\n", " \"command\": \"table = acro.pivot_table(\",\n", " \"summary\": \"review; negative values found\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.122274\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.676251\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -3873,7 +3698,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.ols(y, x)\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.194182\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.841394\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3902,7 +3727,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.olsr(\",\n", " \"summary\": \"pass; dof=807.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.256095\",\n", + " \"timestamp\": \"2023-09-26T16:44:14.939403\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ 
-3927,7 +3752,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.probit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.316852\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.025370\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -3952,7 +3777,7 @@ " \"outcome\": {},\n", " \"command\": \"results = acro.logit(y, x)\",\n", " \"summary\": \"pass; dof=806.0 >= 10\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.356849\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.133359\",\n", " \"comments\": [],\n", " \"exception\": \"\"\n", " },\n", @@ -4171,7 +3996,7 @@ " },\n", " \"command\": \"table = acro.crosstab(\",\n", " \"summary\": \"fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \",\n", - " \"timestamp\": \"2023-09-26T12:25:19.445330\",\n", + " \"timestamp\": \"2023-09-26T16:43:37.469934\",\n", " \"comments\": [],\n", " \"exception\": \"yes\"\n", " },\n", @@ -4189,11 +4014,11 @@ " \"outcome\": {},\n", " \"command\": \"custom\",\n", " \"summary\": \"review\",\n", - " \"timestamp\": \"2023-09-26T12:25:20.472783\",\n", + " \"timestamp\": \"2023-09-26T16:44:15.336345\",\n", " \"comments\": [\n", " \"This output is an image showing the relationship between X and Y\"\n", " ],\n", - " \"exception\": \"d\"\n", + " \"exception\": \"yes\"\n", " }\n", " }\n", "}\n" diff --git a/test/test_initial.py b/test/test_initial.py index aa7430e..1071ce2 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -585,6 +585,7 @@ def test_crosstab_with_totals_with_suppression_herichical(data, acro): 12 ] assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + assert ("G", "dead") not in output.output[0].columns def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): @@ -599,6 +600,7 @@ def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): output = acro.results.get_index(0) assert 8689781 == 
output.output[0]["All"].iat[0] assert 5425170.5 == output.output[0]["All"].iat[6] + assert "R/G" not in output.output[0].columns def test_crosstab_with_manual_totals_with_suppression(data, acro): @@ -615,6 +617,87 @@ def test_crosstab_with_manual_totals_with_suppression(data, acro): assert "R/G" in output.output[0].columns +def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro): + """Test the crosstab with both margins and suprression + are true with multilevel index and columns while using the total manual function. + """ + _ = acro.crosstab( + [data.year, data.survivor], + [data.grant_type, data.status], + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 47 == output.output[0]["All"].iat[0] + + total_rows = (output.output[0].loc[("All", ""), :].sum()) - output.output[0][ + "All" + ].iat[12] + total_cols = (output.output[0].loc[:, "All"].sum()) - output.output[0]["All"].iat[ + 12 + ] + assert total_cols == total_rows == output.output[0]["All"].iat[12] == 852 + assert ("G", "dead") in output.output[0].columns + + +def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, acro): + """Test the crosstab with both margins and suprression are true + and with aggfunc mean while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="mean", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 8689780 == round(output.output[0]["All"].iat[0]) + assert 5425170 == round(output.output[0]["All"].iat[6]) + assert "R/G" in output.output[0].columns + + +def test_herichical_crosstab_with_manual_totals_with_mean(data, acro): + """Test the crosstab with both margins and suprression are true,with + aggfunc mean and with multilevel columns and rows while using the total manual function. 
+ """ + _ = acro.crosstab( + [data.year, data.survivor], + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="mean", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert 1385162 == round(output.output[0]["All"].iat[0]) + assert 5434959 == round(output.output[0]["All"].iat[12]) + assert ("G", "Dead in 2015") in output.output[0].columns + + +def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_std( + data, acro, caplog +): + """Test the crosstab with both margins and suprression are true and with + aggfunc std while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="std", + margins=True, + show_suppressed=True, + ) + output = acro.results.get_index(0) + assert "All" not in output.output[0].columns + assert ( + "The margins with the std agg func can not be calculated. " + "Please set the show_suppressed to false to calculate it." in caplog.text + ) + + if RUN_TEST: def test_crosstab_with_sum(data, acro): @@ -649,3 +732,22 @@ def test_crosstab_multiple_aggregate_function(data, acro): correctval = 97383496.0 errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg + + def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( + data, acro, caplog + ): + """Test the crosstab with both margins and suprression are true + and with a list of aggfuncs while using the total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + show_suppressed=True, + ) + assert ( + "We can not calculate the margins with a list of aggregation functions. 
" + "Please create a table for each aggregation function" in caplog.text + ) From 6daf8515e1e91823c824893fcab82f9d8549c0c6 Mon Sep 17 00:00:00 2001 From: mahaalbashir Date: Thu, 28 Sep 2023 04:52:16 +0100 Subject: [PATCH 11/14] adding tests and fixing issues --- acro/acro_tables.py | 56 +- notebooks/test.ipynb | 1532 +++++++++++++++++++++++++++++++++++++++--- test/test_initial.py | 81 ++- 3 files changed, 1530 insertions(+), 139 deletions(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 2702be3..7051eaa 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -541,12 +541,7 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals masks["negative"] = negative # p-percent check masks["p-ratio"] = pd.crosstab( # type: ignore - index, - columns, - values, - aggfunc=pperc_funcs, - margins=margins, - dropna=dropna, + index, columns, values, aggfunc=pperc_funcs, margins=margins, dropna=dropna ) # nk values check masks["nk-rule"] = pd.crosstab( # type: ignore @@ -837,7 +832,7 @@ def apply_suppression( outcome_df += tmp_df except TypeError: logger.warning("problem mask %s is not binary", name) - except ValueError as error: + except ValueError as error: # pragma: no cove error_message = ( f"An error occurred with the following details" f":\n Name: {name}\n Mask: {mask}\n Table: {table}" @@ -949,53 +944,42 @@ def get_queries(masks, aggfunc) -> list[str]: # identify level names for rows and columns index_level_names = mask.index.names column_level_names = mask.columns.names - # iterate through the masks to identify the true cells and extract queries - for column_level_values in mask.columns: - for index_level_values in mask.index: - if mask.loc[index_level_values, column_level_values]: - if isinstance(index_level_values, tuple): + for col_index, col_label in enumerate(mask.columns): + for row_index, row_label in enumerate(mask.index): + if mask.iloc[row_index, col_index]: + if isinstance(row_label, tuple): index_query = " & ".join( 
[ f"({level} == {val})" if isinstance(val, (int, float)) else f'({level} == "{val}")' - for level, val in zip( - index_level_names, index_level_values - ) + for level, val in zip(index_level_names, row_label) ] ) else: index_query = " & ".join( [ - f"({index_level_names} == {index_level_values})" - if isinstance(index_level_values, (int, float)) - else ( - f"({index_level_names}" - f'== "{index_level_values}")' - ) + f"({index_level_names} == {row_label})" + if isinstance(row_label, (int, float)) + else (f"({index_level_names}" f'== "{row_label}")') ] ) - if isinstance(column_level_values, tuple): + if isinstance(col_label, tuple): column_query = " & ".join( [ f"({level} == {val})" if isinstance(val, (int, float)) else f'({level} == "{val}")' - for level, val in zip( - column_level_names, column_level_values - ) + for level, val in zip(column_level_names, col_label) ] ) else: column_query = " & ".join( [ - f"({column_level_names} == {column_level_values})" - if isinstance(column_level_values, (int, float)) - else ( - f"({column_level_names}" - f'== "{column_level_values}")' - ) + f"({column_level_names} == {col_label})" + if isinstance(col_label, (int, float)) + else (f"({column_level_names}" f'== "{col_label}")') ] ) query = f"{index_query} & {column_query}" @@ -1051,17 +1035,19 @@ def get_index_columns(index, columns, data) -> tuple[list | Series, list | Serie List | Series The columns extracted from the data. 
""" + shift = 1 if isinstance(index, list): index_new = [] - for _, val in enumerate(index): - index_new.append(data[val.name]) + for i in range(len(index)): + index_new.append(data.iloc[:, i]) + shift = len(index) else: index_new = data[index.name] if isinstance(columns, list): columns_new = [] - for _, val in enumerate(columns): - columns_new.append(data[val.name]) + for i in range(shift, shift + len(columns)): + columns_new.append(data.iloc[:, i]) else: columns_new = data[columns.name] return index_new, columns_new diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index a82dd96..eb49438 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "e33fd4fb", "metadata": {}, "outputs": [], @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "c01cfe12", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "cc8d993a", "metadata": { "scrolled": true @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "4b8a77e2", "metadata": {}, "outputs": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "8722735f", "metadata": { "scrolled": true @@ -296,7 +296,7 @@ "[5 rows x 44 columns]" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "961684cb", "metadata": {}, "outputs": [ @@ -413,7 +413,7 @@ "2015 15 59 71 8" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "bb4b2677", "metadata": { "scrolled": true @@ -443,7 +443,7 @@ "name": "stderr", "output_type": 
"stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 6 cells may need suppressing; \n", + "INFO:acro:get_summary(): fail; threshold: 6 cells suppressed; \n", "INFO:acro:outcome_df:\n", "---------------------------------------|\n", "grant_type |G |N |R |R/G |\n", @@ -457,7 +457,7 @@ "2015 | ok | ok | ok | threshold; |\n", "---------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_0\n" + "INFO:acro:records:add(): output_1\n" ] }, { @@ -500,42 +500,42 @@ " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", " 2011\n", " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", " 2012\n", " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", " 2013\n", " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", " 2014\n", " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", " 2015\n", " 15\n", " 59\n", " 71\n", - " 8\n", + " NaN\n", " \n", " \n", "\n", @@ -544,15 +544,15 @@ "text/plain": [ "grant_type G N R R/G\n", "year \n", - "2010 15 59 71 8\n", - "2011 15 59 71 8\n", - "2012 15 59 71 8\n", - "2013 15 59 71 8\n", - "2014 15 59 71 8\n", - "2015 15 59 71 8" + "2010 15 59 71 NaN\n", + "2011 15 59 71 NaN\n", + "2012 15 59 71 NaN\n", + "2013 15 59 71 NaN\n", + "2014 15 59 71 NaN\n", + "2015 15 59 71 NaN" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "37ddb939", "metadata": {}, "outputs": [ @@ -580,21 +580,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", + "INFO:acro:get_summary(): fail; threshold: 6 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed; \n", "INFO:acro:outcome_df:\n", - "---------------------------------------------------------------------------|\n", - "grant_type |G 
|N |R |R/G |\n", - "year | | | | |\n", - "---------------------------------------------------------------------------|\n", - "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", - "2011 | ok | ok | ok | threshold; |\n", - "2012 | ok | ok | ok | threshold; |\n", - "2013 | ok | ok | ok | threshold; |\n", - "2014 | ok | ok | ok | threshold; |\n", - "2015 | ok | ok | ok | threshold; |\n", - "---------------------------------------------------------------------------|\n", + "----------------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |\n", + "year | | | | |\n", + "----------------------------------------------------------------|\n", + "2010 | ok | p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", + "2011 | ok | ok | ok | threshold; |\n", + "2012 | ok | ok | ok | threshold; |\n", + "2013 | ok | ok | ok | threshold; |\n", + "2014 | ok | ok | ok | threshold; |\n", + "2015 | ok | ok | ok | threshold; |\n", + "----------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_1\n" + "INFO:acro:records:add(): output_2\n" ] }, { @@ -689,7 +689,7 @@ "2015 11133433.0 146572.187500 10812888.0 NaN" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -711,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "id": "ef42beb6", "metadata": {}, "outputs": [ @@ -719,49 +719,1191 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 14 cells suppressed; p-ratio: 8 cells suppressed; nk-rule: 7 cells suppressed; \n", - "INFO:acro:outcome_df:\n", - "------------------------------------------------------------------------------------------------------------------------------------------------|\n", - "grant_type |G |N |R |R/G |All|\n", - "survivor |Dead in 2015 Alive in 2015 |Alive in 2015 |Dead in 2015 Alive in 2015 |Alive in 2015 | |\n", - "year | | | | | 
|\n", - "------------------------------------------------------------------------------------------------------------------------------------------------|\n", - "2010 | threshold; p-ratio; nk-rule; ok | threshold; p-ratio; | ok ok | threshold; p-ratio; nk-rule; | ok|\n", - "2011 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", - "2012 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", - "2013 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", - "2014 | threshold; p-ratio; nk-rule; ok | ok | ok ok | threshold; | ok|\n", - "2015 | threshold; p-ratio; nk-rule; threshold; | ok | ok ok | threshold; | ok|\n", - "All | ok ok | ok | ok ok | ok | ok|\n", - "------------------------------------------------------------------------------------------------------------------------------------------------|\n", - "\n", - "INFO:acro:records:add(): output_2\n" + "INFO:acro:get_summary(): fail; threshold: 26 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing; \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "grant_type G N R All\n", - "survivor Alive in 2015 Alive in 2015 Dead in 2015 Alive in 2015 \n", - "year \n", - "2010 11571583.0 NaN 1385162.500 22436528.0 8930501.0\n", - "2011 10624083.0 124013.859375 1380839.250 19596956.0 4799615.5\n", - "2012 14319667.0 131859.062500 1354274.750 17465130.0 4712294.0\n", - "2013 16943250.0 147937.796875 1409097.250 18547244.0 5094428.0\n", - "2014 17182166.0 133198.250000 1370867.125 20651954.0 5569847.0\n", - "2015 NaN 146572.187500 1779520.625 21810030.0 5137233.5\n", - "All 14128150.0 136725.953125 1425355.125 20004548.0 5434959.5\n" + "p-ratio 379 0.0\n", + "496 NaN\n", + "625 47690.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 142 93000.0\n", + "147 57600000.0\n", + "174 46628000.0\n", + "225 0.0\n", + "445 3000.0\n", + "537 3066000.0\n", + "695 0.0\n", + "786 0.0\n", + "831 14175000.0\n", + "840 
8670000.0\n", + "855 0.0\n", + "916 8624000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 11 NaN\n", + "18 NaN\n", + "26 NaN\n", + "42 NaN\n", + "50 NaN\n", + "60 NaN\n", + "69 0.0\n", + "75 NaN\n", + "90 NaN\n", + "101 NaN\n", + "103 0.0\n", + "130 NaN\n", + "136 NaN\n", + "157 NaN\n", + "164 NaN\n", + "191 NaN\n", + "202 NaN\n", + "211 NaN\n", + "218 NaN\n", + "239 NaN\n", + "245 NaN\n", + "249 NaN\n", + "283 NaN\n", + "301 NaN\n", + "309 0.0\n", + "325 NaN\n", + "347 NaN\n", + "350 NaN\n", + "387 NaN\n", + "392 NaN\n", + "401 NaN\n", + "407 NaN\n", + "435 NaN\n", + "451 NaN\n", + "458 NaN\n", + "473 NaN\n", + "482 NaN\n", + "510 NaN\n", + "519 NaN\n", + "533 NaN\n", + "571 NaN\n", + "576 0.0\n", + "588 NaN\n", + "597 NaN\n", + "619 NaN\n", + "631 NaN\n", + "652 NaN\n", + "663 NaN\n", + "677 NaN\n", + "678 0.0\n", + "697 NaN\n", + "705 NaN\n", + "710 NaN\n", + "714 NaN\n", + "734 NaN\n", + "747 NaN\n", + "803 NaN\n", + "870 NaN\n", + "897 NaN\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 4 7335103.0\n", + "15 NaN\n", + "33 NaN\n", + "41 NaN\n", + "108 458770.0\n", + "114 688207.0\n", + "124 1210229.0\n", + "155 156900.0\n", + "170 2104127.0\n", + "184 878201.0\n", + "192 5622461.0\n", + "204 NaN\n", + "256 1213752.0\n", + "261 33298.0\n", + "275 917475.0\n", + "280 170861.0\n", + "322 833023.0\n", + "330 4608790.0\n", + "364 318506.0\n", + "376 613608.0\n", + "415 199544.0\n", + "420 232862.0\n", + "430 299737.0\n", + "465 122330.0\n", + "477 1767464.0\n", + "491 19583.0\n", + "501 1570768.0\n", + "504 5067053.0\n", + "527 1456214.0\n", + "550 NaN\n", + "606 NaN\n", + "613 1039950.0\n", + "647 774522.0\n", + "654 254530.0\n", + "668 10324751.0\n", + "720 2300382.0\n", + "729 10944.0\n", + "757 571965.0\n", + "769 101420.0\n", + "783 NaN\n", + "839 151783.0\n", + "860 267257.0\n", + "867 453295.0\n", + "876 140314.0\n", + "891 253028.0\n", + "904 296621.0\n", + "910 566870.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 82 586000.0\n", + 
"84 72000.0\n", + "295 396491.0\n", + "338 4707584.0\n", + "355 7520663.0\n", + "367 1457000.0\n", + "408 900425.0\n", + "441 NaN\n", + "557 173171008.0\n", + "562 NaN\n", + "569 14192000.0\n", + "587 16719000.0\n", + "603 9913000.0\n", + "640 4445000.0\n", + "687 32500000.0\n", + "743 820000.0\n", + "755 165531008.0\n", + "766 6051000.0\n", + "775 NaN\n", + "794 1579000.0\n", + "814 0.0\n", + "821 NaN\n", + "827 4097391.0\n", + "848 4072000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 58 NaN\n", + "232 NaN\n", + "269 913000.0\n", + "292 NaN\n", + "314 913000.0\n", + "544 36645000.0\n", + "804 NaN\n", + "884 8073000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 383 0.0\n", + "495 0.0\n", + "628 44700.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 143 97000.0\n", + "149 52900000.0\n", + "176 44369000.0\n", + "224 0.0\n", + "444 200000.0\n", + "536 584000.0\n", + "693 0.0\n", + "790 0.0\n", + "828 13372000.0\n", + "842 9107000.0\n", + "857 0.0\n", + "912 6860000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 6 30000.0\n", + "19 NaN\n", + "27 257094.0\n", + "44 0.0\n", + "53 0.0\n", + "65 50000.0\n", + "66 0.0\n", + "77 0.0\n", + "92 514743.0\n", + "100 363900.0\n", + "107 0.0\n", + "127 19495.0\n", + "137 8599.0\n", + "160 0.0\n", + "166 999.0\n", + "189 0.0\n", + "203 46276.0\n", + "210 337924.0\n", + "216 0.0\n", + "236 0.0\n", + "242 550662.0\n", + "251 12540.0\n", + "284 3452.0\n", + "300 128962.0\n", + "308 0.0\n", + "324 0.0\n", + "345 300543.0\n", + "348 0.0\n", + "386 0.0\n", + "390 147001.0\n", + "400 1250.0\n", + "403 0.0\n", + "432 0.0\n", + "452 557665.0\n", + "461 29280.0\n", + "470 189866.0\n", + "483 272.0\n", + "511 293041.0\n", + "516 60.0\n", + "529 0.0\n", + "574 960417.0\n", + "580 0.0\n", + "590 0.0\n", + "598 2174.0\n", + "620 81751.0\n", + "632 0.0\n", + "653 3000.0\n", + "664 675401.0\n", + "674 96956.0\n", + "679 0.0\n", + "696 200000.0\n", + "706 70617.0\n", + "708 400509.0\n", + "719 409441.0\n", + 
"737 20000.0\n", + "749 200000.0\n", + "800 0.0\n", + "874 0.0\n", + "898 228914.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 0 9603182.0\n", + "16 1308251.0\n", + "30 NaN\n", + "36 226700.0\n", + "110 410184.0\n", + "115 857918.0\n", + "121 1110583.0\n", + "150 181137.0\n", + "168 2097111.0\n", + "183 963550.0\n", + "193 5959576.0\n", + "207 NaN\n", + "257 1080661.0\n", + "262 29511.0\n", + "270 960648.0\n", + "279 131449.0\n", + "321 804104.0\n", + "333 6215960.0\n", + "365 288052.0\n", + "377 865189.0\n", + "419 328063.0\n", + "422 220221.0\n", + "428 297715.0\n", + "466 105147.0\n", + "479 1401967.0\n", + "489 0.0\n", + "499 1659064.0\n", + "506 5255193.0\n", + "524 1326767.0\n", + "548 3569.0\n", + "607 36187.0\n", + "612 1038408.0\n", + "646 732056.0\n", + "657 242378.0\n", + "669 10632990.0\n", + "725 2953606.0\n", + "728 9320.0\n", + "758 584362.0\n", + "772 162350.0\n", + "782 59044.0\n", + "834 72155.0\n", + "859 252079.0\n", + "866 455934.0\n", + "877 205629.0\n", + "892 293339.0\n", + "905 253995.0\n", + "906 462461.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 81 619000.0\n", + "89 178000.0\n", + "294 188089.0\n", + "339 8145394.0\n", + "357 7298357.0\n", + "366 1814000.0\n", + "411 1535733.0\n", + "438 1163000.0\n", + "556 171564992.0\n", + "560 4066298.0\n", + "567 10030000.0\n", + "585 12395000.0\n", + "605 10863000.0\n", + "637 3200000.0\n", + "684 29700000.0\n", + "742 777000.0\n", + "753 179178000.0\n", + "765 5545000.0\n", + "778 1181800.0\n", + "796 1907000.0\n", + "813 0.0\n", + "818 4687000.0\n", + "824 9742296.0\n", + "847 4548000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 54 23634000.0\n", + "230 26984000.0\n", + "266 1546000.0\n", + "290 370000.0\n", + "317 1546000.0\n", + "541 39118000.0\n", + "807 30369000.0\n", + "883 4813000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 381 0.0\n", + "492 0.0\n", + "629 42700.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 141 460000.0\n", + "144 
60100000.0\n", + "175 57500000.0\n", + "226 0.0\n", + "446 3507000.0\n", + "535 656000.0\n", + "694 0.0\n", + "789 0.0\n", + "829 17120000.0\n", + "843 25319000.0\n", + "853 0.0\n", + "917 7174000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 10 45000.0\n", + "21 0.0\n", + "25 857674.0\n", + "45 664.0\n", + "51 0.0\n", + "64 48812.0\n", + "71 0.0\n", + "76 0.0\n", + "95 289356.0\n", + "97 418226.0\n", + "105 0.0\n", + "126 3988.0\n", + "135 12592.0\n", + "159 0.0\n", + "163 185480.0\n", + "190 0.0\n", + "198 99747.0\n", + "213 381990.0\n", + "220 0.0\n", + "235 0.0\n", + "240 370941.0\n", + "248 38907.0\n", + "282 8592.0\n", + "304 330424.0\n", + "306 0.0\n", + "327 0.0\n", + "346 237377.0\n", + "349 0.0\n", + "388 0.0\n", + "394 196970.0\n", + "398 15952.0\n", + "402 29769.0\n", + "437 0.0\n", + "455 16405.0\n", + "457 29678.0\n", + "469 271255.0\n", + "480 250.0\n", + "515 198047.0\n", + "518 25.0\n", + "531 0.0\n", + "573 982879.0\n", + "581 0.0\n", + "592 126436.0\n", + "596 1020.0\n", + "622 81433.0\n", + "635 12060.0\n", + "650 2208.0\n", + "661 619242.0\n", + "672 127249.0\n", + "683 0.0\n", + "698 238836.0\n", + "703 130127.0\n", + "713 361811.0\n", + "716 400024.0\n", + "733 15916.0\n", + "748 200000.0\n", + "798 0.0\n", + "875 0.0\n", + "895 392323.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 5 11156805.0\n", + "12 1216430.0\n", + "34 NaN\n", + "37 303019.0\n", + "109 285279.0\n", + "117 736907.0\n", + "123 1189416.0\n", + "154 197316.0\n", + "172 1920315.0\n", + "180 950461.0\n", + "195 4710045.0\n", + "206 NaN\n", + "252 569537.0\n", + "260 27482.0\n", + "271 919883.0\n", + "277 155549.0\n", + "323 914415.0\n", + "332 5372233.0\n", + "360 89304.0\n", + "374 1459481.0\n", + "416 417988.0\n", + "425 221496.0\n", + "427 295882.0\n", + "463 54698.0\n", + "476 1105857.0\n", + "490 32422.0\n", + "498 1616876.0\n", + "508 5621966.0\n", + "522 761119.0\n", + "551 557.0\n", + "611 235962.0\n", + "615 485925.0\n", + "642 891598.0\n", + "655 
205421.0\n", + "667 10309506.0\n", + "722 2451239.0\n", + "727 0.0\n", + "756 574000.0\n", + "768 210754.0\n", + "780 106468.0\n", + "837 116147.0\n", + "858 182621.0\n", + "865 1342632.0\n", + "879 134934.0\n", + "889 313656.0\n", + "902 262306.0\n", + "907 816456.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 78 550000.0\n", + "88 151000.0\n", + "298 169873.0\n", + "336 5060737.0\n", + "354 6712465.0\n", + "369 2205000.0\n", + "413 732329.0\n", + "440 620000.0\n", + "553 171692992.0\n", + "561 3738474.0\n", + "566 11819000.0\n", + "586 7910000.0\n", + "602 9914000.0\n", + "639 1800000.0\n", + "686 26800000.0\n", + "739 1874000.0\n", + "751 150663008.0\n", + "764 2148000.0\n", + "776 1037130.0\n", + "793 1590000.0\n", + "812 0.0\n", + "816 4088000.0\n", + "822 5076112.0\n", + "851 2811000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 55 24309000.0\n", + "231 31064000.0\n", + "267 3240000.0\n", + "293 470000.0\n", + "312 935000.0\n", + "540 42228000.0\n", + "806 28116000.0\n", + "885 4118000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 378 0.0\n", + "493 0.0\n", + "627 38200.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 139 200000.0\n", + "148 69100000.0\n", + "178 74500000.0\n", + "223 0.0\n", + "447 8892000.0\n", + "534 447000.0\n", + "691 0.0\n", + "791 0.0\n", + "830 17270000.0\n", + "844 25700000.0\n", + "854 0.0\n", + "914 7210000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 9 0.0\n", + "22 0.0\n", + "28 469903.0\n", + "43 186548.0\n", + "49 0.0\n", + "61 238454.0\n", + "70 0.0\n", + "74 30000.0\n", + "94 182599.0\n", + "99 316358.0\n", + "106 0.0\n", + "128 5964.0\n", + "132 6585.0\n", + "161 0.0\n", + "162 276164.0\n", + "187 0.0\n", + "200 200645.0\n", + "214 450720.0\n", + "217 9057.0\n", + "237 0.0\n", + "244 611216.0\n", + "250 67975.0\n", + "287 36840.0\n", + "305 97000.0\n", + "307 0.0\n", + "326 0.0\n", + "342 573657.0\n", + "352 0.0\n", + "384 0.0\n", + "395 301846.0\n", + "399 43047.0\n", + "405 
37932.0\n", + "436 0.0\n", + "454 82101.0\n", + "459 25224.0\n", + "471 201906.0\n", + "485 0.0\n", + "513 384354.0\n", + "517 0.0\n", + "532 0.0\n", + "572 669620.0\n", + "579 0.0\n", + "589 229442.0\n", + "595 399.0\n", + "618 224613.0\n", + "633 7580.0\n", + "649 8694.0\n", + "662 547259.0\n", + "676 258188.0\n", + "682 0.0\n", + "699 53101.0\n", + "704 151581.0\n", + "712 368542.0\n", + "718 712067.0\n", + "736 80763.0\n", + "744 5000.0\n", + "801 0.0\n", + "871 0.0\n", + "899 575386.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 3 15194731.0\n", + "13 1134533.0\n", + "35 428037.0\n", + "38 296059.0\n", + "111 692963.0\n", + "119 654788.0\n", + "125 995153.0\n", + "151 175792.0\n", + "171 1916899.0\n", + "182 971061.0\n", + "197 5720093.0\n", + "205 1166873.0\n", + "255 368911.0\n", + "259 23635.0\n", + "274 2489663.0\n", + "281 121382.0\n", + "320 840097.0\n", + "335 1982284.0\n", + "363 58129.0\n", + "375 1749818.0\n", + "418 318608.0\n", + "423 227764.0\n", + "429 297089.0\n", + "467 67365.0\n", + "474 230639.0\n", + "486 36506.0\n", + "500 1561944.0\n", + "505 6139550.0\n", + "526 544329.0\n", + "549 0.0\n", + "608 699834.0\n", + "614 648864.0\n", + "643 848729.0\n", + "656 0.0\n", + "671 11103487.0\n", + "721 2550805.0\n", + "731 24450.0\n", + "761 588057.0\n", + "770 61000.0\n", + "785 80730.0\n", + "836 159385.0\n", + "862 167037.0\n", + "869 1461070.0\n", + "880 147852.0\n", + "893 259947.0\n", + "900 209038.0\n", + "909 812591.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 80 647000.0\n", + "86 243000.0\n", + "299 114885.0\n", + "341 7093319.0\n", + "358 6525913.0\n", + "370 1218000.0\n", + "409 476370.0\n", + "439 711000.0\n", + "555 173006000.0\n", + "558 3929100.0\n", + "565 9627000.0\n", + "582 7477000.0\n", + "601 8232000.0\n", + "636 1000000.0\n", + "688 23700000.0\n", + "740 1766000.0\n", + "750 186928992.0\n", + "762 2629000.0\n", + "777 1526802.0\n", + "795 1590000.0\n", + "810 70000.0\n", + "820 2878000.0\n", + "825 
1511482.0\n", + "849 2233000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 59 27452000.0\n", + "233 27439000.0\n", + "268 1602000.0\n", + "291 837000.0\n", + "313 1156000.0\n", + "545 44385000.0\n", + "805 26443000.0\n", + "887 4811000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 382 0.0\n", + "497 0.0\n", + "626 36200.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 140 0.0\n", + "145 85300000.0\n", + "177 94700000.0\n", + "227 0.0\n", + "448 0.0\n", + "539 2004000.0\n", + "692 0.0\n", + "788 0.0\n", + "833 17425000.0\n", + "841 6221000.0\n", + "856 0.0\n", + "915 536000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 7 0.0\n", + "20 0.0\n", + "29 352309.0\n", + "47 487529.0\n", + "52 0.0\n", + "63 168553.0\n", + "67 0.0\n", + "72 10000.0\n", + "91 282745.0\n", + "98 383960.0\n", + "104 0.0\n", + "131 250295.0\n", + "133 8136.0\n", + "156 0.0\n", + "165 311050.0\n", + "188 15000.0\n", + "199 356591.0\n", + "215 393918.0\n", + "221 0.0\n", + "238 0.0\n", + "243 645517.0\n", + "246 70018.0\n", + "285 0.0\n", + "302 54324.0\n", + "311 0.0\n", + "329 0.0\n", + "343 484697.0\n", + "351 0.0\n", + "389 0.0\n", + "391 129110.0\n", + "397 43880.0\n", + "406 27632.0\n", + "433 0.0\n", + "450 0.0\n", + "456 28634.0\n", + "472 230616.0\n", + "484 0.0\n", + "512 395461.0\n", + "521 0.0\n", + "530 0.0\n", + "575 561611.0\n", + "578 0.0\n", + "591 403477.0\n", + "594 198.0\n", + "621 297569.0\n", + "634 1494.0\n", + "651 1001.0\n", + "665 368991.0\n", + "675 0.0\n", + "680 0.0\n", + "700 44238.0\n", + "702 321888.0\n", + "711 332210.0\n", + "715 0.0\n", + "735 100736.0\n", + "746 0.0\n", + "799 0.0\n", + "872 0.0\n", + "896 295309.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 1 18768904.0\n", + "17 1324735.0\n", + "31 422035.0\n", + "39 300661.0\n", + "112 117965.0\n", + "116 620807.0\n", + "120 1293522.0\n", + "153 212640.0\n", + "173 NaN\n", + "181 1100724.0\n", + "196 5582594.0\n", + "209 1622675.0\n", + "253 6626.0\n", + "258 
108212.0\n", + "272 2738544.0\n", + "278 103056.0\n", + "319 657960.0\n", + "334 959535.0\n", + "361 9176.0\n", + "373 1526627.0\n", + "414 304814.0\n", + "421 149042.0\n", + "431 303852.0\n", + "464 99751.0\n", + "478 538515.0\n", + "487 13542.0\n", + "502 1570092.0\n", + "507 NaN\n", + "523 1110421.0\n", + "547 0.0\n", + "610 744851.0\n", + "616 1087153.0\n", + "644 724352.0\n", + "658 0.0\n", + "670 12332877.0\n", + "723 1078046.0\n", + "730 0.0\n", + "760 473246.0\n", + "771 74468.0\n", + "781 71829.0\n", + "835 123359.0\n", + "863 107604.0\n", + "864 NaN\n", + "881 109061.0\n", + "890 242073.0\n", + "901 211339.0\n", + "911 NaN\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 83 138000.0\n", + "85 0.0\n", + "297 197839.0\n", + "340 4358596.0\n", + "359 6653000.0\n", + "371 945000.0\n", + "410 354558.0\n", + "442 1090000.0\n", + "552 176550000.0\n", + "559 3198039.0\n", + "568 11600000.0\n", + "583 5951000.0\n", + "600 8333000.0\n", + "638 100000.0\n", + "685 25392000.0\n", + "741 2343000.0\n", + "752 234218000.0\n", + "767 3767000.0\n", + "774 1238989.0\n", + "797 2123000.0\n", + "811 70000.0\n", + "817 3322000.0\n", + "826 885891.0\n", + "846 2818000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 57 31447000.0\n", + "229 31459000.0\n", + "265 1333000.0\n", + "289 1166000.0\n", + "315 1333000.0\n", + "543 43780000.0\n", + "809 27304000.0\n", + "882 4944000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 380 0.0\n", + "494 0.0\n", + "624 34200.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 138 0.0\n", + "146 8700000.0\n", + "179 89300000.0\n", + "222 0.0\n", + "449 0.0\n", + "538 NaN\n", + "690 NaN\n", + "787 NaN\n", + "832 18759000.0\n", + "845 16113000.0\n", + "852 0.0\n", + "913 695000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 8 0.0\n", + "23 10872.0\n", + "24 295254.0\n", + "46 128341.0\n", + "48 0.0\n", + "62 204781.0\n", + "68 0.0\n", + "73 0.0\n", + "93 154354.0\n", + "96 321696.0\n", + "102 0.0\n", + "129 
10979.0\n", + "134 14946.0\n", + "158 0.0\n", + "167 138351.0\n", + "186 106500.0\n", + "201 156770.0\n", + "212 316794.0\n", + "219 37000.0\n", + "234 0.0\n", + "241 541240.0\n", + "247 48127.0\n", + "286 22830.0\n", + "303 63426.0\n", + "310 0.0\n", + "328 0.0\n", + "344 951007.0\n", + "353 0.0\n", + "385 0.0\n", + "393 193999.0\n", + "396 106726.0\n", + "404 40051.0\n", + "434 0.0\n", + "453 0.0\n", + "460 30495.0\n", + "468 240659.0\n", + "481 0.0\n", + "514 438716.0\n", + "520 0.0\n", + "528 0.0\n", + "570 441466.0\n", + "577 0.0\n", + "593 317569.0\n", + "599 422.0\n", + "623 419269.0\n", + "630 616825.0\n", + "648 375787.0\n", + "660 590666.0\n", + "673 0.0\n", + "681 NaN\n", + "701 51135.0\n", + "707 215670.0\n", + "709 371943.0\n", + "717 0.0\n", + "732 208604.0\n", + "745 100000.0\n", + "802 0.0\n", + "873 10000.0\n", + "894 207917.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 2 21638036.0\n", + "14 NaN\n", + "32 389290.0\n", + "40 249662.0\n", + "113 142940.0\n", + "118 NaN\n", + "122 NaN\n", + "152 NaN\n", + "169 NaN\n", + "185 1246979.0\n", + "194 5605438.0\n", + "208 1701495.0\n", + "254 0.0\n", + "263 157020.0\n", + "273 NaN\n", + "276 109401.0\n", + "318 NaN\n", + "331 NaN\n", + "362 7605.0\n", + "372 1637083.0\n", + "417 NaN\n", + "424 62499.0\n", + "426 102994.0\n", + "462 100948.0\n", + "475 NaN\n", + "488 17980.0\n", + "503 1147292.0\n", + "509 NaN\n", + "525 2368799.0\n", + "546 0.0\n", + "609 599233.0\n", + "617 NaN\n", + "645 594114.0\n", + "659 15000.0\n", + "666 11057465.0\n", + "724 NaN\n", + "726 NaN\n", + "759 295968.0\n", + "773 NaN\n", + "784 81871.0\n", + "838 NaN\n", + "861 NaN\n", + "868 NaN\n", + "878 92916.0\n", + "888 236786.0\n", + "903 167765.0\n", + "908 NaN\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 79 203000.0\n", + "87 71000.0\n", + "296 150246.0\n", + "337 4326506.0\n", + "356 6407000.0\n", + "368 561000.0\n", + "412 433703.0\n", + "443 615000.0\n", + "554 165548000.0\n", + "563 3090412.0\n", + "564 
11647000.0\n", + "584 6770000.0\n", + "604 7981000.0\n", + "641 NaN\n", + "689 26701000.0\n", + "738 2076000.0\n", + "754 249327008.0\n", + "763 3994000.0\n", + "779 1548244.0\n", + "792 1745000.0\n", + "815 0.0\n", + "819 2923000.0\n", + "823 3420589.0\n", + "850 2092000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 56 33580000.0\n", + "228 31602000.0\n", + "264 1397000.0\n", + "288 2612000.0\n", + "316 1397000.0\n", + "542 43693000.0\n", + "808 26284000.0\n", + "886 5664000.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 0 9603182.0\n", + "1 18768904.0\n", + "2 21638036.0\n", + "3 15194731.0\n", + "4 7335103.0\n", + " ... \n", + "913 695000.0\n", + "914 7210000.0\n", + "915 536000.0\n", + "916 8624000.0\n", + "917 7174000.0\n", + "Name: __dummy__, Length: 815, dtype: float32\n", + "p-ratio 4 7335103.0\n", + "69 0.0\n", + "82 586000.0\n", + "84 72000.0\n", + "103 0.0\n", + " ... \n", + "884 8073000.0\n", + "891 253028.0\n", + "904 296621.0\n", + "910 566870.0\n", + "916 8624000.0\n", + "Name: __dummy__, Length: 83, dtype: float32\n", + "p-ratio 0 9603182.0\n", + "6 30000.0\n", + "16 1308251.0\n", + "27 257094.0\n", + "36 226700.0\n", + " ... \n", + "892 293339.0\n", + "898 228914.0\n", + "905 253995.0\n", + "906 462461.0\n", + "912 6860000.0\n", + "Name: __dummy__, Length: 150, dtype: float32\n", + "p-ratio 5 11156805.0\n", + "10 45000.0\n", + "12 1216430.0\n", + "21 0.0\n", + "25 857674.0\n", + " ... \n", + "889 313656.0\n", + "895 392323.0\n", + "902 262306.0\n", + "907 816456.0\n", + "917 7174000.0\n", + "Name: __dummy__, Length: 151, dtype: float32\n", + "p-ratio 3 15194731.0\n", + "9 0.0\n", + "13 1134533.0\n", + "22 0.0\n", + "28 469903.0\n", + " ... \n", + "893 259947.0\n", + "899 575386.0\n", + "900 209038.0\n", + "909 812591.0\n", + "914 7210000.0\n", + "Name: __dummy__, Length: 153, dtype: float32\n", + "p-ratio 1 18768904.0\n", + "7 0.0\n", + "17 1324735.0\n", + "20 0.0\n", + "29 352309.0\n", + " ... 
\n", + "882 4944000.0\n", + "890 242073.0\n", + "896 295309.0\n", + "901 211339.0\n", + "915 536000.0\n", + "Name: __dummy__, Length: 149, dtype: float32\n", + "p-ratio 2 21638036.0\n", + "8 0.0\n", + "23 10872.0\n", + "24 295254.0\n", + "32 389290.0\n", + " ... \n", + "886 5664000.0\n", + "888 236786.0\n", + "894 207917.0\n", + "903 167765.0\n", + "913 695000.0\n", + "Name: __dummy__, Length: 129, dtype: float32\n", + "p-ratio 378 0.0\n", + "379 0.0\n", + "380 0.0\n", + "381 0.0\n", + "382 0.0\n", + "383 0.0\n", + "492 0.0\n", + "493 0.0\n", + "494 0.0\n", + "495 0.0\n", + "497 0.0\n", + "624 34200.0\n", + "625 47690.0\n", + "626 36200.0\n", + "627 38200.0\n", + "628 44700.0\n", + "629 42700.0\n", + "Name: __dummy__, dtype: float32\n", + "p-ratio 138 0.0\n", + "139 200000.0\n", + "140 0.0\n", + "141 460000.0\n", + "142 93000.0\n", + " ... \n", + "913 695000.0\n", + "914 7210000.0\n", + "915 536000.0\n", + "916 8624000.0\n", + "917 7174000.0\n", + "Name: __dummy__, Length: 69, dtype: float32\n", + "p-ratio 6 30000.0\n", + "7 0.0\n", + "8 0.0\n", + "9 0.0\n", + "10 45000.0\n", + " ... \n", + "894 207917.0\n", + "895 392323.0\n", + "896 295309.0\n", + "898 228914.0\n", + "899 575386.0\n", + "Name: __dummy__, Length: 298, dtype: float32\n", + "p-ratio 0 9603182.0\n", + "1 18768904.0\n", + "2 21638036.0\n", + "3 15194731.0\n", + "4 7335103.0\n", + " ... \n", + "905 253995.0\n", + "906 462461.0\n", + "907 816456.0\n", + "909 812591.0\n", + "910 566870.0\n", + "Name: __dummy__, Length: 248, dtype: float32\n", + "p-ratio 78 550000.0\n", + "79 203000.0\n", + "80 647000.0\n", + "81 619000.0\n", + "82 586000.0\n", + " ... 
\n", + "847 4548000.0\n", + "848 4072000.0\n", + "849 2233000.0\n", + "850 2092000.0\n", + "851 2811000.0\n", + "Name: __dummy__, Length: 139, dtype: float32\n", + "p-ratio 54 23634000.0\n", + "55 24309000.0\n", + "56 33580000.0\n", + "57 31447000.0\n", + "59 27452000.0\n", + "228 31602000.0\n", + "229 31459000.0\n", + "230 26984000.0\n", + "231 31064000.0\n", + "233 27439000.0\n", + "264 1397000.0\n", + "265 1333000.0\n", + "266 1546000.0\n", + "267 3240000.0\n", + "268 1602000.0\n", + "269 913000.0\n", + "288 2612000.0\n", + "289 1166000.0\n", + "290 370000.0\n", + "291 837000.0\n", + "293 470000.0\n", + "312 935000.0\n", + "313 1156000.0\n", + "314 913000.0\n", + "315 1333000.0\n", + "316 1397000.0\n", + "317 1546000.0\n", + "540 42228000.0\n", + "541 39118000.0\n", + "542 43693000.0\n", + "543 43780000.0\n", + "544 36645000.0\n", + "545 44385000.0\n", + "805 26443000.0\n", + "806 28116000.0\n", + "807 30369000.0\n", + "808 26284000.0\n", + "809 27304000.0\n", + "882 4944000.0\n", + "883 4813000.0\n", + "884 8073000.0\n", + "885 4118000.0\n", + "886 5664000.0\n", + "887 4811000.0\n", + "Name: __dummy__, dtype: float32\n" + ] + }, + { + "ename": "ValueError", + "evalue": "An error occurred with the following details:\n Name: p-ratio\n Mask: agg_p_percent \\\ngrant_type G N R \nsurvivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \nyear \n2010 True False True False \n2011 True False False False \n2012 True False False False \n2013 True False False False \n2014 True False False False \n2015 True False False False \nAll False False False False \n\n \ngrant_type R/G All \nsurvivor Alive in 2015 Alive in 2015 \nyear \n2010 False True False \n2011 False False False \n2012 False False False \n2013 False False False \n2014 False False False \n2015 False False False \nAll False False False \n Table: grant_type G N R \\\nsurvivor Dead in 2015 Alive in 2015 Dead in 2015 Alive in 2015 Dead in 2015 \nyear \n2010 2 12 0.0 5 40 \n2011 3 12 0.0 58 45 \n2012 3 12 0.0 59 
45 \n2013 3 12 0.0 59 47 \n2014 3 12 0.0 59 43 \n2015 3 9 0.0 58 28 \nAll 17 69 NaN 298 248 \n\ngrant_type R/G All \nsurvivor Alive in 2015 Dead in 2015 Alive in 2015 \nyear \n2010 20 0.0 4 83 \n2011 24 0.0 8 150 \n2012 24 0.0 8 151 \n2013 24 0.0 8 153 \n2014 24 0.0 8 149 \n2015 23 0.0 8 129 \nAll 139 NaN 44 815 ", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:834\u001b[0m, in \u001b[0;36mapply_suppression\u001b[1;34m(table, masks)\u001b[0m\n\u001b[0;32m 833\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 834\u001b[0m safe_df[mask\u001b[39m.\u001b[39;49mvalues] \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mNaN\n\u001b[0;32m 835\u001b[0m tmp_df \u001b[39m=\u001b[39m DataFrame()\u001b[39m.\u001b[39mreindex_like(outcome_df)\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\frame.py:3641\u001b[0m, in \u001b[0;36mDataFrame.__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 3640\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, DataFrame) \u001b[39mor\u001b[39;00m \u001b[39mgetattr\u001b[39m(key, \u001b[39m\"\u001b[39m\u001b[39mndim\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m) \u001b[39m==\u001b[39m \u001b[39m2\u001b[39m:\n\u001b[1;32m-> 3641\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_setitem_frame(key, value)\n\u001b[0;32m 3642\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, (Series, np\u001b[39m.\u001b[39mndarray, \u001b[39mlist\u001b[39m, Index)):\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\frame.py:3754\u001b[0m, in \u001b[0;36mDataFrame._setitem_frame\u001b[1;34m(self, key, 
value)\u001b[0m\n\u001b[0;32m 3753\u001b[0m \u001b[39mif\u001b[39;00m key\u001b[39m.\u001b[39mshape \u001b[39m!=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mshape:\n\u001b[1;32m-> 3754\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mArray conditional must be same shape as self\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 3755\u001b[0m key \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_constructor(key, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_construct_axes_dict())\n", + "\u001b[1;31mValueError\u001b[0m: Array conditional must be same shape as self", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 17\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 1\u001b[0m acro\u001b[39m.\u001b[39msuppress \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 3\u001b[0m df\u001b[39m.\u001b[39;49myear,\n\u001b[0;32m 4\u001b[0m [df\u001b[39m.\u001b[39;49mgrant_type, df\u001b[39m.\u001b[39;49msurvivor],\n\u001b[0;32m 5\u001b[0m values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants,\n\u001b[0;32m 6\u001b[0m aggfunc\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mcount\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 7\u001b[0m margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[0;32m 8\u001b[0m )\n\u001b[0;32m 9\u001b[0m \u001b[39mprint\u001b[39m(table)\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:154\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize, 
show_suppressed)\u001b[0m\n\u001b[0;32m 152\u001b[0m status, summary \u001b[39m=\u001b[39m get_summary(sdc)\n\u001b[0;32m 153\u001b[0m \u001b[39m# apply the suppression\u001b[39;00m\n\u001b[1;32m--> 154\u001b[0m safe_table, outcome \u001b[39m=\u001b[39m apply_suppression(table, masks)\n\u001b[0;32m 155\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39msuppress:\n\u001b[0;32m 156\u001b[0m table \u001b[39m=\u001b[39m safe_table\n", + "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:846\u001b[0m, in \u001b[0;36mapply_suppression\u001b[1;34m(table, masks)\u001b[0m\n\u001b[0;32m 841\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mValueError\u001b[39;00m \u001b[39mas\u001b[39;00m error:\n\u001b[0;32m 842\u001b[0m error_message \u001b[39m=\u001b[39m (\n\u001b[0;32m 843\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mAn error occurred with the following details\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 844\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m Name: \u001b[39m\u001b[39m{\u001b[39;00mname\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m Mask: \u001b[39m\u001b[39m{\u001b[39;00mmask\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m Table: \u001b[39m\u001b[39m{\u001b[39;00mtable\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 845\u001b[0m )\n\u001b[1;32m--> 846\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(error_message) \u001b[39mfrom\u001b[39;00m \u001b[39merror\u001b[39;00m\n\u001b[0;32m 848\u001b[0m outcome_df \u001b[39m=\u001b[39m outcome_df\u001b[39m.\u001b[39mreplace({\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mok\u001b[39m\u001b[39m\"\u001b[39m})\n\u001b[0;32m 849\u001b[0m 
logger\u001b[39m.\u001b[39minfo(\u001b[39m\"\u001b[39m\u001b[39moutcome_df:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m, utils\u001b[39m.\u001b[39mprettify_table_string(outcome_df))\n", + "\u001b[1;31mValueError\u001b[0m: An error occurred with the following details:\n Name: p-ratio\n Mask: agg_p_percent \\\ngrant_type G N R \nsurvivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 \nyear \n2010 True False True False \n2011 True False False False \n2012 True False False False \n2013 True False False False \n2014 True False False False \n2015 True False False False \nAll False False False False \n\n \ngrant_type R/G All \nsurvivor Alive in 2015 Alive in 2015 \nyear \n2010 False True False \n2011 False False False \n2012 False False False \n2013 False False False \n2014 False False False \n2015 False False False \nAll False False False \n Table: grant_type G N R \\\nsurvivor Dead in 2015 Alive in 2015 Dead in 2015 Alive in 2015 Dead in 2015 \nyear \n2010 2 12 0.0 5 40 \n2011 3 12 0.0 58 45 \n2012 3 12 0.0 59 45 \n2013 3 12 0.0 59 47 \n2014 3 12 0.0 59 43 \n2015 3 9 0.0 58 28 \nAll 17 69 NaN 298 248 \n\ngrant_type R/G All \nsurvivor Alive in 2015 Dead in 2015 Alive in 2015 \nyear \n2010 20 0.0 4 83 \n2011 24 0.0 8 150 \n2012 24 0.0 8 151 \n2013 24 0.0 8 153 \n2014 24 0.0 8 149 \n2015 23 0.0 8 129 \nAll 139 NaN 44 815 " ] } ], "source": [ - "acro.suppress = True\n", + "acro.suppress = False\n", "table = acro.crosstab(\n", " df.year,\n", " [df.grant_type, df.survivor],\n", " values=df.inc_grants,\n", - " aggfunc=\"mean\",\n", + " aggfunc=\"count\",\n", " margins=True,\n", ")\n", "print(table)" @@ -769,7 +1911,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "506135e0", "metadata": {}, "outputs": [], @@ -787,7 +1929,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "4ae844a0", "metadata": {}, "outputs": [ @@ -795,21 +1937,21 @@ "name": "stderr", 
"output_type": "stream", "text": [ - "INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", + "INFO:acro:get_summary(): fail; threshold: 6 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing; \n", "INFO:acro:outcome_df:\n", - "---------------------------------------------------------------------------|\n", - "grant_type |G |N |R |R/G |\n", - "year | | | | |\n", - "---------------------------------------------------------------------------|\n", - "2010 | ok | threshold; p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", - "2011 | ok | ok | ok | threshold; |\n", - "2012 | ok | ok | ok | threshold; |\n", - "2013 | ok | ok | ok | threshold; |\n", - "2014 | ok | ok | ok | threshold; |\n", - "2015 | ok | ok | ok | threshold; |\n", - "---------------------------------------------------------------------------|\n", + "----------------------------------------------------------------|\n", + "grant_type |G |N |R |R/G |\n", + "year | | | | |\n", + "----------------------------------------------------------------|\n", + "2010 | ok | p-ratio; | ok | threshold; p-ratio; nk-rule; |\n", + "2011 | ok | ok | ok | threshold; |\n", + "2012 | ok | ok | ok | threshold; |\n", + "2013 | ok | ok | ok | threshold; |\n", + "2014 | ok | ok | ok | threshold; |\n", + "2015 | ok | ok | ok | threshold; |\n", + "----------------------------------------------------------------|\n", "\n", - "INFO:acro:records:add(): output_3\n" + "INFO:acro:records:add(): output_4\n" ] }, { @@ -904,7 +2046,7 @@ "2015 11133433.0 146572.187500 10812888.0 18278624.0" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -924,7 +2066,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "fb7abfc9-e428-4b71-9066-01ac9a08d655", "metadata": {}, "outputs": [ @@ -932,22 +2074,214 @@ 
"name": "stderr", "output_type": "stream", "text": [ - "c:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172: PerformanceWarning: indexing past lexsort depth may impact performance.\n", - " if t_values[col].sum() == 0:\n" + "INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing; \n", + "INFO:acro:outcome_df:\n", + "------------------------------------------------------------------------------------------------------------------------|\n", + " mean |std |\n", + "grant_type G N R R/G All |G N R R/G All|\n", + "year | |\n", + "------------------------------------------------------------------------------------------------------------------------|\n", + "2010 ok p-ratio; ok threshold; p-ratio; nk-rule; ok | ok p-ratio; ok threshold; p-ratio; nk-rule; ok|\n", + "2011 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", + "2012 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", + "2013 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", + "2014 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", + "2015 ok ok ok threshold; ok | ok ok ok threshold; ok|\n", + "All ok ok ok ok ok | ok ok ok ok ok|\n", + "------------------------------------------------------------------------------------------------------------------------|\n", + "\n", + "INFO:acro:records:add(): output_5\n" ] }, { - "ename": "ValueError", - "evalue": "The truth value of a Series is ambiguous. 
Use a.empty, a.bool(), a.item(), a.any() or a.all().", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\notebooks\\test.ipynb Cell 21\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m safe_table \u001b[39m=\u001b[39m acro\u001b[39m.\u001b[39;49mcrosstab(\n\u001b[0;32m 2\u001b[0m df\u001b[39m.\u001b[39;49myear, df\u001b[39m.\u001b[39;49mgrant_type, values\u001b[39m=\u001b[39;49mdf\u001b[39m.\u001b[39;49minc_grants, aggfunc\u001b[39m=\u001b[39;49m[\u001b[39m\"\u001b[39;49m\u001b[39mmean\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstd\u001b[39;49m\u001b[39m\"\u001b[39;49m], margins\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m\n\u001b[0;32m 3\u001b[0m )\n\u001b[0;32m 4\u001b[0m safe_table\n", - "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\AI-SDC\\ACRO\\acro\\acro_tables.py:172\u001b[0m, in \u001b[0;36mTables.crosstab\u001b[1;34m(self, index, columns, values, rownames, colnames, aggfunc, margins, margins_name, dropna, normalize, show_suppressed)\u001b[0m\n\u001b[0;32m 170\u001b[0m \u001b[39mif\u001b[39;00m dropna \u001b[39mor\u001b[39;00m margins:\n\u001b[0;32m 171\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m t_values\u001b[39m.\u001b[39mcolumns:\n\u001b[1;32m--> 172\u001b[0m \u001b[39mif\u001b[39;00m t_values[col]\u001b[39m.\u001b[39;49msum() \u001b[39m==\u001b[39;49m \u001b[39m0\u001b[39;49m:\n\u001b[0;32m 173\u001b[0m t_values \u001b[39m=\u001b[39m t_values\u001b[39m.\u001b[39mdrop(col, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 174\u001b[0m t_values \u001b[39m=\u001b[39m t_values \u001b[39m<\u001b[39m THRESHOLD\n", - "File \u001b[1;32mc:\\Users\\M-ALBASHIR\\Desktop\\SACRO\\venvs\\acro_venv\\lib\\site-packages\\pandas\\core\\generic.py:1527\u001b[0m, in 
\u001b[0;36mNDFrame.__nonzero__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1525\u001b[0m \u001b[39m@final\u001b[39m\n\u001b[0;32m 1526\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__nonzero__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1528\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mThe truth value of a \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mtype\u001b[39m(\u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m is ambiguous. \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1529\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUse a.empty, a.bool(), a.item(), a.any() or a.all().\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1530\u001b[0m )\n", - "\u001b[1;31mValueError\u001b[0m: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
grant_typeGNRR/GAllGNRR/GAll
year
20109921906.00.0000008402284.011636000.08308286.01.855055e+070.0000003.059557e+071.701088e+072.727398e+07
20118502247.0124013.8593757716880.016047500.05303808.01.688595e+07205959.4929032.954322e+071.561638e+072.137658e+07
201211458580.0131859.0625006958050.516810000.05259893.02.061090e+07210476.5391752.721184e+071.646449e+072.026400e+07
201313557147.0147937.7968757202273.516765625.05605045.52.486844e+07203747.4170172.989833e+071.671112e+072.251787e+07
201413748147.0133198.2500008277525.517845750.06117054.53.134559e+07181865.9255803.546348e+071.741251e+072.641722e+07
201511133433.0146572.18750010812888.018278624.06509989.52.553919e+07201602.8008324.130935e+071.730471e+072.784636e+07
All11412787.0134431.8906258098502.016648273.05997796.52.283220e+07198873.7266563.204495e+071.583532e+072.405324e+07
\n", + "
" + ], + "text/plain": [ + " mean \\\n", + "grant_type G N R R/G All \n", + "year \n", + "2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.0 \n", + "2011 8502247.0 124013.859375 7716880.0 16047500.0 5303808.0 \n", + "2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.0 \n", + "2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 \n", + "2014 13748147.0 133198.250000 8277525.5 17845750.0 6117054.5 \n", + "2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 \n", + "All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 \n", + "\n", + " std \\\n", + "grant_type G N R R/G \n", + "year \n", + "2010 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 \n", + "2011 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 \n", + "2012 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 \n", + "2013 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 \n", + "2014 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 \n", + "2015 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 \n", + "All 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 \n", + "\n", + " \n", + "grant_type All \n", + "year \n", + "2010 2.727398e+07 \n", + "2011 2.137658e+07 \n", + "2012 2.026400e+07 \n", + "2013 2.251787e+07 \n", + "2014 2.641722e+07 \n", + "2015 2.784636e+07 \n", + "All 2.405324e+07 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ diff --git a/test/test_initial.py b/test/test_initial.py index 1071ce2..b8226c2 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -588,8 +588,8 @@ def test_crosstab_with_totals_with_suppression_herichical(data, acro): assert ("G", "dead") not in output.output[0].columns -def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): - """Test the crosstab with both margins and suprression are true and with one aggfunc.""" +def test_crosstab_with_totals_with_suppression_with_mean(data, acro): + """Test the crosstab with both margins and suprression are 
true and with aggfunc mean.""" _ = acro.crosstab( data.year, data.grant_type, @@ -603,6 +603,28 @@ def test_crosstab_with_totals_with_suppression_with_aggfunc(data, acro): assert "R/G" not in output.output[0].columns +def test_crosstab_with_totals_and_empty_data(data, acro, caplog): + """Test the crosstab with both margins and suprression are true + and with a dataset that all its data violate one or more rule. + """ + data = data[ + (data.year == 2010) + & (data.grant_type == "G") + & (data.survivor == "Dead in 2015") + ] + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc="mean", + margins=True, + ) + assert ( + "All the cells in this data are discolsive. Thus suppression can not be applied" + in caplog.text + ) + + def test_crosstab_with_manual_totals_with_suppression(data, acro): """Test the crosstab with both margins and suprression are true while using the total manual function. @@ -707,7 +729,7 @@ def test_crosstab_with_sum(data, acro): data.year, [data.grant_type, data.survivor], values=data.inc_grants, - aggfunc="mean", + aggfunc="sum", ) output = acro.results.get_index(0) assert (6, 8) == output.output[0].shape @@ -721,8 +743,8 @@ def test_crosstab_multiple_aggregate_function(data, acro): ) output = acro.results.get_index(0) correct_summary: str = ( - "fail; threshold: 12 cells may need suppressing;" - " p-ratio: 2 cells may need suppressing; " + "fail; threshold: 14 cells may need suppressing;" + " p-ratio: 4 cells may need suppressing; " "nk-rule: 2 cells may need suppressing; " ) assert ( @@ -733,6 +755,55 @@ def test_crosstab_multiple_aggregate_function(data, acro): errmsg = f"{output.output[0]['mean']['R/G'].sum()} should be {correctval}" assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg + def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro): + """Test the crosstab with both margins and suprression are true + and with a list of aggfuncs while using the 
total manual function. + """ + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + ) + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="count", + margins=True, + ) + _ = acro.crosstab( + data.year, + data.grant_type, + values=data.inc_grants, + aggfunc="std", + margins=True, + ) + output = acro.results.get_index(0) + assert 8 == output.output[0].shape[1] + output_1 = acro.results.get_index(1) + output_2 = acro.results.get_index(2) + output_3 = pd.concat([output_1.output[0], output_2.output[0]], axis=1) + output_4 = (output.output[0]).droplevel(0, axis=1) + assert output_3.equals(output_4) + + def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical( + data, acro + ): + """Test the crosstab with both margins and suprression are true + and with a list of aggfuncs and list of columns while using the total manual function. + """ + _ = acro.crosstab( + data.year, + [data.grant_type, data.survivor], + values=data.inc_grants, + aggfunc=["count", "std"], + margins=True, + ) + output = acro.results.get_index(0) + assert ("G", "Dead in 2015") in output.output[0].columns + def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( data, acro, caplog ): From bceae26daaf4e34b67e141c1da20f5ac5f4123df Mon Sep 17 00:00:00 2001 From: mahaalbashir Date: Thu, 28 Sep 2023 04:56:30 +0100 Subject: [PATCH 12/14] fixing a typo --- acro/acro_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 7051eaa..b46c0be 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -832,7 +832,7 @@ def apply_suppression( outcome_df += tmp_df except TypeError: logger.warning("problem mask %s is not binary", name) - except ValueError as error: # pragma: no cove + except ValueError as error: # pragma: no cover error_message = ( f"An error occurred with the following details" f":\n Name: 
{name}\n Mask: {mask}\n Table: {table}" From bf28914cc4e4e4efe72fca6ce477a616f39b9634 Mon Sep 17 00:00:00 2001 From: mahaalbashir Date: Mon, 2 Oct 2023 21:12:47 +0100 Subject: [PATCH 13/14] fixing typo --- acro/acro_tables.py | 2 +- test/test_initial.py | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index b46c0be..738718c 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -1136,7 +1136,7 @@ def crosstab_with_totals( # pylint: disable=too-many-arguments,too-many-locals ) except ValueError: logger.warning( - "All the cells in this data are discolsive." + "All the cells in this data are disclosive." " Thus suppression can not be applied" ) return None diff --git a/test/test_initial.py b/test/test_initial.py index b8226c2..cfdd0cd 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -527,7 +527,7 @@ def test_surv_func(acro): def test_zeros_are_not_disclosive(data, acro): - """Test that zeros are handled as not disclosive when the parameter () is False.""" + """Test that zeros are handled as not disclosive when the parameter (zeros_are_disclosive) is False.""" acro_tables.ZEROS_ARE_DISCLOSIVE = False _ = acro.pivot_table( data, @@ -547,7 +547,7 @@ def test_zeros_are_not_disclosive(data, acro): def test_crosstab_with_totals_without_suppression(data, acro): - """Test the crosstab with margins id true and suppression is false.""" + """Test the crosstab with margins is true and suppression is false.""" acro.suppress = False _ = acro.crosstab(data.year, data.grant_type, margins=True) output = acro.results.get_index(0) @@ -559,7 +559,7 @@ def test_crosstab_with_totals_without_suppression(data, acro): def test_crosstab_with_totals_with_suppression(data, acro): - """Test the crosstab with both margins and suprression are true.""" + """Test the crosstab with both margins and suppression are true.""" _ = acro.crosstab(data.year, data.grant_type, margins=True) 
output = acro.results.get_index(0) assert 145 == output.output[0]["All"].iat[0] @@ -571,7 +571,7 @@ def test_crosstab_with_totals_with_suppression(data, acro): def test_crosstab_with_totals_with_suppression_herichical(data, acro): - """Test the crosstab with both margins and suprression are true.""" + """Test the crosstab with both margins and suppression are true.""" _ = acro.crosstab( [data.year, data.survivor], [data.grant_type, data.status], margins=True ) @@ -589,7 +589,7 @@ def test_crosstab_with_totals_with_suppression_herichical(data, acro): def test_crosstab_with_totals_with_suppression_with_mean(data, acro): - """Test the crosstab with both margins and suprression are true and with aggfunc mean.""" + """Test the crosstab with both margins and suppression are true and with aggfunc mean.""" _ = acro.crosstab( data.year, data.grant_type, @@ -604,8 +604,8 @@ def test_crosstab_with_totals_with_suppression_with_mean(data, acro): def test_crosstab_with_totals_and_empty_data(data, acro, caplog): - """Test the crosstab with both margins and suprression are true - and with a dataset that all its data violate one or more rule. + """Test the crosstab with both margins and suppression are true + and with a dataset that all its data violate one or more rules. """ data = data[ (data.year == 2010) @@ -620,14 +620,14 @@ def test_crosstab_with_totals_and_empty_data(data, acro, caplog): margins=True, ) assert ( - "All the cells in this data are discolsive. Thus suppression can not be applied" + "All the cells in this data are disclosive. Thus suppression can not be applied" in caplog.text ) def test_crosstab_with_manual_totals_with_suppression(data, acro): """Test the crosstab with both margins and - suprression are true while using the total manual function. + suppression are true while using the total manual function. 
""" _ = acro.crosstab(data.year, data.grant_type, margins=True, show_suppressed=True) output = acro.results.get_index(0) @@ -640,8 +640,8 @@ def test_crosstab_with_manual_totals_with_suppression(data, acro): def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro): - """Test the crosstab with both margins and suprression - are true with multilevel index and columns while using the total manual function. + """Test the crosstab with both margins and suppression + are true with multilevel indexes and columns while using the total manual function. """ _ = acro.crosstab( [data.year, data.survivor], @@ -663,7 +663,7 @@ def test_crosstab_with_manual_totals_with_suppression_herichical(data, acro): def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, acro): - """Test the crosstab with both margins and suprression are true + """Test the crosstab with both margins and suppression are true and with aggfunc mean while using the total manual function. """ _ = acro.crosstab( @@ -681,7 +681,7 @@ def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_mean(data, ac def test_herichical_crosstab_with_manual_totals_with_mean(data, acro): - """Test the crosstab with both margins and suprression are true,with + """Test the crosstab with both margins and suppression are true, with aggfunc mean and with multilevel columns and rows while using the total manual function. """ _ = acro.crosstab( @@ -701,7 +701,7 @@ def test_herichical_crosstab_with_manual_totals_with_mean(data, acro): def test_crosstab_with_manual_totals_with_suppression_with_aggfunc_std( data, acro, caplog ): - """Test the crosstab with both margins and suprression are true and with + """Test the crosstab with both margins and suppression are true and with aggfunc std while using the total manual function. 
""" _ = acro.crosstab( @@ -756,7 +756,7 @@ def test_crosstab_multiple_aggregate_function(data, acro): assert correctval == output.output[0]["mean"]["R/G"].sum(), errmsg def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro): - """Test the crosstab with both margins and suprression are true + """Test the crosstab with both margins and suppression are true and with a list of aggfuncs while using the total manual function. """ _ = acro.crosstab( @@ -791,8 +791,8 @@ def test_crosstab_with_totals_with_suppression_with_two_aggfuncs(data, acro): def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical( data, acro ): - """Test the crosstab with both margins and suprression are true - and with a list of aggfuncs and list of columns while using the total manual function. + """Test the crosstab with both margins and suppression are true + and with a list of aggfuncs and a list of columns while using the total manual function. """ _ = acro.crosstab( data.year, @@ -807,7 +807,7 @@ def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical( def test_crosstab_with_manual_totals_with_suppression_with_two_aggfunc( data, acro, caplog ): - """Test the crosstab with both margins and suprression are true + """Test the crosstab with both margins and suppression are true and with a list of aggfuncs while using the total manual function. 
""" _ = acro.crosstab( From 42d708ca5f87d54b808c33bc960672663b5ca613 Mon Sep 17 00:00:00 2001 From: mahaalbashir Date: Mon, 2 Oct 2023 21:18:02 +0100 Subject: [PATCH 14/14] fixing long lines --- test/test_initial.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_initial.py b/test/test_initial.py index cfdd0cd..d341ce4 100644 --- a/test/test_initial.py +++ b/test/test_initial.py @@ -527,7 +527,9 @@ def test_surv_func(acro): def test_zeros_are_not_disclosive(data, acro): - """Test that zeros are handled as not disclosive when the parameter (zeros_are_disclosive) is False.""" + """Test that zeros are handled as not disclosive when + the parameter (zeros_are_disclosive) is False. + """ acro_tables.ZEROS_ARE_DISCLOSIVE = False _ = acro.pivot_table( data, @@ -792,7 +794,8 @@ def test_crosstab_with_totals_with_suppression_with_two_aggfuncs_herichical( data, acro ): """Test the crosstab with both margins and suppression are true - and with a list of aggfuncs and a list of columns while using the total manual function. + and with a list of aggfuncs and a list of columns while using + the total manual function. """ _ = acro.crosstab( data.year,