diff --git a/notebooks/acro_demo.py b/notebooks/acro_demo.py index 8aaa7ec..8c46a7f 100644 --- a/notebooks/acro_demo.py +++ b/notebooks/acro_demo.py @@ -5,6 +5,7 @@ # import libraries import os + import pandas as pd from scipy.io.arff import loadarff @@ -19,12 +20,12 @@ acro = ACRO(suppress=False) # Load test data -# The dataset used in this notebook is the nursery dataset from OpenML. -# - In this version, the data can be read directly from the local machine after it has been downloaded. +# The dataset used in this notebook is the nursery dataset from OpenML. +# - In this version, the data can be read directly from the local machine after it has been downloaded. # - The code below reads the data from a folder called "data" which we assume is at the same level as the folder where you are working. # - The path might need to be changed if the data has been downloaded and stored elsewhere. -# - for example use: -# path = os.path.join("data", "nursery.arff") +# - for example use: +# path = os.path.join("data", "nursery.arff") # if the data is in a sub-folder of your work folder path = os.path.join("../data", "nursery.arff") @@ -36,12 +37,12 @@ df.head() # Examples of producing tabular output -# We rely on the industry-standard package **pandas** for tabulating data. +# We rely on the industry-standard package **pandas** for tabulating data. # In the next few examples we show: # - first, how a researcher would normally make a call in pandas, saving the results in a variable that they can view on screen (or save to file?) # - then how the call is identical in SACRO, except that: # - "pd" is replaced by "acro" -# - the researcher immediately sees a copy of what the TRE output checker will see. +# - the researcher immediately sees a copy of what the TRE output checker will see. print( "\nThese examples show acro wrappers around " @@ -50,7 +51,7 @@ # Pandas crosstab -# This is an example of crosstab using pandas. +# This is an example of crosstab using pandas. # We first make the call, then the second line print the outputs to screen. print("\nCalling crosstab of recommendation by parents using pandas") @@ -58,20 +59,18 @@ print(table) # ACRO crosstab -# - This is an example of crosstab using ACRO. +# - This is an example of crosstab using ACRO. # - The INFO lines show the researcher what will be reported to the output checkers. # - Then the (suppressed as necessary) table is shown via the print command as before. print("\nNow the same crosstab call using the ACRO interface") -safe_table = acro.crosstab( - df.recommend, df.parents -) +safe_table = acro.crosstab(df.recommend, df.parents) print("\nand this is the researchers output") print(safe_table) # ACRO crosstab with suppression # - This is an example of crosstab with suppressing the cells that violate the disclosure tests. -# - Note that you need to change the value of the suppress variable in the acro object to True. Then run the crosstab command. +# - Note that you need to change the value of the suppress variable in the acro object to True. Then run the crosstab command. # - If you wish to continue the research while suppressing the outputs, leave the suppress variable as it is, otherwise turn it off. print("\nTurn on the suppression variable") @@ -84,7 +83,7 @@ acro.suppress = False # ACRO functionality to let users manage their outputs -# +# # 1: List current ACRO outputs # This is an example of using the print_output function to list all the outputs created so far @@ -95,10 +94,10 @@ ) acro.print_outputs() -# 2: Remove some ACRO outputs before finalising -# This is an example of deleting some of the ACRO outputs. -# The name of the output that needs to be removed should be passed to the function remove_output. -# - The output name can be taken from the outputs listed by the print_outputs function, +# 2: Remove some ACRO outputs before finalising +# This is an example of deleting some of the ACRO outputs. +# The name of the output that needs to be removed should be passed to the function remove_output. +# - The output name can be taken from the outputs listed by the print_outputs function, # - or by listing the results and choosing the specific output that needs to be removed print("\nNow removing the first output") @@ -111,16 +110,16 @@ acro.rename_output("output_1", "cross_tabulation") # 4: Add a comment to output -# This is an example to add a comment to outputs. +# This is an example to add a comment to outputs. # It can be used to provide a description or to pass additional information to the output checkers. print("\nUsers can add comments which the output checkers will see.") acro.add_comments("cross_tabulation", "Please let me have this data.") # 5: (the big one) Finalise ACRO -# This is an example of the function _finalise()_ which the users must call at the end of each session. -# - It takes each output and saves it to a CSV file. -# - It also saves the SDC analysis for each output to a json file or Excel file +# This is an example of the function _finalise()_ which the users must call at the end of each session. +# - It takes each output and saves it to a CSV file. +# - It also saves the SDC analysis for each output to a json file or Excel file # (depending on the extension of the name of the file provided as an input to the function) print( @@ -128,5 +127,3 @@ " If they don't, the SDC analysis, and their outputs, are lost." ) output = acro.finalise("Examples", "json") - -