Output data scripts and documentation

iai-group · Jan 30, 2024 · 44110e8 · 44110e8
1 parent 243eb27
commit 44110e8
Show file tree

Hide file tree

Showing 7 changed files with 3,455 additions and 15 deletions.
diff --git a/results/user_study_output/README.md b/results/user_study_output/README.md
@@ -0,0 +1,5 @@
+# User Study Output
+
+This directory contains the data collected in our user study. The raw output from Amazon Mechanical Turk, aggregated over all HITs, can be found [here](mturk_output.csv). The processed data can be found [here](output_processed.csv), and the data presented at the response level rather than the HIT level can be found [here](output_processed_aggregated.csv).
+
+We used [this script](../../scripts/results_processing.py) to process the MTurk output and [this script](../../scripts/results_aggregation.py) to aggregate the results at the response level.
diff --git a/results/user_study_output/mturk_output.csv b/results/user_study_output/mturk_output.csv
diff --git a/results/user_study_output/output_processed.csv b/results/user_study_output/output_processed.csv
diff --git a/results/user_study_output/output_processed_aggregated.csv b/results/user_study_output/output_processed_aggregated.csv
diff --git a/scripts/input_processing.py b/scripts/input_processing.py
@@ -8,18 +8,7 @@
 
     question_set_data_mturk_dict_final = []
 
-    for experimental_condition in [
-        "1",
-        "2",
-        "3",
-        "4",
-        "5",
-        "6",
-        "7",
-        "8",
-        "9",
-        "10",
-    ]:
+    for experimental_condition in [str(i) for i in range(1,11)]:
         question_set = ["EC" + experimental_condition] * 10
 
         responses_to_add = []
@@ -157,9 +146,9 @@
         )
 
         question_set_data = question_set_data.sample(frac=1)
-        question_set_data.to_csv(
-            "qs" + experimental_condition + ".csv", index=False
-        )
+        # question_set_data.to_csv(
+        #     "qs" + experimental_condition + ".csv", index=False
+        # )
 
         question_set_data_mturk_dict = {}
 

diff --git a/scripts/results_aggregation.py b/scripts/results_aggregation.py
@@ -0,0 +1,95 @@
+"""Script for aggregating the output data from the user study."""
+import ast
+
+import pandas as pd
+
+if __name__ == "__main__":
+    output = pd.read_csv("results/user_study_output/output_processed.csv")
+
+    presentation_modes = []
+    response_quality = []
+    additional_info_quality = []
+
+    for id, row in output.iterrows():
+        if row["answers_ids"] in ["EC" + str(i) for i in [1, 3, 5, 7]]:
+            presentation_modes.append("V")
+        elif row["answers_ids"] in ["EC" + str(i) for i in [2, 4, 6, 8]]:
+            presentation_modes.append("T")
+        else:
+            presentation_modes.append("None")
+
+        if row["answers_ids"] in ["EC" + str(i) for i in [1, 2, 5, 6, 9]]:
+            response_quality.append("Good")
+        else:
+            response_quality.append("Flawed")
+
+        if row["answers_ids"] in ["EC" + str(i) for i in [1, 2, 3, 4]]:
+            additional_info_quality.append("Good")
+        elif row["answers_ids"] in ["EC" + str(i) for i in [5, 6, 7, 8]]:
+            additional_info_quality.append("Flawed")
+        else:
+            additional_info_quality.append("None")
+
+    output["presentation_modes"] = presentation_modes
+    output["response_quality"] = response_quality
+    output["additional_info_quality"] = additional_info_quality
+
+    metrics = [
+        "familiarity",
+        "interest",
+        "search_prob",
+        "relevance",
+        "correctness",
+        "completeness",
+        "comprehensiveness",
+        "conciseness",
+        "serendipity",
+        "coherence",
+        "factuality",
+        "fairness",
+        "readability",
+        "satisfaction",
+        "usefulness",
+    ]
+
+    aggregated_data = pd.DataFrame(columns=output.columns)
+
+    for id, row in output.iterrows():
+        for annotation_id in range(0, 16):
+            new_row = row.copy(deep=True)
+            for metric in metrics + [
+                "worker_ids",
+                "summary_result",
+                "explanation",
+                "age",
+                "gender",
+                "education",
+                "conversational_frequency",
+                "voice_frequency",
+                "source_usefulness",
+                "warning_usefulness",
+                "confidence_usefulness",
+                "source_usefulness_explanation",
+                "warning_usefulness_explanation",
+                "confidence_usefulness_explanation",
+            ]:
+                new_row[metric] = list(ast.literal_eval(row[metric]))[
+                    annotation_id
+                ]
+            # aggregated_data = aggregated_data.append(new_row, ignore_index = True)
+            aggregated_data = pd.concat(
+                [aggregated_data, pd.DataFrame([new_row])], ignore_index=True
+            )
+
+    aggregated_data = aggregated_data.sort_values(
+        ["questions_ids", "answers_ids"], ascending=[True, True]
+    )
+
+    for metric in metrics:
+        aggregated_data[metric] = aggregated_data[metric].apply(
+            lambda xn: int(xn)
+        )
+
+    aggregated_data.to_csv(
+        "results/user_study_output/output_processed_aggregated.csv", index=False
+    )