finalized the code for the interactive charts and added README for data

rlskoeser · May 10, 2024 · 597c45f · 597c45f
1 parent 67e9b56
commit 597c45f
Show file tree

Hide file tree

Showing 21 changed files with 393,992 additions and 392,613 deletions.
diff --git a/interactive_charts/InteractiveRecommendationsChart.ipynb b/interactive_charts/InteractiveRecommendationsChart.ipynb
diff --git a/interactive_charts/html_figures/figure14_chart.html b/interactive_charts/html_figures/figure14_chart.html
diff --git a/speculative_reading/CollaborativeFilteringRecommendations.ipynb b/speculative_reading/CollaborativeFilteringRecommendations.ipynb
@@ -816,21 +816,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Deduping final scores: (1740, 20) -> (946, 21)\n",
-      "Deduping final scores: (2601, 20) -> (1415, 21)\n",
-      "Deduping final scores: (244, 20) -> (60, 21)\n",
-      "Deduping final scores: (276, 20) -> (100, 21)\n",
-      "Deduping final scores: (1988, 20) -> (1097, 21)\n",
-      "Deduping final scores: (2990, 20) -> (1648, 21)\n",
-      "Deduping final scores: (60, 20) -> (60, 21)\n",
-      "Deduping final scores: (102, 20) -> (102, 21)\n"
+      "Deduping final scores: (1740, 19) -> (946, 20)\n",
+      "Deduping final scores: (2601, 19) -> (1415, 20)\n",
+      "Deduping final scores: (244, 19) -> (60, 20)\n",
+      "Deduping final scores: (276, 19) -> (100, 20)\n",
+      "Deduping final scores: (1988, 19) -> (1097, 20)\n",
+      "Deduping final scores: (2990, 19) -> (1648, 20)\n",
+      "Deduping final scores: (60, 19) -> (60, 20)\n",
+      "Deduping final scores: (102, 19) -> (102, 20)\n"
      ]
     }
    ],
@@ -880,7 +880,8 @@
     "\tfinal_scores = pd.merge(mode_scores[['member_id', 'period', 'item_uri', 'formatted_table_title', 'formatted_chart_title', 'mode_score', 'mode_zscore', 'subscription_start', 'subscription_end']], scores_df, on=['member_id', 'period', 'item_uri', 'formatted_table_title', 'formatted_chart_title', 'subscription_start', 'subscription_end'])\n",
     "\n",
     "\t# Drop duplicate rows from 'final_scores' to get a DataFrame of unique scores.\n",
-    "\tfinal_scores_dedup = final_scores.drop_duplicates(final_scores.columns.difference(['metric']))\n",
+    "\tfinal_scores = final_scores.drop(columns=['metric'])\n",
+    "\tfinal_scores_dedup = final_scores.drop_duplicates()\n",
     "\n",
     "\t# Calculate the coefficient of variation (standard deviation divided by median) for each score in 'final_scores_dedup'.\n",
     "\t# Add the coefficient of variation to 'final_scores_dedup' as a new column.\n",
@@ -900,13 +901,6 @@
     "aggregated_formatted_predictions_top200_limit_circulation_with_periodicals = aggregate_predictions(formatted_predictions_top200_limit_circulation_with_periodicals, './data/collaborative_filtering_results/aggregated_top200_predictions_with_periodicals_circulation_limited.csv')\n",
     "aggregated_formatted_predictions_top200_with_periodicals = aggregate_predictions(formatted_predictions_top200_with_periodicals, './data/collaborative_filtering_results/aggregated_top200_predictions_with_periodicals.csv')"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/speculative_reading/LenskitRecommendations.ipynb b/speculative_reading/LenskitRecommendations.ipynb
diff --git a/speculative_reading/data/README.md b/speculative_reading/data/README.md
@@ -2,4 +2,25 @@
 
 This folder contains data files used in the analysis of the "Speculative Reading" section of the "Missing Data, Speculative Reading" article. 
 
-## Contents
+## Contents
+
+### Lenskit Results
+
+This folder holds CSV files containing the results of the Lenskit analysis. File names are built from the following experiment labels:
+
+- `with_periodicals` or `without_periodicals` to indicate whether the model includes periodicals.
+- `comparison_model_runs` are the results of the comparison model runs used to validate the Lenskit model.
+- `sampled` indicates selecting the top recommendations from the model.
+- `full` indicates selecting all recommendations from the model.
+- `aggregated` are combined results from all runs of the model.
+- `popularity` are the top popularity results.
+
+### Collaborative Filtering Results
+
+This folder holds CSV files containing the results of the collaborative filtering analysis. File names are built from the following experiment labels:
+
+- `with_periodicals` or `without_periodicals` to indicate whether the model includes periodicals.
+- `full` indicates selecting all recommendations from the model.
+- `top200` indicates selecting the top 200 recommendations from the model.
+- `aggregated` are combined results from all runs of the model.
+- `circulation_limited` are the results when we limit to the circulation window.
diff --git a/...ing/data/collaborative_filtering_results/aggregated_full_predictions_with_periodicals.csv b/...ing/data/collaborative_filtering_results/aggregated_full_predictions_with_periodicals.csv
diff --git a/...ve_filtering_results/aggregated_full_predictions_with_periodicals_circulation_limited.csv b/...ve_filtering_results/aggregated_full_predictions_with_periodicals_circulation_limited.csv
diff --git a/.../data/collaborative_filtering_results/aggregated_full_predictions_without_periodicals.csv b/.../data/collaborative_filtering_results/aggregated_full_predictions_without_periodicals.csv
diff --git a/...filtering_results/aggregated_full_predictions_without_periodicals_circulation_limited.csv b/...filtering_results/aggregated_full_predictions_without_periodicals_circulation_limited.csv
diff --git a/...g/data/collaborative_filtering_results/aggregated_top200_predictions_with_periodicals.csv b/...g/data/collaborative_filtering_results/aggregated_top200_predictions_with_periodicals.csv
diff --git a/..._filtering_results/aggregated_top200_predictions_with_periodicals_circulation_limited.csv b/..._filtering_results/aggregated_top200_predictions_with_periodicals_circulation_limited.csv
diff --git a/...ata/collaborative_filtering_results/aggregated_top200_predictions_without_periodicals.csv b/...ata/collaborative_filtering_results/aggregated_top200_predictions_without_periodicals.csv
diff --git a/...ltering_results/aggregated_top200_predictions_without_periodicals_circulation_limited.csv b/...ltering_results/aggregated_top200_predictions_without_periodicals_circulation_limited.csv
diff --git a/...ve_reading/data/lenskit_results/aggregated_full_predictions_model100_with_periodicals.csv b/...ve_reading/data/lenskit_results/aggregated_full_predictions_model100_with_periodicals.csv
diff --git a/...reading/data/lenskit_results/aggregated_full_predictions_model100_without_periodicals.csv b/...reading/data/lenskit_results/aggregated_full_predictions_model100_without_periodicals.csv
diff --git a/...reading/data/lenskit_results/aggregated_sampled_predictions_model100_with_periodicals.csv b/...reading/data/lenskit_results/aggregated_sampled_predictions_model100_with_periodicals.csv
diff --git a/...ding/data/lenskit_results/aggregated_sampled_predictions_model100_without_periodicals.csv b/...ding/data/lenskit_results/aggregated_sampled_predictions_model100_without_periodicals.csv
diff --git a/speculative_reading/data/lenskit_results/full_predictions_model100_with_periodicals.csv b/speculative_reading/data/lenskit_results/full_predictions_model100_with_periodicals.csv