updated

CFZhai · Nov 15, 2024 · 71dfeab · 71dfeab
1 parent 040f0d4
commit 71dfeab
Show file tree

Hide file tree

Showing 21 changed files with 730 additions and 4,924 deletions.
diff --git a/content/call-center.csv b/content/call-center.csv
diff --git a/content/call_center_anomaly.ipynb b/content/call_center_anomaly.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "66907701-1675-4ef5-ac61-a88a436e5b16",
+   "metadata": {},
+   "source": [
+    "# Case 1 - Call Center Staffing Analytics - Detecting Favoritism using Machine Learning\n",
+    "## Situation: \n",
+    "A call center operation was under close scrutiny for uneven performance,\n",
+    "shoddy operations and low employee morale. There was a rumor floating around that\n",
+    "the call center manager was engaging in favoritism, that certain employees were given\n",
+    "unfairly easy working conditions, but no one was able to present a convincing case\n",
+    "against the manager.\n",
+    "## Complication: \n",
+    "Some even went so far as to accuse the manager of nepotism --\n",
+    "implying that the workers being given extra sweet deals were those that were related to\n",
+    "the manager. The case was before a judge who, given the seriousness of the case,\n",
+    "asked for hard evidence.\n",
+    "## Key question:\n",
+    "Can we use machine learning to ‘objectively’ identify whether there is\n",
+    "any hard evidence to prove that certain employees were being treated in a\n",
+    "systematically different way than others? Can we do this using not one, but multiple\n",
+    "dimensions, together?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4d3b2e4-8015-455f-8b10-9dad7beae687",
+   "metadata": {},
+   "source": [
+    "# Start your analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "f1efa156-e449-4e40-a2b2-a580df2bbe05",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.neighbors import LocalOutlierFactor\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "dea8b335-e502-4b51-a825-f914c29d752e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Employee ID</th>\n",
+       "      <th>Avg Tix / Day</th>\n",
+       "      <th>Customer rating</th>\n",
+       "      <th>Tardies</th>\n",
+       "      <th>Graveyard Shifts Taken</th>\n",
+       "      <th>Weekend Shifts Taken</th>\n",
+       "      <th>Sick Days Taken</th>\n",
+       "      <th>% Sick Days Taken on Friday</th>\n",
+       "      <th>Employee Dev. Hours</th>\n",
+       "      <th>Shift Swaps Requested</th>\n",
+       "      <th>Shift Swaps Offered</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "      <td>400.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>137946.037500</td>\n",
+       "      <td>156.085750</td>\n",
+       "      <td>3.495150</td>\n",
+       "      <td>1.465000</td>\n",
+       "      <td>1.985000</td>\n",
+       "      <td>0.952500</td>\n",
+       "      <td>1.875000</td>\n",
+       "      <td>35.220000</td>\n",
+       "      <td>11.970000</td>\n",
+       "      <td>1.447500</td>\n",
+       "      <td>1.760000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>4240.877417</td>\n",
+       "      <td>4.416638</td>\n",
+       "      <td>0.461497</td>\n",
+       "      <td>0.972697</td>\n",
+       "      <td>0.794577</td>\n",
+       "      <td>0.548631</td>\n",
+       "      <td>1.673732</td>\n",
+       "      <td>39.295061</td>\n",
+       "      <td>7.470852</td>\n",
+       "      <td>0.999872</td>\n",
+       "      <td>1.812626</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>130564.000000</td>\n",
+       "      <td>143.100000</td>\n",
+       "      <td>2.070000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>134401.500000</td>\n",
+       "      <td>153.075000</td>\n",
+       "      <td>3.210000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>137906.500000</td>\n",
+       "      <td>156.050000</td>\n",
+       "      <td>3.505000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>12.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>141771.250000</td>\n",
+       "      <td>159.100000</td>\n",
+       "      <td>3.810000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>67.000000</td>\n",
+       "      <td>17.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>145176.000000</td>\n",
+       "      <td>168.700000</td>\n",
+       "      <td>4.810000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>100.000000</td>\n",
+       "      <td>34.000000</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>9.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         Employee ID  Avg Tix / Day  Customer rating     Tardies  \\\n",
+       "count     400.000000     400.000000       400.000000  400.000000   \n",
+       "mean   137946.037500     156.085750         3.495150    1.465000   \n",
+       "std      4240.877417       4.416638         0.461497    0.972697   \n",
+       "min    130564.000000     143.100000         2.070000    0.000000   \n",
+       "25%    134401.500000     153.075000         3.210000    1.000000   \n",
+       "50%    137906.500000     156.050000         3.505000    1.000000   \n",
+       "75%    141771.250000     159.100000         3.810000    2.000000   \n",
+       "max    145176.000000     168.700000         4.810000    4.000000   \n",
+       "\n",
+       "       Graveyard Shifts Taken  Weekend Shifts Taken  Sick Days Taken  \\\n",
+       "count              400.000000            400.000000       400.000000   \n",
+       "mean                 1.985000              0.952500         1.875000   \n",
+       "std                  0.794577              0.548631         1.673732   \n",
+       "min                  0.000000              0.000000         0.000000   \n",
+       "25%                  1.000000              1.000000         0.000000   \n",
+       "50%                  2.000000              1.000000         2.000000   \n",
+       "75%                  2.000000              1.000000         3.000000   \n",
+       "max                  4.000000              2.000000         7.000000   \n",
+       "\n",
+       "       % Sick Days Taken on Friday  Employee Dev. Hours  \\\n",
+       "count                   400.000000           400.000000   \n",
+       "mean                     35.220000            11.970000   \n",
+       "std                      39.295061             7.470852   \n",
+       "min                       0.000000             0.000000   \n",
+       "25%                       0.000000             6.000000   \n",
+       "50%                      25.000000            12.000000   \n",
+       "75%                      67.000000            17.000000   \n",
+       "max                     100.000000            34.000000   \n",
+       "\n",
+       "       Shift Swaps Requested  Shift Swaps Offered  \n",
+       "count             400.000000           400.000000  \n",
+       "mean                1.447500             1.760000  \n",
+       "std                 0.999872             1.812626  \n",
+       "min                 0.000000             0.000000  \n",
+       "25%                 1.000000             0.000000  \n",
+       "50%                 1.000000             1.000000  \n",
+       "75%                 2.000000             3.000000  \n",
+       "max                 5.000000             9.000000  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the call-center records. The frame is bound to `call_center` because the\n",
+    "# scaling and outlier-filter cells below read it under that name.\n",
+    "call_center = pd.read_csv(\"call-center.csv\")\n",
+    "call_center.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c792038-fc69-4471-bbd8-e4cd5f033faf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standardize the columns at positions 1-10 (the first column is skipped) so that\n",
+    "# every feature contributes on a comparable scale to the distance computations.\n",
+    "features = call_center.iloc[:, 1:11]\n",
+    "scaler = StandardScaler()\n",
+    "call_center_scaled = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "48dbc8e2-d38d-41fd-b8bb-3ffce9978336",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Score every row with Local Outlier Factor (similar to LOF in R).\n",
+    "# fit_predict() only returns -1/+1 labels, so the continuous scores are read from\n",
+    "# negative_outlier_factor_ instead. Negating it gives the LOF itself: values near 1\n",
+    "# are typical, larger values are more anomalous.\n",
+    "lof = LocalOutlierFactor(n_neighbors=5)\n",
+    "lof.fit(call_center_scaled)\n",
+    "outlier_scores = -lof.negative_outlier_factor_\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f94b9b53-ccfc-4368-9963-059bbad3b623",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the density of the outlier scores on an explicit, labelled axes so the\n",
+    "# figure can be read on its own when the notebook is skimmed.\n",
+    "fig, ax = plt.subplots()\n",
+    "sns.kdeplot(outlier_scores, ax=ax)\n",
+    "ax.set(title=\"Density of outlier scores\", xlabel=\"Outlier score\", ylabel=\"Density\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f36ca55e-3624-4a4f-ad27-e271c9ce796f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only the rows whose outlier score exceeds the cut-off.\n",
+    "OUTLIER_THRESHOLD = 1.5  # scores above this value are treated as anomalous\n",
+    "outliers = call_center[outlier_scores > OUTLIER_THRESHOLD]\n",
+    "# End on the frame itself so Jupyter renders it as a table instead of plain text\n",
+    "outliers"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}