generated from jupyterlite/demo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
730 additions
and
4,924 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,329 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "66907701-1675-4ef5-ac61-a88a436e5b16", | ||
"metadata": {}, | ||
"source": [ | ||
"# Case 1 - Call Center Staffing Analytics - Detecting Favoritism using Machine Learning\n", | ||
"## Situation: \n", | ||
"A call center operation was under close scrutiny for uneven performance,\n", | ||
"shoddy operations and low employee morale. There was a rumor floating around that\n", | ||
"the call center manager was engaging in favoritism, that certain employees were given\n", | ||
"unfairly easy working conditions, but no one was able to present a convincing case\n", | ||
"against the manager.\n", | ||
"## Complication: \n", | ||
"Some even went so far as to accuse the manager of nepotism --\n", | ||
"implying that the workers being given extra sweet deals were those that were related to\n", | ||
"the manager. The case was before a judge who, given the seriousness of the case,\n", | ||
"asked for hard evidence.\n", | ||
"## Key question:\n", | ||
"Can we use machine learning to ‘objectively’ identify whether there is\n", | ||
"any hard evidence to prove that certain employees were being treated in a\n", | ||
"systematically different way than others? Can we do this using not one, but multiple\n", | ||
"dimensions, together?" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "a4d3b2e4-8015-455f-8b10-9dad7beae687", | ||
"metadata": {}, | ||
"source": [ | ||
"# start your analysis " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "f1efa156-e449-4e40-a2b2-a580df2bbe05", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from sklearn.preprocessing import StandardScaler\n", | ||
"from sklearn.neighbors import LocalOutlierFactor\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import seaborn as sns\n", | ||
"\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "dea8b335-e502-4b51-a825-f914c29d752e", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Employee ID</th>\n", | ||
" <th>Avg Tix / Day</th>\n", | ||
" <th>Customer rating</th>\n", | ||
" <th>Tardies</th>\n", | ||
" <th>Graveyard Shifts Taken</th>\n", | ||
" <th>Weekend Shifts Taken</th>\n", | ||
" <th>Sick Days Taken</th>\n", | ||
" <th>% Sick Days Taken on Friday</th>\n", | ||
" <th>Employee Dev. Hours</th>\n", | ||
" <th>Shift Swaps Requested</th>\n", | ||
" <th>Shift Swaps Offered</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>count</th>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" <td>400.000000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>mean</th>\n", | ||
" <td>137946.037500</td>\n", | ||
" <td>156.085750</td>\n", | ||
" <td>3.495150</td>\n", | ||
" <td>1.465000</td>\n", | ||
" <td>1.985000</td>\n", | ||
" <td>0.952500</td>\n", | ||
" <td>1.875000</td>\n", | ||
" <td>35.220000</td>\n", | ||
" <td>11.970000</td>\n", | ||
" <td>1.447500</td>\n", | ||
" <td>1.760000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>std</th>\n", | ||
" <td>4240.877417</td>\n", | ||
" <td>4.416638</td>\n", | ||
" <td>0.461497</td>\n", | ||
" <td>0.972697</td>\n", | ||
" <td>0.794577</td>\n", | ||
" <td>0.548631</td>\n", | ||
" <td>1.673732</td>\n", | ||
" <td>39.295061</td>\n", | ||
" <td>7.470852</td>\n", | ||
" <td>0.999872</td>\n", | ||
" <td>1.812626</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>min</th>\n", | ||
" <td>130564.000000</td>\n", | ||
" <td>143.100000</td>\n", | ||
" <td>2.070000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>25%</th>\n", | ||
" <td>134401.500000</td>\n", | ||
" <td>153.075000</td>\n", | ||
" <td>3.210000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" <td>6.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>0.000000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>50%</th>\n", | ||
" <td>137906.500000</td>\n", | ||
" <td>156.050000</td>\n", | ||
" <td>3.505000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>25.000000</td>\n", | ||
" <td>12.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>75%</th>\n", | ||
" <td>141771.250000</td>\n", | ||
" <td>159.100000</td>\n", | ||
" <td>3.810000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>1.000000</td>\n", | ||
" <td>3.000000</td>\n", | ||
" <td>67.000000</td>\n", | ||
" <td>17.000000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>3.000000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>max</th>\n", | ||
" <td>145176.000000</td>\n", | ||
" <td>168.700000</td>\n", | ||
" <td>4.810000</td>\n", | ||
" <td>4.000000</td>\n", | ||
" <td>4.000000</td>\n", | ||
" <td>2.000000</td>\n", | ||
" <td>7.000000</td>\n", | ||
" <td>100.000000</td>\n", | ||
" <td>34.000000</td>\n", | ||
" <td>5.000000</td>\n", | ||
" <td>9.000000</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Employee ID Avg Tix / Day Customer rating Tardies \\\n", | ||
"count 400.000000 400.000000 400.000000 400.000000 \n", | ||
"mean 137946.037500 156.085750 3.495150 1.465000 \n", | ||
"std 4240.877417 4.416638 0.461497 0.972697 \n", | ||
"min 130564.000000 143.100000 2.070000 0.000000 \n", | ||
"25% 134401.500000 153.075000 3.210000 1.000000 \n", | ||
"50% 137906.500000 156.050000 3.505000 1.000000 \n", | ||
"75% 141771.250000 159.100000 3.810000 2.000000 \n", | ||
"max 145176.000000 168.700000 4.810000 4.000000 \n", | ||
"\n", | ||
" Graveyard Shifts Taken Weekend Shifts Taken Sick Days Taken \\\n", | ||
"count 400.000000 400.000000 400.000000 \n", | ||
"mean 1.985000 0.952500 1.875000 \n", | ||
"std 0.794577 0.548631 1.673732 \n", | ||
"min 0.000000 0.000000 0.000000 \n", | ||
"25% 1.000000 1.000000 0.000000 \n", | ||
"50% 2.000000 1.000000 2.000000 \n", | ||
"75% 2.000000 1.000000 3.000000 \n", | ||
"max 4.000000 2.000000 7.000000 \n", | ||
"\n", | ||
" % Sick Days Taken on Friday Employee Dev. Hours \\\n", | ||
"count 400.000000 400.000000 \n", | ||
"mean 35.220000 11.970000 \n", | ||
"std 39.295061 7.470852 \n", | ||
"min 0.000000 0.000000 \n", | ||
"25% 0.000000 6.000000 \n", | ||
"50% 25.000000 12.000000 \n", | ||
"75% 67.000000 17.000000 \n", | ||
"max 100.000000 34.000000 \n", | ||
"\n", | ||
" Shift Swaps Requested Shift Swaps Offered \n", | ||
"count 400.000000 400.000000 \n", | ||
"mean 1.447500 1.760000 \n", | ||
"std 0.999872 1.812626 \n", | ||
"min 0.000000 0.000000 \n", | ||
"25% 1.000000 0.000000 \n", | ||
"50% 1.000000 1.000000 \n", | ||
"75% 2.000000 3.000000 \n", | ||
"max 5.000000 9.000000 " | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Load the staffing data under the name the later cells read (`call_center`)\n", | ||
"# and show summary statistics for its numeric columns.\n", | ||
"call_center = pd.read_csv(\"call-center.csv\")\n", | ||
"call_center.describe()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7c792038-fc69-4471-bbd8-e4cd5f033faf", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Standardize columns 1-10 so every feature is on a comparable scale.\n", | ||
"# Column 0 is skipped (Employee ID per the describe() output; assumes the CSV has no other leading columns).\n", | ||
"scaler = StandardScaler()\n", | ||
"call_center_scaled = pd.DataFrame(\n", | ||
"    scaler.fit_transform(call_center.iloc[:, 1:11]),\n", | ||
"    columns=call_center.columns[1:11],\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "48dbc8e2-d38d-41fd-b8bb-3ffce9978336", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Calculate the outlier scores using LocalOutlierFactor (similar to LOF in R)\n", | ||
"# fit_predict() only returns -1/1 labels, not scores. The continuous LOF value is\n", | ||
"# stored (negated) in negative_outlier_factor_, so flip its sign:\n", | ||
"# values near 1 are typical, larger values are more unusual.\n", | ||
"lof = LocalOutlierFactor(n_neighbors=5)\n", | ||
"lof.fit(call_center_scaled)\n", | ||
"outlier_scores = -lof.negative_outlier_factor_\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "f94b9b53-ccfc-4368-9963-059bbad3b623", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Plot score density on an explicit, labelled axes so the figure stands alone\n", | ||
"fig, ax = plt.subplots()\n", | ||
"sns.kdeplot(outlier_scores, ax=ax)\n", | ||
"ax.set(title=\"Distribution of outlier scores\", xlabel=\"Outlier score\", ylabel=\"Density\")\n", | ||
"plt.show()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "f36ca55e-3624-4a4f-ad27-e271c9ce796f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Filter rows with high outlier scores\n", | ||
"OUTLIER_THRESHOLD = 1.5  # cut-off applied to outlier_scores; tune after inspecting the density plot\n", | ||
"outliers = call_center.loc[outlier_scores > OUTLIER_THRESHOLD]\n", | ||
"# Bare last expression so the notebook renders the frame as a table instead of plain text\n", | ||
"outliers" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.