diff --git a/projects/customer_segments/customer_segments.ipynb b/projects/customer_segments/customer_segments.ipynb old mode 100755 new mode 100644 index abb9a9fb07..1ddd480da2 --- a/projects/customer_segments/customer_segments.ipynb +++ b/projects/customer_segments/customer_segments.ipynb @@ -36,9 +36,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Import libraries necessary for this project\n", @@ -56,9 +54,9 @@ "try:\n", " data = pd.read_csv(\"customers.csv\")\n", " data.drop(['Region', 'Channel'], axis = 1, inplace = True)\n", - " print \"Wholesale customers dataset has {} samples with {} features each.\".format(*data.shape)\n", + " print(\"Wholesale customers dataset has {} samples with {} features each.\".format(*data.shape))\n", "except:\n", - " print \"Dataset could not be loaded. Is the dataset missing?\"" + " print(\"Dataset could not be loaded. Is the dataset missing?\")" ] }, { @@ -104,7 +102,7 @@ "\n", "# Create a DataFrame of the chosen samples\n", "samples = pd.DataFrame(data.loc[indices], columns = data.keys()).reset_index(drop = True)\n", - "print \"Chosen samples of wholesale customers dataset:\"\n", + "print(\"Chosen samples of wholesale customers dataset:\")\n", "display(samples)" ] }, @@ -330,7 +328,7 @@ " step = None\n", " \n", " # Display the outliers\n", - " print \"Data points considered outliers for the feature '{}':\".format(feature)\n", + " print(\"Data points considered outliers for the feature '{}':\".format(feature))\n", " display(log_data[~((log_data[feature] >= Q1 - step) & (log_data[feature] <= Q3 + step))])\n", " \n", "# OPTIONAL: Select the indices for data points you wish to remove\n", @@ -704,7 +702,7 @@ "source": [ "# Display the predictions\n", "for i, pred in enumerate(sample_preds):\n", - " print \"Sample point\", i, \"predicted to be in Cluster\", pred" + " print(\"Sample point\", i, \"predicted to be in Cluster\", pred)" ] }, { @@ -820,21 +818,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.6.2" } }, "nbformat": 4, diff --git a/projects/customer_segments/visuals.py b/projects/customer_segments/visuals.py index fc73962cf6..466e8c48c9 100644 --- a/projects/customer_segments/visuals.py +++ b/projects/customer_segments/visuals.py @@ -25,7 +25,7 @@ def pca_results(good_data, pca): dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)] # PCA components - components = pd.DataFrame(np.round(pca.components_, 4), columns = good_data.keys()) + components = pd.DataFrame(np.round(pca.components_, 4), columns = list(good_data.keys())) components.index = dimensions # PCA explained variance @@ -132,7 +132,7 @@ def channel_results(reduced_data, outliers, pca_samples): try: full_data = pd.read_csv("customers.csv") except: - print "Dataset could not be loaded. Is the file missing?" + print("Dataset could not be loaded. Is the file missing?") return False # Create the Channel DataFrame