Regarding the quantity of data which the script can handle. #520
Unanswered
counterpoint19811220
asked this question in
Q&A
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Recently, I have been working with three samples. One sample contains around 2300 rows, and the script works on it. However, when I import another sample that has more than 3500 rows, the script fails with the following error message. Does this mean the script has a limit on the amount of data it can handle, or is there a mistake in my script? Any help or feedback would be greatly appreciated.
Script:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as matplotlib
from scipy import stats
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
# Load the sample and plot the raw data.
df = pd.read_csv("D:/1/PyOD sample 2.csv")
df.plot.scatter('Location', 'Value')

# Rescale both columns to [0, 1] so the data lies on the same unit square as
# the (xx, yy) evaluation grid built further down.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
df[['Location', 'Value']] = scaler.fit_transform(df[['Location', 'Value']])
df[['Location', 'Value']].head()

# Stack the two scaled columns side by side into an (n_samples, 2) matrix.
X1 = df['Location'].values.reshape(-1, 1)
X2 = df['Value'].values.reshape(-1, 1)
X = np.concatenate((X1, X2), axis=1)

random_state = np.random.RandomState(42)
outliers_fraction = 0.05

# Detectors to compare, keyed by the display name printed for each run.
# Every detector is given the same contamination value.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(contamination=outliers_fraction),
    'Cluster-based Local Outlier Factor (CBLOF)': CBLOF(
        contamination=outliers_fraction,
        check_estimator=False,
        random_state=random_state,
    ),
    'Feature Bagging': FeatureBagging(
        LOF(n_neighbors=35),
        contamination=outliers_fraction,
        check_estimator=False,
        random_state=random_state,
    ),
    'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction),
    'Isolation Forest': IForest(
        contamination=outliers_fraction,
        random_state=random_state,
    ),
    'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
    'Average KNN': KNN(method='mean', contamination=outliers_fraction),
}

# 200 x 200 grid over the unit square on which each fitted detector is scored.
xx, yy = np.meshgrid(np.linspace(0, 1, 200), np.linspace(0, 1, 200))

# The loop body must be indented under the `for`; with every line at column 0
# (as in the pasted version) Python raises an IndentationError.
for i, (clf_name, clf) in enumerate(classifiers.items()):
    clf.fit(X)

    # Sign-flipped decision scores for the training points.
    scores_pred = clf.decision_function(X) * -1

    # Labels for the training points; entries equal to 1 are counted as
    # outliers, and zero entries as inliers.
    y_pred = clf.predict(X)
    n_inliers = len(y_pred) - np.count_nonzero(y_pred)
    n_outliers = np.count_nonzero(y_pred == 1)

    plt.figure(figsize=(10, 10))

    # NOTE: `dfx` is an alias of `df`, not a copy, so the 'outlier' column is
    # written onto `df` itself and overwritten on every iteration.
    dfx = df
    dfx['outlier'] = y_pred.tolist()

    # Column vectors of the scaled coordinates, split by predicted label
    # (outlier == 0 -> IX*, outlier == 1 -> OX*).
    IX1 = np.array(dfx['Location'][dfx['outlier'] == 0]).reshape(-1, 1)
    IX2 = np.array(dfx['Value'][dfx['outlier'] == 0]).reshape(-1, 1)
    OX1 = dfx['Location'][dfx['outlier'] == 1].values.reshape(-1, 1)
    OX2 = dfx['Value'][dfx['outlier'] == 1].values.reshape(-1, 1)

    print('OUTLIERS : ', n_outliers, 'INLIERS : ', n_inliers, clf_name)

    # Score at the (100 * outliers_fraction)-th percentile of the sign-flipped
    # training scores.
    # NOTE(review): if this value falls outside the range of Z below, a contour
    # drawn at this single level would presumably contain no paths, which could
    # explain the empty `get_paths()` in the traceback - TODO confirm.
    threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)

    # Sign-flipped decision scores on the grid, reshaped back to the grid shape.
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
    Z = Z.reshape(xx.shape)
Error message:
IndexError Traceback (most recent call last)
d:\1\PyOD V2.py in line 115
112 c = subplot.scatter(x=outliers_people, y=outliers_order, c='black',s=20, edgecolor='k')
114 subplot.axis('tight')
--> 115 subplot.legend([a.collections[0],b,c], ['learned decision function', 'inliers','outliers'],prop=matplotlib.font_manager.FontProperties(size=60),loc='upper right')
116 subplot.xlim((0, 1))
117 subplot.ylim((0, 1))
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\axes\_axes.py:318, in Axes.legend(self, *args, **kwargs)
316 if len(extra_args):
317 raise TypeError('legend only accepts two non-keyword arguments')
--> 318 self.legend_ = mlegend.Legend(self, handles, labels, **kwargs)
319 self.legend_._remove_method = self._remove_legend
320 return self.legend_
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\_api\deprecation.py:454, in make_keyword_only.<locals>.wrapper(*args, **kwargs)
448 if len(args) > name_idx:
449 warn_deprecated(
450 since, message="Passing the %(name)s %(obj_type)s "
451 "positionally is deprecated since Matplotlib %(since)s; the "
452 "parameter will become keyword-only %(removal)s.",
453 name=name, obj_type=f"parameter of {func.__name__}()")
--> 454 return func(*args, **kwargs)
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\legend.py:583, in Legend.__init__(self, parent, handles, labels, loc, numpoints, markerscale, markerfirst, reverse, scatterpoints, scatteryoffsets, prop, fontsize, labelcolor, borderpad, labelspacing, handlelength, handleheight, handletextpad, borderaxespad, columnspacing, ncols, mode, fancybox, shadow, title, title_fontsize, framealpha, edgecolor, facecolor, bbox_to_anchor, bbox_transform, frameon, handler_map, title_fontproperties, alignment, ncol, draggable)
580 self._alignment = alignment
582 # init with null renderer
--> 583 self._init_legend_box(handles, labels, markerfirst)
585 tmp = self._loc_used_default
586 self._set_loc(loc)
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\legend.py:867, in Legend._init_legend_box(self, handles, labels, markerfirst)
864 text_list.append(textbox._text)
865 # Create the artist for the legend which represents the
866 # original artist/handle.
--> 867 handle_list.append(handler.legend_artist(self, orig_handle,
868 fontsize, handlebox))
869 handles_and_labels.append((handlebox, textbox))
871 columnbox = []
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\legend_handler.py:130, in HandlerBase.legend_artist(self, legend, orig_handle, fontsize, handlebox)
106 """
107 Return the artist that this HandlerBase generates for the given
108 original artist/handle.
(...)
123
124 """
125 xdescent, ydescent, width, height = self.adjust_drawing_area(
126 legend, orig_handle,
127 handlebox.xdescent, handlebox.ydescent,
128 handlebox.width, handlebox.height,
129 fontsize)
--> 130 artists = self.create_artists(legend, orig_handle,
131 xdescent, ydescent, width, height,
132 fontsize, handlebox.get_transform())
134 # create_artists will return a list of artists.
135 for a in artists:
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\legend_handler.py:496, in HandlerRegularPolyCollection.create_artists(self, legend, orig_handle, xdescent, ydescent, width, height, fontsize, trans)
490 ydata = self.get_ydata(legend, xdescent, ydescent,
491 width, height, fontsize)
493 sizes = self.get_sizes(legend, orig_handle, xdescent, ydescent,
494 width, height, fontsize)
--> 496 p = self.create_collection(
497 orig_handle, sizes,
498 offsets=list(zip(xdata_marker, ydata)), offset_transform=trans)
500 self.update_prop(p, orig_handle, legend)
501 p.set_offset_transform(trans)
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\_api\deprecation.py:297, in rename_parameter.<locals>.wrapper(*args, **kwargs)
292 warn_deprecated(
293 since, message=f"The {old!r} parameter of {func.__name__}() "
294 f"has been renamed {new!r} since Matplotlib {since}; support "
295 f"for the old name will be dropped %(removal)s.")
296 kwargs[new] = kwargs.pop(old)
--> 297 return func(*args, **kwargs)
File c:\Users\z004eeud\AppData\Local\Programs\Python\Python311\Lib\site-packages\matplotlib\legend_handler.py:511, in HandlerPathCollection.create_collection(self, orig_handle, sizes, offsets, offset_transform)
508 @_api.rename_parameter("3.6", "transOffset", "offset_transform")
509 def create_collection(self, orig_handle, sizes, offsets, offset_transform):
510 return type(orig_handle)(
--> 511 [orig_handle.get_paths()[0]], sizes=sizes,
512 offsets=offsets, offset_transform=offset_transform,
513 )
IndexError: list index out of range
Beta Was this translation helpful? Give feedback.
All reactions