def confintervalML(matrix, image_predicted, pixel_size = 10, conf = 1.96):

    '''Print a confusion matrix by estimated proportions of area with confidence intervals.

    The error matrix is a cross-tabulation of the class labels allocated by the
    classification against the reference data for the sample sites. Its main
    diagonal holds the correct classifications and the off-diagonal cells hold the
    omission and commission errors. This function re-expresses the sample counts
    as estimated proportions of area (each row is scaled by the share of the
    classified image occupied by that class) and reports:

    - the overall accuracy with its confidence interval,
    - the error matrix in proportions of area,
    - the user's accuracy of every class with its confidence interval,
    - the estimated area (in hectares) of every class with its confidence interval.

    Every interval half-width is ``conf * sqrt(variance)``.

    Parameters:

        matrix: confusion matrix or error matrix in DataFrame. Rows must contain the
                class labels (convertible to int) plus 'Total', 'Producer_Accuracy'
                and 'Omission'; columns must contain one column per class plus
                'Total', 'Users_Accuracy' and 'Commission'. Rows hold the classes
                of the classified image, because the row totals are used as the
                per-class sample sizes.

        image_predicted: Array with 2d. This array should be the image classified
                with classes. Its values are compared against the class labels
                of ``matrix`` to count the pixels of every class.

        pixel_size: Pixel size of the image classified, used to convert pixel counts
                to hectares (pixel_size**2/10000). By default is 10m of Sentinel-2.

        conf: Multiplier applied to the standard errors. By default is 1.96 (95%).

    Return:

        None. The information is printed: overall accuracy, confusion matrix by
        proportions of area, user's accuracy with confidence interval and
        estimated area with confidence interval as well.

    Note:

        The variance terms divide by (ni - 1), so every class needs at least two
        samples in its 'Total' cell for the intervals to be finite.
    '''

    # keep only the per-class rows of the error matrix
    class_rows = matrix.drop(['Total', 'Producer_Accuracy', 'Omission'], axis = 0)

    # classes
    iclass = class_rows.index.to_numpy().astype(int)

    # ni: number of samples per classified class
    ni = class_rows['Total'].to_numpy().astype(float)

    # sample counts as float: an integer matrix would otherwise be truncated to
    # zeros when the counts are turned into proportions
    counts = class_rows.drop(['Total', 'Users_Accuracy', 'Commission'], axis = 1)
    counts = counts.to_numpy().astype(float)

    # number of classes
    n = counts.shape[0]

    # pixels of every class in the classified image and their weights Wi
    image_predicted = np.asarray(image_predicted)
    pixels = np.array([(image_predicted == c).sum() for c in iclass])
    total_pixels = pixels.sum()
    wi = pixels/total_pixels

    # error matrix in proportions of area: p_ij = Wi*n_ij/ni
    props = (counts/ni[:, None])*wi[:, None]

    # total Wi
    total_wi = props.sum(axis = 1)

    # user's accuracy
    ua = np.diag(props)/total_wi

    # overall accuracy: sum of the diagonal proportions
    oa = np.diag(props).sum()

    # building the matrix of proportion area
    table = np.column_stack([props, total_wi, pixels])
    # total Wi in rows
    total = table.sum(axis = 0)
    # final matrix
    table = np.vstack([table, total])

    # label the table with the real class ids so it agrees with the lines printed below
    labels = [str(c) for c in iclass]

    # error matrix (DataFrame) in proportions of area
    cm_prop_area = pd.DataFrame(np.round(table, 4),
                                columns = labels + ['Total[Wi]', 'Area[pixels]'],
                                index = labels + ['Total'])

    # confidence interval for Overall accuracy: conf*sqrt(sum(Wi^2*Ui*(1-Ui)/(ni-1)))
    conf_int_oa = conf*np.sqrt(np.sum(wi**2*ua*(1 - ua)/(ni - 1)))

    # confidence interval for user's accuracy: conf*sqrt(Ui*(1-Ui)/(ni-1))
    conf_int_ua = conf*np.sqrt(ua*(1 - ua)/(ni - 1))

    # standard error of the estimated area proportion of every class (column k):
    # s(p_k) = sqrt(sum_i((Wi*p_ik - p_ik^2)/(ni-1)))
    sp = np.sqrt(np.sum((wi[:, None]*props - props**2)/(ni[:, None] - 1), axis = 0))

    # hectares represented by one pixel
    ha_per_pixel = pixel_size**2/10000

    # S(A)=conf*s(p)*A(total) in ha
    SA = conf*sp*total_pixels*ha_per_pixel

    # Area total estimated
    A = total[0:n]*total_pixels*ha_per_pixel

    # print info
    print('***** Confusion Matrix by Estimated Proportions of area *****')
    print('')
    print('Overall accuracy')
    print(f'{oa:.4f} ± {conf_int_oa:.4f}')
    print('')
    print('Confusion matrix')
    print(cm_prop_area)
    print('')
    print('User´s accuracy at 95%')
    for i in range(n):
        print(f'{iclass[i]}: {ua[i]:.4f} ± {conf_int_ua[i]:.4f}')
    print('')
    print('Estimating area (Ha) and uncertainty at 95%')
    for i in range(n):
        print(f'{iclass[i]}: {A[i]:.4f} ± {SA[i]:.4f}')

    return None