Skip to content

Commit

Permalink
confusion matrix by estimated proportions of area
Browse files Browse the repository at this point in the history
  • Loading branch information
ytarazona committed Mar 20, 2023
1 parent f18905b commit 319328c
Showing 1 changed file with 129 additions and 0 deletions.
129 changes: 129 additions & 0 deletions scikeo/process.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# +
import os
import copy
import fiona
import rasterio
import rasterio.mask
@@ -152,3 +153,131 @@ def extract(image, shp):
raise TypeError('DataFrame contains nan values. Check it out')

return join_df


def confintervalML(matrix, image_predicted, pixel_size = 10, conf = 1.96):
    '''Print an error matrix expressed as estimated proportions of area, with confidence intervals.

    The sample counts of the confusion matrix are converted into estimated area proportions
    (p_ij = W_i * n_ij / n_i), where W_i is the share of pixels mapped as class i in the
    classified image. From that matrix the function derives the overall accuracy, the user's
    accuracy of every class and the estimated area of every class, each one with its
    confidence half-width (conf * standard error).

    Parameters:
        matrix: confusion (error) matrix as a DataFrame. Rows are the mapped classes plus the
            rows 'Total', 'Producer_Accuracy' and 'Omission'; columns are the reference classes
            plus the columns 'Total', 'Users_Accuracy' and 'Commission'. Row labels of the
            classes must be convertible to int because they are compared against the pixel
            values of image_predicted.
        image_predicted: Array with the classified image (class label of every pixel).
        pixel_size: Pixel size of the classified image in metres. By default is 10m of
            Sentinel-2. Areas are reported in hectares (pixel_size**2/10000 per pixel).
        conf: Multiplier applied to the standard errors to obtain the confidence
            half-widths. By default 1.96, i.e. a 95% confidence interval.

    Return:
        None. The report is printed: overall accuracy, error matrix by proportions of area,
        user's accuracy with confidence interval and estimated area with confidence interval.

    Raises:
        ValueError: if none of the classes of the matrix is present in image_predicted
            (the area weights W_i would be undefined).
    '''

    # keep only the rows of the classes (drop the summary rows)
    class_rows = matrix.drop(['Total', 'Producer_Accuracy', 'Omission'], axis = 0)

    # classes (as int, so they can be compared with the pixel values)
    iclass = class_rows.index.to_numpy().astype(int)

    # ni: sample units of every mapped class
    ni = class_rows['Total'].to_numpy(dtype = float)

    # sample counts only; float dtype is forced so that the proportions computed below
    # are never truncated when the DataFrame stores the counts as integers
    counts = class_rows.drop(['Total', 'Users_Accuracy', 'Commission'], axis = 1).to_numpy(dtype = float)

    # number of classes
    n = counts.shape[0]

    # mapped pixels per class and area weights Wi
    pixels = np.array([(image_predicted == c).sum() for c in iclass])
    total_pixels = pixels.sum()
    if total_pixels == 0:
        raise ValueError('None of the classes of the confusion matrix is present in image_predicted')
    wi = pixels/total_pixels

    # estimated area proportions: pij = Wi*nij/ni
    matConf = (counts/ni[:, None])*wi[:, None]

    # total Wi (row sums of the proportions)
    total_wi = np.sum(matConf, axis = 1)

    # user's accuracy and overall accuracy
    ua = np.diag(matConf)/total_wi
    oa = np.diag(matConf).sum()

    # building the matrix of proportion area: proportions | Total[Wi] | Area[pixels]
    mat_conf = np.column_stack([matConf, total_wi, pixels])
    # column totals; the first n values are the estimated area proportions of every class
    total = np.sum(mat_conf, axis = 0)
    # final matrix
    mat_conf = np.vstack([mat_conf, total])

    # label rows and columns with the real class ids so that the matrix agrees with the
    # per-class lines printed below
    class_labels = [str(c) for c in iclass]
    namesCol = class_labels + ['Total[Wi]', 'Area[pixels]']
    namesRow = class_labels + ['Total']

    # error matrix (DataFrame) in proportions of area
    cm_prop_area = pd.DataFrame(np.round(mat_conf, 4), columns = namesCol, index = namesRow)

    # confidence interval for overall accuracy: conf*sqrt(sum(Wi^2*UA*(1-UA)/(ni-1)))
    # (the square root turns the variance into a standard error)
    conf_int_oa = conf*np.sqrt(np.sum(wi**2*ua*(1 - ua)/(ni - 1)))

    # confidence interval for user's accuracy: conf*sqrt(UA*(1-UA)/(ni-1))
    conf_int_ua = conf*np.sqrt(ua*(1 - ua)/(ni - 1))

    # standard error of the estimated area proportion of every class (one value per column):
    # s(p.k) = sqrt(sum_i((Wi*pik - pik^2)/(ni-1)))
    sp = np.sqrt(np.sum((wi[:, None]*matConf - matConf**2)/(ni[:, None] - 1), axis = 0))

    # hectares covered by one pixel
    ha_per_pixel = pixel_size**2/10000

    # S(A)=conf*s(p)*A(total) in ha
    SA = conf*sp*total_pixels*ha_per_pixel

    # Area total estimated in ha
    A = total[0:n]*total_pixels*ha_per_pixel

    # print info
    print('***** Confusion Matrix by Estimated Proportions of area *****')
    print('')
    print('Overall accuracy')
    print(f'{oa:.4f} ± {conf_int_oa:.4f}')
    print('')
    print('Confusion matrix')
    print(cm_prop_area)
    print('')
    print('User´s accuracy at 95%')
    for label, acc, half_width in zip(iclass, ua, conf_int_ua):
        print(f'{label}: {acc:.4f} ± {half_width:.4f}')
    print('')
    print('Estimating area (Ha) and uncertainty at 95%')
    for label, area, half_width in zip(iclass, A, SA):
        print(f'{label}: {area:.4f} ± {half_width:.4f}')

0 comments on commit 319328c

Please sign in to comment.