-
Notifications
You must be signed in to change notification settings - Fork 0
/
CorrectZForEveryCluster.py
1948 lines (1727 loc) · 92.6 KB
/
CorrectZForEveryCluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from Regions import *
from gwyfile import load as gwyload
from gwyfile.util import get_datafields , find_datafields
import pickle
import pandas as pd
from scipy.signal import find_peaks
from scipy.spatial import Voronoi, voronoi_plot_2d, cKDTree
from matplotlib import path ### just for "drawing" an polygon for later extraction of the values inside this polygon
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm, ticker
import multiprocessing
from sklearn.neighbors import KernelDensity
import time
import warnings
# from skimage import draw
from skimage.draw import line
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from decimal import Decimal
from numpy.ma import masked_outside
import traceback
import logging
import beepy
from os import listdir
class clusterpic():
"""
Creats an Object for working on STM data.
Class Instances:
path: Path to the file
name: name of file given by user
data: 2d numpy array
xres: number of points in x direction
yres: number of points in y direction
xreal: length of picture in x diektion (e.g. 100nm)
yreal: length of picture in y diektion
si_unit_xy: unitis of xreal/yreal e.g m or nm
si_unit_z: unitis of z e.g m or nm
"""
#import numpy as np
def __init__(self, path = '', name ='', data = None , xres = None, yres= None ,
xreal = None, yreal= None, si_unit_xy= None, si_unit_z= None, metaData = None):
self.path = path
self.name = name
self.data = pd.DataFrame(data).dropna().to_numpy() ### Temporary fix for the data wich was drift corrected
self.xres = xres
self.yres = yres
self.xreal = xreal
self.yreal = yreal
self.metaData = metaData
self.currentSetPoint = None
self.gapVoltage = None
try:
self.si_unit_xy = si_unit_xy.unitstr
except:
self.si_unit_xy = si_unit_xy
try:
self.si_unit_z = si_unit_z.unitstr
except:
self.si_unit_z = si_unit_z
if (self.xreal is not None) and (self.yreal is not None):
self.area = self.xreal * self.yreal
else:
self.area = None
self.peak_XYdata = pd.DataFrame()
self.clusters_coord = np.empty(0)
self.ax = None
self.pickable_artists = None
self.event =None
self.coor_regieons = []
self.regions = []
# self.tmp = [] # for debuging
self.creat_heights_table()
#self.heights = pd.DataFrame([])
self.cluster_distribution = None
self.cluster_distribution_balls = None
self.nearest_neighbors_ditribution = None
self.slope_map = []
self.path_profiles ={} # for all profiles on the imshow() between clsuters cuts of the data so to say
if (self.data.shape[0] == self.data.shape[1]) == False:
#print(self.data.shape)
#### correct cuted images to full dimetions, so it is simple to compute all other stuff
xmeter_per_pix = self.xreal/self.xres
ymeter_per_pix = self.yreal/self.yres
dataFr = pd.DataFrame(self.data)
#self.data = dataFr.reindex(index=list(dataFr.columns)).to_numpy() ### I am not shure why reindexiing hier but this cause problems with drift corected images, comment out for now
self.yres = self.data.shape[0]
self.xres = self.data.shape[1]
self.yreal = ymeter_per_pix*self.data.shape[0]
self.xreal = xmeter_per_pix*self.data.shape[1]
def __repr__(self):
return f"{self.name}"
def creat_heights_table(self):
self.heights = pd.DataFrame(columns = ['x',
'y',
f'x_{self.si_unit_xy}',
f'y_{self.si_unit_xy}',
'initial_Z',
'corrected_Z_averaged',
'corrected_Z_closest_step',
'corrected_Z_highest_step',
'corrected_Z_lowest_step'])
def dump_picture(self,
prefix : str = None,
sufix : str = '_clusterpic_obj',
folder : str = None) -> pickle:
"""
Saves the hole clusterpic object into a pickel file with prefix and folder
"""
if prefix:
dump_path = '[%s]%s%s.pkl' %( str(prefix) , self.name, str(sufix))
else:
dump_path = '%s%s.pkl' %(self.name, str(sufix))
if folder:
dump_path = folder+dump_path
with open(dump_path, 'wb') as pickle_file:
pickle.dump(self,pickle_file)
def dump_calculated_heights(self,
prefix = None,
folder=None):
if self.heights.empty:
print('You try to dump empty pandas.DataFrame \n So no data for cluster heights were found. Run first finde_peaks_in_row(), group_clusters() or load cluster coordinates from pickle by runing load_dumped_clusters() and then calc_true_height_4_every_region()')
else:
if prefix:
dump_path = f'{prefix}.pkl'
else:
dump_path = f'{self.name}_cluster_heights.pkl'
if folder:
dump_path = folder+dump_path
with open(dump_path, 'wb') as pickle_file:
pickle.dump(self.heights,pickle_file)
def load_dumped_cluster_heights(self, path_to_pickle):
"""
Loads cluster heights witch were dumeted by dump_cluster_heights()
Parameters:
-----------
path_to_pickle: str
path to .pkl file
"""
with open(path_to_pickle, "rb") as input_file:
self.heights = pickle.load(input_file)
def dump_cluster_coord(self,
prefix = None,
folder=None):
if self.clusters_coord.size == 0:
print('You try to dump empty array \n So no coordinates for cluster were found. Run first finde_peaks_in_row(), group_clusters() or load cluster coordinates from pickle by runing load_dumped_clusters()')
else:
if prefix:
dump_path = f'{prefix}.pkl'
else:
dump_path = '%s_cluster_coordinates.pkl' %(self.name)
if folder:
dump_path = folder+dump_path
with open(dump_path, 'wb') as pickle_file:
pickle.dump(self.clusters_coord,pickle_file)
def load_dumped_cluster_coord(self, path_to_pickle):
"""
Loads cluster coordinates witch were dumeted by dump_cluster_coord()
Parameters:
-----------
path_to_pickle: str
path to .pkl file
"""
with open(path_to_pickle, "rb") as input_file:
self.clusters_coord = pickle.load(input_file)
    def cluster_coords_walk_to_extrema(self, pixelRange = 2, extrema = 'max'):
        """
        Update cluster coordinates to the nearest extrema points within a specified pixel range.

        Iterates through each cluster coordinate and hill-climbs to the nearest
        extremum by calling walk_to_the_extrema(). If a coordinate moves, the
        change is printed and the coordinate updated.

        Args:
            pixelRange (int, optional): Half-width of the search window passed to
                walk_to_the_extrema (default 2).
            extrema (str, optional): 'max' or 'min' (default 'max').

        Returns:
            None

        Note:
            Modifies self.clusters_coord in place (rebuilt as a numpy array).
        """
        new_clusters_coord = []
        for nr, i in enumerate(self.clusters_coord):
            # [0] of the returned tuple is the snapped [x, y, z] coordinate
            heyho = self.walk_to_the_extrema(self.data, [int(i[0]), int(i[1]), i[2]], extrema = extrema, pixelRange=pixelRange)[0]
            new_clusters_coord.append(heyho)
            # report only coordinates that actually moved
            if heyho != [int(i[0]), int(i[1]), i[2]]:
                print(f'{nr}:{[int(i[0]), int(i[1]), i[2]]} -> {heyho}:diff: {np.array(i)-np.array(heyho)}')
        self.clusters_coord = np.array(new_clusters_coord)
    def walk_to_the_extrema(self,test_data, xyz_current_max, extrema = 'max', pixelRange = 2):
        """
        Hill-climb to a local extremum of test_data (an NxN STM image) by
        repeatedly slicing a +-pixelRange window around the current point and
        jumping to the window's extremum; stops when no better value is found.

        Parameters:
            extrema: str, 'max' or 'min' - which kind of extremum to seek
            test_data: NxN numpy array, STM image data
            xyz_current_max: list [x, y, z] - starting point of the search
            pixelRange: int, half-width of the search window (e.g. 4 gives an
                8x8 window away from the image edges)

        Returns:
            (suspect, y_range, x_range, aslice, step_counter) where suspect is
            the final [x, y, z], y_range/x_range are the last window bounds,
            aslice the last window slice and step_counter the number of
            iterations performed.
        """
        # NOTE: despite the name, the loop runs WHILE this flag is True and the
        # flag is cleared once no better extremum is found in the window.
        no_new_extrema_found = True
        suspect = xyz_current_max
        step_counter = 0
        while no_new_extrema_found:
            step_counter +=1
            # clamp the +-pixelRange window to the array bounds
            y_range = [suspect[1]-pixelRange, suspect[1]+pixelRange]
            if y_range[0] < 0:y_range[0] = 0 # hit the lower boundary; important for the index correction below
            if y_range[1] > test_data.shape[0] : y_range[1] = test_data.shape[0] # hit the upper boundary
            x_range = [suspect[0]-pixelRange, suspect[0]+pixelRange]
            if x_range[0] < 0 : x_range[0] = 0 # hit the left boundary
            if x_range[1] > test_data.shape[1] : x_range[1] = test_data.shape[1] # hit the right boundary
            # rows are y, columns are x
            aslice = test_data[y_range[0]:y_range[1],x_range[0]:x_range[1]]
            if extrema == 'min':
                extremaY, extremaX = np.unravel_index(aslice.argmin(), aslice.shape) # extremum indices within the 2d slice
            else:
                extremaY, extremaX = np.unravel_index(aslice.argmax(), aslice.shape) # extremum indices within the 2d slice
            # translate slice-local indices back to full-array indices
            extremaXX, extremaYY = extremaX+x_range[0], extremaY+y_range[0]
            # HACK: z is sometimes NaN (cause unknown); treat as 0 so the
            # comparisons below do not silently fail (NaN compares False)
            if np.isnan(suspect[2]):
                suspect[2] = 0.0
            if extrema == 'min':
                if aslice.min() < suspect[2]:
                    suspect = [extremaXX, extremaYY, aslice.min() ]
                else:
                    no_new_extrema_found = False
            else:
                if aslice.max() > suspect[2]:
                    suspect = [extremaXX, extremaYY, aslice.max() ]
                else:
                    no_new_extrema_found = False
        return suspect, y_range, x_range, aslice, step_counter
def find_peaks_in_rows(self,
prominence = 0.6E-09,
distance = 1,
height = 1.1E-09,
axes= 'both'):
"""
Walk through rows or columns (or both) in 2d numpy array (self.data) and findes all peaks by aplaying pind_peaks from scipy
Parameter:
prominence: float, see scipy.signal.find_peaks for explanation
distance: float, see scipy.signal.find_peaks for explanation
height: float, see scipy.signal.find_peaks for explanation
axes: str, 'rows', 'colums' or 'both' calculate peaks along row axes, column axes or both respectevly
Reruns:
data_frme with same dimenstion as test_data wiht peaks hiths and peaks positions
and nan values at other positions
"""
max_dic_columns = {}
df_rows = None
df_column = None
if axes == 'rows' or axes == 'both':
counter = 0
for i in self.data: # iterate row by row
xpeaks = find_peaks(i,height=height, distance=distance, prominence=prominence)[0]
zpeaks = i[xpeaks]
max_dic_columns[counter] = {x:z for x,z in zip(xpeaks,zpeaks)}
counter +=1
dataFr = pd.DataFrame(max_dic_columns).T
dataFr.columns.name = "Y-Coordinate"
dataFr.index.name = "X-Coordinate"
dataFr = dataFr.reindex(columns=list(dataFr.index)) ### Fill all not existing rows with NaNs in order to get quadratik matrix again
df_rows = dataFr.reindex(sorted(dataFr.columns), axis=1)
if axes == 'columns' or axes == 'both':
counter = 0
for i in self.data.T: # iterate column by column
xpeaks = find_peaks(i, height=height, distance=distance, prominence=prominence)[0]
zpeaks = i[xpeaks]
max_dic_columns[counter] = {x:z for x,z in zip(xpeaks,zpeaks)}
counter +=1
dataFr = pd.DataFrame(max_dic_columns).T
dataFr.columns.name = "X-Coordinate"
dataFr.index.name = "Y-Coordinate"
dataFr = dataFr.reindex(columns=list(dataFr.index))### Fill all not existing rows with NaNs in order to get quadratik matrix again
df_column = dataFr.reindex(sorted(dataFr.columns), axis=1).T
if axes == 'rows':
peak_data = df_rows
elif axes == 'columns':
peak_data = df_column
elif axes == 'both':
peak_data = df_column.combine_first(df_rows)
self.peak_XYdata = peak_data
    def show_data(self,
                  ax =None,
                  cmap = 'gray',
                  mode = 'normal', # 'normal' or 'latex' (latex wraps the bar label in \textcolor)
                  latex_font = 5 , # font needs to be smaller for latex because strings are longer
                  data_multiplier = 1,
                  cbar_on = True,
                  cbar_location = 'right',
                  cbar_fraction = 0.04740,
                  cbar_pad = 0.004,
                  cbar_diggi = 1, # how many digits after comma in cbar ticks
                  cbar_tick_spacing = False, # (major, minor) spacing forwarded to set_tick_spacing
                  cbar_ticks_labelsize = 5,
                  bar = True,
                  bar_space_left = 0.05, # space in % of the whole x range
                  bar_space_bottom = 0.95, # space in % of the whole y range
                  bar_length = None, # if None, computed automatically as ~10% of the picture width
                  bar_color = 'white',
                  bar_size = 10,
                  bar_label_xshift = 1.5, # in percent from origin
                  bar_label_yshift = 0.99, # in percent from origin
                  bar_ticks = False,
                  mask = None,
                  unit = 'nm',
                  no_ticks =False,
                  show_clusters = False,
                  show_regions = False,
                  clusters_markersize = 3,
                  clusters_markercolor = 'r',
                  font_size= 10,
                  window_face_color = 'red', # face color of the rectangular regions
                  rim_color = 'black', # edge color of the rectangular regions
                  alpha = 0.5, # transparency of the regions
                  show_cluster_numbers = False,
                  cl_numb_fontsize = 8
                  ):
        """
        Plots the scanning tunneling microscope (STM) data.

        Parameters:
        -----------
        cmap: str
            The colormap used for the plot. Default is 'gray'.
        mask: list of lists
            Plots only values inside the ranges [a, b] for mask = [[a, b], [a1, b1], ...]
        bar: bool
            Whether to draw a scale bar on the plot. Default is True.
        bar_space_left: float
            Space from the left side of the plot to the bar, as a fraction of the x range. Default is 0.05.
        bar_space_bottom: float
            Space from the bottom of the plot to the bar, as a fraction of the y range. Default is 0.95.
        bar_length: float
            Length of the bar; if None it is computed as ~10% of the picture width.
        bar_color: str
            The color of the bar. Default is 'white'.
        bar_size: float
            The width of the bar in points. Default is 10.
        unit: str
            The unit for the axis labels. Default is 'nm'. Available are '$\\mu$m' and $\\AA$ for 10^-6 and 10^-10 m
        no_ticks: bool
            Whether to hide axis ticks. Default is False.

        Returns:
        --------
        ax: matplotlib axis object of the plot.
        """
        from matplotlib import ticker as mpl_ticker
        if not ax:
            fig, ax = plt.subplots()
        plt.rcParams.update({'font.size': font_size})
        multiplier = 1
        if not bar_length:
            # derive the unit-dependent meters->unit factor and a default
            # bar length of ~10% of the image width
            match unit:
                case r'$\AA$':
                    multiplier = 1e10
                case r'nm':
                    multiplier = 1e9
                case r'$\mu$m':
                    multiplier = 1e6
            bar_length = round(self.xreal*0.1*multiplier)
        if mask:
            # show only the requested value ranges; everything else is masked out
            for mski in mask:
                in_your_face = masked_outside(self.data*data_multiplier,mski[0],mski[1])
                im = ax.imshow(in_your_face,
                               cmap=cmap,
                               interpolation = None,
                               extent =[0, self.xreal*data_multiplier, self.yreal*data_multiplier, 0]
                               )
        else:
            im = ax.imshow(self.data*data_multiplier,
                           cmap = cmap,
                           interpolation = None,
                           extent =[0, self.xreal*data_multiplier, self.yreal*data_multiplier, 0]
                           )
        if show_clusters:
            # pixel coordinates are converted to physical units via xreal/xres
            for i in range(0,len(self.clusters_coord)):
                ax.plot(self.clusters_coord[:,0][i]*(self.xreal/self.xres)*data_multiplier,
                        self.clusters_coord[:,1][i]*(self.yreal/self.yres)*data_multiplier,
                        'o', c = clusters_markercolor, ms = clusters_markersize)
        if show_regions:
            if self.regions:
                for i in self.regions:
                    if i.region_type == 'voronoi':
                        # a single Voronoi diagram covers all clusters: draw once, then stop
                        vor = Voronoi( np.vstack((self.clusters_coord[:,0]*(self.xreal/self.xres)*data_multiplier,
                                                  self.clusters_coord[:,1]*(self.yreal/self.yres)*data_multiplier)).T, qhull_options='Qbb Qc Qx')
                        voronoi_plot_2d(vor, ax = ax, show_points = False)
                        break
                    elif 'rectangular' in i.region_type:
                        x_min , x_max, y_min, y_max =min(i.coordinates[:,0])*(self.xreal/self.xres)*data_multiplier, max(i.coordinates[:,0])*(self.xreal/self.xres)*data_multiplier, min(i.coordinates[:,1])*(self.yreal/self.yres)*data_multiplier, max(i.coordinates[:,1])*(self.yreal/self.yres)*data_multiplier
                        rectangle = plt.Rectangle((x_min,y_min), x_max - x_min, y_max - y_min, fc=window_face_color,ec=rim_color, alpha = alpha)
                        ax.add_patch(rectangle)
            # reset x/y limits because the Voronoi diagram exceeds them
            ax.set_xlim(0,self.xreal*data_multiplier)
            ax.set_ylim(self.yreal*data_multiplier,0)
        if bar:
            # NOTE(review): the 1e-9 factor implies bar_length is taken to be in
            # nanometers regardless of `unit` -- confirm for non-nm units
            ax.hlines(self.yreal*bar_space_bottom*data_multiplier,
                      xmin= self.xreal*bar_space_left*data_multiplier,
                      xmax = self.xreal*bar_space_left*data_multiplier + bar_length*1e-9*data_multiplier,
                      colors = bar_color,
                      linewidth = bar_size)
            latex_bar = f'\\textcolor{{{bar_color}}}{{{bar_length} {unit}}}'
            normal_bar = f'{bar_length} {unit}'
            ax.annotate(latex_bar if mode == 'latex' else normal_bar,
                        (self.xreal*bar_space_left*bar_label_xshift*data_multiplier,
                         self.yreal*bar_space_bottom*bar_label_yshift*data_multiplier),
                        color = bar_color,
                        fontsize = latex_font if mode == 'latex' else 8, )
        # axis tick formatter: display meters as the chosen physical unit
        if unit == r'nm':
            func = lambda x,pos: "{:g}".format(x*1e9)
        if unit == r'$\mu$m':
            func = lambda x,pos: "{:g}".format(x*1e6)
        if unit == r'$\AA$':
            func = lambda x,pos: "{:g}".format(x*1e10)
        fmt = mpl_ticker.FuncFormatter(func)
        if no_ticks:
            # Hide X and Y axes tick labels
            ax.xaxis.set_tick_params(labelbottom=False)
            ax.yaxis.set_tick_params(labelleft=False)
            # Hide X and Y axes tick marks
            ax.set_xticks([])
            ax.set_yticks([])
        if cbar_on:
            cbar = plt.colorbar(mappable= im, fraction=cbar_fraction, pad=cbar_pad, location = cbar_location)
            tick_locator = ticker.MaxNLocator(nbins=9)
            cbar.locator = tick_locator
            cbar.ax.tick_params(axis = 'y', which = 'both', direction = 'out', labelsize=cbar_ticks_labelsize)
            cbar.update_ticks()
            cbar.ax.yaxis.set_major_formatter(f'{{x:1.{cbar_diggi}f}}')
            # NOTE(review): colorbar title is hard-coded to nanometers (siunitx markup)
            cbar.ax.set_title(r'\si{\nano\meter}', fontsize = 1, pad = 1)
            if cbar_tick_spacing: set_tick_spacing(cbar.ax, axes = 'y', major_spac = cbar_tick_spacing[0], minor_space = cbar_tick_spacing[1])
        if show_cluster_numbers:
            # annotate every cluster with its index
            for i in range(0,len(self.clusters_coord)):
                ax.annotate(i, (self.clusters_coord[:,0][i]*(self.xreal/self.xres)*data_multiplier, self.clusters_coord[:,1][i]*(self.yreal/self.yres)*data_multiplier), fontsize=cl_numb_fontsize)
        return ax
def intensity_profile_along_path(self, counter = 1, cluster_numbers = 'all', data_multiplier = 1):
"""
Generate intensity profile along a specified path.
This method computes the intensity profile along a path defined by cluster indices or all clusters. It can generate a straight line between two clusters or a zigzag line between multiple specified clusters.
Parameters:
- counter (int, optional): Counter for the profiles. Defaults to 1.
- cluster_numbers (str or list, optional): Defines the clusters along the path. If 'all', considers all clusters. If a list, specify cluster indices. If a list with two elements, generates a straight line between two clusters. If a list with more than two elements, generates a zigzag line between specified clusters. Defaults to 'all'.
- data_multiplier (float, optional): Multiplier to scale the intensity values. Defaults to 1.
Returns:
numpy.ndarray: A 2D array containing the profile data. Each row corresponds to a point along the path, with columns [row_indices, column_indices, distances, intensity_values].
Example:
intensity_profile_along_path(counter=1, cluster_numbers=[0, 1, 3], data_multiplier=1e9) # 1e9 for representing nm
"""
rr, cc = [], []
if cluster_numbers == 'all':
points = self.clusters_coord[:,:2].astype(int)
elif isinstance(cluster_numbers,list):
if len(cluster_numbers) == 2: # strait line between two clusters
points = (self.clusters_coord[cluster_numbers[0]][:2].astype(int) ,
self.clusters_coord[cluster_numbers[1]][:2].astype(int)
)
else: ## zig zag line between all clusters in the list cluster_numbers
points = self.clusters_coord[cluster_numbers][:,:2].astype(int)
else:
warnings.warn(f"This is not a list {cluster_numbers}")
# Generate indices along the path for each segment
for i in range(len(points) - 1):
segment_rr, segment_cc = line(*points[i], *points[i + 1])
rr.extend(segment_rr)
cc.extend(segment_cc)
# Clip indices to stay within image boundaries
rr = np.clip(rr, 0, self.data.shape[0] - 1)
cc = np.clip(cc, 0, self.data.shape[1] - 1)
# Extract intensity values along the path
intensity_profile = self.data[cc,rr]
dd_zero = [rr[0],cc[0]]#np.sqrt((rr[0]*(self.xreal/self.xres))**2. + cc[0]*(self.yreal/self.yres)**2.)# [rr[0],cc[0]]
dd = []
dsum = 0
for x,y in zip(rr,cc):
d = np.sqrt(((x-dd_zero[0])*(self.xreal/self.xres))**2. + ((y-dd_zero[1])*(self.yreal/self.yres))**2.)
dsum = dsum + d
dd.append(dsum)
dd_zero = [x,y]
dd = np.array(dd)
profiles = np.array((rr,cc,dd*data_multiplier,intensity_profile)).T
self.path_profiles[counter] ={'x_pix':profiles[:,0],'y_pix':profiles[:,1],'x':profiles[:,0]*(self.xreal/self.xres)*data_multiplier,'y':profiles[:,1]*(self.yreal/self.yres)*data_multiplier,'distance':profiles[:,2], 'intensity':profiles[:,3]}
def intensity_profile_along_path_multi(self, list_of_all_cluster_pairs =[], data_multiplier = 1,
# width=1
):
"""
Generate intensity profiles along specified paths for multiple cluster pairs.
Parameters:
- list_of_all_cluster_pairs (list): List of cluster pairs, where each pair is represented as [cluster_index_1, cluster_index_2].
The function will generate intensity profiles for each specified pair.
- cluster_numbers (str or list): If 'all', consider all clusters. If a list, specify cluster indices.
If a list with two elements, generate a straight line between two clusters.
If a list with more than two elements, generate a zigzag line between specified clusters.
- data_multiplier (float): Multiplier to scale the intensity values.
Data is stored in self.path_profiles attribute
Returns:
list: List containing intensity profiles for each specified pair. Each element in the list is an array containing
information along the path for a pair. Each row represents a point on the path with columns:
[row_index, column_index, distance_from_start, intensity_value].
Example:
intensity_profile_along_path_multi(list_of_all_cluster_pairs=[[0, 1], [2, 3]], cluster_numbers=[0, 1, 3], data_multiplier=1e9)
"""
result =[]
counter = 0
if len(list_of_all_cluster_pairs)==0:
warnings.warn(f"Pleas provide the list of lists for extraction of paths between cluster pairs a1,a1 and a2,a2 like [[a1,a1], [a2,a2]]")
else:
for i in list_of_all_cluster_pairs:
result.append(self.intensity_profile_along_path(counter = counter, cluster_numbers = i, data_multiplier = data_multiplier,
#width = width
))
counter +=1
def get_peakXYdata(self):
"""
Calculates 2d numpy array of XY data of peaks estimatet by finde_peaks_in_rows().
it is just the heigest points not the maxima of cluster, you need to run group_clusters() for this
"""
return self.peak_XYdata
def calculate_angle_betwee_3_clusters(self, cls1, cls2, cls3):
"""
Calculates the angle in degrees between three clusters in a 2D space.
Parameters:
- cls1 (int): Index of the first cluster.
- cls2 (int): Index of the second cluster (common vertex).
- cls3 (int): Index of the third cluster.
Returns:
float: The angle in degrees between the vectors formed by the clusters.
"""
vector1 = np.array(self.clusters_coord[cls1]) - np.array(self.clusters_coord[cls2])
vector2 = np.array(self.clusters_coord[cls3]) - np.array(self.clusters_coord[cls2])
# Calculate the cosine of the angle
cosine_angle = np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
# Use arccosine to get the angle in radians
angle_radians = np.arccos(cosine_angle)
# Convert radians to degrees
angle_degrees = np.degrees(angle_radians)
return angle_degrees
def distance_between_clusters(self, first, second):
"""
calculates distance between clusters by number so distance between first and second cluster would be self.distance_between_cluster(1,2)
"""
nm_per_pix = (self.xreal/self.xres)
return np.sqrt( (self.clusters_coord[first][0]*nm_per_pix - self.clusters_coord[second][0]*nm_per_pix)**2.
+ (self.clusters_coord[first][1]*nm_per_pix - self.clusters_coord[second][1]*nm_per_pix)**2.
)
def finde_nn_in_r(self, dr = np.sqrt(1e-9**2+1e-9**2), count_solo_cluster = False):
"""
Finds the nearest neighbors within a given distance for each coordinate in `coord`.
Parameters:
-----------
dr: float, optional
The distance threshold within which neighbors are considered. Default is the square root of 2 nanometers squared.
Returns:
--------
result: dict
A dictionary where the keys are the number of nearest neighbors and the values are the count of points having that number of neighbors within the given distance.
"""
coord = self.get_xy_coord()
tree = cKDTree(coord)
l = []
for i in coord:
nn = tree.query_ball_point(i, r = dr)#, return_length = True)
if nn:
tmp_count = 0
for k in nn:
if (coord[k] == i).all(): #skip counting current cluster as neighbor
pass
else:
tmp_count +=1
#print(tmp_count)
l.append(tmp_count)
nn_compl = np.arange(1,max(l)+1)
count = []
for p in nn_compl:
counter = 0
for s in l:
if s == p:
counter +=1
count.append(counter)
#return(pd.DataFrame(np.vstack((nn_compl,np.array(count)))))
#return(pd.DataFrame((nn_compl,np.array(count)))
result = {}
if count_solo_cluster:
solo_clusters = len(coord) - np.array(count).sum()
result[0] = solo_clusters
for i,k in zip (nn_compl,count):
result[i] = k
return result
def show_peakXYdata(self, figsize = (10,10) , cmap = None, returnImag = False):
"""
Plots image of all peaks estimatet by find_peaks_in_rows() (plt.imshow())
"""
if not self.peak_XYdata.empty:
plt.figure(figsize=figsize)
Peakimage = plt.imshow(self.peak_XYdata,interpolation='nearest', aspect='auto', cmap = cmap)
if returnImag:
return Peakimage
else:
print("No data found, run first find_peaks_in_rows()")
def group_clusters(self, max_d = 30, delet_at_edge = False, xres = None, yres=None):
"""
Group clusters with the data from finde_peaks_in_rows() by clustering the peak data due to the neighbours distance
Parameters:
peak_data: 2d numpy.array, peak data produced by finde_peaks_in_rows()
max_d: int, max distance of contiguous clusters
Returns:
clusters_coord: Nx3 numpy.array, x,y,z data of all found clusters
"""
coordinate_list = []
max_values_of_clusters = []## same shape as coordinate_list
clusterizerMe = self.peak_XYdata.to_numpy().T
for ix, iy in np.ndindex(clusterizerMe.shape):
if not np.isnan(clusterizerMe[ix][iy]):
coordinate_list.append([ix,iy,clusterizerMe[ix][iy]])
coordinate_list = np.array(coordinate_list)
Z = linkage(coordinate_list,
method='average', # dissimilarity metric: max distance across all pairs of
# records between two clusters
metric='euclidean'
) # you can peek into the Z matrix to see how clusters are
# merged at each iteration of the algorithm
clusters = fcluster(Z, max_d, criterion='distance')
all_clusters_list = [coordinate_list[np.where(clusters==k)[0].tolist()] for k in np.unique(clusters)]
clusters_coord =np.array([i[np.where(i[:,2]==i[:,2].max())][0] for i in all_clusters_list])### finde row with maximum value in 3erd columns
if delet_at_edge:
list_for_deletion = []
index_for_deletion = -1
for i in clusters_coord:
index_for_deletion += 1
if i[0] == 0 or i[0] == xres or i[1] == 0 or i[1] == yres:
list_for_deletion.append(index_for_deletion)
if index_for_deletion != -1:
clusters_coord = np.delete(clusters_coord,list_for_deletion,axis=0)
self.clusters_coord = clusters_coord
    def update_peaked_clusters(self,
                               pickable_artists,
                               xyz =None,
                               max_crawler = False,
                               pixelRange = 2,
                               extrema = 'max'
                               ):
        """
        Update the cluster list from points picked via cluster_peaker() and/or a
        manually supplied coordinate (or list of coordinates) xyz.

        Parameters:
            pickable_artists: matplotlib pickable artists placed by the
                interactive cluster picker
            xyz: [x, y, z] or list of such triples - extra clusters to add, e.g.
                when the UI picking went wrong
            max_crawler: bool, if True run walk_to_the_extrema() on the given
                xyz coordinates to snap them to a local extremum
            pixelRange: int, window half-width forwarded to the extremum walk
            extrema: str, 'max' or 'min' - kind of extremum to snap to

        Result is stored in self.clusters_coord as an Nx3 array; duplicates are
        removed.
        """
        # collect (x, y, z) for every picked artist; z is looked up from the
        # image data at the picked pixel (data is indexed [y][x])
        clusters_coord = np.array([[b[0][0],b[1][0],self.data[int(b[1][0])][int(b[0][0])]] for b in [i.get_data() for i in pickable_artists]])
        if max_crawler:
            # snap the already-stored cluster coordinates to their extrema
            self.cluster_coords_walk_to_extrema(pixelRange = pixelRange, extrema = extrema)
        if xyz:
            if any(isinstance(el, list) for el in xyz): # only if it is a list of lists
                new_xyz = []
                for i in xyz:
                    if max_crawler:
                        new_xyz.append(np.array(self.walk_to_the_extrema(self.data,i, extrema = extrema)[0]))
                    else:
                        new_xyz.append(i)
                clusters_coord = np.vstack((clusters_coord,np.array(new_xyz)))
            else:
                # single [x, y, z] triple
                if max_crawler:
                    new_xyz = self.walk_to_the_extrema(self.data,xyz, extrema = extrema)[0]
                    clusters_coord = np.vstack((clusters_coord,np.array(new_xyz)))
                else:
                    clusters_coord = np.vstack((clusters_coord,np.array([int(xyz[0]),int(xyz[1]),self.data[int(xyz[1])][int(xyz[0])]])))
        clusters_coord = np.unique(clusters_coord, axis = 0) # erase duplicates
        self.clusters_coord = clusters_coord
def voronoi_finite_polygons_2d(self, vor, radius=None):
"""
Reconstruct infinite voronoi regions in a 2D diagram to finite
regions.
Parameters
----------
vor : Voronoi
Input diagram
radius : float, optional
Distance to 'points at infinity'.
Returns
-------
regions : list of tuples
Indices of vertices in each revised Voronoi regions.
vertices : list of tuples
Coordinates for revised Voronoi vertices. Same as coordinates
of input vertices, with 'points at infinity' appended to the
end.
"""
if vor.points.shape[1] != 2:
raise ValueError("Requires 2D input")
new_regions = []
new_vertices = vor.vertices.tolist()
center = vor.points.mean(axis=0)
if radius is None:
radius = vor.points.ptp().max()*2
# Construct a map containing all ridges for a given point
all_ridges = {}
for (p1, p2), (v1, v2) in zip(vor.ridge_points, vor.ridge_vertices):
all_ridges.setdefault(p1, []).append((p2, v1, v2))
all_ridges.setdefault(p2, []).append((p1, v1, v2))
# Reconstruct infinite regions
for p1, region in enumerate(vor.point_region):
vertices = vor.regions[region]
if all([v >= 0 for v in vertices]):
# finite region
new_regions.append(vertices)
continue
# reconstruct a non-finite region
ridges = all_ridges[p1]
new_region = [v for v in vertices if v >= 0]
for p2, v1, v2 in ridges:
if v2 < 0:
v1, v2 = v2, v1
if v1 >= 0:
# finite ridge: already in the region
continue
# Compute the missing endpoint of an infinite ridge
t = vor.points[p2] - vor.points[p1] # tangent
t /= np.linalg.norm(t)
n = np.array([-t[1], t[0]]) # normal
midpoint = vor.points[[p1, p2]].mean(axis=0)
direction = np.sign(np.dot(midpoint - center, n)) * n
far_point = vor.vertices[v2] + direction * radius
new_region.append(len(new_vertices))
new_vertices.append(far_point.tolist())
# sort region counterclockwise
vs = np.asarray([new_vertices[v] for v in new_region])
c = vs.mean(axis=0)
angles = np.arctan2(vs[:,1] - c[1], vs[:,0] - c[0])
new_region = np.array(new_region)[np.argsort(angles)]
# finish
new_regions.append(new_region.tolist())
return new_regions, np.asarray(new_vertices)
def get_xy_coord(self):
return np.array((self.clusters_coord[:,0]*(self.xreal/self.xres),
self.clusters_coord[:,1]*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
def calc_cluster_distribution(self):
"""
Calculates all distances c = sqrt((a1-a2)**2 + (b1-b2)**2) from calculated cluster heigts table. So calc_true_height_4_every_region() have to be run befor
All distances are stored in the class attribute
'cluster_distribution'.
Returns:
--------
None
"""
# all_coord = np.array((self.heights['x']*(self.xreal/self.xres),
# self.heights['y']*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
# all_coord = np.array((self.heights[f'x_{self.si_unit_xy}'],
# self.heights[f'y_{self.si_unit_xy}'])).T # prepare 2d array vor surching distance
all_coord = np.array((self.clusters_coord[:,0]*(self.xreal/self.xres),
self.clusters_coord[:,1]*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
distribution = []
for i in range(0, len(all_coord)):
for k in range(0, len(all_coord)):
if k!=i:
distribution.append(np.sqrt( (all_coord[k][0] - all_coord[i][0])**2.
+ ( all_coord[k][1] - all_coord[i][1])**2. ))
self.cluster_distribution = np.array(distribution)
def calc_cluster_distribution_balls(self, height_substraction = 'initial_Z'):
"""
Calculate cluster distribution like in the calc_cluster_distribution but this time subtract the radius of the clusters.
Basically distance between outside shells of clusters
Parameters:
height_substraction: str : default 'initial_Z'
avalible options : 'corrected_Z_averaged', 'corrected_Z_closest_step', 'corrected_Z_highest_step', 'corrected_Z_lowest_step'
Returns:
--------
None
"""
# all_coord = np.array((self.heights['x']*(self.xreal/self.xres),
# self.heights['y']*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
# all_coord = np.array((self.heights[f'x_{self.si_unit_xy}'],
# self.heights[f'y_{self.si_unit_xy}'])).T # prepare 2d array vor surching distance
if self.heights.empty:
warnings.warn(f"Nothing to do. Height table is empty. Initialize parallel_correct_height() or load the data form .pkl file.")
return None
all_coord = self.heights[['x_m','y_m',height_substraction]]
distribution = []
for items, rows in all_coord.iterrows():
for items2, rows2 in all_coord.iterrows():
if items!=items2:
the_shit = np.sqrt((rows[0]-rows2[0])**2.) + (rows[1]-rows2[1])**2. - (rows[2]*0.5+ rows2[2]*0.5)
if the_shit >0.:
distribution.append(the_shit)
self.cluster_distribution_balls = np.array(distribution)
def calc_nn_distribution(self):
"""
Calculates all distances c = sqrt((a1-a2)**2 + (b1-b2)**2) from calculated cluster heigts table. So calc_true_height_4_every_region() have to be run befor
All distances are stored in the class attribute
'nn_distribution'.
Returns:
--------
None
"""
# all_coord = np.array((self.heights['x']*(self.xreal/self.xres),
# self.heights['y']*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
# all_coord = np.array((self.heights[f'x_{self.si_unit_xy}'],
# self.heights[f'y_{self.si_unit_xy}'])).T # prepare 2d array vor surching distance
all_coord = np.array((self.clusters_coord[:,0]*(self.xreal/self.xres),
self.clusters_coord[:,1]*(self.yreal/self.yres))).T # prepare 2d array vor surching distance
distribution = []
for i in range(0, len(all_coord)):
tmp_nn_distribution = []
for k in range(0, len(all_coord)):
if k!=i:
tmp_nn_distribution.append(np.sqrt( (all_coord[k][0] - all_coord[i][0])**2.
+ ( all_coord[k][1] - all_coord[i][1])**2. ))
distribution.append(min(tmp_nn_distribution))
self.nn_distribution = np.array(distribution)
@staticmethod
def calculate_slope(array):
rows, cols = array.shape
slope_matrix = np.zeros_like(array, dtype=float)
for i in range(2, rows - 2):
for j in range(2, cols - 2):
z_pp = array[i+1, j+1]
z_op = array[i+1, j]
z_mp = array[i+1, j-1]
z_po = array[i, j+1]
z_mo = array[i, j-1]
z_pm = array[i-1, j+1]
z_om = array[i-1, j]
z_mm = array[i-1, j-1]
dz_dx = (( z_pp + 2. * z_po + z_pm ) - ( z_mp + 2 * z_mo + z_mm ))/8
dz_dy = (( z_pp + 2. * z_op + z_mp ) - ( z_pm + 2 * z_om + z_mm ))/8
slope = np.sqrt(dz_dx**2 + dz_dy**2)
slope_matrix[i, j] = np.degrees(np.arctan(slope))
slope_matrix = slope_matrix/slope_matrix.max() #normalize
return slope_matrix
def cut_image_regions(self, window = None):
"""
Cuts the image data in to regions around clusters determine by Voronoi algorithm if window id None. If window is given: cuts image in squers with the length of window
Args:
window (int): length of a side of a rectangular for the window bilding arraound a cluster
"""
if not self.heights.empty:
self.creat_heights_table()
if np.all(self.slope_map):
self.slope_map = clusterpic.calculate_slope(self.data)
if window is not None:
region_type = f'rectangular {window} pix'
self.heights.index.name = region_type
self.regions = []
for idx,i in enumerate(self.clusters_coord): # sclice data in to reagion bei squers
y_range = [int(i[1]-window), int(i[1]+window)]
if y_range[0] < 0:y_range[0] = 0 # if you hit the boundaries important for correction later see maxXX
if y_range[1] > self.data.shape[0] : y_range[1] = self.data.shape[0] # if you hit the boundaries
x_range = [int(i[0]-window), int(i[0]+window)]
if x_range[0] < 0 : x_range[0] = 0 # if you hit the boundaries
if x_range[1] > self.data.shape[1] : x_range[1] = self.data.shape[1] # if you hit the boundaries
#print(x_range, y_range)
aslice = self.data[y_range[0]:y_range[1],x_range[0]:x_range[1]]
#maxX, maxY = np.unravel_index(aslice.argmax(), aslice.shape) ## finde maximum ids in 2d array slice
#maxXX, maxYY = maxX+x_range[0], maxY+y_range[0] ## correct for the actual array, so not the slice
#[maxYY, maxXX, aslice.max() ]
slice_xyz = []
for y in range(aslice.shape[0]):
for x in range(aslice.shape[1]):
slice_xyz.append([x+x_range[0],
y+y_range[0],
aslice[y][x]])
#self.coor_regieons.update({idx:{'x_min_id_offset':x_range[0], 'y_min_id_offset':y_range[0],'slice':aslice,'xyz(max)':i}})
#self.coor_regieons.append([np.array(slice_xyz),i])
a_region = region()
a_region.region_id = idx
a_region.coordinates = np.array(slice_xyz)
a_region.slope_map = self.slope_map[y_range[0]:y_range[1],x_range[0]:x_range[1]]
a_region.slope_map = a_region.slope_map/a_region.slope_map.max() #normolize