From 5e68157d09771b784f8aaa65d895170105a68d9a Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 25 Feb 2022 12:39:56 -0500 Subject: [PATCH 01/43] --Added spatial folding to the fold method --- sidpy/sid/dataset.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 3995bf05..346a24df 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -1187,9 +1187,13 @@ def fold(self, dim_order=None, method=None): new axis after the collapse -Default: None method: str - -'spaspec' collapses the original dataset to a 2D dataset, where + -'spaspec': collapses the original dataset to a 2D dataset, where spatail dimensions form the zeroth axis and spectral dimensions form the first axis + -'spa': combines all the spatial dimensions into a single dimension and + the combined dimension will be first + -'spec': combines all the spectral dimensions into a single dimension and + the combined dimension will be last -Uses the user defined dim_order when set to None -Default: None @@ -1223,6 +1227,21 @@ def fold(self, dim_order=None, method=None): part of the last collapsed dimension') dim_order_list[1].extend(dim) + if method == 'spa': + dim_order_list = [[]] + for dim, axis in self._axes.items(): + if axis.dimension_type == DimensionType.SPATIAL: + dim_order_list[0].extend(dim) + else: + dim_order_list.append([dim]) + + if len(dim_order_list[0]) == 0: + raise NotImplementedError('No spatial dimensions found') + if len(dim_order_list[0]) == 1: + warnings.warn('Only one spatial dimension found\ + Folding returns the original dataset') + + # We need the flattened list to transpose the original array dim_order_flattened = [item for sublist in dim_order_list for item in sublist] From ad95aa203867510349886895c7d17819255ccf05 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 25 Feb 2022 13:46:46 -0500 Subject: [PATCH 02/43] --Added spectral folding to the fold method that fold all the spectral dimensions into a single dimension. --- sidpy/sid/dataset.py | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 346a24df..8d0ea2e2 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -178,7 +178,7 @@ def hdf_close(self): print(self.h5_dataset) @classmethod - def from_array(cls, x, title='generic', chunks='auto', lock=False, + def from_array(cls, x, title='generic', chunks='auto', lock=False, datatype='UNKNOWN', units='generic', quantity='generic', modality='generic', source='generic', **kwargs): """ @@ -262,7 +262,7 @@ def like_data(self, data, title=None, chunks='auto', lock=False, **kwargs): checkdims = kwargs.get('checkdims', True) new_data = self.from_array(data, chunks=chunks, lock=lock) - + new_data.data_type = self.data_type # units @@ -278,9 +278,9 @@ def like_data(self, data, title=None, chunks='auto', lock=False, **kwargs): new_data.title = self.title else: new_data.title = self.title + '_new' - - new_data.title = title_prefix+new_data.title+title_suffix - + + new_data.title = title_prefix + new_data.title + title_suffix + # quantity if reset_quantity: new_data.quantity = 'generic' @@ -300,7 +300,7 @@ def like_data(self, data, title=None, chunks='auto', lock=False, **kwargs): try: scale = get_slope(self._axes[dim]) # axis = self._axes[dim].copy() - axis = Dimension(np.arange(new_data.shape[dim])*scale, self._axes[dim].name) + axis = Dimension(np.arange(new_data.shape[dim]) * scale, self._axes[dim].name) axis.quantity = self._axes[dim].quantity axis.units = self._axes[dim].units axis.dimension_type = self._axes[dim].dimension_type @@ -393,7 +393,7 @@ def __validate_dim(self, ind, name): raise TypeError('Dimension must be an integer') if (0 > ind) or (ind >= self.ndim): raise IndexError('Dimension must be an integer between 0 and {}' - ''.format(self.ndim-1)) + ''.format(self.ndim - 1)) for key, dim in self._axes.items(): if key != ind: if name == dim.name: @@ -571,7 +571,7 @@ def set_thumbnail(self, figure=None, thumbnail_size=128): import imageio # Thumbnail configurations for matplotlib - kwargs = {'figsize': (1,1), 'colorbar': False, 'set_title': False} + kwargs = {'figsize': (1, 1), 'colorbar': False, 'set_title': False} view = self.plot(figure=figure, **kwargs) for axis in view.axes: axis.set_axis_off() @@ -609,8 +609,8 @@ def get_extent(self, dimensions): extent = [] for ind, dim in enumerate(dimensions): temp = self._axes[dim].values - start = temp[0] - (temp[1] - temp[0])/2 - end = temp[-1] + (temp[-1] - temp[-2])/2 + start = temp[0] - (temp[1] - temp[0]) / 2 + end = temp[-1] + (temp[-1] - temp[-2]) / 2 if ind == 1: extent.append(end) # y-axis starts on top extent.append(start) @@ -950,7 +950,6 @@ def min(self, axis=None, keepdims=False, split_every=None, out=None): checkdims=False) return result, locals().copy() - @reduce_dims def max(self, axis=None, keepdims=False, split_every=None, out=None): @@ -974,9 +973,9 @@ def mean(self, axis=None, dtype=None, keepdims=False, split_every=None, out=None split_every=split_every, out=out), title_prefix='mean_aggregate_', checkdims=False) return result, locals().copy() - + @reduce_dims - def std(self, axis=None, dtype=None, keepdims=False, ddof = 0, split_every=None, out=None): + def std(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None): result = self.like_data(super().std(axis=axis, dtype=dtype, keepdims=keepdims, ddof=0, split_every=split_every, out=out), @@ -991,7 +990,7 @@ def var(self, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, o ddof=ddof, split_every=split_every, out=out), title_prefix='var_aggregate_', checkdims=False) return result, locals().copy() - + @reduce_dims def argmin(self, axis=None, split_every=None, out=None): @@ -1236,11 +1235,24 @@ def fold(self, dim_order=None, method=None): dim_order_list.append([dim]) if len(dim_order_list[0]) == 0: - raise NotImplementedError('No spatial dimensions found') + raise NotImplementedError("No spatial dimensions found and the method is set to 'spa' ") if len(dim_order_list[0]) == 1: warnings.warn('Only one spatial dimension found\ Folding returns the original dataset') + if method == 'spec': + dim_order_list = [[]] + for dim, axis in self._axes.items(): + if axis.dimension_type == DimensionType.Spectral: + dim_order_list[-1].extend(dim) + else: + dim_order_list.insert(-1, [dim]) + + if len(dim_order_list[-1]) == 0: + raise NotImplementedError("No spectral dimensions found and the method is set to 'spec'") + if len(dim_order_list[-1]) == 1: + warnings.warn('Only one spatial dimension found\ + Folding returns the original dataset') # We need the flattened list to transpose the original array dim_order_flattened = [item for sublist in dim_order_list for item in sublist] From b18e2f02b9fba2dfc1d5c20512344968e2994b96 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 25 Feb 2022 17:42:01 -0500 Subject: [PATCH 03/43] --Started adding fitter function in sidpy.proc.process.py, added __init__, _setup_calc, and do_guess methods. do_guess method is to be extensively modified later --- sidpy/proc/pocess.py | 101 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 sidpy/proc/pocess.py diff --git a/sidpy/proc/pocess.py b/sidpy/proc/pocess.py new file mode 100644 index 00000000..f8bd0604 --- /dev/null +++ b/sidpy/proc/pocess.py @@ -0,0 +1,101 @@ +from dask.distributed import Client +import numpy as np +import dask +from ..sid import Dimension, Dataset +from ..sid.dimension import DimensionType +from scipy.optimize import curve_fit +import matplotlib.pyplot as plt +import inspect + + +class SidFitter(): + # An extension of the Process Class for Functional Fitting + def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit_parms=None, + fit_parameter_labels=None, num_workers=2, threads=2): + """ + Parameters + ---------- + xvec: (numpy ndarray) Independent variable for fitting. Should be an array + + sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit + + ind_dims: (tuple) Tuple with integer entries of the dimensions + over which to parallelize. These should be the independent variable for the fitting. + + fit_fn: (function) Function used for fitting. + + guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit + It takes the same arguments as the fitting function and should return the same type of results array. + + If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. + + num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. + + fit_parameter_labels: (list) (default None) List of parameter labels + + num_workers: (int) (default =2) Number of workers to use when setting up Dask client + + threads: (int) (default =2) Number of threads to use when setting up Dask client + + """ + if guess_fn is None: + if num_fit_parms is None: + raise ValueError("You did not supply a guess function, you must atleast provide number of fit " + "parameters") + + self.guess_completed = False + self.num_fit_parms = num_fit_parms + self.xvec = xvec + self.dataset = sidpy_dataset + self.fit_fn = fit_fn + self.ind_dims = ind_dims + self._setup_calc() + self.guess_fn = guess_fn + self.prior = None + self.fit_labels = fit_parameter_labels + self.guess_results = [] + self.fit_results = [] + self.num_workers = num_workers + self.threads = threads + + # set up dask client + self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) + + def _setup_calc(self): + fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed + self.num_computations = 1 + for i in np.arange(self.dataset.ndim): + if i not in self.ind_dims: + fold_order[0].extend([i]) + self.num_computations *= self.dataset.shape[i] + else: + fold_order.append([i]) + + self.folded_dataset = self.dataset.fold(dim_order=fold_order) + self.fit_results = [] + + def do_guess(self): + """ + Get back to this + Returns + ------- + + """ + self.guess_results = [] + for ind in range(self.num_computations): + lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.dataset_flat[ind, :]) + self.guess_results.append(lazy_result) + + self.guess_results = dask.compute(*self.guess_results) + self.guess_results_arr = np.array(self.guess_results) + + self.guess_results_reshaped_shape = self.pos_dim_shapes + tuple([-1]) + self.guess_results_reshaped = np.array(self.guess_results_arr).reshape(self.guess_results_reshaped_shape) + num_model_parms = self.guess_results_reshaped.shape[-1] + self.prior = self.guess_results_reshaped.reshape(-1, num_model_parms) + self.guess_completed = True + return self.guess_results, self.guess_results_reshaped + + + + From 83663a6b2def2f04b459249d7c17a9ebf8a6c25e Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 2 Mar 2022 16:39:14 -0500 Subject: [PATCH 04/43] Fitter class: do_guess function is now updated --- sidpy/proc/pocess.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sidpy/proc/pocess.py b/sidpy/proc/pocess.py index f8bd0604..8d225dc9 100644 --- a/sidpy/proc/pocess.py +++ b/sidpy/proc/pocess.py @@ -64,6 +64,11 @@ def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit def _setup_calc(self): fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed self.num_computations = 1 + + # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions + # In other words make the argument 'ind_dims' optional + # if self.ind_dims is not None: + for i in np.arange(self.dataset.ndim): if i not in self.ind_dims: fold_order[0].extend([i]) @@ -87,15 +92,9 @@ def do_guess(self): self.guess_results.append(lazy_result) self.guess_results = dask.compute(*self.guess_results) - self.guess_results_arr = np.array(self.guess_results) - - self.guess_results_reshaped_shape = self.pos_dim_shapes + tuple([-1]) - self.guess_results_reshaped = np.array(self.guess_results_arr).reshape(self.guess_results_reshaped_shape) - num_model_parms = self.guess_results_reshaped.shape[-1] - self.prior = self.guess_results_reshaped.reshape(-1, num_model_parms) + self.prior = np.squeeze(np.array(self.guess_results)) + self.num_model_parms = self.prior.shape[-1] self.guess_completed = True - return self.guess_results, self.guess_results_reshaped - From 6e833e737fcaf7cc4e75202cbc0542a68ac64c31 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 2 Mar 2022 17:28:18 -0500 Subject: [PATCH 05/43] Fitter class: do_fit is now added --- sidpy/sid/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 8d0ea2e2..53d0d62a 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -1182,7 +1182,7 @@ def fold(self, dim_order=None, method=None): ---------- dim_order: List of lists or tuple of tuples - -Each element correponds to the order of axes in the corresponding + -Each element corresponds to the order of axes in the corresponding new axis after the collapse -Default: None method: str @@ -1210,7 +1210,7 @@ def fold(self, dim_order=None, method=None): dim_order_list = [list(x) for x in dim_order] - # Book-keeping for uncollapsing + # Book-keeping for unfolding fold_attr = {'shape': self.shape, '_axes': self._axes.copy()} if method == 'spaspec': From 5470e783d3a92da19606abd4993cdfad70271477 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 2 Mar 2022 17:34:10 -0500 Subject: [PATCH 06/43] Added process.py to the __init__.py file under proc --- sidpy/proc/__init__.py | 3 +- sidpy/proc/pocess.py | 100 ----------------------------------------- 2 files changed, 2 insertions(+), 101 deletions(-) delete mode 100644 sidpy/proc/pocess.py diff --git a/sidpy/proc/__init__.py b/sidpy/proc/__init__.py index 0efba1f9..f3154be3 100644 --- a/sidpy/proc/__init__.py +++ b/sidpy/proc/__init__.py @@ -3,5 +3,6 @@ """ from . import comp_utils +from . import process -__all__ = ['comp_utils'] +__all__ = ['comp_utils', 'process'] diff --git a/sidpy/proc/pocess.py b/sidpy/proc/pocess.py deleted file mode 100644 index 8d225dc9..00000000 --- a/sidpy/proc/pocess.py +++ /dev/null @@ -1,100 +0,0 @@ -from dask.distributed import Client -import numpy as np -import dask -from ..sid import Dimension, Dataset -from ..sid.dimension import DimensionType -from scipy.optimize import curve_fit -import matplotlib.pyplot as plt -import inspect - - -class SidFitter(): - # An extension of the Process Class for Functional Fitting - def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit_parms=None, - fit_parameter_labels=None, num_workers=2, threads=2): - """ - Parameters - ---------- - xvec: (numpy ndarray) Independent variable for fitting. Should be an array - - sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit - - ind_dims: (tuple) Tuple with integer entries of the dimensions - over which to parallelize. These should be the independent variable for the fitting. - - fit_fn: (function) Function used for fitting. - - guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit - It takes the same arguments as the fitting function and should return the same type of results array. - - If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. - - num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. - - fit_parameter_labels: (list) (default None) List of parameter labels - - num_workers: (int) (default =2) Number of workers to use when setting up Dask client - - threads: (int) (default =2) Number of threads to use when setting up Dask client - - """ - if guess_fn is None: - if num_fit_parms is None: - raise ValueError("You did not supply a guess function, you must atleast provide number of fit " - "parameters") - - self.guess_completed = False - self.num_fit_parms = num_fit_parms - self.xvec = xvec - self.dataset = sidpy_dataset - self.fit_fn = fit_fn - self.ind_dims = ind_dims - self._setup_calc() - self.guess_fn = guess_fn - self.prior = None - self.fit_labels = fit_parameter_labels - self.guess_results = [] - self.fit_results = [] - self.num_workers = num_workers - self.threads = threads - - # set up dask client - self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) - - def _setup_calc(self): - fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed - self.num_computations = 1 - - # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions - # In other words make the argument 'ind_dims' optional - # if self.ind_dims is not None: - - for i in np.arange(self.dataset.ndim): - if i not in self.ind_dims: - fold_order[0].extend([i]) - self.num_computations *= self.dataset.shape[i] - else: - fold_order.append([i]) - - self.folded_dataset = self.dataset.fold(dim_order=fold_order) - self.fit_results = [] - - def do_guess(self): - """ - Get back to this - Returns - ------- - - """ - self.guess_results = [] - for ind in range(self.num_computations): - lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.dataset_flat[ind, :]) - self.guess_results.append(lazy_result) - - self.guess_results = dask.compute(*self.guess_results) - self.prior = np.squeeze(np.array(self.guess_results)) - self.num_model_parms = self.prior.shape[-1] - self.guess_completed = True - - - From 688f60bbe2f24192d06ccdfa8ec6a0c6e1871614 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 2 Mar 2022 17:36:07 -0500 Subject: [PATCH 07/43] Added process.py to the __init__.py file under proc --- sidpy/proc/process.py | 226 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 sidpy/proc/process.py diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py new file mode 100644 index 00000000..fc8e45a6 --- /dev/null +++ b/sidpy/proc/process.py @@ -0,0 +1,226 @@ +from dask.distributed import Client +import numpy as np +import dask +from ..sid import Dimension, Dataset +from ..sid.dimension import DimensionType +from scipy.optimize import curve_fit +import matplotlib.pyplot as plt +import inspect + + +class SidFitter(): + # An extension of the Process Class for Functional Fitting + def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit_parms=None, + fit_parameter_labels=None, num_workers=2, threads=2): + """ + Parameters + ---------- + xvec: (numpy ndarray) Independent variable for fitting. Should be an array + + sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit + + ind_dims: (tuple) Tuple with integer entries of the dimensions + over which to parallelize. These should be the independent variable for the fitting. + + fit_fn: (function) Function used for fitting. + + guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit + It takes the same arguments as the fitting function and should return the same type of results array. + + If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. + + num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. + + fit_parameter_labels: (list) (default None) List of parameter labels + + num_workers: (int) (default =2) Number of workers to use when setting up Dask client + + threads: (int) (default =2) Number of threads to use when setting up Dask client + + """ + if guess_fn is None: + if num_fit_parms is None: + raise ValueError("You did not supply a guess function, you must atleast provide number of fit " + "parameters") + + self.guess_completed = False + self.num_fit_parms = num_fit_parms + self.xvec = xvec + self.dataset = sidpy_dataset + self.fit_fn = fit_fn + self.ind_dims = ind_dims + self._setup_calc() + self.guess_fn = guess_fn + self.prior = None + self.fit_labels = fit_parameter_labels + self.guess_results = [] + self.fit_results = [] + self.num_workers = num_workers + self.threads = threads + + # set up dask client + self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) + + def _setup_calc(self): + fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed + self.num_computations = 1 + + # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions + # In other words make the argument 'ind_dims' optional + # if self.ind_dims is not None: + + for i in np.arange(self.dataset.ndim): + if i not in self.ind_dims: + fold_order[0].extend([i]) + self.num_computations *= self.dataset.shape[i] + else: + fold_order.append([i]) + + self.folded_dataset = self.dataset.fold(dim_order=fold_order) + self.fit_results = [] + + def do_guess(self): + """ + Get back to this + Returns + ------- + + """ + self.guess_results = [] + for ind in range(self.num_computations): + lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.dataset_flat[ind, :]) + self.guess_results.append(lazy_result) + + self.guess_results = dask.compute(*self.guess_results) + self.prior = np.squeeze(np.array(self.guess_results)) + self.num_fit_parms = self.prior.shape[-1] + self.guess_completed = True + + def do_fit(self, **kwargs): + """ + Perform the fit. + **kwargs: extra parameters passed to the fitting function, e.g. bounds, type of lsq algorithm, etc. + """ + if self.guess_completed is False and self.guess_fn is not None: + # Calling the guess function + self.do_guess() + + self.fit_results = [] + + for ind in range(self.num_computations): + if self.prior is None: + p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) + else: + p0 = self.prior[ind, :] + + lazy_result = dask.delayed(self.fit_fn)(self.xvec, self.dataset_flat[ind, :], p0=p0, **kwargs) + self.fit_results.append(lazy_result) + + self.fit_results = dask.compute(*self.fit_results) + + if len(self.fit_results[0]) == 1: + # in this case we can just dump it to an array because we only got the parameters back + mean_fit_results = np.squeeze(np.array(self.fit_results)) + self.cov_results = [] + + elif len(self.fit_results[0]) == 2: + # here we get back both: the parameter means and the covariance matrix! + mean_fit_results = np.array([self.fit_results[ind][0] for ind in range(len(self.fit_results))]) + cov_results = np.array([self.fit_results[ind][1] for ind in range(len(self.fit_results))]) + cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape + self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) + else: + raise ValueError('Your fit function returned more than two arrays. This is not supported. Only return \ + optimized parameters and covariance matrix') + + self.fit_opt_results_shape = self.pos_dim_shapes + tuple([-1]) + self.fit_opt_results = mean_fit_results.reshape(self.fit_opt_results_shape) + + self.num_fit_parms = self.fit_opt_results.shape[-1] + + # get it into a sidpy dataset + dims_new_pos = [self.dataset._axes[self.pos_dims[tup_val]] for tup_val in self.pos_dims] + + # that gives a list of the dimensions we can copy into the fit dataset + # we need the last dimension which is just the fit parameters + + fit_dims = [sid.Dimension(np.arange(self.num_fit_parms), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='spectral')] + + cov_dims = [sid.Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_x', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral'), + sid.Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_y', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral')] + + fit_dataset_dims = dims_new_pos + fit_dims + fit_cov_dims = dims_new_pos + cov_dims + + # Specify dimensions + + # Make a sidpy dataset + mean_sid_dset = sid.Dataset.from_array(self.fit_opt_results, name='Fitting_Map') + + # Set the data type + mean_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map + + # Add quantity and units + mean_sid_dset.units = self.dataset.units + mean_sid_dset.quantity = self.dataset.quantity + + # Add dimension info + for ind, val in enumerate(fit_dataset_dims): + mean_sid_dset.set_dimension(ind, val) + + # append metadata + original_parms_dict = self.dataset.metadata + fit_func_str = str(self.fit_fn) + guess_func_str = str(self.guess_fn) + + fit_parms_dict = {'original_metadata': original_parms_dict, + 'fitting method': fit_func_str, + 'guess_method': guess_func_str, 'fitting_dimensions': self.pos_dims, + 'fit_parameter_labels': self.fit_labels} + + mean_sid_dset.metadata = fit_parms_dict + + if len(self.cov_results) > 0: + # Make a sidpy dataset + cov_sid_dset = sid.Dataset.from_array(self.cov_results, name='Fitting_Map_Covariance') + + # Set the data type + cov_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map + + # Add quantity and units + cov_sid_dset.units = self.dataset.units + cov_sid_dset.quantity = self.dataset.quantity + + # Add dimension info + for ind, val in enumerate(fit_cov_dims): + cov_sid_dset.set_dimension(ind, val) + + fit_results_final = [mean_sid_dset, cov_sid_dset] + else: + fit_results_final = [mean_sid_dset] + + # We have a list of sidpy dataset objects + # But what we also want is the original sidpy dataset object, amended with these results as attributes + if not hasattr(self.dataset, 'fit_results'): + self.dataset.fit_results = [] + self.dataset.fit_vectors = [] # list of svectors + self.dataset.guess_fns = [] + self.dataset.fit_fns = [] + + self.dataset.metadata = fit_parms_dict + self.dataset.fit_results.append(fit_results_final) + self.dataset.fit_vectors.append(self.xvec) + self.dataset.guess_fns.append(self.guess_fn) + self.dataset.fit_fns.append(self.fit_fn) + + return fit_results_final, self.dataset + From 7fa891ee652a56ecc7aad0ea46ac3b4d05112eaf Mon Sep 17 00:00:00 2001 From: Mani Date: Thu, 3 Mar 2022 14:42:19 -0500 Subject: [PATCH 08/43] Modified unfold method in dataset.py so that it works with fitter class in process.py --- sidpy/proc/process.py | 13 +++++++++++-- sidpy/sid/dataset.py | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index fc8e45a6..00beaed0 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -70,13 +70,23 @@ def _setup_calc(self): # if self.ind_dims is not None: for i in np.arange(self.dataset.ndim): - if i not in self.ind_dims: + if i in self.ind_dims: fold_order[0].extend([i]) self.num_computations *= self.dataset.shape[i] else: fold_order.append([i]) self.folded_dataset = self.dataset.fold(dim_order=fold_order) + # Here is the tricky part, dataset.unfold is designed to get back the original dataset with minimal loss of + # information. To do this, unfold utilizes the saved information of the original dataset. Now we are going to + # tweak that information and use the unfold function operate after the fitting. + + self._unfold_attr = {'dim_order_flattened': fold_order[0] + [len(fold_order)], + 'shape_transposed': [self.dataset.shape[i] for i in fold_order[0]] + [-1]} + axes = self.dataset._axes.copy() + axes.popitem() + self._unfold_attr['_axes'] = axes + self.fit_results = [] def do_guess(self): @@ -223,4 +233,3 @@ def do_fit(self, **kwargs): self.dataset.fit_fns.append(self.fit_fn) return fit_results_final, self.dataset - diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 53d0d62a..7bbfc089 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -1311,8 +1311,8 @@ def unfold(self): title=self.title.replace('folded_', ''), checkdims=False) unfolded_dset._axes = {} - for i, dim in enumerate(old_axes): - unfolded_dset._axes[dim] = old_axes[dim].copy() + for i, dim in old_axes.items(): + unfolded_dset.set_dimension(i, dim.copy()) return unfolded_dset # Following methods are to be edited From 2730959a59f049349069d0693312ec34daa851c7 Mon Sep 17 00:00:00 2001 From: Mani Date: Thu, 3 Mar 2022 16:36:16 -0500 Subject: [PATCH 09/43] Initial setup of do_fit function inside the fitter class is done. Now, it has to be extensively debugged to make sure that it behaves as intended --- sidpy/proc/process.py | 113 ++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 75 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index 00beaed0..b732dc24 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -131,50 +131,23 @@ def do_fit(self, **kwargs): if len(self.fit_results[0]) == 1: # in this case we can just dump it to an array because we only got the parameters back mean_fit_results = np.squeeze(np.array(self.fit_results)) - self.cov_results = [] + cov_results = None elif len(self.fit_results[0]) == 2: # here we get back both: the parameter means and the covariance matrix! - mean_fit_results = np.array([self.fit_results[ind][0] for ind in range(len(self.fit_results))]) - cov_results = np.array([self.fit_results[ind][1] for ind in range(len(self.fit_results))]) - cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape - self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) + mean_fit_results = np.squeeze(np.array([self.fit_results[ind][0] for ind in range(len(self.fit_results))])) + cov_results = np.squeeze(np.array([self.fit_results[ind][1] for ind in range(len(self.fit_results))])) + # cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape + # self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) else: raise ValueError('Your fit function returned more than two arrays. This is not supported. Only return \ optimized parameters and covariance matrix') - self.fit_opt_results_shape = self.pos_dim_shapes + tuple([-1]) - self.fit_opt_results = mean_fit_results.reshape(self.fit_opt_results_shape) - - self.num_fit_parms = self.fit_opt_results.shape[-1] - - # get it into a sidpy dataset - dims_new_pos = [self.dataset._axes[self.pos_dims[tup_val]] for tup_val in self.pos_dims] - - # that gives a list of the dimensions we can copy into the fit dataset - # we need the last dimension which is just the fit parameters - - fit_dims = [sid.Dimension(np.arange(self.num_fit_parms), - name='fit_parms', units='a.u.', - quantity='fit_parameters', - dimension_type='spectral')] - - cov_dims = [sid.Dimension(np.arange(self.num_fit_parms), - name='fit_cov_parms_x', units='a.u.', - quantity='fit_cov_parameters', - dimension_type='spectral'), - sid.Dimension(np.arange(self.num_fit_parms), - name='fit_cov_parms_y', units='a.u.', - quantity='fit_cov_parameters', - dimension_type='spectral')] - - fit_dataset_dims = dims_new_pos + fit_dims - fit_cov_dims = dims_new_pos + cov_dims - - # Specify dimensions - + # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them # Make a sidpy dataset - mean_sid_dset = sid.Dataset.from_array(self.fit_opt_results, name='Fitting_Map') + mean_sid_dset = Dataset.from_array(mean_fit_results, title='Fitting_Map') + mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() + mean_sid_dset = mean_sid_dset.unfold() # Set the data type mean_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map @@ -183,25 +156,25 @@ def do_fit(self, **kwargs): mean_sid_dset.units = self.dataset.units mean_sid_dset.quantity = self.dataset.quantity - # Add dimension info - for ind, val in enumerate(fit_dataset_dims): - mean_sid_dset.set_dimension(ind, val) - - # append metadata - original_parms_dict = self.dataset.metadata - fit_func_str = str(self.fit_fn) - guess_func_str = str(self.guess_fn) - - fit_parms_dict = {'original_metadata': original_parms_dict, - 'fitting method': fit_func_str, - 'guess_method': guess_func_str, 'fitting_dimensions': self.pos_dims, - 'fit_parameter_labels': self.fit_labels} - - mean_sid_dset.metadata = fit_parms_dict + # We set the last dimension, i.e., the dimension with the fit parameters + fit_dim = [Dimension(np.arange(self.num_fit_parms), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='spectral')] + mean_fit_results.set_dimension(len(mean_sid_dset), fit_dim) - if len(self.cov_results) > 0: + # Here we deal with the covariance dataset + if cov_results is not None: # Make a sidpy dataset - cov_sid_dset = sid.Dataset.from_array(self.cov_results, name='Fitting_Map_Covariance') + cov_sid_dset = Dataset.from_array(self.cov_results, title='Fitting_Map_Covariance') + num_fit_parms = mean_fit_results.shape[-1] + fold_attr = self._unfold_attr.copy() + fold_attr['dim_order_flattened'] = fold_attr['dim_order_flattened'] + [ + len(fold_attr['dim_order_flattened'])] + fold_attr['shape_transposed'] = fold_attr['shape_transposed'] + [num_fit_parms] + [num_fit_parms] + + cov_sid_dset.metadata['fold_attr'] = fold_attr + cov_sid_dset = cov_sid_dset.unfold() # Set the data type cov_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map @@ -210,26 +183,16 @@ def do_fit(self, **kwargs): cov_sid_dset.units = self.dataset.units cov_sid_dset.quantity = self.dataset.quantity - # Add dimension info - for ind, val in enumerate(fit_cov_dims): - cov_sid_dset.set_dimension(ind, val) + cov_dims = [Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_x', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral'), + Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_y', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral')] - fit_results_final = [mean_sid_dset, cov_sid_dset] - else: - fit_results_final = [mean_sid_dset] - - # We have a list of sidpy dataset objects - # But what we also want is the original sidpy dataset object, amended with these results as attributes - if not hasattr(self.dataset, 'fit_results'): - self.dataset.fit_results = [] - self.dataset.fit_vectors = [] # list of svectors - self.dataset.guess_fns = [] - self.dataset.fit_fns = [] - - self.dataset.metadata = fit_parms_dict - self.dataset.fit_results.append(fit_results_final) - self.dataset.fit_vectors.append(self.xvec) - self.dataset.guess_fns.append(self.guess_fn) - self.dataset.fit_fns.append(self.fit_fn) - - return fit_results_final, self.dataset + for i, dim in enumerate(cov_dims): + cov_sid_dset.set_dimension(i + len(cov_sid_dset.shape), dim) + + return mean_sid_dset From 1564858c5624cc2fcf38c5f43da32bdba270eda9 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 4 Mar 2022 15:29:20 -0500 Subject: [PATCH 10/43] The do_fit function acts exactly as the one in the Rama's branch while utilizing the fold and unfold methods from the dataset.py --- sidpy/proc/process.py | 46 ++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index b732dc24..a7fde80f 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -98,7 +98,7 @@ def do_guess(self): """ self.guess_results = [] for ind in range(self.num_computations): - lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.dataset_flat[ind, :]) + lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.folded_dataset[ind, :]) self.guess_results.append(lazy_result) self.guess_results = dask.compute(*self.guess_results) @@ -123,20 +123,20 @@ def do_fit(self, **kwargs): else: p0 = self.prior[ind, :] - lazy_result = dask.delayed(self.fit_fn)(self.xvec, self.dataset_flat[ind, :], p0=p0, **kwargs) + lazy_result = dask.delayed(self.fit_fn)(self.xvec, self.folded_dataset[ind, :], p0=p0, **kwargs) self.fit_results.append(lazy_result) - self.fit_results = dask.compute(*self.fit_results) + self.fit_results_comp = dask.compute(*self.fit_results) - if len(self.fit_results[0]) == 1: + if len(self.fit_results_comp[0]) == 1: # in this case we can just dump it to an array because we only got the parameters back - mean_fit_results = np.squeeze(np.array(self.fit_results)) + mean_fit_results = np.squeeze(np.array(self.fit_results_comp)) cov_results = None - elif len(self.fit_results[0]) == 2: + elif len(self.fit_results_comp[0]) == 2: # here we get back both: the parameter means and the covariance matrix! - mean_fit_results = np.squeeze(np.array([self.fit_results[ind][0] for ind in range(len(self.fit_results))])) - cov_results = np.squeeze(np.array([self.fit_results[ind][1] for ind in range(len(self.fit_results))])) + mean_fit_results = np.squeeze(np.array([self.fit_results_comp[ind][0] for ind in range(len(self.fit_results_comp))])) + cov_results = np.squeeze(np.array([self.fit_results_comp[ind][1] for ind in range(len(self.fit_results_comp))])) # cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape # self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) else: @@ -150,28 +150,28 @@ def do_fit(self, **kwargs): mean_sid_dset = mean_sid_dset.unfold() # Set the data type - mean_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map + mean_sid_dset.data_type = 'image_stack' # We may want to pass a new type - fit map # Add quantity and units mean_sid_dset.units = self.dataset.units mean_sid_dset.quantity = self.dataset.quantity # We set the last dimension, i.e., the dimension with the fit parameters - fit_dim = [Dimension(np.arange(self.num_fit_parms), - name='fit_parms', units='a.u.', - quantity='fit_parameters', - dimension_type='spectral')] - mean_fit_results.set_dimension(len(mean_sid_dset), fit_dim) + fit_dim = Dimension(np.arange(self.num_fit_parms), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='temporal') + mean_sid_dset.set_dimension(len(mean_sid_dset.shape)-1, fit_dim) # Here we deal with the covariance dataset if cov_results is not None: # Make a sidpy dataset - cov_sid_dset = Dataset.from_array(self.cov_results, title='Fitting_Map_Covariance') + cov_sid_dset = Dataset.from_array(cov_results, title='Fitting_Map_Covariance') num_fit_parms = mean_fit_results.shape[-1] fold_attr = self._unfold_attr.copy() fold_attr['dim_order_flattened'] = fold_attr['dim_order_flattened'] + [ len(fold_attr['dim_order_flattened'])] - fold_attr['shape_transposed'] = fold_attr['shape_transposed'] + [num_fit_parms] + [num_fit_parms] + fold_attr['shape_transposed'] = fold_attr['shape_transposed'][:-1] + [num_fit_parms] + [num_fit_parms] cov_sid_dset.metadata['fold_attr'] = fold_attr cov_sid_dset = cov_sid_dset.unfold() @@ -193,6 +193,16 @@ def do_fit(self, **kwargs): dimension_type='spectral')] for i, dim in enumerate(cov_dims): - cov_sid_dset.set_dimension(i + len(cov_sid_dset.shape), dim) + cov_sid_dset.set_dimension(i - 2 + len(cov_sid_dset.shape), dim) - return mean_sid_dset + return [mean_sid_dset, cov_sid_dset] + + def get_fitted_dataset(self): + """This method returns the fitted dataset using the parameters generated by the fit function""" + pass + + # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit + + @staticmethod + def default_curve_fit(num_fit_parms): + pass From c506c156a72495eb2688b2d6a726fb6ffd0828a9 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 4 Mar 2022 16:17:52 -0500 Subject: [PATCH 11/43] Default curve_fit function is added as the static method. ind_dims is now an optional argument. The order of the arguments is also slightly changed. --- sidpy/proc/process.py | 63 ++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index a7fde80f..2601bad6 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -10,23 +10,23 @@ class SidFitter(): # An extension of the Process Class for Functional Fitting - def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit_parms=None, + def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, num_fit_parms=None, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters ---------- - xvec: (numpy ndarray) Independent variable for fitting. Should be an array - sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit - ind_dims: (tuple) Tuple with integer entries of the dimensions - over which to parallelize. These should be the independent variable for the fitting. - fit_fn: (function) Function used for fitting. + xvec: (numpy ndarray) Independent variable for fitting. Should be an array + + ind_dims: (tuple) (Optional) Tuple with integer entries of the dimensions + over which to parallelize. These should be the independent variable for the fitting. + If NOT provided, it is assumed that all the non-spectral dimensions are independent dimensions. + guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit It takes the same arguments as the fitting function and should return the same type of results array. - If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. @@ -40,15 +40,26 @@ def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit """ if guess_fn is None: if num_fit_parms is None: - raise ValueError("You did not supply a guess function, you must atleast provide number of fit " + raise ValueError("You did not supply a guess function, you must at least provide number of fit " "parameters") - - self.guess_completed = False - self.num_fit_parms = num_fit_parms - self.xvec = xvec self.dataset = sidpy_dataset self.fit_fn = fit_fn - self.ind_dims = ind_dims + self.xvec = xvec + self.num_fit_parms = num_fit_parms + + if ind_dims is not None: + self.ind_dims = ind_dims + else: + ind_dims = [] + for i, dim in self.dataset._axes.items(): + if dim.dimension_type != DimensionType.SPECTRAL: + ind_dims.extend([i]) + self.ind_dims = tuple(ind_dims) + + # Make sure there is at least one spectral dimension + if len(self.ind_dims) == len(self.dataset.shape): + raise NotImplementedError('No Spectral (dependent) dimensions found to fit') + self._setup_calc() self.guess_fn = guess_fn self.prior = None @@ -57,6 +68,7 @@ def __init__(self, xvec, sidpy_dataset, ind_dims, fit_fn, guess_fn=None, num_fit self.fit_results = [] self.num_workers = num_workers self.threads = threads + self.guess_completed = False # set up dask client self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) @@ -123,7 +135,8 @@ def do_fit(self, **kwargs): else: p0 = self.prior[ind, :] - lazy_result = dask.delayed(self.fit_fn)(self.xvec, self.folded_dataset[ind, :], p0=p0, **kwargs) + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.xvec, self.folded_dataset[ind, :], + p0=p0, **kwargs) self.fit_results.append(lazy_result) self.fit_results_comp = dask.compute(*self.fit_results) @@ -135,8 +148,10 @@ def do_fit(self, **kwargs): elif len(self.fit_results_comp[0]) == 2: # here we get back both: the parameter means and the covariance matrix! - mean_fit_results = np.squeeze(np.array([self.fit_results_comp[ind][0] for ind in range(len(self.fit_results_comp))])) - cov_results = np.squeeze(np.array([self.fit_results_comp[ind][1] for ind in range(len(self.fit_results_comp))])) + mean_fit_results = np.squeeze( + np.array([self.fit_results_comp[ind][0] for ind in range(len(self.fit_results_comp))])) + cov_results = np.squeeze( + np.array([self.fit_results_comp[ind][1] for ind in range(len(self.fit_results_comp))])) # cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape # self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) else: @@ -161,7 +176,7 @@ def do_fit(self, **kwargs): name='fit_parms', units='a.u.', quantity='fit_parameters', dimension_type='temporal') - mean_sid_dset.set_dimension(len(mean_sid_dset.shape)-1, fit_dim) + mean_sid_dset.set_dimension(len(mean_sid_dset.shape) - 1, fit_dim) # Here we deal with the covariance dataset if cov_results is not None: @@ -204,5 +219,15 @@ def get_fitted_dataset(self): # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit @staticmethod - def default_curve_fit(num_fit_parms): - pass + def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): + print(kwargs) + xvec = np.array(xvec) + yvec = np.array(yvec) + yvec = yvec.ravel() + xvec = xvec.ravel() + popt, pcov = curve_fit(fit_fn, xvec, yvec, **kwargs) + + if return_cov: + return popt, pcov + else: + return popt From e0e952984315f4dfc0a5bff9aed461e41d2045e8 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 4 Mar 2022 16:46:36 -0500 Subject: [PATCH 12/43] Added a new argument 'return_cov' that decides whether to return the covariance matrix of the fitting parameters. Changed the entire code accordingly --- sidpy/proc/process.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index 2601bad6..0a2e8a51 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -11,7 +11,7 @@ class SidFitter(): # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, num_fit_parms=None, - fit_parameter_labels=None, num_workers=2, threads=2): + return_cov=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters ---------- @@ -31,6 +31,9 @@ def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, nu num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. + return_cov: (bool) (default False) Returns the estimated covariance of fitting parameters. Confer + scipy.optimize.curve_fit for further details + fit_parameter_labels: (list) (default None) List of parameter labels num_workers: (int) (default =2) Number of workers to use when setting up Dask client @@ -69,6 +72,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, nu self.num_workers = num_workers self.threads = threads self.guess_completed = False + self.return_cov = return_cov # set up dask client self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) @@ -136,17 +140,17 @@ def do_fit(self, **kwargs): p0 = self.prior[ind, :] lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.xvec, self.folded_dataset[ind, :], - p0=p0, **kwargs) + return_cov=self.return_cov, p0=p0, **kwargs) self.fit_results.append(lazy_result) self.fit_results_comp = dask.compute(*self.fit_results) - if len(self.fit_results_comp[0]) == 1: + if not self.return_cov: # in this case we can just dump it to an array because we only got the parameters back mean_fit_results = np.squeeze(np.array(self.fit_results_comp)) cov_results = None - elif len(self.fit_results_comp[0]) == 2: + else: # here we get back both: the parameter means and the covariance matrix! mean_fit_results = np.squeeze( np.array([self.fit_results_comp[ind][0] for ind in range(len(self.fit_results_comp))])) @@ -154,9 +158,6 @@ def do_fit(self, **kwargs): np.array([self.fit_results_comp[ind][1] for ind in range(len(self.fit_results_comp))])) # cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape # self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) - else: - raise ValueError('Your fit function returned more than two arrays. This is not supported. Only return \ - optimized parameters and covariance matrix') # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them # Make a sidpy dataset @@ -210,7 +211,10 @@ def do_fit(self, **kwargs): for i, dim in enumerate(cov_dims): cov_sid_dset.set_dimension(i - 2 + len(cov_sid_dset.shape), dim) - return [mean_sid_dset, cov_sid_dset] + if self.return_cov: + return mean_sid_dset, cov_sid_dset + else: + return mean_sid_dset def get_fitted_dataset(self): """This method returns the fitted dataset using the parameters generated by the fit function""" From b6a8ab7035a65bc6a5d90cb3692486690ed6bb99 Mon Sep 17 00:00:00 2001 From: Mani Date: Mon, 7 Mar 2022 11:12:50 -0500 Subject: [PATCH 13/43] Argument xvec is now optional. If not provided the xvec will be the dimension arrays of dependent dimensions --- sidpy/proc/process.py | 57 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index 0a2e8a51..60977354 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -8,9 +8,9 @@ import inspect -class SidFitter(): +class SidFitter: # An extension of the Process Class for Functional Fitting - def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, num_fit_parms=None, + def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, return_cov=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters @@ -19,7 +19,9 @@ def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, nu fit_fn: (function) Function used for fitting. - xvec: (numpy ndarray) Independent variable for fitting. Should be an array + xvec: (numpy ndarray or list of numpy ndarrays) (Optional) + Independent variable for fitting. Should be an array + If NOT provided, the dimension arrays are assumed to be xvecs ind_dims: (tuple) (Optional) Tuple with integer entries of the dimensions over which to parallelize. These should be the independent variable for the fitting. @@ -47,7 +49,6 @@ def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, nu "parameters") self.dataset = sidpy_dataset self.fit_fn = fit_fn - self.xvec = xvec self.num_fit_parms = num_fit_parms if ind_dims is not None: @@ -63,6 +64,48 @@ def __init__(self, sidpy_dataset, fit_fn, xvec, ind_dims=None, guess_fn=None, nu if len(self.ind_dims) == len(self.dataset.shape): raise NotImplementedError('No Spectral (dependent) dimensions found to fit') + # Let's get the dependent dims here + dep_dims = [] + for d in np.arange(len(self.dataset.shape)): + if d not in self.ind_dims: + dep_dims.extend([d]) + self.dep_dims = tuple(dep_dims) + + if xvec is None: + # 1D fit + if len(self.dep_dims) == 1: + dep_vec = np.array(self.dataset._axes[self.dep_dims[0]]) + # Multidimensional fit + else: + dep_vec = [] + for d in self.dep_dims: + dep_vec.append(np.array(self.dataset._axes[d])) + + if xvec is not None: + # 1D fit + if len(self.dep_dims) == 1: + if isinstance(xvec, np.ndarray): + dep_vec = xvec + elif isinstance(xvec, list): + dep_vec = np.array(xvec) + else: + raise TypeError('Please provide a np.ndarray or a list of independent vector values') + # Multidimensional fit + else: + if isinstance(xvec, list) and len(xvec) == len(self.dep_dims): + dep_vec = xvec + elif isinstance(xvec, list) and len(xvec) != len(self.dep_dims): + raise ValueError('The number of independent dimensions provided in the xvec do not match ' + 'with the number of dependent dimensions of the dataset') + else: + raise TypeError('Please provide a list of value-arrays corresponding to each dependent dimension') + + # Dealing with the meshgrid part of multidimensional fitting + if len(self.dep_dims) > 1: + self.dep_vec = [ar.ravel() for ar in np.meshgrid(*dep_dims)] + else: + self.dep_vec = dep_vec + self._setup_calc() self.guess_fn = guess_fn self.prior = None @@ -97,7 +140,7 @@ def _setup_calc(self): # information. To do this, unfold utilizes the saved information of the original dataset. Now we are going to # tweak that information and use the unfold function operate after the fitting. - self._unfold_attr = {'dim_order_flattened': fold_order[0] + [len(fold_order)], + self._unfold_attr = {'dim_order_flattened': fold_order[0] + [len(fold_order[0])], 'shape_transposed': [self.dataset.shape[i] for i in fold_order[0]] + [-1]} axes = self.dataset._axes.copy() axes.popitem() @@ -114,7 +157,7 @@ def do_guess(self): """ self.guess_results = [] for ind in range(self.num_computations): - lazy_result = dask.delayed(self.guess_fn)(self.xvec, self.folded_dataset[ind, :]) + lazy_result = dask.delayed(self.guess_fn)(self.dep_vec, self.folded_dataset[ind, :]) self.guess_results.append(lazy_result) self.guess_results = dask.compute(*self.guess_results) @@ -139,7 +182,7 @@ def do_fit(self, **kwargs): else: p0 = self.prior[ind, :] - lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.xvec, self.folded_dataset[ind, :], + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, self.folded_dataset[ind, :], return_cov=self.return_cov, p0=p0, **kwargs) self.fit_results.append(lazy_result) From 3fa609aee829d150ede905d3d0c227d68bcaca73 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 12:22:54 -0500 Subject: [PATCH 14/43] Scipy is added as an extra requirement --- setup.py | 1 + sidpy/proc/process.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3ae6b630..4578af14 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,7 @@ extras_require={ 'MPI': ["mpi4py"], 'File_Widgets': ['pyqt5'], + 'process': ['scipy'] }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index 60977354..e32f81e2 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -3,7 +3,12 @@ import dask from ..sid import Dimension, Dataset from ..sid.dimension import DimensionType -from scipy.optimize import curve_fit + +try: + from scipy.optimize import curve_fit +except ModuleNotFoundError: + curve_fit = None + import matplotlib.pyplot as plt import inspect @@ -272,7 +277,10 @@ def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): yvec = np.array(yvec) yvec = yvec.ravel() xvec = xvec.ravel() - popt, pcov = curve_fit(fit_fn, xvec, yvec, **kwargs) + if curve_fit is None: + raise ModuleNotFoundError("scipy is not installed") + else: + popt, pcov = curve_fit(fit_fn, xvec, yvec, **kwargs) if return_cov: return popt, pcov From 9c773764886041c04cf6a282e8ed29a23192d4cc Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 13:32:42 -0500 Subject: [PATCH 15/43] Added the method get_fitted_dataset to fitter class that does exactly what it says --- sidpy/proc/process.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index e32f81e2..f528f7d3 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -126,7 +126,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) def _setup_calc(self): - fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed + self.fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed self.num_computations = 1 # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions @@ -135,18 +135,18 @@ def _setup_calc(self): for i in np.arange(self.dataset.ndim): if i in self.ind_dims: - fold_order[0].extend([i]) + self.fold_order[0].extend([i]) self.num_computations *= self.dataset.shape[i] else: - fold_order.append([i]) + self.fold_order.append([i]) - self.folded_dataset = self.dataset.fold(dim_order=fold_order) + self.folded_dataset = self.dataset.fold(dim_order=self.fold_order) # Here is the tricky part, dataset.unfold is designed to get back the original dataset with minimal loss of # information. To do this, unfold utilizes the saved information of the original dataset. Now we are going to # tweak that information and use the unfold function operate after the fitting. - self._unfold_attr = {'dim_order_flattened': fold_order[0] + [len(fold_order[0])], - 'shape_transposed': [self.dataset.shape[i] for i in fold_order[0]] + [-1]} + self._unfold_attr = {'dim_order_flattened': self.fold_order[0] + [len(self.fold_order[0])], + 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} axes = self.dataset._axes.copy() axes.popitem() self._unfold_attr['_axes'] = axes @@ -209,6 +209,7 @@ def do_fit(self, **kwargs): # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them # Make a sidpy dataset + self.mean_fit_results = mean_fit_results mean_sid_dset = Dataset.from_array(mean_fit_results, title='Fitting_Map') mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() mean_sid_dset = mean_sid_dset.unfold() @@ -266,7 +267,21 @@ def do_fit(self, **kwargs): def get_fitted_dataset(self): """This method returns the fitted dataset using the parameters generated by the fit function""" - pass + fitted_dset = self.dataset.like_data(np.zeros_like(self.dataset.compute()), + title_prefix='fitted_') + fitted_dset_fold = fitted_dset.fold(dim_order=self.fold_order) + + # Here we make a roundtrip to numpy as earlier versions of dask did not support the assignments + # of the form dask_array[2] = 1 + + np_folded_arr = fitted_dset_fold.compute() + for i in range(np_folded_arr.shape[0]): + np_folded_arr[i] = self.fit_fn(self.dep_vec, *self.mean_fit_results[i]) + + fitted_sid_dset_folded = fitted_dset_fold.like_data(np_folded_arr, title=fitted_dset_fold.title) + fitted_sid_dset = fitted_sid_dset_folded.unfold() + + return fitted_sid_dset # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit From 62a77140ae030c48c15ed4697467fb63ee44124d Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 15:21:14 -0500 Subject: [PATCH 16/43] Docstring of fitter class is updated and a new parameter 'return_fit' is added to the class --- sidpy/proc/process.py | 112 +++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index f528f7d3..df006bfe 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -9,20 +9,19 @@ except ModuleNotFoundError: curve_fit = None -import matplotlib.pyplot as plt -import inspect - class SidFitter: # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - return_cov=False, fit_parameter_labels=None, num_workers=2, threads=2): + return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters ---------- sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit fit_fn: (function) Function used for fitting. + Should take xvec as the first argument and parameters as the rest of the arguments. + Should return the function value at each of the points in the xvec xvec: (numpy ndarray or list of numpy ndarrays) (Optional) Independent variable for fitting. Should be an array @@ -36,29 +35,47 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non It takes the same arguments as the fitting function and should return the same type of results array. If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. - num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided. + num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided to set + the priors for the parameters for the curve_fit function. return_cov: (bool) (default False) Returns the estimated covariance of fitting parameters. Confer scipy.optimize.curve_fit for further details + return_fit: (bool) (default False) Returns the fitted sidpy dataset using the optimal parameters + when set to true + fit_parameter_labels: (list) (default None) List of parameter labels num_workers: (int) (default =2) Number of workers to use when setting up Dask client threads: (int) (default =2) Number of threads to use when setting up Dask client + + Returns: + ------- + sidpy.dataset: if return_cov and return_fit are both set to False + List: containing sidpy.dataset objects, if either of return_cov or return fit is set to True + + If multiple datasets are expected, the order of the returned datasets is + + [sidpy.dataset with mean parameter values, + sidpy.dataset with estimated covariances of the fitting parameters, + sidpy.dataset that is fit with the parameters obtained after fitting] + """ + if guess_fn is None: if num_fit_parms is None: raise ValueError("You did not supply a guess function, you must at least provide number of fit " - "parameters") - self.dataset = sidpy_dataset - self.fit_fn = fit_fn - self.num_fit_parms = num_fit_parms + "parameters to set the priors for scipy.optimize.curve_fit") + self.dataset = sidpy_dataset # Sidpy dataset + self.fit_fn = fit_fn # function that takes xvec, *parameters and returns yvec at each value of xvec + self.num_fit_parms = num_fit_parms # int: number of fitting parameters if ind_dims is not None: - self.ind_dims = ind_dims + self.ind_dims = ind_dims # Tuple: containing indices of independent dimensions else: + # All the dimensions that are not spectral will be considered as independent dimensions ind_dims = [] for i, dim in self.dataset._axes.items(): if dim.dimension_type != DimensionType.SPECTRAL: @@ -70,7 +87,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non raise NotImplementedError('No Spectral (dependent) dimensions found to fit') # Let's get the dependent dims here - dep_dims = [] + dep_dims = [] # Tuple: contains all the dependent dimensions. ind_dims+dep_dims = all_dims for d in np.arange(len(self.dataset.shape)): if d not in self.ind_dims: dep_dims.extend([d]) @@ -113,15 +130,17 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self._setup_calc() self.guess_fn = guess_fn - self.prior = None + self.prior = None # shape = [num_computations, num_fitting_parms] self.fit_labels = fit_parameter_labels - self.guess_results = [] - self.fit_results = [] self.num_workers = num_workers self.threads = threads self.guess_completed = False + self.return_fit = return_fit self.return_cov = return_cov + self.mean_fit_results = [] + if self.return_cov: + self.cov_fit_results = None # set up dask client self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) @@ -141,32 +160,35 @@ def _setup_calc(self): self.fold_order.append([i]) self.folded_dataset = self.dataset.fold(dim_order=self.fold_order) + # Here is the tricky part, dataset.unfold is designed to get back the original dataset with minimal loss of - # information. To do this, unfold utilizes the saved information of the original dataset. Now we are going to - # tweak that information and use the unfold function operate after the fitting. + # information. To do this, unfold utilizes the saved information while folding the original dataset. + # Here, we are going to tweak that information and use the unfold method on the dataset with fitted parameters. self._unfold_attr = {'dim_order_flattened': self.fold_order[0] + [len(self.fold_order[0])], 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} axes = self.dataset._axes.copy() - axes.popitem() + for i in self.ind_dims: + del axes[i] self._unfold_attr['_axes'] = axes - self.fit_results = [] - def do_guess(self): """ - Get back to this - Returns + If a guess_fn is provided: Applies the guess_fn to get priors for the fitting parameters. + self.prior is set as the output of guess function at each of the ind_dims + + Returns: + None ------- """ - self.guess_results = [] + guess_results = [] for ind in range(self.num_computations): lazy_result = dask.delayed(self.guess_fn)(self.dep_vec, self.folded_dataset[ind, :]) - self.guess_results.append(lazy_result) + guess_results.append(lazy_result) - self.guess_results = dask.compute(*self.guess_results) - self.prior = np.squeeze(np.array(self.guess_results)) + guess_results = dask.compute(*guess_results) + self.prior = np.squeeze(np.array(guess_results)) self.num_fit_parms = self.prior.shape[-1] self.guess_completed = True @@ -179,8 +201,7 @@ def do_fit(self, **kwargs): # Calling the guess function self.do_guess() - self.fit_results = [] - + fit_results = [] for ind in range(self.num_computations): if self.prior is None: p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) @@ -189,28 +210,25 @@ def do_fit(self, **kwargs): lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, self.folded_dataset[ind, :], return_cov=self.return_cov, p0=p0, **kwargs) - self.fit_results.append(lazy_result) + fit_results.append(lazy_result) - self.fit_results_comp = dask.compute(*self.fit_results) + fit_results_comp = dask.compute(*fit_results) if not self.return_cov: # in this case we can just dump it to an array because we only got the parameters back - mean_fit_results = np.squeeze(np.array(self.fit_results_comp)) - cov_results = None + self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) else: # here we get back both: the parameter means and the covariance matrix! - mean_fit_results = np.squeeze( - np.array([self.fit_results_comp[ind][0] for ind in range(len(self.fit_results_comp))])) - cov_results = np.squeeze( - np.array([self.fit_results_comp[ind][1] for ind in range(len(self.fit_results_comp))])) - # cov_results_reshaped_shape = self.pos_dim_shapes + cov_results[0].shape - # self.cov_results = np.array(cov_results).reshape(cov_results_reshaped_shape) + self.mean_fit_results = np.squeeze( + np.array([fit_results_comp[ind][0] for ind in range(len(fit_results_comp))])) + self.cov_fit_results = np.squeeze( + np.array([fit_results_comp[ind][1] for ind in range(len(fit_results_comp))])) # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them # Make a sidpy dataset - self.mean_fit_results = mean_fit_results - mean_sid_dset = Dataset.from_array(mean_fit_results, title='Fitting_Map') + + mean_sid_dset = Dataset.from_array(self.mean_fit_results, title='Fitting_Map') mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() mean_sid_dset = mean_sid_dset.unfold() @@ -229,14 +247,14 @@ def do_fit(self, **kwargs): mean_sid_dset.set_dimension(len(mean_sid_dset.shape) - 1, fit_dim) # Here we deal with the covariance dataset - if cov_results is not None: + if self.return_cov: # Make a sidpy dataset - cov_sid_dset = Dataset.from_array(cov_results, title='Fitting_Map_Covariance') - num_fit_parms = mean_fit_results.shape[-1] + cov_sid_dset = Dataset.from_array(self.cov_fit_results, title='Fitting_Map_Covariance') fold_attr = self._unfold_attr.copy() fold_attr['dim_order_flattened'] = fold_attr['dim_order_flattened'] + [ len(fold_attr['dim_order_flattened'])] - fold_attr['shape_transposed'] = fold_attr['shape_transposed'][:-1] + [num_fit_parms] + [num_fit_parms] + fold_attr['shape_transposed'] = fold_attr['shape_transposed'][:-1] + [self.num_fit_parms] + \ + [self.num_fit_parms] cov_sid_dset.metadata['fold_attr'] = fold_attr cov_sid_dset = cov_sid_dset.unfold() @@ -260,8 +278,12 @@ def do_fit(self, **kwargs): for i, dim in enumerate(cov_dims): cov_sid_dset.set_dimension(i - 2 + len(cov_sid_dset.shape), dim) - if self.return_cov: - return mean_sid_dset, cov_sid_dset + if self.return_cov and self.return_fit: + return [mean_sid_dset, cov_sid_dset, self.get_fitted_dataset()] + elif self.return_cov and not self.return_fit: + return [mean_sid_dset, cov_sid_dset] + elif not self.return_cov and self.return_fit: + return [mean_sid_dset, self.get_fitted_dataset()] else: return mean_sid_dset From 4a62a1e01a4429735879154af6e330a5de2c1878 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 15:32:04 -0500 Subject: [PATCH 17/43] Added **kwargs in the fitted class and these keyword arguments are then passed to scipy.optimize.curve_fit --- sidpy/proc/process.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py index df006bfe..36be4a85 100644 --- a/sidpy/proc/process.py +++ b/sidpy/proc/process.py @@ -1,3 +1,11 @@ +""" +:class:`~Sidpy.proc.fitter.SidFitter` class that fits the specified dimension of a sidpy.dataset using the +user-specified fit function. An extension of scipy.optimise.curve_fit that works on sidpy.dataset + +Created on Mar 9, 2022 +@author: Rama Vasudevan, Mani Valleti +""" + from dask.distributed import Client import numpy as np import dask @@ -50,7 +58,6 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non threads: (int) (default =2) Number of threads to use when setting up Dask client - Returns: ------- sidpy.dataset: if return_cov and return_fit are both set to False @@ -69,7 +76,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non raise ValueError("You did not supply a guess function, you must at least provide number of fit " "parameters to set the priors for scipy.optimize.curve_fit") self.dataset = sidpy_dataset # Sidpy dataset - self.fit_fn = fit_fn # function that takes xvec, *parameters and returns yvec at each value of xvec + self.fit_fn = fit_fn # function that takes xvec, *parameters and returns yvec at each value of xvec self.num_fit_parms = num_fit_parms # int: number of fitting parameters if ind_dims is not None: @@ -130,7 +137,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self._setup_calc() self.guess_fn = guess_fn - self.prior = None # shape = [num_computations, num_fitting_parms] + self.prior = None # shape = [num_computations, num_fitting_parms] self.fit_labels = fit_parameter_labels self.num_workers = num_workers self.threads = threads @@ -195,9 +202,10 @@ def do_guess(self): def do_fit(self, **kwargs): """ Perform the fit. - **kwargs: extra parameters passed to the fitting function, e.g. bounds, type of lsq algorithm, etc. + **kwargs: extra parameters passed to scipy.optimize.curve_fit, e.g. bounds, type of lsq algorithm, etc. """ - if self.guess_completed is False and self.guess_fn is not None: + + if not self.guess_completed and self.guess_fn is not None: # Calling the guess function self.do_guess() @@ -208,7 +216,8 @@ def do_fit(self, **kwargs): else: p0 = self.prior[ind, :] - lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, self.folded_dataset[ind, :], + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, + self.folded_dataset[ind, :], return_cov=self.return_cov, p0=p0, **kwargs) fit_results.append(lazy_result) From 23fd1fd2b29149675528bc60dcf949c5f35880cb Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 17:01:22 -0500 Subject: [PATCH 18/43] SidFitter class is now functional and is now placed in sidpy.proc.fitter. Requires testing. --- setup.py | 2 +- sidpy/proc/__init__.py | 4 +- sidpy/proc/process.py | 334 ----------------------------------------- sidpy/sid/dataset.py | 2 + 4 files changed, 5 insertions(+), 337 deletions(-) delete mode 100644 sidpy/proc/process.py diff --git a/setup.py b/setup.py index 4578af14..0dc557b0 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ extras_require={ 'MPI': ["mpi4py"], 'File_Widgets': ['pyqt5'], - 'process': ['scipy'] + 'fitter': ['scipy'] }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these diff --git a/sidpy/proc/__init__.py b/sidpy/proc/__init__.py index f3154be3..b32d4864 100644 --- a/sidpy/proc/__init__.py +++ b/sidpy/proc/__init__.py @@ -3,6 +3,6 @@ """ from . import comp_utils -from . import process +from . import fitter -__all__ = ['comp_utils', 'process'] +__all__ = ['comp_utils', 'fitter'] diff --git a/sidpy/proc/process.py b/sidpy/proc/process.py deleted file mode 100644 index 36be4a85..00000000 --- a/sidpy/proc/process.py +++ /dev/null @@ -1,334 +0,0 @@ -""" -:class:`~Sidpy.proc.fitter.SidFitter` class that fits the specified dimension of a sidpy.dataset using the -user-specified fit function. An extension of scipy.optimise.curve_fit that works on sidpy.dataset - -Created on Mar 9, 2022 -@author: Rama Vasudevan, Mani Valleti -""" - -from dask.distributed import Client -import numpy as np -import dask -from ..sid import Dimension, Dataset -from ..sid.dimension import DimensionType - -try: - from scipy.optimize import curve_fit -except ModuleNotFoundError: - curve_fit = None - - -class SidFitter: - # An extension of the Process Class for Functional Fitting - def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): - """ - Parameters - ---------- - sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit - - fit_fn: (function) Function used for fitting. - Should take xvec as the first argument and parameters as the rest of the arguments. - Should return the function value at each of the points in the xvec - - xvec: (numpy ndarray or list of numpy ndarrays) (Optional) - Independent variable for fitting. Should be an array - If NOT provided, the dimension arrays are assumed to be xvecs - - ind_dims: (tuple) (Optional) Tuple with integer entries of the dimensions - over which to parallelize. These should be the independent variable for the fitting. - If NOT provided, it is assumed that all the non-spectral dimensions are independent dimensions. - - guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit - It takes the same arguments as the fitting function and should return the same type of results array. - If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. - - num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided to set - the priors for the parameters for the curve_fit function. - - return_cov: (bool) (default False) Returns the estimated covariance of fitting parameters. Confer - scipy.optimize.curve_fit for further details - - return_fit: (bool) (default False) Returns the fitted sidpy dataset using the optimal parameters - when set to true - - fit_parameter_labels: (list) (default None) List of parameter labels - - num_workers: (int) (default =2) Number of workers to use when setting up Dask client - - threads: (int) (default =2) Number of threads to use when setting up Dask client - - Returns: - ------- - sidpy.dataset: if return_cov and return_fit are both set to False - List: containing sidpy.dataset objects, if either of return_cov or return fit is set to True - - If multiple datasets are expected, the order of the returned datasets is - - [sidpy.dataset with mean parameter values, - sidpy.dataset with estimated covariances of the fitting parameters, - sidpy.dataset that is fit with the parameters obtained after fitting] - - """ - - if guess_fn is None: - if num_fit_parms is None: - raise ValueError("You did not supply a guess function, you must at least provide number of fit " - "parameters to set the priors for scipy.optimize.curve_fit") - self.dataset = sidpy_dataset # Sidpy dataset - self.fit_fn = fit_fn # function that takes xvec, *parameters and returns yvec at each value of xvec - self.num_fit_parms = num_fit_parms # int: number of fitting parameters - - if ind_dims is not None: - self.ind_dims = ind_dims # Tuple: containing indices of independent dimensions - else: - # All the dimensions that are not spectral will be considered as independent dimensions - ind_dims = [] - for i, dim in self.dataset._axes.items(): - if dim.dimension_type != DimensionType.SPECTRAL: - ind_dims.extend([i]) - self.ind_dims = tuple(ind_dims) - - # Make sure there is at least one spectral dimension - if len(self.ind_dims) == len(self.dataset.shape): - raise NotImplementedError('No Spectral (dependent) dimensions found to fit') - - # Let's get the dependent dims here - dep_dims = [] # Tuple: contains all the dependent dimensions. ind_dims+dep_dims = all_dims - for d in np.arange(len(self.dataset.shape)): - if d not in self.ind_dims: - dep_dims.extend([d]) - self.dep_dims = tuple(dep_dims) - - if xvec is None: - # 1D fit - if len(self.dep_dims) == 1: - dep_vec = np.array(self.dataset._axes[self.dep_dims[0]]) - # Multidimensional fit - else: - dep_vec = [] - for d in self.dep_dims: - dep_vec.append(np.array(self.dataset._axes[d])) - - if xvec is not None: - # 1D fit - if len(self.dep_dims) == 1: - if isinstance(xvec, np.ndarray): - dep_vec = xvec - elif isinstance(xvec, list): - dep_vec = np.array(xvec) - else: - raise TypeError('Please provide a np.ndarray or a list of independent vector values') - # Multidimensional fit - else: - if isinstance(xvec, list) and len(xvec) == len(self.dep_dims): - dep_vec = xvec - elif isinstance(xvec, list) and len(xvec) != len(self.dep_dims): - raise ValueError('The number of independent dimensions provided in the xvec do not match ' - 'with the number of dependent dimensions of the dataset') - else: - raise TypeError('Please provide a list of value-arrays corresponding to each dependent dimension') - - # Dealing with the meshgrid part of multidimensional fitting - if len(self.dep_dims) > 1: - self.dep_vec = [ar.ravel() for ar in np.meshgrid(*dep_dims)] - else: - self.dep_vec = dep_vec - - self._setup_calc() - self.guess_fn = guess_fn - self.prior = None # shape = [num_computations, num_fitting_parms] - self.fit_labels = fit_parameter_labels - self.num_workers = num_workers - self.threads = threads - self.guess_completed = False - self.return_fit = return_fit - self.return_cov = return_cov - - self.mean_fit_results = [] - if self.return_cov: - self.cov_fit_results = None - # set up dask client - self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) - - def _setup_calc(self): - self.fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed - self.num_computations = 1 - - # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions - # In other words make the argument 'ind_dims' optional - # if self.ind_dims is not None: - - for i in np.arange(self.dataset.ndim): - if i in self.ind_dims: - self.fold_order[0].extend([i]) - self.num_computations *= self.dataset.shape[i] - else: - self.fold_order.append([i]) - - self.folded_dataset = self.dataset.fold(dim_order=self.fold_order) - - # Here is the tricky part, dataset.unfold is designed to get back the original dataset with minimal loss of - # information. To do this, unfold utilizes the saved information while folding the original dataset. - # Here, we are going to tweak that information and use the unfold method on the dataset with fitted parameters. - - self._unfold_attr = {'dim_order_flattened': self.fold_order[0] + [len(self.fold_order[0])], - 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} - axes = self.dataset._axes.copy() - for i in self.ind_dims: - del axes[i] - self._unfold_attr['_axes'] = axes - - def do_guess(self): - """ - If a guess_fn is provided: Applies the guess_fn to get priors for the fitting parameters. - self.prior is set as the output of guess function at each of the ind_dims - - Returns: - None - ------- - - """ - guess_results = [] - for ind in range(self.num_computations): - lazy_result = dask.delayed(self.guess_fn)(self.dep_vec, self.folded_dataset[ind, :]) - guess_results.append(lazy_result) - - guess_results = dask.compute(*guess_results) - self.prior = np.squeeze(np.array(guess_results)) - self.num_fit_parms = self.prior.shape[-1] - self.guess_completed = True - - def do_fit(self, **kwargs): - """ - Perform the fit. - **kwargs: extra parameters passed to scipy.optimize.curve_fit, e.g. bounds, type of lsq algorithm, etc. - """ - - if not self.guess_completed and self.guess_fn is not None: - # Calling the guess function - self.do_guess() - - fit_results = [] - for ind in range(self.num_computations): - if self.prior is None: - p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - else: - p0 = self.prior[ind, :] - - lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, - self.folded_dataset[ind, :], - return_cov=self.return_cov, p0=p0, **kwargs) - fit_results.append(lazy_result) - - fit_results_comp = dask.compute(*fit_results) - - if not self.return_cov: - # in this case we can just dump it to an array because we only got the parameters back - self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) - - else: - # here we get back both: the parameter means and the covariance matrix! - self.mean_fit_results = np.squeeze( - np.array([fit_results_comp[ind][0] for ind in range(len(fit_results_comp))])) - self.cov_fit_results = np.squeeze( - np.array([fit_results_comp[ind][1] for ind in range(len(fit_results_comp))])) - - # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them - # Make a sidpy dataset - - mean_sid_dset = Dataset.from_array(self.mean_fit_results, title='Fitting_Map') - mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() - mean_sid_dset = mean_sid_dset.unfold() - - # Set the data type - mean_sid_dset.data_type = 'image_stack' # We may want to pass a new type - fit map - - # Add quantity and units - mean_sid_dset.units = self.dataset.units - mean_sid_dset.quantity = self.dataset.quantity - - # We set the last dimension, i.e., the dimension with the fit parameters - fit_dim = Dimension(np.arange(self.num_fit_parms), - name='fit_parms', units='a.u.', - quantity='fit_parameters', - dimension_type='temporal') - mean_sid_dset.set_dimension(len(mean_sid_dset.shape) - 1, fit_dim) - - # Here we deal with the covariance dataset - if self.return_cov: - # Make a sidpy dataset - cov_sid_dset = Dataset.from_array(self.cov_fit_results, title='Fitting_Map_Covariance') - fold_attr = self._unfold_attr.copy() - fold_attr['dim_order_flattened'] = fold_attr['dim_order_flattened'] + [ - len(fold_attr['dim_order_flattened'])] - fold_attr['shape_transposed'] = fold_attr['shape_transposed'][:-1] + [self.num_fit_parms] + \ - [self.num_fit_parms] - - cov_sid_dset.metadata['fold_attr'] = fold_attr - cov_sid_dset = cov_sid_dset.unfold() - - # Set the data type - cov_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map - - # Add quantity and units - cov_sid_dset.units = self.dataset.units - cov_sid_dset.quantity = self.dataset.quantity - - cov_dims = [Dimension(np.arange(self.num_fit_parms), - name='fit_cov_parms_x', units='a.u.', - quantity='fit_cov_parameters', - dimension_type='spectral'), - Dimension(np.arange(self.num_fit_parms), - name='fit_cov_parms_y', units='a.u.', - quantity='fit_cov_parameters', - dimension_type='spectral')] - - for i, dim in enumerate(cov_dims): - cov_sid_dset.set_dimension(i - 2 + len(cov_sid_dset.shape), dim) - - if self.return_cov and self.return_fit: - return [mean_sid_dset, cov_sid_dset, self.get_fitted_dataset()] - elif self.return_cov and not self.return_fit: - return [mean_sid_dset, cov_sid_dset] - elif not self.return_cov and self.return_fit: - return [mean_sid_dset, self.get_fitted_dataset()] - else: - return mean_sid_dset - - def get_fitted_dataset(self): - """This method returns the fitted dataset using the parameters generated by the fit function""" - fitted_dset = self.dataset.like_data(np.zeros_like(self.dataset.compute()), - title_prefix='fitted_') - fitted_dset_fold = fitted_dset.fold(dim_order=self.fold_order) - - # Here we make a roundtrip to numpy as earlier versions of dask did not support the assignments - # of the form dask_array[2] = 1 - - np_folded_arr = fitted_dset_fold.compute() - for i in range(np_folded_arr.shape[0]): - np_folded_arr[i] = self.fit_fn(self.dep_vec, *self.mean_fit_results[i]) - - fitted_sid_dset_folded = fitted_dset_fold.like_data(np_folded_arr, title=fitted_dset_fold.title) - fitted_sid_dset = fitted_sid_dset_folded.unfold() - - return fitted_sid_dset - - # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit - - @staticmethod - def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): - print(kwargs) - xvec = np.array(xvec) - yvec = np.array(yvec) - yvec = yvec.ravel() - xvec = xvec.ravel() - if curve_fit is None: - raise ModuleNotFoundError("scipy is not installed") - else: - popt, pcov = curve_fit(fit_fn, xvec, yvec, **kwargs) - - if return_cov: - return popt, pcov - else: - return popt diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 7bbfc089..4223977c 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -1313,6 +1313,8 @@ def unfold(self): unfolded_dset._axes = {} for i, dim in old_axes.items(): unfolded_dset.set_dimension(i, dim.copy()) + + del unfolded_dset.metadata['fold_attr'] return unfolded_dset # Following methods are to be edited From a6d65e08c9b84e02607d2e960d5b597fd28a19d0 Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 17:03:00 -0500 Subject: [PATCH 19/43] The process.py file under sidpy.proc is now renamed to fitter.py --- sidpy/proc/fitter.py | 358 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 358 insertions(+) create mode 100644 sidpy/proc/fitter.py diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py new file mode 100644 index 00000000..2e77ee93 --- /dev/null +++ b/sidpy/proc/fitter.py @@ -0,0 +1,358 @@ +""" +:class:`~sidpy.proc.fitter.SidFitter` class that fits the specified dimension of a sidpy.dataset using the +user-specified fit function. An extension of scipy.optimise.curve_fit that works on sidpy.dataset + +Created on Mar 9, 2022 +@author: Rama Vasudevan, Mani Valleti +""" + +from dask.distributed import Client +import numpy as np +import dask +import inspect +from ..sid import Dimension, Dataset +from ..sid.dimension import DimensionType + +try: + from scipy.optimize import curve_fit +except ModuleNotFoundError: + curve_fit = None + + +class SidFitter: + # An extension of the Process Class for Functional Fitting + def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, + return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): + """ + Parameters + ---------- + sidpy_dataset: (sidpy.Dataset) Sidpy dataset object to be fit + + fit_fn: (function) Function used for fitting. + Should take xvec as the first argument and parameters as the rest of the arguments. + Should return the function value at each of the points in the xvec + + xvec: (numpy ndarray or list of numpy ndarrays) (Optional) + Independent variable for fitting. Should be an array + If NOT provided, the dimension arrays are assumed to be xvecs + + ind_dims: (tuple) (Optional) Tuple with integer entries of the dimensions + over which to parallelize. These should be the independent variable for the fitting. + If NOT provided, it is assumed that all the non-spectral dimensions are independent dimensions. + + guess_fn: (function) (optional) This optional function should be utilized to generate priors for the full fit + It takes the same arguments as the fitting function and should return the same type of results array. + If the guess_fn is NOT provided, then the user MUST input the num_fit_parms. + + num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided to set + the priors for the parameters for the curve_fit function. + + return_cov: (bool) (default False) Returns the estimated covariance of fitting parameters. Confer + scipy.optimize.curve_fit for further details + + return_fit: (bool) (default False) Returns the fitted sidpy dataset using the optimal parameters + when set to true + + fit_parameter_labels: (list) (default None) List of parameter labels + + num_workers: (int) (default =2) Number of workers to use when setting up Dask client + + threads: (int) (default =2) Number of threads to use when setting up Dask client + + Returns: + ------- + sidpy.dataset: if return_cov and return_fit are both set to False + List: containing sidpy.dataset objects, if either of return_cov or return fit is set to True + + If multiple datasets are expected, the order of the returned datasets is + + [sidpy.dataset with mean parameter values, + sidpy.dataset with estimated covariances of the fitting parameters, + sidpy.dataset that is fit with the parameters obtained after fitting] + + """ + + if guess_fn is None: + if num_fit_parms is None: + raise ValueError("You did not supply a guess function, you must at least provide number of fit " + "parameters to set the priors for scipy.optimize.curve_fit") + self.dataset = sidpy_dataset # Sidpy dataset + self.fit_fn = fit_fn # function that takes xvec, *parameters and returns yvec at each value of xvec + self.num_fit_parms = num_fit_parms # int: number of fitting parameters + + if ind_dims is not None: + self.ind_dims = ind_dims # Tuple: containing indices of independent dimensions + else: + # All the dimensions that are not spectral will be considered as independent dimensions + ind_dims = [] + for i, dim in self.dataset._axes.items(): + if dim.dimension_type != DimensionType.SPECTRAL: + ind_dims.extend([i]) + self.ind_dims = tuple(ind_dims) + + # Make sure there is at least one spectral dimension + if len(self.ind_dims) == len(self.dataset.shape): + raise NotImplementedError('No Spectral (dependent) dimensions found to fit') + + # Let's get the dependent dims here + dep_dims = [] # Tuple: contains all the dependent dimensions. ind_dims+dep_dims = all_dims + for d in np.arange(len(self.dataset.shape)): + if d not in self.ind_dims: + dep_dims.extend([d]) + self.dep_dims = tuple(dep_dims) + + # xvec is not provided + if xvec is None: + # 1D fit + if len(self.dep_dims) == 1: + dep_vec = np.array(self.dataset._axes[self.dep_dims[0]]) + # Multidimensional fit + else: + dep_vec = [] + for d in self.dep_dims: + dep_vec.append(np.array(self.dataset._axes[d])) + + # xvec is provided + if xvec is not None: + # 1D fit + if len(self.dep_dims) == 1: + if isinstance(xvec, np.ndarray): + dep_vec = xvec + elif isinstance(xvec, list): + dep_vec = np.array(xvec) + else: + raise TypeError('Please provide a np.ndarray or a list of independent vector values') + # Multidimensional fit + else: + if isinstance(xvec, list) and len(xvec) == len(self.dep_dims): + dep_vec = xvec + elif isinstance(xvec, list) and len(xvec) != len(self.dep_dims): + raise ValueError('The number of independent dimensions provided in the xvec do not match ' + 'with the number of dependent dimensions of the dataset') + else: + raise TypeError('Please provide a list of value-arrays corresponding to each dependent dimension') + + # Dealing with the meshgrid part of multidimensional fitting + if len(self.dep_dims) > 1: + self.dep_vec = [ar.ravel() for ar in np.meshgrid(*dep_dims)] + else: + self.dep_vec = dep_vec + + self._setup_calc() + self.guess_fn = guess_fn + self.prior = None # shape = [num_computations, num_fitting_parms] + self.fit_labels = fit_parameter_labels + self.num_workers = num_workers + self.threads = threads + self.guess_completed = False + self.return_fit = return_fit + self.return_cov = return_cov + + self.mean_fit_results = [] + if self.return_cov: + self.cov_fit_results = None + # set up dask client + self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) + + def _setup_calc(self): + self.fold_order = [[]] # All the independent dimensions go into the first element and will be collapsed + self.num_computations = 1 + + # Here we have to come up with a way that treats the spatial dimensions as the independent dimensions + # In other words make the argument 'ind_dims' optional + # if self.ind_dims is not None: + + for i in np.arange(self.dataset.ndim): + if i in self.ind_dims: + self.fold_order[0].extend([i]) + self.num_computations *= self.dataset.shape[i] + else: + self.fold_order.append([i]) + + self.folded_dataset = self.dataset.fold(dim_order=self.fold_order) + + # Here is the tricky part, dataset.unfold is designed to get back the original dataset with minimal loss of + # information. To do this, unfold utilizes the saved information while folding the original dataset. + # Here, we are going to tweak that information and use the unfold method on the dataset with fitted parameters. + + self._unfold_attr = {'dim_order_flattened': self.fold_order[0] + [len(self.fold_order[0])], + 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} + axes = self.dataset._axes.copy() + for i in self.dep_dims: + del axes[i] + self._unfold_attr['_axes'] = axes + + def do_guess(self): + """ + If a guess_fn is provided: Applies the guess_fn to get priors for the fitting parameters. + self.prior is set as the output of guess function at each of the ind_dims + + Returns: + None + ------- + + """ + guess_results = [] + for ind in range(self.num_computations): + lazy_result = dask.delayed(self.guess_fn)(self.dep_vec, self.folded_dataset[ind, :]) + guess_results.append(lazy_result) + + guess_results = dask.compute(*guess_results) + self.prior = np.squeeze(np.array(guess_results)) + self.num_fit_parms = self.prior.shape[-1] + self.guess_completed = True + + def do_fit(self, **kwargs): + """ + Perform the fit. + **kwargs: extra parameters passed to scipy.optimize.curve_fit, e.g. bounds, type of lsq algorithm, etc. + """ + + if not self.guess_completed and self.guess_fn is not None: + # Calling the guess function + print('We are now calling the guess function') + guess_function_str = inspect.getsource(self.guess_fn) + self.do_guess() + + fit_results = [] + for ind in range(self.num_computations): + if self.prior is None: + p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) + guess_function_str = 'Not Provided' + else: + p0 = self.prior[ind, :] + + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, + self.folded_dataset[ind, :], + return_cov=self.return_cov, p0=p0, **kwargs) + fit_results.append(lazy_result) + + fit_results_comp = dask.compute(*fit_results) + + if not self.return_cov: + # in this case we can just dump it to an array because we only got the parameters back + self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) + + else: + # here we get back both: the parameter means and the covariance matrix! + self.mean_fit_results = np.squeeze( + np.array([fit_results_comp[ind][0] for ind in range(len(fit_results_comp))])) + self.cov_fit_results = np.squeeze( + np.array([fit_results_comp[ind][1] for ind in range(len(fit_results_comp))])) + + # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them + # Make a sidpy dataset + + mean_sid_dset = Dataset.from_array(self.mean_fit_results, title='Fitting_Map') + mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() + mean_sid_dset = mean_sid_dset.unfold() + + # Set the data type + mean_sid_dset.data_type = 'image_stack' # We may want to pass a new type - fit map + + # Add quantity and units + mean_sid_dset.units = self.dataset.units + mean_sid_dset.quantity = self.dataset.quantity + + # We set the last dimension, i.e., the dimension with the fit parameters + fit_dim = Dimension(np.arange(self.num_fit_parms), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='temporal') + mean_sid_dset.set_dimension(len(mean_sid_dset.shape) - 1, fit_dim) + + fit_parms_dict = {'fit_parameters_labels': self.fit_labels, + 'fitting_function': inspect.getsource(self.fit_fn), + 'guess_function': guess_function_str, + 'ind_dims': self.ind_dims, + } + mean_sid_dset.metadata = self.dataset.metadata.copy() + mean_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() + mean_sid_dset.original_metadata = self.dataset.original_metadata.copy() + + # Here we deal with the covariance dataset + if self.return_cov: + # Make a sidpy dataset + cov_sid_dset = Dataset.from_array(self.cov_fit_results, title='Fitting_Map_Covariance') + fold_attr = self._unfold_attr.copy() + fold_attr['dim_order_flattened'] = fold_attr['dim_order_flattened'] + [ + len(fold_attr['dim_order_flattened'])] + fold_attr['shape_transposed'] = fold_attr['shape_transposed'][:-1] + [self.num_fit_parms] + \ + [self.num_fit_parms] + + cov_sid_dset.metadata['fold_attr'] = fold_attr + cov_sid_dset = cov_sid_dset.unfold() + + # Set the data type + cov_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map + + # Add quantity and units + cov_sid_dset.units = self.dataset.units + cov_sid_dset.quantity = self.dataset.quantity + + cov_dims = [Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_x', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral'), + Dimension(np.arange(self.num_fit_parms), + name='fit_cov_parms_y', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral')] + + for i, dim in enumerate(cov_dims): + cov_sid_dset.set_dimension(i - 2 + len(cov_sid_dset.shape), dim) + + cov_sid_dset.metadata = self.dataset.metadata.copy() + cov_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() + cov_sid_dset.original_metadata = self.dataset.original_metadata.copy() + + if self.return_fit: + fit_dset = self.get_fitted_dataset() + fit_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() + + if self.return_cov and self.return_fit: + return [mean_sid_dset, cov_sid_dset, fit_dset] + elif self.return_cov and not self.return_fit: + return [mean_sid_dset, cov_sid_dset] + elif not self.return_cov and self.return_fit: + return [mean_sid_dset, fit_dset] + else: + return mean_sid_dset + + def get_fitted_dataset(self): + """This method returns the fitted dataset using the parameters generated by the fit function""" + fitted_dset = self.dataset.like_data(np.zeros_like(self.dataset.compute()), + title_prefix='fitted_') + fitted_dset_fold = fitted_dset.fold(dim_order=self.fold_order) + + # Here we make a roundtrip to numpy as earlier versions of dask did not support the assignments + # of the form dask_array[2] = 1 + + np_folded_arr = fitted_dset_fold.compute() + for i in range(np_folded_arr.shape[0]): + np_folded_arr[i] = self.fit_fn(self.dep_vec, *self.mean_fit_results[i]) + + fitted_sid_dset_folded = fitted_dset_fold.like_data(np_folded_arr, title=fitted_dset_fold.title) + fitted_sid_dset = fitted_sid_dset_folded.unfold() + fitted_sid_dset.original_metadata = self.dataset.original_metadata.copy() + + return fitted_sid_dset + + # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit + + @staticmethod + def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): + print(kwargs) + xvec = np.array(xvec) + yvec = np.array(yvec) + yvec = yvec.ravel() + xvec = xvec.ravel() + if curve_fit is None: + raise ModuleNotFoundError("scipy is not installed") + else: + popt, pcov = curve_fit(fit_fn, xvec, yvec, **kwargs) + + if return_cov: + return popt, pcov + else: + return popt From c2eb3eba27d1b5e51d6b9b7560cd7242e7de5fad Mon Sep 17 00:00:00 2001 From: Mani Date: Wed, 9 Mar 2022 17:11:03 -0500 Subject: [PATCH 20/43] Deleted a print statement in the fitter class --- sidpy/proc/fitter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 2e77ee93..51b88604 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -210,7 +210,6 @@ def do_fit(self, **kwargs): if not self.guess_completed and self.guess_fn is not None: # Calling the guess function - print('We are now calling the guess function') guess_function_str = inspect.getsource(self.guess_fn) self.do_guess() From 110952cef23e502b2bd44b789b6c13be6a612795 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 11 Mar 2022 16:26:49 -0500 Subject: [PATCH 21/43] sidFitter now returns an additional dataset with standard deviations of parameters --- sidpy/proc/fitter.py | 72 +++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 51b88604..c9125f49 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -22,7 +22,8 @@ class SidFitter: # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): + return_std=False, return_cov=False, return_fit=False, + fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters ---------- @@ -47,6 +48,9 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided to set the priors for the parameters for the curve_fit function. + return_std: (bool) (default False) Returns the dataset with estimated standard deviation of the parameter + values. Square roots of the diagonal of the covariance matrix. + return_cov: (bool) (default False) Returns the estimated covariance of fitting parameters. Confer scipy.optimize.curve_fit for further details @@ -145,12 +149,15 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self.num_workers = num_workers self.threads = threads self.guess_completed = False - self.return_fit = return_fit + self.return_std = return_std self.return_cov = return_cov + self.return_fit = return_fit self.mean_fit_results = [] if self.return_cov: self.cov_fit_results = None + if self.return_std: + self.std_fit_results = None # set up dask client self.client = Client(threads_per_worker=self.threads, n_workers=self.num_workers) @@ -223,22 +230,23 @@ def do_fit(self, **kwargs): lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, self.folded_dataset[ind, :], - return_cov=self.return_cov, p0=p0, **kwargs) + return_cov=(self.return_cov or self.return_std), + p0=p0, **kwargs) fit_results.append(lazy_result) fit_results_comp = dask.compute(*fit_results) - if not self.return_cov: - # in this case we can just dump it to an array because we only got the parameters back - self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) - - else: + if self.return_cov or self.return_std: # here we get back both: the parameter means and the covariance matrix! self.mean_fit_results = np.squeeze( np.array([fit_results_comp[ind][0] for ind in range(len(fit_results_comp))])) self.cov_fit_results = np.squeeze( np.array([fit_results_comp[ind][1] for ind in range(len(fit_results_comp))])) + else: + # in this case we can just dump it to an array because we only got the parameters back + self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) + # Here we have either the mean fit results or both mean and cov arrays. We make 2 sidpy dataset out of them # Make a sidpy dataset @@ -249,10 +257,6 @@ def do_fit(self, **kwargs): # Set the data type mean_sid_dset.data_type = 'image_stack' # We may want to pass a new type - fit map - # Add quantity and units - mean_sid_dset.units = self.dataset.units - mean_sid_dset.quantity = self.dataset.quantity - # We set the last dimension, i.e., the dimension with the fit parameters fit_dim = Dimension(np.arange(self.num_fit_parms), name='fit_parms', units='a.u.', @@ -269,6 +273,8 @@ def do_fit(self, **kwargs): mean_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() mean_sid_dset.original_metadata = self.dataset.original_metadata.copy() + cov_sid_dset, std_fit_dset, fit_dset = None, None, None + # Here we deal with the covariance dataset if self.return_cov: # Make a sidpy dataset @@ -283,11 +289,7 @@ def do_fit(self, **kwargs): cov_sid_dset = cov_sid_dset.unfold() # Set the data type - cov_sid_dset.data_type = self.dataset.data_type # We may want to pass a new type - fit map - - # Add quantity and units - cov_sid_dset.units = self.dataset.units - cov_sid_dset.quantity = self.dataset.quantity + cov_sid_dset.data_type = 'IMAGE_4D' # We may want to pass a new type - fit map cov_dims = [Dimension(np.arange(self.num_fit_parms), name='fit_cov_parms_x', units='a.u.', @@ -305,18 +307,40 @@ def do_fit(self, **kwargs): cov_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() cov_sid_dset.original_metadata = self.dataset.original_metadata.copy() + # Here is the std_dev dataset + if self.return_std: + self.std_fit_results = np.diagonal(self.cov_fit_results, axis1=-2, axis2=-1) + std_fit_dset = Dataset.from_array(self.std_fit_results, title='std_dev') + std_fit_dset.metadata['fold_attr'] = self._unfold_attr.copy() + std_fit_dset = std_fit_dset.unfold() + + # Set the data type + std_fit_dset.data_type = 'image_stack' # We may want to pass a new type - fit map + + # We set the last dimension, i.e., the dimension with the fit parameters + fit_dim = Dimension(np.arange(self.num_fit_parms), + name='std_dev', units='a.u.', + quantity='std_dev_fit_parms', + dimension_type='temporal') + std_fit_dset.set_dimension(len(std_fit_dset.shape) - 1, fit_dim) + + std_fit_dset.metadata = self.dataset.metadata.copy() + std_fit_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() + std_fit_dset.original_metadata = self.dataset.original_metadata.copy() + + # Fitted dset if self.return_fit: fit_dset = self.get_fitted_dataset() fit_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() - if self.return_cov and self.return_fit: - return [mean_sid_dset, cov_sid_dset, fit_dset] - elif self.return_cov and not self.return_fit: - return [mean_sid_dset, cov_sid_dset] - elif not self.return_cov and self.return_fit: - return [mean_sid_dset, fit_dset] + results = [mean_sid_dset, cov_sid_dset, std_fit_dset, fit_dset] + inds = [True, self.return_cov, self.return_std, self.return_fit] + results = [results[i] for i in range(len(inds)) if inds[i]] + + if len(results) == 0: + return results[0] else: - return mean_sid_dset + return results def get_fitted_dataset(self): """This method returns the fitted dataset using the parameters generated by the fit function""" From ca0b0b52d716f54ad523bda4a5696c87a2e6e5ba Mon Sep 17 00:00:00 2001 From: Rama Vasudevan Date: Fri, 11 Mar 2022 16:41:27 -0500 Subject: [PATCH 22/43] small edits to fitter and dataset_viz --- sidpy/proc/fitter.py | 4 +++- sidpy/viz/dataset_viz.py | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 51b88604..c3eb9473 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -227,7 +227,8 @@ def do_fit(self, **kwargs): fit_results.append(lazy_result) fit_results_comp = dask.compute(*fit_results) - + self.client.close() + if not self.return_cov: # in this case we can just dump it to an array because we only got the parameters back self.mean_fit_results = np.squeeze(np.array(fit_results_comp)) @@ -268,6 +269,7 @@ def do_fit(self, **kwargs): mean_sid_dset.metadata = self.dataset.metadata.copy() mean_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() mean_sid_dset.original_metadata = self.dataset.original_metadata.copy() + mean_sid_dset.fit_dataset = True #We are going to make this attribute for fit datasets # Here we deal with the covariance dataset if self.return_cov: diff --git a/sidpy/viz/dataset_viz.py b/sidpy/viz/dataset_viz.py index 2d6d191b..4bc20b3a 100644 --- a/sidpy/viz/dataset_viz.py +++ b/sidpy/viz/dataset_viz.py @@ -316,15 +316,26 @@ def __init__(self, dset, figure=None, **kwargs): # self.axis = self.fig.add_axes([0.0, 0.2, .9, .7]) self.ind = 0 + self.plot_fit_labels = False self.number_of_slices = self.dset.shape[self.stack_dim] self.axis = None self.plot_image(**kwargs) self.axis = plt.gca() if self.set_title: - self.axis.set_title('image stack: ' + dset.title + '\n use scroll wheel to navigate images') - self.img.axes.figure.canvas.mpl_connect('scroll_event', self._onscroll) + if 'fit_dataset' in dir(dset): + if dset.fit_dataset: + if dset.metadata['fit_parms_dict']['fit_parameters_labels'] is not None: + self.plot_fit_labels = True + img_titles = dset.metadata['fit_parms_dict']['fit_parameters_labels'] + self.image_titles = ['Fitting Parm: ' + img_titles[k] for k in range(len(img_titles))] + else: + self.image_titles = 'Fitting Maps: ' + dset.title + '\n use scroll wheel to navigate images' + else: + self.image_titles = 'Image stack: ' + dset.title + '\n use scroll wheel to navigate images' + #self.axis.set_title(image_titles) + self.img.axes.figure.canvas.mpl_connect('scroll_event', self._onscroll) import ipywidgets as iwgt self.play = iwgt.Play( @@ -436,7 +447,10 @@ def _update(self, frame=0): self.selection[self.stack_dim] = slice(frame, frame + 1) self.img.set_data((self.dset[tuple(self.selection)].squeeze()).T) self.img.axes.figure.canvas.draw_idle() - + if self.plot_fit_labels: + self.axis.set_title(self.image_titles[frame]) + else: + self.axis.set_title(self.image_titles) class SpectralImageVisualizer(object): """ From b024b4bd6db294a980d4d80c966dc4ae8b10f44e Mon Sep 17 00:00:00 2001 From: Rama Vasudevan Date: Fri, 11 Mar 2022 16:45:50 -0500 Subject: [PATCH 23/43] close the client after the fit --- sidpy/proc/fitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index d3cb34cc..6fccdf25 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -235,6 +235,7 @@ def do_fit(self, **kwargs): fit_results.append(lazy_result) fit_results_comp = dask.compute(*fit_results) + self.client.close() if self.return_cov or self.return_std: # here we get back both: the parameter means and the covariance matrix! From cdc27505a3f543cb2ea4088be493b5de1c98a3d8 Mon Sep 17 00:00:00 2001 From: RamaKVasudevan Date: Fri, 18 Mar 2022 15:49:45 -0400 Subject: [PATCH 24/43] adjust file writing to use tempfile --- tests/hdf/test_hdf_utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/hdf/test_hdf_utils.py b/tests/hdf/test_hdf_utils.py index abaa59f2..d3c14f43 100644 --- a/tests/hdf/test_hdf_utils.py +++ b/tests/hdf/test_hdf_utils.py @@ -7,6 +7,7 @@ from __future__ import division, print_function, unicode_literals, \ absolute_import import unittest +import tempfile import os import sys import tempfile @@ -19,6 +20,7 @@ from sidpy.base.dict_utils import flatten_dict from sidpy.hdf import hdf_utils + from . import data_utils if sys.version_info.major == 3: @@ -638,8 +640,9 @@ def test_wrong_type(self): class TestValidateH5ObjsInSameFile(unittest.TestCase): def test_diff_file(self): - file_path_1 = 'source.h5' - file_path_2 = 'sink.h5' + with tempfile.TemporaryDirectory() as tmp_dir: + file_path_1 = tmp_dir + 'source.h5' + file_path_2 = tmp_dir + 'sink.h5' data_utils.delete_existing_file(file_path_1) h5_f1 = h5py.File(file_path_1, mode='w') h5_main = h5_f1.create_dataset('main', data=np.arange(5)) @@ -649,8 +652,8 @@ def test_diff_file(self): with self.assertRaises(ValueError): hdf_utils.validate_h5_objs_in_same_h5_file(h5_main, h5_other) - os.remove(file_path_1) - os.remove(file_path_2) + #os.remove(file_path_1) + #os.remove(file_path_2) def test_same_file(self): file_path = 'test_same_file.h5' From 50d14d5f24c27912e1b11c5352069649683b150f Mon Sep 17 00:00:00 2001 From: RamaKVasudevan Date: Fri, 18 Mar 2022 16:14:12 -0400 Subject: [PATCH 25/43] title assertion --- tests/viz/test_dataset_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/viz/test_dataset_plot.py b/tests/viz/test_dataset_plot.py index 2884470c..b5911007 100644 --- a/tests/viz/test_dataset_plot.py +++ b/tests/viz/test_dataset_plot.py @@ -254,7 +254,7 @@ def test_plot(self): data = view.axes[0].images[0].get_array().data self.assertTrue(np.allclose(data.shape, x.shape[1:])) - self.assertEqual(view.axes[0].get_title(), 'image stack: generic\n use scroll wheel to navigate images') + self.assertEqual(view.axes[0].get_title(), 'Image stack: generic\n use scroll wheel to navigate images') self.assertEqual(f"{dset.x.quantity} ({dset.x.units})", view.axes[0].get_xlabel()) From e306c4c1f24571932244900c9df46917e856d3dd Mon Sep 17 00:00:00 2001 From: RamaKVasudevan Date: Fri, 25 Mar 2022 16:23:33 -0400 Subject: [PATCH 26/43] small update to data_viz --- sidpy/viz/dataset_viz.py | 89 +++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/sidpy/viz/dataset_viz.py b/sidpy/viz/dataset_viz.py index 4bc20b3a..09e81152 100644 --- a/sidpy/viz/dataset_viz.py +++ b/sidpy/viz/dataset_viz.py @@ -319,9 +319,7 @@ def __init__(self, dset, figure=None, **kwargs): self.plot_fit_labels = False self.number_of_slices = self.dset.shape[self.stack_dim] - self.axis = None - self.plot_image(**kwargs) - self.axis = plt.gca() + if self.set_title: if 'fit_dataset' in dir(dset): if dset.fit_dataset: @@ -329,51 +327,56 @@ def __init__(self, dset, figure=None, **kwargs): self.plot_fit_labels = True img_titles = dset.metadata['fit_parms_dict']['fit_parameters_labels'] self.image_titles = ['Fitting Parm: ' + img_titles[k] for k in range(len(img_titles))] + else: + self.image_titles = 'Fitting Maps: ' + dset.title + '\n use scroll wheel to navigate images' else: self.image_titles = 'Fitting Maps: ' + dset.title + '\n use scroll wheel to navigate images' else: self.image_titles = 'Image stack: ' + dset.title + '\n use scroll wheel to navigate images' - - #self.axis.set_title(image_titles) - self.img.axes.figure.canvas.mpl_connect('scroll_event', self._onscroll) - import ipywidgets as iwgt - self.play = iwgt.Play( - value=0, - min=0, - max=self.number_of_slices, - step=1, - interval=500, - description="Press play", - disabled=False - ) - self.slider = iwgt.IntSlider( - value=0, - min=0, - max=self.number_of_slices, - continuous_update=False, - description="Frame:") - # set the slider function - iwgt.interactive(self._update, frame=self.slider) - # link slider and play function - iwgt.jslink((self.play, 'value'), (self.slider, 'value')) - - # We add a button to average the images - self.button = iwgt.widgets.ToggleButton( - value=False, - description='Average', - disabled=False, - button_style='', # 'success', 'info', 'warning', 'danger' or '' - tooltip='Average Images of Stack') - - self.button.observe(self._average_slices, 'value') - - # set play and slider widgets next to each other - widg = iwgt.HBox([self.play, self.slider, self.button]) - display(widg) - - # self.anim = animation.FuncAnimation(self.fig, self._updatefig, interval=200, blit=False, repeat=True) - self._update() + self.axis = None + self.plot_image(**kwargs) + self.axis = plt.gca() + #self.axis.set_title(image_titles) + self.img.axes.figure.canvas.mpl_connect('scroll_event', self._onscroll) + + import ipywidgets as iwgt + self.play = iwgt.Play( + value=0, + min=0, + max=self.number_of_slices, + step=1, + interval=500, + description="Press play", + disabled=False + ) + self.slider = iwgt.IntSlider( + value=0, + min=0, + max=self.number_of_slices, + continuous_update=False, + description="Frame:") + # set the slider function + iwgt.interactive(self._update, frame=self.slider) + # link slider and play function + iwgt.jslink((self.play, 'value'), (self.slider, 'value')) + + # We add a button to average the images + self.button = iwgt.widgets.ToggleButton( + value=False, + description='Average', + disabled=False, + button_style='', # 'success', 'info', 'warning', 'danger' or '' + tooltip='Average Images of Stack') + + self.button.observe(self._average_slices, 'value') + + # set play and slider widgets next to each other + widg = iwgt.HBox([self.play, self.slider, self.button]) + display(widg) + + # self.anim = animation.FuncAnimation(self.fig, self._updatefig, interval=200, blit=False, repeat=True) + self._update() def plot_image(self, **kwargs): From c04d3158eab941d7a6530b4f8a1ca75de5b7c25f Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 26 Mar 2022 14:39:48 -0400 Subject: [PATCH 27/43] Added get_km_priors method to the fitter class. This uses sklearn.optimize.kMeans to get better priors for the fitting function --- sidpy/proc/fitter.py | 41 +++++++++++++++++++++++++++++++++++++++-- sidpy/sid/dataset.py | 1 - 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index c9125f49..fe84ac5c 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -12,6 +12,7 @@ import inspect from ..sid import Dimension, Dataset from ..sid.dimension import DimensionType +from sklearn.cluster import KMeans try: from scipy.optimize import curve_fit @@ -22,7 +23,7 @@ class SidFitter: # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - return_std=False, return_cov=False, return_fit=False, + km_guess=False, return_std=False, return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters @@ -48,6 +49,10 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non num_fit_parms: (int) Number of fitting parameters. This is needed IF the guess function is not provided to set the priors for the parameters for the curve_fit function. + km_guess: (bool) (default False) When set to True: Divides the spectra into clusters using + sklearn.optimize.kMeans, applies the fitting function on the cluster centers, + uses the results as priors to each spectrum of the cluster. + return_std: (bool) (default False) Returns the dataset with estimated standard deviation of the parameter values. Square roots of the diagonal of the covariance matrix. @@ -142,6 +147,9 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non else: self.dep_vec = dep_vec + self.km_guess = km_guess + if self.km_guess: + self.km_prior = None self._setup_calc() self.guess_fn = guess_fn self.prior = None # shape = [num_computations, num_fitting_parms] @@ -361,7 +369,36 @@ def get_fitted_dataset(self): return fitted_sid_dset - # Might be a good idea to add a default fit function like np.polyfit or scipy.curve_fit + def get_km_priors(self): + shape = self.folded_dataset.shape # We get the shape of the folded dataset + # Our prior_dset will have the same shape except for the last dimension whose size will be equal to number of + # fitting parameters + dim_order = [[0], [i + 1 for i in range(len(shape) - 1)]] + # We are using the fold function in case we have a multidimensional fit. + # In that case we need all the spectral dimensions collapsed into a single dimension for kMeans + # In case of a 1D fit the next line essentially does nothing. + km_dset = self.folded_dataset.fold(dim_order) + + n_clus = self.num_computations / 100 + km = KMeans(n_clusters=n_clus, random_state=0).fit(km_dset.compute()) + labels, centers = km.labels_, km.cluster_centers_ + + for i, cen in enumerate(centers): + if self.guess_fn is not None: + p0 = self.guess_fn(self.dep_vec, centers) + else: + p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) + + self.km_prior = np.zeros([n_clus, self.num_fit_parms]) + for i, cen in enumerate(centers): + if self.guess_fn is not None: + p0 = self.guess_fn(self.dep_vec, cen) + else: + p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) + + self.km_prior[i] = SidFitter.default_curve_fit(self.fit_fn, self.dep_vec, cen, + return_cov=False, + p0=p0, maxfev=10000) @staticmethod def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): diff --git a/sidpy/sid/dataset.py b/sidpy/sid/dataset.py index 4223977c..008abafa 100644 --- a/sidpy/sid/dataset.py +++ b/sidpy/sid/dataset.py @@ -1105,7 +1105,6 @@ def reshape(self, *shape, merge_chunks=True): new_shape = shape[0] else: new_shape = shape - print(new_shape) return super().reshape(*new_shape, merge_chunks) @reduce_dims From e7cdb92390cba23a355b384554d712d81610de38 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 26 Mar 2022 15:18:48 -0400 Subject: [PATCH 28/43] Fixed a bug in the fitter class caused by dataset.unfold method. --- sidpy/proc/fitter.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index fe84ac5c..16a07453 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -190,11 +190,13 @@ def _setup_calc(self): # information. To do this, unfold utilizes the saved information while folding the original dataset. # Here, we are going to tweak that information and use the unfold method on the dataset with fitted parameters. - self._unfold_attr = {'dim_order_flattened': self.fold_order[0] + [len(self.fold_order[0])], + self._unfold_attr = {'dim_order_flattened': list(np.arange(len(self.dataset.shape))), 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} - axes = self.dataset._axes.copy() - for i in self.dep_dims: - del axes[i] + axes, j = {}, 0 + for i, dim in self.dataset._axes.items(): + if not i in self.dep_dims: + axes[j] = dim + j += 1 self._unfold_attr['_axes'] = axes def do_guess(self): From f36b1d6ec11139c7fda90e36046d87fe91a2dcf0 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 26 Mar 2022 17:44:29 -0400 Subject: [PATCH 29/43] Added km_guess as an attribute to the fitter class. When set to true uses sklearn.optimize.kMeans to form better priors to the fitting function --- sidpy/proc/fitter.py | 80 +++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 86406415..e8abea7c 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -12,18 +12,23 @@ import inspect from ..sid import Dimension, Dataset from ..sid.dimension import DimensionType -from sklearn.cluster import KMeans + try: from scipy.optimize import curve_fit except ModuleNotFoundError: curve_fit = None +try: + from sklearn.cluster import KMeans +except: + kMeans = None + class SidFitter: # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - km_guess=False, return_std=False, return_cov=False, return_fit=False, + km_guess=False, n_clus=None, return_std=False, return_cov=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters @@ -53,6 +58,9 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non sklearn.optimize.kMeans, applies the fitting function on the cluster centers, uses the results as priors to each spectrum of the cluster. + n_clus: (int) (default None) Used only when km_guess is set to True. Determines the number of clusters to be + formed for sklearn.optimize.kmeans. If not provided then n_clus = self.num_computations/100 + return_std: (bool) (default False) Returns the dataset with estimated standard deviation of the parameter values. Square roots of the diagonal of the covariance matrix. @@ -150,6 +158,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self.km_guess = km_guess if self.km_guess: self.km_prior = None + self.km_labels = None self._setup_calc() self.guess_fn = guess_fn self.prior = None # shape = [num_computations, num_fitting_parms] @@ -231,21 +240,34 @@ def do_fit(self, **kwargs): self.do_guess() fit_results = [] - for ind in range(self.num_computations): - if self.prior is None: - p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - guess_function_str = 'Not Provided' - else: - p0 = self.prior[ind, :] + if not self.km_prior: + for ind in range(self.num_computations): + if self.prior is None: + p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) + guess_function_str = 'Not Provided' + else: + p0 = self.prior[ind, :] - lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, - self.folded_dataset[ind, :], - return_cov=(self.return_cov or self.return_std), - p0=p0, **kwargs) - fit_results.append(lazy_result) + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, + self.folded_dataset[ind, :], + return_cov=(self.return_cov or self.return_std), + p0=p0, **kwargs) + fit_results.append(lazy_result) - fit_results_comp = dask.compute(*fit_results) - self.client.close() + fit_results_comp = dask.compute(*fit_results) + self.client.close() + + else: + self.get_km_priors() + for ind in range(self.num_computations): + lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, + self.folded_dataset[ind, :], + return_cov=(self.return_cov or self.return_std), + p0=self.km_prior[self.km_labels[ind]], **kwargs) + fit_results.append(lazy_result) + + fit_results_comp = dask.compute(*fit_results) + self.client.close() if self.return_cov or self.return_std: # here we get back both: the parameter means and the covariance matrix! @@ -283,7 +305,7 @@ def do_fit(self, **kwargs): mean_sid_dset.metadata = self.dataset.metadata.copy() mean_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() mean_sid_dset.original_metadata = self.dataset.original_metadata.copy() - mean_sid_dset.fit_dataset = True #We are going to make this attribute for fit datasets + mean_sid_dset.fit_dataset = True # We are going to make this attribute for fit datasets cov_sid_dset, std_fit_dset, fit_dset = None, None, None @@ -383,26 +405,24 @@ def get_km_priors(self): # In case of a 1D fit the next line essentially does nothing. km_dset = self.folded_dataset.fold(dim_order) - n_clus = self.num_computations / 100 - km = KMeans(n_clusters=n_clus, random_state=0).fit(km_dset.compute()) - labels, centers = km.labels_, km.cluster_centers_ + n_clus = self.num_computations / 100 # Take care of number of cluster centers + + if kMeans is None: + raise ModuleNotFoundError("sklearn is not installed") + else: + km = KMeans(n_clusters=n_clus, random_state=0).fit(km_dset.compute()) + self.km_labels, centers = km.labels_, km.cluster_centers_ + self.km_prior = np.zeros([n_clus, self.num_fit_parms]) for i, cen in enumerate(centers): if self.guess_fn is not None: - p0 = self.guess_fn(self.dep_vec, centers) + p0 = self.guess_fn(self.dep_vec, cen) else: p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - self.km_prior = np.zeros([n_clus, self.num_fit_parms]) - for i, cen in enumerate(centers): - if self.guess_fn is not None: - p0 = self.guess_fn(self.dep_vec, cen) - else: - p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - - self.km_prior[i] = SidFitter.default_curve_fit(self.fit_fn, self.dep_vec, cen, - return_cov=False, - p0=p0, maxfev=10000) + self.km_prior[i] = SidFitter.default_curve_fit(self.fit_fn, self.dep_vec, cen, + return_cov=False, + p0=p0, maxfev=10000) @staticmethod def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): From 9614e645b7646cc9ca0f671abb1128883930dc00 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 26 Mar 2022 18:07:39 -0400 Subject: [PATCH 30/43] Fixed some bugs in get_km_priors method inside the fitter class --- sidpy/proc/fitter.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index e8abea7c..cd362805 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -13,7 +13,6 @@ from ..sid import Dimension, Dataset from ..sid.dimension import DimensionType - try: from scipy.optimize import curve_fit except ModuleNotFoundError: @@ -21,8 +20,8 @@ try: from sklearn.cluster import KMeans -except: - kMeans = None +except ModuleNotFoundError: + KMeans = None class SidFitter: @@ -157,7 +156,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self.km_guess = km_guess if self.km_guess: - self.km_prior = None + self.km_priors = None self.km_labels = None self._setup_calc() self.guess_fn = guess_fn @@ -233,18 +232,19 @@ def do_fit(self, **kwargs): Perform the fit. **kwargs: extra parameters passed to scipy.optimize.curve_fit, e.g. bounds, type of lsq algorithm, etc. """ - - if not self.guess_completed and self.guess_fn is not None: - # Calling the guess function + if self.guess_fn is not None: guess_function_str = inspect.getsource(self.guess_fn) - self.do_guess() + else: + guess_function_str = 'Not Provided' fit_results = [] - if not self.km_prior: + if not self.km_guess: + if not self.guess_completed and self.guess_fn is not None: + self.do_guess() + for ind in range(self.num_computations): if self.prior is None: p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - guess_function_str = 'Not Provided' else: p0 = self.prior[ind, :] @@ -263,7 +263,8 @@ def do_fit(self, **kwargs): lazy_result = dask.delayed(SidFitter.default_curve_fit)(self.fit_fn, self.dep_vec, self.folded_dataset[ind, :], return_cov=(self.return_cov or self.return_std), - p0=self.km_prior[self.km_labels[ind]], **kwargs) + p0=self.km_priors[self.km_labels[ind]], + **kwargs) fit_results.append(lazy_result) fit_results_comp = dask.compute(*fit_results) @@ -405,24 +406,26 @@ def get_km_priors(self): # In case of a 1D fit the next line essentially does nothing. km_dset = self.folded_dataset.fold(dim_order) - n_clus = self.num_computations / 100 # Take care of number of cluster centers + n_clus = int(self.num_computations / 100) # Take care of number of cluster centers - if kMeans is None: + if KMeans is None: raise ModuleNotFoundError("sklearn is not installed") else: km = KMeans(n_clusters=n_clus, random_state=0).fit(km_dset.compute()) self.km_labels, centers = km.labels_, km.cluster_centers_ - self.km_prior = np.zeros([n_clus, self.num_fit_parms]) + km_priors = [] for i, cen in enumerate(centers): if self.guess_fn is not None: p0 = self.guess_fn(self.dep_vec, cen) else: p0 = np.random.normal(loc=0.5, scale=0.1, size=self.num_fit_parms) - self.km_prior[i] = SidFitter.default_curve_fit(self.fit_fn, self.dep_vec, cen, - return_cov=False, - p0=p0, maxfev=10000) + km_priors.append(SidFitter.default_curve_fit(self.fit_fn, self.dep_vec, cen, + return_cov=False, + p0=p0, maxfev=10000)) + self.km_priors = np.array(km_priors) + self.num_fit_parms = self.km_priors.shape[-1] @staticmethod def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): From e5ac07453c4d16519ca7069ecae75edc58d78efd Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 26 Mar 2022 18:23:11 -0400 Subject: [PATCH 31/43] Added the argument n_clus to the fitter class --- sidpy/proc/fitter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index cd362805..3e7da518 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -158,6 +158,8 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non if self.km_guess: self.km_priors = None self.km_labels = None + self.n_clus = n_clus + self._setup_calc() self.guess_fn = guess_fn self.prior = None # shape = [num_computations, num_fitting_parms] @@ -406,12 +408,12 @@ def get_km_priors(self): # In case of a 1D fit the next line essentially does nothing. km_dset = self.folded_dataset.fold(dim_order) - n_clus = int(self.num_computations / 100) # Take care of number of cluster centers - if KMeans is None: raise ModuleNotFoundError("sklearn is not installed") else: - km = KMeans(n_clusters=n_clus, random_state=0).fit(km_dset.compute()) + if self.n_clus is None: + self.n_clus = int(self.num_computations / 100) + km = KMeans(n_clusters=self.n_clus, random_state=0).fit(km_dset.compute()) self.km_labels, centers = km.labels_, km.cluster_centers_ km_priors = [] From 0196c4b715d3ac4305d08be565c46f631b430923 Mon Sep 17 00:00:00 2001 From: RamaKVasudevan Date: Fri, 1 Apr 2022 15:08:54 -0400 Subject: [PATCH 32/43] print statement removed --- sidpy/proc/fitter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 6fccdf25..013b325e 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -253,6 +253,7 @@ def do_fit(self, **kwargs): mean_sid_dset = Dataset.from_array(self.mean_fit_results, title='Fitting_Map') mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() + print('mean_sid_dset is of shape {}'.format(mean_sid_dset.shape)) mean_sid_dset = mean_sid_dset.unfold() # Set the data type From eecfe77710ccf146e61a0d8601a9d9bf5faa0034 Mon Sep 17 00:00:00 2001 From: RamaKVasudevan Date: Fri, 1 Apr 2022 15:14:26 -0400 Subject: [PATCH 33/43] remove print --- sidpy/proc/fitter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index c4d15063..3e7da518 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -288,7 +288,6 @@ def do_fit(self, **kwargs): mean_sid_dset = Dataset.from_array(self.mean_fit_results, title='Fitting_Map') mean_sid_dset.metadata['fold_attr'] = self._unfold_attr.copy() - print('mean_sid_dset is of shape {}'.format(mean_sid_dset.shape)) mean_sid_dset = mean_sid_dset.unfold() # Set the data type From fbd6453fed54ae5d427b6b4887ed38ebd09b21f6 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 1 Apr 2022 15:22:46 -0400 Subject: [PATCH 34/43] test_dataset.py has been updated to replace the dataset attribute 'name' with 'title' --- tests/sid/test_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/sid/test_dataset.py b/tests/sid/test_dataset.py index 38997a4a..454630a7 100644 --- a/tests/sid/test_dataset.py +++ b/tests/sid/test_dataset.py @@ -25,7 +25,7 @@ generic_attributes = ['title', 'quantity', 'units', 'modality', 'source'] -def validate_dataset_properties(self, dataset, values, name='generic', +def validate_dataset_properties(self, dataset, values, title='generic', quantity='generic', units='generic', modality='generic', source='generic', dimension_dict=None, data_type=DataType.UNKNOWN, @@ -183,7 +183,7 @@ def test_user_defined_parms(self): descriptor.metadata = test_dict.copy() descriptor.original_metadata = test_dict.copy() - validate_dataset_properties(self, descriptor, np.arange(3), name='test', + validate_dataset_properties(self, descriptor, np.arange(3), title='test', quantity='test', units='test', modality='test', source='test', dimension_dict=None, data_type=DataType.UNKNOWN, @@ -211,19 +211,19 @@ def test_invalid_main_types(self): def test_numpy_array_input(self): x = np.ones([3, 4, 5]) - descriptor = Dataset.from_array(x, name='test') + descriptor = Dataset.from_array(x, title='test') self.assertEqual(descriptor.shape, x.shape) # TODO: call validate_dataset_properties instead def test_dask_array_input(self): x = da.zeros([3, 4], chunks='auto') - descriptor = Dataset.from_array(x, chunks='auto', name='test') + descriptor = Dataset.from_array(x, chunks='auto', title='test') self.assertEqual(descriptor.shape, x.shape) # TODO: call validate_dataset_properties instead def test_list_input(self): x = [[3, 4, 6], [5, 6, 7]] - descriptor = Dataset.from_array(x, name='test') + descriptor = Dataset.from_array(x, title='test') self.assertEqual(descriptor.shape, np.array(x).shape) # TODO: call validate_dataset_properties instead From f3ef3259908448b8e5dec08eb790358a8034a4cc Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 1 Apr 2022 17:12:23 -0400 Subject: [PATCH 35/43] Tests written for fitting 3D dataset with 1 spectral dimension --- sidpy/proc/fitter.py | 6 +- tests/proc/test_fitter.py | 180 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 4 deletions(-) create mode 100644 tests/proc/test_fitter.py diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 3e7da518..9cd2fb92 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -303,12 +303,11 @@ def do_fit(self, **kwargs): fit_parms_dict = {'fit_parameters_labels': self.fit_labels, 'fitting_function': inspect.getsource(self.fit_fn), 'guess_function': guess_function_str, - 'ind_dims': self.ind_dims, + 'ind_dims': self.ind_dims } mean_sid_dset.metadata = self.dataset.metadata.copy() mean_sid_dset.metadata['fit_parms_dict'] = fit_parms_dict.copy() mean_sid_dset.original_metadata = self.dataset.original_metadata.copy() - mean_sid_dset.fit_dataset = True # We are going to make this attribute for fit datasets cov_sid_dset, std_fit_dset, fit_dset = None, None, None @@ -347,7 +346,7 @@ def do_fit(self, **kwargs): # Here is the std_dev dataset if self.return_std: self.std_fit_results = np.diagonal(self.cov_fit_results, axis1=-2, axis2=-1) - std_fit_dset = Dataset.from_array(self.std_fit_results, title='std_dev') + std_fit_dset = Dataset.from_array(self.std_fit_results, title='Fitting_Map_std_dev') std_fit_dset.metadata['fold_attr'] = self._unfold_attr.copy() std_fit_dset = std_fit_dset.unfold() @@ -431,7 +430,6 @@ def get_km_priors(self): @staticmethod def default_curve_fit(fit_fn, xvec, yvec, return_cov=True, **kwargs): - print(kwargs) xvec = np.array(xvec) yvec = np.array(yvec) yvec = yvec.ravel() diff --git a/tests/proc/test_fitter.py b/tests/proc/test_fitter.py new file mode 100644 index 00000000..39440e3e --- /dev/null +++ b/tests/proc/test_fitter.py @@ -0,0 +1,180 @@ +import sys +import unittest +import numpy as np + +sys.path.insert(0, "../../sidpy/") + +from sidpy.proc.fitter import SidFitter +from sidpy import Dataset, Dimension, DataType +import sidpy as sid +from ..sid.test_dataset import validate_dataset_properties +import inspect + + +class Test_3D_dset_1Dfit(unittest.TestCase): + def setUp(self): + self.data_set_3D, self.xvec = make_3D_dataset(shape=(10, 10, 32)) + fitter = SidFitter(self.data_set_3D, xvec=self.xvec, + fit_fn=return_quad, guess_fn=guess_quad, num_workers=8, + threads=2, return_cov=True, return_fit=True, return_std=True, + km_guess=True, n_clus=10) + + lb, ub = [-50, -50, -50], [50, 50, 50] + self.fit_results = fitter.do_fit(bounds=(lb, ub), maxfev=100) + + self.metadata = self.data_set_3D.metadata.copy() + fit_parms_dict = {'fit_parameters_labels': None, + 'fitting_function': inspect.getsource(return_quad), + 'guess_function': inspect.getsource(guess_quad), + 'ind_dims': (0, 1) + } + self.metadata['fit_parms_dict'] = fit_parms_dict + + def test_num_fitter_outputs(self): + self.assertEqual(len(self.fit_results), 4) # add assertion here + + def test_fit_parms_dset(self): + # First dataset would be the fitting parameters dataset + self.assertEqual(self.fit_results[0].shape, (10, 10, 3)) + ## Getting the dimension dict + dim_dict = {0: self.data_set_3D._axes[0].copy(), 1: self.data_set_3D._axes[1].copy(), + 2: Dimension(np.arange(3), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='temporal')} + + validate_dataset_properties(self, self.fit_results[0], np.array(self.fit_results[0]), + title='Fitting_Map', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_3D.original_metadata.copy(), + metadata=self.metadata) + + def test_cov_dset(self): + # Second dataset is the covariance dataset + self.assertEqual(self.fit_results[1].shape, (10, 10, 3, 3)) + ## Getting the dim_dict + dim_dict = {0: self.data_set_3D._axes[0].copy(), 1: self.data_set_3D._axes[1].copy(), + 2: Dimension(np.arange(3), + name='fit_cov_parms_x', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral'), + 3: Dimension(np.arange(3), + name='fit_cov_parms_y', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral') + } + + validate_dataset_properties(self, self.fit_results[1], np.array(self.fit_results[1]), + title='Fitting_Map_Covariance', data_type=DataType.IMAGE_4D, + dimension_dict=dim_dict, + original_metadata=self.data_set_3D.original_metadata.copy(), + metadata=self.metadata) + + def test_std_dev_dset(self): + # Third dataset is the std_dev dataset + self.assertEqual(self.fit_results[2].shape, (10, 10, 3)) + ## Getting the dim_dict + dim_dict = {0: self.data_set_3D._axes[0].copy(), 1: self.data_set_3D._axes[1].copy(), + 2: Dimension(np.arange(3), + name='std_dev', units='a.u.', + quantity='std_dev_fit_parms', + dimension_type='temporal')} + validate_dataset_properties(self, self.fit_results[2], np.array(self.fit_results[2]), + title='Fitting_Map_std_dev', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_3D.original_metadata.copy(), + metadata=self.metadata) + + def test_fitted_dset(self): + # Fourth dataset is the fitted dataset + self.assertEqual(self.fit_results[3].shape, self.data_set_3D.shape) + validate_dataset_properties(self, self.fit_results[3], np.array(self.fit_results[3]), + title='fitted_' + self.data_set_3D.title, data_type=self.data_set_3D.data_type, + quantity=self.data_set_3D.quantity, modality=self.data_set_3D.modality, + units=self.data_set_3D.units, source=self.data_set_3D.source, + dimension_dict=self.data_set_3D._axes, + original_metadata=self.data_set_3D.original_metadata, + metadata=self.metadata) + + + + +def return_quad(x, *parms): + a, b, c, = parms + return a * x ** 2 + b * x + c + + +def guess_quad(xvec, yvec): + popt = np.polyfit(xvec, yvec, 2) + return popt + + +def make_3D_dataset(shape=(60, 60, 32), cycles=1): + data_mat = np.zeros(shape=(shape[0], shape[1], shape[2], cycles)) + xvec = np.linspace(0, 5, shape[-1]) + + for row in range(data_mat.shape[0]): + for col in range(data_mat.shape[1]): + for cycle in range(cycles): + if row ** 2 + col ** 2 < data_mat.shape[0] * data_mat.shape[1] / 3: + a = np.random.normal(loc=3.0, scale=0.4) + b = np.random.normal(loc=1.0, scale=2.4) + c = np.random.normal(loc=0.5, scale=1.0) + else: + a = np.random.normal(loc=1.0, scale=0.8) + b = np.random.normal(loc=0.0, scale=1.4) + c = np.random.normal(loc=-0.5, scale=1.3) + data_mat[row, col, :, cycle] = return_quad(xvec, *[a, b, c]) + 2.5 * np.random.normal(size=len(xvec)) + + data_mat = np.squeeze(data_mat) + parms_dict = {'info_1': np.linspace(0, 1, 100), 'instrument': 'perseverence rover AFM'} + + # Let's convert it to a sidpy dataset + + # Specify dimensions + x_dim = np.linspace(0, 1E-6, + data_mat.shape[0]) + y_dim = np.linspace(0, 1E-6, + data_mat.shape[1]) + + z_dim = xvec + + # Make a sidpy dataset + data_set = sid.Dataset.from_array(data_mat, title='Current_spectral_map') + + # Set the data type + data_set.data_type = sid.DataType.SPECTRAL_IMAGE + + # Add quantity and units + data_set.units = 'nA' + data_set.quantity = 'Current' + + # Add dimension info + data_set.set_dimension(0, sid.Dimension(x_dim, + name='x', + units='m', quantity='x', + dimension_type='spatial')) + data_set.set_dimension(1, sid.Dimension(y_dim, + name='y', + units='m', quantity='y', + dimension_type='spatial')) + + data_set.set_dimension(2, sid.Dimension(z_dim, + name='Voltage', + units='V', quantity='Voltage', + dimension_type='spectral')) + if cycles > 1: + cycles_dim = np.arange(cycles) + data_set.set_dimension(3, sid.Dimension(cycles_dim, + name='Cycle', + units='#', quantity='Cycle', + dimension_type='spectral')) + + # append metadata + data_set.metadata = parms_dict + + return data_set, xvec + + +if __name__ == '__main__': + unittest.main() From 908be370461c2dfb0e054df1d6a616733b34c096 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 1 Apr 2022 18:28:53 -0400 Subject: [PATCH 36/43] Added sklearn as an extra requirement for fitter class. Updated file: setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0dc557b0..ea7b597f 100644 --- a/setup.py +++ b/setup.py @@ -66,9 +66,9 @@ include_package_data=True, # https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-dependencies extras_require={ - 'MPI': ["mpi4py"], + 'MPI': ["mpi4py"], 'File_Widgets': ['pyqt5'], - 'fitter': ['scipy'] + 'fitter': ['scipy', 'sklearn'] }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these From 25ebbdc34f070f480030cf04d8deb331c512b536 Mon Sep 17 00:00:00 2001 From: Mani Date: Fri, 1 Apr 2022 18:37:34 -0400 Subject: [PATCH 37/43] Added extras_require.txt file. --- extras_require.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 extras_require.txt diff --git a/extras_require.txt b/extras_require.txt new file mode 100644 index 00000000..fdeb7efd --- /dev/null +++ b/extras_require.txt @@ -0,0 +1,4 @@ +mpi4py +pyqt5 +scipy +sklearn \ No newline at end of file From 9f0cfb6cd69d8dd2dc5716daf06df39f1fd02580 Mon Sep 17 00:00:00 2001 From: Mani <55563282+saimani5@users.noreply.github.com> Date: Fri, 1 Apr 2022 18:45:53 -0400 Subject: [PATCH 38/43] Update setup.py Added sklearn and scipy as requirements for now. Will later move them to extras. --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index ea7b597f..2a22ffef 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,8 @@ 'ipywidgets>=5.2.2', 'ipython>=5.1.0,<6;python_version<"3.3"', # IPython 6.0+ does not support Python < 3.5 'ipython>=6.0;python_version>="3.3"', # Beginning with IPython 6.0, Python 3.3 and above is required. + 'sklearn', + 'scipy' ] setup( From c099d5047b63826d5982e1584cd560710651ff16 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 2 Apr 2022 14:15:46 -0400 Subject: [PATCH 39/43] Bug found in multidimensional fitting in the fitter class. --- sidpy/proc/fitter.py | 2 +- tests/proc/test_fitter.py | 94 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 9cd2fb92..d6e7c170 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -200,7 +200,7 @@ def _setup_calc(self): # information. To do this, unfold utilizes the saved information while folding the original dataset. # Here, we are going to tweak that information and use the unfold method on the dataset with fitted parameters. - self._unfold_attr = {'dim_order_flattened': list(np.arange(len(self.dataset.shape))), + self._unfold_attr = {'dim_order_flattened': list(np.arange(len(self.fold_order[0])))+[len(self.fold_order[0])], 'shape_transposed': [self.dataset.shape[i] for i in self.fold_order[0]] + [-1]} axes, j = {}, 0 for i, dim in self.dataset._axes.items(): diff --git a/tests/proc/test_fitter.py b/tests/proc/test_fitter.py index 39440e3e..23c66ce6 100644 --- a/tests/proc/test_fitter.py +++ b/tests/proc/test_fitter.py @@ -97,6 +97,24 @@ def test_fitted_dset(self): metadata=self.metadata) +class Test_4D_dset_2Dfit(unittest.TestCase): + def setUp(self): + self.data_set_4D, self.xvec = make_3D_dataset(shape=(10, 10, 32)) + fitter = SidFitter(self.data_set_3D, xvec=self.xvec, + fit_fn=return_quad, guess_fn=guess_quad, num_workers=8, + threads=2, return_cov=True, return_fit=True, return_std=True, + km_guess=True, n_clus=10) + + lb, ub = [-50, -50, -50], [50, 50, 50] + self.fit_results = fitter.do_fit(bounds=(lb, ub), maxfev=100) + + self.metadata = self.data_set_3D.metadata.copy() + fit_parms_dict = {'fit_parameters_labels': None, + 'fitting_function': inspect.getsource(return_quad), + 'guess_function': inspect.getsource(guess_quad), + 'ind_dims': (0, 1) + } + self.metadata['fit_parms_dict'] = fit_parms_dict def return_quad(x, *parms): @@ -176,5 +194,81 @@ def make_3D_dataset(shape=(60, 60, 32), cycles=1): return data_set, xvec +def gauss_2D(fitting_space, *parms): + x = fitting_space[0] + y = fitting_space[1] + amplitude, xo, yo, sigma, offset = parms + xo = float(xo) + yo = float(yo) + r = ((x - xo) ** 2 + (y - yo) ** 2) ** .5 + g = amplitude * np.exp(-(r / sigma) ** 2) + offset + return g.ravel() + + +def make_4D_dataset(shape=(32, 16, 64, 48)): + dataset = np.zeros(shape=shape, dtype=np.float64) + xlen, ylen, kxlen, kylen = shape + kx, ky = np.meshgrid(np.linspace(0, 5, kxlen), np.linspace(0, 6, kylen)) + + for row in range(xlen): + for col in range(ylen): + amp = np.sqrt(row * col // xlen * ylen) + 3.5 + sigma = np.random.normal(loc=2.5, scale=0.34) + offset = 0.1 # col + xo = np.random.uniform(low=0, high=5) + yo = np.random.uniform(low=0, high=6) + + # amplitude, xo, yo, sigma, offset + gauss_mat = gauss_2D([kx.ravel(), ky.ravel()], *[amp, xo, yo, sigma, offset]) + gauss_mat += np.mean(gauss_mat) / 4 * np.random.normal(size=len(gauss_mat)) + gauss_mat = gauss_mat.reshape(kx.shape[0], kx.shape[1]) + dataset[row, col, :, :] = gauss_mat.T + + # Now make it a sidpy dataset + parms_dict = {'info_1': np.linspace(0, 5.6, 30), 'instrument': 'opportunity rover AFM'} + + # Let's convert it to a sidpy dataset + # Specify dimensions + x_dim = np.linspace(0, 1E-6, + dataset.shape[0]) + y_dim = np.linspace(0, 1E-6, + dataset.shape[1]) + kx_dim = np.linspace(0, 5, kxlen) + ky_dim = np.linspace(0, 6, kylen) + + # Make a sidpy dataset + data_set = sid.Dataset.from_array(dataset, name='4D_STEM') + + # Set the data type + data_set.data_type = sid.DataType.IMAGE_4D + + # Add quantity and units + data_set.units = 'nA' + data_set.quantity = 'Current' + + # Add dimension info + data_set.set_dimension(0, sid.Dimension(x_dim, + name='x', + units='m', quantity='x', + dimension_type='spatial')) + data_set.set_dimension(1, sid.Dimension(y_dim, + name='y', + units='m', quantity='y', + dimension_type='spatial')) + data_set.set_dimension(2, sid.Dimension(kx_dim, + name='Intensity KX', + units='counts', quantity='Intensity', + dimension_type='spectral')) + data_set.set_dimension(3, sid.Dimension(ky_dim, + name='Intensity KY', + units='counts', quantity='Intensity', + dimension_type='spectral')) + + # append metadata + data_set.metadata = parms_dict + + return data_set, [kx.ravel(), ky.ravel()] + + if __name__ == '__main__': unittest.main() From 742f6b0514405ba6b9637254d3d36aebd9a79ba8 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 2 Apr 2022 14:37:36 -0400 Subject: [PATCH 40/43] Tests added for 2D spectral fitting of a 4D dataset --- sidpy/proc/fitter.py | 2 +- tests/proc/test_fitter.py | 56 ++++++++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index d6e7c170..0e0152e0 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -27,7 +27,7 @@ class SidFitter: # An extension of the Process Class for Functional Fitting def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=None, num_fit_parms=None, - km_guess=False, n_clus=None, return_std=False, return_cov=False, return_fit=False, + km_guess=False, n_clus=None, return_cov=False, return_std=False, return_fit=False, fit_parameter_labels=None, num_workers=2, threads=2): """ Parameters diff --git a/tests/proc/test_fitter.py b/tests/proc/test_fitter.py index 23c66ce6..9c59876d 100644 --- a/tests/proc/test_fitter.py +++ b/tests/proc/test_fitter.py @@ -99,23 +99,53 @@ def test_fitted_dset(self): class Test_4D_dset_2Dfit(unittest.TestCase): def setUp(self): - self.data_set_4D, self.xvec = make_3D_dataset(shape=(10, 10, 32)) - fitter = SidFitter(self.data_set_3D, xvec=self.xvec, - fit_fn=return_quad, guess_fn=guess_quad, num_workers=8, - threads=2, return_cov=True, return_fit=True, return_std=True, - km_guess=True, n_clus=10) - - lb, ub = [-50, -50, -50], [50, 50, 50] - self.fit_results = fitter.do_fit(bounds=(lb, ub), maxfev=100) - - self.metadata = self.data_set_3D.metadata.copy() - fit_parms_dict = {'fit_parameters_labels': None, - 'fitting_function': inspect.getsource(return_quad), - 'guess_function': inspect.getsource(guess_quad), + self.data_set_4D, self.xyvec = make_4D_dataset() + # Here we don't provide the xyvec as the input, we let the class figure it + fitter = SidFitter(self.data_set_4D, fit_fn=gauss_2D, num_workers=8, num_fit_parms=5, + threads=2, return_fit=True, + km_guess=True, n_clus=10, + fit_parameter_labels=['amplitude', 'x', 'y', 'sigma', 'offset']) + + self.fit_results = fitter.do_fit(maxfev=1000) + + self.metadata = self.data_set_4D.metadata.copy() + fit_parms_dict = {'fit_parameters_labels': ['amplitude', 'x', 'y', 'sigma', 'offset'], + 'fitting_function': inspect.getsource(gauss_2D), + 'guess_function': 'Not Provided', 'ind_dims': (0, 1) } self.metadata['fit_parms_dict'] = fit_parms_dict + def test_num_fitter_outputs(self): + self.assertEqual(len(self.fit_results), 2) # add assertion here + + def test_fit_parms_dset(self): + # First dataset would be the fitting parameters dataset + self.assertEqual(self.fit_results[0].shape, (32, 16, 5)) + ## Getting the dimension dict + dim_dict = {0: self.data_set_4D._axes[0].copy(), 1: self.data_set_4D._axes[1].copy(), + 2: Dimension(np.arange(5), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='temporal')} + + validate_dataset_properties(self, self.fit_results[0], np.array(self.fit_results[0]), + title='Fitting_Map', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_4D.original_metadata.copy(), + metadata=self.metadata) + + def test_fitted_dset(self): + # Fourth dataset is the fitted dataset + self.assertEqual(self.fit_results[1].shape, self.data_set_4D.shape) + validate_dataset_properties(self, self.fit_results[1], np.array(self.fit_results[1]), + title='fitted_' + self.data_set_4D.title, data_type=self.data_set_4D.data_type, + quantity=self.data_set_4D.quantity, modality=self.data_set_4D.modality, + units=self.data_set_4D.units, source=self.data_set_4D.source, + dimension_dict=self.data_set_4D._axes, + original_metadata=self.data_set_4D.original_metadata, + metadata=self.metadata) + def return_quad(x, *parms): a, b, c, = parms From 90b3fe15828bc825cdb9234cbb11e5ac47ecbae0 Mon Sep 17 00:00:00 2001 From: Mani Date: Sat, 2 Apr 2022 15:25:47 -0400 Subject: [PATCH 41/43] Added tests for 1D fitting of a 4D dataset when the spectral dimension is not at the last position (dset with cycles) --- sidpy/proc/fitter.py | 2 +- tests/proc/test_fitter.py | 120 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 7 deletions(-) diff --git a/sidpy/proc/fitter.py b/sidpy/proc/fitter.py index 0e0152e0..09019ceb 100644 --- a/sidpy/proc/fitter.py +++ b/sidpy/proc/fitter.py @@ -97,7 +97,7 @@ def __init__(self, sidpy_dataset, fit_fn, xvec=None, ind_dims=None, guess_fn=Non self.num_fit_parms = num_fit_parms # int: number of fitting parameters if ind_dims is not None: - self.ind_dims = ind_dims # Tuple: containing indices of independent dimensions + self.ind_dims = tuple(ind_dims) # Tuple: containing indices of independent dimensions else: # All the dimensions that are not spectral will be considered as independent dimensions ind_dims = [] diff --git a/tests/proc/test_fitter.py b/tests/proc/test_fitter.py index 9c59876d..700b0b44 100644 --- a/tests/proc/test_fitter.py +++ b/tests/proc/test_fitter.py @@ -11,7 +11,7 @@ import inspect -class Test_3D_dset_1Dfit(unittest.TestCase): +class Test_3Ddset_1Dfit(unittest.TestCase): def setUp(self): self.data_set_3D, self.xvec = make_3D_dataset(shape=(10, 10, 32)) fitter = SidFitter(self.data_set_3D, xvec=self.xvec, @@ -97,12 +97,12 @@ def test_fitted_dset(self): metadata=self.metadata) -class Test_4D_dset_2Dfit(unittest.TestCase): +class Test_4Ddset_2Dfit(unittest.TestCase): def setUp(self): self.data_set_4D, self.xyvec = make_4D_dataset() # Here we don't provide the xyvec as the input, we let the class figure it fitter = SidFitter(self.data_set_4D, fit_fn=gauss_2D, num_workers=8, num_fit_parms=5, - threads=2, return_fit=True, + threads=2, return_std=True, return_fit=True, km_guess=True, n_clus=10, fit_parameter_labels=['amplitude', 'x', 'y', 'sigma', 'offset']) @@ -117,7 +117,7 @@ def setUp(self): self.metadata['fit_parms_dict'] = fit_parms_dict def test_num_fitter_outputs(self): - self.assertEqual(len(self.fit_results), 2) # add assertion here + self.assertEqual(len(self.fit_results), 3) # add assertion here def test_fit_parms_dset(self): # First dataset would be the fitting parameters dataset @@ -135,10 +135,25 @@ def test_fit_parms_dset(self): original_metadata=self.data_set_4D.original_metadata.copy(), metadata=self.metadata) + def test_std_dev_dset(self): + # Third dataset is the std_dev dataset + self.assertEqual(self.fit_results[1].shape, (32, 16, 5)) + ## Getting the dim_dict + dim_dict = {0: self.data_set_4D._axes[0].copy(), 1: self.data_set_4D._axes[1].copy(), + 2: Dimension(np.arange(5), + name='std_dev', units='a.u.', + quantity='std_dev_fit_parms', + dimension_type='temporal')} + validate_dataset_properties(self, self.fit_results[1], np.array(self.fit_results[1]), + title='Fitting_Map_std_dev', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_4D.original_metadata.copy(), + metadata=self.metadata) + def test_fitted_dset(self): # Fourth dataset is the fitted dataset - self.assertEqual(self.fit_results[1].shape, self.data_set_4D.shape) - validate_dataset_properties(self, self.fit_results[1], np.array(self.fit_results[1]), + self.assertEqual(self.fit_results[2].shape, self.data_set_4D.shape) + validate_dataset_properties(self, self.fit_results[2], np.array(self.fit_results[2]), title='fitted_' + self.data_set_4D.title, data_type=self.data_set_4D.data_type, quantity=self.data_set_4D.quantity, modality=self.data_set_4D.modality, units=self.data_set_4D.units, source=self.data_set_4D.source, @@ -147,6 +162,99 @@ def test_fitted_dset(self): metadata=self.metadata) +class Test_4Ddset_1Dfit(unittest.TestCase): + def setUp(self): + self.data_set_cycles, self.xvec = make_3D_dataset(shape=(10, 10, 32), cycles=3) + fitter = SidFitter(self.data_set_cycles, xvec=self.xvec, + fit_fn=return_quad, guess_fn=guess_quad, num_workers=8, + threads=2, return_cov=True, return_fit=True, return_std=True, + km_guess=True, n_clus=10, ind_dims=[0, 1, 3]) + + lb, ub = [-50, -50, -50], [50, 50, 50] + self.fit_results = fitter.do_fit(bounds=(lb, ub), maxfev=100) + + self.metadata = self.data_set_cycles.metadata.copy() + fit_parms_dict = {'fit_parameters_labels': None, + 'fitting_function': inspect.getsource(return_quad), + 'guess_function': inspect.getsource(guess_quad), + 'ind_dims': (0, 1, 3) + } + self.metadata['fit_parms_dict'] = fit_parms_dict + + def test_num_fitter_outputs(self): + self.assertEqual(len(self.fit_results), 4) # add assertion here + + def test_fit_parms_dset(self): + # First dataset would be the fitting parameters dataset + self.assertEqual(self.fit_results[0].shape, (10, 10, 3, 3)) + ## Getting the dimension dict + dim_dict = {0: self.data_set_cycles._axes[0].copy(), 1: self.data_set_cycles._axes[1].copy(), + 2: self.data_set_cycles._axes[3].copy(), + 3: Dimension(np.arange(3), + name='fit_parms', units='a.u.', + quantity='fit_parameters', + dimension_type='temporal')} + + validate_dataset_properties(self, self.fit_results[0], np.array(self.fit_results[0]), + title='Fitting_Map', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_cycles.original_metadata.copy(), + metadata=self.metadata) + + def test_cov_dset(self): + # Second dataset is the covariance dataset + self.assertEqual(self.fit_results[1].shape, (10, 10, 3, 3, 3)) + ## Getting the dim_dict + dim_dict = {0: self.data_set_cycles._axes[0].copy(), 1: self.data_set_cycles._axes[1].copy(), + 2: self.data_set_cycles._axes[3].copy(), + 3: Dimension(np.arange(3), + name='fit_cov_parms_x', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral'), + 4: Dimension(np.arange(3), + name='fit_cov_parms_y', units='a.u.', + quantity='fit_cov_parameters', + dimension_type='spectral') + } + + validate_dataset_properties(self, self.fit_results[1], np.array(self.fit_results[1]), + title='Fitting_Map_Covariance', data_type=DataType.IMAGE_4D, + dimension_dict=dim_dict, + original_metadata=self.data_set_cycles.original_metadata.copy(), + metadata=self.metadata) + + def test_std_dev_dset(self): + # Third dataset is the std_dev dataset + self.assertEqual(self.fit_results[2].shape, (10, 10, 3, 3)) + ## Getting the dim_dict + dim_dict = {0: self.data_set_cycles._axes[0].copy(), 1: self.data_set_cycles._axes[1].copy(), + 2: self.data_set_cycles._axes[3].copy(), + 3: Dimension(np.arange(3), + name='std_dev', units='a.u.', + quantity='std_dev_fit_parms', + dimension_type='temporal')} + validate_dataset_properties(self, self.fit_results[2], np.array(self.fit_results[2]), + title='Fitting_Map_std_dev', data_type=DataType.IMAGE_STACK, + dimension_dict=dim_dict, + original_metadata=self.data_set_cycles.original_metadata.copy(), + metadata=self.metadata) + + def test_fitted_dset(self): + # Fourth dataset is the fitted dataset + shape = [self.data_set_cycles.shape[i] for i in [0, 1, 3, 2]] + self.assertEqual(self.fit_results[3].shape, self.data_set_cycles.shape) + validate_dataset_properties(self, self.fit_results[3], np.array(self.fit_results[3]), + title='fitted_' + self.data_set_cycles.title, + data_type=self.data_set_cycles.data_type, + quantity=self.data_set_cycles.quantity, modality=self.data_set_cycles.modality, + units=self.data_set_cycles.units, source=self.data_set_cycles.source, + dimension_dict=self.data_set_cycles._axes, + original_metadata=self.data_set_cycles.original_metadata, + metadata=self.metadata) + + +# The following functions are used to generate datasets + def return_quad(x, *parms): a, b, c, = parms return a * x ** 2 + b * x + c From b7c7457f9e9216ab185f5c94ee546680cc710bbe Mon Sep 17 00:00:00 2001 From: Mani <55563282+saimani5@users.noreply.github.com> Date: Fri, 8 Apr 2022 15:05:35 -0400 Subject: [PATCH 42/43] Update extras_require.txt Removed scipy and sklearn from extras_require.txt --- extras_require.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/extras_require.txt b/extras_require.txt index fdeb7efd..f4539291 100644 --- a/extras_require.txt +++ b/extras_require.txt @@ -1,4 +1,2 @@ mpi4py pyqt5 -scipy -sklearn \ No newline at end of file From ff6eadb16104ef84f48486e51f0416fcf0b44a3a Mon Sep 17 00:00:00 2001 From: Mani <55563282+saimani5@users.noreply.github.com> Date: Fri, 8 Apr 2022 15:06:39 -0400 Subject: [PATCH 43/43] Update setup.py Removed scipy and sklearn from extra_requirements --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2a22ffef..6afac1eb 100644 --- a/setup.py +++ b/setup.py @@ -69,8 +69,7 @@ # https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-dependencies extras_require={ 'MPI': ["mpi4py"], - 'File_Widgets': ['pyqt5'], - 'fitter': ['scipy', 'sklearn'] + 'File_Widgets': ['pyqt5'] }, # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these