-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from WEgeophysics/dev
config hydro-learn
- Loading branch information
Showing
6 changed files
with
357 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
# -*- coding: utf-8 -*- | ||
# Licence:BSD-3-Clause | ||
# Author: L. Kouadio <etanoyau@gmail.com> | ||
|
||
from __future__ import annotations | ||
import os | ||
import sys | ||
import logging | ||
import random | ||
import warnings | ||
|
||
# set the package name for consistency checker | ||
sys.path.insert(0, os.path.dirname(__file__)) | ||
for p in ('.','..' ,'./hlearn'): | ||
sys.path.insert(0, os.path.abspath(p)) | ||
|
||
# assert package | ||
if __package__ is None: | ||
sys.path.append( os.path.dirname(__file__)) | ||
__package__ ='hlearn' | ||
|
||
# configure the logger file | ||
# from ._hlearnlog import hlearnlog | ||
# Locate the logging configuration file.
# Prefer "hlearn/hlog.yml" relative to this file (source layout) and fall
# back to "hlog.yml" alongside this file (installed layout).
# NOTE: the original used `try: ... raise ... except:` with a bare `raise`
# (which raises RuntimeError, since no exception is active) swallowed by a
# bare `except:`; an explicit existence check is equivalent and does not
# hide unrelated errors.
conffile = os.path.join(os.path.dirname(__file__), "hlearn/hlog.yml")
if not os.path.isfile(conffile):
    conffile = os.path.join(os.path.dirname(__file__), "hlog.yml")
|
||
# generated version by setuptools_scm | ||
__version__ = '0.1.0' | ||
|
||
# # set logging Level | ||
logging.getLogger(__name__)#.setLevel(logging.WARNING) | ||
# disable the matplotlib font manager logger. | ||
logging.getLogger('matplotlib.font_manager').disabled = True | ||
# or just suppress the DEBUG messages but not the others from that logger. | ||
# logging.getLogger('matplotlib.font_manager').setLevel(logging.ERROR) | ||
|
||
# setting up | ||
os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "True") | ||
|
||
# Workaround issue discovered in intel-openmp 2019.5: | ||
# https://github.com/ContinuumIO/anaconda-issues/issues/11294 | ||
os.environ.setdefault("KMP_INIT_AT_FORK", "FALSE") | ||
|
||
# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/ | ||
try: | ||
# This variable is injected in the __builtins__ by the build process. | ||
__HLEARN_SETUP__ # type: ignore | ||
except NameError: | ||
__HLEARN_SETUP__ = False | ||
|
||
if __HLEARN_SETUP__ : | ||
sys.stderr.write("Partial import of hlearn during the build process.\n") | ||
else: | ||
from . import _distributor_init # noqa: F401 | ||
from . import _build # noqa: F401 | ||
from .utils._show_versions import show_versions | ||
|
||
# https://github.com/pandas-dev/pandas
# Let users know if they're missing any of our hard dependencies.
# Every failure is collected first so the user sees all missing packages
# in a single ImportError rather than one at a time.
_main_dependencies = ("numpy", "scipy", "sklearn", "matplotlib",
                      "pandas", "seaborn")
_missing_dependencies = []

for _dependency in _main_dependencies:
    try:
        __import__(_dependency)
    except ImportError as _e:  # pragma: no cover
        # Report "sklearn" under its distribution name "scikit-learn".
        _missing_dependencies.append(
            "{}: {}".format(
                "scikit-learn" if _dependency == "sklearn" else _dependency,
                _e,
            )
        )

if _missing_dependencies:  # pragma: no cover
    raise ImportError(
        "Unable to import required dependencies:\n"
        + "\n".join(_missing_dependencies)
    )

# Drop the loop machinery from the module namespace.
del _main_dependencies, _dependency, _missing_dependencies
|
||
# Try to suppress pandas future warnings | ||
# and reduce verbosity. | ||
# Setup hlearn public API | ||
with warnings.catch_warnings(): | ||
warnings.filterwarnings(action='ignore', category=UserWarning) | ||
import hlearn.externals as sklearn | ||
|
||
from .datasets import ( | ||
fetch_data, | ||
) | ||
from .methods import ( | ||
Structural, | ||
Structures, | ||
MXS, | ||
) | ||
|
||
from .view import ( | ||
EvalPlot, | ||
plotLearningInspections, | ||
plotSilhouette, | ||
plotDendrogram, | ||
plotProjection, | ||
) | ||
|
||
from .utils import ( | ||
read_data, | ||
cleaner, | ||
reshape, | ||
to_numeric_dtypes, | ||
smart_label_classifier, | ||
select_base_stratum , | ||
reduce_samples , | ||
make_MXS_labels, | ||
predict_NGA_labels, | ||
classify_k, | ||
plot_elbow, | ||
plot_clusters, | ||
plot_pca_components, | ||
plot_naive_dendrogram, | ||
plot_learning_curves, | ||
plot_confusion_matrices, | ||
plot_sbs_feature_selection, | ||
plot_regularization_path, | ||
plot_rf_feature_importances, | ||
plot_logging, | ||
plot_silhouette, | ||
plot_profiling, | ||
plot_confidence_in, | ||
) | ||
|
||
try : | ||
from .utils import ( | ||
selectfeatures, | ||
naive_imputer, | ||
naive_scaler, | ||
make_naive_pipe, | ||
bi_selector, | ||
) | ||
except ImportError : | ||
pass | ||
|
||
def setup_module(module):
    """Fixture for the tests to assure globally controllable seeding of RNGs"""
    import numpy as np

    # Honour an externally supplied seed so a test run is reproducible;
    # otherwise draw a fresh one and print it so failures can be replayed.
    _random_seed = os.environ.get("hlearn_SEED", None)
    if _random_seed is None:
        _random_seed = np.random.uniform() * np.iinfo(np.int32).max
    _random_seed = int(_random_seed)
    print("I: Seeding RNGs with %r" % _random_seed)
    # Seed both the NumPy and the stdlib generators with the same value.
    np.random.seed(_random_seed)
    random.seed(_random_seed)
|
||
__doc__= """\ | ||
hydro-learn: An intelligent solver for hydrogeology engineering issues | ||
======================================================================= | ||
Hydro-learn is a Python-based package for solving hydro-geology engineering | ||
issues. From methodologies based on Machine Learning,It brings novel | ||
approaches for reducing numerous losses during the hydrogeological | ||
exploration projects. It allows to: | ||
- reduce the cost of permeability coefficient (k) data collection during the | ||
engineering projects, | ||
- guide drillers for to locating the drilling operations, | ||
- predict the water content in the well such as the level of water inrush, ... | ||
.. _hlearn: https://github.com/WEgeophysics/hydro-learn/ | ||
""" | ||
# __all__ is used to display a few public API. | ||
# the public API is determined | ||
# based on the documentation. | ||
|
||
# Public API. Every name listed here must actually be imported above:
# the original list also contained "plotAnomaly", "vesSelector",
# "erpSelector" and "erpSmartDetector", which are never imported in this
# module and would make `from hlearn import *` fail with AttributeError,
# so they have been removed.
__all__ = [
    "sklearn",
    "fetch_data",
    "Structural",
    "Structures",
    "MXS",
    "EvalPlot",
    "plotLearningInspections",
    "plotSilhouette",
    "plotDendrogram",
    "plotProjection",
    "read_data",
    "plot_confidence_in",
    "reshape",
    "to_numeric_dtypes",
    "smart_label_classifier",
    "select_base_stratum",
    "reduce_samples",
    "make_MXS_labels",
    "predict_NGA_labels",
    "classify_k",
    "plot_elbow",
    "plot_clusters",
    "plot_pca_components",
    "plot_naive_dendrogram",
    "plot_learning_curves",
    "plot_confusion_matrices",
    "plot_sbs_feature_selection",
    "plot_regularization_path",
    "plot_rf_feature_importances",
    "plot_logging",
    "plot_silhouette",
    "plot_profiling",
    "selectfeatures",
    "naive_imputer",
    "naive_scaler",
    "make_naive_pipe",
    "bi_selector",
    "show_versions",
    "cleaner",
]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
""" | ||
Dataset subpackage is used to fetch data from the local machine. | ||
""" | ||
from .sets import ( | ||
load_hlogs, | ||
load_nlogs, | ||
load_mxs, | ||
fetch_data, | ||
) | ||
|
||
__all__=[ | ||
"load_hlogs", | ||
"load_nlogs", | ||
"load_mxs", | ||
"fetch_data", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# -*- coding: utf-8 -*- | ||
# License: BSD-3-Clause | ||
# Author: LKouadio <etanoyau@gmail.com> | ||
|
||
""" | ||
Set all dataset. | ||
""" | ||
from warnings import warn | ||
|
||
from ..utils.funcutils import ( | ||
smart_format | ||
) | ||
from ..exceptions import DatasetError | ||
from .._hlearnlog import hlearnlog | ||
|
||
_logger = hlearnlog().get_hlearn_logger(__name__) | ||
|
||
_DTAGS=( | ||
"hlogs", | ||
"nlogs", | ||
"mxs", | ||
) | ||
|
||
from .dload import ( | ||
load_hlogs, | ||
load_nlogs, | ||
load_mxs, | ||
) | ||
|
||
__all__=[ | ||
|
||
"load_hlogs", | ||
"load_nlogs", | ||
"fetch_data", | ||
"load_mxs", | ||
|
||
] | ||
|
||
def fetch_data (tag, **kws):
    # Normalize the tag (area name / kind of data) first.
    tag = _parse_tags(tag, multi_kind_dataset='nanshan')
    loaders = (load_hlogs, load_nlogs, load_mxs)
    # Derive the accepted tag names from the loader function names.
    names = [f.__name__.replace('load_', '') for f in loaders]
    if tag not in names:
        raise DatasetError(
            f"Unknown data set {tag!r}. Expect {smart_format( names)}")
    func = loaders[names.index(tag)]
    # `func` is always one of the loaders here, so this delegates directly.
    return func (tag=tag, data_names=names, **kws) if callable (func) else None
|
||
|
||
fetch_data.__doc__ ="""\ | ||
Fetch dataset from `tag`. | ||
A tag corresponds to the name area of data collection or each | ||
level of data processing. | ||
Parameters | ||
------------ | ||
tag: str, ['nlogs', 'hlogs', 'mxs', ] | ||
name of the area of data to fetch. | ||
Returns | ||
------- | ||
dict, X, y : frame of :class:`~hlearn.utils.box.Boxspace` object | ||
""" | ||
|
||
def _parse_tags (tag, multi_kind_dataset ='nanshan'):
    """Parse and sanitize `tag` to match the different kinds of datasets.

    Only the multi-kind dataset (default ``'nanshan'``) is allowed a
    two-word tag of the form ``'<area_name> <kind_of_data>'``. For any
    other dataset, a trailing second word is simply discarded. A
    single-word tag that is not a known dataset name is treated as a
    kind of the multi-kind dataset: the area name is prepended and a
    FutureWarning asks callers to pass the prefixed form explicitly.
    """
    tag = str(tag)
    words = tag.strip().split()

    if len(words) == 1:
        # Single word: if it is not a known dataset name, assume it is a
        # kind of the multi-kind dataset and prepend the area name.
        if words[0].lower() not in _DTAGS:
            tag = multi_kind_dataset + ' ' + words[0]
            warn(
                f"Fetching {multi_kind_dataset.title()!r} data without"
                " explicitly prefixing the kind of data with the area"
                " name will raise an error. In future, the argument"
                f" should be '{tag}' instead.",
                FutureWarning,
            )
    elif len(words) > 1:
        # Two or more words are only valid for the multi-kind dataset;
        # otherwise keep just the first word (lower-cased).
        if words[0].lower() != multi_kind_dataset:
            tag = words[0].lower()

    return tag
|
||
from ..utils.funcutils import listing_items_format | ||
|
||
_l=[ "{:<7}: {:<7}()".format(s.upper() , 'load_'+s ) for s in _DTAGS ] | ||
_LST = listing_items_format( | ||
_l, | ||
"Fetch data using 'load_<type_of_data|area_name>'like", | ||
" or using ufunc 'fetch_data (<type_of_data|area_name>)'.", | ||
inline=True , verbose= False, | ||
) | ||
|
File renamed without changes.
Oops, something went wrong.