From 8644612b06ef3c91d7e0666f9e439c9593dd32a7 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Thu, 27 Jul 2023 06:04:23 -0700 Subject: [PATCH] black --- examples/daal4py/adaboost.py | 17 +- examples/daal4py/adagrad_mse.py | 25 +- examples/daal4py/association_rules.py | 15 +- examples/daal4py/bacon_outlier.py | 13 +- examples/daal4py/bf_knn_classification.py | 18 +- examples/daal4py/brownboost.py | 17 +- examples/daal4py/cholesky.py | 13 +- examples/daal4py/correlation_distance.py | 17 +- examples/daal4py/cosine_distance.py | 15 +- examples/daal4py/covariance.py | 13 +- examples/daal4py/covariance_spmd.py | 10 +- examples/daal4py/dbscan.py | 15 +- ...ion_forest_classification_default_dense.py | 23 +- .../decision_forest_classification_hist.py | 23 +- ...decision_forest_classification_traverse.py | 8 +- ...ecision_forest_regression_default_dense.py | 19 +- .../decision_forest_regression_hist.py | 19 +- .../decision_forest_regression_traverse.py | 8 +- .../daal4py/decision_tree_classification.py | 15 +- .../decision_tree_classification_traverse.py | 6 +- examples/daal4py/decision_tree_regression.py | 15 +- .../decision_tree_regression_traverse.py | 6 +- examples/daal4py/distributions_bernoulli.py | 13 +- examples/daal4py/distributions_normal.py | 25 +- examples/daal4py/distributions_uniform.py | 25 +- examples/daal4py/elastic_net.py | 15 +- examples/daal4py/em_gmm.py | 13 +- .../gradient_boosted_classification.py | 27 +- ...radient_boosted_classification_traverse.py | 8 +- .../daal4py/gradient_boosted_regression.py | 28 +- .../gradient_boosted_regression_traverse.py | 8 +- examples/daal4py/implicit_als.py | 13 +- examples/daal4py/kdtree_knn_classification.py | 18 +- examples/daal4py/kmeans.py | 13 +- examples/daal4py/lasso_regression.py | 17 +- examples/daal4py/lbfgs_cr_entr_loss.py | 68 +++- examples/daal4py/lbfgs_mse.py | 24 +- examples/daal4py/linear_regression.py | 15 +- examples/daal4py/log_reg_binary_dense.py | 21 +- examples/daal4py/log_reg_dense.py | 49 ++- examples/daal4py/log_reg_model_builder.py | 24 +- examples/daal4py/logitboost.py | 22 +- examples/daal4py/low_order_moms_dense.py | 29 +- examples/daal4py/low_order_moms_spmd.py | 30 +- examples/daal4py/multivariate_outlier.py | 13 +- examples/daal4py/naive_bayes.py | 15 +- examples/daal4py/normalization_minmax.py | 13 +- examples/daal4py/normalization_zscore.py | 13 +- examples/daal4py/pca.py | 20 +- examples/daal4py/pca_transform.py | 24 +- examples/daal4py/pivoted_qr.py | 13 +- examples/daal4py/qr.py | 15 +- examples/daal4py/quantiles.py | 13 +- examples/daal4py/ridge_regression.py | 15 +- examples/daal4py/saga.py | 39 +- examples/daal4py/sgd_logistic_loss.py | 20 +- examples/daal4py/sgd_mse.py | 23 +- examples/daal4py/sorting.py | 15 +- examples/daal4py/stump_classification.py | 19 +- examples/daal4py/stump_regression.py | 17 +- examples/daal4py/svd.py | 27 +- examples/daal4py/svm.py | 17 +- examples/daal4py/svm_multiclass.py | 27 +- examples/daal4py/univariate_outlier.py | 13 +- examples/sklearnex/n_jobs.py | 4 +- examples/sklearnex/patch_sklearn.py | 8 +- examples/sklearnex/verbose_mode.py | 6 +- examples/utils/spmd_utils.py | 17 +- tests/run_examples.py | 102 ++--- tests/spmd_test_examples.py | 123 +++--- tests/test_examples.py | 379 +++++++++++------- 71 files changed, 1052 insertions(+), 793 deletions(-) diff --git a/examples/daal4py/adaboost.py b/examples/daal4py/adaboost.py index 131c5aed83..60a296c5f9 100644 --- a/examples/daal4py/adaboost.py +++ b/examples/daal4py/adaboost.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Adaboost example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/adaboost_train.csv" testfile = "./data/batch/adaboost_test.csv" nClasses = 2 @@ -56,7 +57,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -67,6 +68,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Adaboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/adagrad_mse.py b/examples/daal4py/adagrad_mse.py index 9156c9470b..23f5ffecf5 100644 --- a/examples/daal4py/adagrad_mse.py +++ b/examples/daal4py/adagrad_mse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py AdaGrad (Adaptive Subgradient Method) example for shared memory systems # using Mean Squared Error objective function @@ -26,14 +26,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/mse.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(3)) @@ -47,11 +48,13 @@ def main(readcsv=read_csv, method='defaultDense'): # configure an AdaGrad object lr = np.array([[1.0]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_adagrad(mse_algo, - learningRate=lr, - accuracyThreshold=0.0000001, - nIterations=niters, - batchSize=1) + sgd_algo = d4p.optimization_solver_adagrad( + mse_algo, + learningRate=lr, + accuracyThreshold=0.0000001, + nIterations=niters, + batchSize=1, + ) # finally do the computation inp = np.array([[8], [2], [1], [4]], dtype=np.double) @@ -67,4 +70,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/association_rules.py b/examples/daal4py/association_rules.py index fd64bd6f7e..aefb2811e7 100644 --- a/examples/daal4py/association_rules.py +++ b/examples/daal4py/association_rules.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py assiciation rules example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/apriori.csv" # configure a association_rules object @@ -58,6 +59,6 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result1 = main() - print('Confidence: (20 first)') + print("Confidence: (20 first)") print(result1.confidence[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/bacon_outlier.py b/examples/daal4py/bacon_outlier.py index 2a4f791763..6d7bccdb4c 100644 --- a/examples/daal4py/bacon_outlier.py +++ b/examples/daal4py/bacon_outlier.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection bacon example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -56,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (Bacon method) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/bf_knn_classification.py b/examples/daal4py/bf_knn_classification.py index c3313a37b8..70a2751f53 100644 --- a/examples/daal4py/bf_knn_classification.py +++ b/examples/daal4py/bf_knn_classification.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Brute Force KNN example for shared memory systems @@ -27,17 +27,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input data set parameters - train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv') - predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv') + train_file = os.path.join("data", "batch", "k_nearest_neighbors_train.csv") + predict_file = os.path.join("data", "batch", "k_nearest_neighbors_test.csv") # Read data. Let's use 5 features per observation nFeatures = 5 @@ -71,6 +72,5 @@ def main(readcsv=read_csv, method='defaultDense'): print("Brute Force kNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print( - "Classification results(observations #30-34):\n", - predict_result.prediction[30:35] + "Classification results(observations #30-34):\n", predict_result.prediction[30:35] ) diff --git a/examples/daal4py/brownboost.py b/examples/daal4py/brownboost.py index 9f34dfa8fa..14fe231fcd 100644 --- a/examples/daal4py/brownboost.py +++ b/examples/daal4py/brownboost.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Brownboost example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/brownboost_train.csv" testfile = "./data/batch/brownboost_test.csv" @@ -55,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -66,6 +67,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Brownboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/cholesky.py b/examples/daal4py/cholesky.py index cfaf6ff275..75d9b76bd6 100644 --- a/examples/daal4py/cholesky.py +++ b/examples/daal4py/cholesky.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py cholesky example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/cholesky.csv" # configure a cholesky object @@ -46,4 +47,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("\nFactor:\n", result.choleskyFactor) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/correlation_distance.py b/examples/daal4py/correlation_distance.py index 56ab78b8b9..2bd4ccefd2 100644 --- a/examples/daal4py/correlation_distance.py +++ b/examples/daal4py/correlation_distance.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py correlation distance example for shared memory systems @@ -27,21 +27,22 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): - data = readcsv(os.path.join('data', 'batch', 'distance.csv'), range(10)) +def main(readcsv=read_csv, method="defaultDense"): + data = readcsv(os.path.join("data", "batch", "distance.csv"), range(10)) # Create algorithm to compute correlation distance (no parameters) algorithm = d4p.correlation_distance() # Computed correlation distance with file or numpy array - res1 = algorithm.compute(os.path.join('data', 'batch', 'distance.csv')) + res1 = algorithm.compute(os.path.join("data", "batch", "distance.csv")) res2 = algorithm.compute(data) assert np.allclose(res1.correlationDistance, res2.correlationDistance) @@ -53,6 +54,6 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print( "\nCorrelation distance (first 15 rows/columns):\n", - res.correlationDistance[0:15, 0:15] + res.correlationDistance[0:15, 0:15], ) print("All looks good!") diff --git a/examples/daal4py/cosine_distance.py b/examples/daal4py/cosine_distance.py index ae8603350c..61da63cf39 100644 --- a/examples/daal4py/cosine_distance.py +++ b/examples/daal4py/cosine_distance.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py cosine distance example for shared memory systems @@ -27,21 +27,22 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): - data = readcsv(os.path.join('data', 'batch', 'distance.csv'), range(10)) +def main(readcsv=read_csv, method="defaultDense"): + data = readcsv(os.path.join("data", "batch", "distance.csv"), range(10)) # Create algorithm to compute cosine distance (no parameters) algorithm = d4p.cosine_distance() # Computed cosine distance with file or numpy array - res1 = algorithm.compute(os.path.join('data', 'batch', 'distance.csv')) + res1 = algorithm.compute(os.path.join("data", "batch", "distance.csv")) res2 = algorithm.compute(data) assert np.allclose(res1.cosineDistance, res2.cosineDistance) diff --git a/examples/daal4py/covariance.py b/examples/daal4py/covariance.py index aa6643a0c9..0fca126ccb 100644 --- a/examples/daal4py/covariance.py +++ b/examples/daal4py/covariance.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/covcormoments_dense.csv" # configure a covariance object @@ -58,4 +59,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("Covariance matrix:\n", res.covariance) print("Mean vector:\n", res.mean) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/covariance_spmd.py b/examples/daal4py/covariance_spmd.py index 70b4b09554..c03e69ba02 100644 --- a/examples/daal4py/covariance_spmd.py +++ b/examples/daal4py/covariance_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for distributed memory systems; SPMD mode # run like this: @@ -29,9 +29,9 @@ def main(): # We know the number of lines in the file # and use this to separate data between processes - skiprows, nrows = get_chunk_params(lines_count=200, - chunks_count=d4p.num_procs(), - chunk_number=d4p.my_procid()) + skiprows, nrows = get_chunk_params( + lines_count=200, chunks_count=d4p.num_procs(), chunk_number=d4p.my_procid() + ) # Each process reads its chunk of the file data = read_csv(infile, sr=skiprows, nr=nrows) diff --git a/examples/daal4py/dbscan.py b/examples/daal4py/dbscan.py index c7def79065..187dd416e6 100644 --- a/examples/daal4py/dbscan.py +++ b/examples/daal4py/dbscan.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py DBSCAN example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/dbscan_dense.csv" epsilon = 0.04 minObservations = 45 @@ -45,7 +46,7 @@ def main(readcsv=read_csv, method='defaultDense'): algo = d4p.dbscan( minObservations=minObservations, epsilon=epsilon, - resultsToCompute='computeCoreIndices|computeCoreObservations' + resultsToCompute="computeCoreIndices|computeCoreObservations", ) # and compute result = algo.compute(data) @@ -71,4 +72,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nFirst 10 cluster core indices:\n", result.coreIndices[0:10]) print("\nFirst 10 cluster core observations:\n", result.coreObservations[0:10]) print("\nNumber of clusters:\n", result.nClusters) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_classification_default_dense.py b/examples/daal4py/decision_forest_classification_default_dense.py index 03713e82c1..21a7c88726 100755 --- a/examples/daal4py/decision_forest_classification_default_dense.py +++ b/examples/daal4py/decision_forest_classification_default_dense.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/df_classification_train.csv" testfile = "./data/batch/df_classification_test.csv" @@ -45,9 +46,9 @@ def main(readcsv=read_csv, method='defaultDense'): minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError' + resultsToCompute="computeOutOfBagError", ) # Read data. Let's use 3 features per observation @@ -61,7 +62,7 @@ def main(readcsv=read_csv, method='defaultDense'): predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -81,11 +82,11 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_classification_hist.py b/examples/daal4py/decision_forest_classification_hist.py index 420c3b72d6..a2be41356b 100755 --- a/examples/daal4py/decision_forest_classification_hist.py +++ b/examples/daal4py/decision_forest_classification_hist.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example of Hist method for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='hist'): +def main(readcsv=read_csv, method="hist"): # input data file infile = "./data/batch/df_classification_train.csv" testfile = "./data/batch/df_classification_test.csv" @@ -47,9 +48,9 @@ def main(readcsv=read_csv, method='hist'): minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError' + resultsToCompute="computeOutOfBagError", ) # Read data. Let's use 3 features per observation @@ -63,7 +64,7 @@ def main(readcsv=read_csv, method='hist'): predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -83,11 +84,11 @@ def main(readcsv=read_csv, method='hist'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_classification_traverse.py b/examples/daal4py/decision_forest_classification_traverse.py index 0013746c45..a3b1ed5d21 100755 --- a/examples/daal4py/decision_forest_classification_traverse.py +++ b/examples/daal4py/decision_forest_classification_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification Tree Traversal example @@ -54,5 +54,5 @@ def printNodes(node_id, nodes, values, level): printTree(treeState.node_ar, treeState.value_ar) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_default_dense.py b/examples/daal4py/decision_forest_regression_default_dense.py index bf77a382a5..708e32328d 100755 --- a/examples/daal4py/decision_forest_regression_default_dense.py +++ b/examples/daal4py/decision_forest_regression_default_dense.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/df_regression_train.csv" testfile = "./data/batch/df_regression_test.csv" @@ -40,10 +41,10 @@ def main(readcsv=read_csv, method='defaultDense'): train_algo = d4p.decision_forest_regression_training( method=method, nTrees=100, - varImportance='MDA_Raw', + varImportance="MDA_Raw", bootstrap=True, engine=d4p.engines_mt2203(seed=777), - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation' + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", ) # Read data. Let's have 13 independent, @@ -75,7 +76,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_hist.py b/examples/daal4py/decision_forest_regression_hist.py index e00926be03..024b31a330 100755 --- a/examples/daal4py/decision_forest_regression_hist.py +++ b/examples/daal4py/decision_forest_regression_hist.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example of Hist method for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='hist'): +def main(readcsv=read_csv, method="hist"): infile = "./data/batch/df_regression_train.csv" testfile = "./data/batch/df_regression_test.csv" @@ -42,10 +43,10 @@ def main(readcsv=read_csv, method='hist'): maxBins=512, minBinSize=1, nTrees=100, - varImportance='MDA_Raw', + varImportance="MDA_Raw", bootstrap=True, engine=d4p.engines_mt2203(seed=777), - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation' + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", ) # Read data. Let's have 13 independent, @@ -77,7 +78,7 @@ def main(readcsv=read_csv, method='hist'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_traverse.py b/examples/daal4py/decision_forest_regression_traverse.py index 9d7925152d..7217c546cd 100755 --- a/examples/daal4py/decision_forest_regression_traverse.py +++ b/examples/daal4py/decision_forest_regression_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression Tree Traversal example @@ -52,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/decision_tree_classification.py b/examples/daal4py/decision_tree_classification.py index 0b9df500c7..320925a345 100644 --- a/examples/daal4py/decision_tree_classification.py +++ b/examples/daal4py/decision_tree_classification.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Classification example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/decision_tree_train.csv" prunefile = "./data/batch/decision_tree_prune.csv" @@ -66,7 +67,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, plabels) = main() print( "\nDecision tree prediction results (first 20 rows):\n", - predict_result.prediction[0:20] + predict_result.prediction[0:20], ) print("\nGround truth (first 20 rows):\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_classification_traverse.py b/examples/daal4py/decision_tree_classification_traverse.py index d3d765bd82..d88db2fdf2 100644 --- a/examples/daal4py/decision_tree_classification_traverse.py +++ b/examples/daal4py/decision_tree_classification_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems @@ -53,4 +53,4 @@ def printNodes(node_id, nodes, values, level): treeState = d4p.getTreeState(train_result.model, treeId, 5) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_regression.py b/examples/daal4py/decision_tree_regression.py index c87b371463..2379e59331 100644 --- a/examples/daal4py/decision_tree_regression.py +++ b/examples/daal4py/decision_tree_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/decision_tree_train.csv" prunefile = "./data/batch/decision_tree_prune.csv" testfile = "./data/batch/decision_tree_test.csv" @@ -67,7 +68,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, ptdata) = main() print( "\nDecision tree prediction results (first 20 rows):\n", - predict_result.prediction[0:20] + predict_result.prediction[0:20], ) print("\nGround truth (first 10 rows):\n", ptdata[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_regression_traverse.py b/examples/daal4py/decision_tree_regression_traverse.py index 3cd61fc16a..e5f4a31c19 100644 --- a/examples/daal4py/decision_tree_regression_traverse.py +++ b/examples/daal4py/decision_tree_regression_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems @@ -53,4 +53,4 @@ def printNodes(node_id, nodes, values, level): treeState = d4p.getTreeState(train_result.model, treeId, 5) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/distributions_bernoulli.py b/examples/daal4py/distributions_bernoulli.py index 7656cc532e..a0eca7b8ff 100644 --- a/examples/daal4py/distributions_bernoulli.py +++ b/examples/daal4py/distributions_bernoulli.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py bernoulli distribution example for shared memory systems @@ -21,8 +21,7 @@ import daal4py as d4p -def main(readcsv=None, method='defaultDense'): - +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_bernoulli(0.5, engine=d4p.engines_mt19937(seed=777)) @@ -32,11 +31,7 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( - data, - [[ - 1.0, 1.000, 1.000, 0.000, 1.000, - 0.000, 1.000, 0.000, 1.000, 0.000 - ]] + data, [[1.0, 1.000, 1.000, 0.000, 1.000, 0.000, 1.000, 0.000, 1.000, 0.000]] ) return data diff --git a/examples/daal4py/distributions_normal.py b/examples/daal4py/distributions_normal.py index 81672e0296..7a7aa3ea4b 100644 --- a/examples/daal4py/distributions_normal.py +++ b/examples/daal4py/distributions_normal.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normal distribution example for shared memory systems @@ -21,8 +21,7 @@ import daal4py as d4p -def main(readcsv=None, method='defaultDense'): - +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_normal(engine=d4p.engines_mt19937(seed=777)) @@ -33,10 +32,20 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( data, - [[ - -0.74104167, -0.13616829, -0.13679562, 2.40385531, -0.33556821, - 0.19041699, -0.61331181, 0.95958821, -0.42301092, 0.09460208 - ]] + [ + [ + -0.74104167, + -0.13616829, + -0.13679562, + 2.40385531, + -0.33556821, + 0.19041699, + -0.61331181, + 0.95958821, + -0.42301092, + 0.09460208, + ] + ], ) return data diff --git a/examples/daal4py/distributions_uniform.py b/examples/daal4py/distributions_uniform.py index 0a79e3f15b..fdccedc908 100644 --- a/examples/daal4py/distributions_uniform.py +++ b/examples/daal4py/distributions_uniform.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py uniform distribution example for shared memory systems @@ -21,8 +21,7 @@ import daal4py as d4p -def main(readcsv=None, method='defaultDense'): - +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_uniform(engine=d4p.engines_mt19937(seed=777)) @@ -33,10 +32,20 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( data, - [[ - 0.22933409, 0.44584412, 0.44559617, 0.9918884, 0.36859825, - 0.57550881, 0.26983509, 0.83136875, 0.33614365, 0.53768455, - ]] + [ + [ + 0.22933409, + 0.44584412, + 0.44559617, + 0.9918884, + 0.36859825, + 0.57550881, + 0.26983509, + 0.83136875, + 0.33614365, + 0.53768455, + ] + ], ) return data diff --git a/examples/daal4py/elastic_net.py b/examples/daal4py/elastic_net.py index e743621321..90af8b9b76 100644 --- a/examples/daal4py/elastic_net.py +++ b/examples/daal4py/elastic_net.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Elastic Net example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -64,7 +65,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nElastic Net prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/em_gmm.py b/examples/daal4py/em_gmm.py index 85c8b01943..8118aec526 100644 --- a/examples/daal4py/em_gmm.py +++ b/examples/daal4py/em_gmm.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py em_gmm example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nComponents = 2 infile = "./data/batch/em_gmm.csv" # We load the data @@ -60,4 +61,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("Means:\n", res.means) for c in res.covariances: print("Covariance:\n", c) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_classification.py b/examples/daal4py/gradient_boosted_classification.py index 404967a022..77a8d99ac6 100644 --- a/examples/daal4py/gradient_boosted_classification.py +++ b/examples/daal4py/gradient_boosted_classification.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Classification example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 3 nClasses = 5 maxIterations = 200 @@ -47,7 +48,7 @@ def main(readcsv=read_csv, method='defaultDense'): maxIterations=maxIterations, minObservationsInLeafNode=minObservationsInLeafNode, featuresPerNode=nFeatures, - varImportance='weight|totalCover|cover|totalGain|gain' + varImportance="weight|totalCover|cover|totalGain|gain", ) # Read data. Let's use 3 features per observation @@ -59,7 +60,7 @@ def main(readcsv=read_csv, method='defaultDense'): # previous version has different interface predict_algo = d4p.gbt_classification_prediction( nClasses=nClasses, - resultsToEvaluate="computeClassLabels|computeClassProbabilities" + resultsToEvaluate="computeClassLabels|computeClassProbabilities", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -77,22 +78,20 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, plabels) = main() print( "\nGradient boosted trees prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) print( "\nGradient boosted trees prediction probabilities (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nvariableImportanceByWeight:\n", train_result.variableImportanceByWeight) print( - "\nvariableImportanceByTotalCover:\n", - train_result.variableImportanceByTotalCover + "\nvariableImportanceByTotalCover:\n", train_result.variableImportanceByTotalCover ) print("\nvariableImportanceByCover:\n", train_result.variableImportanceByCover) print( - "\nvariableImportanceByTotalGain:\n", - train_result.variableImportanceByTotalGain + "\nvariableImportanceByTotalGain:\n", train_result.variableImportanceByTotalGain ) print("\nvariableImportanceByGain:\n", train_result.variableImportanceByGain) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_classification_traverse.py b/examples/daal4py/gradient_boosted_classification_traverse.py index dc96d329a7..0ff20553ba 100644 --- a/examples/daal4py/gradient_boosted_classification_traverse.py +++ b/examples/daal4py/gradient_boosted_classification_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Boosting Classification Tree Traversal example @@ -52,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId, 5) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_regression.py b/examples/daal4py/gradient_boosted_regression.py index fcbc6fc687..4292aceae9 100644 --- a/examples/daal4py/gradient_boosted_regression.py +++ b/examples/daal4py/gradient_boosted_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): maxIterations = 200 # input data file @@ -55,16 +56,17 @@ def main(readcsv=read_csv, method='defaultDense'): predict_result = predict_algo.compute(pdata, train_result.model) # Prediction result provides prediction - ptdata = np.loadtxt(testfile, usecols=range(13, 14), - delimiter=',', ndmin=2, dtype=np.float32) + ptdata = np.loadtxt( + testfile, usecols=range(13, 14), delimiter=",", ndmin=2, dtype=np.float32 + ) # ptdata = np.loadtxt('../tests/unittest_data/gradient_boosted_regression_batch.csv', # delimiter=',', ndmin=2, dtype=np.float32) - if hasattr(ptdata, 'toarray'): + if hasattr(ptdata, "toarray"): ptdata = ptdata.toarray() # to make the next assertion work with scipy's csr_matrix - assert True or \ - np.square(predict_result.prediction - ptdata).mean() < 1e-2, \ - np.square(predict_result.prediction - ptdata).mean() + assert True or np.square(predict_result.prediction - ptdata).mean() < 1e-2, np.square( + predict_result.prediction - ptdata + ).mean() return (train_result, predict_result, ptdata) @@ -73,7 +75,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, ptdata) = main() print( "\nGradient boosted trees prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_regression_traverse.py b/examples/daal4py/gradient_boosted_regression_traverse.py index d83d9bfd3a..b0316eb14d 100644 --- a/examples/daal4py/gradient_boosted_regression_traverse.py +++ b/examples/daal4py/gradient_boosted_regression_traverse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Boosting Regression Tree Traversal example @@ -52,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/implicit_als.py b/examples/daal4py/implicit_als.py index 01407ce048..8750e72512 100644 --- a/examples/daal4py/implicit_als.py +++ b/examples/daal4py/implicit_als.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py implicit_als example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFactors = 2 infile = "./data/batch/implicit_als_dense.csv" # We load the data @@ -62,4 +63,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("Predicted ratings:\n", res.prediction[:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/kdtree_knn_classification.py b/examples/daal4py/kdtree_knn_classification.py index 069c127b02..16cfdec5c3 100644 --- a/examples/daal4py/kdtree_knn_classification.py +++ b/examples/daal4py/kdtree_knn_classification.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py KD-Tree KNN example for shared memory systems @@ -27,17 +27,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input data set parameters - train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv') - predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv') + train_file = os.path.join("data", "batch", "k_nearest_neighbors_train.csv") + predict_file = os.path.join("data", "batch", "k_nearest_neighbors_test.csv") # Read data. Let's use 5 features per observation nFeatures = 5 @@ -71,6 +72,5 @@ def main(readcsv=read_csv, method='defaultDense'): print("KD-tree based kNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print( - "Classification results(observations #30-34):\n", - predict_result.prediction[30:35] + "Classification results(observations #30-34):\n", predict_result.prediction[30:35] ) diff --git a/examples/daal4py/kmeans.py b/examples/daal4py/kmeans.py index e521962078..565de34715 100644 --- a/examples/daal4py/kmeans.py +++ b/examples/daal4py/kmeans.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py K-Means example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/kmeans_dense.csv" nClusters = 20 maxIter = 5 @@ -69,4 +70,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nFirst 10 cluster assignments:\n", result.assignments[0:10]) print("\nFirst 10 dimensions of centroids:\n", result.centroids[:, 0:10]) print("\nObjective function value:\n", result.objectiveFunction) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/lasso_regression.py b/examples/daal4py/lasso_regression.py index c6211030f9..6d8ea91f81 100644 --- a/examples/daal4py/lasso_regression.py +++ b/examples/daal4py/lasso_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Lasso Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -59,7 +60,7 @@ def main(readcsv=read_csv, method='defaultDense'): # the example is used in tests with the scipy.sparse matrix # we use this trick until subtracting a sparse matrix is not supported - if hasattr(ptdata, 'toarray'): + if hasattr(ptdata, "toarray"): ptdata = ptdata.toarray() # this assertion is outdated, will be fixed in next release # assert np.square(predict_result.prediction - np.asarray(ptdata)).mean() < 2.2 @@ -71,7 +72,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nLasso Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/lbfgs_cr_entr_loss.py b/examples/daal4py/lbfgs_cr_entr_loss.py index a28936c70f..329b7ec283 100644 --- a/examples/daal4py/lbfgs_cr_entr_loss.py +++ b/examples/daal4py/lbfgs_cr_entr_loss.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py LBFGS (limited memory Broyden-Fletcher-Goldfarb-Shanno) # example for shared memory systems @@ -27,14 +27,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 6 nClasses = 5 nIterations = 1000 @@ -48,15 +49,16 @@ def main(readcsv=read_csv, method='defaultDense'): nVectors = data.shape[0] # configure a function - func = d4p.optimization_solver_cross_entropy_loss(nClasses, nVectors, - interceptFlag=True) + func = d4p.optimization_solver_cross_entropy_loss( + nClasses, nVectors, interceptFlag=True + ) func.setup(data, dep_data) # configure a algorithm stepLengthSequence = np.array([[stepLength]], dtype=np.double) - alg = d4p.optimization_solver_lbfgs(func, - stepLengthSequence=stepLengthSequence, - nIterations=nIterations) + alg = d4p.optimization_solver_lbfgs( + func, stepLengthSequence=stepLengthSequence, nIterations=nIterations + ) # do the computation nParameters = nClasses * (nFeatures + 1) @@ -76,15 +78,45 @@ def main(readcsv=read_csv, method='defaultDense'): "\nExpected coefficients:\n", np.array( [ - [-2.277], [2.836], [14.985], [0.511], [7.510], [-2.831], [-5.814], - [-0.033], [13.227], [-24.447], [3.730], [10.394], [-10.461], [-0.766], - [0.077], [1.558], [-1.133], [2.884], [-3.825], [7.699], [2.421], - [-0.135], [-6.996], [1.785], [-2.294], [-9.819], [1.692], [-0.725], - [0.069], [-8.41], [1.458], [-3.306], [-4.719], [5.507], [-1.642] + [-2.277], + [2.836], + [14.985], + [0.511], + [7.510], + [-2.831], + [-5.814], + [-0.033], + [13.227], + [-24.447], + [3.730], + [10.394], + [-10.461], + [-0.766], + [0.077], + [1.558], + [-1.133], + [2.884], + [-3.825], + [7.699], + [2.421], + [-0.135], + [-6.996], + [1.785], + [-2.294], + [-9.819], + [1.692], + [-0.725], + [0.069], + [-8.41], + [1.458], + [-3.306], + [-4.719], + [5.507], + [-1.642], ], - dtype=np.double - ) + dtype=np.double, + ), ) print("\nResulting coefficients:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/lbfgs_mse.py b/examples/daal4py/lbfgs_mse.py index 928d801f73..9c5e1fd0e6 100644 --- a/examples/daal4py/lbfgs_mse.py +++ b/examples/daal4py/lbfgs_mse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py LBFGS (limited memory Broyden-Fletcher-Goldfarb-Shanno) # example for shared memory systems @@ -27,14 +27,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/lbfgs.csv" # Read the data, let's have 10 independent variables data = readcsv(infile, range(10)) @@ -48,9 +49,9 @@ def main(readcsv=read_csv, method='defaultDense'): # configure an LBFGS object sls = np.array([[1.0e-4]], dtype=np.double) niters = 1000 - lbfgs_algo = d4p.optimization_solver_lbfgs(mse_algo, - stepLengthSequence=sls, - nIterations=niters) + lbfgs_algo = d4p.optimization_solver_lbfgs( + mse_algo, stepLengthSequence=sls, nIterations=niters + ) # finally do the computation inp = np.array([[100]] * 11, dtype=np.double) @@ -67,10 +68,9 @@ def main(readcsv=read_csv, method='defaultDense'): print( "\nExpected coefficients:\n", np.array( - [[11], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], - dtype=np.double - ) + [[11], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], dtype=np.double + ), ) print("\nResulting coefficients:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/linear_regression.py b/examples/daal4py/linear_regression.py index 06113338df..cc66848eab 100644 --- a/examples/daal4py/linear_regression.py +++ b/examples/daal4py/linear_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Linear Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -65,7 +66,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLinear Regression coefficients:\n", train_result.model.Beta) print( "\nLinear Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_binary_dense.py b/examples/daal4py/log_reg_binary_dense.py index 3d3d153770..794f4dbf0e 100644 --- a/examples/daal4py/log_reg_binary_dense.py +++ b/examples/daal4py/log_reg_binary_dense.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 2 nFeatures = 20 @@ -55,8 +56,10 @@ def main(readcsv=read_csv, method='defaultDense'): predict_result = predict_alg.compute(predict_data, train_result.model) # the prediction result provides prediction - assert predict_result.prediction.shape == (predict_data.shape[0], - train_labels.shape[1]) + assert predict_result.prediction.shape == ( + predict_data.shape[0], + train_labels.shape[1], + ) return (train_result, predict_result, predict_labels) @@ -66,7 +69,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_dense.py b/examples/daal4py/log_reg_dense.py index 663cc9d0ad..44c548e650 100644 --- a/examples/daal4py/log_reg_dense.py +++ b/examples/daal4py/log_reg_dense.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 5 nFeatures = 6 @@ -42,10 +43,9 @@ def main(readcsv=read_csv, method='defaultDense'): train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1)) # set parameters and train - train_alg = d4p.logistic_regression_training(nClasses=nClasses, - penaltyL1=0.1, - penaltyL2=0.1, - interceptFlag=True) + train_alg = d4p.logistic_regression_training( + nClasses=nClasses, penaltyL1=0.1, penaltyL2=0.1, interceptFlag=True + ) train_result = train_alg.compute(train_data, train_labels) # read testing data from file with 6 features per observation @@ -53,24 +53,23 @@ def main(readcsv=read_csv, method='defaultDense'): predict_data = readcsv(testfile, range(nFeatures)) # set parameters and compute predictions - predict_alg = \ - d4p.logistic_regression_prediction( - nClasses=nClasses, - resultsToEvaluate="computeClassLabels|computeClassProbabilities|" - "computeClassLogProbabilities" - ) + predict_alg = d4p.logistic_regression_prediction( + nClasses=nClasses, + resultsToEvaluate="computeClassLabels|computeClassProbabilities|" + "computeClassLogProbabilities", + ) predict_result = predict_alg.compute(predict_data, train_result.model) # the prediction result provides prediction, probabilities and logProbabilities assert predict_result.probabilities.shape == (predict_data.shape[0], nClasses) assert predict_result.logProbabilities.shape == (predict_data.shape[0], nClasses) predict_labels = np.loadtxt( - testfile, - usecols=range(nFeatures, nFeatures + 1), - delimiter=',', - ndmin=2 + testfile, usecols=range(nFeatures, nFeatures + 1), delimiter=",", ndmin=2 + ) + assert ( + np.count_nonzero(predict_result.prediction - predict_labels) + / predict_labels.shape[0] + < 0.025 ) - assert np.count_nonzero(predict_result.prediction - predict_labels) \ - / predict_labels.shape[0] < 0.025 return (train_result, predict_result, predict_labels) @@ -80,15 +79,15 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) print( "\nLogistic regression prediction probabilities (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print( "\nLogistic regression prediction log probabilities (first 10 rows):\n", - predict_result.logProbabilities[0:10] + predict_result.logProbabilities[0:10], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_model_builder.py b/examples/daal4py/log_reg_model_builder.py index fee1386974..642be4d040 100644 --- a/examples/daal4py/log_reg_model_builder.py +++ b/examples/daal4py/log_reg_model_builder.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== import sys print("KNOWN BUG IN EXAMPLES. TODO: fixme") @@ -33,15 +33,15 @@ def main(): # set parameters and train clf = LogisticRegression(fit_intercept=True, max_iter=1000, random_state=0).fit(X, y) - #set parameters and call model builder - builder = d4p.logistic_regression_model_builder(n_classes=n_classes, - n_features=X.shape[1]) + # set parameters and call model builder + builder = d4p.logistic_regression_model_builder( + n_classes=n_classes, n_features=X.shape[1] + ) builder.set_beta(clf.coef_, clf.intercept_) # set parameters and compute predictions predict_alg = d4p.logistic_regression_prediction( - nClasses=n_classes, - resultsToEvaluate="computeClassLabels" + nClasses=n_classes, resultsToEvaluate="computeClassLabels" ) # set parameters and compute predictions predict_result_daal = predict_alg.compute(X, builder.model) @@ -51,19 +51,19 @@ def main(): if __name__ == "__main__": - if daal_check_version(((2021, 'P', 1))): + if daal_check_version(((2021, "P", 1))): (builder, predict_result_daal) = main() print("\nLogistic Regression coefficients:\n", builder.model) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result_daal.prediction[0:10] + predict_result_daal.prediction[0:10], ) print( "\nLogistic regression prediction probabilities (first 10 rows):\n", - predict_result_daal.probabilities[0:10] + predict_result_daal.probabilities[0:10], ) print( "\nLogistic regression prediction log probabilities (first 10 rows):\n", - predict_result_daal.logProbabilities[0:10] + predict_result_daal.logProbabilities[0:10], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/logitboost.py b/examples/daal4py/logitboost.py index 39062c47ca..cd038ac44e 100644 --- a/examples/daal4py/logitboost.py +++ b/examples/daal4py/logitboost.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Logitboost example for shared memory systems @@ -25,21 +25,23 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/logitboost_train.csv" testfile = "./data/batch/logitboost_test.csv" nClasses = 5 # Configure a logitboost training object - train_algo = d4p.logitboost_training(nClasses, maxIterations=100, - accuracyThreshold=0.01) + train_algo = d4p.logitboost_training( + nClasses, maxIterations=100, accuracyThreshold=0.01 + ) # Read data. Let's have 20 independent, # and 1 dependent variable (for each observation) @@ -57,7 +59,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -67,6 +69,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Logitboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/low_order_moms_dense.py b/examples/daal4py/low_order_moms_dense.py index 09def55dbd..6846fe204c 100644 --- a/examples/daal4py/low_order_moms_dense.py +++ b/examples/daal4py/low_order_moms_dense.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for shared memory systems @@ -25,11 +25,12 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) def main(readcsv=read_csv, method="defaultDense"): @@ -43,9 +44,21 @@ def main(readcsv=read_csv, method="defaultDense"): # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - assert all(getattr(res, name).shape == (1, data.shape[1]) for name in - ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']) + assert all( + getattr(res, name).shape == (1, data.shape[1]) + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ] + ) return res @@ -63,4 +76,4 @@ def main(readcsv=read_csv, method="defaultDense"): print("\nVariance:\n", res.variance) print("\nStandard deviation:\n", res.standardDeviation) print("\nVariation:\n", res.variation) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/low_order_moms_spmd.py b/examples/daal4py/low_order_moms_spmd.py index c0bd233326..58d3f1b505 100644 --- a/examples/daal4py/low_order_moms_spmd.py +++ b/examples/daal4py/low_order_moms_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for distributed memory systems; SPMD mode # run like this: @@ -29,24 +29,36 @@ def main(): # We know the number of lines in the file # and use this to separate data between processes - skiprows, nrows = get_chunk_params(lines_count=200, - chunks_count=d4p.num_procs(), - chunk_number=d4p.my_procid()) + skiprows, nrows = get_chunk_params( + lines_count=200, chunks_count=d4p.num_procs(), chunk_number=d4p.my_procid() + ) # Each process reads its chunk of the file data = read_csv(infile, sr=skiprows, nr=nrows) # Create algorithm with distributed mode - alg = d4p.low_order_moments(method='defaultDense', distributed=True) + alg = d4p.low_order_moments(method="defaultDense", distributed=True) # Perform computation res = alg.compute(data) # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - assert all(getattr(res, name).shape == (1, data.shape[1]) for name in - ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']) + assert all( + getattr(res, name).shape == (1, data.shape[1]) + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ] + ) return res diff --git a/examples/daal4py/multivariate_outlier.py b/examples/daal4py/multivariate_outlier.py index b7ef7f1439..9b6d139c0f 100644 --- a/examples/daal4py/multivariate_outlier.py +++ b/examples/daal4py/multivariate_outlier.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection multivariate example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -56,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (Default method) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/naive_bayes.py b/examples/daal4py/naive_bayes.py index 80b5dc21ce..034bf0e29f 100644 --- a/examples/daal4py/naive_bayes.py +++ b/examples/daal4py/naive_bayes.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Naive Bayes Classification example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/naivebayes_train_dense.csv" testfile = "./data/batch/naivebayes_test_dense.csv" @@ -63,7 +64,7 @@ def main(readcsv=read_csv, method='defaultDense'): (presult, plabels) = main() print( "\nNaiveBayes classification results (first 20 observations):\n", - presult.prediction[0:20] + presult.prediction[0:20], ) print("\nGround truth (first 20 observations)\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/normalization_minmax.py b/examples/daal4py/normalization_minmax.py index 64fa94cd9e..41bb24ef2f 100644 --- a/examples/daal4py/normalization_minmax.py +++ b/examples/daal4py/normalization_minmax.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normalization minmax example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/normalization.csv" # configure a covariance object @@ -54,4 +55,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("MinMax result (first 5 rows):\n", res.normalizedData[:5]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/normalization_zscore.py b/examples/daal4py/normalization_zscore.py index 1bc693cb60..51bd3fa493 100644 --- a/examples/daal4py/normalization_zscore.py +++ b/examples/daal4py/normalization_zscore.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normalization zscore example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/normalization.csv" # configure a covariance object @@ -57,4 +58,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("ZScore result (first 5 rows):\n", res.normalizedData[:5]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pca.py b/examples/daal4py/pca.py index 67fa89102b..008ce0cee8 100644 --- a/examples/daal4py/pca.py +++ b/examples/daal4py/pca.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems @@ -25,22 +25,26 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/pca_normalized.csv" # 'normalization' is an optional parameter to PCA; # we use z-score which could be configured differently zscore = d4p.normalization_zscore() # configure a PCA object - algo = d4p.pca(resultsToCompute="mean|variance|eigenvalue", - isDeterministic=True, normalization=zscore) + algo = d4p.pca( + resultsToCompute="mean|variance|eigenvalue", + isDeterministic=True, + normalization=zscore, + ) # let's provide a file directly, not a table/array result1 = algo.compute(infile) @@ -68,4 +72,4 @@ def main(readcsv=read_csv, method='svdDense'): print("\nEigenvectors:\n", result1.eigenvectors) print("\nMeans:\n", result1.means) print("\nVariances:\n", result1.variances) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pca_transform.py b/examples/daal4py/pca_transform.py index 1937b56067..8bf250bb6a 100644 --- a/examples/daal4py/pca_transform.py +++ b/examples/daal4py/pca_transform.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): dataFileName = "data/batch/pca_transform.csv" nComponents = 2 @@ -45,8 +46,9 @@ def main(readcsv=read_csv, method='svdDense'): # Apply transform with whitening because means and eigenvalues are provided pcatrans_algo = d4p.pca_transform(nComponents=nComponents) - pcatrans_res = pcatrans_algo.compute(data, pca_res.eigenvectors, - pca_res.dataForTransform) + pcatrans_res = pcatrans_algo.compute( + data, pca_res.eigenvectors, pca_res.dataForTransform + ) # pca_transform_result objects provides transformedData return (pca_res, pcatrans_res) @@ -58,9 +60,9 @@ def main(readcsv=read_csv, method='svdDense'): # print PCA results print("\nEigenvalues:\n", pca_res.eigenvalues) print("\nEigenvectors:\n", pca_res.eigenvectors) - print("\nEigenvalues kv:\n", pca_res.dataForTransform['eigenvalue']) - print("\nMeans kv:\n", pca_res.dataForTransform['mean']) - print("\nVariances kv:\n", pca_res.dataForTransform['variance']) + print("\nEigenvalues kv:\n", pca_res.dataForTransform["eigenvalue"]) + print("\nMeans kv:\n", pca_res.dataForTransform["mean"]) + print("\nVariances kv:\n", pca_res.dataForTransform["variance"]) # print results of tranform print("\nTransformed data:", pcatrans_res.transformedData) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pivoted_qr.py b/examples/daal4py/pivoted_qr.py index a890c62172..071d465253 100644 --- a/examples/daal4py/pivoted_qr.py +++ b/examples/daal4py/pivoted_qr.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py pivoted QR example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/qr.csv" # configure a pivoted QR object @@ -54,4 +55,4 @@ def main(readcsv=read_csv, method='svdDense'): print("Orthogonal matrix Q (:10):\n", result.matrixQ[:10]) print("Triangular matrix R:\n", result.matrixR) print("\nPermutation matrix P:\n", result.permutationMatrix) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/qr.py b/examples/daal4py/qr.py index b5ed6d852a..b14a2ea437 100644 --- a/examples/daal4py/qr.py +++ b/examples/daal4py/qr.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py QR example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/qr.csv" # configure a QR object @@ -52,7 +53,7 @@ def main(readcsv=read_csv, method='svdDense'): assert np.allclose(result1.matrixQ, result2.matrixQ, atol=1e-07) assert np.allclose(result1.matrixR, result2.matrixR, atol=1e-07) - if hasattr(data, 'toarray'): + if hasattr(data, "toarray"): data = data.toarray() # to make the next assertion work with scipy's csr_matrix assert np.allclose(data, np.matmul(result1.matrixQ, result1.matrixR)) @@ -62,4 +63,4 @@ def main(readcsv=read_csv, method='svdDense'): if __name__ == "__main__": (_, result) = main() print(result) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/quantiles.py b/examples/daal4py/quantiles.py index f2d32a11aa..814a78dfb6 100644 --- a/examples/daal4py/quantiles.py +++ b/examples/daal4py/quantiles.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py quantiles example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/quantiles.csv" # configure a quantiles object @@ -54,4 +55,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("Quantiles:\n", result.quantiles) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/ridge_regression.py b/examples/daal4py/ridge_regression.py index 34527d0889..84268683b3 100644 --- a/examples/daal4py/ridge_regression.py +++ b/examples/daal4py/ridge_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Ridge Regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -64,7 +65,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nRidge Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/saga.py b/examples/daal4py/saga.py index 2398babe48..cc7a0dc4bd 100644 --- a/examples/daal4py/saga.py +++ b/examples/daal4py/saga.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Saga example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/XM.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(1)) @@ -40,22 +41,26 @@ def main(readcsv=read_csv, method='defaultDense'): nVectors = data.shape[0] # configure a Logistic Loss object - logloss_algo = d4p.optimization_solver_logistic_loss(numberOfTerms=nVectors, - penaltyL1=0.3, - penaltyL2=0, - interceptFlag=True, - resultsToCompute='gradient') + logloss_algo = d4p.optimization_solver_logistic_loss( + numberOfTerms=nVectors, + penaltyL1=0.3, + penaltyL2=0, + interceptFlag=True, + resultsToCompute="gradient", + ) logloss_algo.setup(data, dep_data) # configure an Saga object lr = np.array([[0.01]], dtype=np.double) niters = 100000 - saga_algo = d4p.optimization_solver_saga(nIterations=niters, - accuracyThreshold=1e-5, - batchSize=1, - function=logloss_algo, - learningRateSequence=lr, - optionalResultRequired=True) + saga_algo = d4p.optimization_solver_saga( + nIterations=niters, + accuracyThreshold=1e-5, + batchSize=1, + function=logloss_algo, + learningRateSequence=lr, + optionalResultRequired=True, + ) # finally do the computation inp = np.zeros((2, 1), dtype=np.double) @@ -72,4 +77,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sgd_logistic_loss.py b/examples/daal4py/sgd_logistic_loss.py index dd7d9ba7b4..bd98ab895f 100644 --- a/examples/daal4py/sgd_logistic_loss.py +++ b/examples/daal4py/sgd_logistic_loss.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SGD (Stochastic Gradient Descent) example for shared memory systems # using Logisitc Loss objective function @@ -26,14 +26,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/custom.csv" # Read the data, let's have 4 independent variables data = readcsv(infile, range(4)) @@ -47,10 +48,9 @@ def main(readcsv=read_csv, method='defaultDense'): # configure a SGD object lrs = np.array([[0.01]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_sgd(ll_algo, - learningRateSequence=lrs, - accuracyThreshold=0.02, - nIterations=niters) + sgd_algo = d4p.optimization_solver_sgd( + ll_algo, learningRateSequence=lrs, accuracyThreshold=0.02, nIterations=niters + ) # finally do the computation inp = np.array([[1], [1], [1], [1], [1]], dtype=np.double) @@ -66,4 +66,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sgd_mse.py b/examples/daal4py/sgd_mse.py index a044f54bc9..c0e578abe3 100644 --- a/examples/daal4py/sgd_mse.py +++ b/examples/daal4py/sgd_mse.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SGD (Stochastic Gradient Descent) example for shared memory systems # using Mean Squared Error objective function @@ -26,14 +26,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/mse.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(3)) @@ -47,10 +48,12 @@ def main(readcsv=read_csv, method='defaultDense'): # configure a SGD object lrs = np.array([[1.0]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_sgd(mse_algo, - learningRateSequence=lrs, - accuracyThreshold=0.0000001, - nIterations=niters) + sgd_algo = d4p.optimization_solver_sgd( + mse_algo, + learningRateSequence=lrs, + accuracyThreshold=0.0000001, + nIterations=niters, + ) # finally do the computation inp = np.array([[8], [2], [1], [4]], dtype=np.double) @@ -66,4 +69,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sorting.py b/examples/daal4py/sorting.py index 0eb055fa36..12160cae1d 100644 --- a/examples/daal4py/sorting.py +++ b/examples/daal4py/sorting.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py sorting example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/sorting.csv" # configure a sorting object @@ -49,7 +50,7 @@ def main(readcsv=read_csv, method='defaultDense'): assert np.allclose(result1.sortedData, result2.sortedData) assert np.allclose( result1.sortedData, - np.sort(data.toarray() if hasattr(data, 'toarray') else data, axis=0) + np.sort(data.toarray() if hasattr(data, "toarray") else data, axis=0), ) return result1 @@ -58,4 +59,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("Sorted matrix of observations:\n", result.sortedData) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/stump_classification.py b/examples/daal4py/stump_classification.py index 262046a144..eaca5a514b 100644 --- a/examples/daal4py/stump_classification.py +++ b/examples/daal4py/stump_classification.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Stump classification example for shared memory systems @@ -25,19 +25,20 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/stump_train.csv" testfile = "./data/batch/stump_test.csv" # Configure a stump classification training object - train_algo = d4p.stump_classification_training(splitCriterion='gini') + train_algo = d4p.stump_classification_training(splitCriterion="gini") # Read data. Let's have 20 independent, # and 1 dependent variable (for each observation) @@ -55,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -65,6 +66,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Stump classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/stump_regression.py b/examples/daal4py/stump_regression.py index 042e734ed3..ab8ff1088a 100644 --- a/examples/daal4py/stump_regression.py +++ b/examples/daal4py/stump_regression.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Stump regression example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/stump_train.csv" testfile = "./data/batch/stump_test.csv" @@ -55,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -66,6 +67,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Stump regression results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svd.py b/examples/daal4py/svd.py index 059d71e4e8..002d895787 100644 --- a/examples/daal4py/svd.py +++ b/examples/daal4py/svd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVD example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/svd.csv" # configure a SVD object @@ -48,23 +49,23 @@ def main(readcsv=read_csv, method='defaultDense'): # SVD result objects provide leftSingularMatrix, # rightSingularMatrix and singularValues - assert np.allclose(result1.leftSingularMatrix, - result2.leftSingularMatrix, atol=1e-07) - assert np.allclose(result1.rightSingularMatrix, - result2.rightSingularMatrix, atol=1e-07) + assert np.allclose(result1.leftSingularMatrix, result2.leftSingularMatrix, atol=1e-07) + assert np.allclose( + result1.rightSingularMatrix, result2.rightSingularMatrix, atol=1e-07 + ) assert np.allclose(result1.singularValues, result2.singularValues, atol=1e-07) assert result1.singularValues.shape == (1, data.shape[1]) assert result1.rightSingularMatrix.shape == (data.shape[1], data.shape[1]) assert result1.leftSingularMatrix.shape == data.shape - if hasattr(data, 'toarray'): + if hasattr(data, "toarray"): data = data.toarray() # to make the next assertion work with scipy's csr_matrix assert np.allclose( data, np.matmul( np.matmul(result1.leftSingularMatrix, np.diag(result1.singularValues[0])), - result1.rightSingularMatrix - ) + result1.rightSingularMatrix, + ), ) return (data, result1) @@ -73,4 +74,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": (_, result) = main() print(result) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svm.py b/examples/daal4py/svm.py index 023612dbf1..38e35e8677 100644 --- a/examples/daal4py/svm.py +++ b/examples/daal4py/svm.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVM example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/svm_two_class_train_dense.csv" testfile = "./data/batch/svm_two_class_test_dense.csv" @@ -40,7 +41,7 @@ def main(readcsv=read_csv, method='defaultDense'): # Configure a SVM object to use rbf kernel (and adjusting cachesize) kern = d4p.kernel_function_linear() # need an object that lives when creating train_algo - train_algo = d4p.svm_training(method='thunder', kernel=kern, cacheSize=600000000) + train_algo = d4p.svm_training(method="thunder", kernel=kern, cacheSize=600000000) # Read data. Let's use features per observation data = readcsv(infile, range(20)) @@ -70,8 +71,8 @@ def main(readcsv=read_csv, method='defaultDense'): print( "\nSVM classification decision function (first 20 observations):\n", - decision_function[0:20] + decision_function[0:20], ) print("\nSVM classification results (first 20 observations):\n", predict_labels[0:20]) print("\nGround truth (first 20 observations):\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svm_multiclass.py b/examples/daal4py/svm_multiclass.py index 3b6e07dfe9..f56c814f9d 100644 --- a/examples/daal4py/svm_multiclass.py +++ b/examples/daal4py/svm_multiclass.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py multi-class SVM example for shared memory systems @@ -25,28 +25,29 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 20 nClasses = 5 # read training data from file # with nFeatures features per observation and 1 class label - train_file = 'data/batch/svm_multi_class_train_dense.csv' + train_file = "data/batch/svm_multi_class_train_dense.csv" train_data = readcsv(train_file, range(nFeatures)) train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1)) # Create and configure algorithm object algorithm = d4p.multi_class_classifier_training( nClasses=nClasses, - training=d4p.svm_training(method='thunder'), - prediction=d4p.svm_prediction() + training=d4p.svm_training(method="thunder"), + prediction=d4p.svm_prediction(), ) # Pass data to training. Training result provides model @@ -56,15 +57,15 @@ def main(readcsv=read_csv, method='defaultDense'): # Now the prediction stage # Read data - pred_file = 'data/batch/svm_multi_class_test_dense.csv' + pred_file = "data/batch/svm_multi_class_test_dense.csv" pred_data = readcsv(pred_file, range(nFeatures)) pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1)) # Create an algorithm object to predict multi-class SVM values algorithm = d4p.multi_class_classifier_prediction( nClasses, - training=d4p.svm_training(method='thunder'), - prediction=d4p.svm_prediction() + training=d4p.svm_training(method="thunder"), + prediction=d4p.svm_prediction(), ) # Pass data to prediction. Prediction result provides prediction pred_result = algorithm.compute(pred_data, train_result.model) @@ -77,7 +78,7 @@ def main(readcsv=read_csv, method='defaultDense'): (pred_res, pred_labels) = main() print( "\nSVM classification results (first 20 observations):\n", - pred_res.prediction[0:20] + pred_res.prediction[0:20], ) print("\nGround truth (first 20 observations):\n", pred_labels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/univariate_outlier.py b/examples/daal4py/univariate_outlier.py index 9e71fffa57..886e3268a8 100644 --- a/examples/daal4py/univariate_outlier.py +++ b/examples/daal4py/univariate_outlier.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection univariate example for shared memory systems @@ -25,14 +25,15 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -56,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (univariate) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/sklearnex/n_jobs.py b/examples/sklearnex/n_jobs.py index 0d6ef515ec..df999792db 100644 --- a/examples/sklearnex/n_jobs.py +++ b/examples/sklearnex/n_jobs.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # sklearnex doesn't have interface for threading configuration and not following # scikit-learn n_jobs yet. Thus it's requered to use daal4py package to set this. diff --git a/examples/sklearnex/patch_sklearn.py b/examples/sklearnex/patch_sklearn.py index 5b46b366e4..a0261e1b6d 100644 --- a/examples/sklearnex/patch_sklearn.py +++ b/examples/sklearnex/patch_sklearn.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # Calling scikit-learn patch - this would enable acceleration on all # enabled algorithms. This is most straight forward way of patching @@ -38,10 +38,10 @@ # Direct import of functions in way aligned with scikit-learn from sklearnex.neighbors import NearestNeighbors -#Patching can be enabled for selected algorithms/estimators only +# Patching can be enabled for selected algorithms/estimators only patch_sklearn(["DBSCAN"]) -#Remaining non modified scikit-learn codes +# Remaining non modified scikit-learn codes from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler diff --git a/examples/sklearnex/verbose_mode.py b/examples/sklearnex/verbose_mode.py index 6d3bf6c828..7716e32e14 100644 --- a/examples/sklearnex/verbose_mode.py +++ b/examples/sklearnex/verbose_mode.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # sklearnex can help you debug your aplications by printing messages on it's invocation # to allow you to see if stock of accelerated version was used. @@ -23,7 +23,7 @@ sys.exit() import logging -logging.getLogger('sklearnex').setLevel(logging.INFO) +logging.getLogger("sklearnex").setLevel(logging.INFO) # Calling scikit-learn patch - this would enable acceleration on all enabled algorithms from sklearnex import patch_sklearn diff --git a/examples/utils/spmd_utils.py b/examples/utils/spmd_utils.py index 26c7de0dff..a5bb96e889 100644 --- a/examples/utils/spmd_utils.py +++ b/examples/utils/spmd_utils.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # Example showing reading of file in few chunks, this reader is used in SPMD examples @@ -23,20 +23,23 @@ import pandas def read_csv(f, c=None, sr=0, nr=None, t=np.float64): - return pandas.read_csv(f, usecols=c, skiprows=sr, nrows=nr, - delimiter=',', header=None, dtype=t) + return pandas.read_csv( + f, usecols=c, skiprows=sr, nrows=nr, delimiter=",", header=None, dtype=t + ) + except: # fall back to numpy loadtxt def read_csv(f, c=None, sr=0, nr=np.iinfo(np.int64).max, t=np.float64): - res = np.genfromtxt(f, usecols=c, delimiter=',', - skip_header=sr, max_rows=nr, dtype=t) + res = np.genfromtxt( + f, usecols=c, delimiter=",", skip_header=sr, max_rows=nr, dtype=t + ) if res.ndim == 1: return res[:, np.newaxis] return res def get_chunk_params(lines_count, chunks_count, chunk_number): - 'returns count of rows to skip from beginning of file and count of rows to read' + "returns count of rows to skip from beginning of file and count of rows to read" min_nrows = (int)(lines_count / chunks_count) rest_rows = lines_count - min_nrows * chunks_count is_tail = rest_rows > chunk_number diff --git a/tests/run_examples.py b/tests/run_examples.py index d9a8a092ae..329f951aaa 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -188,10 +188,12 @@ def check_library(rule): req_os = defaultdict(lambda: []) -skiped_files = ['log_reg_model_builder.py', - 'n_jobs.py', - 'verbose_mode.py', - 'patch_sklearn.py'] +skiped_files = [ + "log_reg_model_builder.py", + "n_jobs.py", + "verbose_mode.py", + "patch_sklearn.py", +] def get_exe_cmd(ex, nodist, nostream): @@ -220,7 +222,7 @@ def get_exe_cmd(ex, nodist, nostream): return None if not check_library(req_library[os.path.basename(ex)]): return None - if not nodist and ex.endswith('spmd.py'): + if not nodist and ex.endswith("spmd.py"): if IS_WIN: return 'mpiexec -localonly -n 4 "' + sys.executable + '" "' + ex + '"' return 'mpirun -n 4 "' + sys.executable + '" "' + ex + '"' @@ -235,50 +237,58 @@ def run(exdir, logdir, nodist=False, nostream=False): os.makedirs(logdir) for dirpath, dirnames, filenames in os.walk(exdir): for script in filenames: - if script.endswith('.py') and script not in ['__init__.py']: + if script.endswith(".py") and script not in ["__init__.py"]: n += 1 if script in skiped_files: - print(strftime("%H:%M:%S", gmtime()) + '\tKNOWN BUG IN EXAMPLES\t' + script) + print( + strftime("%H:%M:%S", gmtime()) + + "\tKNOWN BUG IN EXAMPLES\t" + + script + ) else: - logfn = jp(logdir, script.replace(".py", ".res")) - with open(logfn, "w") as logfile: - print("\n##### " + jp(dirpath, script)) - execute_string = get_exe_cmd(jp(dirpath, script), nodist, nostream) - if execute_string: - os.chdir(dirpath) - proc = subprocess.Popen( - execute_string - if IS_WIN - else ["/bin/bash", "-c", execute_string], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=False, - ) - out = proc.communicate()[0] - logfile.write(out.decode("ascii")) - if proc.returncode: - print(out) - print( - strftime("%H:%M:%S", gmtime()) + "\tFAILED" - "\t" + script + "\twith errno" - "\t" + str(proc.returncode) - ) - out = proc.communicate()[0] - logfile.write(out.decode('ascii')) - if proc.returncode: - print(out) - print( - strftime("%H:%M:%S", gmtime()) + '\tFAILED' - '\t' + script + '\twith errno' - '\t' + str(proc.returncode) - ) - else: - success += 1 - print(strftime("%H:%M:%S", gmtime()) + '\t' - 'PASSED\t' + script) - else: - success += 1 - print(strftime("%H:%M:%S", gmtime()) + '\tSKIPPED\t' + script) + logfn = jp(logdir, script.replace(".py", ".res")) + with open(logfn, "w") as logfile: + print("\n##### " + jp(dirpath, script)) + execute_string = get_exe_cmd( + jp(dirpath, script), nodist, nostream + ) + if execute_string: + os.chdir(dirpath) + proc = subprocess.Popen( + execute_string + if IS_WIN + else ["/bin/bash", "-c", execute_string], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=False, + ) + out = proc.communicate()[0] + logfile.write(out.decode("ascii")) + if proc.returncode: + print(out) + print( + strftime("%H:%M:%S", gmtime()) + "\tFAILED" + "\t" + script + "\twith errno" + "\t" + str(proc.returncode) + ) + out = proc.communicate()[0] + logfile.write(out.decode("ascii")) + if proc.returncode: + print(out) + print( + strftime("%H:%M:%S", gmtime()) + "\tFAILED" + "\t" + script + "\twith errno" + "\t" + str(proc.returncode) + ) + else: + success += 1 + print( + strftime("%H:%M:%S", gmtime()) + "\t" + "PASSED\t" + script + ) + else: + success += 1 + print(strftime("%H:%M:%S", gmtime()) + "\tSKIPPED\t" + script) return success, n diff --git a/tests/spmd_test_examples.py b/tests/spmd_test_examples.py index 217f2833e2..2f6b5519b4 100644 --- a/tests/spmd_test_examples.py +++ b/tests/spmd_test_examples.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== import os @@ -24,25 +24,26 @@ import numpy as np from test_examples import add_test, np_read_csv - class Base(): + class Base: def test_svd_spmd(self): import svd_spmd as ex + (data, result) = self.call(ex) self.assertTrue( np.allclose( data, np.matmul( np.matmul( - result.leftSingularMatrix, - np.diag(result.singularValues[0]) + result.leftSingularMatrix, np.diag(result.singularValues[0]) ), - result.rightSingularMatrix - ) + result.rightSingularMatrix, + ), ) ) def test_qr_spmd(self): import qr_spmd as ex + (data, result) = self.call(ex) self.assertTrue(np.allclose(data, np.matmul(result.matrixQ, result.matrixR))) @@ -50,60 +51,67 @@ def test_kmeans_spmd(self): nClusters = 10 maxIter = 25 - data = np.loadtxt("./data/distributed/kmeans_dense.csv", delimiter=',') + data = np.loadtxt("./data/distributed/kmeans_dense.csv", delimiter=",") rpp = int(data.shape[0] / d4p.num_procs()) - spmd_data = data[rpp * d4p.my_procid():rpp * d4p.my_procid() + rpp, :] - - for init_method in ['plusPlusDense', - 'parallelPlusDense', - 'deterministicDense']: - batch_init_res = d4p.kmeans_init(nClusters=nClusters, - method=init_method).compute(data) - spmd_init_res = d4p.kmeans_init(nClusters=nClusters, - method=init_method, - distributed=True).compute(spmd_data) - - if init_method in ['parallelPlusDense']: - print("Warning: It is well known " - "that results of parallelPlusDense init " - "does not match with batch algorithm") + spmd_data = data[rpp * d4p.my_procid() : rpp * d4p.my_procid() + rpp, :] + + for init_method in [ + "plusPlusDense", + "parallelPlusDense", + "deterministicDense", + ]: + batch_init_res = d4p.kmeans_init( + nClusters=nClusters, method=init_method + ).compute(data) + spmd_init_res = d4p.kmeans_init( + nClusters=nClusters, method=init_method, distributed=True + ).compute(spmd_data) + + if init_method in ["parallelPlusDense"]: + print( + "Warning: It is well known " + "that results of parallelPlusDense init " + "does not match with batch algorithm" + ) else: reason = "Initial centroids with " + init_method reason += " does not match with batch algorithm" self.assertTrue( np.allclose(batch_init_res.centroids, spmd_init_res.centroids), - reason + reason, ) batch_res = d4p.kmeans( - nClusters=nClusters, - maxIterations=maxIter).compute(data, batch_init_res.centroids) + nClusters=nClusters, maxIterations=maxIter + ).compute(data, batch_init_res.centroids) spmd_res = d4p.kmeans( - nClusters=nClusters, - maxIterations=maxIter, - distributed=True).compute(spmd_data, spmd_init_res.centroids) - - if init_method in ['parallelPlusDense']: - print("Warning: It is well known " - "that results of parallelPlusDense init " - "does not match with batch algorithm") + nClusters=nClusters, maxIterations=maxIter, distributed=True + ).compute(spmd_data, spmd_init_res.centroids) + + if init_method in ["parallelPlusDense"]: + print( + "Warning: It is well known " + "that results of parallelPlusDense init " + "does not match with batch algorithm" + ) else: reason = "Final centroids with " + init_method reason += " does not match with batch algorithm" self.assertTrue( - np.allclose(batch_res.centroids, spmd_res.centroids), - reason + np.allclose(batch_res.centroids, spmd_res.centroids), reason ) def test_dbscan_spmd(self): epsilon = 0.04 minObservations = 45 - data = np_read_csv(os.path.join(".", 'data', 'batch', 'dbscan_dense.csv')) + data = np_read_csv(os.path.join(".", "data", "batch", "dbscan_dense.csv")) - batch_algo = d4p.dbscan(minObservations=minObservations, - epsilon=epsilon, - resultsToCompute='computeCoreIndices') + batch_algo = d4p.dbscan( + minObservations=minObservations, + epsilon=epsilon, + resultsToCompute="computeCoreIndices", + ) batch_result = batch_algo.compute(data) rpp = int(data.shape[0] / d4p.num_procs()) @@ -111,8 +119,9 @@ def test_dbscan_spmd(self): node_range = range(node_stride, node_stride + rpp) node_data = data[node_range, :] - spmd_algo = d4p.dbscan(minObservations=minObservations, - epsilon=epsilon, distributed=True) + spmd_algo = d4p.dbscan( + minObservations=minObservations, epsilon=epsilon, distributed=True + ) spmd_result = spmd_algo.compute(node_data) # clusters can get different indexes in batch and spmd algos, @@ -126,16 +135,28 @@ def test_dbscan_spmd(self): if not batch_result.assignments[i][0] in cluster_index_dict: cluster_index_dict[batch_result.assignments[i][0]] = right left = cluster_index_dict[batch_result.assignments[i][0]] - self.assertTrue( - left == right - ) + self.assertTrue(left == right) gen_examples = [ - ('covariance_spmd', 'covariance.csv', 'covariance'), - ('low_order_moms_spmd', 'low_order_moms_dense.csv', - lambda r: np.vstack((r.minimum, r.maximum, r.sum, r.sumSquares, - r.sumSquaresCentered, r.mean, r.secondOrderRawMoment, - r.variance, r.standardDeviation, r.variation))), + ("covariance_spmd", "covariance.csv", "covariance"), + ( + "low_order_moms_spmd", + "low_order_moms_dense.csv", + lambda r: np.vstack( + ( + r.minimum, + r.maximum, + r.sum, + r.sumSquares, + r.sumSquaresCentered, + r.mean, + r.secondOrderRawMoment, + r.variance, + r.standardDeviation, + r.variation, + ) + ), + ), ] for example in gen_examples: @@ -153,5 +174,5 @@ def tearDownClass(cls): def call(self, ex): return ex.main() - if __name__ == '__main__': + if __name__ == "__main__": unittest.main() diff --git a/tests/test_examples.py b/tests/test_examples.py index c1a2d98c2f..1a395e8f16 100755 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== import os import sys @@ -35,7 +35,7 @@ # second is minor+patch - 0110, # third item is status - B daal_version = get_daal_version() -print('DAAL version:', daal_version) +print("DAAL version:", daal_version) def check_version(rule, target): @@ -54,7 +54,7 @@ def check_version(rule, target): def check_libraries(rule): for rule_item in rule: try: - __import__(rule_item, fromlist=['']) + __import__(rule_item, fromlist=[""]) except ImportError: return False return True @@ -63,8 +63,8 @@ def check_libraries(rule): # function reading file and returning numpy array def np_read_csv(f, c=None, s=0, n=np.iinfo(np.int64).max, t=np.float64): if s == 0 and n == np.iinfo(np.int64).max: - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n, dtype=t) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -74,8 +74,9 @@ def np_read_csv(f, c=None, s=0, n=np.iinfo(np.int64).max, t=np.float64): # function reading file and returning pandas DataFrame def pd_read_csv(f, c=None, s=0, n=None, t=np.float64): - return pd.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pd.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) # function reading file and returning scipy.sparse.csr_matrix @@ -88,11 +89,10 @@ def add_test(cls, e, f=None, attr=None, ver=(0, 0), req_libs=[]): @unittest.skipUnless( check_version(ver, daal_version), - str(ver) + " not supported in this library version " + str(daal_version) + str(ver) + " not supported in this library version " + str(daal_version), ) @unittest.skipUnless( - check_libraries(req_libs), - "cannot import required libraries " + str(req_libs) + check_libraries(req_libs), "cannot import required libraries " + str(req_libs) ) def testit(self): ex = importlib.import_module(e) @@ -102,57 +102,73 @@ def testit(self): actual = attr(result) if callable(attr) else getattr(result, attr) self.assertTrue( np.allclose(actual, testdata, atol=1e-05), - msg="Discrepancy found: {}".format(np.abs(actual - testdata).max()) + msg="Discrepancy found: {}".format(np.abs(actual - testdata).max()), ) else: self.assertTrue(True) - setattr(cls, 'test_' + e, testit) + + setattr(cls, "test_" + e, testit) -class Base(): +class Base: """ We also use generic functions to test these, they get added later. """ def test_svd(self): import svd as ex + (data, result) = self.call(ex) - self.assertTrue(np.allclose(data, - np.matmul( - np.matmul(result.leftSingularMatrix, - np.diag(result.singularValues[0])), - result.rightSingularMatrix) - ) - ) + self.assertTrue( + np.allclose( + data, + np.matmul( + np.matmul( + result.leftSingularMatrix, np.diag(result.singularValues[0]) + ), + result.rightSingularMatrix, + ), + ) + ) def test_svd_stream(self): import svd_streaming as ex + result = self.call(ex) - data = np.loadtxt("./data/distributed/svd_1.csv", delimiter=',') + data = np.loadtxt("./data/distributed/svd_1.csv", delimiter=",") for f in ["./data/distributed/svd_{}.csv".format(i) for i in range(2, 5)]: - data = np.append(data, np.loadtxt(f, delimiter=','), axis=0) - self.assertTrue(np.allclose(data, - np.matmul(np.matmul(result.leftSingularMatrix, - np.diag(result.singularValues[0])), - result.rightSingularMatrix))) + data = np.append(data, np.loadtxt(f, delimiter=","), axis=0) + self.assertTrue( + np.allclose( + data, + np.matmul( + np.matmul( + result.leftSingularMatrix, np.diag(result.singularValues[0]) + ), + result.rightSingularMatrix, + ), + ) + ) def test_qr(self): import qr as ex + (data, result) = self.call(ex) self.assertTrue(np.allclose(data, np.matmul(result.matrixQ, result.matrixR))) def test_qr_stream(self): import qr_streaming as ex + result = self.call(ex) - data = np.loadtxt("./data/distributed/qr_1.csv", delimiter=',') + data = np.loadtxt("./data/distributed/qr_1.csv", delimiter=",") for f in ["./data/distributed/qr_{}.csv".format(i) for i in range(2, 5)]: - data = np.append(data, np.loadtxt(f, delimiter=','), axis=0) + data = np.append(data, np.loadtxt(f, delimiter=","), axis=0) self.assertTrue(np.allclose(data, np.matmul(result.matrixQ, result.matrixR))) def test_svm(self): - testdata = np_read_csv(os.path.join(unittest_data_path, "svm.csv"), - range(1)) + testdata = np_read_csv(os.path.join(unittest_data_path, "svm.csv"), range(1)) import svm as ex + (decision_result, _, _) = self.call(ex) left = np.absolute(decision_result - testdata).max() right = np.absolute(decision_result.max() - decision_result.min()) * 0.05 @@ -160,103 +176,174 @@ def test_svm(self): gen_examples = [ - ('adaboost', None, None, (2020, 'P', 0)), - ('adagrad_mse', 'adagrad_mse.csv', 'minimum'), - ('association_rules', 'association_rules.csv', 'confidence'), - ('bacon_outlier', 'multivariate_outlier.csv', lambda r: r[1].weights), - ('brownboost', None, None, (2020, 'P', 0)), - ('correlation_distance', 'correlation_distance.csv', - lambda r: [[np.amin(r.correlationDistance)], - [np.amax(r.correlationDistance)], - [np.mean(r.correlationDistance)], - [np.average(r.correlationDistance)]]), - ('cosine_distance', 'cosine_distance.csv', - lambda r: [[np.amin(r.cosineDistance)], - [np.amax(r.cosineDistance)], - [np.mean(r.cosineDistance)], - [np.average(r.cosineDistance)]]), + ("adaboost", None, None, (2020, "P", 0)), + ("adagrad_mse", "adagrad_mse.csv", "minimum"), + ("association_rules", "association_rules.csv", "confidence"), + ("bacon_outlier", "multivariate_outlier.csv", lambda r: r[1].weights), + ("brownboost", None, None, (2020, "P", 0)), + ( + "correlation_distance", + "correlation_distance.csv", + lambda r: [ + [np.amin(r.correlationDistance)], + [np.amax(r.correlationDistance)], + [np.mean(r.correlationDistance)], + [np.average(r.correlationDistance)], + ], + ), + ( + "cosine_distance", + "cosine_distance.csv", + lambda r: [ + [np.amin(r.cosineDistance)], + [np.amax(r.cosineDistance)], + [np.mean(r.cosineDistance)], + [np.average(r.cosineDistance)], + ], + ), # ('gradient_boosted_regression', 'gradient_boosted_regression.csv', # lambda x: x[1].prediction), - ('cholesky', 'cholesky.csv', 'choleskyFactor'), - ('covariance', 'covariance.csv', 'covariance'), - ('covariance_streaming', 'covariance.csv', 'covariance'), - ('decision_forest_classification_default_dense', - None, lambda r: r[1].prediction, (2023, 'P', 1)), - ('decision_forest_classification_hist', - None, lambda r: r[1].prediction, (2023, 'P', 1)), - ('decision_forest_regression_default_dense', - 'decision_forest_regression.csv', lambda r: r[1].prediction, (2023, 'P', 1)), - ('decision_forest_regression_hist', - 'decision_forest_regression.csv', lambda r: r[1].prediction, (2023, 'P', 1)), - ('decision_forest_regression_default_dense', - 'decision_forest_regression_20230101.csv', - lambda r: r[1].prediction, (2023, 'P', 101)), - ('decision_forest_regression_hist', - 'decision_forest_regression_20230101.csv', - lambda r: r[1].prediction, (2023, 'P', 101)), - ('decision_tree_classification', 'decision_tree_classification.csv', - lambda r: r[1].prediction), - ('decision_tree_regression', 'decision_tree_regression.csv', - lambda r: r[1].prediction), - ('distributions_bernoulli',), - ('distributions_normal',), - ('distributions_uniform',), - ('em_gmm', 'em_gmm.csv', lambda r: r.covariances[0]), - ('model_builders_lightgbm', None, None, - ((2020, 'P', 2), (2021, 'B', 109)), ['lightgbm']), - ('model_builders_xgboost', None, None, - ((2020, 'P', 2), (2021, 'B', 109)), ['xgboost']), - ('model_builders_catboost', None, None, - (2021, 'P', 4), ['catboost']), - ('gradient_boosted_classification',), - ('gradient_boosted_regression',), - ('implicit_als', 'implicit_als.csv', 'prediction'), - ('kdtree_knn_classification', None, None), - ('kmeans', 'kmeans.csv', 'centroids'), - ('lbfgs_cr_entr_loss', 'lbfgs_cr_entr_loss.csv', 'minimum'), - ('lbfgs_mse', 'lbfgs_mse.csv', 'minimum'), - ('linear_regression', 'linear_regression.csv', - lambda r: r[1].prediction), - ('linear_regression_streaming', 'linear_regression.csv', - lambda r: r[1].prediction), + ("cholesky", "cholesky.csv", "choleskyFactor"), + ("covariance", "covariance.csv", "covariance"), + ("covariance_streaming", "covariance.csv", "covariance"), + ( + "decision_forest_classification_default_dense", + None, + lambda r: r[1].prediction, + (2023, "P", 1), + ), + ( + "decision_forest_classification_hist", + None, + lambda r: r[1].prediction, + (2023, "P", 1), + ), + ( + "decision_forest_regression_default_dense", + "decision_forest_regression.csv", + lambda r: r[1].prediction, + (2023, "P", 1), + ), + ( + "decision_forest_regression_hist", + "decision_forest_regression.csv", + lambda r: r[1].prediction, + (2023, "P", 1), + ), + ( + "decision_forest_regression_default_dense", + "decision_forest_regression_20230101.csv", + lambda r: r[1].prediction, + (2023, "P", 101), + ), + ( + "decision_forest_regression_hist", + "decision_forest_regression_20230101.csv", + lambda r: r[1].prediction, + (2023, "P", 101), + ), + ( + "decision_tree_classification", + "decision_tree_classification.csv", + lambda r: r[1].prediction, + ), + ( + "decision_tree_regression", + "decision_tree_regression.csv", + lambda r: r[1].prediction, + ), + ("distributions_bernoulli",), + ("distributions_normal",), + ("distributions_uniform",), + ("em_gmm", "em_gmm.csv", lambda r: r.covariances[0]), + ( + "model_builders_lightgbm", + None, + None, + ((2020, "P", 2), (2021, "B", 109)), + ["lightgbm"], + ), + ( + "model_builders_xgboost", + None, + None, + ((2020, "P", 2), (2021, "B", 109)), + ["xgboost"], + ), + ("model_builders_catboost", None, None, (2021, "P", 4), ["catboost"]), + ("gradient_boosted_classification",), + ("gradient_boosted_regression",), + ("implicit_als", "implicit_als.csv", "prediction"), + ("kdtree_knn_classification", None, None), + ("kmeans", "kmeans.csv", "centroids"), + ("lbfgs_cr_entr_loss", "lbfgs_cr_entr_loss.csv", "minimum"), + ("lbfgs_mse", "lbfgs_mse.csv", "minimum"), + ("linear_regression", "linear_regression.csv", lambda r: r[1].prediction), + ("linear_regression_streaming", "linear_regression.csv", lambda r: r[1].prediction), # return when Logistic Regression will be fixed # ('log_reg_binary_dense', 'log_reg_binary_dense.csv', # lambda r: r[1].prediction), - ('log_reg_binary_dense', None, None), - ('log_reg_dense',), - ('logitboost', None, None, (2020, 'P', 0)), - ('low_order_moms_dense', 'low_order_moms_dense.csv', - lambda r: np.vstack((r.minimum, r.maximum, r.sum, r.sumSquares, - r.sumSquaresCentered, r.mean, r.secondOrderRawMoment, - r.variance, r.standardDeviation, r.variation))), - ('low_order_moms_streaming', 'low_order_moms_dense.csv', - lambda r: np.vstack((r.minimum, r.maximum, r.sum, r.sumSquares, - r.sumSquaresCentered, r.mean, r.secondOrderRawMoment, - r.variance, r.standardDeviation, r.variation))), - ('multivariate_outlier', 'multivariate_outlier.csv', - lambda r: r[1].weights), - ('naive_bayes', 'naive_bayes.csv', lambda r: r[0].prediction), - ('naive_bayes_streaming', 'naive_bayes.csv', lambda r: r[0].prediction), - ('normalization_minmax', 'normalization_minmax.csv', 'normalizedData'), - ('normalization_zscore', 'normalization_zscore.csv', 'normalizedData'), - ('pca', 'pca.csv', 'eigenvectors'), - ('pca_transform', 'pca_transform.csv', lambda r: r[1].transformedData), - ('pivoted_qr', 'pivoted_qr.csv', 'matrixR'), - ('quantiles', 'quantiles.csv', 'quantiles'), - ('ridge_regression', 'ridge_regression.csv', lambda r: r[0].prediction), - ('ridge_regression_streaming', 'ridge_regression.csv', - lambda r: r[0].prediction), - ('saga', None, None, (2019, 'P', 3)), - ('sgd_logistic_loss', 'sgd_logistic_loss.csv', 'minimum'), - ('sgd_mse', 'sgd_mse.csv', 'minimum'), - ('sorting',), - ('stump_classification', None, None, (2020, 'P', 0)), - ('stump_regression', None, None, (2020, 'P', 0)), - ('svm_multiclass', 'svm_multiclass.csv', lambda r: r[0].prediction), - ('univariate_outlier', 'univariate_outlier.csv', lambda r: r[1].weights), - ('dbscan', 'dbscan.csv', 'assignments', (2019, 'P', 5)), - ('lasso_regression', None, None, (2019, 'P', 5)), - ('elastic_net', None, None, ((2020, 'P', 1), (2021, 'B', 105))), + ("log_reg_binary_dense", None, None), + ("log_reg_dense",), + ("logitboost", None, None, (2020, "P", 0)), + ( + "low_order_moms_dense", + "low_order_moms_dense.csv", + lambda r: np.vstack( + ( + r.minimum, + r.maximum, + r.sum, + r.sumSquares, + r.sumSquaresCentered, + r.mean, + r.secondOrderRawMoment, + r.variance, + r.standardDeviation, + r.variation, + ) + ), + ), + ( + "low_order_moms_streaming", + "low_order_moms_dense.csv", + lambda r: np.vstack( + ( + r.minimum, + r.maximum, + r.sum, + r.sumSquares, + r.sumSquaresCentered, + r.mean, + r.secondOrderRawMoment, + r.variance, + r.standardDeviation, + r.variation, + ) + ), + ), + ("multivariate_outlier", "multivariate_outlier.csv", lambda r: r[1].weights), + ("naive_bayes", "naive_bayes.csv", lambda r: r[0].prediction), + ("naive_bayes_streaming", "naive_bayes.csv", lambda r: r[0].prediction), + ("normalization_minmax", "normalization_minmax.csv", "normalizedData"), + ("normalization_zscore", "normalization_zscore.csv", "normalizedData"), + ("pca", "pca.csv", "eigenvectors"), + ("pca_transform", "pca_transform.csv", lambda r: r[1].transformedData), + ("pivoted_qr", "pivoted_qr.csv", "matrixR"), + ("quantiles", "quantiles.csv", "quantiles"), + ("ridge_regression", "ridge_regression.csv", lambda r: r[0].prediction), + ("ridge_regression_streaming", "ridge_regression.csv", lambda r: r[0].prediction), + ("saga", None, None, (2019, "P", 3)), + ("sgd_logistic_loss", "sgd_logistic_loss.csv", "minimum"), + ("sgd_mse", "sgd_mse.csv", "minimum"), + ("sorting",), + ("stump_classification", None, None, (2020, "P", 0)), + ("stump_regression", None, None, (2020, "P", 0)), + ("svm_multiclass", "svm_multiclass.csv", lambda r: r[0].prediction), + ("univariate_outlier", "univariate_outlier.csv", lambda r: r[1].weights), + ("dbscan", "dbscan.csv", "assignments", (2019, "P", 5)), + ("lasso_regression", None, None, (2019, "P", 5)), + ("elastic_net", None, None, ((2020, "P", 1), (2021, "B", 105))), ] for example in gen_examples: @@ -268,6 +355,7 @@ class TestExNpyArray(Base, unittest.TestCase): We run and validate all the examples but read data with numpy, so working natively on a numpy arrays. """ + def call(self, ex): return ex.main(readcsv=np_read_csv) @@ -277,6 +365,7 @@ class TestExPandasDF(Base, unittest.TestCase): We run and validate all the examples but read data with pandas, so working natively on a pandas DataFrame """ + def call(self, ex): return ex.main(readcsv=pd_read_csv) @@ -287,31 +376,39 @@ class TestExCSRMatrix(Base, unittest.TestCase): We also let algos use CSR method (some algos ignore the method argument since they do not specifically support CSR). """ + def call(self, ex): # some algos do not support CSR matrices - if ex.__name__.startswith('sorting'): + if ex.__name__.startswith("sorting"): self.skipTest("not supporting CSR") - if any(ex.__name__.startswith(x) for x in ['adaboost', - 'brownboost', - 'stump_classification', - 'model_builders', - 'decision_forest']): + if any( + ex.__name__.startswith(x) + for x in [ + "adaboost", + "brownboost", + "stump_classification", + "model_builders", + "decision_forest", + ] + ): self.skipTest("not supporting CSR") - method = \ - 'singlePassCSR' if any(x in ex.__name__ - for x in ['low_order_moms', 'covariance']) \ - else 'fastCSR' + method = ( + "singlePassCSR" + if any(x in ex.__name__ for x in ["low_order_moms", "covariance"]) + else "fastCSR" + ) # cannot use fastCSR ofr implicit als - if 'implicit_als' in ex.__name__: - method = 'defaultDense' + if "implicit_als" in ex.__name__: + method = "defaultDense" # kmeans have no special method for CSR - if 'kmeans' in ex.__name__: - method = 'randomDense' - if hasattr(ex, 'dflt_method'): - method = ex.dflt_method.replace('defaultDense', - 'fastCSR').replace('Dense', 'CSR') + if "kmeans" in ex.__name__: + method = "randomDense" + if hasattr(ex, "dflt_method"): + method = ex.dflt_method.replace("defaultDense", "fastCSR").replace( + "Dense", "CSR" + ) return ex.main(readcsv=csr_read_csv, method=method) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()