From 256133a3baa4add1cc883fce897c71b1d5d4c107 Mon Sep 17 00:00:00 2001 From: Wenjie Du Date: Sat, 5 Aug 2023 23:37:36 +0800 Subject: [PATCH] refactor: code refactor and release v0.1; --- MANIFEST.in | 1 + setup.py | 9 ++++++--- {tsdb/tests => tests}/__init__.py | 0 {tsdb/tests => tests}/test_tsdb.py | 0 tsdb/__init__.py | 2 +- tsdb/data_processing.py | 18 ++++++++++-------- 6 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 MANIFEST.in rename {tsdb/tests => tests}/__init__.py (100%) rename {tsdb/tests => tests}/test_tsdb.py (100%) diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1eeef06 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +prune tests diff --git a/setup.py b/setup.py index 4423491..32022aa 100644 --- a/setup.py +++ b/setup.py @@ -22,14 +22,17 @@ "Download": "https://github.com/WenjieDu/TSDB/archive/main.zip", }, keywords=[ - "time-series analysis", + "data mining", "time series", + "time-series analysis", "time-series database", "time-series datasets", - "datasets", "database", + "datasets", "dataset downloading", - "data mining", + "imputation", + "classification", + "forecasting", ], packages=find_packages(exclude=["tests"]), include_package_data=True, diff --git a/tsdb/tests/__init__.py b/tests/__init__.py similarity index 100% rename from tsdb/tests/__init__.py rename to tests/__init__.py diff --git a/tsdb/tests/test_tsdb.py b/tests/test_tsdb.py similarity index 100% rename from tsdb/tests/test_tsdb.py rename to tests/test_tsdb.py diff --git a/tsdb/__init__.py b/tsdb/__init__.py index 2c35d09..2844f92 100644 --- a/tsdb/__init__.py +++ b/tsdb/__init__.py @@ -21,7 +21,7 @@ # # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' -__version__ = "0.0.9" +__version__ = "0.1" try: diff --git a/tsdb/data_processing.py b/tsdb/data_processing.py index 5c8d4ba..bb3ea8f 100644 --- a/tsdb/data_processing.py +++ b/tsdb/data_processing.py @@ -47,7 +47,7 @@ def window_truncate(feature_vectors, seq_len): start_indices = numpy.asarray(range(feature_vectors.shape[0] // seq_len)) * seq_len sample_collector = [] for idx in start_indices: - sample_collector.append(feature_vectors[idx: idx + seq_len]) + sample_collector.append(feature_vectors[idx : idx + seq_len]) return numpy.asarray(sample_collector).astype("float32") @@ -105,7 +105,7 @@ def _download_and_extract(url, saving_path): logger.info(f"Successfully downloaded data to {raw_data_saving_path}.") if ( - suffix in supported_compression_format + suffix in supported_compression_format ): # if the file is compressed, then unpack it try: os.makedirs(saving_path, exist_ok=True) @@ -177,7 +177,7 @@ def delete_cached_data(dataset_name=None): # if CACHED_DATASET_DIR exists, then purge if dataset_name is not None: assert ( - dataset_name in AVAILABLE_DATASETS + dataset_name in AVAILABLE_DATASETS ), f"{dataset_name} is not available in TSDB, so it has no cache. Please check your dataset name." dir_to_delete = os.path.join(CACHED_DATASET_DIR, dataset_name) if not os.path.exists(dir_to_delete): @@ -290,13 +290,15 @@ def load_dataset(dataset_name, use_cache=True): ) profile_dir = dataset_name if "ucr_uea_" not in dataset_name else "ucr_uea_datasets" - logger.info(f"You're using dataset {dataset_name}, please cite it properly in your work.\n" - f"You can find its reference information at the below link: \n" - f"https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/{profile_dir}") + logger.info( + f"You're using dataset {dataset_name}, please cite it properly in your work. " + f"You can find its reference information at the below link: \n" + f"https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/{profile_dir}" + ) dataset_saving_path = os.path.join(CACHED_DATASET_DIR, dataset_name) if not os.path.exists( - dataset_saving_path + dataset_saving_path ): # if the dataset is not cached, then download it download_and_extract(dataset_name, dataset_saving_path) else: @@ -320,7 +322,7 @@ def load_dataset(dataset_name, use_cache=True): try: if dataset_name == "physionet_2012": result = load_physionet2012(dataset_saving_path) - if dataset_name == "physionet_2019": + elif dataset_name == "physionet_2019": result = load_physionet2019(dataset_saving_path) elif dataset_name == "electricity_load_diagrams": result = load_electricity(dataset_saving_path)