From 256133a3baa4add1cc883fce897c71b1d5d4c107 Mon Sep 17 00:00:00 2001
From: Wenjie Du <wenjay.du@gmail.com>
Date: Sat, 5 Aug 2023 23:37:36 +0800
Subject: [PATCH] refactor: move tests out of the package, reformat code, and release v0.1

---
 MANIFEST.in                        |  1 +
 setup.py                           |  9 ++++++---
 {tsdb/tests => tests}/__init__.py  |  0
 {tsdb/tests => tests}/test_tsdb.py |  0
 tsdb/__init__.py                   |  2 +-
 tsdb/data_processing.py            | 18 ++++++++++--------
 6 files changed, 18 insertions(+), 12 deletions(-)
 create mode 100644 MANIFEST.in
 rename {tsdb/tests => tests}/__init__.py (100%)
 rename {tsdb/tests => tests}/test_tsdb.py (100%)

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..1eeef06
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+prune tests
diff --git a/setup.py b/setup.py
index 4423491..32022aa 100644
--- a/setup.py
+++ b/setup.py
@@ -22,14 +22,17 @@
         "Download": "https://github.com/WenjieDu/TSDB/archive/main.zip",
     },
     keywords=[
-        "time-series analysis",
+        "data mining",
         "time series",
+        "time-series analysis",
         "time-series database",
         "time-series datasets",
-        "datasets",
         "database",
+        "datasets",
         "dataset downloading",
-        "data mining",
+        "imputation",
+        "classification",
+        "forecasting",
     ],
     packages=find_packages(exclude=["tests"]),
     include_package_data=True,
diff --git a/tsdb/tests/__init__.py b/tests/__init__.py
similarity index 100%
rename from tsdb/tests/__init__.py
rename to tests/__init__.py
diff --git a/tsdb/tests/test_tsdb.py b/tests/test_tsdb.py
similarity index 100%
rename from tsdb/tests/test_tsdb.py
rename to tests/test_tsdb.py
diff --git a/tsdb/__init__.py b/tsdb/__init__.py
index 2c35d09..2844f92 100644
--- a/tsdb/__init__.py
+++ b/tsdb/__init__.py
@@ -21,7 +21,7 @@
 #
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
-__version__ = "0.0.9"
+__version__ = "0.1"
 
 
 try:
diff --git a/tsdb/data_processing.py b/tsdb/data_processing.py
index 5c8d4ba..bb3ea8f 100644
--- a/tsdb/data_processing.py
+++ b/tsdb/data_processing.py
@@ -47,7 +47,7 @@ def window_truncate(feature_vectors, seq_len):
     start_indices = numpy.asarray(range(feature_vectors.shape[0] // seq_len)) * seq_len
     sample_collector = []
     for idx in start_indices:
-        sample_collector.append(feature_vectors[idx: idx + seq_len])
+        sample_collector.append(feature_vectors[idx : idx + seq_len])
 
     return numpy.asarray(sample_collector).astype("float32")
 
@@ -105,7 +105,7 @@ def _download_and_extract(url, saving_path):
     logger.info(f"Successfully downloaded data to {raw_data_saving_path}.")
 
     if (
-            suffix in supported_compression_format
+        suffix in supported_compression_format
     ):  # if the file is compressed, then unpack it
         try:
             os.makedirs(saving_path, exist_ok=True)
@@ -177,7 +177,7 @@ def delete_cached_data(dataset_name=None):
     # if CACHED_DATASET_DIR exists, then purge
     if dataset_name is not None:
         assert (
-                dataset_name in AVAILABLE_DATASETS
+            dataset_name in AVAILABLE_DATASETS
         ), f"{dataset_name} is not available in TSDB, so it has no cache. Please check your dataset name."
         dir_to_delete = os.path.join(CACHED_DATASET_DIR, dataset_name)
         if not os.path.exists(dir_to_delete):
@@ -290,13 +290,15 @@ def load_dataset(dataset_name, use_cache=True):
     )
 
     profile_dir = dataset_name if "ucr_uea_" not in dataset_name else "ucr_uea_datasets"
-    logger.info(f"You're using dataset {dataset_name}, please cite it properly in your work.\n"
-                f"You can find its reference information at the below link: \n"
-                f"https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/{profile_dir}")
+    logger.info(
+        f"You're using dataset {dataset_name}, please cite it properly in your work. "
+        f"You can find its reference information at the below link: \n"
+        f"https://github.com/WenjieDu/TSDB/tree/main/dataset_profiles/{profile_dir}"
+    )
 
     dataset_saving_path = os.path.join(CACHED_DATASET_DIR, dataset_name)
     if not os.path.exists(
-            dataset_saving_path
+        dataset_saving_path
     ):  # if the dataset is not cached, then download it
         download_and_extract(dataset_name, dataset_saving_path)
     else:
@@ -320,7 +322,7 @@ def load_dataset(dataset_name, use_cache=True):
         try:
             if dataset_name == "physionet_2012":
                 result = load_physionet2012(dataset_saving_path)
-            if dataset_name == "physionet_2019":
+            elif dataset_name == "physionet_2019":
                 result = load_physionet2019(dataset_saving_path)
             elif dataset_name == "electricity_load_diagrams":
                 result = load_electricity(dataset_saving_path)