Skip to content

Commit

Permalink
Merge pull request #70 from WenjieDu/dev
Browse files Browse the repository at this point in the history
Add check_path()
  • Loading branch information
WenjieDu authored Jun 28, 2024
2 parents a57f9e6 + e6d536e commit 46910f8
Showing 1 changed file with 73 additions and 27 deletions.
100 changes: 73 additions & 27 deletions tsdb/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,42 @@
from .logging import logger


def check_path(
    path: str,
    check_exists: bool = False,
) -> str:
    """Check the given path and return its absolute form.

    Parameters
    ----------
    path :
        The path to be checked. A leading "~" (or "~user") is expanded
        to the user's home directory.

    check_exists :
        If True, check if the path exists, and raise an AssertionError
        if the path does not exist.

    Returns
    -------
    checked_path:
        The absolute path of the given path.
    """
    # Expand a leading "~"/"~user" to the home directory.
    # Note: os.path.expanduser only touches the leading component, unlike a
    # plain str.replace("~", ...) which would wrongly substitute every "~"
    # occurring anywhere in the path (e.g. "~/a~b").
    checked_path = os.path.expanduser(path)

    # normalize to an absolute path (also collapses ".." and ".")
    checked_path = os.path.abspath(checked_path)

    if check_exists:
        # kept as an assert (documented contract: raises AssertionError)
        assert os.path.exists(
            checked_path
        ), f"The given path {checked_path} does not exist"

    return checked_path


def pickle_dump(data: object, path: str) -> Optional[str]:
"""Pickle the given object.
Expand All @@ -31,6 +67,9 @@ def pickle_dump(data: object, path: str) -> Optional[str]:
`path` if succeed else None
"""
# check the given path
path = check_path(path)

try:
with open(path, "wb") as f:
pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
Expand All @@ -55,6 +94,8 @@ def pickle_load(path: str) -> object:
Pickled object.
"""
# check the given path
path = check_path(path, check_exists=True)
try:
with open(path, "rb") as f:
data = pickle.load(f)
Expand All @@ -76,9 +117,8 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:
Errors are ignored if ignore_errors is set.
"""
assert os.path.exists(
path
), f"The given path {path} does not exists. Operation aborted."
# check the given path, no need to check if the path exists because ignore_errors is set
path = check_path(path)

try:
if os.path.isdir(path):
Expand All @@ -98,18 +138,19 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:


def determine_data_home():
# default path
default_path = check_path("~/.pypots/tsdb")

# read data_home from the config file
# data_home may be changed by users, hence not necessarily equal to the default path
config = read_configs()
data_home_path = config.get("path", "data_home")
# replace '~' with the absolute path if existing in the path
data_home_path = data_home_path.replace("~", os.path.expanduser("~"))
data_home_path = check_path(data_home_path)

# old cached dataset dir path used in TSDB v0.2
old_cached_dataset_dir_02 = os.path.join(
os.path.expanduser("~"), ".tsdb_cached_datasets"
)
old_cached_dataset_dir_02 = check_path("~/.tsdb_cached_datasets")
# old cached dataset dir path used in TSDB v0.4
old_cached_dataset_dir_04 = os.path.join(os.path.expanduser("~"), ".tsdb")
old_cached_dataset_dir_04 = check_path("~/.tsdb")

if os.path.exists(old_cached_dataset_dir_02) or os.path.exists(
old_cached_dataset_dir_04
Expand All @@ -127,15 +168,18 @@ def determine_data_home():
# use the path directly, may be in a portable disk
cached_dataset_dir = data_home_path
else:
# use the default path for initialization,
# e.g. `data_home_path` in a portable disk but the disk is not connected
default_path = os.path.join(os.path.expanduser("~"), ".pypots", "tsdb")
cached_dataset_dir = default_path
if os.path.abspath(data_home_path) != os.path.abspath(default_path):
# if the preset data_home path does not exist,
# e.g. `data_home_path` is in a portable disk that is not connected
# then use the default path
if check_path(data_home_path) != check_path(default_path):
logger.warning(
f"️ The preset data_home path '{data_home_path}' doesn't exist. "
f"Using the default path '{default_path}'"
f"️ The preset data_home {data_home_path} doesn't exist. "
f"This may be caused by the portable disk not connected."
)
logger.warning(f"‼️ Using the default path {default_path} for now")

cached_dataset_dir = default_path

return cached_dataset_dir


Expand All @@ -151,30 +195,29 @@ def migrate(old_path: str, new_path: str) -> None:
The new path of the dataset.
"""
if not os.path.exists(old_path):
raise FileNotFoundError(f"Given old_path {old_path} does not exist.")
# check both old_path and new_path
old_path = check_path(old_path, check_exists=True)
new_path = check_path(new_path)

# create new_path if not exists
if not os.path.exists(new_path):
# if new_path does not exist, just rename the old_path into it
new_parent_dir = os.path.abspath(os.path.join(new_path, ".."))
if not os.path.exists(new_parent_dir):
os.makedirs(new_parent_dir, exist_ok=True)
os.makedirs(new_path, exist_ok=True)
else:
logger.warning(f"‼️ Note that new_path {new_path} already exists.")

logger.warning(f"‼️ Please note that new_path {new_path} already exists.")
# if new_path exists, we have to move everything from old_path into it
all_old_files = os.listdir(old_path)
for f in all_old_files:
old_f_path = os.path.join(old_path, f)

if os.path.isdir(old_f_path):
new_f_path = os.path.join(new_path, f)
shutil.copytree(old_f_path, new_f_path)
else:
shutil.move(old_f_path, new_path)
shutil.rmtree(old_path, ignore_errors=True)

logger.info(
f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
)
logger.info(f"Successfully migrated {old_path} to {new_path}")
logger.info(f"Purged the old path {old_path}")


def migrate_cache(target_path: str) -> None:
Expand All @@ -186,6 +229,9 @@ def migrate_cache(target_path: str) -> None:
The new path for TSDB to store cached datasets.
"""
# check the target path
target_path = check_path(target_path)

cached_dataset_dir = determine_data_home()
migrate(cached_dataset_dir, target_path)
config_parser = read_configs()
Expand Down

0 comments on commit 46910f8

Please sign in to comment.