Skip to content

Commit

Permalink
Add file lock during downloading (#590)
Browse files (browse the repository at this point in the history)
* add file lock during downloading

* Remove comment

* add file lock

---------

Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>
  • Loading branch information
wxicu and Zethson authored May 8, 2024
1 parent 396ca00 commit 345a98f
Showing 1 changed file with 27 additions and 22 deletions.
49 changes: 27 additions & 22 deletions pertpy/data/_dataloader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@
from zipfile import ZipFile

import requests
from filelock import FileLock
from lamin_utils import logger
from rich.progress import Progress

Expand Down Expand Up @@ -37,30 +38,34 @@ def _download( # pragma: no cover
download_to_path = (
f"{output_path}{output_file_name}" if str(output_path).endswith("/") else f"{output_path}/{output_file_name}"
)
if Path(download_to_path).exists():
warning = f"File {download_to_path} already exists!"
if not overwrite:
logger.warning(warning)

Path(output_path).mkdir(parents=True, exist_ok=True)
lock_path = f"{output_path}/{output_file_name}.lock"
with FileLock(lock_path):
if Path(download_to_path).exists() and not overwrite:
logger.warning(f"File {download_to_path} already exists!")
return
else:
logger.warning(f"{warning} Overwriting...")

response = requests.get(url, stream=True)
total = int(response.headers.get("content-length", 0))
temp_file_name = f"{download_to_path}.part"

response = requests.get(url, stream=True)
total = int(response.headers.get("content-length", 0))

with Progress(refresh_per_second=100) as progress:
task = progress.add_task("[red]Downloading...", total=total)
with Path(temp_file_name).open("wb") as file:
for data in response.iter_content(block_size):
file.write(data)
progress.update(task, advance=block_size)
progress.update(task, completed=total, refresh=True)

with Progress(refresh_per_second=100) as progress:
task = progress.add_task("[red]Downloading...", total=total)
Path(output_path).mkdir(parents=True, exist_ok=True)
with Path(download_to_path).open("wb") as file:
for data in response.iter_content(block_size):
file.write(data)
progress.update(task, advance=block_size)
Path(temp_file_name).replace(download_to_path)
logger.warning(f"Downloaded and saved to {download_to_path}")

# force the progress bar to 100% at the end
progress.update(task, completed=total, refresh=True)
if is_zip:
output_path = output_path or tempfile.gettempdir()
with ZipFile(download_to_path, "r") as zip_obj:
zip_obj.extractall(path=output_path)
zip_obj.namelist()

if is_zip:
output_path = output_path or tempfile.gettempdir()
with ZipFile(download_to_path, "r") as zip_obj:
zip_obj.extractall(path=output_path)
zip_obj.namelist()
Path(lock_path).unlink()

0 comments on commit 345a98f

Please sign in to comment.