Skip to content
This repository has been archived by the owner on Jun 12, 2024. It is now read-only.

add option to use aria2c #20

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Edited by Gavin Gray to optionally use aria2c
"""Download AIST++ videos from AIST Dance Video Database website."""
import argparse
import multiprocessing
import os
import sys
import urllib.request
import shutil
from functools import partial

SOURCE_URL = 'https://aistdancedb.ongaaccel.jp/v1.0.0/video/10M/'
Expand All @@ -26,7 +29,7 @@
def _download(video_url, download_folder):
    """Fetch a single video and store it inside `download_folder`.

    The local file is named after the last path component of `video_url`.

    Args:
        video_url: URL of the video to retrieve.
        download_folder: directory the video is written into.
    """
    file_name = os.path.basename(video_url)
    destination = os.path.join(download_folder, file_name)
    urllib.request.urlretrieve(video_url, destination)

if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Scripts for downloading AIST++ videos.')
Expand All @@ -40,16 +43,36 @@ def _download(video_url, download_folder):
type=int,
default=1,
help='number of threads for multiprocessing.')
parser.add_argument('--aria2c',
                    action='store_true',
                    help='use aria2c to download the videos')
# True when an `aria2c` executable can be found on PATH.
aria2c_exists = shutil.which("aria2c") is not None

args = parser.parse_args()
# Report a missing binary through parser.error() instead of `assert`:
# assertions are stripped when Python runs with -O, which would let the
# problem slip through and surface later as a less helpful failure.
if args.aria2c and not aria2c_exists:
    parser.error("aria2c does not appear to be installed")
os.makedirs(args.download_folder, exist_ok=True)

# Read the list of sequence names (one per line) and close the HTTP response
# as soon as it has been consumed.
with urllib.request.urlopen(LIST_URL) as response:
    seq_names = [line.strip().decode('utf-8') for line in response]
# Drop blank lines so they do not turn into a bogus '.mp4' URL.
seq_names = [seq_name for seq_name in seq_names if seq_name]
# URLs always use '/', so join the parts explicitly rather than through
# os.path.join, whose separator depends on the platform.
base_url = SOURCE_URL.rstrip('/')
video_urls = ['%s/%s.mp4' % (base_url, seq_name) for seq_name in seq_names]

# Download every URL in `video_urls` with a pool of worker processes and
# report progress on stderr.
# NOTE(review): these statements look like the pre-change (removed) side of
# the diff; the same logic appears again under the `else:` branch further
# down. Confirm which copy is live before editing.
# Bind the destination folder so the pool only has to supply the URL.
download_func = partial(_download, download_folder=args.download_folder)
pool = multiprocessing.Pool(processes=args.num_processes)
# imap_unordered yields in completion order, so `i` counts finished downloads.
for i, _ in enumerate(pool.imap_unordered(download_func, video_urls)):
# '\r' returns to the start of the line so the counter is rewritten in place.
sys.stderr.write('\rdownloading %d / %d' % (i + 1, len(video_urls)))
sys.stderr.write('\ndone.\n')
# Download all videos, either by delegating to the external `aria2c` tool or
# with the built-in urllib downloader spread over a process pool.
if args.aria2c:
    import subprocess
    import tempfile
    # aria2c takes its download list from a file with one URL per line. The
    # file lives in a temporary directory that is removed once aria2c exits.
    with tempfile.TemporaryDirectory() as tmpdirname:
        urlsfile = os.path.join(tmpdirname, "aria-urls.txt")
        with open(urlsfile, "w") as f:
            f.write("\n".join(video_urls))
            # Terminate the final entry so the file is a well-formed list.
            f.write("\n")
        # An argument list (no shell) keeps the folder name from being
        # interpreted by a shell. `-c` asks aria2c to continue partially
        # downloaded files.
        completed = subprocess.run([
            "aria2c",
            "-c",
            "--dir=" + args.download_folder,
            "--input-file=" + urlsfile,
            "--max-concurrent-downloads=%i" % args.num_processes,
        ])
    # Surface aria2c failures instead of silently reporting success.
    if completed.returncode != 0:
        sys.stderr.write(
            'aria2c exited with status %d\n' % completed.returncode)
        sys.exit(completed.returncode)
else:
    # Bind the destination folder so the pool only has to supply the URL.
    download_func = partial(_download, download_folder=args.download_folder)
    # The context manager shuts the worker processes down when the loop ends
    # or an exception escapes.
    with multiprocessing.Pool(processes=args.num_processes) as pool:
        # imap_unordered yields in completion order, so `i` counts finished
        # downloads; '\r' rewrites the progress counter in place.
        for i, _ in enumerate(pool.imap_unordered(download_func, video_urls)):
            sys.stderr.write(
                '\rdownloading %d / %d' % (i + 1, len(video_urls)))
    sys.stderr.write('\ndone.\n')