diff --git a/audiomate/corpus/io/tatoeba.py b/audiomate/corpus/io/tatoeba.py
index 9ec18a7..5fa479c 100644
--- a/audiomate/corpus/io/tatoeba.py
+++ b/audiomate/corpus/io/tatoeba.py
@@ -8,6 +8,7 @@
 from audiomate.utils import download
 from audiomate.utils import textfile
 from . import base
+import time
 
 logger = logutil.getLogger()
 
@@ -137,6 +138,13 @@ def _download_audio_files(self, records, target_path):
             os.makedirs(audio_folder, exist_ok=True)
 
             download_url = 'https://audio.tatoeba.org/sentences/{}/{}.mp3'.format(record[2], record[0])
-            download.download_file(download_url, audio_file)
+            while True:
+                try:
+                    download.download_file(download_url, audio_file)
+                except ConnectionError as e:
+                    logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e)
+                    time.sleep(5)
+                    continue
+                break
 
 
diff --git a/audiomate/corpus/io/voxforge.py b/audiomate/corpus/io/voxforge.py
index d5b464d..58ebf28 100644
--- a/audiomate/corpus/io/voxforge.py
+++ b/audiomate/corpus/io/voxforge.py
@@ -2,6 +2,7 @@
 import re
 import tarfile
 import shutil
+import time
 
 import requests
 
@@ -90,7 +91,14 @@ def download_files(self, file_urls, target_path):
             target_file_path = os.path.join(target_path, file_name)
             url_to_target[file_url] = target_file_path
 
-        dl_result = download.download_files(url_to_target, num_threads=self.num_workers)
+        while True:
+            try:
+                dl_result = download.download_files(url_to_target, num_threads=self.num_workers)
+            except ConnectionError as e:
+                logger.info('Remote end closed connection without response. Trying again in 5 seconds... %s', e)
+                time.sleep(5)
+                continue
+            break
 
         downloaded_files = []
         for url, status, path_or_msg in dl_result: