You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[2022-07-05 00:40:55,618 - train - INFO] - One GPU or CPU training mode start...
[2022-07-05 00:40:56,108 - train - INFO] - Dataloader instances created. Train datasets: 1850 samples Validation datasets: 1850 samples.
[2022-07-05 00:40:57,098 - train - INFO] - Model created, trainable parameters: 68567386.
[2022-07-05 00:40:57,099 - train - INFO] - Optimizer and lr_scheduler created.
[2022-07-05 00:40:57,100 - train - INFO] - Max_epochs: 100 Log_per_step: 10 Validation_per_step: 50.
[2022-07-05 00:40:57,100 - train - INFO] - Training start...
[2022-07-05 00:40:57,173 - trainer - WARNING] - Training is using GPU 0!
Traceback (most recent call last):
File "train.py", line 162, in <module>
entry_point(config)
File "train.py", line 126, in entry_point
main(config, local_master, logger if local_master else None)
File "train.py", line 74, in main
trainer.train()
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/trainer/trainer.py", line 135, in train
result_dict = self._train_epoch(epoch)
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/trainer/trainer.py", line 199, in _train_epoch
for step_idx, input_data_item in enumerate(self.data_loader):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 345, in __next__
data = self._next_data()
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 838, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 881, in _process_data
data.reraise()
File "/usr/local/lib/python3.7/dist-packages/torch/_utils.py", line 395, in reraise
raise self.exc_type(msg)
IndexError: Caught IndexError in DataLoader worker process 2.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/data_utils/pick_dataset.py", line 111, in __getitem__
boxes_and_transcripts_file = self.get_ann_file(Path(dataitem['file_name']).stem)
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/data_utils/pick_dataset.py", line 100, in get_ann_file
filename = list(self.boxes_and_transcripts_folder.glob(f'**/{basename}.*'))[0]
IndexError: list index out of range
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
File "/usr/lib/python3.7/multiprocessing/queues.py", line 113, in get
return _ForkingPickler.loads(res)
File "/usr/local/lib/python3.7/dist-packages/torch/multiprocessing/reductions.py", line 294, in rebuild_storage_fd
fd = df.detach()
File "/usr/lib/python3.7/multiprocessing/resource_sharer.py", line 58, in detach
return reduction.recv_handle(conn)
File "/usr/lib/python3.7/multiprocessing/reduction.py", line 185, in recv_handle
return recvfds(s, 1)[0]
File "/usr/lib/python3.7/multiprocessing/reduction.py", line 153, in recvfds
msg, ancdata, flags, addr = sock.recvmsg(1, socket.CMSG_SPACE(bytes_size))
ConnectionResetError: [Errno 104] Connection reset by peer
The text was updated successfully, but these errors were encountered:
My issue was caused by parentheses in some file names. My solution is to use the glob module's escape function on basename in get_ann_file and get_image_file: glob.escape(basename). This adds robustness to the code without requiring many modifications.
[2022-07-05 00:40:55,618 - train - INFO] - One GPU or CPU training mode start...
[2022-07-05 00:40:56,108 - train - INFO] - Dataloader instances created. Train datasets: 1850 samples Validation datasets: 1850 samples.
[2022-07-05 00:40:57,098 - train - INFO] - Model created, trainable parameters: 68567386.
[2022-07-05 00:40:57,099 - train - INFO] - Optimizer and lr_scheduler created.
[2022-07-05 00:40:57,100 - train - INFO] - Max_epochs: 100 Log_per_step: 10 Validation_per_step: 50.
[2022-07-05 00:40:57,100 - train - INFO] - Training start...
[2022-07-05 00:40:57,173 - trainer - WARNING] - Training is using GPU 0!
Traceback (most recent call last):
File "train.py", line 162, in <module>
entry_point(config)
File "train.py", line 126, in entry_point
main(config, local_master, logger if local_master else None)
File "train.py", line 74, in main
trainer.train()
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/trainer/trainer.py", line 135, in train
result_dict = self._train_epoch(epoch)
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/trainer/trainer.py", line 199, in _train_epoch
for step_idx, input_data_item in enumerate(self.data_loader):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 345, in __next__
data = self._next_data()
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 838, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 881, in _process_data
data.reraise()
File "/usr/local/lib/python3.7/dist-packages/torch/_utils.py", line 395, in reraise
raise self.exc_type(msg)
IndexError: Caught IndexError in DataLoader worker process 2.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/data_utils/pick_dataset.py", line 111, in __getitem__
boxes_and_transcripts_file = self.get_ann_file(Path(dataitem['file_name']).stem)
File "/content/gdrive/MyDrive/hdr/PICK-pytorch/data_utils/pick_dataset.py", line 100, in get_ann_file
filename = list(self.boxes_and_transcripts_folder.glob(f'**/{basename}.*'))[0]
IndexError: list index out of range
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
File "/usr/lib/python3.7/multiprocessing/queues.py", line 113, in get
return _ForkingPickler.loads(res)
File "/usr/local/lib/python3.7/dist-packages/torch/multiprocessing/reductions.py", line 294, in rebuild_storage_fd
fd = df.detach()
File "/usr/lib/python3.7/multiprocessing/resource_sharer.py", line 58, in detach
return reduction.recv_handle(conn)
File "/usr/lib/python3.7/multiprocessing/reduction.py", line 185, in recv_handle
return recvfds(s, 1)[0]
File "/usr/lib/python3.7/multiprocessing/reduction.py", line 153, in recvfds
msg, ancdata, flags, addr = sock.recvmsg(1, socket.CMSG_SPACE(bytes_size))
ConnectionResetError: [Errno 104] Connection reset by peer
The text was updated successfully, but these errors were encountered: