You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
This is about references/segmentation.
I used the coco dataset exported from universe.roboflow.com formatted as
The `train` folder contains the images together with the annotation data file.
I modified get_coco in coco_utils.py
"""
PATHS = {
"train": ("train", os.path.join("train", "_annotations.coco.json")),
"val": ("valid", os.path.join("valid", "_annotations.coco.json")),
# "train": ("val2017", os.path.join("annotations", "instances_val2017.json"))
}
"""
I can already run the model, but why does it report the error `[rank0]: IndexError: list index out of range`?
"""
D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py:558: UserWarning: This DataLoader will create 16 worker processes in total. Our sug
gested max number of worker in current system is 8 (cpuset is not taken into account), which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
warnings.warn(_create_warning_msg(
Epoch: [0] [ 0/114] eta: 2:01:56 lr: 0.01999473676515026 loss: 4.7775 (4.7775) time: 64.1789 data: 42.8454 max mem: 4487
Epoch: [0] [ 10/114] eta: 0:23:10 lr: 0.01994209594160194 loss: 1.0890 (1.6459) time: 13.3736 data: 3.8976 max mem: 4599
[rank0]: Traceback (most recent call last):
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 330, in
[rank0]: main(args)
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 247, in main
[rank0]: train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq, scaler)
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 108, in train_one_epoch
[rank0]: for image, target in metric_logger.log_every(data_loader, print_freq, header):
[rank0]: File "D:\PycharmProject\pythonProject4\utils.py", line 164, in log_every
[rank0]: for obj in iterable:
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 631, in next
[rank0]: data = self._next_data()
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 1326, in _next_data
[rank0]: return self._process_data(data)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 1372, in _process_data
[rank0]: data.reraise()
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch_utils.py", line 705, in reraise
[rank0]: raise exception
[rank0]: IndexError: Caught IndexError in DataLoader worker process 14.
[rank0]: Original Traceback (most recent call last):
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data_utils\worker.py", line 308, in _worker_loop
[rank0]: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data_utils\fetch.py", line 49, in fetch
[rank0]: data = self.dataset.getitems(possibly_batched_index)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataset.py", line 419, in getitems
[rank0]: return [self.dataset[self.indices[idx]] for idx in indices]
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataset.py", line 419, in
[rank0]: return [self.dataset[self.indices[idx]] for idx in indices]
[rank0]: ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torchvision\datasets\coco.py", line 57, in getitem
[rank0]: image, target = self.transforms(image, target)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\transforms.py", line 25, in call
[rank0]: image, target = t(image, target)
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\coco_utils.py", line 50, in call
[rank0]: masks = convert_coco_poly_to_mask(segmentations, h, w)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\coco_utils.py", line 30, in convert_coco_poly_to_mask
[rank0]: rles = coco_mask.frPyObjects(polygons, height, width)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "pycocotools\_mask.pyx", line 294, in pycocotools._mask.frPyObjects
[rank0]: IndexError: list index out of range
E0115 14:46:53.931000 27912 torch\distributed\elastic\multiprocessing\api.py:826] failed (exitcode: 1) local_rank: 0 (pid: 28072) of binary: D:\ProgramData\anaconda3\envs\sd2\python.exe
Traceback (most recent call last):
File "\?\D:\ProgramData\anaconda3\envs\sd2\Scripts\torchrun-script.py", line 33, in
sys.exit(load_entry_point('torch==2.3.0', 'console_scripts', 'torchrun')())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\elastic\multiprocessing\errors_init_.py", line 347, in wrapper
return f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\run.py", line 879, in main
run(args)
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\run.py", line 870, in run
elastic_launch(
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\launcher\api.py", line 132, in call
return launch_agent(self._config, self._entrypoint, list(args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\launcher\api.py", line 263, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
This is about references/segmentation.
I used the coco dataset exported from universe.roboflow.com formatted as
The `train` folder contains the images together with the annotation data file.
I modified get_coco in coco_utils.py
"""
PATHS = {
"train": ("train", os.path.join("train", "_annotations.coco.json")),
"val": ("valid", os.path.join("valid", "_annotations.coco.json")),
# "train": ("val2017", os.path.join("annotations", "instances_val2017.json"))
}
"""
I can already run the model, but why does it report the error `[rank0]: IndexError: list index out of range`?
"""
D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py:558: UserWarning: This DataLoader will create 16 worker processes in total. Our sug
gested max number of worker in current system is 8 (
cpuset
is not taken into account), which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.warnings.warn(_create_warning_msg(
Epoch: [0] [ 0/114] eta: 2:01:56 lr: 0.01999473676515026 loss: 4.7775 (4.7775) time: 64.1789 data: 42.8454 max mem: 4487
Epoch: [0] [ 10/114] eta: 0:23:10 lr: 0.01994209594160194 loss: 1.0890 (1.6459) time: 13.3736 data: 3.8976 max mem: 4599
[rank0]: Traceback (most recent call last):
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 330, in
[rank0]: main(args)
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 247, in main
[rank0]: train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq, scaler)
[rank0]: File "D:\PycharmProject\pythonProject4\train.py", line 108, in train_one_epoch
[rank0]: for image, target in metric_logger.log_every(data_loader, print_freq, header):
[rank0]: File "D:\PycharmProject\pythonProject4\utils.py", line 164, in log_every
[rank0]: for obj in iterable:
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 631, in next
[rank0]: data = self._next_data()
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 1326, in _next_data
[rank0]: return self._process_data(data)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataloader.py", line 1372, in _process_data
[rank0]: data.reraise()
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch_utils.py", line 705, in reraise
[rank0]: raise exception
[rank0]: IndexError: Caught IndexError in DataLoader worker process 14.
[rank0]: Original Traceback (most recent call last):
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data_utils\worker.py", line 308, in _worker_loop
[rank0]: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
[rank0]: ^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data_utils\fetch.py", line 49, in fetch
[rank0]: data = self.dataset.getitems(possibly_batched_index)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataset.py", line 419, in getitems
[rank0]: return [self.dataset[self.indices[idx]] for idx in indices]
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\utils\data\dataset.py", line 419, in
[rank0]: return [self.dataset[self.indices[idx]] for idx in indices]
[rank0]: ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torchvision\datasets\coco.py", line 57, in getitem
[rank0]: image, target = self.transforms(image, target)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\transforms.py", line 25, in call
[rank0]: image, target = t(image, target)
[rank0]: ^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\coco_utils.py", line 50, in call
[rank0]: masks = convert_coco_poly_to_mask(segmentations, h, w)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "D:\PycharmProject\pythonProject4\coco_utils.py", line 30, in convert_coco_poly_to_mask
[rank0]: rles = coco_mask.frPyObjects(polygons, height, width)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "pycocotools\_mask.pyx", line 294, in pycocotools._mask.frPyObjects
[rank0]: IndexError: list index out of range
E0115 14:46:53.931000 27912 torch\distributed\elastic\multiprocessing\api.py:826] failed (exitcode: 1) local_rank: 0 (pid: 28072) of binary: D:\ProgramData\anaconda3\envs\sd2\python.exe
Traceback (most recent call last):
File "\?\D:\ProgramData\anaconda3\envs\sd2\Scripts\torchrun-script.py", line 33, in
sys.exit(load_entry_point('torch==2.3.0', 'console_scripts', 'torchrun')())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\elastic\multiprocessing\errors_init_.py", line 347, in wrapper
return f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\run.py", line 879, in main
run(args)
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\run.py", line 870, in run
elastic_launch(
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\launcher\api.py", line 132, in call
return launch_agent(self._config, self._entrypoint, list(args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\sd2\Lib\site-packages\torch\distributed\launcher\api.py", line 263, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
./train.py FAILED
Failures:
<NO_OTHER_FAILURES>
Root Cause (first observed failure):
[0]:
time : 2025-01-15_14:46:53
host : YANGZHOUQIGN
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 28072)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
"""
The text was updated successfully, but these errors were encountered: