
Inference does not work with PatchDataLoader shuffle=True #8

@ad12

Description

Describe the bug
Using the medsegpy PatchDataLoader with shuffle=True in combination with inference_on_dataset throws an error (see below).

This may also occur with other DataLoader subclasses, but that has not been verified yet.

Error Messages
With num_workers > 0:

Traceback (most recent call last):
  File "swathi_script.py", line 138, in <module>
    test_lms_vs_patch()
  File "swathi_script.py", line 132, in test_lms_vs_patch
    results = inference_on_dataset(model, test_dataloader, evaluator) 
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/evaluator.py", line 146, in inference_on_dataset
    results = {e.__class__.__name__: e.evaluate() for e in evaluator}
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/evaluator.py", line 146, in <dictcomp>
    results = {e.__class__.__name__: e.evaluate() for e in evaluator}
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/sem_seg_evaluation.py", line 219, in evaluate
    stats_string = get_stats_string(self._metrics_manager)
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/sem_seg_evaluation.py", line 31, in get_stats_string
    s += "%s\n" % manager.summary()
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/metrics.py", line 506, in summary
    df = self.data_frame()
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/metrics.py", line 488, in data_frame
    [np.asarray(x) for x in self._scan_data.values()], axis=0
  File "<__array_function__ internals>", line 6, in stack
  File "/home/arjundd/anaconda3/envs/medsegpy_env/lib/python3.6/site-packages/numpy/core/shape_base.py", line 423, in stack
    raise ValueError('need at least one array to stack')
ValueError: need at least one array to stack

With num_workers=0 (this is the trace to debug against, because num_workers > 0 masks the actual error):

Traceback (most recent call last):
  File "swathi_script.py", line 138, in <module>
    test_lms_vs_patch()
  File "swathi_script.py", line 132, in test_lms_vs_patch
    results = inference_on_dataset(model, test_dataloader, evaluator) 
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/evaluation/evaluator.py", line 108, in inference_on_dataset
    for idx, (input, output) in enumerate(iter_loader(model)):
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/data/data_loader.py", line 357, in inference
    **kwargs
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/modeling/model.py", line 120, in inference_generator
    generator_output = next(output_generator)
  File "/home/arjundd/anaconda3/envs/medsegpy_env/lib/python3.6/site-packages/keras/utils/data_utils.py", line 372, in __iter__
    for item in (self[i] for i in range(len(self))):
  File "/home/arjundd/anaconda3/envs/medsegpy_env/lib/python3.6/site-packages/keras/utils/data_utils.py", line 372, in <genexpr>
    for item in (self[i] for i in range(len(self))):
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/data/data_loader.py", line 489, in __getitem__
    inputs, outputs = self._load_batch(self._idxs[start:stop])
  File "/home/arjundd/fc_seg/msk_seg_networks/medsegpy/data/data_loader.py", line 284, in _load_batch
    dataset_dict = dataset_dicts[file_idx]
IndexError: list index out of range

To Reproduce

import os
import time
import logging
# import wandb as wb
# from wandb import magic
# from wandb.keras import WandbCallback
import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2
from keras import backend as K
# from tensorflow_large_model_support import LMSKerasCallback
from medsegpy.config import UNetConfig, UNet3DConfig
from medsegpy.data import build_loader, DatasetCatalog, DefaultDataLoader, PatchDataLoader
from medsegpy.modeling.meta_arch import build_model
from medsegpy.losses import (
    DICE_LOSS,
    dice_loss,
    focal_loss,
    get_training_loss,
)
from medsegpy.evaluation.evaluator import DatasetEvaluator
from medsegpy.evaluation import build_evaluator, inference_on_dataset, SemSegEvaluator
from medsegpy.utils.logger import setup_logger

# setup_logger()

#wb.init(project="benchmark_unet3d", magic=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
sh = logging.StreamHandler()
sh.setLevel(logging.INFO)
logger.addHandler(sh)
os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # run on GPU 3

logger = setup_logger()
logger.info("start test")

cfg2d = UNetConfig()
cfg2d.TRAIN_DATASET = "oai_2d_train"
cfg2d.VAL_DATASET = "oai_2d_val"
cfg2d.TEST_DATASET = "oai_2d_test"
cfg2d.CATEGORIES = (0, (1, 2), 3, (4, 5))
cfg2d.IMG_SIZE = (384, 384, 1)

cfg3d = UNet3DConfig()
cfg3d.TAG = "PatchDataLoader"
cfg3d.TRAIN_DATASET = "oai_3d_sf_whitened_train"
cfg3d.VAL_DATASET = "oai_3d_sf_whitened_val"
cfg3d.TEST_DATASET = "oai_3d_sf_whitened_test"
cfg3d.CATEGORIES = (0, (1, 2), 3, (4, 5))
cfg3d.IMG_SIZE = (384, 384, 4, 1)
cfg3d.NUM_WORKERS = 0
cfg3d.TEST_BATCH_SIZE = 8


def test_lms_vs_patch():
    logger.info("Start Eval Test...")
    loss_func = get_training_loss(DICE_LOSS)

    #cfg3d.IMG_SIZE = (384, 384, 32, 1)
    cfg3d.IMG_SIZE = (384, 384, 4, 1)

    """
    lms = LMSKerasCallback(n_tensors=-1, lb=1, debug=True, debug_level=1) 
    tf.logging.set_verbosity(tf.logging.INFO)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    config.graph_options.rewrite_options.dependency_optimization = \
      rewriter_config_pb2.RewriterConfig.OFF
    config.graph_options.rewrite_options.memory_optimization = \
    rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS
    session = tf.Session(config=config)
    K.set_session(session)
    """

    model = build_model(cfg3d)
    model.compile(
         optimizer='adam',
         loss=loss_func,
         metrics=[dice_loss]
    )

    train_dataloader = build_loader(
        cfg3d,
        cfg3d.TRAIN_DATASET,
        batch_size=1,
        is_test=False,
        shuffle=True,
        drop_last=True,
        use_singlefile=True
    )

    val_dataloader = build_loader(
        cfg3d,
        cfg3d.VAL_DATASET,
        batch_size=1,
        is_test=False,
        shuffle=True,
        drop_last=True,
        use_singlefile=True
    )

    test_dataloader = build_loader(
        cfg3d,
        cfg3d.TEST_DATASET,
        batch_size=8,
        is_test=True,
        shuffle=True,
        drop_last=False,
        use_singlefile=True
    )
    
    # start = time.perf_counter()
    # model.fit_generator(
    #     train_dataloader,
    #     epochs=1,
    #     validation_data=val_dataloader,
    #     workers=4,
    #     use_multiprocessing=True,
    #     verbose=1,
    #     shuffle=False,
    #     #callbacks=[WandbCallback()]
    #     #callbacks=[lms, WandbCallback()]
    # )
    # time_elapsed = time.perf_counter() - start
    # logger.info("LMS training time: {}".format(time_elapsed)) 
    #K.get_session().close()

    model.load_weights('/home/swathii/MedSegPy/benchmarking/unet3d-weights-basic-4.h5')
    evaluator = SemSegEvaluator(cfg3d.TEST_DATASET, cfg3d, save_raw_data=False)
    results = inference_on_dataset(model, test_dataloader, evaluator) 
    print(results)
    with open("unet3d-results-basic-4.txt", "w") as f:
        f.write(str(results))
     
test_lms_vs_patch()

Expected behavior
Given that inference should typically be done without shuffling, I am comfortable (for the time being) adding a check in medsegpy.modeling.model.Model -> inference_generator that verifies the shuffle attribute (if available) is False.

However, this bug requires additional investigation, and shuffle=True should be supported in the long run.
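For reference, a minimal sketch of such a temporary guard, assuming the data loader exposes a shuffle attribute (as PatchDataLoader appears to) and that it would be called at the start of inference_generator; the helper name, attribute name, and call site are assumptions about medsegpy internals, not the actual implementation:

def _check_not_shuffled(data_loader):
    """Raise if the loader was built with shuffle=True (sketch only)."""
    # The "shuffle" attribute name is an assumption about the loader's API.
    if getattr(data_loader, "shuffle", False):
        raise ValueError(
            "inference_generator received a data loader with shuffle=True; "
            "rebuild the loader with shuffle=False until shuffled inference "
            "is supported."
        )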
