From 9bfe0bc1017a0c9dad9464de60152b16d473ab85 Mon Sep 17 00:00:00 2001 From: Yousef Moazzam Date: Thu, 2 Apr 2026 11:35:36 +0100 Subject: [PATCH] Explicitly finalise source to clean up hdf5 file backing dataset store An explicit call to the `finalize()` method of the dataset source has been added in two places: - in the method setting up the source and sink for any section that is not the last section (to delete the temporary hdf5 file used as input to the previous section, which has been superseded by the temporary hdf5 file created as output of the previous section) - after the last section has been completed (for the same reason as above, to delete the temporary hdf5 file used as input to the last section, but must be done independently of `_setup_source_sink()` as that method is not called after the last section has completed, so the code added to it won't help with cleanup of the last section's source) Note that, ideally, the use of `weakref.finalize()` in `DataSetStoreReader` would automatically clean up the temporary hdf5 files when reader objects are garbage collected, and that explicit calls to `finalize()` would not be necessary. However, because the file-cleanup logic in the reader's `finalize()` method appears to not be getting triggered as expected, it would appear that garbage collection of reader objects may not be happening for some reason. This will require further investigation. --- httomo/runner/task_runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/httomo/runner/task_runner.py b/httomo/runner/task_runner.py index c198f6869..04d31f45b 100644 --- a/httomo/runner/task_runner.py +++ b/httomo/runner/task_runner.py @@ -72,6 +72,7 @@ def execute(self) -> None: for i, section in enumerate(self._sections): self._execute_section(section, i) gpumem_cleanup() + self.source.finalize() self._log_pipeline(f"Pipeline finished. 
Took {t.elapsed:.3f}s") if self.monitor is not None: @@ -228,9 +229,11 @@ def _setup_source_sink(self, section: Section, idx: int): # we have a store-based sink from the last section - use that to determine # the source for this one assert isinstance(self.sink, ReadableDataSetSink) - self.source = self.sink.make_reader( + new_source = self.sink.make_reader( slicing_dim_section, determine_section_padding(section) ) + self.source.finalize() + self.source = new_source store_backing = determine_store_backing( comm=self.comm,