Skip to content
Merged
49 changes: 34 additions & 15 deletions packs/proc/processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import BinaryIO
from typing import Generic
from typing import Optional
from datetime import datetime

# imports start from MULE/
from packs.core.core_utils import flatten
Expand Down Expand Up @@ -341,36 +342,54 @@ def save_data(event_information : np.ndarray,



def check_save_path(save_path : str,
overwrite : bool):
def check_save_path(save_path: str,
overwrite: bool,
max_iterations : Optional[int] = 100) -> str:
'''
Checks that the save_path exists. Checks if it is valid/doesn't already exist
and if it does, either `overwrite` it or create an additional file with a number added.
Checks that the save_path directory exists, then either returns the path unmodified
(if overwrite is True) or generates a unique save path by inserting a datetime stamp
(YYYYMMDD_HHMMSS) before the file extension. If a file with that datetime name already
exists, a counter suffix is appended.

Parameters
----------

save_path (str) : Path to saved file
overwrite (bool) : Boolean for overwriting pre-existing files
overwrite (bool) : If True, returns save_path unmodified once its directory is confirmed to exist.
If False, appends '_YYYYMMDD_HHMMSS' to the stem, plus '_N'
if needed.
max_iterations (int): Maximum number of iterations to find a unique filename before raising a RuntimeError

Returns
-------
save_path (str) : Valid path to saved file, either unmodified or altered to add '_N'
where N is number of loops it had to do before finding a valid N
save_path (str) : Valid path to saved file, either unmodified or with datetime
stamp and optional counter appended

Raises
------
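FileNotFoundError : If the directory of save_path does not exist
RuntimeError : If overwrite is False and no unique filename is found within max_iterations attempts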
'''
if not os.path.exists(os.path.dirname(save_path)):
raise FileNotFoundError(2, 'Save path not found', os.path.dirname(save_path))

name, ext = os.path.splitext(save_path)
counter = 1
if overwrite:
return save_path


if overwrite == False:
while os.path.exists(save_path):
save_path = name + str(counter) + ext
if not overwrite:
name, ext = os.path.splitext(save_path)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
dated_path = f"{name}_{timestamp}{ext}"

if not os.path.exists(dated_path):
return dated_path

counter = 1
while os.path.exists(f"{name}_{timestamp}_{counter}{ext}"):
if counter >= max_iterations:
raise RuntimeError(f"Too many save files with the same timestamp: {dated_path}")
counter += 1
if counter > 100:
raise RuntimeError("Writing to file went over 100 loops to find a unique name. Sort out your files!")

return f"{name}_{timestamp}_{counter}{ext}"

return save_path

Expand Down
Empty file.
30 changes: 21 additions & 9 deletions packs/tests/proc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_changing_config_order(config, inpt, output, comparison, MULE_dir, data_
Test that ensures that changing the order of the input config parameters
does not affect the output.
"""
# ensure path is correct
# Build absolute paths for all files
file_path = data_dir + inpt
save_path = data_dir + output
comparison_path = data_dir + comparison
Expand All @@ -53,28 +53,40 @@ def test_changing_config_order(config, inpt, output, comparison, MULE_dir, data_
cnfg = configparser.ConfigParser()
cnfg.read(config_path)

# Rebuild the section in new order
new_order = ["save_path", "file_path", "wavedump_edition", "process"]
all_keys = list(cnfg["required"].keys())
extra_keys = [k for k in all_keys if k not in new_order]
Comment thread
jwaiton marked this conversation as resolved.

# create a new config with the same parameters but in a different order
reordered = configparser.ConfigParser()
reordered.add_section("required")

for key in new_order:
for key in new_order + extra_keys:
reordered.set("required", key, cnfg.get("required", key))

reordered.set('required', 'file_path', "'" + file_path + "'") # need to add comments around for config reasons

# override the file_path and save_path to test specific paths
reordered.set('required', 'file_path', f"'{file_path}'")
reordered.set('required', 'save_path', f"'{save_path}'")

# Write back
# Copy over any other sections without changing their order
for section in cnfg.sections():
if section != "required":
reordered.add_section(section)
for key, value in cnfg.items(section):
reordered.set(section, key, value)

with open(config_path, "w") as f:
reordered.write(f)

# run processing pack decode
# Run MULE proc with reordered config
run_pack = [sys.executable, MULE_dir + "/bin/mule", "proc", config_path]
subprocess.run(run_pack)
# check that the resulting dataframe is as expected

# check that the output is as expected
assert load_evt_info(save_path).equals(load_evt_info(comparison_path))
assert load_rwf_info(save_path, samples).equals(load_rwf_info(comparison_path, samples))

# restore config to original
finally:
# rewrite config file to original state
with open(config_path, "w") as f:
f.write(original_content)
Comment thread
jwaiton marked this conversation as resolved.
36 changes: 21 additions & 15 deletions packs/tests/processing_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import datetime
import os
import sys
import re

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -35,6 +37,9 @@
from hypothesis import given
from hypothesis.strategies import integers


from unittest.mock import patch, MagicMock

@given(integers(min_value = 1, max_value = 1000000))
def test_rwf_type_has_correct_shape(samples):
x = rwf_type(samples)
Expand Down Expand Up @@ -153,26 +158,27 @@ def test_save_path_exists():


def test_ensure_new_path_created(data_dir):
data_path = data_dir + 'three_channels_WD2.h5'
found_path = check_save_path(data_path, overwrite=False)

data_path = data_dir + 'three_channels_WD2.h5'
new_data_path = data_dir + 'three_channels_WD21.h5'

found_path = check_save_path(data_path, overwrite = False)
assert found_path != data_path
assert found_path.endswith('.h5')
assert re.search(r'_\d{8}_\d{6}', found_path), "Expected datetime stamp in filename"
assert not os.path.exists(found_path), "Path should not already exist"
Comment on lines +164 to +167
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is no longer direct testing, which is fine but it should be noted.


assert found_path == new_data_path

Comment thread
Tedsmith100 marked this conversation as resolved.
def test_runtime_error_when_too_many_save_files(tmp_path):
timestamp = "20240101_120000"
mock_dt = MagicMock()
mock_dt.now.return_value.strftime.return_value = timestamp

def test_runtime_error_when_too_many_save_files(data_dir):

relevant_dir = data_dir + 'repetitive_data/'
# generate 101 empty files
with open(relevant_dir + f'test_.txt', 'w'):
pass
(tmp_path / f'test_{timestamp}.txt').touch()
for i in range(1, 101):
with open(relevant_dir + f'test_{i}.txt', 'w'):
pass
with raises(RuntimeError):
check_save_path(relevant_dir + 'test_.txt', overwrite=False)
(tmp_path / f'test_{timestamp}_{i}.txt').touch()

with patch('packs.proc.processing_utils.datetime', mock_dt):
with raises(RuntimeError):
check_save_path(str(tmp_path / 'test.txt'), overwrite=False)

@mark.parametrize("config, inpt, output, comparison", [("process_WD2_1channel.conf", "one_channel_WD2.bin", "one_channel_tmp.h5", "one_channel_WD2.h5"),
("process_WD2_3channel.conf", "three_channels_WD2.bin", "three_channels_tmp.h5", "three_channels_WD2.h5")])
Expand Down
Loading