From 90fad7d8547bbbe054fd1cf97dd74cc8d27c482f Mon Sep 17 00:00:00 2001 From: Ladme Date: Tue, 18 Nov 2025 19:09:37 +0100 Subject: [PATCH 01/27] Made FieldCoupling more general --- src/qq_lib/core/field_coupling.py | 70 ++++--- src/qq_lib/properties/resources.py | 18 +- tests/test_core_field_coupling.py | 317 ++++++++++++++++++++++++----- 3 files changed, 312 insertions(+), 93 deletions(-) diff --git a/src/qq_lib/core/field_coupling.py b/src/qq_lib/core/field_coupling.py index 6bd2fe7..3b054e0 100644 --- a/src/qq_lib/core/field_coupling.py +++ b/src/qq_lib/core/field_coupling.py @@ -7,67 +7,81 @@ class FieldCoupling: """ - Represents a coupling relationship between a dominant and recessive field. + Represents a coupling among multiple fields, ordered by dominance. - If the dominant field is set in the constructor, the recessive one is automatically set to None. + The earlier a field appears in `fields`, the more dominant it is. + If multiple fields have values in an instance, only the most dominant + one is preserved; all others are set to None. """ - def __init__(self, dominant: str, recessive: str): - self.dominant = dominant - self.recessive = recessive + def __init__(self, *fields: str): + if len(fields) < 2: + raise ValueError("FieldCoupling requires at least two fields") + self.fields = list(fields) def contains(self, field_name: str) -> bool: - """Check if a field name is part of this coupling.""" - return field_name in (self.dominant, self.recessive) + """Return True if the field participates in this coupling.""" + return field_name in self.fields - def getPair(self) -> tuple[str, str]: - """Return both field names as a tuple.""" - return (self.dominant, self.recessive) + def getFields(self) -> tuple[str, ...]: + """Return all coupled fields as a tuple.""" + return tuple(self.fields) def hasValue(self, instance: Any) -> bool: - """Check if either field in this coupling has a non-None value.""" - return ( - getattr(instance, self.dominant) is not None - or getattr(instance, self.recessive) is not None - ) + """Return True if any of the coupled fields has a non-None value.""" + return any(getattr(instance, field) is not None for field in self.fields) + + def getMostDominantSetField(self, instance: Any) -> str | None: + """ + Return the name of the most dominant field that has a non-None value, + or None if none of them do. + """ + for field in self.fields: + if getattr(instance, field) is not None: + return field + + return None + + def enforce(self, instance: Any): + """ + Enforce dominance rules: only the most dominant field that is set + keeps its value; others are reset to None. + """ + dominant_set_field = self.getMostDominantSetField(instance) + if dominant_set_field is None: + return + + for field in self.fields: + if field != dominant_set_field: + setattr(instance, field, None) def coupled_fields(*couplings: FieldCoupling): """ - Class decorator that enforces field coupling rules in __post_init__. + Class decorator that enforces multi-field coupling rules in __post_init__. 
""" def decorator(cls): - # save the couplings cls._field_couplings = couplings - - # save the original __post_init__ if it exists original_post_init = getattr(cls, "__post_init__", None) def __post_init__(self): - # apply coupling rules for coupling in self._field_couplings: - dominant_value = getattr(self, coupling.dominant) - - # if dominant is set, recessive must be None - if dominant_value is not None: - setattr(self, coupling.recessive, None) + coupling.enforce(self) - # call original __post_init__ if it existed if original_post_init: original_post_init(self) @staticmethod def getCouplingForField(field_name: str) -> FieldCoupling | None: - """Return the FieldCoupling that contains the given field name, or None.""" for coupling in cls._field_couplings: if coupling.contains(field_name): return coupling + return None cls.__post_init__ = __post_init__ cls.getCouplingForField = getCouplingForField - return cls return decorator diff --git a/src/qq_lib/properties/resources.py b/src/qq_lib/properties/resources.py index 2d23585..37adecd 100644 --- a/src/qq_lib/properties/resources.py +++ b/src/qq_lib/properties/resources.py @@ -18,9 +18,9 @@ @dataclass(init=False) @coupled_fields( # if mem is set, ignore mem_per_cpu - FieldCoupling(dominant="mem", recessive="mem_per_cpu"), + FieldCoupling("mem", "mem_per_cpu"), # if work_size is set, ignore work_size_per_cpu - FieldCoupling(dominant="work_size", recessive="work_size_per_cpu"), + FieldCoupling("work_size", "work_size_per_cpu"), ) class Resources(HasCouplingMethods): """ @@ -172,16 +172,14 @@ def mergeResources(*resources: "Resources") -> "Resources": (r for r in resources if coupling.hasValue(r)), None ) + # set all fields of the coupling if source_resource: - merged_data[coupling.dominant] = getattr( - source_resource, coupling.dominant - ) - merged_data[coupling.recessive] = getattr( - source_resource, coupling.recessive - ) + for field in coupling.fields: + merged_data[field] = getattr(source_resource, field) + # if no resource has any attribute set for this coupling else: - merged_data[coupling.dominant] = None - merged_data[coupling.recessive] = None + for field in coupling.fields: + merged_data[field] = None continue # default: pick the first non-None value for this field diff --git a/tests/test_core_field_coupling.py b/tests/test_core_field_coupling.py index 6cff4bc..797f95d 100644 --- a/tests/test_core_field_coupling.py +++ b/tests/test_core_field_coupling.py @@ -8,79 +8,110 @@ def test_field_coupling_init(): - coupling = FieldCoupling(dominant="foo", recessive="bar") - assert coupling.dominant == "foo" - assert coupling.recessive == "bar" - - -def test_field_coupling_contains_with_dominant_field(): - coupling = FieldCoupling(dominant="foo", recessive="bar") + coupling = FieldCoupling("foo", "bar") + assert coupling.fields == ["foo", "bar"] + + +def test_field_coupling_init_many_fields(): + fields = [ + "foo", + "bar", + "baz", + "qux", + "quux", + "corge", + "grault", + "garply", + "waldo", + "fred", + "plugh", + "xyzzy", + "thud", + ] + coupling = FieldCoupling(*fields) + assert coupling.fields == fields + + +def test_field_coupling_contains(): + coupling = FieldCoupling("foo", "bar") assert coupling.contains("foo") is True - - -def test_field_coupling_contains_with_recessive_field(): - coupling = FieldCoupling(dominant="foo", recessive="bar") assert coupling.contains("bar") is True def test_field_coupling_contains_with_unrelated_field(): - coupling = FieldCoupling(dominant="foo", recessive="bar") + coupling = FieldCoupling("foo", 
"bar") assert coupling.contains("baz") is False assert coupling.contains("") is False -def test_field_coupling_get_pair(): - coupling = FieldCoupling(dominant="foo", recessive="bar") - assert coupling.getPair() == ("foo", "bar") +def test_field_coupling_get_fields(): + coupling = FieldCoupling("foo", "bar", "baz", "qux") + assert coupling.getFields() == ("foo", "bar", "baz", "qux") -def test_field_coupling_has_value_with_dominant_set(): +def test_field_coupling_has_value_with_first_set(): @dataclass class MockClass: foo: str | None = None bar: str | None = None + baz: str | None = None - coupling = FieldCoupling(dominant="foo", recessive="bar") + coupling = FieldCoupling("foo", "bar", "baz") instance = MockClass(foo="value") assert coupling.hasValue(instance) is True -def test_field_coupling_has_value_with_recessive_set(): +def test_field_coupling_has_value_with_last_set(): @dataclass class MockClass: foo: str | None = None bar: str | None = None + baz: str | None = None - coupling = FieldCoupling(dominant="foo", recessive="bar") - instance = MockClass(bar="value") + coupling = FieldCoupling("foo", "bar", "baz") + instance = MockClass(baz="value") assert coupling.hasValue(instance) is True -def test_field_coupling_has_value_with_both_set(): +def test_field_coupling_has_value_with_two_set(): @dataclass class MockClass: foo: str | None = None bar: str | None = None + baz: str | None = None - coupling = FieldCoupling(dominant="foo", recessive="bar") + coupling = FieldCoupling("foo", "bar", "baz") instance = MockClass(foo="value1", bar="value2") assert coupling.hasValue(instance) is True +def test_field_coupling_has_value_with_all_set(): + @dataclass + class MockClass: + foo: str | None = None + bar: str | None = None + baz: str | None = None + + coupling = FieldCoupling("foo", "bar", "baz") + instance = MockClass(foo="value1", bar="value2", baz="value3") + assert coupling.hasValue(instance) is True + + def test_field_coupling_has_value_with_neither_set(): @dataclass class MockClass: foo: str | None = None bar: str | None = None + baz: str | None = None - coupling = FieldCoupling(dominant="foo", recessive="bar") + coupling = FieldCoupling("foo", "bar", "baz") instance = MockClass() assert coupling.hasValue(instance) is False def test_decorator_single_coupling_dominant_overrides_recessive(): @dataclass - @coupled_fields(FieldCoupling(dominant="alpha", recessive="beta")) + @coupled_fields(FieldCoupling("alpha", "beta")) class TestClass(HasCouplingMethods): alpha: str | None = None beta: str | None = None @@ -90,9 +121,37 @@ class TestClass(HasCouplingMethods): assert obj.beta is None +def test_decorator_single_coupling_dominant_overrides_recessive_three_fields(): + @dataclass + @coupled_fields(FieldCoupling("alpha", "beta", "gamma")) + class TestClass(HasCouplingMethods): + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + obj = TestClass(alpha="A", gamma="G") + assert obj.alpha == "A" + assert obj.beta is None + assert obj.gamma is None + + +def test_decorator_single_coupling_second_overrides_third(): + @dataclass + @coupled_fields(FieldCoupling("alpha", "beta", "gamma")) + class TestClass(HasCouplingMethods): + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + obj = TestClass(beta="B", gamma="G") + assert obj.alpha is None + assert obj.beta == "B" + assert obj.gamma is None + + def test_decorator_single_coupling_recessive_preserved_when_dominant_none(): @dataclass - @coupled_fields(FieldCoupling(dominant="alpha", 
recessive="beta")) + @coupled_fields(FieldCoupling("alpha", "beta")) class TestClass(HasCouplingMethods): alpha: str | None = None beta: str | None = None @@ -102,9 +161,23 @@ class TestClass(HasCouplingMethods): assert obj.beta == "B" +def test_decorator_single_coupling_recessive_preserved_when_dominant_none_three_fields(): + @dataclass + @coupled_fields(FieldCoupling("alpha", "beta", "gamma")) + class TestClass(HasCouplingMethods): + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + obj = TestClass(gamma="G") + assert obj.alpha is None + assert obj.beta is None + assert obj.gamma == "G" + + def test_decorator_single_coupling_both_none(): @dataclass - @coupled_fields(FieldCoupling(dominant="alpha", recessive="beta")) + @coupled_fields(FieldCoupling("alpha", "beta")) class TestClass(HasCouplingMethods): alpha: str | None = None beta: str | None = None @@ -114,28 +187,46 @@ class TestClass(HasCouplingMethods): assert obj.beta is None +def test_decorator_single_coupling_all_none(): + @dataclass + @coupled_fields(FieldCoupling("alpha", "beta", "gamma", "delta")) + class TestClass(HasCouplingMethods): + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + delta: str | None = None + + obj = TestClass() + assert obj.alpha is None + assert obj.beta is None + assert obj.gamma is None + assert obj.delta is None + + def test_decorator_multiple_couplings_independent(): @dataclass @coupled_fields( - FieldCoupling(dominant="foo", recessive="bar"), - FieldCoupling(dominant="baz", recessive="qux"), + FieldCoupling("foo", "bar"), + FieldCoupling("baz", "qux", "corge"), ) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None baz: str | None = None qux: str | None = None + corge: str | None = None - obj = TestClass(foo="F", bar="B", qux="Q") + obj = TestClass(foo="F", bar="B", qux="Q", corge="C") assert obj.foo == "F" assert obj.bar is None # overridden by foo assert obj.baz is None assert obj.qux == "Q" # preserved because baz is None + assert obj.corge is None # override by qux def test_decorator_uncoupled_fields_unaffected(): @dataclass - @coupled_fields(FieldCoupling(dominant="foo", recessive="bar")) + @coupled_fields(FieldCoupling("foo", "bar")) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None @@ -147,35 +238,25 @@ class TestClass(HasCouplingMethods): assert obj.uncoupled == "U" -def test_decorator_get_coupling_for_field_finds_dominant(): +def test_decorator_get_coupling_for_field(): @dataclass - @coupled_fields(FieldCoupling(dominant="foo", recessive="bar")) + @coupled_fields(FieldCoupling("foo", "bar")) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None coupling = TestClass.getCouplingForField("foo") assert coupling is not None - assert coupling.dominant == "foo" - assert coupling.recessive == "bar" - - -def test_decorator_get_coupling_for_field_finds_recessive(): - @dataclass - @coupled_fields(FieldCoupling(dominant="foo", recessive="bar")) - class TestClass(HasCouplingMethods): - foo: str | None = None - bar: str | None = None + assert coupling.fields == ["foo", "bar"] coupling = TestClass.getCouplingForField("bar") assert coupling is not None - assert coupling.dominant == "foo" - assert coupling.recessive == "bar" + assert coupling.fields == ["foo", "bar"] def test_decorator_get_coupling_for_field_returns_none_for_uncoupled(): @dataclass - @coupled_fields(FieldCoupling(dominant="foo", recessive="bar")) + @coupled_fields(FieldCoupling("foo", 
"bar")) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None @@ -188,29 +269,28 @@ class TestClass(HasCouplingMethods): def test_decorator_get_coupling_for_field_with_multiple_couplings(): @dataclass @coupled_fields( - FieldCoupling(dominant="foo", recessive="bar"), - FieldCoupling(dominant="baz", recessive="qux"), + FieldCoupling("foo", "bar"), + FieldCoupling("baz", "qux", "corge"), ) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None baz: str | None = None qux: str | None = None + corge: str | None = None coupling1 = TestClass.getCouplingForField("foo") assert coupling1 is not None - assert coupling1.dominant == "foo" - assert coupling1.recessive == "bar" + assert coupling1.fields == ["foo", "bar"] - coupling2 = TestClass.getCouplingForField("qux") + coupling2 = TestClass.getCouplingForField("corge") assert coupling2 is not None - assert coupling2.dominant == "baz" - assert coupling2.recessive == "qux" + assert coupling2.fields == ["baz", "qux", "corge"] def test_decorator_custom_post_init_preserved(): @dataclass - @coupled_fields(FieldCoupling(dominant="foo", recessive="bar")) + @coupled_fields(FieldCoupling("foo", "bar")) class TestClass(HasCouplingMethods): foo: str | None = None bar: str | None = None @@ -226,8 +306,8 @@ def __post_init__(self): def test_decorator_field_couplings_metadata_stored(): - coupling1 = FieldCoupling(dominant="a", recessive="b") - coupling2 = FieldCoupling(dominant="c", recessive="d") + coupling1 = FieldCoupling("a", "b") + coupling2 = FieldCoupling("c", "d", "e") @dataclass @coupled_fields(coupling1, coupling2) @@ -236,6 +316,7 @@ class TestClass(HasCouplingMethods): b: str | None = None c: str | None = None d: str | None = None + e: str | None = None assert hasattr(TestClass, "_field_couplings") assert len(TestClass._field_couplings) == 2 @@ -254,3 +335,129 @@ class TestClass(HasCouplingMethods): assert obj.foo == "F" assert obj.bar == "B" assert TestClass.getCouplingForField("foo") is None + + +def test_field_coupling_get_most_dominant_set_field_first(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(alpha="A", beta="B", gamma="C") + assert coupling.getMostDominantSetField(instance) == "alpha" + + +def test_field_coupling_get_most_dominant_set_field_middle(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(beta="B", gamma="C") + assert coupling.getMostDominantSetField(instance) == "beta" + + +def test_field_coupling_get_most_dominant_set_field_last(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(gamma="C") + assert coupling.getMostDominantSetField(instance) == "gamma" + + +def test_field_coupling_get_most_dominant_set_field_none_set(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass() + assert coupling.getMostDominantSetField(instance) is None + + +def test_field_coupling_enforce_first_dominant_kept(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = 
FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(alpha="A", beta="B", gamma="C") + coupling.enforce(instance) + + assert instance.alpha == "A" + assert instance.beta is None + assert instance.gamma is None + + +def test_field_coupling_enforce_middle_dominant_kept(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(beta="B", gamma="C") + coupling.enforce(instance) + + assert instance.alpha is None + assert instance.beta == "B" + assert instance.gamma is None + + +def test_field_coupling_enforce_last_dominant_kept(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + instance = MockClass(gamma="C") + coupling.enforce(instance) + + assert instance.alpha is None + assert instance.beta is None + assert instance.gamma == "C" + + +def test_field_coupling_enforce_does_not_change_uncoupled_fields(): + @dataclass + class MockClass: + alpha: str | None = None + beta: str | None = None + gamma: str | None = None + extra1: str | None = None + extra2: str | None = None + + coupling = FieldCoupling("alpha", "beta", "gamma") + + instance = MockClass( + alpha=None, + beta="B", + gamma="C", + extra1="keep1", + extra2="keep2", + ) + + coupling.enforce(instance) + + assert instance.alpha is None + assert instance.beta == "B" + assert instance.gamma is None + + # uncoupled fields must remain untouched + assert instance.extra1 == "keep1" + assert instance.extra2 == "keep2" From bc87410068baeae64770aa7f2ff758bdd27478f3 Mon Sep 17 00:00:00 2001 From: Ladme Date: Tue, 18 Nov 2025 20:28:05 +0100 Subject: [PATCH 02/27] Per-node properties --- CHANGELOG.md | 7 ++ pyproject.toml | 2 +- src/qq_lib/batch/pbs/pbs.py | 40 ++++++++-- src/qq_lib/batch/slurm/common.py | 6 +- src/qq_lib/batch/slurm/slurm.py | 9 ++- src/qq_lib/batch/slurmit4i/slurm.py | 2 +- src/qq_lib/batch/slurmlumi/slurm.py | 2 +- src/qq_lib/properties/info.py | 4 +- src/qq_lib/properties/resources.py | 40 ++++++++-- src/qq_lib/qq.py | 2 +- src/qq_lib/submit/cli.py | 38 ++++++++- src/qq_lib/submit/submitter.py | 19 ++++- tests/test_batch_pbs_pbs.py | 90 +++++++++++++++++++++ tests/test_batch_slurm_common.py | 10 +++ tests/test_batch_slurm_slurm.py | 64 ++++++++++++++- tests/test_properties_info.py | 2 +- tests/test_properties_resources.py | 117 ++++++++++++++++++++++++++++ tests/test_submit_submitter.py | 41 ++++++++++ uv.lock | 2 +- 19 files changed, 465 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723a402..aef18d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## Version 0.6.0 + +### Support for per-node resources +- Number of CPU cores, number of GPUs, the amount of memory and the amount of storage can be now requested per-node using the submission options `ncpus-per-node`, `ngpus-per-node`, `mem-per-node`, and `work-size-per-node`. Per-node properties override per-cpu properties (`mem-per-cpu`, `work-size-per-cpu`) but are overriden by "total" properties (`ncpus`, `ngpus`, `mem`, `work-size`). + +*** + ## Version 0.5.1 - If no info file is detected when running `qq go`, `qq info`, `qq kill`, `qq sync`, and `qq wipe`, an error message is printed. (This fixes a regression in v0.5.0.) 
diff --git a/pyproject.toml b/pyproject.toml index a2a7a88..ec90217 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "qq" -version = "0.5.1" +version = "0.6.0-dev.1" description = "A friendly interface to batch processing" readme = "README.md" requires-python = ">=3.12" diff --git a/src/qq_lib/batch/pbs/pbs.py b/src/qq_lib/batch/pbs/pbs.py index 1999847..5f71963 100644 --- a/src/qq_lib/batch/pbs/pbs.py +++ b/src/qq_lib/batch/pbs/pbs.py @@ -586,21 +586,37 @@ def _translatePerChunkResources(cls, res: Resources) -> list[str]: # number of sockets; this does not mean that the run script has to use one MPI # process per CPU core, this value can be overriden trans_res.append(f"mpiprocs={res.ncpus // res.nnodes}") + elif res.ncpus_per_node: + trans_res.append(f"ncpus={res.ncpus_per_node}") + trans_res.append(f"mpiprocs={res.ncpus_per_node}") if res.mem: trans_res.append(f"mem={(res.mem // res.nnodes).toStrExact()}") - elif res.mem_per_cpu and res.ncpus: - trans_res.append( - f"mem={(res.mem_per_cpu * res.ncpus // res.nnodes).toStrExact()}" - ) + elif res.mem_per_node: + trans_res.append(f"mem={res.mem_per_node.toStrExact()}") + elif res.mem_per_cpu: + if res.ncpus: + trans_res.append( + f"mem={(res.mem_per_cpu * res.ncpus // res.nnodes).toStrExact()}" + ) + elif res.ncpus_per_node: + trans_res.append( + f"mem={(res.mem_per_cpu * res.ncpus_per_node).toStrExact()}" + ) + else: + raise QQError( + "Attribute 'mem-per-cpu' requires attributes 'ncpus' or 'ncpus-per-node' to be defined." + ) else: # memory not set in any way raise QQError( - "Attribute 'mem' or attributes 'mem-per-cpu' and 'ncpus' are not defined." + "None of the attributes 'mem', 'mem-per-node', or 'mem-per-cpu' is defined." ) if res.ngpus: trans_res.append(f"ngpus={res.ngpus // res.nnodes}") + elif res.ngpus_per_node: + trans_res.append(f"ngpus={res.ngpus_per_node}") # translate work-dir if workdir := cls._translateWorkDir(res): @@ -629,12 +645,20 @@ def _translateWorkDir(cls, res: Resources) -> str | None: if res.work_size: return f"{res.work_dir}={(res.work_size // res.nnodes).toStrExact()}" + if res.work_size_per_node: + return f"{res.work_dir}={res.work_size_per_node.toStrExact()}" + if res.work_size_per_cpu: + if res.ncpus: + return f"{res.work_dir}={(res.work_size_per_cpu * res.ncpus // res.nnodes).toStrExact()}" + if res.ncpus_per_node: + return f"{res.work_dir}={(res.work_size_per_cpu * res.ncpus_per_node).toStrExact()}" - if res.work_size_per_cpu and res.ncpus: - return f"{res.work_dir}={(res.work_size_per_cpu * res.ncpus // res.nnodes).toStrExact()}" + raise QQError( + "Attribute 'work-size-per-cpu' requires attributes 'ncpus' or 'ncpus-per-node' to be defined." + ) raise QQError( - "Attribute 'work-size' or attributes 'work-size-per-cpu' and 'ncpus' are not defined." + "None of the attributes 'work-size', 'work-size-per-node', or 'work-size-per-cpu' is defined." 
) @classmethod diff --git a/src/qq_lib/batch/slurm/common.py b/src/qq_lib/batch/slurm/common.py index cd59178..3361cbf 100644 --- a/src/qq_lib/batch/slurm/common.py +++ b/src/qq_lib/batch/slurm/common.py @@ -52,6 +52,7 @@ def default_resources_from_dict(res: dict[str, str]) -> Resources: converter = { "DefMemPerCPU": "mem_per_cpu", + "DefMemPerNode": "mem_per_node", "DefaultTime": "walltime", } logger.debug(f"Raw dictionary for default resources: {res}.") @@ -66,7 +67,10 @@ def default_resources_from_dict(res: dict[str, str]) -> Resources: converted_key = converter.get(key, key) if converted_key in field_names: - if converted_key in {"mem_per_cpu", "mem"} and value.isnumeric(): + if ( + converted_key in {"mem_per_cpu", "mem_per_node", "mem"} + and value.isnumeric() + ): # default unit for Slurm sizes is MB value += "mb" if converted_key == "walltime": diff --git a/src/qq_lib/batch/slurm/slurm.py b/src/qq_lib/batch/slurm/slurm.py index ac4039a..09352fa 100644 --- a/src/qq_lib/batch/slurm/slurm.py +++ b/src/qq_lib/batch/slurm/slurm.py @@ -477,19 +477,26 @@ def _translatePerChunkResources(cls, res: Resources) -> list[str]: # this setup is here only to allow for better accounting by Slurm trans_res.append("--ntasks-per-node=1") trans_res.append(f"--cpus-per-task={res.ncpus // res.nnodes}") + elif res.ncpus_per_node: + trans_res.append("--ntasks-per-node=1") + trans_res.append(f"--cpus-per-task={res.ncpus_per_node}") if res.mem: trans_res.append(f"--mem={(res.mem // res.nnodes).toStrExactSlurm()}") + elif res.mem_per_node: + trans_res.append(f"--mem={res.mem_per_node.toStrExactSlurm()}") elif res.mem_per_cpu: trans_res.append(f"--mem-per-cpu={res.mem_per_cpu.toStrExactSlurm()}") else: # memory not set in any way raise QQError( - "Attribute 'mem' and attribute 'mem-per-cpu' are not defined." + "None of the attributes 'mem', 'mem-per-node', or 'mem-per-cpu' is defined." ) if res.ngpus: trans_res.append(f"--gpus-per-node={res.ngpus // res.nnodes}") + elif res.ngpus_per_node: + trans_res.append(f"--gpus-per-node={res.ngpus_per_node}") return trans_res diff --git a/src/qq_lib/batch/slurmit4i/slurm.py b/src/qq_lib/batch/slurmit4i/slurm.py index f633809..213d2a6 100644 --- a/src/qq_lib/batch/slurmit4i/slurm.py +++ b/src/qq_lib/batch/slurmit4i/slurm.py @@ -231,7 +231,7 @@ def resubmit(cls, **kwargs) -> None: def _getDefaultResources(cls) -> Resources: return Resources( nnodes=1, - ncpus=128, + ncpus_per_node=128, mem_per_cpu="1gb", work_dir="scratch", walltime="1d", diff --git a/src/qq_lib/batch/slurmlumi/slurm.py b/src/qq_lib/batch/slurmlumi/slurm.py index 190994e..8f9b0ff 100644 --- a/src/qq_lib/batch/slurmlumi/slurm.py +++ b/src/qq_lib/batch/slurmlumi/slurm.py @@ -99,7 +99,7 @@ def getScratchDir(cls, job_id: str) -> Path: def _getDefaultResources(cls) -> Resources: return Resources( nnodes=1, - ncpus=128, + ncpus_per_node=128, mem_per_cpu="500mb", work_dir="scratch", walltime="1d", diff --git a/src/qq_lib/properties/info.py b/src/qq_lib/properties/info.py index 2ef67cc..a0726d3 100644 --- a/src/qq_lib/properties/info.py +++ b/src/qq_lib/properties/info.py @@ -162,9 +162,7 @@ def fromFile(cls, file: Path, host: str | None = None) -> Self: except yaml.YAMLError as e: raise QQError(f"Could not parse the qq info file '{file}': {e}.") from e except TypeError as e: - raise QQError( - f"Mandatory information missing from the qq info file '{file}': {e}." 
- ) from e + raise QQError(f"Invalid qq info file '{file}': {e}.") from e def toFile(self, file: Path, host: str | None = None) -> None: """ diff --git a/src/qq_lib/properties/resources.py b/src/qq_lib/properties/resources.py index 37adecd..ebc2ed0 100644 --- a/src/qq_lib/properties/resources.py +++ b/src/qq_lib/properties/resources.py @@ -17,10 +17,14 @@ # dataclass decorator has to come before `@coupled_fields`! @dataclass(init=False) @coupled_fields( - # if mem is set, ignore mem_per_cpu - FieldCoupling("mem", "mem_per_cpu"), - # if work_size is set, ignore work_size_per_cpu - FieldCoupling("work_size", "work_size_per_cpu"), + # if mem is set, ignore other mem properties; if mem_per_node is set, ignore mem_per_cpu + FieldCoupling("mem", "mem_per_node", "mem_per_cpu"), + # if work_size is set, ignore other work_size properties; if work_size_per_node is set, ignore work_size_per_cpu + FieldCoupling("work_size", "work_size_per_node", "work_size_per_cpu"), + # if ncpus is set, ignore ncpus_per_node + FieldCoupling("ncpus", "ncpus_per_node"), + # if ngpus is set, ignore ngpus_per_node + FieldCoupling("ngpus", "ngpus_per_node"), ) class Resources(HasCouplingMethods): """ @@ -33,15 +37,24 @@ class Resources(HasCouplingMethods): # Number of CPU cores to use for the job ncpus: int | None = None + # Number of CPU cores to use per node + ncpus_per_node: int | None = None + # Absolute amount of memory to allocate for the job (overrides mem_per_cpu) mem: Size | None = None + # Amount of memory to allocate per node + mem_per_node: Size | None = None + # Amount of memory to allocate per CPU core mem_per_cpu: Size | None = None # Number of GPUs to use ngpus: int | None = None + # Number of GPUs to use per node + ngpus_per_node: int | None = None + # Maximum allowed runtime for the job walltime: str | None = None @@ -51,6 +64,9 @@ class Resources(HasCouplingMethods): # Absolute size of storage requested for the job (overrides work_size_per_cpu) work_size: Size | None = None + # Storage size requested per node + work_size_per_node: Size | None = None + # Storage size requested per CPU core work_size_per_cpu: Size | None = None @@ -61,19 +77,25 @@ def __init__( self, nnodes: int | str | None = None, ncpus: int | str | None = None, + ncpus_per_node: int | str | None = None, mem: Size | str | dict[str, object] | None = None, + mem_per_node: Size | str | dict[str, object] | None = None, mem_per_cpu: Size | str | dict[str, object] | None = None, ngpus: int | str | None = None, + ngpus_per_node: int | str | None = None, walltime: str | None = None, work_dir: str | None = None, work_size: Size | str | dict[str, object] | None = None, + work_size_per_node: Size | str | dict[str, object] | None = None, work_size_per_cpu: Size | str | dict[str, object] | None = None, props: dict[str, str] | str | None = None, ): # convert sizes mem = Resources._parseSize(mem) + mem_per_node = Resources._parseSize(mem_per_node) mem_per_cpu = Resources._parseSize(mem_per_cpu) work_size = Resources._parseSize(work_size) + work_size_per_node = Resources._parseSize(work_size_per_node) work_size_per_cpu = Resources._parseSize(work_size_per_cpu) # convert walltime @@ -84,23 +106,31 @@ def __init__( if isinstance(props, str): props = Resources._parseProps(props) - # convert nnodes, ncpus, and ngpus to integer + # convert nnodes, ncpus, and ngpus to integers if isinstance(nnodes, str): nnodes = int(nnodes) if isinstance(ncpus, str): ncpus = int(ncpus) + if isinstance(ncpus_per_node, str): + ncpus_per_node = int(ncpus_per_node) if 
isinstance(ngpus, str): ngpus = int(ngpus) + if isinstance(ngpus_per_node, str): + ngpus_per_node = int(ngpus_per_node) # set attributes self.nnodes = nnodes self.ncpus = ncpus + self.ncpus_per_node = ncpus_per_node self.mem = mem + self.mem_per_node = mem_per_node self.mem_per_cpu = mem_per_cpu self.ngpus = ngpus + self.ngpus_per_node = ngpus_per_node self.walltime = walltime self.work_dir = work_dir self.work_size = work_size + self.work_size_per_node = work_size_per_node self.work_size_per_cpu = work_size_per_cpu self.props = props diff --git a/src/qq_lib/qq.py b/src/qq_lib/qq.py index bafd9e5..5cbc275 100644 --- a/src/qq_lib/qq.py +++ b/src/qq_lib/qq.py @@ -22,7 +22,7 @@ from qq_lib.sync import sync from qq_lib.wipe import wipe -__version__ = "0.5.1" +__version__ = "0.6.0-dev.1" # support both --help and -h _CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} diff --git a/src/qq_lib/submit/cli.py b/src/qq_lib/submit/cli.py index 092250f..abcee96 100644 --- a/src/qq_lib/submit/cli.py +++ b/src/qq_lib/submit/cli.py @@ -95,11 +95,17 @@ default=None, help="Number of computing nodes to allocate for the job.", ) +@optgroup.option( + "--ncpus-per-node", + type=int, + default=None, + help="Number of CPU cores to allocate per one requested node.", +) @optgroup.option( "--ncpus", type=int, default=None, - help="Number of CPU cores to allocate for the job.", + help="Total number of CPU cores to allocate for the job. Overrides `--ncpus-per-node`.", ) @optgroup.option( "--mem-per-cpu", @@ -107,14 +113,30 @@ default=None, help="Memory to allocate per CPU core. Specify as 'Nmb' or 'Ngb' (e.g., 500mb or 2gb).", ) +@optgroup.option( + "--mem-per-node", + type=str, + default=None, + help="Memory to allocate per one requested node. Specify as 'Nmb' or 'Ngb' (e.g., 500mb or 32gb). Overrides `--mem-per-cpu`.", +) @optgroup.option( "--mem", type=str, default=None, - help="Total memory to allocate for the job. Specify as 'Nmb' or 'Ngb' (e.g., 500mb or 10gb). Overrides `--mem-per-cpu`.", + help="""Total memory to allocate for the job. Specify as 'Nmb' or 'Ngb' (e.g., 500mb or 64gb). +Overrides `--mem-per-cpu` and `--mem-per-node`.""", +) +@optgroup.option( + "--ngpus-per-node", + type=int, + default=None, + help="Number of GPUs to allocate per one requested node.", ) @optgroup.option( - "--ngpus", type=int, default=None, help="Number of GPUs to allocate for the job." + "--ngpus", + type=int, + default=None, + help="Total number of GPUs to allocate for the job. Overrides `--ngpus-per-node`.", ) @optgroup.option( "--walltime", @@ -136,12 +158,20 @@ default=None, help="Storage to allocate per CPU core. Specify as 'Ngb' (e.g., 1gb).", ) +@optgroup.option( + "--work-size-per-node", + "--worksize-per-node", + type=str, + default=None, + help="Storage to allocate per one requested node. Specify as 'Ngb' (e.g., 32gb). Overrides `--work-size-per-cpu`.", +) @optgroup.option( "--work-size", "--worksize", type=str, default=None, - help="Total storage to allocate for the job. Specify as 'Ngb' (e.g., 10gb). Overrides `--work-size-per-cpu`.", + help="""Total storage to allocate for the job. Specify as 'Ngb' (e.g., 64gb). 
+Overrides `--work-size-per-cpu` and `--work-size-per-node`.""", ) @optgroup.option( "--props", diff --git a/src/qq_lib/submit/submitter.py b/src/qq_lib/submit/submitter.py index c754cb5..c15d6b3 100644 --- a/src/qq_lib/submit/submitter.py +++ b/src/qq_lib/submit/submitter.py @@ -246,9 +246,22 @@ def _createEnvVarsDict(self) -> dict[str, str]: env_vars[CFG.env_vars.input_dir] = str(self._input_dir) # environment variables for resources - env_vars[CFG.env_vars.ncpus] = str(self._resources.ncpus or 1) - env_vars[CFG.env_vars.ngpus] = str(self._resources.ngpus or 0) - env_vars[CFG.env_vars.nnodes] = str(self._resources.nnodes or 1) + nnodes = self._resources.nnodes or 1 + if ncpus := self._resources.ncpus: + env_vars[CFG.env_vars.ncpus] = str(ncpus) + elif ncpus_per_node := self._resources.ncpus_per_node: + env_vars[CFG.env_vars.ncpus] = str(ncpus_per_node * nnodes) + else: + env_vars[CFG.env_vars.ncpus] = "1" + + if ngpus := self._resources.ngpus: + env_vars[CFG.env_vars.ngpus] = str(ngpus) + elif ngpus_per_node := self._resources.ngpus_per_node: + env_vars[CFG.env_vars.ngpus] = str(ngpus_per_node * nnodes) + else: + env_vars[CFG.env_vars.ngpus] = "0" + + env_vars[CFG.env_vars.nnodes] = str(nnodes) env_vars[CFG.env_vars.walltime] = str( hhmmss_to_duration(self._resources.walltime or "00:00:00").total_seconds() / 3600 diff --git a/tests/test_batch_pbs_pbs.py b/tests/test_batch_pbs_pbs.py index 9f86a57..4e488e2 100644 --- a/tests/test_batch_pbs_pbs.py +++ b/tests/test_batch_pbs_pbs.py @@ -572,6 +572,16 @@ def test_translate_submit_minimal_fields(): ) +def test_translate_submit_ncpus_ngpus_per_node(): + res = Resources( + nnodes=1, ncpus_per_node=1, ngpus_per_node=1, mem="1gb", work_dir="input_dir" + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l ncpus=1,mpiprocs=1,mem=1048576kb,ngpus=1 script.sh" + ) + + def test_translate_submit_with_env_vars(): res = Resources(nnodes=1, ncpus=1, mem="1gb", work_dir="input_dir") assert ( @@ -596,6 +606,16 @@ def test_translate_submit_multiple_nodes(): ) +def test_translate_submit_multiple_nodes_ncpus_and_ngpus_per_node(): + res = Resources( + nnodes=4, ncpus_per_node=8, ngpus_per_node=1, mem="1gb", work_dir="input_dir" + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l select=4:ncpus=8:mpiprocs=8:mem=262144kb:ngpus=1 -l place=vscatter script.sh" + ) + + def test_translate_submit_multiple_nodes_with_env_vars(): res = Resources(nnodes=4, ncpus=8, mem="1gb", work_dir="input_dir") assert ( @@ -668,6 +688,20 @@ def test_translate_submit_scratch_local_work_size(): ) +def test_translate_submit_scratch_local_work_size_per_node(): + res = Resources( + nnodes=2, + ncpus=2, + mem="4gb", + work_dir="scratch_local", + work_size_per_node="16gb", + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l select=2:ncpus=1:mpiprocs=1:mem=2097152kb:scratch_local=16777216kb -l place=vscatter script.sh" + ) + + def test_translate_submit_scratch_ssd_work_size(): res = Resources( nnodes=2, ncpus=2, mem="4gb", work_dir="scratch_ssd", work_size="16gb" @@ -698,6 +732,20 @@ def test_translate_submit_work_size_per_cpu(): ) +def test_translate_submit_work_size_per_cpu_with_cpus_per_node(): + res = Resources( + nnodes=1, + ncpus_per_node=8, + mem="4gb", + 
work_dir="scratch_local", + work_size_per_cpu="2gb", + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l ncpus=8,mpiprocs=8,mem=4194304kb,scratch_local=16777216kb script.sh" + ) + + def test_translate_submit_work_size_per_cpu_multiple_nodes(): res = Resources( nnodes=3, ncpus=3, mem="4gb", work_dir="scratch_local", work_size_per_cpu="2gb" @@ -718,6 +766,34 @@ def test_translate_submit_mem_per_cpu(): ) +def test_translate_submit_mem_per_cpu_with_ncpus_per_node(): + res = Resources( + nnodes=1, + ncpus_per_node=4, + mem_per_cpu="2gb", + work_dir="scratch_local", + work_size="10gb", + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l ncpus=4,mpiprocs=4,mem=8388608kb,scratch_local=10485760kb script.sh" + ) + + +def test_translate_submit_mem_per_node(): + res = Resources( + nnodes=1, + ncpus=4, + mem_per_node="8gb", + work_dir="scratch_local", + work_size="10gb", + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l ncpus=4,mpiprocs=4,mem=8388608kb,scratch_local=10485760kb script.sh" + ) + + def test_translate_submit_mem_per_cpu_multiple_nodes(): res = Resources( nnodes=2, ncpus=4, mem_per_cpu="2gb", work_dir="scratch_local", work_size="20gb" @@ -728,6 +804,20 @@ def test_translate_submit_mem_per_cpu_multiple_nodes(): ) +def test_translate_submit_mem_per_node_multiple_nodes(): + res = Resources( + nnodes=2, + ncpus=4, + mem_per_node="4gb", + work_dir="scratch_local", + work_size="20gb", + ) + assert ( + PBS._translateSubmit(res, "gpu", Path("tmp"), "script.sh", "job", [], {}) + == f"qsub -N job -q gpu -j eo -e tmp/job{CFG.suffixes.qq_out} -l select=2:ncpus=2:mpiprocs=2:mem=4194304kb:scratch_local=10485760kb -l place=vscatter script.sh" + ) + + def test_translate_submit_mem_per_cpu_and_work_size_per_cpu(): res = Resources( nnodes=1, diff --git a/tests/test_batch_slurm_common.py b/tests/test_batch_slurm_common.py index e0315f9..f891b5d 100644 --- a/tests/test_batch_slurm_common.py +++ b/tests/test_batch_slurm_common.py @@ -67,9 +67,19 @@ def test_default_resources_from_dict_def_mem_per_cpu_numeric(): assert result.mem_per_cpu == Size.fromString("4096mb") +def test_default_resources_from_dict_def_mem_per_node_numeric(): + input_dict = { + "DefMemPerNode": "4096", + } + result = default_resources_from_dict(input_dict) + assert isinstance(result, Resources) + assert result.mem_per_node == Size.fromString("4096mb") + + def test_default_resources_from_dict_ignores_unlimited_values(): input_dict = { "DefMemPerCPU": "UNLIMITED", + "DefMemPerNode": "UNLIMITED", "DefaultTime": "UNLIMITED", } result = default_resources_from_dict(input_dict) diff --git a/tests/test_batch_slurm_slurm.py b/tests/test_batch_slurm_slurm.py index c9521b4..99110c1 100644 --- a/tests/test_batch_slurm_slurm.py +++ b/tests/test_batch_slurm_slurm.py @@ -236,6 +236,19 @@ def test_slurm_translate_per_chunk_resources_two_nodes(): assert "--gpus-per-node=2" in result +def test_slurm_translate_per_chunk_resources_two_nodes_per_node_resources(): + res = Resources() + res.nnodes = 2 + res.ncpus_per_node = 8 + res.mem_per_node = Size(32, "gb") + res.ngpus_per_node = 4 + result = Slurm._translatePerChunkResources(res) + assert "--ntasks-per-node=1" in result + assert "--cpus-per-task=8" in result + assert 
f"--mem={res.mem_per_node.toStrExactSlurm()}" in result + assert "--gpus-per-node=4" in result + + def test_slurm_translate_per_chunk_resources_single_node(): res = Resources() res.nnodes = 1 @@ -249,6 +262,19 @@ def test_slurm_translate_per_chunk_resources_single_node(): assert "--gpus-per-node=2" in result +def test_slurm_translate_per_chunk_resources_single_node_per_node_resources(): + res = Resources() + res.nnodes = 1 + res.ncpus_per_node = 4 + res.mem_per_node = Size(16, "gb") + res.ngpus_per_node = 2 + result = Slurm._translatePerChunkResources(res) + assert "--ntasks-per-node=1" in result + assert "--cpus-per-task=4" in result + assert f"--mem={res.mem_per_node.toStrExactSlurm()}" in result + assert "--gpus-per-node=2" in result + + def test_slurm_translate_per_chunk_resources_multiple_nodes(): res = Resources() res.nnodes = 5 @@ -279,7 +305,8 @@ def test_slurm_translate_per_chunk_resources_raises_when_mem_missing(): res.mem = None res.mem_per_cpu = None with pytest.raises( - QQError, match="Attribute 'mem' and attribute 'mem-per-cpu' are not defined." + QQError, + match="None of the attributes 'mem', 'mem-per-node', or 'mem-per-cpu' is defined.", ): Slurm._translatePerChunkResources(res) @@ -364,6 +391,41 @@ def test_slurm_translate_submit_basic_command(): assert command.endswith(script) +def test_slurm_translate_submit_basic_command_with_per_node_properties(): + res = Resources() + res.nnodes = 2 + res.ncpus_per_node = 32 + res.mem_per_node = Size(32, "gb") + res.ngpus_per_node = 4 + res.props = {} + res.walltime = "2-00:00:00" + + queue = "gpu" + input_dir = Path("/tmp") + script = "run.sh" + job_name = "job1" + depend = [] + env_vars = {} + account = None + + command = Slurm._translateSubmit( + res, queue, input_dir, script, job_name, depend, env_vars, account + ) + + assert command.startswith("sbatch") + assert f"-J {job_name}" in command + assert f"-p {queue}" in command + assert f"-e {input_dir / (job_name + '.qqout')}" in command + assert f"-o {input_dir / (job_name + '.qqout')}" in command + assert f"--nodes {res.nnodes}" in command + assert "--ntasks-per-node=1" in command + assert f"--cpus-per-task={res.ncpus_per_node}" in command + assert f"--mem={res.mem_per_node.toStrExactSlurm()}" in command + assert f"--gpus-per-node={res.ngpus_per_node}" in command + assert f"--time={res.walltime}" in command + assert command.endswith(script) + + def test_slurm_translate_submit_with_account_and_env_vars(): res = Resources() res.nnodes = 1 diff --git a/tests/test_properties_info.py b/tests/test_properties_info.py index 7be9682..881d328 100644 --- a/tests/test_properties_info.py +++ b/tests/test_properties_info.py @@ -277,5 +277,5 @@ def test_from_file_missing_required_field(tmp_path): } file.write_text(yaml.dump(data)) - with pytest.raises(QQError, match=r"Mandatory information missing"): + with pytest.raises(QQError, match=r"Invalid qq info file"): Info.fromFile(file) diff --git a/tests/test_properties_resources.py b/tests/test_properties_resources.py index 9a218f1..ad0bc00 100644 --- a/tests/test_properties_resources.py +++ b/tests/test_properties_resources.py @@ -56,22 +56,81 @@ def test_init_converts_numeric_strings_to_integers(): assert res.ngpus == 4 +def test_init_mem_overrides_mem_per_node(): + res = Resources(mem_per_node="1gb", mem="4gb") + assert res.mem_per_cpu is None + assert res.mem_per_node is None + + assert res.mem is not None + assert res.mem.value == 4194304 + + def test_init_mem_overrides_mem_per_cpu(): res = Resources(mem_per_cpu="1gb", mem="4gb") assert 
res.mem_per_cpu is None + assert res.mem_per_node is None assert res.mem is not None assert res.mem.value == 4194304 +def test_init_mem_per_node_overrides_mem_per_cpu(): + res = Resources(mem_per_node="4gb", mem_per_cpu="1gb") + assert res.mem_per_cpu is None + assert res.mem is None + + assert res.mem_per_node is not None + assert res.mem_per_node.value == 4194304 + + +def test_init_mem_overrides_mem_per_node_and_mem_per_cpu(): + res = Resources(mem_per_node="2gb", mem_per_cpu="1gb", mem="4gb") + assert res.mem_per_cpu is None + assert res.mem_per_node is None + + assert res.mem is not None + assert res.mem.value == 4194304 + + +def test_init_worksize_overrides_work_size_per_node(): + res = Resources(work_size_per_node="2gb", work_size="4gb") + assert res.work_size_per_cpu is None + assert res.work_size_per_node is None + + assert res.work_size is not None + assert res.work_size.value == 4194304 + + def test_init_worksize_overrides_work_size_per_cpu(): res = Resources(work_size_per_cpu="1gb", work_size="4gb") assert res.work_size_per_cpu is None + assert res.work_size_per_node is None assert res.work_size is not None assert res.work_size.value == 4194304 +def test_init_worksize_per_node_overrides_work_size_per_cpu(): + res = Resources(work_size_per_node="4gb", work_size_per_cpu="1gb") + assert res.work_size_per_cpu is None + assert res.work_size is None + + assert res.work_size_per_node is not None + assert res.work_size_per_node.value == 4194304 + + +def test_init_ncpus_overrides_ncpus_per_node(): + res = Resources(ncpus_per_node=4, ncpus=8) + assert res.ncpus_per_node is None + assert res.ncpus == 8 + + +def test_init_ngpus_overrides_ngpus_per_node(): + res = Resources(ngpus_per_node=1, ngpus=4) + assert res.ngpus_per_node is None + assert res.ngpus == 4 + + def test_init_leaves_already_converted_types_unchanged(): res = Resources( nnodes=2, @@ -179,6 +238,17 @@ def test_merge_resources_mem_with_mem_per_cpu_precedence3(): assert merged.mem_per_cpu.value == 4194304 +def test_merge_resources_mem_with_mem_per_node_precedence(): + r1 = Resources(mem_per_node="16gb") + r2 = Resources(mem="32gb", mem_per_cpu="4gb") + r3 = Resources(mem="64gb") + merged = Resources.mergeResources(r1, r2, r3) + assert merged.mem_per_cpu is None + assert merged.mem is None + assert merged.mem_per_node is not None + assert merged.mem_per_node.value == 16777216 + + def test_merge_resources_mem_skipped_if_mem_per_cpu_seen_first(): r1 = Resources(mem_per_cpu="4gb") r2 = Resources(mem="32gb") @@ -220,6 +290,53 @@ def test_merge_resources_work_size_with_work_size_per_cpu_precedence3(): assert merged.work_size_per_cpu.value == 10485760 +def test_merge_resources_work_size_with_work_size_per_node_precedence(): + r1 = Resources(work_size_per_node="100gb") + r2 = Resources(work_size="400gb", work_size_per_cpu="10gb") + r3 = Resources(work_size="200gb") + merged = Resources.mergeResources(r1, r2, r3) + assert merged.work_size_per_cpu is None + assert merged.work_size is None + assert merged.work_size_per_node is not None + assert merged.work_size_per_node.value == 104857600 + + +def test_merge_resources_ncpus_with_ncpus_per_node_precedence(): + r1 = Resources(ncpus_per_node=64) + r2 = Resources(ncpus=128) + r3 = Resources(ncpus=32) + merged = Resources.mergeResources(r1, r2, r3) + assert merged.ncpus is None + assert merged.ncpus_per_node == 64 + + +def test_merge_resources_ncpus_with_ncpus_per_node_precedence2(): + r1 = Resources() + r2 = Resources(ncpus=128, ncpus_per_node=64) + r3 = Resources(ncpus=32) + merged = 
Resources.mergeResources(r1, r2, r3) + assert merged.ncpus == 128 + assert merged.ncpus_per_node is None + + +def test_merge_resources_ngpus_with_ngpus_per_node_precedence(): + r1 = Resources(ngpus_per_node=8) + r2 = Resources(ngpus=16) + r3 = Resources(ngpus=1) + merged = Resources.mergeResources(r1, r2, r3) + assert merged.ngpus is None + assert merged.ngpus_per_node == 8 + + +def test_merge_resources_ngpus_with_ngpus_per_node_precedence2(): + r1 = Resources() + r2 = Resources(ngpus=16, ngpus_per_node=8) + r3 = Resources(ngpus=1) + merged = Resources.mergeResources(r1, r2, r3) + assert merged.ngpus == 16 + assert merged.ngpus_per_node is None + + def test_merge_resources_work_size_skipped_if_work_size_per_cpu_seen_first(): r1 = Resources(work_size_per_cpu="10gb") r2 = Resources(work_size="200gb") diff --git a/tests/test_submit_submitter.py b/tests/test_submit_submitter.py index 5be1e65..a8f446d 100644 --- a/tests/test_submit_submitter.py +++ b/tests/test_submit_submitter.py @@ -253,6 +253,47 @@ def test_submitter_create_env_vars_dict_sets_all_required_variables( assert CFG.env_vars.debug_mode not in env +@pytest.mark.parametrize("debug_mode", [True, False]) +def test_submitter_create_env_vars_dict_sets_all_required_variables_with_per_node_properties( + tmp_path, debug_mode +): + script = tmp_path / "script.sh" + script.write_text("#!/usr/bin/env -S qq run\n") + + submitter = Submitter.__new__(Submitter) + submitter._info_file = tmp_path / "job.qqinfo" + submitter._batch_system = PBS + submitter._loop_info = None + submitter._input_dir = tmp_path + submitter._resources = Resources( + nnodes=2, ncpus_per_node=8, ngpus_per_node=2, walltime="1d" + ) + + if debug_mode: + with patch.dict(os.environ, {CFG.env_vars.debug_mode: "true"}): + env = submitter._createEnvVarsDict() + else: + env = submitter._createEnvVarsDict() + + assert env[CFG.env_vars.guard] == "true" + assert env[CFG.env_vars.info_file] == str(submitter._info_file) + assert env[CFG.env_vars.input_machine] == socket.gethostname() + assert env[CFG.env_vars.batch_system] == str(submitter._batch_system) + assert env[CFG.env_vars.input_dir] == str(submitter._input_dir) + assert env[CFG.env_vars.nnodes] == str(submitter._resources.nnodes) + assert env[CFG.env_vars.ncpus] == str( + submitter._resources.ncpus_per_node * submitter._resources.nnodes + ) + assert env[CFG.env_vars.ngpus] == str( + submitter._resources.ngpus_per_node * submitter._resources.nnodes + ) + assert env[CFG.env_vars.walltime] == "24.0" + if debug_mode: + assert env[CFG.env_vars.debug_mode] == "true" + else: + assert CFG.env_vars.debug_mode not in env + + @pytest.mark.parametrize("debug_mode", [True, False]) def test_submitter_create_env_vars_dict_sets_loop_variables(tmp_path, debug_mode): script = tmp_path / "script.sh" diff --git a/uv.lock b/uv.lock index 265c1ae..ca6dfbc 100644 --- a/uv.lock +++ b/uv.lock @@ -459,7 +459,7 @@ wheels = [ [[package]] name = "qq" -version = "0.5.1" +version = "0.6.0.dev1" source = { virtual = "." 
}
 dependencies = [
     { name = "click" },

From 0c2145672d07ace195ec1c3284727f6f412be539 Mon Sep 17 00:00:00 2001
From: Ladme
Date: Tue, 18 Nov 2025 20:55:28 +0100
Subject: [PATCH 03/27] Show available types of working directories in qq
 submit -h

---
 CHANGELOG.md                            |  3 +++
 src/qq_lib/batch/interface/interface.py | 13 +++++++++++++
 src/qq_lib/batch/pbs/pbs.py             | 15 +++++++++------
 src/qq_lib/batch/slurmit4i/slurm.py     | 13 ++++++++++---
 src/qq_lib/batch/slurmlumi/slurm.py     |  7 +++++++
 src/qq_lib/core/common.py               | 21 +++++++++++++++++++++
 src/qq_lib/submit/cli.py                |  4 ++--
 tests/test_batch_pbs_pbs.py             | 12 ++++++++++++
 tests/test_batch_slurmit4i_slurm.py     | 11 +++++++++--
 tests/test_batch_slurmlumi_slurm.py     |  5 +++++
 tests/test_core_common.py               | 16 ++++++++++++++++
 11 files changed, 107 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aef18d1..c399f2d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@
 ### Support for per-node resources
 - Number of CPU cores, number of GPUs, the amount of memory, and the amount of storage can now be requested per node using the submission options `ncpus-per-node`, `ngpus-per-node`, `mem-per-node`, and `work-size-per-node`. Per-node properties override per-cpu properties (`mem-per-cpu`, `work-size-per-cpu`) but are overridden by "total" properties (`ncpus`, `ngpus`, `mem`, `work-size`).
 
+### Bug fixes and minor improvements
+- The available types of working directories for the current environment are now shown in the output of `qq submit -h`.
+
 ***
 
 ## Version 0.5.1
diff --git a/src/qq_lib/batch/interface/interface.py b/src/qq_lib/batch/interface/interface.py
index d7bee48..cded79f 100644
--- a/src/qq_lib/batch/interface/interface.py
+++ b/src/qq_lib/batch/interface/interface.py
@@ -266,6 +266,19 @@ def getNodes(cls) -> list[TBatchNode]:
             f"getNodes method is not implemented for {cls.__name__}"
         )
 
+    @classmethod
+    def getSupportedWorkDirTypes(cls) -> list[str]:
+        """
+        Retrieve the list of supported types of working directories
+        (i.e., strings that can be used with the `--work-dir` option).
+
+        Returns:
+            list[str]: A list of supported types of working directories.
+        """
+        raise NotImplementedError(
+            f"getSupportedWorkDirTypes method is not implemented for {cls.__name__}"
+        )
+
     @classmethod
     def navigateToDestination(cls, host: str, directory: Path) -> None:
         """
diff --git a/src/qq_lib/batch/pbs/pbs.py b/src/qq_lib/batch/pbs/pbs.py
index 5f71963..dabe34b 100644
--- a/src/qq_lib/batch/pbs/pbs.py
+++ b/src/qq_lib/batch/pbs/pbs.py
@@ -221,6 +221,14 @@ def getNodes(cls) -> list[PBSNode]:
 
         return queues
 
+    @classmethod
+    def getSupportedWorkDirTypes(cls) -> list[str]:
+        return cls.SUPPORTED_SCRATCHES + [
+            "scratch_shm",
+            "input_dir",
+            "job_dir",  # same as input_dir
+        ]
+
     @classmethod
     def readRemoteFile(cls, host: str, file: Path) -> str:
         if os.environ.get(CFG.env_vars.shared_submit):
@@ -413,13 +421,8 @@ def transformResources(cls, queue: str, provided_resources: Resources) -> Resour
             return resources
 
         # unknown work-dir type
-        supported_types = cls.SUPPORTED_SCRATCHES + [
-            "scratch_shm",
-            "job_dir",
-            "input_dir",  # same as job_dir
-        ]
         raise QQError(
-            f"Unknown working directory type specified: work-dir='{resources.work_dir}'. Supported types for {cls.envName()} are: '{' '.join(supported_types)}'."
+            f"Unknown working directory type specified: work-dir='{resources.work_dir}'. Supported types for {cls.envName()} are: '{' '.join(cls.getSupportedWorkDirTypes())}'."
) @classmethod diff --git a/src/qq_lib/batch/slurmit4i/slurm.py b/src/qq_lib/batch/slurmit4i/slurm.py index 213d2a6..df82ad3 100644 --- a/src/qq_lib/batch/slurmit4i/slurm.py +++ b/src/qq_lib/batch/slurmit4i/slurm.py @@ -68,6 +68,13 @@ def getScratchDir(cls, job_id: str) -> Path: f"Could not create a scratch directory for job '{job_id}' after {CFG.it4i_scratch_dir_attempts} attempts: {last_exception}" ) from last_exception + @classmethod + def getSupportedWorkDirTypes(cls) -> list[str]: + return cls.SUPPORTED_SCRATCHES + [ + "input_dir", + "job_dir", # same as input_dir + ] + @classmethod def navigateToDestination(cls, host: str, directory: Path) -> None: logger.info( @@ -183,12 +190,12 @@ def transformResources(cls, queue: str, provided_resources: Resources) -> Resour "Setting work-size is not supported in this environment. Working directory has a virtually unlimited capacity." ) - supported_types = cls.SUPPORTED_SCRATCHES + ["input_dir", "job_dir"] if not any( - equals_normalized(resources.work_dir, dir) for dir in supported_types + equals_normalized(resources.work_dir, dir) + for dir in cls.getSupportedWorkDirTypes() ): raise QQError( - f"Unknown working directory type specified: work-dir='{resources.work_dir}'. Supported types for {cls.envName()} are: {' '.join(supported_types)}." + f"Unknown working directory type specified: work-dir='{resources.work_dir}'. Supported types for {cls.envName()} are: {' '.join(cls.getSupportedWorkDirTypes())}." ) return resources diff --git a/src/qq_lib/batch/slurmlumi/slurm.py b/src/qq_lib/batch/slurmlumi/slurm.py index 8f9b0ff..7a37fa8 100644 --- a/src/qq_lib/batch/slurmlumi/slurm.py +++ b/src/qq_lib/batch/slurmlumi/slurm.py @@ -95,6 +95,13 @@ def getScratchDir(cls, job_id: str) -> Path: f"Could not create a scratch directory for job '{job_id}' after {CFG.lumi_scratch_dir_attempts} attempts: {last_exception}" ) from last_exception + @classmethod + def getSupportedWorkDirTypes(cls) -> list[str]: + return cls.SUPPORTED_SCRATCHES + [ + "input_dir", + "job_dir", # same as input_dir + ] + @classmethod def _getDefaultResources(cls) -> Resources: return Resources( diff --git a/src/qq_lib/core/common.py b/src/qq_lib/core/common.py index cf1a44a..2033eba 100644 --- a/src/qq_lib/core/common.py +++ b/src/qq_lib/core/common.py @@ -696,3 +696,24 @@ def construct_info_file_path(input_dir: Path, job_name: str) -> Path: Path: The absolute path to the job's qq info file. """ return (input_dir / job_name).with_suffix(CFG.suffixes.qq_info).resolve() + + +def available_work_dirs() -> str: + """Return the supported work-directory types for the detected batch system. + + The batch system is determined using the `QQ_BATCH_SYSTEM` environment + variable or by automatic detection. The supported work-directory types are + returned as a comma-separated string formatted for display in help text. + + Returns: + str: A comma-separated list of supported work directory types, each + wrapped in quotes. + """ + from qq_lib.batch.interface.meta import BatchMeta + + try: + batch_system = BatchMeta.fromEnvVarOrGuess() + work_dirs = batch_system.getSupportedWorkDirTypes() + return ", ".join([f"'{work_dir_type}'" for work_dir_type in work_dirs]) + except QQError: + return "??? 
(no batch system detected)" diff --git a/src/qq_lib/submit/cli.py b/src/qq_lib/submit/cli.py index abcee96..2c868af 100644 --- a/src/qq_lib/submit/cli.py +++ b/src/qq_lib/submit/cli.py @@ -10,7 +10,7 @@ from click_option_group import optgroup from qq_lib.core.click_format import GNUHelpColorsCommand -from qq_lib.core.common import get_runtime_files +from qq_lib.core.common import available_work_dirs, get_runtime_files from qq_lib.core.config import CFG from qq_lib.core.error import QQError from qq_lib.core.logger import get_logger @@ -149,7 +149,7 @@ "--workdir", type=str, default=None, - help="Type of working directory to use for the job.", + help=f"Type of working directory to use for the job. Available types: {available_work_dirs()}.", ) @optgroup.option( "--work-size-per-cpu", diff --git a/tests/test_batch_pbs_pbs.py b/tests/test_batch_pbs_pbs.py index 4e488e2..dbea3f2 100644 --- a/tests/test_batch_pbs_pbs.py +++ b/tests/test_batch_pbs_pbs.py @@ -1449,3 +1449,15 @@ def test_pbs_delete_remote_dir_calls_super_for_remote_host(mock_super): PBS.deleteRemoteDir(host, directory) mock_super().deleteRemoteDir.assert_called_once_with(host, directory) + + +def test_pbs_get_supported_work_dir_types_returns_combined_list(): + expected = [ + "scratch_local", + "scratch_ssd", + "scratch_shared", + "scratch_shm", + "input_dir", + "job_dir", + ] + assert PBS.getSupportedWorkDirTypes() == expected diff --git a/tests/test_batch_slurmit4i_slurm.py b/tests/test_batch_slurmit4i_slurm.py index 53c69a7..d703383 100644 --- a/tests/test_batch_slurmit4i_slurm.py +++ b/tests/test_batch_slurmit4i_slurm.py @@ -372,7 +372,7 @@ def test_slurmit4i_get_scratch_dir_third_attempt_succeeds(mock_user): assert mkdir_mock.call_count == 3 -def test_slurm_delete_remote_dir_deletes_local(tmp_path): +def test_slurmit4i_delete_remote_dir_deletes_local(tmp_path): test_dir = tmp_path / "to_delete" test_dir.mkdir() (test_dir / "file.txt").write_text("content") @@ -385,7 +385,9 @@ def test_slurm_delete_remote_dir_deletes_local(tmp_path): assert not test_dir.exists() -def test_slurm_delete_remote_dir_raises_error_on_local_failure(tmp_path, monkeypatch): +def test_slurmit4i_delete_remote_dir_raises_error_on_local_failure( + tmp_path, monkeypatch +): test_dir = tmp_path / "to_delete_fail" test_dir.mkdir() @@ -398,3 +400,8 @@ def mock_rmtree(_): QQError, match=f"Could not delete directory '{test_dir}': access denied." 
): SlurmIT4I.deleteRemoteDir("some_host", test_dir) + + +def test_slurmit4i_get_supported_work_dir_types_returns_combined_list(): + expected = ["scratch", "input_dir", "job_dir"] + assert SlurmIT4I.getSupportedWorkDirTypes() == expected diff --git a/tests/test_batch_slurmlumi_slurm.py b/tests/test_batch_slurmlumi_slurm.py index 55562a8..5edef4b 100644 --- a/tests/test_batch_slurmlumi_slurm.py +++ b/tests/test_batch_slurmlumi_slurm.py @@ -107,3 +107,8 @@ def test_slurmlumi_get_scratch_dir_third_attempt_succeeds(mock_user): mock_user.assert_called_once() assert mkdir_mock.call_count == 3 + + +def test_slurmlumi_get_supported_work_dir_types_returns_combined_list(): + expected = ["scratch", "flash", "input_dir", "job_dir"] + assert SlurmLumi.getSupportedWorkDirTypes() == expected diff --git a/tests/test_core_common.py b/tests/test_core_common.py index 4db07e4..f3f12ec 100644 --- a/tests/test_core_common.py +++ b/tests/test_core_common.py @@ -16,6 +16,7 @@ from qq_lib.batch.pbs import PBS, PBSJob from qq_lib.core.common import ( CFG, + available_work_dirs, construct_info_file_path, construct_loop_job_name, convert_absolute_to_relative, @@ -1062,3 +1063,18 @@ def test_construct_info_file_path_returns_expected_path(): result = construct_info_file_path(input_dir, job_name) assert result == expected + + +def test_available_work_dirs_returns_joined_list(): + mock_batch_system = MagicMock() + mock_batch_system.getSupportedWorkDirTypes.return_value = ["a", "b"] + + with patch.object(BatchMeta, "fromEnvVarOrGuess", return_value=mock_batch_system): + expected = "'a', 'b'" + + assert available_work_dirs() == expected + + +def test_available_work_dirs_returns_placeholder_on_error(): + with patch.object(BatchMeta, "fromEnvVarOrGuess", side_effect=QQError): + assert available_work_dirs() == "??? (no batch system detected)" From 9561e4177a57783c3523f2899a76393f2f03a9e1 Mon Sep 17 00:00:00 2001 From: Ladme Date: Tue, 18 Nov 2025 20:59:06 +0100 Subject: [PATCH 04/27] Better formatting for an unknown qq submit option error --- src/qq_lib/submit/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qq_lib/submit/parser.py b/src/qq_lib/submit/parser.py index 9f61be5..71ae3d7 100644 --- a/src/qq_lib/submit/parser.py +++ b/src/qq_lib/submit/parser.py @@ -100,7 +100,7 @@ def parse(self) -> None: self._options[snake_case_key] = value else: raise QQError( - f"Unknown qq submit option '{key}' in '{str(self._script)}': {line}.\nKnown options are '{' '.join(self._known_options)}'." + f"Unknown qq submit option '{key}' in '{str(self._script)}': {line.strip()}.\nKnown options are '{' '.join(self._known_options)}'." ) logger.debug(f"Parsed options from '{self._script}': {self._options}.") From 83bc04bae55125a9918ed510345debe386e4e056 Mon Sep 17 00:00:00 2001 From: Ladme Date: Tue, 18 Nov 2025 21:10:16 +0100 Subject: [PATCH 05/27] Better help for the submission option --walltime --- src/qq_lib/submit/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qq_lib/submit/cli.py b/src/qq_lib/submit/cli.py index 2c868af..0b5fe5c 100644 --- a/src/qq_lib/submit/cli.py +++ b/src/qq_lib/submit/cli.py @@ -142,7 +142,7 @@ "--walltime", type=str, default=None, - help="Maximum runtime allowed for the job.", + help="Maximum runtime allowed for the job. 
Examples: '1d', '12h', '10m', '24:00:00', '12:00:00', '00:10:00'.",
 )
 @optgroup.option(
     "--work-dir",

From 59840d21e6851fe8cb32b779d8ba5df288d81b Mon Sep 17 00:00:00 2001
From: Ladme
Date: Wed, 19 Nov 2025 09:44:19 +0100
Subject: [PATCH 06/27] Missing size property correctly interpreted as zero size

---
 CHANGELOG.md                  | 1 +
 src/qq_lib/nodes/presenter.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c399f2d..5dfcefb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 
 ### Bug fixes and minor improvements
 - The available types of working directories for the current environment are now shown in the output of `qq submit -h`.
+- Fixed a regression from v0.5: a missing size property in `qq nodes` is now correctly interpreted as zero size.
 
 ***
 
diff --git a/src/qq_lib/nodes/presenter.py b/src/qq_lib/nodes/presenter.py
index e49684d..9edec89 100644
--- a/src/qq_lib/nodes/presenter.py
+++ b/src/qq_lib/nodes/presenter.py
@@ -198,16 +198,16 @@ def _addNodeRow(self, node: BatchNodeInterface, table: Table) -> None:
             NodesPresenter._formatProcessingUnits(free_gpus, total_gpus, available)
             if self._show_gpus
             else None,
-            Text(str(node.getFreeGPUMemory()), style=style)
+            Text(str(node.getFreeGPUMemory() or Size(0, "kb")), style=style)
             if self._show_gpu_mem
             else None,
-            Text(str(node.getFreeLocalScratch()), style=style)
+            Text(str(node.getFreeLocalScratch() or Size(0, "kb")), style=style)
             if self._show_local
             else None,
-            Text(str(node.getFreeSSDScratch()), style=style)
+            Text(str(node.getFreeSSDScratch() or Size(0, "kb")), style=style)
             if self._show_ssd
             else None,
-            Text(str(node.getFreeSharedScratch()), style=style)
+            Text(str(node.getFreeSharedScratch() or Size(0, "kb")), style=style)
             if self._show_shared
             else None,
             NodesPresenter._formatNodeProperties(

From 64f470099dcae7a08395c1f9676d3017a8affa11 Mon Sep 17 00:00:00 2001
From: Ladme
Date: Wed, 19 Nov 2025 10:21:56 +0100
Subject: [PATCH 07/27] Better default numbers of MPI ranks in run_scripts and
 support for specifying MPI ranks per node or per client

---
 scripts/run_scripts/qq_flex_md | 27 ++++++++++++++++-----------
 scripts/run_scripts/qq_flex_re | 27 ++++++++++++++++-----------
 scripts/run_scripts/qq_loop_md | 27 ++++++++++++++++-----------
 scripts/run_scripts/qq_loop_re | 27 ++++++++++++++++-----------
 4 files changed, 64 insertions(+), 44 deletions(-)

diff --git a/scripts/run_scripts/qq_flex_md b/scripts/run_scripts/qq_flex_md
index 2ac7934..bab5e29 100755
--- a/scripts/run_scripts/qq_flex_md
+++ b/scripts/run_scripts/qq_flex_md
@@ -2,7 +2,7 @@
 ########################################
 # Script for running Gromacs          #
 # flexible-length loop jobs using qq  #
-# script version: 0.8                 #
+# script version: 0.9                 #
 # support: ladmeb@gmail.com           #
 ########################################
@@ -41,7 +41,7 @@ PLUMED=""
 # maximum number of warnings; leave empty to determine automatically (gen-vel)
 MAXWARN=""
 
-# number of MPI ranks to use; leave empty to determine automatically
+# number of MPI ranks PER NODE to use; leave empty to determine automatically
 MPI=""
 # number of OpenMP threads per MPI rank to use; leave empty to determine automatically
 NTOMP=""
@@ -63,32 +63,37 @@ module add gromacs:2021.4-plumed
 # Execution section                   #
 ########################################
 
-# set MPI ranks and OpenMP threads
-# by default, use one MPI rank per node or per GPU
-if [[ -z "${MPI}" ]]; then
-    MPI=$(( QQ_NGPUS > QQ_NNODES ? QQ_NGPUS : QQ_NNODES ))
-    echo "[QQ_FLEX_MD] INFO Setting the number of MPI ranks to ${MPI}."
+# set MPI ranks +if [[ -z "${MPI}" ]]; then + # if no GPUs are used, use one MPI rank per CPU core + # otherwise, use one MPI rank per GPU + MPI=$(( QQ_NGPUS == 0 ? QQ_NCPUS : QQ_NGPUS )) +else + # convert MPI ranks per node to total number of MPI ranks + MPI=$(( MPI * QQ_NNODES )) fi if [[ "${MPI}" -eq 0 ]]; then echo "[QQ_FLEX_MD] ERROR The number of MPI ranks cannot be 0." >&2 exit 1 +else + echo "[QQ_FLEX_MD] INFO Setting the total number of MPI ranks to ${MPI}." fi -# by default, use the highest possible number of OpenMP threads +# set OpenMP threads if [[ -z "${NTOMP}" ]]; then NTOMP=$(( QQ_NCPUS / MPI )) - echo "[QQ_FLEX_MD] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi if [[ "${NTOMP}" -eq 0 ]]; then echo "[QQ_FLEX_MD] ERROR The number of OpenMP threads per MPI rank cannot be 0." >&2 exit 1 +else + echo "[QQ_FLEX_MD] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi -TOTAL_NTOMP=$(( NTOMP * MPI )) - # check for oversubscription +TOTAL_NTOMP=$(( NTOMP * MPI )) if [[ "${TOTAL_NTOMP}" -gt "${QQ_NCPUS}" ]]; then echo "[QQ_FLEX_MD] ERROR The total number of OpenMP threads (${TOTAL_NTOMP}) exceeds the number of allocated CPU cores (${QQ_NCPUS})." >&2 exit 1 diff --git a/scripts/run_scripts/qq_flex_re b/scripts/run_scripts/qq_flex_re index e21cdf1..d2bb5d4 100755 --- a/scripts/run_scripts/qq_flex_re +++ b/scripts/run_scripts/qq_flex_re @@ -3,7 +3,7 @@ # Script for running Gromacs # # flexible-length replica exchange # # using qq # -# script version: 0.3 # +# script version: 0.5 # # support: ladmeb@gmail.com # ######################################## @@ -53,7 +53,7 @@ HREX="true" # maximum number of warnings; leave empty to determine automatically (gen-vel) MAXWARN="" -# number of MPI ranks to use; leave empty to determine automatically +# number of MPI ranks PER CLIENT to use; leave empty to determine automatically MPI="" # number of OpenMP threads per MPI rank to use; leave empty to determine automatically NTOMP="" @@ -82,32 +82,37 @@ for CLIENT in "${CLIENTS[@]}"; do CLI_SUFFIXES+=( "${CLIENT#${CLIENTS_PATTERN}}" ) done -# set MPI ranks and OpenMP threads -# by default, use one MPI rank per client or GPU -if [[ -z "${MPI}" ]]; then - MPI=$(( QQ_NGPUS > N_CLIENTS ? QQ_NGPUS : N_CLIENTS )) - echo "[QQ_FLEX_RE] INFO Setting the number of MPI ranks to ${MPI}." +# set MPI ranks +if [[ -z "${MPI}" ]]; then + # if no GPUs are used, use one MPI rank per CPU core + # otherwise, use one MPI rank per GPU + MPI=$(( QQ_NGPUS == 0 ? QQ_NCPUS : QQ_NGPUS )) +else + # convert MPI ranks per client to total number of MPI ranks + MPI=$(( MPI * N_CLIENTS )) fi if [[ "${MPI}" -eq 0 ]]; then echo "[QQ_FLEX_RE] ERROR The number of MPI ranks cannot be 0." >&2 exit 1 +else + echo "[QQ_FLEX_RE] INFO Setting the total number of MPI ranks to ${MPI}." fi -# by default, use the highest possible number of OpenMP threads +# set OpenMP threads if [[ -z "${NTOMP}" ]]; then NTOMP=$(( QQ_NCPUS / MPI )) - echo "[QQ_FLEX_RE] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi if [[ "${NTOMP}" -eq 0 ]]; then echo "[QQ_FLEX_RE] ERROR The number of OpenMP threads per MPI rank cannot be 0." >&2 exit 1 +else + echo "[QQ_FLEX_RE] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." 
fi -TOTAL_NTOMP=$(( NTOMP * MPI )) - # check for oversubscription +TOTAL_NTOMP=$(( NTOMP * MPI )) if [[ "${TOTAL_NTOMP}" -gt "${QQ_NCPUS}" ]]; then echo "[QQ_FLEX_RE] ERROR The total number of OpenMP threads (${TOTAL_NTOMP}) exceeds the number of allocated CPU cores (${QQ_NCPUS})." >&2 exit 1 diff --git a/scripts/run_scripts/qq_loop_md b/scripts/run_scripts/qq_loop_md index d1fe7c4..1387931 100755 --- a/scripts/run_scripts/qq_loop_md +++ b/scripts/run_scripts/qq_loop_md @@ -2,7 +2,7 @@ ######################################## # Script for running Gromacs # # loop jobs using qq # -# script version: 0.8 # +# script version: 0.9 # # support: ladmeb@gmail.com # ######################################## @@ -41,7 +41,7 @@ PLUMED="" # maximum number of warnings; leave empty to determine automatically (gen-vel) MAXWARN="" -# number of MPI ranks to use; leave empty to determine automatically +# number of MPI ranks PER NODE to use; leave empty to determine automatically MPI="" # number of OpenMP threads per MPI rank to use; leave empty to determine automatically NTOMP="" @@ -60,32 +60,37 @@ module add gromacs:2021.4-plumed # Execution section # ######################################## -# set MPI ranks and OpenMP threads -# by default, use one MPI rank per node or per GPU -if [[ -z "${MPI}" ]]; then - MPI=$(( QQ_NGPUS > QQ_NNODES ? QQ_NGPUS : QQ_NNODES )) - echo "[QQ_LOOP_MD] INFO Setting the number of MPI ranks to ${MPI}." +# set MPI ranks +if [[ -z "${MPI}" ]]; then + # if no GPUs are used, use one MPI rank per CPU core + # otherwise, use one MPI rank per GPU + MPI=$(( QQ_NGPUS == 0 ? QQ_NCPUS : QQ_NGPUS )) +else + # convert MPI ranks per node to total number of MPI ranks + MPI=$(( MPI * QQ_NNODES )) fi if [[ "${MPI}" -eq 0 ]]; then echo "[QQ_LOOP_MD] ERROR The number of MPI ranks cannot be 0." >&2 exit 1 +else + echo "[QQ_LOOP_MD] INFO Setting the total number of MPI ranks to ${MPI}." fi -# by default, use the highest possible number of OpenMP threads +# set OpenMP threads if [[ -z "${NTOMP}" ]]; then NTOMP=$(( QQ_NCPUS / MPI )) - echo "[QQ_LOOP_MD] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi if [[ "${NTOMP}" -eq 0 ]]; then echo "[QQ_LOOP_MD] ERROR The number of OpenMP threads per MPI rank cannot be 0." >&2 exit 1 +else + echo "[QQ_LOOP_MD] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi -TOTAL_NTOMP=$(( NTOMP * MPI )) - # check for oversubscription +TOTAL_NTOMP=$(( NTOMP * MPI )) if [[ "${TOTAL_NTOMP}" -gt "${QQ_NCPUS}" ]]; then echo "[QQ_LOOP_MD] ERROR The total number of OpenMP threads (${TOTAL_NTOMP}) exceeds the number of allocated CPU cores (${QQ_NCPUS})." 
>&2 exit 1 diff --git a/scripts/run_scripts/qq_loop_re b/scripts/run_scripts/qq_loop_re index c0ac970..3449416 100755 --- a/scripts/run_scripts/qq_loop_re +++ b/scripts/run_scripts/qq_loop_re @@ -2,7 +2,7 @@ ######################################## # Script for running Gromacs # # multidir loop jobs using qq # -# script version: 0.2 # +# script version: 0.5 # # support: ladmeb@gmail.com # ######################################## @@ -52,7 +52,7 @@ HREX="true" # maximum number of warnings; leave empty to determine automatically (gen-vel) MAXWARN="" -# number of MPI ranks to use; leave empty to determine automatically +# number of MPI ranks PER CLIENT to use; leave empty to determine automatically MPI="" # number of OpenMP threads per MPI rank to use; leave empty to determine automatically NTOMP="" @@ -78,32 +78,37 @@ for CLIENT in "${CLIENTS[@]}"; do CLI_SUFFIXES+=( "${CLIENT#${CLIENTS_PATTERN}}" ) done -# set MPI ranks and OpenMP threads -# by default, use one MPI rank per client or GPU -if [[ -z "${MPI}" ]]; then - MPI=$(( QQ_NGPUS > N_CLIENTS ? QQ_NGPUS : N_CLIENTS )) - echo "[QQ_LOOP_RE] INFO Setting the number of MPI ranks to ${MPI}." +# set MPI ranks +if [[ -z "${MPI}" ]]; then + # if no GPUs are used, use one MPI rank per CPU core + # otherwise, use one MPI rank per GPU + MPI=$(( QQ_NGPUS == 0 ? QQ_NCPUS : QQ_NGPUS )) +else + # convert MPI ranks per client to total number of MPI ranks + MPI=$(( MPI * N_CLIENTS )) fi if [[ "${MPI}" -eq 0 ]]; then echo "[QQ_LOOP_RE] ERROR The number of MPI ranks cannot be 0." >&2 exit 1 +else + echo "[QQ_LOOP_RE] INFO Setting the total number of MPI ranks to ${MPI}." fi -# by default, use the highest possible number of OpenMP threads +# set OpenMP threads if [[ -z "${NTOMP}" ]]; then NTOMP=$(( QQ_NCPUS / MPI )) - echo "[QQ_LOOP_RE] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi if [[ "${NTOMP}" -eq 0 ]]; then echo "[QQ_LOOP_RE] ERROR The number of OpenMP threads per MPI rank cannot be 0." >&2 exit 1 +else + echo "[QQ_LOOP_RE] INFO Setting the number of OpenMP threads per MPI rank to ${NTOMP}." fi -TOTAL_NTOMP=$(( NTOMP * MPI )) - # check for oversubscription +TOTAL_NTOMP=$(( NTOMP * MPI )) if [[ "${TOTAL_NTOMP}" -gt "${QQ_NCPUS}" ]]; then echo "[QQ_LOOP_RE] ERROR The total number of OpenMP threads (${TOTAL_NTOMP}) exceeds the number of allocated CPU cores (${QQ_NCPUS})." >&2 exit 1 From 87e275e18aab4e315d1e7155cf89b4c3094e4bdd Mon Sep 17 00:00:00 2001 From: Ladme Date: Wed, 19 Nov 2025 10:23:37 +0100 Subject: [PATCH 08/27] Changes to run scripts to changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dfcefb..c079bce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ### Support for per-node resources - Number of CPU cores, number of GPUs, the amount of memory and the amount of storage can be now requested per-node using the submission options `ncpus-per-node`, `ngpus-per-node`, `mem-per-node`, and `work-size-per-node`. Per-node properties override per-cpu properties (`mem-per-cpu`, `work-size-per-cpu`) but are overriden by "total" properties (`ncpus`, `ngpus`, `mem`, `work-size`). +### Changes in Gromacs run scripts +- The scripts now by default try to allocate the maximum possible number of MPI ranks. +- Numbers of MPI ranks are now specified per node (in `*_md` scripts) or per client (in `*_re` scripts). 
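The defaulting rule shared by all four scripts boils down to a few lines of arithmetic. Below is a minimal Python mirror of the shell logic in the diffs above, assuming only the qq runtime variables the scripts read (QQ_NCPUS, QQ_NGPUS, and QQ_NNODES or the client count); it is an illustrative sketch, not part of the patch itself.

    # Sketch of the MPI/OpenMP defaulting used by the run scripts (illustrative only).
    def derive_ranks(
        ncpus: int, ngpus: int, nunits: int, mpi_per_unit: int | None
    ) -> tuple[int, int]:
        """Return (total MPI ranks, OpenMP threads per rank); nunits = nodes or clients."""
        if mpi_per_unit is None:
            # no GPUs: one rank per CPU core; otherwise one rank per GPU
            mpi = ncpus if ngpus == 0 else ngpus
        else:
            # a per-node/per-client value is converted to a total
            mpi = mpi_per_unit * nunits
        if mpi == 0:
            raise ValueError("The number of MPI ranks cannot be 0.")
        ntomp = ncpus // mpi  # fill the remaining cores with OpenMP threads
        if ntomp == 0 or ntomp * mpi > ncpus:
            raise ValueError("Invalid rank/thread combination.")
        return mpi, ntomp

    assert derive_ranks(ncpus=16, ngpus=0, nunits=2, mpi_per_unit=None) == (16, 1)
    assert derive_ranks(ncpus=16, ngpus=2, nunits=2, mpi_per_unit=None) == (2, 8)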
+ ### Bug fixes and minor improvements - The available types of working directories for the current environment are now shown in the output of `qq submit -h`. - Fixed a regression from v0.5: missing size property in `qq nodes` is now correctly intepreted as zero size. From ec38ccd539c35f205311967c1553a3b2ecb8d175 Mon Sep 17 00:00:00 2001 From: Ladme Date: Wed, 19 Nov 2025 13:47:54 +0100 Subject: [PATCH 09/27] Fixing exports --- src/qq_lib/__init__.py | 6 +++- src/qq_lib/archive/__init__.py | 6 ++-- src/qq_lib/batch/__init__.py | 12 +++---- src/qq_lib/batch/interface/__init__.py | 12 +++++-- src/qq_lib/batch/pbs/__init__.py | 9 +++-- src/qq_lib/batch/slurm/__init__.py | 9 +++-- src/qq_lib/batch/slurmit4i/__init__.py | 6 ++-- src/qq_lib/batch/slurmlumi/__init__.py | 6 ++-- src/qq_lib/cd/__init__.py | 7 ++-- src/qq_lib/clear/__init__.py | 7 ++-- src/qq_lib/core/__init__.py | 11 ------ src/qq_lib/core/navigator.py | 18 ++++++++++ src/qq_lib/go/__init__.py | 7 ++-- src/qq_lib/info/__init__.py | 5 ++- src/qq_lib/jobs/__init__.py | 5 ++- src/qq_lib/kill/__init__.py | 11 +++--- src/qq_lib/killall/__init__.py | 4 --- src/qq_lib/nodes/__init__.py | 5 ++- src/qq_lib/properties/__init__.py | 27 --------------- src/qq_lib/qq.py | 30 ++++++++--------- src/qq_lib/queues/__init__.py | 6 ++-- src/qq_lib/run/__init__.py | 46 +++----------------------- src/qq_lib/shebang/__init__.py | 4 --- src/qq_lib/stat/__init__.py | 4 --- src/qq_lib/submit/__init__.py | 7 ++-- src/qq_lib/sync/__init__.py | 7 ++-- src/qq_lib/wipe/__init__.py | 7 ++-- tests/test_jobs_cli.py | 2 +- tests/test_nodes_cli.py | 2 +- tests/test_queues_cli.py | 2 +- tests/test_stat_cli.py | 2 +- tests/test_submit_cli.py | 2 +- tests/test_submit_parser.py | 2 +- 33 files changed, 128 insertions(+), 168 deletions(-) diff --git a/src/qq_lib/__init__.py b/src/qq_lib/__init__.py index 011a0d6..dfe6bc0 100644 --- a/src/qq_lib/__init__.py +++ b/src/qq_lib/__init__.py @@ -1,6 +1,10 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 from .qq import __version__, cli + +__all__ = [ + "__version__", + "cli", +] diff --git a/src/qq_lib/archive/__init__.py b/src/qq_lib/archive/__init__.py index fb1594e..6414034 100644 --- a/src/qq_lib/archive/__init__.py +++ b/src/qq_lib/archive/__init__.py @@ -1,6 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa - from .archiver import Archiver + +__all__ = [ + "Archiver", +] diff --git a/src/qq_lib/batch/__init__.py b/src/qq_lib/batch/__init__.py index df82ddb..b9aa356 100644 --- a/src/qq_lib/batch/__init__.py +++ b/src/qq_lib/batch/__init__.py @@ -1,10 +1,10 @@ # Released under MIT License. 
# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 +# import so that these batch systems are available but do not export them from here +from .pbs import PBS as _PBS +from .slurm import Slurm as _Slurm +from .slurmit4i import SlurmIT4I as _SlurmIT4I +from .slurmlumi import SlurmLumi as _SlurmLumi -from .interface import BatchInterface, BatchJobInterface, BatchMeta -from .pbs import PBS, PBSJob, PBSNode, PBSQueue -from .slurm import Slurm, SlurmJob, SlurmNode, SlurmQueue -from .slurmit4i import SlurmIT4I -from .slurmlumi import SlurmLumi +_PBS, _Slurm, _SlurmIT4I, _SlurmLumi diff --git a/src/qq_lib/batch/interface/__init__.py b/src/qq_lib/batch/interface/__init__.py index cb948c4..8a7df0f 100644 --- a/src/qq_lib/batch/interface/__init__.py +++ b/src/qq_lib/batch/interface/__init__.py @@ -1,8 +1,16 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .interface import BatchInterface from .job import BatchJobInterface from .meta import BatchMeta +from .node import BatchNodeInterface +from .queue import BatchQueueInterface + +__all__ = [ + "BatchInterface", + "BatchJobInterface", + "BatchMeta", + "BatchNodeInterface", + "BatchQueueInterface", +] diff --git a/src/qq_lib/batch/pbs/__init__.py b/src/qq_lib/batch/pbs/__init__.py index d5190e5..d782776 100644 --- a/src/qq_lib/batch/pbs/__init__.py +++ b/src/qq_lib/batch/pbs/__init__.py @@ -1,9 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .job import PBSJob from .node import PBSNode from .pbs import PBS from .queue import PBSQueue + +__all__ = [ + "PBSJob", + "PBSNode", + "PBS", + "PBSQueue", +] diff --git a/src/qq_lib/batch/slurm/__init__.py b/src/qq_lib/batch/slurm/__init__.py index b94c1f0..ade150c 100644 --- a/src/qq_lib/batch/slurm/__init__.py +++ b/src/qq_lib/batch/slurm/__init__.py @@ -1,9 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .job import SlurmJob from .node import SlurmNode from .queue import SlurmQueue from .slurm import Slurm + +__all__ = [ + "SlurmJob", + "SlurmNode", + "SlurmQueue", + "Slurm", +] diff --git a/src/qq_lib/batch/slurmit4i/__init__.py b/src/qq_lib/batch/slurmit4i/__init__.py index 6f2b25d..1e53368 100644 --- a/src/qq_lib/batch/slurmit4i/__init__.py +++ b/src/qq_lib/batch/slurmit4i/__init__.py @@ -1,6 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .slurm import SlurmIT4I + +__all__ = [ + "SlurmIT4I", +] diff --git a/src/qq_lib/batch/slurmlumi/__init__.py b/src/qq_lib/batch/slurmlumi/__init__.py index f824337..d60e432 100644 --- a/src/qq_lib/batch/slurmlumi/__init__.py +++ b/src/qq_lib/batch/slurmlumi/__init__.py @@ -1,6 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .slurm import SlurmLumi + +__all__ = [ + "SlurmLumi", +] diff --git a/src/qq_lib/cd/__init__.py b/src/qq_lib/cd/__init__.py index 17c11b5..86329f3 100644 --- a/src/qq_lib/cd/__init__.py +++ b/src/qq_lib/cd/__init__.py @@ -1,7 +1,8 @@ # Released under MIT License. 
# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .cder import Cder -from .cli import cd + +__all__ = [ + "Cder", +] diff --git a/src/qq_lib/clear/__init__.py b/src/qq_lib/clear/__init__.py index e3c294a..5854efd 100644 --- a/src/qq_lib/clear/__init__.py +++ b/src/qq_lib/clear/__init__.py @@ -1,7 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - from .clearer import Clearer -from .cli import clear + +__all__ = [ + "Clearer", +] diff --git a/src/qq_lib/core/__init__.py b/src/qq_lib/core/__init__.py index 793954c..f43a7d5 100644 --- a/src/qq_lib/core/__init__.py +++ b/src/qq_lib/core/__init__.py @@ -1,14 +1,3 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .common import ( - get_info_file, - get_info_file_from_job_id, - get_info_files, - get_info_files_from_job_id_or_dir, -) -from .error import QQError -from .repeater import Repeater -from .retryer import Retryer diff --git a/src/qq_lib/core/navigator.py b/src/qq_lib/core/navigator.py index 0813732..e5428ef 100644 --- a/src/qq_lib/core/navigator.py +++ b/src/qq_lib/core/navigator.py @@ -73,6 +73,24 @@ def hasDestination(self) -> bool: """ return self._work_dir is not None and self._main_node is not None + def getMainNode(self) -> str | None: + """ + Get the hostname of the main node where the job is running. + + Returns: + str | None: Hostname of the main node or None if undefined. + """ + return self._main_node + + def getWorkDir(self) -> Path | None: + """ + Get the absolute path to the working directory of the job. + + Returns: + Path | None: Absolute path to the working directory or None if undefined. + """ + return self._work_dir + def _setDestination(self) -> None: """ Get the job's host and working directory from the wrapped informer. diff --git a/src/qq_lib/go/__init__.py b/src/qq_lib/go/__init__.py index fbec5d6..16e11ee 100644 --- a/src/qq_lib/go/__init__.py +++ b/src/qq_lib/go/__init__.py @@ -1,7 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .cli import go from .goer import Goer + +__all__ = [ + "Goer", +] diff --git a/src/qq_lib/info/__init__.py b/src/qq_lib/info/__init__.py index 2c3acec..2a686c3 100644 --- a/src/qq_lib/info/__init__.py +++ b/src/qq_lib/info/__init__.py @@ -1,8 +1,7 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .cli import info from .informer import Informer from .presenter import Presenter + +__all__ = ["Informer", "Presenter"] diff --git a/src/qq_lib/jobs/__init__.py b/src/qq_lib/jobs/__init__.py index a31c6c5..da73028 100644 --- a/src/qq_lib/jobs/__init__.py +++ b/src/qq_lib/jobs/__init__.py @@ -1,7 +1,6 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .cli import jobs from .presenter import JobsPresenter, JobsStatistics + +__all__ = ["JobsPresenter", "JobsStatistics"] diff --git a/src/qq_lib/kill/__init__.py b/src/qq_lib/kill/__init__.py index 3fe4fb4..a1a12ee 100644 --- a/src/qq_lib/kill/__init__.py +++ b/src/qq_lib/kill/__init__.py @@ -1,13 +1,12 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - """ -Module for terminating qq jobs submitted from the current directory. - -Read the documentation of the `kill` function for more details. 
+Module for terminating qq jobs. """ -from .cli import kill from .killer import Killer + +__all__ = [ + "Killer", +] diff --git a/src/qq_lib/killall/__init__.py b/src/qq_lib/killall/__init__.py index b1ba320..fbb51e0 100644 --- a/src/qq_lib/killall/__init__.py +++ b/src/qq_lib/killall/__init__.py @@ -1,6 +1,2 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab - -# ruff: noqa: F401 - -from .cli import killall diff --git a/src/qq_lib/nodes/__init__.py b/src/qq_lib/nodes/__init__.py index f7a1a37..b78a94a 100644 --- a/src/qq_lib/nodes/__init__.py +++ b/src/qq_lib/nodes/__init__.py @@ -1,7 +1,6 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 +from .presenter import NodeGroup, NodeGroupStats, NodesPresenter -from .cli import nodes -from .presenter import NodesPresenter +__all__ = ["NodeGroup", "NodeGroupStats", "NodesPresenter"] diff --git a/src/qq_lib/properties/__init__.py b/src/qq_lib/properties/__init__.py index f24a037..f43a7d5 100644 --- a/src/qq_lib/properties/__init__.py +++ b/src/qq_lib/properties/__init__.py @@ -1,30 +1,3 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -__all__ = [ - "Info", - "JobType", - "LoopInfo", - "Resources", - "Size", - "BatchState", - "NaiveState", - "RealState", -] - -_MODULES = { - "Info": "info", - "JobType": "job_type", - "LoopInfo": "loop", - "Resources": "resources", - "Size": "size", - "BatchState": "states", - "NaiveState": "states", - "RealState": "states", -} - - -def __getattr__(name: str): - if name in _MODULES: - return getattr(__import__(f".{_MODULES[name]}", fromlist=[name], level=1), name) - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/src/qq_lib/qq.py b/src/qq_lib/qq.py index 5cbc275..40b47cd 100644 --- a/src/qq_lib/qq.py +++ b/src/qq_lib/qq.py @@ -6,21 +6,21 @@ import click from click_help_colors import HelpColorsGroup -from qq_lib.cd import cd -from qq_lib.clear import clear -from qq_lib.go import go -from qq_lib.info import info -from qq_lib.jobs import jobs -from qq_lib.kill import kill -from qq_lib.killall import killall -from qq_lib.nodes import nodes -from qq_lib.queues import queues -from qq_lib.run import run -from qq_lib.shebang import shebang -from qq_lib.stat import stat -from qq_lib.submit import submit -from qq_lib.sync import sync -from qq_lib.wipe import wipe +from qq_lib.cd.cli import cd +from qq_lib.clear.cli import clear +from qq_lib.go.cli import go +from qq_lib.info.cli import info +from qq_lib.jobs.cli import jobs +from qq_lib.kill.cli import kill +from qq_lib.killall.cli import killall +from qq_lib.nodes.cli import nodes +from qq_lib.queues.cli import queues +from qq_lib.run.cli import run +from qq_lib.shebang.cli import shebang +from qq_lib.stat.cli import stat +from qq_lib.submit.cli import submit +from qq_lib.sync.cli import sync +from qq_lib.wipe.cli import wipe __version__ = "0.6.0-dev.1" diff --git a/src/qq_lib/queues/__init__.py b/src/qq_lib/queues/__init__.py index c71f29a..f9b6b48 100644 --- a/src/qq_lib/queues/__init__.py +++ b/src/qq_lib/queues/__init__.py @@ -1,6 +1,8 @@ # Released under MIT License. 
# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 +from .presenter import QueuesPresenter -from .cli import queues +__all__ = [ + "QueuesPresenter", +] diff --git a/src/qq_lib/run/__init__.py b/src/qq_lib/run/__init__.py index 2bc3015..b016ea5 100644 --- a/src/qq_lib/run/__init__.py +++ b/src/qq_lib/run/__init__.py @@ -1,46 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -""" -This module defines the `Runner` class and related helpers that manage the -execution of qq jobs within a batch system. It is invoked internally through -the `qq run` command, which is hidden from the user-facing CLI. - -Lifecycle of a qq job: - 1. Working directory preparation - - Shared storage jobs: The working directory is set to the job - submission directory itself. - - Scratch-using jobs: A dedicated scratch directory (created by the - batch system) is used as a working directory. Job files are copied - to a specific directory inside the working directory. - - 2. Execution - The qq info file is updated to record the "running" state. - The job script is executed. - - 3. Finalization - - On success: - - The qq info file is updated to "finished". - - If running on scratch, job files are copied back to the submission - (job) directory and then removed from scratch. - - On failure: - - The qq info file is updated to "failed". - - If on scratch, files are left in place for debugging. - - X. Cleanup (on interruption) - If the process receives a SIGTERM, the runner updates the qq info file - to "killed", attempts to gracefully terminate the subprocess, and forces - termination with SIGKILL if necessary. - -Summary: - - Shared-storage jobs execute directly in the job directory, with no - file copying. - - Scratch-using jobs copy job files to scratch, execute there, and then - either copy results back (on success) or leave scratch data intact (on - failure). -""" - -from .cli import run from .runner import Runner + +__all__ = [ + "Runner", +] diff --git a/src/qq_lib/shebang/__init__.py b/src/qq_lib/shebang/__init__.py index 5565403..fbb51e0 100644 --- a/src/qq_lib/shebang/__init__.py +++ b/src/qq_lib/shebang/__init__.py @@ -1,6 +1,2 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab - -# ruff: noqa: F401 - -from .cli import shebang diff --git a/src/qq_lib/stat/__init__.py b/src/qq_lib/stat/__init__.py index 7902830..fbb51e0 100644 --- a/src/qq_lib/stat/__init__.py +++ b/src/qq_lib/stat/__init__.py @@ -1,6 +1,2 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab - -# ruff: noqa: F401 - -from .cli import stat diff --git a/src/qq_lib/submit/__init__.py b/src/qq_lib/submit/__init__.py index c484662..1471816 100644 --- a/src/qq_lib/submit/__init__.py +++ b/src/qq_lib/submit/__init__.py @@ -1,13 +1,12 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - """ -This module manages submission of qq jobs using the Submitter class. +This module manages submission of qq jobs to the batch system. """ -from .cli import submit from .factory import SubmitterFactory from .parser import Parser from .submitter import Submitter + +__all__ = ["SubmitterFactory", "Parser", "Submitter"] diff --git a/src/qq_lib/sync/__init__.py b/src/qq_lib/sync/__init__.py index 0b9bbbb..00afdc2 100644 --- a/src/qq_lib/sync/__init__.py +++ b/src/qq_lib/sync/__init__.py @@ -1,7 +1,8 @@ # Released under MIT License. 
# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .cli import sync from .syncer import Syncer + +__all__ = [ + "Syncer", +] diff --git a/src/qq_lib/wipe/__init__.py b/src/qq_lib/wipe/__init__.py index e54e542..6ebaf4d 100644 --- a/src/qq_lib/wipe/__init__.py +++ b/src/qq_lib/wipe/__init__.py @@ -1,7 +1,8 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -# ruff: noqa: F401 - -from .cli import wipe from .wiper import Wiper + +__all__ = [ + "Wiper", +] diff --git a/tests/test_jobs_cli.py b/tests/test_jobs_cli.py index cafa2db..4accb95 100644 --- a/tests/test_jobs_cli.py +++ b/tests/test_jobs_cli.py @@ -10,7 +10,7 @@ from qq_lib.batch.interface import BatchMeta from qq_lib.batch.pbs import PBS, PBSJob from qq_lib.batch.pbs.common import parse_multi_pbs_dump_to_dictionaries -from qq_lib.jobs import jobs +from qq_lib.jobs.cli import jobs from qq_lib.jobs.presenter import JobsPresenter diff --git a/tests/test_nodes_cli.py b/tests/test_nodes_cli.py index 2e3c311..6d39dfa 100644 --- a/tests/test_nodes_cli.py +++ b/tests/test_nodes_cli.py @@ -7,7 +7,7 @@ from qq_lib.core.config import CFG from qq_lib.core.error import QQError -from qq_lib.nodes import nodes +from qq_lib.nodes.cli import nodes def test_nodes_command_prints_available_nodes(): diff --git a/tests/test_queues_cli.py b/tests/test_queues_cli.py index 51c8a23..9c5cc20 100644 --- a/tests/test_queues_cli.py +++ b/tests/test_queues_cli.py @@ -7,7 +7,7 @@ from qq_lib.core.config import CFG from qq_lib.core.error import QQError -from qq_lib.queues import queues +from qq_lib.queues.cli import queues def test_queues_command_prints_available_queues(): diff --git a/tests/test_stat_cli.py b/tests/test_stat_cli.py index aa36c2d..f999667 100644 --- a/tests/test_stat_cli.py +++ b/tests/test_stat_cli.py @@ -11,7 +11,7 @@ from qq_lib.batch.pbs import PBS, PBSJob from qq_lib.batch.pbs.common import parse_multi_pbs_dump_to_dictionaries from qq_lib.jobs.presenter import JobsPresenter -from qq_lib.stat import stat +from qq_lib.stat.cli import stat @pytest.fixture diff --git a/tests/test_submit_cli.py b/tests/test_submit_cli.py index 5a99767..1fb5cfd 100644 --- a/tests/test_submit_cli.py +++ b/tests/test_submit_cli.py @@ -6,7 +6,7 @@ from click.testing import CliRunner from qq_lib.core.config import CFG -from qq_lib.submit import submit +from qq_lib.submit.cli import submit def test_submit_successful(tmp_path): diff --git a/tests/test_submit_parser.py b/tests/test_submit_parser.py index 07d6afb..d1d7392 100644 --- a/tests/test_submit_parser.py +++ b/tests/test_submit_parser.py @@ -17,7 +17,7 @@ from qq_lib.properties.job_type import JobType from qq_lib.properties.resources import Resources from qq_lib.properties.size import Size -from qq_lib.submit import submit +from qq_lib.submit.cli import submit from qq_lib.submit.parser import Parser # ruff: noqa: W293 From ca66e5938b3e508443478500d63500070218a6a8 Mon Sep 17 00:00:00 2001 From: Ladme Date: Thu, 20 Nov 2025 10:02:03 +0100 Subject: [PATCH 10/27] Renaming methods --- CHANGELOG.md | 4 ++++ src/qq_lib/kill/cli.py | 2 +- src/qq_lib/kill/killer.py | 2 +- src/qq_lib/wipe/cli.py | 2 +- src/qq_lib/wipe/wiper.py | 2 +- tests/test_kill_cli.py | 10 +++++----- tests/test_kill_killer.py | 12 ++++++------ tests/test_wipe_cli.py | 12 ++++++------ tests/test_wipe_wiper.py | 6 +++--- 9 files changed, 28 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c079bce..23e32a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-11,6 +11,10 @@
 - The available types of working directories for the current environment are now shown in the output of `qq submit -h`.
 - Fixed a regression from v0.5: a missing size property in `qq nodes` is now correctly interpreted as zero size.
 
+### Internal changes
+- `Wiper.delete` method has been renamed to `Wiper.wipe`.
+- `Killer.terminate` method has been renamed to `Killer.kill`.
+
 ***
 
 ## Version 0.5.1
diff --git a/src/qq_lib/kill/cli.py b/src/qq_lib/kill/cli.py
index a990d52..22a589a 100644
--- a/src/qq_lib/kill/cli.py
+++ b/src/qq_lib/kill/cli.py
@@ -135,7 +135,7 @@ def kill_job(informer: Informer, force: bool, yes: bool) -> None:
     killer.ensureSuitable()
 
     if force or yes or yes_or_no_prompt("Do you want to kill the job?"):
-        job_id = killer.terminate(force)
+        job_id = killer.kill(force)
         logger.info(f"Killed the job '{job_id}'.")
     else:
         logger.info("Operation aborted.")
diff --git a/src/qq_lib/kill/killer.py b/src/qq_lib/kill/killer.py
index 515f79e..e7bb34c 100644
--- a/src/qq_lib/kill/killer.py
+++ b/src/qq_lib/kill/killer.py
@@ -41,7 +41,7 @@ def ensureSuitable(self) -> None:
                 "Job cannot be terminated. Job is in an exiting state."
             )
 
-    def terminate(self, force: bool = False) -> str:
+    def kill(self, force: bool = False) -> str:
         """
         Execute the kill command for the job using the batch system.
diff --git a/src/qq_lib/wipe/cli.py b/src/qq_lib/wipe/cli.py
index 220369d..86b3b3e 100644
--- a/src/qq_lib/wipe/cli.py
+++ b/src/qq_lib/wipe/cli.py
@@ -124,7 +124,7 @@ def _wipe_work_dir(informer: Informer, force: bool, yes: bool) -> None:
         or yes
         or yes_or_no_prompt("Do you want to delete the job's working directory?")
     ):
-        job_id = wiper.delete()
+        job_id = wiper.wipe()
         logger.info(f"Deleted the working directory of the job '{job_id}'.")
     else:
         logger.info("Operation aborted.")
diff --git a/src/qq_lib/wipe/wiper.py b/src/qq_lib/wipe/wiper.py
index f084311..30a4a5f 100644
--- a/src/qq_lib/wipe/wiper.py
+++ b/src/qq_lib/wipe/wiper.py
@@ -50,7 +50,7 @@ def ensureSuitable(self) -> None:
                 "Working directory of the job is the input directory of the job. Cannot delete the input directory."
             )
 
-    def delete(self) -> str:
+    def wipe(self) -> str:
         """
         Delete the working directory on the computing node.
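Downstream callers pick up the rename mechanically. A small hedged sketch of the new spellings, where `informer` stands for a loaded Informer and the `fromInformer` constructors are assumed from the usage visible in the CLI modules and tests:

    # Sketch only: Killer.terminate -> Killer.kill, Wiper.delete -> Wiper.wipe.
    from qq_lib.kill.killer import Killer
    from qq_lib.wipe.wiper import Wiper

    def kill_and_wipe(informer, force: bool = False) -> None:
        killer = Killer.fromInformer(informer)  # constructor assumed from the CLI tests
        killer.ensureSuitable()
        job_id = killer.kill(force)             # formerly killer.terminate(force)

        wiper = Wiper.fromInformer(informer)    # patched as Wiper.fromInformer in the tests
        wiper.ensureSuitable()
        wiper.wipe()                            # formerly wiper.delete()
        print(f"Killed the job '{job_id}' and deleted its working directory.")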
diff --git a/tests/test_kill_cli.py b/tests/test_kill_cli.py index f2b20ea..0a747bb 100644 --- a/tests/test_kill_cli.py +++ b/tests/test_kill_cli.py @@ -19,13 +19,13 @@ def test_kill_job_force_skips_suitability_and_logs_killed(): patch("qq_lib.kill.cli.console"), ): mock_killer = MagicMock() - mock_killer.terminate.return_value = "1234" + mock_killer.kill.return_value = "1234" mock_killer_ctor.return_value = mock_killer kill_job(MagicMock(), force=True, yes=False) mock_killer.ensureSuitable.assert_not_called() - mock_killer.terminate.assert_called_once_with(True) + mock_killer.kill.assert_called_once_with(True) mock_prompt.assert_not_called() mock_logger.assert_called_once_with("Killed the job '1234'.") @@ -38,13 +38,13 @@ def test_kill_job_prompts_yes_and_kills(): patch("qq_lib.kill.cli.yes_or_no_prompt", return_value=True), ): mock_killer = MagicMock() - mock_killer.terminate.return_value = "5678" + mock_killer.kill.return_value = "5678" mock_killer_ctor.return_value = mock_killer kill_job(MagicMock(), force=False, yes=False) mock_killer.ensureSuitable.assert_called_once() - mock_killer.terminate.assert_called_once_with(False) + mock_killer.kill.assert_called_once_with(False) mock_logger.assert_called_once_with("Killed the job '5678'.") @@ -60,7 +60,7 @@ def test_kill_job_prompts_no_and_aborts(): kill_job(MagicMock(), force=False, yes=False) - mock_killer.terminate.assert_not_called() + mock_killer.kill.assert_not_called() mock_logger.assert_called_once_with("Operation aborted.") diff --git a/tests/test_kill_killer.py b/tests/test_kill_killer.py index e39d859..8842b63 100644 --- a/tests/test_kill_killer.py +++ b/tests/test_kill_killer.py @@ -194,7 +194,7 @@ def test_killer_should_update_info_file_all_combinations_manual( assert killer._shouldUpdateInfoFile(force) is expected -def test_killer_terminate_normal_updates_info_file(): +def test_killer_kill_normal_updates_info_file(): killer = Killer.__new__(Killer) killer._shouldUpdateInfoFile = MagicMock(return_value=True) killer._updateInfoFile = MagicMock() @@ -202,7 +202,7 @@ def test_killer_terminate_normal_updates_info_file(): killer._informer = MagicMock() killer._informer.info.job_id = "1234" - job_id = killer.terminate(force=False) + job_id = killer.kill(force=False) assert job_id == "1234" killer._shouldUpdateInfoFile.assert_called_once_with(False) @@ -211,7 +211,7 @@ def test_killer_terminate_normal_updates_info_file(): killer._updateInfoFile.assert_called_once() -def test_killer_terminate_force_updates_info_file(): +def test_killer_kill_force_updates_info_file(): killer = Killer.__new__(Killer) killer._shouldUpdateInfoFile = MagicMock(return_value=True) killer._updateInfoFile = MagicMock() @@ -219,7 +219,7 @@ def test_killer_terminate_force_updates_info_file(): killer._informer = MagicMock() killer._informer.info.job_id = "5678" - job_id = killer.terminate(force=True) + job_id = killer.kill(force=True) assert job_id == "5678" killer._shouldUpdateInfoFile.assert_called_once_with(True) @@ -228,7 +228,7 @@ def test_killer_terminate_force_updates_info_file(): killer._updateInfoFile.assert_called_once() -def test_killer_terminate_does_not_update_info_file(): +def test_killer_kill_does_not_update_info_file(): killer = Killer.__new__(Killer) killer._shouldUpdateInfoFile = MagicMock(return_value=False) killer._updateInfoFile = MagicMock() @@ -236,7 +236,7 @@ def test_killer_terminate_does_not_update_info_file(): killer._informer = MagicMock() killer._informer.info.job_id = "91011" - job_id = killer.terminate(force=False) + job_id = 
killer.kill(force=False) assert job_id == "91011" killer._shouldUpdateInfoFile.assert_called_once_with(False) diff --git a/tests/test_wipe_cli.py b/tests/test_wipe_cli.py index 8d70ff7..f606343 100644 --- a/tests/test_wipe_cli.py +++ b/tests/test_wipe_cli.py @@ -16,14 +16,14 @@ @patch("qq_lib.wipe.cli.Wiper.fromInformer") def test_wipe_work_dir_success_with_force(mock_wiper_from_informer, mock_logger_info): mock_wiper = MagicMock() - mock_wiper.delete.return_value = "job123" + mock_wiper.wipe.return_value = "job123" mock_wiper_from_informer.return_value = mock_wiper informer = MagicMock() _wipe_work_dir(informer, force=True, yes=False) mock_wiper.ensureSuitable.assert_not_called() - mock_wiper.delete.assert_called_once() + mock_wiper.wipe.assert_called_once() mock_logger_info.assert_called_with( "Deleted the working directory of the job 'job123'." ) @@ -36,14 +36,14 @@ def test_wipe_work_dir_success_with_prompt( mock_prompt, mock_wiper_from_informer, mock_logger_info ): mock_wiper = MagicMock() - mock_wiper.delete.return_value = "jobXYZ" + mock_wiper.wipe.return_value = "jobXYZ" mock_wiper_from_informer.return_value = mock_wiper informer = MagicMock() _wipe_work_dir(informer, force=False, yes=False) mock_wiper.ensureSuitable.assert_called_once() - mock_wiper.delete.assert_called_once() + mock_wiper.wipe.assert_called_once() mock_prompt.assert_called_once() mock_logger_info.assert_called_with( "Deleted the working directory of the job 'jobXYZ'." @@ -63,7 +63,7 @@ def test_wipe_work_dir_aborts_on_negative_prompt( _wipe_work_dir(informer, force=False, yes=False) mock_wiper.ensureSuitable.assert_called_once() - mock_wiper.delete.assert_not_called() + mock_wiper.wipe.assert_not_called() mock_prompt.assert_called_once() mock_logger_info.assert_called_with("Operation aborted.") @@ -82,7 +82,7 @@ def test_wipe_work_dir_raises_not_suitable_error(mock_wiper_from_informer): @patch("qq_lib.wipe.cli.Wiper.fromInformer") def test_wipe_work_dir_raises_general_error(mock_wiper_from_informer): mock_wiper = MagicMock() - mock_wiper.delete.side_effect = QQError("Cannot delete working directory") + mock_wiper.wipe.side_effect = QQError("Cannot delete working directory") mock_wiper_from_informer.return_value = mock_wiper informer = MagicMock() diff --git a/tests/test_wipe_wiper.py b/tests/test_wipe_wiper.py index 435e883..077f7b1 100644 --- a/tests/test_wipe_wiper.py +++ b/tests/test_wipe_wiper.py @@ -160,7 +160,7 @@ def test_wiper_ensure_suitable_passes_for_allowed_states(state): ), ], ) -def test_wiper_delete_raises_for_invalid_conditions( +def test_wiper_wipe_raises_for_invalid_conditions( has_destination, workdir_is_inputdir, expected_exception, expected_message ): wiper = Wiper.__new__(Wiper) @@ -172,7 +172,7 @@ def test_wiper_delete_raises_for_invalid_conditions( wiper._work_dir = Path("/workdir") with pytest.raises(expected_exception, match=expected_message): - wiper.delete() + wiper.wipe() @patch("qq_lib.wipe.wiper.logger.info") @@ -187,7 +187,7 @@ def test_wiper_delete_success_calls_logger_and_deletes(mock_logger_info): wiper._main_node = "main_node" wiper._work_dir = Path("/some/workdir") - result = wiper.delete() + result = wiper.wipe() assert result == "job123" mock_logger_info.assert_called_once() From cacc4e5438137b1408f91cf22eae3dc56903a559 Mon Sep 17 00:00:00 2001 From: Ladme Date: Thu, 20 Nov 2025 11:05:58 +0100 Subject: [PATCH 11/27] SubmitterFactory no longer needs a list of supported parameters --- CHANGELOG.md | 1 + src/qq_lib/submit/cli.py | 4 +--- src/qq_lib/submit/factory.py | 11 
++++------- tests/test_submit_factory.py | 5 ++--- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e32a6..4dbfaf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ ### Internal changes - `Wiper.delete` method has been renamed to `Wiper.wipe`. - `Killer.terminate` method has been renamed to `Killer.kill`. +- `SubmitterFactory` no longer requires a list of supported parameters and instead loads it itself. *** diff --git a/src/qq_lib/submit/cli.py b/src/qq_lib/submit/cli.py index 0b5fe5c..08832e3 100644 --- a/src/qq_lib/submit/cli.py +++ b/src/qq_lib/submit/cli.py @@ -213,9 +213,7 @@ def submit(script: str, **kwargs) -> NoReturn: raise QQError(f"Script '{script}' does not exist or is not a file.") # parse options from the command line and from the script itself - factory = SubmitterFactory( - script_path.resolve(), submit.params, sys.argv[2:], **kwargs - ) + factory = SubmitterFactory(script_path.resolve(), sys.argv[2:], **kwargs) submitter = factory.makeSubmitter() # guard against multiple submissions from the same directory diff --git a/src/qq_lib/submit/factory.py b/src/qq_lib/submit/factory.py index adb9a90..7e31f3c 100644 --- a/src/qq_lib/submit/factory.py +++ b/src/qq_lib/submit/factory.py @@ -4,8 +4,6 @@ from dataclasses import fields from pathlib import Path -from click import Parameter - from qq_lib.batch.interface import BatchInterface, BatchMeta from qq_lib.core.common import split_files_list from qq_lib.core.error import QQError @@ -24,19 +22,18 @@ class SubmitterFactory: the command-line and from the script itself. """ - def __init__( - self, script: Path, params: list[Parameter], command_line: list[str], **kwargs - ): + def __init__(self, script: Path, command_line: list[str], **kwargs): """ Initialize the factory with the script, command-line parameters, and additional options. Args: script (Path): Path to the script to submit. - params (list[Parameter]): List of all known submission parameters. command_line (list[str]): All the arguments and options specified on the command line. **kwargs: Keyword arguments from the command line. 
""" - self._parser = Parser(script, params) + from qq_lib.submit.cli import submit + + self._parser = Parser(script, submit.params) self._script = script self._input_dir = script.parent self._kwargs = kwargs diff --git a/tests/test_submit_factory.py b/tests/test_submit_factory.py index 84a8718..13da826 100644 --- a/tests/test_submit_factory.py +++ b/tests/test_submit_factory.py @@ -19,7 +19,6 @@ def test_submitter_factory_init(tmp_path): script = tmp_path / "script.sh" - params = [MagicMock(), MagicMock()] command_line = ["-q", "default", str(script)] kwargs = {"queue": "default"} @@ -27,14 +26,14 @@ def test_submitter_factory_init(tmp_path): mock_parser_instance = MagicMock() mock_parser_class.return_value = mock_parser_instance - factory = SubmitterFactory(script, params, command_line, **kwargs) + factory = SubmitterFactory(script, command_line, **kwargs) assert factory._parser == mock_parser_instance assert factory._script == script assert factory._input_dir == tmp_path assert factory._command_line == command_line assert factory._kwargs == kwargs - mock_parser_class.assert_called_once_with(script, params) + mock_parser_class.assert_called_once() def test_submitter_factory_get_depend(): From b92cb8273a4db14cd5769023ee801946d4437fd5 Mon Sep 17 00:00:00 2001 From: Ladme Date: Thu, 20 Nov 2025 11:21:28 +0100 Subject: [PATCH 12/27] Getter methods for Submitter --- CHANGELOG.md | 1 + src/qq_lib/submit/submitter.py | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dbfaf4..3bceb33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - `Wiper.delete` method has been renamed to `Wiper.wipe`. - `Killer.terminate` method has been renamed to `Killer.kill`. - `SubmitterFactory` no longer requires a list of supported parameters and instead loads it itself. +- Added getter methods to `Submitter`. *** diff --git a/src/qq_lib/submit/submitter.py b/src/qq_lib/submit/submitter.py index c15d6b3..1f6762e 100644 --- a/src/qq_lib/submit/submitter.py +++ b/src/qq_lib/submit/submitter.py @@ -217,6 +217,50 @@ def getInputDir(self) -> Path: """ return self._input_dir + def getBatchSystem(self) -> type[BatchInterface]: + """Get the batch system used for submiting.""" + return self._batch_system + + def getQueue(self) -> str: + """Get the submission queue.""" + return self._queue + + def getAccount(self) -> str | None: + """Get the user's account.""" + return self._account + + def getScript(self) -> Path: + """Get path to the submitted script.""" + return self._script + + def getJobType(self) -> JobType: + """Get type of the job.""" + return self._job_type + + def getResources(self) -> Resources: + """Get resources requested for the job.""" + return self._resources + + def getCommandLine(self) -> list[str]: + """Get the submission command line.""" + return self._command_line + + def getLoopInfo(self) -> LoopInfo | None: + """Get loop job information.""" + return self._loop_info + + def getExclude(self) -> list[Path] | None: + """Get a list of excluded files.""" + return self._exclude + + def getInclude(self) -> list[Path] | None: + """Get a list of included files.""" + return self._include + + def getDepend(self) -> list[Depend] | None: + """Get the list of dependencies.""" + return self._depend + def _createEnvVarsDict(self) -> dict[str, str]: """ Create a dictionary of environment variables provided to qq runtime. 
From 2237c639edcc93ab11093f80f1281728433f1615 Mon Sep 17 00:00:00 2001 From: Ladme Date: Thu, 20 Nov 2025 12:19:37 +0100 Subject: [PATCH 13/27] Added a comment that submitting a qq job is not thread-safe --- src/qq_lib/submit/submitter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/qq_lib/submit/submitter.py b/src/qq_lib/submit/submitter.py index 1f6762e..24da35c 100644 --- a/src/qq_lib/submit/submitter.py +++ b/src/qq_lib/submit/submitter.py @@ -115,6 +115,9 @@ def submit(self) -> str: Sets required environment variables, calls the batch system's job submission mechanism, and creates an info file with job metadata. + Note that this method temporarily changes the current working directory, + and is therefore not thread-safe. + Returns: str: The job ID of the submitted job. From c212cc334dc3b590cce97c2a4f6a085a440fd560 Mon Sep 17 00:00:00 2001 From: Ladme Date: Thu, 20 Nov 2025 12:21:17 +0100 Subject: [PATCH 14/27] Copying runtime files a bit later after killing a job --- CHANGELOG.md | 1 + src/qq_lib/run/runner.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bceb33..ae94ef8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ ### Bug fixes and minor improvements - The available types of working directories for the current environment are now shown in the output of `qq submit -h`. - Fixed a regression from v0.5: missing size property in `qq nodes` is now correctly intepreted as zero size. +- When a job is killed, runtime files are copied to the input directory only after the executed process finishes. ### Internal changes - `Wiper.delete` method has been renamed to `Wiper.wipe`. diff --git a/src/qq_lib/run/runner.py b/src/qq_lib/run/runner.py index 7d26e5c..e719c45 100644 --- a/src/qq_lib/run/runner.py +++ b/src/qq_lib/run/runner.py @@ -752,10 +752,6 @@ def _cleanup(self) -> None: - Marks job as killed in the info file. - Terminates the subprocess. """ - # copy runtime files to input dir without retrying - if self._use_scratch: - self._copyRunTimeFilesToInputDir(retry=False) - # update the qq info file self._updateInfoKilled() @@ -770,6 +766,10 @@ def _cleanup(self) -> None: if self._process and self._process.poll() is None: self._process.kill() + # copy runtime files to input dir without retrying + if self._use_scratch: + self._copyRunTimeFilesToInputDir(retry=False) + def _handle_sigterm(self, _signum: int, _frame: FrameType | None) -> NoReturn: """ Signal handler for SIGTERM. 
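The point of the reordering in `_cleanup` is that the runtime files only stop changing once the child process has exited, so the copy back to the input directory must come last. A condensed sketch of the resulting shutdown sequence; the graceful-termination step between SIGTERM and SIGKILL is elided in the hunk above, so its shape and timeout here are assumptions:

    import subprocess

    # Shutdown order after SIGTERM, per this patch: record the state, stop the
    # process, and only then copy runtime files off the scratch directory.
    def _cleanup_sketch(self) -> None:
        self._updateInfoKilled()                    # 1. mark the job as killed
        if self._process and self._process.poll() is None:
            self._process.terminate()               # 2. graceful stop (assumed step)
            try:
                self._process.wait(timeout=10)      # timeout value is illustrative
            except subprocess.TimeoutExpired:
                self._process.kill()                # 3. force-stop if still alive
        if self._use_scratch:
            self._copyRunTimeFilesToInputDir(retry=False)  # 4. files are now final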
From 221e00272279145618b617fea46b9b60291750cf Mon Sep 17 00:00:00 2001 From: Ladme Date: Sat, 22 Nov 2025 16:36:20 +0100 Subject: [PATCH 15/27] Removed the required command line specification in submitter --- src/qq_lib/batch/interface/interface.py | 23 +-- src/qq_lib/batch/slurmit4i/slurm.py | 10 +- src/qq_lib/properties/info.py | 44 ++++- src/qq_lib/properties/loop.py | 18 ++ src/qq_lib/properties/resources.py | 55 ++++++ src/qq_lib/run/runner.py | 60 +------ src/qq_lib/submit/cli.py | 2 +- src/qq_lib/submit/factory.py | 5 +- src/qq_lib/submit/submitter.py | 8 - tests/test_batch_slurm_slurm.py | 4 +- tests/test_batch_slurmit4i_slurm.py | 12 +- tests/test_info_presenter.py | 1 - tests/test_properties_info.py | 59 ++++++- tests/test_properties_loop.py | 40 +++++ tests/test_properties_resources.py | 109 ++++++++++++ tests/test_run_runner.py | 213 +----------------------- tests/test_submit_factory.py | 8 +- tests/test_submit_submitter.py | 9 - 18 files changed, 353 insertions(+), 327 deletions(-) diff --git a/src/qq_lib/batch/interface/interface.py b/src/qq_lib/batch/interface/interface.py index cded79f..21fe06e 100644 --- a/src/qq_lib/batch/interface/interface.py +++ b/src/qq_lib/batch/interface/interface.py @@ -706,7 +706,9 @@ def isShared(cls, directory: Path) -> bool: return result.returncode != 0 @classmethod - def resubmit(cls, **kwargs) -> None: + def resubmit( + cls, input_machine: str, input_dir: Path, command_line: list[str] + ) -> None: """ Resubmit a job to the batch system. @@ -716,26 +718,19 @@ def resubmit(cls, **kwargs) -> None: If the resubmission fails, a QQError is raised. - Keyword Args: - input_machine (str): The hostname of the machine where the job - should be resubmitted. - input_dir (str | Path): The directory on the remote machine containing - the job data and submission files. - command_line (list[str]): The original command-line arguments that - should be passed to `qq submit`. + Args: + input_machine (str): Name of the host from which the job is to be submitted. + input_dir (Path): Path to the job's input directory. + command_line (list[str]): Options and arguments to use for submitting. Raises: QQError: If the resubmission fails (non-zero return code from the SSH command). """ - input_machine = kwargs["input_machine"] - input_dir = kwargs["input_dir"] - command_line = kwargs["command_line"] - qq_submit_command = f"{CFG.binary_name} submit {' '.join(command_line)}" logger.debug( - f"Navigating to '{input_machine}:{input_dir}' to execute '{qq_submit_command}'." + f"Navigating to '{input_machine}:{str(input_dir)}' to execute '{qq_submit_command}'." 
) result = subprocess.run( [ @@ -745,7 +740,7 @@ def resubmit(cls, **kwargs) -> None: f"-o ConnectTimeout={CFG.timeouts.ssh}", "-q", # suppress some SSH messages input_machine, - f"cd {input_dir} && {qq_submit_command}", + f"cd {str(input_dir)} && {qq_submit_command}", ], capture_output=True, text=True, diff --git a/src/qq_lib/batch/slurmit4i/slurm.py b/src/qq_lib/batch/slurmit4i/slurm.py index df82ad3..6922b06 100644 --- a/src/qq_lib/batch/slurmit4i/slurm.py +++ b/src/qq_lib/batch/slurmit4i/slurm.py @@ -207,9 +207,11 @@ def isShared(cls, directory: Path) -> bool: return True @classmethod - def resubmit(cls, **kwargs) -> None: - input_dir = kwargs["input_dir"] - command_line = kwargs["command_line"] + def resubmit( + cls, input_machine: str, input_dir: Path, command_line: list[str] + ) -> None: + # input machine is unused, resubmit from the current machine + _ = input_machine qq_submit_command = f"{CFG.binary_name} submit {' '.join(command_line)}" @@ -221,7 +223,7 @@ def resubmit(cls, **kwargs) -> None: f"Could not resubmit the job. Could not navigate to '{input_dir}': {e}." ) from e - logger.debug(f"Navigated to {input_dir}.") + logger.debug(f"Navigated to {str(input_dir)}.") result = subprocess.run( ["bash"], input=qq_submit_command, diff --git a/src/qq_lib/properties/info.py b/src/qq_lib/properties/info.py index a0726d3..780c93c 100644 --- a/src/qq_lib/properties/info.py +++ b/src/qq_lib/properties/info.py @@ -82,9 +82,6 @@ class Info: # Resources allocated to the job resources: Resources - # Command line arguments and options provided when submitting. - command_line: list[str] - # List of files and directories to not copy to the working directory. excluded_files: list[Path] = field(default_factory=list) @@ -194,6 +191,47 @@ def toFile(self, file: Path, host: str | None = None) -> None: except Exception as e: raise QQError(f"Cannot create or write to file '{file}': {e}") from e + def getCommandLineForResubmit(self) -> list[str]: + """ + Construct the command-line arguments required to resubmit the job. + + Returns: + list[str]: A list of command-line tokens representing all options + needed to resubmit the job. + """ + + command_line = [ + self.script_name, + "--queue", + self.queue, + "--job-type", + str(self.job_type), + "--batch-system", + str(self.batch_system), + "--depend", + f"afterok={self.job_id}", + ] + + command_line.extend(self.resources.toCommandLine()) + + if self.account: + command_line.extend(["--account", self.account]) + + if self.excluded_files: + command_line.extend( + ["--exclude", ",".join([str(x) for x in self.excluded_files])] + ) + + if self.included_files: + command_line.extend( + ["--include", ",".join([str(x) for x in self.included_files])] + ) + + if self.loop_info: + command_line.extend(self.loop_info.toCommandLine()) + + return command_line + def _toYaml(self) -> str: """ Serialize the Info instance to a YAML string. diff --git a/src/qq_lib/properties/loop.py b/src/qq_lib/properties/loop.py index 0ed666f..1b9e5ac 100644 --- a/src/qq_lib/properties/loop.py +++ b/src/qq_lib/properties/loop.py @@ -82,6 +82,24 @@ def toDict(self) -> dict[str, object]: k: str(v) if isinstance(v, Path) else v for k, v in asdict(self).items() } + def toCommandLine(self) -> list[str]: + """ + Convert loop job settings into a command-line argument list for `qq submit`. + + Returns: + list[str]: A list of command-line arguments ready to pass to ``qq submit``. 
+ """ + return [ + "--loop-start", + str(self.start), + "--loop-end", + str(self.end), + "--archive", + self.archive.name, + "--archive-format", + self.archive_format, + ] + def _getCycle(self) -> int: """ Determine the current cycle number based on files in the archive directory. diff --git a/src/qq_lib/properties/resources.py b/src/qq_lib/properties/resources.py index ebc2ed0..948da7b 100644 --- a/src/qq_lib/properties/resources.py +++ b/src/qq_lib/properties/resources.py @@ -224,6 +224,36 @@ def mergeResources(*resources: "Resources") -> "Resources": return Resources(**merged_data) + def toCommandLine(self) -> list[str]: + """ + Convert resource settings into a command-line argument list for `qq submit`. + + Returns: + list[str]: A list of command-line arguments ready to pass to ``qq submit``. + """ + command_line: list[str] = [] + for f in fields(Resources): + field_name = f.name.replace("_", "-") + value = getattr(self, f.name) + if value is None: + continue + + if isinstance(value, Size): + command_line.extend([f"--{field_name}", value.toStrExact()]) + elif isinstance(value, int): + command_line.extend([f"--{field_name}", str(value)]) + elif isinstance(value, dict): + if value := self._propsToValue(): + command_line.extend([f"--{field_name}", value]) + elif isinstance(value, str): + command_line.extend([f"--{field_name}", value]) + else: + raise QQError( + f"Unknown value type detected: {field_name}={value} of type {type(value)} when converting Resources to command line options. This is a bug, please report this." + ) + + return command_line + @staticmethod def _parseSize(value: object) -> Size | None: """ @@ -284,3 +314,28 @@ def _parseProps(props: str) -> dict[str, str]: result[key] = value return result + + def _propsToValue(self) -> str | None: + """ + Convert a properties dictionary into a command-line raw value string. + + Args: + props (dict[str, str]): Mapping of property names to their string values. + + Returns: + str | None: A comma-separated command-line representation of the property definitions + or None if the dictionary is empty. + """ + if not self.props: + return None + + properties = [] + for key, value in self.props.items(): + if value == "true": + properties.append(key) + elif value == "false": + properties.append(f"^{key}") + else: + properties.append(f"{key}={value}") + + return ",".join(properties) diff --git a/src/qq_lib/run/runner.py b/src/qq_lib/run/runner.py index e719c45..3014a6e 100644 --- a/src/qq_lib/run/runner.py +++ b/src/qq_lib/run/runner.py @@ -648,71 +648,13 @@ def _resubmit(self) -> None: self._batch_system.resubmit, input_machine=self._informer.info.input_machine, input_dir=self._informer.info.input_dir, - command_line=self._prepareCommandLineForResubmit(), + command_line=self._informer.info.getCommandLineForResubmit(), max_tries=CFG.runner.retry_tries, wait_seconds=CFG.runner.retry_wait, ).run() logger.info("Job successfully resubmitted.") - def _prepareCommandLineForResubmit(self) -> list[str]: - """ - Prepare a modified command line for submitting the next cycle of a loop job. - - Removes existing dependency options and replaces the script path with just - the script name. Appends a new dependency on the current job ID to ensure - the resubmitted job runs after successful completion (`afterok`) of the - current one. - - Returns: - list[str]: The sanitized and updated list of command line arguments. 
- """ - command_line = self._informer.info.command_line - script_name = self._informer.info.script_name - - # here we perform two modifications - # 1) we replace path to the submitted script with just the script name - # this is needed in case we resubmit a loop job that has been originally submitted - # from a different directory than the current working directory - # e.g. if the job was submitted as `qq submit (...) job/run.sh`, we need to - # resubmit as `qq submit (...) run.sh`, because we are resubmitting from the job's - # input directory not from the directory from which the original qq submit was called - # note that this is done heuristically, assuming that the script name is not used as - # an independently-placed parameter of any option - # to protect against silently doing something wrong, we just explicitly raise an exception - # if the script name is detected multiple times - - # 2) we remove dependencies for the previous cycle - # these dependencies had to already be fulfilled for the previous cycle to run - # so we ignore them for the next run - modified = [] - it = iter(command_line) - replaced_script_name = False - for arg in it: - if not arg.startswith("-") and Path(arg).name == script_name: - if replaced_script_name: - # script has already been replaced - raise QQError( - f"Heuristic identification of script name failed for command line: {command_line}." - ) - - # replace the script name - modified.append(script_name) - replaced_script_name = True - - elif arg.strip() == "--depend": - next(it, None) # skip the following argument - - elif "--depend" not in arg: - modified.append(arg) - - # and add in a new dependency for the next cycle - # so that the next cycle always starts only after the previous one finishes - modified.append(f"--depend=afterok={self._informer.info.job_id}") - - logger.debug(f"Command line for resubmit: {modified}.") - return modified - def _getExplicitlyIncludedFilesInWorkDir(self) -> list[Path]: """ Return absolute paths to files and directories in the working directory diff --git a/src/qq_lib/submit/cli.py b/src/qq_lib/submit/cli.py index 08832e3..0f8f41f 100644 --- a/src/qq_lib/submit/cli.py +++ b/src/qq_lib/submit/cli.py @@ -213,7 +213,7 @@ def submit(script: str, **kwargs) -> NoReturn: raise QQError(f"Script '{script}' does not exist or is not a file.") # parse options from the command line and from the script itself - factory = SubmitterFactory(script_path.resolve(), sys.argv[2:], **kwargs) + factory = SubmitterFactory(script_path.resolve(), **kwargs) submitter = factory.makeSubmitter() # guard against multiple submissions from the same directory diff --git a/src/qq_lib/submit/factory.py b/src/qq_lib/submit/factory.py index 7e31f3c..989d92d 100644 --- a/src/qq_lib/submit/factory.py +++ b/src/qq_lib/submit/factory.py @@ -22,13 +22,12 @@ class SubmitterFactory: the command-line and from the script itself. """ - def __init__(self, script: Path, command_line: list[str], **kwargs): + def __init__(self, script: Path, **kwargs): """ Initialize the factory with the script, command-line parameters, and additional options. Args: script (Path): Path to the script to submit. - command_line (list[str]): All the arguments and options specified on the command line. **kwargs: Keyword arguments from the command line. 
""" from qq_lib.submit.cli import submit @@ -37,7 +36,6 @@ def __init__(self, script: Path, command_line: list[str], **kwargs): self._script = script self._input_dir = script.parent self._kwargs = kwargs - self._command_line = command_line def makeSubmitter(self) -> Submitter: """ @@ -66,7 +64,6 @@ def makeSubmitter(self) -> Submitter: self._script, job_type, self._getResources(BatchSystem, queue), - self._command_line, loop_info, self._getExclude(), self._getInclude(), diff --git a/src/qq_lib/submit/submitter.py b/src/qq_lib/submit/submitter.py index 24da35c..fac3dc1 100644 --- a/src/qq_lib/submit/submitter.py +++ b/src/qq_lib/submit/submitter.py @@ -49,7 +49,6 @@ def __init__( script: Path, job_type: JobType, resources: Resources, - command_line: list[str], loop_info: LoopInfo | None = None, exclude: list[Path] | None = None, include: list[Path] | None = None, @@ -66,7 +65,6 @@ def __init__( script (Path): Path to the job script to submit. job_type (JobType): Type of the job to submit (e.g. standard, loop). resources (Resources): Job resource requirements (e.g., CPUs, memory, walltime). - command_line (list[str]): List of all arguments and options provided on the command line. loop_info (LoopInfo | None): Optional information for loop jobs. Pass None if not applicable. exclude (list[Path] | None): Optional list of files which should not be copied to the working directory. Paths are provided relative to the input directory. @@ -95,7 +93,6 @@ def __init__( self._include = [ i if i.is_absolute() else self._input_dir / i for i in (include or []) ] - self._command_line = command_line self._depend = depend or [] # script must exist @@ -166,7 +163,6 @@ def submit(self) -> str: loop_info=self._loop_info, excluded_files=self._exclude, included_files=self._include, - command_line=self._command_line, depend=self._depend, account=self._account, ) @@ -244,10 +240,6 @@ def getResources(self) -> Resources: """Get resources requested for the job.""" return self._resources - def getCommandLine(self) -> list[str]: - """Get the submission command line.""" - return self._command_line - def getLoopInfo(self) -> LoopInfo | None: """Get loop job information.""" return self._loop_info diff --git a/tests/test_batch_slurm_slurm.py b/tests/test_batch_slurm_slurm.py index 99110c1..24ae6e7 100644 --- a/tests/test_batch_slurm_slurm.py +++ b/tests/test_batch_slurm_slurm.py @@ -541,12 +541,12 @@ def test_slurm_is_shared_delegates_to_interface(mock_is_shared): def test_slurm_resubmit_delegates_to_interface(mock_resubmit): Slurm.resubmit( input_machine="machine1", - input_dir="/work/job", + input_dir=Path("/work/job"), command_line=["-q gpu", "--account fake-account"], ) mock_resubmit.assert_called_once_with( input_machine="machine1", - input_dir="/work/job", + input_dir=Path("/work/job"), command_line=["-q gpu", "--account fake-account"], ) diff --git a/tests/test_batch_slurmit4i_slurm.py b/tests/test_batch_slurmit4i_slurm.py index d703383..71dfb3c 100644 --- a/tests/test_batch_slurmit4i_slurm.py +++ b/tests/test_batch_slurmit4i_slurm.py @@ -89,7 +89,9 @@ def test_slurmit4i_get_default_server_resources_returns_empty_on_failure( def test_slurmit4i_resubmit_success(mock_chdir, mock_run): mock_run.return_value = MagicMock(returncode=0) SlurmIT4I.resubmit( - input_dir=Path("/home/user/jobdir"), command_line=["-q", "default"] + input_machine="unused_machine", + input_dir=Path("/home/user/jobdir"), + command_line=["-q", "default"], ) mock_chdir.assert_called_once_with(Path("/home/user/jobdir")) mock_run.assert_called_once() 
@@ -99,7 +101,9 @@ def test_slurmit4i_resubmit_success(mock_chdir, mock_run): def test_slurmit4i_resubmit_raises_when_cannot_cd(mock_chdir): with pytest.raises(QQError, match="Could not navigate to"): SlurmIT4I.resubmit( - input_dir=Path("/home/user/jobdir"), command_line=["-q", "default"] + input_machine="unused_machine", + input_dir=Path("/home/user/jobdir"), + command_line=["-q", "default"], ) mock_chdir.assert_called_once_with(Path("/home/user/jobdir")) @@ -110,7 +114,9 @@ def test_slurmit4i_resubmit_raises_when_command_fails(mock_chdir, mock_run): mock_run.return_value = MagicMock(returncode=1, stderr="execution failed") with pytest.raises(QQError): SlurmIT4I.resubmit( - input_dir=Path("/home/user/jobdir"), command_line=["-q", "default"] + input_machine="unused_machine", + input_dir=Path("/home/user/jobdir"), + command_line=["-q", "default"], ) mock_chdir.assert_called_once_with(Path("/home/user/jobdir")) diff --git a/tests/test_info_presenter.py b/tests/test_info_presenter.py index 1740e16..ba274f3 100644 --- a/tests/test_info_presenter.py +++ b/tests/test_info_presenter.py @@ -54,7 +54,6 @@ def sample_info(sample_resources): stderr_file="stderr.log", resources=sample_resources, excluded_files=[Path("ignore.txt")], - command_line=["-q", "default", "script.sh"], main_node="random.node.org", all_nodes=["random.node.org"], work_dir=Path("/scratch/job_12345.fake.server.com"), diff --git a/tests/test_properties_info.py b/tests/test_properties_info.py index 881d328..0b7c478 100644 --- a/tests/test_properties_info.py +++ b/tests/test_properties_info.py @@ -14,6 +14,7 @@ from qq_lib.core.error import QQError from qq_lib.properties.info import CFG, Info from qq_lib.properties.job_type import JobType +from qq_lib.properties.loop import LoopInfo from qq_lib.properties.resources import Resources from qq_lib.properties.states import NaiveState @@ -49,7 +50,6 @@ def sample_info(sample_resources): stderr_file="stderr.log", resources=sample_resources, excluded_files=[Path("ignore.txt")], - command_line=["-q", "default", "script.sh"], work_dir=Path("/scratch/job_12345.fake.server.com"), account="fake-account", ) @@ -279,3 +279,60 @@ def test_from_file_missing_required_field(tmp_path): with pytest.raises(QQError, match=r"Invalid qq info file"): Info.fromFile(file) + + +def test_get_command_line_for_resubmit_basic(sample_info): + sample_info.resources = Resources() + sample_info.account = None + sample_info.excluded_files = [] + + assert sample_info.getCommandLineForResubmit() == [ + "script.sh", + "--queue", + "default", + "--job-type", + "standard", + "--batch-system", + "PBS", + "--depend", + "afterok=12345.fake.server.com", + ] + + +def test_get_command_line_full(sample_info): + sample_info.job_type = JobType.LOOP + sample_info.excluded_files = [Path("exclude.txt"), Path("inner/exclude2.txt")] + sample_info.included_files = [Path("include.txt"), Path("inner/include2.txt")] + sample_info.loop_info = LoopInfo( + start=3, end=10, archive=Path("inner/inner2/archive"), archive_format="job%3d" + ) + + assert sample_info.getCommandLineForResubmit() == [ + "script.sh", + "--queue", + "default", + "--job-type", + "loop", + "--batch-system", + "PBS", + "--depend", + "afterok=12345.fake.server.com", + "--ncpus", + "8", + "--work-dir", + "scratch_local", + "--account", + "fake-account", + "--exclude", + "exclude.txt,inner/exclude2.txt", + "--include", + "include.txt,inner/include2.txt", + "--loop-start", + "3", + "--loop-end", + "10", + "--archive", + "archive", + "--archive-format", + "job%3d", + ] diff --git 
a/tests/test_properties_loop.py b/tests/test_properties_loop.py index bf550fc..f9d1f77 100644 --- a/tests/test_properties_loop.py +++ b/tests/test_properties_loop.py @@ -199,3 +199,43 @@ def test_get_cycle_non_numeric_files_are_ignored_but_numeric_stems_count(temp_di (temp_dir / "mdxxxx.txt").write_text("x") loop_info = _create_loop_info_stub(0, temp_dir, "md.*") assert loop_info._getCycle() == 10 + + +def test_to_command_line_basic(): + info = LoopInfo( + start=1, + end=10, + archive=Path("/tmp/archive"), + archive_format="job%04d", + ) + + assert info.toCommandLine() == [ + "--loop-start", + "1", + "--loop-end", + "10", + "--archive", + "archive", + "--archive-format", + "job%04d", + ] + + +def test_to_command_line_archive_name_only(): + info = LoopInfo( + start=0, + end=5, + archive=Path("/very/long/path/to/myarchive"), + archive_format="md%03d", + ) + + assert info.toCommandLine() == [ + "--loop-start", + "0", + "--loop-end", + "5", + "--archive", + "myarchive", + "--archive-format", + "md%03d", + ] diff --git a/tests/test_properties_resources.py b/tests/test_properties_resources.py index ad0bc00..03bced5 100644 --- a/tests/test_properties_resources.py +++ b/tests/test_properties_resources.py @@ -451,3 +451,112 @@ def test_parse_props_strips_empty_parts(): def test_parse_props_raises_on_duplicate_keys(props): with pytest.raises(QQError, match="Property 'foo' is defined multiple times."): Resources._parseProps(props) + + +def test_props_to_value_true_value(): + res = Resources.__new__(Resources) + res.props = {"debug": "true"} + assert res._propsToValue() == "debug" + + +def test_props_to_value_false_value(): + res = Resources.__new__(Resources) + res.props = {"debug": "false"} + assert res._propsToValue() == "^debug" + + +def test_props_to_value_regular_value(): + res = Resources.__new__(Resources) + res.props = {"mode": "fast"} + assert res._propsToValue() == "mode=fast" + + +def test_props_to_value_multiple_mixed_values(): + res = Resources.__new__(Resources) + res.props = { + "debug": "true", + "optimize": "false", + "mode": "fast", + } + assert res._propsToValue() == "debug,^optimize,mode=fast" + + +def test_props_to_value_empty_dict(): + res = Resources.__new__(Resources) + res.props = {} + assert res._propsToValue() is None + + +def test_props_to_value_non_boolean_strings(): + res = Resources.__new__(Resources) + res.props = { + "a": "TRUE", + "b": "False", + "c": "trueish", + } + assert res._propsToValue() == "a=TRUE,b=False,c=trueish" + + +def test_to_command_line_int_values(): + res = Resources(nnodes=3, ncpus=12) + + assert res.toCommandLine() == ["--nnodes", "3", "--ncpus", "12"] + + +def test_to_command_line_size_values(): + res = Resources(mem="4gb", work_size="10mb") + + assert res.toCommandLine() == [ + "--mem", + "4194304kb", + "--work-size", + "10240kb", + ] + + +def test_to_command_line_string_values(): + res = Resources(walltime="02:00:00", work_dir="scratch_local") + + assert res.toCommandLine() == [ + "--walltime", + "02:00:00", + "--work-dir", + "scratch_local", + ] + + +def test_to_command_line_props_value(): + res = Resources(props="debug,^gpu,type=A") + + assert res.toCommandLine() == ["--props", "debug,^gpu,type=A"] + + +def test_to_command_line_mixed_value_types(): + res = Resources(nnodes=2, mem="1gb", work_dir="scratch", props="debug") + assert res.toCommandLine() == [ + "--nnodes", + "2", + "--mem", + "1048576kb", + "--work-dir", + "scratch", + "--props", + "debug", + ] + + +def test_to_command_line_mixed_value_types_no_props(): + res = Resources(nnodes=2, 
mem="1gb", work_dir="scratch") + assert res.toCommandLine() == [ + "--nnodes", + "2", + "--mem", + "1048576kb", + "--work-dir", + "scratch", + ] + + +def test_to_command_line_empty(): + res = Resources() + assert res.toCommandLine() == [] diff --git a/tests/test_run_runner.py b/tests/test_run_runner.py index cac9270..d325cd6 100644 --- a/tests/test_run_runner.py +++ b/tests/test_run_runner.py @@ -316,215 +316,6 @@ def terminate_and_stop(): process_mock.kill.assert_not_called() -def test_runner_prepare_command_line_for_resubmit_only_script(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "script.sh", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert result == informer_mock.info.command_line + ["--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_script_path(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "job/script.sh", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert result == ["script.sh", "-q", "gpu", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_script_path_last(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "-q", - "gpu", - "job/script.sh", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert result == ["-q", "gpu", "script.sh", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_complicated_script_path(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "../path/to/../something/job/script.sh", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert result == ["script.sh", "-q", "gpu", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_inline_depend(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "script.sh", - "--depend=afterok=11111", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert "--depend=afterok=11111" not in result - assert result == ["script.sh", "-q", "gpu", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_separate_depend_argument(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "script.sh", - "--depend", - "afterok=11111", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert "--depend" not in result - assert "afterok=11111" not in result - assert result == ["script.sh", "-q", "gpu", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_multiple_scripts(): - 
informer_mock = MagicMock() - informer_mock.info.command_line = [ - "script.sh", - "--depend", - "afterok=11111", - "--exclude", - "script.sh", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - with pytest.raises( - QQError, - match="Heuristic identification of script name failed for command line:", - ): - runner._prepareCommandLineForResubmit() - - -def test_runner_prepare_command_line_for_resubmit_multiple_depends(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "script.sh", - "--depend=afterok=11111", - "--depend", - "afterany=33333", - "--depend=after=22222", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert "--depend" not in result - assert all("afterok=11111" not in arg for arg in result) - assert all("afterany=33333" not in arg for arg in result) - assert result[-1] == "--depend=afterok=99999" - assert "gpu" in result - assert "script.sh" in result - - -def test_runner_prepare_command_line_for_resubmit_multiple_depends_and_script_path_complex(): - informer_mock = MagicMock() - informer_mock.info.command_line = [ - "--depend=afterok=11111", - "--depend", - "afterany=33333", - "job/path/script.sh", - "--depend=after=22222", - "-q", - "gpu", - ] - informer_mock.info.script_name = "script.sh" - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert "--depend" not in result - assert all("afterok=11111" not in arg for arg in result) - assert all("afterany=33333" not in arg for arg in result) - assert result[-1] == "--depend=afterok=99999" - assert "gpu" in result - assert "script.sh" in result - assert "job/path/script.sh" not in result - - -def test_runner_prepare_command_line_for_resubmit_depend_last_arg(): - informer_mock = MagicMock() - informer_mock.info.command_line = ["script.sh", "--depend"] - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert "--depend" not in result - assert result == ["script.sh", "--depend=afterok=99999"] - - -def test_runner_prepare_command_line_for_resubmit_only_depend(): - informer_mock = MagicMock() - informer_mock.info.command_line = ["--depend=afterok=11111"] - informer_mock.info.job_id = "99999" - runner = Runner.__new__(Runner) - runner._informer = informer_mock - - result = runner._prepareCommandLineForResubmit() - - assert result == ["--depend=afterok=99999"] - - def test_runner_resubmit_final_cycle(): informer_mock = MagicMock() informer_mock.info.loop_info.current = 5 @@ -569,7 +360,7 @@ def test_runner_resubmit_successful_resubmission(): runner = Runner.__new__(Runner) runner._informer = informer_mock runner._batch_system = MagicMock() - runner._prepareCommandLineForResubmit = MagicMock(return_value=["cmd"]) + runner._informer.info.getCommandLineForResubmit = MagicMock(return_value=["cmd"]) runner._should_resubmit = True retryer_mock = MagicMock() @@ -603,7 +394,7 @@ def test_runner_resubmit_raises_qqerror(): runner = Runner.__new__(Runner) runner._informer = informer_mock runner._batch_system = MagicMock() - runner._prepareCommandLineForResubmit = MagicMock(return_value=["cmd"]) + 
runner._informer.info.getCommandLineForResubmit = MagicMock(return_value=["cmd"]) runner._should_resubmit = True with ( diff --git a/tests/test_submit_factory.py b/tests/test_submit_factory.py index 13da826..6b9f22b 100644 --- a/tests/test_submit_factory.py +++ b/tests/test_submit_factory.py @@ -19,19 +19,17 @@ def test_submitter_factory_init(tmp_path): script = tmp_path / "script.sh" - command_line = ["-q", "default", str(script)] kwargs = {"queue": "default"} with patch("qq_lib.submit.factory.Parser") as mock_parser_class: mock_parser_instance = MagicMock() mock_parser_class.return_value = mock_parser_instance - factory = SubmitterFactory(script, command_line, **kwargs) + factory = SubmitterFactory(script, **kwargs) assert factory._parser == mock_parser_instance assert factory._script == script assert factory._input_dir == tmp_path - assert factory._command_line == command_line assert factory._kwargs == kwargs mock_parser_class.assert_called_once() @@ -356,7 +354,6 @@ def test_submitter_factory_make_submitter_standard_job(): factory = SubmitterFactory.__new__(SubmitterFactory) factory._parser = mock_parser factory._script = Path("/tmp/script.sh") - factory._command_line = ["--arg"] factory._kwargs = {"queue": "default", "job_type": "standard"} BatchSystem = MagicMock() @@ -397,7 +394,6 @@ def test_submitter_factory_make_submitter_standard_job(): factory._script, JobType.STANDARD, resources, - factory._command_line, None, # loop_info is None for STANDARD job excludes, includes, @@ -419,7 +415,6 @@ def test_submitter_factory_make_submitter_loop_job(): factory = SubmitterFactory.__new__(SubmitterFactory) factory._parser = mock_parser factory._script = Path("/tmp/script.sh") - factory._command_line = ["--arg"] factory._kwargs = {"queue": "default", "job_type": "loop"} BatchSystem = MagicMock() @@ -461,7 +456,6 @@ def test_submitter_factory_make_submitter_loop_job(): factory._script, JobType.LOOP, resources, - factory._command_line, loop_info, excludes, includes, diff --git a/tests/test_submit_submitter.py b/tests/test_submit_submitter.py index a8f446d..43659e9 100644 --- a/tests/test_submit_submitter.py +++ b/tests/test_submit_submitter.py @@ -36,7 +36,6 @@ def test_submitter_init_sets_all_attributes_correctly(tmp_path): script=script, job_type=JobType.STANDARD, resources=Resources(), - command_line=["-q", "default", str(script)], exclude=[Path("exclude")], include=[Path("include"), Path("/tmp/include")], ) @@ -53,7 +52,6 @@ def test_submitter_init_sets_all_attributes_correctly(tmp_path): assert submitter._resources == Resources() assert submitter._exclude == [tmp_path / "exclude"] assert submitter._include == [tmp_path / "include", Path("/tmp/include")] - assert submitter._command_line == ["-q", "default", str(script)] assert submitter._depend == [] @@ -68,7 +66,6 @@ def test_submitter_init_raises_error_if_script_does_not_exist(tmp_path): script=script, job_type=JobType.STANDARD, resources=Resources(), - command_line=["-q", "default", str(script)], ) @@ -88,7 +85,6 @@ def test_submitter_init_raises_error_if_invalid_shebang(tmp_path): script=script, job_type=JobType.STANDARD, resources=Resources(), - command_line=["-q", "default", str(script)], ) @@ -114,7 +110,6 @@ def test_submitter_init_sets_all_optional_arguments_correctly(tmp_path): script=script, job_type=JobType.LOOP, resources=Resources(), - command_line=["-q", "long", str(script)], loop_info=loop_info, exclude=exclude_files, depend=depend_jobs, @@ -132,7 +127,6 @@ def 
test_submitter_init_sets_all_optional_arguments_correctly(tmp_path):
     assert submitter._info_file == tmp_path / f"job{CFG.suffixes.qq_info}"
     assert submitter._resources == Resources()
     assert submitter._exclude == exclude_files
-    assert submitter._command_line == ["-q", "long", str(script)]
     assert submitter._depend == depend_jobs
 
 
@@ -490,7 +484,6 @@ def test_submitter_submit_calls_all_steps_and_returns_job_id(tmp_path):
     submitter._loop_info = None
     submitter._exclude = []
     submitter._include = []
-    submitter._command_line = ["-q", "default", str(submitter._script)]
     submitter._depend = []
     submitter._info_file = tmp_path / f"{submitter._job_name}.qqinfo"
     env_vars = {CFG.env_vars.guard: "true"}
@@ -539,7 +532,6 @@ def test_submitter_submit(tmp_path):
     submitter._loop_info = None
     submitter._exclude = ["exclude1"]
     submitter._include = ["include1"]
-    submitter._command_line = ["-q", "default", str(submitter._script)]
     submitter._depend = []
     submitter._info_file = tmp_path / f"{submitter._job_name}.qqinfo"
     env_vars = {CFG.env_vars.guard: "true"}
@@ -603,5 +595,4 @@ def test_submitter_submit(tmp_path):
     assert info_arg.loop_info == submitter._loop_info
     assert info_arg.excluded_files == submitter._exclude
     assert info_arg.included_files == submitter._include
-    assert info_arg.command_line == submitter._command_line
     assert info_arg.depend == submitter._depend

From f0f8248b4d24aa97823181acbd8efb4a35df40d2 Mon Sep 17 00:00:00 2001
From: Ladme
Date: Sat, 22 Nov 2025 17:00:01 +0100
Subject: [PATCH 16/27] Updated changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae94ef8..506739d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 - `Killer.terminate` method has been renamed to `Killer.kill`.
 - `SubmitterFactory` no longer requires a list of supported parameters and instead loads it itself.
 - Added getter methods to `Submitter`.
+- `Submitter` no longer requires the "command line" to be provided. The command line is no longer written into qq info files.
 
 ***

From 81119cd0820b8ae28389e7e1451d3878a945af09 Mon Sep 17 00:00:00 2001
From: Ladme
Date: Sun, 23 Nov 2025 10:16:27 +0100
Subject: [PATCH 17/27] Working directories are now automatically fully removed on LUMI and IT4I

---
 CHANGELOG.md                            |  1 +
 pyproject.toml                          |  2 +-
 src/qq_lib/batch/interface/interface.py | 12 +++++-----
 src/qq_lib/batch/pbs/pbs.py             | 30 ++++++++++++++++++++-----
 src/qq_lib/batch/slurmit4i/slurm.py     |  6 ++---
 src/qq_lib/batch/slurmlumi/slurm.py     |  6 ++---
 src/qq_lib/core/config.py               | 23 ++++++++++++++++---
 src/qq_lib/qq.py                        |  2 +-
 src/qq_lib/run/runner.py                | 18 +++++----------
 tests/test_batch_pbs_pbs.py             | 22 ++++++++++++++++++
 tests/test_batch_slurmit4i_slurm.py     | 24 +++++++++++---------
 tests/test_batch_slurmlumi_slurm.py     | 24 ++++++++++----------
 tests/test_run_runner.py                | 19 ++++++++--------
 uv.lock                                 |  2 +-
 14 files changed, 125 insertions(+), 66 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 506739d..10cae2a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 - The available types of working directories for the current environment are now shown in the output of `qq submit -h`.
 - Fixed a regression from v0.5: missing size property in `qq nodes` is now correctly intepreted as zero size.
 - When a job is killed, runtime files are copied to the input directory only after the executed process finishes.
+- Changed the way working directories on Karolina and LUMI are created allowing their complete removal.
 ### Internal changes
 - `Wiper.delete` method has been renamed to `Wiper.wipe`.
diff --git a/pyproject.toml b/pyproject.toml
index ec90217..6f73808 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "qq"
-version = "0.6.0-dev.1"
+version = "0.6.0-dev.2"
 description = "A friendly interface to batch processing"
 readme = "README.md"
 requires-python = ">=3.12"
diff --git a/src/qq_lib/batch/interface/interface.py b/src/qq_lib/batch/interface/interface.py
index 21fe06e..a2073fb 100644
--- a/src/qq_lib/batch/interface/interface.py
+++ b/src/qq_lib/batch/interface/interface.py
@@ -72,6 +72,8 @@ def getJobId(cls) -> str | None:
         """
         Get the id of the current job from the corresponding batch system's environment variable.
 
+        Note that this method must be called from inside an active job.
+
         Returns:
             str | None: Index of the job or None if the collective variable is not set.
         """
@@ -80,21 +82,21 @@
         )
 
     @classmethod
-    def getScratchDir(cls, job_id: str) -> Path:
+    def createWorkDirOnScratch(cls, job_id: str) -> Path:
         """
-        Retrieve the scratch directory for a given job.
+        Create the working directory on scratch for the given job.
 
         Args:
             job_id (int): Unique identifier of the job.
 
         Returns:
-            Path: Path to the scratch directory.
+            Path: Absolute path to the working directory on scratch.
 
         Raises:
-            QQError: If there is no scratch directory available for this job.
+            QQError: If the working directory could not be created.
         """
         raise NotImplementedError(
-            f"getScratchDir method is not implemented for {cls.__name__}"
+            f"createWorkDirOnScratch method is not implemented for {cls.__name__}"
         )
 
     @classmethod
diff --git a/src/qq_lib/batch/pbs/pbs.py b/src/qq_lib/batch/pbs/pbs.py
index dabe34b..ee69ac1 100644
--- a/src/qq_lib/batch/pbs/pbs.py
+++ b/src/qq_lib/batch/pbs/pbs.py
@@ -48,12 +48,21 @@ def getJobId(cls) -> str | None:
         return os.environ.get("PBS_JOBID")
 
     @classmethod
-    def getScratchDir(cls, job_id: str) -> Path:
-        scratch_dir = os.environ.get(CFG.env_vars.pbs_scratch_dir)
-        if not scratch_dir:
-            raise QQError(f"Scratch directory for job '{job_id}' is undefined")
+    def createWorkDirOnScratch(cls, job_id: str) -> Path:
+        scratch_dir = cls._getScratchDir(job_id)
 
-        return Path(scratch_dir)
+        # create working directory inside the scratch directory allocated by the batch system
+        # we create this directory because other processes may write files
+        # into the allocated scratch directory and we do not want these files
+        # to affect the job execution or be copied back to input_dir
+        # this also simplifies deletion of the working directory
+        # (the allocated scratch dir cannot be deleted)
+        work_dir = (scratch_dir / CFG.pbs_options.scratch_dir_inner).resolve()
+
+        logger.debug(f"Creating working directory '{str(work_dir)}'.")
+        work_dir.mkdir(exist_ok=True)
+
+        return work_dir
 
     @classmethod
     def jobSubmit(
@@ -431,6 +440,17 @@ def sortJobs(cls, jobs: list[PBSJob]) -> None:
         # and therefore are displayed at the top in the qq jobs / qq stat output
         jobs.sort(key=lambda job: job.getIdInt() or 0)
 
+    @classmethod
+    def _getScratchDir(cls, job_id: str) -> Path:
+        """
+        Get the path to the scratch directory allocated by PBS.
+ """ + scratch_dir = os.environ.get(CFG.env_vars.pbs_scratch_dir) + if not scratch_dir: + raise QQError(f"Scratch directory for job '{job_id}' is undefined") + + return Path(scratch_dir) + @classmethod def _sharedGuard(cls, res: Resources, env_vars: dict[str, str]) -> None: """ diff --git a/src/qq_lib/batch/slurmit4i/slurm.py b/src/qq_lib/batch/slurmit4i/slurm.py index 6922b06..72c0536 100644 --- a/src/qq_lib/batch/slurmit4i/slurm.py +++ b/src/qq_lib/batch/slurmit4i/slurm.py @@ -38,7 +38,7 @@ def isAvailable(cls) -> bool: return shutil.which("it4ifree") is not None @classmethod - def getScratchDir(cls, job_id: str) -> Path: + def createWorkDirOnScratch(cls, job_id: str) -> Path: if not (account := os.environ.get(CFG.env_vars.slurm_job_account)): raise QQError(f"No account is defined for job '{job_id}'.") @@ -48,7 +48,7 @@ def getScratchDir(cls, job_id: str) -> Path: # if the user directory is already created but the user does not have permissions # to write into it, we append a number to the user's name and try creating a new directory last_exception = None - for attempt in range(CFG.it4i_scratch_dir_attempts): + for attempt in range(CFG.slurm_it4i_options.scratch_dir_attempts): user_component = ( user if attempt == 0 else f"{user}{attempt + 1}" ) # appended number is 2 for the second attempt @@ -65,7 +65,7 @@ def getScratchDir(cls, job_id: str) -> Path: # if all attempts failed raise QQError( - f"Could not create a scratch directory for job '{job_id}' after {CFG.it4i_scratch_dir_attempts} attempts: {last_exception}" + f"Could not create a working directory on scratch for job '{job_id}' after {CFG.slurm_it4i_options.scratch_dir_attempts} attempts: {last_exception}" ) from last_exception @classmethod diff --git a/src/qq_lib/batch/slurmlumi/slurm.py b/src/qq_lib/batch/slurmlumi/slurm.py index 7a37fa8..aff9dd1 100644 --- a/src/qq_lib/batch/slurmlumi/slurm.py +++ b/src/qq_lib/batch/slurmlumi/slurm.py @@ -56,7 +56,7 @@ def jobSubmit( ) @classmethod - def getScratchDir(cls, job_id: str) -> Path: + def createWorkDirOnScratch(cls, job_id: str) -> Path: if not (account := os.environ.get(CFG.env_vars.slurm_job_account)): raise QQError(f"No account is defined for job '{job_id}'.") @@ -72,7 +72,7 @@ def getScratchDir(cls, job_id: str) -> Path: # if the user directory is already created but the user does not have permissions # to write into it, we append a number to the user's name and try creating a new directory last_exception = None - for attempt in range(CFG.lumi_scratch_dir_attempts): + for attempt in range(CFG.slurm_lumi_options.scratch_dir_attempts): user_component = ( user if attempt == 0 else f"{user}{attempt + 1}" ) # appended number is 2 for the second attempt @@ -92,7 +92,7 @@ def getScratchDir(cls, job_id: str) -> Path: # if all attempts failed raise QQError( - f"Could not create a scratch directory for job '{job_id}' after {CFG.lumi_scratch_dir_attempts} attempts: {last_exception}" + f"Could not create a working directory on {storage_type} for job '{job_id}' after {CFG.slurm_lumi_options.scratch_dir_attempts} attempts: {last_exception}" ) from last_exception @classmethod diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index f8b983a..15fb4ba 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -63,7 +63,6 @@ class RunnerSettings: retry_tries: int = 3 retry_wait: int = 300 - scratch_dir_inner: str = "main" sigterm_to_sigkill: int = 5 subprocess_checks_wait_time: int = 2 @@ -239,6 +238,23 @@ class SizeOptions: max_rounding_error: float = 0.1 
+@dataclass +class PBSOptions: + """Options associated with PBS.""" + + scratch_dir_inner: str = "main" + + +@dataclass +class SlurmIT4IOptions: + scratch_dir_attempts: int = 3 + + +@dataclass +class SlurmLumiOptions: + scratch_dir_attempts: int = 3 + + @dataclass class Config: """Main configuration for qq.""" @@ -262,9 +278,10 @@ class Config: exit_codes: ExitCodes = field(default_factory=ExitCodes) state_colors: StateColors = field(default_factory=StateColors) size: SizeOptions = field(default_factory=SizeOptions) + pbs_options: PBSOptions = field(default_factory=PBSOptions) + slurm_it4i_options: SlurmIT4IOptions = field(default_factory=SlurmIT4IOptions) + slurm_lumi_options: SlurmLumiOptions = field(default_factory=SlurmLumiOptions) binary_name: str = "qq" - it4i_scratch_dir_attempts: int = 5 - lumi_scratch_dir_attempts: int = 5 @classmethod def load(cls, config_path: Path | None = None) -> Self: diff --git a/src/qq_lib/qq.py b/src/qq_lib/qq.py index 40b47cd..ac01caf 100644 --- a/src/qq_lib/qq.py +++ b/src/qq_lib/qq.py @@ -22,7 +22,7 @@ from qq_lib.sync.cli import sync from qq_lib.wipe.cli import wipe -__version__ = "0.6.0-dev.1" +__version__ = "0.6.0-dev.2" # support both --help and -h _CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} diff --git a/src/qq_lib/run/runner.py b/src/qq_lib/run/runner.py index 3014a6e..459e258 100644 --- a/src/qq_lib/run/runner.py +++ b/src/qq_lib/run/runner.py @@ -329,22 +329,16 @@ def _setUpScratchDir(self) -> None: Raises: QQError: If scratch directory cannot be determined. """ - # get scratch directory (this directory should be created and allocated by the batch system) - scratch_dir = self._batch_system.getScratchDir(self._informer.info.job_id) - - # create working directory inside the scratch directory allocated by the batch system - # we create this directory because other processes may write files - # into the allocated scratch directory and we do not want these files - # to affect the job execution or be copied back to input_dir - self._work_dir = (scratch_dir / CFG.runner.scratch_dir_inner).resolve() - logger.info(f"Setting up working directory in '{self._work_dir}'.") - Retryer( - Path.mkdir, - self._work_dir, + # get path to the working directory (created by the batch system) + self._work_dir: Path = Retryer( + self._batch_system.createWorkDirOnScratch, + self._informer.info.job_id, max_tries=CFG.runner.retry_tries, wait_seconds=CFG.runner.retry_wait, ).run() + logger.info(f"Setting up working directory in '{self._work_dir}'.") + # move to the working directory Retryer( os.chdir, diff --git a/tests/test_batch_pbs_pbs.py b/tests/test_batch_pbs_pbs.py index dbea3f2..85341b4 100644 --- a/tests/test_batch_pbs_pbs.py +++ b/tests/test_batch_pbs_pbs.py @@ -1461,3 +1461,25 @@ def test_pbs_get_supported_work_dir_types_returns_combined_list(): "job_dir", ] assert PBS.getSupportedWorkDirTypes() == expected + + +def test_pbs_create_work_dir_on_scratch_creates_work_dir(): + job_id = "12345" + fake_scratch = Path("/scratch/job_12345") + inner_name = CFG.pbs_options.scratch_dir_inner + expected_work_dir = (fake_scratch / inner_name).resolve() + + with ( + patch.object( + PBS, "_getScratchDir", return_value=fake_scratch + ) as get_scratch_mock, + patch("qq_lib.batch.pbs.pbs.logger"), + patch("pathlib.Path.mkdir") as mkdir_mock, + ): + result = PBS.createWorkDirOnScratch(job_id) + + get_scratch_mock.assert_called_once_with(job_id) + + assert result == expected_work_dir + + mkdir_mock.assert_called_once_with(exist_ok=True) diff --git 
a/tests/test_batch_slurmit4i_slurm.py b/tests/test_batch_slurmit4i_slurm.py index 71dfb3c..2d1e555 100644 --- a/tests/test_batch_slurmit4i_slurm.py +++ b/tests/test_batch_slurmit4i_slurm.py @@ -333,34 +333,38 @@ def test_slurmit4i_navigate_to_destination_calls_interface(mock_nav, mock_info): @patch("qq_lib.batch.slurmit4i.slurm.getpass.getuser", return_value="user1") @patch("qq_lib.batch.slurmit4i.slurm.Path.mkdir") @patch.dict(os.environ, {"SLURM_JOB_ACCOUNT": "ACCT"}, clear=True) -def test_slurmit4i_get_scratch_dir_creates_and_returns_path(mock_mkdir, mock_user): - result = SlurmIT4I.getScratchDir("123") +def test_slurmit4i_create_work_dir_on_scratch_creates_and_returns_path( + mock_mkdir, mock_user +): + result = SlurmIT4I.createWorkDirOnScratch("123") assert str(result).endswith("/scratch/project/acct/user1/qq-jobs/job_123") mock_user.assert_called_once() mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) @patch.dict(os.environ, {}, clear=True) -def test_slurmit4i_get_scratch_dir_raises_when_no_account(): +def test_slurmit4i_create_work_dir_on_scratch_raises_when_no_account(): with pytest.raises(QQError, match="No account is defined for job '123'"): - SlurmIT4I.getScratchDir("123") + SlurmIT4I.createWorkDirOnScratch("123") @patch("qq_lib.batch.slurmit4i.slurm.getpass.getuser", return_value="user2") @patch("qq_lib.batch.slurmit4i.slurm.Path.mkdir", side_effect=OSError("disk error")) @patch.dict(os.environ, {"SLURM_JOB_ACCOUNT": "ACCT2"}, clear=True) -def test_slurmit4i_get_scratch_dir_raises_on_mkdir_failure(mock_mkdir, mock_user): +def test_slurmit4i_create_work_dir_on_scratch_raises_on_mkdir_failure( + mock_mkdir, mock_user +): with pytest.raises( - QQError, match="Could not create a scratch directory for job '456'" + QQError, match="Could not create a working directory on scratch for job '456'" ): - SlurmIT4I.getScratchDir("456") + SlurmIT4I.createWorkDirOnScratch("456") mock_user.assert_called_once() - assert mock_mkdir.call_count == CFG.it4i_scratch_dir_attempts + assert mock_mkdir.call_count == CFG.slurm_it4i_options.scratch_dir_attempts @patch("qq_lib.batch.slurmit4i.slurm.getpass.getuser", return_value="userX") @patch.dict(os.environ, {"SLURM_JOB_ACCOUNT": "ACCT"}, clear=True) -def test_slurmit4i_get_scratch_dir_third_attempt_succeeds(mock_user): +def test_slurmit4i_create_work_dir_on_scratch_third_attempt_succeeds(mock_user): mkdir_mock = MagicMock() mkdir_mock.side_effect = [ OSError("fail 1"), @@ -369,7 +373,7 @@ def test_slurmit4i_get_scratch_dir_third_attempt_succeeds(mock_user): ] with patch("qq_lib.batch.slurmit4i.slurm.Path.mkdir", mkdir_mock): - result = SlurmIT4I.getScratchDir("999") + result = SlurmIT4I.createWorkDirOnScratch("999") expected_path = "/scratch/project/acct/userX3/qq-jobs/job_999" assert str(result).endswith(expected_path) diff --git a/tests/test_batch_slurmlumi_slurm.py b/tests/test_batch_slurmlumi_slurm.py index 5edef4b..4e465a7 100644 --- a/tests/test_batch_slurmlumi_slurm.py +++ b/tests/test_batch_slurmlumi_slurm.py @@ -41,35 +41,35 @@ def test_slurmlumi_job_submit_sets_env_var_conditionally(uses_scratch, expect_en mock_super.assert_called_once() -def test_slurmlumi_get_scratch_dir_raises_when_no_account(monkeypatch): +def test_slurmlumi_create_work_dir_on_scratch_raises_when_no_account(monkeypatch): monkeypatch.setattr(os, "environ", {}) with pytest.raises(QQError, match="No account is defined for job '111'"): - SlurmLumi.getScratchDir("111") + SlurmLumi.createWorkDirOnScratch("111") -def 
test_slurmlumi_get_scratch_dir_raises_when_no_storage_type(monkeypatch): +def test_slurmlumi_create_work_dir_on_scratch_raises_when_no_storage_type(monkeypatch): monkeypatch.setattr(os, "environ", {CFG.env_vars.slurm_job_account: "account"}) with pytest.raises( QQError, match=f"Environment variable '{CFG.env_vars.lumi_scratch_type}' is not defined", ): - SlurmLumi.getScratchDir("222") + SlurmLumi.createWorkDirOnScratch("222") -def test_slurmlumi_get_scratch_dir_creates_directory(monkeypatch): +def test_slurmlumi_create_work_dir_on_scratch_creates_directory(monkeypatch): os.environ[CFG.env_vars.slurm_job_account] = "account" os.environ[CFG.env_vars.lumi_scratch_type] = "scratch" monkeypatch.setattr(getpass, "getuser", lambda: "user") with patch.object(Path, "mkdir") as mock_mkdir: - result = SlurmLumi.getScratchDir("333") + result = SlurmLumi.createWorkDirOnScratch("333") assert isinstance(result, Path) mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) assert Path("/scratch/account/user/qq-jobs/job_333") == result -def test_slurmlumi_get_scratch_dir_raises_on_creation_error(monkeypatch): +def test_slurmlumi_create_work_dir_on_scratch_raises_on_creation_error(monkeypatch): os.environ[CFG.env_vars.slurm_job_account] = "account" os.environ[CFG.env_vars.lumi_scratch_type] = "flash" monkeypatch.setattr(getpass, "getuser", lambda: "user") @@ -77,12 +77,12 @@ def test_slurmlumi_get_scratch_dir_raises_on_creation_error(monkeypatch): with ( patch.object(Path, "mkdir", side_effect=Exception("fail")) as mock_mkdir, pytest.raises( - QQError, match="Could not create a scratch directory for job '444'" + QQError, match="Could not create a working directory on flash for job '444'" ), ): - SlurmLumi.getScratchDir("444") + SlurmLumi.createWorkDirOnScratch("444") - assert mock_mkdir.call_count == CFG.lumi_scratch_dir_attempts + assert mock_mkdir.call_count == CFG.slurm_lumi_options.scratch_dir_attempts @patch("qq_lib.batch.slurmlumi.slurm.getpass.getuser", return_value="userX") @@ -91,7 +91,7 @@ def test_slurmlumi_get_scratch_dir_raises_on_creation_error(monkeypatch): {CFG.env_vars.slurm_job_account: "ACCT", CFG.env_vars.lumi_scratch_type: "scratch"}, clear=True, ) -def test_slurmlumi_get_scratch_dir_third_attempt_succeeds(mock_user): +def test_slurmlumi_create_work_dir_on_scratch_third_attempt_succeeds(mock_user): mkdir_mock = MagicMock() mkdir_mock.side_effect = [ OSError("fail 1"), @@ -100,7 +100,7 @@ def test_slurmlumi_get_scratch_dir_third_attempt_succeeds(mock_user): ] with patch("qq_lib.batch.slurmlumi.slurm.Path.mkdir", mkdir_mock): - result = SlurmLumi.getScratchDir("999") + result = SlurmLumi.createWorkDirOnScratch("999") expected_path = "/scratch/acct/userX3/qq-jobs/job_999" assert str(result).endswith(expected_path) diff --git a/tests/test_run_runner.py b/tests/test_run_runner.py index d325cd6..fea7c1d 100644 --- a/tests/test_run_runner.py +++ b/tests/test_run_runner.py @@ -667,24 +667,23 @@ def test_runner_set_up_scratch_dir_calls_retryers_with_correct_arguments(): runner._informer.info.job_name = "job+0002" runner._archiver = None - scratch_dir = Path("/scratch") - runner._batch_system.getScratchDir.return_value = scratch_dir + work_dir = Path("/scratch/job123") + runner._batch_system.createWorkDirOnScratch.return_value = work_dir with ( patch("qq_lib.run.runner.Retryer") as retryer_cls, patch("qq_lib.run.runner.logger"), patch("qq_lib.run.runner.socket.gethostname", return_value="localhost"), ): + retryer_cls.return_value.run.return_value = work_dir runner._setUpScratchDir() - 
work_dir = (scratch_dir / CFG.runner.scratch_dir_inner).resolve() - - # first Retryer call: Path.mkdir - mkdir_call = retryer_cls.call_args_list[0] - assert mkdir_call.kwargs["max_tries"] == CFG.runner.retry_tries - assert mkdir_call.kwargs["wait_seconds"] == CFG.runner.retry_wait - assert mkdir_call.args[0] == Path.mkdir - assert mkdir_call.args[1] == work_dir + # first Retryer call: batch_system + batch_system_call = retryer_cls.call_args_list[0] + assert batch_system_call.kwargs["max_tries"] == CFG.runner.retry_tries + assert batch_system_call.kwargs["wait_seconds"] == CFG.runner.retry_wait + assert batch_system_call.args[0] == runner._batch_system.createWorkDirOnScratch + assert batch_system_call.args[1] == runner._informer.info.job_id # second Retryer call: os.chdir chdir_call = retryer_cls.call_args_list[1] diff --git a/uv.lock b/uv.lock index ca6dfbc..e4c7ca9 100644 --- a/uv.lock +++ b/uv.lock @@ -459,7 +459,7 @@ wheels = [ [[package]] name = "qq" -version = "0.6.0.dev1" +version = "0.6.0.dev2" source = { virtual = "." } dependencies = [ { name = "click" }, From dd27d0e96d2d3bb80d9b95a34771c5d954d8ed7e Mon Sep 17 00:00:00 2001 From: Ladme Date: Sun, 23 Nov 2025 10:28:48 +0100 Subject: [PATCH 18/27] Made collecting slurm jobs much faster --- CHANGELOG.md | 1 + src/qq_lib/batch/slurm/slurm.py | 29 ++++++++++++++++++++++++----- src/qq_lib/core/config.py | 13 +++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10cae2a..3c43323 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Fixed a regression from v0.5: missing size property in `qq nodes` is now correctly intepreted as zero size. - When a job is killed, runtime files are copied to the input directory only after the executed process finishes. - Changed the way working directories on Karolina and LUMI are created allowing their complete removal. +- Collection of Slurm jobs (which is complicated by Slurm's architecture) is now performed in parallel and is consequently much faster. ### Internal changes - `Wiper.delete` method has been renamed to `Wiper.wipe`. diff --git a/src/qq_lib/batch/slurm/slurm.py b/src/qq_lib/batch/slurm/slurm.py index 09352fa..7cb27e9 100644 --- a/src/qq_lib/batch/slurm/slurm.py +++ b/src/qq_lib/batch/slurm/slurm.py @@ -4,6 +4,7 @@ import os import shutil import subprocess +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from qq_lib.batch.interface import BatchInterface @@ -604,6 +605,11 @@ def _getBatchJobsUsingSqueueCommand(cls, command: str) -> list[SlurmJob]: """ Execute `squeue` and `scontrol show job` to retrieve information about Slurm jobs. + Multiple `scontrol` commands are executed in parallel + to increase the speed of collecting the information about jobs. + + Note that the jobs are returned in an arbitrary order. + Args: command (str): A Slurm command to get the relevant job IDs. @@ -630,11 +636,24 @@ def _getBatchJobsUsingSqueueCommand(cls, command: str) -> list[SlurmJob]: f"Could not retrieve information about jobs: {result.stderr.strip()}." 
) - jobs = [] - for id in result.stdout.split("\n"): - if id.strip() == "": - continue + ids = [line.strip() for line in result.stdout.split("\n") if line.strip()] + + def get_job(job_id: str) -> SlurmJob: + return SlurmJob(job_id) + + jobs: list[SlurmJob] = [] + + # use ThreadPoolExecutor to get information about jobs in parallel + with ThreadPoolExecutor( + max_workers=CFG.slurm_options.jobs_scontrol_nthreads + ) as executor: + future_to_id = {executor.submit(get_job, job_id): job_id for job_id in ids} - jobs.append(SlurmJob(id)) + for future in as_completed(future_to_id): + try: + jobs.append(future.result()) + except Exception as e: + job_id = future_to_id[future] + raise QQError(f"Failed to load job {job_id}: {e}.") from e return jobs diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index 15fb4ba..940fcbd 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -245,13 +245,25 @@ class PBSOptions: scratch_dir_inner: str = "main" +@dataclass +class SlurmOptions: + """Options associated with Slurm.""" + + # maximal number of threads used to collect information about jobs using scontrol + jobs_scontrol_nthreads: int = 8 + + @dataclass class SlurmIT4IOptions: + """Options associated with Slurm on IT4I clusters.""" + scratch_dir_attempts: int = 3 @dataclass class SlurmLumiOptions: + """Options associated with Slurm on LUMI.""" + scratch_dir_attempts: int = 3 @@ -279,6 +291,7 @@ class Config: state_colors: StateColors = field(default_factory=StateColors) size: SizeOptions = field(default_factory=SizeOptions) pbs_options: PBSOptions = field(default_factory=PBSOptions) + slurm_options: SlurmOptions = field(default_factory=SlurmOptions) slurm_it4i_options: SlurmIT4IOptions = field(default_factory=SlurmIT4IOptions) slurm_lumi_options: SlurmLumiOptions = field(default_factory=SlurmLumiOptions) binary_name: str = "qq" From bdc7c01e73976685a0b569bdc228b3e1f56a4a16 Mon Sep 17 00:00:00 2001 From: Ladme Date: Sun, 23 Nov 2025 10:32:29 +0100 Subject: [PATCH 19/27] Obtaining job's input directory --- src/qq_lib/batch/pbs/job.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/qq_lib/batch/pbs/job.py b/src/qq_lib/batch/pbs/job.py index 00db023..59a6778 100644 --- a/src/qq_lib/batch/pbs/job.py +++ b/src/qq_lib/batch/pbs/job.py @@ -257,11 +257,8 @@ def getInputDir(self) -> Path | None: return None if not ( - input_dir := env_vars.get(CFG.env_vars.input_dir) # try qq first - or env_vars.get( - "PBS_O_WORKDIR" - ) # if this fails, try PBS (note that PBS_O_WORKDIR is not the directory with the submitted script, - # but the directory from which submission was done) + input_dir := env_vars.get("PBS_O_WORKDIR") # try PBS first + or env_vars.get(CFG.env_vars.input_dir) # if this fails, try qq or env_vars.get("INF_INPUT_DIR") # if this fails, try Infinity ): logger.debug(f"Could not obtain input directory for '{self._job_id}'.") From a7cd902a5dae2d289066069b8f89a642970bc1a4 Mon Sep 17 00:00:00 2001 From: Ladme Date: Sun, 23 Nov 2025 15:03:56 +0100 Subject: [PATCH 20/27] Documenting modules --- .gitignore | 5 +- pyproject.toml | 1 + src/qq_lib/__init__.py | 26 ++++++ src/qq_lib/archive/__init__.py | 7 ++ src/qq_lib/archive/archiver.py | 34 ++++---- src/qq_lib/batch/__init__.py | 9 ++ src/qq_lib/batch/interface/__init__.py | 23 +++++ src/qq_lib/batch/interface/interface.py | 32 +++---- src/qq_lib/batch/pbs/__init__.py | 17 ++++ src/qq_lib/batch/pbs/job.py | 1 + src/qq_lib/batch/slurm/__init__.py | 16 ++++ src/qq_lib/batch/slurm/job.py | 6 +- 
src/qq_lib/batch/slurmit4i/__init__.py | 13 +++
 src/qq_lib/batch/slurmlumi/__init__.py | 14 +++
 src/qq_lib/cd/__init__.py | 8 ++
 src/qq_lib/cd/cder.py | 11 ++-
 src/qq_lib/clear/__init__.py | 8 ++
 src/qq_lib/clear/clearer.py | 2 +-
 src/qq_lib/clear/cli.py | 4 +
 src/qq_lib/core/__init__.py | 8 ++
 src/qq_lib/core/click_format.py | 7 ++
 src/qq_lib/core/common.py | 14 ++-
 src/qq_lib/core/config.py | 10 +++
 src/qq_lib/core/error.py | 11 ++-
 src/qq_lib/core/error_handlers.py | 8 ++
 src/qq_lib/core/field_coupling.py | 7 ++
 src/qq_lib/core/logger.py | 8 ++
 src/qq_lib/core/navigator.py | 10 ++-
 src/qq_lib/core/operator.py | 8 ++
 src/qq_lib/core/repeater.py | 8 ++
 src/qq_lib/core/retryer.py | 9 ++
 src/qq_lib/go/__init__.py | 9 ++
 src/qq_lib/go/goer.py | 6 +-
 src/qq_lib/info/__init__.py | 12 +++
 src/qq_lib/info/informer.py | 26 +++---
 src/qq_lib/info/presenter.py | 28 +++++-
 src/qq_lib/jobs/__init__.py | 15 +++-
 src/qq_lib/jobs/presenter.py | 10 +--
 src/qq_lib/kill/__init__.py | 8 +-
 src/qq_lib/kill/killer.py | 2 +-
 src/qq_lib/nodes/__init__.py | 16 +++-
 src/qq_lib/nodes/presenter.py | 8 +-
 src/qq_lib/properties/__init__.py | 8 ++
 src/qq_lib/properties/depend.py | 7 ++
 src/qq_lib/properties/info.py | 15 +++-
 src/qq_lib/properties/job_type.py | 7 ++
 src/qq_lib/properties/loop.py | 9 ++
 src/qq_lib/properties/resources.py | 7 ++
 src/qq_lib/properties/size.py | 6 ++
 src/qq_lib/properties/states.py | 12 +++
 src/qq_lib/qq.py | 1 +
 src/qq_lib/queues/__init__.py | 9 ++
 src/qq_lib/queues/presenter.py | 2 +-
 src/qq_lib/run/__init__.py | 10 +++
 src/qq_lib/submit/__init__.py | 20 ++++-
 src/qq_lib/submit/parser.py | 2 +-
 src/qq_lib/submit/submitter.py | 3 +
 src/qq_lib/sync/__init__.py | 9 ++
 src/qq_lib/sync/syncer.py | 5 +-
 src/qq_lib/wipe/__init__.py | 10 +++
 tests/test_batch_interface.py | 2 +-
 tests/test_batch_pbs_pbs.py | 2 +-
 tests/test_jobs_presenter.py | 68 +++++++--------
 uv.lock | 109 +++++++++++++++++++++++-
 64 files changed, 681 insertions(+), 127 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9b14416..0851d16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,7 @@ wheels/

 # Codecov
 htmlcov/
-.coverage
\ No newline at end of file
+.coverage
+
+# Documentation
+docs/
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 6f73808..995bcd7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ Documentation = "https://ladme.github.io/qq-manual"

 [dependency-groups]
 dev = [
     "codecov>=2.1.13",
+    "pdoc>=16.0.0",
     "pre-commit>=4.3.0",
     "pyinstaller>=6.16.0",
     "pytest>=8.4.2",
diff --git a/src/qq_lib/__init__.py b/src/qq_lib/__init__.py
index dfe6bc0..fb11b8a 100644
--- a/src/qq_lib/__init__.py
+++ b/src/qq_lib/__init__.py
@@ -1,10 +1,36 @@
 # Released under MIT License.
 # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab

+"""
+Core implementation of the qq command-line tool.
+
+This package provides the internal logic behind qq's job-submission and
+job-management workflow. It defines the abstractions for batch systems, concrete
+backends (PBS, Slurm, and site-specific variants), utilities for preparing and
+synchronizing working directories, loop-job handling, and helpers for inspecting
+jobs, queues, and nodes. All qq CLI commands ultimately delegate to the
+functionality implemented here.
+""" from .qq import __version__, cli __all__ = [ "__version__", "cli", + "archive", + "batch", + "cd", + "clear", + "core", + "go", + "info", + "jobs", + "kill", + "nodes", + "properties", + "queues", + "run", + "submit", + "sync", + "wipe", ] diff --git a/src/qq_lib/archive/__init__.py b/src/qq_lib/archive/__init__.py index 6414034..9e54b0e 100644 --- a/src/qq_lib/archive/__init__.py +++ b/src/qq_lib/archive/__init__.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utilities for archiving and retrieving job-related files. + +This module provides the `Archiver` class, which coordinates the movement +of files between working directory and the job archive. +""" + from .archiver import Archiver __all__ = [ diff --git a/src/qq_lib/archive/archiver.py b/src/qq_lib/archive/archiver.py index c0ca263..2f0777a 100644 --- a/src/qq_lib/archive/archiver.py +++ b/src/qq_lib/archive/archiver.py @@ -3,6 +3,7 @@ import re import socket +from collections.abc import Iterable from pathlib import Path from qq_lib.batch.interface import BatchInterface @@ -16,7 +17,7 @@ class Archiver: """ - Handles archiving and retrieval of files for a job. + Manages archiving and retrieval of job-related files. """ def __init__( @@ -31,11 +32,11 @@ def __init__( Initialize the Archiver. Args: - archive (Path): Absolute path to the archive directory. - archive_format (str): The pattern describing which files to archive. + archive (Path): Absolute path to the job's archive directory. + archive_format (str): Printf-style or regex pattern describing archived filenames. input_machine (str): The hostname from which the job was submitted. input_dir (Path): The directory from which the job was submitted. - batch_system (type[BatchInterface]): The batch system used to run the qq job. + batch_system (type[BatchInterface]): The batch system which manages the job. """ self._batch_system = batch_system self._archive = archive @@ -45,7 +46,7 @@ def __init__( def makeArchiveDir(self) -> None: """ - Create the archive directory if it does not already exist. + Create the archive directory in the job's input directory if it does not already exist. """ logger.debug( f"Attempting to create an archive '{self._archive}' on '{self._input_machine}'." @@ -54,7 +55,7 @@ def makeArchiveDir(self) -> None: def fromArchive(self, dir: Path, cycle: int | None = None) -> None: """ - Fetch files from the archive to a local working directory. + Fetch files from the archive to job's working directory. This method retrieves files from the archive that match the configured archive pattern. If a cycle number is provided, only @@ -63,7 +64,7 @@ def fromArchive(self, dir: Path, cycle: int | None = None) -> None: in the archive are fetched. Args: - dir (Path): The local directory where files will be copied to. + dir (Path): The directory where files will be copied to. cycle (int | None): The cycle number to filter files for. Only relevant for printf-style patterns. If `None`, all matching files are fetched. Defaults to `None`. @@ -94,14 +95,14 @@ def fromArchive(self, dir: Path, cycle: int | None = None) -> None: def toArchive(self, dir: Path) -> None: """ - Archive all files matching the archive format in the specified working directory. + Archive all files matching the archive format in the specified directory. - Copies all files matching the archive pattern from the local + Copies all files matching the archive pattern from directory `dir` to the archive directory. 
        After successfully transferring the files, they are removed from the working directory.

         Args:
-            work_dir (Path): The local directory containing files to archive.
+            dir (Path): The directory containing files to archive.

         Raises:
             QQError: If file transfer or removal fails.
@@ -133,9 +134,9 @@ def toArchive(self, dir: Path) -> None:

     def archiveRunTimeFiles(self, job_name: str, cycle: int) -> None:
         """
-        Archive qq runtime files from a specific job located in the submission directory.
+        Archive qq runtime files from a specific job located in the input directory.

-        The archived files are moved from the submission directory to the archive directory.
+        The archived files are moved from the input directory to the archive directory.

         Ensure that `job_name` does not contain special regex characters,
         or that any such characters are properly escaped.
@@ -194,7 +195,8 @@ def _getFiles(

         Args:
             directory (Path): Directory to search for files.
-            host (str | None): Hostname for remote directories, or None for local.
+            host (str | None): Hostname if the directory is remote,
+                or None if it is available from the current machine.
             pattern (str): A printf-style or regex pattern to match file stems.
             cycle (int | None): Optional cycle number for printf-style patterns.
                 If provided, only files corresponding to that loop are returned.
@@ -254,7 +256,7 @@ def _prepare_regex_pattern(pattern: str) -> re.Pattern[str]:
             pattern (str): The pattern to convert.

         Returns:
-            re.Pattern[str]: Compiled regex pattern matching any part of the filename stem.
+            re.Pattern[str]: Compiled regex pattern that can be used for matching.
         """
         if is_printf_pattern(pattern):
             pattern = printf_to_regex(pattern)
@@ -262,12 +264,12 @@ def _prepare_regex_pattern(pattern: str) -> re.Pattern[str]:
         return re.compile(pattern)

     @staticmethod
-    def _removeFiles(files: list[Path]) -> None:
+    def _removeFiles(files: Iterable[Path]) -> None:
         """
         Remove a list of files from the filesystem.

         Args:
-            files (list[Path]): Files to delete.
+            files (Iterable[Path]): Files to delete.

         Raises:
             OSError: If file removal fails for any file.
diff --git a/src/qq_lib/batch/__init__.py b/src/qq_lib/batch/__init__.py
index b9aa356..607ded0 100644
--- a/src/qq_lib/batch/__init__.py
+++ b/src/qq_lib/batch/__init__.py
@@ -1,6 +1,15 @@
 # Released under MIT License.
 # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab

+"""
+Batch-system support for qq.
+
+This module groups all components that allow qq to interact with HPC batch
+schedulers. It defines the abstract interfaces for jobs, queues, and nodes,
+together with the concrete backends for PBS, Slurm, and site-specific Slurm
+variants.
+"""
+
 # import so that these batch systems are available but do not export them from here
 from .pbs import PBS as _PBS
 from .slurm import Slurm as _Slurm
diff --git a/src/qq_lib/batch/interface/__init__.py b/src/qq_lib/batch/interface/__init__.py
index 8a7df0f..145f1ad 100644
--- a/src/qq_lib/batch/interface/__init__.py
+++ b/src/qq_lib/batch/interface/__init__.py
@@ -1,6 +1,29 @@
 # Released under MIT License.
 # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab

+"""
+Abstractions for integrating qq with HPC batch scheduling systems.
+
+This module defines the core interfaces that allow qq to interact with
+multiple batch systems through a unified API. It provides:
+
+- `BatchInterface`: the central abstract interface that every batch-system
+  backend implements.
It defines operations such as job submission, job + querying, directory synchronization, remote file access, resubmission, and + navigation to compute nodes. + +- `BatchJobInterface`, `BatchNodeInterface`, and `BatchQueueInterface`: + lightweight abstractions representing jobs, nodes, and queues as reported + by the underlying scheduler. These interfaces expose normalized metadata + and allow qq to present consistent job/queue/node information regardless + of scheduler differences. + +- `BatchMeta`: a metaclass that registers available batch-system backends + and provides mechanisms for selecting one from environment variables or by + probing system availability. The `@batch_system` decorator registers + implementations automatically. +""" + from .interface import BatchInterface from .job import BatchJobInterface from .meta import BatchMeta diff --git a/src/qq_lib/batch/interface/interface.py b/src/qq_lib/batch/interface/interface.py index a2073fb..03c6ac1 100644 --- a/src/qq_lib/batch/interface/interface.py +++ b/src/qq_lib/batch/interface/interface.py @@ -36,9 +36,9 @@ class BatchInterface[ """ # magic number indicating unreachable directory when navigating to it - CD_FAIL = 94 + _CD_FAIL = 94 # exit code of ssh if connection fails - SSH_FAIL = 255 + _SSH_FAIL = 255 @classmethod def envName(cls) -> str: @@ -290,7 +290,7 @@ def navigateToDestination(cls, host: str, directory: Path) -> None: Default behavior: - If the target host is different from the current host, SSH is used to connect and `cd` is executed to switch to the directory. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. - If the target host matches the current host, only `cd` is used. A new terminal should always be opened, regardless of the host. @@ -313,16 +313,16 @@ def navigateToDestination(cls, host: str, directory: Path) -> None: result = subprocess.run(ssh_command) # the subprocess exit code can come from: - # - SSH itself failing - returns SSH_FAIL - # - the explicit exit code we set if 'cd' to the directory fails - returns CD_FAIL + # - SSH itself failing - returns _SSH_FAIL + # - the explicit exit code we set if 'cd' to the directory fails - returns _CD_FAIL # - the exit code of the last command the user runs in the interactive shell # - # we ignore user exit codes entirely and only treat SSH_FAIL and CD_FAIL as errors - if result.returncode == cls.SSH_FAIL: + # we ignore user exit codes entirely and only treat _SSH_FAIL and _CD_FAIL as errors + if result.returncode == cls._SSH_FAIL: raise QQError( f"Could not reach '{host}:{str(directory)}': Could not connect to host." ) - if result.returncode == cls.CD_FAIL: + if result.returncode == cls._CD_FAIL: raise QQError( f"Could not reach '{host}:{str(directory)}': Could not change directory." ) @@ -334,7 +334,7 @@ def readRemoteFile(cls, host: str, file: Path) -> str: The default implementation uses SSH to retrieve the file contents. This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -376,7 +376,7 @@ def writeRemoteFile(cls, host: str, file: Path, content: str) -> None: The default implementation uses SSH to send the content to the remote file. 
This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -416,7 +416,7 @@ def makeRemoteDir(cls, host: str, directory: Path) -> None: The default implementation uses SSH to run `mkdir` on the remote host. This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -454,7 +454,7 @@ def listRemoteDir(cls, host: str, directory: Path) -> list[Path]: The default implementation uses SSH to run `ls -A` on the remote host. This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -502,7 +502,7 @@ def deleteRemoteDir(cls, host: str, directory: Path) -> None: The default implementation uses SSH to run `rm -r` on the remote host. This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -541,7 +541,7 @@ def moveRemoteFiles( The default implementation uses SSH to run a sequence of `mv` commands on the remote host. This approach may be inefficient on shared storage or high-latency networks. - Note that the timeout for the SSH connection is set to `SSH_TIMEOUT` seconds. + Note that the timeout for the SSH connection is set to `CFG.timeouts.ssh` seconds. Subclasses should override this method to provide a more efficient implementation if possible. @@ -819,7 +819,7 @@ def _translateSSHCommand(cls, host: str, directory: Path) -> list[str]: f"-o ConnectTimeout={CFG.timeouts.ssh}", host, "-t", - f"cd {directory} || exit {cls.CD_FAIL} && exec bash -l", + f"cd {directory} || exit {cls._CD_FAIL} && exec bash -l", ] @classmethod @@ -1004,7 +1004,7 @@ def _runRsync( Raises: QQError: If the rsync command fails (non-zero exit code) or - if the command times out after `RSYNC_TIMEOUT` seconds. + if the command times out after `CFG.timeouts.rsync` seconds. """ src = f"{src_host}:{str(src_dir)}" if src_host else str(src_dir) dest = f"{dest_host}:{str(dest_dir)}" if dest_host else str(dest_dir) diff --git a/src/qq_lib/batch/pbs/__init__.py b/src/qq_lib/batch/pbs/__init__.py index d782776..0851b76 100644 --- a/src/qq_lib/batch/pbs/__init__.py +++ b/src/qq_lib/batch/pbs/__init__.py @@ -1,6 +1,23 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +PBS backend for qq: job submission, monitoring, and cluster-resource access. + +This module implements qq's full integration with the PBS Pro batch system +as configured on the Metacentrum-family clusters. 
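Condensed, the exit-code convention used by the default navigation path in the interface above looks like this: ssh reserves 255 for its own failures, the remote `cd ... || exit 94` marks an unreachable directory, and anything else is an exit code from the user's interactive shell. A standalone sketch mirroring those constants:

    import subprocess

    SSH_FAIL = 255  # ssh itself failed (e.g., could not connect)
    CD_FAIL = 94    # our explicit marker: the remote `cd` failed

    def navigate(host: str, directory: str, timeout: int = 10) -> None:
        result = subprocess.run([
            "ssh",
            f"-o ConnectTimeout={timeout}",
            host,
            "-t",
            f"cd {directory} || exit {CD_FAIL} && exec bash -l",
        ])
        if result.returncode == SSH_FAIL:
            raise RuntimeError(f"could not connect to '{host}'")
        if result.returncode == CD_FAIL:
            raise RuntimeError(f"could not change directory to '{directory}'")
        # any other return code came from the interactive shell and is ignored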
+ +It provides: + +- The `PBS` batch-system backend, implementing job submission, killing, file + synchronization (local and remote), work-directory handling, resource + translation, dependency formatting, and scratch-directory logic. + +- `PBSJob`, `PBSNode`, and `PBSQueue`, concrete implementations of qq's + job/node/queue interfaces, responsible for parsing PBS command output and + exposing normalized metadata to the rest of qq. +""" + from .job import PBSJob from .node import PBSNode from .pbs import PBS diff --git a/src/qq_lib/batch/pbs/job.py b/src/qq_lib/batch/pbs/job.py index 59a6778..071da87 100644 --- a/src/qq_lib/batch/pbs/job.py +++ b/src/qq_lib/batch/pbs/job.py @@ -30,6 +30,7 @@ class PBSJob(BatchJobInterface): """ def __init__(self, job_id: str): + """Query the batch system for information about the job with the specified ID.""" self._job_id = job_id self._info: dict[str, str] = {} diff --git a/src/qq_lib/batch/slurm/__init__.py b/src/qq_lib/batch/slurm/__init__.py index ade150c..758c404 100644 --- a/src/qq_lib/batch/slurm/__init__.py +++ b/src/qq_lib/batch/slurm/__init__.py @@ -1,6 +1,22 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Slurm backend for qq: job submission, monitoring, and cluster-resource access. + +This module implements qq's full integration with the Slurm batch system. + +It provides: + +- The `Slurm` batch-system backend, implementing job submission, killing, + remote file access and synchronization, resource translation, dependency formatting, + and all Slurm-specific environment propagation. + +- `SlurmJob`, `SlurmNode`, and `SlurmQueue`, concrete implementations of qq's + job/node/queue interfaces, responsible for parsing Slurm command output and exposing + normalized metadata to the rest of qq. +""" + from .job import SlurmJob from .node import SlurmNode from .queue import SlurmQueue diff --git a/src/qq_lib/batch/slurm/job.py b/src/qq_lib/batch/slurm/job.py index 6233cc2..b264df7 100644 --- a/src/qq_lib/batch/slurm/job.py +++ b/src/qq_lib/batch/slurm/job.py @@ -30,7 +30,8 @@ class SlurmJob(BatchJobInterface): Stores metadata for a single Slurm job. """ - STATE_CONVERTER: dict[str, BatchState] = { + # converts from Slurm state names to qq BatchStates + _STATE_CONVERTER: dict[str, BatchState] = { "BOOT_FAIL": BatchState.FAILED, "CANCELLED": BatchState.FAILED, "COMPLETED": BatchState.FINISHED, @@ -46,6 +47,7 @@ class SlurmJob(BatchJobInterface): } def __init__(self, job_id: str): + """Query the batch system for information about the job with the specified ID.""" self._job_id = job_id self._info: dict[str, str] = {} @@ -110,7 +112,7 @@ def getState(self) -> BatchState: if not (raw_state := self._info.get("JobState")): return BatchState.UNKNOWN - converted_state = SlurmJob.STATE_CONVERTER.get(raw_state) or BatchState.UNKNOWN + converted_state = SlurmJob._STATE_CONVERTER.get(raw_state) or BatchState.UNKNOWN # if the job is queued due to depending on another job, it should be considered "held" if ( diff --git a/src/qq_lib/batch/slurmit4i/__init__.py b/src/qq_lib/batch/slurmit4i/__init__.py index 1e53368..64d71b5 100644 --- a/src/qq_lib/batch/slurmit4i/__init__.py +++ b/src/qq_lib/batch/slurmit4i/__init__.py @@ -1,6 +1,19 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +SlurmIT4I backend for qq: job submission, monitoring, and IT4I-specific +scratch and resource handling. 
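The state translation in SlurmJob above is a plain dictionary lookup with a graceful fallback. A stripped-down sketch; the enum and the selected mappings are illustrative, not the complete table:

    from enum import Enum

    class BatchState(Enum):
        QUEUED = "queued"
        RUNNING = "running"
        FINISHED = "finished"
        FAILED = "failed"
        UNKNOWN = "unknown"

    # a few of the raw Slurm state names mapped onto normalized states
    STATE_CONVERTER = {
        "BOOT_FAIL": BatchState.FAILED,
        "CANCELLED": BatchState.FAILED,
        "COMPLETED": BatchState.FINISHED,
        "PENDING": BatchState.QUEUED,
        "RUNNING": BatchState.RUNNING,
    }

    def get_state(raw_state: str | None) -> BatchState:
        # missing or unmapped Slurm states degrade to UNKNOWN instead of raising
        if not raw_state:
            return BatchState.UNKNOWN
        return STATE_CONVERTER.get(raw_state) or BatchState.UNKNOWN

    print(get_state("COMPLETED").value)  # finished
    print(get_state("WEIRD").value)      # unknown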
+ +This module provides qq's full integration with the Slurm batch system as +configured on IT4Innovations clusters (e.g., Karolina, Barbora). It extends the +generic Slurm backend with all IT4I-specific behavior: + +- `SlurmIT4I`, the batch-system backend implementing job submission, killing, + resource translation, local/remote file access, scratch-directory creation, + and work-directory selection logic. +""" + from .slurm import SlurmIT4I __all__ = [ diff --git a/src/qq_lib/batch/slurmlumi/__init__.py b/src/qq_lib/batch/slurmlumi/__init__.py index d60e432..8b7cf9e 100644 --- a/src/qq_lib/batch/slurmlumi/__init__.py +++ b/src/qq_lib/batch/slurmlumi/__init__.py @@ -1,6 +1,20 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +SlurmLumi backend for qq: job submission, monitoring, and LUMI-specific +scratch/flash storage handling. + +This module integrates qq with the Slurm environment deployed on the LUMI +supercomputer. It extends the IT4I Slurm backend with all LUMI-specific +behavior, most importantly the dual-tier temporary storage model and +queue-resource conventions. + +- `SlurmLumi`, the batch-system backend implementing job submission, + dependency handling, resource translation, scratch/flash directory creation, + and file/directory operations on LUMI's fully shared storage. +""" + from .slurm import SlurmLumi __all__ = [ diff --git a/src/qq_lib/cd/__init__.py b/src/qq_lib/cd/__init__.py index 86329f3..62177a5 100644 --- a/src/qq_lib/cd/__init__.py +++ b/src/qq_lib/cd/__init__.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utilities for locating and retrieving the input directory of a job. + +This module provides the `Cder` class, which queries the configured +batch system for a job's input directory. The printed path is intended +to be consumed by a shell wrapper function that performs the actual directory change. +""" + from .cder import Cder __all__ = [ diff --git a/src/qq_lib/cd/cder.py b/src/qq_lib/cd/cder.py index a4762fd..cc86e46 100644 --- a/src/qq_lib/cd/cder.py +++ b/src/qq_lib/cd/cder.py @@ -13,8 +13,7 @@ class Cder: """ - Retrieve and provide the input directory for a specific job - in the configured batch system. + Retrieve and provide the input directory for a specific job in the configured batch system. """ def __init__(self, BatchSystem: type[BatchInterface], job_id: str): @@ -22,8 +21,8 @@ def __init__(self, BatchSystem: type[BatchInterface], job_id: str): Initialize the Cder instance with a batch system interface and job ID. Args: - BatchSystem (type[BatchInterface]): Batch system class to use. - job_id (str): Identifier of the job whose input directory is needed. + BatchSystem (type[BatchInterface]): The batch system which manages the job. + job_id (str): Identifier of the job to query. """ self._job_id = job_id self._BatchSystem = BatchSystem @@ -45,10 +44,10 @@ def cd(self) -> str: @staticmethod def _getInputDirFromJobId(BatchSystem: type[BatchInterface], job_id: str) -> Path: """ - Query the batch system for the input/submission directory of a job. + Query the batch system for the input directory of a job. Args: - BatchSystem (type[BatchInterface]): Batch system class to use. + BatchSystem (type[BatchInterface]): The batch system which manages the job. job_id (str): Identifier of the job to query. 
Returns: diff --git a/src/qq_lib/clear/__init__.py b/src/qq_lib/clear/__init__.py index 5854efd..28a7497 100644 --- a/src/qq_lib/clear/__init__.py +++ b/src/qq_lib/clear/__init__.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utilities for detecting and removing qq runtime files. + +This module provides the `Clearer` class, which identifies and deletes +qq-generated runtime files from a directory. Files associated with active +or successfully completed jobs are preserved unless forced removal is requested. +""" + from .clearer import Clearer __all__ = [ diff --git a/src/qq_lib/clear/clearer.py b/src/qq_lib/clear/clearer.py index d1fc978..0c08692 100644 --- a/src/qq_lib/clear/clearer.py +++ b/src/qq_lib/clear/clearer.py @@ -80,7 +80,7 @@ def _collectRunTimeFiles(self) -> set[Path]: def _collectExcludedFiles(self) -> set[Path]: """ - Collect qq runtime files that should not be deleted. + Collect qq runtime files that should **not** be deleted. Runtime files corresponding to active or successfully finished jobs are included. diff --git a/src/qq_lib/clear/cli.py b/src/qq_lib/clear/cli.py index 93a7b0b..e837e67 100644 --- a/src/qq_lib/clear/cli.py +++ b/src/qq_lib/clear/cli.py @@ -35,6 +35,10 @@ def clear(force: bool) -> NoReturn: """ Delete qq runtime files in the current directory. + + Only runtime files that do **not** correspond to + an active or successfully completed job are deleted, + unless the `force` option is used. """ try: clearer = Clearer(Path()) diff --git a/src/qq_lib/core/__init__.py b/src/qq_lib/core/__init__.py index f43a7d5..aa2d10f 100644 --- a/src/qq_lib/core/__init__.py +++ b/src/qq_lib/core/__init__.py @@ -1,3 +1,11 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Core infrastructure for qq. + +This module collects the foundational classes, utilities, and helpers used +across the qq codebase. It provides the building blocks for command behavior, +job introspection, navigation, configuration, error handling, and structured +logging. +""" diff --git a/src/qq_lib/core/click_format.py b/src/qq_lib/core/click_format.py index 4895c21..d331351 100644 --- a/src/qq_lib/core/click_format.py +++ b/src/qq_lib/core/click_format.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +GNU-style help formatting for Click commands. + +This module defines `GNUHelpColorsCommand`, a Click command class that prints +help text using GNU-style formatting with customizable colors, headings, and +option layouts. +""" from collections.abc import Sequence diff --git a/src/qq_lib/core/common.py b/src/qq_lib/core/common.py index 2033eba..36a7de4 100644 --- a/src/qq_lib/core/common.py +++ b/src/qq_lib/core/common.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +General utility functions for the qq library. + +This module provides helpers for working with qq job files, time durations, +YAML I/O, string normalization, user prompts, path manipulation, and job-name construction. +""" + import re from datetime import timedelta from functools import lru_cache @@ -134,13 +141,13 @@ def get_info_files(directory: Path) -> list[Path]: def get_info_file_from_job_id(job_id: str) -> Path: """ Get path to the qq info file corresponding to a job with the given ID. - The BatchSystem to use is obtained from the environment variable or guessed. 
+ The batch system to use is obtained from the environment variable or guessed. Args: job_id (str): The ID of the job for which to retrieve the info file. Returns: - Path: Absolute path to the QQ job information file. + Path: Absolute path to the qq job info file. Raises: QQError: If the batch system could not be guessed, @@ -699,7 +706,8 @@ def construct_info_file_path(input_dir: Path, job_name: str) -> Path: def available_work_dirs() -> str: - """Return the supported work-directory types for the detected batch system. + """ + Return the supported work-directory types for the detected batch system. The batch system is determined using the `QQ_BATCH_SYSTEM` environment variable or by automatic detection. The supported work-directory types are diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index 940fcbd..64af117 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -1,6 +1,16 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Configuration system for qq. + +This module defines dataclasses representing all configurable aspects of qq, +including file suffixes, environment variables, timeouts, presentation settings, +batch-system options, and global defaults. + +The `Config` class loads user configuration from a TOML file (if available) +and provides a globally accessible `CFG` instance. +""" import os import tomllib diff --git a/src/qq_lib/core/error.py b/src/qq_lib/core/error.py index 14ea641..2a11c30 100644 --- a/src/qq_lib/core/error.py +++ b/src/qq_lib/core/error.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Exception types used throughout qq. + +This module defines the core qq-specific exceptions, including recoverable +errors, job-mismatch and suitability errors, and fatal or communication-related +runner errors. Each exception carries an associated exit code used by qq +commands to report failures consistently. +""" from qq_lib.core.config import CFG @@ -29,8 +37,7 @@ class QQNotSuitableError(QQError): class QQRunFatalError(Exception): """ - Raised when qq runner is unable to load a qq info file - or if qq run is being called outside of qq environment. + Raised when qq runner is unable to load a qq info file. Should only be used to signal that the error state cannot be logged into a qq info file. """ diff --git a/src/qq_lib/core/error_handlers.py b/src/qq_lib/core/error_handlers.py index 0af481f..2ad4437 100644 --- a/src/qq_lib/core/error_handlers.py +++ b/src/qq_lib/core/error_handlers.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Error-handling utilities for qq operations. + +This module provides helper functions for processing and reporting errors +encountered during multi-item qq operations. Handlers distinguish between +unsuitable jobs, job-ID mismatches, general failures, and ignorable errors, +and exit with appropriate qq exit codes when necessary. +""" import sys from typing import NoReturn diff --git a/src/qq_lib/core/field_coupling.py b/src/qq_lib/core/field_coupling.py index 3b054e0..b9f8a12 100644 --- a/src/qq_lib/core/field_coupling.py +++ b/src/qq_lib/core/field_coupling.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utilities for defining and enforcing coupled fields in dataclasses. 
+ +This module provides `FieldCoupling` for specifying dominance-ordered +relationships among multiple fields, and the `@coupled_fields` decorator for +automatically enforcing these rules in a dataclass's `__post_init__`. +""" from typing import Any, Protocol diff --git a/src/qq_lib/core/logger.py b/src/qq_lib/core/logger.py index b6c5335..77aa832 100644 --- a/src/qq_lib/core/logger.py +++ b/src/qq_lib/core/logger.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Unified logging utilities for qq. + +This module provides a helper for creating consistently formatted loggers using +Rich-based output. Loggers automatically adapt to qq's debug mode, support +optional timestamps, and apply standardized styling across the codebase. +""" + import logging import os diff --git a/src/qq_lib/core/navigator.py b/src/qq_lib/core/navigator.py index e5428ef..e714da5 100644 --- a/src/qq_lib/core/navigator.py +++ b/src/qq_lib/core/navigator.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Navigation utilities for qq job directories. + +This module defines the `Navigator` class, an extension of `Operator` that +locates a job's working directory and execution host. It provides helpers for +determining job destination, checking whether the current process is already in +the working directory, and inspecting job state in the context of directory navigation. +""" import socket from pathlib import Path @@ -52,7 +60,7 @@ def fromInformer(cls, informer: Informer) -> Self: informer (Informer): Initialized informer instance containing information about the job. Returns: - Operator: Initialized Operator. + Navigator: Initialized Navigator. """ navigator = super().fromInformer(informer) navigator._setDestination() diff --git a/src/qq_lib/core/operator.py b/src/qq_lib/core/operator.py index e69416d..4212059 100644 --- a/src/qq_lib/core/operator.py +++ b/src/qq_lib/core/operator.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Base functionality for qq job operations. + +This module defines the `Operator` class, which provides a common interface for +working with qq jobs. It loads job information, tracks job state, refreshes +metadata, and renders formatted status output using Rich presenters. +""" + from pathlib import Path from typing import Self diff --git a/src/qq_lib/core/repeater.py b/src/qq_lib/core/repeater.py index 8d7eef3..27bc8e2 100644 --- a/src/qq_lib/core/repeater.py +++ b/src/qq_lib/core/repeater.py @@ -1,6 +1,14 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utility for repeated execution with per-item error handling. + +This module provides the `Repeater` class, which runs a function over a list of +items while capturing exceptions, invoking registered handlers, and tracking +errors on a per-item basis. +""" + from collections.abc import Callable from typing import Any, Self diff --git a/src/qq_lib/core/retryer.py b/src/qq_lib/core/retryer.py index aa0ce64..385c037 100644 --- a/src/qq_lib/core/retryer.py +++ b/src/qq_lib/core/retryer.py @@ -1,6 +1,15 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utility for retrying operations with configurable backoff. + +This module provides the `Retryer` class, which executes a function repeatedly +until it succeeds or a maximum number of attempts is reached. 
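The retry behaviour this docstring describes reduces to a loop with a wait between attempts. A generic illustration, not the actual Retryer API, though the max_tries/wait_seconds names mirror the keyword arguments seen in the tests earlier in this series:

    import time
    from collections.abc import Callable
    from typing import Any

    def retry(func: Callable[..., Any], *args: Any,
              max_tries: int = 3, wait_seconds: float = 1.0) -> Any:
        last_exc: Exception | None = None
        for attempt in range(1, max_tries + 1):
            try:
                return func(*args)
            except Exception as exc:  # intentionally broad for the sketch
                last_exc = exc
                if attempt < max_tries:
                    time.sleep(wait_seconds)
        # all attempts exhausted: re-raise the final failure with context
        raise RuntimeError(f"giving up after {max_tries} tries") from last_exc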
Failures are +logged with timing information, and the final exception is re-raised with +context when retries are exhausted. +""" + from collections.abc import Callable from time import sleep from typing import Any diff --git a/src/qq_lib/go/__init__.py b/src/qq_lib/go/__init__.py index 16e11ee..eeb9861 100644 --- a/src/qq_lib/go/__init__.py +++ b/src/qq_lib/go/__init__.py @@ -1,6 +1,15 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Navigation utilities for entering a qq job's working directory. + +This module defines the `Goer` class, which extends `Navigator` to ensure a job +is in a suitable state for directory access and to open an interactive shell on +the job's main execution node. It handles queued jobs, missing destinations, +and state-based safety checks. +""" + from .goer import Goer __all__ = [ diff --git a/src/qq_lib/go/goer.py b/src/qq_lib/go/goer.py index f6b3f9f..c799acb 100644 --- a/src/qq_lib/go/goer.py +++ b/src/qq_lib/go/goer.py @@ -13,7 +13,7 @@ class Goer(Navigator): """ - Provides utilities to open a shell in the working directory of a qq job. + Handles opening a new shell in the job's working directory on the job's main execution node. """ def ensureSuitable(self) -> None: @@ -88,7 +88,7 @@ def go(self) -> None: def _waitQueued(self): """ - Wait until the job is no longer in the queued state. + Wait until the job is no longer in queued/booting/waiting state. Raises: QQNotSuitableError: If at any point the job is found to be finished @@ -96,7 +96,7 @@ def _waitQueued(self): Note: This is a blocking method and will continue looping until the job - leaves the queued state or an exception is raised. + leaves the queued/booting/waiting state or an exception is raised. """ while self._isQueued(): sleep(CFG.goer.wait_time) diff --git a/src/qq_lib/info/__init__.py b/src/qq_lib/info/__init__.py index 2a686c3..019e2ba 100644 --- a/src/qq_lib/info/__init__.py +++ b/src/qq_lib/info/__init__.py @@ -1,6 +1,18 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Facilities for loading, interpreting, and presenting qq job information. + +This module defines the `Informer` class, which loads qq job metadata, +combines information from info files and from the batch system, and provides +access to runtime details such as working nodes, submission/start/end times, and resources. + +It also provides the `Presenter` class, which formats this information into +Rich-based status panels, full job-information views, and compact summaries used +throughout qq's CLI. +""" + from .informer import Informer from .presenter import Presenter diff --git a/src/qq_lib/info/informer.py b/src/qq_lib/info/informer.py index e7fed20..18d9773 100644 --- a/src/qq_lib/info/informer.py +++ b/src/qq_lib/info/informer.py @@ -26,7 +26,7 @@ def __init__(self, info: Info): Initialize the informer with job information. Args: - info: An Info object containing raw job data. + info (Info): An Info object containing raw job data. """ self.info = info self._batch_info: BatchJobInterface | None = None @@ -37,7 +37,7 @@ def batch_system(self) -> type[BatchInterface]: Return the batch system class used for this job. Returns: - type[BatchInterface]: The batch system class associated with the job. + type[BatchInterface]: The batch system class used for this job. """ return self.info.batch_system @@ -155,9 +155,9 @@ def setRunning( Mark the job as running and set associated metadata. Args: - time: Job start time. 
- main_node: Main node assigned to the job. - work_dir: Working directory used by the job. + time (datetime): Job start time. + main_node (str): Main node assigned to the job. + work_dir (Path): Working directory used by the job. """ self.info.job_state = NaiveState.RUNNING self.info.start_time = time @@ -170,7 +170,7 @@ def setFinished(self, time: datetime) -> None: Mark the job as finished successfully. Args: - time: Job completion time. + time (datetime): Job completion time. """ self.info.job_state = NaiveState.FINISHED self.info.completion_time = time @@ -181,8 +181,8 @@ def setFailed(self, time: datetime, exit_code: int) -> None: Mark the job as failed. Args: - time: Job completion (failure) time. - exit_code: Exit code of the failed job. + time (datetime): Job completion (failure) time. + exit_code (int): Exit code of the failed job. """ self.info.job_state = NaiveState.FAILED self.info.completion_time = time @@ -193,7 +193,7 @@ def setKilled(self, time: datetime) -> None: Mark the job as killed. Args: - time: Time when the job was killed. + time (datetime): Time when the job was killed. """ self.info.job_state = NaiveState.KILLED self.info.completion_time = time @@ -204,7 +204,7 @@ def usesScratch(self) -> bool: Determine if the job uses a scratch directory. Returns: - nool: True if a scratch is used, False if it is not. + bool: True if a scratch is used, False if it is not. """ return self.info.resources.usesScratch() @@ -237,7 +237,7 @@ def getBatchState(self) -> BatchState: def getRealState(self) -> RealState: """ - Get the job's real state by combining qq's internal state (`NaiveState`) + Get the job's real state by combining the internal state (`NaiveState`) with the state reported by the batch system (`BatchState`). Uses cached information if available; otherwise queries the batch system @@ -343,7 +343,9 @@ def getInfoFile(self) -> Path: """ Get absolute path to the info file associated with this job. + Be aware that the info file does not have to exist. + Returns: - Path: Absolute path to the info file. Be aware that the info file does not have to exist. + Path: Absolute path to the info file. """ return construct_info_file_path(self.info.input_dir, self.info.job_name) diff --git a/src/qq_lib/info/presenter.py b/src/qq_lib/info/presenter.py index c21d523..0ced40e 100644 --- a/src/qq_lib/info/presenter.py +++ b/src/qq_lib/info/presenter.py @@ -21,7 +21,7 @@ class Presenter: """ - Presentation layer for qq job information. + Presents information about a qq job. """ def __init__(self, informer: Informer): @@ -362,7 +362,19 @@ def _createJobStatusTable( return table - def _createJobStepsTable(self, steps: list[BatchJobInterface]): + def _createJobStepsTable(self, steps: list[BatchJobInterface]) -> Table: + """ + Create a formatted Rich table displaying job step information. + + Steps without a valid start time are skipped. The resulting table is intended + to be used within full-info job panels. + + Args: + steps: A list of batch-system step objects belonging to the job. + + Returns: + Table: A Rich table containing the formatted step information. + """ table = Table(show_header=True, box=None, padding=(0, 1)) table.add_column("Step", justify="center", style=CFG.presenter.key_style) @@ -400,6 +412,18 @@ def _createJobStepsTable(self, steps: list[BatchJobInterface]): return table def _createJobStepsBlock(self) -> Group: + """ + Create a Rich block containing the job-steps section of the full info panel. 
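For reference, the Rich calls used by these presenter methods compose as follows; the columns, rows, and styles are illustrative, not qq's exact configuration:

    from rich.console import Console
    from rich.table import Table

    # a compact, borderless table in the style the presenter builds for job steps
    table = Table(show_header=True, box=None, padding=(0, 1))
    table.add_column("Step", justify="center", style="bold cyan")
    table.add_column("State", justify="center")
    table.add_column("Start time", justify="center")
    table.add_row("0", "FINISHED", "2025-11-23 10:28")
    table.add_row("1", "RUNNING", "2025-11-23 10:41")

    Console().print(table)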
+ + This block includes a section heading ("STEPS") and the table of job steps + created by `_createJobStepsTable()`. The block is only shown when the job + contains more than one step; for single-step jobs, an empty block is returned. + + Returns: + Group: A Rich group representing the job-steps section, or an empty group + if no multi-step information should be displayed. + """ + job: BatchJobInterface = self._informer.getBatchInfo() steps = job.getSteps() diff --git a/src/qq_lib/jobs/__init__.py b/src/qq_lib/jobs/__init__.py index da73028..5714423 100644 --- a/src/qq_lib/jobs/__init__.py +++ b/src/qq_lib/jobs/__init__.py @@ -1,6 +1,17 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -from .presenter import JobsPresenter, JobsStatistics +""" +Presentation utilities for batch-system job listings and statistics. -__all__ = ["JobsPresenter", "JobsStatistics"] +This module provides `JobsPresenter`, which formats batch-system job data +into compact CLI tables and Rich panels. + +Unlike many other qq modules, this module operates purely +on information obtained directly from the batch system +and does not use qq info files. +""" + +from .presenter import JobsPresenter + +__all__ = ["JobsPresenter"] diff --git a/src/qq_lib/jobs/presenter.py b/src/qq_lib/jobs/presenter.py index a006fca..a06240e 100644 --- a/src/qq_lib/jobs/presenter.py +++ b/src/qq_lib/jobs/presenter.py @@ -25,7 +25,7 @@ class JobsPresenter: """ # Mapping of human-readable color names to ANSI escape codes. - ANSI_COLORS = { + _ANSI_COLORS = { # default "default": "", # standard colors @@ -59,7 +59,7 @@ class JobsPresenter: } # Table formatting configuration for `tabulate`. - COMPACT_TABLE = TableFormat( + _COMPACT_TABLE = TableFormat( lineabove=Line("", "", "", ""), linebelowheader="", linebetweenrows="", @@ -148,7 +148,7 @@ def _createBasicJobsTable(self) -> str: str: Tabulated job information with ANSI color codes applied. Notes: - - Uses `tabulate` with `COMPACT_TABLE` format because + - Uses `tabulate` with `_COMPACT_TABLE` format because Rich's Table is prohibitively slow for large number of items. - Updates internal job statistics via `self._stats`. """ @@ -158,7 +158,7 @@ def _createBasicJobsTable(self) -> str: return tabulate( rows, headers=self._formatHeaders(headers), - tablefmt=JobsPresenter.COMPACT_TABLE, + tablefmt=JobsPresenter._COMPACT_TABLE, stralign="center", numalign="center", ) @@ -537,7 +537,7 @@ def _color(string: str, color: str | None = None, bold: bool = False) -> str: Returns: str: ANSI-colored and optionally bolded string. """ - return f"{JobsPresenter.ANSI_COLORS['bold'] if bold else ''}{JobsPresenter.ANSI_COLORS[color] if color else ''}{string}{JobsPresenter.ANSI_COLORS['reset'] if color or bold else ''}" + return f"{JobsPresenter._ANSI_COLORS['bold'] if bold else ''}{JobsPresenter._ANSI_COLORS[color] if color else ''}{string}{JobsPresenter._ANSI_COLORS['reset'] if color or bold else ''}" @staticmethod def _mainColor(string: str, bold: bool = False) -> str: diff --git a/src/qq_lib/kill/__init__.py b/src/qq_lib/kill/__init__.py index a1a12ee..b0dfaf8 100644 --- a/src/qq_lib/kill/__init__.py +++ b/src/qq_lib/kill/__init__.py @@ -2,7 +2,13 @@ # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab """ -Module for terminating qq jobs. +Termination utilities for qq jobs. + +This module defines the `Killer` class, which extends `Operator` to validate +whether a job can be terminated and to invoke the batch system's kill command. 
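The _color helper above is plain string concatenation of ANSI escape sequences. A self-contained sketch of the same idea, with the color table cut down to a few entries:

    ANSI = {
        "bold": "\033[1m",
        "red": "\033[31m",
        "green": "\033[32m",
        "reset": "\033[0m",
    }

    def color(text: str, name: str | None = None, bold: bool = False) -> str:
        # prefix with the requested styles; append reset only if something was applied
        prefix = (ANSI["bold"] if bold else "") + (ANSI[name] if name else "")
        return f"{prefix}{text}{ANSI['reset']}" if prefix else text

    print(color("queued", "green"), color("failed", "red", bold=True))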
+ +It also updates and locks the qq info file when appropriate, ensuring that killed jobs +are consistently recorded. """ from .killer import Killer diff --git a/src/qq_lib/kill/killer.py b/src/qq_lib/kill/killer.py index e7bb34c..b7723a8 100644 --- a/src/qq_lib/kill/killer.py +++ b/src/qq_lib/kill/killer.py @@ -15,7 +15,7 @@ class Killer(Operator): """ - Class to manage the termination of a qq job. + Class managing the termination of qq jobs. """ def ensureSuitable(self) -> None: diff --git a/src/qq_lib/nodes/__init__.py b/src/qq_lib/nodes/__init__.py index b78a94a..1f4b76f 100644 --- a/src/qq_lib/nodes/__init__.py +++ b/src/qq_lib/nodes/__init__.py @@ -1,6 +1,18 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab -from .presenter import NodeGroup, NodeGroupStats, NodesPresenter +""" +Provides node presentation utilities. -__all__ = ["NodeGroup", "NodeGroupStats", "NodesPresenter"] +This module organizes and formats information about compute nodes as reported by +the batch system, preparing it for human-readable terminal output. + +Internal grouping logic clusters nodes with similar naming patterns, extracts +shared attributes, and aggregates resource and property data. These groups are +then rendered by `NodesPresenter`, which produces a unified panel showing node +availability, CPU/GPU capacities, scratch resources, and other relevant metrics. +""" + +from .presenter import NodesPresenter + +__all__ = ["NodesPresenter"] diff --git a/src/qq_lib/nodes/presenter.py b/src/qq_lib/nodes/presenter.py index 9edec89..077fcc9 100644 --- a/src/qq_lib/nodes/presenter.py +++ b/src/qq_lib/nodes/presenter.py @@ -20,7 +20,7 @@ class NodeGroup: """ - Represents a logical group of compute nodes within a batch or cluster system. + Represents a logical group of compute nodes within a batch system. """ def __init__(self, name: str, nodes: list[BatchNodeInterface], user: str): @@ -134,7 +134,7 @@ def extract_prefix(name: str): def extract_number_sequence(name: str): # get individual groups of digits in the name - # this allows properly sorting even names like 'elmo5-18' + # this allows properly sorting even names like 'node5-18' numbers = re.findall(r"\d+", name) return [int(n) for n in numbers] if numbers else [float("inf")] @@ -488,7 +488,7 @@ def _createNodeGroups(self) -> list[NodeGroup]: Nodes sharing the same alphabetic prefix are grouped together (e.g., `node1`, `node2`, `node3` form one group). Groups with fewer than three - nodes are merged into a generic "Others" group. + nodes are merged into a generic "others" group. Returns: list[NodeGroup]: A list of node groups created from the input nodes. @@ -571,7 +571,7 @@ def _formatProcessingUnits(free: int, total: int, available: bool) -> Text: Format numbers of free and total CPUs or GPUs as a styled Rich text element. Args: - free (int): Number of free units (CPUs or GPUs). + free (int): Number of free units (e.g., CPUs or GPUs). total (int): Total number of units. available (bool): Whether the node is available to the user. diff --git a/src/qq_lib/properties/__init__.py b/src/qq_lib/properties/__init__.py index f43a7d5..0db5cd7 100644 --- a/src/qq_lib/properties/__init__.py +++ b/src/qq_lib/properties/__init__.py @@ -1,3 +1,11 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Properties and structured metadata for qq jobs. + +This module provides the core data representations underlying qq's job model. 
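The digit-sequence key used for sorting node names above also works on its own; a runnable sketch (the presenter additionally groups names by their alphabetic prefix first):

    import re

    def number_key(name: str) -> list[float]:
        # extract every digit group so 'node5-18' sorts before 'node10-2'
        numbers = re.findall(r"\d+", name)
        return [int(n) for n in numbers] if numbers else [float("inf")]

    nodes = ["node10-2", "node5-18", "node5-2", "login"]
    print(sorted(nodes, key=number_key))
    # ['node5-2', 'node5-18', 'node10-2', 'login']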
+It collects all property- and metadata-related submodules that describe what a +qq job *is* - its type, requested resources, dependencies, loop-cycle +configuration, sizes, and state information. +""" diff --git a/src/qq_lib/properties/depend.py b/src/qq_lib/properties/depend.py index 806e3cb..9df75cf 100644 --- a/src/qq_lib/properties/depend.py +++ b/src/qq_lib/properties/depend.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Representation and handling of qq job dependencies. + +This module defines `DependType`, an enumeration of supported dependency +conditions and the `Depend` dataclass, which stores both the dependency type +and referenced job IDs. +""" import re from dataclasses import dataclass diff --git a/src/qq_lib/properties/info.py b/src/qq_lib/properties/info.py index 780c93c..b425252 100644 --- a/src/qq_lib/properties/info.py +++ b/src/qq_lib/properties/info.py @@ -1,6 +1,19 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Structured storage and serialization of qq job metadata. + +This module defines the `Info` dataclass, which provides a representation +of qq job information: submission parameters, resource requests, job state, +timing data, dependencies, and execution context. It handles +loading and exporting YAML info files both locally and from remote hosts, and +offers minimal helpers such as command-line reconstruction for resubmission. + +`Info` focuses strictly on data representation and safe serialization; higher-level +logic (state interpretation, batch-system interaction, consistency checks) is +implemented in `Informer` and related components. +""" from dataclasses import dataclass, field, fields from datetime import datetime @@ -297,7 +310,7 @@ def _fromDict(cls, data: dict[str, object]) -> Self: Construct an Info instance from a dictionary. Args: - data: Dictionary containing field names and values. + data (dict[str, object]): Dictionary containing field names and values. Returns: Info: An Info instance. diff --git a/src/qq_lib/properties/job_type.py b/src/qq_lib/properties/job_type.py index d61d6b0..4de9081 100644 --- a/src/qq_lib/properties/job_type.py +++ b/src/qq_lib/properties/job_type.py @@ -1,6 +1,13 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Enumeration of supported qq job types. + +This module defines `JobType`, an enum distinguishing between standard +(single-run) qq jobs and loop jobs. +""" + from enum import Enum from typing import Self diff --git a/src/qq_lib/properties/loop.py b/src/qq_lib/properties/loop.py index 1b9e5ac..61ec65d 100644 --- a/src/qq_lib/properties/loop.py +++ b/src/qq_lib/properties/loop.py @@ -1,6 +1,15 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Loop-job metadata and cycle-tracking utilities. + +This module defines `LoopInfo`, a dataclass describing the iteration +parameters of a qq loop job: its cycle range, archive location, archive +naming format, and the current cycle as inferred from existing archived +files. +""" + import re from dataclasses import asdict, dataclass from pathlib import Path diff --git a/src/qq_lib/properties/resources.py b/src/qq_lib/properties/resources.py index 948da7b..adb67b9 100644 --- a/src/qq_lib/properties/resources.py +++ b/src/qq_lib/properties/resources.py @@ -1,6 +1,13 @@ # Released under MIT License. 
# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Structured representation of job resource requirements. + +This module defines the `Resources` dataclass, which captures all CPU, GPU, +memory, storage, walltime, and property requirements associated with a qq job. +""" + import re from dataclasses import asdict, dataclass, fields diff --git a/src/qq_lib/properties/size.py b/src/qq_lib/properties/size.py index 9b5600e..0fcb374 100644 --- a/src/qq_lib/properties/size.py +++ b/src/qq_lib/properties/size.py @@ -1,6 +1,12 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utility class for representing and manipulating memory and storage sizes. + +This module defines the `Size` class, a numeric wrapper used across +qq to express quantities such as memory limits and scratch allocations. +""" import math import re diff --git a/src/qq_lib/properties/states.py b/src/qq_lib/properties/states.py index 7742ae5..544406f 100644 --- a/src/qq_lib/properties/states.py +++ b/src/qq_lib/properties/states.py @@ -1,6 +1,18 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +State models for qq jobs across different layers of the system. + +This module defines three related enums - `NaiveState`, `BatchState`, and +`RealState` - used to represent a job's status as recorded in qq's metadata, +reported by the batch system, and interpreted by qq's higher-level logic. + +qq often receives partial or inconsistent information (e.g., a job marked as +finished locally while still running in the batch system). `RealState` +normalizes these signals into a single coherent state used by qq operators. +""" + from enum import Enum from typing import Self diff --git a/src/qq_lib/qq.py b/src/qq_lib/qq.py index ac01caf..aa9e57b 100644 --- a/src/qq_lib/qq.py +++ b/src/qq_lib/qq.py @@ -22,6 +22,7 @@ from qq_lib.sync.cli import sync from qq_lib.wipe.cli import wipe +# version of the qq package __version__ = "0.6.0-dev.2" # support both --help and -h diff --git a/src/qq_lib/queues/__init__.py b/src/qq_lib/queues/__init__.py index f9b6b48..8d43a9e 100644 --- a/src/qq_lib/queues/__init__.py +++ b/src/qq_lib/queues/__init__.py @@ -1,6 +1,15 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Presentation utilities for queues of the batch system. + +This module defines `QueuesPresenter`, a formatter that turns raw queue data from +the batch system into user-friendly Rich panels. It summarizes per-queue load, +availability, routing relationships, limits such as walltime and node caps, and +optional administrative comments. +""" + from .presenter import QueuesPresenter __all__ = [ diff --git a/src/qq_lib/queues/presenter.py b/src/qq_lib/queues/presenter.py index d21d88a..82a5496 100644 --- a/src/qq_lib/queues/presenter.py +++ b/src/qq_lib/queues/presenter.py @@ -14,7 +14,7 @@ class QueuesPresenter: """ - Present information about queues of the batch system. + Presents information about queues of the batch system. """ def __init__(self, queues: list[BatchQueueInterface], user: str, all: bool): diff --git a/src/qq_lib/run/__init__.py b/src/qq_lib/run/__init__.py index b016ea5..104f06c 100644 --- a/src/qq_lib/run/__init__.py +++ b/src/qq_lib/run/__init__.py @@ -1,6 +1,16 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Execution utilities for running qq jobs inside the batch environment. 
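How the two state sources described in states.py above might be reconciled can be shown with a toy rule; this is deliberately simplified and does not reproduce qq's actual RealState logic:

    from enum import Enum

    class NaiveState(Enum):   # what the qq info file claims
        RUNNING = "running"
        FINISHED = "finished"

    class BatchState(Enum):   # what the scheduler reports
        RUNNING = "running"
        FINISHED = "finished"

    class RealState(Enum):    # the reconciled view
        RUNNING = "running"
        FINISHED = "finished"
        INCONSISTENT = "inconsistent"

    def reconcile(naive: NaiveState, batch: BatchState) -> RealState:
        # agreeing sources pass through; contradictions are flagged for qq operators
        if naive.value == batch.value:
            return RealState(naive.value)
        return RealState.INCONSISTENT

    print(reconcile(NaiveState.FINISHED, BatchState.RUNNING))  # RealState.INCONSISTENT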
+
+This module defines the `Runner` class, which prepares the execution
+environment, launches the user's job script, updates qq's state tracking,
+and performs cleanup on success, failure, or interruption. It handles both
+shared and scratch working directories, loop-job archiving, resubmitting,
+communication with the batch system, and SIGTERM-safe shutdown.
+"""
+
 from .runner import Runner

 __all__ = [
diff --git a/src/qq_lib/submit/__init__.py b/src/qq_lib/submit/__init__.py
index 1471816..7e54600 100644
--- a/src/qq_lib/submit/__init__.py
+++ b/src/qq_lib/submit/__init__.py
@@ -2,7 +2,25 @@
 # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab

 """
-This module manages submission of qq jobs to the batch system.
+Utilities for submitting qq jobs.
+
+This module integrates three main components - `Parser`, `Submitter`, and
+`SubmitterFactory` - that collectively interpret submission settings, construct
+job metadata, and hand off execution to the batch system.
+
+`Parser` extracts qq directives declared inside the script (via `# qq ...`
+lines) and normalizes them into structured submission parameters such as
+resources, dependencies, file include/exclude rules, and loop-job fields.
+
+`Submitter` validates the script, prevents accidental duplicate submissions,
+constructs the qq info file, sets up environment variables needed by `qq run`,
+and finally invokes the batch system's submission mechanism.
+
+`SubmitterFactory` coordinates command-line arguments with script-embedded
+directives, merges and resolves resources, determines the batch system and
+queue, constructs loop-job settings, and ultimately produces a fully configured
+`Submitter`. It ensures a consistent and unified interpretation of submission
+parameters from all available sources.
 """

 from .factory import SubmitterFactory
diff --git a/src/qq_lib/submit/parser.py b/src/qq_lib/submit/parser.py
index 71ae3d7..a4a60b1 100644
--- a/src/qq_lib/submit/parser.py
+++ b/src/qq_lib/submit/parser.py
@@ -21,7 +21,7 @@

 class Parser:
     """
-    Parser for qq job submission options specified in a script.
+    Parser for qq job submission options (qq directives) specified in a script.
     """

     def __init__(self, script: Path, params: list[Parameter]):
diff --git a/src/qq_lib/submit/submitter.py b/src/qq_lib/submit/submitter.py
index fac3dc1..8e4a06b 100644
--- a/src/qq_lib/submit/submitter.py
+++ b/src/qq_lib/submit/submitter.py
@@ -39,6 +39,9 @@ class Submitter:
     - Guard against multiple submissions from the same directory.
     - Set environment variables required for `qq run`.
     - Create a qq info file for tracking job state and metadata.
+
+    Note that Submitter ignores qq directives in the submitted script.
+    To handle them, you have to build a Submitter using the SubmitterFactory.
     """

     def __init__(
diff --git a/src/qq_lib/sync/__init__.py b/src/qq_lib/sync/__init__.py
index 00afdc2..55b8f13 100644
--- a/src/qq_lib/sync/__init__.py
+++ b/src/qq_lib/sync/__init__.py
@@ -1,6 +1,15 @@
 # Released under MIT License.
 # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab

+"""
+File-synchronization utilities for retrieving data from a running or failed qq job.
+
+This module defines the `Syncer` class, an extension of `Navigator` that handles
+copying files from a job's remote working directory back to the job's input
+directory. It performs safety checks based on the job's real state, ensuring
+that synchronization is attempted only when a working directory actually exists.
+""" + from .syncer import Syncer __all__ = [ diff --git a/src/qq_lib/sync/syncer.py b/src/qq_lib/sync/syncer.py index 2feb6b0..4c9f3bd 100644 --- a/src/qq_lib/sync/syncer.py +++ b/src/qq_lib/sync/syncer.py @@ -11,13 +11,12 @@ class Syncer(Navigator): """ Handle synchronization of job files between a remote working directory - (on a compute node or cluster) and the local input directory. + (on a compute node) and the local input directory. """ def ensureSuitable(self): """ - Verify that the job is in a state where files - can be fetched from its working directory. + Verify that the job is in a state where files can be fetched from its working directory. Raises: QQNotSuitableError: If the job has already finished / is finishing successfully diff --git a/src/qq_lib/wipe/__init__.py b/src/qq_lib/wipe/__init__.py index 6ebaf4d..e03cf71 100644 --- a/src/qq_lib/wipe/__init__.py +++ b/src/qq_lib/wipe/__init__.py @@ -1,6 +1,16 @@ # Released under MIT License. # Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab +""" +Utilities for removing the working directory of a qq job. + +This module defines the `Wiper` class, an extension of `Navigator` responsible for +safely deleting a job's remote working directory once it is no longer needed. + +`Wiper` distinguishes between shared-storage jobs and scratch-based jobs, and +guards against accidental deletion of the job's input directory. +""" + from .wiper import Wiper __all__ = [ diff --git a/tests/test_batch_interface.py b/tests/test_batch_interface.py index d5ec196..4f4f837 100644 --- a/tests/test_batch_interface.py +++ b/tests/test_batch_interface.py @@ -25,7 +25,7 @@ def test_translate_ssh_command(): f"-o ConnectTimeout={CFG.timeouts.ssh}", host, "-t", - f"cd {directory} || exit {BatchInterface.CD_FAIL} && exec bash -l", + f"cd {directory} || exit {BatchInterface._CD_FAIL} && exec bash -l", ] diff --git a/tests/test_batch_pbs_pbs.py b/tests/test_batch_pbs_pbs.py index 85341b4..54a3e4b 100644 --- a/tests/test_batch_pbs_pbs.py +++ b/tests/test_batch_pbs_pbs.py @@ -53,7 +53,7 @@ def test_navigate_success(tmp_path): f"-o ConnectTimeout={CFG.timeouts.ssh}", "fake.host.org", "-t", - f"cd {directory} || exit {BatchInterface.CD_FAIL} && exec bash -l", + f"cd {directory} || exit {BatchInterface._CD_FAIL} && exec bash -l", ] ) diff --git a/tests/test_jobs_presenter.py b/tests/test_jobs_presenter.py index 9d0a5cb..7641c0a 100644 --- a/tests/test_jobs_presenter.py +++ b/tests/test_jobs_presenter.py @@ -44,47 +44,47 @@ def test_init_sets_all_attributes_and_creates_statistics(): "string,color,bold,expected_prefix", [ ("test", None, False, ""), # no color, no bold - ("test", "red", False, JobsPresenter.ANSI_COLORS["red"]), - ("test", None, True, JobsPresenter.ANSI_COLORS["bold"]), + ("test", "red", False, JobsPresenter._ANSI_COLORS["red"]), + ("test", None, True, JobsPresenter._ANSI_COLORS["bold"]), ( "test", "green", True, - JobsPresenter.ANSI_COLORS["bold"] + JobsPresenter.ANSI_COLORS["green"], + JobsPresenter._ANSI_COLORS["bold"] + JobsPresenter._ANSI_COLORS["green"], ), ], ) def test_color_applies_correct_ansi(string, color, bold, expected_prefix): result = JobsPresenter._color(string, color=color, bold=bold) - reset = JobsPresenter.ANSI_COLORS["reset"] if color or bold else "" + reset = JobsPresenter._ANSI_COLORS["reset"] if color or bold else "" assert result == f"{expected_prefix}{string}{reset}" def test_main_color_applies_main_color(): text = "text" result = JobsPresenter._mainColor(text) - expected = 
f"{JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.main_style]}text{JobsPresenter.ANSI_COLORS['reset']}" + expected = f"{JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.main_style]}text{JobsPresenter._ANSI_COLORS['reset']}" assert result == expected def test_main_color_applies_main_color_and_bold(): text = "text" result = JobsPresenter._mainColor(text, bold=True) - expected = f"{JobsPresenter.ANSI_COLORS['bold']}{JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.main_style]}text{JobsPresenter.ANSI_COLORS['reset']}" + expected = f"{JobsPresenter._ANSI_COLORS['bold']}{JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.main_style]}text{JobsPresenter._ANSI_COLORS['reset']}" assert result == expected def test_secondary_color_applies_secondary_color(): text = "text" result = JobsPresenter._secondaryColor(text) - expected = f"{JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.secondary_style]}text{JobsPresenter.ANSI_COLORS['reset']}" + expected = f"{JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.secondary_style]}text{JobsPresenter._ANSI_COLORS['reset']}" assert result == expected def test_secondary_color_applies_secondary_color_and_bold(): text = "text" result = JobsPresenter._secondaryColor(text, bold=True) - expected = f"{JobsPresenter.ANSI_COLORS['bold']}{JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.secondary_style]}text{JobsPresenter.ANSI_COLORS['reset']}" + expected = f"{JobsPresenter._ANSI_COLORS['bold']}{JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.secondary_style]}text{JobsPresenter._ANSI_COLORS['reset']}" assert result == expected @@ -176,7 +176,7 @@ def test_format_nodes_or_comment_returns_estimated(mock_job): ): result = JobsPresenter._formatNodesOrComment(BatchState.QUEUED, mock_job) - assert JobsPresenter.ANSI_COLORS[BatchState.QUEUED.color] in result + assert JobsPresenter._ANSI_COLORS[BatchState.QUEUED.color] in result assert desc in result duration_str = format_duration_wdhhmmss(estimated_time - datetime.now()).rsplit( ":", 1 @@ -195,7 +195,7 @@ def test_format_nodes_or_comment_returns_estimated_truncated(mock_job): ): result = JobsPresenter._formatNodesOrComment(BatchState.QUEUED, mock_job) - assert JobsPresenter.ANSI_COLORS[BatchState.QUEUED.color] in result + assert JobsPresenter._ANSI_COLORS[BatchState.QUEUED.color] in result assert "node1 + node2 + node3 + node4 + node5 + …" in result duration_str = format_duration_wdhhmmss(estimated_time - datetime.now()).rsplit( ":", 1 @@ -215,37 +215,37 @@ def test_format_nodes_or_comment_returns_empty_when_no_info(mock_job): @pytest.mark.parametrize("util", [101, 150, 300]) def test_format_util_cpu_above_100_uses_strong_warning(util): result = JobsPresenter._formatUtilCPU(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("util", [80, 85, 99, 100]) def test_format_util_cpu_80_to_100_uses_main_color(util): result = JobsPresenter._formatUtilCPU(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.main_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.main_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("util", [60, 61, 79]) def 
test_format_util_cpu_60_to_79_uses_mild_warning(util): result = JobsPresenter._formatUtilCPU(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.mild_warning_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.mild_warning_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("util", [0, 10, 59]) def test_format_util_cpu_below_60_uses_strong_warning(util): result = JobsPresenter._formatUtilCPU(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) def test_format_util_mem_none_returns_empty(): @@ -255,28 +255,28 @@ def test_format_util_mem_none_returns_empty(): @pytest.mark.parametrize("util", [0, 50, 89]) def test_format_util_mem_below_90_uses_main_color(util): result = JobsPresenter._formatUtilMem(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.main_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.main_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("util", [90, 95, 99]) def test_format_util_mem_90_to_99_uses_mild_warning(util): result = JobsPresenter._formatUtilMem(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.mild_warning_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.mild_warning_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("util", [100, 110, 150]) def test_format_util_mem_100_or_more_uses_strong_warning(util): result = JobsPresenter._formatUtilMem(util) - color_code = JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] + color_code = JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.strong_warning_style] assert str(util) in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.fixture @@ -299,12 +299,12 @@ def test_format_time_unknown_or_suspended_returns_empty(state, start_end_walltim def test_format_time_finished_or_failed_returns_colored_date(state, start_end_walltime): start, end, walltime = start_end_walltime result = JobsPresenter._formatTime(state, start, end, walltime) - color_code = JobsPresenter.ANSI_COLORS[state.color] + color_code = JobsPresenter._ANSI_COLORS[state.color] formatted_date = end.strftime(CFG.date_formats.standard) assert formatted_date in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize( @@ -316,11 +316,11 @@ def test_format_time_waiting_like_states_show_elapsed_duration( start, end, walltime = start_end_walltime duration_str = format_duration_wdhhmmss(end - start) result = JobsPresenter._formatTime(state, start, end, walltime) - color_code = JobsPresenter.ANSI_COLORS[state.color] + color_code = JobsPresenter._ANSI_COLORS[state.color] 
assert duration_str in result assert color_code in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("state", [BatchState.RUNNING, BatchState.EXITING]) @@ -331,11 +331,11 @@ def test_format_time_running_or_exiting_within_walltime(state, start_end_walltim result = JobsPresenter._formatTime(state, start, end, walltime) # should use state's color (not strong warning) - color_code = JobsPresenter.ANSI_COLORS[state.color] + color_code = JobsPresenter._ANSI_COLORS[state.color] assert run_duration_str in result assert color_code in result assert f"/ {walltime_str}" in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.mark.parametrize("state", [BatchState.RUNNING, BatchState.EXITING]) @@ -349,13 +349,13 @@ def test_format_time_running_or_exiting_exceeding_walltime_uses_strong_warning( result = JobsPresenter._formatTime(state, start, end, walltime) # should use strong warning color for run time - warning_color_code = JobsPresenter.ANSI_COLORS[ + warning_color_code = JobsPresenter._ANSI_COLORS[ CFG.jobs_presenter.strong_warning_style ] assert run_duration_str in result assert warning_color_code in result assert f"/ {walltime_str}" in result - assert result.endswith(JobsPresenter.ANSI_COLORS["reset"]) + assert result.endswith(JobsPresenter._ANSI_COLORS["reset"]) @pytest.fixture @@ -956,7 +956,7 @@ def test_jobs_presenter_insert_extra_info_uses_cfg_style(): result = presenter._insertExtraInfo("HEADER\nROW1") - assert JobsPresenter.ANSI_COLORS[CFG.jobs_presenter.extra_info_style] in result + assert JobsPresenter._ANSI_COLORS[CFG.jobs_presenter.extra_info_style] in result def test_format_exit_code_returns_empty_string_when_exit_code_is_none(): @@ -1271,7 +1271,7 @@ def test_create_basic_jobs_table_creates_row_for_each_job_with_headers(): ["row3_col1", "row3_col2", "row3_col3"], ], headers=["fmt_S", "fmt_Job_ID", "fmt_User"], - tablefmt=JobsPresenter.COMPACT_TABLE, + tablefmt=JobsPresenter._COMPACT_TABLE, stralign="center", numalign="center", ) diff --git a/uv.lock b/uv.lock index e4c7ca9..f76640e 100644 --- a/uv.lock +++ b/uv.lock @@ -248,6 +248,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + [[package]] name = "macholib" version = "1.16.3" @@ -262,14 +274,86 @@ wheels = [ [[package]] name = "markdown-it-py" -version = "4.0.0" +version = "3.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" 
}, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, +] + +[[package]] +name = "markdown2" +version = "2.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652, upload-time = "2025-07-27T16:16:24.307Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954, upload-time = "2025-07-27T16:16:23.026Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] [[package]] @@ -299,6 +383,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pdoc" +version = "16.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "markdown2" }, + { name = "markupsafe" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/fe/ab3f34a5fb08c6b698439a2c2643caf8fef0d61a86dd3fdcd5501c670ab8/pdoc-16.0.0.tar.gz", hash = "sha256:fdadc40cc717ec53919e3cd720390d4e3bcd40405cb51c4918c119447f913514", size = 111890, upload-time = "2025-10-27T16:02:16.345Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/a1/56a17b7f9e18c2bb8df73f3833345d97083b344708b97bab148fdd7e0b82/pdoc-16.0.0-py3-none-any.whl", hash = "sha256:070b51de2743b9b1a4e0ab193a06c9e6c12cf4151cf9137656eebb16e8556628", size = 100014, upload-time = "2025-10-27T16:02:15.007Z" }, +] + [[package]] name = "pefile" version = "2023.2.7" @@ -474,6 +573,7 @@ 
dependencies = [ [package.dev-dependencies] dev = [ { name = "codecov" }, + { name = "pdoc" }, { name = "pre-commit" }, { name = "pyinstaller" }, { name = "pytest" }, @@ -497,6 +597,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "codecov", specifier = ">=2.1.13" }, + { name = "pdoc", specifier = ">=16.0.0" }, { name = "pre-commit", specifier = ">=4.3.0" }, { name = "pyinstaller", specifier = ">=6.16.0" }, { name = "pytest", specifier = ">=8.4.2" }, From d894ea7f0171040e0ec257b5112866ff46b72ccb Mon Sep 17 00:00:00 2001 From: Ladme Date: Mon, 24 Nov 2025 14:51:59 +0100 Subject: [PATCH 21/27] Basic documentation for qq config options --- src/qq_lib/core/config.py | 119 +++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 3 deletions(-) diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index 64af117..62c808a 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -23,9 +23,13 @@ class FileSuffixes: """File suffixes used by qq.""" + # Suffix for qq info files. qq_info: str = ".qqinfo" + # Suffix for qq output files. qq_out: str = ".qqout" + # Suffix for captured stdout. stdout: str = ".out" + # Suffix for captured stderr. stderr: str = ".err" @property @@ -38,24 +42,43 @@ def all_suffixes(self) -> list[str]: class EnvironmentVariables: """Environment variable names used by qq.""" + # Indicates job is running inside the qq environment. guard: str = "QQ_ENV_SET" + # Enables qq debug mode. debug_mode: str = "QQ_DEBUG" + # Path to the qq info file for the job. info_file: str = "QQ_INFO" + # Machine from which the job was submitted. input_machine: str = "QQ_INPUT_MACHINE" + # Submission directory path. input_dir: str = "QQ_INPUT_DIR" + # Whether submission was from shared storage. shared_submit: str = "QQ_SHARED_SUBMIT" + # Name of the batch system used. batch_system: str = "QQ_BATCH_SYSTEM" + # Current loop-cycle index. loop_current: str = "QQ_LOOP_CURRENT" + # Starting loop-cycle index. loop_start: str = "QQ_LOOP_START" + # Final loop-cycle index. loop_end: str = "QQ_LOOP_END" + # Non-resubmit flag returned by a job script. no_resubmit: str = "QQ_NO_RESUBMIT" + # Archive filename pattern. archive_format: str = "QQ_ARCHIVE_FORMAT" + # Scratch directory on Metacentrum clusters. pbs_scratch_dir: str = "SCRATCHDIR" + # Slurm account used for the job. slurm_job_account: str = "SLURM_JOB_ACCOUNT" + # Storage type for LUMI scratch. lumi_scratch_type: str = "LUMI_SCRATCH_TYPE" + # Total CPUs used. ncpus: str = "QQ_NCPUS" + # Total GPUs used. ngpus: str = "QQ_NGPUS" + # Total nodes used. nnodes: str = "QQ_NNODES" + # Walltime in hours. walltime: str = "QQ_WALLTIME" @@ -63,7 +86,9 @@ class EnvironmentVariables: class TimeoutSettings: """Timeout settings in seconds.""" + # Timeout for SSH in seconds. ssh: int = 60 + # Timeout for rsync in seconds. rsync: int = 600 @@ -71,9 +96,13 @@ class TimeoutSettings: class RunnerSettings: """Settings for Runner operations.""" + # Maximum number of attempts when retrying an operation. retry_tries: int = 3 + # Wait time (in seconds) between retry attempts. retry_wait: int = 300 + # Delay (in seconds) between sending SIGTERM and SIGKILL to a job script. sigterm_to_sigkill: int = 5 + # Interval (in seconds) between successive checks of the running script's state. subprocess_checks_wait_time: int = 2 @@ -81,7 +110,9 @@ class RunnerSettings: class ArchiverSettings: """Settings for Archiver operations.""" + # Maximum number of attempts when retrying an operation. 
retry_tries: int = 3 + # Wait time (in seconds) between retry attempts. retry_wait: int = 300 @@ -89,6 +120,8 @@ class ArchiverSettings: class GoerSettings: """Settings for Goer operations.""" + # Interval (in seconds) between successive checks of the job's state + # (when waiting for the job to start). wait_time: int = 5 @@ -96,6 +129,7 @@ class GoerSettings: class LoopJobSettings: """Settings for qq loop jobs.""" + # Pattern used for naming loop jobs. pattern: str = "+%04d" @@ -103,9 +137,13 @@ class LoopJobSettings: class JobStatusPanelSettings: """Settings for creating a job status panel.""" + # Maximal width of the job status panel. max_width: int | None = None + # Minimal width of the job status panel. min_width: int | None = 60 + # Style of the border lines. border_style: str = "white" + # Style of the title. title_style: str = "white bold" @@ -113,10 +151,15 @@ class JobStatusPanelSettings: class FullInfoPanelSettings: """Settings for creating a full info panel.""" + # Maximal width of the job info panel. max_width: int | None = None + # Minimal width of the job info panel. min_width: int | None = 80 + # Style of the border lines. border_style: str = "white" + # Style of the title. title_style: str = "white bold" + # Style of the separators between individual sections of the panel. rule_style: str = "white" @@ -124,17 +167,21 @@ class FullInfoPanelSettings: class PresenterSettings: """Settings for Presenter.""" + # Settings for the job status panel job_status_panel: JobStatusPanelSettings = field( default_factory=JobStatusPanelSettings ) + # Settings for the job info panel full_info_panel: FullInfoPanelSettings = field( default_factory=FullInfoPanelSettings ) - # used for both job status panel and full info panel + # Style used for the keys in job status/info panel. key_style: str = "default bold" + # Style used for values in job status/info panel. value_style: str = "white" + # Style used for notes in job status/info panel. notes_style: str = "grey50" @@ -142,15 +189,25 @@ class PresenterSettings: class JobsPresenterSettings: """Settings for JobsPresenter.""" + # Maximum displayed length of a job name before truncation. max_job_name_length: int = 20 + # Maximum displayed length of working nodes before truncation. max_nodes_length: int = 40 + # Style used for border lines. border_style: str = "white" + # Style used for the title. title_style: str = "white bold" + # Style used for table headers. headers_style: str = "default" + # Style used for table values. main_style: str = "white" + # Style used for job statistics. secondary_style: str = "grey70" + # Style used for extra notes. extra_info_style: str = "grey50" + # Style used for strong warning messages. strong_warning_style: str = "bright_red" + # Style used for mild warning messages. mild_warning_style: str = "bright_yellow" @@ -158,20 +215,32 @@ class JobsPresenterSettings: class QueuesPresenterSettings: """Settings for QueuesPresenter.""" + # Maximal width of the queues panel. max_width: int | None = None + # Minimal width of the queues panel. min_width: int | None = 80 + # Style used for border lines. border_style: str = "white" + # Style used for the title. title_style: str = "white bold" + # Style used for table headers. headers_style: str = "default" + # Mark used to denote main queues. main_mark = "●" + # Mark used to denote reroutings. rerouted_mark = " ··>" + # Style used for the mark if the queue is available. available_mark_style: str = "bright_green" + # Style used for the mark if the queue is not available. 
unavailable_mark_style: str = "bright_red" + # Style used for the mark if the queue is dangling. dangling_mark_style: str = "bright_yellow" + # Style used for information about main queues. main_text_style: str = "white" + # Style used for information about reroutings. rerouted_text_style: str = "grey50" @@ -179,23 +248,39 @@ class QueuesPresenterSettings: class NodesPresenterSettings: """Settings for NodesPresenter.""" + # Maximal width of the nodes panel. max_width: int | None = None + # Minimal width of the nodes panel. min_width: int | None = 80 + # Maximal width of the shared properties section. max_props_panel_width: int = 40 + # Style used for border lines. border_style: str = "white" + # Style used for the title. title_style: str = "white bold" + # Style used for table headers. headers_style: str = "default" + # Style of the separators between individual sections of the panel. rule_style: str = "white" + # Name to use for the leftover nodes that were not assigned to any group. others_group_name: str = "other" + # Name to use for the group if it contains all nodes. all_nodes_group_name: str = "all nodes" + # Mark used to denote nodes. state_mark = "●" + # Style used for main information about the nodes. main_text_style: str = "white" + # Style used for statistics and shared properties. secondary_text_style: str = "grey70" + # Style used for the mark and resources if the node is free. free_node_style: str = "bright_green bold" + # Style used for the mark and resources if the node is partially free. part_free_node_style: str = "green" + # Style used for the mark and resources if the node is busy. busy_node_style: str = "blue" + # Style used for all information about unavailable nodes. unavailable_node_style = "bright_red" @@ -203,8 +288,11 @@ class NodesPresenterSettings: class DateFormats: """Date and time format strings.""" + # Standard date format used by qq. standard: str = "%Y-%m-%d %H:%M:%S" + # Date format used by PBS Pro. pbs: str = "%a %b %d %H:%M:%S %Y" + # Date format used by Slurm. slurm: str = "%Y-%m-%dT%H:%M:%S" @@ -212,12 +300,17 @@ class DateFormats: class ExitCodes: """Exit codes used for various errors.""" + # Returned when a qq script is run outside the qq environment. not_qq_env: int = 90 + # Default error code for failures of qq commands or most errors in the qq environment. default: int = 91 + # Returned when a qq job fails and its error state cannot be written to the qq info file. qq_run_fatal: int = 92 + # Returned when a qq job fails due to a communication error between qq services. qq_run_communication: int = 93 - # used inside the script to indicate to qq that the loop job should not be resubmitted + # Used by job scripts to signal that a loop job should not be resubmitted. qq_run_no_resubmit: int = 95 + # Returned on an unexpected or unhandled error. unexpected_error: int = 99 @@ -225,19 +318,33 @@ class ExitCodes: class StateColors: """Color scheme for RealState display.""" + # Style used for queued jobs. queued: str = "bright_magenta" + # Style used for held jobs. held: str = "bright_magenta" + # Style used for suspended jobs. suspended: str = "bright_black" + # Style used for waiting jobs. waiting: str = "bright_magenta" + # Style used for running jobs. running: str = "bright_blue" + # Style used for booting jobs. booting: str = "bright_cyan" + # Style used for killed jobs. killed: str = "bright_red" + # Style used for failed jobs. failed: str = "bright_red" + # Style used for finished jobs. finished: str = "bright_green" + # Style used for exiting jobs. 
exiting: str = "bright_yellow" + # Style used for jobs in an inconsistent state. in_an_inconsistent_state: str = "grey70" + # Style used for jobs in an unknown state. unknown: str = "grey70" + # Style used whenever a summary of jobs is provided. sum: str = "white" + # Style used for "other" job states. other: str = "grey70" @@ -245,6 +352,7 @@ class StateColors: class SizeOptions: """Options associated with the Size dataclass.""" + # Maximal error acceptable when rounding Size values for display. max_rounding_error: float = 0.1 @@ -252,6 +360,7 @@ class SizeOptions: class PBSOptions: """Options associated with PBS.""" + # Name of the subdirectory inside SCRATCHDIR used as the job's working directory. scratch_dir_inner: str = "main" @@ -259,7 +368,7 @@ class PBSOptions: class SlurmOptions: """Options associated with Slurm.""" - # maximal number of threads used to collect information about jobs using scontrol + # Maximal number of threads used to collect information about jobs using scontrol. jobs_scontrol_nthreads: int = 8 @@ -267,6 +376,7 @@ class SlurmOptions: class SlurmIT4IOptions: """Options associated with Slurm on IT4I clusters.""" + # Number of attempts when preparing a working directory on scratch. scratch_dir_attempts: int = 3 @@ -274,6 +384,7 @@ class SlurmIT4IOptions: class SlurmLumiOptions: """Options associated with Slurm on LUMI.""" + # Number of attempts when preparing a working directory on scratch. scratch_dir_attempts: int = 3 @@ -304,6 +415,8 @@ class Config: slurm_options: SlurmOptions = field(default_factory=SlurmOptions) slurm_it4i_options: SlurmIT4IOptions = field(default_factory=SlurmIT4IOptions) slurm_lumi_options: SlurmLumiOptions = field(default_factory=SlurmLumiOptions) + + # Name of the qq binary. binary_name: str = "qq" @classmethod From 91c66c37079289e83859dbb0b85adee47ad45a58 Mon Sep 17 00:00:00 2001 From: Ladme Date: Mon, 24 Nov 2025 14:57:46 +0100 Subject: [PATCH 22/27] Max width and min width for jobs panel --- src/qq_lib/core/config.py | 4 ++++ src/qq_lib/jobs/presenter.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index 62c808a..3b8da78 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -189,6 +189,10 @@ class PresenterSettings: class JobsPresenterSettings: """Settings for JobsPresenter.""" + # Maximal width of the jobs panel. + max_width: int | None = None + # Minimal width of the jobs panel. + min_width: int | None = 80 # Maximum displayed length of a job name before truncation. max_job_name_length: int = 20 # Maximum displayed length of working nodes before truncation. diff --git a/src/qq_lib/jobs/presenter.py b/src/qq_lib/jobs/presenter.py index a06240e..77db1ff 100644 --- a/src/qq_lib/jobs/presenter.py +++ b/src/qq_lib/jobs/presenter.py @@ -14,6 +14,7 @@ from qq_lib.batch.interface.interface import BatchInterface from qq_lib.core.common import ( format_duration_wdhhmmss, + get_panel_width, ) from qq_lib.core.config import CFG from qq_lib.properties.states import BatchState @@ -104,7 +105,6 @@ def createJobsInfoPanel(self, console: Console | None = None) -> Group: Group: Rich Group containing the jobs table and stats panel. 
""" console = console or Console() - panel_width = console.size.width jobs_table = self._createBasicJobsTable() if self._extra: @@ -127,7 +127,9 @@ def createJobsInfoPanel(self, console: Console | None = None) -> Group: ), border_style=CFG.jobs_presenter.border_style, padding=(1, 1), - width=panel_width, + width=get_panel_width( + console, 1, CFG.jobs_presenter.min_width, CFG.jobs_presenter.max_width + ), expand=False, ) From 47e1d3c0731a6947f25b377a3ec21f75411f7fa8 Mon Sep 17 00:00:00 2001 From: Ladme Date: Mon, 24 Nov 2025 15:03:32 +0100 Subject: [PATCH 23/27] Configurable code for total and other jobs --- src/qq_lib/core/config.py | 8 ++++++++ src/qq_lib/jobs/presenter.py | 6 +++++- src/qq_lib/queues/presenter.py | 12 ++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index 3b8da78..a840772 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -214,6 +214,9 @@ class JobsPresenterSettings: # Style used for mild warning messages. mild_warning_style: str = "bright_yellow" + # Code used to signify "total jobs". + sum_jobs_code: str = "Σ" + @dataclass class QueuesPresenterSettings: @@ -247,6 +250,11 @@ class QueuesPresenterSettings: # Style used for information about reroutings. rerouted_text_style: str = "grey50" + # Code used to signify "other jobs". + other_jobs_code: str = "O" + # Code used to signify "total jobs". + sum_jobs_code: str = "Σ" + @dataclass class NodesPresenterSettings: diff --git a/src/qq_lib/jobs/presenter.py b/src/qq_lib/jobs/presenter.py index 77db1ff..5e5abb3 100644 --- a/src/qq_lib/jobs/presenter.py +++ b/src/qq_lib/jobs/presenter.py @@ -689,7 +689,11 @@ def _createJobStatesStats(self) -> Text: # sum of all jobs line.append( - JobsStatistics._colorText("Σ ", color=CFG.state_colors.sum, bold=True) + JobsStatistics._colorText( + f"{CFG.jobs_presenter.sum_jobs_code} ", + color=CFG.state_colors.sum, + bold=True, + ) ) line.append(JobsStatistics._secondaryColorText(str(total))) line.append(spacing) diff --git a/src/qq_lib/queues/presenter.py b/src/qq_lib/queues/presenter.py index 82a5496..c61644d 100644 --- a/src/qq_lib/queues/presenter.py +++ b/src/qq_lib/queues/presenter.py @@ -118,11 +118,19 @@ def _createQueuesTable(self) -> Table: justify="right", ) table.add_column( - header=Text("O", justify="right", style=CFG.state_colors.other), + header=Text( + CFG.queues_presenter.other_jobs_code, + justify="right", + style=CFG.state_colors.other, + ), justify="right", ) table.add_column( - header=Text("Σ", justify="right", style=CFG.state_colors.sum), + header=Text( + CFG.queues_presenter.sum_jobs_code, + justify="right", + style=CFG.state_colors.sum, + ), justify="right", ) table.add_column( From 00eb533219afc83960ee65086c9e82075d571508 Mon Sep 17 00:00:00 2001 From: Ladme Date: Mon, 24 Nov 2025 15:11:05 +0100 Subject: [PATCH 24/27] Columns to show in the output of qq jobs/stat can now be customized --- src/qq_lib/core/config.py | 3 +++ src/qq_lib/jobs/presenter.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index a840772..daa21da 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -213,6 +213,9 @@ class JobsPresenterSettings: strong_warning_style: str = "bright_red" # Style used for mild warning messages. mild_warning_style: str = "bright_yellow" + # List of columns to show in the output. + # If not set, the settings for the current batch system will be used. 
+ columns_to_show: list[str] | None = None # Code used to signify "total jobs". sum_jobs_code: str = "Σ" diff --git a/src/qq_lib/jobs/presenter.py b/src/qq_lib/jobs/presenter.py index 5e5abb3..37a7a72 100644 --- a/src/qq_lib/jobs/presenter.py +++ b/src/qq_lib/jobs/presenter.py @@ -185,9 +185,12 @@ def _getVisibleHeaders(self) -> list[str]: "Node", "%CPU", "%Mem", - "Exit" if self._all else None, + "Exit" if self._all or CFG.jobs_presenter.columns_to_show else None, ] - headers_to_show = self._batch_system.jobsPresenterColumnsToShow() + headers_to_show = ( + CFG.jobs_presenter.columns_to_show + or self._batch_system.jobsPresenterColumnsToShow() + ) return [h for h in all_headers if h and h in headers_to_show] def _createJobRow(self, job: BatchJobInterface, headers: list[str]) -> list[str]: From bdc6009162dd49c74bfe8832bf5d2cb3ab85e35b Mon Sep 17 00:00:00 2001 From: Ladme Date: Mon, 24 Nov 2025 15:35:04 +0100 Subject: [PATCH 25/27] Wording in config comments --- src/qq_lib/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qq_lib/core/config.py b/src/qq_lib/core/config.py index daa21da..0a77a06 100644 --- a/src/qq_lib/core/config.py +++ b/src/qq_lib/core/config.py @@ -367,7 +367,7 @@ class StateColors: class SizeOptions: """Options associated with the Size dataclass.""" - # Maximal error acceptable when rounding Size values for display. + # Maximal relative error acceptable when rounding Size values for display. max_rounding_error: float = 0.1 From a9abe08fd470f1cd00aed758e555a5c2b2f4bc99 Mon Sep 17 00:00:00 2001 From: Ladme Date: Wed, 26 Nov 2025 15:17:06 +0100 Subject: [PATCH 26/27] Read the docs --- .readthedocs.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..d208575 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,15 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py From 93f1c2b5d2dcbf2ead3efb2e029fd473603401aa Mon Sep 17 00:00:00 2001 From: Ladme Date: Wed, 26 Nov 2025 15:26:20 +0100 Subject: [PATCH 27/27] qq scripts --- pyproject.toml | 5 +- scripts/qq_scripts/gmx-eta | 103 +++++++++++ scripts/qq_scripts/multi-check | 292 ++++++++++++++++++++++++++++++++ scripts/qq_scripts/multi-kill | 170 +++++++++++++++++++ scripts/qq_scripts/multi-submit | 215 +++++++++++++++++++++++ 5 files changed, 784 insertions(+), 1 deletion(-) create mode 100755 scripts/qq_scripts/gmx-eta create mode 100755 scripts/qq_scripts/multi-check create mode 100755 scripts/qq_scripts/multi-kill create mode 100755 scripts/qq_scripts/multi-submit diff --git a/pyproject.toml b/pyproject.toml index 995bcd7..8e3c2c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,9 @@ dev = [ "ty>=0.0.1a24", ] +[tool.ruff] +extend-include = ["scripts/qq_scripts/*"] + [tool.ruff.lint] extend-select = [ "F", # Pyflakes rules @@ -69,4 +72,4 @@ extend-select = [ "NPY", # Some numpy-specific things "ARG", # Catch incorrect use of arguments ] -ignore = ["E501"] +ignore = ["E501"] \ No newline at end of file diff --git a/scripts/qq_scripts/gmx-eta b/scripts/qq_scripts/gmx-eta new file mode 100755 index 0000000..83f1956 --- /dev/null +++ 
b/scripts/qq_scripts/gmx-eta
@@ -0,0 +1,103 @@
+#!/usr/bin/env -S uv run --script
+
+# Released under MIT License.
+# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab
+
+"""
+Get the estimated time of a Gromacs simulation finishing.
+Version 0.2.
+Requires `uv`: https://docs.astral.sh/uv
+"""
+
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "qq",
+# ]
+#
+# [tool.uv.sources]
+# qq = { git = "https://github.com/Ladme/qq.git", tag = "v0.6.0" }
+# ///
+
+import argparse
+from datetime import datetime
+from pathlib import Path
+
+from rich import print
+
+from qq_lib.batch.interface import BatchMeta
+from qq_lib.core.common import format_duration_wdhhmmss, get_info_files
+from qq_lib.core.navigator import Navigator
+from qq_lib.info import Informer
+
+
+def get_informer(id: str | None) -> Informer:
+    """Get informer for the given job id or for the newest job in the current directory."""
+    if id:
+        return Informer.fromJobId(id)
+    return Informer.fromFile(get_info_files(Path())[-1])
+
+
+def get_eta_from_content(content: list[str]) -> datetime | None:
+    """Get the time at which the Gromacs simulation is expected to finish."""
+
+    # find the last line containing the ETA
+    eta_line = next((s for s in content if "will finish" in s), None)
+    if not eta_line:
+        return None
+
+    # assuming the time information is stored in the last 5 words
+    eta = " ".join(eta_line.split()[-5:])
+
+    try:
+        return datetime.strptime(eta, "%a %b %d %H:%M:%S %Y")
+    except Exception:
+        return None
+
+
+def main():
+    # parse command line options
+    parser = argparse.ArgumentParser(
+        "gmx-eta",
+        description="Get the estimated time of a Gromacs simulation finishing.",
+    )
+    parser.add_argument("job_id", nargs="?", help="Job ID. Optional.", default=None)
+    args = parser.parse_args()
+
+    informer = get_informer(args.job_id)
+    navigator = Navigator.fromInformer(informer)
+
+    if (main_node := navigator.getMainNode()) and (work_dir := navigator.getWorkDir()):
+        BatchSystem = BatchMeta.fromEnvVarOrGuess()
+        # use the batch system to read the remote file with Gromacs output
+        # split the lines and reverse the content to read from the end
+        try:
+            content = list(reversed(
+                BatchSystem.readRemoteFile(
+                    main_node, work_dir / informer.info.stderr_file
+                ).splitlines()
+            ))
+        except Exception as e:
+            print(f"No information is available: {e}")
+            return
+
+        # get eta
+        eta = get_eta_from_content(content)
+        if eta and datetime.now() <= eta:
+            print(
+                f"Simulation will finish in [bright_blue bold]{format_duration_wdhhmmss(eta - datetime.now())}[/bright_blue bold]."
+            )
+        elif eta and datetime.now() > eta:
+            print(
+                f"Simulation has finished at [bright_green bold]{eta}[/bright_green bold]."
+            )
+        else:
+            print("No information is available.")
+
+    else:
+        print("No information is available: job does not have a working directory.")
+        return
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/qq_scripts/multi-check b/scripts/qq_scripts/multi-check
new file mode 100755
index 0000000..9c44dcf
--- /dev/null
+++ b/scripts/qq_scripts/multi-check
@@ -0,0 +1,292 @@
+#!/usr/bin/env -S uv run --script
+
+# Released under MIT License.
+# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab
+
+"""
+Check the state of qq jobs in multiple directories.
+Version 0.3.
+Requires `uv`: https://docs.astral.sh/uv
+"""
+
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "qq",
+# ]
+#
+# [tool.uv.sources]
+# qq = { git = "https://github.com/Ladme/qq.git", tag = "v0.6.0" }
+# ///
+
+import argparse
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from contextlib import suppress
+from dataclasses import dataclass
+from pathlib import Path
+
+from rich.console import Console
+from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
+
+from qq_lib.clear import Clearer
+from qq_lib.core.common import get_info_files
+from qq_lib.core.error import QQError
+from qq_lib.info import Informer
+from qq_lib.properties.states import RealState
+from qq_lib.submit import Submitter
+from qq_lib.wipe import Wiper
+
+console = Console()
+# suppress all log messages below CRITICAL
+logging.disable(logging.ERROR)
+
+
+@dataclass(frozen=True)
+class Job:
+    """Informer and input directory of the job."""
+
+    informer: Informer
+    directory: Path
+
+
+def get_informer(directory: str) -> Informer | None:
+    """Get an informer for the newest job in the specified directory."""
+    # check that the directory is actually a directory and convert it to Path
+    if not (directory := Path(directory)).is_dir():
+        return None
+
+    # get all info files in the directory
+    info_files = get_info_files(directory)
+
+    # if no qq info files are found, return None
+    if not info_files:
+        return None
+
+    # get the last info file (the newest one) and load it into an informer
+    return Informer.fromFile(info_files[-1], None)
+
+
+def process_directory(directory: str) -> tuple[Job, RealState] | None:
+    """Return (Job, state) for use in thread pool."""
+    # get an informer from the directory
+    informer = get_informer(directory)
+
+    # return None if no informer could be constructed
+    if not informer:
+        return None
+
+    # get the state of the job
+    state = informer.getRealState()
+
+    return Job(informer, Path(directory)), state
+
+
+def fix_job(job: Job) -> tuple[Job, bool]:
+    """
+    Wipe the working directory and resubmit the job.
+    Returns (Job, True) if successfully resubmitted, else (Job, False).
+    """
+    directory = job.directory
+    informer = job.informer
+
+    # ignore if this fails
+    with suppress(QQError):
+        # delete the working directory
+        # no checks since we assume the provided job is killed or failed
+        wiper = Wiper.fromInformer(informer)
+        wiper.wipe()
+
+        # remove runtime files in the directory
+        clearer = Clearer(directory)
+        clearer.clear()
+
+    # submit the job again
+    try:
+        submitter = Submitter(
+            informer.batch_system,
+            informer.info.queue,
+            informer.info.account,
+            (directory / informer.info.script_name).resolve(),
+            informer.info.job_type,
+            informer.info.resources,
+            informer.info.loop_info,
+            informer.info.excluded_files,
+            informer.info.included_files,
+            informer.info.depend,
+        )
+        submitter.submit()
+        return (job, True)
+    except QQError as e:
+        console.print(
+            f"\n[red bold]ERROR[/red bold]. Could not submit job in directory '{job.directory}': {e}"
+        )
+        return (job, False)
+
+
+def get_jobs_and_states(
+    directories: list[str], threads: int
+) -> dict[RealState, set[Job]]:
+    """Get the states of the jobs in the given directories."""
+    n_directories = len(directories)
+
+    # prepare a progress bar
+    progress = Progress(
+        TextColumn("Collecting job states"),
+        BarColumn(),
+        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        TimeRemainingColumn(),
+        console=console,
+        expand=False,
+    )
+
+    # `states` maps job states to jobs
+    states: dict[RealState, set[Job]] = {}
+
+    # render rich progress bar
+    with progress:
+        # register a new progress task (for progress bar)
+        task = progress.add_task("collect", total=n_directories)
+
+        # create a thread pool for parallel processing of directories
+        with ThreadPoolExecutor(max_workers=threads) as executor:
+            # submit each directory to the thread pool
+            # `futures` maps Future objects to directory names
+            futures = {executor.submit(process_directory, d): d for d in directories}
+
+            # iterate over futures as they finish (in arbitrary order)
+            for future in as_completed(futures):
+                # get the result from the completed task ((job, state) or None)
+                result = future.result()
+
+                # if result is not None
+                if result:
+                    dirinfo, state = result
+                    # add the job to the set associated with its state
+                    states.setdefault(state, set()).add(dirinfo)
+
+                # advance the progress bar by one since one directory is done
+                progress.update(task, advance=1)
+
+    return states
+
+
+def fix_jobs(states: dict[RealState, set[Job]]) -> tuple[set[Job], set[Job]]:
+    """Fix all failed and killed jobs by deleting their working directories and resubmitting them."""
+    # get failed and killed jobs
+    jobs = states.get(RealState.FAILED, set()).union(
+        states.get(RealState.KILLED, set())
+    )
+
+    if not jobs:
+        # nothing to fix
+        return set(), set()
+
+    # prepare a progress bar
+    progress = Progress(
+        TextColumn("Fixing jobs"),
+        BarColumn(),
+        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        TimeRemainingColumn(),
+        console=console,
+        expand=False,
+    )
+
+    fixed_jobs = set()
+    unfixed_jobs = set()
+    with progress:
+        # register a new progress task (for progress bar)
+        task = progress.add_task("fix", total=len(jobs))
+
+        # submitting in qq is not a thread-safe operation,
+        # so we have to do the fixing sequentially
+        for job in jobs:
+            if fix_job(job)[1]:
+                fixed_jobs.add(job)
+            else:
+                unfixed_jobs.add(job)
+
+            progress.update(task, advance=1)
+
+    return fixed_jobs, unfixed_jobs
+
+
+def main():
+    # parse command line options
+    parser = argparse.ArgumentParser(
+        "multi-check",
+        description="Check the state of qq jobs in multiple directories.",
+    )
+    parser.add_argument(
+        "directories", nargs="+", help="Directories containing qq info files."
+ ) + parser.add_argument( + "-t", + "--threads", + type=int, + default=16, + help="Number of worker threads (default: 16)", + ) + parser.add_argument( + "--fix", + default=False, + action="store_true", + help="Resubmit all failed and killed jobs.", + ) + args = parser.parse_args() + console.print() + + # collect the states of the jobs + states = get_jobs_and_states(args.directories, args.threads) + + # for each state, print the number of jobs in this state + # and the directories of the corresponding jobs + console.print() + # total number of jobs + total_jobs = 0 + state_keys = sorted(states.keys(), key=lambda x: str(x)) + for state in state_keys: + # get all directories of jobs in this state + dirs = [dirinfo.directory for dirinfo in states[state]] + + # number of jobs in this state + n_jobs = len(dirs) + total_jobs += n_jobs + + color = state.color + + console.print( + f"[{color} bold]{str(state).upper():15s}[/{color} bold] [default]{n_jobs}[/default]" + ) + console.print(f"[grey70]{' '.join(str(x) for x in sorted(dirs))}[/grey70]\n") + + console.print(f"[bold]{'TOTAL':15s}[/bold] [default]{total_jobs}[/default]\n") + + # fix jobs, if requested + if args.fix: + console.print("***********************************\n") + fixed, unfixed = fix_jobs(states) + + if not fixed and not unfixed: + console.print("[bold]Nothing to fix.[/bold]\n") + return + + dirs_fixed = [job.directory for job in fixed] + console.print( + f"\n[bright_green bold]{'FIXED SUCCESSFULLY':25s}[/bright_green bold] [default]{len(fixed)}[/default]" + ) + console.print( + f"[grey70]{' '.join(str(x) for x in sorted(dirs_fixed))}[/grey70]" + ) + + dirs_unfixed = [job.directory for job in unfixed] + console.print( + f"\n[bright_red bold]{'COULD NOT FIX':25s}[/bright_red bold] [default]{len(unfixed)}[/default]" + ) + console.print( + f"[grey70]{' '.join(str(x) for x in sorted(dirs_unfixed))}[/grey70]\n" + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/qq_scripts/multi-kill b/scripts/qq_scripts/multi-kill new file mode 100755 index 0000000..b4b44a9 --- /dev/null +++ b/scripts/qq_scripts/multi-kill @@ -0,0 +1,170 @@ +#!/usr/bin/env -S uv run --script + +# Released under MIT License. +# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab + +""" +Kill qq jobs in multiple directories. +Version 0.1. 
+Requires `uv`: https://docs.astral.sh/uv
+"""
+
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "qq",
+# ]
+#
+# [tool.uv.sources]
+# qq = { git = "https://github.com/Ladme/qq.git", tag = "v0.6.0" }
+# ///
+
+import argparse
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+from rich.console import Console
+from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
+
+from qq_lib.core.common import get_info_files
+from qq_lib.core.error import QQError
+from qq_lib.info import Informer
+from qq_lib.kill import Killer
+
+console = Console()
+# suppress all log messages below CRITICAL
+logging.disable(logging.ERROR)
+
+
+def get_killer(directory: str) -> Killer | None:
+    """Get a killer for the newest job in the specified directory."""
+    # check that the directory is actually a directory and convert it to Path
+    if not (directory := Path(directory)).is_dir():
+        return None
+
+    # get all info files in the directory
+    info_files = get_info_files(directory)
+
+    # if no qq info files are found, return None
+    if not info_files:
+        return None
+
+    # get the last info file (the newest one) and load it into an informer
+    informer = Informer.fromFile(info_files[-1], None)
+
+    # construct a killer from the informer
+    if informer:
+        return Killer.fromInformer(informer)
+    return None
+
+
+def process_directory(directory: str) -> tuple[str, bool]:
+    """Return (directory, success) for use in thread pool."""
+    # get a killer for the directory
+    killer = get_killer(directory)
+
+    # return (directory, False) if no killer could be constructed
+    if not killer:
+        return (directory, False)
+
+    # make sure that the job is suitable to be killed
+    try:
+        killer.ensureSuitable()
+    except Exception:
+        return (directory, False)
+
+    # kill the job
+    try:
+        killer.kill()
+    except QQError as e:
+        console.print(
+            f"\n[red bold]ERROR[/red bold]. Could not kill the job in directory '{directory}': {e}"
+        )
+        return (directory, False)
+
+    return (directory, True)
+
+
+def kill_directories(
+    directories: list[str], threads: int
+) -> tuple[set[str], set[str]]:
+    """Kill jobs in directories that are suitable to be killed."""
+
+    # prepare a progress bar
+    progress = Progress(
+        TextColumn("Killing jobs"),
+        BarColumn(),
+        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+        TimeRemainingColumn(),
+        console=console,
+        expand=False,
+    )
+
+    killed_jobs = set()
+    not_killed_jobs = set()
+    with progress:
+        # register a new progress task (for progress bar)
+        task = progress.add_task("kill", total=len(directories))
+
+        # create a thread pool for parallel processing of jobs
+        with ThreadPoolExecutor(max_workers=threads) as executor:
+            # submit each job to the thread pool
+            # `futures` maps Future objects to directories
+            futures = {executor.submit(process_directory, d): d for d in directories}
+
+            # iterate over futures as they finish (in arbitrary order)
+            for future in as_completed(futures):
+                result = future.result()
+
+                # if successful, add the directory to the set of killed jobs
+                if result[1]:
+                    killed_jobs.add(result[0])
+                else:
+                    not_killed_jobs.add(result[0])
+
+                # advance the progress bar by one since one directory is processed
+                progress.update(task, advance=1)
+
+    return killed_jobs, not_killed_jobs
+
+
+def main():
+    # parse command line options
+    parser = argparse.ArgumentParser(
+        "multi-kill",
+        description="Kill qq jobs in multiple directories.",
+    )
+    parser.add_argument(
+        "directories", nargs="+", help="Directories containing qq info files."
+    )
+    parser.add_argument(
+        "-t",
+        "--threads",
+        type=int,
+        default=16,
+        help="Number of worker threads (default: 16)",
+    )
+    args = parser.parse_args()
+    console.print()
+
+    # kill the jobs
+    killed, not_killed = kill_directories(args.directories, args.threads)
+
+    if not killed and not not_killed:
+        console.print("[bold]Nothing to kill.[/bold]\n")
+        return
+
+    console.print(
+        f"\n[bright_green bold]{'KILLED SUCCESSFULLY':25s}[/bright_green bold] [default]{len(killed)}[/default]"
+    )
+    console.print(f"[grey70]{' '.join(str(x) for x in sorted(killed))}[/grey70]")
+
+    console.print(
+        f"\n[bright_red bold]{'COULD NOT KILL':25s}[/bright_red bold] [default]{len(not_killed)}[/default]"
+    )
+    console.print(f"[grey70]{' '.join(str(x) for x in sorted(not_killed))}[/grey70]\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/qq_scripts/multi-submit b/scripts/qq_scripts/multi-submit
new file mode 100755
index 0000000..827d599
--- /dev/null
+++ b/scripts/qq_scripts/multi-submit
@@ -0,0 +1,215 @@
+#!/usr/bin/env -S uv run --script
+
+# Released under MIT License.
+# Copyright (c) 2025 Ladislav Bartos and Robert Vacha Lab
+
+"""
+Submit qq jobs from multiple directories.
+Version 0.3.
+Requires `uv`: https://docs.astral.sh/uv
+"""
+
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "qq",
+# ]
+#
+# [tool.uv.sources]
+# qq = { git = "https://github.com/Ladme/qq.git", tag = "v0.6.0" }
+# ///
+
+import argparse
+import logging
+from contextlib import suppress
+from pathlib import Path
+
+from rich.console import Console
+from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
+
+from qq_lib.clear import Clearer
+from qq_lib.core.common import get_runtime_files
+from qq_lib.core.error import QQError
+from qq_lib.submit import Submitter, SubmitterFactory
+
+console = Console()
+# suppress all log messages below CRITICAL
+logging.disable(logging.ERROR)
+
+
+def submit_job(base_submitter: Submitter, directory: str) -> bool:
+    """Return True if the job was submitted, else False."""
+    # we have to modify the archive directory to redirect it into a new input dir
+    if loop_info := base_submitter.getLoopInfo():
+        loop_info.archive = Path(directory).resolve() / loop_info.archive.name
+
+    # create the submitter for the specific directory
+    submitter = Submitter(
+        base_submitter.getBatchSystem(),
+        base_submitter.getQueue(),
+        base_submitter.getAccount(),
+        (Path(directory).resolve() / base_submitter.getScript().name),
+        base_submitter.getJobType(),
+        base_submitter.getResources(),
+        loop_info,
+        base_submitter.getExclude(),
+        base_submitter.getInclude(),
+        base_submitter.getDepend(),
+    )
+
+    with suppress(QQError):
+        # remove runtime files in the directory, if possible
+        clearer = Clearer(Path(directory))
+        clearer.clear()
+
+    # make sure that the directory is suitable for submission
+    if get_runtime_files(submitter.getInputDir()) and not submitter.continuesLoop():
+        return False
+
+    try:
+        submitter.submit()
+    except Exception as e:
+        console.print(
+            f"\n[red bold]ERROR[/red bold]. Could not submit job in directory '{directory}': {e}"
+        )
+        return False
+
+    return True
+
+
+def click_passthrough_to_dict(args: list[str]) -> dict:
+    """Convert a list of CLI tokens into a dict-like structure."""
+    result = {}
+    tokens = list(args)
+    i = 0
+    n = len(tokens)
+
+    def is_option(t):
+        return t.startswith("-")
+
+    def clean_key(k):
+        # remove leading "-" or "--"
+        k = k.lstrip("-")
+        # rewrite q -> queue
+        return "queue" if k == "q" else k
+
+    while i < n:
+        token = tokens[i]
+
+        # long option: --opt=value
+        if token.startswith("--") and "=" in token:
+            key, val = token.split("=", 1)
+            result[clean_key(key)] = val
+            i += 1
+            continue
+
+        # long option: --opt [value] or flag
+        if token.startswith("--"):
+            key = clean_key(token)
+            if i + 1 < n and not is_option(tokens[i + 1]):
+                result[key] = tokens[i + 1]
+                i += 2
+            else:
+                result[key] = True
+                i += 1
+            continue
+
+        # short option: -x [value] or flag
+        if token.startswith("-"):
+            key = clean_key(token)
+            if i + 1 < n and not is_option(tokens[i + 1]):
+                result[key] = tokens[i + 1]
+                i += 2
+            else:
+                result[key] = True
+                i += 1
+            continue
+
+        # positional argument
+        result.setdefault("_positional", []).append(token)
+        i += 1
+
+    return result
+
+
+def main():
+    # parse command line options
+    parser = argparse.ArgumentParser(
+        "multi-submit",
+        description="Submit qq jobs from multiple directories. All jobs must request the same resources!",
+    )
+    parser.add_argument("script", nargs=1, help="Name of the script to submit.")
+    parser.add_argument(
+        "directories", nargs="+", help="Directories containing qq info files."
+ ) + # parse only known arguments + args, passthrough = parser.parse_known_args() + console.print() + + if len(args.directories) == 0 or len(args.script) == 0: + # nothing to submit + return + + # make sure that the script exists + if not (script_path := Path(args.directories[0]) / args.script[0]).is_file(): + console.print( + f"\n[red bold]ERROR[/red bold]. Script '{script_path}' does not exist or is not a file." + ) + return + + # create a submitter factory for building a submitter + factory = SubmitterFactory( + Path(args.directories[0]) / args.script[0], + **click_passthrough_to_dict(passthrough), + ) + + # make the base submitter + base_submitter = factory.makeSubmitter() + + # prepare a progress bar + progress = Progress( + TextColumn("Submitting jobs"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeRemainingColumn(), + console=console, + expand=False, + ) + + # submit the jobs + # we are not using multithreading because submitting a job in qq is not thread-safe + # (working directory is temporarily changed during the submission) + submitted = set() + not_submitted = set() + + with progress: + task = progress.add_task("submit", total=len(args.directories)) + + for dir in args.directories: + if submit_job(base_submitter, dir): + submitted.add(dir) + else: + not_submitted.add(dir) + + progress.update(task, advance=1) + + # print the submission statistics + if not submitted and not not_submitted: + console.print("[bold]Nothing to submit.[/bold]\n") + return + + console.print( + f"\n[bright_green bold]{'SUBMITTED SUCCESSFULLY':25s}[/bright_green bold] [default]{len(submitted)}[/default]" + ) + console.print(f"[grey70]{' '.join(str(x) for x in sorted(submitted))}[/grey70]") + + console.print( + f"\n[bright_red bold]{'COULD NOT SUBMIT':25s}[/bright_red bold] [default]{len(not_submitted)}[/default]" + ) + console.print( + f"[grey70]{' '.join(str(x) for x in sorted(not_submitted))}[/grey70]\n" + ) + + +if __name__ == "__main__": + main()