From 303a549d32fe43ff35aa0de1923defba380af0d1 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 13:48:05 +0100 Subject: [PATCH 01/10] Add extended precision/rounding functionality, and clarify the sign convention and requirements for asymm uncertainties --- docs/usage.rst | 18 +- hepdata_lib/helpers.py | 528 +++++++++++++++++++++++++++-------------- 2 files changed, 363 insertions(+), 183 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 67940e0..be02338 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -362,7 +362,23 @@ need to specify a license for a data table unless it differs from `CC0`_. Uncertainties +++++++++++++ -In many cases, you will want to give uncertainties on the central values provided in the Variable objects. Uncertainties can be *symmetric* or *asymmetric* (up and down variations of the central value either have the same or different magnitudes). For symmetric uncertainties, the values of the uncertainties are simply stored as a one-dimensional list. For asymmetric uncertainties, the up- and downward variations are stored as a list of two-component tuples: +In many cases, you will want to give uncertainties on the central +values provided in the Variable objects. Uncertainties can be +*symmetric* or *asymmetric*. For symmetric +uncertainties, the values of the uncertainties are stored as a +one-dimensional list of positive values, which are applied as +equal-magnitude positive and negative changes to the value. + +For asymmetric uncertainties, the uncertainties are expressed as a +*signed* two-component iterable (e.g. tuple or list): in general, this +pair represents the value changes in response to downward and upward +moves of a nuisance parameter, and so it is possible for both the "up" +and "down" variations to have the same sign (if the effect of the +nuisance is one-sided). 
Therefore both components should be computed +as ``variation_value - nominal_value`` such that negative variations +correctly acquire a minus sign; asymmetric statistical errors are +represented using the same scheme and should also ensure that the +"down" uncertainty has a negative sign. :: diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 9e35973..a803c0a 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -6,6 +6,9 @@ import math import numpy as np + +## File and command functions + def execute_command(command): """ Execute shell command using subprocess. @@ -37,6 +40,65 @@ def execute_command(command): raise RuntimeError(result) return True + +def convert_pdf_to_png(source, target): + """ + Wrapper for the ImageMagick convert utility. + + :param source: Source file in PDF format. + :type source: str + :param target: Output file in PNG format. + :type target: str + """ + assert os.path.exists(source), f"Source file does not exist: {source}" + + command = f"convert -flatten -density 300 -fuzz 1% -trim +repage {source} {target}" + command_ok = execute_command(command) + if not command_ok: + print("ImageMagick does not seem to be installed \ + or is not in the path - not adding any images.") + + +def convert_png_to_thumbnail(source, target): + """ + Wrapper for the ImageMagick convert utility in thumbnail mode. + + :param source: Source file in PNG format. + :type source: str + :param target: Output thumbnailfile in PNG format. + :type target: str + """ + + command = f"convert -thumbnail 240x179 {source} {target}" + command_ok = execute_command(command) + + if not command_ok: + print("ImageMagick does not seem to be installed \ + or is not in the path - not adding any images.") + + +def file_is_outdated(file_path, reference_file_path): + """ + Check if the given file is outdated compared to the reference file. + + Also returns true if the reference file does not exist. + + :param file_path: Path to the file to check. 
+ :type file_path: str + :param reference_file_path: Path to the reference file. + :type reference_file_path: str + """ + if not os.path.exists(reference_file_path): + raise RuntimeError(f"Reference file does not exist: {reference_file_path}") + if not os.path.exists(file_path): + return True + + modification_outdated = os.path.getmtime(file_path) < os.path.getmtime(reference_file_path) + change_outdated = os.path.getctime(file_path) < os.path.getctime(reference_file_path) + + return modification_outdated | change_outdated + + def find_all_matching(path, pattern): """Utility function that works like 'find' in bash.""" if not os.path.exists(path): @@ -49,10 +111,68 @@ def find_all_matching(path, pattern): return result +def check_file_existence(path_to_file): + """ + Check that the given file path exists. + If not, raise RuntimeError. + + :param path_to_file: File path to check. + :type path_to_file: string + """ + if not os.path.exists(path_to_file): + raise RuntimeError("Cannot find file: " + path_to_file) + return True + + +def check_file_size(path_to_file, upper_limit=None, lower_limit=None): + """ + Check that the file size is between the upper and lower limits. + If not, raise RuntimeError. + + :param path_to_file: File path to check. + :type path_to_file: string + + :param upper_limit: Upper size limit in MB. + :type upper_limit: float + + :param lower_limit: Lower size limit in MB. + :type lower_limit: float + """ + size = 1e-6 * os.path.getsize(path_to_file) + if upper_limit and size > upper_limit: + raise RuntimeError(f"File too big: '{path_to_file}'. Maximum allowed value is {upper_limit}" + + "MB.") + if lower_limit and size < lower_limit: + raise RuntimeError(f"File too small: '{path_to_file}'." + + f"Minimal allowed value is {lower_limit} MB.") + + + +## Value type, formatting and numerical-precision functions + +def sanitize_value(value): + """ + Handle conversion of input types for internal storage. 
+ + :param value: User-side input value to sanitize. + :type value: string, int, NoneType, or castable to float + + Strings, integers and None are left alone, + everything else is converted to float. + """ + if isinstance(value, str): + return value + if isinstance(value, int): + return value + if value is None: + return value + return float(value) + + def get_number_precision(value): """ - Get precision of an input value. - Exact integer powers of 10 are assigned same precision of smaller numbers + Get the scale of an input value, i.e. its rounded-up power of 10. + Exact integer powers of 10 are assigned the same scale/precision as smaller numbers For example get_number_precision(10.0) = 1 get_number_precision(10.001) = 2 @@ -71,26 +191,40 @@ def get_number_precision(value): return math.ceil(math.log10(abs(value))) -def relative_round(value, relative_digits): - """Rounds to a given relative precision""" +def get_number_size(value, rtn_for_zero=float("nan")): + """A near synonym for get_number_precision, with an optional + argument to return for values equal to zero (and hence with no + well-defined order of magnitude). - if isinstance(value, tuple): - return tuple(relative_round(x, relative_digits) for x in value) + This feature is provided since returning 0 or 0.0 can lead to + over-rounding if one uncertainty component is zero. The default + value is NaN, but e.g. None or -float("inf") might sometimes be + more appropriate. 
- if value == 0 or isinstance(value, str) or np.isnan(value) or np.isinf(value): - return value + : param value : number to evaluate + : type value : float or tuple[float] - value_precision = get_number_precision(value) - absolute_digits = -value_precision + relative_digits # pylint: disable=invalid-unary-operand-type + : returns : order of magnitude (rounded-up power of 10) of ``value``, + normally integer except in the zero-value failure mode - return round(value, int(absolute_digits)) + """ + + # handle tuples like get_number_precision does + if isinstance(value, tuple): + return tuple(get_number_size(x) for x in value) + + if value == 0: + return rtn_for_zero + + return get_number_precision(value) def get_value_precision_wrt_reference(value, reference): """ - relative precision of first argument with respect to the second one - value and reference are both float and/or int - value can be float when reference is an int and viceversa + Get the relative precision (scale) of the first argument with respect to the second one + + ``value`` and ``reference`` are both float and/or int + ``value`` can be float when reference is an int and vice-versa : param value: first value : type value: float, int @@ -111,235 +245,265 @@ def get_value_precision_wrt_reference(value, reference): return get_number_precision(value) - get_number_precision(reference) -def round_value_to_decimals(cont, key="y", decimals=3): +def get_value_size_wrt_reference(value, reference, size_for_zero=float("nan")): """ - round all values in a dictionary to some decimals in one go - default round to 3 digits after period - possible use case: correlations where typical values are within -1,1 + Like the get_value_precision_wrt_reference but calling get_number_size + rather than get_number_precision, and with the optional zero-return + option of the former. - : param cont : dictionary as returned e.g. 
by RootFileReader::read_hist_1d() - : type cont : dictionary + ``value`` and ``reference`` are both float and/or int + ``value`` can be float when reference is an int and vice-versa - : param decimals: how many decimals for the rounding - : type decimals: integer + : param value: first value + : type value: float, int + + : param reference: reference value (usually the uncertainty on value) + : type reference: float, int + + : param size_for_zero: the size value to be used for zero-valued ``value`` or ``reference`` + : type size_for_zero: float, int """ - decimals = int(decimals) + this_function = "get_value_size_wrt_reference()" + good_types = [int, float] + arguments = [value, reference] - for i, val in enumerate(cont[key]): - if isinstance(val, tuple): - cont[key][i] = (round(val[0], decimals), round(val[1], decimals)) - else: - cont[key][i] = round(val, decimals) + # first check all arguments have appropriate type + for input_arg in arguments: + if not any(isinstance(input_arg, x) for x in good_types): + raise ValueError("Unsupported input type passed to " + this_function) + return get_number_size(value, size_for_zero) - get_number_size(reference, size_for_zero) -def round_value_and_uncertainty_to_decimals(cont, val_key="y", unc_key="dy", decimals=3): - """ - round values and uncertainty to some decimals - default round to 3 digits after period - possible use case: correlations where typical values are within -1,1 - : param cont : dictionary as returned e.g. 
by RootFileReader::read_hist_1d() - : type cont : dictionary +def relative_round(value, relative_digits): + """Rounds to a given relative precision""" - : param decimals: how many decimals for the rounding - : type decimals: integer + if isinstance(value, tuple): + return tuple(relative_round(x, relative_digits) for x in value) + + if value == 0 or isinstance(value, str) or np.isnan(value) or np.isinf(value): + return value + + value_precision = get_number_precision(value) + absolute_digits = -value_precision + relative_digits # pylint: disable=invalid-unary-operand-type + + return round(value, int(absolute_digits)) + + +def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): """ + Round a collection of values to the precision required for the given sd's to + appear on the larger uncertainty component e.g. +1.3456 -0.2345 @ 2sf --> +1.3 -0.2 - decimals = int(decimals) + Mainly designed for handling uncertainty (particularly an asymmetric +/- pair), + but written to be more generally usable. A passed single number will be handled + transparently, without wrapping in an iterable. 
- for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): - cont[val_key][i] = round(val, decimals) - if isinstance(unc, tuple): - cont[unc_key][i] = (round(unc[0], decimals), round(unc[1], decimals)) - else: - cont[unc_key][i] = round(unc, decimals) + : param uncs : iterable of values (primarily uncertainties) + : type uncs : float or iterable[float] + : param sig_digits : how many significant digits on the leading component + : type sig_digits : integer -def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc=2): + : param no_round_to_zero : if true, ensure always at least one sd per component + : type no_round_to_zero : bool + + : returns : float or list[float]) of rounded values and + a list of the digit precisions used (even for scalar ``uncs``) """ - round values and uncertainty according to the precision of the uncertainty, - and also round uncertainty to a given number of significant digits - Typical usage: + try: #< if this fails, uncs isn't iterable -> fall back to scalar + # get orders of magnitude of each component + unc_orders = [get_number_size(u) for u in uncs] + # base the nominal precision on the target number of sd's on the largest component + ptarget = -int(np.nanmax(unc_orders)) + sig_digits + # customise the precisions for each component (if instructed to prevent rounding to zero) + ptargets = [(max(ptarget, -uo+1) if no_round_to_zero else ptarget) for uo in unc_orders] + # do the (maybe custom) rounding + newuncs = [round(u, ptargets[i]) for (i, u) in enumerate(uncs)] + return newuncs, ptargets if no_round_to_zero else ptarget + except: + unc_order = get_number_size(uncs) + newunc = relative_round(uncs, sig_digits) + return newunc, [-unc_order+sig_digits] + + +def round_value_and_uncertainty_arrs(vals, uncs, sig_digits_unc=2): + """ + Round arrays of values and a single uncertainty source according to + the precision of the uncertainty, row by row, and also round the + uncertainties to a given number of 
significant digits. - reader = RootFileReader("rootfile.root") - data = reader.read_hist_1d("histogramName") - round_value_and_uncertainty(data,"y","dy",2) + Named with the _arrs suffix as the pre-existing, canonically named + versions operate on dicts from the ROOT reader. - will round data["y"] to match the precision of data["dy"] for each element, after - rounding each element of data["dy"] to 2 significant digits - e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 + Operates directly on matched lists of values and uncertainties. + Tuple-valued uncertainty entries are assumed to be a +- asymm pair + for that data point, and the larger is used to define the reference + precision. - : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() - : type cont : dictionary + This will round each ``val`` to match the precision of its corresponding + ``unc``, after rounding each element of ``unc`` to 2 significant digits + e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value + will always be reported, though 100% errors are not commonly published. + + : param vals : y values + : type vals : iterable of float + + : param uncs : y uncertainty values + : type uncs : iterable of float or tuple[float] : param sig_digits_unc: how many significant digits used to round the uncertainty : type sig_digits_unc: integer + + : returns : modified (vals, uncs). Note that arguments are also modified in-place. 
""" sig_digits_unc = int(sig_digits_unc) - for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): - if isinstance(unc, tuple): - # case for TGraphAsymmErrors with unc = (elow,ehigh), the central value is rounded - # using the significant digits of the largest of the two uncertainties, - # the smaller uncertainty would be rounded accordingly (at least 1 digit) - # usually lower and higher uncertainties will be of the same order of magnitude - # or at most different by 1 order (like +0.132 -0.083), in which case, - # if choosing 2 significant digits, the rounding should result in +0.13 -0.08 - max_absunc = 0.0 - index_min_unc = 0 - # set default precision for both sides of uncertainty - sig_digits_unc_ntuple = [sig_digits_unc, sig_digits_unc] - if abs(unc[0]) < abs(unc[1]): - max_absunc = abs(unc[1]) - index_min_unc = 0 - relative_precision = get_value_precision_wrt_reference(unc[0], unc[1]) - else: - max_absunc = abs(unc[0]) - index_min_unc = 1 - relative_precision = get_value_precision_wrt_reference(unc[1], unc[0]) - # update precision on smaller uncertainty (at least 1 significant digit) - sig_digits_unc_ntuple[index_min_unc] = int(max(1, sig_digits_unc + relative_precision)) - cont[unc_key][i] = (relative_round(unc[0], sig_digits_unc_ntuple[0]), - relative_round(unc[1], sig_digits_unc_ntuple[1])) - val_precision = get_value_precision_wrt_reference(val, max_absunc) - sig_digits_val = int(sig_digits_unc + val_precision) - cont[val_key][i] = relative_round(val, sig_digits_val) - else: - # standard case for TH1 or TGraphErrors, uncertainty is a single value - cont[unc_key][i] = relative_round(unc, sig_digits_unc) - val_precision = get_value_precision_wrt_reference(val, unc) - sig_digits_val = int(sig_digits_unc + val_precision) - cont[val_key][i] = relative_round(val, sig_digits_val) + for i, (val, unc) in enumerate(zip(vals, uncs)): + # Two possible types for unc: + # - standard case for TH1 or TGraphErrors: uncertainty is a single value + # - case for 
TGraphAsymmErrors: uncertainty is a tuple(elow, ehigh) + # round_multiple handles both scalar and tuple in a transparent way + uncs[i], uncprecisions = round_multiple(unc, sig_digits_unc, True) + valprecision = -get_number_size(val)+1 + vals[i] = round(val, max(int(np.nanmin(uncprecisions)), valprecision)) + return vals, uncs -def check_file_existence(path_to_file): - """ - Check that the given file path exists. - If not, raise RuntimeError. - :param path_to_file: File path to check. - :type path_to_file: string +def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2): """ - if not os.path.exists(path_to_file): - raise RuntimeError("Cannot find file: " + path_to_file) - return True + Round values and multiple uncertainty sources according to the precision of the + largest uncertainty, and also round each (asymm) uncertainty to a given number + of significant digits. -def check_file_size(path_to_file, upper_limit=None, lower_limit=None): - """ - Check that the file size is between the upper and lower limits. - If not, raise RuntimeError. + Named with the _arrs suffix as the pre-existing, canonically named + versions operate on dicts from the ROOT reader. - :param path_to_file: File path to check. - :type path_to_file: string + The rounding of each error source is performed independently, with at least one + sd always shown. The smallest precision encountered in the error set (i.e. the + largest uncertainty component) is used to define the precision of the value's + rounding. At least one sd of the value will always be reported, though 100% errors + are not commonly published. - :param upper_limit: Upper size limit in MB. - :type upper_limit: float + : param cont : dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` + : type cont : dictionary - :param lower_limit: Lower size limit in MB. 
- :type lower_limit: float + : param sig_digits_unc: how many significant digits used to round the uncertainty + : type sig_digits_unc: integer + + : returns : modified (vals, unclists). Note that arguments are also modified in-place. """ - size = 1e-6 * os.path.getsize(path_to_file) - if upper_limit and size > upper_limit: - raise RuntimeError(f"File too big: '{path_to_file}'. Maximum allowed value is {upper_limit}" - + "MB.") - if lower_limit and size < lower_limit: - raise RuntimeError(f"File too small: '{path_to_file}'." - + f"Minimal allowed value is {lower_limit} MB.") + sig_digits_unc = int(sig_digits_unc) + + for ipt, val in enumerate(vals): + # the eventual precision of the value will match that of the largest error, but start with this upper bound + valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) + # round each error source independently with their larger component getting the target sd's + uncs_ipt = [ul[ipt] for ul in unclists] + uncs_ipt_rnd, uncprecisions = round_multiple(uncs_ipt, sig_digits_unc, True) + valprecision = min(int(np.nanmin(uncprecisions)), valprecision) + for iu, unc in enumerate(uncs_ipt_rnd): + unclists[iu][ipt] = unc + # round the value to match the precision of the largest error component + vals[ipt] = round(val, valprecision) -def any_uncertainties_nonzero(uncertainties, size): - """ - Return a mask of bins where any of the uncertainties is nonzero. + return vals, unclists + + +def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc=2): """ - nonzero = np.zeros(size, dtype=bool) + Round values and uncertainty according to the precision of the uncertainty, + and also round uncertainty to a given number of significant digits, on a + dictionary of values and uncertainties like that returned by RootFileReader. 
- for unc in uncertainties: + Typical usage:: - # Treat one-sided uncertainties as - tmp = 0 if unc.is_symmetric else (0,0) - values = np.array([tmp if v is None else v for v in unc.values]) - values[values.astype(str)==''] = 0 - values = values.astype(float) + reader = RootFileReader("rootfile.root") + data = reader.read_hist_1d("histogramName") + round_value_and_uncertainty(data,"y","dy",2) - if unc.is_symmetric: - nonzero = nonzero | (values != 0) - else: - nonzero = nonzero | np.any(values != 0,axis=1) - return nonzero + will round ``data["y"]`` to match the precision of ``data["dy"]`` for each + element, after rounding each element of ``data["dy"]`` to 2 significant digits + e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value + will always be reported, though 100% errors are not commonly published. -def sanitize_value(value): + : param cont : dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` + : type cont : dictionary + + : param sig_digits_unc: how many significant digits used to round the uncertainty + : type sig_digits_unc: integer """ - Handle conversion of input types for internal storage. + assert(isinstance(cont, dict)) + round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], sig_digits_unc) - :param value: User-side input value to sanitize. - :type value: string, int, NoneType, or castable to float - Strings, integers and None are left alone, - everything else is converted to float. +def round_value_to_decimals(cont, key="y", decimals=3): """ - if isinstance(value,str): - return value - if isinstance(value,int): - return value - if value is None: - return value - return float(value) + Round all values in a dictionary to some decimals in one go. + The default is to round to 3 digits after the period. + Possible use case: correlations where typical values are within -1,1 -def convert_pdf_to_png(source, target): - """ - Wrapper for the ImageMagick convert utility. + : param cont : dictionary as returned e.g. 
by RootFileReader::read_hist_1d() + : type cont : dictionary - :param source: Source file in PDF format. - :type source: str - :param target: Output file in PNG format. - :type target: str + : param decimals: how many decimals for the rounding + : type decimals: integer """ - assert os.path.exists(source), f"Source file does not exist: {source}" - command = f"convert -flatten -density 300 -fuzz 1% -trim +repage {source} {target}" - command_ok = execute_command(command) - if not command_ok: - print("ImageMagick does not seem to be installed \ - or is not in the path - not adding any images.") + decimals = int(decimals) + for i, val in enumerate(cont[key]): + if isinstance(val, tuple): + cont[key][i] = (round(val[0], decimals), round(val[1], decimals)) + else: + cont[key][i] = round(val, decimals) -def convert_png_to_thumbnail(source, target): - """ - Wrapper for the ImageMagick convert utility in thumbnail mode. - :param source: Source file in PNG format. - :type source: str - :param target: Output thumbnailfile in PNG format. - :type target: str +def round_value_and_uncertainty_to_decimals(cont, val_key="y", unc_key="dy", decimals=3): """ + Round values and uncertainty to some decimals. - command = f"convert -thumbnail 240x179 {source} {target}" - command_ok = execute_command(command) + The default is to round to 3 digits after the period. + Possible use case: correlations where typical values are within -1,1 - if not command_ok: - print("ImageMagick does not seem to be installed \ - or is not in the path - not adding any images.") + : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() + : type cont : dictionary -def file_is_outdated(file_path, reference_file_path): + : param decimals: how many decimals for the rounding + : type decimals: integer """ - Check if the given file is outdated compared to the reference file. - Also returns true if the reference file does not exist. 
+ decimals = int(decimals) - :param file_path: Path to the file to check. - :type file_path: str - :param reference_file_path: Path to the reference file. - :type reference_file_path: str + for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): + cont[val_key][i] = round(val, decimals) + if isinstance(unc, tuple): + cont[unc_key][i] = (round(unc[0], decimals), round(unc[1], decimals)) + else: + cont[unc_key][i] = round(unc, decimals) + + +def any_uncertainties_nonzero(uncertainties, size): """ - if not os.path.exists(reference_file_path): - raise RuntimeError(f"Reference file does not exist: {reference_file_path}") - if not os.path.exists(file_path): - return True + Return a mask of bins where any of the uncertainties is nonzero. + """ + nonzero = np.zeros(size, dtype=bool) - modification_outdated = os.path.getmtime(file_path) < os.path.getmtime(reference_file_path) - change_outdated = os.path.getctime(file_path) < os.path.getctime(reference_file_path) + for unc in uncertainties: - return modification_outdated | change_outdated + # Treat one-sided uncertainties as + tmp = 0 if unc.is_symmetric else (0,0) + values = np.array([tmp if v is None else v for v in unc.values]) + values[values.astype(str)==''] = 0 + values = values.astype(float) + + if unc.is_symmetric: + nonzero = nonzero | (values != 0) + else: + nonzero = nonzero | np.any(values != 0,axis=1) + return nonzero From 57af632b7a5b0adbba447a42e6782370c6d49cac Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 14:11:37 +0100 Subject: [PATCH 02/10] Address linter complaints --- hepdata_lib/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index a803c0a..17b8b36 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -322,7 +322,7 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): # do the (maybe custom) rounding newuncs = [round(u, ptargets[i]) for (i, u) in enumerate(uncs)] 
return newuncs, ptargets if no_round_to_zero else ptarget - except: + except TypeError: unc_order = get_number_size(uncs) newunc = relative_round(uncs, sig_digits) return newunc, [-unc_order+sig_digits] @@ -400,7 +400,7 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 sig_digits_unc = int(sig_digits_unc) for ipt, val in enumerate(vals): - # the eventual precision of the value will match that of the largest error, but start with this upper bound + # the value precision will match that of the largest error, but start with this upper bound valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) # round each error source independently with their larger component getting the target sd's uncs_ipt = [ul[ipt] for ul in unclists] @@ -437,7 +437,7 @@ def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc= : param sig_digits_unc: how many significant digits used to round the uncertainty : type sig_digits_unc: integer """ - assert(isinstance(cont, dict)) + #assert isinstance(cont, dict) round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], sig_digits_unc) From dfa6aff99b8883f174148b986a52457ac3f5fe0e Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 14:50:55 +0100 Subject: [PATCH 03/10] Make round_multiple return values as a tuple if they were supplied as a tuple. 
Fixes the failing test --- hepdata_lib/helpers.py | 8 ++++++-- tests/test_helpers.py | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 17b8b36..46e6e73 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -309,8 +309,9 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): : param no_round_to_zero : if true, ensure always at least one sd per component : type no_round_to_zero : bool - : returns : float or list[float]) of rounded values and - a list of the digit precisions used (even for scalar ``uncs``) + : returns : float or list/tuple[float]) of rounded values and a list of the digit + precisions used for each component (this is a list even for scalar + ``uncs``; note that it can contain NaNs due to zero-valued components) """ try: #< if this fails, uncs isn't iterable -> fall back to scalar # get orders of magnitude of each component @@ -321,6 +322,9 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): ptargets = [(max(ptarget, -uo+1) if no_round_to_zero else ptarget) for uo in unc_orders] # do the (maybe custom) rounding newuncs = [round(u, ptargets[i]) for (i, u) in enumerate(uncs)] + # return as a tuple if the input was a tuple (for ROOT use-case & test consistency) + if type(uncs) is tuple: + newuncs = tuple(newuncs) return newuncs, ptargets if no_round_to_zero else ptarget except TypeError: unc_order = get_number_size(uncs) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index d04e99d..e83d18a 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -13,6 +13,7 @@ class TestHelpers(TestCase): """Test the helper functions.""" + def test_relative_round(self): '''Test behavior of Variable.scale_values function''' # Some values are mapped onto themselves From e3550874e89bc4587d30dd61abf254bff754da31 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 17:52:42 +0100 Subject: [PATCH 04/10] The test implementation caught 
a subtle bug in the multiple-errs case: it was not actually treating the error sources independently. Fixed now, and the tests are passing as well as manually sanity-checked --- hepdata_lib/helpers.py | 14 ++++-- tests/test_helpers.py | 104 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 110 insertions(+), 8 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 46e6e73..67293a2 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -406,13 +406,17 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 for ipt, val in enumerate(vals): # the value precision will match that of the largest error, but start with this upper bound valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) - # round each error source independently with their larger component getting the target sd's + # get the list of uncertainty sources for the i'th val uncs_ipt = [ul[ipt] for ul in unclists] - uncs_ipt_rnd, uncprecisions = round_multiple(uncs_ipt, sig_digits_unc, True) - valprecision = min(int(np.nanmin(uncprecisions)), valprecision) - for iu, unc in enumerate(uncs_ipt_rnd): - unclists[iu][ipt] = unc + # round each error source independently with their larger component getting the target sd's + #uncs_ipt_rnd, uncprecisions = round_multiple(uncs_ipt, sig_digits_unc, True) + minuncprecision = np.inf #< TODO: there's probably a less pessimistic int starting value! 
+ for iu, u in enumerate(uncs_ipt): + u_rnd, uprecisions = round_multiple(u, sig_digits_unc, True) + unclists[iu][ipt] = u_rnd + minuncprecision = int(np.nanmin(np.hstack((uprecisions, minuncprecision)))) # round the value to match the precision of the largest error component + valprecision = min(minuncprecision, valprecision) vals[ipt] = round(val, valprecision) return vals, unclists diff --git a/tests/test_helpers.py b/tests/test_helpers.py index e83d18a..9d6fa1c 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -5,8 +5,12 @@ import numpy as np from hepdata_lib.helpers import relative_round +from hepdata_lib.helpers import round_multiple from hepdata_lib.helpers import get_number_precision +from hepdata_lib.helpers import get_number_size from hepdata_lib.helpers import get_value_precision_wrt_reference +from hepdata_lib.helpers import round_value_and_uncertainty_arrs +from hepdata_lib.helpers import round_value_and_multiple_uncertainties_arrs from hepdata_lib.helpers import round_value_and_uncertainty from hepdata_lib.helpers import file_is_outdated @@ -69,6 +73,51 @@ def test_get_number_precision(self): self.assertTrue(precisions == target_precisions) + def test_get_number_size(self): + '''Test behavior of get_number_size function''' + + # Check that zero handling works + self.assertTrue(np.isnan(get_number_size(0))) + self.assertTrue(np.isnan(get_number_size(0.0))) + self.assertTrue(get_number_size(0.0, None) is None) + + # Self-mappings should work as for get_number_precision + self.assertTrue(get_number_size(np.inf) == np.inf) + self.assertTrue(get_number_size("astring") == "astring") + + # Remaining tests duplicate those for get_number_precision: + + # test case with single value + # test format is (original value, size) + values = [ + (12.5, 2), + (1.25, 1), + (0.125, 0), + (0.0125, -1) + ] + for value, prec in values: + precision = get_number_size(value) + self.assertTrue(precision == prec) + + # test case with ntuple (e.g. 
with two values) + # test format is (original value, precision) + # both original value and precision are ntuples (with two elements) + ntuples = { + (12.5, 1.25) : (2, 1), + (0.125, 0.0125) : (0, -1) + } + for original_values, target_precisions in ntuples.items(): + precisions = get_number_size(original_values) + self.assertTrue(precisions == target_precisions) + + + def test_round_multiple(self): + '''Test behavior of the multiple-rounding function''' + + self.assertTrue(round_multiple([1.236890, 0.123324, 10.375477, 0.0003345], 3) + == ([1.2, 0.1, 10.4, 0.0003], [1, 1, 1, 4])) + + def test_get_value_precision_wrt_reference(self): '''Test behavior of get_value_precision_wrt_reference function''' @@ -92,11 +141,35 @@ def test_get_value_precision_wrt_reference(self): get_value_precision_wrt_reference("bad", (1.2, 3.4)) + def test_round_value_and_uncertainty_arrs(self): + '''Test behavior of round_value_and_uncertainty_arrs function''' + + # Test for single-valued uncertainties + val = [1.23456, 1234.56, 0.0012345, 0.123] + unc = [0.00123, 1.23, 0.012, 0.12] + val_round = [1.2346, 1234.6, 0.001, 0.12] + unc_round = [0.0012, 1.2, 0.012, 0.12] + # round to two significant digits + round_value_and_uncertainty_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + # Test for pair-valued uncertainties + val = [1.23456, 0.123] + unc = [(0.00123, 0.0123), (0.012, 0.12)] + val_round = [1.235, 0.12] + unc_round = [(0.001, 0.012), (0.01, 0.12)] + # round to two significant digits + round_value_and_uncertainty_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + def test_round_value_and_uncertainty(self): '''Test behavior of round_value_and_uncertainty function''' # Test format is - # (container, key_for_values, key_for_uncertanties, significant_digits) + # (container, key_for_values, key_for_uncertainties, significant_digits) # uncertainty has a single value cont = {"val": [1.23456, 1234.56, 
0.0012345, 0.123], "unc": [0.00123, 1.23, 0.012, 0.12], @@ -108,8 +181,8 @@ def test_round_value_and_uncertainty(self): self.assertTrue(cont["unc"] == cont["unc_round"]) # Test format is - # (container, key_for_values, key_for_uncertanties, significant_digits) - # uncertainty has two value, as it would be the case with TGraphAsymmErrors + # (container, key_for_values, key_for_uncertainties, significant_digits) + # uncertainty has two values, as it would be the case with TGraphAsymmErrors cont_asymm_err = {"val": [1.23456, 0.123], "unc": [(0.00123, 0.0123), (0.012, 0.12)], "val_round": [1.235, 0.12], @@ -119,6 +192,31 @@ def test_round_value_and_uncertainty(self): self.assertTrue(cont_asymm_err["val"] == cont_asymm_err["val_round"]) self.assertTrue(cont_asymm_err["unc"] == cont_asymm_err["unc_round"]) + + def test_round_value_and_multiple_uncertainties_arrs(self): + '''Test behavior of round_value_and_multiple_uncertainty_arrs function''' + + # Test for single-valued uncertainties + val = [1.23456, 1234.56, 0.0012345, 0.123] + unc = [[-0.00123, 1.23, 0.012, -0.12], [-0.123, -40.2124, 0.0000643, 0.03]] + val_round = [1.23, 1235.0, 0.001, 0.12] + unc_round = [[-0.0012, 1.2, 0.012, -0.12], [-0.12, -40.0, 6.4e-05, 0.03]] + # round to two significant digits + round_value_and_multiple_uncertainties_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + # Test for pair-valued uncertainties + val = [1.23456, 0.123] + unc = [[(-0.00123, 0.0123), (-0.012, 0.12)], [(0.223, 0.0456), (0.00012, -0.0012)]] + val_round = [1.23, 0.12] + unc_round = [[(-0.001, 0.012), (-0.01, 0.12)], [(0.22, 0.05), (0.0001, -0.0012)]] + # round to two significant digits + round_value_and_multiple_uncertainties_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + def test_file_is_outdated(self): '''Test behavior of file_is_outdated function''' with self.assertRaises(RuntimeError): From 
857a040b059abc96a5e96db35005de0601d0e06b Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 17:53:15 +0100 Subject: [PATCH 05/10] Clean replaced code line --- hepdata_lib/helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 67293a2..d4cba3f 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -409,7 +409,6 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 # get the list of uncertainty sources for the i'th val uncs_ipt = [ul[ipt] for ul in unclists] # round each error source independently with their larger component getting the target sd's - #uncs_ipt_rnd, uncprecisions = round_multiple(uncs_ipt, sig_digits_unc, True) minuncprecision = np.inf #< TODO: there's probably a less pessimistic int starting value! for iu, u in enumerate(uncs_ipt): u_rnd, uprecisions = round_multiple(u, sig_digits_unc, True) From 0cc60fab5ee48deaf549d5390b9b13754cd7e4f3 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Thu, 30 Apr 2026 17:58:18 +0100 Subject: [PATCH 06/10] Use isinstance type check --- hepdata_lib/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index d4cba3f..a6bcf43 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -323,7 +323,7 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): # do the (maybe custom) rounding newuncs = [round(u, ptargets[i]) for (i, u) in enumerate(uncs)] # return as a tuple if the input was a tuple (for ROOT use-case & test consistency) - if type(uncs) is tuple: + if isinstance(uncs, tuple): newuncs = tuple(newuncs) return newuncs, ptargets if no_round_to_zero else ptarget except TypeError: From 06adfe8684f736f8671b6bdaaac914fa44ba8bff Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Fri, 1 May 2026 12:32:55 +0100 Subject: [PATCH 07/10] Extra test function and various small issues via Copilot review --- 
hepdata_lib/helpers.py | 105 ++++++++++++++++++++++------------------- tests/test_helpers.py | 26 ++++++++++ 2 files changed, 82 insertions(+), 49 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index a6bcf43..a5172e9 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -201,17 +201,17 @@ def get_number_size(value, rtn_for_zero=float("nan")): value is NaN, but e.g. None or -float("inf") might sometimes be more appropriate. - : param value : number to evaluate - : type value : float or tuple[float] + :param value: number to evaluate + :type value: float or tuple[float] - : returns : order of magnitude (rounded-up power of 10) of ``value``, + :returns: order of magnitude (rounded-up power of 10) of ``value``, normally integer except in the zero-value failure mode """ # handle tuples like get_number_precision does if isinstance(value, tuple): - return tuple(get_number_size(x) for x in value) + return tuple(get_number_size(x, rtn_for_zero) for x in value) if value == 0: return rtn_for_zero @@ -226,11 +226,11 @@ def get_value_precision_wrt_reference(value, reference): ``value`` and ``reference`` are both float and/or int ``value`` can be float when reference is an int and vice-versa - : param value: first value - : type value: float, int + :param value: first value + :type value: float, int - : param reference: reference value (usually the uncertainty on value) - : type reference: float, int + :param reference: reference value (usually the uncertainty on value) + :type reference: float, int """ this_function = "get_value_precision_wrt_reference()" @@ -249,19 +249,19 @@ def get_value_size_wrt_reference(value, reference, size_for_zero=float("nan")): """ Like the get_value_precision_wrt_reference but calling get_number_size rather than get_number_precision, and with the optional zero-return - option of the former. + option of the former passed to the size-assessing function calls. 
``value`` and ``reference`` are both float and/or int ``value`` can be float when reference is an int and vice-versa - : param value: first value - : type value: float, int + :param value: first value + :type value: float, int - : param reference: reference value (usually the uncertainty on value) - : type reference: float, int + :param reference: reference value (usually the uncertainty on value) + :type reference: float, int - : param size_for_zero: the size value to be used for zero-valued ``value`` or ``reference`` - : type size_for_zero: float, int + :param size_for_zero: the size value to be used for zero-valued ``value`` or ``reference`` + :type size_for_zero: float, int """ this_function = "get_value_size_wrt_reference()" @@ -300,23 +300,25 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): but written to be more generally usable. A passed single number will be handled transparently, without wrapping in an iterable. - : param uncs : iterable of values (primarily uncertainties) - : type uncs : float or iterable[float] + :param uncs: iterable of values (primarily uncertainties) + :type uncs: float or iterable[float] - : param sig_digits : how many significant digits on the leading component - : type sig_digits : integer + :param sig_digits: how many significant digits on the leading component + :type sig_digits: integer - : param no_round_to_zero : if true, ensure always at least one sd per component - : type no_round_to_zero : bool + :param no_round_to_zero: if true, ensure always at least one sd per component + :type no_round_to_zero: bool - : returns : float or list/tuple[float]) of rounded values and a list of the digit - precisions used for each component (this is a list even for scalar - ``uncs``; note that it can contain NaNs due to zero-valued components) + :returns: float or list/tuple[float]) of rounded values and a list of the digit + precisions used for each component (this is a list even for scalar + ``uncs``; note that it can 
contain NaNs due to zero-valued components) """ try: #< if this fails, uncs isn't iterable -> fall back to scalar # get orders of magnitude of each component unc_orders = [get_number_size(u) for u in uncs] # base the nominal precision on the target number of sd's on the largest component + if np.all(np.isnan(unc_orders)): + return uncs, [0 for u in uncs] ptarget = -int(np.nanmax(unc_orders)) + sig_digits # customise the precisions for each component (if instructed to prevent rounding to zero) ptargets = [(max(ptarget, -uo+1) if no_round_to_zero else ptarget) for uo in unc_orders] @@ -325,8 +327,10 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): # return as a tuple if the input was a tuple (for ROOT use-case & test consistency) if isinstance(uncs, tuple): newuncs = tuple(newuncs) - return newuncs, ptargets if no_round_to_zero else ptarget + return newuncs, ptargets except TypeError: + if uncs == 0: + return unc, [0] unc_order = get_number_size(uncs) newunc = relative_round(uncs, sig_digits) return newunc, [-unc_order+sig_digits] @@ -351,16 +355,16 @@ def round_value_and_uncertainty_arrs(vals, uncs, sig_digits_unc=2): e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value will always be reported, though 100% errors are not commonly published. - : param vals : y values - : type vals : iterable of float + :param vals: y values + :type vals: iterable of float - : param uncs : y uncertainty values - : type uncs : iterable of float or tuple[float] + :param uncs: y uncertainty values + :type uncs: iterable of float or tuple[float] - : param sig_digits_unc: how many significant digits used to round the uncertainty - : type sig_digits_unc: integer + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer - : returns : modified (vals, uncs). Note that arguments are also modified in-place. + :returns: modified (vals, uncs). Note that arguments are also modified in-place. 
""" sig_digits_unc = int(sig_digits_unc) @@ -392,13 +396,16 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 rounding. At least one sd of the value will always be reported, though 100% errors are not commonly published. - : param cont : dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` - : type cont : dictionary + :param vals: y values + :type vals: iterable of float - : param sig_digits_unc: how many significant digits used to round the uncertainty - : type sig_digits_unc: integer + :param uncs: y uncertainty values + :type uncs: iterable of float or tuple[float] - : returns : modified (vals, unclists). Note that arguments are also modified in-place. + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer + + :returns: modified (vals, unclists). Note that arguments are also modified in-place. """ sig_digits_unc = int(sig_digits_unc) @@ -438,11 +445,11 @@ def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc= e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value will always be reported, though 100% errors are not commonly published. - : param cont : dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` - : type cont : dictionary + :param cont: dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` + :type cont: dictionary - : param sig_digits_unc: how many significant digits used to round the uncertainty - : type sig_digits_unc: integer + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer """ #assert isinstance(cont, dict) round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], sig_digits_unc) @@ -455,11 +462,11 @@ def round_value_to_decimals(cont, key="y", decimals=3): The default is to round to 3 digits after the period. 
Possible use case: correlations where typical values are within -1,1 - : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() - : type cont : dictionary + :param cont: dictionary as returned e.g. by RootFileReader::read_hist_1d() + :type cont: dictionary - : param decimals: how many decimals for the rounding - : type decimals: integer + :param decimals: how many decimals for the rounding + :type decimals: integer """ decimals = int(decimals) @@ -478,11 +485,11 @@ def round_value_and_uncertainty_to_decimals(cont, val_key="y", unc_key="dy", dec The default is to round to 3 digits after the period. Possible use case: correlations where typical values are within -1,1 - : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() - : type cont : dictionary + :param cont: dictionary as returned e.g. by RootFileReader::read_hist_1d() + :type cont: dictionary - : param decimals: how many decimals for the rounding - : type decimals: integer + :param decimals: how many decimals for the rounding + :type decimals: integer """ decimals = int(decimals) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 9d6fa1c..3a0772e 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -9,6 +9,7 @@ from hepdata_lib.helpers import get_number_precision from hepdata_lib.helpers import get_number_size from hepdata_lib.helpers import get_value_precision_wrt_reference +from hepdata_lib.helpers import get_value_size_wrt_reference from hepdata_lib.helpers import round_value_and_uncertainty_arrs from hepdata_lib.helpers import round_value_and_multiple_uncertainties_arrs from hepdata_lib.helpers import round_value_and_uncertainty @@ -141,6 +142,31 @@ def test_get_value_precision_wrt_reference(self): get_value_precision_wrt_reference("bad", (1.2, 3.4)) + def test_get_value_size_wrt_reference(self): + '''Test behavior of get_value_size_wrt_reference function''' + + self.assertTrue(np.isnan(get_value_size_wrt_reference(0.0, 0.1))) + 
self.assertTrue(get_value_size_wrt_reference(0.01, 0, -np.inf) == np.inf) + + # test format is (value, reference, relative precision) + values = [ + (12.5, 0.08, 3), + (1.25, 102.4, -2), + (10.0, 9, 0) + ] + for val, ref, prec in values: + precision = get_value_size_wrt_reference(val, ref) + self.assertTrue(precision == prec) + + # test wrong input type + with self.assertRaises(ValueError): + get_value_size_wrt_reference(1.23, "bad") + with self.assertRaises(ValueError): + get_value_size_wrt_reference(1.23, (1.2, 3.4)) + with self.assertRaises(ValueError): + get_value_size_wrt_reference("bad", (1.2, 3.4)) + + def test_round_value_and_uncertainty_arrs(self): '''Test behavior of round_value_and_uncertainty_arrs function''' From 3c8bda76c1dc87e8c24c52d44a49aeb808de7cb8 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Fri, 1 May 2026 12:41:23 +0100 Subject: [PATCH 08/10] Typo fixes --- hepdata_lib/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index a5172e9..719e936 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -330,7 +330,7 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): return newuncs, ptargets except TypeError: if uncs == 0: - return unc, [0] + return uncs, [0] unc_order = get_number_size(uncs) newunc = relative_round(uncs, sig_digits) return newunc, [-unc_order+sig_digits] @@ -399,8 +399,8 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 :param vals: y values :type vals: iterable of float - :param uncs: y uncertainty values - :type uncs: iterable of float or tuple[float] + :param unclists: iterable of y uncertainty values separated by source + :type unclists: iterable of iterable of float or tuple[float] :param sig_digits_unc: how many significant digits used to round the uncertainty :type sig_digits_unc: integer From 3d98942130cc7021ee57ce1c63ccfddf63144608 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: 
Fri, 1 May 2026 12:56:12 +0100 Subject: [PATCH 09/10] Add precision discussion to docs --- docs/usage.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index be02338..e8c85a6 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -278,7 +278,7 @@ One common use case with more than one independent Variable is that of correlati Adding a plot thumb nail to a table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -HEPData supports the addition of thumb nail images to each table. This makes it easier for the consumer of your entry to find what they are looking for, since they can simply look for the table that has the thumb nail of the plot they are interested in. +HEPData supports the addition of thumbnail images to each table. This makes it easier for the consumer of your entry to find what they are looking for, since they can simply look for the table that has the thumbnail of the plot they are interested in. If you have the full-size plot available on your drive, you can add it to your entry very easily: :: @@ -389,6 +389,19 @@ represented using the same scheme and should also ensure that the unc2 = Uncertainty("An asymmetric uncertainty", is_symmetric=False) unc2.values = [ (-0.08, +0.15), (-0.13, +0.20), (-0.18,+0.27) ] +Note that the sizes of the uncertainties define a natural scale for +the precision to which the central value should be represented (and in +an asymmetric pair, the larger component may naturally set the precision +of reporting for the smaller). In HEPData, any numerical values will be +displayed at full floating-point precision, so it is often desirable to +manually round the values and uncertainties in the submission, to achieve +a more readable final display.
The ``hepdata_lib.helpers`` functions +``relative_round``, ``round_multiple``, ``round_value_and_uncertainty_arrs``, +``round_value_and_multiple_uncertainties_arrs``, ``round_value_and_uncertainty``, +``round_value_to_decimals`` and ``round_value_and_uncertainty_to_decimals`` +can be used to manipulate arrays and dicts of numerical data before +attachment to the Variable and Uncertainty objects. + After creating the Uncertainty objects, the only additional step is to attach them to the Variable: :: From 23e6a331187787926991f2d609fe985caad73ca1 Mon Sep 17 00:00:00 2001 From: Andy Buckley Date: Mon, 11 May 2026 16:58:36 +0100 Subject: [PATCH 10/10] Fixes in response to Clemens Lange's comments. Zero uncertainties on a value now preserve full value precision, unless a new optional arg is used to set a target sig figs for such cases --- hepdata_lib/helpers.py | 56 +++++++++++++++++++++++++++++++----------- tests/test_helpers.py | 17 +++++++------ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 719e936..fd6a2ee 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -318,7 +318,7 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): unc_orders = [get_number_size(u) for u in uncs] # base the nominal precision on the target number of sd's on the largest component if np.all(np.isnan(unc_orders)): - return uncs, [0 for u in uncs] + return uncs, unc_orders ptarget = -int(np.nanmax(unc_orders)) + sig_digits # customise the precisions for each component (if instructed to prevent rounding to zero) ptargets = [(max(ptarget, -uo+1) if no_round_to_zero else ptarget) for uo in unc_orders] @@ -330,13 +330,14 @@ def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): return newuncs, ptargets except TypeError: if uncs == 0: - return uncs, [0] + return uncs, [np.nan] unc_order = get_number_size(uncs) newunc = relative_round(uncs, sig_digits) return newunc, [-unc_order+sig_digits] -def
round_value_and_uncertainty_arrs(vals, uncs, sig_digits_unc=2): +def round_value_and_uncertainty_arrs(vals, uncs, + sig_digits_unc=2, sig_digits_val_zero_unc=None): """ Round arrays of values and a single uncertainty source according to the precision of the uncertainty, row by row, and also round the @@ -364,24 +365,34 @@ def round_value_and_uncertainty_arrs(vals, uncs, sig_digits_unc=2): :param sig_digits_unc: how many significant digits used to round the uncertainty :type sig_digits_unc: integer + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. None -> no rounding + :type sig_digits_val_zero_unc: integer or None + :returns: modified (vals, uncs). Note that arguments are also modified in-place. """ sig_digits_unc = int(sig_digits_unc) + # loop over the bins, rounding each independently for i, (val, unc) in enumerate(zip(vals, uncs)): # Two possible types for unc: # - standard case for TH1 or TGraphErrors: uncertainty is a single value # - case for TGraphAsymmErrors: uncertainty is a tuple(elow, ehigh) # round_multiple handles both scalar and tuple in a transparent way uncs[i], uncprecisions = round_multiple(unc, sig_digits_unc, True) - valprecision = -get_number_size(val)+1 - vals[i] = round(val, max(int(np.nanmin(uncprecisions)), valprecision)) + if not np.all(np.isnan(uncprecisions)): + valprecision = -get_number_size(val)+1 + vals[i] = round(val, max(int(np.nanmin(uncprecisions)), valprecision)) + elif sig_digits_val_zero_unc is not None: + vals[i] = relative_round(val, sig_digits_val_zero_unc) + # else do nothing: keep full precision return vals, uncs -def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2): +def round_value_and_multiple_uncertainties_arrs(vals, unclists, + sig_digits_unc=2, sig_digits_val_zero_unc=None): """ Round values and multiple uncertainty sources according to the precision of the largest uncertainty, and also round each (asymm) uncertainty to a 
given number @@ -405,30 +416,41 @@ def round_value_and_multiple_uncertainties_arrs(vals, unclists, sig_digits_unc=2 :param sig_digits_unc: how many significant digits used to round the uncertainty :type sig_digits_unc: integer + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. None -> no rounding + :type sig_digits_val_zero_unc: integer or None + :returns: modified (vals, unclists). Note that arguments are also modified in-place. """ sig_digits_unc = int(sig_digits_unc) + # loop over the bins, rounding each independently for ipt, val in enumerate(vals): # the value precision will match that of the largest error, but start with this upper bound - valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) + #valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) # get the list of uncertainty sources for the i'th val uncs_ipt = [ul[ipt] for ul in unclists] # round each error source independently with their larger component getting the target sd's - minuncprecision = np.inf #< TODO: there's probably a less pessimistic int starting value! + minuncprecision = np.inf #< note float type: inf/nan -/-> int for iu, u in enumerate(uncs_ipt): u_rnd, uprecisions = round_multiple(u, sig_digits_unc, True) unclists[iu][ipt] = u_rnd - minuncprecision = int(np.nanmin(np.hstack((uprecisions, minuncprecision)))) + if not np.all(np.isnan(uprecisions)): + minuncprecision = np.nanmin(np.hstack((uprecisions, minuncprecision))) #< float! 
# round the value to match the precision of the largest error component - valprecision = min(minuncprecision, valprecision) - vals[ipt] = round(val, valprecision) - + #valprecision = min(minuncprecision, valprecision) + #vals[ipt] = round(val, valprecision) + if not np.isinf(minuncprecision): + vals[ipt] = round(val, int(minuncprecision)) + elif sig_digits_val_zero_unc is not None: + vals[ipt] = relative_round(val, sig_digits_val_zero_unc) + # else do nothing: keep full precision return vals, unclists -def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc=2): +def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", + sig_digits_unc=2, sig_digits_val_zero_unc=None): """ Round values and uncertainty according to the precision of the uncertainty, and also round uncertainty to a given number of significant digits, on a @@ -450,9 +472,14 @@ def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc= :param sig_digits_unc: how many significant digits used to round the uncertainty :type sig_digits_unc: integer + + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. 
None -> no rounding + :type sig_digits_val_zero_unc: integer or None """ #assert isinstance(cont, dict) - round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], sig_digits_unc) + round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], + sig_digits_unc, sig_digits_val_zero_unc) def round_value_to_decimals(cont, key="y", decimals=3): @@ -471,6 +498,7 @@ def round_value_to_decimals(cont, key="y", decimals=3): decimals = int(decimals) + # loop over the bins, rounding each independently for i, val in enumerate(cont[key]): if isinstance(val, tuple): cont[key][i] = (round(val[0], decimals), round(val[1], decimals)) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 3a0772e..af90e51 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -198,9 +198,9 @@ def test_round_value_and_uncertainty(self): # (container, key_for_values, key_for_uncertainties, significant_digits) # uncertainty has a single value cont = {"val": [1.23456, 1234.56, 0.0012345, 0.123], - "unc": [0.00123, 1.23, 0.012, 0.12], - "val_round": [1.2346, 1234.6, 0.001, 0.12], - "unc_round": [0.0012, 1.2, 0.012, 0.12]} + "unc": [0.00123, 1.23, 0.012, 0.0], + "val_round": [1.2346, 1234.6, 0.001, 0.123], + "unc_round": [0.0012, 1.2, 0.012, 0.0]} # round to two significant digits round_value_and_uncertainty(cont, "val", "unc", 2) self.assertTrue(cont["val"] == cont["val_round"]) @@ -223,14 +223,17 @@ def test_round_value_and_multiple_uncertainties_arrs(self): '''Test behavior of round_value_and_multiple_uncertainty_arrs function''' # Test for single-valued uncertainties - val = [1.23456, 1234.56, 0.0012345, 0.123] - unc = [[-0.00123, 1.23, 0.012, -0.12], [-0.123, -40.2124, 0.0000643, 0.03]] - val_round = [1.23, 1235.0, 0.001, 0.12] - unc_round = [[-0.0012, 1.2, 0.012, -0.12], [-0.12, -40.0, 6.4e-05, 0.03]] + val = [1.23456, 1234.56, 0.0012345, 0.123, 4.567] + unc = [[-0.00123, 1.23, 0.012, -0.12, 0.0], [-0.123, -40.2124, 0.0, 0.03, 0.0]] + val_round = [1.23, 1235.0, 0.001, 
0.12, 4.567] + unc_round = [[-0.0012, 1.2, 0.012, -0.12, 0.0], [-0.12, -40.0, 0.0, 0.03, 0.0]] # round to two significant digits round_value_and_multiple_uncertainties_arrs(val, unc, 2) self.assertTrue(val == val_round) self.assertTrue(unc == unc_round) + round_value_and_multiple_uncertainties_arrs(val, unc, 2, 2) + val_round[-1] = 4.6 + self.assertTrue(val == val_round) # Test for pair-valued uncertainties val = [1.23456, 0.123]