diff --git a/docs/usage.rst b/docs/usage.rst index 67940e0..e8c85a6 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -278,7 +278,7 @@ One common use case with more than one independent Variable is that of correlati Adding a plot thumb nail to a table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -HEPData supports the addition of thumb nail images to each table. This makes it easier for the consumer of your entry to find what they are looking for, since they can simply look for the table that has the thumb nail of the plot they are interested in. +HEPData supports the addition of thumbnail images to each table. This makes it easier for the consumer of your entry to find what they are looking for, since they can simply look for the table that has the thumbnail of the plot they are interested in. If you have the full-size plot available on your drive, you can add it to your entry very easily: :: @@ -362,7 +362,23 @@ need to specify a license for a data table unless it differs from `CC0`_. Uncertainties +++++++++++++ -In many cases, you will want to give uncertainties on the central values provided in the Variable objects. Uncertainties can be *symmetric* or *asymmetric* (up and down variations of the central value either have the same or different magnitudes). For symmetric uncertainties, the values of the uncertainties are simply stored as a one-dimensional list. For asymmetric uncertainties, the up- and downward variations are stored as a list of two-component tuples: +In many cases, you will want to give uncertainties on the central +values provided in the Variable objects. Uncertainties can be +*symmetric* or *asymmetric*. For symmetric +uncertainties, the values of the uncertainties are stored as a +one-dimensional list of positive values, which are applied as +equal-magnitude positive and negative changes to the value. + +For asymmetric uncertainties, the uncertainties are expressed as a +*signed* two-component iterable (e.g. 
tuple or list): in general, this +pair represents the value changes in response to downward and upward +moves of a nuisance parameter, and so it is possible for both the "up" +and "down" variations to have the same sign (if the effect of the +nuisance is one-sided). Therefore both components should be computed +as ``variation_value - nominal_value`` such that negative variations +correctly acquire a minus sign; asymmetric statistical errors are +represented using the same scheme and should also ensure that the +"down" uncertainty has a negative sign. :: @@ -373,6 +389,19 @@ In many cases, you will want to give uncertainties on the central values provide unc2 = Uncertainty("An asymmetric uncertainty", is_symmetric=False) unc2.values = [ (-0.08, +0.15), (-0.13, +0.20), (-0.18,+0.27) ] +Note that the sizes of the uncertainties define a natural scale for +the precision to which the central value should be represented (and in +an asymmetric pair, the larger component may naturally set the precision +of reporting for the smaller). In HEPData, any numerical values will be +displayed at full floating-point precision, so it is often desirable to +manually round the values and uncertainties in the submission, to achieve +a more readable final display. The ``hepdata_lib.helpers`` functions +``relative_round``, ``round_multiple``, ``round_value_and_uncertainty_arrs``, +``round_value_and_multiple_uncertainties_arrs``, ``round_value_and_uncertainty``, +``round_value_to_decimals`` and ``round_value_and_uncertainty_to_decimals`` +can be used to manipulate arrays and dicts of numerical data before +attachment to the Variable and Uncertainty objects. 
+ After creating the Uncertainty objects, the only additional step is to attach them to the Variable: :: diff --git a/hepdata_lib/helpers.py b/hepdata_lib/helpers.py index 9e35973..fd6a2ee 100644 --- a/hepdata_lib/helpers.py +++ b/hepdata_lib/helpers.py @@ -6,6 +6,9 @@ import math import numpy as np + +## File and command functions + def execute_command(command): """ Execute shell command using subprocess. @@ -37,6 +40,65 @@ def execute_command(command): raise RuntimeError(result) return True + +def convert_pdf_to_png(source, target): + """ + Wrapper for the ImageMagick convert utility. + + :param source: Source file in PDF format. + :type source: str + :param target: Output file in PNG format. + :type target: str + """ + assert os.path.exists(source), f"Source file does not exist: {source}" + + command = f"convert -flatten -density 300 -fuzz 1% -trim +repage {source} {target}" + command_ok = execute_command(command) + if not command_ok: + print("ImageMagick does not seem to be installed \ + or is not in the path - not adding any images.") + + +def convert_png_to_thumbnail(source, target): + """ + Wrapper for the ImageMagick convert utility in thumbnail mode. + + :param source: Source file in PNG format. + :type source: str + :param target: Output thumbnailfile in PNG format. + :type target: str + """ + + command = f"convert -thumbnail 240x179 {source} {target}" + command_ok = execute_command(command) + + if not command_ok: + print("ImageMagick does not seem to be installed \ + or is not in the path - not adding any images.") + + +def file_is_outdated(file_path, reference_file_path): + """ + Check if the given file is outdated compared to the reference file. + + Also returns true if the reference file does not exist. + + :param file_path: Path to the file to check. + :type file_path: str + :param reference_file_path: Path to the reference file. 
+ :type reference_file_path: str + """ + if not os.path.exists(reference_file_path): + raise RuntimeError(f"Reference file does not exist: {reference_file_path}") + if not os.path.exists(file_path): + return True + + modification_outdated = os.path.getmtime(file_path) < os.path.getmtime(reference_file_path) + change_outdated = os.path.getctime(file_path) < os.path.getctime(reference_file_path) + + return modification_outdated | change_outdated + + def find_all_matching(path, pattern): """Utility function that works like 'find' in bash.""" if not os.path.exists(path): @@ -49,10 +111,68 @@ def find_all_matching(path, pattern): return result +def check_file_existence(path_to_file): + """ + Check that the given file path exists. + If not, raise RuntimeError. + + :param path_to_file: File path to check. + :type path_to_file: string + """ + if not os.path.exists(path_to_file): + raise RuntimeError("Cannot find file: " + path_to_file) + return True + + +def check_file_size(path_to_file, upper_limit=None, lower_limit=None): + """ + Check that the file size is between the upper and lower limits. + If not, raise RuntimeError. + + :param path_to_file: File path to check. + :type path_to_file: string + + :param upper_limit: Upper size limit in MB. + :type upper_limit: float + + :param lower_limit: Lower size limit in MB. + :type lower_limit: float + """ + size = 1e-6 * os.path.getsize(path_to_file) + if upper_limit and size > upper_limit: + raise RuntimeError(f"File too big: '{path_to_file}'. Maximum allowed value is {upper_limit}" + + "MB.") + if lower_limit and size < lower_limit: + raise RuntimeError(f"File too small: '{path_to_file}'." + + f"Minimal allowed value is {lower_limit} MB.") + + + +## Value type, formatting and numerical-precision functions + +def sanitize_value(value): + """ + Handle conversion of input types for internal storage. + + :param value: User-side input value to sanitize. 
+ :type value: string, int, NoneType, or castable to float + + Strings, integers and None are left alone, + everything else is converted to float. + """ + if isinstance(value, str): + return value + if isinstance(value, int): + return value + if value is None: + return value + return float(value) + + def get_number_precision(value): """ - Get precision of an input value. - Exact integer powers of 10 are assigned same precision of smaller numbers + Get the scale of an input value, i.e. its rounded-up power of 10. + Exact integer powers of 10 are assigned the same scale/precision as smaller numbers For example get_number_precision(10.0) = 1 get_number_precision(10.001) = 2 @@ -71,32 +191,46 @@ def get_number_precision(value): return math.ceil(math.log10(abs(value))) -def relative_round(value, relative_digits): - """Rounds to a given relative precision""" +def get_number_size(value, rtn_for_zero=float("nan")): + """A near synonym for get_number_precision, with an optional + argument to return for values equal to zero (and hence with no + well-defined order of magnitude). - if isinstance(value, tuple): - return tuple(relative_round(x, relative_digits) for x in value) + This feature is provided since returning 0 or 0.0 can lead to + over-rounding if one uncertainty component is zero. The default + value is NaN, but e.g. None or -float("inf") might sometimes be + more appropriate. 
- if value == 0 or isinstance(value, str) or np.isnan(value) or np.isinf(value): - return value + :param value: number to evaluate + :type value: float or tuple[float] - value_precision = get_number_precision(value) - absolute_digits = -value_precision + relative_digits # pylint: disable=invalid-unary-operand-type + :returns: order of magnitude (rounded-up power of 10) of ``value``, + normally integer except in the zero-value failure mode - return round(value, int(absolute_digits)) + """ + + # handle tuples like get_number_precision does + if isinstance(value, tuple): + return tuple(get_number_size(x, rtn_for_zero) for x in value) + + if value == 0: + return rtn_for_zero + + return get_number_precision(value) def get_value_precision_wrt_reference(value, reference): """ - relative precision of first argument with respect to the second one - value and reference are both float and/or int - value can be float when reference is an int and viceversa + Get the relative precision (scale) of the first argument with respect to the second one + + ``value`` and ``reference`` are both float and/or int + ``value`` can be float when reference is an int and vice-versa - : param value: first value - : type value: float, int + :param value: first value + :type value: float, int - : param reference: reference value (usually the uncertainty on value) - : type reference: float, int + :param reference: reference value (usually the uncertainty on value) + :type reference: float, int """ this_function = "get_value_precision_wrt_reference()" @@ -111,235 +245,307 @@ def get_value_precision_wrt_reference(value, reference): return get_number_precision(value) - get_number_precision(reference) -def round_value_to_decimals(cont, key="y", decimals=3): +def get_value_size_wrt_reference(value, reference, size_for_zero=float("nan")): """ - round all values in a dictionary to some decimals in one go - default round to 3 digits after period - possible use case: correlations where typical values are 
within -1,1 + Like the get_value_precision_wrt_reference but calling get_number_size + rather than get_number_precision, and with the optional zero-return + option of the former passed to the size-assessing function calls. + + ``value`` and ``reference`` are both float and/or int + ``value`` can be float when reference is an int and vice-versa - : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() - : type cont : dictionary + :param value: first value + :type value: float, int - : param decimals: how many decimals for the rounding - : type decimals: integer + :param reference: reference value (usually the uncertainty on value) + :type reference: float, int + + :param size_for_zero: the size value to be used for zero-valued ``value`` or ``reference`` + :type size_for_zero: float, int """ - decimals = int(decimals) + this_function = "get_value_size_wrt_reference()" + good_types = [int, float] + arguments = [value, reference] - for i, val in enumerate(cont[key]): - if isinstance(val, tuple): - cont[key][i] = (round(val[0], decimals), round(val[1], decimals)) - else: - cont[key][i] = round(val, decimals) + # first check all arguments have appropriate type + for input_arg in arguments: + if not any(isinstance(input_arg, x) for x in good_types): + raise ValueError("Unsupported input type passed to " + this_function) + return get_number_size(value, size_for_zero) - get_number_size(reference, size_for_zero) -def round_value_and_uncertainty_to_decimals(cont, val_key="y", unc_key="dy", decimals=3): - """ - round values and uncertainty to some decimals - default round to 3 digits after period - possible use case: correlations where typical values are within -1,1 - : param cont : dictionary as returned e.g. 
by RootFileReader::read_hist_1d() - : type cont : dictionary +def relative_round(value, relative_digits): + """Rounds to a given relative precision""" - : param decimals: how many decimals for the rounding - : type decimals: integer - """ + if isinstance(value, tuple): + return tuple(relative_round(x, relative_digits) for x in value) - decimals = int(decimals) + if value == 0 or isinstance(value, str) or np.isnan(value) or np.isinf(value): + return value - for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): - cont[val_key][i] = round(val, decimals) - if isinstance(unc, tuple): - cont[unc_key][i] = (round(unc[0], decimals), round(unc[1], decimals)) - else: - cont[unc_key][i] = round(unc, decimals) + value_precision = get_number_precision(value) + absolute_digits = -value_precision + relative_digits # pylint: disable=invalid-unary-operand-type + return round(value, int(absolute_digits)) -def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", sig_digits_unc=2): + +def round_multiple(uncs, sig_digits=2, no_round_to_zero=True): """ - round values and uncertainty according to the precision of the uncertainty, - and also round uncertainty to a given number of significant digits - Typical usage: + Round a collection of values to the precision required for the given sd's to + appear on the larger uncertainty component e.g. +1.3456 -0.2345 @ 2sf --> +1.3 -0.2 - reader = RootFileReader("rootfile.root") - data = reader.read_hist_1d("histogramName") - round_value_and_uncertainty(data,"y","dy",2) + Mainly designed for handling uncertainty (particularly an asymmetric +/- pair), + but written to be more generally usable. A passed single number will be handled + transparently, without wrapping in an iterable. 
+ + :param uncs: iterable of values (primarily uncertainties) + :type uncs: float or iterable[float] - will round data["y"] to match the precision of data["dy"] for each element, after - rounding each element of data["dy"] to 2 significant digits - e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 + :param sig_digits: how many significant digits on the leading component + :type sig_digits: integer - : param cont : dictionary as returned e.g. by RootFileReader::read_hist_1d() - : type cont : dictionary + :param no_round_to_zero: if true, ensure always at least one sd per component + :type no_round_to_zero: bool - : param sig_digits_unc: how many significant digits used to round the uncertainty - : type sig_digits_unc: integer + :returns: float or list/tuple[float]) of rounded values and a list of the digit + precisions used for each component (this is a list even for scalar + ``uncs``; note that it can contain NaNs due to zero-valued components) """ + try: #< if this fails, uncs isn't iterable -> fall back to scalar + # get orders of magnitude of each component + unc_orders = [get_number_size(u) for u in uncs] + # base the nominal precision on the target number of sd's on the largest component + if np.all(np.isnan(unc_orders)): + return uncs, unc_orders + ptarget = -int(np.nanmax(unc_orders)) + sig_digits + # customise the precisions for each component (if instructed to prevent rounding to zero) + ptargets = [(max(ptarget, -uo+1) if no_round_to_zero else ptarget) for uo in unc_orders] + # do the (maybe custom) rounding + newuncs = [round(u, ptargets[i]) for (i, u) in enumerate(uncs)] + # return as a tuple if the input was a tuple (for ROOT use-case & test consistency) + if isinstance(uncs, tuple): + newuncs = tuple(newuncs) + return newuncs, ptargets + except TypeError: + if uncs == 0: + return uncs, [np.nan] + unc_order = get_number_size(uncs) + newunc = relative_round(uncs, sig_digits) + return newunc, [-unc_order+sig_digits] + + +def round_value_and_uncertainty_arrs(vals, 
uncs, + sig_digits_unc=2, sig_digits_val_zero_unc=None): + """ + Round arrays of values and a single uncertainty source according to + the precision of the uncertainty, row by row, and also round the + uncertainties to a given number of significant digits. - sig_digits_unc = int(sig_digits_unc) + Named with the _arrs suffix as the pre-existing, canonically named + versions operate on dicts from the ROOT reader. - for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): - if isinstance(unc, tuple): - # case for TGraphAsymmErrors with unc = (elow,ehigh), the central value is rounded - # using the significant digits of the largest of the two uncertainties, - # the smaller uncertainty would be rounded accordingly (at least 1 digit) - # usually lower and higher uncertainties will be of the same order of magnitude - # or at most different by 1 order (like +0.132 -0.083), in which case, - # if choosing 2 significant digits, the rounding should result in +0.13 -0.08 - max_absunc = 0.0 - index_min_unc = 0 - # set default precision for both sides of uncertainty - sig_digits_unc_ntuple = [sig_digits_unc, sig_digits_unc] - if abs(unc[0]) < abs(unc[1]): - max_absunc = abs(unc[1]) - index_min_unc = 0 - relative_precision = get_value_precision_wrt_reference(unc[0], unc[1]) - else: - max_absunc = abs(unc[0]) - index_min_unc = 1 - relative_precision = get_value_precision_wrt_reference(unc[1], unc[0]) - # update precision on smaller uncertainty (at least 1 significant digit) - sig_digits_unc_ntuple[index_min_unc] = int(max(1, sig_digits_unc + relative_precision)) - cont[unc_key][i] = (relative_round(unc[0], sig_digits_unc_ntuple[0]), - relative_round(unc[1], sig_digits_unc_ntuple[1])) - val_precision = get_value_precision_wrt_reference(val, max_absunc) - sig_digits_val = int(sig_digits_unc + val_precision) - cont[val_key][i] = relative_round(val, sig_digits_val) - else: - # standard case for TH1 or TGraphErrors, uncertainty is a single value - cont[unc_key][i] = 
relative_round(unc, sig_digits_unc) - val_precision = get_value_precision_wrt_reference(val, unc) - sig_digits_val = int(sig_digits_unc + val_precision) - cont[val_key][i] = relative_round(val, sig_digits_val) + Operates directly on matched lists of values and uncertainties. + Tuple-valued uncertainty entries are assumed to be a +- asymm pair + for that data point, and the larger is used to define the reference + precision. + This will round each ``val`` to match the precision of its corresponding + ``unc``, after rounding each element of ``unc`` to 2 significant digits + e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value + will always be reported, though 100% errors are not commonly published. -def check_file_existence(path_to_file): - """ - Check that the given file path exists. - If not, raise RuntimeError. + :param vals: y values + :type vals: iterable of float - :param path_to_file: File path to check. - :type path_to_file: string + :param uncs: y uncertainty values + :type uncs: iterable of float or tuple[float] + + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer + + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. None -> no rounding + :type sig_digits_val_zero_unc: integer or None + + :returns: modified (vals, uncs). Note that arguments are also modified in-place. 
""" - if not os.path.exists(path_to_file): - raise RuntimeError("Cannot find file: " + path_to_file) - return True -def check_file_size(path_to_file, upper_limit=None, lower_limit=None): + sig_digits_unc = int(sig_digits_unc) + + # loop over the bins, rounding each independently + for i, (val, unc) in enumerate(zip(vals, uncs)): + # Two possible types for unc: + # - standard case for TH1 or TGraphErrors: uncertainty is a single value + # - case for TGraphAsymmErrors: uncertainty is a tuple(elow, ehigh) + # round_multiple handles both scalar and tuple in a transparent way + uncs[i], uncprecisions = round_multiple(unc, sig_digits_unc, True) + if not np.all(np.isnan(uncprecisions)): + valprecision = -get_number_size(val)+1 + vals[i] = round(val, max(int(np.nanmin(uncprecisions)), valprecision)) + elif sig_digits_val_zero_unc is not None: + vals[i] = relative_round(val, sig_digits_val_zero_unc) + # else do nothing: keep full precision + + return vals, uncs + + +def round_value_and_multiple_uncertainties_arrs(vals, unclists, + sig_digits_unc=2, sig_digits_val_zero_unc=None): """ - Check that the file size is between the upper and lower limits. - If not, raise RuntimeError. + Round values and multiple uncertainty sources according to the precision of the + largest uncertainty, and also round each (asymm) uncertainty to a given number + of significant digits. - :param path_to_file: File path to check. - :type path_to_file: string + Named with the _arrs suffix as the pre-existing, canonically named + versions operate on dicts from the ROOT reader. - :param upper_limit: Upper size limit in MB. - :type upper_limit: float + The rounding of each error source is performed independently, with at least one + sd always shown. The smallest precision encountered in the error set (i.e. the + largest uncertainty component) is used to define the precision of the value's + rounding. At least one sd of the value will always be reported, though 100% errors + are not commonly published. 
- :param lower_limit: Lower size limit in MB. - :type lower_limit: float - """ - size = 1e-6 * os.path.getsize(path_to_file) - if upper_limit and size > upper_limit: - raise RuntimeError(f"File too big: '{path_to_file}'. Maximum allowed value is {upper_limit}" - + "MB.") - if lower_limit and size < lower_limit: - raise RuntimeError(f"File too small: '{path_to_file}'." - + f"Minimal allowed value is {lower_limit} MB.") + :param vals: y values + :type vals: iterable of float + :param unclists: iterable of y uncertainty values separated by source + :type unclists: iterable of iterable of float or tuple[float] -def any_uncertainties_nonzero(uncertainties, size): + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer + + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. None -> no rounding + :type sig_digits_val_zero_unc: integer or None + + :returns: modified (vals, unclists). Note that arguments are also modified in-place. """ - Return a mask of bins where any of the uncertainties is nonzero. + + sig_digits_unc = int(sig_digits_unc) + + # loop over the bins, rounding each independently + for ipt, val in enumerate(vals): + # the value precision will match that of the largest error, but start with this upper bound + #valprecision = max(-get_number_size(val)+sig_digits_unc, sig_digits_unc) + # get the list of uncertainty sources for the i'th val + uncs_ipt = [ul[ipt] for ul in unclists] + # round each error source independently with their larger component getting the target sd's + minuncprecision = np.inf #< note float type: inf/nan -/-> int + for iu, u in enumerate(uncs_ipt): + u_rnd, uprecisions = round_multiple(u, sig_digits_unc, True) + unclists[iu][ipt] = u_rnd + if not np.all(np.isnan(uprecisions)): + minuncprecision = np.nanmin(np.hstack((uprecisions, minuncprecision))) #< float! 
+ # round the value to match the precision of the largest error component + #valprecision = min(minuncprecision, valprecision) + #vals[ipt] = round(val, valprecision) + if not np.isinf(minuncprecision): + vals[ipt] = round(val, int(minuncprecision)) + elif sig_digits_val_zero_unc is not None: + vals[ipt] = relative_round(val, sig_digits_val_zero_unc) + # else do nothing: keep full precision + return vals, unclists + + +def round_value_and_uncertainty(cont, val_key="y", unc_key="dy", + sig_digits_unc=2, sig_digits_val_zero_unc=None): """ - nonzero = np.zeros(size, dtype=bool) + Round values and uncertainty according to the precision of the uncertainty, + and also round uncertainty to a given number of significant digits, on a + dictionary of values and uncertainties like that returned by RootFileReader. - for unc in uncertainties: + Typical usage:: - # Treat one-sided uncertainties as - tmp = 0 if unc.is_symmetric else (0,0) - values = np.array([tmp if v is None else v for v in unc.values]) - values[values.astype(str)==''] = 0 - values = values.astype(float) + reader = RootFileReader("rootfile.root") + data = reader.read_hist_1d("histogramName") + round_value_and_uncertainty(data,"y","dy",2) - if unc.is_symmetric: - nonzero = nonzero | (values != 0) - else: - nonzero = nonzero | np.any(values != 0,axis=1) - return nonzero + will round ``data["y"]`` to match the precision of ``data["dy"]`` for each + element, after rounding each element of ``data["dy"]`` to 2 significant digits + e.g. 26.5345 +/- 1.3456 --> 26.5 +/- 1.3 . At least one sd of the value + will always be reported, though 100% errors are not commonly published. -def sanitize_value(value): - """ - Handle conversion of input types for internal storage. + :param cont: dictionary as returned e.g. by ``RootFileReader::read_hist_1d()`` + :type cont: dictionary - :param value: User-side input value to sanitize. 
- :type value: string, int, NoneType, or castable to float + :param sig_digits_unc: how many significant digits used to round the uncertainty + :type sig_digits_unc: integer - Strings, integers and None are left alone, - everything else is converted to float. + :param sig_digits_val_zero_unc: how many significant digits used to round a value + if its uncertainty is zero. None -> no rounding + :type sig_digits_val_zero_unc: integer or None """ - if isinstance(value,str): - return value - if isinstance(value,int): - return value - if value is None: - return value - return float(value) + #assert isinstance(cont, dict) + round_value_and_uncertainty_arrs(cont[val_key], cont[unc_key], + sig_digits_unc, sig_digits_val_zero_unc) -def convert_pdf_to_png(source, target): +def round_value_to_decimals(cont, key="y", decimals=3): """ - Wrapper for the ImageMagick convert utility. + Round all values in a dictionary to some decimals in one go. - :param source: Source file in PDF format. - :type source: str - :param target: Output file in PNG format. - :type target: str + The default is to round to 3 digits after the period. + Possible use case: correlations where typical values are within -1,1 + + :param cont: dictionary as returned e.g. 
by RootFileReader::read_hist_1d() + :type cont: dictionary + + :param decimals: how many decimals for the rounding + :type decimals: integer """ - assert os.path.exists(source), f"Source file does not exist: {source}" - command = f"convert -flatten -density 300 -fuzz 1% -trim +repage {source} {target}" - command_ok = execute_command(command) - if not command_ok: - print("ImageMagick does not seem to be installed \ - or is not in the path - not adding any images.") + decimals = int(decimals) + # loop over the bins, rounding each independently + for i, val in enumerate(cont[key]): + if isinstance(val, tuple): + cont[key][i] = (round(val[0], decimals), round(val[1], decimals)) + else: + cont[key][i] = round(val, decimals) -def convert_png_to_thumbnail(source, target): - """ - Wrapper for the ImageMagick convert utility in thumbnail mode. - :param source: Source file in PNG format. - :type source: str - :param target: Output thumbnailfile in PNG format. - :type target: str +def round_value_and_uncertainty_to_decimals(cont, val_key="y", unc_key="dy", decimals=3): """ + Round values and uncertainty to some decimals. - command = f"convert -thumbnail 240x179 {source} {target}" - command_ok = execute_command(command) + The default is to round to 3 digits after the period. + Possible use case: correlations where typical values are within -1,1 - if not command_ok: - print("ImageMagick does not seem to be installed \ - or is not in the path - not adding any images.") + :param cont: dictionary as returned e.g. by RootFileReader::read_hist_1d() + :type cont: dictionary -def file_is_outdated(file_path, reference_file_path): + :param decimals: how many decimals for the rounding + :type decimals: integer """ - Check if the given file is outdated compared to the reference file. - Also returns true if the reference file does not exist. + decimals = int(decimals) - :param file_path: Path to the file to check. 
- :type file_path: str - :param reference_file_path: Path to the reference file. - :type reference_file_path: str + for i, (val, unc) in enumerate(zip(cont[val_key], cont[unc_key])): + cont[val_key][i] = round(val, decimals) + if isinstance(unc, tuple): + cont[unc_key][i] = (round(unc[0], decimals), round(unc[1], decimals)) + else: + cont[unc_key][i] = round(unc, decimals) + + +def any_uncertainties_nonzero(uncertainties, size): """ - if not os.path.exists(reference_file_path): - raise RuntimeError(f"Reference file does not exist: {reference_file_path}") - if not os.path.exists(file_path): - return True + Return a mask of bins where any of the uncertainties is nonzero. + """ + nonzero = np.zeros(size, dtype=bool) - modification_outdated = os.path.getmtime(file_path) < os.path.getmtime(reference_file_path) - change_outdated = os.path.getctime(file_path) < os.path.getctime(reference_file_path) + for unc in uncertainties: - return modification_outdated | change_outdated + # Treat one-sided uncertainties as + tmp = 0 if unc.is_symmetric else (0,0) + values = np.array([tmp if v is None else v for v in unc.values]) + values[values.astype(str)==''] = 0 + values = values.astype(float) + + if unc.is_symmetric: + nonzero = nonzero | (values != 0) + else: + nonzero = nonzero | np.any(values != 0,axis=1) + return nonzero diff --git a/tests/test_helpers.py b/tests/test_helpers.py index d04e99d..af90e51 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -5,14 +5,20 @@ import numpy as np from hepdata_lib.helpers import relative_round +from hepdata_lib.helpers import round_multiple from hepdata_lib.helpers import get_number_precision +from hepdata_lib.helpers import get_number_size from hepdata_lib.helpers import get_value_precision_wrt_reference +from hepdata_lib.helpers import get_value_size_wrt_reference +from hepdata_lib.helpers import round_value_and_uncertainty_arrs +from hepdata_lib.helpers import round_value_and_multiple_uncertainties_arrs from 
hepdata_lib.helpers import round_value_and_uncertainty from hepdata_lib.helpers import file_is_outdated class TestHelpers(TestCase): """Test the helper functions.""" + def test_relative_round(self): '''Test behavior of Variable.scale_values function''' # Some values are mapped onto themselves @@ -68,6 +74,51 @@ def test_get_number_precision(self): self.assertTrue(precisions == target_precisions) + def test_get_number_size(self): + '''Test behavior of get_number_size function''' + + # Check that zero handling works + self.assertTrue(np.isnan(get_number_size(0))) + self.assertTrue(np.isnan(get_number_size(0.0))) + self.assertTrue(get_number_size(0.0, None) is None) + + # Self-mappings should work as for get_number_precision + self.assertTrue(get_number_size(np.inf) == np.inf) + self.assertTrue(get_number_size("astring") == "astring") + + # Remaining tests duplicate those for get_number_precision: + + # test case with single value + # test format is (original value, size) + values = [ + (12.5, 2), + (1.25, 1), + (0.125, 0), + (0.0125, -1) + ] + for value, prec in values: + precision = get_number_size(value) + self.assertTrue(precision == prec) + + # test case with ntuple (e.g. 
with two values) + # test format is (original value, precision) + # both original value and precision are ntuples (with two elements) + ntuples = { + (12.5, 1.25) : (2, 1), + (0.125, 0.0125) : (0, -1) + } + for original_values, target_precisions in ntuples.items(): + precisions = get_number_size(original_values) + self.assertTrue(precisions == target_precisions) + + + def test_round_multiple(self): + '''Test behavior of the multiple-rounding function''' + + self.assertTrue(round_multiple([1.236890, 0.123324, 10.375477, 0.0003345], 3) + == ([1.2, 0.1, 10.4, 0.0003], [1, 1, 1, 4])) + + def test_get_value_precision_wrt_reference(self): '''Test behavior of get_value_precision_wrt_reference function''' @@ -91,24 +142,73 @@ def test_get_value_precision_wrt_reference(self): get_value_precision_wrt_reference("bad", (1.2, 3.4)) + def test_get_value_size_wrt_reference(self): + '''Test behavior of get_value_size_wrt_reference function''' + + self.assertTrue(np.isnan(get_value_size_wrt_reference(0.0, 0.1))) + self.assertTrue(get_value_size_wrt_reference(0.01, 0, -np.inf) == np.inf) + + # test format is (value, reference, relative precision) + values = [ + (12.5, 0.08, 3), + (1.25, 102.4, -2), + (10.0, 9, 0) + ] + for val, ref, prec in values: + precision = get_value_size_wrt_reference(val, ref) + self.assertTrue(precision == prec) + + # test wrong input type + with self.assertRaises(ValueError): + get_value_size_wrt_reference(1.23, "bad") + with self.assertRaises(ValueError): + get_value_size_wrt_reference(1.23, (1.2, 3.4)) + with self.assertRaises(ValueError): + get_value_size_wrt_reference("bad", (1.2, 3.4)) + + + def test_round_value_and_uncertainty_arrs(self): + '''Test behavior of round_value_and_uncertainty_arrs function''' + + # Test for single-valued uncertainties + val = [1.23456, 1234.56, 0.0012345, 0.123] + unc = [0.00123, 1.23, 0.012, 0.12] + val_round = [1.2346, 1234.6, 0.001, 0.12] + unc_round = [0.0012, 1.2, 0.012, 0.12] + # round to two significant digits + 
round_value_and_uncertainty_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + # Test for pair-valued uncertainties + val = [1.23456, 0.123] + unc = [(0.00123, 0.0123), (0.012, 0.12)] + val_round = [1.235, 0.12] + unc_round = [(0.001, 0.012), (0.01, 0.12)] + # round to two significant digits + round_value_and_uncertainty_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + def test_round_value_and_uncertainty(self): '''Test behavior of round_value_and_uncertainty function''' # Test format is - # (container, key_for_values, key_for_uncertanties, significant_digits) + # (container, key_for_values, key_for_uncertainties, significant_digits) # uncertainty has a single value cont = {"val": [1.23456, 1234.56, 0.0012345, 0.123], - "unc": [0.00123, 1.23, 0.012, 0.12], - "val_round": [1.2346, 1234.6, 0.001, 0.12], - "unc_round": [0.0012, 1.2, 0.012, 0.12]} + "unc": [0.00123, 1.23, 0.012, 0.0], + "val_round": [1.2346, 1234.6, 0.001, 0.123], + "unc_round": [0.0012, 1.2, 0.012, 0.0]} # round to two significant digits round_value_and_uncertainty(cont, "val", "unc", 2) self.assertTrue(cont["val"] == cont["val_round"]) self.assertTrue(cont["unc"] == cont["unc_round"]) # Test format is - # (container, key_for_values, key_for_uncertanties, significant_digits) - # uncertainty has two value, as it would be the case with TGraphAsymmErrors + # (container, key_for_values, key_for_uncertainties, significant_digits) + # uncertainty has two values, as it would be the case with TGraphAsymmErrors cont_asymm_err = {"val": [1.23456, 0.123], "unc": [(0.00123, 0.0123), (0.012, 0.12)], "val_round": [1.235, 0.12], @@ -118,6 +218,34 @@ def test_round_value_and_uncertainty(self): self.assertTrue(cont_asymm_err["val"] == cont_asymm_err["val_round"]) self.assertTrue(cont_asymm_err["unc"] == cont_asymm_err["unc_round"]) + + def test_round_value_and_multiple_uncertainties_arrs(self): + '''Test behavior of 
round_value_and_multiple_uncertainties_arrs function''' + + # Test for single-valued uncertainties + val = [1.23456, 1234.56, 0.0012345, 0.123, 4.567] + unc = [[-0.00123, 1.23, 0.012, -0.12, 0.0], [-0.123, -40.2124, 0.0, 0.03, 0.0]] + val_round = [1.23, 1235.0, 0.001, 0.12, 4.567] + unc_round = [[-0.0012, 1.2, 0.012, -0.12, 0.0], [-0.12, -40.0, 0.0, 0.03, 0.0]] + # round to two significant digits + round_value_and_multiple_uncertainties_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + round_value_and_multiple_uncertainties_arrs(val, unc, 2, 2) + val_round[-1] = 4.6 + self.assertTrue(val == val_round) + + # Test for pair-valued uncertainties + val = [1.23456, 0.123] + unc = [[(-0.00123, 0.0123), (-0.012, 0.12)], [(0.223, 0.0456), (0.00012, -0.0012)]] + val_round = [1.23, 0.12] + unc_round = [[(-0.001, 0.012), (-0.01, 0.12)], [(0.22, 0.05), (0.0001, -0.0012)]] + # round to two significant digits + round_value_and_multiple_uncertainties_arrs(val, unc, 2) + self.assertTrue(val == val_round) + self.assertTrue(unc == unc_round) + + def test_file_is_outdated(self): '''Test behavior of file_is_outdated function''' with self.assertRaises(RuntimeError):