From 6ed040695c410cd3c624958fc72988fa1ad1eea9 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 9 Feb 2022 16:24:27 +0000 Subject: [PATCH 01/20] changes for CORDEX --- ukcp_dp/constants.py | 1 + ukcp_dp/data_extractor/_data_extractor.py | 20 ++++++ ukcp_dp/validator/_validator.py | 3 + ukcp_dp/vocab_manager/_vocab.py | 79 +++++++++++++++++++++++ 4 files changed, 103 insertions(+) diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py index f43a6eb..b098188 100644 --- a/ukcp_dp/constants.py +++ b/ukcp_dp/constants.py @@ -180,6 +180,7 @@ def enum(**named_values): COLLECTION_GCM = "land-gcm" COLLECTION_RCM = "land-rcm" COLLECTION_RCM_MIN_YEAR = 1980 +COLLECTION_RCM_CORDEX = "cordex" COLLECTION_RCM_GWL = "land-rcm-gwl" COLLECTION_MARINE = "marine-sim" COLLECTION_MARINE_MIN_YEAR = 2007 diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py index 7ee378c..90d156c 100644 --- a/ukcp_dp/data_extractor/_data_extractor.py +++ b/ukcp_dp/data_extractor/_data_extractor.py @@ -12,6 +12,7 @@ from iris.util import unify_time_units import cf_units +import numpy as from ukcp_dp.constants import ( COLLECTION_PROB, InputType, @@ -266,6 +267,25 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal iris.experimental.equalise_cubes.equalise_attributes(cubes) unify_time_units(cubes) + if collection == COLLECTION_RCM: + # we need to update the type of ensemble_member_id in order to be able to + # process Met Office and CORDEX data together + for cube in cubes: + for ind, aux_coord in enumerate(cube.aux_coords): + if aux_coord.var_name == "ensemble_member_id": + if aux_coord.dtype == np.dtype(" Date: Wed, 9 Feb 2022 16:33:14 +0000 Subject: [PATCH 02/20] fix import typo --- ukcp_dp/data_extractor/_data_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py index 90d156c..dfe9ccd 100644 --- a/ukcp_dp/data_extractor/_data_extractor.py +++ b/ukcp_dp/data_extractor/_data_extractor.py @@ -12,7 +12,7 @@ from iris.util import unify_time_units import cf_units -import numpy as +import numpy as np from ukcp_dp.constants import ( COLLECTION_PROB, InputType, From 9a7a90edb565f56116d10143a2fe6e81e3efe3c5 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 10 Feb 2022 08:32:48 +0000 Subject: [PATCH 03/20] fix issue with file finder introduced with COLLECTION_OBS --- ukcp_dp/file_finder/_file_finder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ukcp_dp/file_finder/_file_finder.py b/ukcp_dp/file_finder/_file_finder.py index c7d64df..0df2fd3 100644 --- a/ukcp_dp/file_finder/_file_finder.py +++ b/ukcp_dp/file_finder/_file_finder.py @@ -74,12 +74,12 @@ def get_file_lists(input_data): ]: file_list["main"] = _get_cm_file_list(input_data) - elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS: - file_list["main"] = get_obs_file_list(input_data) - if input_data.get_value(InputType.BASELINE) is not None: file_list["baseline"] = _get_file_list_for_baseline(input_data) + elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS: + file_list["main"] = get_obs_file_list(input_data) + # the file list for an overlay of probability levels if ( input_data.get_value(InputType.OVERLAY_PROBABILITY_LEVELS) is not None From e781059b35162552e89c6f1cd5526c0d53140307 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 4 May 2022 16:01:17 +0100 Subject: [PATCH 04/20] Before concatenating cubes remove "month_number" from seasonal RCM data --- ukcp_dp/data_extractor/_data_extractor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py index dfe9ccd..5af36df 100644 --- a/ukcp_dp/data_extractor/_data_extractor.py +++ b/ukcp_dp/data_extractor/_data_extractor.py @@ -286,6 +286,17 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal cube.add_aux_coord(ensemble_coord, ind) break + # the UKCP regional seasonal data has month_number, lets remove it to + # match CORDEX + if ( + self.input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + == TemporalAverageType.SEASONAL + ): + try: + cube.remove_coord("month_number") + except iris.exceptions.CoordinateNotFoundError: + pass + try: cube = cubes.concatenate_cube() except iris.exceptions.ConcatenateError as ex: From 6ea747b71ba4d9fd0ffdda9b074a3ed1cb124a86 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 4 May 2022 16:01:52 +0100 Subject: [PATCH 05/20] CORDEX ensemble members are now 100-177 --- ukcp_dp/vocab_manager/_vocab.py | 80 +-------------------------------- 1 file changed, 1 insertion(+), 79 deletions(-) diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index 2767db8..8c4a783 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -497,85 +497,7 @@ def _get_range(min_value, max_value): "26": ["26"], "27": ["27"], "28": ["28"], - "cordex": [ - "29", - "30", - "31", - "32", - "33", - "AA", - "AB", - "AC", - "AD", - "AE", - "AF", - "AG", - "AH", - "AI", - "AJ", - "0A", - "0B", - "0C", - "0D", - "0E", - "0F", - "0G", - "0H", - "0I", - "0J", - "BB", - "BC", - "BD", - "BE", - "BF", - "BG", - "BH", - "CC", - "CE", - "CF", - "CG", - "DC", - "DE", - "DF", - "DG", - "EA", - "EB", - "EC", - "ED", - "EE", - "EF", - "EG", - "EH", - "EJ", - "FE", - "FF", - "FG", - "FI", - "FJ", - "GA", - "GB", - "GC", - "GD", - "GF", - "GG", - "GH", - "GJ", - "HC", - "HG", - "IC", - "IG", - "II", - "JA", - "JB", - "JC", - "JD", - "JE", - "JF", - "JG", - "JH", - "JI", - "JJ", - ], + "cordex": list(_get_range(100, 177).values()), } From 60c934c60dc673b85fdc3309c996823fc66a1999 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 31 Mar 2022 13:05:01 +0100 Subject: [PATCH 06/20] bump version to 2.7.0 --- RELEASE_NOTES.md | 11 +++++++++-- ukcp_dp/constants.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 0f775f9..0dbc1f0 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,9 +1,16 @@ +# Release 2.7.0 + +This release addresses: + +* Add support for HadUK Grid processes + + # Release 2.6.0 This release addresses: * Performance improvements for writing CSV files -* Work around for files with lat, ong of 0, 0 +* Work around for files with lat, long of 0, 0 # Release 2.5.0 @@ -17,7 +24,7 @@ This release addresses: This release addresses: -* Add option for users to get data as shapesiles for map products +* Add option for users to get data as shape files for map products * Add option for users to set y-axis scale for plume plots diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py index b098188..cd73fc7 100644 --- a/ukcp_dp/constants.py +++ b/ukcp_dp/constants.py @@ -3,7 +3,7 @@ import cartopy.crs as ccrs -VERSION = "2.6.0" +VERSION = "2.7.0" def enum(**named_values): From f677989a3b74891f9ce0166e135b58f183618d75 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Tue, 5 Apr 2022 12:02:01 +0100 Subject: [PATCH 07/20] Add colour palettes for additional HadUK variables --- ukcp_dp/utils/_standards_class.py | 49 +++++++++++++++++++++++++++++++ ukcp_dp/utils/_utils.py | 28 ++++++++++++++++-- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/ukcp_dp/utils/_standards_class.py b/ukcp_dp/utils/_standards_class.py index 385fd38..cd91762 100644 --- a/ukcp_dp/utils/_standards_class.py +++ b/ukcp_dp/utils/_standards_class.py @@ -632,6 +632,10 @@ def __repr__(self): UKCP_WIND.vmid = None UKCP_WIND.vstep = 2.0 +UKCP_WIND_OBS = UKCP_WIND.copy() +UKCP_WIND_OBS.tag = "UKCP_wind_obs" +UKCP_WIND_OBS.extendcolbar = "neither" +UKCP_WIND_OBS.vmid = None # Wind speed anomalies. UKCP_WIND_ANOM = UKCP_WIND.copy() @@ -827,6 +831,10 @@ def __repr__(self): UKCP_RELATIVE_HUMIDITY.vmid = 50.0 UKCP_RELATIVE_HUMIDITY.vstep = 10.0 +UKCP_RELATIVE_HUMIDITY_OBS = UKCP_RELATIVE_HUMIDITY.copy() +UKCP_RELATIVE_HUMIDITY_OBS.tag = "UKCP_rh_obs" +UKCP_RELATIVE_HUMIDITY_OBS.extendcolbar = "neither" +UKCP_RELATIVE_HUMIDITY_OBS.vmid = None UKCP_RELATIVE_HUMIDITY_ANOM = UKCP_RELATIVE_HUMIDITY.copy() UKCP_RELATIVE_HUMIDITY_ANOM.tag = "UKCP_rh_anom" @@ -849,6 +857,11 @@ def __repr__(self): UKCP_PMSL_ANOM.vmid = 0.0 UKCP_PMSL_ANOM.vstep = 5.0 +UKCP_PMSL_OBS = UKCP_PMSL_ANOM.copy() +UKCP_PMSL_OBS.tag = "UKCP_pmsl_obs" +UKCP_PMSL_OBS.extendcolbar = "neither" +UKCP_PMSL_OBS.vmid = None + # Amount of precipitation in 1 day UKCP_1DAY_PRECIP = UKCPNEAT.copy() UKCP_1DAY_PRECIP.tag = "UKCP_1day_precip" @@ -909,3 +922,39 @@ def __repr__(self): UKCP_RAINFALL.extendcolbar = "neither" UKCP_RAINFALL.cpal = "Blues" UKCP_RAINFALL.vmid = None + +# Days of ground frost +UKCP_GROUND_FROST = UKCPNEAT.copy() +UKCP_GROUND_FROST.tag = "UKCP_groundfrost" +UKCP_GROUND_FROST.default_barlabel = "Days of ground frost, days" +UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("day") +UKCP_GROUND_FROST.extendcolbar = "neither" +UKCP_GROUND_FROST.cpal = "Blues" +UKCP_GROUND_FROST.vmid = None + +# Days of snow lying +UKCP_SNOW_LYING = UKCPNEAT.copy() +UKCP_SNOW_LYING.tag = "UKCP_snowLying" +UKCP_SNOW_LYING.default_barlabel = "Days of snow lying, days" +UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("day") +UKCP_SNOW_LYING.extendcolbar = "neither" +UKCP_SNOW_LYING.cpal = "Blues" +UKCP_SNOW_LYING.vmid = None + +# Sunshine hours +UKCP_SUN = UKCPNEAT.copy() +UKCP_SUN.tag = "UKCP_sun" +UKCP_SUN.default_barlabel = "Sunshine, hours" +UKCP_SUN.preferred_unit = cf_units.Unit("hr") +UKCP_SUN.extendcolbar = "neither" +UKCP_SUN.cpal = "Blues" +UKCP_SUN.vmid = None + +# Vapour pressure (hPa) +UKCP_VAPOUR_PRESSURE = UKCPNEAT.copy() +UKCP_VAPOUR_PRESSURE.tag = "UKCP_sun" +UKCP_VAPOUR_PRESSURE.default_barlabel = "Vapour pressure, hPa" +UKCP_VAPOUR_PRESSURE.preferred_unit = cf_units.Unit("hPa") +UKCP_VAPOUR_PRESSURE.extendcolbar = "neither" +UKCP_VAPOUR_PRESSURE.cpal = "Blues" +UKCP_VAPOUR_PRESSURE.vmid = None diff --git a/ukcp_dp/utils/_utils.py b/ukcp_dp/utils/_utils.py index d9c3cd9..708cca9 100644 --- a/ukcp_dp/utils/_utils.py +++ b/ukcp_dp/utils/_utils.py @@ -64,7 +64,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection): elif "sfcWind" in var_id or "wsgmax10m" in var_id: # Wind speed at 10m (m s-1) - if "Anom" in var_id: + if collection == COLLECTION_OBS: + plot_settings = stds.UKCP_WIND_OBS.copy() + elif "Anom" in var_id: plot_settings = stds.UKCP_WIND_ANOM.copy() else: plot_settings = stds.UKCP_WIND.copy() @@ -93,7 +95,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection): elif "hurs" in var_id: # Relative humidity at 1.5m (%) - if "Anom" in var_id: + if collection == COLLECTION_OBS: + plot_settings = stds.UKCP_RELATIVE_HUMIDITY_OBS.copy() + elif "Anom" in var_id: plot_settings = stds.UKCP_RELATIVE_HUMIDITY_ANOM.copy() else: plot_settings = stds.UKCP_RELATIVE_HUMIDITY.copy() @@ -107,7 +111,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection): elif "psl" in var_id: # Sea level pressure (hPa) - if "Anom" in var_id: + if collection == COLLECTION_OBS: + plot_settings = stds.UKCP_PMSL_OBS.copy() + elif "Anom" in var_id: plot_settings = stds.UKCP_PMSL_ANOM.copy() else: # TODO do we need a non-ANOM version? @@ -141,6 +147,22 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection): else: plot_settings = stds.UKCP_SWRAD_NET_MONTHLY.copy() + elif var_id == "groundfrost": + # Days of ground frost (days) + plot_settings = stds.UKCP_GROUND_FROST.copy() + + elif var_id == "pv": + # Vapour pressure (hPa) + plot_settings = stds.UKCP_VAPOUR_PRESSURE.copy() + + elif var_id == "snowLying": + # Days of snow lying (days) + plot_settings = stds.UKCP_SNOW_LYING.copy() + + elif var_id == "sun": + # Sunshine hours (h) + plot_settings = stds.UKCP_SUN.copy() + else: plot_settings = stds.UKCPNEAT.copy() From cf44d10a36f1db678de9d6b05bde21b5ae2fdf52 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Tue, 5 Apr 2022 16:07:12 +0100 Subject: [PATCH 08/20] fix issue with file selection for HadUK area --- ukcp_dp/file_finder/_land_obs.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ukcp_dp/file_finder/_land_obs.py b/ukcp_dp/file_finder/_land_obs.py index f054d1b..aa38d11 100644 --- a/ukcp_dp/file_finder/_land_obs.py +++ b/ukcp_dp/file_finder/_land_obs.py @@ -121,8 +121,20 @@ def _get_date_ranges(input_data, variable): if input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y": return ["196101-199012", "198101-201012"] - # assume this must be a region selection + end_year = "2020" + + if variable in ["tas", "tasmax", "tasmin"]: + start_year = "1884" if variable == "rainfall": - return ["186201-202012"] + start_year = "1862" + if variable == "sun": + start_year = "1919" + if variable == "sfcWind": + start_year = "1969" + if variable in ["psl", "hurs", "pv", "groundfrost"]: + start_year = "1961" + if variable == "snowLying": + start_year = "1971" + + return [f"{start_year}01-{end_year}12"] - return ["188401-202012"] From eea293a76a4e718db4ab1713f1838f9929a7ab3c Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Tue, 5 Apr 2022 16:11:10 +0100 Subject: [PATCH 09/20] update standard units for ground frost and snow lying --- ukcp_dp/utils/_standards_class.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ukcp_dp/utils/_standards_class.py b/ukcp_dp/utils/_standards_class.py index cd91762..bd8c614 100644 --- a/ukcp_dp/utils/_standards_class.py +++ b/ukcp_dp/utils/_standards_class.py @@ -927,7 +927,7 @@ def __repr__(self): UKCP_GROUND_FROST = UKCPNEAT.copy() UKCP_GROUND_FROST.tag = "UKCP_groundfrost" UKCP_GROUND_FROST.default_barlabel = "Days of ground frost, days" -UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("day") +UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("1.0") UKCP_GROUND_FROST.extendcolbar = "neither" UKCP_GROUND_FROST.cpal = "Blues" UKCP_GROUND_FROST.vmid = None @@ -936,7 +936,7 @@ def __repr__(self): UKCP_SNOW_LYING = UKCPNEAT.copy() UKCP_SNOW_LYING.tag = "UKCP_snowLying" UKCP_SNOW_LYING.default_barlabel = "Days of snow lying, days" -UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("day") +UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("1.0") UKCP_SNOW_LYING.extendcolbar = "neither" UKCP_SNOW_LYING.cpal = "Blues" UKCP_SNOW_LYING.vmid = None @@ -952,7 +952,7 @@ def __repr__(self): # Vapour pressure (hPa) UKCP_VAPOUR_PRESSURE = UKCPNEAT.copy() -UKCP_VAPOUR_PRESSURE.tag = "UKCP_sun" +UKCP_VAPOUR_PRESSURE.tag = "UKCP_pv" UKCP_VAPOUR_PRESSURE.default_barlabel = "Vapour pressure, hPa" UKCP_VAPOUR_PRESSURE.preferred_unit = cf_units.Unit("hPa") UKCP_VAPOUR_PRESSURE.extendcolbar = "neither" From 4724189a0512d59ebbb4c06f072968811052c44d Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 6 Apr 2022 12:11:35 +0100 Subject: [PATCH 10/20] update single map plot fix issue where the range of values to be plotted is 0 --- ukcp_dp/plotters/_single_map_plotter.py | 37 ++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/ukcp_dp/plotters/_single_map_plotter.py b/ukcp_dp/plotters/_single_map_plotter.py index 26090fa..4b955f9 100644 --- a/ukcp_dp/plotters/_single_map_plotter.py +++ b/ukcp_dp/plotters/_single_map_plotter.py @@ -40,14 +40,26 @@ def _generate_subplots(self, cube, plot_settings, fig): """ LOG.debug("_generate_subplots") + plot_settings.vrange, plot_settings.vstep = self._get_data_range(cube) + plot_settings.vmid = _get_mid_point(plot_settings.vrange) + if self._is_landscape(cube, 1.25) is True: gs_top = 0.79 gs_bottom = 0.14 gs_left = 0.02 gs_right = 0.98 - # Position of the colour-bar Axes: [left,bottom, width,height] - plot_settings.bar_position = [0.25, 0.08, 0.5, 0.025] + if ( + plot_settings.vstep == 1 + and plot_settings.vrange[1] - plot_settings.vrange[0] == 2 + ): + # special case + # Position of the colour-bar Axes: [left,bottom, width,height] + plot_settings.bar_position = [0.25, 0.08, 0.2, 0.025] + else: + # Position of the colour-bar Axes: [left,bottom, width,height] + plot_settings.bar_position = [0.25, 0.08, 0.5, 0.025] + plot_settings.bar_orientation = "horizontal" else: # portrait @@ -56,16 +68,23 @@ def _generate_subplots(self, cube, plot_settings, fig): gs_left = 0.15 gs_right = 0.8 - # Position of the colour-bar Axes: [left,bottom, width,height] - plot_settings.bar_position = [0.82, 0.25, 0.025, 0.5] + if ( + plot_settings.vstep == 1 + and plot_settings.vrange[1] - plot_settings.vrange[0] == 2 + ): + # special case + # Position of the colour-bar Axes: [left,bottom, width,height] + plot_settings.bar_position = [0.82, 0.25, 0.025, 0.2] + else: + # special case + # Position of the colour-bar Axes: [left,bottom, width,height] + plot_settings.bar_position = [0.82, 0.25, 0.025, 0.5] + plot_settings.bar_orientation = "vertical" grid_spec = gridspec.GridSpec(1, 1) grid_spec.update(top=gs_top, bottom=gs_bottom, left=gs_left, right=gs_right) - plot_settings.vrange, plot_settings.vstep = self._get_data_range(cube) - plot_settings.vmid = _get_mid_point(plot_settings.vrange) - result = self._add_sub_plot(fig, grid_spec[0, 0], plot_settings, cube) return result @@ -106,6 +125,10 @@ def _get_data_range(self, cube): if step > 2 and cube_min + (step * 10) > cube_max: cube_max = cube_min + (step * 10) + if cube_min == cube_max: + cube_max = cube_max + 2 + step = 1 + return [cube_min, cube_max], step def _add_sub_plot(self, fig, grid, plot_settings, data): From 54c2208f51571c772aa978333800124ee663b9c4 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 7 Apr 2022 15:01:09 +0100 Subject: [PATCH 11/20] land-cmp mon-20y files now contain yyyymm aux coord The exception to this is the wsgmax10m/mon-20y files --- ukcp_dp/data_extractor/_data_extractor.py | 1 + ukcp_dp/data_extractor/_utils.py | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py index 5af36df..a465522 100644 --- a/ukcp_dp/data_extractor/_data_extractor.py +++ b/ukcp_dp/data_extractor/_data_extractor.py @@ -156,6 +156,7 @@ def _get_anomaly_cube(self, file_list, climatology_file_list): self.input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), self.input_data.get_value(InputType.TIME_PERIOD), self.input_data.get_value(InputType.COLLECTION), + self.input_data.get_value(InputType.VARIABLE), ) return anomaly diff --git a/ukcp_dp/data_extractor/_utils.py b/ukcp_dp/data_extractor/_utils.py index 35b0ccd..bd2801e 100644 --- a/ukcp_dp/data_extractor/_utils.py +++ b/ukcp_dp/data_extractor/_utils.py @@ -24,6 +24,7 @@ def get_anomaly( temporal_average_type, time_period, collection, + variable, ): """ Generate a cube containing the anomaly values. @@ -39,6 +40,7 @@ def get_anomaly( type @param time_period(str): the name of a month or season or 'all' @param collection(str): the collection + @param variable(str): the variable """ if temporal_average_type == TemporalAverageType.MONTHLY: periods = _get_selected_month_numbers(time_period) @@ -75,10 +77,11 @@ def get_anomaly( except iris.exceptions.CoordinateNotFoundError: pass if collection == COLLECTION_CPM: - try: - cube_absoute_period.remove_coord("yyyymm") - except iris.exceptions.CoordinateNotFoundError: - pass + if variable == "wsgmax10m": + try: + cube_absoute_period.remove_coord("yyyymm") + except iris.exceptions.CoordinateNotFoundError: + pass try: cube_climatology_period.remove_coord("year") except iris.exceptions.CoordinateNotFoundError: From e8753570b0e5ce1e04598766f8813a791a9e2343 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Fri, 8 Apr 2022 12:02:33 +0100 Subject: [PATCH 12/20] CPM data now uses `latitude` inplace of `grid_latitude` The excption is `wsgmax10m`, which still uses `latitude` --- ukcp_dp/plotters/_postage_stamp_map_plotter.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ukcp_dp/plotters/_postage_stamp_map_plotter.py b/ukcp_dp/plotters/_postage_stamp_map_plotter.py index 13f3f40..d78874d 100644 --- a/ukcp_dp/plotters/_postage_stamp_map_plotter.py +++ b/ukcp_dp/plotters/_postage_stamp_map_plotter.py @@ -222,10 +222,12 @@ def _plot_maps_mean_order(self, cube, fig, grid, plot_settings, title_font_size) if self.input_data.get_area_type() == AreaType.BBOX: if self.input_data.get_value(InputType.COLLECTION) in [ - COLLECTION_CPM, COLLECTION_RCM, COLLECTION_RCM_GWL, - ]: + ] or ( + self.input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM + and self.input_data.get_value(InputType.VARIABLE) == "wsgmax10m" + ): # RCM is on a rotated grid ensemble_mean_cube = cube.collapsed( [ From ef8ca3bdd5a4c4ebd977d724b49d09947cdd0337 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 5 May 2022 13:52:59 +0100 Subject: [PATCH 13/20] fix issue with add_aux_coord in data extractor --- ukcp_dp/data_extractor/_data_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py index a465522..7160d21 100644 --- a/ukcp_dp/data_extractor/_data_extractor.py +++ b/ukcp_dp/data_extractor/_data_extractor.py @@ -272,7 +272,7 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal # we need to update the type of ensemble_member_id in order to be able to # process Met Office and CORDEX data together for cube in cubes: - for ind, aux_coord in enumerate(cube.aux_coords): + for aux_coord in cube.aux_coords: if aux_coord.var_name == "ensemble_member_id": if aux_coord.dtype == np.dtype(" Date: Tue, 17 May 2022 16:12:19 +0100 Subject: [PATCH 14/20] Make better use of CVs --- ukcp_dp/vocab_manager/_vocab.py | 36 +++------------------------------ 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index 8c4a783..a06413e 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -6,38 +6,6 @@ class Vocab: VOCAB = { - "ensemble": { - # equivalent to UKCP18_ensemble_member - # UKCP18 values are full values - "01": "00000", - "02": "00605", - "03": "00834", - "04": "01113", - "05": "01554", - "06": "01649", - "07": "01843", - "08": "01935", - "09": "02123", - "10": "02242", - "11": "02305", - "12": "02335", - "13": "02491", - "14": "02832", - "15": "02868", - "16": "bcc-csm1-1", - "17": "CCSM4", - "18": "CESM1-BGC", - "19": "CanESM2", - "20": "CMCC-CM", - "21": "CNRM-CM5", - "22": "EC-EARTH", - "23": "ACCESS1-3", - "24": "HadGEM2-ES", - "25": "IPSL-CM5A-MR", - "26": "MPI-ESM-MR", - "27": "MRI-CGCM3", - "28": "GFDL-ESM2G", - }, "spatial_representation": { # equivalent to UKCP18_resolution # no grid in UKCP18_resolution @@ -243,6 +211,8 @@ def __init__(self): self._load_cv(CV_Type.SCENARIO) self._load_cv(CV_Type.TIME_SLICE_TYPE) + self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME) + self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME] self._load_cv(CV_Type.ADMIN_REGION) self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions" self._load_cv(CV_Type.COUNTRY) @@ -497,7 +467,7 @@ def _get_range(min_value, max_value): "26": ["26"], "27": ["27"], "28": ["28"], - "cordex": list(_get_range(100, 177).values()), + "land-euro-cordex": list(_get_range(100, 166).values()), } From d3306ca4b097cd8d02de60a0a1a863ea8244ea03 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 18 May 2022 13:38:42 +0100 Subject: [PATCH 15/20] refactor file finder --- ukcp_dp/file_finder/_file_finder.py | 531 +--------------------------- ukcp_dp/file_finder/_land_cm.py | 270 ++++++++++++++ ukcp_dp/file_finder/_land_prob.py | 286 +++++++++++++++ 3 files changed, 564 insertions(+), 523 deletions(-) create mode 100644 ukcp_dp/file_finder/_land_cm.py create mode 100644 ukcp_dp/file_finder/_land_prob.py diff --git a/ukcp_dp/file_finder/_file_finder.py b/ukcp_dp/file_finder/_file_finder.py index 0df2fd3..0df4837 100644 --- a/ukcp_dp/file_finder/_file_finder.py +++ b/ukcp_dp/file_finder/_file_finder.py @@ -6,11 +6,9 @@ import os from ukcp_dp.constants import ( - DATA_DIR, DATA_SERVICE_URL, COLLECTION_OBS, COLLECTION_PROB, - COLLECTION_PROB_MIN_YEAR, COLLECTION_CPM, COLLECTION_DERIVED, COLLECTION_GCM, @@ -18,11 +16,10 @@ COLLECTION_RCM_GWL, COLLECTION_MARINE, InputType, - OTHER_MAX_YEAR, - AreaType, - TemporalAverageType, ) +from ._land_cm import get_cm_file_list +from ._land_prob import get_prob_file_list from ._land_obs import get_obs_file_list @@ -63,7 +60,7 @@ def get_file_lists(input_data): COLLECTION_PROB, COLLECTION_MARINE, ]: - file_list["main"] = _get_prob_file_list(input_data) + file_list["main"] = get_prob_file_list(input_data) elif input_data.get_value(InputType.COLLECTION) in [ COLLECTION_CPM, @@ -72,10 +69,12 @@ def get_file_lists(input_data): COLLECTION_RCM, COLLECTION_RCM_GWL, ]: - file_list["main"] = _get_cm_file_list(input_data) + file_list["main"] = get_cm_file_list(input_data, None) if input_data.get_value(InputType.BASELINE) is not None: - file_list["baseline"] = _get_file_list_for_baseline(input_data) + file_list["baseline"] = get_cm_file_list( + input_data, input_data.get_value(InputType.BASELINE) + ) elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS: file_list["main"] = get_obs_file_list(input_data) @@ -88,7 +87,7 @@ def get_file_lists(input_data): if input_data.get_value(InputType.COLLECTION) == COLLECTION_PROB: file_list_overlay = file_list["main"] else: - file_list_overlay = _get_prob_file_list(input_data) + file_list_overlay = get_prob_file_list(input_data) if len(file_list_overlay) == 1: file_list["overlay"] = file_list_overlay @@ -123,517 +122,3 @@ def _get_absolute_path(file_path): path = DATA_SERVICE_URL + path path = path.rstrip("*") return path - - -def _get_prob_file_list(input_data): - """ - Get a list of files based on the data provided in the input data. As this - may be the file list for the overlay, some fields are not from the user - input. - - @param input_data (InputData): an InputData object - - @return a dict where - key: (str) variable name - value: list of lists where: - each list is a list of files per scenario, per variable, including - their full paths - """ - variables = input_data.get_value(InputType.VARIABLE) - - spatial_representation = _get_prob_spatial_representation(input_data) - - file_lists_per_variable = {} - - # if this is a selection for on overlay then the dates will not have been - # validated against this dataset. Check the dates and adjust the minimum if - # needed - year_maximum = input_data.get_value(InputType.YEAR_MAXIMUM) - year_minimum = input_data.get_value(InputType.YEAR_MINIMUM) - if year_maximum < COLLECTION_PROB_MIN_YEAR: - return {} - - if year_minimum < COLLECTION_PROB_MIN_YEAR: - year_minimum = COLLECTION_PROB_MIN_YEAR - - # December's data is included with the next year so if a single year has - # been selected - if year_minimum == year_maximum: - year_maximum = year_maximum + 1 - - for variable in variables: - # generate a list of files for each variable - # NB the marine data are all annual - - file_list_per_scenario = [] - for scenario in input_data.get_value(InputType.SCENARIO): - file_list_per_scenario.extend( - _get_file_list_per_scenario( - input_data, - scenario, - spatial_representation, - variable, - year_minimum, - year_maximum, - ) - ) - - file_lists_per_variable[variable] = file_list_per_scenario - - return file_lists_per_variable - - -def _get_file_list_per_scenario( - input_data, scenario, spatial_representation, variable, year_minimum, year_maximum -): - # generate a list of files for each scenario - file_list_per_data_type = [] - for data_type in input_data.get_value(InputType.DATA_TYPE): - file_path = _get_prob_file_path( - data_type, input_data, scenario, spatial_representation, variable - ) - - if ( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - == TemporalAverageType.ANNUAL - or spatial_representation != "25km" - or (input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE) - ): - # current thinking is that there will only be one file - file_name = "*" - file_list_per_data_type.append([os.path.join(file_path, file_name)]) - - elif input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y": - scenario_file_list = [] - - for year in range(year_minimum, (year_maximum + 1)): - # We cannot check for COLLECTION_PROB as this may be an - # overlay - if ( - input_data.get_value(InputType.COLLECTION) != COLLECTION_MARINE - and year == OTHER_MAX_YEAR - ): - # there is not data for December of the last year - continue - file_name = _get_prob_file_name_for_year( - data_type, - input_data, - scenario, - spatial_representation, - variable, - year, - ) - scenario_file_list.append(os.path.join(file_path, file_name)) - - file_list_per_data_type.append(scenario_file_list) - - else: - # InputType.TIME_SLICE_TYPE) == '20y' or '30y' - file_name = _get_prob_file_name_for_slice( - data_type, input_data, scenario, spatial_representation, variable - ) - file_list_per_data_type.append([os.path.join(file_path, file_name)]) - - return file_list_per_data_type - - -def _get_prob_spatial_representation(input_data): - spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION) - - if spatial_representation == AreaType.RIVER_BASIN: - spatial_representation = RIVER - elif spatial_representation == AreaType.ADMIN_REGION: - spatial_representation = REGION - elif spatial_representation == AreaType.COUNTRY: - pass - else: - # we cannot rely on the input value as this file list may be for the - # overlay - spatial_representation = "25km" - - return spatial_representation - - -def _get_prob_file_path( - data_type, input_data, scenario, spatial_representation, variable -): - - if input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE: - - file_path = os.path.join( - DATA_DIR, - COLLECTION_MARINE, - input_data.get_value(InputType.METHOD), - scenario, - variable, - VERSION, - ) - else: - - file_path = os.path.join( - DATA_DIR, - COLLECTION_PROB, - "uk", - spatial_representation, - scenario, - data_type, - input_data.get_value(InputType.BASELINE), - input_data.get_value(InputType.TIME_SLICE_TYPE), - variable, - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), - VERSION, - ) - - return file_path - - -def _get_prob_file_name_for_year( - data_type, input_data, scenario, spatial_representation, variable, year -): - - # input_data.get_value(InputType.TIME_SLICE_TYPE) == '1y': - # the year starts in December, so subtract 1 from the year - start_date = "{year}{mon_day}".format(year=year - 1, mon_day=START_MONTH_DAY) - end_date = "{year}{mon_day}".format(year=year, mon_day=END_MONTH_DAY) - - return _get_prob_file_name( - data_type, - input_data, - scenario, - spatial_representation, - variable, - start_date, - end_date, - ) - - -def _get_prob_file_name_for_slice( - data_type, input_data, scenario, spatial_representation, variable -): - # input_data.get_value(InputType.TIME_SLICE_TYPE) == 20y or 30y - start_date = "20091201" - end_date = "20991130" - - return _get_prob_file_name( - data_type, - input_data, - scenario, - spatial_representation, - variable, - start_date, - end_date, - ) - - -def _get_prob_file_name( - data_type, - input_data, - scenario, - spatial_representation, - variable, - start_date, - end_date, -): - - return_period = input_data.get_value(InputType.RETURN_PERIOD) - - if return_period is None: - file_name = ( - "{variable}_{scenario}_{collection}_uk_" - "{spatial_representation}_{data_type}_{baseline}_" - "{time_slice_type}_{temporal_type}_{start_data}-" - "{end_date}.nc".format( - variable=variable, - scenario=scenario, - collection=COLLECTION_PROB, - spatial_representation=spatial_representation, - data_type=data_type, - baseline=input_data.get_value(InputType.BASELINE), - time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE), - temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), - start_data=start_date, - end_date=end_date, - ) - ) - - else: - file_name = ( - "{variable}_{return_period}_{scenario}_{collection}_uk_" - "{spatial_representation}_{data_type}_{baseline}_" - "{time_slice_type}_{temporal_type}_{start_data}-" - "{end_date}.nc".format( - variable=variable, - return_period=return_period, - scenario=scenario, - collection=COLLECTION_PROB, - spatial_representation=spatial_representation, - data_type=data_type, - baseline=input_data.get_value(InputType.BASELINE), - time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE), - temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), - start_data=start_date, - end_date=end_date, - ) - ) - - return file_name - - -def _get_cm_file_list(input_data): - """ - Get a list of files based on the data provided in the input data. - - @param input_data (InputData): an InputData object - - @return a dict where - key: (str) variable name - value: list of lists where: - each list is a list of files per scenario, per variable, including - their full paths - """ - return _get_cm_file_list_for_range(input_data, None) - - -def _get_file_list_for_baseline(input_data): - """ - Get a list of files for the baseline based on the data provided in the - input data. - - @param input_data (InputData): an InputData object - - @return a dict where - key: (str) variable name - value: list of lists where: - each list is a list of files per scenario, per variable, including - their full paths - """ - baseline = input_data.get_value(InputType.BASELINE) - return _get_cm_file_list_for_range(input_data, baseline) - - -def _get_cm_file_list_for_range(input_data, baseline): - variables = input_data.get_value(InputType.VARIABLE) - - spatial_representation = _get_cm_spatial_representation(input_data) - - file_lists_per_variable = {} - - for variable in variables: - # generate a list of files for each variable - # we need to use the variable root and calculate the anomaly later - variable_prefix = variable.split("Anom")[0] - - file_list_per_scenario = [] - for scenario in input_data.get_value(InputType.SCENARIO): - # generate a list of files for each scenario - - ensemble_file_list = [] - for ensemble in input_data.get_value(InputType.ENSEMBLE): - file_path = _get_cm_file_path( - input_data, - spatial_representation, - variable_prefix, - scenario, - ensemble, - baseline, - ) - - if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in [ - "1hr", - "3hr", - ]: - # we need lots of files - for year in range( - input_data.get_value(InputType.YEAR_MINIMUM) - 1, - input_data.get_value(InputType.YEAR_MAXIMUM), - ): - for month in range(0, 12): - if ( - year == input_data.get_value(InputType.YEAR_MINIMUM) - 1 - and month < 11 - ): - continue - if ( - year == input_data.get_value(InputType.YEAR_MAXIMUM) - 1 - and month > 10 - ): - continue - date_range = "{year}{month}01-{year}{month_end}30".format( - year=year, - month=MONTH_NUMBERS[month], - month_end=MONTH_NUMBERS[month], - ) - file_name = _get_cm_file_name( - input_data, - spatial_representation, - variable_prefix, - scenario, - ensemble, - baseline, - date_range, - ) - ensemble_file_list.append( - os.path.join(file_path, file_name) - ) - else: - file_name = _get_cm_file_name( - input_data, - spatial_representation, - variable_prefix, - scenario, - ensemble, - baseline, - ) - ensemble_file_list.append(os.path.join(file_path, file_name)) - - file_list_per_scenario.append(ensemble_file_list) - - file_lists_per_variable[variable] = file_list_per_scenario - - return file_lists_per_variable - - -def _get_cm_spatial_representation(input_data): - spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION) - - if spatial_representation == AreaType.RIVER_BASIN: - spatial_representation = RIVER - elif spatial_representation == AreaType.ADMIN_REGION: - spatial_representation = REGION - return spatial_representation - - -def _get_cm_file_path( - input_data, spatial_representation, variable, scenario, ensemble, baseline -): - if baseline is None and input_data.get_value(InputType.TIME_SLICE_TYPE) is None: - temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - - elif ( - baseline == "b8100" or input_data.get_value(InputType.TIME_SLICE_TYPE) == "20y" - ): - temporal_average_type = "{}-20y".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - ) - - elif ( - baseline == "b6190" - or baseline == "b8110" - or input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y" - ): - temporal_average_type = "{}-30y".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - ) - - else: - temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - - collection = input_data.get_value(InputType.COLLECTION) - - if baseline is not None and scenario in ["gwl2", "gwl4"]: - # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 - collection = COLLECTION_GCM - scenario = "rcp85" - - file_path = os.path.join( - DATA_DIR, - collection, - "uk", - spatial_representation, - scenario, - ensemble, - variable, - temporal_average_type, - VERSION, - ) - - return file_path - - -def _get_cm_file_name( - input_data, - spatial_representation, - variable, - scenario, - ensemble, - baseline, - year=None, -): - if baseline is None: - if ( - input_data.get_value(InputType.TIME_SLICE_TYPE) is None - or input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y" - ) and input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) not in [ - "1hr", - "3hr", - ]: - # there will only be one file - return "*" - - if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]: - temporal_average_type = input_data.get_value( - InputType.TEMPORAL_AVERAGE_TYPE - ) - else: - temporal_average_type = "{}-{}".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), - input_data.get_value(InputType.TIME_SLICE_TYPE), - ) - - if input_data.get_value(InputType.COLLECTION) in [ - COLLECTION_GCM, - COLLECTION_DERIVED, - ]: - date_range = "200912-209911" - elif input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM: - if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]: - date_range = "{}".format(year) - elif input_data.get_value(InputType.YEAR_MINIMUM) == 1981: - date_range = "198012-200011" - elif input_data.get_value(InputType.YEAR_MINIMUM) == 2021: - date_range = "202012-204011" - elif input_data.get_value(InputType.YEAR_MINIMUM) == 2061: - date_range = "206012-208011" - else: - date_range = "200912-207911" - - elif baseline == "b8100": - temporal_average_type = "{}-20y".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - ) - date_range = "198012-200011" - - elif baseline == "b6190": - temporal_average_type = "{}-30y".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - ) - date_range = "196012-199011" - - elif baseline == "b8110": - temporal_average_type = "{}-30y".format( - input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - ) - date_range = "198012-201011" - - collection = input_data.get_value(InputType.COLLECTION) - - if baseline is not None and scenario in ["gwl2", "gwl4"]: - # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 - collection = COLLECTION_GCM - scenario = "rcp85" - - file_name = ( - "{variable}_{scenario}_{collection}_uk_" - "{spatial_representation}_{ensemble}_" - "{temporal_average_type}_{date}.nc".format( - variable=variable, - scenario=scenario, - collection=collection, - spatial_representation=spatial_representation, - ensemble=ensemble, - temporal_average_type=temporal_average_type, - date=date_range, - ) - ) - - return file_name diff --git a/ukcp_dp/file_finder/_land_cm.py b/ukcp_dp/file_finder/_land_cm.py new file mode 100644 index 0000000..ea37c1f --- /dev/null +++ b/ukcp_dp/file_finder/_land_cm.py @@ -0,0 +1,270 @@ +""" +This module provides the method get_file_lists. + +""" +import logging +import os + +from ukcp_dp.constants import ( + DATA_DIR, + COLLECTION_CPM, + COLLECTION_DERIVED, + COLLECTION_GCM, + InputType, + AreaType, +) + + +LOG = logging.getLogger(__name__) + + +# month and day +START_MONTH_DAY = "1201" +END_MONTH_DAY = "1130" + +MONTH_NUMBERS = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] + +VERSION = "latest" + +RIVER = "river" +REGION = "region" + + +def get_cm_file_list(input_data, baseline): + """ + Get a list of files based on the data provided in the input data. + + @param input_data (InputData): an InputData object + @param baseline (str): the baseline, may be None + + @return a dict where + key: (str) variable name + value: list of lists where: + each list is a list of files per scenario, per variable, including + their full paths + """ + variables = input_data.get_value(InputType.VARIABLE) + + spatial_representation = _get_cm_spatial_representation(input_data) + + file_lists_per_variable = {} + + for variable in variables: + # generate a list of files for each variable + # we need to use the variable root and calculate the anomaly later + variable_prefix = variable.split("Anom")[0] + + file_list_per_scenario = [] + for scenario in input_data.get_value(InputType.SCENARIO): + # generate a list of files for each scenario + + ensemble_file_list = [] + for ensemble in input_data.get_value(InputType.ENSEMBLE): + file_path = _get_cm_file_path( + input_data, + spatial_representation, + variable_prefix, + scenario, + ensemble, + baseline, + ) + + if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in [ + "1hr", + "3hr", + ]: + # we need lots of files + for year in range( + input_data.get_value(InputType.YEAR_MINIMUM) - 1, + input_data.get_value(InputType.YEAR_MAXIMUM), + ): + for month in range(0, 12): + if ( + year == input_data.get_value(InputType.YEAR_MINIMUM) - 1 + and month < 11 + ): + continue + if ( + year == input_data.get_value(InputType.YEAR_MAXIMUM) - 1 + and month > 10 + ): + continue + date_range = "{year}{month}01-{year}{month_end}30".format( + year=year, + month=MONTH_NUMBERS[month], + month_end=MONTH_NUMBERS[month], + ) + file_name = _get_cm_file_name( + input_data, + spatial_representation, + variable_prefix, + scenario, + ensemble, + baseline, + date_range, + ) + ensemble_file_list.append( + os.path.join(file_path, file_name) + ) + else: + file_name = _get_cm_file_name( + input_data, + spatial_representation, + variable_prefix, + scenario, + ensemble, + baseline, + ) + ensemble_file_list.append(os.path.join(file_path, file_name)) + + file_list_per_scenario.append(ensemble_file_list) + + file_lists_per_variable[variable] = file_list_per_scenario + + return file_lists_per_variable + + +def _get_cm_spatial_representation(input_data): + spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION) + + if spatial_representation == AreaType.RIVER_BASIN: + spatial_representation = RIVER + elif spatial_representation == AreaType.ADMIN_REGION: + spatial_representation = REGION + return spatial_representation + + +def _get_cm_file_path( + input_data, spatial_representation, variable, scenario, ensemble, baseline +): + if baseline is None and input_data.get_value(InputType.TIME_SLICE_TYPE) is None: + temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + + elif ( + baseline == "b8100" or input_data.get_value(InputType.TIME_SLICE_TYPE) == "20y" + ): + temporal_average_type = "{}-20y".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + ) + + elif ( + baseline == "b6190" + or baseline == "b8110" + or input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y" + ): + temporal_average_type = "{}-30y".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + ) + + else: + temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + + collection = input_data.get_value(InputType.COLLECTION) + + if baseline is not None and scenario in ["gwl2", "gwl4"]: + # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 + collection = COLLECTION_GCM + scenario = "rcp85" + + file_path = os.path.join( + DATA_DIR, + collection, + "uk", + spatial_representation, + scenario, + ensemble, + variable, + temporal_average_type, + VERSION, + ) + + return file_path + + +def _get_cm_file_name( + input_data, + spatial_representation, + variable, + scenario, + ensemble, + baseline, + year=None, +): + if baseline is None: + if ( + input_data.get_value(InputType.TIME_SLICE_TYPE) is None + or input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y" + ) and input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) not in [ + "1hr", + "3hr", + ]: + # there will only be one file + return "*" + + if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]: + temporal_average_type = input_data.get_value( + InputType.TEMPORAL_AVERAGE_TYPE + ) + else: + temporal_average_type = "{}-{}".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), + input_data.get_value(InputType.TIME_SLICE_TYPE), + ) + + if input_data.get_value(InputType.COLLECTION) in [ + COLLECTION_GCM, + COLLECTION_DERIVED, + ]: + date_range = "200912-209911" + elif input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM: + if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]: + date_range = "{}".format(year) + elif input_data.get_value(InputType.YEAR_MINIMUM) == 1981: + date_range = "198012-200011" + elif input_data.get_value(InputType.YEAR_MINIMUM) == 2021: + date_range = "202012-204011" + elif input_data.get_value(InputType.YEAR_MINIMUM) == 2061: + date_range = "206012-208011" + else: + date_range = "200912-207911" + + elif baseline == "b8100": + temporal_average_type = "{}-20y".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + ) + date_range = "198012-200011" + + elif baseline == "b6190": + temporal_average_type = "{}-30y".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + ) + date_range = "196012-199011" + + elif baseline == "b8110": + temporal_average_type = "{}-30y".format( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + ) + date_range = "198012-201011" + + collection = input_data.get_value(InputType.COLLECTION) + + if baseline is not None and scenario in ["gwl2", "gwl4"]: + # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 + collection = COLLECTION_GCM + scenario = "rcp85" + + file_name = ( + "{variable}_{scenario}_{collection}_uk_" + "{spatial_representation}_{ensemble}_" + "{temporal_average_type}_{date}.nc".format( + variable=variable, + scenario=scenario, + collection=collection, + spatial_representation=spatial_representation, + ensemble=ensemble, + temporal_average_type=temporal_average_type, + date=date_range, + ) + ) + + return file_name diff --git a/ukcp_dp/file_finder/_land_prob.py b/ukcp_dp/file_finder/_land_prob.py new file mode 100644 index 0000000..51239fc --- /dev/null +++ b/ukcp_dp/file_finder/_land_prob.py @@ -0,0 +1,286 @@ +""" +This module provides the method get_file_lists. + +""" +import logging +import os + +from ukcp_dp.constants import ( + DATA_DIR, + COLLECTION_PROB, + COLLECTION_PROB_MIN_YEAR, + COLLECTION_MARINE, + InputType, + OTHER_MAX_YEAR, + AreaType, + TemporalAverageType, +) + + +LOG = logging.getLogger(__name__) + + +# month and day +START_MONTH_DAY = "1201" +END_MONTH_DAY = "1130" + +MONTH_NUMBERS = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] + +VERSION = "latest" + +RIVER = "river" +REGION = "region" + + +def get_prob_file_list(input_data): + """ + Get a list of files based on the data provided in the input data. As this + may be the file list for the overlay, some fields are not from the user + input. + + @param input_data (InputData): an InputData object + + @return a dict where + key: (str) variable name + value: list of lists where: + each list is a list of files per scenario, per variable, including + their full paths + """ + variables = input_data.get_value(InputType.VARIABLE) + + spatial_representation = _get_prob_spatial_representation(input_data) + + file_lists_per_variable = {} + + # if this is a selection for on overlay then the dates will not have been + # validated against this dataset. Check the dates and adjust the minimum if + # needed + year_maximum = input_data.get_value(InputType.YEAR_MAXIMUM) + year_minimum = input_data.get_value(InputType.YEAR_MINIMUM) + if year_maximum < COLLECTION_PROB_MIN_YEAR: + return {} + + if year_minimum < COLLECTION_PROB_MIN_YEAR: + year_minimum = COLLECTION_PROB_MIN_YEAR + + # December's data is included with the next year so if a single year has + # been selected + if year_minimum == year_maximum: + year_maximum = year_maximum + 1 + + for variable in variables: + # generate a list of files for each variable + # NB the marine data are all annual + + file_list_per_scenario = [] + for scenario in input_data.get_value(InputType.SCENARIO): + file_list_per_scenario.extend( + _get_file_list_per_scenario( + input_data, + scenario, + spatial_representation, + variable, + year_minimum, + year_maximum, + ) + ) + + file_lists_per_variable[variable] = file_list_per_scenario + + return file_lists_per_variable + + +def _get_file_list_per_scenario( + input_data, scenario, spatial_representation, variable, year_minimum, year_maximum +): + # generate a list of files for each scenario + file_list_per_data_type = [] + for data_type in input_data.get_value(InputType.DATA_TYPE): + file_path = _get_prob_file_path( + data_type, input_data, scenario, spatial_representation, variable + ) + + if ( + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) + == TemporalAverageType.ANNUAL + or spatial_representation != "25km" + or (input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE) + ): + # current thinking is that there will only be one file + file_name = "*" + file_list_per_data_type.append([os.path.join(file_path, file_name)]) + + elif input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y": + scenario_file_list = [] + + for year in range(year_minimum, (year_maximum + 1)): + # We cannot check for COLLECTION_PROB as this may be an + # overlay + if ( + input_data.get_value(InputType.COLLECTION) != COLLECTION_MARINE + and year == OTHER_MAX_YEAR + ): + # there is not data for December of the last year + continue + file_name = _get_prob_file_name_for_year( + data_type, + input_data, + scenario, + spatial_representation, + variable, + year, + ) + scenario_file_list.append(os.path.join(file_path, file_name)) + + file_list_per_data_type.append(scenario_file_list) + + else: + # InputType.TIME_SLICE_TYPE) == '20y' or '30y' + file_name = _get_prob_file_name_for_slice( + data_type, input_data, scenario, spatial_representation, variable + ) + file_list_per_data_type.append([os.path.join(file_path, file_name)]) + + return file_list_per_data_type + + +def _get_prob_spatial_representation(input_data): + spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION) + + if spatial_representation == AreaType.RIVER_BASIN: + spatial_representation = RIVER + elif spatial_representation == AreaType.ADMIN_REGION: + spatial_representation = REGION + elif spatial_representation == AreaType.COUNTRY: + pass + else: + # we cannot rely on the input value as this file list may be for the + # overlay + spatial_representation = "25km" + + return spatial_representation + + +def _get_prob_file_path( + data_type, input_data, scenario, spatial_representation, variable +): + + if input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE: + + file_path = os.path.join( + DATA_DIR, + COLLECTION_MARINE, + input_data.get_value(InputType.METHOD), + scenario, + variable, + VERSION, + ) + else: + + file_path = os.path.join( + DATA_DIR, + COLLECTION_PROB, + "uk", + spatial_representation, + scenario, + data_type, + input_data.get_value(InputType.BASELINE), + input_data.get_value(InputType.TIME_SLICE_TYPE), + variable, + input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), + VERSION, + ) + + return file_path + + +def _get_prob_file_name_for_year( + data_type, input_data, scenario, spatial_representation, variable, year +): + + # input_data.get_value(InputType.TIME_SLICE_TYPE) == '1y': + # the year starts in December, so subtract 1 from the year + start_date = "{year}{mon_day}".format(year=year - 1, mon_day=START_MONTH_DAY) + end_date = "{year}{mon_day}".format(year=year, mon_day=END_MONTH_DAY) + + return _get_prob_file_name( + data_type, + input_data, + scenario, + spatial_representation, + variable, + start_date, + end_date, + ) + + +def _get_prob_file_name_for_slice( + data_type, input_data, scenario, spatial_representation, variable +): + # input_data.get_value(InputType.TIME_SLICE_TYPE) == 20y or 30y + start_date = "20091201" + end_date = "20991130" + + return _get_prob_file_name( + data_type, + input_data, + scenario, + spatial_representation, + variable, + start_date, + end_date, + ) + + +def _get_prob_file_name( + data_type, + input_data, + scenario, + spatial_representation, + variable, + start_date, + end_date, +): + + return_period = input_data.get_value(InputType.RETURN_PERIOD) + + if return_period is None: + file_name = ( + "{variable}_{scenario}_{collection}_uk_" + "{spatial_representation}_{data_type}_{baseline}_" + "{time_slice_type}_{temporal_type}_{start_data}-" + "{end_date}.nc".format( + variable=variable, + scenario=scenario, + collection=COLLECTION_PROB, + spatial_representation=spatial_representation, + data_type=data_type, + baseline=input_data.get_value(InputType.BASELINE), + time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE), + temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), + start_data=start_date, + end_date=end_date, + ) + ) + + else: + file_name = ( + "{variable}_{return_period}_{scenario}_{collection}_uk_" + "{spatial_representation}_{data_type}_{baseline}_" + "{time_slice_type}_{temporal_type}_{start_data}-" + "{end_date}.nc".format( + variable=variable, + return_period=return_period, + scenario=scenario, + collection=COLLECTION_PROB, + spatial_representation=spatial_representation, + data_type=data_type, + baseline=input_data.get_value(InputType.BASELINE), + time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE), + temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE), + start_data=start_date, + end_date=end_date, + ) + ) + + return file_name From d40a92fa4bba00d0cd9ec96be0da091466235648 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 18 May 2022 13:47:01 +0100 Subject: [PATCH 16/20] Update file finder for cordex collection --- ukcp_dp/file_finder/_land_cm.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ukcp_dp/file_finder/_land_cm.py b/ukcp_dp/file_finder/_land_cm.py index ea37c1f..8892f9c 100644 --- a/ukcp_dp/file_finder/_land_cm.py +++ b/ukcp_dp/file_finder/_land_cm.py @@ -7,12 +7,15 @@ from ukcp_dp.constants import ( DATA_DIR, + COLLECTION_RCM_CORDEX, COLLECTION_CPM, COLLECTION_DERIVED, COLLECTION_GCM, + COLLECTION_RCM, InputType, AreaType, ) +from ukcp_dp.vocab_manager._vocab import get_ensemble_member_set LOG = logging.getLogger(__name__) @@ -159,7 +162,7 @@ def _get_cm_file_path( else: temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) - collection = input_data.get_value(InputType.COLLECTION) + collection = _get_collection(input_data, ensemble) if baseline is not None and scenario in ["gwl2", "gwl4"]: # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 @@ -246,7 +249,7 @@ def _get_cm_file_name( ) date_range = "198012-201011" - collection = input_data.get_value(InputType.COLLECTION) + collection = _get_collection(input_data, ensemble) if baseline is not None and scenario in ["gwl2", "gwl4"]: # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4 @@ -268,3 +271,13 @@ def _get_cm_file_name( ) return file_name + + +def _get_collection(input_data, ensemble): + collection = input_data.get_value(InputType.COLLECTION) + if collection == COLLECTION_RCM and ensemble in get_ensemble_member_set( + COLLECTION_RCM_CORDEX + ): + collection = COLLECTION_RCM_CORDEX + + return collection From 5b23ede435000e79286d4e0c54001e7fd1feb004 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Wed, 18 May 2022 13:48:20 +0100 Subject: [PATCH 17/20] cordex ensemble update --- ukcp_dp/constants.py | 2 +- ukcp_dp/vocab_manager/_vocab.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py index cd73fc7..2f157cf 100644 --- a/ukcp_dp/constants.py +++ b/ukcp_dp/constants.py @@ -180,7 +180,7 @@ def enum(**named_values): COLLECTION_GCM = "land-gcm" COLLECTION_RCM = "land-rcm" COLLECTION_RCM_MIN_YEAR = 1980 -COLLECTION_RCM_CORDEX = "cordex" +COLLECTION_RCM_CORDEX = "land-euro-cordex" COLLECTION_RCM_GWL = "land-rcm-gwl" COLLECTION_MARINE = "marine-sim" COLLECTION_MARINE_MIN_YEAR = 2007 diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index a06413e..df508da 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -188,7 +188,6 @@ def __init__(self): self.vocab["year_maximum"].update(_get_range(3001, 3052)) self.vocab["sampling_id"] = _get_range(1, 4001) self.vocab["random_sampling_count"] = _get_range(100, 4001) - self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"] self.vocab["sampling_percentile_2"] = self.vocab["sampling_percentile_1"] time_period = {"all": "all"} time_period.update(self.vocab["ann"]) @@ -213,6 +212,7 @@ def __init__(self): self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME) self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME] + self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"] self._load_cv(CV_Type.ADMIN_REGION) self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions" self._load_cv(CV_Type.COUNTRY) @@ -467,7 +467,8 @@ def _get_range(min_value, max_value): "26": ["26"], "27": ["27"], "28": ["28"], - "land-euro-cordex": list(_get_range(100, 166).values()), + "land-euro-cordex": list(_get_range(29, 34).values()) + + list(_get_range(100, 165).values()), } From 0a1fc69447659c9712143d0e2c373de58f314dc1 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 19 May 2022 11:28:43 +0100 Subject: [PATCH 18/20] update vocab for CORDEX --- ukcp_dp/vocab_manager/_vocab.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index df508da..c2c717d 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -212,7 +212,8 @@ def __init__(self): self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME) self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME] - self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"] + self._load_cv(CV_Type.ENSEMBLE_MEMBER) + self.vocab["highlighted_ensemble_members"] = self.vocab[CV_Type.ENSEMBLE_MEMBER] self._load_cv(CV_Type.ADMIN_REGION) self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions" self._load_cv(CV_Type.COUNTRY) From 5ebe3410d79c7d3e545f48ed1a1a05a9cb627069 Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Thu, 19 May 2022 11:28:43 +0100 Subject: [PATCH 19/20] update vocab for CORDEX --- ukcp_dp/vocab_manager/_vocab.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index c2c717d..d7b8c9f 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -468,8 +468,7 @@ def _get_range(min_value, max_value): "26": ["26"], "27": ["27"], "28": ["28"], - "land-euro-cordex": list(_get_range(29, 34).values()) - + list(_get_range(100, 165).values()), + "land-euro-cordex": list(_get_range(100, 165).values()), } From 66de34086e3478308d5e42b60df43438d979e58a Mon Sep 17 00:00:00 2001 From: Antony Wilson Date: Mon, 6 Jun 2022 11:14:35 +0100 Subject: [PATCH 20/20] rename land-euro-cordex to land-eurocordex --- ukcp_dp/constants.py | 2 +- ukcp_dp/vocab_manager/_vocab.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py index 2f157cf..49acba9 100644 --- a/ukcp_dp/constants.py +++ b/ukcp_dp/constants.py @@ -180,7 +180,7 @@ def enum(**named_values): COLLECTION_GCM = "land-gcm" COLLECTION_RCM = "land-rcm" COLLECTION_RCM_MIN_YEAR = 1980 -COLLECTION_RCM_CORDEX = "land-euro-cordex" +COLLECTION_RCM_CORDEX = "land-eurocordex" COLLECTION_RCM_GWL = "land-rcm-gwl" COLLECTION_MARINE = "marine-sim" COLLECTION_MARINE_MIN_YEAR = 2007 diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py index d7b8c9f..803d8c0 100644 --- a/ukcp_dp/vocab_manager/_vocab.py +++ b/ukcp_dp/vocab_manager/_vocab.py @@ -468,7 +468,7 @@ def _get_range(min_value, max_value): "26": ["26"], "27": ["27"], "28": ["28"], - "land-euro-cordex": list(_get_range(100, 165).values()), + "land-eurocordex": list(_get_range(100, 165).values()), }