From 6ed040695c410cd3c624958fc72988fa1ad1eea9 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 9 Feb 2022 16:24:27 +0000
Subject: [PATCH 01/20] changes for CORDEX

---
 ukcp_dp/constants.py                      |  1 +
 ukcp_dp/data_extractor/_data_extractor.py | 20 ++++++
 ukcp_dp/validator/_validator.py           |  3 +
 ukcp_dp/vocab_manager/_vocab.py           | 79 +++++++++++++++++++++++
 4 files changed, 103 insertions(+)

diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py
index f43a6eb..b098188 100644
--- a/ukcp_dp/constants.py
+++ b/ukcp_dp/constants.py
@@ -180,6 +180,7 @@ def enum(**named_values):
 COLLECTION_GCM = "land-gcm"
 COLLECTION_RCM = "land-rcm"
 COLLECTION_RCM_MIN_YEAR = 1980
+COLLECTION_RCM_CORDEX = "cordex"
 COLLECTION_RCM_GWL = "land-rcm-gwl"
 COLLECTION_MARINE = "marine-sim"
 COLLECTION_MARINE_MIN_YEAR = 2007
diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py
index 7ee378c..90d156c 100644
--- a/ukcp_dp/data_extractor/_data_extractor.py
+++ b/ukcp_dp/data_extractor/_data_extractor.py
@@ -12,6 +12,7 @@
 from iris.util import unify_time_units
 
 import cf_units
+import numpy as
 from ukcp_dp.constants import (
     COLLECTION_PROB,
     InputType,
@@ -266,6 +267,25 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal
         iris.experimental.equalise_cubes.equalise_attributes(cubes)
         unify_time_units(cubes)
 
+        if collection == COLLECTION_RCM:
+            # we need to update the type of ensemble_member_id in order to be able to
+            # process Met Office and CORDEX data together
+            for cube in cubes:
+                for ind, aux_coord in enumerate(cube.aux_coords):
+                    if aux_coord.var_name == "ensemble_member_id":
+                        if aux_coord.dtype == np.dtype("<U27"):
+                            # replace string23 with string46 to match CORDEX
+                            cube.remove_coord(aux_coord)
+                            value = aux_coord.points.astype(np.dtype("<U46"))
+                            ensemble_coord = iris.coords.AuxCoord(
+                                value,
+                                units=aux_coord.units,
+                                long_name=aux_coord.long_name,
+                                var_name="ensemble_member_id",
+                            )
+                            cube.add_aux_coord(ensemble_coord, ind)
+                        break
+
         try:
             cube = cubes.concatenate_cube()
         except iris.exceptions.ConcatenateError as ex:
diff --git a/ukcp_dp/validator/_validator.py b/ukcp_dp/validator/_validator.py
index 7e52688..a9442ee 100644
--- a/ukcp_dp/validator/_validator.py
+++ b/ukcp_dp/validator/_validator.py
@@ -14,6 +14,7 @@
     COLLECTION_OBS,
     COLLECTION_OBS_MIN_YEAR,
     COLLECTION_RCM,
+    COLLECTION_RCM_CORDEX,
     COLLECTION_RCM_MIN_YEAR,
     COLLECTION_RCM_GWL,
     EXTENDED_PROJECTIONS,
@@ -231,6 +232,8 @@ def _validate_ensembles(self, ensembles, input_type):
         allowed_ensembles = get_ensemble_member_set(
             self.input_data.get_value(InputType.COLLECTION)
         )
+        if self.input_data.get_value(InputType.COLLECTION) == COLLECTION_RCM:
+            allowed_ensembles.extend(get_ensemble_member_set(COLLECTION_RCM_CORDEX))
         if allowed_ensembles is None:
             raise Exception(
                 "Unable to get list of valid ensembles for {}".format(
diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index 7e955b3..2767db8 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -497,6 +497,85 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
+    "cordex": [
+        "29",
+        "30",
+        "31",
+        "32",
+        "33",
+        "AA",
+        "AB",
+        "AC",
+        "AD",
+        "AE",
+        "AF",
+        "AG",
+        "AH",
+        "AI",
+        "AJ",
+        "0A",
+        "0B",
+        "0C",
+        "0D",
+        "0E",
+        "0F",
+        "0G",
+        "0H",
+        "0I",
+        "0J",
+        "BB",
+        "BC",
+        "BD",
+        "BE",
+        "BF",
+        "BG",
+        "BH",
+        "CC",
+        "CE",
+        "CF",
+        "CG",
+        "DC",
+        "DE",
+        "DF",
+        "DG",
+        "EA",
+        "EB",
+        "EC",
+        "ED",
+        "EE",
+        "EF",
+        "EG",
+        "EH",
+        "EJ",
+        "FE",
+        "FF",
+        "FG",
+        "FI",
+        "FJ",
+        "GA",
+        "GB",
+        "GC",
+        "GD",
+        "GF",
+        "GG",
+        "GH",
+        "GJ",
+        "HC",
+        "HG",
+        "IC",
+        "IG",
+        "II",
+        "JA",
+        "JB",
+        "JC",
+        "JD",
+        "JE",
+        "JF",
+        "JG",
+        "JH",
+        "JI",
+        "JJ",
+    ],
 }
 
 

From 6d0eaa6e179af83f9b1d25700b284cdae8b20f03 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 9 Feb 2022 16:33:14 +0000
Subject: [PATCH 02/20] fix import typo

---
 ukcp_dp/data_extractor/_data_extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py
index 90d156c..dfe9ccd 100644
--- a/ukcp_dp/data_extractor/_data_extractor.py
+++ b/ukcp_dp/data_extractor/_data_extractor.py
@@ -12,7 +12,7 @@
 from iris.util import unify_time_units
 
 import cf_units
-import numpy as
+import numpy as np
 from ukcp_dp.constants import (
     COLLECTION_PROB,
     InputType,

From 9a7a90edb565f56116d10143a2fe6e81e3efe3c5 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 10 Feb 2022 08:32:48 +0000
Subject: [PATCH 03/20] fix issue with file finder introduced with
 COLLECTION_OBS

---
 ukcp_dp/file_finder/_file_finder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ukcp_dp/file_finder/_file_finder.py b/ukcp_dp/file_finder/_file_finder.py
index c7d64df..0df2fd3 100644
--- a/ukcp_dp/file_finder/_file_finder.py
+++ b/ukcp_dp/file_finder/_file_finder.py
@@ -74,12 +74,12 @@ def get_file_lists(input_data):
     ]:
         file_list["main"] = _get_cm_file_list(input_data)
 
-    elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS:
-        file_list["main"] = get_obs_file_list(input_data)
-
         if input_data.get_value(InputType.BASELINE) is not None:
             file_list["baseline"] = _get_file_list_for_baseline(input_data)
 
+    elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS:
+        file_list["main"] = get_obs_file_list(input_data)
+
     # the file list for an overlay of probability levels
     if (
         input_data.get_value(InputType.OVERLAY_PROBABILITY_LEVELS) is not None

From e781059b35162552e89c6f1cd5526c0d53140307 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 4 May 2022 16:01:17 +0100
Subject: [PATCH 04/20] Before concatenating cubes remove "month_number" from
 seasonal RCM data

---
 ukcp_dp/data_extractor/_data_extractor.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py
index dfe9ccd..5af36df 100644
--- a/ukcp_dp/data_extractor/_data_extractor.py
+++ b/ukcp_dp/data_extractor/_data_extractor.py
@@ -286,6 +286,17 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal
                             cube.add_aux_coord(ensemble_coord, ind)
                         break
 
+                # the UKCP regional seasonal data has month_number, lets remove it to
+                # match CORDEX
+                if (
+                    self.input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+                    == TemporalAverageType.SEASONAL
+                ):
+                    try:
+                        cube.remove_coord("month_number")
+                    except iris.exceptions.CoordinateNotFoundError:
+                        pass
+
         try:
             cube = cubes.concatenate_cube()
         except iris.exceptions.ConcatenateError as ex:

From 6ea747b71ba4d9fd0ffdda9b074a3ed1cb124a86 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 4 May 2022 16:01:52 +0100
Subject: [PATCH 05/20] CORDEX ensemble members are now 100-177

---
 ukcp_dp/vocab_manager/_vocab.py | 80 +--------------------------------
 1 file changed, 1 insertion(+), 79 deletions(-)

diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index 2767db8..8c4a783 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -497,85 +497,7 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
-    "cordex": [
-        "29",
-        "30",
-        "31",
-        "32",
-        "33",
-        "AA",
-        "AB",
-        "AC",
-        "AD",
-        "AE",
-        "AF",
-        "AG",
-        "AH",
-        "AI",
-        "AJ",
-        "0A",
-        "0B",
-        "0C",
-        "0D",
-        "0E",
-        "0F",
-        "0G",
-        "0H",
-        "0I",
-        "0J",
-        "BB",
-        "BC",
-        "BD",
-        "BE",
-        "BF",
-        "BG",
-        "BH",
-        "CC",
-        "CE",
-        "CF",
-        "CG",
-        "DC",
-        "DE",
-        "DF",
-        "DG",
-        "EA",
-        "EB",
-        "EC",
-        "ED",
-        "EE",
-        "EF",
-        "EG",
-        "EH",
-        "EJ",
-        "FE",
-        "FF",
-        "FG",
-        "FI",
-        "FJ",
-        "GA",
-        "GB",
-        "GC",
-        "GD",
-        "GF",
-        "GG",
-        "GH",
-        "GJ",
-        "HC",
-        "HG",
-        "IC",
-        "IG",
-        "II",
-        "JA",
-        "JB",
-        "JC",
-        "JD",
-        "JE",
-        "JF",
-        "JG",
-        "JH",
-        "JI",
-        "JJ",
-    ],
+    "cordex": list(_get_range(100, 177).values()),
 }
 
 

From 60c934c60dc673b85fdc3309c996823fc66a1999 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 31 Mar 2022 13:05:01 +0100
Subject: [PATCH 06/20] bump version to 2.7.0

---
 RELEASE_NOTES.md     | 11 +++++++++--
 ukcp_dp/constants.py |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 0f775f9..0dbc1f0 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -1,9 +1,16 @@
+# Release 2.7.0
+
+This release addresses:
+
+* Add support for HadUK Grid processes
+
+
 # Release 2.6.0
 
 This release addresses:
 
 * Performance improvements for writing CSV files
-* Work around for files with lat, ong of 0, 0
+* Work around for files with lat, long of 0, 0
 
 
 # Release 2.5.0
@@ -17,7 +24,7 @@ This release addresses:
 
 This release addresses:
 
-* Add option for users to get data as shapesiles for map products
+* Add option for users to get data as shape files for map products
 * Add option for users to set y-axis scale for plume plots
 
 
diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py
index b098188..cd73fc7 100644
--- a/ukcp_dp/constants.py
+++ b/ukcp_dp/constants.py
@@ -3,7 +3,7 @@
 import cartopy.crs as ccrs
 
 
-VERSION = "2.6.0"
+VERSION = "2.7.0"
 
 
 def enum(**named_values):

From f677989a3b74891f9ce0166e135b58f183618d75 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Tue, 5 Apr 2022 12:02:01 +0100
Subject: [PATCH 07/20] Add colour palettes for additional HadUK variables

---
 ukcp_dp/utils/_standards_class.py | 49 +++++++++++++++++++++++++++++++
 ukcp_dp/utils/_utils.py           | 28 ++++++++++++++++--
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/ukcp_dp/utils/_standards_class.py b/ukcp_dp/utils/_standards_class.py
index 385fd38..cd91762 100644
--- a/ukcp_dp/utils/_standards_class.py
+++ b/ukcp_dp/utils/_standards_class.py
@@ -632,6 +632,10 @@ def __repr__(self):
 UKCP_WIND.vmid = None
 UKCP_WIND.vstep = 2.0
 
+UKCP_WIND_OBS = UKCP_WIND.copy()
+UKCP_WIND_OBS.tag = "UKCP_wind_obs"
+UKCP_WIND_OBS.extendcolbar = "neither"
+UKCP_WIND_OBS.vmid = None
 
 # Wind speed anomalies.
 UKCP_WIND_ANOM = UKCP_WIND.copy()
@@ -827,6 +831,10 @@ def __repr__(self):
 UKCP_RELATIVE_HUMIDITY.vmid = 50.0
 UKCP_RELATIVE_HUMIDITY.vstep = 10.0
 
+UKCP_RELATIVE_HUMIDITY_OBS = UKCP_RELATIVE_HUMIDITY.copy()
+UKCP_RELATIVE_HUMIDITY_OBS.tag = "UKCP_rh_obs"
+UKCP_RELATIVE_HUMIDITY_OBS.extendcolbar = "neither"
+UKCP_RELATIVE_HUMIDITY_OBS.vmid = None
 
 UKCP_RELATIVE_HUMIDITY_ANOM = UKCP_RELATIVE_HUMIDITY.copy()
 UKCP_RELATIVE_HUMIDITY_ANOM.tag = "UKCP_rh_anom"
@@ -849,6 +857,11 @@ def __repr__(self):
 UKCP_PMSL_ANOM.vmid = 0.0
 UKCP_PMSL_ANOM.vstep = 5.0
 
+UKCP_PMSL_OBS = UKCP_PMSL_ANOM.copy()
+UKCP_PMSL_OBS.tag = "UKCP_pmsl_obs"
+UKCP_PMSL_OBS.extendcolbar = "neither"
+UKCP_PMSL_OBS.vmid = None
+
 # Amount of precipitation in 1 day
 UKCP_1DAY_PRECIP = UKCPNEAT.copy()
 UKCP_1DAY_PRECIP.tag = "UKCP_1day_precip"
@@ -909,3 +922,39 @@ def __repr__(self):
 UKCP_RAINFALL.extendcolbar = "neither"
 UKCP_RAINFALL.cpal = "Blues"
 UKCP_RAINFALL.vmid = None
+
+# Days of ground frost
+UKCP_GROUND_FROST = UKCPNEAT.copy()
+UKCP_GROUND_FROST.tag = "UKCP_groundfrost"
+UKCP_GROUND_FROST.default_barlabel = "Days of ground frost, days"
+UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("day")
+UKCP_GROUND_FROST.extendcolbar = "neither"
+UKCP_GROUND_FROST.cpal = "Blues"
+UKCP_GROUND_FROST.vmid = None
+
+# Days of snow lying
+UKCP_SNOW_LYING = UKCPNEAT.copy()
+UKCP_SNOW_LYING.tag = "UKCP_snowLying"
+UKCP_SNOW_LYING.default_barlabel = "Days of snow lying, days"
+UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("day")
+UKCP_SNOW_LYING.extendcolbar = "neither"
+UKCP_SNOW_LYING.cpal = "Blues"
+UKCP_SNOW_LYING.vmid = None
+
+# Sunshine hours
+UKCP_SUN = UKCPNEAT.copy()
+UKCP_SUN.tag = "UKCP_sun"
+UKCP_SUN.default_barlabel = "Sunshine, hours"
+UKCP_SUN.preferred_unit = cf_units.Unit("hr")
+UKCP_SUN.extendcolbar = "neither"
+UKCP_SUN.cpal = "Blues"
+UKCP_SUN.vmid = None
+
+# Vapour pressure (hPa)
+UKCP_VAPOUR_PRESSURE = UKCPNEAT.copy()
+UKCP_VAPOUR_PRESSURE.tag = "UKCP_sun"
+UKCP_VAPOUR_PRESSURE.default_barlabel = "Vapour pressure, hPa"
+UKCP_VAPOUR_PRESSURE.preferred_unit = cf_units.Unit("hPa")
+UKCP_VAPOUR_PRESSURE.extendcolbar = "neither"
+UKCP_VAPOUR_PRESSURE.cpal = "Blues"
+UKCP_VAPOUR_PRESSURE.vmid = None
diff --git a/ukcp_dp/utils/_utils.py b/ukcp_dp/utils/_utils.py
index d9c3cd9..708cca9 100644
--- a/ukcp_dp/utils/_utils.py
+++ b/ukcp_dp/utils/_utils.py
@@ -64,7 +64,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection):
 
     elif "sfcWind" in var_id or "wsgmax10m" in var_id:
         # Wind speed at 10m (m s-1)
-        if "Anom" in var_id:
+        if collection == COLLECTION_OBS:
+            plot_settings = stds.UKCP_WIND_OBS.copy()
+        elif "Anom" in var_id:
             plot_settings = stds.UKCP_WIND_ANOM.copy()
         else:
             plot_settings = stds.UKCP_WIND.copy()
@@ -93,7 +95,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection):
 
     elif "hurs" in var_id:
         # Relative humidity at 1.5m (%)
-        if "Anom" in var_id:
+        if collection == COLLECTION_OBS:
+            plot_settings = stds.UKCP_RELATIVE_HUMIDITY_OBS.copy()
+        elif "Anom" in var_id:
             plot_settings = stds.UKCP_RELATIVE_HUMIDITY_ANOM.copy()
         else:
             plot_settings = stds.UKCP_RELATIVE_HUMIDITY.copy()
@@ -107,7 +111,9 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection):
 
     elif "psl" in var_id:
         # Sea level pressure (hPa)
-        if "Anom" in var_id:
+        if collection == COLLECTION_OBS:
+            plot_settings = stds.UKCP_PMSL_OBS.copy()
+        elif "Anom" in var_id:
             plot_settings = stds.UKCP_PMSL_ANOM.copy()
         else:
             # TODO do we need a non-ANOM version?
@@ -141,6 +147,22 @@ def get_plot_settings(vocab, cmsize, fsize, var_id, extreme, collection):
         else:
             plot_settings = stds.UKCP_SWRAD_NET_MONTHLY.copy()
 
+    elif var_id == "groundfrost":
+        # Days of ground frost (days)
+        plot_settings = stds.UKCP_GROUND_FROST.copy()
+
+    elif var_id == "pv":
+        # Vapour pressure (hPa)
+        plot_settings = stds.UKCP_VAPOUR_PRESSURE.copy()
+
+    elif var_id == "snowLying":
+        # Days of snow lying (days)
+        plot_settings = stds.UKCP_SNOW_LYING.copy()
+
+    elif var_id == "sun":
+        # Sunshine hours (h)
+        plot_settings = stds.UKCP_SUN.copy()
+
     else:
         plot_settings = stds.UKCPNEAT.copy()
 

From cf44d10a36f1db678de9d6b05bde21b5ae2fdf52 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Tue, 5 Apr 2022 16:07:12 +0100
Subject: [PATCH 08/20] fix issue with file selection for HadUK area

---
 ukcp_dp/file_finder/_land_obs.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/ukcp_dp/file_finder/_land_obs.py b/ukcp_dp/file_finder/_land_obs.py
index f054d1b..aa38d11 100644
--- a/ukcp_dp/file_finder/_land_obs.py
+++ b/ukcp_dp/file_finder/_land_obs.py
@@ -121,8 +121,20 @@ def _get_date_ranges(input_data, variable):
     if input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y":
         return ["196101-199012", "198101-201012"]
 
-    # assume this must be a region selection
+    end_year = "2020"
+
+    if variable in ["tas", "tasmax", "tasmin"]:
+        start_year = "1884"
     if variable == "rainfall":
-        return ["186201-202012"]
+        start_year = "1862"
+    if variable == "sun":
+        start_year = "1919"
+    if variable == "sfcWind":
+        start_year = "1969"
+    if variable in ["psl", "hurs", "pv", "groundfrost"]:
+        start_year = "1961"
+    if variable == "snowLying":
+        start_year = "1971"
+
+    return [f"{start_year}01-{end_year}12"]
 
-    return ["188401-202012"]

From eea293a76a4e718db4ab1713f1838f9929a7ab3c Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Tue, 5 Apr 2022 16:11:10 +0100
Subject: [PATCH 09/20] update standard units for ground frost and snow lying

---
 ukcp_dp/utils/_standards_class.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ukcp_dp/utils/_standards_class.py b/ukcp_dp/utils/_standards_class.py
index cd91762..bd8c614 100644
--- a/ukcp_dp/utils/_standards_class.py
+++ b/ukcp_dp/utils/_standards_class.py
@@ -927,7 +927,7 @@ def __repr__(self):
 UKCP_GROUND_FROST = UKCPNEAT.copy()
 UKCP_GROUND_FROST.tag = "UKCP_groundfrost"
 UKCP_GROUND_FROST.default_barlabel = "Days of ground frost, days"
-UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("day")
+UKCP_GROUND_FROST.preferred_unit = cf_units.Unit("1.0")
 UKCP_GROUND_FROST.extendcolbar = "neither"
 UKCP_GROUND_FROST.cpal = "Blues"
 UKCP_GROUND_FROST.vmid = None
@@ -936,7 +936,7 @@ def __repr__(self):
 UKCP_SNOW_LYING = UKCPNEAT.copy()
 UKCP_SNOW_LYING.tag = "UKCP_snowLying"
 UKCP_SNOW_LYING.default_barlabel = "Days of snow lying, days"
-UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("day")
+UKCP_SNOW_LYING.preferred_unit = cf_units.Unit("1.0")
 UKCP_SNOW_LYING.extendcolbar = "neither"
 UKCP_SNOW_LYING.cpal = "Blues"
 UKCP_SNOW_LYING.vmid = None
@@ -952,7 +952,7 @@ def __repr__(self):
 
 # Vapour pressure (hPa)
 UKCP_VAPOUR_PRESSURE = UKCPNEAT.copy()
-UKCP_VAPOUR_PRESSURE.tag = "UKCP_sun"
+UKCP_VAPOUR_PRESSURE.tag = "UKCP_pv"
 UKCP_VAPOUR_PRESSURE.default_barlabel = "Vapour pressure, hPa"
 UKCP_VAPOUR_PRESSURE.preferred_unit = cf_units.Unit("hPa")
 UKCP_VAPOUR_PRESSURE.extendcolbar = "neither"

From 4724189a0512d59ebbb4c06f072968811052c44d Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 6 Apr 2022 12:11:35 +0100
Subject: [PATCH 10/20] update single map plot

fix issue where the range of values to be plotted is 0
---
 ukcp_dp/plotters/_single_map_plotter.py | 37 ++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/ukcp_dp/plotters/_single_map_plotter.py b/ukcp_dp/plotters/_single_map_plotter.py
index 26090fa..4b955f9 100644
--- a/ukcp_dp/plotters/_single_map_plotter.py
+++ b/ukcp_dp/plotters/_single_map_plotter.py
@@ -40,14 +40,26 @@ def _generate_subplots(self, cube, plot_settings, fig):
         """
         LOG.debug("_generate_subplots")
 
+        plot_settings.vrange, plot_settings.vstep = self._get_data_range(cube)
+        plot_settings.vmid = _get_mid_point(plot_settings.vrange)
+
         if self._is_landscape(cube, 1.25) is True:
             gs_top = 0.79
             gs_bottom = 0.14
             gs_left = 0.02
             gs_right = 0.98
 
-            # Position of the colour-bar Axes: [left,bottom, width,height]
-            plot_settings.bar_position = [0.25, 0.08, 0.5, 0.025]
+            if (
+                plot_settings.vstep == 1
+                and plot_settings.vrange[1] - plot_settings.vrange[0] == 2
+            ):
+                # special case
+                # Position of the colour-bar Axes: [left,bottom, width,height]
+                plot_settings.bar_position = [0.25, 0.08, 0.2, 0.025]
+            else:
+                # Position of the colour-bar Axes: [left,bottom, width,height]
+                plot_settings.bar_position = [0.25, 0.08, 0.5, 0.025]
+
             plot_settings.bar_orientation = "horizontal"
 
         else:  # portrait
@@ -56,16 +68,23 @@ def _generate_subplots(self, cube, plot_settings, fig):
             gs_left = 0.15
             gs_right = 0.8
 
-            # Position of the colour-bar Axes: [left,bottom, width,height]
-            plot_settings.bar_position = [0.82, 0.25, 0.025, 0.5]
+            if (
+                plot_settings.vstep == 1
+                and plot_settings.vrange[1] - plot_settings.vrange[0] == 2
+            ):
+                # special case
+                # Position of the colour-bar Axes: [left,bottom, width,height]
+                plot_settings.bar_position = [0.82, 0.25, 0.025, 0.2]
+            else:
+                # default case
+                # Position of the colour-bar Axes: [left,bottom, width,height]
+                plot_settings.bar_position = [0.82, 0.25, 0.025, 0.5]
+
             plot_settings.bar_orientation = "vertical"
 
         grid_spec = gridspec.GridSpec(1, 1)
         grid_spec.update(top=gs_top, bottom=gs_bottom, left=gs_left, right=gs_right)
 
-        plot_settings.vrange, plot_settings.vstep = self._get_data_range(cube)
-        plot_settings.vmid = _get_mid_point(plot_settings.vrange)
-
         result = self._add_sub_plot(fig, grid_spec[0, 0], plot_settings, cube)
 
         return result
@@ -106,6 +125,10 @@ def _get_data_range(self, cube):
         if step > 2 and cube_min + (step * 10) > cube_max:
             cube_max = cube_min + (step * 10)
 
+        if cube_min == cube_max:
+            cube_max = cube_max + 2
+            step = 1
+
         return [cube_min, cube_max], step
 
     def _add_sub_plot(self, fig, grid, plot_settings, data):

From 54c2208f51571c772aa978333800124ee663b9c4 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 7 Apr 2022 15:01:09 +0100
Subject: [PATCH 11/20] land-cpm mon-20y files now contain yyyymm aux coord

The exception to this is the wsgmax10m/mon-20y files
---
 ukcp_dp/data_extractor/_data_extractor.py |  1 +
 ukcp_dp/data_extractor/_utils.py          | 11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py
index 5af36df..a465522 100644
--- a/ukcp_dp/data_extractor/_data_extractor.py
+++ b/ukcp_dp/data_extractor/_data_extractor.py
@@ -156,6 +156,7 @@ def _get_anomaly_cube(self, file_list, climatology_file_list):
             self.input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
             self.input_data.get_value(InputType.TIME_PERIOD),
             self.input_data.get_value(InputType.COLLECTION),
+            self.input_data.get_value(InputType.VARIABLE),
         )
 
         return anomaly
diff --git a/ukcp_dp/data_extractor/_utils.py b/ukcp_dp/data_extractor/_utils.py
index 35b0ccd..bd2801e 100644
--- a/ukcp_dp/data_extractor/_utils.py
+++ b/ukcp_dp/data_extractor/_utils.py
@@ -24,6 +24,7 @@ def get_anomaly(
     temporal_average_type,
     time_period,
     collection,
+    variable,
 ):
     """
     Generate a cube containing the anomaly values.
@@ -39,6 +40,7 @@ def get_anomaly(
         type
     @param time_period(str): the name of a month or season or 'all'
     @param collection(str): the collection
+    @param variable(str): the variable
     """
     if temporal_average_type == TemporalAverageType.MONTHLY:
         periods = _get_selected_month_numbers(time_period)
@@ -75,10 +77,11 @@ def get_anomaly(
             except iris.exceptions.CoordinateNotFoundError:
                 pass
             if collection == COLLECTION_CPM:
-                try:
-                    cube_absoute_period.remove_coord("yyyymm")
-                except iris.exceptions.CoordinateNotFoundError:
-                    pass
+                if variable == "wsgmax10m":
+                    try:
+                        cube_absoute_period.remove_coord("yyyymm")
+                    except iris.exceptions.CoordinateNotFoundError:
+                        pass
                 try:
                     cube_climatology_period.remove_coord("year")
                 except iris.exceptions.CoordinateNotFoundError:

From e8753570b0e5ce1e04598766f8813a791a9e2343 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Fri, 8 Apr 2022 12:02:33 +0100
Subject: [PATCH 12/20] CPM data now uses `latitude` in place of `grid_latitude`

The exception is `wsgmax10m`, which still uses `grid_latitude`
---
 ukcp_dp/plotters/_postage_stamp_map_plotter.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ukcp_dp/plotters/_postage_stamp_map_plotter.py b/ukcp_dp/plotters/_postage_stamp_map_plotter.py
index 13f3f40..d78874d 100644
--- a/ukcp_dp/plotters/_postage_stamp_map_plotter.py
+++ b/ukcp_dp/plotters/_postage_stamp_map_plotter.py
@@ -222,10 +222,12 @@ def _plot_maps_mean_order(self, cube, fig, grid, plot_settings, title_font_size)
         if self.input_data.get_area_type() == AreaType.BBOX:
 
             if self.input_data.get_value(InputType.COLLECTION) in [
-                COLLECTION_CPM,
                 COLLECTION_RCM,
                 COLLECTION_RCM_GWL,
-            ]:
+            ] or (
+                self.input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM
+                and self.input_data.get_value(InputType.VARIABLE) == "wsgmax10m"
+            ):
                 # RCM is on a rotated grid
                 ensemble_mean_cube = cube.collapsed(
                     [

From ef8ca3bdd5a4c4ebd977d724b49d09947cdd0337 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 5 May 2022 13:52:59 +0100
Subject: [PATCH 13/20] fix issue with add_aux_coord in data extractor

---
 ukcp_dp/data_extractor/_data_extractor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ukcp_dp/data_extractor/_data_extractor.py b/ukcp_dp/data_extractor/_data_extractor.py
index a465522..7160d21 100644
--- a/ukcp_dp/data_extractor/_data_extractor.py
+++ b/ukcp_dp/data_extractor/_data_extractor.py
@@ -272,7 +272,7 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal
             # we need to update the type of ensemble_member_id in order to be able to
             # process Met Office and CORDEX data together
             for cube in cubes:
-                for ind, aux_coord in enumerate(cube.aux_coords):
+                for aux_coord in cube.aux_coords:
                     if aux_coord.var_name == "ensemble_member_id":
                         if aux_coord.dtype == np.dtype("<U27"):
                             # replace string23 with string46 to match CORDEX
@@ -284,7 +284,7 @@ def _get_cube(self, file_list, climatology=False, overlay_probability_levels=Fal
                                 long_name=aux_coord.long_name,
                                 var_name="ensemble_member_id",
                             )
-                            cube.add_aux_coord(ensemble_coord, ind)
+                            cube.add_aux_coord(ensemble_coord, 0)
                         break
 
                 # the UKCP regional seasonal data has month_number, lets remove it to

From 34df2eef45b4464dd01769fa8cfa2d3aef91abc1 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Tue, 17 May 2022 16:12:19 +0100
Subject: [PATCH 14/20] Make better use of CVs

---
 ukcp_dp/vocab_manager/_vocab.py | 36 +++------------------------------
 1 file changed, 3 insertions(+), 33 deletions(-)

diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index 8c4a783..a06413e 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -6,38 +6,6 @@
 class Vocab:
 
     VOCAB = {
-        "ensemble": {
-            # equivalent to UKCP18_ensemble_member
-            # UKCP18 values are full values
-            "01": "00000",
-            "02": "00605",
-            "03": "00834",
-            "04": "01113",
-            "05": "01554",
-            "06": "01649",
-            "07": "01843",
-            "08": "01935",
-            "09": "02123",
-            "10": "02242",
-            "11": "02305",
-            "12": "02335",
-            "13": "02491",
-            "14": "02832",
-            "15": "02868",
-            "16": "bcc-csm1-1",
-            "17": "CCSM4",
-            "18": "CESM1-BGC",
-            "19": "CanESM2",
-            "20": "CMCC-CM",
-            "21": "CNRM-CM5",
-            "22": "EC-EARTH",
-            "23": "ACCESS1-3",
-            "24": "HadGEM2-ES",
-            "25": "IPSL-CM5A-MR",
-            "26": "MPI-ESM-MR",
-            "27": "MRI-CGCM3",
-            "28": "GFDL-ESM2G",
-        },
         "spatial_representation": {
             # equivalent to UKCP18_resolution
             # no grid in UKCP18_resolution
@@ -243,6 +211,8 @@ def __init__(self):
         self._load_cv(CV_Type.SCENARIO)
         self._load_cv(CV_Type.TIME_SLICE_TYPE)
 
+        self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME)
+        self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME]
         self._load_cv(CV_Type.ADMIN_REGION)
         self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions"
         self._load_cv(CV_Type.COUNTRY)
@@ -497,7 +467,7 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
-    "cordex": list(_get_range(100, 177).values()),
+    "land-euro-cordex": list(_get_range(100, 166).values()),
 }
 
 

From d3306ca4b097cd8d02de60a0a1a863ea8244ea03 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 18 May 2022 13:38:42 +0100
Subject: [PATCH 15/20] refactor file finder

---
 ukcp_dp/file_finder/_file_finder.py | 531 +---------------------------
 ukcp_dp/file_finder/_land_cm.py     | 270 ++++++++++++++
 ukcp_dp/file_finder/_land_prob.py   | 286 +++++++++++++++
 3 files changed, 564 insertions(+), 523 deletions(-)
 create mode 100644 ukcp_dp/file_finder/_land_cm.py
 create mode 100644 ukcp_dp/file_finder/_land_prob.py

diff --git a/ukcp_dp/file_finder/_file_finder.py b/ukcp_dp/file_finder/_file_finder.py
index 0df2fd3..0df4837 100644
--- a/ukcp_dp/file_finder/_file_finder.py
+++ b/ukcp_dp/file_finder/_file_finder.py
@@ -6,11 +6,9 @@
 import os
 
 from ukcp_dp.constants import (
-    DATA_DIR,
     DATA_SERVICE_URL,
     COLLECTION_OBS,
     COLLECTION_PROB,
-    COLLECTION_PROB_MIN_YEAR,
     COLLECTION_CPM,
     COLLECTION_DERIVED,
     COLLECTION_GCM,
@@ -18,11 +16,10 @@
     COLLECTION_RCM_GWL,
     COLLECTION_MARINE,
     InputType,
-    OTHER_MAX_YEAR,
-    AreaType,
-    TemporalAverageType,
 )
 
+from ._land_cm import get_cm_file_list
+from ._land_prob import get_prob_file_list
 from ._land_obs import get_obs_file_list
 
 
@@ -63,7 +60,7 @@ def get_file_lists(input_data):
         COLLECTION_PROB,
         COLLECTION_MARINE,
     ]:
-        file_list["main"] = _get_prob_file_list(input_data)
+        file_list["main"] = get_prob_file_list(input_data)
 
     elif input_data.get_value(InputType.COLLECTION) in [
         COLLECTION_CPM,
@@ -72,10 +69,12 @@ def get_file_lists(input_data):
         COLLECTION_RCM,
         COLLECTION_RCM_GWL,
     ]:
-        file_list["main"] = _get_cm_file_list(input_data)
+        file_list["main"] = get_cm_file_list(input_data, None)
 
         if input_data.get_value(InputType.BASELINE) is not None:
-            file_list["baseline"] = _get_file_list_for_baseline(input_data)
+            file_list["baseline"] = get_cm_file_list(
+                input_data, input_data.get_value(InputType.BASELINE)
+            )
 
     elif input_data.get_value(InputType.COLLECTION) == COLLECTION_OBS:
         file_list["main"] = get_obs_file_list(input_data)
@@ -88,7 +87,7 @@ def get_file_lists(input_data):
         if input_data.get_value(InputType.COLLECTION) == COLLECTION_PROB:
             file_list_overlay = file_list["main"]
         else:
-            file_list_overlay = _get_prob_file_list(input_data)
+            file_list_overlay = get_prob_file_list(input_data)
 
         if len(file_list_overlay) == 1:
             file_list["overlay"] = file_list_overlay
@@ -123,517 +122,3 @@ def _get_absolute_path(file_path):
     path = DATA_SERVICE_URL + path
     path = path.rstrip("*")
     return path
-
-
-def _get_prob_file_list(input_data):
-    """
-    Get a list of files based on the data provided in the input data. As this
-    may be the file list for the overlay, some fields are not from the user
-    input.
-
-    @param input_data (InputData): an InputData object
-
-    @return a dict where
-        key: (str) variable name
-        value: list of lists where:
-            each list is a list of files per scenario, per variable, including
-            their full paths
-    """
-    variables = input_data.get_value(InputType.VARIABLE)
-
-    spatial_representation = _get_prob_spatial_representation(input_data)
-
-    file_lists_per_variable = {}
-
-    # if this is a selection for on overlay then the dates will not have been
-    # validated against this dataset. Check the dates and adjust the minimum if
-    # needed
-    year_maximum = input_data.get_value(InputType.YEAR_MAXIMUM)
-    year_minimum = input_data.get_value(InputType.YEAR_MINIMUM)
-    if year_maximum < COLLECTION_PROB_MIN_YEAR:
-        return {}
-
-    if year_minimum < COLLECTION_PROB_MIN_YEAR:
-        year_minimum = COLLECTION_PROB_MIN_YEAR
-
-    # December's data is included with the next year so if a single year has
-    # been selected
-    if year_minimum == year_maximum:
-        year_maximum = year_maximum + 1
-
-    for variable in variables:
-        # generate a list of files for each variable
-        # NB the marine data are all annual
-
-        file_list_per_scenario = []
-        for scenario in input_data.get_value(InputType.SCENARIO):
-            file_list_per_scenario.extend(
-                _get_file_list_per_scenario(
-                    input_data,
-                    scenario,
-                    spatial_representation,
-                    variable,
-                    year_minimum,
-                    year_maximum,
-                )
-            )
-
-        file_lists_per_variable[variable] = file_list_per_scenario
-
-    return file_lists_per_variable
-
-
-def _get_file_list_per_scenario(
-    input_data, scenario, spatial_representation, variable, year_minimum, year_maximum
-):
-    # generate a list of files for each scenario
-    file_list_per_data_type = []
-    for data_type in input_data.get_value(InputType.DATA_TYPE):
-        file_path = _get_prob_file_path(
-            data_type, input_data, scenario, spatial_representation, variable
-        )
-
-        if (
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-            == TemporalAverageType.ANNUAL
-            or spatial_representation != "25km"
-            or (input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE)
-        ):
-            # current thinking is that there will only be one file
-            file_name = "*"
-            file_list_per_data_type.append([os.path.join(file_path, file_name)])
-
-        elif input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y":
-            scenario_file_list = []
-
-            for year in range(year_minimum, (year_maximum + 1)):
-                # We cannot check for COLLECTION_PROB as this may be an
-                # overlay
-                if (
-                    input_data.get_value(InputType.COLLECTION) != COLLECTION_MARINE
-                    and year == OTHER_MAX_YEAR
-                ):
-                    # there is not data for December of the last year
-                    continue
-                file_name = _get_prob_file_name_for_year(
-                    data_type,
-                    input_data,
-                    scenario,
-                    spatial_representation,
-                    variable,
-                    year,
-                )
-                scenario_file_list.append(os.path.join(file_path, file_name))
-
-            file_list_per_data_type.append(scenario_file_list)
-
-        else:
-            # InputType.TIME_SLICE_TYPE) == '20y' or '30y'
-            file_name = _get_prob_file_name_for_slice(
-                data_type, input_data, scenario, spatial_representation, variable
-            )
-            file_list_per_data_type.append([os.path.join(file_path, file_name)])
-
-    return file_list_per_data_type
-
-
-def _get_prob_spatial_representation(input_data):
-    spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION)
-
-    if spatial_representation == AreaType.RIVER_BASIN:
-        spatial_representation = RIVER
-    elif spatial_representation == AreaType.ADMIN_REGION:
-        spatial_representation = REGION
-    elif spatial_representation == AreaType.COUNTRY:
-        pass
-    else:
-        # we cannot rely on the input value as this file list may be for the
-        # overlay
-        spatial_representation = "25km"
-
-    return spatial_representation
-
-
-def _get_prob_file_path(
-    data_type, input_data, scenario, spatial_representation, variable
-):
-
-    if input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE:
-
-        file_path = os.path.join(
-            DATA_DIR,
-            COLLECTION_MARINE,
-            input_data.get_value(InputType.METHOD),
-            scenario,
-            variable,
-            VERSION,
-        )
-    else:
-
-        file_path = os.path.join(
-            DATA_DIR,
-            COLLECTION_PROB,
-            "uk",
-            spatial_representation,
-            scenario,
-            data_type,
-            input_data.get_value(InputType.BASELINE),
-            input_data.get_value(InputType.TIME_SLICE_TYPE),
-            variable,
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
-            VERSION,
-        )
-
-    return file_path
-
-
-def _get_prob_file_name_for_year(
-    data_type, input_data, scenario, spatial_representation, variable, year
-):
-
-    # input_data.get_value(InputType.TIME_SLICE_TYPE) == '1y':
-    # the year starts in December, so subtract 1 from the year
-    start_date = "{year}{mon_day}".format(year=year - 1, mon_day=START_MONTH_DAY)
-    end_date = "{year}{mon_day}".format(year=year, mon_day=END_MONTH_DAY)
-
-    return _get_prob_file_name(
-        data_type,
-        input_data,
-        scenario,
-        spatial_representation,
-        variable,
-        start_date,
-        end_date,
-    )
-
-
-def _get_prob_file_name_for_slice(
-    data_type, input_data, scenario, spatial_representation, variable
-):
-    # input_data.get_value(InputType.TIME_SLICE_TYPE) == 20y or 30y
-    start_date = "20091201"
-    end_date = "20991130"
-
-    return _get_prob_file_name(
-        data_type,
-        input_data,
-        scenario,
-        spatial_representation,
-        variable,
-        start_date,
-        end_date,
-    )
-
-
-def _get_prob_file_name(
-    data_type,
-    input_data,
-    scenario,
-    spatial_representation,
-    variable,
-    start_date,
-    end_date,
-):
-
-    return_period = input_data.get_value(InputType.RETURN_PERIOD)
-
-    if return_period is None:
-        file_name = (
-            "{variable}_{scenario}_{collection}_uk_"
-            "{spatial_representation}_{data_type}_{baseline}_"
-            "{time_slice_type}_{temporal_type}_{start_data}-"
-            "{end_date}.nc".format(
-                variable=variable,
-                scenario=scenario,
-                collection=COLLECTION_PROB,
-                spatial_representation=spatial_representation,
-                data_type=data_type,
-                baseline=input_data.get_value(InputType.BASELINE),
-                time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE),
-                temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
-                start_data=start_date,
-                end_date=end_date,
-            )
-        )
-
-    else:
-        file_name = (
-            "{variable}_{return_period}_{scenario}_{collection}_uk_"
-            "{spatial_representation}_{data_type}_{baseline}_"
-            "{time_slice_type}_{temporal_type}_{start_data}-"
-            "{end_date}.nc".format(
-                variable=variable,
-                return_period=return_period,
-                scenario=scenario,
-                collection=COLLECTION_PROB,
-                spatial_representation=spatial_representation,
-                data_type=data_type,
-                baseline=input_data.get_value(InputType.BASELINE),
-                time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE),
-                temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
-                start_data=start_date,
-                end_date=end_date,
-            )
-        )
-
-    return file_name
-
-
-def _get_cm_file_list(input_data):
-    """
-    Get a list of files based on the data provided in the input data.
-
-    @param input_data (InputData): an InputData object
-
-    @return a dict where
-        key: (str) variable name
-        value: list of lists where:
-            each list is a list of files per scenario, per variable, including
-            their full paths
-    """
-    return _get_cm_file_list_for_range(input_data, None)
-
-
-def _get_file_list_for_baseline(input_data):
-    """
-    Get a list of files for the baseline based on the data provided in the
-    input data.
-
-    @param input_data (InputData): an InputData object
-
-    @return a dict where
-        key: (str) variable name
-        value: list of lists where:
-            each list is a list of files per scenario, per variable, including
-            their full paths
-    """
-    baseline = input_data.get_value(InputType.BASELINE)
-    return _get_cm_file_list_for_range(input_data, baseline)
-
-
-def _get_cm_file_list_for_range(input_data, baseline):
-    variables = input_data.get_value(InputType.VARIABLE)
-
-    spatial_representation = _get_cm_spatial_representation(input_data)
-
-    file_lists_per_variable = {}
-
-    for variable in variables:
-        # generate a list of files for each variable
-        # we need to use the variable root and calculate the anomaly later
-        variable_prefix = variable.split("Anom")[0]
-
-        file_list_per_scenario = []
-        for scenario in input_data.get_value(InputType.SCENARIO):
-            # generate a list of files for each scenario
-
-            ensemble_file_list = []
-            for ensemble in input_data.get_value(InputType.ENSEMBLE):
-                file_path = _get_cm_file_path(
-                    input_data,
-                    spatial_representation,
-                    variable_prefix,
-                    scenario,
-                    ensemble,
-                    baseline,
-                )
-
-                if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in [
-                    "1hr",
-                    "3hr",
-                ]:
-                    # we need lots of files
-                    for year in range(
-                        input_data.get_value(InputType.YEAR_MINIMUM) - 1,
-                        input_data.get_value(InputType.YEAR_MAXIMUM),
-                    ):
-                        for month in range(0, 12):
-                            if (
-                                year == input_data.get_value(InputType.YEAR_MINIMUM) - 1
-                                and month < 11
-                            ):
-                                continue
-                            if (
-                                year == input_data.get_value(InputType.YEAR_MAXIMUM) - 1
-                                and month > 10
-                            ):
-                                continue
-                            date_range = "{year}{month}01-{year}{month_end}30".format(
-                                year=year,
-                                month=MONTH_NUMBERS[month],
-                                month_end=MONTH_NUMBERS[month],
-                            )
-                            file_name = _get_cm_file_name(
-                                input_data,
-                                spatial_representation,
-                                variable_prefix,
-                                scenario,
-                                ensemble,
-                                baseline,
-                                date_range,
-                            )
-                            ensemble_file_list.append(
-                                os.path.join(file_path, file_name)
-                            )
-                else:
-                    file_name = _get_cm_file_name(
-                        input_data,
-                        spatial_representation,
-                        variable_prefix,
-                        scenario,
-                        ensemble,
-                        baseline,
-                    )
-                    ensemble_file_list.append(os.path.join(file_path, file_name))
-
-            file_list_per_scenario.append(ensemble_file_list)
-
-        file_lists_per_variable[variable] = file_list_per_scenario
-
-    return file_lists_per_variable
-
-
-def _get_cm_spatial_representation(input_data):
-    spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION)
-
-    if spatial_representation == AreaType.RIVER_BASIN:
-        spatial_representation = RIVER
-    elif spatial_representation == AreaType.ADMIN_REGION:
-        spatial_representation = REGION
-    return spatial_representation
-
-
-def _get_cm_file_path(
-    input_data, spatial_representation, variable, scenario, ensemble, baseline
-):
-    if baseline is None and input_data.get_value(InputType.TIME_SLICE_TYPE) is None:
-        temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-
-    elif (
-        baseline == "b8100" or input_data.get_value(InputType.TIME_SLICE_TYPE) == "20y"
-    ):
-        temporal_average_type = "{}-20y".format(
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-        )
-
-    elif (
-        baseline == "b6190"
-        or baseline == "b8110"
-        or input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y"
-    ):
-        temporal_average_type = "{}-30y".format(
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-        )
-
-    else:
-        temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-
-    collection = input_data.get_value(InputType.COLLECTION)
-
-    if baseline is not None and scenario in ["gwl2", "gwl4"]:
-        # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
-        collection = COLLECTION_GCM
-        scenario = "rcp85"
-
-    file_path = os.path.join(
-        DATA_DIR,
-        collection,
-        "uk",
-        spatial_representation,
-        scenario,
-        ensemble,
-        variable,
-        temporal_average_type,
-        VERSION,
-    )
-
-    return file_path
-
-
-def _get_cm_file_name(
-    input_data,
-    spatial_representation,
-    variable,
-    scenario,
-    ensemble,
-    baseline,
-    year=None,
-):
-    if baseline is None:
-        if (
-            input_data.get_value(InputType.TIME_SLICE_TYPE) is None
-            or input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y"
-        ) and input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) not in [
-            "1hr",
-            "3hr",
-        ]:
-            # there will only be one file
-            return "*"
-
-        if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]:
-            temporal_average_type = input_data.get_value(
-                InputType.TEMPORAL_AVERAGE_TYPE
-            )
-        else:
-            temporal_average_type = "{}-{}".format(
-                input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
-                input_data.get_value(InputType.TIME_SLICE_TYPE),
-            )
-
-        if input_data.get_value(InputType.COLLECTION) in [
-            COLLECTION_GCM,
-            COLLECTION_DERIVED,
-        ]:
-            date_range = "200912-209911"
-        elif input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM:
-            if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]:
-                date_range = "{}".format(year)
-            elif input_data.get_value(InputType.YEAR_MINIMUM) == 1981:
-                date_range = "198012-200011"
-            elif input_data.get_value(InputType.YEAR_MINIMUM) == 2021:
-                date_range = "202012-204011"
-            elif input_data.get_value(InputType.YEAR_MINIMUM) == 2061:
-                date_range = "206012-208011"
-        else:
-            date_range = "200912-207911"
-
-    elif baseline == "b8100":
-        temporal_average_type = "{}-20y".format(
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-        )
-        date_range = "198012-200011"
-
-    elif baseline == "b6190":
-        temporal_average_type = "{}-30y".format(
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-        )
-        date_range = "196012-199011"
-
-    elif baseline == "b8110":
-        temporal_average_type = "{}-30y".format(
-            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
-        )
-        date_range = "198012-201011"
-
-    collection = input_data.get_value(InputType.COLLECTION)
-
-    if baseline is not None and scenario in ["gwl2", "gwl4"]:
-        # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
-        collection = COLLECTION_GCM
-        scenario = "rcp85"
-
-    file_name = (
-        "{variable}_{scenario}_{collection}_uk_"
-        "{spatial_representation}_{ensemble}_"
-        "{temporal_average_type}_{date}.nc".format(
-            variable=variable,
-            scenario=scenario,
-            collection=collection,
-            spatial_representation=spatial_representation,
-            ensemble=ensemble,
-            temporal_average_type=temporal_average_type,
-            date=date_range,
-        )
-    )
-
-    return file_name
diff --git a/ukcp_dp/file_finder/_land_cm.py b/ukcp_dp/file_finder/_land_cm.py
new file mode 100644
index 0000000..ea37c1f
--- /dev/null
+++ b/ukcp_dp/file_finder/_land_cm.py
@@ -0,0 +1,270 @@
+"""
+This module provides the method get_cm_file_list.
+
+"""
+import logging
+import os
+
+from ukcp_dp.constants import (
+    DATA_DIR,
+    COLLECTION_CPM,
+    COLLECTION_DERIVED,
+    COLLECTION_GCM,
+    InputType,
+    AreaType,
+)
+
+
+LOG = logging.getLogger(__name__)
+
+
+# month and day
+START_MONTH_DAY = "1201"
+END_MONTH_DAY = "1130"
+
+MONTH_NUMBERS = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
+
+VERSION = "latest"
+
+RIVER = "river"
+REGION = "region"
+
+
+def get_cm_file_list(input_data, baseline):
+    """
+    Get a list of files based on the data provided in the input data.
+
+    @param input_data (InputData): an InputData object
+    @param baseline (str): the baseline, may be None
+
+    @return a dict where
+        key: (str) variable name
+        value: list of lists where:
+            each list is a list of files per scenario, per variable, including
+            their full paths
+    """
+    variables = input_data.get_value(InputType.VARIABLE)
+
+    spatial_representation = _get_cm_spatial_representation(input_data)
+
+    file_lists_per_variable = {}
+
+    for variable in variables:
+        # generate a list of files for each variable
+        # we need to use the variable root and calculate the anomaly later
+        variable_prefix = variable.split("Anom")[0]
+
+        file_list_per_scenario = []
+        for scenario in input_data.get_value(InputType.SCENARIO):
+            # generate a list of files for each scenario
+
+            ensemble_file_list = []
+            for ensemble in input_data.get_value(InputType.ENSEMBLE):
+                file_path = _get_cm_file_path(
+                    input_data,
+                    spatial_representation,
+                    variable_prefix,
+                    scenario,
+                    ensemble,
+                    baseline,
+                )
+
+                if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in [
+                    "1hr",
+                    "3hr",
+                ]:
+                    # we need lots of files
+                    for year in range(
+                        input_data.get_value(InputType.YEAR_MINIMUM) - 1,
+                        input_data.get_value(InputType.YEAR_MAXIMUM),
+                    ):
+                        for month in range(0, 12):
+                            if (
+                                year == input_data.get_value(InputType.YEAR_MINIMUM) - 1
+                                and month < 11
+                            ):
+                                continue
+                            if (
+                                year == input_data.get_value(InputType.YEAR_MAXIMUM) - 1
+                                and month > 10
+                            ):
+                                continue
+                            date_range = "{year}{month}01-{year}{month_end}30".format(
+                                year=year,
+                                month=MONTH_NUMBERS[month],
+                                month_end=MONTH_NUMBERS[month],
+                            )
+                            file_name = _get_cm_file_name(
+                                input_data,
+                                spatial_representation,
+                                variable_prefix,
+                                scenario,
+                                ensemble,
+                                baseline,
+                                date_range,
+                            )
+                            ensemble_file_list.append(
+                                os.path.join(file_path, file_name)
+                            )
+                else:
+                    file_name = _get_cm_file_name(
+                        input_data,
+                        spatial_representation,
+                        variable_prefix,
+                        scenario,
+                        ensemble,
+                        baseline,
+                    )
+                    ensemble_file_list.append(os.path.join(file_path, file_name))
+
+            file_list_per_scenario.append(ensemble_file_list)
+
+        file_lists_per_variable[variable] = file_list_per_scenario
+
+    return file_lists_per_variable
+
+
+def _get_cm_spatial_representation(input_data):
+    spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION)
+
+    if spatial_representation == AreaType.RIVER_BASIN:
+        spatial_representation = RIVER
+    elif spatial_representation == AreaType.ADMIN_REGION:
+        spatial_representation = REGION
+    return spatial_representation
+
+
+def _get_cm_file_path(
+    input_data, spatial_representation, variable, scenario, ensemble, baseline
+):
+    if baseline is None and input_data.get_value(InputType.TIME_SLICE_TYPE) is None:
+        temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+
+    elif (
+        baseline == "b8100" or input_data.get_value(InputType.TIME_SLICE_TYPE) == "20y"
+    ):
+        temporal_average_type = "{}-20y".format(
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+        )
+
+    elif (
+        baseline == "b6190"
+        or baseline == "b8110"
+        or input_data.get_value(InputType.TIME_SLICE_TYPE) == "30y"
+    ):
+        temporal_average_type = "{}-30y".format(
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+        )
+
+    else:
+        temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+
+    collection = input_data.get_value(InputType.COLLECTION)
+
+    if baseline is not None and scenario in ["gwl2", "gwl4"]:
+        # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
+        collection = COLLECTION_GCM
+        scenario = "rcp85"
+
+    file_path = os.path.join(
+        DATA_DIR,
+        collection,
+        "uk",
+        spatial_representation,
+        scenario,
+        ensemble,
+        variable,
+        temporal_average_type,
+        VERSION,
+    )
+
+    return file_path
+
+
+def _get_cm_file_name(
+    input_data,
+    spatial_representation,
+    variable,
+    scenario,
+    ensemble,
+    baseline,
+    year=None,
+):
+    if baseline is None:
+        if (
+            input_data.get_value(InputType.TIME_SLICE_TYPE) is None
+            or input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y"
+        ) and input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) not in [
+            "1hr",
+            "3hr",
+        ]:
+            # there will only be one file
+            return "*"
+
+        if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]:
+            temporal_average_type = input_data.get_value(
+                InputType.TEMPORAL_AVERAGE_TYPE
+            )
+        else:
+            temporal_average_type = "{}-{}".format(
+                input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
+                input_data.get_value(InputType.TIME_SLICE_TYPE),
+            )
+
+        if input_data.get_value(InputType.COLLECTION) in [
+            COLLECTION_GCM,
+            COLLECTION_DERIVED,
+        ]:
+            date_range = "200912-209911"
+        elif input_data.get_value(InputType.COLLECTION) == COLLECTION_CPM:
+            if input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE) in ["1hr", "3hr"]:
+                date_range = "{}".format(year)
+            elif input_data.get_value(InputType.YEAR_MINIMUM) == 1981:
+                date_range = "198012-200011"
+            elif input_data.get_value(InputType.YEAR_MINIMUM) == 2021:
+                date_range = "202012-204011"
+            elif input_data.get_value(InputType.YEAR_MINIMUM) == 2061:
+                date_range = "206012-208011"
+        else:
+            date_range = "200912-207911"
+
+    elif baseline == "b8100":
+        temporal_average_type = "{}-20y".format(
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+        )
+        date_range = "198012-200011"
+
+    elif baseline == "b6190":
+        temporal_average_type = "{}-30y".format(
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+        )
+        date_range = "196012-199011"
+
+    elif baseline == "b8110":
+        temporal_average_type = "{}-30y".format(
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+        )
+        date_range = "198012-201011"
+
+    collection = input_data.get_value(InputType.COLLECTION)
+
+    if baseline is not None and scenario in ["gwl2", "gwl4"]:
+        # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
+        collection = COLLECTION_GCM
+        scenario = "rcp85"
+
+    file_name = (
+        "{variable}_{scenario}_{collection}_uk_"
+        "{spatial_representation}_{ensemble}_"
+        "{temporal_average_type}_{date}.nc".format(
+            variable=variable,
+            scenario=scenario,
+            collection=collection,
+            spatial_representation=spatial_representation,
+            ensemble=ensemble,
+            temporal_average_type=temporal_average_type,
+            date=date_range,
+        )
+    )
+
+    return file_name
diff --git a/ukcp_dp/file_finder/_land_prob.py b/ukcp_dp/file_finder/_land_prob.py
new file mode 100644
index 0000000..51239fc
--- /dev/null
+++ b/ukcp_dp/file_finder/_land_prob.py
@@ -0,0 +1,286 @@
+"""
+This module provides the method get_prob_file_list.
+
+"""
+import logging
+import os
+
+from ukcp_dp.constants import (
+    DATA_DIR,
+    COLLECTION_PROB,
+    COLLECTION_PROB_MIN_YEAR,
+    COLLECTION_MARINE,
+    InputType,
+    OTHER_MAX_YEAR,
+    AreaType,
+    TemporalAverageType,
+)
+
+
+LOG = logging.getLogger(__name__)
+
+
+# month and day
+START_MONTH_DAY = "1201"
+END_MONTH_DAY = "1130"
+
+MONTH_NUMBERS = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
+
+VERSION = "latest"
+
+RIVER = "river"
+REGION = "region"
+
+
+def get_prob_file_list(input_data):
+    """
+    Get a list of files based on the data provided in the input data. As this
+    may be the file list for the overlay, some fields are not from the user
+    input.
+
+    @param input_data (InputData): an InputData object
+
+    @return a dict where
+        key: (str) variable name
+        value: list of lists where:
+            each list is a list of files per scenario, per variable, including
+            their full paths
+    """
+    variables = input_data.get_value(InputType.VARIABLE)
+
+    spatial_representation = _get_prob_spatial_representation(input_data)
+
+    file_lists_per_variable = {}
+
+    # if this is a selection for an overlay then the dates will not have been
+    # validated against this dataset. Check the dates and adjust the minimum if
+    # needed
+    year_maximum = input_data.get_value(InputType.YEAR_MAXIMUM)
+    year_minimum = input_data.get_value(InputType.YEAR_MINIMUM)
+    if year_maximum < COLLECTION_PROB_MIN_YEAR:
+        return {}
+
+    if year_minimum < COLLECTION_PROB_MIN_YEAR:
+        year_minimum = COLLECTION_PROB_MIN_YEAR
+
+    # December's data is included with the next year so if a single year has
+    # been selected, extend the range to include the following year
+    if year_minimum == year_maximum:
+        year_maximum = year_maximum + 1
+
+    for variable in variables:
+        # generate a list of files for each variable
+        # NB the marine data are all annual
+
+        file_list_per_scenario = []
+        for scenario in input_data.get_value(InputType.SCENARIO):
+            file_list_per_scenario.extend(
+                _get_file_list_per_scenario(
+                    input_data,
+                    scenario,
+                    spatial_representation,
+                    variable,
+                    year_minimum,
+                    year_maximum,
+                )
+            )
+
+        file_lists_per_variable[variable] = file_list_per_scenario
+
+    return file_lists_per_variable
+
+
+def _get_file_list_per_scenario(
+    input_data, scenario, spatial_representation, variable, year_minimum, year_maximum
+):
+    # generate a list of files for each scenario
+    file_list_per_data_type = []
+    for data_type in input_data.get_value(InputType.DATA_TYPE):
+        file_path = _get_prob_file_path(
+            data_type, input_data, scenario, spatial_representation, variable
+        )
+
+        if (
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
+            == TemporalAverageType.ANNUAL
+            or spatial_representation != "25km"
+            or (input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE)
+        ):
+            # current thinking is that there will only be one file
+            file_name = "*"
+            file_list_per_data_type.append([os.path.join(file_path, file_name)])
+
+        elif input_data.get_value(InputType.TIME_SLICE_TYPE) == "1y":
+            scenario_file_list = []
+
+            for year in range(year_minimum, (year_maximum + 1)):
+                # We cannot check for COLLECTION_PROB as this may be an
+                # overlay
+                if (
+                    input_data.get_value(InputType.COLLECTION) != COLLECTION_MARINE
+                    and year == OTHER_MAX_YEAR
+                ):
+                    # there is no data for December of the last year
+                    continue
+                file_name = _get_prob_file_name_for_year(
+                    data_type,
+                    input_data,
+                    scenario,
+                    spatial_representation,
+                    variable,
+                    year,
+                )
+                scenario_file_list.append(os.path.join(file_path, file_name))
+
+            file_list_per_data_type.append(scenario_file_list)
+
+        else:
+            # InputType.TIME_SLICE_TYPE == '20y' or '30y'
+            file_name = _get_prob_file_name_for_slice(
+                data_type, input_data, scenario, spatial_representation, variable
+            )
+            file_list_per_data_type.append([os.path.join(file_path, file_name)])
+
+    return file_list_per_data_type
+
+
+def _get_prob_spatial_representation(input_data):
+    spatial_representation = input_data.get_value(InputType.SPATIAL_REPRESENTATION)
+
+    if spatial_representation == AreaType.RIVER_BASIN:
+        spatial_representation = RIVER
+    elif spatial_representation == AreaType.ADMIN_REGION:
+        spatial_representation = REGION
+    elif spatial_representation == AreaType.COUNTRY:
+        pass
+    else:
+        # we cannot rely on the input value as this file list may be for the
+        # overlay
+        spatial_representation = "25km"
+
+    return spatial_representation
+
+
+def _get_prob_file_path(
+    data_type, input_data, scenario, spatial_representation, variable
+):
+
+    if input_data.get_value(InputType.COLLECTION) == COLLECTION_MARINE:
+
+        file_path = os.path.join(
+            DATA_DIR,
+            COLLECTION_MARINE,
+            input_data.get_value(InputType.METHOD),
+            scenario,
+            variable,
+            VERSION,
+        )
+    else:
+
+        file_path = os.path.join(
+            DATA_DIR,
+            COLLECTION_PROB,
+            "uk",
+            spatial_representation,
+            scenario,
+            data_type,
+            input_data.get_value(InputType.BASELINE),
+            input_data.get_value(InputType.TIME_SLICE_TYPE),
+            variable,
+            input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
+            VERSION,
+        )
+
+    return file_path
+
+
+def _get_prob_file_name_for_year(
+    data_type, input_data, scenario, spatial_representation, variable, year
+):
+
+    # input_data.get_value(InputType.TIME_SLICE_TYPE) == '1y':
+    # the year starts in December, so subtract 1 from the year
+    start_date = "{year}{mon_day}".format(year=year - 1, mon_day=START_MONTH_DAY)
+    end_date = "{year}{mon_day}".format(year=year, mon_day=END_MONTH_DAY)
+
+    return _get_prob_file_name(
+        data_type,
+        input_data,
+        scenario,
+        spatial_representation,
+        variable,
+        start_date,
+        end_date,
+    )
+
+
+def _get_prob_file_name_for_slice(
+    data_type, input_data, scenario, spatial_representation, variable
+):
+    # input_data.get_value(InputType.TIME_SLICE_TYPE) == 20y or 30y
+    start_date = "20091201"
+    end_date = "20991130"
+
+    return _get_prob_file_name(
+        data_type,
+        input_data,
+        scenario,
+        spatial_representation,
+        variable,
+        start_date,
+        end_date,
+    )
+
+
+def _get_prob_file_name(
+    data_type,
+    input_data,
+    scenario,
+    spatial_representation,
+    variable,
+    start_date,
+    end_date,
+):
+
+    return_period = input_data.get_value(InputType.RETURN_PERIOD)
+
+    if return_period is None:
+        file_name = (
+            "{variable}_{scenario}_{collection}_uk_"
+            "{spatial_representation}_{data_type}_{baseline}_"
+            "{time_slice_type}_{temporal_type}_{start_data}-"
+            "{end_date}.nc".format(
+                variable=variable,
+                scenario=scenario,
+                collection=COLLECTION_PROB,
+                spatial_representation=spatial_representation,
+                data_type=data_type,
+                baseline=input_data.get_value(InputType.BASELINE),
+                time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE),
+                temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
+                start_data=start_date,
+                end_date=end_date,
+            )
+        )
+
+    else:
+        file_name = (
+            "{variable}_{return_period}_{scenario}_{collection}_uk_"
+            "{spatial_representation}_{data_type}_{baseline}_"
+            "{time_slice_type}_{temporal_type}_{start_data}-"
+            "{end_date}.nc".format(
+                variable=variable,
+                return_period=return_period,
+                scenario=scenario,
+                collection=COLLECTION_PROB,
+                spatial_representation=spatial_representation,
+                data_type=data_type,
+                baseline=input_data.get_value(InputType.BASELINE),
+                time_slice_type=input_data.get_value(InputType.TIME_SLICE_TYPE),
+                temporal_type=input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE),
+                start_data=start_date,
+                end_date=end_date,
+            )
+        )
+
+    return file_name

From d40a92fa4bba00d0cd9ec96be0da091466235648 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 18 May 2022 13:47:01 +0100
Subject: [PATCH 16/20] Update file finder for cordex collection

---
 ukcp_dp/file_finder/_land_cm.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/ukcp_dp/file_finder/_land_cm.py b/ukcp_dp/file_finder/_land_cm.py
index ea37c1f..8892f9c 100644
--- a/ukcp_dp/file_finder/_land_cm.py
+++ b/ukcp_dp/file_finder/_land_cm.py
@@ -7,12 +7,15 @@
 
 from ukcp_dp.constants import (
     DATA_DIR,
+    COLLECTION_RCM_CORDEX,
     COLLECTION_CPM,
     COLLECTION_DERIVED,
     COLLECTION_GCM,
+    COLLECTION_RCM,
     InputType,
     AreaType,
 )
+from ukcp_dp.vocab_manager._vocab import get_ensemble_member_set
 
 
 LOG = logging.getLogger(__name__)
@@ -159,7 +162,7 @@ def _get_cm_file_path(
     else:
         temporal_average_type = input_data.get_value(InputType.TEMPORAL_AVERAGE_TYPE)
 
-    collection = input_data.get_value(InputType.COLLECTION)
+    collection = _get_collection(input_data, ensemble)
 
     if baseline is not None and scenario in ["gwl2", "gwl4"]:
         # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
@@ -246,7 +249,7 @@ def _get_cm_file_name(
         )
         date_range = "198012-201011"
 
-    collection = input_data.get_value(InputType.COLLECTION)
+    collection = _get_collection(input_data, ensemble)
 
     if baseline is not None and scenario in ["gwl2", "gwl4"]:
         # we need to use the GCM RCP8.5 baseline for GWL2 and GWL4
@@ -268,3 +271,13 @@ def _get_cm_file_name(
     )
 
     return file_name
+
+
+def _get_collection(input_data, ensemble):
+    collection = input_data.get_value(InputType.COLLECTION)
+    if collection == COLLECTION_RCM and ensemble in get_ensemble_member_set(
+        COLLECTION_RCM_CORDEX
+    ):
+        collection = COLLECTION_RCM_CORDEX
+
+    return collection

From 5b23ede435000e79286d4e0c54001e7fd1feb004 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Wed, 18 May 2022 13:48:20 +0100
Subject: [PATCH 17/20] cordex ensemble update

---
 ukcp_dp/constants.py            | 2 +-
 ukcp_dp/vocab_manager/_vocab.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py
index cd73fc7..2f157cf 100644
--- a/ukcp_dp/constants.py
+++ b/ukcp_dp/constants.py
@@ -180,7 +180,7 @@ def enum(**named_values):
 COLLECTION_GCM = "land-gcm"
 COLLECTION_RCM = "land-rcm"
 COLLECTION_RCM_MIN_YEAR = 1980
-COLLECTION_RCM_CORDEX = "cordex"
+COLLECTION_RCM_CORDEX = "land-euro-cordex"
 COLLECTION_RCM_GWL = "land-rcm-gwl"
 COLLECTION_MARINE = "marine-sim"
 COLLECTION_MARINE_MIN_YEAR = 2007
diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index a06413e..df508da 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -188,7 +188,6 @@ def __init__(self):
         self.vocab["year_maximum"].update(_get_range(3001, 3052))
         self.vocab["sampling_id"] = _get_range(1, 4001)
         self.vocab["random_sampling_count"] = _get_range(100, 4001)
-        self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"]
         self.vocab["sampling_percentile_2"] = self.vocab["sampling_percentile_1"]
         time_period = {"all": "all"}
         time_period.update(self.vocab["ann"])
@@ -213,6 +212,7 @@ def __init__(self):
 
         self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME)
         self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME]
+        self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"]
         self._load_cv(CV_Type.ADMIN_REGION)
         self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions"
         self._load_cv(CV_Type.COUNTRY)
@@ -467,7 +467,8 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
-    "land-euro-cordex": list(_get_range(100, 166).values()),
+    "land-euro-cordex": list(_get_range(29, 34).values())
+    + list(_get_range(100, 165).values()),
 }
 
 

From 0a1fc69447659c9712143d0e2c373de58f314dc1 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 19 May 2022 11:28:43 +0100
Subject: [PATCH 18/20] update vocab for CORDEX

---
 ukcp_dp/vocab_manager/_vocab.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index df508da..c2c717d 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -212,7 +212,8 @@ def __init__(self):
 
         self._load_cv(CV_Type.ENSEMBLE_SHORT_NAME)
         self.vocab["ensemble"] = self.vocab[CV_Type.ENSEMBLE_SHORT_NAME]
-        self.vocab["highlighted_ensemble_members"] = self.vocab["ensemble"]
+        self._load_cv(CV_Type.ENSEMBLE_MEMBER)
+        self.vocab["highlighted_ensemble_members"] = self.vocab[CV_Type.ENSEMBLE_MEMBER]
         self._load_cv(CV_Type.ADMIN_REGION)
         self.vocab[CV_Type.ADMIN_REGION]["all"] = "All administrative regions"
         self._load_cv(CV_Type.COUNTRY)

From 5ebe3410d79c7d3e545f48ed1a1a05a9cb627069 Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Thu, 19 May 2022 11:28:43 +0100
Subject: [PATCH 19/20] update vocab for CORDEX

---
 ukcp_dp/vocab_manager/_vocab.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index c2c717d..d7b8c9f 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -468,8 +468,7 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
-    "land-euro-cordex": list(_get_range(29, 34).values())
-    + list(_get_range(100, 165).values()),
+    "land-euro-cordex": list(_get_range(100, 165).values()),
 }
 
 

From 66de34086e3478308d5e42b60df43438d979e58a Mon Sep 17 00:00:00 2001
From: Antony Wilson <antony.wilson@stfc.ac.uk>
Date: Mon, 6 Jun 2022 11:14:35 +0100
Subject: [PATCH 20/20] rename land-euro-cordex to land-eurocordex

---
 ukcp_dp/constants.py            | 2 +-
 ukcp_dp/vocab_manager/_vocab.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ukcp_dp/constants.py b/ukcp_dp/constants.py
index 2f157cf..49acba9 100644
--- a/ukcp_dp/constants.py
+++ b/ukcp_dp/constants.py
@@ -180,7 +180,7 @@ def enum(**named_values):
 COLLECTION_GCM = "land-gcm"
 COLLECTION_RCM = "land-rcm"
 COLLECTION_RCM_MIN_YEAR = 1980
-COLLECTION_RCM_CORDEX = "land-euro-cordex"
+COLLECTION_RCM_CORDEX = "land-eurocordex"
 COLLECTION_RCM_GWL = "land-rcm-gwl"
 COLLECTION_MARINE = "marine-sim"
 COLLECTION_MARINE_MIN_YEAR = 2007
diff --git a/ukcp_dp/vocab_manager/_vocab.py b/ukcp_dp/vocab_manager/_vocab.py
index d7b8c9f..803d8c0 100644
--- a/ukcp_dp/vocab_manager/_vocab.py
+++ b/ukcp_dp/vocab_manager/_vocab.py
@@ -468,7 +468,7 @@ def _get_range(min_value, max_value):
     "26": ["26"],
     "27": ["27"],
     "28": ["28"],
-    "land-euro-cordex": list(_get_range(100, 165).values()),
+    "land-eurocordex": list(_get_range(100, 165).values()),
 }