From d651eb3e5b20691c05c6230d2e7abcda40157b4d Mon Sep 17 00:00:00 2001 From: Oliver Sus Date: Wed, 29 Sep 2021 09:42:09 +0000 Subject: [PATCH 1/4] replacing median with 50th percentile --- pygac/reader.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pygac/reader.py b/pygac/reader.py index 90812c1a..e4a6cf64 100644 --- a/pygac/reader.py +++ b/pygac/reader.py @@ -817,7 +817,7 @@ def correct_times_median(self, year, jday, msec): msec_lineno = self.lineno2msec(self.scans["scan_line_number"]) jday = np.where(np.logical_or(jday < 1, jday > 366), - np.median(jday), jday) + np.percentile(jday, 50, interpolation='nearest'), jday) if_wrong_jday = np.ediff1d(jday, to_begin=0) jday = np.where(if_wrong_jday < 0, max(jday), jday) @@ -827,7 +827,7 @@ def correct_times_median(self, year, jday, msec): if if_wrong_msec[0] != 0: msec = msec[0] + msec_lineno else: - msec0 = np.median(msec - msec_lineno) + msec0 = np.percentile(msec - msec_lineno, 50, interpolation='nearest') msec = msec0 + msec_lineno if_wrong_msec = np.ediff1d(msec, to_begin=0) @@ -846,9 +846,9 @@ def correct_times_median(self, year, jday, msec): msec = msec[0] + msec_lineno # Otherwise use median time stamp else: - year = np.median(year) - jday = np.median(jday) - msec0 = np.median(msec - msec_lineno) + year = np.percentile(year, 50, interpolation='nearest') + jday = np.percentile(jday, 50, interpolation='nearest') + msec0 = np.percentile(msec - msec_lineno, 50, interpolation='nearest') msec = msec0 + msec_lineno return year, jday, msec From 599bc95bc58e743af820f4b734d308be788d597d Mon Sep 17 00:00:00 2001 From: Oliver Sus Date: Tue, 18 Jul 2023 08:38:48 +0000 Subject: [PATCH 2/4] add helper function for np.percentile --- pygac/reader.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/pygac/reader.py b/pygac/reader.py index 90812c1a..617a5a60 100644 --- a/pygac/reader.py +++ b/pygac/reader.py @@ -795,7 +795,26 @@ def get_angles(self): arr[self.mask] = np.nan return sat_azi, sat_zenith, sun_azi, sun_zenith, rel_azi + + def _get_true_median(self, input_array, q=50, interpolation='nearest'): + """ + This is a helper function to get the true median value of the input array. + Functions like np.median, but also np.percentile, default to returning + an interpolation of the two closest values in case the population size + is even. Such an interpolation would turn an integer into a float, which + is not always desireable (like when processing time data). + + Args: + input_array: array_like + q: percentile to be computed, array_like of float, + must be between 0 and 100 inclusive + interpolation: 'linear', 'lower', 'higher', 'midpoint', 'nearest' + Returns: + The q-th percentile(s) of the array elements. + """ + return np.percentile(input_array, q, interpolation) + def correct_times_median(self, year, jday, msec): """Replace invalid timestamps with statistical estimates (using median). @@ -816,8 +835,7 @@ def correct_times_median(self, year, jday, msec): # offset, e.g. the first scanline has timestamp 1970-01-01 00:00 msec_lineno = self.lineno2msec(self.scans["scan_line_number"]) - jday = np.where(np.logical_or(jday < 1, jday > 366), - np.median(jday), jday) + jday = np.where(np.logical_or(jday < 1, jday > 366), self._get_true_median(jday), jday) if_wrong_jday = np.ediff1d(jday, to_begin=0) jday = np.where(if_wrong_jday < 0, max(jday), jday) @@ -827,7 +845,7 @@ def correct_times_median(self, year, jday, msec): if if_wrong_msec[0] != 0: msec = msec[0] + msec_lineno else: - msec0 = np.median(msec - msec_lineno) + msec0 = self._get_true_median(msec - msec_lineno) msec = msec0 + msec_lineno if_wrong_msec = np.ediff1d(msec, to_begin=0) @@ -846,9 +864,9 @@ def correct_times_median(self, year, jday, msec): msec = msec[0] + msec_lineno # Otherwise use median time stamp else: - year = np.median(year) - jday = np.median(jday) - msec0 = np.median(msec - msec_lineno) + year = self._get_true_median(year) + jday = self._get_true_median(jday) + msec0 = self._get_true_median(msec - msec_lineno) msec = msec0 + msec_lineno return year, jday, msec From 1f8eb4620c3660af115f46dd2cce68c988485a5d Mon Sep 17 00:00:00 2001 From: Oliver Sus Date: Tue, 18 Jul 2023 08:39:17 +0000 Subject: [PATCH 3/4] add test for new helper function _get_true_median() --- pygac/tests/test_reader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pygac/tests/test_reader.py b/pygac/tests/test_reader.py index 37656364..88b6a68b 100644 --- a/pygac/tests/test_reader.py +++ b/pygac/tests/test_reader.py @@ -534,6 +534,11 @@ def test_update_metadata(self, 'calib_coeffs_version': 'version'} self.assertDictEqual(self.reader.meta_data, mda_exp) + def test__get_true_median(self): + """Test the get true median method.""" + data = [1, 2, 3, 4, 5, 6] + self.assertEqual(self.reader._get_true_median(data), 3) + self.assertEqual(self.reader._get_true_median(data, interpolation='linear'), 3.5) def suite(): """Test suite for test_reader.""" From 07d4de7336ed7942c41453b078bb80d53cb4a3ad Mon Sep 17 00:00:00 2001 From: Oliver Sus Date: Tue, 18 Jul 2023 12:07:35 +0000 Subject: [PATCH 4/4] add keyword to interpolation --- pygac/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygac/reader.py b/pygac/reader.py index ba21f7d0..dbca2ebb 100644 --- a/pygac/reader.py +++ b/pygac/reader.py @@ -838,7 +838,7 @@ def _get_true_median(self, input_array, q=50, interpolation='nearest'): Returns: The q-th percentile(s) of the array elements. """ - return np.percentile(input_array, q, interpolation) + return np.percentile(input_array, q, interpolation=interpolation) def correct_times_median(self, year, jday, msec): """Replace invalid timestamps with statistical estimates (using median).