From b6c2a486a4d1ff9e6f48932614e953f25d1ae433 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Mon, 29 Jan 2024 14:01:46 -0500 Subject: [PATCH 01/10] return leap second --- src/georinex/obs2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index 292db34..6fa493c 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -305,10 +305,11 @@ def rinexsystem2( obs.attrs["rxmodel"] = hdr["rxmodel"] if "position" in hdr.keys(): obs.attrs["position"] = hdr["position"] - if "position_geodetic" in hdr.keys(): obs.attrs["position_geodetic"] = hdr["position_geodetic"] - + if "LEAP SECONDS" in hdr.keys(): + obs.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) + return obs @@ -454,7 +455,6 @@ def obsheader2( hdr["interval"] = float(hdr["INTERVAL"][:10]) except (KeyError, ValueError): pass - try: s = " " hdr["rxmodel"] = s.join(hdr["REC # / TYPE / VERS"].split()[1:-1]) From 7312ebc8f0ba5971ff344a22cd3db548a1466b78 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Tue, 9 Jul 2024 09:47:37 -0400 Subject: [PATCH 02/10] Remove sparse in I --- src/georinex/nav3.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/georinex/nav3.py b/src/georinex/nav3.py index 02ed7e8..18db41a 100644 --- a/src/georinex/nav3.py +++ b/src/georinex/nav3.py @@ -227,10 +227,16 @@ def _sparefields(cf: list[str], sys: str, N: int) -> list[str]: elif N == 27: # no middle or trailing spare fields cf = cf[:22] + cf[23:-3] elif sys == "I": - if N == 28: - cf = cf[:28] + print (cf) + print (len(cf)) + if N == 26: + cf = cf[:22] + cf[23:26] + [cf[27]] + print (cf) + print (len(cf)) if N != len(cf): + # print (cf, len(cf), N) + # print (np.where(np.array(cf) == 'spare0')) raise ValueError(f"System {sys} NAV data is not the same length as the number of fields.") return cf From caed1de08644a2fc721ac5ec930f123e093d36e4 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Tue, 9 Jul 2024 10:10:11 -0400 Subject: [PATCH 03/10] return leap seconds --- src/georinex/nav2.py | 2 ++ src/georinex/nav3.py | 12 ++++-------- src/georinex/obs3.py | 3 ++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/georinex/nav2.py b/src/georinex/nav2.py index de985d8..2a43e85 100644 --- a/src/georinex/nav2.py +++ b/src/georinex/nav2.py @@ -195,6 +195,8 @@ def rinexnav2(fn: T.TextIO | Path, tlim: tuple[datetime, datetime] | None = None nav[name] *= 1e3 # %% other attributes + if "LEAP SECONDS" in header: + nav.attrs["leap_seconds"] = int(header["LEAP SECONDS"]) nav.attrs["version"] = header["version"] nav.attrs["svtype"] = [svtype] # Use list for consistency with NAV3. nav.attrs["rinextype"] = "nav" diff --git a/src/georinex/nav3.py b/src/georinex/nav3.py index 18db41a..63d9534 100644 --- a/src/georinex/nav3.py +++ b/src/georinex/nav3.py @@ -153,7 +153,8 @@ def rinexnav3( nav.attrs["ionospheric_corr_BDS"] = np.hstack((corr["BDSA"], corr["BDSB"])) if "IRNA" in corr and "IRNB" in corr: nav.attrs["ionospheric_corr_IRN"] = np.hstack((corr["IRNA"], corr["IRNB"])) - + if "LEAP SECONDS" in header: + nav.attrs["leap_seconds"] = int(header["LEAP SECONDS"].split()[0]) nav.attrs["version"] = header["version"] nav.attrs["svtype"] = svtypes nav.attrs["rinextype"] = "nav" @@ -227,16 +228,11 @@ def _sparefields(cf: list[str], sys: str, N: int) -> list[str]: elif N == 27: # no middle or trailing spare fields cf = cf[:22] + cf[23:-3] elif sys == "I": - print (cf) - print (len(cf)) if N == 26: cf = cf[:22] + cf[23:26] + [cf[27]] - print (cf) - print (len(cf)) - + elif N == 28: + cf = cf[:28] if N != len(cf): - # print (cf, len(cf), N) - # print (np.where(np.array(cf) == 'spare0')) raise ValueError(f"System {sys} NAV data is not the same length as the number of fields.") return cf diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index a68dc21..89ce87a 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -160,7 +160,8 @@ def rinexobs3( data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) except ValueError: pass - + if "LEAP SECONDS" in hdr.keys(): + data.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) return data From 08a1406a99a94a9f8e9bb7fcd0774102284915db Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Fri, 12 Jul 2024 12:07:10 -0400 Subject: [PATCH 04/10] parse rnx version --- src/georinex/nav3.py | 1 - src/georinex/obs2.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/georinex/nav3.py b/src/georinex/nav3.py index 63d9534..e2958b9 100644 --- a/src/georinex/nav3.py +++ b/src/georinex/nav3.py @@ -78,7 +78,6 @@ def rinexnav3( raw += ln[STARTCOL3:80] # one line per SV raws.append(raw.replace("D", "E").replace("\n", "")) - # %% parse collected data per SV # NOTE: must be 'ns' or .to_netcdf will fail! t = np.array([np.datetime64(t, "ns") for t in times]) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index 6fa493c..f4c1c8e 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -451,10 +451,14 @@ def obsheader2( except (KeyError, ValueError): pass - try: # This key is OPTIONAL + try: hdr["interval"] = float(hdr["INTERVAL"][:10]) except (KeyError, ValueError): pass + try: + hdr["version"] = float(hdr["RINEX VERSION / TYPE"].split()[0]) + except (KeyError, ValueError): + pass try: s = " " hdr["rxmodel"] = s.join(hdr["REC # / TYPE / VERS"].split()[1:-1]) From e0f610082dcd6492effd0b37a2f849e15dc9b778 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Wed, 17 Jul 2024 09:54:43 -0400 Subject: [PATCH 05/10] Drop empty use-const --- src/georinex/obs2.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index f4c1c8e..f162083 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -52,13 +52,10 @@ def rinexobs2( fast=fast, interval=interval, ) - - if len(o.variables) > 0: + if o.time.values.size > 0: attrs = o.attrs obs = xarray.merge((obs, o)) - obs.attrs = attrs - return obs @@ -283,19 +280,19 @@ def rinexsystem2( obs = obs.dropna(dim="time", how="all") # when tlim specified # %% attributes obs.attrs["version"] = hdr["version"] - + # Get interval from header or derive it from the data - if "interval" in hdr.keys(): + if "interval" in hdr.keys() and np.isfinite(hdr["interval"]): obs.attrs["interval"] = hdr["interval"] elif "time" in obs.coords.keys(): # median is robust against gaps try: - obs.attrs["interval"] = np.median(np.diff(obs.time) / np.timedelta64(1, "s")) + obs.attrs["interval"] = str(np.median(np.diff(obs.time) / np.timedelta64(1, "s"))) except TypeError: - pass - else: - obs.attrs["interval"] = np.nan - + print ("Couldn't process interval") + # else: + # obs.attrs["interval"] = np.nan + obs.attrs["rinextype"] = "obs" obs.attrs["fast_processing"] = int(fast) # bool is not allowed in NetCDF4 obs.attrs["time_system"] = determine_time_system(hdr) @@ -309,7 +306,7 @@ def rinexsystem2( obs.attrs["position_geodetic"] = hdr["position_geodetic"] if "LEAP SECONDS" in hdr.keys(): obs.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) - + return obs From 444110332a445d6e02183d8c353f24017ab555ed Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Wed, 17 Jul 2024 09:56:04 -0400 Subject: [PATCH 06/10] Drop empty use-const --- src/georinex/obs2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index f162083..0365496 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -287,7 +287,7 @@ def rinexsystem2( elif "time" in obs.coords.keys(): # median is robust against gaps try: - obs.attrs["interval"] = str(np.median(np.diff(obs.time) / np.timedelta64(1, "s"))) + obs.attrs["interval"] = np.median(np.diff(obs.time) / np.timedelta64(1, "s")) except TypeError: print ("Couldn't process interval") # else: From 79873100f237c347688c6e63af96ddace36fcb29 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Thu, 8 Aug 2024 06:32:41 -0400 Subject: [PATCH 07/10] asd --- src/georinex/obs2.py | 8 +++++--- src/georinex/obs3.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/georinex/obs2.py b/src/georinex/obs2.py index 0365496..4180abd 100644 --- a/src/georinex/obs2.py +++ b/src/georinex/obs2.py @@ -52,7 +52,7 @@ def rinexobs2( fast=fast, interval=interval, ) - if o.time.values.size > 0: + if len(o.variables) > 0: attrs = o.attrs obs = xarray.merge((obs, o)) obs.attrs = attrs @@ -250,7 +250,6 @@ def rinexsystem2( data[i, j, isv] = darr[:, k] # %% output gathering data = data[:, : times.size, :] # trims down for unneeded preallocated - fields = [] for field in hdr["fields"]: fields.append(field) @@ -305,7 +304,10 @@ def rinexsystem2( if "position_geodetic" in hdr.keys(): obs.attrs["position_geodetic"] = hdr["position_geodetic"] if "LEAP SECONDS" in hdr.keys(): - obs.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) + try: + obs.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) + except: + pass return obs diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 89ce87a..03f54f5 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -161,7 +161,7 @@ def rinexobs3( except ValueError: pass if "LEAP SECONDS" in hdr.keys(): - data.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"]) + data.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"].split()[0]) return data From e6c492f5e8d28f9618358e6f88320dbdb907510a Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Wed, 26 Nov 2025 12:39:45 -0500 Subject: [PATCH 08/10] fast obs3 --- src/georinex/__init__.py | 2 +- src/georinex/obs3.py | 628 ++++++++++++++++++++++++++++++++++----- 2 files changed, 560 insertions(+), 70 deletions(-) diff --git a/src/georinex/__init__.py b/src/georinex/__init__.py index a84edf9..bf05830 100644 --- a/src/georinex/__init__.py +++ b/src/georinex/__init__.py @@ -2,7 +2,7 @@ from .utils import gettime, rinexheader, globber, to_datetime from .rio import rinexinfo from .obs2 import rinexobs2, obsheader2, obstime2 -from .obs3 import rinexobs3, obsheader3, obstime3 +from .obs3 import rinexobs3, obsheader3, obstime3 from .nav2 import rinexnav2, navheader2, navtime2 from .nav3 import rinexnav3, navheader3, navtime3 from .sp3 import load_sp3 diff --git a/src/georinex/obs3.py b/src/georinex/obs3.py index 03f54f5..ad852f7 100644 --- a/src/georinex/obs3.py +++ b/src/georinex/obs3.py @@ -1,10 +1,10 @@ -from __future__ import annotations -from pathlib import Path -import numpy as np -import logging -from datetime import datetime, timedelta -import io -import xarray +from collections.abc import Hashable +from pathlib import Path +import numpy as np +import logging +from datetime import datetime, timedelta +import io +import xarray import typing as T try: @@ -25,6 +25,146 @@ __all__ = ["rinexobs3", "obsheader3", "obstime3"] +# def rinexobs3( +# fn: T.TextIO | Path, +# use: set[str] | None = None, +# tlim: tuple[datetime, datetime] | None = None, +# useindicators: bool = False, +# meas: list[str] | None = None, +# verbose: bool = False, +# *, +# fast: bool = False, +# interval: float | int | timedelta | None = None, +# ): +# """ +# process RINEX 3 OBS data + +# fn: RINEX OBS 3 filename +# use: 'G' or ['G', 'R'] or similar + +# tlim: read between these time bounds +# useindicators: SSI, LLI are output +# meas: 'L1C' or ['L1C', 'C1C'] or similar + +# fast: +# TODO: FUTURE, not yet enabled for OBS3 +# speculative preallocation based on minimum SV assumption and file size. +# Avoids double-reading file and more complicated linked lists. +# Believed that Numpy array should be faster than lists anyway. +# Reduce Nsvmin if error (let us know) + +# interval: allows decimating file read by time e.g. every 5 seconds. +# Useful to speed up reading of very large RINEX files +# """ + +# interval = check_time_interval(interval) + +# if isinstance(use, str): +# use = {use} + +# if isinstance(meas, str): +# meas = [meas] + +# if not meas or not meas[0].strip(): +# meas = None +# # %% allocate +# # times = obstime3(fn) +# data = xarray.Dataset({}, coords={"time": [], "sv": []}) +# if tlim is not None and not isinstance(tlim[0], datetime): +# raise TypeError("time bounds are specified as datetime.datetime") + +# last_epoch = None +# # %% loop +# with opener(fn) as f: +# hdr = obsheader3(f, use, meas) + +# # %% process OBS file +# time_offset = [] +# for ln in f: +# if not ln.startswith(">"): # end of file +# break + +# try: +# time = _timeobs(ln) +# except ValueError: # garbage between header and RINEX data +# logging.debug(f"garbage detected in {fn}, trying to parse at next time step") +# continue + +# try: +# time_offset.append(float(ln[41:56])) +# except ValueError: +# pass +# # %% get SV indices +# sv = [] +# raw = "" +# # Number of visible satellites this time %i3 pg. A13 +# for _ in range(int(ln[33:35])): +# ln = f.readline() +# sv.append(ln[:3]) +# raw += ln[3:] + +# if tlim is not None: +# if time < tlim[0]: +# continue +# elif time > tlim[1]: +# break + +# if interval is not None: +# if last_epoch is None: # initialization +# last_epoch = time +# else: +# if time - last_epoch < interval: +# continue +# else: +# last_epoch += interval + +# if verbose: +# print(time, end="\r") + +# # this time epoch is complete, assemble the data. +# data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + +# # %% patch SV names in case of "G 7" => "G07" +# data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) +# # %% other attributes +# data.attrs["version"] = hdr["version"] + +# # Get interval from header or derive it from the data +# if "interval" in hdr.keys(): +# data.attrs["interval"] = hdr["interval"] +# elif "time" in data.coords.keys(): +# # median is robust against gaps +# try: +# data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) +# except TypeError: +# pass +# else: +# data.attrs["interval"] = np.nan + +# data.attrs["rinextype"] = "obs" +# data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 +# data.attrs["time_system"] = determine_time_system(hdr) +# if isinstance(fn, Path): +# data.attrs["filename"] = fn.name + +# if "position" in hdr.keys(): +# data.attrs["position"] = hdr["position"] +# if ecef2geodetic is not None: +# data.attrs["position_geodetic"] = hdr["position_geodetic"] + +# if time_offset: +# data.attrs["time_offset"] = time_offset +# if "rxmodel" in hdr.keys(): +# data.attrs["rxmodel"] = hdr["rxmodel"] +# if "RCV CLOCK OFFS APPL" in hdr.keys(): +# try: +# data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) +# except ValueError: +# pass +# if "LEAP SECONDS" in hdr.keys(): +# data.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"].split()[0]) +# return data + def rinexobs3( fn: T.TextIO | Path, use: set[str] | None = None, @@ -37,80 +177,84 @@ def rinexobs3( interval: float | int | timedelta | None = None, ): """ - process RINEX 3 OBS data - - fn: RINEX OBS 3 filename - use: 'G' or ['G', 'R'] or similar - - tlim: read between these time bounds - useindicators: SSI, LLI are output - meas: 'L1C' or ['L1C', 'C1C'] or similar - - fast: - TODO: FUTURE, not yet enabled for OBS3 - speculative preallocation based on minimum SV assumption and file size. - Avoids double-reading file and more complicated linked lists. - Believed that Numpy array should be faster than lists anyway. - Reduce Nsvmin if error (let us know) - - interval: allows decimating file read by time e.g. every 5 seconds. - Useful to speed up reading of very large RINEX files + Optimized RINEX 3 reader using collectors to avoid per-epoch xarray ops. + Maintains the same API/behavior as original rinexobs3. """ - interval = check_time_interval(interval) if isinstance(use, str): use = {use} - if isinstance(meas, str): meas = [meas] - if not meas or not meas[0].strip(): meas = None - # %% allocate - # times = obstime3(fn) + data = xarray.Dataset({}, coords={"time": [], "sv": []}) + if tlim is not None and not isinstance(tlim[0], datetime): raise TypeError("time bounds are specified as datetime.datetime") last_epoch = None - # %% loop + time_offsets = [] + + # Open file and read header with opener(fn) as f: hdr = obsheader3(f, use, meas) - # %% process OBS file - time_offset = [] + # Initialize collectors structure + collectors = { + "time": [], + "systems": {}, + } + for sk in hdr["fields"]: + collectors["systems"][sk] = { + "epoch_svs": [], # list of per-epoch arrays of SV names for this system + "obs": {obs: [] for obs in hdr["fields"][sk]}, + "lli": {obs: [] for obs in hdr["fields"][sk]}, + "ssi": {obs: [] for obs in hdr["fields"][sk]}, + } + + # loop through file epochs for ln in f: - if not ln.startswith(">"): # end of file + if not ln.startswith(">"): break try: time = _timeobs(ln) - except ValueError: # garbage between header and RINEX data - logging.debug(f"garbage detected in {fn}, trying to parse at next time step") + except ValueError: + # garbage between header and data — skip until next epoch mark + logging.debug("garbage detected in file while parsing epoch") continue + # optional clock offset reading same as original try: - time_offset.append(float(ln[41:56])) + time_offsets.append(float(ln[41:56])) except ValueError: pass - # %% get SV indices + + # Number of visible satellites on this epoch + try: + nsat_this = int(ln[33:35]) + except Exception: + nsat_this = 0 + sv = [] raw = "" - # Number of visible satellites this time %i3 pg. A13 - for _ in range(int(ln[33:35])): - ln = f.readline() - sv.append(ln[:3]) - raw += ln[3:] + for _ in range(nsat_this): + l2 = f.readline() + sv.append(l2[:3]) + raw += l2[3:] + # time filtering if tlim is not None: if time < tlim[0]: continue - elif time > tlim[1]: + if time > tlim[1]: break + # interval decimation if interval is not None: - if last_epoch is None: # initialization + if last_epoch is None: last_epoch = time else: if time - last_epoch < interval: @@ -121,49 +265,55 @@ def rinexobs3( if verbose: print(time, end="\r") - # this time epoch is complete, assemble the data. - data = _epoch(data, raw, hdr, time, sv, useindicators, verbose) + # collect epoch data (fast) + _collect_epoch(collectors, raw, hdr, time, sv, useindicators) - # %% patch SV names in case of "G 7" => "G07" - data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) - # %% other attributes - data.attrs["version"] = hdr["version"] + # build final dataset from collectors + data = _build_from_collectors(collectors, hdr, useindicators) - # Get interval from header or derive it from the data - if "interval" in hdr.keys(): - data.attrs["interval"] = hdr["interval"] - elif "time" in data.coords.keys(): - # median is robust against gaps + # patch SV names like original code: "G 7" -> "G07" + if "sv" in data.coords: try: - data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) - except TypeError: + data = data.assign_coords(sv=[s.replace(" ", "0") for s in data.sv.values.tolist()]) + except Exception: + # if sv coordinate is not a simple 1D array, skip patch pass + + # copy attributes from header, similar to original + data.attrs["version"] = hdr.get("version", None) + if "interval" in hdr: + data.attrs["interval"] = hdr["interval"] else: - data.attrs["interval"] = np.nan + try: + data.attrs["interval"] = np.median(np.diff(data.time) / np.timedelta64(1, "s")) + except Exception: + data.attrs["interval"] = np.nan data.attrs["rinextype"] = "obs" - data.attrs["fast_processing"] = 0 # bool is not allowed in NetCDF4 + data.attrs["fast_processing"] = 1 data.attrs["time_system"] = determine_time_system(hdr) if isinstance(fn, Path): data.attrs["filename"] = fn.name - if "position" in hdr.keys(): + if "position" in hdr: data.attrs["position"] = hdr["position"] - if ecef2geodetic is not None: + try: + from pymap3d import ecef2geodetic + except Exception: + ecef2geodetic = None + if ecef2geodetic is not None and "position_geodetic" in hdr: data.attrs["position_geodetic"] = hdr["position_geodetic"] - if time_offset: - data.attrs["time_offset"] = time_offset + if time_offsets: + data.attrs["time_offset"] = time_offsets - if "RCV CLOCK OFFS APPL" in hdr.keys(): + if "RCV CLOCK OFFS APPL" in hdr: try: data.attrs["receiver_clock_offset_applied"] = int(hdr["RCV CLOCK OFFS APPL"]) - except ValueError: + except Exception: pass - if "LEAP SECONDS" in hdr.keys(): - data.attrs["leap_seconds"] = int(hdr["LEAP SECONDS"].split()[0]) - return data + return data def _timeobs(ln: str) -> datetime: """ @@ -376,5 +526,345 @@ def obsheader3( hdr["fields"] = fields hdr["fields_ind"] = sysind hdr["Fmax"] = Fmax + + try: + s = " " + hdr["rxmodel"] = s.join(hdr["REC # / TYPE / VERS"].split()[1:-1]) + except (KeyError, ValueError): + pass return hdr + + +def _parse_raw_fixedwidth(raw: str, nsat: int, Fmax: int) -> np.ndarray: + """ + Parse the 'raw' block returned by rinexobs3 into an ndarray shaped (nsat, Fmax*3) + where each observation field is: + - value (14 chars) -> float or np.nan + - LLI indicator (1 char) + - SSI indicator (1 char) + The RINEX 3 observation block normally repeats 16 characters per observation (14+1+1). + """ + # Split into lines (one line per satellite row read earlier) + rows = raw.splitlines() + # Some files might wrap observations across multiple lines per satellite. The calling + # code concatenates lines read from the file; rows should correspond to satellites. + # defensive: trim empty lines + rows = [r for r in rows if r] + if len(rows) != nsat: + # Fallback: allow rows >= nsat or <= nsat, but prefer the parsed number + # If inconsistent, fall back to genfromtxt behaviour (rare) + pass + + # preallocate with NaN + total_cols = Fmax * 3 + garr = np.full((len(rows), total_cols), np.nan, dtype=float) + + # local references for speed + _float = float + _strip = str.strip + + for ri, row in enumerate(rows): + # make sure row has at least total_cols*? chars, pad if necessary + # observation chunk width is 16 characters (14 value + 1 LLI + 1 SSI) + # but the row might be shorter if fewer fields; we only parse what's available + max_parse = min(Fmax, (len(row) + 15) // 16) + base = 0 + for oi in range(max_parse): + start = oi * 16 + vstr = row[start : start + 14] + if vstr.strip(): # only parse non-empty numeric strings + # A small try/except protects against rare bad values but is relatively fast + try: + garr[ri, oi * 3] = _float(vstr) + except ValueError: + garr[ri, oi * 3] = np.nan + # LLI (1 char) and SSI (1 char) are present at start+14 and start+15 + # parse LLI as int if present (0-9 typically) or NaN + lli_pos = start + 14 + ssi_pos = start + 15 + if lli_pos < len(row): + lli_char = row[lli_pos] + if lli_char.strip(): + # convert to numeric code if wanted (keep float NaN for missing) + try: + garr[ri, oi * 3 + 1] = _float(lli_char) + except ValueError: + garr[ri, oi * 3 + 1] = np.nan + if ssi_pos < len(row): + ssi_char = row[ssi_pos] + if ssi_char.strip(): + try: + garr[ri, oi * 3 + 2] = _float(ssi_char) + except ValueError: + garr[ri, oi * 3 + 2] = np.nan + + return garr + +def _parse_epoch_fixedwidth(raw: str, Fmax: int) -> np.ndarray: + """ + raw = multiple lines of epoch SV blocks, each line containing Fmax obs fields. + Returns ndarray of shape (num_sv, Fmax*3) + """ + lines = raw.splitlines() + out = np.empty((len(lines), Fmax*3), dtype=float) + + for i, ln in enumerate(lines): + # Each observable is 16 characters: 14 value + LLI + SSI + # Example positions: [0:14], [14], [15], [16:30], ... + for k in range(Fmax): + base = 16*k + obs = ln[base:base+14].strip() + lli = ln[base+14:base+15] + ssi = ln[base+15:base+16] + + out[i, 3*k] = float(obs) if obs else np.nan + out[i, 3*k+1] = int(lli) if lli.strip() else -1 + out[i, 3*k+2] = int(ssi) if ssi.strip() else -1 + + return out + +# def _epoch_fast( +# data: xarray.Dataset, +# raw: str, +# hdr: dict, +# time: datetime, +# sv_list: list[str], +# useindicators: bool, +# verbose: bool, +# ) -> xarray.Dataset: + +# fields = hdr["fields"] +# fields_ind = hdr["fields_ind"] +# Fmax = hdr["Fmax"] # max # of observables across systems +# Fcount = len(fields) + +# # ---- FAST FIXED-WIDTH PARSE (replaces genfromtxt) ------------------------- +# try: +# arr = _parse_epoch_fixedwidth(raw, Fmax) +# except: +# print (time) +# sv_arr = np.array(sv_list) + +# for system in fields: +# # Find satellites of this GNSS system (e.g., G, R, E) +# mask = np.char.startswith(sv_arr, system) +# idx = np.where(mask)[0] +# if idx.size == 0: +# continue + +# gsv = sv_arr[idx] +# # Select only columns relevant to this GNSS system +# di = fields_ind[system] +# garr = arr[idx][:, di] + +# # Build variables using vectorized extraction: +# sys_vars = {} +# for i, obs in enumerate(fields[system]): +# # Values (3*k), LLI (3*k+1), SSI (3*k+2) +# vals = garr[:, 3*i] + +# sys_vars[obs] = (("time", "sv"), vals[None, :]) + +# if useindicators: +# sys_vars[obs + "lli"] = (("time", "sv"), garr[:, 3*i+1][None, :]) +# sys_vars[obs + "ssi"] = (("time", "sv"), garr[:, 3*i+2][None, :]) + +# sys_ds = xarray.Dataset(sys_vars, coords={"time": [time], "sv": gsv}) + +# # ---- Append to master dataset ----------------------------------------- +# if data.sizes.get("time", 0) == 0: +# data = sys_ds +# elif Fcount == 1: +# data = xarray.concat([data, sys_ds], dim="time") +# else: +# data = xarray.merge([data, sys_ds], compat="no_conflicts") + +# if verbose: +# print(time, end="\r") + +# return data + +def _collect_epoch( + collectors: dict, + raw: str, + hdr: dict, + time: datetime, + sv_list: list[str], + useindicators: bool, +): + """ + Parse the epoch block and append values into collectors. + collectors structure: + { + "time": [datetime,...], + "systems": { + sk: { + "epoch_svs": [array_of_sv_for_epoch0, array_of_sv_for_epoch1, ...], + "obs": {obsname: [1D-array (len=nsat_epoch) , ...] , ...}, + "lli": {obsname: [...], ...}, # only if useindicators + "ssi": {obsname: [...], ...}, + }, ... + } + } + """ + Fmax = hdr["Fmax"] + fields = hdr["fields"] + fields_ind = hdr["fields_ind"] + try: + arr = _parse_epoch_fixedwidth(raw, Fmax) + except: + arr = np.nan * np.empty((len(raw.splitlines()), Fmax*3), dtype=float) + sv_arr = np.array(sv_list) # e.g. ['G07', 'C01', ...] + + # register time + collectors["time"].append(time) + + # For each system, extract satellite subset and append per-observable arrays + for sk in fields: + # boolean mask for satellites of this system (first char is system) + # use np.char.startswith to handle numpy array of strings + mask = np.char.startswith(sv_arr, sk) + idx = np.nonzero(mask)[0] + + # if idx.size == 0: + # # nothing to collect for this system at this epoch + # print ("Notning in this line") + # continue + + # selected SV names and slice of arr + sv_epoch = sv_arr[idx] + di = fields_ind[sk] # index selector into 3*Fmax columns (could be slice or boolean) + garr = arr[idx][:, di] # shape (nsat_sys, 3 * nobs_sys) + + # append the sv list for this epoch + collectors["systems"][sk]["epoch_svs"].append(sv_epoch) + + # append each observable's values and optionally indicators + for i, obs in enumerate(fields[sk]): + vals = garr[:, i * 3] # 1D array length nsat_sys + collectors["systems"][sk]["obs"][obs].append(vals) + + if useindicators: + collectors["systems"][sk]["lli"][obs].append(garr[:, i * 3 + 1]) + collectors["systems"][sk]["ssi"][obs].append(garr[:, i * 3 + 2]) + +def _build_from_collectors(collectors: dict, hdr: dict, useindicators: bool) -> xarray.Dataset: + """ + Build one xarray.Dataset per GNSS system using a master SV ordering + (order of first appearance) and stack per-epoch arrays into shape (ntime, nsv_master). + Then merge the system datasets and return final Dataset. + """ + times = np.asarray(collectors["time"], dtype="datetime64[ns]") + systems = hdr["fields"].keys() + system_dsets = [] + + for sk in systems: + sysc = collectors["systems"][sk] + if len(sysc["epoch_svs"]) == 0: + # system not present in file (or filtered out) + continue + + ntime = len(sysc["epoch_svs"]) + + # Build master SV list preserving first appearance order + master_svs = [] + seen = set() + for sv_epoch in sysc["epoch_svs"]: + for s in sv_epoch: + if s not in seen: + seen.add(s) + master_svs.append(s) + nsv = len(master_svs) + # mapping sv->col index + sv_to_col = {sv: j for j, sv in enumerate(master_svs)} + + # For each observable create an (ntime, nsv) array filled with NaN and fill per epoch values + sys_vars = {} + for obs in hdr["fields"][sk]: + arr_obs = np.full((ntime, nsv), np.nan, dtype=float) + # iterate epochs + for t_idx, vals in enumerate(sysc["obs"][obs]): + # vals length equals number of SVs in that epoch for this system + sv_epoch = sysc["epoch_svs"][t_idx] + for j, s in enumerate(sv_epoch): + col = sv_to_col[s] + arr_obs[t_idx, col] = vals[j] + sys_vars[obs] = (("time", "sv"), arr_obs) + + if useindicators: + # LLI + arr_lli = np.full((ntime, nsv), np.nan, dtype=float) + arr_ssi = np.full((ntime, nsv), np.nan, dtype=float) + for t_idx, vals in enumerate(sysc["lli"][obs]): + sv_epoch = sysc["epoch_svs"][t_idx] + for j, s in enumerate(sv_epoch): + col = sv_to_col[s] + arr_lli[t_idx, col] = vals[j] + for t_idx, vals in enumerate(sysc["ssi"][obs]): + sv_epoch = sysc["epoch_svs"][t_idx] + for j, s in enumerate(sv_epoch): + col = sv_to_col[s] + arr_ssi[t_idx, col] = vals[j] + sys_vars[obs + "lli"] = (("time", "sv"), arr_lli) + sys_vars[obs + "ssi"] = (("time", "sv"), arr_ssi) + # for key, value in sys_vars.items(): + # print (np.squeeze(value[1])) + # print(f"Key: '{key}', Shape: {value[1].shape}") + # Make sure the time dimensions agree + + # Build dataset for this system and set coords (time, sv_master) + ds = xarray.Dataset(sys_vars, coords={"time": times, "sv": np.array(master_svs)}) + system_dsets.append(ds) + + if len(system_dsets) == 0: + # empty file + return xarray.Dataset({}, coords={"time": times, "sv": []}) + + # Merge all system datasets once (no_conflicts to mirror original behavior) + final = xarray.merge(system_dsets, compat="no_conflicts") + return final + +# def build_dataset_from_collectors(collectors, hdr, useindicators): +# datasets = [] +# times = collectors["time"] + +# for system, obsfields in hdr["fields"].items(): +# # If system not present +# if len(collectors["sv"][system]) == 0: +# continue + +# # Build SV coordinate: every epoch can have different SV set +# # xarray allows this as long as dims match variable shapes +# sv_coord = collectors["sv"][system] + +# # For each observable → stack into (ntime, nsv_ep) +# sys_vars = {} +# for obs in obsfields: +# vals = collectors["obs"][system][obs] +# vals = [v[np.newaxis, :] for v in vals] +# sys_vars[obs] = (("time", "sv"), np.concatenate(vals, axis=0)) + +# if useindicators: +# lli = [v[np.newaxis, :] for v in collectors["lli"][system][obs]] +# ssi = [v[np.newaxis, :] for v in collectors["ssi"][system][obs]] +# sys_vars[obs+"lli"] = (("time", "sv"), np.concatenate(lli, axis=0)) +# sys_vars[obs+"ssi"] = (("time", "sv"), np.concatenate(ssi, axis=0)) + +# # Build coords +# ds = xarray.Dataset( +# sys_vars, +# coords={ +# "time": times, +# "sv": ["PLACEHOLDER"], # overwritten next step +# } +# ) + +# # Replace sv coordinate per-epoch +# # xarray allows per-time-variable coordinates +# ds = ds.assign_coords(sv=("time", sv_coord)) + +# datasets.append(ds) + +# # Merge all systems once +# return xarray.merge(datasets, compat="no_conflicts") From 5adef637126ac58935e7f635bb72aa07f80e7919 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Thu, 4 Dec 2025 14:55:04 -0500 Subject: [PATCH 09/10] decimate via xr.resample --- src/georinex/base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/georinex/base.py b/src/georinex/base.py index 60f079a..353b6a3 100644 --- a/src/georinex/base.py +++ b/src/georinex/base.py @@ -221,7 +221,7 @@ def rinexobs( meas=meas, verbose=verbose, fast=fast, - interval=interval, + # interval=interval, ) elif int(info["version"]) == 3: obs = rinexobs3( @@ -232,11 +232,13 @@ def rinexobs( meas=meas, verbose=verbose, fast=fast, - interval=interval, + # interval=interval, ) else: raise ValueError(f"unknown RINEX {info} {fn}") - + if interval is not None: + obs = obs.resample(time=f"{int(interval)}S").median() + obs.attrs["interval"] = int(interval) # %% optional output write if outfn: outfn = Path(outfn).expanduser() From be06d4f118b83e1d235226ad91cd56c88e77e230 Mon Sep 17 00:00:00 2001 From: aldebaran1 Date: Tue, 9 Dec 2025 14:38:38 +0000 Subject: [PATCH 10/10] careful resampling --- src/georinex/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/georinex/base.py b/src/georinex/base.py index 353b6a3..72235ef 100644 --- a/src/georinex/base.py +++ b/src/georinex/base.py @@ -236,7 +236,7 @@ def rinexobs( ) else: raise ValueError(f"unknown RINEX {info} {fn}") - if interval is not None: + if (interval is not None) and obs.time.size > 0: obs = obs.resample(time=f"{int(interval)}S").median() obs.attrs["interval"] = int(interval) # %% optional output write