From 72e594b5da591438b96697cc8126bf5a3c02a1f2 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 30 Jun 2022 15:21:07 -0400 Subject: [PATCH] Simplify delta decoding I just wrote a delta encoding/decoding crate that takes care of some edge cases and has a lot of testing and perf benchmarking, and this PR makes use of that crate. I'm still hacking on the PBF encoding, so the delta encoding from that lib will be used as well. Note that I use `copied()` iterator which simplifies value handling without adding any perf costs (dealing with values vs refs is simpler too). --- Cargo.toml | 1 + src/dense.rs | 124 ++++++++++++++++++++---------------------------- src/elements.rs | 97 ++++++++++++------------------------- 3 files changed, 82 insertions(+), 140 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 37391f1..dd4d91a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ system-libz = ["flate2"] [dependencies] byteorder = "1.4" +delta-encoding = "0.4" flate2 = { version = "1.0", optional = true } inflate = "0.4" memmap = "0.7" diff --git a/src/dense.rs b/src/dense.rs index 8e19be4..2cb572d 100644 --- a/src/dense.rs +++ b/src/dense.rs @@ -3,7 +3,12 @@ use crate::block::{get_stringtable_key_value, str_from_stringtable}; use crate::error::Result; use crate::proto::osmformat; +use delta_encoding::{DeltaDecoderExt, DeltaDecoderIter}; use std; +use std::iter::Copied; +use std::slice::Iter as SliceIter; + +pub(crate) type DeltaIter<'a, T> = DeltaDecoderIter<Copied<SliceIter<'a, T>>>; //TODO Add getter functions for id, version, uid, ... /// An OpenStreetMap node element from a compressed array of dense nodes (See [OSM wiki](http://wiki.openstreetmap.org/wiki/Node)). 
@@ -85,12 +90,9 @@ impl<'a> DenseNode<'a> { #[derive(Clone, Debug)] pub struct DenseNodeIter<'a> { block: &'a osmformat::PrimitiveBlock, - dids: std::slice::Iter<'a, i64>, // deltas - cid: i64, // current id - dlats: std::slice::Iter<'a, i64>, // deltas - clat: i64, - dlons: std::slice::Iter<'a, i64>, // deltas - clon: i64, + ids: DeltaIter<'a, i64>, + lats: DeltaIter<'a, i64>, + lons: DeltaIter<'a, i64>, keys_vals_slice: &'a [i32], keys_vals_index: usize, info_iter: Option<DenseNodeInfoIter<'a>>, @@ -107,12 +109,9 @@ impl<'a> DenseNodeIter<'a> { )); DenseNodeIter { block, - dids: osmdense.id.iter(), - cid: 0, - dlats: osmdense.lat.iter(), - clat: 0, - dlons: osmdense.lon.iter(), - clon: 0, + ids: osmdense.id.iter().copied().original(), + lats: osmdense.lat.iter().copied().original(), + lons: osmdense.lon.iter().copied().original(), keys_vals_slice: osmdense.keys_vals.as_slice(), keys_vals_index: 0, info_iter, @@ -122,12 +121,9 @@ impl<'a> DenseNodeIter<'a> { pub(crate) fn empty(block: &'a osmformat::PrimitiveBlock) -> DenseNodeIter<'a> { DenseNodeIter { block, - dids: [].iter(), - cid: 0, - dlats: [].iter(), - clat: 0, - dlons: [].iter(), - clon: 0, + ids: [].iter().copied().original(), + lats: [].iter().copied().original(), + lons: [].iter().copied().original(), keys_vals_slice: &[], keys_vals_index: 0, info_iter: None, @@ -140,16 +136,12 @@ impl<'a> Iterator for DenseNodeIter<'a> { fn next(&mut self) -> Option<Self::Item> { match ( - self.dids.next(), - self.dlats.next(), - self.dlons.next(), + self.ids.next(), + self.lats.next(), + self.lons.next(), self.info_iter.as_mut().and_then(|iter| iter.next()), ) { - (Some(did), Some(dlat), Some(dlon), info) => { - self.cid += *did; - self.clat += *dlat; - self.clon += *dlon; - + (Some(id), Some(lat), Some(lon), info) => { let start_index = self.keys_vals_index; let mut end_index = start_index; for chunk in self.keys_vals_slice[self.keys_vals_index..].chunks(2) { @@ -164,9 +156,9 @@ impl<'a> Iterator for DenseNodeIter<'a> { Some(DenseNode { block: 
self.block, - id: self.cid, - lat: self.clat, - lon: self.clon, + id, + lat, + lon, keys_vals_indices: &self.keys_vals_slice[start_index..end_index], info, }) @@ -176,7 +168,7 @@ impl<'a> Iterator for DenseNodeIter<'a> { } fn size_hint(&self) -> (usize, Option<usize>) { - self.dids.size_hint() + self.ids.size_hint() } } @@ -243,16 +235,12 @@ impl<'a> DenseNodeInfo<'a> { #[derive(Clone, Debug)] pub struct DenseNodeInfoIter<'a> { block: &'a osmformat::PrimitiveBlock, - versions: std::slice::Iter<'a, i32>, - dtimestamps: std::slice::Iter<'a, i64>, // deltas - ctimestamp: i64, - dchangesets: std::slice::Iter<'a, i64>, // deltas - cchangeset: i64, - duids: std::slice::Iter<'a, i32>, // deltas - cuid: i32, - duser_sids: std::slice::Iter<'a, i32>, // deltas - cuser_sid: i32, - visible: std::slice::Iter<'a, bool>, + versions: SliceIter<'a, i32>, + timestamps: DeltaIter<'a, i64>, + changesets: DeltaIter<'a, i64>, + uids: DeltaIter<'a, i32>, + user_sids: DeltaIter<'a, i32>, + visible: SliceIter<'a, bool>, } impl<'a> DenseNodeInfoIter<'a> { @@ -263,14 +251,10 @@ impl<'a> DenseNodeInfoIter<'a> { DenseNodeInfoIter { block, versions: info.version.iter(), - dtimestamps: info.timestamp.iter(), - ctimestamp: 0, - dchangesets: info.changeset.iter(), - cchangeset: 0, - duids: info.uid.iter(), - cuid: 0, - duser_sids: info.user_sid.iter(), - cuser_sid: 0, + timestamps: info.timestamp.iter().copied().original(), + changesets: info.changeset.iter().copied().original(), + uids: info.uid.iter().copied().original(), + user_sids: info.user_sid.iter().copied().original(), visible: info.visible.iter(), } } @@ -282,34 +266,28 @@ impl<'a> Iterator for DenseNodeInfoIter<'a> { fn next(&mut self) -> Option<Self::Item> { match ( self.versions.next(), - self.dtimestamps.next(), - self.dchangesets.next(), - self.duids.next(), - self.duser_sids.next(), + self.timestamps.next(), + self.changesets.next(), + self.uids.next(), + self.user_sids.next(), self.visible.next(), ) { ( Some(&version), - Some(dtimestamp), - 
Some(dchangeset), - Some(duid), - Some(duser_sid), + Some(timestamp), + Some(changeset), + Some(uid), + Some(user_sid), visible_opt, - ) => { - self.ctimestamp += *dtimestamp; - self.cchangeset += *dchangeset; - self.cuid += *duid; - self.cuser_sid += *duser_sid; - Some(DenseNodeInfo { - block: self.block, - version, - timestamp: self.ctimestamp, - changeset: self.cchangeset, - uid: self.cuid, - user_sid: self.cuser_sid, - visible: *visible_opt.unwrap_or(&true), - }) - } + ) => Some(DenseNodeInfo { + block: self.block, + version, + timestamp, + changeset, + uid, + user_sid, + visible: *visible_opt.unwrap_or(&true), + }), _ => None, } } @@ -319,7 +297,7 @@ impl<'a> Iterator for DenseNodeInfoIter<'a> { #[derive(Clone, Debug)] pub struct DenseTagIter<'a> { block: &'a osmformat::PrimitiveBlock, - keys_vals_indices: std::slice::Iter<'a, i32>, + keys_vals_indices: SliceIter<'a, i32>, } //TODO return Result @@ -346,7 +324,7 @@ impl<'a> ExactSizeIterator for DenseTagIter<'a> {} /// stringtable of the current [`PrimitiveBlock`](crate::block::PrimitiveBlock). #[derive(Clone, Debug)] pub struct DenseRawTagIter<'a> { - keys_vals_indices: std::slice::Iter<'a, i32>, + keys_vals_indices: SliceIter<'a, i32>, } //TODO return Result diff --git a/src/elements.rs b/src/elements.rs index 233219f..e4562f0 100644 --- a/src/elements.rs +++ b/src/elements.rs @@ -5,8 +5,11 @@ use crate::dense::DenseNode; use crate::error::Result; use crate::proto::osmformat; use crate::proto::osmformat::PrimitiveBlock; +use crate::DeltaIter; +use delta_encoding::DeltaDecoderExt; use osmformat::relation::MemberType; use protobuf::EnumOrUnknown; +use std::slice::Iter as SliceIter; /// An enum with the OSM core elements: nodes, ways and relations. #[derive(Clone, Debug)] @@ -192,10 +195,7 @@ impl<'a> Way<'a> { /// Finding the corresponding node might involve iterating over the whole PBF structure, but /// (to save space) ways themselves usually do not contain geo coordinates. 
pub fn refs(&self) -> WayRefIter<'a> { - WayRefIter { - deltas: self.osmway.refs.iter(), - current: 0, - } + self.osmway.refs.iter().copied().original() } /// Returns an iterator over the way's node locations (latitude, longitude). @@ -208,10 +208,8 @@ impl<'a> Way<'a> { pub fn node_locations(&self) -> WayNodeLocationsIter<'a> { WayNodeLocationsIter { block: self.block, - dlats: self.osmway.lat.iter(), - dlons: self.osmway.lon.iter(), - clat: 0, - clon: 0, + lats: self.osmway.lat.iter().copied().original(), + lons: self.osmway.lon.iter().copied().original(), } } @@ -323,31 +321,7 @@ impl<'a> Relation<'a> { /// An iterator over the references of a way. /// /// Each reference corresponds to a node id. -#[derive(Clone, Debug)] -pub struct WayRefIter<'a> { - deltas: std::slice::Iter<'a, i64>, - current: i64, -} - -impl<'a> Iterator for WayRefIter<'a> { - type Item = i64; - - fn next(&mut self) -> Option<Self::Item> { - match self.deltas.next() { - Some(&d) => { - self.current += d; - Some(self.current) - } - None => None, - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.deltas.size_hint() - } -} - -impl<'a> ExactSizeIterator for WayRefIter<'a> {} +pub type WayRefIter<'a> = DeltaIter<'a, i64>; pub struct WayNodeLocation { lat: i64, @@ -392,31 +366,25 @@ impl WayNodeLocation { #[derive(Clone, Debug)] pub struct WayNodeLocationsIter<'a> { block: &'a osmformat::PrimitiveBlock, - dlats: std::slice::Iter<'a, i64>, - dlons: std::slice::Iter<'a, i64>, - clat: i64, - clon: i64, + lats: DeltaIter<'a, i64>, + lons: DeltaIter<'a, i64>, } impl<'a> Iterator for WayNodeLocationsIter<'a> { type Item = WayNodeLocation; fn next(&mut self) -> Option<Self::Item> { - match (self.dlats.next(), self.dlons.next()) { - (Some(&dlat), Some(&dlon)) => { - self.clat += dlat; - self.clon += dlon; - Some(WayNodeLocation { - lat: self.block.lat_offset() + i64::from(self.block.granularity()) * self.clat, - lon: self.block.lon_offset() + i64::from(self.block.granularity()) * self.clon, - }) - } + match 
(self.lats.next(), self.lons.next()) { + (Some(lat), Some(lon)) => Some(WayNodeLocation { + lat: self.block.lat_offset() + i64::from(self.block.granularity()) * lat, + lon: self.block.lon_offset() + i64::from(self.block.granularity()) * lon, + }), _ => None, } } fn size_hint(&self) -> (usize, Option<usize>) { - self.dlats.size_hint() + self.lats.size_hint() } } @@ -463,10 +431,9 @@ impl<'a> RelMember<'a> { #[derive(Clone, Debug)] pub struct RelMemberIter<'a> { block: &'a PrimitiveBlock, - role_sids: std::slice::Iter<'a, i32>, - member_id_deltas: std::slice::Iter<'a, i64>, - member_types: std::slice::Iter<'a, EnumOrUnknown<MemberType>>, - current_member_id: i64, + role_sids: SliceIter<'a, i32>, + member_ids: DeltaIter<'a, i64>, + member_types: SliceIter<'a, EnumOrUnknown<MemberType>>, } impl<'a> RelMemberIter<'a> { @@ -474,9 +441,8 @@ impl<'a> RelMemberIter<'a> { RelMemberIter { block, role_sids: osmrel.roles_sid.iter(), - member_id_deltas: osmrel.memids.iter(), + member_ids: osmrel.memids.iter().copied().original(), member_types: osmrel.types.iter(), - current_member_id: 0, } } } @@ -487,18 +453,15 @@ impl<'a> Iterator for RelMemberIter<'a> { fn next(&mut self) -> Option<Self::Item> { match ( self.role_sids.next(), - self.member_id_deltas.next(), + self.member_ids.next(), self.member_types.next(), ) { - (Some(role_sid), Some(mem_id_delta), Some(member_type)) => { - self.current_member_id += *mem_id_delta; - Some(RelMember { - block: self.block, - role_sid: *role_sid, - member_id: self.current_member_id, - member_type: RelMemberType::from(*member_type), - }) - } + (Some(role_sid), Some(member_id), Some(member_type)) => Some(RelMember { + block: self.block, + role_sid: *role_sid, + member_id, + member_type: RelMemberType::from(*member_type), + }), _ => None, } } @@ -514,8 +477,8 @@ impl<'a> ExactSizeIterator for RelMemberIter<'a> {} #[derive(Clone, Debug)] pub struct TagIter<'a> { block: &'a PrimitiveBlock, - key_indices: std::slice::Iter<'a, u32>, - val_indices: std::slice::Iter<'a, u32>, + key_indices: 
SliceIter<'a, u32>, + val_indices: SliceIter<'a, u32>, } //TODO return Result? @@ -541,8 +504,8 @@ impl<'a> ExactSizeIterator for TagIter<'a> {} /// stringtable of the current [`PrimitiveBlock`](crate::block::PrimitiveBlock). #[derive(Clone, Debug)] pub struct RawTagIter<'a> { - key_indices: std::slice::Iter<'a, u32>, - val_indices: std::slice::Iter<'a, u32>, + key_indices: SliceIter<'a, u32>, + val_indices: SliceIter<'a, u32>, } //TODO return Result?