Skip to content

Commit 9f7326d

Browse files
committed
more fixups
1 parent f0071bc commit 9f7326d

5 files changed

Lines changed: 113 additions & 48 deletions

File tree

flex-config/gen/grouped-linemerge.lua

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ function osm2pgsql.process_gen()
9595
where = 'name IS NOT NULL OR ref IS NOT NULL',
9696

9797
-- In append mode, the table of exact changed-way endpoints to consume
98-
-- (written by the expire output's 'endpoint_table' above).
98+
-- (written by the expire output's 'endpoint_table' above). Consumption
99+
-- is destructive, so each generalizer needs its own endpoint table.
99100
endpoint_table = 'exp_roads_endpoints',
100101

101102
-- Create functional endpoint indexes on the src/dest tables in create

src/expire-output.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,14 +191,23 @@ std::size_t expire_output_t::output_endpoints_to_table(
191191

192192
pg_conn_t const db_connection{connection_params, "expire"};
193193

194-
db_connection.prepare("insert_endpoints",
195-
"INSERT INTO {} (geom) VALUES ($1::geometry)", qn);
194+
// COPY is significantly faster than individual INSERTs.
195+
db_connection.copy_start(fmt::format("COPY {} (geom) FROM STDIN", qn));
196196

197+
std::string buffer;
197198
for (auto const &[x, y] : endpoints) {
198199
geom::geometry_t const point{geom::point_t{x, y}, m_endpoint_srid};
199-
db_connection.exec_prepared("insert_endpoints",
200-
util::encode_hex(geom_to_ewkb(point)));
200+
buffer += util::encode_hex(geom_to_ewkb(point));
201+
buffer += '\n';
202+
if (buffer.size() >= 65536) {
203+
db_connection.copy_send(buffer, qn);
204+
buffer.clear();
205+
}
206+
}
207+
if (!buffer.empty()) {
208+
db_connection.copy_send(buffer, qn);
201209
}
210+
db_connection.copy_end(qn);
202211

203212
return endpoints.size();
204213
}

src/gen/gen-grouped-linemerge.hpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,16 @@ class pg_conn_t;
3131
* Unlike the tile-based strategies this does NOT clip to tiles and the
3232
* destination geometries are global merged lines, not tile-keyed rows.
3333
*
34-
* In append (update) mode the work is done incrementally and locally: the
35-
* expire table (populated by osm2pgsql during the update) is used only as a
36-
* seed for "where did line geometry change". For every changed region we
37-
* walk the connected component(s) of matching lines out from the seed (via a
38-
* recursive query), delete the merged outputs that overlap the region and
39-
* regenerate them from scratch. This keeps each update bounded to the local
40-
* connected component instead of re-merging the whole planet.
34+
* In append (update) mode the work is done incrementally and locally: an
35+
* endpoint table (populated during the update by an osm2pgsql expire output
36+
* that sets 'endpoint_table') supplies the exact endpoints of every
37+
* added, modified, or deleted way. Starting from the lines that have one of
38+
* these points as an endpoint we walk each affected connected component
39+
* along shared endpoints (via a recursive query), delete the merged rows
40+
* belonging to those components (matched by exact endpoint equality, never
41+
* by proximity or overlap), and regenerate them from scratch. This keeps
42+
* each update bounded to the local connected components instead of
43+
* re-merging the whole planet.
4144
*/
4245
class gen_grouped_linemerge_t : public gen_base_t
4346
{

tests/bdd/flex/run-with-endpoint-expire.feature

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,14 @@ Feature: Expire changed-geometry endpoints into a table
3939
| --slim | -a |
4040
# The added way is isolated, so it contributes its two distinct endpoints.
4141
Then table changed_endpoints has 2 rows
42+
43+
Given the OSM data
44+
"""
45+
w8000000003 v2 dD
46+
"""
47+
When running osm2pgsql flex with parameters
48+
| --slim | -a |
49+
# Deleting the way records the endpoints of its old geometry (read back
50+
# from the database), adding 2 more rows. Rows accumulate until a
51+
# consumer (e.g. the grouped-linemerge generalizer) processes them.
52+
Then table changed_endpoints has 4 rows

tests/test-gen-grouped-linemerge.cpp

Lines changed: 77 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,25 @@ bool matches_reference(testing::pg::conn_t &conn)
121121

122122
struct edge_t
123123
{
124-
std::string wkt;
124+
std::string wkt; // straight variant
125+
std::string wkt_bent; // same endpoints, detour through an offset midpoint
125126
bool present = false;
127+
bool bent = false;
126128
std::string grp;
127129

128-
explicit edge_t(std::string w) : wkt(std::move(w)) {}
130+
edge_t(std::string w, std::string wb)
131+
: wkt(std::move(w)), wkt_bent(std::move(wb))
132+
{}
133+
134+
std::string const &cur_wkt() const noexcept { return bent ? wkt_bent : wkt; }
129135
};
130136

131137
// All horizontal and vertical segments of a GW x GH grid (the candidate
132138
// "ways"). Interior grid points become degree-3+ junctions when same-group
133139
// edges meet there, which is exactly the case that makes ST_LineMerge split a
134-
// connected component into several output lines.
140+
// connected component into several output lines. Each edge also has a "bent"
141+
// variant with the same endpoints but a different interior, to simulate a way
142+
// whose geometry changes without its endpoints moving.
135143
std::vector<edge_t> build_grid_edges()
136144
{
137145
constexpr int GW = 4;
@@ -141,14 +149,23 @@ std::vector<edge_t> build_grid_edges()
141149
auto const seg = [](int x1, int y1, int x2, int y2) {
142150
return fmt::format("LINESTRING({} {},{} {})", x1, y1, x2, y2);
143151
};
152+
auto const seg_bent = [](int x1, int y1, int x2, int y2) {
153+
// The offset midpoint never coincides with a grid point.
154+
return fmt::format("LINESTRING({} {},{} {},{} {})", x1, y1,
155+
(x1 + x2) / 2 + 123, (y1 + y2) / 2 + 77, x2, y2);
156+
};
144157
for (int j = 0; j < GH; ++j) {
145158
for (int i = 0; i < GW - 1; ++i) {
146-
edges.emplace_back(seg(i * STEP, j * STEP, (i + 1) * STEP, j * STEP));
159+
edges.emplace_back(
160+
seg(i * STEP, j * STEP, (i + 1) * STEP, j * STEP),
161+
seg_bent(i * STEP, j * STEP, (i + 1) * STEP, j * STEP));
147162
}
148163
}
149164
for (int i = 0; i < GW; ++i) {
150165
for (int j = 0; j < GH - 1; ++j) {
151-
edges.emplace_back(seg(i * STEP, j * STEP, i * STEP, (j + 1) * STEP));
166+
edges.emplace_back(
167+
seg(i * STEP, j * STEP, i * STEP, (j + 1) * STEP),
168+
seg_bent(i * STEP, j * STEP, i * STEP, (j + 1) * STEP));
152169
}
153170
}
154171
return edges;
@@ -182,7 +199,7 @@ TEST_CASE("grouped-linemerge: incremental updates match full re-merge (fuzz)")
182199
{
183200
auto conn = db.connect();
184201

185-
constexpr int OPS_PER_SEED = 120;
202+
constexpr int BATCHES_PER_SEED = 60;
186203

187204
for (unsigned const seed : {1U, 2U, 3U, 4U}) {
188205
setup_tables(conn);
@@ -193,48 +210,72 @@ TEST_CASE("grouped-linemerge: incremental updates match full re-merge (fuzz)")
193210
for (auto &e : edges) {
194211
if (rng() % 2U == 0U) {
195212
e.present = true;
213+
e.bent = (rng() % 2U == 0U);
196214
e.grp = GROUPS.at(rng() % 3U);
197-
insert_edge(conn, e.grp, e.wkt);
215+
insert_edge(conn, e.grp, e.cur_wkt());
198216
}
199217
}
200218
run_gen(conn, false);
201219
INFO("seed=" << seed << " phase=create");
202220
REQUIRE(matches_reference(conn));
203221

204-
// Random connect/disconnect operations, each followed by an
205-
// incremental append that must reproduce the from-scratch result.
206-
for (int op = 0; op < OPS_PER_SEED; ++op) {
207-
std::vector<std::size_t> present;
208-
std::vector<std::size_t> absent;
209-
for (std::size_t i = 0; i < edges.size(); ++i) {
210-
(edges[i].present ? present : absent).push_back(i);
211-
}
212-
222+
// Random batches of edits (like a real change file with several
223+
// changed ways), each batch followed by one incremental append that
224+
// must reproduce the from-scratch result. Edit kinds: add a line,
225+
// delete a line, reroute a line (same endpoints, new interior
226+
// geometry) and retag a line (same geometry, possibly new group).
227+
for (int batch = 0; batch < BATCHES_PER_SEED; ++batch) {
228+
auto const num_ops = 1U + rng() % 4U;
213229
std::string desc;
214-
bool const do_add =
215-
!absent.empty() && (present.empty() || (rng() % 2U == 0U));
216-
if (do_add) {
217-
auto const idx = absent[rng() % absent.size()];
218-
auto &e = edges[idx];
219-
e.grp = GROUPS.at(rng() % 3U); // may differ from last time: a retag
220-
e.present = true;
221-
insert_edge(conn, e.grp, e.wkt);
222-
expire(conn, e.wkt);
223-
desc = fmt::format("add slot={} grp={}", idx, e.grp);
224-
} else if (!present.empty()) {
225-
auto const idx = present[rng() % present.size()];
226-
auto &e = edges[idx];
227-
expire(conn, e.wkt); // expire the old footprint, then remove it
228-
delete_edge(conn, e.grp, e.wkt);
229-
e.present = false;
230-
desc = fmt::format("del slot={} grp={}", idx, e.grp);
231-
} else {
232-
continue;
230+
for (unsigned n = 0; n < num_ops; ++n) {
231+
std::vector<std::size_t> present;
232+
std::vector<std::size_t> absent;
233+
for (std::size_t i = 0; i < edges.size(); ++i) {
234+
(edges[i].present ? present : absent).push_back(i);
235+
}
236+
237+
auto const kind = rng() % 4U;
238+
if (present.empty() || (kind == 0U && !absent.empty())) {
239+
auto const idx = absent[rng() % absent.size()];
240+
auto &e = edges[idx];
241+
e.grp = GROUPS.at(rng() % 3U);
242+
e.bent = (rng() % 2U == 0U);
243+
e.present = true;
244+
insert_edge(conn, e.grp, e.cur_wkt());
245+
expire(conn, e.cur_wkt());
246+
desc += fmt::format(" add:{}/{}", idx, e.grp);
247+
} else if (kind == 1U) {
248+
auto const idx = present[rng() % present.size()];
249+
auto &e = edges[idx];
250+
expire(conn, e.cur_wkt()); // expire the old footprint, then remove it
251+
delete_edge(conn, e.grp, e.cur_wkt());
252+
e.present = false;
253+
desc += fmt::format(" del:{}/{}", idx, e.grp);
254+
} else if (kind == 2U) {
255+
// reroute: the geometry changes, the endpoints stay
256+
auto const idx = present[rng() % present.size()];
257+
auto &e = edges[idx];
258+
expire(conn, e.cur_wkt()); // the old geometry...
259+
delete_edge(conn, e.grp, e.cur_wkt());
260+
e.bent = !e.bent;
261+
insert_edge(conn, e.grp, e.cur_wkt());
262+
expire(conn, e.cur_wkt()); // ...and the new geometry
263+
desc += fmt::format(" reroute:{}/{}", idx, e.grp);
264+
} else {
265+
// retag: the group changes, the geometry stays
266+
auto const idx = present[rng() % present.size()];
267+
auto &e = edges[idx];
268+
delete_edge(conn, e.grp, e.cur_wkt());
269+
e.grp = GROUPS.at(rng() % 3U); // may even be unchanged
270+
insert_edge(conn, e.grp, e.cur_wkt());
271+
expire(conn, e.cur_wkt()); // old and new endpoints coincide
272+
desc += fmt::format(" retag:{}/{}", idx, e.grp);
273+
}
233274
}
234275

235276
run_gen(conn, true);
236277

237-
INFO("seed=" << seed << " op=" << op << " " << desc);
278+
INFO("seed=" << seed << " batch=" << batch << desc);
238279
REQUIRE(matches_reference(conn));
239280
}
240281
}

0 commit comments

Comments
 (0)