From 303694beb451c61979841ddc8ff7f867b49476c1 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 11:04:40 +0200 Subject: [PATCH 01/11] Update to new insta snapshot format --- cli/tests/snapshots/cli__export_to_zip_file.snap | 10 ++-------- .../snapshots/cli__list_corpora_fully_loaded.snap | 12 ++---------- .../snapshots/cli__list_corpora_not_loaded.snap | 8 ++------ .../cli__list_corpora_partially_loaded.snap | 12 ++---------- cli/tests/snapshots/cli__show_corpus_info.snap | 12 ++---------- ...b__corpusstorage__tests__corpus-config-after.snap | 1 - ...__corpusstorage__tests__corpus-config-before.snap | 1 - ..._corpusstorage__tests__corpus-config-graphml.snap | 1 - ...corpusstorage__tests__corpus-config-relannis.snap | 1 - 9 files changed, 10 insertions(+), 48 deletions(-) diff --git a/cli/tests/snapshots/cli__export_to_zip_file.snap b/cli/tests/snapshots/cli__export_to_zip_file.snap index fb2545134..4c8b8e808 100644 --- a/cli/tests/snapshots/cli__export_to_zip_file.snap +++ b/cli/tests/snapshots/cli__export_to_zip_file.snap @@ -1,14 +1,8 @@ --- source: cli/tests/cli.rs -info: - program: annis - args: - - "../graphannis/tests/data/" - - "-c" - - corpus sample-disk-based-3.3 - - "-c" - - export sample-disk-based-3.3.zip +expression: actual --- + success: true exit_code: 0 ----- stdout ----- diff --git a/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap b/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap index b7a158610..6a2901070 100644 --- a/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap @@ -1,16 +1,8 @@ --- source: cli/tests/cli.rs -info: - program: annis - args: - - "../graphannis/tests/data/" - - "-c" - - corpus sample-disk-based-3.3 - - "-c" - - preload - - "-c" - - list +expression: actual --- + success: true exit_code: 0 ----- stdout ----- diff --git a/cli/tests/snapshots/cli__list_corpora_not_loaded.snap b/cli/tests/snapshots/cli__list_corpora_not_loaded.snap 
index 7fd24b45b..ea7de80ac 100644 --- a/cli/tests/snapshots/cli__list_corpora_not_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_not_loaded.snap @@ -1,12 +1,8 @@ --- source: cli/tests/cli.rs -info: - program: annis - args: - - "../graphannis/tests/data/" - - "-c" - - list +expression: actual --- + success: true exit_code: 0 ----- stdout ----- diff --git a/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap b/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap index f8b2994b4..0a8fdc0e1 100644 --- a/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap @@ -1,16 +1,8 @@ --- source: cli/tests/cli.rs -info: - program: annis - args: - - "../graphannis/tests/data/" - - "-c" - - corpus sample-disk-based-3.3 - - "-c" - - count tok - - "-c" - - list +expression: actual --- + success: true exit_code: 0 ----- stdout ----- diff --git a/cli/tests/snapshots/cli__show_corpus_info.snap b/cli/tests/snapshots/cli__show_corpus_info.snap index 876b412df..fcd3e9ae5 100644 --- a/cli/tests/snapshots/cli__show_corpus_info.snap +++ b/cli/tests/snapshots/cli__show_corpus_info.snap @@ -1,16 +1,8 @@ --- source: cli/tests/cli.rs -info: - program: annis - args: - - "../graphannis/tests/data/" - - "-c" - - corpus sample-disk-based-3.8 - - "-c" - - preload - - "-c" - - info +expression: actual --- + success: true exit_code: 0 ----- stdout ----- diff --git a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-after.snap b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-after.snap index 953734822..2d9e71ce1 100644 --- a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-after.snap +++ b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-after.snap @@ -89,4 +89,3 @@ quantity = 45 
[corpus_size.unit] name = "tokens" - diff --git a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-before.snap b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-before.snap index 51b508ce4..b2f7acde5 100644 --- a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-before.snap +++ b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-before.snap @@ -89,4 +89,3 @@ quantity = 44 [corpus_size.unit] name = "tokens" - diff --git a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-graphml.snap b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-graphml.snap index bda7f6326..29dc8b75d 100644 --- a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-graphml.snap +++ b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-graphml.snap @@ -89,4 +89,3 @@ quantity = 44 [corpus_size.unit] name = "tokens" - diff --git a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-relannis.snap b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-relannis.snap index d7e6c6820..0be39bf95 100644 --- a/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-relannis.snap +++ b/graphannis/src/annis/db/corpusstorage/snapshots/graphannis__annis__db__corpusstorage__tests__corpus-config-relannis.snap @@ -86,4 +86,3 @@ quantity = 44 [corpus_size.unit] name = "tokens" - From bf7212d4e24a539078b4fc7fc3b93a8bc7a516b0 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Thu, 16 Apr 2026 16:57:42 +0200 
Subject: [PATCH 02/11] Update to zip library version 8 which seems to improve the zstd-sys compile times a little bit (cherry picked from commit 95553b8b55ab09c30df22e7ffb2b575f20953e74) --- graphannis/Cargo.toml | 2 +- graphannis/src/annis/db/corpusstorage.rs | 4 ++-- webservice/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index b898c8d42..a3a61a665 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -53,7 +53,7 @@ thiserror = "1" time = "0.3.44" toml = "0.8" transient-btree-index = "0.5" -zip = "0.6.4" +zip = "8.5.1" [dev-dependencies] assert_matches = "1.5.0" diff --git a/graphannis/src/annis/db/corpusstorage.rs b/graphannis/src/annis/db/corpusstorage.rs index df302d9b6..97a1b3590 100644 --- a/graphannis/src/annis/db/corpusstorage.rs +++ b/graphannis/src/annis/db/corpusstorage.rs @@ -800,7 +800,7 @@ impl CorpusStorage { for i in 0..archive.len() { let mut file = archive.by_index(i)?; if let Some(file_path) = file.enclosed_name() { - let output_path = tmp_dir.path().join(file_path); + let output_path = tmp_dir.path().join(&file_path); if let Some(file_name) = output_path.file_name() { if file_name == "corpus.annis" || file_name == "corpus.tab" { @@ -1246,7 +1246,7 @@ impl CorpusStorage { W: Write + Seek, F: Fn(&str), { - let options = zip::write::FileOptions::default() + let options = zip::write::SimpleFileOptions::default() .compression_method(zip::CompressionMethod::Deflated) .large_file(true); diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index 826c9e418..d30b2af83 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -37,7 +37,7 @@ thiserror = "1" time = "0.3.44" uuid = { version = "0.8", features = ["v4"] } walkdir = "2" -zip = "0.6.4" +zip = "8.5.1" [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.5" From efeb4b2cc3e1cda7271506a98ca90d0debb38083 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Thu, 16 Apr 2026 
17:10:57 +0200 Subject: [PATCH 03/11] Update lalrpop version This does not give immediate improvements because the regex-automata dependency still needs the same time to compile (cherry picked from commit 46989237d1123fb66b1212a27bc40f5c5f514d40) --- graphannis/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index a3a61a665..ead7dddd2 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -15,7 +15,7 @@ crate-type = ["lib"] [build-dependencies] csv = "1.1" file_diff = "1" -lalrpop = { version = "0.20", default-features = false, features = [ +lalrpop = { version = "0.23", default-features = false, features = [ "lexer", "unicode", ] } @@ -28,7 +28,7 @@ facet = "0.28.0" fs2 = "0.4" graphannis-core = { path = "../core/", version = "^4" } itertools = "0.10" -lalrpop-util = { version = "0.20", features = ["lexer"] } +lalrpop-util = { version = "0.23", features = ["lexer"] } lazy_static = "1.4" libc = "0.2" linked-hash-map = "0.5" From 76bf88113027d49200eb66b0474f02e1a98bcb25 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 11:12:16 +0200 Subject: [PATCH 04/11] Remove some unused dependencies found by cargo-shear --- capi/Cargo.toml | 1 + cli/Cargo.toml | 4 ---- core/Cargo.toml | 3 --- graphannis/Cargo.toml | 1 - webservice/Cargo.toml | 3 +-- 5 files changed, 2 insertions(+), 10 deletions(-) diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 23eb4f4ae..8535c2dfd 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -10,6 +10,7 @@ version = "4.1.1" [lib] crate-type = ["staticlib", "cdylib"] +test = false [dependencies] graphannis = { path = "../graphannis/", version = "^4" } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index f8f4ee5bf..638fa35d7 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -28,7 +28,6 @@ tikv-jemallocator = "0.5" [dev-dependencies] assert_cmd = "2.1" insta = { version = "1.34.0", features = ["filters"] } -insta-cmd = "0.5" serial_test = "2" 
[[bin]] @@ -36,6 +35,3 @@ name = "annis" [[bin]] name = "annis_bench_queries" - -[package.metadata.cargo-machete] -ignored = ["prettytable-rs"] diff --git a/core/Cargo.toml b/core/Cargo.toml index 6b22d416c..b479dfd90 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -38,9 +38,6 @@ thiserror = "1" toml = "0.8" transient-btree-index = "0.5" -[target.'cfg(windows)'.dependencies] -winapi = { version = "0.3", features = ["heapapi"] } - [dev-dependencies] env_logger = "0.9" fake = "2.2" diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index ead7dddd2..cd47fd79f 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -50,7 +50,6 @@ strum_macros = "0.21" sys-info = "0.9" tempfile = "3" thiserror = "1" -time = "0.3.44" toml = "0.8" transient-btree-index = "0.5" zip = "8.5.1" diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index d30b2af83..cafe8dcb6 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -34,7 +34,6 @@ serde_derive = "1.0" simplelog = "0.12" tempfile = "3" thiserror = "1" -time = "0.3.44" uuid = { version = "0.8", features = ["v4"] } walkdir = "2" zip = "8.5.1" @@ -46,5 +45,5 @@ tikv-jemallocator = "0.5" pretty_assertions = "1.3" insta = { version = "1.34.0", features = ["filters"] } -[package.metadata.cargo-machete] +[package.metadata.cargo-shear] ignored = ["libsqlite3-sys"] From c67e214a20ae7799f077fd04fe1242b707c59217 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 11:27:05 +0200 Subject: [PATCH 05/11] Fix several new clippy lints --- core/src/annostorage/inmemory.rs | 12 ++--- core/src/graph/serialization/graphml.rs | 69 +++++++++++------------- core/src/graph/storage/linear.rs | 4 +- core/src/graph/storage/prepost.rs | 4 +- graphannis/src/annis/db/aql/mod.rs | 7 +-- graphannis/src/annis/db/aql/model.rs | 12 ++--- graphannis/src/annis/db/corpusstorage.rs | 4 +- graphannis/src/annis/db/relannis.rs | 48 +++++++---------- 8 files changed, 70 insertions(+), 90 deletions(-) diff --git 
a/core/src/annostorage/inmemory.rs b/core/src/annostorage/inmemory.rs index 29150278e..0192e481a 100644 --- a/core/src/annostorage/inmemory.rs +++ b/core/src/annostorage/inmemory.rs @@ -180,8 +180,8 @@ where // flatten the hash set of all items of the value map .flat_map(|(key, values)| { values - .iter() - .flat_map(|(_, items)| items.iter().cloned()) + .values() + .flat_map(|items| items.iter().cloned()) .zip(std::iter::repeat(key)) }) .map(Ok); @@ -572,8 +572,8 @@ where // flatten the hash set of all items of the value map .flat_map(|(key, values)| { values - .iter() - .flat_map(|(_, items)| items.iter().cloned()) + .values() + .flat_map(|items| items.iter().cloned()) .zip(std::iter::repeat(key)) }); @@ -889,8 +889,8 @@ where return Ok(result); } else { let result = values_for_key - .iter() - .filter_map(|(val, _items)| self.anno_values.get_value_ref(*val)) + .keys() + .filter_map(|val| self.anno_values.get_value_ref(*val)) .map(|val| Cow::Borrowed(&val[..])) .collect(); return Ok(result); diff --git a/core/src/graph/serialization/graphml.rs b/core/src/graph/serialization/graphml.rs index 92f4e3504..88b467f45 100644 --- a/core/src/graph/serialization/graphml.rs +++ b/core/src/graph/serialization/graphml.rs @@ -535,48 +535,42 @@ fn read_graphml( level += 1; match e.name().0 { - b"graph" => { - if level == 2 { - in_graph = true; - } + b"graph" if level == 2 => { + in_graph = true; } - b"key" => { - if level == 2 { - add_annotation_key(&mut keys, e.attributes())?; - } + b"key" if level == 2 => { + add_annotation_key(&mut keys, e.attributes())?; } - b"node" => { - if in_graph && level == 3 { - data.clear(); - // Get the ID of this node - for att in e.attributes() { - let att = att?; - if att.key.0 == b"id" { - current_node_id = - Some(String::from_utf8_lossy(&att.value).to_string()); - } + b"node" if in_graph && level == 3 => { + data.clear(); + // Get the ID of this node + for att in e.attributes() { + let att = att?; + if att.key.0 == b"id" { + current_node_id = + 
Some(String::from_utf8_lossy(&att.value).to_string()); } } } - b"edge" => { - if in_graph && level == 3 { - data.clear(); - // Get the source and target node IDs - for att in e.attributes() { - let att = att?; - if att.key.0 == b"source" { - current_source_id = - Some(String::from_utf8_lossy(&att.value).to_string()); - } else if att.key.0 == b"target" { - current_target_id = - Some(String::from_utf8_lossy(&att.value).to_string()); - } else if att.key.0 == b"label" { - current_component = - Some(String::from_utf8_lossy(&att.value).to_string()); - } + + b"edge" if in_graph && level == 3 => { + data.clear(); + // Get the source and target node IDs + for att in e.attributes() { + let att = att?; + if att.key.0 == b"source" { + current_source_id = + Some(String::from_utf8_lossy(&att.value).to_string()); + } else if att.key.0 == b"target" { + current_target_id = + Some(String::from_utf8_lossy(&att.value).to_string()); + } else if att.key.0 == b"label" { + current_component = + Some(String::from_utf8_lossy(&att.value).to_string()); } } } + b"data" => { for att in e.attributes() { let att = att?; @@ -589,11 +583,10 @@ fn read_graphml( _ => {} } } - Event::Text(t) => { - if in_graph && level == 4 && current_data_key.is_some() { - current_data_value = Some(t.unescape()?.to_string()); - } + Event::Text(t) if in_graph && level == 4 && current_data_key.is_some() => { + current_data_value = Some(t.unescape()?.to_string()); } + Event::CData(t) => { if let Some(current_data_key) = &current_data_key && in_graph diff --git a/core/src/graph/storage/linear.rs b/core/src/graph/storage/linear.rs index 9fcd67363..a5a6bca1e 100644 --- a/core/src/graph/storage/linear.rs +++ b/core/src/graph/storage/linear.rs @@ -134,8 +134,8 @@ where // because the last element is only a target node, not a source node let it = self .node_chains - .iter() - .flat_map(|(_root, chain)| chain.iter().rev().skip(1)) + .values() + .flat_map(|chain| chain.iter().rev().skip(1)) .cloned() .map(Ok); diff --git 
a/core/src/graph/storage/prepost.rs b/core/src/graph/storage/prepost.rs index 275ed982c..54c382b78 100644 --- a/core/src/graph/storage/prepost.rs +++ b/core/src/graph/storage/prepost.rs @@ -162,8 +162,8 @@ where fn source_nodes<'a>(&'a self) -> Box> + 'a> { let it = self .node_to_order - .iter() - .filter_map(move |(n, _order)| { + .keys() + .filter_map(move |n| { // check if this is actual a source node (and not only a target node) if self.get_outgoing_edges(*n).next().is_some() { Some(*n) diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index 0385d33ac..fa8ead0dd 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -596,15 +596,16 @@ fn make_unary_operator_spec(op: ast::UnaryOpSpec) -> Arc } } +/// Calculates at which character offsets each line starts. The result is a map +/// of the character offset in the text to the line number which starts at this +/// offset. fn get_line_offsets(input: &str) -> BTreeMap { let mut offsets = BTreeMap::default(); let mut o = 0; - let mut l = 1; - for line in input.split('\n') { + for (l, line) in input.split('\n').enumerate() { offsets.insert(o, l); o += line.len() + 1; - l += 1; } offsets diff --git a/graphannis/src/annis/db/aql/model.rs b/graphannis/src/annis/db/aql/model.rs index e95773ccc..4ad880787 100644 --- a/graphannis/src/annis/db/aql/model.rs +++ b/graphannis/src/annis/db/aql/model.rs @@ -470,13 +470,11 @@ impl ComponentType for AnnotationComponentType { index: &mut Self::UpdateGraphIndex, ) -> std::result::Result<(), ComponentTypeError> { match update { - UpdateEvent::DeleteNode { node_name } => { - if !index.graph_without_nodes { - let existing_node_id = - index.get_cached_node_id_from_name(Cow::Borrowed(node_name), graph)?; - if !index.invalid_nodes.contains_key(&existing_node_id)? 
{ - index.calculate_invalidated_nodes_by_coverage(graph, existing_node_id)?; - } + UpdateEvent::DeleteNode { node_name } if !index.graph_without_nodes => { + let existing_node_id = + index.get_cached_node_id_from_name(Cow::Borrowed(node_name), graph)?; + if !index.invalid_nodes.contains_key(&existing_node_id)? { + index.calculate_invalidated_nodes_by_coverage(graph, existing_node_id)?; } } UpdateEvent::DeleteEdge { diff --git a/graphannis/src/annis/db/corpusstorage.rs b/graphannis/src/annis/db/corpusstorage.rs index 97a1b3590..840222f98 100644 --- a/graphannis/src/annis/db/corpusstorage.rs +++ b/graphannis/src/annis/db/corpusstorage.rs @@ -2042,14 +2042,14 @@ impl CorpusStorage { if let Some(remaining_limit) = remaining_limit { if single_result_length <= remaining_limit { // All results for this corpus fit inside the limit - result.extend(single_result.into_iter()); + result.extend(single_result); } else { // Only add as many items as allowed by the limit result.extend(single_result[0..remaining_limit].iter().cloned()); } } else { // Add all results since there is no limit - result.extend(single_result.into_iter()); + result.extend(single_result); } if let Some(limit) = limit diff --git a/graphannis/src/annis/db/relannis.rs b/graphannis/src/annis/db/relannis.rs index 7e946ee92..e05e93c38 100644 --- a/graphannis/src/annis/db/relannis.rs +++ b/graphannis/src/annis/db/relannis.rs @@ -667,40 +667,28 @@ where config.view.page_size = value; } } - "default-context-segmentation" => { - if !value.is_empty() { - config.context.segmentation = Some(value.to_string()); - } + "default-context-segmentation" if !value.is_empty() => { + config.context.segmentation = Some(value.to_string()); } - "default-base-text-segmentation" => { - if !value.is_empty() { - config.view.base_text_segmentation = Some(value.to_string()); - } + "default-base-text-segmentation" if !value.is_empty() => { + config.view.base_text_segmentation = Some(value.to_string()); } - "hidden_annos" => { - if 
!value.is_empty() { - // Entry is a comma-separated list - config.view.hidden_annos = - value.split(',').map(|a| a.trim().to_owned()).collect(); - } + "hidden_annos" if !value.is_empty() => { + // Entry is a comma-separated list + config.view.hidden_annos = + value.split(',').map(|a| a.trim().to_owned()).collect(); } - "virtual_tokenization_from_namespace" => { - if value.to_lowercase() == "true" { - config.view.timeline_strategy = TimelineStrategy::ImplicitFromNamespace - } + "virtual_tokenization_from_namespace" if value.to_lowercase() == "true" => { + config.view.timeline_strategy = TimelineStrategy::ImplicitFromNamespace } - "virtual_tokenization_mapping" => { - if !value.is_empty() { - let mappings: BTreeMap<_, _> = value - .split(',') - .filter_map(|e| e.split_once('=')) - .map(|(anno, segmentation)| { - (anno.to_string(), segmentation.to_string()) - }) - .collect(); - config.view.timeline_strategy = - TimelineStrategy::ImplicitFromMapping { mappings }; - } + "virtual_tokenization_mapping" if !value.is_empty() => { + let mappings: BTreeMap<_, _> = value + .split(',') + .filter_map(|e| e.split_once('=')) + .map(|(anno, segmentation)| (anno.to_string(), segmentation.to_string())) + .collect(); + config.view.timeline_strategy = + TimelineStrategy::ImplicitFromMapping { mappings }; } _ => {} }; From 9024b139a13ee65cd5f6f090a0790242950e757f Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 11:51:14 +0200 Subject: [PATCH 06/11] Update rand crate --- core/Cargo.toml | 2 +- core/src/annostorage/inmemory.rs | 2 +- core/src/annostorage/ondisk.rs | 4 ++-- graphannis/Cargo.toml | 2 +- graphannis/src/annis/util/quicksort.rs | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index b479dfd90..b3dbda383 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -23,7 +23,7 @@ normpath = "1.1.1" num-traits = "0.2" percent-encoding = "2.1" quick-xml = "0.28" -rand = { version = "0.9", features = ["small_rng"] } 
+rand = "0.10" rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" diff --git a/core/src/annostorage/inmemory.rs b/core/src/annostorage/inmemory.rs index 0192e481a..b97abed8f 100644 --- a/core/src/annostorage/inmemory.rs +++ b/core/src/annostorage/inmemory.rs @@ -796,7 +796,7 @@ where if let Some(histo) = self.histogram_bounds.get(&anno_key_symbol) && !histo.is_empty() { - let sampled_values = histo.iter().choose_multiple(&mut rng, 20); + let sampled_values = histo.iter().sample(&mut rng, 20); let matches = sampled_values .iter() .filter(|v| pattern.is_match(v)) diff --git a/core/src/annostorage/ondisk.rs b/core/src/annostorage/ondisk.rs index e08c0c44d..d6b13d5b4 100644 --- a/core/src/annostorage/ondisk.rs +++ b/core/src/annostorage/ondisk.rs @@ -942,7 +942,7 @@ where if let Some(histo) = self.histogram_bounds.get(&anno_key) && !histo.is_empty() { - let sampled_values = histo.iter().choose_multiple(&mut rng, 20); + let sampled_values = histo.iter().sample(&mut rng, 20); let matches = sampled_values .iter() @@ -1071,7 +1071,7 @@ where let all_values_for_key = self.get_by_anno_qname_range(anno_key); let sampled_anno_values: Result> = all_values_for_key - .choose_multiple(&mut rng, max_sampled_annotations) + .sample(&mut rng, max_sampled_annotations) .into_iter() .map(|data| { let (data, _) = data?; diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index cd47fd79f..6f743de1e 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -37,7 +37,7 @@ lru = "0.7" memory-stats = "1.1.0" page_size = "0.4" percent-encoding = "2.1" -rand = { version = "0.9", features = ["small_rng"] } +rand = "0.10" rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" diff --git a/graphannis/src/annis/util/quicksort.rs b/graphannis/src/annis/util/quicksort.rs index 5e36e30da..2c3d7fcbd 100644 --- a/graphannis/src/annis/util/quicksort.rs +++ b/graphannis/src/annis/util/quicksort.rs @@ -1,7 +1,8 @@ use 
std::ops::Range; +use rand::RngExt; + use crate::errors::Result; -use rand::Rng; use super::sortablecontainer::SortableContainer; From da20d58cfe00482e35896273ee59743f38ddd0a0 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 11:53:34 +0200 Subject: [PATCH 07/11] Update diesel and libsqlite3-sys versions --- webservice/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index cafe8dcb6..b55338d7c 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -16,7 +16,7 @@ anyhow = "1" bcrypt = "0.10" clap = { version = "2", default-features = false } config = { version = "0.13", default-features = false, features = ["toml"] } -diesel = { version = "2.0.4", default-features = false, features = [ +diesel = { version = "2.3.7", default-features = false, features = [ "sqlite", "r2d2", ] } @@ -25,7 +25,7 @@ futures = "0.3" graphannis = { path = "../graphannis/", version = "^4" } graphannis-core = { path = "../core/", version = "^4" } jsonwebtoken = "7.2" -libsqlite3-sys = { version = "0.26.0", features = ["bundled"] } +libsqlite3-sys = { version = "0.36.0", features = ["bundled"] } log = "0.4" percent-encoding = "2.1" r2d2 = "0.8" From 08c9cebeedd8f83bb8961b26b4f1f09a4868cda9 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 12:12:39 +0200 Subject: [PATCH 08/11] Update several dependencies in graphannis-core --- core/Cargo.toml | 18 +++++++++--------- core/src/errors.rs | 2 ++ core/src/graph/serialization/graphml.rs | 4 ++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index b3dbda383..d3051a8da 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -12,35 +12,35 @@ version = "4.1.1" [dependencies] binary-layout = "4.0.1" -bincode = "1.2" +bincode = "1.3" clru = "0.6.1" -facet = "0.28.0" -itertools = "0.10" +facet = "0.46.0" +itertools = "0.14" lazy_static = "1.4" log = "0.4" memmap2 = "0.9" normpath = 
"1.1.1" num-traits = "0.2" percent-encoding = "2.1" -quick-xml = "0.28" +quick-xml = "0.39" rand = "0.10" rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" -rustc-hash = "1.0" +rustc-hash = "2.1.2" serde = { version = "1.0", features = ["rc"] } serde_bytes = "0.11" serde_derive = "1.0" smallvec = "1.6" sstable = "0.11" tempfile = "3.1" -thiserror = "1" -toml = "0.8" +thiserror = "2" +toml = "1.1.2" transient-btree-index = "0.5" [dev-dependencies] -env_logger = "0.9" -fake = "2.2" +env_logger = "0.11" +fake = "5.1" insta = { version = "1.38.0", features = ["json"] } pretty_assertions = "1.3" serde_json = "1.0" diff --git a/core/src/errors.rs b/core/src/errors.rs index faa2ea412..e39b3c72e 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -46,6 +46,8 @@ pub enum GraphAnnisCoreError { #[error(transparent)] Xml(#[from] quick_xml::Error), #[error(transparent)] + XmlEncoding(#[from] quick_xml::encoding::EncodingError), + #[error(transparent)] XmlAttr(#[from] quick_xml::events::attributes::AttrError), #[error("Cache error: {0}")] LfuCache(String), diff --git a/core/src/graph/serialization/graphml.rs b/core/src/graph/serialization/graphml.rs index 88b467f45..f8bc1f3c8 100644 --- a/core/src/graph/serialization/graphml.rs +++ b/core/src/graph/serialization/graphml.rs @@ -510,7 +510,7 @@ fn read_graphml( progress_callback: &F, ) -> Result> { let mut reader = Reader::from_reader(input); - reader.expand_empty_elements(true); + reader.config_mut().expand_empty_elements = true; let mut keys = BTreeMap::new(); @@ -584,7 +584,7 @@ fn read_graphml( } } Event::Text(t) if in_graph && level == 4 && current_data_key.is_some() => { - current_data_value = Some(t.unescape()?.to_string()); + current_data_value = Some(t.decode()?.to_string()); } Event::CData(t) => { From 0daecda47b731aa1801014b1adaf8f1638770bd0 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 12:33:05 +0200 Subject: [PATCH 09/11] Update dependencies in 
graphannis crate --- cli/Cargo.toml | 4 ++-- cli/src/bin/annis_bench_queries.rs | 2 +- graphannis/Cargo.toml | 25 +++++++++++++------------ graphannis/src/annis/db/sort_matches.rs | 8 +++++--- webservice/Cargo.toml | 2 +- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 638fa35d7..ad67a2ca0 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -13,14 +13,14 @@ version = "4.1.1" anyhow = "1" clap = { version = "2", default-features = false } compound_duration = "1" -criterion = "0.3" +criterion = "0.8.2" graphannis = { path = "../graphannis/", version = "^4" } log = "0.4" prettytable-rs = "0.10.0" rustyline = "9" rustyline-derive = "0.5" simplelog = "0.12" -toml = "0.5" +toml = "1.1.2" [target.'cfg(not(target_env = "msvc"))'.dependencies] tikv-jemallocator = "0.5" diff --git a/cli/src/bin/annis_bench_queries.rs b/cli/src/bin/annis_bench_queries.rs index d717402c2..7132db4f2 100644 --- a/cli/src/bin/annis_bench_queries.rs +++ b/cli/src/bin/annis_bench_queries.rs @@ -115,7 +115,7 @@ fn main() { if let Some(baseline) = matches.value_of("save-baseline") { crit = crit.save_baseline(baseline.to_string()); } else if let Some(baseline) = matches.value_of("baseline") { - crit = crit.retain_baseline(baseline.to_string()); + crit = crit.retain_baseline(baseline.to_string(), false); } if matches.is_present("measurement-time") { diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index 6f743de1e..92f75f18e 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -24,44 +24,45 @@ regex = "1" [dependencies] boolean_expression = "0.4" csv = "1" -facet = "0.28.0" +facet = "0.46.0" fs2 = "0.4" graphannis-core = { path = "../core/", version = "^4" } -itertools = "0.10" +itertools = "0.14.0" lalrpop-util = { version = "0.23", features = ["lexer"] } lazy_static = "1.4" libc = "0.2" linked-hash-map = "0.5" log = "0.4" -lru = "0.7" +lru = "0.17" memory-stats = "1.1.0" -page_size = "0.4" +nonzero_lit = "0.1" +page_size = 
"0.6" percent-encoding = "2.1" rand = "0.10" rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" -rustc-hash = "1.0" +rustc-hash = "2.1.2" serde = { version = "1.0", features = ["rc"] } serde_derive = "1.0" smallvec = "1.6" -strum = "0.21" -strum_macros = "0.21" +strum = "0.28.0" +strum_macros = "0.28.0" sys-info = "0.9" tempfile = "3" -thiserror = "1" -toml = "0.8" +thiserror = "2.0.18" +toml = "1.1.2" transient-btree-index = "0.5" zip = "8.5.1" [dev-dependencies] assert_matches = "1.5.0" -criterion = "0.5" -fake = "2.2" +criterion = "0.8" +fake = "5.1.0" insta = "1.34.0" pretty_assertions = "1.3" same-file = "1.0.6" -serial_test = "2" +serial_test = "3.4.0" [[bench]] harness = false diff --git a/graphannis/src/annis/db/sort_matches.rs b/graphannis/src/annis/db/sort_matches.rs index fe196d1f9..83fffb060 100644 --- a/graphannis/src/annis/db/sort_matches.rs +++ b/graphannis/src/annis/db/sort_matches.rs @@ -24,12 +24,14 @@ pub(crate) struct SortCache { gs_order: Option>, } +const CACHE_SIZE: core::num::NonZeroUsize = nonzero_lit::usize!(1000); + impl SortCache { pub fn new(gs_order: Option>) -> Self { Self { - node_name: LruCache::new(1000), - left_token: LruCache::new(1000), - is_connected: LruCache::new(1000), + node_name: LruCache::new(CACHE_SIZE), + left_token: LruCache::new(CACHE_SIZE), + is_connected: LruCache::new(CACHE_SIZE), gs_order, } } diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index b55338d7c..369be01b3 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -33,7 +33,7 @@ serde = { version = "1.0", features = ["rc"] } serde_derive = "1.0" simplelog = "0.12" tempfile = "3" -thiserror = "1" +thiserror = "2.0.18" uuid = { version = "0.8", features = ["v4"] } walkdir = "2" zip = "8.5.1" From c320dd946b3950a532bd254053b2398373bb831d Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 12:35:19 +0200 Subject: [PATCH 10/11] Update itertools als tikv-jemallocator --- capi/Cargo.toml | 2 
+- cli/Cargo.toml | 2 +- webservice/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 8535c2dfd..b5fda9405 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -14,7 +14,7 @@ test = false [dependencies] graphannis = { path = "../graphannis/", version = "^4" } -itertools = "0.10" +itertools = "0.14" libc = "0.2" log = "0.4" simplelog = { version = "0.12" } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ad67a2ca0..7f3590702 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -23,7 +23,7 @@ simplelog = "0.12" toml = "1.1.2" [target.'cfg(not(target_env = "msvc"))'.dependencies] -tikv-jemallocator = "0.5" +tikv-jemallocator = "0.6.1" [dev-dependencies] assert_cmd = "2.1" diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index 369be01b3..722d58bf2 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -39,7 +39,7 @@ walkdir = "2" zip = "8.5.1" [target.'cfg(not(target_env = "msvc"))'.dependencies] -tikv-jemallocator = "0.5" +tikv-jemallocator = "0.6.1" [dev-dependencies] pretty_assertions = "1.3" From e19023f4633ad24d9e711365d45199834f0426cc Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Fri, 17 Apr 2026 12:42:05 +0200 Subject: [PATCH 11/11] Update jsonwebtoken and actix-cors --- CHANGELOG.md | 4 ++++ webservice/Cargo.toml | 4 ++-- webservice/src/settings.rs | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01d1c534b..d2d306978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Include token in subgraph even if not covered by segmentation node #334. +### Changed + +- Updated several internal dependencies. 
+ ## [4.1.1] - 2026-01-13 ### Added diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index 722d58bf2..c464aa2ab 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/korpling/graphANNIS" version = "4.1.1" [dependencies] -actix-cors = "0.6" +actix-cors = "0.7.1" actix-files = "0.6" actix-web = "4" anyhow = "1" @@ -24,7 +24,7 @@ diesel_migrations = { version = " 2", default-features = false } futures = "0.3" graphannis = { path = "../graphannis/", version = "^4" } graphannis-core = { path = "../core/", version = "^4" } -jsonwebtoken = "7.2" +jsonwebtoken = {version = "10.3.0", features = ["aws_lc_rs"]} libsqlite3-sys = { version = "0.36.0", features = ["bundled"] } log = "0.4" percent-encoding = "2.1" diff --git a/webservice/src/settings.rs b/webservice/src/settings.rs index 0cfa78fa9..6be2d96ab 100644 --- a/webservice/src/settings.rs +++ b/webservice/src/settings.rs @@ -37,7 +37,7 @@ pub enum JWTVerification { } impl JWTVerification { - pub fn create_decoding_key(&self) -> Result> { + pub fn create_decoding_key(&self) -> Result { let key = match &self { JWTVerification::HS256 { secret } => { jsonwebtoken::DecodingKey::from_secret(secret.as_bytes())