Skip to content
Merged
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Include token in subgraph even if not covered by segmentation node #334.

### Changed

- Updated several internal dependencies.

## [4.1.1] - 2026-01-13

### Added
Expand Down
3 changes: 2 additions & 1 deletion capi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ version = "4.1.1"

[lib]
crate-type = ["staticlib", "cdylib"]
test = false

[dependencies]
graphannis = { path = "../graphannis/", version = "^4" }
itertools = "0.10"
itertools = "0.14"
libc = "0.2"
log = "0.4"
simplelog = { version = "0.12" }
10 changes: 3 additions & 7 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,25 @@ version = "4.1.1"
anyhow = "1"
clap = { version = "2", default-features = false }
compound_duration = "1"
criterion = "0.3"
criterion = "0.8.2"
graphannis = { path = "../graphannis/", version = "^4" }
log = "0.4"
prettytable-rs = "0.10.0"
rustyline = "9"
rustyline-derive = "0.5"
simplelog = "0.12"
toml = "0.5"
toml = "1.1.2"

[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.5"
tikv-jemallocator = "0.6.1"

[dev-dependencies]
assert_cmd = "2.1"
insta = { version = "1.34.0", features = ["filters"] }
insta-cmd = "0.5"
serial_test = "2"

[[bin]]
name = "annis"

[[bin]]
name = "annis_bench_queries"

[package.metadata.cargo-machete]
ignored = ["prettytable-rs"]
2 changes: 1 addition & 1 deletion cli/src/bin/annis_bench_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ fn main() {
if let Some(baseline) = matches.value_of("save-baseline") {
crit = crit.save_baseline(baseline.to_string());
} else if let Some(baseline) = matches.value_of("baseline") {
crit = crit.retain_baseline(baseline.to_string());
crit = crit.retain_baseline(baseline.to_string(), false);
}

if matches.is_present("measurement-time") {
Expand Down
10 changes: 2 additions & 8 deletions cli/tests/snapshots/cli__export_to_zip_file.snap
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
---
source: cli/tests/cli.rs
info:
program: annis
args:
- "../graphannis/tests/data/"
- "-c"
- corpus sample-disk-based-3.3
- "-c"
- export sample-disk-based-3.3.zip
expression: actual
---

success: true
exit_code: 0
----- stdout -----
Expand Down
12 changes: 2 additions & 10 deletions cli/tests/snapshots/cli__list_corpora_fully_loaded.snap
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
---
source: cli/tests/cli.rs
info:
program: annis
args:
- "../graphannis/tests/data/"
- "-c"
- corpus sample-disk-based-3.3
- "-c"
- preload
- "-c"
- list
expression: actual
---

success: true
exit_code: 0
----- stdout -----
Expand Down
8 changes: 2 additions & 6 deletions cli/tests/snapshots/cli__list_corpora_not_loaded.snap
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
---
source: cli/tests/cli.rs
info:
program: annis
args:
- "../graphannis/tests/data/"
- "-c"
- list
expression: actual
---

success: true
exit_code: 0
----- stdout -----
Expand Down
12 changes: 2 additions & 10 deletions cli/tests/snapshots/cli__list_corpora_partially_loaded.snap
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
---
source: cli/tests/cli.rs
info:
program: annis
args:
- "../graphannis/tests/data/"
- "-c"
- corpus sample-disk-based-3.3
- "-c"
- count tok
- "-c"
- list
expression: actual
---

success: true
exit_code: 0
----- stdout -----
Expand Down
12 changes: 2 additions & 10 deletions cli/tests/snapshots/cli__show_corpus_info.snap
Original file line number Diff line number Diff line change
@@ -1,16 +1,8 @@
---
source: cli/tests/cli.rs
info:
program: annis
args:
- "../graphannis/tests/data/"
- "-c"
- corpus sample-disk-based-3.8
- "-c"
- preload
- "-c"
- info
expression: actual
---

success: true
exit_code: 0
----- stdout -----
Expand Down
23 changes: 10 additions & 13 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,38 +12,35 @@ version = "4.1.1"

[dependencies]
binary-layout = "4.0.1"
bincode = "1.2"
bincode = "1.3"
clru = "0.6.1"
facet = "0.28.0"
itertools = "0.10"
facet = "0.46.0"
itertools = "0.14"
lazy_static = "1.4"
log = "0.4"
memmap2 = "0.9"
normpath = "1.1.1"
num-traits = "0.2"
percent-encoding = "2.1"
quick-xml = "0.28"
rand = { version = "0.9", features = ["small_rng"] }
quick-xml = "0.39"
rand = "0.10"
rayon = { version = "1.3", default-features = false }
regex = "1"
regex-syntax = "0.8"
rustc-hash = "1.0"
rustc-hash = "2.1.2"
serde = { version = "1.0", features = ["rc"] }
serde_bytes = "0.11"
serde_derive = "1.0"
smallvec = "1.6"
sstable = "0.11"
tempfile = "3.1"
thiserror = "1"
toml = "0.8"
thiserror = "2"
toml = "1.1.2"
transient-btree-index = "0.5"

[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["heapapi"] }

[dev-dependencies]
env_logger = "0.9"
fake = "2.2"
env_logger = "0.11"
fake = "5.1"
insta = { version = "1.38.0", features = ["json"] }
pretty_assertions = "1.3"
serde_json = "1.0"
14 changes: 7 additions & 7 deletions core/src/annostorage/inmemory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ where
// flatten the hash set of all items of the value map
.flat_map(|(key, values)| {
values
.iter()
.flat_map(|(_, items)| items.iter().cloned())
.values()
.flat_map(|items| items.iter().cloned())
.zip(std::iter::repeat(key))
})
.map(Ok);
Expand Down Expand Up @@ -572,8 +572,8 @@ where
// flatten the hash set of all items of the value map
.flat_map(|(key, values)| {
values
.iter()
.flat_map(|(_, items)| items.iter().cloned())
.values()
.flat_map(|items| items.iter().cloned())
.zip(std::iter::repeat(key))
});

Expand Down Expand Up @@ -796,7 +796,7 @@ where
if let Some(histo) = self.histogram_bounds.get(&anno_key_symbol)
&& !histo.is_empty()
{
let sampled_values = histo.iter().choose_multiple(&mut rng, 20);
let sampled_values = histo.iter().sample(&mut rng, 20);
let matches = sampled_values
.iter()
.filter(|v| pattern.is_match(v))
Expand Down Expand Up @@ -889,8 +889,8 @@ where
return Ok(result);
} else {
let result = values_for_key
.iter()
.filter_map(|(val, _items)| self.anno_values.get_value_ref(*val))
.keys()
.filter_map(|val| self.anno_values.get_value_ref(*val))
.map(|val| Cow::Borrowed(&val[..]))
.collect();
return Ok(result);
Expand Down
4 changes: 2 additions & 2 deletions core/src/annostorage/ondisk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ where
if let Some(histo) = self.histogram_bounds.get(&anno_key)
&& !histo.is_empty()
{
let sampled_values = histo.iter().choose_multiple(&mut rng, 20);
let sampled_values = histo.iter().sample(&mut rng, 20);

let matches = sampled_values
.iter()
Expand Down Expand Up @@ -1071,7 +1071,7 @@ where
let all_values_for_key = self.get_by_anno_qname_range(anno_key);

let sampled_anno_values: Result<Vec<String>> = all_values_for_key
.choose_multiple(&mut rng, max_sampled_annotations)
.sample(&mut rng, max_sampled_annotations)
.into_iter()
.map(|data| {
let (data, _) = data?;
Expand Down
2 changes: 2 additions & 0 deletions core/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ pub enum GraphAnnisCoreError {
#[error(transparent)]
Xml(#[from] quick_xml::Error),
#[error(transparent)]
XmlEncoding(#[from] quick_xml::encoding::EncodingError),
#[error(transparent)]
XmlAttr(#[from] quick_xml::events::attributes::AttrError),
#[error("Cache error: {0}")]
LfuCache(String),
Expand Down
71 changes: 32 additions & 39 deletions core/src/graph/serialization/graphml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ fn read_graphml<CT: ComponentType, R: std::io::BufRead, F: Fn(&str)>(
progress_callback: &F,
) -> Result<Option<String>> {
let mut reader = Reader::from_reader(input);
reader.expand_empty_elements(true);
reader.config_mut().expand_empty_elements = true;

let mut keys = BTreeMap::new();

Expand All @@ -535,48 +535,42 @@ fn read_graphml<CT: ComponentType, R: std::io::BufRead, F: Fn(&str)>(
level += 1;

match e.name().0 {
b"graph" => {
if level == 2 {
in_graph = true;
}
b"graph" if level == 2 => {
in_graph = true;
}
b"key" => {
if level == 2 {
add_annotation_key(&mut keys, e.attributes())?;
}
b"key" if level == 2 => {
add_annotation_key(&mut keys, e.attributes())?;
}
b"node" => {
if in_graph && level == 3 {
data.clear();
// Get the ID of this node
for att in e.attributes() {
let att = att?;
if att.key.0 == b"id" {
current_node_id =
Some(String::from_utf8_lossy(&att.value).to_string());
}
b"node" if in_graph && level == 3 => {
data.clear();
// Get the ID of this node
for att in e.attributes() {
let att = att?;
if att.key.0 == b"id" {
current_node_id =
Some(String::from_utf8_lossy(&att.value).to_string());
}
}
}
b"edge" => {
if in_graph && level == 3 {
data.clear();
// Get the source and target node IDs
for att in e.attributes() {
let att = att?;
if att.key.0 == b"source" {
current_source_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"target" {
current_target_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"label" {
current_component =
Some(String::from_utf8_lossy(&att.value).to_string());
}

b"edge" if in_graph && level == 3 => {
data.clear();
// Get the source and target node IDs
for att in e.attributes() {
let att = att?;
if att.key.0 == b"source" {
current_source_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"target" {
current_target_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"label" {
current_component =
Some(String::from_utf8_lossy(&att.value).to_string());
}
}
}

b"data" => {
for att in e.attributes() {
let att = att?;
Expand All @@ -589,11 +583,10 @@ fn read_graphml<CT: ComponentType, R: std::io::BufRead, F: Fn(&str)>(
_ => {}
}
}
Event::Text(t) => {
if in_graph && level == 4 && current_data_key.is_some() {
current_data_value = Some(t.unescape()?.to_string());
}
Event::Text(t) if in_graph && level == 4 && current_data_key.is_some() => {
current_data_value = Some(t.decode()?.to_string());
}

Event::CData(t) => {
if let Some(current_data_key) = &current_data_key
&& in_graph
Expand Down
4 changes: 2 additions & 2 deletions core/src/graph/storage/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ where
// because the last element is only a target node, not a source node
let it = self
.node_chains
.iter()
.flat_map(|(_root, chain)| chain.iter().rev().skip(1))
.values()
.flat_map(|chain| chain.iter().rev().skip(1))
.cloned()
.map(Ok);

Expand Down
4 changes: 2 additions & 2 deletions core/src/graph/storage/prepost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ where
fn source_nodes<'a>(&'a self) -> Box<dyn Iterator<Item = Result<NodeID>> + 'a> {
let it = self
.node_to_order
.iter()
.filter_map(move |(n, _order)| {
.keys()
.filter_map(move |n| {
// check if this is actually a source node (and not only a target node)
if self.get_outgoing_edges(*n).next().is_some() {
Some(*n)
Expand Down
Loading
Loading