Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- import `graphml`: A warning is displayed when generic config attributes, such as `extensions`, are used, because the importer has to ignore them

### Changed

- the internal library used for documentation generation has changed, following the same change in graphannis

## [0.51.0] - 2026-04-10

### Added
Expand Down
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ console = "0.15"
csv = "1.3"
encoding_rs = "0.8"
encoding_rs_io = "0.1.7"
facet = "0.28.0"
facet-reflect = "0.28.0"
facet = "0.46.0"
facet-reflect = "0.46.0"
git2 = { version = "0.20.2", default-features = false }
glob = "0.3"
graphannis = "4.1.1"
graphannis-core = "4.1.1"
graphannis = "4.1.4"
graphannis-core = "4.1.4"
graphviz-rust = "0.9.7"
indicatif = "0.17"
itertools = "0.12"
Expand Down
16 changes: 8 additions & 8 deletions src/bin/annatto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ fn list_modules() {
peek_enum_variants(ReadFrom::SHAPE)
.unwrap_or_default()
.iter()
.map(|v| v.name.to_lowercase())
.map(|v| v.effective_name().to_lowercase())
.join(", "),
];
table_builder.push_record(import_row);
Expand All @@ -254,7 +254,7 @@ fn list_modules() {
peek_enum_variants(WriteAs::SHAPE)
.unwrap_or_default()
.iter()
.map(|v| v.name.to_lowercase())
.map(|v| v.effective_name().to_lowercase())
.join(", "),
];
table_builder.push_record(export_row);
Expand All @@ -264,7 +264,7 @@ fn list_modules() {
peek_enum_variants(GraphOp::SHAPE)
.unwrap_or_default()
.iter()
.map(|v| v.name.to_lowercase())
.map(|v| v.effective_name().to_lowercase())
.join(", "),
];
table_builder.push_record(graph_op_row);
Expand All @@ -288,18 +288,18 @@ fn module_info(name: &str) {
let matching_importers: Vec<_> = peek_enum_variants(ReadFrom::SHAPE)
.unwrap_or_default()
.iter()
.filter(|m| m.name.to_lowercase() == name.to_lowercase())
.filter(|m| m.effective_name().to_lowercase() == name.to_lowercase())
.collect();
let matching_exporters: Vec<_> = peek_enum_variants(WriteAs::SHAPE)
.unwrap_or_default()
.iter()
.filter(|m| m.name.to_lowercase() == name.to_lowercase())
.filter(|m| m.effective_name().to_lowercase() == name.to_lowercase())
.collect();

let matching_graph_ops: Vec<_> = peek_enum_variants(GraphOp::SHAPE)
.unwrap_or_default()
.iter()
.filter(|m| m.name.to_lowercase() == name.to_lowercase())
.filter(|m| m.effective_name().to_lowercase() == name.to_lowercase())
.collect();

if matching_importers.is_empty()
Expand Down Expand Up @@ -333,7 +333,7 @@ fn module_info(name: &str) {
print_markdown("# Graph operations\n\n");
for m in matching_graph_ops {
// The name of the module is taken from the wrapper enum
let module_name = m.name.to_lowercase();
let module_name = m.effective_name().to_lowercase();
// Get the inner type wrapped by the graph operations enum and use
// its documentation and fields
if let Some(inner_field) = m.data.fields.first().map(|m| m.shape())
Expand All @@ -349,7 +349,7 @@ fn module_info(name: &str) {
.fields
.iter()
.map(|f| ModuleConfiguration {
name: f.name.to_lowercase(),
name: f.effective_name().to_lowercase(),
description: documentation::clean_string(f.doc),
})
.collect();
Expand Down
6 changes: 3 additions & 3 deletions src/bin/documentation_generation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ fn write_module_list_table(
importers
.iter()
.map(|m| {
let module_name = m.name.to_lowercase();
let module_name = m.effective_name().to_lowercase();
let file_stem = module_name.replace("unstable:", "");
format!("[{module_name}](importers/{file_stem}.md)")
})
Expand All @@ -49,7 +49,7 @@ fn write_module_list_table(
exporters
.iter()
.map(|m| {
let module_name = m.name.to_lowercase();
let module_name = m.effective_name().to_lowercase();
let file_stem = module_name.replace("unstable:", "");
format!("[{module_name}](exporters/{file_stem}.md)")
})
Expand All @@ -62,7 +62,7 @@ fn write_module_list_table(
graph_ops
.iter()
.map(|m| {
let module_name = m.name.to_lowercase();
let module_name = m.effective_name().to_lowercase();
let file_stem = module_name.replace("unstable:", "");
format!("[{module_name}](graph_ops/{file_stem}.md)")
})
Expand Down
2 changes: 1 addition & 1 deletion src/exporter/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ impl ExportTable {
let mut prefixes = sources.iter().map(|_| "in").collect_vec();
prefixes.extend(targets.iter().map(|_| "out"));
for ((connected_node_name, component, mut edge_annotations), prefix) in
sources.into_iter().chain(targets.into_iter()).zip(prefixes)
sources.into_iter().chain(targets).zip(prefixes)
{
let qualified_name = [
prefix,
Expand Down
96 changes: 61 additions & 35 deletions src/importer/graphml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,43 +163,35 @@ fn read_graphml<R: std::io::BufRead>(
level += 1;

match e.name().0 {
b"graph" => {
if level == 2 {
in_graph = true;
}
b"graph" if level == 2 => {
in_graph = true;
}
b"key" => {
if level == 2 {
add_annotation_key(&mut keys, e.attributes())?;
}
b"key" if level == 2 => {
add_annotation_key(&mut keys, e.attributes())?;
}
b"node" => {
if in_graph && level == 3 {
// Get the ID of this node
for att in e.attributes() {
let att = att?;
if att.key.0 == b"id" {
current_node_id =
Some(String::from_utf8_lossy(&att.value).to_string());
}
b"node" if in_graph && level == 3 => {
// Get the ID of this node
for att in e.attributes() {
let att = att?;
if att.key.0 == b"id" {
current_node_id =
Some(String::from_utf8_lossy(&att.value).to_string());
}
}
}
b"edge" => {
if in_graph && level == 3 {
// Get the source and target node IDs
for att in e.attributes() {
let att = att?;
if att.key.0 == b"source" {
current_source_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"target" {
current_target_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"label" {
current_component =
Some(String::from_utf8_lossy(&att.value).to_string());
}
b"edge" if in_graph && level == 3 => {
// Get the source and target node IDs
for att in e.attributes() {
let att = att?;
if att.key.0 == b"source" {
current_source_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"target" {
current_target_id =
Some(String::from_utf8_lossy(&att.value).to_string());
} else if att.key.0 == b"label" {
current_component =
Some(String::from_utf8_lossy(&att.value).to_string());
}
}
}
Expand Down Expand Up @@ -281,11 +273,16 @@ impl Importer for GraphMLImporter {
&self,
path: &Path,
step_id: StepID,
_config: GenericImportConfiguration,
config: GenericImportConfiguration,
tx: Option<StatusSender>,
) -> Result<GraphUpdate, Box<dyn std::error::Error>> {
let reporter = ProgressReporter::new(tx, step_id, 2)?;

if config != self.default_configuration() {
reporter
.warn("Generic configuration keys are currently ignored for GraphML imports.")?;
}

// TODO: support multiple GraphML and connected binary files
// TODO: refactor the graphannis_core crate to expose the needed functionality directly

Expand Down Expand Up @@ -314,11 +311,15 @@ impl Importer for GraphMLImporter {

#[cfg(test)]
mod tests {
use std::path::Path;
use std::{path::Path, sync::mpsc};

use insta::assert_snapshot;
use itertools::Itertools;

use crate::{importer::graphml::GraphMLImporter, test_util::import_as_graphml_string};
use crate::{
importer::{GenericImportConfiguration, Importer, graphml::GraphMLImporter},
test_util::import_as_graphml_string,
};

#[test]
fn single_sentence() {
Expand All @@ -331,4 +332,29 @@ mod tests {

assert_snapshot!(actual);
}

/// Importing GraphML with a generic import configuration built via
/// `new_with_root_name` must still succeed, and the warning messages sent
/// over the status channel are compared against the stored snapshot.
#[test]
fn generic_config_warning() {
    let (tx, rx) = mpsc::channel();
    let importer = GraphMLImporter::default();
    let step_id = crate::StepID {
        module_name: "test_import".to_string(),
        path: None,
    };
    let generic_config = GenericImportConfiguration::new_with_root_name("custom_root".to_string());

    let outcome = importer.import_corpus(
        Path::new("tests/data/import/graphml/single_sentence.graphml"),
        step_id,
        generic_config,
        Some(tx),
    );
    assert!(outcome.is_ok());

    // Only warnings contribute text; every other status message is mapped to
    // an empty string before the messages are joined line by line.
    assert_snapshot!(
        rx.into_iter()
            .map(|m| match m {
                crate::workflow::StatusMessage::Warning(w) => w,
                _ => "".to_string(),
            })
            .join("\n")
    );
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: src/importer/graphml.rs
expression: "rx.into_iter().map(|m| match m\n{\n crate::workflow::StatusMessage::Warning(w) => w, _ => \"\".to_string(),\n}).join(\"\\n\")"
---
Generic configuration keys are currently ignored for GraphML imports.
2 changes: 1 addition & 1 deletion src/importer/text/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ impl TreeTaggerTokenizer {
}
}
result.push(Token::new_val(current_token));
result.extend(suffix.into_iter());
result.extend(suffix);
}
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/importer/textgrid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,7 @@ impl DocumentMapper<'_> {
}
let mut token_ids = Vec::new();
let mut result = BTreeMap::new();
let mut counter = 1;
for (time_range, token_text) in token_sorted_by_time {
for (counter, (time_range, token_text)) in (1..).zip(token_sorted_by_time) {
let id = map_token(
u,
&NodeInfo::new(&counter.to_string(), &self.doc_path, &self.text_node_name),
Expand All @@ -261,7 +260,6 @@ impl DocumentMapper<'_> {
token_ids.push(id.clone());
result.insert(time_range.0, id.clone());
result.insert(time_range.1, id);
counter += 1;
}
add_order_relations(u, &token_ids, None)?;

Expand Down
6 changes: 2 additions & 4 deletions src/importer/toolbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,8 @@ impl ImportToolBox {
};
}
}
(Rule::spaces, _) => {
if build_joint {
join_list.push(entry_or_space.as_str());
}
(Rule::spaces, _) if build_joint => {
join_list.push(entry_or_space.as_str());
}
(Rule::null, false) => {
timeline_id += 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ digraph G {
2[shape=box label="single_sentence/zossen#text\n \nannis:node_type=datasource"]
1[shape=box label="single_sentence/zossen\n \nannis:doc=zossen\nannis:node_type=corpus"]
0[shape=box label="single_sentence\n \nannis:node_type=corpus"]
3[shape=box label="single_sentence/zossen#n1\n \nsyntax:cat=ROOT\nannis:layer=syntax\nannis:node_type=node"]
4[shape=box label="single_sentence/zossen#n2\n \nsyntax:cat=S\nannis:layer=syntax\nannis:node_type=node"]
3[shape=box label="single_sentence/zossen#n1\n \nsyntax:cat=ROOT\nannis:layer=syntax\nannis:node_type=node"]
5[shape=box label="single_sentence/zossen#n3\n \nsyntax:cat=NP\nannis:layer=syntax\nannis:node_type=node"]
8[shape=box label="single_sentence/zossen#n4\n \nsyntax:cat=PP\nannis:layer=syntax\nannis:node_type=node"]
12[shape=box label="single_sentence/zossen#n5\n \nsyntax:cat=NP\nannis:layer=syntax\nannis:node_type=node"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ digraph G {
2[shape=box label="single_sentence/zossen#text\n \nannis:node_type=datasource"]
1[shape=box label="single_sentence/zossen\n \nannis:doc=zossen\nannis:node_type=corpus"]
0[shape=box label="single_sentence\n \nannis:node_type=corpus"]
3[shape=box label="single_sentence/zossen#n1\n \nsyntax:cat=ROOT\nannis:layer=syntax\nannis:node_type=node"]
4[shape=box label="single_sentence/zossen#n2\n \nsyntax:cat=S\nannis:layer=syntax\nannis:node_type=node"]
3[shape=box label="single_sentence/zossen#n1\n \nsyntax:cat=ROOT\nannis:layer=syntax\nannis:node_type=node"]
5[shape=box label="single_sentence/zossen#n3\n \nsyntax:cat=NP\nannis:layer=syntax\nannis:node_type=node"]
8[shape=box label="single_sentence/zossen#n4\n \nsyntax:cat=PP\nannis:layer=syntax\nannis:node_type=node"]
3 -> 6 [label="annis/inherited-coverage (C)" color=darkgreen fontcolor=darkgreen style=dotted]
Expand Down
6 changes: 3 additions & 3 deletions src/util/documentation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ pub struct ModuleInfo {
impl From<&Variant> for ModuleInfo {
fn from(module: &Variant) -> Self {
// The name of the module is taken from the wrapper enum
let module_name = module.name.to_lowercase();
let module_name = module.effective_name().to_lowercase();
// Get the inner type wrapped by the graph operations enum and use
// its documentation and fields
let mut result = Self {
Expand All @@ -78,7 +78,7 @@ impl From<&Variant> for ModuleInfo {
let shape = m.shape();
if let Some(inner) = shape.inner {
// This can be a boxed type
inner()
inner
} else {
shape
}
Expand All @@ -93,7 +93,7 @@ impl From<&Variant> for ModuleInfo {
.fields
.iter()
.map(|f| ModuleConfiguration {
name: f.name.to_lowercase(),
name: f.effective_name().to_lowercase(),
description: clean_string(f.doc),
})
.collect();
Expand Down
4 changes: 1 addition & 3 deletions src/workflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,7 @@ impl Workflow {
}
// Execute all manipulators in sequence
if let Some(ref manipulators) = self.graph_op {
let mut graph_op_position = 1;
for desc in manipulators.iter() {
for (graph_op_position, desc) in (1..).zip(manipulators.iter()) {
let step_id = StepID::from_graphop_step(desc, graph_op_position);
let workflow_directory = &desc.workflow_directory;
desc.execute(
Expand All @@ -473,7 +472,6 @@ impl Workflow {
reason: reason.to_string(),
manipulator: step_id.to_string(),
})?;
graph_op_position += 1;

if let Some(ref tx) = tx {
tx.send(crate::workflow::StatusMessage::StepDone { id: step_id })?;
Expand Down
Loading