Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions concore_cli/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from rich.panel import Panel
from rich.table import Table
import re
import xml.etree.ElementTree as ET

def validate_workflow(workflow_file, console):
workflow_path = Path(workflow_file)
Expand All @@ -22,15 +23,34 @@ def validate_workflow(workflow_file, console):
errors.append("File is empty")
return show_results(console, errors, warnings, info)

# strict XML syntax check
try:
ET.fromstring(content)
except ET.ParseError as e:
errors.append(f"Invalid XML: {str(e)}")
return show_results(console, errors, warnings, info)

try:
soup = BeautifulSoup(content, 'xml')
except Exception as e:
errors.append(f"Invalid XML: {str(e)}")
return show_results(console, errors, warnings, info)

if not soup.find('graphml'):
root = soup.find('graphml')
if not root:
errors.append("Not a valid GraphML file - missing <graphml> root element")
return show_results(console, errors, warnings, info)

# check the graph attributes
graph = soup.find('graph')
if not graph:
errors.append("Missing <graph> element")
else:
edgedefault = graph.get('edgedefault')
if not edgedefault:
errors.append("Graph missing required 'edgedefault' attribute")
elif edgedefault not in ['directed', 'undirected']:
errors.append(f"Invalid edgedefault value '{edgedefault}' (must be 'directed' or 'undirected')")

nodes = soup.find_all('node')
edges = soup.find_all('edge')
Expand All @@ -47,8 +67,19 @@ def validate_workflow(workflow_file, console):

node_labels = []
for node in nodes:
#check the node id
node_id = node.get('id')
if not node_id:
errors.append("Node missing required 'id' attribute")
#skip further checks for this node to avoid noise
continue

try:
#robust find: try with namespace prefix first, then without
label_tag = node.find('y:NodeLabel')
if not label_tag:
label_tag = node.find('NodeLabel')

if label_tag and label_tag.text:
label = label_tag.text.strip()
node_labels.append(label)
Expand All @@ -60,13 +91,13 @@ def validate_workflow(workflow_file, console):
if len(parts) != 2:
warnings.append(f"Node '{label}' has invalid format")
else:
node_id, filename = parts
nodeId_part, filename = parts
if not filename:
errors.append(f"Node '{label}' has no filename")
elif not any(filename.endswith(ext) for ext in ['.py', '.cpp', '.m', '.v', '.java']):
warnings.append(f"Node '{label}' has unusual file extension")
else:
warnings.append(f"Node {node.get('id', 'unknown')} has no label")
warnings.append(f"Node {node_id} has no label")
except Exception as e:
warnings.append(f"Error parsing node: {str(e)}")

Expand All @@ -91,6 +122,9 @@ def validate_workflow(workflow_file, console):
for edge in edges:
try:
label_tag = edge.find('y:EdgeLabel')
if not label_tag:
label_tag = edge.find('EdgeLabel')

if label_tag and label_tag.text:
if edge_label_regex.match(label_tag.text.strip()):
zmq_edges += 1
Expand Down
133 changes: 133 additions & 0 deletions tests/test_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import unittest
import tempfile
import shutil
from pathlib import Path
from click.testing import CliRunner
from concore_cli.cli import cli

class TestGraphValidation(unittest.TestCase):
    """Checks of the ``validate`` CLI command against small GraphML fixtures.

    Each test writes a workflow file into a per-test temporary directory,
    runs ``validate`` on it through click's ``CliRunner`` and asserts on the
    text captured in ``result.output``.
    """

    def setUp(self):
        """Create the CLI runner and a scratch directory for fixture files."""
        self.runner = CliRunner()
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete the scratch directory and everything written into it."""
        if Path(self.temp_dir).exists():
            shutil.rmtree(self.temp_dir)

    def create_graph_file(self, filename, content):
        """Write ``content`` to ``filename`` inside the scratch directory.

        Args:
            filename: Name of the file to create under ``self.temp_dir``.
            content: Text to store in the file.

        Returns:
            The path of the written file, as a string.
        """
        filepath = Path(self.temp_dir) / filename
        # Pin the encoding so the fixture bytes do not depend on the
        # platform's locale default.
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
        return str(filepath)

    def _validate(self, filename, content):
        """Write a fixture file and return the result of running ``validate`` on it."""
        filepath = self.create_graph_file(filename, content)
        return self.runner.invoke(cli, ['validate', filepath])

    def test_validate_corrupted_xml(self):
        """A document with unclosed tags is reported as invalid XML."""
        content = '<graphml><node id="n0">'

        result = self._validate('corrupted.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('Invalid XML', result.output)

    def test_validate_empty_file(self):
        """An empty file is reported as empty."""
        result = self._validate('empty.graphml', '')

        self.assertIn('Validation failed', result.output)
        self.assertIn('File is empty', result.output)

    def test_validate_missing_node_id(self):
        """A <node> without an ``id`` attribute is reported."""
        content = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node>
<data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
        result = self._validate('missing_id.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn("Node missing required 'id' attribute", result.output)

    def test_validate_missing_edgedefault(self):
        """A <graph> without an ``edgedefault`` attribute is reported."""
        content = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
        result = self._validate('missing_default.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn("Graph missing required 'edgedefault'", result.output)

    def test_validate_missing_root_element(self):
        """Well-formed XML whose root is not <graphml> is reported."""
        content = '<?xml version="1.0"?><other_root></other_root>'

        result = self._validate('not_graphml.xml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('missing <graphml> root element', result.output)

    def test_validate_broken_edges(self):
        """An edge whose target (n1) is not a declared node is reported."""
        content = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
</node>
<edge source="n0" target="n1"/>
</graph>
</graphml>
'''
        result = self._validate('bad_edge.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('Edge references non-existent target node', result.output)

    def test_validate_node_missing_filename(self):
        """A node label with nothing after the colon is reported."""
        content = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
        result = self._validate('bad_node.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('has no filename', result.output)

    def test_validate_valid_graph(self):
        """A minimal single-node graph passes validation."""
        content = '''
<graphml xmlns:y="http://www.yworks.com/xml/graphml">
<graph id="G" edgedefault="directed">
<node id="n0">
<data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
</node>
</graph>
</graphml>
'''
        result = self._validate('valid.graphml', content)

        self.assertIn('Validation passed', result.output)
        self.assertIn('Workflow is valid', result.output)

# Allow running this test module directly as a script, in addition to
# discovery by a test runner.
if __name__ == '__main__':
    unittest.main()