diff --git a/py_qubed/src/lib.rs b/py_qubed/src/lib.rs index 61434c0..854e31c 100644 --- a/py_qubed/src/lib.rs +++ b/py_qubed/src/lib.rs @@ -1,4 +1,6 @@ +use ::qubed::Coordinates; use ::qubed::Qube; +use ::qubed::select::SelectMode; use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList, PyModule}; @@ -69,6 +71,96 @@ impl PyQube { } } + pub fn select( + &self, + request: Bound<'_, PyDict>, + mode: Option, + _consume: Option, + ) -> PyResult { + // Collect selection data with owned Strings and Coordinates + let mut selection_data: Vec<(String, Coordinates)> = Vec::new(); + + for (k, v) in request.iter() { + let key: String = + k.extract().map_err(|_| PyTypeError::new_err("select keys must be strings"))?; + + let coords = if v.is_instance_of::() { + let lst = + v.downcast::().map_err(|e| PyTypeError::new_err(e.to_string()))?; + let joined = join_pylist_as_path(lst)?; + Coordinates::from_string(&joined) + } else { + // Convert any value to string representation (handles int, float, str) + let py_str = v.str()?; + let s: String = py_str.extract()?; + Coordinates::from_string(&s) + }; + + selection_data.push((key, coords)); + } + + let select_mode = match mode.as_deref() { + Some(m) if m.eq_ignore_ascii_case("prune") => SelectMode::Prune, + _ => SelectMode::Default, + }; + + // Convert to references for the select call + let pairs: Vec<(&str, Coordinates)> = + selection_data.iter().map(|(k, c)| (k.as_str(), c.clone())).collect(); + + match self.inner.select(&pairs, select_mode) { + Ok(q) => Ok(PyQube { inner: q }), + Err(e) => Err(PyTypeError::new_err(e)), + } + } + + pub fn all_unique_dim_coords(&mut self, py: Python<'_>) -> PyResult> { + let dim_coords = self.inner.all_unique_dim_coords(); + let py_dict = PyDict::new(py); + + for (dimension, coordinates) in dim_coords { + let coord_str = coordinates.to_string(); + // Split on slash if present, otherwise treat as single value + let values: Vec<&str> = if coord_str.is_empty() { + vec![] + } else if coord_str.contains('/') { + coord_str.split('/').collect() + } else { + vec![&coord_str] + }; + + let py_list = PyList::empty(py); + for value in values { + py_list.append(value)?; + } + + py_dict.set_item(dimension, py_list)?; + } + + Ok(py_dict.into_any().unbind()) + } + + pub fn compress(&mut self) -> PyResult<()> { + self.inner.compress(); + Ok(()) + } + + pub fn drop(&mut self, dims: &Bound<'_, PyList>) -> PyResult<()> { + let to_drop: Vec = dims + .iter() + .map(|item| { + item.str() + .and_then(|s| s.extract::()) + .map_err(|_| PyTypeError::new_err("drop: dimension names must be strings")) + }) + .collect::>()?; + self.inner.drop(to_drop).map_err(PyTypeError::new_err) + } + + pub fn squeeze(&mut self) -> PyResult<()> { + self.inner.squeeze().map_err(PyTypeError::new_err) + } + pub fn append(&mut self, other: &Bound<'_, PyQube>) -> PyResult<()> { let mut other_mut = other.borrow_mut(); self.inner.append(&mut other_mut.inner); @@ -98,6 +190,17 @@ impl PyQube { } } +pub(crate) fn join_pylist_as_path(lst: &Bound<'_, PyList>) -> PyResult { + let mut parts: Vec = Vec::with_capacity(lst.len()); + for item in lst.iter() { + // Convert any value to string representation (handles int, float, str) + let py_str = item.str()?; + let s: String = py_str.extract()?; + parts.push(s); + } + Ok(parts.join("/")) +} + #[pymodule] #[pyo3(name = "qubed")] fn py_qubed_module(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { diff --git a/py_qubed/tests/test_qubed_api.py b/py_qubed/tests/test_qubed_api.py index ac765b5..c12d4e3 100644 --- a/py_qubed/tests/test_qubed_api.py +++ b/py_qubed/tests/test_qubed_api.py @@ -85,9 +85,13 @@ def test_to_from_arena_json_roundtrip() -> None: import json parsed = json.loads(arena_json) - assert isinstance(parsed, list) - # expect at least one node entry with dim and coords - assert any(isinstance(item, dict) and "dim" in item and "coords" in item for item in parsed) + assert isinstance(parsed, dict) + assert "qube" in parsed + assert "version" in parsed + # expect qube to be a list with node entries containing dim and coords + qube_list = parsed["qube"] + assert isinstance(qube_list, list) + assert any(isinstance(item, dict) and "dim" in item and "coords" in item for item in qube_list) # Reconstruct and verify ascii equality reconstructed = PyQube.from_arena_json(arena_json) diff --git a/py_qubed/tests/test_select_api.py b/py_qubed/tests/test_select_api.py new file mode 100644 index 0000000..a3414f0 --- /dev/null +++ b/py_qubed/tests/test_select_api.py @@ -0,0 +1,324 @@ +import qubed + + +def test_select_1(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + selected = q.select({"class": [1]}, None, None) + + expected = r"""root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2""" + + assert selected.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() + + +def test_select_2(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + selected = q.select({"class": [1], "param": [1]}, None, None) + + expected = r"""root +└── class=1 + ├── expver=0001 + │ └── param=1 + └── expver=0002 + └── param=1""" + + assert selected.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() + +def test_select_3(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + selected = q.select({"expver": ["0001"]}, None, None) + + expected = r"""root +├── class=1 +│ └── expver=0001 +│ ├── param=1 +│ └── param=2 +└── class=2 + └── expver=0001 + ├── param=1 + ├── param=2 + └── param=3""" + + assert selected.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() + + +def test_all_unique_dim_coords(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + dim_coords = q.all_unique_dim_coords() + + # Should have 3 dimensions (class, expver, param) + assert len(dim_coords) == 3 + + # Check that expected dimensions are present + assert "class" in dim_coords + assert "expver" in dim_coords + assert "param" in dim_coords + + # Check coordinate values are lists + assert isinstance(dim_coords["class"], list) + assert isinstance(dim_coords["expver"], list) + assert isinstance(dim_coords["param"], list) + + # Check that coordinates contain expected values + assert "1" in dim_coords["class"] + assert "2" in dim_coords["class"] + assert "0001" in dim_coords["expver"] + assert "0002" in dim_coords["expver"] + assert "1" in dim_coords["param"] + assert "2" in dim_coords["param"] + assert "3" in dim_coords["param"] + + +def test_compress(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + # Get the ASCII representation before compression + ascii_before = q.to_ascii() + + # Compress the qube + q.compress() + + # The qube should still be valid and have the same structure + ascii_after = q.to_ascii() + + # Verify the structure is preserved or optimized (may change due to deduplication) + assert len(ascii_before) > 0 + assert len(ascii_after) > 0 + + # Verify datacube count is preserved + assert len(q) > 0 + +def test_compress_2(): + input_qube = r"""root +└── class=2 + └── expver=0002 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + # Get the ASCII representation before compression + ascii_before = q.to_ascii() + + # Compress the qube + q.compress() + + # The qube should still be valid and have the same structure + ascii_after = q.to_ascii() + + # Verify the structure is preserved or optimized (may change due to deduplication) + assert len(ascii_before) > 0 + assert len(ascii_after) > 0 + + # Verify datacube count is preserved + assert len(q) > 0 + + +def test_select_multiple_values(): + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + # Select multiple values for the same key + selected = q.select({"param": [1, 3]}, None, None) + + expected = r"""root +├── class=1 +│ ├── expver=0001 +│ │ └── param=1 +│ └── expver=0002 +│ └── param=1 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ └── param=3 + └── expver=0002 + └── param=1""" + + assert selected.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() + + +def test_default(): + """Verify default selection mode shows the full subtree for the selected class""" + input_qube = r"""root +├── class=1 +│ ├── expver=0001 +│ │ ├── param=1 +│ │ └── param=2 +│ └── expver=0002 +│ ├── param=1 +│ └── param=2 +└── class=2 + ├── expver=0001 + │ ├── param=1 + │ ├── param=2 + │ └── param=3 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + + # Default mode: shows full subtree + default_result = q.select({"class": [1]}, None, None) + + default_expected = r"""root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2""" + + assert default_result.to_ascii() == qubed.PyQube.from_ascii(default_expected).to_ascii() + + +def test_drop(): + input_qube = r"""root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + q.drop(["expver"]) + + expected = r"""root +└── class=1 + └── param=1/2""" + + assert q.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() + + +def test_squeeze(): + input_qube = r"""root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2""" + + q = qubed.PyQube.from_ascii(input_qube) + q.squeeze() + + # class has only one value (1), so it gets squeezed out + expected = r"""root +└── expver=0001/0002 + └── param=1/2""" + + assert q.to_ascii() == qubed.PyQube.from_ascii(expected).to_ascii() diff --git a/qubed/src/coordinates/mod.rs b/qubed/src/coordinates/mod.rs index 3da0a31..83fddcb 100644 --- a/qubed/src/coordinates/mod.rs +++ b/qubed/src/coordinates/mod.rs @@ -52,7 +52,7 @@ impl Coordinates { return Coordinates::Empty; } let mut coords = Coordinates::Empty; - let split: Vec<&str> = s.split('|').collect(); + let split: Vec<&str> = s.split('/').collect(); for part in split { // Check for leading zeros to preserve formatting (e.g., "0001") diff --git a/qubed/src/qube.rs b/qubed/src/qube.rs index 381d5a7..c909495 100644 --- a/qubed/src/qube.rs +++ b/qubed/src/qube.rs @@ -242,6 +242,119 @@ impl Qube { Ok(()) } + pub fn drop(&mut self, to_drop: I) -> Result<(), String> + where + I: IntoIterator, + I::Item: AsRef, + { + let drop_set: HashSet = + to_drop.into_iter().map(|s| s.as_ref().to_string()).collect(); + + let root = self.root(); + self.drop_recurse(root, &drop_set)?; + self.compress(); + Ok(()) + } + + /// Removes `node_id` from the tree, re-parenting its children to `parent_id`. + /// Returns the list of grandchild node IDs that were re-parented. + fn splice_out_node( + &mut self, + node_id: NodeIdx, + parent_id: NodeIdx, + ) -> Result, String> { + let node = + self.nodes.get(node_id).ok_or_else(|| format!("Node {:?} not found", node_id))?; + + let node_dim = node.dim; + // Collect grandchildren before mutating + let grandchildren: Vec<(Dimension, Vec)> = + node.children.iter().map(|(d, ids)| (*d, ids.iter().copied().collect())).collect(); + + let all_grandchild_ids: Vec = + grandchildren.iter().flat_map(|(_, ids)| ids.iter().copied()).collect(); + + // Remove the node itself from the slotmap (does not touch its children) + self.nodes.remove(node_id); + + // Remove node from parent's children list + if let Some(parent) = self.nodes.get_mut(parent_id) { + if let Some(children) = parent.children.get_mut(&node_dim) { + children.retain(|&id| id != node_id); + if children.is_empty() { + parent.children.remove(&node_dim); + } + } + parent.structural_hash.store(0, Ordering::Release); + } + + // Re-parent grandchildren to parent_id + for (gc_dim, gc_ids) in grandchildren { + for gc_id in gc_ids { + if let Some(gc_node) = self.nodes.get_mut(gc_id) { + gc_node.parent = Some(parent_id); + } + if let Some(parent) = self.nodes.get_mut(parent_id) { + parent.children.entry(gc_dim).or_insert_with(TinyVec::new).push(gc_id); + } + } + } + + self.invalidate_ancestors(parent_id); + Ok(all_grandchild_ids) + } + + fn drop_recurse(&mut self, node_id: NodeIdx, to_drop: &HashSet) -> Result<(), String> { + // Collect child info upfront before any mutation + let child_info: Vec<(Dimension, Vec)> = self + .node_ref(node_id) + .ok_or_else(|| format!("Node {:?} not found", node_id))? + .children() + .iter() + .map(|(dim, ids)| (*dim, ids.iter().copied().collect())) + .collect(); + + let child_info: Vec<(bool, Vec)> = child_info + .into_iter() + .map(|(dim, ids)| { + let dim_str = self + .dimension_str(&dim) + .ok_or_else(|| format!("Missing dimension string for {:?}", dim))?; + let should_drop = to_drop.contains(dim_str); + Ok((should_drop, ids)) + }) + .collect::>()?; + + for (should_drop, children) in child_info { + if should_drop { + for child_id in children { + // Splice out: move grandchildren up to node_id, then recurse on them + let grandchildren = self.splice_out_node(child_id, node_id)?; + for gc_id in grandchildren { + self.drop_recurse(gc_id, to_drop)?; + } + } + } else { + for child_id in children { + self.drop_recurse(child_id, to_drop)?; + } + } + } + + Ok(()) + } + + pub fn squeeze(&mut self) -> Result<(), String> { + let to_drop: Vec = self + .all_unique_dim_coords() + .into_iter() + .filter(|(_, coords)| coords.len() == 1) + .map(|(dim, _)| dim) + .collect(); + + self.drop(to_drop) + } + pub fn dimension(&self, dim_str: &str) -> Option { self.key_store.get(dim_str).map(Dimension) } @@ -591,4 +704,124 @@ mod tests { let map2 = qube.all_unique_dim_coords(); assert_eq!(map2.len(), 3); } + + #[test] + fn test_drop_single_dimension() { + let mut qube = Qube::new(); + let root = qube.root(); + + let class1 = qube.get_or_create_child("class", root, Some(1.into())).unwrap(); + let expver1 = qube.get_or_create_child("expver", class1, Some(1.into())).unwrap(); + let _param1 = qube.get_or_create_child("param", expver1, Some(1.into())).unwrap(); + + let class2 = qube.get_or_create_child("class", root, Some(2.into())).unwrap(); + let expver2 = qube.get_or_create_child("expver", class2, Some(2.into())).unwrap(); + let _param2 = qube.get_or_create_child("param", expver2, Some(2.into())).unwrap(); + + // Drop the "expver" dimension — its children (param) should be reparented to class + qube.drop(vec!["expver"]).unwrap(); + + // Root should still have "class" children + let root_node = qube.node(root).unwrap(); + assert!(root_node.children(qube.dimension("class").unwrap()).is_some()); + + // Both class nodes should now directly have "param" children (expver was spliced out) + let class1_node = qube.node(class1).unwrap(); + assert!(class1_node.children(qube.dimension("param").unwrap()).is_some()); + + let class2_node = qube.node(class2).unwrap(); + assert!(class2_node.children(qube.dimension("param").unwrap()).is_some()); + } + + #[test] + fn test_drop_middle_dimension_preserves_leaves() { + let input = r#"root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2"#; + + let mut qube = Qube::from_ascii(input).unwrap(); + qube.drop(vec!["expver"]).unwrap(); + + let ascii = qube.to_ascii(); + println!("resulting ascii after drop:\n{}", ascii); + // expver should be gone; param should be directly under class + assert!(!ascii.contains("expver"), "expver should be dropped, got:\n{}", ascii); + assert!(ascii.contains("param"), "param should still be present, got:\n{}", ascii); + assert!(ascii.contains("class"), "class should still be present, got:\n{}", ascii); + } + + #[test] + fn test_drop_multiple_dimensions() { + let mut qube = Qube::new(); + let root = qube.root(); + + let class1 = qube.get_or_create_child("class", root, Some(1.into())).unwrap(); + let expver1 = qube.get_or_create_child("expver", class1, Some(1.into())).unwrap(); + let param1 = qube.get_or_create_child("param", expver1, Some(1.into())).unwrap(); + let type1 = qube.get_or_create_child("type", param1, Some(1.into())).unwrap(); + qube.get_or_create_child("level", type1, Some(1.into())).unwrap(); + + // Drop "expver" and "type" — their children should be spliced up + qube.drop(vec!["expver", "type"]).unwrap(); + + let root_node = qube.node(root).unwrap(); + assert!(root_node.children(qube.dimension("class").unwrap()).is_some()); + + // class1 should now have "param" directly (expver spliced out) + let class1_node = qube.node(class1).unwrap(); + assert!(class1_node.children(qube.dimension("param").unwrap()).is_some()); + + // param1 should now have "level" directly (type spliced out) + let param1_node = qube.node(param1).unwrap(); + assert!(param1_node.children(qube.dimension("level").unwrap()).is_some()); + } + + #[test] + fn test_drop_nonexistent_dimension() { + let mut qube = Qube::new(); + let root = qube.root(); + + let class1 = qube.get_or_create_child("class", root, Some(1.into())).unwrap(); + let _expver1 = qube.get_or_create_child("expver", class1, Some(1.into())).unwrap(); + + // Drop a dimension that doesn't exist - should have no effect + qube.drop(vec!["nonexistent"]).unwrap(); + + let root_node = qube.node(root).unwrap(); + assert!(root_node.children(qube.dimension("class").unwrap()).is_some()); + + let class1_node = qube.node(class1).unwrap(); + assert!(class1_node.children(qube.dimension("expver").unwrap()).is_some()); + } + + #[test] + fn test_squeeze() -> Result<(), String> { + let input = r#"root +└── class=1 + ├── expver=0001 + │ ├── param=1 + │ └── param=2 + └── expver=0002 + ├── param=1 + └── param=2"#; + + let mut qube = Qube::from_ascii(input).unwrap(); + qube.squeeze()?; + + let ascii = qube.to_ascii(); + println!("resulting ascii after squeeze:\n{}", ascii); + // class has only 1 value (1), so it should be squeezed out + assert!(!ascii.contains("class"), "class should be squeezed, got:\n{}", ascii); + // expver has 2 values, so it should remain + assert!(ascii.contains("expver"), "expver should remain, got:\n{}", ascii); + // param has 2 values, so it should remain + assert!(ascii.contains("param"), "param should remain, got:\n{}", ascii); + + Ok(()) + } }