diff --git a/cdlib/algorithms/internal/Highway.py b/cdlib/algorithms/internal/Highway.py new file mode 100644 index 0000000..6a3c713 --- /dev/null +++ b/cdlib/algorithms/internal/Highway.py @@ -0,0 +1,362 @@ +""" +C++ backend wrapper for the Highway overlapping community detection algorithm. + +This module is intended to live at: + + cdlib/algorithms/internal/Highway.py + +The compiled C++ executable is expected at: + + cdlib/algorithms/internal/highway_cpp/build/highway + +The public CDlib wrapper in: + + cdlib/algorithms/overlapping_partition.py + +should call: + + highway_nx(G, ...) + +This module keeps the CDlib-facing API in Python, while the actual Highway +algorithm is executed by the optimized C++ backend. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import tempfile +from pathlib import Path +from typing import Dict, Hashable, List, Sequence, Tuple + +import networkx as nx + + +Node = Hashable + + +def _default_highway_cpp_binary() -> Path: + """ + Return the default path of the compiled Highway C++ executable. + + Expected layout: + cdlib/algorithms/internal/highway_cpp/build/highway + """ + return Path(__file__).resolve().parent / "highway_cpp" / "build" / "highway" + + +def _write_networkx_edgelist_for_cpp( + G: nx.Graph, + path: Path, +) -> Tuple[List[Node], Dict[Node, int]]: + """ + Write a NetworkX graph as a contiguous integer edge list for the C++ backend. + + Returns: + nodes: original NetworkX node labels indexed by contiguous integer id. + node_to_id: mapping from original NetworkX node label to contiguous id. + """ + if G.is_directed(): + G = G.to_undirected() + + nodes = list(G.nodes()) + node_to_id = {node: i for i, node in enumerate(nodes)} + + with path.open("w", encoding="utf-8") as f: + for u, v in G.edges(): + if u == v: + continue + f.write(f"{node_to_id[u]} {node_to_id[v]}\n") + + return nodes, node_to_id + + +def _read_cpp_communities_json( + path: Path, + nodes: Sequence[Node], + min_community_size: int = 1, +) -> List[List[Node]]: + """ + Read communities from the C++ communities.json output. + + The C++ output uses contiguous integer node ids. This function maps them + back to original NetworkX node labels. + """ + if not path.exists(): + return [] + + min_community_size = max(1, int(min_community_size)) + + with path.open("r", encoding="utf-8") as f: + raw_communities = json.load(f) + + communities: List[List[Node]] = [] + + for raw_comm in raw_communities: + comm = [] + + for vid in raw_comm: + vid = int(vid) + if 0 <= vid < len(nodes): + comm.append(nodes[vid]) + + if len(comm) >= min_community_size: + communities.append(comm) + + return communities + + +def _remove_exact_duplicate_communities( + communities: List[List[Node]], + deduplicate_communities: bool = True, +) -> List[List[Node]]: + """ + Remove exactly duplicated communities. + + Two communities are treated as duplicates if they contain the same node set, + regardless of node order. + """ + if not deduplicate_communities: + return communities + + seen = set() + deduped: List[List[Node]] = [] + + for comm in communities: + key = tuple(sorted(comm, key=lambda x: str(x))) + + if key not in seen: + seen.add(key) + deduped.append(comm) + + return deduped + + +def highway_nx( + G: nx.Graph, + highway_top_r: int = 3, + mod_jaccard_alpha: float = 0.70, + ensure_min1_per_node: bool = True, + symmetrize: bool = True, + max_anchors: int | None = None, + prop_top_r: int = 3, + prop_T: int = 10, + prop_damping: float = 0.90, + prop_eta_leak: float = 0.0, + prop_tau: float = 0.85, + enable_pattern_refinement: bool = True, + local_confidence_self_fraction_weight: float = 0.85, + local_confidence_low_entropy_weight: float = 0.15, + local_pattern_confidence_floor: float = 0.05, + local_pattern_confidence_ceiling: float = 1.00, + local_update_strength: float = 0.50, + local_node_mode_power: float = 1.50, + local_pattern_target_mix: float = 0.75, + local_target_sharpen_gamma: float = 1.20, + local_min_abs_mass_to_keep: float = 1e-8, + local_renormalize: bool = True, + decode_theta: float = 0.30, + max_memberships: int = 3, + min_community_size: int = 1, + deduplicate_communities: bool = True, +) -> List[List[Node]]: + """ + Run Highway on a NetworkX graph. + + This function exposes the same Python-facing API as the pure-Python Highway + implementation. In this branch, the computation is delegated internally to + the compiled C++ executable. + + Args: + G: + Input NetworkX graph. + highway_top_r: + Number of top highway edges retained per node. + mod_jaccard_alpha: + Mixture weight between modularity-style score and Jaccard score. + ensure_min1_per_node: + Whether to keep at least one edge per node when possible. + symmetrize: + Whether to symmetrize the retained highway edges. + max_anchors: + Optional maximum number of anchors. + prop_top_r: + Number of propagated memberships retained per node. + prop_T: + Number of propagation iterations. + prop_damping: + Propagation damping factor. + prop_eta_leak: + Leakage parameter in propagation. + prop_tau: + Softmax temperature for propagation. + enable_pattern_refinement: + Whether to enable anchor-preserving pattern refinement. + local_confidence_self_fraction_weight: + Weight for self-fraction confidence. + local_confidence_low_entropy_weight: + Weight for low-entropy confidence. + local_pattern_confidence_floor: + Minimum pattern confidence. + local_pattern_confidence_ceiling: + Maximum pattern confidence. + local_update_strength: + Strength of local refinement update. + local_node_mode_power: + Power parameter for local node mode. + local_pattern_target_mix: + Mixture weight for pattern target. + local_target_sharpen_gamma: + Sharpening parameter for local target. + local_min_abs_mass_to_keep: + Minimum absolute membership mass to keep. + local_renormalize: + Whether to renormalize local memberships. + decode_theta: + Decoding threshold. + max_memberships: + Maximum number of memberships per node. + min_community_size: + Minimum size of returned communities. + deduplicate_communities: + If True, remove exact duplicate communities before returning. + + Returns: + A list of overlapping communities. Each community is represented as a + list of original NetworkX node labels. + """ + if G is None: + raise ValueError("G must be a NetworkX graph.") + + if G.is_directed(): + G = G.to_undirected() + + nodes = list(G.nodes()) + + if len(nodes) == 0: + return [] + + if G.number_of_edges() == 0: + communities = [[node] for node in nodes] + return _remove_exact_duplicate_communities( + communities, + deduplicate_communities=deduplicate_communities, + ) + + binary = _default_highway_cpp_binary() + + if not binary.exists(): + raise FileNotFoundError( + f"Highway C++ binary not found: {binary}. " + "Please compile the C++ backend first." + ) + + if not os.access(binary, os.X_OK): + raise PermissionError( + f"Highway C++ binary is not executable: {binary}. " + f"Run: chmod +x {binary}" + ) + + with tempfile.TemporaryDirectory(prefix="highway_cpp_") as tmpdir_name: + tmpdir = Path(tmpdir_name) + input_path = tmpdir / "graph.edgelist" + communities_json_path = tmpdir / "communities.json" + + nodes, _ = _write_networkx_edgelist_for_cpp(G, input_path) + + cmd = [ + str(binary), + "--input", + str(input_path), + "--highway_top_r", + str(highway_top_r), + "--ensure_min1", + "1" if ensure_min1_per_node else "0", + "--symmetrize", + "1" if symmetrize else "0", + "--mod_jaccard_alpha", + str(mod_jaccard_alpha), + "--prop_top_r", + str(prop_top_r), + "--prop_T", + str(prop_T), + "--prop_damping", + str(prop_damping), + "--prop_eta_leak", + str(prop_eta_leak), + "--prop_tau", + str(prop_tau), + "--local_enable_pattern_refinement", + "1" if enable_pattern_refinement else "0", + "--local_confidence_self_fraction_weight", + str(local_confidence_self_fraction_weight), + "--local_confidence_low_entropy_weight", + str(local_confidence_low_entropy_weight), + "--local_pattern_confidence_floor", + str(local_pattern_confidence_floor), + "--local_pattern_confidence_ceiling", + str(local_pattern_confidence_ceiling), + "--local_update_strength", + str(local_update_strength), + "--local_node_mode_power", + str(local_node_mode_power), + "--local_pattern_target_mix", + str(local_pattern_target_mix), + "--local_target_sharpen_gamma", + str(local_target_sharpen_gamma), + "--local_min_abs_mass_to_keep", + str(local_min_abs_mass_to_keep), + "--local_renormalize", + "1" if local_renormalize else "0", + "--decode_theta", + str(decode_theta), + "--max_memberships", + str(max_memberships), + ] + + if max_anchors is not None: + cmd.extend(["--max_anchors", str(max_anchors)]) + + completed = subprocess.run( + cmd, + check=False, + capture_output=True, + text=True, + cwd=str(tmpdir), + ) + + if completed.returncode != 0: + raise RuntimeError( + "Highway C++ backend failed.\n" + f"Command: {' '.join(cmd)}\n" + f"Working directory: {tmpdir}\n" + f"stdout:\n{completed.stdout}\n" + f"stderr:\n{completed.stderr}" + ) + + communities = _read_cpp_communities_json( + communities_json_path, + nodes, + min_community_size=min_community_size, + ) + + if not communities: + produced_files = "\n".join( + str(p) for p in tmpdir.rglob("*") if p.is_file() + ) + + raise RuntimeError( + "Highway C++ backend finished, but no communities were parsed.\n" + f"Command: {' '.join(cmd)}\n" + f"Working directory: {tmpdir}\n" + f"Expected file: {communities_json_path}\n" + f"Produced files:\n{produced_files}\n" + f"stdout:\n{completed.stdout}\n" + f"stderr:\n{completed.stderr}" + ) + + return _remove_exact_duplicate_communities( + communities, + deduplicate_communities=deduplicate_communities, + ) \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/CMakeLists.txt b/cdlib/algorithms/internal/highway_cpp/CMakeLists.txt new file mode 100644 index 0000000..83d159a --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required(VERSION 3.16) +project(highway LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +find_package(OpenMP) + +add_executable(highway + src/main.cpp + src/graph_io.cpp + src/csr.cpp + src/highway.cpp + src/anchors.cpp + src/propagation.cpp + src/refinement.cpp + src/decode.cpp +) + +target_include_directories(highway PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + +if(OpenMP_CXX_FOUND) + target_link_libraries(highway PRIVATE OpenMP::OpenMP_CXX) + target_compile_definitions(highway PRIVATE HIGHWAY_USE_OPENMP) +endif() + +if(MSVC) + target_compile_options(highway PRIVATE /O2) +else() + target_compile_options(highway PRIVATE -O3) +endif() \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/anchors.hpp b/cdlib/algorithms/internal/highway_cpp/include/anchors.hpp new file mode 100644 index 0000000..9f65286 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/anchors.hpp @@ -0,0 +1,24 @@ +#pragma once +#include "csr.hpp" +#include + +namespace highway { + +struct AnchorSelectionStep { + int step = -1; + int anchor = -1; // contig id + int degree = 0; // in the graph passed into selector + int newly_covered = 0; // how many uncovered nodes became covered due to this anchor + int total_covered = 0; // cumulative covered nodes after this step +}; + +// Original API kept for compatibility. +std::vector select_anchors_greedy_dedup(const CSR& full, int max_anchors); + +// New explainable API. +std::vector select_anchors_greedy_dedup_with_trace( + const CSR& full, + int max_anchors, + std::vector& trace); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/config.hpp b/cdlib/algorithms/internal/highway_cpp/include/config.hpp new file mode 100644 index 0000000..287ed19 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/config.hpp @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace highway { + +struct HighwayBuildConfig { + int top_r = 3; + bool ensure_min1_per_node = true; + bool symmetrize = true; + + // Hybrid backbone score: + // score(u,v) = alpha * modularity_score(u,v) + (1 - alpha) * jaccard(u,v) + // alpha in [0, 1]. Larger alpha -> more modularity-driven. + double mod_jaccard_alpha = 0.70; +}; + +struct PropConfig { + int top_r = 3; + int T = 15; + double damping = 0.9; + double eta_leak = 0.0; + double tau = 0.01; + double eps = 1e-12; +}; + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/csr.hpp b/cdlib/algorithms/internal/highway_cpp/include/csr.hpp new file mode 100644 index 0000000..e3de300 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/csr.hpp @@ -0,0 +1,18 @@ +#pragma once +#include + +namespace highway { + +struct CSR { + int n = 0; + std::vector indptr; // size n+1 + std::vector indices; // size m (two-directed) +}; + +// Build CSR from two-directed edges (src/dst) in contiguous space [0..n-1] +CSR build_csr_from_edges(int n, const std::vector& src, const std::vector& dst); + +// Degrees from CSR +std::vector degrees_from_csr(const CSR& csr); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/decode.hpp b/cdlib/algorithms/internal/highway_cpp/include/decode.hpp new file mode 100644 index 0000000..f74b783 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/decode.hpp @@ -0,0 +1,14 @@ +#pragma once +#include "propagation.hpp" +#include + +namespace highway { + +// node -> list of anchor-node-id (contig space) +std::vector> decode_overlapping_communities( + const TopRState& st, + const std::vector& anchors, + float theta = 0.30f, + int max_memberships = 3); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/graph_io.hpp b/cdlib/algorithms/internal/highway_cpp/include/graph_io.hpp new file mode 100644 index 0000000..7829ac1 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/graph_io.hpp @@ -0,0 +1,26 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace highway { + +struct EdgeList { + // undirected edge list stored as two-directed (u->v and v->u) + int n = 0; // number of nodes in contiguous space [0..n-1] + std::vector src; + std::vector dst; + + // mapping: contig_id -> original_id (for writing outputs) + std::vector inv_map; +}; + +// Read edge list file: each line "u v" (original ids can be int64) +EdgeList read_edgelist_to_undirected_two_directed_contig(const std::string& path); + +// If you already have edges in memory (original ids), convert to contig + two-directed +EdgeList to_undirected_two_directed_contig(const std::vector>& edges); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/highway.hpp b/cdlib/algorithms/internal/highway_cpp/include/highway.hpp new file mode 100644 index 0000000..b7697ff --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/highway.hpp @@ -0,0 +1,33 @@ +#pragma once +#include "config.hpp" +#include "csr.hpp" +#include + +namespace highway { + +struct HighwayEdgeDecision { + int src = -1; // contig id + int dst = -1; // contig id + double modularity_score = 0; // 1 - d_u d_v / (2m) + double jaccard_score = 0; + double hybrid_score = 0; + bool kept_by_src_topr = false; // selected when processing src + bool kept_final = false; // after symmetrize/final retention +}; + +// Original API kept for compatibility. +void build_highway_edges( + const CSR& full, + const HighwayBuildConfig& cfg, + std::vector& out_src, + std::vector& out_dst); + +// New explainable API. +void build_highway_edges_with_trace( + const CSR& full, + const HighwayBuildConfig& cfg, + std::vector& out_src, + std::vector& out_dst, + std::vector& trace); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/propagation.hpp b/cdlib/algorithms/internal/highway_cpp/include/propagation.hpp new file mode 100644 index 0000000..d2ca3b9 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/propagation.hpp @@ -0,0 +1,22 @@ +#pragma once +#include "config.hpp" +#include "csr.hpp" +#include + +namespace highway { + +struct TopRState { + int n = 0; + int r = 0; + int k = 0; + std::vector idx; // size n*r, -1 if empty + std::vector val; // size n*r +}; + +TopRState propagate_other_assignment_topr_cpu( + const CSR& highway_csr, + const CSR& full_csr, + const std::vector& anchors, // contig node ids + const PropConfig& cfg); + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/include/refinement.hpp b/cdlib/algorithms/internal/highway_cpp/include/refinement.hpp new file mode 100644 index 0000000..6eb45b8 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/refinement.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "csr.hpp" +#include "propagation.hpp" + +namespace highway { + +// Anchor-preserving pattern decoding configuration. +// +// Design principle: +// 1) Extract support-set patterns from propagated anchor-indexed memberships. +// 2) Use each pattern only as a calibration unit, not as a new community id. +// 3) Build a pattern-level target distribution in the original anchor space. +// 4) Blend this target with local backbone-neighbor consensus. +// 5) Apply conservative shrinkage: +// alpha_new = (1 - lambda_v) * alpha_old + lambda_v * q_v +// +// Therefore, the returned TopRState preserves the original anchor-indexed +// community ids. This avoids the previous pattern-id reassignment problem. +struct LocalRefineConfig { + // Enable/disable anchor-preserving pattern decoding. + bool enable_pattern_refinement = true; + + // Pattern-confidence weights. + // confidence(P) is computed from: + // self_fraction(P): internal cohesion of the support pattern; + // 1 - neighbor_entropy(P): concentration of neighboring patterns. + // The weighted score is normalized by the total positive weight. + float confidence_self_fraction_weight = 0.75f; + float confidence_low_entropy_weight = 0.25f; + + // Bounds for pattern confidence. + float pattern_confidence_floor = 0.00f; + float pattern_confidence_ceiling = 1.00f; + + // Maximum shrinkage/update strength. + // Actual lambda_v is: + // update_strength * confidence(P_v) * same_pattern_ratio(v)^node_mode_power + float update_strength = 0.45f; + float node_mode_power = 2.00f; + + // Blend between pattern target and node-neighbor target: + // q_v = pattern_target_mix * q_pattern + + // (1 - pattern_target_mix) * q_neighbor + float pattern_target_mix = 0.65f; + + // Optional sharpening applied to pattern-level and final target distributions. + // gamma=1 keeps the distribution unchanged; gamma>1 makes it more peaked. + float target_sharpen_gamma = 1.00f; + + // Output sparsification. + bool renormalize = true; + float min_abs_mass_to_keep = 1e-8f; +}; + +// Anchor-preserving pattern decoding refinement. +// The returned TopRState keeps the original anchor ids as community ids. +TopRState refine_uncertain_nodes_set_search_cpu( + const TopRState& in, + const CSR& full_csr, + const CSR& backbone_csr, + const LocalRefineConfig& cfg); + +} // namespace highway diff --git a/cdlib/algorithms/internal/highway_cpp/include/utils.hpp b/cdlib/algorithms/internal/highway_cpp/include/utils.hpp new file mode 100644 index 0000000..6c99b53 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/include/utils.hpp @@ -0,0 +1,69 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace highway { + +inline bool is_finite(double x) { + return std::isfinite(x); +} + +inline void stable_softmax_inplace(std::vector& x, float tau, float eps = 1e-12f) { + if (x.empty()) return; + if (tau <= 0.0f) tau = 1e-6f; + + float mx = -std::numeric_limits::infinity(); + for (float v : x) mx = std::max(mx, v / tau); + + double sum = 0.0; + for (float& v : x) { + float z = (v / tau) - mx; + float e = std::exp(z); + v = e; + sum += e; + } + if (sum <= eps) { + // fallback uniform + float u = 1.0f / std::max(1, x.size()); + for (float& v : x) v = u; + return; + } + float inv = static_cast(1.0 / sum); + for (float& v : x) v *= inv; +} + +template +inline void topk_indices_values( + const std::vector& a, + int k, + std::vector& out_idx, + std::vector& out_val) { + out_idx.clear(); + out_val.clear(); + if (k <= 0 || a.empty()) return; + k = std::min(k, static_cast(a.size())); + + std::vector idx(a.size()); + std::iota(idx.begin(), idx.end(), 0); + + std::nth_element(idx.begin(), idx.begin() + (k - 1), idx.end(), + [&](int i, int j) { return a[i] > a[j]; }); + idx.resize(k); + + std::sort(idx.begin(), idx.end(), + [&](int i, int j) { return a[i] > a[j]; }); + + out_idx = idx; + out_val.reserve(k); + for (int i : out_idx) out_val.push_back(a[i]); +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/anchors.cpp b/cdlib/algorithms/internal/highway_cpp/src/anchors.cpp new file mode 100644 index 0000000..88308d8 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/anchors.cpp @@ -0,0 +1,85 @@ +#include "anchors.hpp" +#include "csr.hpp" +#include +#include + +namespace highway { + +std::vector select_anchors_greedy_dedup_with_trace( + const CSR& full, + int max_anchors, + std::vector& trace) { + trace.clear(); + + const int n = full.n; + max_anchors = std::max(1, max_anchors); + if (n <= 0) return {}; + + std::vector deg = degrees_from_csr(full); + std::vector order(n); + std::iota(order.begin(), order.end(), 0); + std::sort(order.begin(), order.end(), [&](int a, int b) { + if (deg[a] != deg[b]) return deg[a] > deg[b]; + return a < b; + }); + + std::vector covered(n, 0); + std::vector anchors; + anchors.reserve(std::min(max_anchors, n)); + + int total_covered = 0; + int step_id = 0; + + for (int u : order) { + if ((int)anchors.size() >= max_anchors) break; + if (covered[u]) continue; + + anchors.push_back(u); + + int newly = 0; + if (!covered[u]) { + covered[u] = 1; + ++newly; + ++total_covered; + } + + int s0 = full.indptr[u]; + int s1 = full.indptr[u + 1]; + for (int p = s0; p < s1; ++p) { + int v = full.indices[p]; + if (v >= 0 && v < n && !covered[v]) { + covered[v] = 1; + ++newly; + ++total_covered; + } + } + + AnchorSelectionStep rec; + rec.step = step_id++; + rec.anchor = u; + rec.degree = deg[u]; + rec.newly_covered = newly; + rec.total_covered = total_covered; + trace.push_back(rec); + } + + if (anchors.empty()) { + anchors.push_back(order[0]); + AnchorSelectionStep rec; + rec.step = 0; + rec.anchor = order[0]; + rec.degree = deg[order[0]]; + rec.newly_covered = 1; + rec.total_covered = 1; + trace.push_back(rec); + } + + return anchors; +} + +std::vector select_anchors_greedy_dedup(const CSR& full, int max_anchors) { + std::vector ignored; + return select_anchors_greedy_dedup_with_trace(full, max_anchors, ignored); +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/csr.cpp b/cdlib/algorithms/internal/highway_cpp/src/csr.cpp new file mode 100644 index 0000000..bf7ab8a --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/csr.cpp @@ -0,0 +1,55 @@ +#include "csr.hpp" +#include +#include +#include + +namespace highway { + +CSR build_csr_from_edges(int n, const std::vector& src, const std::vector& dst) { + if (src.size() != dst.size()) throw std::runtime_error("src/dst size mismatch"); + CSR csr; + csr.n = n; + const size_t m = src.size(); + + csr.indptr.assign(n + 1, 0); + + // count + for (size_t i = 0; i < m; ++i) { + int u = src[i]; + if (u < 0 || u >= n) throw std::runtime_error("src out of range"); + csr.indptr[u + 1] += 1; + } + + // prefix sum + for (int i = 0; i < n; ++i) csr.indptr[i + 1] += csr.indptr[i]; + + csr.indices.assign(m, -1); + std::vector cur = csr.indptr; // copy + + // fill + for (size_t i = 0; i < m; ++i) { + int u = src[i]; + int v = dst[i]; + int p = cur[u]++; + csr.indices[p] = v; + } + + // sort neighbor lists (helps deterministic behavior) + for (int u = 0; u < n; ++u) { + int s0 = csr.indptr[u]; + int s1 = csr.indptr[u + 1]; + std::sort(csr.indices.begin() + s0, csr.indices.begin() + s1); + } + + return csr; +} + +std::vector degrees_from_csr(const CSR& csr) { + std::vector deg(csr.n, 0); + for (int u = 0; u < csr.n; ++u) { + deg[u] = csr.indptr[u + 1] - csr.indptr[u]; + } + return deg; +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/decode.cpp b/cdlib/algorithms/internal/highway_cpp/src/decode.cpp new file mode 100644 index 0000000..3522ceb --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/decode.cpp @@ -0,0 +1,46 @@ +#include "decode.hpp" +#include +#include + +namespace highway { + +std::vector> decode_overlapping_communities( + const TopRState& st, + const std::vector& anchors, + float theta, + int max_memberships) { + + max_memberships = std::max(1, max_memberships); + const int n = st.n; + const int r = st.r; + + std::vector> out; + out.reserve(n); + + for (int v = 0; v < n; ++v) { + std::vector> pairs; + pairs.reserve(r); + + for (int j = 0; j < r; ++j) { + int cid = st.idx[v * r + j]; + float pv = st.val[v * r + j]; + if (cid >= 0 && pv >= theta) { + int anchor_node = anchors[cid]; + pairs.emplace_back(anchor_node, pv); + } + } + + std::sort(pairs.begin(), pairs.end(), + [](auto& a, auto& b) { return a.second > b.second; }); + + std::vector labs; + for (int i = 0; i < (int)pairs.size() && i < max_memberships; ++i) { + labs.push_back(pairs[i].first); + } + out.push_back(std::move(labs)); + } + + return out; +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/graph_io.cpp b/cdlib/algorithms/internal/highway_cpp/src/graph_io.cpp new file mode 100644 index 0000000..8e3699a --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/graph_io.cpp @@ -0,0 +1,71 @@ +#include "graph_io.hpp" +#include +#include +#include + +namespace highway { + +static EdgeList build_from_edges(const std::vector>& edges) { + EdgeList out; + std::unordered_map map; + map.reserve(edges.size() * 2); + + auto get_id = [&](long long x) -> int { + auto it = map.find(x); + if (it != map.end()) return it->second; + int nid = static_cast(map.size()); + map.emplace(x, nid); + return nid; + }; + + // first pass: assign ids + for (auto [u, v] : edges) { + (void)get_id(u); + (void)get_id(v); + } + out.n = static_cast(map.size()); + out.inv_map.resize(out.n); + + // invert map + for (auto& kv : map) { + out.inv_map[kv.second] = kv.first; + } + + // build two-directed + out.src.reserve(edges.size() * 2); + out.dst.reserve(edges.size() * 2); + for (auto [u0, v0] : edges) { + int u = map[u0]; + int v = map[v0]; + if (u == v) continue; // drop self-loop (keep consistent w/ most pipelines) + out.src.push_back(u); out.dst.push_back(v); + out.src.push_back(v); out.dst.push_back(u); + } + + return out; +} + +EdgeList read_edgelist_to_undirected_two_directed_contig(const std::string& path) { + std::ifstream fin(path); + if (!fin) throw std::runtime_error("Failed to open file: " + path); + + std::vector> edges; + edges.reserve(1 << 20); + + std::string line; + while (std::getline(fin, line)) { + if (line.empty()) continue; + if (line[0] == '#') continue; + std::istringstream iss(line); + long long u, v; + if (!(iss >> u >> v)) continue; + edges.emplace_back(u, v); + } + return build_from_edges(edges); +} + +EdgeList to_undirected_two_directed_contig(const std::vector>& edges) { + return build_from_edges(edges); +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/highway.cpp b/cdlib/algorithms/internal/highway_cpp/src/highway.cpp new file mode 100644 index 0000000..beeec40 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/highway.cpp @@ -0,0 +1,246 @@ +#include "highway.hpp" +#include "utils.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace highway { + +static inline int intersection_size_sorted( + const CSR& csr, + int u, + int v) { + int pu = csr.indptr[u]; + int pv = csr.indptr[v]; + const int eu = csr.indptr[u + 1]; + const int ev = csr.indptr[v + 1]; + + int inter = 0; + while (pu < eu && pv < ev) { + const int a = csr.indices[pu]; + const int b = csr.indices[pv]; + if (a == b) { + ++inter; + ++pu; + ++pv; + } else if (a < b) { + ++pu; + } else { + ++pv; + } + } + return inter; +} + +static inline double jaccard_score_from_csr( + const CSR& csr, + const std::vector& deg, + int u, + int v) { + const int du = std::max(0, deg[u]); + const int dv = std::max(0, deg[v]); + if (du == 0 && dv == 0) return 0.0; + + const int inter = intersection_size_sorted(csr, u, v); + const int uni = du + dv - inter; + if (uni <= 0) return 0.0; + return static_cast(inter) / static_cast(uni); +} + +void build_highway_edges_with_trace( + const CSR& full, + const HighwayBuildConfig& cfg, + std::vector& out_src, + std::vector& out_dst, + std::vector& trace) { + + const int n = full.n; + out_src.clear(); + out_dst.clear(); + trace.clear(); + + if (n <= 0) return; + + const std::vector deg_i = degrees_from_csr(full); + + // full stores an undirected graph as two-directed edges + const long long m_two_directed = static_cast(full.indices.size()); + const double m_undirected = std::max(1.0, 0.5 * static_cast(m_two_directed)); + + const int r = std::max(1, cfg.top_r); + const double alpha = std::max(0.0, std::min(1.0, cfg.mod_jaccard_alpha)); + + std::vector> picked(n); + std::vector> local_trace(n); + + #if defined(HIGHWAY_USE_OPENMP) + #pragma omp parallel for schedule(dynamic, 256) + #endif + for (int v = 0; v < n; ++v) { + const int s0 = full.indptr[v]; + const int s1 = full.indptr[v + 1]; + const int dv_i = s1 - s0; + if (dv_i <= 0) continue; + + std::vector neigh; + neigh.reserve(dv_i); + for (int p = s0; p < s1; ++p) { + neigh.push_back(full.indices[p]); + } + + std::vector scores(neigh.size(), -std::numeric_limits::infinity()); + std::vector modularity_scores(neigh.size(), 0.0); + std::vector jaccard_scores(neigh.size(), 0.0); + + for (size_t i = 0; i < neigh.size(); ++i) { + const int u = neigh[i]; + if (u < 0 || u >= n) continue; + + const double du = static_cast(std::max(0, deg_i[u])); + const double dv = static_cast(std::max(0, deg_i[v])); + + const double modularity_score = 1.0 - (du * dv) / (2.0 * m_undirected); + const double jaccard_score = jaccard_score_from_csr(full, deg_i, u, v); + const double score = + alpha * modularity_score + + (1.0 - alpha) * jaccard_score; + + modularity_scores[i] = modularity_score; + jaccard_scores[i] = jaccard_score; + scores[i] = score; + } + + std::vector topi; + std::vector topv; + topk_indices_values( + scores, + std::min(r, static_cast(scores.size())), + topi, + topv); + + std::vector chosen_mask(neigh.size(), 0); + for (int ii : topi) { + if (ii >= 0 && ii < static_cast(neigh.size())) chosen_mask[ii] = 1; + } + + std::vector chosen; + chosen.reserve(topi.size()); + for (int ii : topi) { + if (ii >= 0 && ii < static_cast(neigh.size())) { + chosen.push_back(neigh[ii]); + } + } + + if (cfg.ensure_min1_per_node && chosen.empty() && !neigh.empty()) { + int best_idx = 0; + double best_score = scores[0]; + for (int i = 1; i < static_cast(scores.size()); ++i) { + if (scores[i] > best_score) { + best_score = scores[i]; + best_idx = i; + } + } + chosen.push_back(neigh[best_idx]); + chosen_mask[best_idx] = 1; + } + + std::vector my_trace; + my_trace.reserve(neigh.size()); + for (size_t i = 0; i < neigh.size(); ++i) { + HighwayEdgeDecision rec; + rec.src = v; + rec.dst = neigh[i]; + rec.modularity_score = modularity_scores[i]; + rec.jaccard_score = jaccard_scores[i]; + rec.hybrid_score = scores[i]; + rec.kept_by_src_topr = (chosen_mask[i] != 0); + rec.kept_final = false; // finalized later + my_trace.push_back(rec); + } + + picked[v] = std::move(chosen); + local_trace[v] = std::move(my_trace); + } + + std::unordered_set final_kept_dir; + auto make_key = [&](int a, int b) -> long long { + return static_cast(a) * static_cast(n) + static_cast(b); + }; + + if (cfg.symmetrize) { + std::unordered_set dir; + dir.reserve(static_cast(n) * static_cast(r) * 2); + + for (int v = 0; v < n; ++v) { + for (int u : picked[v]) { + dir.insert(make_key(v, u)); + } + } + + std::unordered_set und_seen; + und_seen.reserve(dir.size()); + + out_src.reserve(dir.size() * 2); + out_dst.reserve(dir.size() * 2); + + for (int v = 0; v < n; ++v) { + for (int u : picked[v]) { + if (u < 0 || u >= n || u == v) continue; + + const bool keep = dir.count(make_key(v, u)) || dir.count(make_key(u, v)); + if (!keep) continue; + + const int a = std::min(u, v); + const int b = std::max(u, v); + const long long und_key = make_key(a, b); + + if (und_seen.insert(und_key).second) { + out_src.push_back(a); + out_dst.push_back(b); + out_src.push_back(b); + out_dst.push_back(a); + + final_kept_dir.insert(make_key(a, b)); + final_kept_dir.insert(make_key(b, a)); + } + } + } + } else { + size_t est = 0; + for (int v = 0; v < n; ++v) est += picked[v].size(); + + out_src.reserve(est); + out_dst.reserve(est); + + for (int v = 0; v < n; ++v) { + for (int u : picked[v]) { + if (u < 0 || u >= n || u == v) continue; + out_src.push_back(v); + out_dst.push_back(u); + final_kept_dir.insert(make_key(v, u)); + } + } + } + + for (int v = 0; v < n; ++v) { + for (auto& rec : local_trace[v]) { + rec.kept_final = final_kept_dir.count(make_key(rec.src, rec.dst)) > 0; + trace.push_back(rec); + } + } +} + +void build_highway_edges( + const CSR& full, + const HighwayBuildConfig& cfg, + std::vector& out_src, + std::vector& out_dst) { + std::vector ignored; + build_highway_edges_with_trace(full, cfg, out_src, out_dst, ignored); +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/main.cpp b/cdlib/algorithms/internal/highway_cpp/src/main.cpp new file mode 100644 index 0000000..a4b8167 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/main.cpp @@ -0,0 +1,430 @@ +#include "anchors.hpp" +#include "config.hpp" +#include "csr.hpp" +#include "graph_io.hpp" +#include "highway.hpp" +#include "propagation.hpp" +#include "refinement.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline double now_sec() { + using clock = std::chrono::high_resolution_clock; + return std::chrono::duration(clock::now().time_since_epoch()).count(); +} + +static highway::TopRState make_init_state( + int n, + int r, + const std::vector& anchors) { + highway::TopRState st; + st.n = n; + st.r = r; + st.k = static_cast(anchors.size()); + st.idx.assign(n * r, -1); + st.val.assign(n * r, 0.0f); + + for (int cid = 0; cid < static_cast(anchors.size()); ++cid) { + int v = anchors[cid]; + if (v < 0 || v >= n) continue; + st.idx[v * r + 0] = cid; + st.val[v * r + 0] = 1.0f; + } + + return st; +} + +static std::vector> decode_state_labels( + const highway::TopRState& st, + float theta, + int max_memberships) { + const int keep_max = std::max(1, max_memberships); + std::vector> out(st.n); + + for (int v = 0; v < st.n; ++v) { + std::vector> items; + items.reserve(st.r); + + for (int j = 0; j < st.r; ++j) { + int cid = st.idx[v * st.r + j]; + float pv = st.val[v * st.r + j]; + + if (cid >= 0 && pv > 0.0f) { + items.emplace_back(cid, pv); + } + } + + std::sort(items.begin(), items.end(), [](const auto& a, const auto& b) { + if (a.second != b.second) return a.second > b.second; + return a.first < b.first; + }); + + for (const auto& kv : items) { + if (static_cast(out[v].size()) >= keep_max) break; + if (kv.second >= theta) out[v].push_back(kv.first); + } + + if (out[v].empty() && !items.empty()) { + out[v].push_back(items.front().first); + } + } + + return out; +} + +static long long count_undirected_edges_from_csr(const highway::CSR& csr) { + long long m = 0; + + for (int u = 0; u < csr.n; ++u) { + int s0 = csr.indptr[u]; + int s1 = csr.indptr[u + 1]; + + for (int p = s0; p < s1; ++p) { + int v = csr.indices[p]; + if (u < v) ++m; + } + } + + return m; +} + +static long long count_total_memberships( + const std::vector>& node_memberships) { + long long total = 0; + + for (const auto& labs : node_memberships) { + total += static_cast(labs.size()); + } + + return total; +} + +static std::vector> build_communities_from_node_memberships( + const std::vector>& node_memberships) { + std::map> comm_to_nodes; + + for (int v = 0; v < static_cast(node_memberships.size()); ++v) { + for (int lab : node_memberships[v]) { + if (lab >= 0) { + comm_to_nodes[lab].push_back(v); + } + } + } + + std::vector> communities; + communities.reserve(comm_to_nodes.size()); + + for (auto& kv : comm_to_nodes) { + if (!kv.second.empty()) { + communities.push_back(std::move(kv.second)); + } + } + + return communities; +} + +static void write_communities_json( + const std::string& path, + const std::vector& inv_map, + const std::vector>& communities_contig) { + std::ofstream fout(path); + + if (!fout) { + throw std::runtime_error("Cannot write communities file: " + path); + } + + fout << "[\n"; + + for (int ci = 0; ci < static_cast(communities_contig.size()); ++ci) { + fout << " ["; + + const auto& comm = communities_contig[ci]; + bool first = true; + + for (int i = 0; i < static_cast(comm.size()); ++i) { + int v = comm[i]; + + if (v < 0 || v >= static_cast(inv_map.size())) { + continue; + } + + if (!first) { + fout << ", "; + } + + fout << inv_map[v]; + first = false; + } + + fout << "]"; + + if (ci + 1 < static_cast(communities_contig.size())) { + fout << ","; + } + + fout << "\n"; + } + + fout << "]\n"; +} + +int main(int argc, char** argv) { + std::string input; + + highway::HighwayBuildConfig hcfg; + int max_anchors = -1; + + highway::PropConfig pcfg; + pcfg.top_r = 3; + pcfg.T = 10; + pcfg.damping = 0.9; + pcfg.eta_leak = 0.0; + pcfg.tau = 0.85; + + highway::LocalRefineConfig lrcfg; + + float decode_theta = 0.30f; + int max_memberships = 3; + + for (int i = 1; i < argc; ++i) { + std::string a = argv[i]; + + auto need = [&](const std::string& key) { + if (i + 1 >= argc) { + throw std::runtime_error("Missing value for " + key); + } + return std::string(argv[++i]); + }; + + if (a == "--input") { + input = need(a); + } + + else if (a == "--highway_top_r") { + hcfg.top_r = std::stoi(need(a)); + } + else if (a == "--ensure_min1") { + hcfg.ensure_min1_per_node = (need(a) == "1"); + } + else if (a == "--symmetrize") { + hcfg.symmetrize = (need(a) == "1"); + } + else if (a == "--mod_jaccard_alpha") { + hcfg.mod_jaccard_alpha = std::stod(need(a)); + } + + else if (a == "--max_anchors") { + max_anchors = std::stoi(need(a)); + } + + else if (a == "--prop_top_r") { + pcfg.top_r = std::stoi(need(a)); + } + else if (a == "--prop_T") { + pcfg.T = std::stoi(need(a)); + } + else if (a == "--prop_damping") { + pcfg.damping = std::stod(need(a)); + } + else if (a == "--prop_eta_leak") { + pcfg.eta_leak = std::stod(need(a)); + } + else if (a == "--prop_tau") { + pcfg.tau = std::stod(need(a)); + } + + else if (a == "--local_enable_pattern_refinement") { + lrcfg.enable_pattern_refinement = (need(a) == "1"); + } + else if (a == "--local_confidence_self_fraction_weight") { + lrcfg.confidence_self_fraction_weight = std::stof(need(a)); + } + else if (a == "--local_confidence_low_entropy_weight") { + lrcfg.confidence_low_entropy_weight = std::stof(need(a)); + } + else if (a == "--local_pattern_confidence_floor") { + lrcfg.pattern_confidence_floor = std::stof(need(a)); + } + else if (a == "--local_pattern_confidence_ceiling") { + lrcfg.pattern_confidence_ceiling = std::stof(need(a)); + } + else if (a == "--local_update_strength") { + lrcfg.update_strength = std::stof(need(a)); + } + else if (a == "--local_node_mode_power") { + lrcfg.node_mode_power = std::stof(need(a)); + } + else if (a == "--local_pattern_target_mix") { + lrcfg.pattern_target_mix = std::stof(need(a)); + } + else if (a == "--local_target_sharpen_gamma") { + lrcfg.target_sharpen_gamma = std::stof(need(a)); + } + else if (a == "--local_min_abs_mass_to_keep") { + lrcfg.min_abs_mass_to_keep = std::stof(need(a)); + } + else if (a == "--local_renormalize") { + lrcfg.renormalize = (need(a) == "1"); + } + + else if (a == "--decode_theta") { + decode_theta = std::stof(need(a)); + } + else if (a == "--max_memberships") { + max_memberships = std::stoi(need(a)); + } + + else if (a == "--help") { + std::cout + << "Usage: highway_algorithm_only --input graph.edgelist\n" + << "\n" + << "Core Highway parameters:\n" + << " --highway_top_r 3\n" + << " --ensure_min1 1\n" + << " --symmetrize 1\n" + << " --mod_jaccard_alpha 0.70\n" + << " --max_anchors \n" + << "\n" + << "Propagation parameters:\n" + << " --prop_top_r 3\n" + << " --prop_T 10\n" + << " --prop_damping 0.9\n" + << " --prop_eta_leak 0\n" + << " --prop_tau 0.85\n" + << "\n" + << "Anchor-preserving pattern decoding parameters:\n" + << " --local_enable_pattern_refinement 1\n" + << " --local_confidence_self_fraction_weight 0.85\n" + << " --local_confidence_low_entropy_weight 0.15\n" + << " --local_pattern_confidence_floor 0.05\n" + << " --local_pattern_confidence_ceiling 1.00\n" + << " --local_update_strength 0.50\n" + << " --local_node_mode_power 1.50\n" + << " --local_pattern_target_mix 0.75\n" + << " --local_target_sharpen_gamma 1.20\n" + << " --local_min_abs_mass_to_keep 1e-8\n" + << " --local_renormalize 1\n" + << "\n" + << "Final decode parameters:\n" + << " --decode_theta 0.30\n" + << " --max_memberships 3\n"; + return 0; + } + + else { + throw std::runtime_error("Unknown argument: " + a); + } + } + + if (input.empty()) { + std::cerr << "Error: --input is required. Use --help.\n"; + return 1; + } + + std::unordered_map timers; + const double t_global0 = now_sec(); + + try { + double t0 = now_sec(); + highway::EdgeList el = + highway::read_edgelist_to_undirected_two_directed_contig(input); + timers["Read+Relabel"] = now_sec() - t0; + + const int n = el.n; + if (n <= 0) { + std::cerr << "Empty graph.\n"; + return 0; + } + + t0 = now_sec(); + highway::CSR full = highway::build_csr_from_edges(n, el.src, el.dst); + timers["Full_CSR"] = now_sec() - t0; + + t0 = now_sec(); + std::vector hsrc; + std::vector hdst; + highway::build_highway_edges(full, hcfg, hsrc, hdst); + highway::CSR hcsr = highway::build_csr_from_edges(n, hsrc, hdst); + timers["Highway_build"] = now_sec() - t0; + + t0 = now_sec(); + if (max_anchors < 0) { + max_anchors = std::max(8, std::min(30, n / 5)); + } + std::vector anchors = + highway::select_anchors_greedy_dedup(full, max_anchors); + timers["Anchor_select"] = now_sec() - t0; + + t0 = now_sec(); + highway::TopRState st_init = + make_init_state(n, std::max(1, pcfg.top_r), anchors); + timers["State_init"] = now_sec() - t0; + + t0 = now_sec(); + highway::TopRState st = + highway::propagate_other_assignment_topr_cpu(hcsr, full, anchors, pcfg); + timers["Propagation"] = now_sec() - t0; + + t0 = now_sec(); + highway::TopRState st_ref = + highway::refine_uncertain_nodes_set_search_cpu(st, full, hcsr, lrcfg); + timers["Anchor_preserving_pattern_decoding"] = now_sec() - t0; + + t0 = now_sec(); + auto node_memberships_contig = + decode_state_labels(st_ref, decode_theta, max_memberships); + timers["Decode"] = now_sec() - t0; + + t0 = now_sec(); + auto communities_contig = + build_communities_from_node_memberships(node_memberships_contig); + write_communities_json("communities.json", el.inv_map, communities_contig); + timers["WriteCommunitiesJson"] = now_sec() - t0; + + timers["TOTAL"] = now_sec() - t_global0; + + const long long m_undirected = count_undirected_edges_from_csr(full); + const long long m_backbone_undirected = count_undirected_edges_from_csr(hcsr); + const long long total_memberships = + count_total_memberships(node_memberships_contig); + + std::cout << "Summary:\n"; + std::cout << " n=" << n << "\n"; + std::cout << " m_undirected=" << m_undirected << "\n"; + std::cout << " m_backbone_undirected=" << m_backbone_undirected << "\n"; + std::cout << " backbone_ratio=" + << std::fixed << std::setprecision(6) + << static_cast(m_backbone_undirected) / + std::max(1.0, static_cast(m_undirected)) + << "\n"; + std::cout << " anchors=" << anchors.size() << "\n"; + std::cout << " total_memberships=" << total_memberships << "\n"; + std::cout << " avg_memberships_per_node=" + << std::fixed << std::setprecision(6) + << static_cast(total_memberships) / + std::max(1.0, static_cast(n)) + << "\n"; + + std::cout << "Timers:\n"; + for (const auto& kv : timers) { + std::cout << " " << kv.first << " = " + << std::fixed << std::setprecision(6) + << kv.second << "s\n"; + } + } catch (const std::exception& e) { + std::cerr << "Fatal: " << e.what() << "\n"; + return 1; + } + + return 0; +} \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/propagation.cpp b/cdlib/algorithms/internal/highway_cpp/src/propagation.cpp new file mode 100644 index 0000000..0ad0884 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/propagation.cpp @@ -0,0 +1,217 @@ +#include "propagation.hpp" +#include "utils.hpp" +#include +#include +#include + +namespace highway { + +static inline int at(const std::vector& a, int n, int r, int v, int j) { + return a[v * r + j]; +} +static inline float atf(const std::vector& a, int n, int r, int v, int j) { + return a[v * r + j]; +} +static inline void set(std::vector& a, int n, int r, int v, int j, int x) { + a[v * r + j] = x; +} +static inline void setf(std::vector& a, int n, int r, int v, int j, float x) { + a[v * r + j] = x; +} + +TopRState propagate_other_assignment_topr_cpu( + const CSR& highway_csr, + const CSR& full_csr, + const std::vector& anchors, + const PropConfig& cfg) { + + TopRState st; + const int n = highway_csr.n; + const int r = std::max(1, cfg.top_r); + const int k = (int)anchors.size(); + st.n = n; st.r = r; st.k = k; + + st.idx.assign(n * r, -1); + st.val.assign(n * r, 0.0f); + + if (n <= 0 || k <= 0) return st; + + // init anchors: community ids aligned with anchors list + for (int cid = 0; cid < k; ++cid) { + int v = anchors[cid]; + if (v < 0 || v >= n) continue; + set(st.idx, n, r, v, 0, cid); + setf(st.val, n, r, v, 0, 1.0f); + } + + // degrees + std::vector deg_h_i = degrees_from_csr(highway_csr); + std::vector deg_f_i = degrees_from_csr(full_csr); + std::vector deg_h(n, 1.0), deg_f(n, 1.0); + for (int i = 0; i < n; ++i) { + deg_h[i] = std::max(1, deg_h_i[i]); + deg_f[i] = std::max(1, deg_f_i[i]); + } + + const int T = std::max(0, cfg.T); + const double damping = cfg.damping; + const double eta_leak = cfg.eta_leak; + const float tau = (float)cfg.tau; + const float eps = (float)cfg.eps; + + std::vector idx_new(n * r, -1); + std::vector val_new(n * r, 0.0f); + + std::vector acc_k; acc_k.reserve(k); + + for (int it = 0; it < T; ++it) { + std::fill(idx_new.begin(), idx_new.end(), -1); + std::fill(val_new.begin(), val_new.end(), 0.0f); + + // ---- per node update ---- + #if defined(HIGHWAY_USE_OPENMP) + #pragma omp parallel + #endif + { + std::vector acc_local; + acc_local.resize(k, 0.0f); + + #if defined(HIGHWAY_USE_OPENMP) + #pragma omp for schedule(dynamic, 128) + #endif + for (int v = 0; v < n; ++v) { + std::fill(acc_local.begin(), acc_local.end(), 0.0f); + + // highway neighbors + int s0 = highway_csr.indptr[v]; + int s1 = highway_csr.indptr[v + 1]; + if (s1 > s0) { + for (int p = s0; p < s1; ++p) { + int u = highway_csr.indices[p]; + if (u < 0 || u >= n) continue; + double w = 1.0 / std::sqrt(deg_h[u] * deg_h[v]); + + for (int j = 0; j < r; ++j) { + int cid = at(st.idx, n, r, u, j); + float pv = atf(st.val, n, r, u, j); + if (cid >= 0 && pv > 0.0f) { + acc_local[cid] += (float)(pv * w); + } + } + } + } + + // optional leak from full graph + if (eta_leak > 0.0) { + int f0 = full_csr.indptr[v]; + int f1 = full_csr.indptr[v + 1]; + if (f1 > f0) { + for (int p = f0; p < f1; ++p) { + int u = full_csr.indices[p]; + if (u < 0 || u >= n) continue; + double w = eta_leak / std::sqrt(deg_f[u] * deg_f[v]); + for (int j = 0; j < r; ++j) { + int cid = at(st.idx, n, r, u, j); + float pv = atf(st.val, n, r, u, j); + if (cid >= 0 && pv > 0.0f) { + acc_local[cid] += (float)(pv * w); + } + } + } + } + } + + // fallback copy + bool any_pos = false; + for (int c = 0; c < k; ++c) { + if (acc_local[c] > 0.0f) { any_pos = true; break; } + } + if (!any_pos) { + for (int j = 0; j < r; ++j) { + set(idx_new, n, r, v, j, at(st.idx, n, r, v, j)); + setf(val_new, n, r, v, j, atf(st.val, n, r, v, j)); + } + continue; + } + + // top-r + softmax + std::vector topi; + std::vector topv; + topk_indices_values(acc_local, std::min(r, k), topi, topv); + + stable_softmax_inplace(topv, tau, eps); + + for (int j = 0; j < r; ++j) { + set(idx_new, n, r, v, j, -1); + setf(val_new, n, r, v, j, 0.0f); + } + int rr = (int)topi.size(); + for (int j = 0; j < rr; ++j) { + set(idx_new, n, r, v, j, topi[j]); + setf(val_new, n, r, v, j, topv[j]); + } + } + } + + // ---- damping mix (same as python: scatter into dense acc_k then top-r+softmax) ---- + if (damping < 1.0) { + const float d = (float)damping; + const float one_minus = 1.0f - d; + + #if defined(HIGHWAY_USE_OPENMP) + #pragma omp parallel + #endif + { + std::vector acc_local; + acc_local.resize(k, 0.0f); + + #if defined(HIGHWAY_USE_OPENMP) + #pragma omp for schedule(dynamic, 128) + #endif + for (int v = 0; v < n; ++v) { + float sum_new = 0.0f; + for (int j = 0; j < r; ++j) sum_new += atf(val_new, n, r, v, j); + if (sum_new <= 0.0f) continue; + + std::fill(acc_local.begin(), acc_local.end(), 0.0f); + + // add new + for (int j = 0; j < r; ++j) { + int cid = at(idx_new, n, r, v, j); + float pv = atf(val_new, n, r, v, j); + if (cid >= 0 && pv > 0.0f) acc_local[cid] += d * pv; + } + // add old + for (int j = 0; j < r; ++j) { + int cid = at(st.idx, n, r, v, j); + float pv = atf(st.val, n, r, v, j); + if (cid >= 0 && pv > 0.0f) acc_local[cid] += one_minus * pv; + } + + std::vector topi; + std::vector topv; + topk_indices_values(acc_local, std::min(r, k), topi, topv); + stable_softmax_inplace(topv, tau, eps); + + for (int j = 0; j < r; ++j) { + set(idx_new, n, r, v, j, -1); + setf(val_new, n, r, v, j, 0.0f); + } + int rr = (int)topi.size(); + for (int j = 0; j < rr; ++j) { + set(idx_new, n, r, v, j, topi[j]); + setf(val_new, n, r, v, j, topv[j]); + } + } + } + } + + // swap into state + st.idx.swap(idx_new); + st.val.swap(val_new); + } + + return st; +} + +} // namespace highway \ No newline at end of file diff --git a/cdlib/algorithms/internal/highway_cpp/src/refinement.cpp b/cdlib/algorithms/internal/highway_cpp/src/refinement.cpp new file mode 100644 index 0000000..0bc94c9 --- /dev/null +++ b/cdlib/algorithms/internal/highway_cpp/src/refinement.cpp @@ -0,0 +1,427 @@ +#include "refinement.hpp" + +#include +#include +#include +#include +#include + +namespace highway { + +static inline int idx_at(const TopRState& s, int v, int j) { + return s.idx[v * s.r + j]; +} + +static inline float val_at(const TopRState& s, int v, int j) { + return s.val[v * s.r + j]; +} + +static inline float clamp01(float x) { + return std::max(0.0f, std::min(1.0f, x)); +} + +static inline float clamp_range(float x, float lo, float hi) { + if (lo > hi) std::swap(lo, hi); + return std::max(lo, std::min(hi, x)); +} + +struct PatternKey { + std::vector ids; + bool operator==(const PatternKey& o) const { return ids == o.ids; } +}; + +struct PatternHash { + std::size_t operator()(PatternKey const& k) const noexcept { + std::size_t h = 1469598103934665603ULL; + for (int x : k.ids) { + h ^= static_cast(x) + + 0x9e3779b97f4a7c15ULL + + (h << 6) + + (h >> 2); + } + return h; + } +}; + +struct PatternInfo { + PatternKey key; + std::vector nodes; + + int internal_edges = 0; + int external_edges = 0; + std::unordered_map ext_counts; + + float self_fraction = 0.0f; + float neighbor_entropy = 0.0f; + float confidence = 0.0f; + + // Anchor-indexed target distribution, not pattern-id distribution. + std::unordered_map target; +}; + +static PatternKey support_key_of_node(const TopRState& st, int v) { + PatternKey key; + key.ids.reserve(st.r); + + for (int j = 0; j < st.r; ++j) { + const int cid = idx_at(st, v, j); + const float pv = val_at(st, v, j); + if (cid >= 0 && pv > 0.0f) key.ids.push_back(cid); + } + + std::sort(key.ids.begin(), key.ids.end()); + key.ids.erase(std::unique(key.ids.begin(), key.ids.end()), key.ids.end()); + return key; +} + +static float normalized_entropy_counts(const std::unordered_map& counts) { + double total = 0.0; + int k = 0; + + for (const auto& kv : counts) { + if (kv.second > 0) { + total += kv.second; + ++k; + } + } + + if (total <= 0.0 || k <= 1) return 0.0f; + + double h = 0.0; + for (const auto& kv : counts) { + if (kv.second <= 0) continue; + const double p = static_cast(kv.second) / total; + h -= p * std::log(p + 1e-12); + } + + return static_cast(h / std::log(static_cast(k))); +} + +static void normalize_distribution(std::unordered_map& dist) { + double sum = 0.0; + for (const auto& kv : dist) { + if (kv.second > 0.0f) sum += kv.second; + } + + if (sum <= 1e-20) return; + + for (auto& kv : dist) { + if (kv.second > 0.0f) kv.second = static_cast(kv.second / sum); + else kv.second = 0.0f; + } +} + +static void sharpen_distribution(std::unordered_map& dist, float gamma) { + gamma = std::max(1.0f, gamma); + + if (std::abs(gamma - 1.0f) <= 1e-6f) { + normalize_distribution(dist); + return; + } + + for (auto& kv : dist) { + kv.second = std::pow(std::max(0.0f, kv.second), gamma); + } + normalize_distribution(dist); +} + +static float compute_pattern_confidence( + const PatternInfo& p, + const LocalRefineConfig& cfg) { + + const float w_self = std::max(0.0f, cfg.confidence_self_fraction_weight); + const float w_entropy = std::max(0.0f, cfg.confidence_low_entropy_weight); + const float w_sum = w_self + w_entropy; + + float score = 0.0f; + if (w_sum > 1e-20f) { + score = + w_self * clamp01(p.self_fraction) + + w_entropy * clamp01(1.0f - p.neighbor_entropy); + score /= w_sum; + } + + const float lo = clamp01(cfg.pattern_confidence_floor); + const float hi = clamp01(cfg.pattern_confidence_ceiling); + return clamp_range(score, lo, hi); +} + +static float same_pattern_neighbor_ratio( + const CSR& csr, + const std::vector& node_pid, + int v) { + + if (v < 0 || v >= csr.n || v >= static_cast(node_pid.size())) return 0.0f; + + const int pid = node_pid[v]; + if (pid < 0) return 0.0f; + + int deg = 0; + int same = 0; + + const int s0 = csr.indptr[v]; + const int s1 = csr.indptr[v + 1]; + + for (int p = s0; p < s1; ++p) { + const int u = csr.indices[p]; + if (u < 0 || u >= static_cast(node_pid.size())) continue; + + const int qid = node_pid[u]; + if (qid < 0) continue; + + ++deg; + if (qid == pid) ++same; + } + + if (deg <= 0) return 0.0f; + return static_cast(same) / static_cast(deg); +} + +static std::unordered_map node_neighbor_anchor_consensus( + const TopRState& st, + const CSR& csr, + int v) { + + std::unordered_map dist; + if (v < 0 || v >= csr.n || v >= st.n) return dist; + + const int s0 = csr.indptr[v]; + const int s1 = csr.indptr[v + 1]; + + for (int p = s0; p < s1; ++p) { + const int u = csr.indices[p]; + if (u < 0 || u >= st.n) continue; + + for (int j = 0; j < st.r; ++j) { + const int cid = idx_at(st, u, j); + const float pv = val_at(st, u, j); + if (cid >= 0 && pv > 0.0f) dist[cid] += pv; + } + } + + normalize_distribution(dist); + return dist; +} + +static void write_topr_from_distribution( + TopRState& out, + int v, + const std::unordered_map& dist, + float min_abs_mass_to_keep, + bool renormalize) { + + std::vector> items; + items.reserve(dist.size()); + + for (const auto& kv : dist) { + if (kv.first >= 0 && kv.second > min_abs_mass_to_keep) { + items.push_back(kv); + } + } + + if (items.empty()) return; + + std::sort(items.begin(), items.end(), [](const auto& a, const auto& b) { + if (a.second != b.second) return a.second > b.second; + return a.first < b.first; + }); + + const int keep = std::min(out.r, static_cast(items.size())); + + for (int j = 0; j < out.r; ++j) { + out.idx[v * out.r + j] = -1; + out.val[v * out.r + j] = 0.0f; + } + + float kept_sum = 0.0f; + for (int j = 0; j < keep; ++j) { + out.idx[v * out.r + j] = items[j].first; + out.val[v * out.r + j] = items[j].second; + kept_sum += items[j].second; + } + + if (renormalize && kept_sum > 1e-20f) { + for (int j = 0; j < keep; ++j) { + out.val[v * out.r + j] /= kept_sum; + } + } +} + +TopRState refine_uncertain_nodes_set_search_cpu( + const TopRState& in, + const CSR& full_csr, + const CSR& backbone_csr, + const LocalRefineConfig& cfg) { + + if (!cfg.enable_pattern_refinement) return in; + + const int n = in.n; + const int r = in.r; + if (n <= 0 || r <= 0) return in; + + // ------------------------------------------------------------ + // 1. Extract support-set patterns from propagated memberships. + // ------------------------------------------------------------ + std::unordered_map key_to_pid; + key_to_pid.reserve(static_cast(n) * 2U); + + std::vector patterns; + std::vector node_pid(n, -1); + + for (int v = 0; v < n; ++v) { + PatternKey key = support_key_of_node(in, v); + + auto it = key_to_pid.find(key); + int pid = -1; + + if (it == key_to_pid.end()) { + pid = static_cast(patterns.size()); + key_to_pid.emplace(key, pid); + + PatternInfo info; + info.key = std::move(key); + patterns.push_back(std::move(info)); + } else { + pid = it->second; + } + + node_pid[v] = pid; + patterns[pid].nodes.push_back(v); + } + + if (patterns.empty()) return in; + + // ------------------------------------------------------------ + // 2. Estimate pattern structural confidence on the full graph. + // ------------------------------------------------------------ + for (int u = 0; u < full_csr.n && u < n; ++u) { + const int pu = node_pid[u]; + if (pu < 0) continue; + + const int s0 = full_csr.indptr[u]; + const int s1 = full_csr.indptr[u + 1]; + + for (int p = s0; p < s1; ++p) { + const int v = full_csr.indices[p]; + if (v <= u || v < 0 || v >= n) continue; // undirected once + + const int pv = node_pid[v]; + if (pv < 0) continue; + + if (pu == pv) { + patterns[pu].internal_edges += 1; + } else { + patterns[pu].external_edges += 1; + patterns[pv].external_edges += 1; + patterns[pu].ext_counts[pv] += 1; + patterns[pv].ext_counts[pu] += 1; + } + } + } + + for (auto& p : patterns) { + const int incident_units = 2 * p.internal_edges + p.external_edges; + if (incident_units > 0) { + p.self_fraction = static_cast(2 * p.internal_edges) / + static_cast(incident_units); + } + + p.neighbor_entropy = normalized_entropy_counts(p.ext_counts); + p.confidence = compute_pattern_confidence(p, cfg); + } + + // ------------------------------------------------------------ + // 3. Build anchor-space pattern targets by averaging propagated memberships. + // ------------------------------------------------------------ + for (auto& p : patterns) { + for (int v : p.nodes) { + for (int j = 0; j < r; ++j) { + const int cid = idx_at(in, v, j); + const float pv = val_at(in, v, j); + if (cid >= 0 && pv > 0.0f) p.target[cid] += pv; + } + } + + normalize_distribution(p.target); + sharpen_distribution(p.target, cfg.target_sharpen_gamma); + } + + // ------------------------------------------------------------ + // 4. Anchor-preserving soft decoding. + // + // Pattern is only a calibration unit. The output remains in the original + // anchor community space: + // q_v = mix * q_pattern + (1 - mix) * q_neighbor + // lambda_v = update_strength * confidence(P_v) * same_pattern_ratio(v)^gamma + // alpha_new = (1 - lambda_v) * alpha_old + lambda_v * q_v + // ------------------------------------------------------------ + TopRState out = in; + out.k = in.k; + + const CSR& local_csr = (backbone_csr.n == n ? backbone_csr : full_csr); + const float mix = clamp01(cfg.pattern_target_mix); + const float update_strength = clamp01(cfg.update_strength); + const float mode_power = std::max(0.0f, cfg.node_mode_power); + + for (int v = 0; v < n; ++v) { + const int pid = node_pid[v]; + if (pid < 0 || pid >= static_cast(patterns.size())) continue; + + const PatternInfo& p = patterns[pid]; + if (p.target.empty()) continue; + + std::unordered_map q; + + for (const auto& kv : p.target) { + if (kv.first >= 0 && kv.second > 0.0f) q[kv.first] += mix * kv.second; + } + + std::unordered_map q_neighbor = + node_neighbor_anchor_consensus(in, local_csr, v); + + for (const auto& kv : q_neighbor) { + if (kv.first >= 0 && kv.second > 0.0f) { + q[kv.first] += (1.0f - mix) * kv.second; + } + } + + normalize_distribution(q); + if (q.empty()) continue; + + const float same_ratio = same_pattern_neighbor_ratio(local_csr, node_pid, v); + const float node_factor = std::pow(clamp01(same_ratio), mode_power); + const float lambda_v = clamp01(update_strength * clamp01(p.confidence) * node_factor); + + if (lambda_v <= 1e-7f) continue; + + std::unordered_map blended; + + for (int j = 0; j < r; ++j) { + const int cid = idx_at(in, v, j); + const float pv = val_at(in, v, j); + if (cid >= 0 && pv > 0.0f) { + blended[cid] += (1.0f - lambda_v) * pv; + } + } + + for (const auto& kv : q) { + if (kv.first >= 0 && kv.second > 0.0f) { + blended[kv.first] += lambda_v * kv.second; + } + } + + normalize_distribution(blended); + sharpen_distribution(blended, cfg.target_sharpen_gamma); + + write_topr_from_distribution( + out, + v, + blended, + cfg.min_abs_mass_to_keep, + cfg.renormalize); + } + + return out; +} + +} // namespace highway diff --git a/cdlib/algorithms/overlapping_partition.py b/cdlib/algorithms/overlapping_partition.py index 8a1be26..b8736b0 100644 --- a/cdlib/algorithms/overlapping_partition.py +++ b/cdlib/algorithms/overlapping_partition.py @@ -31,6 +31,7 @@ endntm_find_overlap_cluster, endntm_evalFuction, ) +from cdlib.algorithms.internal.Highway import highway_nx from cdlib.prompt_utils import report_missing_packages import warnings @@ -116,6 +117,7 @@ "coach", "graph_entropy", "ebgc", + "highway", ] @@ -2100,3 +2102,154 @@ def ebgc( return NodeClustering( clustering, g_original, "ebgc", method_parameters={}, overlap=True ) + + +def highway( + g_original: object, + highway_top_r: int = 3, + mod_jaccard_alpha: float = 0.70, + ensure_min1_per_node: bool = True, + symmetrize: bool = True, + max_anchors: int = None, + prop_top_r: int = 3, + prop_T: int = 10, + prop_damping: float = 0.90, + prop_eta_leak: float = 0.0, + prop_tau: float = 0.85, + enable_pattern_refinement: bool = True, + local_confidence_self_fraction_weight: float = 0.85, + local_confidence_low_entropy_weight: float = 0.15, + local_pattern_confidence_floor: float = 0.05, + local_pattern_confidence_ceiling: float = 1.00, + local_update_strength: float = 0.50, + local_node_mode_power: float = 1.50, + local_pattern_target_mix: float = 0.75, + local_target_sharpen_gamma: float = 1.20, + local_min_abs_mass_to_keep: float = 1e-8, + local_renormalize: bool = True, + decode_theta: float = 0.30, + max_memberships: int = 3, + min_community_size: int = 1, + deduplicate_communities: bool = True, +) -> NodeClustering: + """ + Highway is an overlapping community detection algorithm based on sparse + structurally informative backbones and anchor-membership propagation. + + The algorithm first builds a sparse backbone that keeps structurally + informative edges, then selects representative anchor nodes, propagates + anchor-indexed memberships over the backbone, and decodes the resulting + memberships into overlapping communities. + + **Supported Graph Types** + + ========== ======== ======== + Undirected Directed Weighted + ========== ======== ======== + Yes No No + ========== ======== ======== + + :param g_original: a networkx/igraph object + :param highway_top_r: number of retained neighbors per node in the sparse backbone + :param mod_jaccard_alpha: mixing weight between modularity-based and Jaccard-based edge scores + :param ensure_min1_per_node: whether to keep at least one edge for each non-isolated node + :param symmetrize: whether to symmetrize the sparse backbone + :param max_anchors: maximum number of selected anchors + :param prop_top_r: number of retained anchor memberships per node + :param prop_T: number of propagation iterations + :param prop_damping: damping factor used in anchor-membership propagation + :param prop_eta_leak: optional leakage weight from the full graph + :param prop_tau: softmax temperature for propagation + :param enable_pattern_refinement: whether to enable anchor-preserving pattern decoding + :param local_confidence_self_fraction_weight: self-fraction weight in pattern confidence + :param local_confidence_low_entropy_weight: low-entropy weight in pattern confidence + :param local_pattern_confidence_floor: minimum pattern confidence + :param local_pattern_confidence_ceiling: maximum pattern confidence + :param local_update_strength: local decoding update strength + :param local_node_mode_power: local mode exponent + :param local_pattern_target_mix: pattern/local target mixing parameter + :param local_target_sharpen_gamma: target sharpening exponent + :param local_min_abs_mass_to_keep: minimum membership mass to keep + :param local_renormalize: whether to renormalize local refined memberships + :param decode_theta: threshold for decoding node memberships + :param max_memberships: maximum number of memberships retained per node + :param min_community_size: minimum size of returned communities + :param deduplicate_communities: whether to remove exact duplicate communities before returning + :return: NodeClustering object + + :Example: + + >>> from cdlib import algorithms + >>> import networkx as nx + >>> G = nx.karate_club_graph() + >>> coms = algorithms.highway(G) + + To preserve exact duplicate communities from the algorithm output: + + >>> coms = algorithms.highway(G, deduplicate_communities=False) + """ + + g = convert_graph_formats(g_original, nx.Graph) + + coms = highway_nx( + G=g, + highway_top_r=highway_top_r, + mod_jaccard_alpha=mod_jaccard_alpha, + ensure_min1_per_node=ensure_min1_per_node, + symmetrize=symmetrize, + max_anchors=max_anchors, + prop_top_r=prop_top_r, + prop_T=prop_T, + prop_damping=prop_damping, + prop_eta_leak=prop_eta_leak, + prop_tau=prop_tau, + enable_pattern_refinement=enable_pattern_refinement, + local_confidence_self_fraction_weight=local_confidence_self_fraction_weight, + local_confidence_low_entropy_weight=local_confidence_low_entropy_weight, + local_pattern_confidence_floor=local_pattern_confidence_floor, + local_pattern_confidence_ceiling=local_pattern_confidence_ceiling, + local_update_strength=local_update_strength, + local_node_mode_power=local_node_mode_power, + local_pattern_target_mix=local_pattern_target_mix, + local_target_sharpen_gamma=local_target_sharpen_gamma, + local_min_abs_mass_to_keep=local_min_abs_mass_to_keep, + local_renormalize=local_renormalize, + decode_theta=decode_theta, + max_memberships=max_memberships, + min_community_size=min_community_size, + deduplicate_communities=deduplicate_communities, + ) + + return NodeClustering( + coms, + g_original, + "Highway", + method_parameters={ + "highway_top_r": highway_top_r, + "mod_jaccard_alpha": mod_jaccard_alpha, + "ensure_min1_per_node": ensure_min1_per_node, + "symmetrize": symmetrize, + "max_anchors": max_anchors, + "prop_top_r": prop_top_r, + "prop_T": prop_T, + "prop_damping": prop_damping, + "prop_eta_leak": prop_eta_leak, + "prop_tau": prop_tau, + "enable_pattern_refinement": enable_pattern_refinement, + "local_confidence_self_fraction_weight": local_confidence_self_fraction_weight, + "local_confidence_low_entropy_weight": local_confidence_low_entropy_weight, + "local_pattern_confidence_floor": local_pattern_confidence_floor, + "local_pattern_confidence_ceiling": local_pattern_confidence_ceiling, + "local_update_strength": local_update_strength, + "local_node_mode_power": local_node_mode_power, + "local_pattern_target_mix": local_pattern_target_mix, + "local_target_sharpen_gamma": local_target_sharpen_gamma, + "local_min_abs_mass_to_keep": local_min_abs_mass_to_keep, + "local_renormalize": local_renormalize, + "decode_theta": decode_theta, + "max_memberships": max_memberships, + "min_community_size": min_community_size, + "deduplicate_communities": deduplicate_communities, + }, + overlap=True, + ) \ No newline at end of file diff --git a/cdlib/test/test_community_discovery_models.py b/cdlib/test/test_community_discovery_models.py index f4c1411..2328011 100644 --- a/cdlib/test/test_community_discovery_models.py +++ b/cdlib/test/test_community_discovery_models.py @@ -1063,3 +1063,26 @@ def test_bayan(self): if len(coms.communities) > 0: self.assertEqual(type(coms.communities[0]), list) self.assertEqual(type(coms.communities[0][0]), str) + + def test_highway(self): + g = get_string_graph() + coms = algorithms.highway(g) + + self.assertEqual(type(coms.communities), list) + self.assertTrue(coms.overlap) + + if len(coms.communities) > 0: + self.assertEqual(type(coms.communities[0]), list) + if len(coms.communities[0]) > 0: + self.assertEqual(type(coms.communities[0][0]), str) + + g = nx.karate_club_graph() + coms = algorithms.highway(g) + + self.assertEqual(type(coms.communities), list) + self.assertTrue(coms.overlap) + + if len(coms.communities) > 0: + self.assertEqual(type(coms.communities[0]), list) + if len(coms.communities[0]) > 0: + self.assertEqual(type(coms.communities[0][0]), int) \ No newline at end of file diff --git a/docs/reference/cd_algorithms/node_clustering.rst b/docs/reference/cd_algorithms/node_clustering.rst index 19eb41e..66c8a43 100644 --- a/docs/reference/cd_algorithms/node_clustering.rst +++ b/docs/reference/cd_algorithms/node_clustering.rst @@ -103,6 +103,7 @@ As a result, methods in this subclass return a ``NodeClustering`` object instanc slpa walkscan wCommunity + highway ^^^^^^^^^^^^^^^^^