summaryrefslogtreecommitdiff
path: root/rs/src/hypergraph.rs
diff options
context:
space:
mode:
authorPatrick Simianer <patrick@lilt.com>2026-02-26 19:28:22 +0100
committerPatrick Simianer <patrick@lilt.com>2026-02-26 19:28:22 +0100
commit0abcdd7e4358cb902c320b008d3c04bde07b749e (patch)
treef26bd36cc16b792ef4acf5450ef9293b55179167 /rs/src/hypergraph.rs
parent4e62908a1757f83ff703399252ad50758c4eb237 (diff)
Add Rust implementation of SCFG decoder
Rust port of the Ruby prototype decoder with performance optimizations for real Hiero-style grammars: - Rule indexing by first terminal/NT symbol for fast lookup - Chart symbol interning (u16 IDs) instead of string hashing - Passive chart index by (symbol, left) for direct right-endpoint lookup - Items store rule index instead of cloned rule data Includes CKY+ parser, chart-to-hypergraph conversion, Viterbi decoding, derivation extraction, and JSON hypergraph I/O. Self-filling step in parse uses grammar lookup (not just remaining active items) to handle rules that were consumed during the parse loop or skipped by the has_any_at optimization. Produces identical output to the Ruby prototype on all test examples. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'rs/src/hypergraph.rs')
-rw-r--r--rs/src/hypergraph.rs115
1 files changed, 115 insertions, 0 deletions
diff --git a/rs/src/hypergraph.rs b/rs/src/hypergraph.rs
new file mode 100644
index 0000000..90069b0
--- /dev/null
+++ b/rs/src/hypergraph.rs
@@ -0,0 +1,115 @@
+use crate::grammar::Rule;
+use crate::sparse_vector::SparseVector;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct NodeId(pub usize);
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct EdgeId(pub usize);
+
+#[derive(Debug, Clone)]
+pub struct Node {
+ pub id: i64,
+ pub symbol: String,
+ pub left: i32,
+ pub right: i32,
+ pub outgoing: Vec<EdgeId>,
+ pub incoming: Vec<EdgeId>,
+ pub score: f64,
+}
+
+impl Node {
+ pub fn new(id: i64, symbol: &str, left: i32, right: i32) -> Self {
+ Self {
+ id,
+ symbol: symbol.to_string(),
+ left,
+ right,
+ outgoing: Vec::new(),
+ incoming: Vec::new(),
+ score: 0.0,
+ }
+ }
+}
+
+#[derive(Debug, Clone)]
+pub struct Edge {
+ pub head: NodeId,
+ pub tails: Vec<NodeId>,
+ pub score: f64,
+ pub f: SparseVector,
+ pub mark: usize,
+ pub rule: Rule,
+}
+
+impl Edge {
+ pub fn arity(&self) -> usize {
+ self.tails.len()
+ }
+
+ pub fn marked(&self) -> bool {
+ self.arity() == self.mark
+ }
+}
+
+#[derive(Debug)]
+pub struct Hypergraph {
+ pub nodes: Vec<Node>,
+ pub edges: Vec<Edge>,
+ pub nodes_by_id: std::collections::HashMap<i64, NodeId>,
+}
+
+impl Hypergraph {
+ pub fn new() -> Self {
+ Self {
+ nodes: Vec::new(),
+ edges: Vec::new(),
+ nodes_by_id: std::collections::HashMap::new(),
+ }
+ }
+
+ pub fn add_node(&mut self, id: i64, symbol: &str, left: i32, right: i32) -> NodeId {
+ let nid = NodeId(self.nodes.len());
+ self.nodes.push(Node::new(id, symbol, left, right));
+ self.nodes_by_id.insert(id, nid);
+ nid
+ }
+
+ pub fn add_edge(
+ &mut self,
+ head: NodeId,
+ tails: Vec<NodeId>,
+ score: f64,
+ f: SparseVector,
+ rule: Rule,
+ ) -> EdgeId {
+ let eid = EdgeId(self.edges.len());
+ self.edges.push(Edge {
+ head,
+ tails: tails.clone(),
+ score,
+ f,
+ mark: 0,
+ rule,
+ });
+ self.nodes[head.0].incoming.push(eid);
+ for &t in &tails {
+ self.nodes[t.0].outgoing.push(eid);
+ }
+ eid
+ }
+
+ pub fn node(&self, nid: NodeId) -> &Node {
+ &self.nodes[nid.0]
+ }
+
+ pub fn edge(&self, eid: EdgeId) -> &Edge {
+ &self.edges[eid.0]
+ }
+
+ pub fn reset(&mut self) {
+ for e in &mut self.edges {
+ e.mark = 0;
+ }
+ }
+}