summary | refs | log | tree | commit | diff
path: root/decoder/tree_fragment.h
diff options
context:
space:
mode:
author: Chris Dyer <redpony@gmail.com> 2014-03-12 02:30:26 -0400
committer: Chris Dyer <redpony@gmail.com> 2014-03-12 02:30:26 -0400
commit: 5acfb5acb4048560df75c21f2974e1e6f18e61e7 (patch)
tree: 8a87ac9773ab0b1b681c167eb2ff72d66d10e44a /decoder/tree_fragment.h
parent: de8ffd4598d6c1e45273b50642870a661b4bcad4 (diff)
add support for internal tree structure on SCFG rules
Diffstat (limited to 'decoder/tree_fragment.h')
-rw-r--r-- decoder/tree_fragment.h 59
1 files changed, 59 insertions, 0 deletions
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h
new file mode 100644
index 00000000..83cd1c1e
--- /dev/null
+++ b/decoder/tree_fragment.h
@@ -0,0 +1,59 @@
+#ifndef TREE_FRAGMENT
+#define TREE_FRAGMENT
+
+#include <iostream>
+#include <vector>
+#include <string>
+
+#include "tdict.h"
+
+namespace cdec {
+
+// Flag bits tested on unsigned symbol codes by the helpers in this header.
+// Bit 30: checked by IsInternalNT().
+static const unsigned NT_BIT = 1u << 30;
+// Bit 31: checked by IsFrontier().
+static const unsigned FRONTIER_BIT = 1u << 31;
+// The low 28 bits, all set.
+// NOTE(review): presumably used to strip flag bits and recover the plain
+// symbol id; it also clears bits 28 and 29, which no flag in this header
+// uses -- confirm against the implementation file.
+static const unsigned ALL_MASK = (1u << 28) - 1u;
+
+// Returns true when the NT_BIT flag is set in x, false otherwise.
+inline bool IsInternalNT(unsigned x) {
+ return (x & NT_BIT) != 0u;
+}
+
+// Returns true when the FRONTIER_BIT flag is set in x, false otherwise.
+inline bool IsFrontier(unsigned x) {
+ return (x & FRONTIER_BIT) != 0u;
+}
+
+// A single production: a left-hand-side symbol code plus the sequence of
+// right-hand-side symbol codes.
+struct TreeFragmentProduction {
+ // Builds an empty production. lhs is value-initialized to 0 so that a
+ // default-constructed object never exposes an indeterminate value when it
+ // is copied or inspected.
+ TreeFragmentProduction() : lhs(), rhs() {}
+ // Builds a production whose left-hand side is nttype and whose right-hand
+ // side is a copy of r. nttype is an int and is converted to unsigned when
+ // stored in lhs.
+ TreeFragmentProduction(int nttype, const std::vector<unsigned>& r) : lhs(nttype), rhs(r) {}
+ // Left-hand-side symbol code.
+ unsigned lhs;
+ // Right-hand-side symbol codes, in order.
+ std::vector<unsigned> rhs;
+};
+
+// This data structure represents a tree or forest as a list of productions
+// (see nodes). Productions can mix terminal symbols, nonterminal symbols,
+// and nonterminal frontier sites.
+class TreeFragment {
+ public:
+ // Builds an empty fragment. Every scalar member, including root, is
+ // value-initialized to 0 and nodes is empty, so no member is left
+ // indeterminate.
+ TreeFragment() : root(), frontier_sites(), terminals(), nodes() {}
+ // Builds a fragment from a bracketed tree string such as
+ //   (S (NP a (X b) c d) (VP (V foo) (NP (NN bar))))
+ // allow_frontier_sites defaults to false; the parsing itself is defined
+ // out of line.
+ explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false);
+ // Debug helper taking a node index cur and an output stream; used by
+ // operator<< for TreeFragment. Defined out of line.
+ void DebugRec(unsigned cur, std::ostream* out) const;
+ private:
+ // Recursive parsing helper (defined out of line).
+ // cp is the character index in the tree
+ // np keeps track of the nodes (nonterminals) that have been built
+ // symp keeps track of the terminal symbols that have been built
+ // NOTE(review): afs presumably carries allow_frontier_sites, and pcp /
+ // psymp / pnp look like out-parameters for the updated cp / symp / np --
+ // confirm against the implementation.
+ void ParseRec(const std::string& tree, bool afs, unsigned cp, unsigned symp, unsigned np, unsigned* pcp, unsigned* psymp, unsigned* pnp);
+ public:
+ // NOTE(review): presumably the index of the root production in nodes -- confirm.
+ unsigned root;
+ // NOTE(review): presumably the number of frontier sites in the fragment -- confirm.
+ unsigned char frontier_sites;
+ // NOTE(review): presumably the number of terminal symbols in the fragment -- confirm.
+ unsigned short terminals;
+
+ // The productions that make up this fragment.
+ std::vector<TreeFragmentProduction> nodes;
+};
+
+// Streams a debug rendering of x by calling DebugRec on the last entry of
+// x.nodes, then returns os. A fragment with no nodes writes nothing:
+// without that guard, nodes.size() - 1 would wrap around to a huge unsigned
+// index.
+inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) {
+ if (x.nodes.empty()) return os;
+ x.DebugRec(static_cast<unsigned>(x.nodes.size() - 1), &os);
+ return os;
+}
+
+}
+
+#endif