From 5acfb5acb4048560df75c21f2974e1e6f18e61e7 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 12 Mar 2014 02:30:26 -0400 Subject: add support for internal tree structure on SCFG rules --- decoder/tree_fragment.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 decoder/tree_fragment.h (limited to 'decoder/tree_fragment.h') diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h new file mode 100644 index 00000000..83cd1c1e --- /dev/null +++ b/decoder/tree_fragment.h @@ -0,0 +1,59 @@ +#ifndef TREE_FRAGMENT +#define TREE_FRAGMENT + +#include +#include +#include + +#include "tdict.h" + +namespace cdec { + +static const unsigned NT_BIT = 0x40000000u; +static const unsigned FRONTIER_BIT = 0x80000000u; +static const unsigned ALL_MASK = 0x0FFFFFFFu; + +inline bool IsInternalNT(unsigned x) { + return (x & NT_BIT); +} + +inline bool IsFrontier(unsigned x) { + return (x & FRONTIER_BIT); +} + +struct TreeFragmentProduction { + TreeFragmentProduction() {} + TreeFragmentProduction(int nttype, const std::vector& r) : lhs(nttype), rhs(r) {} + unsigned lhs; + std::vector rhs; +}; + +// this data structure represents a tree or forest +// productions can have mixtures of terminals and nonterminal symbols and non-terminal frontier sites +class TreeFragment { + public: + TreeFragment() : frontier_sites(), terminals() {} + // (S (NP a (X b) c d) (VP (V foo) (NP (NN bar)))) + explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false); + void DebugRec(unsigned cur, std::ostream* out) const; + private: + // cp is the character index in the tree + // np keeps track of the nodes (nonterminals) that have been built + // symp keeps track of the terminal symbols that have been built + void ParseRec(const std::string& tree, bool afs, unsigned cp, unsigned symp, unsigned np, unsigned* pcp, unsigned* psymp, unsigned* pnp); + public: + unsigned root; + unsigned char frontier_sites; + unsigned short terminals; + + std::vector nodes; +}; + +inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) { + x.DebugRec(x.nodes.size() - 1, &os); + return os; +} + +} + +#endif -- cgit v1.2.3