diff options
author | Patrick Simianer <p@simianer.de> | 2014-03-16 17:48:48 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-03-16 17:48:48 +0100 |
commit | 62bd9a4bdcea606d6ff2031fa4b207ef20caac31 (patch) | |
tree | 5a97415cff8287398becc602a1ca16c937a43253 /decoder/tree_fragment.h | |
parent | 7112976f89f0082f7af48829dd5deee61a3e6d16 (diff) | |
parent | 6c04595f968f7a9c047c5941113752a7c7280b45 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder/tree_fragment.h')
-rw-r--r-- | decoder/tree_fragment.h | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h new file mode 100644 index 00000000..83cd1c1e --- /dev/null +++ b/decoder/tree_fragment.h @@ -0,0 +1,59 @@ +#ifndef TREE_FRAGMENT +#define TREE_FRAGMENT + +#include <iostream> +#include <vector> +#include <string> + +#include "tdict.h" + +namespace cdec { + +static const unsigned NT_BIT = 0x40000000u; +static const unsigned FRONTIER_BIT = 0x80000000u; +static const unsigned ALL_MASK = 0x0FFFFFFFu; + +inline bool IsInternalNT(unsigned x) { + return (x & NT_BIT); +} + +inline bool IsFrontier(unsigned x) { + return (x & FRONTIER_BIT); +} + +struct TreeFragmentProduction { + TreeFragmentProduction() {} + TreeFragmentProduction(int nttype, const std::vector<unsigned>& r) : lhs(nttype), rhs(r) {} + unsigned lhs; + std::vector<unsigned> rhs; +}; + +// this data structure represents a tree or forest +// productions can have mixtures of terminals and nonterminal symbols and non-terminal frontier sites +class TreeFragment { + public: + TreeFragment() : frontier_sites(), terminals() {} + // (S (NP a (X b) c d) (VP (V foo) (NP (NN bar)))) + explicit TreeFragment(const std::string& tree, bool allow_frontier_sites = false); + void DebugRec(unsigned cur, std::ostream* out) const; + private: + // cp is the character index in the tree + // np keeps track of the nodes (nonterminals) that have been built + // symp keeps track of the terminal symbols that have been built + void ParseRec(const std::string& tree, bool afs, unsigned cp, unsigned symp, unsigned np, unsigned* pcp, unsigned* psymp, unsigned* pnp); + public: + unsigned root; + unsigned char frontier_sites; + unsigned short terminals; + + std::vector<TreeFragmentProduction> nodes; +}; + +inline std::ostream& operator<<(std::ostream& os, const TreeFragment& x) { + x.DebugRec(x.nodes.size() - 1, &os); + return os; +} + +} + +#endif |