summaryrefslogtreecommitdiff
path: root/decoder/apply_fsa_models.cc
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-13 08:20:47 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-13 08:20:47 +0000
commitead8845217c5e6e48f3680ead6f859ec8e110eb2 (patch)
treea9bd115c8b2b95f933d76e8deed37678b2baa280 /decoder/apply_fsa_models.cc
parent84009c98d9a0a2e3ecd801ebb92ed47ee3f3328b (diff)
(NEEDS TESTING) cfg index rules->nts, sort by prob, remove duplicates keeping highest prob, topo sort (and after binarize topo sort). beginning to apply_fsa_models (PrefixTrieNode)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@539 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/apply_fsa_models.cc')
-rwxr-xr-xdecoder/apply_fsa_models.cc40
1 files changed, 40 insertions, 0 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 2854b28b..c9bda68b 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -13,6 +13,46 @@
using namespace std;
+// Implementation details (not exported). Kept in a flat namespace for convenience.
+
+typedef CFG::BinRhs BinRhs; // shorthands for types nested in CFG, so the code below can omit the CFG:: prefix
+typedef CFG::NTs NTs;
+typedef CFG::NT NT;
+typedef CFG::NTHandle NTHandle;
+typedef CFG::Rules Rules;
+typedef CFG::Rule Rule;
+typedef CFG::RuleHandle RuleHandle;
+
+namespace {
+
+// If we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. If we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. This is effectively left-branching binarization, of course.
+template <class K,class V>
+struct prefix_map_type { // single place that picks the associative container used via PREFIX_MAP
+ typedef std::map<K,V> type; // currently a std::map keyed by K with mapped type V
+};
+// Stand-in for a template typedef: PREFIX_MAP(k,v) expands to prefix_map_type<k,v>::type.
+#define PREFIX_MAP(k,v) prefix_map_type<k,v>::type
+typedef NTHandle LHS; // LHS is an alias for NTHandle; it is the key type of PrefixTrieNode::Completed
+struct PrefixTrieNode { // node type for the prefix trie; still a stub (see TODO below)
+ prob_t backward; // (viterbi) backward prob (for cost pushing)
+ typedef PREFIX_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS
+ Completed completed; // map from LHS to RuleHandle held by this node
+ typedef PREFIX_MAP(WordID,PrefixTrieNode *) Adj; // map from WordID to a pointer to another PrefixTrieNode
+ Adj adj; // NOTE(review): raw pointers; nothing in this struct allocates or frees the pointed-to nodes -- confirm ownership once the TODO is filled in
+ //TODO:
+};
+
+struct PrefixTrie { // remembers which CFG it was constructed from; the actual trie construction is still a TODO
+ CFG const* cfgp; // address of the CFG passed to the constructor (the CFG itself is not copied)
+ CFG const& cfg() const { return *cfgp; } // accessor: dereferences cfgp
+ PrefixTrie(CFG const& cfg) : cfgp(&cfg) { // stores only &cfg, so cfg must outlive this object; NOTE(review): single-argument ctor is not marked explicit
+//TODO:
+ }
+};
+
+}//anon ns
+
+
DEFINE_NAMED_ENUM(FSA_BY)
struct ApplyFsa {