diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-11 02:46:13 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-11 02:46:13 +0000 |
commit | b22b4fb953987d6a19716af8c3f8af73826bcfca (patch) | |
tree | 6db7e6872417c34127db03181ed62b63841e6c8e /decoder/cfg.cc | |
parent | 1db7d5bdc95db9515e3c3ce41cefd4e98fc69298 (diff) |
merge
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@511 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/cfg.cc')
-rwxr-xr-x | decoder/cfg.cc | 44 |
1 files changed, 43 insertions, 1 deletions
diff --git a/decoder/cfg.cc b/decoder/cfg.cc index f899765e..74e23cb6 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -2,9 +2,20 @@ #include "hg.h" #include "cfg_format.h" #include "cfg_binarize.h" +#include "hash.h" +#include "batched_append.h" +#include <limits> +#include "fast_lexical_cast.hpp" using namespace std; +namespace { +BinRhs nullrhs(std::numeric_limits<int>::min(),std::numeric_limits<int>::min()); +} +WordID CFG::BinName(BinRhs const& b); +{ + return TD::Convert(lexical_cast<string>(b.first)+"+"+lexical_cast<string>(b.second)); +} void CFG::Binarize(CFGBinarize const& b) { if (!b.Binarizing()) return; @@ -14,7 +25,38 @@ void CFG::Binarize(CFGBinarize const& b) { } // l2r only so far: cerr << "Binarizing "<<b<<endl; - //TODO. + HASH_MAP<BinRhs,NTHandle> bin2lhs; // we're going to hash cons rather than build an explicit trie from right to left. + HASH_MAP_EMPTY(bin2lhs,nullrhs); + int rhsmin=b.bin_unary?0:1; + // iterate using indices and not iterators because we'll be adding to both nodes and edge list. we could instead pessimistically reserve space for both, but this is simpler. also: store original end of nts since we won't need to reprocess newly added ones. + NTs new_nts; // these will be appended at the end, so we don't have to worry about iterator invalidation + Rules new_rules; + //TODO: this could be factored easily into in-place (append to new_* like below) and functional (nondestructive copy) versions (copy orig to target and append to target) + int newnt=nts.size(); + int newruleid=rules.size(); + BinRhs bin; + for (NTs::const_iterator n=nts.begin(),nn=nts.end();n!=nn;++n) { + NT const& nt=*n; + for (Ruleids::iterator ir=n.ruleids.begin(),er=n.ruleids.end();ir!=er;++ir) { + RHS &rhs=ir->rhs; // we're going to binarize this while adding newly created rules to new_... + if (rhs.empty()) continue; + bin.second=rhs.back(); + for (int r=rhs.size()-2;r>=rhsmin;--r) { // pairs from right to left (normally we leave the last pair alone) + rhs.pop_back(); + bin.first=rhs[r]; + if (newnt==(bin.second=(get_default(bin2lhs,bin,newnt)))) { + new_nts.push_back(NT()); + new_nts.back().ruleids.push_back(newruleid); + new_rules.push_back(Rule(newnt,bin)); + if (b.bin_name_nts) + new_nts.back().from.nt=BinName(bin); + ++newnt;++newruleid; + } + } + } + } + batch_append_swap(nts,new_nts); + batch_append_swap(rules,new_rules); } namespace { |