diff options
-rw-r--r-- | decoder/cdec.cc | 1 | ||||
-rw-r--r-- | decoder/lexcrf.cc | 2 | ||||
-rw-r--r-- | training/atools.cc | 130 |
3 files changed, 95 insertions, 38 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index c6a0057f..d517e451 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -71,6 +71,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("show_expected_length", "Show the expected translation length under the model") ("show_partition,z", "Compute and show the partition (inside score)") ("beam_prune", po::value<double>(), "Prune paths from +LM forest") + ("lexcrf_use_null", "Support source-side null words in lexical translation") ("tagger_tagset,t", po::value<string>(), "(Tagger) file containing tag set") ("csplit_output_plf", "(Compound splitter) Output lattice in PLF format") ("csplit_preserve_full_word", "(Compound splitter) Always include the unsegmented form in the output lattice") diff --git a/decoder/lexcrf.cc b/decoder/lexcrf.cc index 816506e4..9f96de9f 100644 --- a/decoder/lexcrf.cc +++ b/decoder/lexcrf.cc @@ -12,7 +12,7 @@ using namespace std; struct LexicalCRFImpl { LexicalCRFImpl(const boost::program_options::variables_map& conf) : - use_null(false), + use_null(conf.count("lexcrf_use_null") > 0), kXCAT(TD::Convert("X")*-1), kNULL(TD::Convert("<eps>")), kBINARY(new TRule("[X] ||| [X,1] [X,2] ||| [1] [2]")), diff --git a/training/atools.cc b/training/atools.cc index a18250f7..cf238371 100644 --- a/training/atools.cc +++ b/training/atools.cc @@ -28,7 +28,7 @@ struct Command { x->resize(max(a.width(), b.width()), max(a.height(), b.width())); } bool Safe(const Array2D<bool>& a, int i, int j) const { - if (i < a.width() && j < a.height()) + if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) return a(i,j); else return false; @@ -40,9 +40,10 @@ struct Command { struct FMeasureCommand : public Command { FMeasureCommand() : matches(), num_predicted(), num_in_ref() {} int Result() const { return 2; } - string Name() const { return "f"; } + string Name() const { return "fmeasure"; } bool RequiresTwoOperands() const { return true; } void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) { + (void) x; // AER just computes statistics, not an alignment int i_len = ref.width(); int j_len = ref.height(); for (int i = 0; i < i_len; ++i) { @@ -126,54 +127,85 @@ struct RefineCommand : public Command { neighbors_.push_back(make_pair(0,-1)); } bool RequiresTwoOperands() const { return true; } + + void Align(int i, int j) { + res_(i, j) = true; + is_i_aligned_[i] = true; + is_j_aligned_[j] = true; + } + + bool IsNeighborAligned(int i, int j) const { + for (int k = 0; k < neighbors_.size(); ++k) { + const int di = neighbors_[k].first; + const int dj = neighbors_[k].second; + if (Safe(res_, i + di, j + dj)) + return true; + } + return false; + } + + bool IsNeitherAligned(int i, int j) const { + return !(is_i_aligned_[i] || is_j_aligned_[j]); + } + + bool IsOneOrBothUnaligned(int i, int j) const { + return !(is_i_aligned_[i] && is_j_aligned_[j]); + } + + bool KoehnAligned(int i, int j) const { + return IsOneOrBothUnaligned(i, j) && IsNeighborAligned(i, j); + } + + typedef bool (RefineCommand::*Predicate)(int i, int j) const; + protected: void InitRefine( const Array2D<bool>& a, - const Array2D<bool>& b, - Array2D<bool>* x) { - EnsureSize(a, b, x); + const Array2D<bool>& b) { + res_.clear(); + EnsureSize(a, b, &res_); in_.clear(); un_.clear(); is_i_aligned_.clear(); is_j_aligned_.clear(); EnsureSize(a, b, &in_); EnsureSize(a, b, &un_); - is_i_aligned_.resize(x->width(), false); - is_j_aligned_.resize(x->height(), false); + is_i_aligned_.resize(res_.width(), false); + is_j_aligned_.resize(res_.height(), false); for (int i = 0; i < in_.width(); ++i) for (int j = 0; j < in_.height(); ++j) { un_(i, j) = Safe(a, i, j) || Safe(b, i, j); in_(i, j) = Safe(a, i, j) && Safe(b, i, j); + if (in_(i, j)) Align(i, j); } } - // "grow" the intersection alignment with neighboring points - // from the union alignment - void Grow(Array2D<bool>* x) { - Array2D<bool>& res = *x; - queue<pair<int, int> > q; - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) - if (in_(i, j)) { - Align(i, j, x); - q.push(make_pair(i, j)); + // "grow" the resulting alignment using the points in adds + // if they match the constraints determined by pred + void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) { + if (idempotent) { + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) { + if (adds(i, j) && !res_(i, j) && + (this->*pred)(i, j)) Align(i, j); } - while(!q.empty()) { - const pair<int,int> point = q.front(); - q.pop(); - for (int k = 0; k < neighbors_.size(); ++k) { - const int test_i = neighbors_[k].first + point.first; - const int test_j = neighbors_[k].second + point.second; - if (Safe(un_, test_i, test_j) && !res(test_i, test_j)) { - Align(test_i, test_j, x); - q.push(make_pair(test_i, test_j)); + return; + } + set<pair<int, int> > p; + for (int i = 0; i < adds.width(); ++i) + for (int j = 0; j < adds.height(); ++j) + if (adds(i, j) && !res_(i, j)) + p.insert(make_pair(i, j)); + bool keep_going = !p.empty(); + while (keep_going) { + keep_going = false; + for (set<pair<int, int> >::iterator pi = p.begin(); + pi != p.end(); ++pi) { + if ((this->*pred)(pi->first, pi->second)) { + Align(pi->first, pi->second); + p.erase(pi); + keep_going = true; } } } } - void Final(bool do_and, Array2D<bool>* x) { - } - void Align(int i, int j, Array2D<bool>* x) { - (*x)(i, j) = true; - is_i_aligned_[i] = true; - is_j_aligned_[j] = true; - } + Array2D<bool> res_; // refined alignment Array2D<bool> in_; // intersection alignment Array2D<bool> un_; // union alignment vector<bool> is_i_aligned_; @@ -190,12 +222,34 @@ struct DiagCommand : public RefineCommand { } }; +struct GDCommand : public DiagCommand { + string Name() const { return "grow-diag"; } + void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + *x = res_; + } +}; + struct GDFCommand : public DiagCommand { - string Name() const { return "gdf"; } + string Name() const { return "grow-diag-final"; } + void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, a); + Grow(&RefineCommand::IsOneOrBothUnaligned, true, b); + *x = res_; + } +}; + +struct GDFACommand : public DiagCommand { + string Name() const { return "grow-diag-final-and"; } void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { - InitRefine(a, b, x); - Grow(x); - Final(false, x); + InitRefine(a, b); + Grow(&RefineCommand::KoehnAligned, false, un_); + Grow(&RefineCommand::IsNeitherAligned, true, a); + Grow(&RefineCommand::IsNeitherAligned, true, b); + *x = res_; } }; @@ -258,7 +312,9 @@ int main(int argc, char **argv) { AddCommand<InvertCommand>(); AddCommand<IntersectCommand>(); AddCommand<UnionCommand>(); + AddCommand<GDCommand>(); AddCommand<GDFCommand>(); + AddCommand<GDFACommand>(); AddCommand<FMeasureCommand>(); po::variables_map conf; InitCommandLine(argc, argv, &conf); |