From ec7edcc7e398bdb040d810094b8416ad9f279d98 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Mon, 7 Dec 2009 01:50:49 -0500
Subject: minor changes to the way the phonotactic prob is calculated

---
 src/csplit.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/csplit.h')

diff --git a/src/csplit.h b/src/csplit.h
index 5911af77..54e5329d 100644
--- a/src/csplit.h
+++ b/src/csplit.h
@@ -4,6 +4,13 @@
 #include "translator.h"
 #include "lattice.h"
 
+// this "translator" takes single words (with NO SPACES) and segments
+// them using the approach described in:
+//
+// C. Dyer. (2009) Using a maximum entropy model to build segmentation
+// lattices for MT. In Proceedings of NAACL HLT 2009.
+// note, an extra word space marker # is inserted at the left edge of
+// the forest!
 struct CompoundSplitImpl;
 struct CompoundSplit : public Translator {
   CompoundSplit(const boost::program_options::variables_map& conf);
-- 
cgit v1.2.3