initial checkin

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
commit: 7cc92b65a3185aa242088d830e166e495674efc9 (patch)
tree: 681fe5237612a4e96ce36fb9fabef00042c8ee61 /decoder/csplit.h
parent: 37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff)
1 files changed, 30 insertions, 0 deletions
diff --git a/decoder/csplit.h b/decoder/csplit.h
new file mode 100644
index 00000000..82ed23fc
--- /dev/null
+++ b/decoder/csplit.h
@@ -0,0 +1,30 @@
+#ifndef _CSPLIT_H_
+#define _CSPLIT_H_
+
+#include "translator.h"
+#include "lattice.h"
+
+// CompoundSplit: a "translator" that takes single words (with NO SPACES)
+// and segments them using the approach described in:
+//
+// C. Dyer. (2009) Using a maximum entropy model to build segmentation
+//                 lattices for MT. In Proceedings of NAACL HLT 2009.
+// Note: an extra word space marker # is inserted at the left edge of
+// the forest!
+struct CompoundSplitImpl;  // forward declaration; the definition is not in this header
+struct CompoundSplit : public Translator {
+  CompoundSplit(const boost::program_options::variables_map& conf);  // conf: parsed program options
+  bool TranslateImpl(const std::string& input,  // the word to segment (no spaces, per the note above)
+                 SentenceMetadata* smeta,
+                 const std::vector<double>& weights,
+                 Hypergraph* forest);  // presumably the output segmentation forest -- confirm in the .cc
+
+  // Given a forest generated by CompoundSplit::Translate, return the
+  // index of the edge representing the unsegmented (full-word) form.
+  static int GetFullWordEdgeIndex(const Hypergraph& forest);
+
+ private:
+  boost::shared_ptr<CompoundSplitImpl> pimpl_;  // handle to the implementation object declared above
+};
+
+#endif
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-06-22 05:12:27 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-06-22 05:12:27 +0000
commit	7cc92b65a3185aa242088d830e166e495674efc9 (patch)
tree	681fe5237612a4e96ce36fb9fabef00042c8ee61 /decoder/csplit.h
parent	37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff)