blob: 83d457b8fe34a690e00461449fdf2104c4b6b779 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#ifndef CSPLIT_H_
#define CSPLIT_H_
#include "translator.h"
#include "lattice.h"
// This "translator" takes single words (containing NO SPACES) and segments
// them using the approach described in:
//
// C. Dyer. (2009) Using a maximum entropy model to build segmentation
// lattices for MT. In Proceedings of NAACL HLT 2009.
//
// Note: an extra word-space marker (#) is inserted at the left edge of
// the forest.
// Implementation type held by CompoundSplit::pimpl_. It is only
// forward-declared in this header; its definition lives elsewhere.
struct CompoundSplitImpl;
// Translator that, instead of translating, segments a single input word and
// writes the candidate segmentations into a Hypergraph forest.
struct CompoundSplit : public Translator {
// Builds the splitter from the parsed program options in conf.
// NOTE(review): which option keys are read is not visible from this header;
// check the implementation file.
CompoundSplit(const boost::program_options::variables_map& conf);
// Segments input and stores the result in *forest.
//   input   - the word to segment (expected to contain no spaces)
//   smeta   - per-sentence metadata; NOTE(review): how it is used is not
//             visible here
//   weights - feature weights; presumably used to score the segmentation
//             edges (confirm in the implementation)
//   forest  - output hypergraph
// NOTE(review): presumably overrides a virtual hook declared in Translator
// and returns true on success; confirm against translator.h.
bool TranslateImpl(const std::string& input,
SentenceMetadata* smeta,
const std::vector<double>& weights,
Hypergraph* forest);
// Given a forest generated by CompoundSplit::Translate, returns the index
// of the edge that represents the unsegmented form of the word.
// NOTE(review): the result for a forest without such an edge is not
// specified in this header.
static int GetFullWordEdgeIndex(const Hypergraph& forest);
private:
// Shared pointer to the forward-declared implementation object.
boost::shared_ptr<CompoundSplitImpl> pimpl_;
};
#endif
|