From 6b4b4f19f44e051e5f62bcb1243c3d199d537cc6 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 23 Dec 2010 20:22:45 -0600 Subject: non-latin character detector --- decoder/ff_charset.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 decoder/ff_charset.h (limited to 'decoder/ff_charset.h') diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h new file mode 100644 index 00000000..b1ad537e --- /dev/null +++ b/decoder/ff_charset.h @@ -0,0 +1,26 @@ +#ifndef _FFCHARSET_H_ +#define _FFCHARSET_H_ + +#include +#include +#include "ff.h" +#include "hg.h" + +class SentenceMetadata; + +class NonLatinCount : public FeatureFunction { + public: + NonLatinCount(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + FeatureVector* features, + FeatureVector* estimated_features, + void* context) const; + private: + mutable std::map is_non_latin_; + const int fid_; +}; + +#endif -- cgit v1.2.3