added non-pruning intersection and a CRF tagger

- The linear-chain tagger is more of a proof of concept than a real tagger: the context-free assumptions made in a number of places mean that the algorithms used may not be as efficient as they could be, but the model is as powerful as any CRF.
- It would be easy to add latent variables or semi-CRF support (or both!).
- I've added a couple of basic features that are often used for POS tagging.
- Non-pruning intersection is useful for lexical word alignment models and the tagger.
- A sample POS tagger model will be committed later.
author: Chris Dyer <redpony@gmail.com> 2009-12-17 13:57:54 -0500
committer: Chris Dyer <redpony@gmail.com> 2009-12-17 13:57:54 -0500
commit: bba4ff830c8722cdcaf29e36c1ff5821a912ae5d (patch)
tree: 268f2f8118aca09b3cc40dca8b2be7de8295acd5 /decoder/cdec_ff.cc
parent: 04ae1beeaeceb0161a64d33112f21956f9741bde (diff)
1 files changed, 3 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 0a4f3d5e..bb2c9d34 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -4,6 +4,7 @@
 #include "ff_lm.h"
 #include "ff_csplit.h"
 #include "ff_wordalign.h"
+#include "ff_tagger.h"
 #include "ff_factory.h"
 
 boost::shared_ptr<FFRegistry> global_ff_registry;
@@ -18,5 +19,7 @@ void register_feature_functions() {
   global_ff_registry->Register("AlignerResults", new FFFactory<AlignerResults>);
   global_ff_registry->Register("CSplit_BasicFeatures", new FFFactory<BasicCSplitFeatures>);
   global_ff_registry->Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>);
+  global_ff_registry->Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>);
+  global_ff_registry->Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>);
 };
author	Chris Dyer <redpony@gmail.com>	2009-12-17 13:57:54 -0500
committer	Chris Dyer <redpony@gmail.com>	2009-12-17 13:57:54 -0500
commit	bba4ff830c8722cdcaf29e36c1ff5821a912ae5d (patch)
tree	268f2f8118aca09b3cc40dca8b2be7de8295acd5 /decoder/cdec_ff.cc
parent	04ae1beeaeceb0161a64d33112f21956f9741bde (diff)