From fed4a267a202a9d60326861b0a71205503010d24 Mon Sep 17 00:00:00 2001
From: Waleed Ammar
Date: Sat, 16 Jun 2012 05:35:49 -0400
Subject: enable regex-based feature templates

---
Review notes (text here is dropped by `git am`; hunk line counts are unchanged,
but the index line still names the blobs of the original commit):
 * macro_regex: positive offsets were matched by "[1-9][1-9]*", which rejects
   any offset containing the digit 0 (e.g. %x[10]); it is now "[1-9][0-9]*",
   the same form the negative branch already uses.
 * ReplaceMacroWithString: the error message streamed the stringstream object
   itself rather than macro.str(), and the find() result was held in an int
   although it is compared against string::npos; both are corrected.
 * The bare "#include" lines, the empty TD::Convert("") arguments and the
   cut-off usage string look like text lost before this patch was captured
   (to be confirmed against the repository); they are reproduced as found.

 decoder/ff_context.cc | 262 +++++++++++++++++++++++++++++++++++++-------------
 decoder/ff_context.h  |  29 +++++-
 2 files changed, 222 insertions(+), 69 deletions(-)

diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc
index 19f9a413..9de4d737 100644
--- a/decoder/ff_context.cc
+++ b/decoder/ff_context.cc
@@ -1,5 +1,6 @@
 #include "ff_context.h"
 
+#include
 #include
 #include
 #include
@@ -11,24 +12,150 @@
 #include "fdict.h"
 #include "verbose.h"
 
-using namespace std;
+RuleContextFeatures::RuleContextFeatures(const string& param) {
+  // cerr << "initializing RuleContextFeatures with parameters: " << param;
+  kSOS = TD::Convert("");
+  kEOS = TD::Convert("");
+  macro_regex = sregex::compile("%([xy])\\[(-[1-9][0-9]*|0|[1-9][0-9]*)]");
+  ParseArgs(param);
+}
 
-namespace {
-  string Escape(const string& x) {
-    string y = x;
-    for (int i = 0; i < y.size(); ++i) {
-      if (y[i] == '=') y[i]='_';
-      if (y[i] == ';') y[i]='_';
-    }
-    return y;
+string RuleContextFeatures::Escape(const string& x) const {
+  string y = x;
+  for (int i = 0; i < y.size(); ++i) {
+    if (y[i] == '=') y[i]='_';
+    if (y[i] == ';') y[i]='_';
   }
+  return y;
 }
 
-RuleContextFeatures::RuleContextFeatures(const std::string& param) {
-  kSOS = TD::Convert("");
-  kEOS = TD::Convert("");
+// Replaces the first %x[relative_location] (token_vs_label is true) or
+// %y[relative_location] macro in feature_instance with actual_token; aborts if absent.
+void RuleContextFeatures::ReplaceMacroWithString(
+  string& feature_instance, bool token_vs_label, int relative_location,
+  const string& actual_token) const {
+
+  stringstream macro;
+  if (token_vs_label) {
+    macro << "%x[";
+  } else {
+    macro << "%y[";
+  }
+  macro << relative_location << "]";
+  const string::size_type macro_index = feature_instance.find(macro.str());
+  if (macro_index == string::npos) {
+    cerr << "Can't find macro " << macro.str() << " in feature template "
+         << feature_instance;
+    abort();
+  }
+  feature_instance.replace(macro_index, macro.str().size(), actual_token);
+}
+
+void RuleContextFeatures::ReplaceTokenMacroWithString(
+  string& feature_instance, int relative_location,
+  const string& actual_token) const {
+
+  ReplaceMacroWithString(feature_instance, true, relative_location,
+                         actual_token);
+}
 
-  // TODO param lets you pass in a string from the cdec.ini file
+void RuleContextFeatures::ReplaceLabelMacroWithString(
+  string& feature_instance, int relative_location,
+  const string& actual_token) const {
+
+  ReplaceMacroWithString(feature_instance, false, relative_location,
+                         actual_token);
+}
+
+void RuleContextFeatures::Error(const string& error_message) const {
+  cerr << "Error: " << error_message << "\n\n"
+
+       << "RuleContextFeatures Usage: \n"
+       << " feature_function=RuleContextFeatures -t