Allow NGramFeatures to be named in order to avoid conflicts when using more than one set of them

author: armatthews <armatthe@cmu.edu> 2014-02-20 22:21:21 -0500
committer: armatthews <armatthe@cmu.edu> 2014-02-20 22:21:21 -0500
commit: dcfc7a9797891074d4dd67f2066d28c10b344f61 (patch)
tree: 1028733853b052a8aed60f2e704a5c959ae2b4b8 /decoder
parent: 2b772ed8c1dcfecbb473f63cb0ef65b1dfb574dd (diff)
2 files changed, 13 insertions, 5 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index e02c7730..7b49fcfa 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -408,7 +408,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
         ("max_translation_sample,X", po::value<int>(), "Sample the max translation from the chart")
         ("pb_max_distortion,D", po::value<int>()->default_value(4), "Phrase-based decoder: maximum distortion")
         ("cll_gradient,G","Compute conditional log-likelihood gradient and write to STDOUT (src & ref required)")
-        ("get_oracle_forest,o", "Calculate rescored hypregraph using approximate BLEU scoring of rules")
+        ("get_oracle_forest,o", "Calculate rescored hypergraph using approximate BLEU scoring of rules")
         ("feature_expectations","Write feature expectations for all features in chart (**OBJ** will be the partition)")
         ("vector_format",po::value<string>()->default_value("b64"), "Sparse vector serialization format for feature expectations or gradients, includes (text or b64)")
         ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)")
diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc
index d337b28b..0bc14e5a 100644
--- a/decoder/ff_ngrams.cc
+++ b/decoder/ff_ngrams.cc
@@ -60,8 +60,9 @@ namespace {
   }
 }
 
-static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator, string* cluster_file) {
+static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator, string* cluster_file, string* featname) {
   vector<string> const& argv=SplitOnWhitespace(in);
+  *featname = "";
   *explicit_markers = false;
   *order = 3;
   prefixes.push_back("NOT-USED");
@@ -83,6 +84,9 @@ static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order,
       case 'x':
         *explicit_markers = true;
         break;
+      case 'n':
+        LMSPEC_NEXTARG; *featname=*i;
+        break;
       case 'U':
 	LMSPEC_NEXTARG;
 	prefixes[1] = *i;
@@ -226,6 +230,7 @@ class NgramDetectorImpl {
       ++n;
       if (!fid) {
         ostringstream os;
+        os << featname_;
         os << prefixes_[n];
         for (int i = n-1; i >= 0; --i) {
           os << (i != n-1 ? target_separator_ : "");
@@ -404,7 +409,8 @@ class NgramDetectorImpl {
 
  public:
   explicit NgramDetectorImpl(bool explicit_markers, unsigned order,
-			     vector<string>& prefixes, string& target_separator, const string& clusters) :
+			     vector<string>& prefixes, string& target_separator, const string& clusters,
+                             const string& featname) :
       kCDEC_UNK(TD::Convert("<unk>")) ,
       add_sos_eos_(!explicit_markers) {
     order_ = order;
@@ -414,6 +420,7 @@ class NgramDetectorImpl {
     unscored_words_offset_ = is_complete_offset_ + 1;
     prefixes_ = prefixes;
     target_separator_ = target_separator;
+    featname_ = featname;
 
     // special handling of beginning / ending sentence markers
     dummy_state_ = new char[state_size_];
@@ -454,6 +461,7 @@ class NgramDetectorImpl {
   TRulePtr dummy_rule_;
   vector<string> prefixes_;
   string target_separator_;
+  string featname_;
   struct FidTree {
     map<WordID, int> fids;
     map<WordID, FidTree> levels;
@@ -467,9 +475,9 @@ NgramDetector::NgramDetector(const string& param) {
   bool explicit_markers = false;
   unsigned order = 3;
   string clusters;
-  ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters);
+  ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters, &featname);
   pimpl_ = new NgramDetectorImpl(explicit_markers, order, prefixes, 
-				 target_separator, clusters);
+				 target_separator, clusters, featname);
   SetStateSize(pimpl_->ReserveStateSize());
 }
author	armatthews <armatthe@cmu.edu>	2014-02-20 22:21:21 -0500
committer	armatthews <armatthe@cmu.edu>	2014-02-20 22:21:21 -0500
commit	dcfc7a9797891074d4dd67f2066d28c10b344f61 (patch)
tree	1028733853b052a8aed60f2e704a5c959ae2b4b8 /decoder
parent	2b772ed8c1dcfecbb473f63cb0ef65b1dfb574dd (diff)