From a3aa460b375b8d0c3db59c40fc7060fc5e634c14 Mon Sep 17 00:00:00 2001 From: armatthews Date: Thu, 20 Feb 2014 22:21:21 -0500 Subject: Allow NGramFeatures to be named in order to avoid conflicts when using more than one set of them --- decoder/ff_ngrams.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'decoder/ff_ngrams.cc') diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc index d337b28b..0bc14e5a 100644 --- a/decoder/ff_ngrams.cc +++ b/decoder/ff_ngrams.cc @@ -60,8 +60,9 @@ namespace { } } -static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector& prefixes, string& target_separator, string* cluster_file) { +static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector& prefixes, string& target_separator, string* cluster_file, string* featname) { vector const& argv=SplitOnWhitespace(in); + *featname = ""; *explicit_markers = false; *order = 3; prefixes.push_back("NOT-USED"); @@ -83,6 +84,9 @@ static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, case 'x': *explicit_markers = true; break; + case 'n': + LMSPEC_NEXTARG; *featname=*i; + break; case 'U': LMSPEC_NEXTARG; prefixes[1] = *i; @@ -226,6 +230,7 @@ class NgramDetectorImpl { ++n; if (!fid) { ostringstream os; + os << featname_; os << prefixes_[n]; for (int i = n-1; i >= 0; --i) { os << (i != n-1 ? target_separator_ : ""); @@ -404,7 +409,8 @@ class NgramDetectorImpl { public: explicit NgramDetectorImpl(bool explicit_markers, unsigned order, - vector& prefixes, string& target_separator, const string& clusters) : + vector& prefixes, string& target_separator, const string& clusters, + const string& featname) : kCDEC_UNK(TD::Convert("")) , add_sos_eos_(!explicit_markers) { order_ = order; @@ -414,6 +420,7 @@ class NgramDetectorImpl { unscored_words_offset_ = is_complete_offset_ + 1; prefixes_ = prefixes; target_separator_ = target_separator; + featname_ = featname; // special handling of beginning / ending sentence markers dummy_state_ = new char[state_size_]; @@ -454,6 +461,7 @@ class NgramDetectorImpl { TRulePtr dummy_rule_; vector prefixes_; string target_separator_; + string featname_; struct FidTree { map fids; map levels; @@ -467,9 +475,9 @@ NgramDetector::NgramDetector(const string& param) { bool explicit_markers = false; unsigned order = 3; string clusters; - ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters); + ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters, &featname); pimpl_ = new NgramDetectorImpl(explicit_markers, order, prefixes, - target_separator, clusters); + target_separator, clusters, featname); SetStateSize(pimpl_->ReserveStateSize()); } -- cgit v1.2.3 From d843587027d815f3a1c9b8dd5394f3fe04ac85fa Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 23 Feb 2014 17:32:59 -0500 Subject: ngrams fix for unigram models --- decoder/ff_ngrams.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'decoder/ff_ngrams.cc') diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc index 0bc14e5a..0a97cba5 100644 --- a/decoder/ff_ngrams.cc +++ b/decoder/ff_ngrams.cc @@ -36,7 +36,7 @@ struct State { } explicit State(const State& other, unsigned order, WordID extend) { char om1 = order - 1; - assert(om1 > 0); + if (!om1) { memset(state, 0, sizeof(state)); return; } for (char i = 1; i < om1; ++i) state[i - 1]= other.state[i]; state[om1 - 1] = extend; } @@ -152,7 +152,7 @@ usage: << "Example feature instantiation: \n" << " tri:a|b|c \n\n"; - return false; + abort(); } class NgramDetectorImpl { -- cgit v1.2.3