diff options
Diffstat (limited to 'extools')
| -rw-r--r-- | extools/featurize_grammar.cc | 60 | ||||
| -rw-r--r-- | extools/filter_grammar.cc | 8 | ||||
| -rw-r--r-- | extools/filter_score_grammar.cc | 56 | ||||
| -rw-r--r-- | extools/mr_stripe_rule_reduce.cc | 2 | 
4 files changed, 63 insertions, 63 deletions
| diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc index 2fc53ff9..0c3418eb 100644 --- a/extools/featurize_grammar.cc +++ b/extools/featurize_grammar.cc @@ -36,12 +36,12 @@ static const size_t MAX_LINE_LENGTH = 64000000;  // Data structures for indexing and counting rules  //typedef boost::tuple< WordID, vector<WordID>, vector<WordID> > RuleTuple;  struct RuleTuple { -  RuleTuple(const WordID& lhs, const vector<WordID>& s, const vector<WordID>& t)  +  RuleTuple(const WordID& lhs, const vector<WordID>& s, const vector<WordID>& t)    : m_lhs(lhs), m_source(s), m_target(t) {      hash_value();      m_dirty = false;    } -   +    size_t hash_value() const {  //    if (m_dirty) {        size_t hash = 0; @@ -99,17 +99,17 @@ struct FreqCount {    Counts counts;    int inc(const Key& r, int c=1) { -    pair<typename Counts::iterator,bool> itb  +    pair<typename Counts::iterator,bool> itb        = counts.insert(make_pair(r,c)); -    if (!itb.second)  -      itb.first->second += c;  +    if (!itb.second) +      itb.first->second += c;      return itb.first->second;    }    int inc_if_exists(const Key& r, int c=1) {      typename Counts::iterator it = counts.find(r); -    if (it != counts.end())  -      it->second += c;  +    if (it != counts.end()) +      it->second += c;      return it->second;    } @@ -284,9 +284,9 @@ struct LogRuleCount : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs; (void) src; (void) trg; -    //result->set_value(fid_, log(info.counts.value(kCFE))); -    result->set_value(fid_, log(info.counts.value(kCFE))); -    if (IsZero(info.counts.value(kCFE))) +    //result->set_value(fid_, log(info.counts.get(kCFE))); +    result->set_value(fid_, log(info.counts.get(kCFE))); +    if (IsZero(info.counts.get(kCFE)))        result->set_value(sfid_, 1);    }    const int fid_; @@ -300,14 +300,14 @@ struct RulePenalty : public FeatureExtractor {                                 const vector<WordID>& /*src*/,                                 const vector<WordID>& /*trg*/,                                 const RuleStatistics& /*info*/, -                               SparseVector<float>* result) const  +                               SparseVector<float>* result) const    { result->set_value(fid_, 1); }    const int fid_;  }; -// The negative log of the condition rule probs  -// ignoring the identities of the  non-terminals.  +// The negative log of the condition rule probs +// ignoring the identities of the  non-terminals.  // i.e. the prob Hiero would assign.  // Also extracts Labelled features.  struct XFeatures: public FeatureExtractor { @@ -335,7 +335,7 @@ struct XFeatures: public FeatureExtractor {                                       const RuleStatistics& info) {      RuleTuple r(-1, src, trg);      map_rule(r); -    const int count = info.counts.value(kCFE); +    const int count = info.counts.get(kCFE);      assert(count > 0);      rule_counts.inc_if_exists(r, count);      source_counts.inc_if_exists(r.source(), count); @@ -354,14 +354,14 @@ struct XFeatures: public FeatureExtractor {      const int t_c = target_counts(r.target());      assert(t_c > 0);      result->set_value(fid_xfe, log(t_c) - l_r_freq); -    result->set_value(fid_labelledfe, log(t_c) - log(info.counts.value(kCFE))); +    result->set_value(fid_labelledfe, log(t_c) - log(info.counts.get(kCFE)));  //    if (t_c == 1)  //      result->set_value(fid_xesingleton, 1.0);      const int s_c = source_counts(r.source());      assert(s_c > 0);      result->set_value(fid_xef, log(s_c) - l_r_freq); -    result->set_value(fid_labelledef, log(s_c) - log(info.counts.value(kCFE))); +    result->set_value(fid_labelledef, log(s_c) - log(info.counts.get(kCFE)));  //    if (s_c == 1)  //      result->set_value(fid_xfsingleton, 1.0);    } @@ -407,10 +407,10 @@ struct LabelledRuleConditionals: public FeatureExtractor {                                       const vector<WordID>& trg,                                       const RuleStatistics& info) {      RuleTuple r(lhs, src, trg); -    rule_counts.inc_if_exists(r, info.counts.value(kCFE)); -    source_counts.inc_if_exists(r.source(), info.counts.value(kCFE)); +    rule_counts.inc_if_exists(r, info.counts.get(kCFE)); +    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE)); -    target_counts.inc_if_exists(r.target(), info.counts.value(kCFE)); +    target_counts.inc_if_exists(r.target(), info.counts.get(kCFE));    }    virtual void ExtractFeatures(const WordID lhs, @@ -436,10 +436,10 @@ struct LHSProb: public FeatureExtractor {    virtual void ObserveUnfilteredRule(const WordID lhs,                                       const vector<WordID>& /*src*/,                                       const vector<WordID>& /*trg*/, -                                     const RuleStatistics& info) {  -    int count = info.counts.value(kCFE); +                                     const RuleStatistics& info) { +    int count = info.counts.get(kCFE);      total_count += count; -    lhs_counts.inc(lhs, count);  +    lhs_counts.inc(lhs, count);    }    virtual void ExtractFeatures(const WordID lhs, @@ -459,22 +459,22 @@ struct LHSProb: public FeatureExtractor {  // Proper rule generative probability: p( s,t | lhs)  struct GenerativeProb: public FeatureExtractor { -  GenerativeProb() :  +  GenerativeProb() :      fid_(FD::Convert("GenerativeProb")),      kCFE(FD::Convert("CFE")) {}    virtual void ObserveUnfilteredRule(const WordID lhs,                                       const vector<WordID>& /*src*/,                                       const vector<WordID>& /*trg*/, -                                     const RuleStatistics& info)  -  { lhs_counts.inc(lhs, info.counts.value(kCFE)); } +                                     const RuleStatistics& info) +  { lhs_counts.inc(lhs, info.counts.get(kCFE)); }    virtual void ExtractFeatures(const WordID lhs,                                 const vector<WordID>& /*src*/,                                 const vector<WordID>& /*trg*/,                                 const RuleStatistics& info,                                 SparseVector<float>* result) const { -    double log_prob = log(lhs_counts(lhs)) - log(info.counts.value(kCFE)); +    double log_prob = log(lhs_counts(lhs)) - log(info.counts.get(kCFE));      result->set_value(fid_, log_prob);    } @@ -502,8 +502,8 @@ struct LabellingShape: public FeatureExtractor {                                       const RuleStatistics& info) {      RuleTuple r(-1, src, trg);      map_rule(r); -    rule_counts.inc_if_exists(r, info.counts.value(kCFE)); -    source_counts.inc_if_exists(r.source(), info.counts.value(kCFE)); +    rule_counts.inc_if_exists(r, info.counts.get(kCFE)); +    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE));    }    virtual void ExtractFeatures(const WordID /*lhs*/, @@ -519,9 +519,9 @@ struct LabellingShape: public FeatureExtractor {    // Replace all terminals with generic -1    void map_rule(RuleTuple& r) const { -    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it)  +    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it)        if (*it <= 0) *it = -1; -    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it)  +    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it)        if (*it <= 0) *it = -1;    } diff --git a/extools/filter_grammar.cc b/extools/filter_grammar.cc index ca329de1..cafcc923 100644 --- a/extools/filter_grammar.cc +++ b/extools/filter_grammar.cc @@ -37,7 +37,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    po::options_description clo("Command line options");    po::options_description dcmdline_options;    dcmdline_options.add(opts); -   +    po::store(parse_command_line(argc, argv, dcmdline_options), *conf);    po::notify(*conf); @@ -46,7 +46,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {      cerr << dcmdline_options << endl;      exit(1);    } -}    +}  struct SourceFilter {    // return true to keep the rule, otherwise false @@ -95,7 +95,7 @@ struct DumbSuffixTreeFilter : SourceFilter {  };  boost::shared_ptr<SourceFilter> filter; -multimap<float, ID2RuleStatistics::const_iterator> options;  +multimap<float, ID2RuleStatistics::const_iterator> options;  int kCOUNT;  int max_options; @@ -103,7 +103,7 @@ void cb(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rule    options.clear();    if (!filter || filter->Matches(src_rhs)) {      for (ID2RuleStatistics::const_iterator it = rules.begin(); it != rules.end(); ++it) { -      options.insert(make_pair(-it->second.counts.value(kCOUNT), it)); +      options.insert(make_pair(-it->second.counts.get(kCOUNT), it));      }      int ocount = 0;      cout << '[' << TD::Convert(-lhs) << ']' << " ||| "; diff --git a/extools/filter_score_grammar.cc b/extools/filter_score_grammar.cc index 24f5fd1c..d1a20b63 100644 --- a/extools/filter_score_grammar.cc +++ b/extools/filter_score_grammar.cc @@ -45,7 +45,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {    po::options_description clo("Command line options");    po::options_description dcmdline_options;    dcmdline_options.add(opts); -   +    po::store(parse_command_line(argc, argv, dcmdline_options), *conf);    po::notify(*conf); @@ -54,7 +54,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {      cerr << dcmdline_options << endl;      exit(1);    } -}    +}  namespace {    inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; }    inline bool IsBracket(char c){return c == '[' || c == ']';} @@ -143,9 +143,9 @@ void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* coun  void LexTranslationTable::createTTable(const char* buf){    AnnotatedParallelSentence sent;    sent.ParseInputLine(buf); -       +    //iterate over the alignment to compute aligned words -   +    for(int i =0;i<sent.aligned.width();i++)      {        for (int j=0;j<sent.aligned.height();j++) @@ -162,7 +162,7 @@ void LexTranslationTable::createTTable(const char* buf){        if (DEBUG)  cerr << endl;      }    if (DEBUG) cerr << endl; -   +    const WordID NULL_ = TD::Convert("NULL");    //handle unaligned words - align them to null    for (int j =0; j < sent.e_len; j++) { @@ -171,7 +171,7 @@ void LexTranslationTable::createTTable(const char* buf){      ++total_foreign[NULL_];      ++total_english[sent.e[j]];    } -   +    for (int i =0; i < sent.f_len; i++) {      if (sent.f_aligned[i]) continue;      ++word_translation[pair<WordID,WordID> (sent.f[i], NULL_)]; @@ -261,8 +261,8 @@ struct LogRuleCount : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs_src; (void) trg; -    result->set_value(fid_, log(info.counts.value(kCFE))); -    if (IsZero(info.counts.value(kCFE))) +    result->set_value(fid_, log(info.counts.get(kCFE))); +    if (IsZero(info.counts.get(kCFE)))        result->set_value(sfid_, 1);    }    const int fid_; @@ -280,9 +280,9 @@ struct LogECount : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs_src; (void) trg; -    assert(info.counts.value(kCE) > 0); -    result->set_value(fid_, log(info.counts.value(kCE))); -    if (IsZero(info.counts.value(kCE))) +    assert(info.counts.get(kCE) > 0); +    result->set_value(fid_, log(info.counts.get(kCE))); +    if (IsZero(info.counts.get(kCE)))        result->set_value(sfid_, 1);    }    const int sfid_; @@ -300,9 +300,9 @@ struct LogFCount : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs_src; (void) trg; -    assert(info.counts.value(kCF) > 0); -    result->set_value(fid_, log(info.counts.value(kCF))); -    if (IsZero(info.counts.value(kCF))) +    assert(info.counts.get(kCF) > 0); +    result->set_value(fid_, log(info.counts.get(kCF))); +    if (IsZero(info.counts.get(kCF)))        result->set_value(sfid_, 1);    }    const int sfid_; @@ -319,8 +319,8 @@ struct EGivenFExtractor : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs_src; (void) trg; -    assert(info.counts.value(kCF) > 0.0f); -    result->set_value(fid_, safenlog(info.counts.value(kCFE) / info.counts.value(kCF))); +    assert(info.counts.get(kCF) > 0.0f); +    result->set_value(fid_, safenlog(info.counts.get(kCFE) / info.counts.get(kCF)));    }    const int fid_, kCF, kCFE;  }; @@ -334,8 +334,8 @@ struct FGivenEExtractor : public FeatureExtractor {                                 const RuleStatistics& info,                                 SparseVector<float>* result) const {      (void) lhs_src; (void) trg; -    assert(info.counts.value(kCE) > 0.0f); -    result->set_value(fid_, safenlog(info.counts.value(kCFE) / info.counts.value(kCE))); +    assert(info.counts.get(kCE) > 0.0f); +    result->set_value(fid_, safenlog(info.counts.get(kCFE) / info.counts.get(kCE)));    }    const int fid_, kCE, kCFE;  }; @@ -353,7 +353,7 @@ struct LexProbExtractor : public FeatureExtractor {      while(alignment) {        alignment.getline(buf, MAX_LINE_LENGTH);        if (buf[0] == 0) continue; -      table.createTTable(buf);               +      table.createTTable(buf);      }      delete[] buf;  #if 0 @@ -363,7 +363,7 @@ struct LexProbExtractor : public FeatureExtractor {        trans_table.open("lex_trans_table.out");        for(map < pair<WordID,WordID>,int >::iterator it = table.word_translation.begin(); it != table.word_translation.end(); ++it) {          trans_table <<  TD::Convert(trg.first) <<  "|||" << TD::Convert(trg.second) << "==" << it->second << "//" << table.total_foreign[trg.first] << "//" << table.total_english[trg.second] << endl; -      }  +      }        trans_table.close();      }  #endif @@ -393,7 +393,7 @@ struct LexProbExtractor : public FeatureExtractor {              if ( table.total_english[trg[ita->second]] !=0 )                e2f = (float) temp / table.total_english[trg[ita->second]];              if (DEBUG) printf (" %d %E %E\n", temp, f2e, e2f); -               +              //local counts to keep track of which things haven't been aligned, to later compute their null alignment              if (foreign_aligned.count(lhs_src[ita->first+2])) {                foreign_aligned[ lhs_src[ita->first+2] ].first++; @@ -401,7 +401,7 @@ struct LexProbExtractor : public FeatureExtractor {              } else {                foreign_aligned[ lhs_src[ita->first+2] ] = pair<int,float> (1,e2f);              } -   +              if (english_aligned.count( trg[ ita->second] )) {                 english_aligned[ trg[ ita->second] ].first++;                 english_aligned[ trg[ ita->second] ].second += f2e; @@ -416,8 +416,8 @@ struct LexProbExtractor : public FeatureExtractor {            //compute lexical weight P(F|E) and include unaligned foreign words             for(int i=0;i<lhs_src.size(); i++) {                 if (!table.total_foreign.count(lhs_src[i])) continue;      //if we dont have it in the translation table, we won't know its lexical weight -                -               if (foreign_aligned.count(lhs_src[i]))  + +               if (foreign_aligned.count(lhs_src[i]))                   {                     pair<int, float> temp_lex_prob = foreign_aligned[lhs_src[i]];                     final_lex_e2f *= temp_lex_prob.second / temp_lex_prob.first; @@ -427,14 +427,14 @@ struct LexProbExtractor : public FeatureExtractor {                     int temp_count = table.word_translation[pair<WordID,WordID> (lhs_src[i],NULL_)];                     float temp_e2f = (float) temp_count / table.total_english[NULL_];                     final_lex_e2f *= temp_e2f; -                 }                               +                 }               }             //compute P(E|F) unaligned english words             for(int j=0; j< trg.size(); j++) {                 if (!table.total_english.count(trg[j])) continue; -                +                 if (english_aligned.count(trg[j]))                   {                     pair<int, float> temp_lex_prob = english_aligned[trg[j]]; @@ -490,7 +490,7 @@ int main(int argc, char** argv){    int line = 0;    const int kLogRuleCount = FD::Convert("LogRuleCount"); -  multimap<float, string> options;  +  multimap<float, string> options;    while(!unscored_grammar.eof())      {        ++line; @@ -510,7 +510,7 @@ int main(int argc, char** argv){             os << TD::GetString(cur_key)                << ' ' << TD::GetString(it->first) << " ||| ";             feats.Write(false, &os); -           options.insert(make_pair(-feats.value(kLogRuleCount), os.str())); +           options.insert(make_pair(-feats.get(kLogRuleCount), os.str()));          }          int ocount = 0;          for (multimap<float,string>::iterator it = options.begin(); it != options.end(); ++it) { diff --git a/extools/mr_stripe_rule_reduce.cc b/extools/mr_stripe_rule_reduce.cc index 0be1834d..c9b2eb2a 100644 --- a/extools/mr_stripe_rule_reduce.cc +++ b/extools/mr_stripe_rule_reduce.cc @@ -82,7 +82,7 @@ void DoPhraseMarginals(const vector<WordID>& key, const bool bidir, ID2RuleStati    }    double tot = 0;    for (ID2RuleStatistics::iterator it = val->begin(); it != val->end(); ++it) -    tot += it->second.counts.value(kCFE); +    tot += it->second.counts.get(kCFE);    for (ID2RuleStatistics::iterator it = val->begin(); it != val->end(); ++it) {      it->second.counts.set_value(cur_marginal_id, tot); | 
