diff options
| author | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 | 
| commit | 244971287003d079e46193b8a209c28955f90134 (patch) | |
| tree | 8beaae6b12b913acb213fc7f2415fd63886192f9 /training | |
| parent | 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (diff) | |
| parent | b66e838ed52decc0be1eb5817b2a77c3840db2c5 (diff) | |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'training')
| -rw-r--r-- | training/dpmert/lo_test.cc | 2 | ||||
| -rw-r--r-- | training/mira/kbest_cut_mira.cc | 61 | ||||
| -rwxr-xr-x | training/mira/mira.py | 11 | ||||
| -rw-r--r-- | training/pro/mr_pro_map.cc | 26 | ||||
| -rw-r--r-- | training/utils/grammar_convert.cc | 21 | 
5 files changed, 71 insertions, 50 deletions
| diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc index d89bcd99..b8776169 100644 --- a/training/dpmert/lo_test.cc +++ b/training/dpmert/lo_test.cc @@ -56,7 +56,7 @@ BOOST_AUTO_TEST_CASE(TestConvexHull) {  }  BOOST_AUTO_TEST_CASE(TestConvexHullInside) { -  const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; +  const string json = "{\"rules\":[1,\"[X] ||| a ||| a\",2,\"[X] ||| A [X] ||| A [1]\",3,\"[X] ||| c ||| c\",4,\"[X] ||| C [X] ||| C [1]\",5,\"[X] ||| [X] B [X] ||| [1] B [2]\",6,\"[X] ||| [X] b [X] ||| [1] b [2]\",7,\"[X] ||| X [X] ||| X [1]\",8,\"[X] ||| Z [X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}";    Hypergraph hg;    istringstream instr(json);    HypergraphIO::ReadFromJSON(&instr, &hg); diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index cde65332..724b1853 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -341,23 +341,22 @@ struct BasicObserver: public DecoderObserver {  };  struct TrainingObserver : public DecoderObserver { -  TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { -     - -    if(!pseudo_doc && !sent_approx) -    if(cur_pass > 0)     //calculate corpus bleu score from previous iterations 1-best for BLEU gain -      { -	ScoreP acc; -	for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { -	  if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } -	  acc->PlusEquals(*corpus_bleu_sent_stats[ii]); -	   -	} -	corpus_bleu_stats = acc; -	corpus_bleu_score = acc->ComputeScore(); +  TrainingObserver(const int k, +                   const DocScorer& d, +                   vector<GoodBadOracle>* o, +                   vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { +    if(!pseudo_doc && !sent_approx) { +      if(cur_pass > 0) {    //calculate corpus bleu score from previous iterations 1-best for BLEU gain +        ScoreP acc; +        for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { +          if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } +          acc->PlusEquals(*corpus_bleu_sent_stats[ii]); +        } +        corpus_bleu_stats = acc; +        corpus_bleu_score = acc->ComputeScore();        } - -} +    } +  }    const DocScorer& ds;    vector<ScoreP>& corpus_bleu_sent_stats;    vector<GoodBadOracle>& oracles; @@ -461,7 +460,6 @@ struct TrainingObserver : public DecoderObserver {  	    }  	  else //use sentence-level smoothing ( used when cur_pass=0 if not pseudo_doc)  	    { -	       	      sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore());  	    } @@ -575,19 +573,15 @@ void ReadTrainingCorpus(const string& fname, vector<string>* c) {    }  } -void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od) -{ -  cerr << "Reading BLEU gain file "; +void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od) { +  cerr << "Reading previous score file ";    string fname; -  if(cur_pass == 0) -    { -      fname = od + "/run.raw.init"; -    } -  else -    { -      int last_pass = cur_pass - 1;  -      fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B"; -    } +  if (cur_pass == 0) { +    fname = od + "/run.raw.init"; +  } else { +    int last_pass = cur_pass - 1;  +    fname = od + "/run.raw."  +  boost::lexical_cast<std::string>(last_pass) + ".B"; +  }    cerr << fname << "\n";    ReadFile rf(fname);    istream& in = *rf.stream(); @@ -604,7 +598,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore      if (!acc) { acc = sentscore->GetZero(); }      acc->PlusEquals(*sentscore);      ++lc; -     }    assert(lc > 0); @@ -612,7 +605,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore    string details;    acc->ScoreDetails(&details);    cerr << "Previous run: " << details << score << endl; -  } @@ -672,10 +664,9 @@ int main(int argc, char** argv) {    //check training pass,if >0, then use previous iterations corpus bleu stats    cur_pass = stream ? 0 : conf["pass"].as<int>(); -  if(cur_pass > 0) -    { -      ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir); -    } +  if(cur_pass > 0) { +    ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir); +  }    cerr << "Using optimizer:" << optimizer << endl; diff --git a/training/mira/mira.py b/training/mira/mira.py index 539a0b0e..691a62a6 100755 --- a/training/mira/mira.py +++ b/training/mira/mira.py @@ -203,14 +203,15 @@ def main():    if have_mpl: graph_file = graph(args.output_dir, hope_best_fear, args.metric)    dev_results, dev_bleu = evaluate(args.devset, args.weights, args.config,  -                         script_dir, args.output_dir) +                         script_dir, args.output_dir, args.jobs)    if args.test:      if args.test_config:        test_results, test_bleu = evaluate(args.test, args.weights,  -                              args.test_config, script_dir, args.output_dir) +                              args.test_config, script_dir, args.output_dir, +                              args.jobs)      else:        test_results, test_bleu = evaluate(args.test, args.weights, args.config, -                              script_dir, args.output_dir) +                              script_dir, args.output_dir, args.jobs)    else:       test_results = ''      test_bleu = '' @@ -240,11 +241,11 @@ def graph(output_dir, hope_best_fear, metric):    return graph_file  #evaluate a given test set using decode-and-evaluate.pl -def evaluate(testset, weights, ini, script_dir, out_dir): +def evaluate(testset, weights, ini, script_dir, out_dir, jobs):    evaluator = '{}/../utils/decode-and-evaluate.pl'.format(script_dir)    try:      p = subprocess.Popen([evaluator, '-c', ini, '-w', weights, '-i', testset,  -                         '-d', out_dir, '--jobs', args.jobs], stdout=subprocess.PIPE) +                         '-d', out_dir, '--jobs', str(jobs)], stdout=subprocess.PIPE)      results, err = p.communicate()      bleu, results = results.split('\n',1)    except subprocess.CalledProcessError: diff --git a/training/pro/mr_pro_map.cc b/training/pro/mr_pro_map.cc index a5e6e48f..da58cd24 100644 --- a/training/pro/mr_pro_map.cc +++ b/training/pro/mr_pro_map.cc @@ -88,23 +88,43 @@ struct DiffOrder {    }  }; -void Sample(const unsigned gamma, +double LengthDifferenceStdDev(const training::CandidateSet& J_i, int n) { +  double sum = 0; +  for (int i = 0; i < n; ++i) { +    const size_t a = rng->inclusive(0, J_i.size() - 1)(); +    const size_t b = rng->inclusive(0, J_i.size() - 1)(); +    if (a == b) { --i; continue; } +    double p = J_i[a].ewords.size(); +    p -= J_i[b].ewords.size(); +    sum += p * p;  // mean is 0 by construction +  } +  return max(sqrt(sum / n), 2.0); +}; + +void Sample(const int gamma,              const unsigned xi,              const training::CandidateSet& J_i,              const EvaluationMetric* metric,              vector<TrainingInstance>* pv) { +  const double len_stddev = LengthDifferenceStdDev(J_i, 5000);    const bool invert_score = metric->IsErrorMetric();    vector<TrainingInstance> v1, v2;    float avg_diff = 0; -  for (unsigned i = 0; i < gamma; ++i) { +  const double z_score_threshold=2; +  for (int i = 0; i < gamma; ++i) {      const size_t a = rng->inclusive(0, J_i.size() - 1)();      const size_t b = rng->inclusive(0, J_i.size() - 1)(); -    if (a == b) continue; +    if (a == b) { --i; continue; } +    double z_score = fabs(((int)J_i[a].ewords.size() - (int)J_i[b].ewords.size()) / len_stddev); +    // variation on Nakov et al. (2011) +    if (z_score > z_score_threshold) { --i; continue; }      float ga = metric->ComputeScore(J_i[a].eval_feats);      float gb = metric->ComputeScore(J_i[b].eval_feats);      bool positive = gb < ga;      if (invert_score) positive = !positive;      const float gdiff = fabs(ga - gb); +    //cerr << ((int)J_i[a].ewords.size() - (int)J_i[b].ewords.size()) << endl; +    //cerr << (ga - gb) << endl;      if (!gdiff) continue;      avg_diff += gdiff;      SparseVector<weight_t> xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); diff --git a/training/utils/grammar_convert.cc b/training/utils/grammar_convert.cc index 607a7cb9..5c1b4d4a 100644 --- a/training/utils/grammar_convert.cc +++ b/training/utils/grammar_convert.cc @@ -56,15 +56,22 @@ int GetOrCreateNode(const WordID& lhs, map<WordID, int>* lhs2node, Hypergraph* h    return node_id - 1;  } +void AddDummyGoalNode(Hypergraph* hg) { +  static const int kGOAL = -TD::Convert("Goal"); +  static TRulePtr kGOAL_RULE(new TRule("[Goal] ||| [X] ||| [1]")); +  unsigned old_goal_node_idx = hg->nodes_.size() - 1; +  HG::Node* goal_node = hg->AddNode(kGOAL); +  goal_node->node_hash = goal_node->id_ * 10 + 1; +  TailNodeVector tail(1, old_goal_node_idx); +  HG::Edge* new_edge = hg->AddEdge(kGOAL_RULE, tail); +  hg->ConnectEdgeToHeadNode(new_edge, goal_node); +} +  void FilterAndCheckCorrectness(int goal, Hypergraph* hg) {    if (goal < 0) {      cerr << "Error! [S] not found in grammar!\n";      exit(1);    } -  if (hg->nodes_[goal].in_edges_.size() != 1) { -    cerr << "Error! [S] has more than one rewrite!\n"; -    exit(1); -  }    int old_size = hg->nodes_.size();    hg->TopologicallySortNodesAndEdges(goal);    if (hg->nodes_.size() != old_size) { @@ -292,10 +299,10 @@ int main(int argc, char **argv) {    int lc = 0;    Hypergraph hg;    map<WordID, int> lhs2node; +  string line;    while(*in) { -    string line; +    getline(*in,line);      ++lc; -    getline(*in, line);      if (is_json_input) {        if (line.empty() || line[0] == '#') continue;        string ref; @@ -319,6 +326,7 @@ int main(int argc, char **argv) {        if (line.empty()) {          int goal = lhs2node[kSTART] - 1;          FilterAndCheckCorrectness(goal, &hg); +        AddDummyGoalNode(&hg);          ProcessHypergraph(w, conf, "", &hg);          hg.clear();          lhs2node.clear(); @@ -342,6 +350,7 @@ int main(int argc, char **argv) {        edge->feature_values_ = tr->scores_;        Hypergraph::Node* node = &hg.nodes_[head];        hg.ConnectEdgeToHeadNode(edge, node); +      node->node_hash = lc;      }    }  } | 
