diff options
| author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:52:58 +0000 | 
|---|---|---|
| committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 02:52:58 +0000 | 
| commit | 786b689a486413b4ea31841eb352ed728621e4b1 (patch) | |
| tree | 2ff55857c4e5367c557642bc44e4f44d1697b3fe /decoder/oracle_bleu.h | |
| parent | 0f80272c5e32dd3a0d5d747d00c914b0a6bf0be8 (diff) | |
cleaned up kbest, new USE_INFO_EDGE 1 logs per edge, --show_derivation (needs work; handle kbest deriv, viterbi deriv, sort hg exposing viterbi?)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@405 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/oracle_bleu.h')
| -rwxr-xr-x | decoder/oracle_bleu.h | 59 | 
1 files changed, 31 insertions, 28 deletions
```diff
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 4dc86bc7..4a2cbbe5 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -94,6 +94,7 @@ struct OracleBleu {
       ("references,R", value<Refs >(&refs), "Translation reference files")
       ("oracle_loss", value<string>(&loss_name)->default_value("IBM_BLEU_3"), "IBM_BLEU_3 (default), IBM_BLEU etc")
       ("bleu_weight", value<double>(&bleu_weight)->default_value(1.), "weight to give the hope/fear loss function vs. model score")
+      ("show_derivation", bool_switch(&show_derivation), "show derivation tree in kbest")
       ("verbose",bool_switch(&verbose),"detailed logs")
       ;
   }
@@ -248,46 +249,48 @@ struct OracleBleu {
 //	dest_forest->SortInEdgesByEdgeWeights();
   }
 
-// TODO decoder output should probably be moved to another file - how about oracle_bleu.h
-  void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) {
+  bool show_derivation;
+  template <class Filter>
+  void kbest(int sent_id,Hypergraph const& forest,int k,std::ostream &kbest_out=std::cout,std::ostream &deriv_out=std::cerr) {
     using namespace std;
     using namespace boost;
-    cerr << "In kbest\n";
-
-    ofstream kbest_out;
-    kbest_out.open(kbest_out_filename_.c_str());
-    cerr << "Output kbest to " << kbest_out_filename_;
-
+    typedef KBest::KBestDerivations<Sentence, ESentenceTraversal,Filter> K;
+    K kbest(forest,k);
     //add length (f side) src length of this sentence to the psuedo-doc src length count
     float curr_src_length = doc_src_length + tmp_src_length;
-
-    if (unique) {
-      KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique> kbest(forest, k);
-      for (int i = 0; i < k; ++i) {
-        const KBest::KBestDerivations<Sentence, ESentenceTraversal, KBest::FilterUnique>::Derivation* d =
-          kbest.LazyKthBest(forest.nodes_.size() - 1, i);
-        if (!d) break;
-        //calculate score in context of psuedo-doc
+    for (int i = 0; i < k; ++i) {
+      typename K::Derivation *d = kbest.LazyKthBest(forest.nodes_.size() - 1, i);
+      if (!d) break;
+      kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
+                << d->feature_values << " ||| " << log(d->score);
+      if (!refs.empty()) {
         ScoreP sentscore = GetScore(d->yield,sent_id);
         sentscore->PlusEquals(*doc_score,float(1));
         float bleu = curr_src_length * sentscore->ComputeScore();
-        kbest_out << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
-                  << d->feature_values << " ||| " << log(d->score) << " ||| " << bleu << endl;
-        // cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
-        //     << d->feature_values << " ||| " << log(d->score) << endl;
+        kbest_out << " ||| " << bleu;
       }
-    } else {
-      KBest::KBestDerivations<Sentence, ESentenceTraversal> kbest(forest, k);
-      for (int i = 0; i < k; ++i) {
-        const KBest::KBestDerivations<Sentence, ESentenceTraversal>::Derivation* d =
-          kbest.LazyKthBest(forest.nodes_.size() - 1, i);
-        if (!d) break;
-        cout << sent_id << " ||| " << TD::GetString(d->yield) << " ||| "
-             << d->feature_values << " ||| " << log(d->score) << endl;
+      kbest_out<<endl<<flush;
+      if (show_derivation) {
+        deriv_out<<"\nsent_id="<<sent_id<<"\n";
+        forest.show_tree(cerr,*d->edge);
+        deriv_out<<flush;
       }
     }
   }
 
+// TODO decoder output should probably be moved to another file - how about oracle_bleu.h
+  void DumpKBest(const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const &kbest_out_filename_) {
+
+    WriteFile ko(kbest_out_filename_);
+    std::cerr << "Output kbest to " << kbest_out_filename_;
+
+    if (unique)
+      kbest<KBest::NoFilter>(sent_id,forest,k,ko.get(),std::cerr);
+    else {
+      kbest<KBest::FilterUnique>(sent_id,forest,k,ko.get(),std::cerr);
+    }
+  }
+
   void DumpKBest(std::string const& suffix,const int sent_id, const Hypergraph& forest, const int k, const bool unique, std::string const& forest_output)
   {
     std::ostringstream kbest_string_stream;
```
