diff options
author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
commit | ef6085e558e26c8819f1735425761103021b6470 (patch) | |
tree | 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /decoder | |
parent | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff) | |
parent | dfbc278c1057555fda9312291c8024049e00b7d8 (diff) |
merge with upstream
Diffstat (limited to 'decoder')
47 files changed, 247 insertions, 303 deletions
diff --git a/decoder/1dev.ur b/decoder/1dev.ur deleted file mode 100755 index adeaa101..00000000 --- a/decoder/1dev.ur +++ /dev/null @@ -1 +0,0 @@ -krAcy ( AstRAf rpwrtRr ) krAcy myN pyr kw mxtlf HAdvAt myN xAtwn smyt 4 AfrAd hlAk hw gyY jbkh smndr sY Ayk $xS ky lA$ mly . diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 30eaf04d..ec51d643 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -63,6 +63,7 @@ libcdec_a_SOURCES = \ ff.cc \ ff_rules.cc \ ff_wordset.cc \ + ff_context.cc \ ff_charset.cc \ ff_lm.cc \ ff_klm.cc \ @@ -75,7 +76,6 @@ libcdec_a_SOURCES = \ ff_source_syntax.cc \ ff_bleu.cc \ ff_factory.cc \ - freqdict.cc \ lexalign.cc \ lextrans.cc \ tagger.cc \ diff --git a/decoder/apply_fsa_models.README b/decoder/apply_fsa_models.README deleted file mode 100755 index 7e116a62..00000000 --- a/decoder/apply_fsa_models.README +++ /dev/null @@ -1,21 +0,0 @@ -trie root and trie lhs2[lhs-nodeid] -> trie node - -trie node edges (adj) - list of w,dest,p. dest==0 means it's a completed rule (note: p is redundant with node e.dest->p-p, except in case of dest=0). we will also use null_wordid (max_int) for dest=0 edges, but that doesn't matter - -we intersect by iterating over adj and scoring w/ fsa. TODO: index for sparse fsa; for now we assume smoothed ngram fsa where all items are scorable. - -predicted items: we don't make copies of the pending predictions as we scan toward completion; instead, item backpointers are followed until the prediction (where backpointer=0). such backpointer=0 items have a queue of prediction-originating items. - -reusing completed items using a lookup on pair [NT,a] -> all [NT,a,b] lazy best-first. b-next (right state) index in lazy index. - -perhaps predictors need to register the # of items it has already mated with. (b-next index) - -comb-like (cube) t-next (position in trie node edge list), b-next? or just check chart and don't redup. depends on whether we want just 1best or kbest deriv - diff. ways of reaching same result are good in kbest. - -types of chart items: - -A->t.*,a,b (trie node t) with mutable state t-next for generating successor lazily (vs. all at once) - -A->t.B,a,b (t-next of A->t.* points to (B,t')): mutable state b-next for choosing which B->b,? to use. note: such an item can't be queued immediately on its own, but can be added to the pending list of B->b,? ; once any B->b,? is completed then we see if any more b-next are already known; if they're exhausted then we add back to pending list? - -A->a,? - list of all known (b,inside prob) such that A[a,b]. we may also choose to represent this as A->.*,a,a. diff --git a/decoder/apply_fsa_models.h b/decoder/apply_fsa_models.h index 6561c70c..6561c70c 100755..100644 --- a/decoder/apply_fsa_models.h +++ b/decoder/apply_fsa_models.h diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 40fd27e4..9ba59d1b 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -270,7 +270,8 @@ public: const Hypergraph::Edge& edge = in.edges_[in_edges[i]]; const JVector j(edge.tail_nodes_.size(), 0); cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal)); - assert(unique_cands.insert(cand.back()).second); // these should all be unique! + bool is_new = unique_cands.insert(cand.back()).second; + assert(is_new); // these should all be unique! } // cerr << " making heap of " << cand.size() << " candidates\n"; make_heap(cand.begin(), cand.end(), HeapCandCompare()); @@ -378,7 +379,8 @@ public: pop_heap(cand.begin(), cand.end(), HeapCandCompare()); Candidate* item = cand.back(); cand.pop_back(); - assert(unique_accepted.insert(item).second); // these should all be unique! + bool is_new = unique_accepted.insert(item).second; + assert(is_new); // these should all be unique! // cerr << "POPPED: " << *item << endl; PushSuccFast2(*item, is_goal, &cand, &unique_accepted); @@ -419,7 +421,8 @@ public: Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal); cand.push_back(new_cand); push_heap(cand.begin(), cand.end(), HeapCandCompare()); - assert(cs->insert(new_cand).second); // insert into uniqueness set, sanity check + bool is_new = cs->insert(new_cand).second; + assert(is_new); // insert into uniqueness set, sanity check } } } diff --git a/decoder/cdec-gz.ini b/decoder/cdec-gz.ini deleted file mode 100755 index f9b15420..00000000 --- a/decoder/cdec-gz.ini +++ /dev/null @@ -1,7 +0,0 @@ -cubepruning_pop_limit=200 -feature_function=WordPenalty -feature_function=ArityPenalty -add_pass_through_rules=true -formalism=scfg -grammar=mt09.grammar.gz -weights=weights.tune.nolm diff --git a/decoder/cdec-nolm-tuned.ini b/decoder/cdec-nolm-tuned.ini deleted file mode 100755 index 5ebab747..00000000 --- a/decoder/cdec-nolm-tuned.ini +++ /dev/null @@ -1,7 +0,0 @@ -cubepruning_pop_limit=200 -feature_function=WordPenalty -feature_function=ArityPenalty -add_pass_through_rules=true -formalism=scfg -grammar=mt09.grammar -weights=weights.tune.nolm diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 4ce5749e..b516c386 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -1,6 +1,7 @@ #include <boost/shared_ptr.hpp> #include "ff.h" +#include "ff_context.h" #include "ff_spans.h" #include "ff_lm.h" #include "ff_klm.h" @@ -42,6 +43,7 @@ void register_feature_functions() { #endif ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>()); ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>()); + ff_registry.Register("RuleContextFeatures", new FFFactory<RuleContextFeatures>()); ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>()); ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>); ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>); diff --git a/decoder/cfg.cc b/decoder/cfg.cc index cd7e66e9..cd7e66e9 100755..100644 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc diff --git a/decoder/cfg.h b/decoder/cfg.h index 8cb29bb9..8cb29bb9 100755..100644 --- a/decoder/cfg.h +++ b/decoder/cfg.h diff --git a/decoder/cfg_binarize.h b/decoder/cfg_binarize.h index ae06f8bf..ae06f8bf 100755..100644 --- a/decoder/cfg_binarize.h +++ b/decoder/cfg_binarize.h diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index 2f40d483..2f40d483 100755..100644 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h diff --git a/decoder/cfg_options.h b/decoder/cfg_options.h index 7b59c05c..7b59c05c 100755..100644 --- a/decoder/cfg_options.h +++ b/decoder/cfg_options.h diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index c61f9f2c..c61f9f2c 100755..100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc diff --git a/decoder/decode.sh b/decoder/decode.sh deleted file mode 100755 index 677e64ad..00000000 --- a/decoder/decode.sh +++ /dev/null @@ -1,10 +0,0 @@ -d=$(dirname `readlink -f $0`)/ -decode() { -if [ "$lm" ] ; then - lmargs0=-F - lmargs1="LanguageModel lm.gz -n LM" -fi -set -x -$gdb ${cdec:=$d/cdec} -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@" -set +x -} diff --git a/decoder/decoder.cc b/decoder/decoder.cc index b93925d1..53c47d21 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -408,6 +408,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("show_partition,z", "Compute and show the partition (inside score)") ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation") ("show_cfg_search_space", "Show the search space as a CFG") + ("show_target_graph", "Output the target hypergraph") ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)") ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found") ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -815,6 +816,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { abort(); } + if (conf.count("show_target_graph")) + HypergraphIO::WriteTarget(forest); + for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; const vector<weight_t>& cur_weights = *rp.weight_vector; diff --git a/decoder/do.tests.sh b/decoder/do.tests.sh deleted file mode 100755 index b3ddeb18..00000000 --- a/decoder/do.tests.sh +++ /dev/null @@ -1 +0,0 @@ -for f in *_test; do ./$f; done diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc index 48e94a31..b7af801a 100644 --- a/decoder/earley_composer.cc +++ b/decoder/earley_composer.cc @@ -329,7 +329,10 @@ class EarleyComposerImpl { forest->ReserveNodes(kMAX_NODES); assert(sit != g.end()); Edge* init = new Edge(start_cat_, &sit->second, q_0_); - assert(IncorporateNewEdge(init)); + if (!IncorporateNewEdge(init)) { + cerr << "Failed to create initial edge!\n"; + abort(); + } while (exp_agenda.HasWork() || agenda.HasWork()) { while(exp_agenda.HasWork()) { const Edge* edge = exp_agenda.Next(); diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc new file mode 100644 index 00000000..19f9a413 --- /dev/null +++ b/decoder/ff_context.cc @@ -0,0 +1,99 @@ +#include "ff_context.h" + +#include <sstream> +#include <cassert> +#include <cmath> + +#include "filelib.h" +#include "stringlib.h" +#include "sentence_metadata.h" +#include "lattice.h" +#include "fdict.h" +#include "verbose.h" + +using namespace std; + +namespace { + string Escape(const string& x) { + string y = x; + for (int i = 0; i < y.size(); ++i) { + if (y[i] == '=') y[i]='_'; + if (y[i] == ';') y[i]='_'; + } + return y; + } +} + +RuleContextFeatures::RuleContextFeatures(const std::string& param) { + kSOS = TD::Convert("<s>"); + kEOS = TD::Convert("</s>"); + + // TODO param lets you pass in a string from the cdec.ini file +} + +void RuleContextFeatures::PrepareForInput(const SentenceMetadata& smeta) { + const Lattice& sl = smeta.GetSourceLattice(); + current_input.resize(sl.size()); + for (unsigned i = 0; i < sl.size(); ++i) { + if (sl[i].size() != 1) { + cerr << "Context features not supported with lattice inputs!\nid=" << smeta.GetSentenceId() << endl; + abort(); + } + current_input[i] = sl[i][0].label; + } +} + +void RuleContextFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + const TRule& rule = *edge.rule_; + + if (rule.Arity() != 0 || // arity = 0, no nonterminals + rule.e_.size() != 1) return; // size = 1, predicted label is a single token + + + // you can see the current label "for free" + const WordID cur_label = rule.e_[0]; + // (if you want to see more labels, you have to be very careful, and muck + // about with contexts and ant_contexts) + + // but... you can look at as much of the source as you want! + const int from_src_index = edge.i_; // start of the span in the input being labeled + const int to_src_index = edge.j_; // end of the span in the input + // (note: in the case of tagging the size of the spans being labeled will + // always be 1, but in other formalisms, you can have bigger spans.) + + // this is the current token being labeled: + const WordID cur_input = current_input[from_src_index]; + + // let's get the previous token in the input (may be to the left of the start + // of the sentence!) + WordID prev_input = kSOS; + if (from_src_index > 0) { prev_input = current_input[from_src_index - 1]; } + // let's get the next token (may be to the left of the start of the sentence!) + WordID next_input = kEOS; + if (to_src_index < current_input.size()) { next_input = current_input[to_src_index]; } + + // now, build a feature string + ostringstream os; + // TD::Convert converts from the internal integer representation of a token + // to the actual token + os << "C1:" << TD::Convert(prev_input) << '_' + << TD::Convert(cur_input) << '|' << TD::Convert(cur_label); + // C1 is just to prevent a name clash + + // pick a value + double fval = 1.0; // can be any real value + + // add it to the feature vector FD::Convert converts the feature string to a + // feature int, Escape makes sure the feature string doesn't have any bad + // symbols that could confuse a parser somewhere + features->add_value(FD::Convert(Escape(os.str())), fval); + // that's it! + + // create more features if you like... +} + diff --git a/decoder/ff_context.h b/decoder/ff_context.h new file mode 100644 index 00000000..0d22b027 --- /dev/null +++ b/decoder/ff_context.h @@ -0,0 +1,23 @@ +#ifndef _FF_CONTEXT_H_ +#define _FF_CONTEXT_H_ + +#include <vector> +#include "ff.h" + +class RuleContextFeatures : public FeatureFunction { + public: + RuleContextFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + std::vector<WordID> current_input; + WordID kSOS, kEOS; +}; + +#endif diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 3991d38f..c9ed996c 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -72,7 +72,7 @@ struct BasicCSplitFeaturesImpl { const int fl1_; const int fl2_; const int bad_; - FreqDict freq_dict_; + FreqDict<float> freq_dict_; set<WordID> bad_words_; }; diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc index 04dd1906..d6d79f5e 100644 --- a/decoder/ff_ngrams.cc +++ b/decoder/ff_ngrams.cc @@ -57,6 +57,39 @@ namespace { } } +static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order) { + vector<string> const& argv=SplitOnWhitespace(in); + *explicit_markers = false; + *order = 3; +#define LMSPEC_NEXTARG if (i==argv.end()) { \ + cerr << "Missing argument for "<<*last<<". "; goto usage; \ + } else { ++i; } + + for (vector<string>::const_iterator last,i=argv.begin(),e=argv.end();i!=e;++i) { + string const& s=*i; + if (s[0]=='-') { + if (s.size()>2) goto fail; + switch (s[1]) { + case 'x': + *explicit_markers = true; + break; + case 'o': + LMSPEC_NEXTARG; *order=atoi((*i).c_str()); + break; +#undef LMSPEC_NEXTARG + default: + fail: + cerr<<"Unknown option on NgramFeatures "<<s<<" ; "; + goto usage; + } + } + } + return true; +usage: + cerr << "NgramFeatures is incorrect!\n"; + return false; +} + class NgramDetectorImpl { // returns the number of unscored words at the left edge of a span @@ -264,10 +297,10 @@ class NgramDetectorImpl { } public: - explicit NgramDetectorImpl(bool explicit_markers) : + explicit NgramDetectorImpl(bool explicit_markers, unsigned order) : kCDEC_UNK(TD::Convert("<unk>")) , add_sos_eos_(!explicit_markers) { - order_ = 3; + order_ = order; state_size_ = (order_ - 1) * sizeof(WordID) + 2 + (order_ - 1) * sizeof(WordID); unscored_size_offset_ = (order_ - 1) * sizeof(WordID); is_complete_offset_ = unscored_size_offset_ + 1; @@ -316,8 +349,10 @@ class NgramDetectorImpl { NgramDetector::NgramDetector(const string& param) { string filename, mapfile, featname; - bool explicit_markers = (param == "-x"); - pimpl_ = new NgramDetectorImpl(explicit_markers); + bool explicit_markers = false; + unsigned order = 3; + ParseArgs(param, &explicit_markers, &order); + pimpl_ = new NgramDetectorImpl(explicit_markers, order); SetStateSize(pimpl_->ReserveStateSize()); } diff --git a/decoder/ff_register.h b/decoder/ff_register.h index 80b1457e..80b1457e 100755..100644 --- a/decoder/ff_register.h +++ b/decoder/ff_register.h diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index 74d71b6a..74d71b6a 100755..100644 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h diff --git a/decoder/freqdict.cc b/decoder/freqdict.cc deleted file mode 100644 index 9e25d346..00000000 --- a/decoder/freqdict.cc +++ /dev/null @@ -1,29 +0,0 @@ -#include <iostream> -#include <fstream> -#include <cassert> -#include "freqdict.h" -#include "tdict.h" -#include "filelib.h" - -using namespace std; - -void FreqDict::Load(const std::string& fname) { - cerr << "Reading word frequencies: " << fname << endl; - ReadFile rf(fname); - istream& ifs = *rf.stream(); - int cc=0; - while (ifs) { - std::string word; - ifs >> word; - if (word.size() == 0) continue; - if (word[0] == '#') continue; - double count = 0; - ifs >> count; - assert(count > 0.0); // use -log(f) - counts_[TD::Convert(word)]=count; - ++cc; - if (cc % 10000 == 0) { std::cerr << "."; } - } - std::cerr << "\n"; - std::cerr << "Loaded " << cc << " words\n"; -} diff --git a/decoder/freqdict.h b/decoder/freqdict.h index 9acf0c33..4e03fadd 100644 --- a/decoder/freqdict.h +++ b/decoder/freqdict.h @@ -1,20 +1,47 @@ #ifndef _FREQDICT_H_ #define _FREQDICT_H_ +#include <iostream> #include <map> #include <string> #include "wordid.h" +#include "filelib.h" +#include "tdict.h" +template <typename T = float> class FreqDict { public: - void Load(const std::string& fname); - float LookUp(const WordID& word) const { - std::map<WordID,float>::const_iterator i = counts_.find(word); - if (i == counts_.end()) return 0; + FreqDict() : max_() {} + T Max() const { return max_; } + void Load(const std::string& fname) { + std::cerr << "Reading word statistics from: " << fname << std::endl; + ReadFile rf(fname); + std::istream& ifs = *rf.stream(); + int cc=0; + std::string word; + while (ifs) { + ifs >> word; + if (word.size() == 0) continue; + if (word[0] == '#') continue; + T count = 0; + ifs >> count; + if (count > max_) max_ = count; + counts_[TD::Convert(word)]=count; + ++cc; + if (cc % 10000 == 0) { std::cerr << "."; } + } + std::cerr << "\n"; + std::cerr << "Loaded " << cc << " words\n"; + } + + T LookUp(const WordID& word) const { + typename std::map<WordID,T>::const_iterator i = counts_.find(word); + if (i == counts_.end()) return T(); return i->second; } private: - std::map<WordID, float> counts_; + T max_; + std::map<WordID, T> counts_; }; #endif diff --git a/decoder/fsa-decode.sh b/decoder/fsa-decode.sh deleted file mode 100755 index 66879523..00000000 --- a/decoder/fsa-decode.sh +++ /dev/null @@ -1,3 +0,0 @@ -d=$(dirname `readlink -f $0`)/ -. $d/decode.sh -in=1dev.ur cfg=cdec-fsa decode diff --git a/decoder/fsa-hiero.ini b/decoder/fsa-hiero.ini deleted file mode 100755 index 7c7d0347..00000000 --- a/decoder/fsa-hiero.ini +++ /dev/null @@ -1,5 +0,0 @@ -formalism=scfg -scfg_extra_glue_grammar=glue-lda.scfg -grammar=grammar.hiero -show_tree_structure=true -weights=weights.hiero diff --git a/decoder/fsa.ini b/decoder/fsa.ini deleted file mode 100755 index 571a2e34..00000000 --- a/decoder/fsa.ini +++ /dev/null @@ -1,2 +0,0 @@ -feature_function=ShorterThanPrev -feature_function=LongerThanPrev diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc index 38dbd717..074de4c9 100644 --- a/decoder/fst_translator.cc +++ b/decoder/fst_translator.cc @@ -30,7 +30,10 @@ struct FSTTranslatorImpl { if (input.find("{\"rules\"") == 0) { istringstream is(input); Hypergraph src_cfg_hg; - assert(HypergraphIO::ReadFromJSON(&is, &src_cfg_hg)); + if (!HypergraphIO::ReadFromJSON(&is, &src_cfg_hg)) { + cerr << "Failed to read HG from JSON.\n"; + abort(); + } if (add_pass_through_rules) { SparseVector<double> feats; feats.set_value(FD::Convert("PassThrough"), 1); diff --git a/decoder/glue-lda.scfg b/decoder/glue-lda.scfg deleted file mode 100755 index 27489817..00000000 --- a/decoder/glue-lda.scfg +++ /dev/null @@ -1,8 +0,0 @@ -[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1 -[S] ||| [X0,1] ||| [1] ||| GlueTop=1 -[S] ||| [S,1] [X1,2] ||| [1] [2] ||| Glue=1 -[S] ||| [X1,1] ||| [1] ||| GlueTop=1 -[S] ||| [S,1] [X2,2] ||| [1] [2] ||| Glue=1 -[S] ||| [X2,1] ||| [1] ||| GlueTop=1 -[S] ||| [S,1] [X3,2] ||| [1] [2] ||| Glue=1 -[S] ||| [X3,1] ||| [1] ||| GlueTop=1 diff --git a/decoder/grammar.hiero b/decoder/grammar.hiero deleted file mode 100755 index 79adf33a..00000000 --- a/decoder/grammar.hiero +++ /dev/null @@ -1,151 +0,0 @@ -[X] ||| . ||| . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] . ||| [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] anciano ||| [1] old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano . ||| [1] old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] anciano [X,2] ||| [1] old man [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| [X,1] feo ||| ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo . ||| ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] feo [X,2] ||| ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato ||| [1] cat ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato . ||| [1] cat . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato [X,2] ||| [1] [2] cat ||| EgivenF=0 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato [X,2] ||| [1] cat [2] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato [X,2] . ||| [1] [2] cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato negro ||| [1] black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato negro . ||| [1] black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] gato negro [X,2] ||| [1] black cat [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] grande ||| big [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] grande . ||| big [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] grande [X,2] ||| big [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] negro ||| black [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] negro . ||| black [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] negro [X,2] ||| black [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] oruga ||| [1] caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] oruga . ||| [1] caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] oruga [X,2] ||| [1] caterpiller [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] patito [X,2] ||| [1] [2] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] patito [X,2] . ||| [1] [2] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] patito feo ||| [1] ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] patito feo . ||| [1] ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] patito feo [X,2] ||| [1] ugly duckling [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] peces ||| [1] fish ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] peces . ||| [1] fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] peces [X,2] ||| [1] fish [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro ||| [1] dog ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro . ||| [1] dog . ||| EgivenF=0.405465 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro [X,2] ||| [1] dog [2] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro [X,2] ||| [1] [2] dog ||| EgivenF=0 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro [X,2] . ||| [1] [2] dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro grande ||| [1] big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro grande . ||| [1] big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] perro grande [X,2] ||| [1] big dog [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] pájaro [X,2] ||| [1] [2] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] pájaro [X,2] . ||| [1] [2] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] pájaro negro ||| [1] black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] pájaro negro . ||| [1] black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| [X,1] pájaro negro [X,2] ||| [1] black bird [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| anciano ||| old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| anciano . ||| old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| anciano [X,1] ||| old man [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| el ||| the ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] ||| the [1] ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] . ||| the [1] . ||| EgivenF=0.287682 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] feo ||| the ugly [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] feo . ||| the ugly [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] feo [X,2] ||| the ugly [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] grande ||| the big [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] grande . ||| the big [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] grande [X,2] ||| the big [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] negro ||| the black [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] negro . ||| the black [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el [X,1] negro [X,2] ||| the black [1] [2] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato ||| the cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato . ||| the cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato [X,1] ||| the [1] cat ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato [X,1] ||| the cat [1] ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato [X,1] . ||| the [1] cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato negro ||| the black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato negro . ||| the black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el gato negro [X,1] ||| the black cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el patito [X,1] ||| the [1] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el patito [X,1] . ||| the [1] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el patito feo ||| the ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el patito feo . ||| the ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el patito feo [X,1] ||| the ugly duckling [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro ||| the dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro . ||| the dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro [X,1] ||| the [1] dog ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro [X,1] ||| the dog [1] ||| EgivenF=0 FgivenE=0.693147 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro [X,1] . ||| the [1] dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro grande ||| the big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro grande . ||| the big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el perro grande [X,1] ||| the big dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el pájaro [X,1] ||| the [1] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el pájaro [X,1] . ||| the [1] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el pájaro negro ||| the black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el pájaro negro . ||| the black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| el pájaro negro [X,1] ||| the black bird [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0.287682 LexFgivenE=0 -[X] ||| eso ||| that ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| eso [X,1] ||| that [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| eso [X,1] . ||| that [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| eso perro ||| that dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| eso perro . ||| that dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| eso perro [X,1] ||| that dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este ||| this ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este [X,1] ||| this [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este [X,1] . ||| this [1] . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este anciano ||| this old man ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| este anciano . ||| this old man . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| este anciano [X,1] ||| this old man [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=1.38629 -[X] ||| este gato ||| this cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este gato . ||| this cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| este gato [X,1] ||| this cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| feo ||| ugly ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato ||| cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato . ||| cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato [X,1] ||| [1] cat ||| EgivenF=1.09861 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato [X,1] ||| cat [1] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato [X,1] . ||| [1] cat . ||| EgivenF=1.09861 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato negro ||| black cat ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato negro . ||| black cat . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| gato negro [X,1] ||| black cat [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| grande ||| big ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| la ||| the ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| la [X,1] ||| the [1] ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| la [X,1] . ||| the [1] . ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| la oruga ||| the caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| la oruga . ||| the caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| la oruga [X,1] ||| the caterpiller [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los ||| the ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los [X,1] ||| the [1] ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los [X,1] . ||| the [1] . ||| EgivenF=2.07944 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los peces ||| the fish ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los peces . ||| the fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| los peces [X,1] ||| the fish [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=2.07944 LexFgivenE=0 -[X] ||| negro ||| black ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| oruga ||| caterpiller ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| oruga . ||| caterpiller . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| oruga [X,1] ||| caterpiller [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito ||| duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito [X,1] ||| [1] duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito [X,1] . ||| [1] duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito feo ||| ugly duckling ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito feo . ||| ugly duckling . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| patito feo [X,1] ||| ugly duckling [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| peces ||| fish ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| peces . ||| fish . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| peces [X,1] ||| fish [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro ||| dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro . ||| dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro [X,1] ||| [1] dog ||| EgivenF=1.09861 FgivenE=1.09861 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro [X,1] ||| dog [1] ||| EgivenF=0 FgivenE=0.405465 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro [X,1] . ||| [1] dog . ||| EgivenF=1.09861 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro grande ||| big dog ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro grande . ||| big dog . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| perro grande [X,1] ||| big dog [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro ||| bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro [X,1] ||| [1] bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro [X,1] . ||| [1] bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro negro ||| black bird ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro negro . ||| black bird . ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 -[X] ||| pájaro negro [X,1] ||| black bird [1] ||| EgivenF=0 FgivenE=0 LexEgivenF=0 LexFgivenE=0 diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h index b90aca47..b90aca47 100755..100644 --- a/decoder/hg_cfg.h +++ b/decoder/hg_cfg.h diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index c1c93933..9f0f50fa 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -624,3 +624,30 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) { } } +/* Output format: + * #vertices + * for each vertex in bottom-up topological order: + * #downward_edges + * for each downward edge: + * RHS with [vertex_index] for NTs ||| scores + */ +void HypergraphIO::WriteTarget(const Hypergraph& hg) { + cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; + for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { + const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_; + cout << edges.size() << '\n'; + for (unsigned int j = 0; j < edges.size(); ++j) { + const Hypergraph::Edge &edge = hg.edges_[edges[j]]; + const std::vector<WordID> &e = edge.rule_->e(); + for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) { + if (*word <= 0) { + cout << '[' << edge.tail_nodes_[-*word] << "] "; + } else { + cout << TD::Convert(*word) << ' '; + } + } + cout << "||| " << edge.rule_->scores_ << '\n'; + } + } +} + diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 082489d8..44817157 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -23,6 +23,9 @@ struct HypergraphIO { static void WriteAsCFG(const Hypergraph& hg); + // Write only the target size information in bottom-up order. + static void WriteTarget(const Hypergraph& hg); + // serialization utils static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0); // return PLF string representation (undefined behavior on non-lattices) diff --git a/decoder/hg_test.h b/decoder/hg_test.h index 3da6533c..3da6533c 100755..100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h diff --git a/decoder/lattice.cc b/decoder/lattice.cc index e3631e59..89da3cd0 100644 --- a/decoder/lattice.cc +++ b/decoder/lattice.cc @@ -46,6 +46,7 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) { Lattice& l = *pl; vector<WordID> ids; TD::ConvertSentence(text, &ids); + l.clear(); l.resize(ids.size()); for (int i = 0; i < l.size(); ++i) l[i].push_back(LatticeArc(ids[i], 0.0, 1)); diff --git a/decoder/nt_span.h b/decoder/nt_span.h index a918f301..a918f301 100755..100644 --- a/decoder/nt_span.h +++ b/decoder/nt_span.h diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index b603e27a..b603e27a 100755..100644 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h diff --git a/decoder/perro.sh b/decoder/perro.sh deleted file mode 100755 index 3e54ac71..00000000 --- a/decoder/perro.sh +++ /dev/null @@ -1 +0,0 @@ -$gdb $cdec "$@" -k 30 --show_features -c fsa-hiero.ini -i perro.ur diff --git a/decoder/perro.ur b/decoder/perro.ur deleted file mode 100755 index 6c5da6d7..00000000 --- a/decoder/perro.ur +++ /dev/null @@ -1 +0,0 @@ -eso perro feo diff --git a/decoder/program_options.h b/decoder/program_options.h index 87afb320..87afb320 100755..100644 --- a/decoder/program_options.h +++ b/decoder/program_options.h diff --git a/decoder/sentences.h b/decoder/sentences.h index 54b5ffb3..54b5ffb3 100755..100644 --- a/decoder/sentences.h +++ b/decoder/sentences.h diff --git a/decoder/short.ur b/decoder/short.ur deleted file mode 100755 index 48612801..00000000 --- a/decoder/short.ur +++ /dev/null @@ -1 +0,0 @@ -krAcy myN pyr kw mxtlf HAdvAt diff --git a/decoder/trule.cc b/decoder/trule.cc index 40235542..141b8faa 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -232,16 +232,6 @@ void TRule::ComputeArity() { arity_ = 1 - min; } -static string AnonymousStrVar(int i) { - string res("[v]"); - if(!(i <= 0 && i >= -8)) { - cerr << "Can't handle more than 9 non-terminals: index=" << (-i) << endl; - abort(); - } - res[1] = '1' - i; - return res; -} - string TRule::AsString(bool verbose) const { ostringstream os; int idx = 0; @@ -259,15 +249,11 @@ string TRule::AsString(bool verbose) const { } } os << " ||| "; - if (idx > 9) { - cerr << "Too many non-terminals!\n partial: " << os.str() << endl; - exit(1); - } for (int i =0; i<e_.size(); ++i) { if (i) os << ' '; const WordID& w = e_[i]; if (w < 1) - os << AnonymousStrVar(w); + os << '[' << (1-w) << ']'; else os << TD::Convert(w); } diff --git a/decoder/weights-fsa b/decoder/weights-fsa deleted file mode 100644 index 3cc96c2f..00000000 --- a/decoder/weights-fsa +++ /dev/null @@ -1,14 +0,0 @@ -Arity_0 1.70741473606976 -Arity_1 1.12426238048012 -Arity_2 1.14986187839554 -Glue -0.04589037041388 -LanguageModel 1.09051 -LM 1.09051 -PassThrough -3.66226367902928 -PhraseModel_0 -1.94633451863252 -PhraseModel_1 -0.1475347695476 -PhraseModel_2 -1.614818994946 -WordPenalty -3.0 -WordPenaltyFsa -0.56028442964748 -ShorterThanPrev -10 -LongerThanPrev -10 diff --git a/decoder/weights.hiero b/decoder/weights.hiero deleted file mode 100755 index 6747f059..00000000 --- a/decoder/weights.hiero +++ /dev/null @@ -1,10 +0,0 @@ -SameFirstLetter 1 -LongerThanPrev 1 -ShorterThanPrev 1 -GlueTop 0.0 -Glue -1.0 -EgivenF -0.5 -FgivenE -0.5 -LexEgivenF -0.5 -LexFgivenE -0.5 -LM 1 |