author     Chris Dyer <redpony@gmail.com>    2009-12-08 21:38:55 -0500
committer  Chris Dyer <redpony@gmail.com>    2009-12-08 21:38:55 -0500
commit     dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (patch)
tree       d76d25baf66459d13da4faedffd7d6faba28a513 /src
parent     5082307f382a6563d4ffb97034137da4e362e629 (diff)
LICENSE fixes, full support of lattice decoding
Diffstat (limited to 'src')

 -rw-r--r--  src/array2d.h            |   1
 -rw-r--r--  src/bottom_up_parser.cc  |  53
 -rw-r--r--  src/cdec_ff.cc           |   1
 -rw-r--r--  src/ff.cc                |  21
 -rw-r--r--  src/ff.h                 |  15
 -rw-r--r--  src/grammar.cc           |  11
 -rw-r--r--  src/grammar.h            |   8
 -rw-r--r--  src/lattice.cc           |  34
 -rw-r--r--  src/lattice.h            |  24
 -rw-r--r--  src/synparse.cc          | 212

10 files changed, 135 insertions(+), 245 deletions(-)
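
The thrust of this change is that chart span width no longer equals the number of source words covered: on lattice input, Grammar::HasRuleForSpan now receives the lattice distance of a span, and the cost of the lattice arcs a rule consumes is emitted as a new "LatticeCost" feature on the edge. The sketch below (illustration only, not part of the patch) shows the distinction; it assumes cdec's PLF input syntax of ('word', cost, distance-to-next-node) triples and the Lattice/LatticeTools API introduced in the diff that follows.

    // Sketch: span width vs. lattice distance on a tiny two-column lattice.
    #include <iostream>
    #include "lattice.h"

    int main() {
      Lattice lat;
      // Column 0 offers "ab" (a single arc that skips ahead two nodes) or "a";
      // column 1 offers "b".  The costs 0.5/1.0 are placeholders.
      LatticeTools::ConvertTextOrPLF("((('ab',0.5,2),('a',0.5,1),),(('b',1.0,1),),)", &lat);
      // Span [0,2) is two chart positions wide, but the shortest path through
      // it is one word long, so after ComputeDistances() has run:
      std::cout << lat.Distance(0, 2) << std::endl;  // prints 1, not 2
      return 0;
    }

Because the parser now passes this distance to HasRuleForSpan, a grammar whose span limit is measured in source words still applies to wide-but-short lattice spans.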
diff --git a/src/array2d.h b/src/array2d.h
index 09d84d0b..e63eda0d 100644
--- a/src/array2d.h
+++ b/src/array2d.h
@@ -19,6 +19,7 @@ class Array2D {
     width_(w), height_(h), data_(w*h, d) {}
   Array2D(const Array2D& rhs) :
     width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {}
+  bool empty() const { return data_.empty(); }
   void resize(int w, int h, const T& d = T()) {
     data_.resize(w * h, d);
     width_ = w;
diff --git a/src/bottom_up_parser.cc b/src/bottom_up_parser.cc
index 349ed2de..b3315b8a 100644
--- a/src/bottom_up_parser.cc
+++ b/src/bottom_up_parser.cc
@@ -24,8 +24,18 @@ class PassiveChart {
   inline int GetGoalIndex() const { return goal_idx_; }
 
  private:
-  void ApplyRules(const int i, const int j, const RuleBin* rules, const Hypergraph::TailNodeVector& tail);
-  void ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes);
+  void ApplyRules(const int i,
+                  const int j,
+                  const RuleBin* rules,
+                  const Hypergraph::TailNodeVector& tail,
+                  const float lattice_cost);
+
+  void ApplyRule(const int i,
+                 const int j,
+                 const TRulePtr& r,
+                 const Hypergraph::TailNodeVector& ant_nodes,
+                 const float lattice_cost);
+
   void ApplyUnaryRules(const int i, const int j);
 
   const vector<GrammarPtr>& grammars_;
@@ -38,6 +48,7 @@ class PassiveChart {
   const WordID goal_cat_;    // category that is being searched for at [0,n]
   TRulePtr goal_rule_;
   int goal_idx_;             // index of goal node, if found
+  const int lc_fid_;
   static WordID kGOAL;       // [Goal]
 };
 
@@ -51,12 +62,12 @@ class ActiveChart {
       act_chart_(psv_chart.size(), psv_chart.size()), psv_chart_(psv_chart) {}
 
   struct ActiveItem {
-    ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, double lcost) :
+    ActiveItem(const GrammarIter* g, const Hypergraph::TailNodeVector& a, float lcost) :
       gptr_(g), ant_nodes_(a), lattice_cost(lcost) {}
     explicit ActiveItem(const GrammarIter* g) :
-      gptr_(g), ant_nodes_(), lattice_cost() {}
+      gptr_(g), ant_nodes_(), lattice_cost(0.0) {}
 
-    void ExtendTerminal(int symbol, double src_cost, vector<ActiveItem>* out_cell) const {
+    void ExtendTerminal(int symbol, float src_cost, vector<ActiveItem>* out_cell) const {
      const GrammarIter* ni = gptr_->Extend(symbol);
      if (ni) out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost));
     }
@@ -73,14 +84,14 @@ class ActiveChart {
 
     const GrammarIter* gptr_;
     Hypergraph::TailNodeVector ant_nodes_;
-    double lattice_cost;  // TODO? use SparseVector<double>
+    float lattice_cost;   // TODO? use SparseVector<double>
   };
 
   inline const vector<ActiveItem>& operator()(int i, int j) const { return act_chart_(i,j); }
 
   void SeedActiveChart(const Grammar& g) {
     int size = act_chart_.width();
     for (int i = 0; i < size; ++i)
-      if (g.HasRuleForSpan(i,i))
+      if (g.HasRuleForSpan(i,i,0))
        act_chart_(i,i).push_back(ActiveItem(g.GetRoot()));
   }
@@ -132,7 +143,8 @@ PassiveChart::PassiveChart(const string& goal,
     nodemap_(input.size()+1, input.size()+1),
     goal_cat_(TD::Convert(goal) * -1),
     goal_rule_(new TRule("[Goal] ||| [" + goal + ",1] ||| [" + goal + ",1]")),
-    goal_idx_(-1) {
+    goal_idx_(-1),
+    lc_fid_(FD::Convert("LatticeCost")) {
   act_chart_.resize(grammars_.size());
   for (int i = 0; i < grammars_.size(); ++i)
     act_chart_[i] = new ActiveChart(forest, *this);
@@ -140,13 +152,19 @@ PassiveChart::PassiveChart(const string& goal,
     cerr << "  Goal category: [" << goal << ']' << endl;
 }
 
-void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergraph::TailNodeVector& ant_nodes) {
+void PassiveChart::ApplyRule(const int i,
+                             const int j,
+                             const TRulePtr& r,
+                             const Hypergraph::TailNodeVector& ant_nodes,
+                             const float lattice_cost) {
   Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes);
   new_edge->prev_i_ = r->prev_i;
   new_edge->prev_j_ = r->prev_j;
   new_edge->i_ = i;
   new_edge->j_ = j;
   new_edge->feature_values_ = r->GetFeatureValues();
+  if (lattice_cost)
+    new_edge->feature_values_.set_value(lc_fid_, lattice_cost);
   Cat2NodeMap& c2n = nodemap_(i,j);
   const bool is_goal = (r->GetLHS() == kGOAL);
   const Cat2NodeMap::iterator ni = c2n.find(r->GetLHS());
@@ -169,23 +187,24 @@ void PassiveChart::ApplyRule(const int i, const int j, TRulePtr r, const Hypergr
 void PassiveChart::ApplyRules(const int i,
                               const int j,
                               const RuleBin* rules,
-                              const Hypergraph::TailNodeVector& tail) {
+                              const Hypergraph::TailNodeVector& tail,
+                              const float lattice_cost) {
   const int n = rules->GetNumRules();
   for (int k = 0; k < n; ++k)
-    ApplyRule(i, j, rules->GetIthRule(k), tail);
+    ApplyRule(i, j, rules->GetIthRule(k), tail, lattice_cost);
 }
 
 void PassiveChart::ApplyUnaryRules(const int i, const int j) {
   const vector<int>& nodes = chart_(i,j);  // reference is important!
   for (int gi = 0; gi < grammars_.size(); ++gi) {
-    if (!grammars_[gi]->HasRuleForSpan(i,j)) continue;
+    if (!grammars_[gi]->HasRuleForSpan(i,j,input_.Distance(i,j))) continue;
     for (int di = 0; di < nodes.size(); ++di) {
       const WordID& cat = forest_->nodes_[nodes[di]].cat_;
       const vector<TRulePtr>& unaries = grammars_[gi]->GetUnaryRulesForRHS(cat);
       for (int ri = 0; ri < unaries.size(); ++ri) {
         // cerr << "At (" << i << "," << j << "): applying " << unaries[ri]->AsString() << endl;
         const Hypergraph::TailNodeVector ant(1, nodes[di]);
-        ApplyRule(i, j, unaries[ri], ant);  // may update nodes
+        ApplyRule(i, j, unaries[ri], ant, 0);  // may update nodes
       }
     }
   }
@@ -205,7 +224,7 @@ bool PassiveChart::Parse() {
       int j = i + l;
       for (int gi = 0; gi < grammars_.size(); ++gi) {
         const Grammar& g = *grammars_[gi];
-        if (g.HasRuleForSpan(i, j)) {
+        if (g.HasRuleForSpan(i, j, input_.Distance(i, j))) {
           act_chart_[gi]->AdvanceDotsForAllItemsInCell(i, j, input_);
 
           const vector<ActiveChart::ActiveItem>& cell = (*act_chart_[gi])(i,j);
@@ -213,7 +232,7 @@ bool PassiveChart::Parse() {
                ai != cell.end(); ++ai) {
             const RuleBin* rules = (ai->gptr_->GetRules());
             if (!rules) continue;
-            ApplyRules(i, j, rules, ai->ant_nodes_);
+            ApplyRules(i, j, rules, ai->ant_nodes_, ai->lattice_cost);
           }
         }
       }
@@ -222,7 +241,7 @@ bool PassiveChart::Parse() {
       for (int gi = 0; gi < grammars_.size(); ++gi) {
         const Grammar& g = *grammars_[gi];
         // deal with non-terminals that were just proved
-        if (g.HasRuleForSpan(i, j))
+        if (g.HasRuleForSpan(i, j, input_.Distance(i,j)))
           act_chart_[gi]->ExtendActiveItems(i, i, j);
       }
     }
@@ -231,7 +250,7 @@ bool PassiveChart::Parse() {
     const Hypergraph::Node& node = forest_->nodes_[dh[di]];
     if (node.cat_ == goal_cat_) {
       Hypergraph::TailNodeVector ant(1, node.id_);
-      ApplyRule(0, input_.size(), goal_rule_, ant);
+      ApplyRule(0, input_.size(), goal_rule_, ant, 0);
     }
   }
 }
diff --git a/src/cdec_ff.cc b/src/cdec_ff.cc
index 846a908e..0a4f3d5e 100644
--- a/src/cdec_ff.cc
+++ b/src/cdec_ff.cc
@@ -11,6 +11,7 @@ boost::shared_ptr<FFRegistry> global_ff_registry;
 void register_feature_functions() {
   global_ff_registry->Register("LanguageModel", new FFFactory<LanguageModel>);
   global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
+  global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
   global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
   global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>);
   global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
diff --git a/src/ff.cc b/src/ff.cc
--- a/src/ff.cc
+++ b/src/ff.cc
@@ -36,6 +36,27 @@ void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   features->set_value(fid_, edge.rule_->EWords() * value_);
 }
 
+SourceWordPenalty::SourceWordPenalty(const string& param) :
+    fid_(FD::Convert("SourceWordPenalty")),
+    value_(-1.0 / log(10)) {
+  if (!param.empty()) {
+    cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl;
+  }
+}
+
+void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                              const Hypergraph::Edge& edge,
+                                              const std::vector<const void*>& ant_states,
+                                              SparseVector<double>* features,
+                                              SparseVector<double>* estimated_features,
+                                              void* state) const {
+  (void) smeta;
+  (void) ant_states;
+  (void) state;
+  (void) estimated_features;
+  features->set_value(fid_, edge.rule_->FWords() * value_);
+}
+
 ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
     models_(models),
     weights_(w),
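
The new SourceWordPenalty feature mirrors WordPenalty but counts source-side terminals (FWords) instead of target-side ones (EWords); the per-word value of -1/log(10) ≈ -0.4343 keeps the convention that a unit weight corresponds to a base-10 word penalty. A small worked example (illustration only, not from the patch):

    // Feature value contributed by an edge whose rule covers three source words.
    #include <cmath>
    #include <cstdio>

    int main() {
      const double per_word = -1.0 / log(10);  // ≈ -0.4343, as in ff.cc above
      const int fwords = 3;
      printf("SourceWordPenalty = %.4f\n", fwords * per_word);  // ≈ -1.3029
      return 0;
    }

On lattice input this feature lets the model weigh alternative paths that cover the same span with different numbers of source words.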
diff --git a/src/ff.h b/src/ff.h
--- a/src/ff.h
+++ b/src/ff.h
@@ -89,6 +89,21 @@ class WordPenalty : public FeatureFunction {
   const double value_;
 };
 
+class SourceWordPenalty : public FeatureFunction {
+ public:
+  SourceWordPenalty(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+ private:
+  const int fid_;
+  const double value_;
+};
+
 // this class is a set of FeatureFunctions that can be used to score, rescore,
 // etc. a (translation?) forest
 class ModelSet {
diff --git a/src/grammar.cc b/src/grammar.cc
index e0b2a09e..e19bd344 100644
--- a/src/grammar.cc
+++ b/src/grammar.cc
@@ -15,9 +15,10 @@ RuleBin::~RuleBin() {}
 GrammarIter::~GrammarIter() {}
 Grammar::~Grammar() {}
 
-bool Grammar::HasRuleForSpan(int i, int j) const {
+bool Grammar::HasRuleForSpan(int i, int j, int distance) const {
   (void) i;
   (void) j;
+  (void) distance;
   return true;  // always true by default
 }
 
@@ -119,8 +120,8 @@ void TextGrammar::ReadFromFile(const string& filename) {
   cerr << "  " << rule_count << " rules read.\n";
 }
 
-bool TextGrammar::HasRuleForSpan(int i, int j) const {
-  return (max_span_ >= (j - i));
+bool TextGrammar::HasRuleForSpan(int i, int j, int distance) const {
+  return (max_span_ >= distance);
 }
 
 GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {}
@@ -136,7 +137,7 @@ GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt) {
   //cerr << "GLUE: " << glue->AsString() << endl;
 }
 
-bool GlueGrammar::HasRuleForSpan(int i, int j) const {
+bool GlueGrammar::HasRuleForSpan(int i, int j, int distance) const {
   (void) j;
   return (i == 0);
 }
@@ -156,7 +157,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat)
   }
 }
 
-bool PassThroughGrammar::HasRuleForSpan(int i, int j) const {
+bool PassThroughGrammar::HasRuleForSpan(int i, int j, int distance) const {
   const set<int>& hr = has_rule_[i];
   if (i == j) { return !hr.empty(); }
   return (hr.find(j) != hr.end());
diff --git a/src/grammar.h b/src/grammar.h
index 4a03c505..3471e3f1 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -28,7 +28,7 @@ struct Grammar {
 
   virtual ~Grammar();
   virtual const GrammarIter* GetRoot() const = 0;
-  virtual bool HasRuleForSpan(int i, int j) const;
+  virtual bool HasRuleForSpan(int i, int j, int distance) const;
 
   // cat is the category to be rewritten
   inline const std::vector<TRulePtr>& GetAllUnaryRules() const {
@@ -59,7 +59,7 @@ struct TextGrammar : public Grammar {
   virtual const GrammarIter* GetRoot() const;
   void AddRule(const TRulePtr& rule);
   void ReadFromFile(const std::string& filename);
-  virtual bool HasRuleForSpan(int i, int j) const;
+  virtual bool HasRuleForSpan(int i, int j, int distance) const;
   const std::vector<TRulePtr>& GetUnaryRules(const WordID& cat) const;
  private:
   int max_span_;
@@ -70,12 +70,12 @@ struct GlueGrammar : public TextGrammar {
   // read glue grammar from file
   explicit GlueGrammar(const std::string& file);
   GlueGrammar(const std::string& goal_nt, const std::string& default_nt); // "S", "X"
-  virtual bool HasRuleForSpan(int i, int j) const;
+  virtual bool HasRuleForSpan(int i, int j, int distance) const;
 };
 
 struct PassThroughGrammar : public TextGrammar {
   PassThroughGrammar(const Lattice& input, const std::string& cat);
-  virtual bool HasRuleForSpan(int i, int j) const;
+  virtual bool HasRuleForSpan(int i, int j, int distance) const;
  private:
   std::vector<std::set<int> > has_rule_;  // index by [i][j]
 };
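
Every Grammar subclass now receives the true source-word distance of a span along with its endpoints. A hypothetical subclass (not part of the patch; the TextGrammar file constructor is assumed from its use by GlueGrammar above) illustrating the contract: span-length restrictions should test the distance argument, not (j - i).

    #include "grammar.h"

    // Hypothetical grammar that only proposes rules over spans of at most two
    // source words, however many lattice nodes the span crosses.
    struct ShortSpanGrammar : public TextGrammar {
      explicit ShortSpanGrammar(const std::string& file) : TextGrammar(file) {}
      virtual bool HasRuleForSpan(int i, int j, int distance) const {
        (void) i; (void) j;
        return distance <= 2;
      }
    };

TextGrammar itself now compares max_span_ against the distance, so a grammar extracted with a span limit behaves the same on plain sentences (where distance == j - i) and correctly on lattices.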
diff --git a/src/lattice.cc b/src/lattice.cc
index aa1df3db..56bc9551 100644
--- a/src/lattice.cc
+++ b/src/lattice.cc
@@ -5,6 +5,39 @@
 
 using namespace std;
 
+static const int kUNREACHABLE = 99999999;
+
+void Lattice::ComputeDistances() {
+  const int n = this->size() + 1;
+  dist_.resize(n, n, kUNREACHABLE);
+  for (int i = 0; i < this->size(); ++i) {
+    const vector<LatticeArc>& alts = (*this)[i];
+    for (int j = 0; j < alts.size(); ++j)
+      dist_(i, i + alts[j].dist2next) = 1;
+  }
+  for (int k = 0; k < n; ++k) {
+    for (int i = 0; i < n; ++i) {
+      for (int j = 0; j < n; ++j) {
+        const int dp = dist_(i,k) + dist_(k,j);
+        if (dist_(i,j) > dp)
+          dist_(i,j) = dp;
+      }
+    }
+  }
+
+  for (int i = 0; i < n; ++i) {
+    int latest = kUNREACHABLE;
+    for (int j = n-1; j >= 0; --j) {
+      const int c = dist_(i,j);
+      if (c < kUNREACHABLE)
+        latest = c;
+      else
+        dist_(i,j) = latest;
+    }
+  }
+  // cerr << dist_ << endl;
+}
+
 bool LatticeTools::LooksLikePLF(const string &line) {
   return (line.size() > 5) && (line.substr(0,4) == "((('");
 }
@@ -23,5 +56,6 @@ void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) {
     HypergraphIO::PLFtoLattice(text_or_plf, pl);
   else
     ConvertTextToLattice(text_or_plf, pl);
+  pl->ComputeDistances();
 }
 
diff --git a/src/lattice.h b/src/lattice.h
index 1177e768..71589b92 100644
--- a/src/lattice.h
+++ b/src/lattice.h
@@ -4,6 +4,14 @@
 #include <string>
 #include <vector>
 #include "wordid.h"
+#include "array2d.h"
+
+class Lattice;
+struct LatticeTools {
+  static bool LooksLikePLF(const std::string &line);
+  static void ConvertTextToLattice(const std::string& text, Lattice* pl);
+  static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
+};
 
 struct LatticeArc {
   WordID label;
@@ -14,18 +22,20 @@ struct LatticeArc {
 };
 
 class Lattice : public std::vector<std::vector<LatticeArc> > {
+  friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
  public:
   Lattice() {}
   explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) :
     std::vector<std::vector<LatticeArc> >(t, v) {}
-
-  // TODO add distance functions
-};
+  int Distance(int from, int to) const {
+    if (dist_.empty())
+      return (to - from);
+    return dist_(from, to);
+  }
 
-struct LatticeTools {
-  static bool LooksLikePLF(const std::string &line);
-  static void ConvertTextToLattice(const std::string& text, Lattice* pl);
-  static void ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
+ private:
+  void ComputeDistances();
+  Array2D<int> dist_;
 };
 
 #endif
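
ComputeDistances treats every arc as a unit-cost edge spanning dist2next chart positions and runs an all-pairs shortest-path (Floyd-Warshall) pass, so Distance(i, j) reports how many source words the shortest route from node i to node j contains; a final back-fill pass replaces unreachable (i, j) cells with the distance recorded for the nearest reachable position to the right of j, so the parser never reads an uninitialized cell. The following standalone re-implementation sketch (not cdec code) runs the core computation on the same toy lattice as above:

    #include <cstdio>
    #include <vector>
    using namespace std;

    int main() {
      const int kUNREACHABLE = 99999999;
      const int n = 3;  // two lattice columns -> three chart positions
      vector<vector<int> > dist(n, vector<int>(n, kUNREACHABLE));
      dist[0][2] = 1;   // arc "ab" skips a node
      dist[0][1] = 1;   // arc "a"
      dist[1][2] = 1;   // arc "b"
      for (int k = 0; k < n; ++k)
        for (int i = 0; i < n; ++i)
          for (int j = 0; j < n; ++j)
            if (dist[i][k] + dist[k][j] < dist[i][j])
              dist[i][j] = dist[i][k] + dist[k][j];
      printf("Distance(0,2) = %d word(s)\n", dist[0][2]);  // prints 1
      return 0;
    }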
diff --git a/src/synparse.cc b/src/synparse.cc
deleted file mode 100644
index 96588f1e..00000000
--- a/src/synparse.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <iostream>
-#include <ext/hash_map>
-#include <ext/hash_set>
-#include <utility>
-
-#include <boost/multi_array.hpp>
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "prob.h"
-#include "tdict.h"
-#include "filelib.h"
-
-using namespace std;
-using namespace __gnu_cxx;
-namespace po = boost::program_options;
-
-const prob_t kMONO(1.0);  // 0.6
-const prob_t kINV(1.0);   // 0.1
-const prob_t kLEX(1.0);   // 0.3
-
-typedef hash_map<vector<WordID>, hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >, boost::hash<vector<WordID> > > PTable;
-typedef boost::multi_array<prob_t, 4> CChart;
-typedef pair<int,int> SpanType;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("phrasetable,p",po::value<string>(), "[REQD] Phrase pairs for ITG alignment")
-        ("input,i",po::value<string>()->default_value("-"), "Input file")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (!conf->count("phrasetable")) {
-    cerr << "Please specify a grammar file with -p <GRAMMAR.TXT>\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-void LoadITGPhrasetable(const string& fname, PTable* ptable) {
-  const WordID sep = TD::Convert("|||");
-  ReadFile rf(fname);
-  istream& in = *rf.stream();
-  assert(in);
-  int lc = 0;
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty()) continue;
-    ++lc;
-    vector<WordID> full, f, e;
-    TD::ConvertSentence(line, &full);
-    int i = 0;
-    for (; i < full.size(); ++i) {
-      if (full[i] == sep) break;
-      f.push_back(full[i]);
-    }
-    ++i;
-    for (; i < full.size(); ++i) {
-      if (full[i] == sep) break;
-      e.push_back(full[i]);
-    }
-    ++i;
-    prob_t prob(0.000001);
-    if (i < full.size()) { prob = prob_t(atof(TD::Convert(full[i]))); ++i; }
-
-    if (i < full.size()) { cerr << "Warning line " << lc << " has extra stuff.\n"; }
-    assert(f.size() > 0);
-    assert(e.size() > 0);
-    (*ptable)[f][e] = prob;
-  }
-  cerr << "Read " << lc << " phrase pairs\n";
-}
-
-void FindPhrases(const vector<WordID>& e, const vector<WordID>& f, const PTable& pt, CChart* pcc) {
-  CChart& cc = *pcc;
-  const size_t n = f.size();
-  const size_t m = e.size();
-  typedef hash_map<vector<WordID>, vector<SpanType>, boost::hash<vector<WordID> > > PhraseToSpan;
-  PhraseToSpan e_locations;
-  for (int i = 0; i < m; ++i) {
-    const int mel = m - i;
-    vector<WordID> e_phrase;
-    for (int el = 0; el < mel; ++el) {
-      e_phrase.push_back(e[i + el]);
-      e_locations[e_phrase].push_back(make_pair(i, i + el + 1));
-    }
-  }
-  //cerr << "Cached the locations of " << e_locations.size() << " e-phrases\n";
-
-  for (int s = 0; s < n; ++s) {
-    const int mfl = n - s;
-    vector<WordID> f_phrase;
-    for (int fl = 0; fl < mfl; ++fl) {
-      f_phrase.push_back(f[s + fl]);
-      PTable::const_iterator it = pt.find(f_phrase);
-      if (it == pt.end()) continue;
-      const hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >& es = it->second;
-      for (hash_map<vector<WordID>, prob_t, boost::hash<vector<WordID> > >::const_iterator eit = es.begin(); eit != es.end(); ++eit) {
-        PhraseToSpan::iterator loc = e_locations.find(eit->first);
-        if (loc == e_locations.end()) continue;
-        const vector<SpanType>& espans = loc->second;
-        for (int j = 0; j < espans.size(); ++j) {
-          cc[s][s + fl + 1][espans[j].first][espans[j].second] = eit->second;
-          //cerr << '[' << s << ',' << (s + fl + 1) << ',' << espans[j].first << ',' << espans[j].second << "] is C\n";
-        }
-      }
-    }
-  }
-}
-
-long long int evals = 0;
-
-void ProcessSynchronousCell(const int s,
-                            const int t,
-                            const int u,
-                            const int v,
-                            const prob_t& lex,
-                            const prob_t& mono,
-                            const prob_t& inv,
-                            const CChart& tc, CChart* ntc) {
-  prob_t& inside = (*ntc)[s][t][u][v];
-  // cerr << log(tc[s][t][u][v]) << " + " << log(lex) << endl;
-  inside = tc[s][t][u][v] * lex;
-  // cerr << "  terminal span: " << log(inside) << endl;
-  if (t - s == 1) return;
-  if (v - u == 1) return;
-  for (int x = s+1; x < t; ++x) {
-    for (int y = u+1; y < v; ++y) {
-      const prob_t m = (*ntc)[s][x][u][y] * (*ntc)[x][t][y][v] * mono;
-      const prob_t i = (*ntc)[s][x][y][v] * (*ntc)[x][t][u][y] * inv;
-      // cerr << log(i) << "\t" << log(m) << endl;
-      inside += m;
-      inside += i;
-      evals++;
-    }
-  }
-  // cerr << "  span: " << log(inside) << endl;
-}
-
-prob_t SynchronousParse(const int n, const int m, const prob_t& lex, const prob_t& mono, const prob_t& inv, const CChart& tc, CChart* ntc) {
-  for (int fl = 0; fl < n; ++fl) {
-    for (int el = 0; el < m; ++el) {
-      const int ms = n - fl;
-      for (int s = 0; s < ms; ++s) {
-        const int t = s + fl + 1;
-        const int mu = m - el;
-        for (int u = 0; u < mu; ++u) {
-          const int v = u + el + 1;
-          //cerr << "Processing cell [" << s << ',' << t << ',' << u << ',' << v << "]\n";
-          ProcessSynchronousCell(s, t, u, v, lex, mono, inv, tc, ntc);
-        }
-      }
-    }
-  }
-  return (*ntc)[0][n][0][m];
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  PTable ptable;
-  LoadITGPhrasetable(conf["phrasetable"].as<string>(), &ptable);
-  ReadFile rf(conf["input"].as<string>());
-  istream& in = *rf.stream();
-  int lc = 0;
-  const WordID sep = TD::Convert("|||");
-  while(in) {
-    string line;
-    getline(in, line);
-    if (line.empty()) continue;
-    ++lc;
-    vector<WordID> full, f, e;
-    TD::ConvertSentence(line, &full);
-    int i = 0;
-    for (; i < full.size(); ++i) {
-      if (full[i] == sep) break;
-      f.push_back(full[i]);
-    }
-    ++i;
-    for (; i < full.size(); ++i) {
-      if (full[i] == sep) break;
-      e.push_back(full[i]);
-    }
-    if (e.empty()) cerr << "E is empty!\n";
-    if (f.empty()) cerr << "F is empty!\n";
-    if (e.empty() || f.empty()) continue;
-    int n = f.size();
-    int m = e.size();
-    cerr << "Synchronous chart has " << (n * n * m * m) << " cells\n";
-    clock_t start = clock();
-    CChart cc(boost::extents[n+1][n+1][m+1][m+1]);
-    FindPhrases(e, f, ptable, &cc);
-    CChart ntc(boost::extents[n+1][n+1][m+1][m+1]);
-    prob_t likelihood = SynchronousParse(n, m, kLEX, kMONO, kINV, cc, &ntc);
-    clock_t end = clock();
-    cerr << "log Z: " << log(likelihood) << endl;
-    cerr << "    Z: " << likelihood << endl;
-    double etime = (end - start) / 1000000.0;
-    cout << " time: " << etime << endl;
-    cout << "evals: " << evals << endl;
-  }
-  return 0;
-}
-