From d60dda793ce24818becf6dfb140579899a5e121b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 23 May 2012 18:02:48 -0400 Subject: more bjam stuff, more cleanup --- decoder/bottom_up_parser.cc | 24 ++++++++++++------------ decoder/ff_dwarf.cc | 6 +++--- decoder/hg.h | 2 +- decoder/hg_io.cc | 9 ++++----- decoder/inside_outside.h | 10 +++++----- decoder/kbest.h | 22 +++++++++++++--------- decoder/trule.cc | 24 ++++++++++++------------ 7 files changed, 50 insertions(+), 47 deletions(-) (limited to 'decoder') diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 63939221..1f262747 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -154,7 +154,7 @@ PassiveChart::PassiveChart(const string& goal, goal_idx_(-1), lc_fid_(FD::Convert("LatticeCost")) { act_chart_.resize(grammars_.size()); - for (int i = 0; i < grammars_.size(); ++i) + for (unsigned i = 0; i < grammars_.size(); ++i) act_chart_[i] = new ActiveChart(forest, *this); if (!kGOAL) kGOAL = TD::Convert("Goal") * -1; if (!SILENT) cerr << " Goal category: [" << goal << ']' << endl; @@ -204,12 +204,12 @@ void PassiveChart::ApplyRules(const int i, void PassiveChart::ApplyUnaryRules(const int i, const int j) { const vector& nodes = chart_(i,j); // reference is important! - for (int gi = 0; gi < grammars_.size(); ++gi) { + for (unsigned gi = 0; gi < grammars_.size(); ++gi) { if (!grammars_[gi]->HasRuleForSpan(i,j,input_.Distance(i,j))) continue; - for (int di = 0; di < nodes.size(); ++di) { + for (unsigned di = 0; di < nodes.size(); ++di) { const WordID& cat = forest_->nodes_[nodes[di]].cat_; const vector& unaries = grammars_[gi]->GetUnaryRulesForRHS(cat); - for (int ri = 0; ri < unaries.size(); ++ri) { + for (unsigned ri = 0; ri < unaries.size(); ++ri) { // cerr << "At (" << i << "," << j << "): applying " << unaries[ri]->AsString() << endl; const Hypergraph::TailNodeVector ant(1, nodes[di]); ApplyRule(i, j, unaries[ri], ant, 0); // may update nodes @@ -224,15 +224,15 @@ bool PassiveChart::Parse() { size_t res = min(static_cast(2000000), static_cast(in_size_2 * 1000)); forest_->edges_.reserve(res); goal_idx_ = -1; - for (int gi = 0; gi < grammars_.size(); ++gi) + for (unsigned gi = 0; gi < grammars_.size(); ++gi) act_chart_[gi]->SeedActiveChart(*grammars_[gi]); if (!SILENT) cerr << " "; - for (int l=1; lAdvanceDotsForAllItemsInCell(i, j, input_); @@ -248,7 +248,7 @@ bool PassiveChart::Parse() { } ApplyUnaryRules(i,j); - for (int gi = 0; gi < grammars_.size(); ++gi) { + for (unsigned gi = 0; gi < grammars_.size(); ++gi) { const Grammar& g = *grammars_[gi]; // deal with non-terminals that were just proved if (g.HasRuleForSpan(i, j, input_.Distance(i,j))) @@ -256,7 +256,7 @@ bool PassiveChart::Parse() { } } const vector& dh = chart_(0, input_.size()); - for (int di = 0; di < dh.size(); ++di) { + for (unsigned di = 0; di < dh.size(); ++di) { const Hypergraph::Node& node = forest_->nodes_[dh[di]]; if (node.cat_ == goal_cat_) { Hypergraph::TailNodeVector ant(1, node.id_); @@ -272,7 +272,7 @@ bool PassiveChart::Parse() { } PassiveChart::~PassiveChart() { - for (int i = 0; i < act_chart_.size(); ++i) + for (unsigned i = 0; i < act_chart_.size(); ++i) delete act_chart_[i]; } diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc index 3daa85ac..43528405 100644 --- a/decoder/ff_dwarf.cc +++ b/decoder/ff_dwarf.cc @@ -519,7 +519,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m while (idx>=0) { if (l[idx].size()>0) { if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { - *lfw++; + lfw++; } } idx-=l[idx][0].dist2next; @@ -528,7 +528,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m while (idx0) { if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { - *rfw++; + rfw++; } } idx+=l[idx][0].dist2next; @@ -787,7 +787,7 @@ bool Dwarf::generalizeOrientation(CountTable* table, const std::map void Reweight(const V& weights) { - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { Edge& e = edges_[i]; e.edge_prob_.logeq(e.feature_values_.dot(weights)); } diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 734c2ce8..3321558d 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -488,13 +488,13 @@ int getInt(const std::string& in, int &c) #define MAX_NODES 100000000 // parse ('foo', 0.23) void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { - if (get(in,c++) != '(') { assert(!"PCN/PLF parse error: expected ( at start of cn alt block\n"); } + if (get(in,c++) != '(') { cerr << "PCN/PLF parse error: expected (\n"; abort(); } vector ewords(2, 0); ewords[1] = TD::Convert(getEscapedString(in,c)); TRulePtr r(new TRule(ewords)); r->ComputeArity(); // cerr << "RULE: " << r->AsString() << endl; - if (get(in,c++) != ',') { cerr << in << endl; assert(!"PCN/PLF parse error: expected , after string\n"); } + if (get(in,c++) != ',') { cerr << in << endl; cerr << "PCN/PLF parse error: expected , after string\n"; abort(); } size_t cnNext = 1; std::vector probs; probs.push_back(getFloat(in,c)); @@ -508,10 +508,9 @@ void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { if (probs.size()>1) { cnNext = static_cast(probs.back()); probs.pop_back(); - if (cnNext < 1) { cerr << cnNext << endl; - assert(!"PCN/PLF parse error: bad link length at last element of cn alt block\n"); } + if (cnNext < 1) { cerr << cnNext << endl << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; abort(); } } - if (get(in,c++) != ')') { assert(!"PCN/PLF parse error: expected ) at end of cn alt block\n"); } + if (get(in,c++) != ')') { cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; abort(); } eatws(in,c); Hypergraph::TailNodeVector tail(1, cur_node); Hypergraph::Edge* edge = hg->AddEdge(r, tail); diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index dc96f1a9..2ded328d 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -31,24 +31,24 @@ template WeightType Inside(const Hypergraph& hg, std::vector* result = NULL, const WeightFunction& weight = WeightFunction()) { - const int num_nodes = hg.nodes_.size(); + const unsigned num_nodes = hg.nodes_.size(); std::vector dummy; std::vector& inside_score = result ? *result : dummy; inside_score.clear(); inside_score.resize(num_nodes); // std::fill(inside_score.begin(), inside_score.end(), WeightType()); // clear handles - for (int i = 0; i < num_nodes; ++i) { + for (unsigned i = 0; i < num_nodes; ++i) { WeightType* const cur_node_inside_score = &inside_score[i]; Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; - const int num_in_edges = in.size(); + const unsigned num_in_edges = in.size(); if (num_in_edges == 0) { *cur_node_inside_score = WeightType(1); //FIXME: why not call weight(edge) instead? continue; } - for (int j = 0; j < num_in_edges; ++j) { + for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); - for (int k = 0; k < edge.tail_nodes_.size(); ++k) { + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) { const int tail_node_index = edge.tail_nodes_[k]; score *= inside_score[tail_node_index]; } diff --git a/decoder/kbest.h b/decoder/kbest.h index 03a8311c..9af3a20e 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -43,7 +43,7 @@ namespace KBest { traverse(tf), w(wf), g(hg), nds(g.nodes_.size()), k_prime(k) {} ~KBestDerivations() { - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } @@ -86,7 +86,7 @@ namespace KBest { // Hypergraph::Edge const * operator ->() const { return d->edge; } }; - EdgeHandle operator()(int t,int taili,EdgeHandle const& parent) const { + EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { return EdgeHandle(nds[t].D[parent.d->j[taili]]); } @@ -98,7 +98,7 @@ namespace KBest { size_t operator()(const Derivation* d) const { size_t x = 5381; x = ((x << 5) + x) ^ d->edge->id_; - for (int i = 0; i < d->j.size(); ++i) + for (unsigned i = 0; i < d->j.size(); ++i) x = ((x << 5) + x) ^ d->j[i]; return x; } @@ -121,7 +121,7 @@ namespace KBest { explicit NodeDerivationState(const DerivationFilter& f = DerivationFilter()) : filter(f) {} }; - Derivation* LazyKthBest(int v, int k) { + Derivation* LazyKthBest(unsigned v, unsigned k) { NodeDerivationState& s = GetCandidates(v); CandidateHeap& cand = s.cand; DerivationList& D = s.D; @@ -139,7 +139,7 @@ namespace KBest { Derivation* d = cand.back(); cand.pop_back(); std::vector ants(d->edge->Arity()); - for (int j = 0; j < ants.size(); ++j) + for (unsigned j = 0; j < ants.size(); ++j) ants[j] = &LazyKthBest(d->edge->tail_nodes_[j], d->j[j])->yield; traverse(*d->edge, ants, &d->yield); if (!filter(d->yield)) { @@ -171,12 +171,12 @@ namespace KBest { return freelist.back(); } - NodeDerivationState& GetCandidates(int v) { + NodeDerivationState& GetCandidates(unsigned v) { NodeDerivationState& s = nds[v]; if (!s.D.empty() || !s.cand.empty()) return s; const Hypergraph::Node& node = g.nodes_[v]; - for (int i = 0; i < node.in_edges_.size(); ++i) { + for (unsigned i = 0; i < node.in_edges_.size(); ++i) { const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; SmallVectorInt jv(edge.Arity(), 0); Derivation* d = CreateDerivation(edge, jv); @@ -184,7 +184,7 @@ namespace KBest { s.cand.push_back(d); } - const int effective_k = std::min(k_prime, s.cand.size()); + const unsigned effective_k = std::min(k_prime, s.cand.size()); const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k; std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare()); s.cand.resize(effective_k); @@ -194,7 +194,7 @@ namespace KBest { } void LazyNext(const Derivation* d, CandidateHeap* cand, UniqueDerivationSet* ds) { - for (int i = 0; i < d->j.size(); ++i) { + for (unsigned i = 0; i < d->j.size(); ++i) { SmallVectorInt j = d->j; ++j[i]; const Derivation* ant = LazyKthBest(d->edge->tail_nodes_[i], j[i]); @@ -205,8 +205,12 @@ namespace KBest { if (new_d) { cand->push_back(new_d); std::push_heap(cand->begin(), cand->end(), HeapCompare()); +#ifdef NDEBUG + ds->insert(new_d).second; // insert into uniqueness set +#else bool inserted = ds->insert(new_d).second; // insert into uniqueness set assert(inserted); +#endif } } } diff --git a/decoder/trule.cc b/decoder/trule.cc index 141b8faa..5ebc4c16 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -18,7 +18,7 @@ bool TRule::IsGoal() const { } static WordID ConvertTrgString(const string& w) { - int len = w.size(); + const unsigned len = w.size(); WordID id = 0; // [X,0] or [0] // for target rules, we ignore the category, just keep the index @@ -33,7 +33,7 @@ static WordID ConvertTrgString(const string& w) { } static WordID ConvertSrcString(const string& w, bool mono = false) { - int len = w.size(); + const unsigned len = w.size(); // [X,0] // for source rules, we keep the category and ignore the index (source rules are // always numbered 1, 2, 3... @@ -60,7 +60,7 @@ static WordID ConvertSrcString(const string& w, bool mono = false) { static WordID ConvertLHS(const string& w) { if (w[0] == '[') { - int len = w.size(); + const unsigned len = w.size(); if (len < 3) { cerr << "Format error: " << w << endl; exit(1); } return TD::Convert(w.substr(1, len-2)) * -1; } else { @@ -143,15 +143,15 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { string ss; getline(is, ss); //cerr << "L: " << ss << endl; - int start = 0; - int len = ss.size(); + unsigned start = 0; + unsigned len = ss.size(); const size_t ppos = ss.find(" |||"); if (ppos != string::npos) { len = ppos; } while (start < len) { while(start < len && (ss[start] == ' ' || ss[start] == ';')) ++start; if (start == len) break; - int end = start + 1; + unsigned end = start + 1; while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';')) ++end; if (end == len || ss[end] == ' ' || ss[end] == ';') { @@ -188,7 +188,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } f_ = e_; int x = ConvertLHS("[X]"); - for (int i = 0; i < f_.size(); ++i) + for (unsigned i = 0; i < f_.size(); ++i) if (f_[i] <= 0) { f_[i] = x; } } else { cerr << "F: " << format << endl; @@ -197,7 +197,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { if (mono) { e_ = f_; int ci = 0; - for (int i = 0; i < e_.size(); ++i) + for (unsigned i = 0; i < e_.size(); ++i) if (e_[i] < 0) e_[i] = ci--; } @@ -208,7 +208,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { bool TRule::SanityCheck() const { vector used(f_.size(), 0); int ac = 0; - for (int i = 0; i < e_.size(); ++i) { + for (unsigned i = 0; i < e_.size(); ++i) { int ind = e_[i]; if (ind > 0) continue; ind = -ind; @@ -238,7 +238,7 @@ string TRule::AsString(bool verbose) const { if (lhs_ && verbose) { os << '[' << TD::Convert(lhs_ * -1) << "] |||"; } - for (int i = 0; i < f_.size(); ++i) { + for (unsigned i = 0; i < f_.size(); ++i) { const WordID& w = f_[i]; if (w < 0) { int wi = w * -1; @@ -249,7 +249,7 @@ string TRule::AsString(bool verbose) const { } } os << " ||| "; - for (int i =0; i