From 570ba076cbe3b12c56b281da7c1892972e8598f1 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 23 May 2012 18:02:48 -0400 Subject: more bjam stuff, more cleanup --- decoder/bottom_up_parser.cc | 24 ++++++++++++------------ decoder/ff_dwarf.cc | 6 +++--- decoder/hg.h | 2 +- decoder/hg_io.cc | 9 ++++----- decoder/inside_outside.h | 10 +++++----- decoder/kbest.h | 22 +++++++++++++--------- decoder/trule.cc | 24 ++++++++++++------------ 7 files changed, 50 insertions(+), 47 deletions(-) (limited to 'decoder') diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 63939221..1f262747 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -154,7 +154,7 @@ PassiveChart::PassiveChart(const string& goal, goal_idx_(-1), lc_fid_(FD::Convert("LatticeCost")) { act_chart_.resize(grammars_.size()); - for (int i = 0; i < grammars_.size(); ++i) + for (unsigned i = 0; i < grammars_.size(); ++i) act_chart_[i] = new ActiveChart(forest, *this); if (!kGOAL) kGOAL = TD::Convert("Goal") * -1; if (!SILENT) cerr << " Goal category: [" << goal << ']' << endl; @@ -204,12 +204,12 @@ void PassiveChart::ApplyRules(const int i, void PassiveChart::ApplyUnaryRules(const int i, const int j) { const vector& nodes = chart_(i,j); // reference is important! - for (int gi = 0; gi < grammars_.size(); ++gi) { + for (unsigned gi = 0; gi < grammars_.size(); ++gi) { if (!grammars_[gi]->HasRuleForSpan(i,j,input_.Distance(i,j))) continue; - for (int di = 0; di < nodes.size(); ++di) { + for (unsigned di = 0; di < nodes.size(); ++di) { const WordID& cat = forest_->nodes_[nodes[di]].cat_; const vector& unaries = grammars_[gi]->GetUnaryRulesForRHS(cat); - for (int ri = 0; ri < unaries.size(); ++ri) { + for (unsigned ri = 0; ri < unaries.size(); ++ri) { // cerr << "At (" << i << "," << j << "): applying " << unaries[ri]->AsString() << endl; const Hypergraph::TailNodeVector ant(1, nodes[di]); ApplyRule(i, j, unaries[ri], ant, 0); // may update nodes @@ -224,15 +224,15 @@ bool PassiveChart::Parse() { size_t res = min(static_cast(2000000), static_cast(in_size_2 * 1000)); forest_->edges_.reserve(res); goal_idx_ = -1; - for (int gi = 0; gi < grammars_.size(); ++gi) + for (unsigned gi = 0; gi < grammars_.size(); ++gi) act_chart_[gi]->SeedActiveChart(*grammars_[gi]); if (!SILENT) cerr << " "; - for (int l=1; lAdvanceDotsForAllItemsInCell(i, j, input_); @@ -248,7 +248,7 @@ bool PassiveChart::Parse() { } ApplyUnaryRules(i,j); - for (int gi = 0; gi < grammars_.size(); ++gi) { + for (unsigned gi = 0; gi < grammars_.size(); ++gi) { const Grammar& g = *grammars_[gi]; // deal with non-terminals that were just proved if (g.HasRuleForSpan(i, j, input_.Distance(i,j))) @@ -256,7 +256,7 @@ bool PassiveChart::Parse() { } } const vector& dh = chart_(0, input_.size()); - for (int di = 0; di < dh.size(); ++di) { + for (unsigned di = 0; di < dh.size(); ++di) { const Hypergraph::Node& node = forest_->nodes_[dh[di]]; if (node.cat_ == goal_cat_) { Hypergraph::TailNodeVector ant(1, node.id_); @@ -272,7 +272,7 @@ bool PassiveChart::Parse() { } PassiveChart::~PassiveChart() { - for (int i = 0; i < act_chart_.size(); ++i) + for (unsigned i = 0; i < act_chart_.size(); ++i) delete act_chart_[i]; } diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc index 3daa85ac..43528405 100644 --- a/decoder/ff_dwarf.cc +++ b/decoder/ff_dwarf.cc @@ -519,7 +519,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m while (idx>=0) { if (l[idx].size()>0) { if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { - *lfw++; + lfw++; } } idx-=l[idx][0].dist2next; @@ -528,7 +528,7 @@ void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const m while (idx0) { if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { - *rfw++; + rfw++; } } idx+=l[idx][0].dist2next; @@ -787,7 +787,7 @@ bool Dwarf::generalizeOrientation(CountTable* table, const std::map void Reweight(const V& weights) { - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { Edge& e = edges_[i]; e.edge_prob_.logeq(e.feature_values_.dot(weights)); } diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 734c2ce8..3321558d 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -488,13 +488,13 @@ int getInt(const std::string& in, int &c) #define MAX_NODES 100000000 // parse ('foo', 0.23) void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { - if (get(in,c++) != '(') { assert(!"PCN/PLF parse error: expected ( at start of cn alt block\n"); } + if (get(in,c++) != '(') { cerr << "PCN/PLF parse error: expected (\n"; abort(); } vector ewords(2, 0); ewords[1] = TD::Convert(getEscapedString(in,c)); TRulePtr r(new TRule(ewords)); r->ComputeArity(); // cerr << "RULE: " << r->AsString() << endl; - if (get(in,c++) != ',') { cerr << in << endl; assert(!"PCN/PLF parse error: expected , after string\n"); } + if (get(in,c++) != ',') { cerr << in << endl; cerr << "PCN/PLF parse error: expected , after string\n"; abort(); } size_t cnNext = 1; std::vector probs; probs.push_back(getFloat(in,c)); @@ -508,10 +508,9 @@ void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) { if (probs.size()>1) { cnNext = static_cast(probs.back()); probs.pop_back(); - if (cnNext < 1) { cerr << cnNext << endl; - assert(!"PCN/PLF parse error: bad link length at last element of cn alt block\n"); } + if (cnNext < 1) { cerr << cnNext << endl << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; abort(); } } - if (get(in,c++) != ')') { assert(!"PCN/PLF parse error: expected ) at end of cn alt block\n"); } + if (get(in,c++) != ')') { cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; abort(); } eatws(in,c); Hypergraph::TailNodeVector tail(1, cur_node); Hypergraph::Edge* edge = hg->AddEdge(r, tail); diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index dc96f1a9..2ded328d 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -31,24 +31,24 @@ template WeightType Inside(const Hypergraph& hg, std::vector* result = NULL, const WeightFunction& weight = WeightFunction()) { - const int num_nodes = hg.nodes_.size(); + const unsigned num_nodes = hg.nodes_.size(); std::vector dummy; std::vector& inside_score = result ? *result : dummy; inside_score.clear(); inside_score.resize(num_nodes); // std::fill(inside_score.begin(), inside_score.end(), WeightType()); // clear handles - for (int i = 0; i < num_nodes; ++i) { + for (unsigned i = 0; i < num_nodes; ++i) { WeightType* const cur_node_inside_score = &inside_score[i]; Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; - const int num_in_edges = in.size(); + const unsigned num_in_edges = in.size(); if (num_in_edges == 0) { *cur_node_inside_score = WeightType(1); //FIXME: why not call weight(edge) instead? continue; } - for (int j = 0; j < num_in_edges; ++j) { + for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); - for (int k = 0; k < edge.tail_nodes_.size(); ++k) { + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) { const int tail_node_index = edge.tail_nodes_[k]; score *= inside_score[tail_node_index]; } diff --git a/decoder/kbest.h b/decoder/kbest.h index 03a8311c..9af3a20e 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -43,7 +43,7 @@ namespace KBest { traverse(tf), w(wf), g(hg), nds(g.nodes_.size()), k_prime(k) {} ~KBestDerivations() { - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } @@ -86,7 +86,7 @@ namespace KBest { // Hypergraph::Edge const * operator ->() const { return d->edge; } }; - EdgeHandle operator()(int t,int taili,EdgeHandle const& parent) const { + EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { return EdgeHandle(nds[t].D[parent.d->j[taili]]); } @@ -98,7 +98,7 @@ namespace KBest { size_t operator()(const Derivation* d) const { size_t x = 5381; x = ((x << 5) + x) ^ d->edge->id_; - for (int i = 0; i < d->j.size(); ++i) + for (unsigned i = 0; i < d->j.size(); ++i) x = ((x << 5) + x) ^ d->j[i]; return x; } @@ -121,7 +121,7 @@ namespace KBest { explicit NodeDerivationState(const DerivationFilter& f = DerivationFilter()) : filter(f) {} }; - Derivation* LazyKthBest(int v, int k) { + Derivation* LazyKthBest(unsigned v, unsigned k) { NodeDerivationState& s = GetCandidates(v); CandidateHeap& cand = s.cand; DerivationList& D = s.D; @@ -139,7 +139,7 @@ namespace KBest { Derivation* d = cand.back(); cand.pop_back(); std::vector ants(d->edge->Arity()); - for (int j = 0; j < ants.size(); ++j) + for (unsigned j = 0; j < ants.size(); ++j) ants[j] = &LazyKthBest(d->edge->tail_nodes_[j], d->j[j])->yield; traverse(*d->edge, ants, &d->yield); if (!filter(d->yield)) { @@ -171,12 +171,12 @@ namespace KBest { return freelist.back(); } - NodeDerivationState& GetCandidates(int v) { + NodeDerivationState& GetCandidates(unsigned v) { NodeDerivationState& s = nds[v]; if (!s.D.empty() || !s.cand.empty()) return s; const Hypergraph::Node& node = g.nodes_[v]; - for (int i = 0; i < node.in_edges_.size(); ++i) { + for (unsigned i = 0; i < node.in_edges_.size(); ++i) { const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; SmallVectorInt jv(edge.Arity(), 0); Derivation* d = CreateDerivation(edge, jv); @@ -184,7 +184,7 @@ namespace KBest { s.cand.push_back(d); } - const int effective_k = std::min(k_prime, s.cand.size()); + const unsigned effective_k = std::min(k_prime, s.cand.size()); const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k; std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare()); s.cand.resize(effective_k); @@ -194,7 +194,7 @@ namespace KBest { } void LazyNext(const Derivation* d, CandidateHeap* cand, UniqueDerivationSet* ds) { - for (int i = 0; i < d->j.size(); ++i) { + for (unsigned i = 0; i < d->j.size(); ++i) { SmallVectorInt j = d->j; ++j[i]; const Derivation* ant = LazyKthBest(d->edge->tail_nodes_[i], j[i]); @@ -205,8 +205,12 @@ namespace KBest { if (new_d) { cand->push_back(new_d); std::push_heap(cand->begin(), cand->end(), HeapCompare()); +#ifdef NDEBUG + ds->insert(new_d).second; // insert into uniqueness set +#else bool inserted = ds->insert(new_d).second; // insert into uniqueness set assert(inserted); +#endif } } } diff --git a/decoder/trule.cc b/decoder/trule.cc index 141b8faa..5ebc4c16 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -18,7 +18,7 @@ bool TRule::IsGoal() const { } static WordID ConvertTrgString(const string& w) { - int len = w.size(); + const unsigned len = w.size(); WordID id = 0; // [X,0] or [0] // for target rules, we ignore the category, just keep the index @@ -33,7 +33,7 @@ static WordID ConvertTrgString(const string& w) { } static WordID ConvertSrcString(const string& w, bool mono = false) { - int len = w.size(); + const unsigned len = w.size(); // [X,0] // for source rules, we keep the category and ignore the index (source rules are // always numbered 1, 2, 3... @@ -60,7 +60,7 @@ static WordID ConvertSrcString(const string& w, bool mono = false) { static WordID ConvertLHS(const string& w) { if (w[0] == '[') { - int len = w.size(); + const unsigned len = w.size(); if (len < 3) { cerr << "Format error: " << w << endl; exit(1); } return TD::Convert(w.substr(1, len-2)) * -1; } else { @@ -143,15 +143,15 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { string ss; getline(is, ss); //cerr << "L: " << ss << endl; - int start = 0; - int len = ss.size(); + unsigned start = 0; + unsigned len = ss.size(); const size_t ppos = ss.find(" |||"); if (ppos != string::npos) { len = ppos; } while (start < len) { while(start < len && (ss[start] == ' ' || ss[start] == ';')) ++start; if (start == len) break; - int end = start + 1; + unsigned end = start + 1; while(end < len && (ss[end] != '=' && ss[end] != ' ' && ss[end] != ';')) ++end; if (end == len || ss[end] == ' ' || ss[end] == ';') { @@ -188,7 +188,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { while(is>>w && w!="|||") { e_.push_back(ConvertTrgString(w)); } f_ = e_; int x = ConvertLHS("[X]"); - for (int i = 0; i < f_.size(); ++i) + for (unsigned i = 0; i < f_.size(); ++i) if (f_[i] <= 0) { f_[i] = x; } } else { cerr << "F: " << format << endl; @@ -197,7 +197,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { if (mono) { e_ = f_; int ci = 0; - for (int i = 0; i < e_.size(); ++i) + for (unsigned i = 0; i < e_.size(); ++i) if (e_[i] < 0) e_[i] = ci--; } @@ -208,7 +208,7 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { bool TRule::SanityCheck() const { vector used(f_.size(), 0); int ac = 0; - for (int i = 0; i < e_.size(); ++i) { + for (unsigned i = 0; i < e_.size(); ++i) { int ind = e_[i]; if (ind > 0) continue; ind = -ind; @@ -238,7 +238,7 @@ string TRule::AsString(bool verbose) const { if (lhs_ && verbose) { os << '[' << TD::Convert(lhs_ * -1) << "] |||"; } - for (int i = 0; i < f_.size(); ++i) { + for (unsigned i = 0; i < f_.size(); ++i) { const WordID& w = f_[i]; if (w < 0) { int wi = w * -1; @@ -249,7 +249,7 @@ string TRule::AsString(bool verbose) const { } } os << " ||| "; - for (int i =0; i Date: Wed, 23 May 2012 18:04:38 -0400 Subject: forgotten file --- decoder/cfg.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'decoder') diff --git a/decoder/cfg.cc b/decoder/cfg.cc index cd7e66e9..d6ee651a 100644 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -229,13 +229,13 @@ template <> struct null_for { static RHS null; }; -*/ template <> -BinRhs null_traits::null(std::numeric_limits::min(),std::numeric_limits::min()); +BinRhs null_traits::xnull(std::numeric_limits::min(),std::numeric_limits::min()); template <> -RHS null_traits::null(1,std::numeric_limits::min()); +RHS null_traits::xnull(1,std::numeric_limits::min()); +*/ template struct add_virtual_rules { @@ -250,7 +250,7 @@ struct add_virtual_rules { R2L rhs2lhs; // an rhs maps to this -virtntid, or original id if length 1 bool name_nts; add_virtual_rules(CFG &cfg,bool name_nts=false) : nts(cfg.nts),rules(cfg.rules),newnt(-nts.size()),newruleid(rules.size()),name_nts(name_nts) { - HASH_MAP_EMPTY(rhs2lhs,null_traits::null); + HASH_MAP_EMPTY(rhs2lhs,null_traits::xnull); } NTHandle get_virt(Rhs const& r) { NTHandle nt=get_default(rhs2lhs,r,newnt); -- cgit v1.2.3 From d92ed0e6a2a3563ae0a69e3000a358962de8e96d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 25 May 2012 12:46:24 -0400 Subject: fix bjam build --- decoder/ff_bleu.h | 1 - decoder/ff_lm.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'decoder') diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h index e93731c3..5544920e 100644 --- a/decoder/ff_bleu.h +++ b/decoder/ff_bleu.h @@ -6,7 +6,6 @@ #include "hg.h" #include "ff.h" -#include "config.h" class BLEUModelImpl; diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index 8885efce..ccee4268 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -6,7 +6,9 @@ #include "hg.h" #include "ff.h" +#ifdef HAVE_CONFIG_H #include "config.h" +#endif // everything in this file is deprecated and may be broken. // Chris Dyer, Mar 2011 -- cgit v1.2.3 From 213de03f50676d9930ea26c853623111fd758b67 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 May 2012 13:27:02 -0700 Subject: And actually allow ff_klm.cc to load RestProbingModel --- decoder/ff_klm.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'decoder') diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index a4b26f7c..7a84add7 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -373,15 +373,17 @@ boost::shared_ptr KLanguageModelFactory::Create(std::string par if (!RecognizeBinary(filename.c_str(), m)) m = HASH_PROBING; switch (m) { - case HASH_PROBING: + case PROBING: return CreateModel(param); - case TRIE_SORTED: + case REST_PROBING: + return CreateModel(param); + case TRIE: return CreateModel(param); - case ARRAY_TRIE_SORTED: + case ARRAY_TRIE: return CreateModel(param); - case QUANT_TRIE_SORTED: + case QUANT_TRIE: return CreateModel(param); - case QUANT_ARRAY_TRIE_SORTED: + case QUANT_ARRAY_TRIE: return CreateModel(param); default: UTIL_THROW(util::Exception, "Unrecognized kenlm binary file type " << (unsigned)m); -- cgit v1.2.3 From bfa5c4866101161c5fb20220d335c80ed075ae0a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 27 May 2012 15:34:44 -0400 Subject: clean up --- decoder/bottom_up_parser.cc | 2 +- decoder/hg.cc | 132 ++++++++++++------------- decoder/hg.h | 6 +- decoder/hg_intersect.cc | 28 +++--- decoder/hg_io.cc | 4 +- decoder/inside_outside.h | 2 +- decoder/maxtrans_blunsom.cc | 28 +++--- decoder/scfg_translator.cc | 14 +-- decoder/trule.cc | 2 + decoder/trule.h | 4 +- phrasinator/Makefile.am | 2 +- phrasinator/ccrp_nt.h | 170 -------------------------------- phrasinator/gibbs_train_plm.cc | 18 ++-- phrasinator/gibbs_train_plm.notables.cc | 24 ++--- utils/Makefile.am | 40 ++++---- utils/alignment_io.cc | 10 +- utils/alignment_io.h | 8 +- utils/array2d.h | 46 ++++----- utils/atools.cc | 44 ++++----- utils/ccrp.h | 2 +- utils/ccrp_nt.h | 2 +- utils/fast_sparse_vector.h | 86 ++++++++-------- utils/mfcr_test.cc | 14 +-- utils/sampler.h | 5 +- utils/small_vector.h | 1 + utils/sparse_vector.cc | 6 +- utils/stringlib.h | 2 +- utils/tdict.cc | 2 +- utils/weights.cc | 16 +-- 29 files changed, 270 insertions(+), 450 deletions(-) delete mode 100644 phrasinator/ccrp_nt.h (limited to 'decoder') diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 1f262747..ed79aaf0 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -84,7 +84,7 @@ class ActiveChart { const GrammarIter* ni = gptr_->Extend(symbol); if (!ni) return; Hypergraph::TailNodeVector na(ant_nodes_.size() + 1); - for (int i = 0; i < ant_nodes_.size(); ++i) + for (unsigned i = 0; i < ant_nodes_.size(); ++i) na[i] = ant_nodes_[i]; na[ant_nodes_.size()] = node_index; out_cell->push_back(ActiveItem(ni, na, lattice_cost)); diff --git a/decoder/hg.cc b/decoder/hg.cc index 180986d7..0dcbe91f 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -56,7 +56,7 @@ struct less_ve { Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev) { - for (int i=0;iresize(ne); - for (int i=0;i* posts) co SparseVector, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); return inside; } @@ -175,7 +175,7 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { SparseVector, ViterbiTransitionEventWeightFunction>(*this, &pv); post->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*post)[i] = pv.value(i).v_; return viterbi_weight.v_; } @@ -183,12 +183,12 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { void Hypergraph::PushWeightsToSource(double scale) { vector posts; ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) z += posts[node.out_edges_[j]]; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { edges_[node.out_edges_[j]].edge_prob_ = posts[node.out_edges_[j]] / z; } } @@ -201,7 +201,7 @@ struct vpusher : public vector { void operator()(int n,int /*ei*/,Hypergraph::Edge &e) const { Hypergraph::TailNodeVector const& t=e.tail_nodes_; prob_t p=e.edge_prob_; - for (int i=0;i posts; const prob_t inside_z = ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.in_edges_.size(); ++j) + for (unsigned j = 0; j < node.in_edges_.size(); ++j) z += posts[node.in_edges_[j]]; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { edges_[node.in_edges_[j]].edge_prob_ = posts[node.in_edges_[j]] / z; } } @@ -257,7 +257,7 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith if (run_inside_algorithm) { const EdgeExistsWeightFunction wf(prune_edge); vector reachable; - bool goal_derivable = Inside/* */(*this, &reachable, wf); + bool goal_derivable = Inside(*this, &reachable, wf); if (!goal_derivable) { edges_.clear(); nodes_.clear(); @@ -266,11 +266,11 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith } assert(reachable.size() == nodes_.size()); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { bool prune = prune_edge[i]; if (!prune) { const Edge& edge = edges_[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { if (!reachable[edge.tail_nodes_[j]]) { prune = true; break; @@ -299,7 +299,7 @@ void Hypergraph::MarginPrune(vector const& io,prob_t cutoff,vector cerr<<"Finishing prune for "< best) best = mm[i]; } prob_t beam_cut=best*prob_t::exp(-alpha); @@ -386,10 +386,10 @@ void Hypergraph::PrintGraphviz() const { << "\" shape=\"rect\"];\n"; Hypergraph::TailNodeVector indorder(edge.tail_nodes_.size(), 0); int ntc = 0; - for (int i = 0; i < edge.rule_->e_.size(); ++i) { + for (unsigned i = 0; i < edge.rule_->e_.size(); ++i) { if (edge.rule_->e_[i] <= 0) indorder[ntc++] = 1 + (-1 * edge.rule_->e_[i]); } - for (int i = 0; i < edge.tail_nodes_.size(); ++i) { + for (unsigned i = 0; i < edge.tail_nodes_.size(); ++i) { cerr << " " << edge.tail_nodes_[i] << " -> A_" << ei; if (edge.tail_nodes_.size() > 1) { cerr << " [label=\"" << indorder[i] << "\"]"; @@ -414,8 +414,8 @@ void Hypergraph::PrintGraphviz() const { void Hypergraph::Union(const Hypergraph& other) { if (&other == this) return; if (nodes_.empty()) { nodes_ = other.nodes_; edges_ = other.edges_; return; } - int noff = nodes_.size(); - int eoff = edges_.size(); + unsigned noff = nodes_.size(); + unsigned eoff = edges_.size(); int ogoal = other.nodes_.size() - 1; int cgoal = noff - 1; // keep a single goal node, so add nodes.size - 1 @@ -428,15 +428,15 @@ void Hypergraph::Union(const Hypergraph& other) { Node& cn = nodes_[i + noff]; cn.id_ = i + noff; cn.in_edges_.resize(on.in_edges_.size()); - for (int j = 0; j < on.in_edges_.size(); ++j) + for (unsigned j = 0; j < on.in_edges_.size(); ++j) cn.in_edges_[j] = on.in_edges_[j] + eoff; cn.out_edges_.resize(on.out_edges_.size()); - for (int j = 0; j < on.out_edges_.size(); ++j) + for (unsigned j = 0; j < on.out_edges_.size(); ++j) cn.out_edges_[j] = on.out_edges_[j] + eoff; } - for (int i = 0; i < other.edges_.size(); ++i) { + for (unsigned i = 0; i < other.edges_.size(); ++i) { const Edge& oe = other.edges_[i]; Edge& ce = edges_[i + eoff]; ce.id_ = i + eoff; @@ -449,7 +449,7 @@ void Hypergraph::Union(const Hypergraph& other) { ce.head_node_ = oe.head_node_ + noff; } ce.tail_nodes_.resize(oe.tail_nodes_.size()); - for (int j = 0; j < oe.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j) ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff; } @@ -460,16 +460,6 @@ void Hypergraph::PruneUnreachable(int goal_node_id) { TopologicallySortNodesAndEdges(goal_node_id, NULL); } -void Hypergraph::RemoveNoncoaccessibleStates(int goal_node_id) { - if (goal_node_id < 0) goal_node_id += nodes_.size(); - assert(goal_node_id >= 0); - assert(goal_node_id < nodes_.size()); - - // I don't get it: does TopologicallySortNodesAndEdges not remove things that don't connect to goal_index? it uses goal_index just to order things? InsideOutside pruning can do this anyway (nearly infinite beam, viterbi semiring) - // TODO finish implementation - abort(); -} - struct DFSContext { int node; int edge_iter; @@ -559,7 +549,7 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, } #ifndef HG_EDGES_TOPO_SORTED int ec = 0; - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { int& cp = reloc_edge[i]; if (cp >= 0) { cp = ec++; } } @@ -576,34 +566,34 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, cerr << endl; #endif bool no_op = true; - for (int i = 0; i < reloc_node.size() && no_op; ++i) - if (reloc_node[i] != i) no_op = false; - for (int i = 0; i < reloc_edge.size() && no_op; ++i) - if (reloc_edge[i] != i) no_op = false; + for (unsigned i = 0; i < reloc_node.size() && no_op; ++i) + if (reloc_node[i] != static_cast(i)) no_op = false; + for (unsigned i = 0; i < reloc_edge.size() && no_op; ++i) + if (reloc_edge[i] != static_cast(i)) no_op = false; if (no_op) return; - for (int i = 0; i < reloc_node.size(); ++i) { + for (unsigned i = 0; i < reloc_node.size(); ++i) { Node& node = nodes_[i]; node.id_ = reloc_node[i]; int c = 0; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { const int new_index = reloc_edge[node.in_edges_[j]]; if (new_index >= 0) node.in_edges_[c++] = new_index; } node.in_edges_.resize(c); c = 0; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { const int new_index = reloc_edge[node.out_edges_[j]]; if (new_index >= 0) node.out_edges_[c++] = new_index; } node.out_edges_.resize(c); } - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { Edge& edge = edges_[i]; edge.id_ = reloc_edge[i]; edge.head_node_ = reloc_node[edge.head_node_]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) edge.tail_nodes_[j] = reloc_node[edge.tail_nodes_[j]]; } edges_.erase(remove_if(edges_.begin(), edges_.end(), BadId()), edges_.end()); @@ -623,7 +613,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); } vector kill(edges_.size(), false); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { const Edge& edge = edges_[i]; if (edge.tail_nodes_.empty() && edge.rule_->f_.size() == 1 && @@ -637,7 +627,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { // same sequence via different paths through the input forest // this needs to be investigated and fixed } else { - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_; // cerr << "PROMOTED " << edge.feature_values_ << endl; } @@ -646,19 +636,19 @@ void Hypergraph::EpsilonRemove(WordID eps) { } bool created_eps = false; PruneEdges(kill); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Node& node = nodes_[i]; if (node.in_edges_.empty()) { - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { Edge& edge = edges_[node.out_edges_[j]]; if (edge.rule_->Arity() == 2) { assert(edge.rule_->f_.size() == 2); assert(edge.rule_->e_.size() == 2); edge.rule_ = kUnaryRule; - int cur = node.id_; + unsigned cur = node.id_; int t = -1; assert(edge.tail_nodes_.size() == 2); - for (int i = 0; i < 2; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } + for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } assert(t != -1); edge.tail_nodes_.resize(1); edge.tail_nodes_[0] = t; @@ -712,14 +702,14 @@ HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges,NodeMask &kn) const { kn.clear(); kn.resize(nodes_.size()); - for (int n=0;nnodes_; - for (int i=0;iedges_; - for (int i=0;i(i)); + for (unsigned i = 0; i < nodes_.size(); ++i) + assert(nodes_[i].id_==static_cast(i)); } HypergraphP Hypergraph::CreateViterbiHypergraph(const vector* edges) const { @@ -796,15 +786,15 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector* edges) const set_ids(); # endif EdgeMask used(edges_.size()); - for (int i = 0; i < vit_edges.size(); ++i) + for (unsigned i = 0; i < vit_edges.size(); ++i) used[vit_edges[i]->id_]=true; return CreateEdgeSubset(used); #else map old2new_node; int num_new_nodes = 0; - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& edge = *vit_edges[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); if (old2new_node.count(edge.head_node_) == 0) { old2new_node[edge.head_node_] = num_new_nodes; ++num_new_nodes; @@ -820,7 +810,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector* edges) const new_node.id_ = it->second; } - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& old_edge = *vit_edges[i]; Edge& new_edge = out->edges_[i]; new_edge = old_edge; @@ -828,7 +818,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector* edges) const const int new_head_node = old2new_node[old_edge.head_node_]; new_edge.head_node_ = new_head_node; out->nodes_[new_head_node].in_edges_.push_back(i); - for (int j = 0; j < old_edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < old_edge.tail_nodes_.size(); ++j) { const int new_tail_node = old2new_node[old_edge.tail_nodes_[j]]; new_edge.tail_nodes_[j] = new_tail_node; out->nodes_[new_tail_node].out_edges_.push_back(i); diff --git a/decoder/hg.h b/decoder/hg.h index 5f6d57ab..91d25f01 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -43,7 +43,7 @@ public: Hypergraph() : is_linear_chain_(false) {} // SmallVector is a fast, small vector implementation for sizes <= 2 - typedef SmallVectorInt TailNodeVector; // indices in nodes_ + typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ typedef std::vector EdgesVector; // indices in edges_ // TODO get rid of cat_? @@ -457,8 +457,6 @@ public: void PruneUnreachable(int goal_node_id); // DEPRECATED - void RemoveNoncoaccessibleStates(int goal_node_id = -1); - // remove edges from the hypergraph if prune_edge[edge_id] is true // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept. void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false); @@ -524,7 +522,7 @@ public: template void visit_edges(V &v) { - for (int i=0;i, bool, boost::hash > > exists_; bool true_lattice; - RuleFilter(const Lattice& target, int max_phrase_size) { + RuleFilter(const Lattice& target, unsigned max_phrase_size) { true_lattice = false; - for (int i = 0; i < target.size(); ++i) { + for (unsigned i = 0; i < target.size(); ++i) { vector phrase; - int lim = min(static_cast(target.size()), i + max_phrase_size); - for (int j = i; j < lim; ++j) { + const unsigned lim = min(static_cast(target.size()), i + max_phrase_size); + for (unsigned j = i; j < lim; ++j) { if (target[j].size() > 1) { true_lattice = true; break; } phrase.push_back(target[j][0].label); exists_[phrase] = true; @@ -37,10 +37,10 @@ struct RuleFilter { // TODO do some smarter filtering for lattices if (true_lattice) return false; // don't filter "true lattice" input const vector& e = r.e(); - for (int i = 0; i < e.size(); ++i) { + for (unsigned i = 0; i < e.size(); ++i) { if (e[i] <= 0) continue; vector phrase; - for (int j = i; j < e.size(); ++j) { + for (unsigned j = i; j < e.size(); ++j) { if (e[j] <= 0) break; phrase.push_back(e[j]); if (exists_.count(phrase) == 0) return true; @@ -55,7 +55,7 @@ static bool FastLinearIntersect(const Lattice& target, Hypergraph* hg) { vector prune(hg->edges_.size(), false); set cov; map inverted_rules; - for (int i = 0; i < prune.size(); ++i) { + for (unsigned i = 0; i < prune.size(); ++i) { Hypergraph::Edge& edge = hg->edges_[i]; if (edge.Arity() == 0) { const int trg_index = edge.prev_i_; @@ -87,12 +87,12 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { vector rem(hg->edges_.size(), false); const RuleFilter filter(target, 15); // TODO make configurable - for (int i = 0; i < rem.size(); ++i) + for (unsigned i = 0; i < rem.size(); ++i) rem[i] = filter(*hg->edges_[i].rule_); hg->PruneEdges(rem, true); - const int nedges = hg->edges_.size(); - const int nnodes = hg->nodes_.size(); + const unsigned nedges = hg->edges_.size(); + const unsigned nnodes = hg->nodes_.size(); TextGrammar* g = new TextGrammar; GrammarPtr gp(g); @@ -100,7 +100,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { // each node in the translation forest becomes a "non-terminal" in the new // grammar, create the labels here const string kSEP = "_"; - for (int i = 0; i < nnodes; ++i) { + for (unsigned i = 0; i < nnodes; ++i) { const char* pstr = "CAT"; if (hg->nodes_[i].cat_ < 0) pstr = TD::Convert(-hg->nodes_[i].cat_); @@ -108,7 +108,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } // construct the grammar - for (int i = 0; i < nedges; ++i) { + for (unsigned i = 0; i < nedges; ++i) { const Hypergraph::Edge& edge = hg->edges_[i]; const vector& tgt = edge.rule_->e(); const vector& src = edge.rule_->f(); @@ -122,7 +122,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { e.resize(src.size()); // parses using the source side! Hypergraph::TailNodeVector tn(edge.tail_nodes_.size()); int ntc = 0; - for (int j = 0; j < tgt.size(); ++j) { + for (unsigned j = 0; j < tgt.size(); ++j) { const WordID& cur = tgt[j]; if (cur > 0) { f[j] = cur; @@ -133,7 +133,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } } ntc = 0; - for (int j = 0; j < src.size(); ++j) { + for (unsigned j = 0; j < src.size(); ++j) { const WordID& cur = src[j]; if (cur > 0) { e[j] = cur; diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 3321558d..bfb2fb80 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser { hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node); } } - void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorInt& tail) { + void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { Hypergraph::Edge* edge = hg.AddEdge(rule, tail); feats->swap(edge->feature_values_); edge->i_ = spans[0]; @@ -229,7 +229,7 @@ struct HGReader : public JSONParser { } string rp; string cat; - SmallVectorInt tail; + SmallVectorUnsigned tail; vector in_edges; TRulePtr cur_rule; map rules; diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index 2ded328d..bb7f9fcc 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -67,7 +67,7 @@ void Outside(const Hypergraph& hg, ) { assert(result); const int num_nodes = hg.nodes_.size(); - assert(inside_score.size() == num_nodes); + assert(static_cast(inside_score.size()) == num_nodes); std::vector& outside_score = *result; outside_score.clear(); outside_score.resize(num_nodes); diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc index 6efab454..774e4170 100644 --- a/decoder/maxtrans_blunsom.cc +++ b/decoder/maxtrans_blunsom.cc @@ -73,7 +73,7 @@ struct Candidate { prob_t p = prob_t::One(); // cerr << "\nEstimating application of " << in_edge.rule_->AsString() << endl; vector* > ants(tail.size()); - for (int i = 0; i < tail.size(); ++i) { + for (unsigned i = 0; i < tail.size(); ++i) { const Candidate& ant = *D[in_edge.tail_nodes_[i]][j_[i]]; ants[i] = &ant.state_; assert(ant.IsIncorporatedIntoHypergraph()); @@ -99,7 +99,7 @@ ostream& operator<<(ostream& os, const Candidate& cand) { else { os << "+LM_node=" << cand.node_index_; } os << " edge=" << cand.in_edge_->id_; os << " j=<"; - for (int i = 0; i < cand.j_.size(); ++i) + for (unsigned i = 0; i < cand.j_.size(); ++i) os << (i==0 ? "" : " ") << cand.j_[i]; os << "> vit=" << log(cand.inside_prob_); os << " est=" << log(cand.est_prob_); @@ -127,7 +127,7 @@ struct CandidateUniquenessHash { size_t operator()(const Candidate* c) const { size_t x = 5381; x = ((x << 5) + x) ^ c->in_edge_->id_; - for (int i = 0; i < c->j_.size(); ++i) + for (unsigned i = 0; i < c->j_.size(); ++i) x = ((x << 5) + x) ^ c->j_[i]; return x; } @@ -154,12 +154,12 @@ public: } void Apply() { - int num_nodes = in.nodes_.size(); - int goal_id = num_nodes - 1; - int pregoal = goal_id - 1; + const unsigned num_nodes = in.nodes_.size(); + const unsigned goal_id = num_nodes - 1; + const unsigned pregoal = goal_id - 1; assert(in.nodes_[pregoal].out_edges_.size() == 1); cerr << " "; - for (int i = 0; i < in.nodes_.size(); ++i) { + for (unsigned i = 0; i < in.nodes_.size(); ++i) { cerr << '.'; KBest(i, i == goal_id); } @@ -174,9 +174,9 @@ public: private: void FreeAll() { - for (int i = 0; i < D.size(); ++i) { + for (unsigned i = 0; i < D.size(); ++i) { CandidateList& D_i = D[i]; - for (int j = 0; j < D_i.size(); ++j) + for (unsigned j = 0; j < D_i.size(); ++j) delete D_i[j]; } D.clear(); @@ -216,7 +216,7 @@ public: CandidateList freelist; cand.reserve(in_edges.size()); UniqueCandidateSet unique_cands; - for (int i = 0; i < in_edges.size(); ++i) { + for (unsigned i = 0; i < in_edges.size(); ++i) { const Hypergraph::Edge& edge = in.edges_[in_edges[i]]; const JVector j(edge.tail_nodes_.size(), 0); cand.push_back(new Candidate(edge, j, D, is_goal)); @@ -242,20 +242,20 @@ public: sort(D_v.begin(), D_v.end(), EstProbSorter()); // cerr << " expanded to " << D_v.size() << " nodes\n"; - for (int i = 0; i < cand.size(); ++i) + for (unsigned i = 0; i < cand.size(); ++i) delete cand[i]; // freelist is necessary since even after an item merged, it still stays in // the unique set so it can't be deleted til now - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } void PushSucc(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* cs) { CandidateHeap& cand = *pcand; - for (int i = 0; i < item.j_.size(); ++i) { + for (unsigned i = 0; i < item.j_.size(); ++i) { JVector j = item.j_; ++j[i]; - if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) { + if (static_cast(j[i]) < D[item.in_edge_->tail_nodes_[i]].size()) { Candidate query_unique(*item.in_edge_, j); if (cs->count(&query_unique) == 0) { Candidate* new_cand = new Candidate(*item.in_edge_, j, D, is_goal); diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 15abb600..185f979a 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -33,7 +33,7 @@ struct SCFGTranslatorImpl { { if(conf.count("grammar")){ vector gfiles = conf["grammar"].as >(); - for (int i = 0; i < gfiles.size(); ++i) { + for (unsigned i = 0; i < gfiles.size(); ++i) { if (!SILENT) cerr << "Reading SCFG grammar from " << gfiles[i] << endl; TextGrammar* g = new TextGrammar(gfiles[i]); g->SetMaxSpan(max_span_limit); @@ -132,7 +132,7 @@ struct SCFGTranslatorImpl { g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } - for (int gi = 0; gi < glist.size(); ++gi) { + for (unsigned gi = 0; gi < glist.size(); ++gi) { if(printGrammarsUsed) cerr << "Using grammar::" << glist[gi]->GetGrammarName() << endl; } @@ -147,7 +147,7 @@ struct SCFGTranslatorImpl { forest->Reweight(weights); if (use_ctf_) { Hypergraph::Node& goal_node = *(forest->nodes_.end()-1); - foreach(int edge_id, goal_node.in_edges_) + foreach(unsigned edge_id, goal_node.in_edges_) RefineRule(forest->edges_[edge_id].rule_, ctf_iterations_); double alpha = ctf_alpha_; bool found_parse=false; @@ -155,7 +155,7 @@ struct SCFGTranslatorImpl { cerr << "Coarse-to-fine source parse, alpha=" << alpha << endl; found_parse = true; Hypergraph refined_forest = *forest; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { cerr << viterbi_stats(refined_forest," Coarse forest",true,show_tree_structure_); cerr << " Iteration " << (j+1) << ": Pruning forest... "; refined_forest.BeamPruneInsideOutside(1.0, false, alpha, NULL); @@ -178,7 +178,7 @@ struct SCFGTranslatorImpl { if (!found_parse){ if (ctf_exhaustive_){ cerr << "Last resort: refining coarse forest without pruning..."; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { if (RefineForest(forest)){ cerr << " Refinement succeeded." << endl; forest->Reweight(weights); @@ -213,7 +213,7 @@ struct SCFGTranslatorImpl { Hypergraph::Edge& edge = forest->edges_[edge_id]; std::vector nt_positions; TRulePtr& coarse_rule_ptr = edge.rule_; - for(int i=0; i< coarse_rule_ptr->f_.size(); ++i){ + for(unsigned i=0; i< coarse_rule_ptr->f_.size(); ++i){ if (coarse_rule_ptr->f_[i] < 0) nt_positions.push_back(i); } @@ -225,7 +225,7 @@ struct SCFGTranslatorImpl { // fine rules apply only if state splits on tail nodes match fine rule nonterminals foreach(TRulePtr& fine_rule_ptr, *(coarse_rule_ptr->fine_rules_)) { Hypergraph::TailNodeVector tail; - for (int pos_i=0; pos_if_[nt_positions[pos_i]]; Split2Node::iterator it = s2n.find(StateSplit(edge.tail_nodes_[pos_i], fine_cat)); diff --git a/decoder/trule.cc b/decoder/trule.cc index 5ebc4c16..187a003d 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -100,6 +100,8 @@ namespace { // callback for lexer int n_assigned=0; void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { + (void) ctf_level; + (void) coarse_rule; TRule *assignto=(TRule *)extra; *assignto=*new_rule; ++n_assigned; diff --git a/decoder/trule.h b/decoder/trule.h index 8eb2a059..6a33d052 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -76,7 +76,7 @@ class TRule { void ESubstitute(const std::vector* >& var_values, std::vector* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector::const_iterator i = e_.begin(); i != e_.end(); ++i) { const WordID& c = *i; @@ -95,7 +95,7 @@ class TRule { void FSubstitute(const std::vector* >& var_values, std::vector* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector::const_iterator i = f_.begin(); i != f_.end(); ++i) { const WordID& c = *i; diff --git a/phrasinator/Makefile.am b/phrasinator/Makefile.am index aba98601..486cd21b 100644 --- a/phrasinator/Makefile.am +++ b/phrasinator/Makefile.am @@ -11,4 +11,4 @@ gibbs_train_plm_LDADD = $(top_srcdir)/utils/libutils.a -lz #head_bigram_model_SOURCES = head_bigram_model.cc #head_bigram_model_LDADD = $(top_srcdir)/utils/libutils.a -lz -AM_CPPFLAGS = -funroll-loops -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +AM_CPPFLAGS = -funroll-loops -ffast-math -W -Wall -Werror -I$(top_srcdir)/utils diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h deleted file mode 100644 index 811bce73..00000000 --- a/phrasinator/ccrp_nt.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _CCRP_NT_H_ -#define _CCRP_NT_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. - -template > -class CCRP_NoTable { - public: - explicit CCRP_NoTable(double conc) : - num_customers_(), - concentration_(conc), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : - num_customers_(), - concentration_(c), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double concentration() const { return concentration_; } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_customers_ = 0; - custs_.clear(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = custs_.find(dish); - if (it == custs_.end()) return 0; - return it->second; - } - - int increment(const Dish& dish) { - int table_diff = 0; - if (++custs_[dish] == 1) - table_diff = 1; - ++num_customers_; - return table_diff; - } - - int decrement(const Dish& dish) { - int table_diff = 0; - int nc = --custs_[dish]; - if (nc == 0) { - custs_.erase(dish); - table_diff = -1; - } else if (nc < 0) { - std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; - abort(); - } - --num_customers_; - return table_diff; - } - - double prob(const Dish& dish, const double& p0) const { - const unsigned at_table = num_customers(dish); - return (at_table + p0 * concentration_) / (num_customers_ + concentration_); - } - - double logprob(const Dish& dish, const double& logp0) const { - const unsigned at_table = num_customers(dish); - return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); - } - - double log_crp_prob() const { - return log_crp_prob(concentration_); - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& concentration) const { - double lp = 0.0; - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - lp += lgamma(concentration) - lgamma(concentration + num_customers_) + - custs_.size() * log(concentration); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - lp += lgamma(it->second); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_concentration_prior()); - ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - } - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} - const CCRP_NoTable& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(proposed_concentration); - } - }; - - void Print(std::ostream* out) const { - (*out) << "DP(alpha=" << concentration_ << ") customers=" << num_customers_ << std::endl; - int cc = 0; - for (typename std::tr1::unordered_map::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - (*out) << " " << it->first << "(" << it->second << " eating)"; - ++cc; - if (cc > 10) { (*out) << " ..."; break; } - } - (*out) << std::endl; - } - - unsigned num_customers_; - std::tr1::unordered_map custs_; - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return custs_.begin(); - } - const_iterator end() const { - return custs_.end(); - } - - double concentration_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP_NoTable& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 86fd7865..7847a460 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -26,7 +26,7 @@ string Join(char joiner, const vector& phrase) { } ostream& operator<<(ostream& os, const vector& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? "" : " ") << d.Convert(phrase[i]); return os; } @@ -37,7 +37,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -91,7 +91,7 @@ void ReadCorpus(const string& filename, vector >* c, set* vocab c->push_back(vector()); vector& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -151,7 +151,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -177,7 +177,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -187,7 +187,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -286,7 +286,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) // SegCorpus::Write(corpus[i], shmmlm.z_[i], d); ; if (conf.count("write_cdec_grammar")) { @@ -304,8 +304,6 @@ int main(int argc, char** argv) { os << "# make C smaller to use more phrases\nP 1\nPassThrough " << ulm.OOVUnigramLogProb() << "\nC -3\n"; } - - return 0; } diff --git a/phrasinator/gibbs_train_plm.notables.cc b/phrasinator/gibbs_train_plm.notables.cc index 9dca9e8d..4526eaa6 100644 --- a/phrasinator/gibbs_train_plm.notables.cc +++ b/phrasinator/gibbs_train_plm.notables.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -29,13 +29,13 @@ template void WriteSeg(const vector& line, const vector& label, const Dict& d) { assert(line.size() == label.size()); assert(label.back()); - int prev = 0; - int cur = 0; + unsigned prev = 0; + unsigned cur = 0; while (cur < line.size()) { if (label[cur]) { if (prev) cout << ' '; cout << "{{"; - for (int i = prev; i <= cur; ++i) + for (unsigned i = prev; i <= cur; ++i) cout << (i == prev ? "" : " ") << d.Convert(line[i]); cout << "}}:" << label[cur]; prev = cur + 1; @@ -46,7 +46,7 @@ void WriteSeg(const vector& line, const vector& label, const Dict& d } ostream& operator<<(ostream& os, const vector& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? "" : " ") << d.Convert(phrase[i]); return os; } @@ -57,7 +57,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -111,7 +111,7 @@ void ReadCorpus(const string& filename, vector >* c, set* vocab c->push_back(vector()); vector& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -175,7 +175,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -201,7 +201,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -211,7 +211,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -276,7 +276,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.concentration(); + cerr << " " << phrases_.alpha(); } CCRP_NoTable > phrases_; @@ -310,7 +310,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) WriteSeg(corpus[i], ulm.z_[i], d); if (conf.count("write_cdec_grammar")) { diff --git a/utils/Makefile.am b/utils/Makefile.am index 46650c75..386344dd 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,10 +1,9 @@ - bin_PROGRAMS = reconstruct_weights atools -noinst_PROGRAMS = ts phmt mfcr_test -TESTS = ts phmt mfcr_test - -noinst_PROGRAMS += \ +noinst_PROGRAMS = \ + ts \ + phmt \ + mfcr_test \ crp_test \ dict_test \ m_test \ @@ -12,11 +11,7 @@ noinst_PROGRAMS += \ logval_test \ small_vector_test -TESTS += crp_test small_vector_test logval_test weights_test dict_test m_test - -reconstruct_weights_SOURCES = reconstruct_weights.cc - -atools_SOURCES = atools.cc +TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a @@ -39,26 +34,31 @@ if HAVE_CMPH libutils_a_SOURCES += perfect_hash.cc endif +reconstruct_weights_SOURCES = reconstruct_weights.cc +reconstruct_weights_LDADD = libutils.a -lz +atools_SOURCES = atools.cc +atools_LDADD = libutils.a -lz + phmt_SOURCES = phmt.cc +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ts_SOURCES = ts.cc +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz m_test_SOURCES = m_test.cc -m_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz dict_test_SOURCES = dict_test.cc -dict_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc -weights_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz crp_test_SOURCES = crp_test.cc -crp_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc -logval_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) - -AM_LDFLAGS = libutils.a -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. ################################################################ diff --git a/utils/alignment_io.cc b/utils/alignment_io.cc index 1d923f7f..460fbd3f 100644 --- a/utils/alignment_io.cc +++ b/utils/alignment_io.cc @@ -7,7 +7,7 @@ static bool is_digit(char x) { return x >= '0' && x <= '9'; } boost::shared_ptr > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) { int max_x = 0; int max_y = 0; - int i = 0; + unsigned i = 0; size_t pos = al.rfind(" ||| "); if (pos != string::npos) { i = pos + 5; } while (i < al.size()) { @@ -65,8 +65,8 @@ boost::shared_ptr > AlignmentIO::ReadPharaohAlignmentGrid(const st void AlignmentIO::SerializePharaohFormat(const Array2D& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) if (alignment(i,j)) { if (need_space) out << ' '; else need_space = true; out << i << '-' << j; @@ -77,8 +77,8 @@ void AlignmentIO::SerializePharaohFormat(const Array2D& alignment, ostream void AlignmentIO::SerializeTypedAlignment(const Array2D& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) { + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) { const AlignmentType& aij = alignment(i,j); if (aij != kNONE) { if (need_space) out << ' '; else need_space = true; diff --git a/utils/alignment_io.h b/utils/alignment_io.h index 36bcecd7..63fb916b 100644 --- a/utils/alignment_io.h +++ b/utils/alignment_io.h @@ -16,12 +16,12 @@ struct AlignmentIO { inline std::ostream& operator<<(std::ostream& os, const Array2D& m) { os << ' '; - for (int j=0; j::iterator iterator; typedef typename std::vector::const_iterator const_iterator; Array2D() : width_(0), height_(0) {} - Array2D(int w, int h, const T& d = T()) : + Array2D(unsigned w, unsigned h, const T& d = T()) : width_(w), height_(h), data_(w*h, d) {} Array2D(const Array2D& rhs) : width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {} bool empty() const { return data_.empty(); } - void resize(int w, int h, const T& d = T()) { + void resize(unsigned w, unsigned h, const T& d = T()) { data_.resize(w * h, d); width_ = w; height_ = h; @@ -32,25 +32,25 @@ class Array2D { return *this; } void fill(const T& v) { data_.assign(data_.size(), v); } - int width() const { return width_; } - int height() const { return height_; } - reference operator()(int i, int j) { + unsigned width() const { return width_; } + unsigned height() const { return height_; } + reference operator()(unsigned i, unsigned j) { return data_[offset(i, j)]; } void clear() { data_.clear(); width_=0; height_=0; } - const_reference operator()(int i, int j) const { + const_reference operator()(unsigned i, unsigned j) const { return data_[offset(i, j)]; } - iterator begin_col(int j) { + iterator begin_col(unsigned j) { return data_.begin() + offset(0,j); } - const_iterator begin_col(int j) const { + const_iterator begin_col(unsigned j) const { return data_.begin() + offset(0,j); } - iterator end_col(int j) { + iterator end_col(unsigned j) { return data_.begin() + offset(0,j) + width_; } - const_iterator end_col(int j) const { + const_iterator end_col(unsigned j) const { return data_.begin() + offset(0,j) + width_; } iterator end() { return data_.end(); } @@ -71,14 +71,14 @@ class Array2D { } private: - inline int offset(int i, int j) const { + inline unsigned offset(unsigned i, unsigned j) const { assert(i data_; }; @@ -120,8 +120,8 @@ Array2D operator-(const Array2D& l, const Array2D& r) { template inline std::ostream& operator<<(std::ostream& os, const Array2D& m) { - for (int i=0; i& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D& m) { os << ' '; - for (int j=0; j& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D >& m) { os << ' '; - for (int j=0; j& ar = m(i,j); for (unsigned k=0; kresize(max(a.width(), b.width()), max(a.height(), b.height())); } static bool Safe(const Array2D& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + if (i >= 0 && j >= 0 && i < static_cast(a.width()) && j < static_cast(a.height())) return a(i,j); else return false; @@ -43,18 +43,18 @@ struct FMeasureCommand : public Command { bool RequiresTwoOperands() const { return true; } void Apply(const Array2D& hyp, const Array2D& ref, Array2D* x) { (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { + unsigned i_len = ref.width(); + unsigned j_len = ref.height(); + for (unsigned i = 0; i < i_len; ++i) { + for (unsigned j = 0; j < j_len; ++j) { if (ref(i,j)) { ++num_in_ref; if (Safe(hyp, i, j)) ++matches; } } } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) + for (unsigned i = 0; i < hyp.width(); ++i) + for (unsigned j = 0; j < hyp.height(); ++j) if (hyp(i,j)) ++num_predicted; } void Summary() { @@ -97,8 +97,8 @@ struct InvertCommand : public Command { void Apply(const Array2D& in, const Array2D&, Array2D* x) { Array2D& res = *x; res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) + for (unsigned i = 0; i < in.height(); ++i) + for (unsigned j = 0; j < in.width(); ++j) res(i, j) = in(j, i); } }; @@ -109,8 +109,8 @@ struct IntersectCommand : public Command { void Apply(const Array2D& a, const Array2D& b, Array2D* x) { EnsureSize(a, b, x); Array2D& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) + for (unsigned i = 0; i < a.width(); ++i) + for (unsigned j = 0; j < a.height(); ++j) res(i, j) = Safe(a, i, j) && Safe(b, i, j); } }; @@ -121,8 +121,8 @@ struct UnionCommand : public Command { void Apply(const Array2D& a, const Array2D& b, Array2D* x) { EnsureSize(a, b, x); Array2D& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) + for (unsigned i = 0; i < res.width(); ++i) + for (unsigned j = 0; j < res.height(); ++j) res(i, j) = Safe(a, i, j) || Safe(b, i, j); } }; @@ -136,14 +136,14 @@ struct RefineCommand : public Command { } bool RequiresTwoOperands() const { return true; } - void Align(int i, int j) { + void Align(unsigned i, unsigned j) { res_(i, j) = true; is_i_aligned_[i] = true; is_j_aligned_[j] = true; } bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { + for (unsigned k = 0; k < neighbors_.size(); ++k) { const int di = neighbors_[k].first; const int dj = neighbors_[k].second; if (Safe(res_, i + di, j + dj)) @@ -177,8 +177,8 @@ struct RefineCommand : public Command { EnsureSize(a, b, &un_); is_i_aligned_.resize(res_.width(), false); is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { + for (unsigned i = 0; i < in_.width(); ++i) + for (unsigned j = 0; j < in_.height(); ++j) { un_(i, j) = Safe(a, i, j) || Safe(b, i, j); in_(i, j) = Safe(a, i, j) && Safe(b, i, j); if (in_(i, j)) Align(i, j); @@ -188,16 +188,16 @@ struct RefineCommand : public Command { // if they match the constraints determined by pred void Grow(Predicate pred, bool idempotent, const Array2D& adds) { if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) { if (adds(i, j) && !res_(i, j) && (this->*pred)(i, j)) Align(i, j); } return; } set > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) if (adds(i, j) && !res_(i, j)) p.insert(make_pair(i, j)); bool keep_going = !p.empty(); @@ -263,7 +263,7 @@ struct GDFACommand : public DiagCommand { map > commands; -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); ostringstream os; os << "Operation to perform:"; diff --git a/utils/ccrp.h b/utils/ccrp.h index 8635b422..1d41a3ef 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -232,7 +232,7 @@ class CCRP { if (num_customers() == 0) return; DiscountResampler dr(*this); StrengthResampler sr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { if (has_strength_prior()) { strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits::min(), std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h index 6efbfc78..724b11bd 100644 --- a/utils/ccrp_nt.h +++ b/utils/ccrp_nt.h @@ -111,7 +111,7 @@ class CCRP_NoTable { void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { assert(has_alpha_prior()); ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); } diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 3cc48f8e..e86cbdc1 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -30,7 +30,7 @@ // to just set it #define L2_CACHE_LINE 128 -// this should just be a typedef to pair on the new c++ +// this should just be a typedef to pair on the new c++ // I have to avoid this since I want to use unions and c++-98 // does not let unions have types with constructors in them // this type bypasses default constructors. use with caution! @@ -38,32 +38,32 @@ // does anything template struct PairIntT { - const PairIntT& operator=(const std::pair& v) { + const PairIntT& operator=(const std::pair& v) { std::memcpy(this, &v, sizeof(PairIntT)); return *this; } - operator const std::pair&() const { - return *reinterpret_cast*>(this); + operator const std::pair&() const { + return *reinterpret_cast*>(this); } - int& first() { - return reinterpret_cast*>(this)->first; + unsigned& first() { + return reinterpret_cast*>(this)->first; } T& second() { - return reinterpret_cast*>(this)->second; + return reinterpret_cast*>(this)->second; } - const int& first() const { - return reinterpret_cast*>(this)->first; + const unsigned& first() const { + return reinterpret_cast*>(this)->first; } const T& second() const { - return reinterpret_cast*>(this)->second; + return reinterpret_cast*>(this)->second; } private: // very bad way of bypassing the default constructor on T - char data_[sizeof(std::pair)]; + char data_[sizeof(std::pair)]; }; -BOOST_STATIC_ASSERT(sizeof(PairIntT) == sizeof(std::pair)); +BOOST_STATIC_ASSERT(sizeof(PairIntT) == sizeof(std::pair)); -template +template class FastSparseVector { public: struct const_iterator { @@ -79,17 +79,17 @@ class FastSparseVector { } const bool local_; const PairIntT* local_it_; - typename std::map::const_iterator remote_it_; - const std::pair& operator*() const { + typename std::map::const_iterator remote_it_; + const std::pair& operator*() const { if (local_) - return *reinterpret_cast*>(local_it_); + return *reinterpret_cast*>(local_it_); else return *remote_it_; } - const std::pair* operator->() const { + const std::pair* operator->() const { if (local_) - return reinterpret_cast*>(local_it_); + return reinterpret_cast*>(local_it_); else return &*remote_it_; } @@ -118,17 +118,17 @@ class FastSparseVector { } FastSparseVector(const FastSparseVector& other) { std::memcpy(this, &other, sizeof(FastSparseVector)); - if (is_remote_) data_.rbmap = new std::map(*data_.rbmap); + if (is_remote_) data_.rbmap = new std::map(*data_.rbmap); } - FastSparseVector(std::pair* first, std::pair* last) { + FastSparseVector(std::pair* first, std::pair* last) { const ptrdiff_t n = last - first; if (n <= LOCAL_MAX) { is_remote_ = false; local_size_ = n; - std::memcpy(data_.local, first, sizeof(std::pair) * n); + std::memcpy(data_.local, first, sizeof(std::pair) * n); } else { is_remote_ = true; - data_.rbmap = new std::map(first, last); + data_.rbmap = new std::map(first, last); } } void erase(int k) { @@ -150,31 +150,31 @@ class FastSparseVector { clear(); std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_remote_) - data_.rbmap = new std::map(*data_.rbmap); + data_.rbmap = new std::map(*data_.rbmap); return *this; } T const& get_singleton() const { assert(size()==1); return begin()->second; } - bool nonzero(int k) const { + bool nonzero(unsigned k) const { return static_cast(value(k)); } - inline void set_value(int k, const T& v) { + inline void set_value(unsigned k, const T& v) { get_or_create_bin(k) = v; } - inline T& add_value(int k, const T& v) { + inline T& add_value(unsigned k, const T& v) { return get_or_create_bin(k) += v; } - inline T get(int k) const { + inline T get(unsigned k) const { return value(k); } - inline T value(int k) const { + inline T value(unsigned k) const { if (is_remote_) { - typename std::map::const_iterator it = data_.rbmap->find(k); + typename std::map::const_iterator it = data_.rbmap->find(k); if (it != data_.rbmap->end()) return it->second; } else { - for (int i = 0; i < local_size_; ++i) { + for (unsigned i = 0; i < local_size_; ++i) { const PairIntT& p = data_.local[i]; if (p.first() == k) return p.second(); } @@ -256,8 +256,8 @@ class FastSparseVector { } inline FastSparseVector& operator*=(const T& scalar) { if (is_remote_) { - const typename std::map::iterator end = data_.rbmap->end(); - for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map::iterator end = data_.rbmap->end(); + for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) it->second *= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -267,8 +267,8 @@ class FastSparseVector { } inline FastSparseVector& operator/=(const T& scalar) { if (is_remote_) { - const typename std::map::iterator end = data_.rbmap->end(); - for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map::iterator end = data_.rbmap->end(); + for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) it->second /= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -300,7 +300,7 @@ class FastSparseVector { T dot(const std::vector& v) const { T res = T(); for (const_iterator it = begin(), e = end(); it != e; ++it) - if (it->first < v.size()) res += it->second * v[it->first]; + if (static_cast(it->first) < v.size()) res += it->second * v[it->first]; return res; } T dot(const FastSparseVector& other) const { @@ -330,11 +330,11 @@ class FastSparseVector { v.resize(i+1); return v[i]; } - inline T& get_or_create_bin(int k) { + inline T& get_or_create_bin(unsigned k) { if (is_remote_) { return (*data_.rbmap)[k]; } else { - for (int i = 0; i < local_size_; ++i) + for (unsigned i = 0; i < local_size_; ++i) if (data_.local[i].first() == k) return data_.local[i].second(); } assert(!is_remote_); @@ -353,17 +353,17 @@ class FastSparseVector { void swap_local_rbmap() { if (is_remote_) { // data is in rbmap, move to local assert(data_.rbmap->size() < LOCAL_MAX); - const std::map* m = data_.rbmap; + const std::map* m = data_.rbmap; local_size_ = m->size(); int i = 0; - for (typename std::map::const_iterator it = m->begin(); + for (typename std::map::const_iterator it = m->begin(); it != m->end(); ++it) { data_.local[i] = *it; ++i; } is_remote_ = false; } else { // data is local, move to rbmap - std::map* m = new std::map(&data_.local[0], &data_.local[local_size_]); + std::map* m = new std::map(&data_.local[0], &data_.local[local_size_]); data_.rbmap = m; is_remote_ = true; } @@ -371,7 +371,7 @@ class FastSparseVector { union { PairIntT local[LOCAL_MAX]; - std::map* rbmap; + std::map* rbmap; } data_; unsigned char local_size_; bool is_remote_; @@ -399,8 +399,8 @@ class FastSparseVector { void load(Archive & ar, const unsigned int version) { (void) version; this->clear(); - int sz; ar & sz; - for (int i = 0; i < sz; ++i) { + unsigned sz; ar & sz; + for (unsigned i = 0; i < sz; ++i) { std::pair wire_pair; ar & wire_pair; this->set_value(FD::Convert(wire_pair.first), wire_pair.second); diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc index cc886335..29a1a2ce 100644 --- a/utils/mfcr_test.cc +++ b/utils/mfcr_test.cc @@ -4,11 +4,17 @@ #include #include +#define BOOST_TEST_MODULE MFCRTest +#include +#include + #include "sampler.h" using namespace std; -void test_exch(MT19937* rng) { +BOOST_AUTO_TEST_CASE(Exchangability) { + MT19937 r; + MT19937* rng = &r; MFCR<2, int> crp(0.5, 3.0); vector lambdas(2); vector p0s(2); @@ -64,9 +70,3 @@ void test_exch(MT19937* rng) { assert(error2 < 0.05); }; -int main(int argc, char** argv) { - MT19937 rng; - test_exch(&rng); - return 0; -} - diff --git a/utils/sampler.h b/utils/sampler.h index 22c873d4..b237c716 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -49,9 +49,10 @@ struct RandomNumberGenerator { size_t SelectSample(const F& a, const F& b, double T = 1.0) { if (T == 1.0) { if (F(this->next()) > (a / (a + b))) return 1; else return 0; - } else { - assert(!"not implemented"); } + std::cerr << "SelectSample with annealing not implemented\n"; + abort(); + return 0; } // T is the annealing temperature, if desired diff --git a/utils/small_vector.h b/utils/small_vector.h index d04d1352..894b1b32 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -316,6 +316,7 @@ inline void swap(SmallVector &a,SmallVector &b) { } typedef SmallVector SmallVectorInt; +typedef SmallVector SmallVectorUnsigned; template void memcpy(void *out,SmallVector const& v) { diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 27bb88dd..00e7bd60 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -32,7 +32,7 @@ void Encode(double objective, const SparseVector& v, ostream* out) { *reinterpret_cast(&data[off_objective]) = objective; *reinterpret_cast(&data[off_num_feats]) = num_feats; char* cur = &data[off_data]; - assert(cur - data == off_data); + assert(static_cast(cur - data) == off_data); for (const_iterator it = v.begin(); it != v.end(); ++it) { const string& fname = FD::Convert(it->first); *cur++ = static_cast(fname.size()); // name len @@ -41,10 +41,10 @@ void Encode(double objective, const SparseVector& v, ostream* out) { *reinterpret_cast(cur) = it->second; cur += sizeof(double); } - assert(cur - data == off_magic); + assert(static_cast(cur - data) == off_magic); *reinterpret_cast(cur) = 0xBAABABBAu; cur += sizeof(unsigned int); - assert(cur - data == tot_size); + assert(static_cast(cur - data) == tot_size); b64encode(data, tot_size, out); delete[] data; } diff --git a/utils/stringlib.h b/utils/stringlib.h index 13d14dbf..75772c4d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -231,7 +231,7 @@ template void VisitTokens(std::string const& s,F f) { if (0) { std::vector ss=SplitOnWhitespace(s); - for (int i=0;i& strings, std::vector std::string TD::GetString(const std::vector& str) { ostringstream o; - for (int i=0;i 0; --i) if (buf[i] == '=' || buf[i] == '\t') { buf[i] = ' '; break; } - int start = 0; + unsigned start = 0; while(start < buf.size() && buf[start] == ' ') ++start; - int end = 0; + unsigned end = 0; while(end < buf.size() && buf[end] != ' ') ++end; - const int fid = FD::Convert(buf.substr(start, end - start)); + const unsigned fid = FD::Convert(buf.substr(start, end - start)); if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); @@ -73,7 +73,7 @@ void Weights::InitFromFile(const string& filename, } else { // !read_text char buf[6]; in.read(buf, 5); - size_t num_keys; + int num_keys; in.read(reinterpret_cast(&num_keys), sizeof(size_t)); if (num_keys != FD::NumFeats()) { cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys << endl; @@ -102,8 +102,8 @@ void Weights::WriteToFile(const string& fname, if (write_text) { if (extra) { o << "# " << *extra << endl; } o.precision(17); - const int num_feats = FD::NumFeats(); - for (int i = 1; i < num_feats; ++i) { + const unsigned num_feats = FD::NumFeats(); + for (unsigned i = 1; i < num_feats; ++i) { const weight_t val = (i < weights.size() ? weights[i] : 0.0); if (hide_zero_value_features && val == 0.0) continue; o << FD::Convert(i) << ' ' << val << endl; @@ -126,7 +126,7 @@ void Weights::InitSparseVector(const vector& dv, } void Weights::SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { + for (unsigned i = 0; i < w.size(); ++i) { assert(!isnan(w[i])); assert(!isinf(w[i])); } @@ -142,7 +142,7 @@ struct FComp { void Weights::ShowLargestFeatures(const vector& w) { vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) + for (unsigned i = 0; i < w.size(); ++i) fnums[i] = i; int nf = FD::NumFeats(); if (nf > 10) nf = 10; -- cgit v1.2.3