diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-27 15:34:44 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-27 15:34:44 -0400 |
commit | 71c4918f05a4b380dfaebfabcc1847c1c6d497dd (patch) | |
tree | cd2a0c9c9175ddf8100b1c64d689e540f50eeae9 | |
parent | ab38dc57a6a64aa7ef60a845a4176e18e1ac7f27 (diff) |
clean up
-rw-r--r-- | decoder/bottom_up_parser.cc | 2 | ||||
-rw-r--r-- | decoder/hg.cc | 132 | ||||
-rw-r--r-- | decoder/hg.h | 6 | ||||
-rw-r--r-- | decoder/hg_intersect.cc | 28 | ||||
-rw-r--r-- | decoder/hg_io.cc | 4 | ||||
-rw-r--r-- | decoder/inside_outside.h | 2 | ||||
-rw-r--r-- | decoder/maxtrans_blunsom.cc | 28 | ||||
-rw-r--r-- | decoder/scfg_translator.cc | 14 | ||||
-rw-r--r-- | decoder/trule.cc | 2 | ||||
-rw-r--r-- | decoder/trule.h | 4 | ||||
-rw-r--r-- | phrasinator/Makefile.am | 2 | ||||
-rw-r--r-- | phrasinator/ccrp_nt.h | 170 | ||||
-rw-r--r-- | phrasinator/gibbs_train_plm.cc | 18 | ||||
-rw-r--r-- | phrasinator/gibbs_train_plm.notables.cc | 24 | ||||
-rw-r--r-- | utils/Makefile.am | 40 | ||||
-rw-r--r-- | utils/alignment_io.cc | 10 | ||||
-rw-r--r-- | utils/alignment_io.h | 8 | ||||
-rw-r--r-- | utils/array2d.h | 46 | ||||
-rw-r--r-- | utils/atools.cc | 44 | ||||
-rw-r--r-- | utils/ccrp.h | 2 | ||||
-rw-r--r-- | utils/ccrp_nt.h | 2 | ||||
-rw-r--r-- | utils/fast_sparse_vector.h | 86 | ||||
-rw-r--r-- | utils/mfcr_test.cc | 14 | ||||
-rw-r--r-- | utils/sampler.h | 5 | ||||
-rw-r--r-- | utils/small_vector.h | 1 | ||||
-rw-r--r-- | utils/sparse_vector.cc | 6 | ||||
-rw-r--r-- | utils/stringlib.h | 2 | ||||
-rw-r--r-- | utils/tdict.cc | 2 | ||||
-rw-r--r-- | utils/weights.cc | 16 |
29 files changed, 270 insertions, 450 deletions
diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index 1f262747..ed79aaf0 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -84,7 +84,7 @@ class ActiveChart { const GrammarIter* ni = gptr_->Extend(symbol); if (!ni) return; Hypergraph::TailNodeVector na(ant_nodes_.size() + 1); - for (int i = 0; i < ant_nodes_.size(); ++i) + for (unsigned i = 0; i < ant_nodes_.size(); ++i) na[i] = ant_nodes_[i]; na[ant_nodes_.size()] = node_index; out_cell->push_back(ActiveItem(ni, na, lattice_cost)); diff --git a/decoder/hg.cc b/decoder/hg.cc index 180986d7..0dcbe91f 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -56,7 +56,7 @@ struct less_ve { Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev) { - for (int i=0;i<nodes_.size();++i) { + for (unsigned i=0;i<nodes_.size();++i) { EdgesVector &ie=nodes_[i].in_edges_; std::sort(ie.begin(),ie.end(),less_ve(ev)); } @@ -70,9 +70,9 @@ prob_t Hypergraph::ComputeEdgeViterbi(EdgeProbs *ev) const { } prob_t Hypergraph::ComputeEdgeViterbi(NodeProbs const& nv,EdgeProbs *ev) const { - int ne=edges_.size(); + unsigned ne=edges_.size(); ev->resize(ne); - for (int i=0;i<ne;++i) { + for (unsigned i=0;i<ne;++i) { Edge const& e=edges_[i]; prob_t r=e.edge_prob_; TailNodeVector const& t=e.tail_nodes_; @@ -162,7 +162,7 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector<prob_t>* posts) co SparseVector<prob_t>, ScaledTransitionEventWeightFunction>(*this, &pv, weight, w2); posts->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*posts)[i] = prob_t(pv.value(i)); return inside; } @@ -175,7 +175,7 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const { SparseVector<TropicalValue>, ViterbiTransitionEventWeightFunction>(*this, &pv); post->resize(edges_.size()); - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) (*post)[i] = pv.value(i).v_; return viterbi_weight.v_; } @@ -183,12 +183,12 @@ prob_t Hypergraph::ComputeBestPathThroughEdges(vector<prob_t>* post) const { void Hypergraph::PushWeightsToSource(double scale) { vector<prob_t> posts; ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) z += posts[node.out_edges_[j]]; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { edges_[node.out_edges_[j]].edge_prob_ = posts[node.out_edges_[j]] / z; } } @@ -201,7 +201,7 @@ struct vpusher : public vector<TropicalValue> { void operator()(int n,int /*ei*/,Hypergraph::Edge &e) const { Hypergraph::TailNodeVector const& t=e.tail_nodes_; prob_t p=e.edge_prob_; - for (int i=0;i<t.size();++i) + for (unsigned i=0;i<t.size();++i) p*=(*this)[t[i]].v_; e.feature_values_.set_value(fid,log(e.edge_prob_=p/(*this)[n].v_)); } @@ -229,12 +229,12 @@ prob_t Hypergraph::PushViterbiWeightsToGoal(int fid) { prob_t Hypergraph::PushWeightsToGoal(double scale) { vector<prob_t> posts; const prob_t inside_z = ComputeEdgePosteriors(scale, &posts); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); - for (int j = 0; j < node.in_edges_.size(); ++j) + for (unsigned j = 0; j < node.in_edges_.size(); ++j) z += posts[node.in_edges_[j]]; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { edges_[node.in_edges_[j]].edge_prob_ = posts[node.in_edges_[j]] / z; } } @@ -257,7 +257,7 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith if (run_inside_algorithm) { const EdgeExistsWeightFunction wf(prune_edge); vector<Boolean> reachable; - bool goal_derivable = Inside/* <Boolean, EdgeExistsWeightFunction> */(*this, &reachable, wf); + bool goal_derivable = Inside<Boolean, EdgeExistsWeightFunction>(*this, &reachable, wf); if (!goal_derivable) { edges_.clear(); nodes_.clear(); @@ -266,11 +266,11 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith } assert(reachable.size() == nodes_.size()); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { bool prune = prune_edge[i]; if (!prune) { const Edge& edge = edges_[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { if (!reachable[edge.tail_nodes_[j]]) { prune = true; break; @@ -299,7 +299,7 @@ void Hypergraph::MarginPrune(vector<prob_t> const& io,prob_t cutoff,vector<bool> cerr<<"Finishing prune for "<<prune.size()<<" edges; CUTOFF=" << cutoff << endl; } unsigned pc = 0; - for (int i = 0; i < io.size(); ++i) { + for (unsigned i = 0; i < io.size(); ++i) { cutoff*=creep; // start more permissive, then become less generous. this is barely more than 1. we want to do this because it's a disaster if something lower in a derivation tree is deleted, but the higher thing remains (unless safe_inside) const bool prune_edge = (io[i] < cutoff); if (prune_edge) { @@ -325,11 +325,11 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask* assert(!use_beam||alpha>0); assert(!use_density||density>=1); assert(!use_sum_prod_semiring||scale>0); - int rnum=edges_.size(); + unsigned rnum=edges_.size(); if (use_density) { const int plen = ViterbiPathLength(*this); vector<WordID> bp; - rnum = min(rnum, static_cast<int>(density * static_cast<double>(plen))); + rnum = min(rnum, static_cast<unsigned>(density * plen)); cerr << "Density pruning: keep "<<rnum<<" of "<<edges_.size()<<" edges (viterbi = "<<plen<<" edges)"<<endl; if (rnum == edges_.size()) { cerr << "No pruning required: denisty already sufficient\n"; @@ -357,7 +357,7 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask* if (use_beam) { prob_t best=prob_t::One(); if (use_sum_prod_semiring) { - for (int i = 0; i < mm.size(); ++i) + for (unsigned i = 0; i < mm.size(); ++i) if (mm[i] > best) best = mm[i]; } prob_t beam_cut=best*prob_t::exp(-alpha); @@ -386,10 +386,10 @@ void Hypergraph::PrintGraphviz() const { << "\" shape=\"rect\"];\n"; Hypergraph::TailNodeVector indorder(edge.tail_nodes_.size(), 0); int ntc = 0; - for (int i = 0; i < edge.rule_->e_.size(); ++i) { + for (unsigned i = 0; i < edge.rule_->e_.size(); ++i) { if (edge.rule_->e_[i] <= 0) indorder[ntc++] = 1 + (-1 * edge.rule_->e_[i]); } - for (int i = 0; i < edge.tail_nodes_.size(); ++i) { + for (unsigned i = 0; i < edge.tail_nodes_.size(); ++i) { cerr << " " << edge.tail_nodes_[i] << " -> A_" << ei; if (edge.tail_nodes_.size() > 1) { cerr << " [label=\"" << indorder[i] << "\"]"; @@ -414,8 +414,8 @@ void Hypergraph::PrintGraphviz() const { void Hypergraph::Union(const Hypergraph& other) { if (&other == this) return; if (nodes_.empty()) { nodes_ = other.nodes_; edges_ = other.edges_; return; } - int noff = nodes_.size(); - int eoff = edges_.size(); + unsigned noff = nodes_.size(); + unsigned eoff = edges_.size(); int ogoal = other.nodes_.size() - 1; int cgoal = noff - 1; // keep a single goal node, so add nodes.size - 1 @@ -428,15 +428,15 @@ void Hypergraph::Union(const Hypergraph& other) { Node& cn = nodes_[i + noff]; cn.id_ = i + noff; cn.in_edges_.resize(on.in_edges_.size()); - for (int j = 0; j < on.in_edges_.size(); ++j) + for (unsigned j = 0; j < on.in_edges_.size(); ++j) cn.in_edges_[j] = on.in_edges_[j] + eoff; cn.out_edges_.resize(on.out_edges_.size()); - for (int j = 0; j < on.out_edges_.size(); ++j) + for (unsigned j = 0; j < on.out_edges_.size(); ++j) cn.out_edges_[j] = on.out_edges_[j] + eoff; } - for (int i = 0; i < other.edges_.size(); ++i) { + for (unsigned i = 0; i < other.edges_.size(); ++i) { const Edge& oe = other.edges_[i]; Edge& ce = edges_[i + eoff]; ce.id_ = i + eoff; @@ -449,7 +449,7 @@ void Hypergraph::Union(const Hypergraph& other) { ce.head_node_ = oe.head_node_ + noff; } ce.tail_nodes_.resize(oe.tail_nodes_.size()); - for (int j = 0; j < oe.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j) ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff; } @@ -460,16 +460,6 @@ void Hypergraph::PruneUnreachable(int goal_node_id) { TopologicallySortNodesAndEdges(goal_node_id, NULL); } -void Hypergraph::RemoveNoncoaccessibleStates(int goal_node_id) { - if (goal_node_id < 0) goal_node_id += nodes_.size(); - assert(goal_node_id >= 0); - assert(goal_node_id < nodes_.size()); - - // I don't get it: does TopologicallySortNodesAndEdges not remove things that don't connect to goal_index? it uses goal_index just to order things? InsideOutside pruning can do this anyway (nearly infinite beam, viterbi semiring) - // TODO finish implementation - abort(); -} - struct DFSContext { int node; int edge_iter; @@ -559,7 +549,7 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, } #ifndef HG_EDGES_TOPO_SORTED int ec = 0; - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { int& cp = reloc_edge[i]; if (cp >= 0) { cp = ec++; } } @@ -576,34 +566,34 @@ void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, cerr << endl; #endif bool no_op = true; - for (int i = 0; i < reloc_node.size() && no_op; ++i) - if (reloc_node[i] != i) no_op = false; - for (int i = 0; i < reloc_edge.size() && no_op; ++i) - if (reloc_edge[i] != i) no_op = false; + for (unsigned i = 0; i < reloc_node.size() && no_op; ++i) + if (reloc_node[i] != static_cast<int>(i)) no_op = false; + for (unsigned i = 0; i < reloc_edge.size() && no_op; ++i) + if (reloc_edge[i] != static_cast<int>(i)) no_op = false; if (no_op) return; - for (int i = 0; i < reloc_node.size(); ++i) { + for (unsigned i = 0; i < reloc_node.size(); ++i) { Node& node = nodes_[i]; node.id_ = reloc_node[i]; int c = 0; - for (int j = 0; j < node.in_edges_.size(); ++j) { + for (unsigned j = 0; j < node.in_edges_.size(); ++j) { const int new_index = reloc_edge[node.in_edges_[j]]; if (new_index >= 0) node.in_edges_[c++] = new_index; } node.in_edges_.resize(c); c = 0; - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { const int new_index = reloc_edge[node.out_edges_[j]]; if (new_index >= 0) node.out_edges_[c++] = new_index; } node.out_edges_.resize(c); } - for (int i = 0; i < reloc_edge.size(); ++i) { + for (unsigned i = 0; i < reloc_edge.size(); ++i) { Edge& edge = edges_[i]; edge.id_ = reloc_edge[i]; edge.head_node_ = reloc_node[edge.head_node_]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) edge.tail_nodes_[j] = reloc_node[edge.tail_nodes_[j]]; } edges_.erase(remove_if(edges_.begin(), edges_.end(), BadId<Edge>()), edges_.end()); @@ -623,7 +613,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); } vector<bool> kill(edges_.size(), false); - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { const Edge& edge = edges_[i]; if (edge.tail_nodes_.empty() && edge.rule_->f_.size() == 1 && @@ -637,7 +627,7 @@ void Hypergraph::EpsilonRemove(WordID eps) { // same sequence via different paths through the input forest // this needs to be investigated and fixed } else { - for (int j = 0; j < node.out_edges_.size(); ++j) + for (unsigned j = 0; j < node.out_edges_.size(); ++j) edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_; // cerr << "PROMOTED " << edge.feature_values_ << endl; } @@ -646,19 +636,19 @@ void Hypergraph::EpsilonRemove(WordID eps) { } bool created_eps = false; PruneEdges(kill); - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { const Node& node = nodes_[i]; if (node.in_edges_.empty()) { - for (int j = 0; j < node.out_edges_.size(); ++j) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { Edge& edge = edges_[node.out_edges_[j]]; if (edge.rule_->Arity() == 2) { assert(edge.rule_->f_.size() == 2); assert(edge.rule_->e_.size() == 2); edge.rule_ = kUnaryRule; - int cur = node.id_; + unsigned cur = node.id_; int t = -1; assert(edge.tail_nodes_.size() == 2); - for (int i = 0; i < 2; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } + for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; } assert(t != -1); edge.tail_nodes_.resize(1); edge.tail_nodes_[0] = t; @@ -712,14 +702,14 @@ HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges,NodeMask &kn) const { kn.clear(); kn.resize(nodes_.size()); - for (int n=0;n<nodes_.size();++n) { // this nested iteration gives us edges in topo order too + for (unsigned n=0;n<nodes_.size();++n) { // this nested iteration gives us edges in topo order too EdgesVector const& es=nodes_[n].in_edges_; - for (int i=0;i<es.size();++i) { + for (unsigned i=0;i<es.size();++i) { int ei=es[i]; if (keep_edges[ei]) { const Edge& e = edges_[ei]; TailNodeVector const& tails=e.tail_nodes_; - for (int j=0;j<e.tail_nodes_.size();++j) { + for (unsigned j=0;j<e.tail_nodes_.size();++j) { if (!kn[tails[j]]) { keep_edges[ei]=false; goto next_edge; @@ -738,11 +728,11 @@ HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask indices_after e2(keep_edges); HypergraphP ret(new Hypergraph(n2.n_kept, e2.n_kept, is_linear_chain_)); Nodes &rn=ret->nodes_; - for (int i=0;i<nodes_.size();++i) + for (unsigned i=0;i<nodes_.size();++i) if (n2.keeping(i)) rn[n2[i]].copy_reindex(nodes_[i],n2,e2); Edges &re=ret->edges_; - for (int i=0;i<edges_.size();++i) + for (unsigned i=0;i<edges_.size();++i) if (e2.keeping(i)) re[e2[i]].copy_reindex(edges_[i],n2,e2); return ret; @@ -750,11 +740,11 @@ HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask void Hypergraph::TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const { - for (int i = 0; i < edges_.size(); ++i) { + for (unsigned i = 0; i < edges_.size(); ++i) { if (ke[i]) { const Edge& edge = edges_[i]; TailNodeVector const& tails=edge.tail_nodes_; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { if (!kn[tails[j]]) { ke[i]=false; goto next_edge; @@ -766,18 +756,18 @@ void Hypergraph::TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const } void Hypergraph::set_ids() { - for (int i = 0; i < edges_.size(); ++i) + for (unsigned i = 0; i < edges_.size(); ++i) edges_[i].id_=i; - for (int i = 0; i < nodes_.size(); ++i) + for (unsigned i = 0; i < nodes_.size(); ++i) nodes_[i].id_=i; } void Hypergraph::check_ids() const { - for (int i = 0; i < edges_.size(); ++i) - assert(edges_[i].id_==i); - for (int i = 0; i < nodes_.size(); ++i) - assert(nodes_[i].id_==i); + for (unsigned i = 0; i < edges_.size(); ++i) + assert(edges_[i].id_==static_cast<int>(i)); + for (unsigned i = 0; i < nodes_.size(); ++i) + assert(nodes_[i].id_==static_cast<int>(i)); } HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const { @@ -796,15 +786,15 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const set_ids(); # endif EdgeMask used(edges_.size()); - for (int i = 0; i < vit_edges.size(); ++i) + for (unsigned i = 0; i < vit_edges.size(); ++i) used[vit_edges[i]->id_]=true; return CreateEdgeSubset(used); #else map<int, int> old2new_node; int num_new_nodes = 0; - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& edge = *vit_edges[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); if (old2new_node.count(edge.head_node_) == 0) { old2new_node[edge.head_node_] = num_new_nodes; ++num_new_nodes; @@ -820,7 +810,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const new_node.id_ = it->second; } - for (int i = 0; i < vit_edges.size(); ++i) { + for (unsigned i = 0; i < vit_edges.size(); ++i) { const Edge& old_edge = *vit_edges[i]; Edge& new_edge = out->edges_[i]; new_edge = old_edge; @@ -828,7 +818,7 @@ HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const const int new_head_node = old2new_node[old_edge.head_node_]; new_edge.head_node_ = new_head_node; out->nodes_[new_head_node].in_edges_.push_back(i); - for (int j = 0; j < old_edge.tail_nodes_.size(); ++j) { + for (unsigned j = 0; j < old_edge.tail_nodes_.size(); ++j) { const int new_tail_node = old2new_node[old_edge.tail_nodes_[j]]; new_edge.tail_nodes_[j] = new_tail_node; out->nodes_[new_tail_node].out_edges_.push_back(i); diff --git a/decoder/hg.h b/decoder/hg.h index 5f6d57ab..91d25f01 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -43,7 +43,7 @@ public: Hypergraph() : is_linear_chain_(false) {} // SmallVector is a fast, small vector<int> implementation for sizes <= 2 - typedef SmallVectorInt TailNodeVector; // indices in nodes_ + typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ typedef std::vector<int> EdgesVector; // indices in edges_ // TODO get rid of cat_? @@ -457,8 +457,6 @@ public: void PruneUnreachable(int goal_node_id); // DEPRECATED - void RemoveNoncoaccessibleStates(int goal_node_id = -1); - // remove edges from the hypergraph if prune_edge[edge_id] is true // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept. void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false); @@ -524,7 +522,7 @@ public: template <class V> void visit_edges(V &v) { - for (int i=0;i<edges_.size();++i) + for (unsigned i=0;i<edges_.size();++i) v(edges_[i].head_node_,i,edges_[i]); } diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index 8752838f..6e3bfee6 100644 --- a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -19,12 +19,12 @@ using namespace std; struct RuleFilter { unordered_map<vector<WordID>, bool, boost::hash<vector<WordID> > > exists_; bool true_lattice; - RuleFilter(const Lattice& target, int max_phrase_size) { + RuleFilter(const Lattice& target, unsigned max_phrase_size) { true_lattice = false; - for (int i = 0; i < target.size(); ++i) { + for (unsigned i = 0; i < target.size(); ++i) { vector<WordID> phrase; - int lim = min(static_cast<int>(target.size()), i + max_phrase_size); - for (int j = i; j < lim; ++j) { + const unsigned lim = min(static_cast<unsigned>(target.size()), i + max_phrase_size); + for (unsigned j = i; j < lim; ++j) { if (target[j].size() > 1) { true_lattice = true; break; } phrase.push_back(target[j][0].label); exists_[phrase] = true; @@ -37,10 +37,10 @@ struct RuleFilter { // TODO do some smarter filtering for lattices if (true_lattice) return false; // don't filter "true lattice" input const vector<WordID>& e = r.e(); - for (int i = 0; i < e.size(); ++i) { + for (unsigned i = 0; i < e.size(); ++i) { if (e[i] <= 0) continue; vector<WordID> phrase; - for (int j = i; j < e.size(); ++j) { + for (unsigned j = i; j < e.size(); ++j) { if (e[j] <= 0) break; phrase.push_back(e[j]); if (exists_.count(phrase) == 0) return true; @@ -55,7 +55,7 @@ static bool FastLinearIntersect(const Lattice& target, Hypergraph* hg) { vector<bool> prune(hg->edges_.size(), false); set<int> cov; map<const TRule*, TRulePtr> inverted_rules; - for (int i = 0; i < prune.size(); ++i) { + for (unsigned i = 0; i < prune.size(); ++i) { Hypergraph::Edge& edge = hg->edges_[i]; if (edge.Arity() == 0) { const int trg_index = edge.prev_i_; @@ -87,12 +87,12 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { vector<bool> rem(hg->edges_.size(), false); const RuleFilter filter(target, 15); // TODO make configurable - for (int i = 0; i < rem.size(); ++i) + for (unsigned i = 0; i < rem.size(); ++i) rem[i] = filter(*hg->edges_[i].rule_); hg->PruneEdges(rem, true); - const int nedges = hg->edges_.size(); - const int nnodes = hg->nodes_.size(); + const unsigned nedges = hg->edges_.size(); + const unsigned nnodes = hg->nodes_.size(); TextGrammar* g = new TextGrammar; GrammarPtr gp(g); @@ -100,7 +100,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { // each node in the translation forest becomes a "non-terminal" in the new // grammar, create the labels here const string kSEP = "_"; - for (int i = 0; i < nnodes; ++i) { + for (unsigned i = 0; i < nnodes; ++i) { const char* pstr = "CAT"; if (hg->nodes_[i].cat_ < 0) pstr = TD::Convert(-hg->nodes_[i].cat_); @@ -108,7 +108,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } // construct the grammar - for (int i = 0; i < nedges; ++i) { + for (unsigned i = 0; i < nedges; ++i) { const Hypergraph::Edge& edge = hg->edges_[i]; const vector<WordID>& tgt = edge.rule_->e(); const vector<WordID>& src = edge.rule_->f(); @@ -122,7 +122,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { e.resize(src.size()); // parses using the source side! Hypergraph::TailNodeVector tn(edge.tail_nodes_.size()); int ntc = 0; - for (int j = 0; j < tgt.size(); ++j) { + for (unsigned j = 0; j < tgt.size(); ++j) { const WordID& cur = tgt[j]; if (cur > 0) { f[j] = cur; @@ -133,7 +133,7 @@ bool HG::Intersect(const Lattice& target, Hypergraph* hg) { } } ntc = 0; - for (int j = 0; j < src.size(); ++j) { + for (unsigned j = 0; j < src.size(); ++j) { const WordID& cur = src[j]; if (cur > 0) { e[j] = cur; diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 3321558d..bfb2fb80 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser { hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node); } } - void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorInt& tail) { + void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { Hypergraph::Edge* edge = hg.AddEdge(rule, tail); feats->swap(edge->feature_values_); edge->i_ = spans[0]; @@ -229,7 +229,7 @@ struct HGReader : public JSONParser { } string rp; string cat; - SmallVectorInt tail; + SmallVectorUnsigned tail; vector<int> in_edges; TRulePtr cur_rule; map<int, TRulePtr> rules; diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index 2ded328d..bb7f9fcc 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -67,7 +67,7 @@ void Outside(const Hypergraph& hg, ) { assert(result); const int num_nodes = hg.nodes_.size(); - assert(inside_score.size() == num_nodes); + assert(static_cast<int>(inside_score.size()) == num_nodes); std::vector<WeightType>& outside_score = *result; outside_score.clear(); outside_score.resize(num_nodes); diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc index 6efab454..774e4170 100644 --- a/decoder/maxtrans_blunsom.cc +++ b/decoder/maxtrans_blunsom.cc @@ -73,7 +73,7 @@ struct Candidate { prob_t p = prob_t::One(); // cerr << "\nEstimating application of " << in_edge.rule_->AsString() << endl; vector<const vector<WordID>* > ants(tail.size()); - for (int i = 0; i < tail.size(); ++i) { + for (unsigned i = 0; i < tail.size(); ++i) { const Candidate& ant = *D[in_edge.tail_nodes_[i]][j_[i]]; ants[i] = &ant.state_; assert(ant.IsIncorporatedIntoHypergraph()); @@ -99,7 +99,7 @@ ostream& operator<<(ostream& os, const Candidate& cand) { else { os << "+LM_node=" << cand.node_index_; } os << " edge=" << cand.in_edge_->id_; os << " j=<"; - for (int i = 0; i < cand.j_.size(); ++i) + for (unsigned i = 0; i < cand.j_.size(); ++i) os << (i==0 ? "" : " ") << cand.j_[i]; os << "> vit=" << log(cand.inside_prob_); os << " est=" << log(cand.est_prob_); @@ -127,7 +127,7 @@ struct CandidateUniquenessHash { size_t operator()(const Candidate* c) const { size_t x = 5381; x = ((x << 5) + x) ^ c->in_edge_->id_; - for (int i = 0; i < c->j_.size(); ++i) + for (unsigned i = 0; i < c->j_.size(); ++i) x = ((x << 5) + x) ^ c->j_[i]; return x; } @@ -154,12 +154,12 @@ public: } void Apply() { - int num_nodes = in.nodes_.size(); - int goal_id = num_nodes - 1; - int pregoal = goal_id - 1; + const unsigned num_nodes = in.nodes_.size(); + const unsigned goal_id = num_nodes - 1; + const unsigned pregoal = goal_id - 1; assert(in.nodes_[pregoal].out_edges_.size() == 1); cerr << " "; - for (int i = 0; i < in.nodes_.size(); ++i) { + for (unsigned i = 0; i < in.nodes_.size(); ++i) { cerr << '.'; KBest(i, i == goal_id); } @@ -174,9 +174,9 @@ public: private: void FreeAll() { - for (int i = 0; i < D.size(); ++i) { + for (unsigned i = 0; i < D.size(); ++i) { CandidateList& D_i = D[i]; - for (int j = 0; j < D_i.size(); ++j) + for (unsigned j = 0; j < D_i.size(); ++j) delete D_i[j]; } D.clear(); @@ -216,7 +216,7 @@ public: CandidateList freelist; cand.reserve(in_edges.size()); UniqueCandidateSet unique_cands; - for (int i = 0; i < in_edges.size(); ++i) { + for (unsigned i = 0; i < in_edges.size(); ++i) { const Hypergraph::Edge& edge = in.edges_[in_edges[i]]; const JVector j(edge.tail_nodes_.size(), 0); cand.push_back(new Candidate(edge, j, D, is_goal)); @@ -242,20 +242,20 @@ public: sort(D_v.begin(), D_v.end(), EstProbSorter()); // cerr << " expanded to " << D_v.size() << " nodes\n"; - for (int i = 0; i < cand.size(); ++i) + for (unsigned i = 0; i < cand.size(); ++i) delete cand[i]; // freelist is necessary since even after an item merged, it still stays in // the unique set so it can't be deleted til now - for (int i = 0; i < freelist.size(); ++i) + for (unsigned i = 0; i < freelist.size(); ++i) delete freelist[i]; } void PushSucc(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* cs) { CandidateHeap& cand = *pcand; - for (int i = 0; i < item.j_.size(); ++i) { + for (unsigned i = 0; i < item.j_.size(); ++i) { JVector j = item.j_; ++j[i]; - if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) { + if (static_cast<unsigned>(j[i]) < D[item.in_edge_->tail_nodes_[i]].size()) { Candidate query_unique(*item.in_edge_, j); if (cs->count(&query_unique) == 0) { Candidate* new_cand = new Candidate(*item.in_edge_, j, D, is_goal); diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 15abb600..185f979a 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -33,7 +33,7 @@ struct SCFGTranslatorImpl { { if(conf.count("grammar")){ vector<string> gfiles = conf["grammar"].as<vector<string> >(); - for (int i = 0; i < gfiles.size(); ++i) { + for (unsigned i = 0; i < gfiles.size(); ++i) { if (!SILENT) cerr << "Reading SCFG grammar from " << gfiles[i] << endl; TextGrammar* g = new TextGrammar(gfiles[i]); g->SetMaxSpan(max_span_limit); @@ -132,7 +132,7 @@ struct SCFGTranslatorImpl { g->SetGrammarName("PassThrough"); glist.push_back(GrammarPtr(g)); } - for (int gi = 0; gi < glist.size(); ++gi) { + for (unsigned gi = 0; gi < glist.size(); ++gi) { if(printGrammarsUsed) cerr << "Using grammar::" << glist[gi]->GetGrammarName() << endl; } @@ -147,7 +147,7 @@ struct SCFGTranslatorImpl { forest->Reweight(weights); if (use_ctf_) { Hypergraph::Node& goal_node = *(forest->nodes_.end()-1); - foreach(int edge_id, goal_node.in_edges_) + foreach(unsigned edge_id, goal_node.in_edges_) RefineRule(forest->edges_[edge_id].rule_, ctf_iterations_); double alpha = ctf_alpha_; bool found_parse=false; @@ -155,7 +155,7 @@ struct SCFGTranslatorImpl { cerr << "Coarse-to-fine source parse, alpha=" << alpha << endl; found_parse = true; Hypergraph refined_forest = *forest; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { cerr << viterbi_stats(refined_forest," Coarse forest",true,show_tree_structure_); cerr << " Iteration " << (j+1) << ": Pruning forest... "; refined_forest.BeamPruneInsideOutside(1.0, false, alpha, NULL); @@ -178,7 +178,7 @@ struct SCFGTranslatorImpl { if (!found_parse){ if (ctf_exhaustive_){ cerr << "Last resort: refining coarse forest without pruning..."; - for (int j=0; j < ctf_iterations_; ++j) { + for (unsigned j=0; j < ctf_iterations_; ++j) { if (RefineForest(forest)){ cerr << " Refinement succeeded." << endl; forest->Reweight(weights); @@ -213,7 +213,7 @@ struct SCFGTranslatorImpl { Hypergraph::Edge& edge = forest->edges_[edge_id]; std::vector<int> nt_positions; TRulePtr& coarse_rule_ptr = edge.rule_; - for(int i=0; i< coarse_rule_ptr->f_.size(); ++i){ + for(unsigned i=0; i< coarse_rule_ptr->f_.size(); ++i){ if (coarse_rule_ptr->f_[i] < 0) nt_positions.push_back(i); } @@ -225,7 +225,7 @@ struct SCFGTranslatorImpl { // fine rules apply only if state splits on tail nodes match fine rule nonterminals foreach(TRulePtr& fine_rule_ptr, *(coarse_rule_ptr->fine_rules_)) { Hypergraph::TailNodeVector tail; - for (int pos_i=0; pos_i<nt_positions.size(); ++pos_i){ + for (unsigned pos_i=0; pos_i<nt_positions.size(); ++pos_i){ WordID fine_cat = fine_rule_ptr->f_[nt_positions[pos_i]]; Split2Node::iterator it = s2n.find(StateSplit(edge.tail_nodes_[pos_i], fine_cat)); diff --git a/decoder/trule.cc b/decoder/trule.cc index 5ebc4c16..187a003d 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -100,6 +100,8 @@ namespace { // callback for lexer int n_assigned=0; void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { + (void) ctf_level; + (void) coarse_rule; TRule *assignto=(TRule *)extra; *assignto=*new_rule; ++n_assigned; diff --git a/decoder/trule.h b/decoder/trule.h index 8eb2a059..6a33d052 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -76,7 +76,7 @@ class TRule { void ESubstitute(const std::vector<const std::vector<WordID>* >& var_values, std::vector<WordID>* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector<WordID>::const_iterator i = e_.begin(); i != e_.end(); ++i) { const WordID& c = *i; @@ -95,7 +95,7 @@ class TRule { void FSubstitute(const std::vector<const std::vector<WordID>* >& var_values, std::vector<WordID>* result) const { - int vc = 0; + unsigned vc = 0; result->clear(); for (std::vector<WordID>::const_iterator i = f_.begin(); i != f_.end(); ++i) { const WordID& c = *i; diff --git a/phrasinator/Makefile.am b/phrasinator/Makefile.am index aba98601..486cd21b 100644 --- a/phrasinator/Makefile.am +++ b/phrasinator/Makefile.am @@ -11,4 +11,4 @@ gibbs_train_plm_LDADD = $(top_srcdir)/utils/libutils.a -lz #head_bigram_model_SOURCES = head_bigram_model.cc #head_bigram_model_LDADD = $(top_srcdir)/utils/libutils.a -lz -AM_CPPFLAGS = -funroll-loops -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +AM_CPPFLAGS = -funroll-loops -ffast-math -W -Wall -Werror -I$(top_srcdir)/utils diff --git a/phrasinator/ccrp_nt.h b/phrasinator/ccrp_nt.h deleted file mode 100644 index 811bce73..00000000 --- a/phrasinator/ccrp_nt.h +++ /dev/null @@ -1,170 +0,0 @@ -#ifndef _CCRP_NT_H_ -#define _CCRP_NT_H_ - -#include <numeric> -#include <cassert> -#include <cmath> -#include <list> -#include <iostream> -#include <vector> -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. - -template <typename Dish, typename DishHash = boost::hash<Dish> > -class CCRP_NoTable { - public: - explicit CCRP_NoTable(double conc) : - num_customers_(), - concentration_(conc), - concentration_prior_shape_(std::numeric_limits<double>::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits<double>::quiet_NaN()) {} - - CCRP_NoTable(double c_shape, double c_rate, double c = 10.0) : - num_customers_(), - concentration_(c), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double concentration() const { return concentration_; } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_customers_ = 0; - custs_.clear(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.find(dish); - if (it == custs_.end()) return 0; - return it->second; - } - - int increment(const Dish& dish) { - int table_diff = 0; - if (++custs_[dish] == 1) - table_diff = 1; - ++num_customers_; - return table_diff; - } - - int decrement(const Dish& dish) { - int table_diff = 0; - int nc = --custs_[dish]; - if (nc == 0) { - custs_.erase(dish); - table_diff = -1; - } else if (nc < 0) { - std::cerr << "Dish counts dropped below zero for: " << dish << std::endl; - abort(); - } - --num_customers_; - return table_diff; - } - - double prob(const Dish& dish, const double& p0) const { - const unsigned at_table = num_customers(dish); - return (at_table + p0 * concentration_) / (num_customers_ + concentration_); - } - - double logprob(const Dish& dish, const double& logp0) const { - const unsigned at_table = num_customers(dish); - return log(at_table + exp(logp0 + log(concentration_))) - log(num_customers_ + concentration_); - } - - double log_crp_prob() const { - return log_crp_prob(concentration_); - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& concentration) const { - double lp = 0.0; - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - lp += lgamma(concentration) - lgamma(concentration + num_customers_) + - custs_.size() * log(concentration); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - lp += lgamma(it->second); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - assert(has_concentration_prior()); - ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); - } - } - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP_NoTable& crp) : crp_(crp) {} - const CCRP_NoTable& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(proposed_concentration); - } - }; - - void Print(std::ostream* out) const { - (*out) << "DP(alpha=" << concentration_ << ") customers=" << num_customers_ << std::endl; - int cc = 0; - for (typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator it = custs_.begin(); - it != custs_.end(); ++it) { - (*out) << " " << it->first << "(" << it->second << " eating)"; - ++cc; - if (cc > 10) { (*out) << " ..."; break; } - } - (*out) << std::endl; - } - - unsigned num_customers_; - std::tr1::unordered_map<Dish, unsigned, DishHash> custs_; - - typedef typename std::tr1::unordered_map<Dish, unsigned, DishHash>::const_iterator const_iterator; - const_iterator begin() const { - return custs_.begin(); - } - const_iterator end() const { - return custs_.end(); - } - - double concentration_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template <typename T,typename H> -std::ostream& operator<<(std::ostream& o, const CCRP_NoTable<T,H>& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/phrasinator/gibbs_train_plm.cc b/phrasinator/gibbs_train_plm.cc index 86fd7865..7847a460 100644 --- a/phrasinator/gibbs_train_plm.cc +++ b/phrasinator/gibbs_train_plm.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector<int>& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -26,7 +26,7 @@ string Join(char joiner, const vector<int>& phrase) { } ostream& operator<<(ostream& os, const vector<int>& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? "" : " ") << d.Convert(phrase[i]); return os; } @@ -37,7 +37,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -91,7 +91,7 @@ void ReadCorpus(const string& filename, vector<vector<int> >* c, set<int>* vocab c->push_back(vector<int>()); vector<int>& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -151,7 +151,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -177,7 +177,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -187,7 +187,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -286,7 +286,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as<unsigned>(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) // SegCorpus::Write(corpus[i], shmmlm.z_[i], d); ; if (conf.count("write_cdec_grammar")) { @@ -304,8 +304,6 @@ int main(int argc, char** argv) { os << "# make C smaller to use more phrases\nP 1\nPassThrough " << ulm.OOVUnigramLogProb() << "\nC -3\n"; } - - return 0; } diff --git a/phrasinator/gibbs_train_plm.notables.cc b/phrasinator/gibbs_train_plm.notables.cc index 9dca9e8d..4526eaa6 100644 --- a/phrasinator/gibbs_train_plm.notables.cc +++ b/phrasinator/gibbs_train_plm.notables.cc @@ -18,7 +18,7 @@ Dict d; // global dictionary string Join(char joiner, const vector<int>& phrase) { ostringstream os; - for (int i = 0; i < phrase.size(); ++i) { + for (unsigned i = 0; i < phrase.size(); ++i) { if (i > 0) os << joiner; os << d.Convert(phrase[i]); } @@ -29,13 +29,13 @@ template <typename BType> void WriteSeg(const vector<int>& line, const vector<BType>& label, const Dict& d) { assert(line.size() == label.size()); assert(label.back()); - int prev = 0; - int cur = 0; + unsigned prev = 0; + unsigned cur = 0; while (cur < line.size()) { if (label[cur]) { if (prev) cout << ' '; cout << "{{"; - for (int i = prev; i <= cur; ++i) + for (unsigned i = prev; i <= cur; ++i) cout << (i == prev ? "" : " ") << d.Convert(line[i]); cout << "}}:" << label[cur]; prev = cur + 1; @@ -46,7 +46,7 @@ void WriteSeg(const vector<int>& line, const vector<BType>& label, const Dict& d } ostream& operator<<(ostream& os, const vector<int>& phrase) { - for (int i = 0; i < phrase.size(); ++i) + for (unsigned i = 0; i < phrase.size(); ++i) os << (i == 0 ? "" : " ") << d.Convert(phrase[i]); return os; } @@ -57,7 +57,7 @@ struct UnigramLM { assert(in); } - double logprob(int word) const { + double logprob(unsigned word) const { assert(word < freqs_.size()); return freqs_[word]; } @@ -111,7 +111,7 @@ void ReadCorpus(const string& filename, vector<vector<int> >* c, set<int>* vocab c->push_back(vector<int>()); vector<int>& v = c->back(); d.ConvertWhitespaceDelimitedLine(line, &v); - for (int i = 0; i < v.size(); ++i) vocab->insert(v[i]); + for (unsigned i = 0; i < v.size(); ++i) vocab->insert(v[i]); } if (in != &cin) delete in; } @@ -175,7 +175,7 @@ struct UniphraseLM { cerr << "Initializing...\n"; z_.resize(corpus_.size()); int tc = 0; - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -201,7 +201,7 @@ struct UniphraseLM { cerr << "Initial LLH: " << llh() << endl; cerr << "Sampling...\n"; cerr << gen_ << endl; - for (int s = 1; s < samples; ++s) { + for (unsigned s = 1; s < samples; ++s) { cerr << '.'; if (s % 10 == 0) { cerr << " [" << s; @@ -211,7 +211,7 @@ struct UniphraseLM { //for (int j = 0; j < z.size(); ++j) z[j] = z_[0][j]; //SegCorpus::Write(corpus_[0], z, d); } - for (int i = 0; i < corpus_.size(); ++i) { + for (unsigned i = 0; i < corpus_.size(); ++i) { const vector<int>& line = corpus_[i]; const int ls = line.size(); const int last_pos = ls - 1; @@ -276,7 +276,7 @@ struct UniphraseLM { void ResampleHyperparameters(MT19937* rng) { phrases_.resample_hyperparameters(rng); gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.concentration(); + cerr << " " << phrases_.alpha(); } CCRP_NoTable<vector<int> > phrases_; @@ -310,7 +310,7 @@ int main(int argc, char** argv) { ulm.Sample(conf["samples"].as<unsigned>(), conf.count("no_hyperparameter_inference") == 0, &rng); cerr << "OOV unigram prob: " << ulm.OOVUnigramLogProb() << endl; - for (int i = 0; i < corpus.size(); ++i) + for (unsigned i = 0; i < corpus.size(); ++i) WriteSeg(corpus[i], ulm.z_[i], d); if (conf.count("write_cdec_grammar")) { diff --git a/utils/Makefile.am b/utils/Makefile.am index 46650c75..386344dd 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,10 +1,9 @@ - bin_PROGRAMS = reconstruct_weights atools -noinst_PROGRAMS = ts phmt mfcr_test -TESTS = ts phmt mfcr_test - -noinst_PROGRAMS += \ +noinst_PROGRAMS = \ + ts \ + phmt \ + mfcr_test \ crp_test \ dict_test \ m_test \ @@ -12,11 +11,7 @@ noinst_PROGRAMS += \ logval_test \ small_vector_test -TESTS += crp_test small_vector_test logval_test weights_test dict_test m_test - -reconstruct_weights_SOURCES = reconstruct_weights.cc - -atools_SOURCES = atools.cc +TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a @@ -39,26 +34,31 @@ if HAVE_CMPH libutils_a_SOURCES += perfect_hash.cc endif +reconstruct_weights_SOURCES = reconstruct_weights.cc +reconstruct_weights_LDADD = libutils.a -lz +atools_SOURCES = atools.cc +atools_LDADD = libutils.a -lz + phmt_SOURCES = phmt.cc +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ts_SOURCES = ts.cc +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz m_test_SOURCES = m_test.cc -m_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz dict_test_SOURCES = dict_test.cc -dict_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc -weights_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz crp_test_SOURCES = crp_test.cc -crp_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc -logval_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) - -AM_LDFLAGS = libutils.a -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. ################################################################ diff --git a/utils/alignment_io.cc b/utils/alignment_io.cc index 1d923f7f..460fbd3f 100644 --- a/utils/alignment_io.cc +++ b/utils/alignment_io.cc @@ -7,7 +7,7 @@ static bool is_digit(char x) { return x >= '0' && x <= '9'; } boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) { int max_x = 0; int max_y = 0; - int i = 0; + unsigned i = 0; size_t pos = al.rfind(" ||| "); if (pos != string::npos) { i = pos + 5; } while (i < al.size()) { @@ -65,8 +65,8 @@ boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const st void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) if (alignment(i,j)) { if (need_space) out << ' '; else need_space = true; out << i << '-' << j; @@ -77,8 +77,8 @@ void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) { + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) { const AlignmentType& aij = alignment(i,j); if (aij != kNONE) { if (need_space) out << ' '; else need_space = true; diff --git a/utils/alignment_io.h b/utils/alignment_io.h index 36bcecd7..63fb916b 100644 --- a/utils/alignment_io.h +++ b/utils/alignment_io.h @@ -16,12 +16,12 @@ struct AlignmentIO { inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { switch (m(i,j)) { case AlignmentIO::kNONE: os << '.'; break; case AlignmentIO::kTRANSLATION: os << '*'; break; @@ -32,7 +32,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::Ali os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; diff --git a/utils/array2d.h b/utils/array2d.h index ee2600d2..1a8e4157 100644 --- a/utils/array2d.h +++ b/utils/array2d.h @@ -15,12 +15,12 @@ class Array2D { typedef typename std::vector<T>::iterator iterator; typedef typename std::vector<T>::const_iterator const_iterator; Array2D() : width_(0), height_(0) {} - Array2D(int w, int h, const T& d = T()) : + Array2D(unsigned w, unsigned h, const T& d = T()) : width_(w), height_(h), data_(w*h, d) {} Array2D(const Array2D& rhs) : width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {} bool empty() const { return data_.empty(); } - void resize(int w, int h, const T& d = T()) { + void resize(unsigned w, unsigned h, const T& d = T()) { data_.resize(w * h, d); width_ = w; height_ = h; @@ -32,25 +32,25 @@ class Array2D { return *this; } void fill(const T& v) { data_.assign(data_.size(), v); } - int width() const { return width_; } - int height() const { return height_; } - reference operator()(int i, int j) { + unsigned width() const { return width_; } + unsigned height() const { return height_; } + reference operator()(unsigned i, unsigned j) { return data_[offset(i, j)]; } void clear() { data_.clear(); width_=0; height_=0; } - const_reference operator()(int i, int j) const { + const_reference operator()(unsigned i, unsigned j) const { return data_[offset(i, j)]; } - iterator begin_col(int j) { + iterator begin_col(unsigned j) { return data_.begin() + offset(0,j); } - const_iterator begin_col(int j) const { + const_iterator begin_col(unsigned j) const { return data_.begin() + offset(0,j); } - iterator end_col(int j) { + iterator end_col(unsigned j) { return data_.begin() + offset(0,j) + width_; } - const_iterator end_col(int j) const { + const_iterator end_col(unsigned j) const { return data_.begin() + offset(0,j) + width_; } iterator end() { return data_.end(); } @@ -71,14 +71,14 @@ class Array2D { } private: - inline int offset(int i, int j) const { + inline unsigned offset(unsigned i, unsigned j) const { assert(i<width_); assert(j<height_); return i + j * width_; } - int width_; - int height_; + unsigned width_; + unsigned height_; std::vector<T> data_; }; @@ -120,8 +120,8 @@ Array2D<T> operator-(const Array2D<T>& l, const Array2D<T>& r) { template <typename T> inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { - for (int i=0; i<m.width(); ++i) { - for (int j=0; j<m.height(); ++j) + for (unsigned i=0; i<m.width(); ++i) { + for (unsigned j=0; j<m.height(); ++j) os << '\t' << m(i,j); os << '\n'; } @@ -130,17 +130,17 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (m(i,j) ? '*' : '.'); os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; @@ -148,12 +148,12 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool> >& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { const std::vector<bool>& ar = m(i,j); for (unsigned k=0; k<ar.size(); ++k) os << (ar[k] ? '*' : '.'); @@ -162,7 +162,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; return os; diff --git a/utils/atools.cc b/utils/atools.cc index bce7822e..24406b71 100644 --- a/utils/atools.cc +++ b/utils/atools.cc @@ -27,7 +27,7 @@ struct Command { x->resize(max(a.width(), b.width()), max(a.height(), b.height())); } static bool Safe(const Array2D<bool>& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + if (i >= 0 && j >= 0 && i < static_cast<int>(a.width()) && j < static_cast<int>(a.height())) return a(i,j); else return false; @@ -43,18 +43,18 @@ struct FMeasureCommand : public Command { bool RequiresTwoOperands() const { return true; } void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) { (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { + unsigned i_len = ref.width(); + unsigned j_len = ref.height(); + for (unsigned i = 0; i < i_len; ++i) { + for (unsigned j = 0; j < j_len; ++j) { if (ref(i,j)) { ++num_in_ref; if (Safe(hyp, i, j)) ++matches; } } } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) + for (unsigned i = 0; i < hyp.width(); ++i) + for (unsigned j = 0; j < hyp.height(); ++j) if (hyp(i,j)) ++num_predicted; } void Summary() { @@ -97,8 +97,8 @@ struct InvertCommand : public Command { void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { Array2D<bool>& res = *x; res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) + for (unsigned i = 0; i < in.height(); ++i) + for (unsigned j = 0; j < in.width(); ++j) res(i, j) = in(j, i); } }; @@ -109,8 +109,8 @@ struct IntersectCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) + for (unsigned i = 0; i < a.width(); ++i) + for (unsigned j = 0; j < a.height(); ++j) res(i, j) = Safe(a, i, j) && Safe(b, i, j); } }; @@ -121,8 +121,8 @@ struct UnionCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) + for (unsigned i = 0; i < res.width(); ++i) + for (unsigned j = 0; j < res.height(); ++j) res(i, j) = Safe(a, i, j) || Safe(b, i, j); } }; @@ -136,14 +136,14 @@ struct RefineCommand : public Command { } bool RequiresTwoOperands() const { return true; } - void Align(int i, int j) { + void Align(unsigned i, unsigned j) { res_(i, j) = true; is_i_aligned_[i] = true; is_j_aligned_[j] = true; } bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { + for (unsigned k = 0; k < neighbors_.size(); ++k) { const int di = neighbors_[k].first; const int dj = neighbors_[k].second; if (Safe(res_, i + di, j + dj)) @@ -177,8 +177,8 @@ struct RefineCommand : public Command { EnsureSize(a, b, &un_); is_i_aligned_.resize(res_.width(), false); is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { + for (unsigned i = 0; i < in_.width(); ++i) + for (unsigned j = 0; j < in_.height(); ++j) { un_(i, j) = Safe(a, i, j) || Safe(b, i, j); in_(i, j) = Safe(a, i, j) && Safe(b, i, j); if (in_(i, j)) Align(i, j); @@ -188,16 +188,16 @@ struct RefineCommand : public Command { // if they match the constraints determined by pred void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) { if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) { if (adds(i, j) && !res_(i, j) && (this->*pred)(i, j)) Align(i, j); } return; } set<pair<int, int> > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) if (adds(i, j) && !res_(i, j)) p.insert(make_pair(i, j)); bool keep_going = !p.empty(); @@ -263,7 +263,7 @@ struct GDFACommand : public DiagCommand { map<string, boost::shared_ptr<Command> > commands; -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); ostringstream os; os << "Operation to perform:"; diff --git a/utils/ccrp.h b/utils/ccrp.h index 8635b422..1d41a3ef 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -232,7 +232,7 @@ class CCRP { if (num_customers() == 0) return; DiscountResampler dr(*this); StrengthResampler sr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { if (has_strength_prior()) { strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits<double>::min(), std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h index 6efbfc78..724b11bd 100644 --- a/utils/ccrp_nt.h +++ b/utils/ccrp_nt.h @@ -111,7 +111,7 @@ class CCRP_NoTable { void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { assert(has_alpha_prior()); ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); } diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 3cc48f8e..e86cbdc1 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -30,7 +30,7 @@ // to just set it #define L2_CACHE_LINE 128 -// this should just be a typedef to pair<int,T> on the new c++ +// this should just be a typedef to pair<unsigned,T> on the new c++ // I have to avoid this since I want to use unions and c++-98 // does not let unions have types with constructors in them // this type bypasses default constructors. use with caution! @@ -38,32 +38,32 @@ // does anything template <typename T> struct PairIntT { - const PairIntT& operator=(const std::pair<const int, T>& v) { + const PairIntT& operator=(const std::pair<const unsigned, T>& v) { std::memcpy(this, &v, sizeof(PairIntT)); return *this; } - operator const std::pair<const int, T>&() const { - return *reinterpret_cast<const std::pair<const int, T>*>(this); + operator const std::pair<const unsigned, T>&() const { + return *reinterpret_cast<const std::pair<const unsigned, T>*>(this); } - int& first() { - return reinterpret_cast<std::pair<int, T>*>(this)->first; + unsigned& first() { + return reinterpret_cast<std::pair<unsigned, T>*>(this)->first; } T& second() { - return reinterpret_cast<std::pair<int, T>*>(this)->second; + return reinterpret_cast<std::pair<unsigned, T>*>(this)->second; } - const int& first() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->first; + const unsigned& first() const { + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->first; } const T& second() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->second; + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->second; } private: // very bad way of bypassing the default constructor on T - char data_[sizeof(std::pair<int, T>)]; + char data_[sizeof(std::pair<unsigned, T>)]; }; -BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<int,float>)); +BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<unsigned,float>)); -template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)> +template <typename T, unsigned LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15u : 7u)> class FastSparseVector { public: struct const_iterator { @@ -79,17 +79,17 @@ class FastSparseVector { } const bool local_; const PairIntT<T>* local_it_; - typename std::map<int, T>::const_iterator remote_it_; - const std::pair<const int, T>& operator*() const { + typename std::map<unsigned, T>::const_iterator remote_it_; + const std::pair<const unsigned, T>& operator*() const { if (local_) - return *reinterpret_cast<const std::pair<const int, float>*>(local_it_); + return *reinterpret_cast<const std::pair<const unsigned, float>*>(local_it_); else return *remote_it_; } - const std::pair<const int, T>* operator->() const { + const std::pair<const unsigned, T>* operator->() const { if (local_) - return reinterpret_cast<const std::pair<const int, T>*>(local_it_); + return reinterpret_cast<const std::pair<const unsigned, T>*>(local_it_); else return &*remote_it_; } @@ -118,17 +118,17 @@ class FastSparseVector { } FastSparseVector(const FastSparseVector& other) { std::memcpy(this, &other, sizeof(FastSparseVector)); - if (is_remote_) data_.rbmap = new std::map<int, T>(*data_.rbmap); + if (is_remote_) data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); } - FastSparseVector(std::pair<int, T>* first, std::pair<int, T>* last) { + FastSparseVector(std::pair<unsigned, T>* first, std::pair<unsigned, T>* last) { const ptrdiff_t n = last - first; if (n <= LOCAL_MAX) { is_remote_ = false; local_size_ = n; - std::memcpy(data_.local, first, sizeof(std::pair<int, T>) * n); + std::memcpy(data_.local, first, sizeof(std::pair<unsigned, T>) * n); } else { is_remote_ = true; - data_.rbmap = new std::map<int, T>(first, last); + data_.rbmap = new std::map<unsigned, T>(first, last); } } void erase(int k) { @@ -150,31 +150,31 @@ class FastSparseVector { clear(); std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_remote_) - data_.rbmap = new std::map<int, T>(*data_.rbmap); + data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); return *this; } T const& get_singleton() const { assert(size()==1); return begin()->second; } - bool nonzero(int k) const { + bool nonzero(unsigned k) const { return static_cast<bool>(value(k)); } - inline void set_value(int k, const T& v) { + inline void set_value(unsigned k, const T& v) { get_or_create_bin(k) = v; } - inline T& add_value(int k, const T& v) { + inline T& add_value(unsigned k, const T& v) { return get_or_create_bin(k) += v; } - inline T get(int k) const { + inline T get(unsigned k) const { return value(k); } - inline T value(int k) const { + inline T value(unsigned k) const { if (is_remote_) { - typename std::map<int, T>::const_iterator it = data_.rbmap->find(k); + typename std::map<unsigned, T>::const_iterator it = data_.rbmap->find(k); if (it != data_.rbmap->end()) return it->second; } else { - for (int i = 0; i < local_size_; ++i) { + for (unsigned i = 0; i < local_size_; ++i) { const PairIntT<T>& p = data_.local[i]; if (p.first() == k) return p.second(); } @@ -256,8 +256,8 @@ class FastSparseVector { } inline FastSparseVector& operator*=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second *= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -267,8 +267,8 @@ class FastSparseVector { } inline FastSparseVector& operator/=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second /= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -300,7 +300,7 @@ class FastSparseVector { T dot(const std::vector<T>& v) const { T res = T(); for (const_iterator it = begin(), e = end(); it != e; ++it) - if (it->first < v.size()) res += it->second * v[it->first]; + if (static_cast<unsigned>(it->first) < v.size()) res += it->second * v[it->first]; return res; } T dot(const FastSparseVector<T>& other) const { @@ -330,11 +330,11 @@ class FastSparseVector { v.resize(i+1); return v[i]; } - inline T& get_or_create_bin(int k) { + inline T& get_or_create_bin(unsigned k) { if (is_remote_) { return (*data_.rbmap)[k]; } else { - for (int i = 0; i < local_size_; ++i) + for (unsigned i = 0; i < local_size_; ++i) if (data_.local[i].first() == k) return data_.local[i].second(); } assert(!is_remote_); @@ -353,17 +353,17 @@ class FastSparseVector { void swap_local_rbmap() { if (is_remote_) { // data is in rbmap, move to local assert(data_.rbmap->size() < LOCAL_MAX); - const std::map<int, T>* m = data_.rbmap; + const std::map<unsigned, T>* m = data_.rbmap; local_size_ = m->size(); int i = 0; - for (typename std::map<int, T>::const_iterator it = m->begin(); + for (typename std::map<unsigned, T>::const_iterator it = m->begin(); it != m->end(); ++it) { data_.local[i] = *it; ++i; } is_remote_ = false; } else { // data is local, move to rbmap - std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]); + std::map<unsigned, T>* m = new std::map<unsigned, T>(&data_.local[0], &data_.local[local_size_]); data_.rbmap = m; is_remote_ = true; } @@ -371,7 +371,7 @@ class FastSparseVector { union { PairIntT<T> local[LOCAL_MAX]; - std::map<int, T>* rbmap; + std::map<unsigned, T>* rbmap; } data_; unsigned char local_size_; bool is_remote_; @@ -399,8 +399,8 @@ class FastSparseVector { void load(Archive & ar, const unsigned int version) { (void) version; this->clear(); - int sz; ar & sz; - for (int i = 0; i < sz; ++i) { + unsigned sz; ar & sz; + for (unsigned i = 0; i < sz; ++i) { std::pair<std::string, T> wire_pair; ar & wire_pair; this->set_value(FD::Convert(wire_pair.first), wire_pair.second); diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc index cc886335..29a1a2ce 100644 --- a/utils/mfcr_test.cc +++ b/utils/mfcr_test.cc @@ -4,11 +4,17 @@ #include <cassert> #include <cmath> +#define BOOST_TEST_MODULE MFCRTest +#include <boost/test/unit_test.hpp> +#include <boost/test/floating_point_comparison.hpp> + #include "sampler.h" using namespace std; -void test_exch(MT19937* rng) { +BOOST_AUTO_TEST_CASE(Exchangability) { + MT19937 r; + MT19937* rng = &r; MFCR<2, int> crp(0.5, 3.0); vector<double> lambdas(2); vector<double> p0s(2); @@ -64,9 +70,3 @@ void test_exch(MT19937* rng) { assert(error2 < 0.05); }; -int main(int argc, char** argv) { - MT19937 rng; - test_exch(&rng); - return 0; -} - diff --git a/utils/sampler.h b/utils/sampler.h index 22c873d4..b237c716 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -49,9 +49,10 @@ struct RandomNumberGenerator { size_t SelectSample(const F& a, const F& b, double T = 1.0) { if (T == 1.0) { if (F(this->next()) > (a / (a + b))) return 1; else return 0; - } else { - assert(!"not implemented"); } + std::cerr << "SelectSample with annealing not implemented\n"; + abort(); + return 0; } // T is the annealing temperature, if desired diff --git a/utils/small_vector.h b/utils/small_vector.h index d04d1352..894b1b32 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -316,6 +316,7 @@ inline void swap(SmallVector<T,M> &a,SmallVector<T,M> &b) { } typedef SmallVector<int,2> SmallVectorInt; +typedef SmallVector<unsigned,2> SmallVectorUnsigned; template <class T,int M> void memcpy(void *out,SmallVector<T,M> const& v) { diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 27bb88dd..00e7bd60 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -32,7 +32,7 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(&data[off_objective]) = objective; *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats; char* cur = &data[off_data]; - assert(cur - data == off_data); + assert(static_cast<size_t>(cur - data) == off_data); for (const_iterator it = v.begin(); it != v.end(); ++it) { const string& fname = FD::Convert(it->first); *cur++ = static_cast<char>(fname.size()); // name len @@ -41,10 +41,10 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(cur) = it->second; cur += sizeof(double); } - assert(cur - data == off_magic); + assert(static_cast<size_t>(cur - data) == off_magic); *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu; cur += sizeof(unsigned int); - assert(cur - data == tot_size); + assert(static_cast<size_t>(cur - data) == tot_size); b64encode(data, tot_size, out); delete[] data; } diff --git a/utils/stringlib.h b/utils/stringlib.h index 13d14dbf..75772c4d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -231,7 +231,7 @@ template <class F> void VisitTokens(std::string const& s,F f) { if (0) { std::vector<std::string> ss=SplitOnWhitespace(s); - for (int i=0;i<ss.size();++i) + for (unsigned i=0;i<ss.size();++i) f(ss[i]); return; } diff --git a/utils/tdict.cc b/utils/tdict.cc index de234323..f33bd576 100644 --- a/utils/tdict.cc +++ b/utils/tdict.cc @@ -37,7 +37,7 @@ void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID> std::string TD::GetString(const std::vector<WordID>& str) { ostringstream o; - for (int i=0;i<str.size();++i) { + for (unsigned i=0;i<str.size();++i) { if (i) o << ' '; o << TD::Convert(str[i]); } diff --git a/utils/weights.cc b/utils/weights.cc index 39c18474..f56e2a20 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -45,11 +45,11 @@ void Weights::InitFromFile(const string& filename, } for (int i = buf.size() - 1; i > 0; --i) if (buf[i] == '=' || buf[i] == '\t') { buf[i] = ' '; break; } - int start = 0; + unsigned start = 0; while(start < buf.size() && buf[start] == ' ') ++start; - int end = 0; + unsigned end = 0; while(end < buf.size() && buf[end] != ' ') ++end; - const int fid = FD::Convert(buf.substr(start, end - start)); + const unsigned fid = FD::Convert(buf.substr(start, end - start)); if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); @@ -73,7 +73,7 @@ void Weights::InitFromFile(const string& filename, } else { // !read_text char buf[6]; in.read(buf, 5); - size_t num_keys; + int num_keys; in.read(reinterpret_cast<char*>(&num_keys), sizeof(size_t)); if (num_keys != FD::NumFeats()) { cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys << endl; @@ -102,8 +102,8 @@ void Weights::WriteToFile(const string& fname, if (write_text) { if (extra) { o << "# " << *extra << endl; } o.precision(17); - const int num_feats = FD::NumFeats(); - for (int i = 1; i < num_feats; ++i) { + const unsigned num_feats = FD::NumFeats(); + for (unsigned i = 1; i < num_feats; ++i) { const weight_t val = (i < weights.size() ? weights[i] : 0.0); if (hide_zero_value_features && val == 0.0) continue; o << FD::Convert(i) << ' ' << val << endl; @@ -126,7 +126,7 @@ void Weights::InitSparseVector(const vector<weight_t>& dv, } void Weights::SanityCheck(const vector<weight_t>& w) { - for (int i = 0; i < w.size(); ++i) { + for (unsigned i = 0; i < w.size(); ++i) { assert(!isnan(w[i])); assert(!isinf(w[i])); } @@ -142,7 +142,7 @@ struct FComp { void Weights::ShowLargestFeatures(const vector<weight_t>& w) { vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) + for (unsigned i = 0; i < w.size(); ++i) fnums[i] = i; int nf = FD::NumFeats(); if (nf > 10) nf = 10; |