diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-27 15:34:44 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-05-27 15:34:44 -0400 |
commit | 71c4918f05a4b380dfaebfabcc1847c1c6d497dd (patch) | |
tree | cd2a0c9c9175ddf8100b1c64d689e540f50eeae9 /utils | |
parent | ab38dc57a6a64aa7ef60a845a4176e18e1ac7f27 (diff) |
clean up
Diffstat (limited to 'utils')
-rw-r--r-- | utils/Makefile.am | 40 | ||||
-rw-r--r-- | utils/alignment_io.cc | 10 | ||||
-rw-r--r-- | utils/alignment_io.h | 8 | ||||
-rw-r--r-- | utils/array2d.h | 46 | ||||
-rw-r--r-- | utils/atools.cc | 44 | ||||
-rw-r--r-- | utils/ccrp.h | 2 | ||||
-rw-r--r-- | utils/ccrp_nt.h | 2 | ||||
-rw-r--r-- | utils/fast_sparse_vector.h | 86 | ||||
-rw-r--r-- | utils/mfcr_test.cc | 14 | ||||
-rw-r--r-- | utils/sampler.h | 5 | ||||
-rw-r--r-- | utils/small_vector.h | 1 | ||||
-rw-r--r-- | utils/sparse_vector.cc | 6 | ||||
-rw-r--r-- | utils/stringlib.h | 2 | ||||
-rw-r--r-- | utils/tdict.cc | 2 | ||||
-rw-r--r-- | utils/weights.cc | 16 |
15 files changed, 143 insertions, 141 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am index 46650c75..386344dd 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,10 +1,9 @@ - bin_PROGRAMS = reconstruct_weights atools -noinst_PROGRAMS = ts phmt mfcr_test -TESTS = ts phmt mfcr_test - -noinst_PROGRAMS += \ +noinst_PROGRAMS = \ + ts \ + phmt \ + mfcr_test \ crp_test \ dict_test \ m_test \ @@ -12,11 +11,7 @@ noinst_PROGRAMS += \ logval_test \ small_vector_test -TESTS += crp_test small_vector_test logval_test weights_test dict_test m_test - -reconstruct_weights_SOURCES = reconstruct_weights.cc - -atools_SOURCES = atools.cc +TESTS = ts mfcr_test crp_test small_vector_test logval_test weights_test dict_test m_test noinst_LIBRARIES = libutils.a @@ -39,26 +34,31 @@ if HAVE_CMPH libutils_a_SOURCES += perfect_hash.cc endif +reconstruct_weights_SOURCES = reconstruct_weights.cc +reconstruct_weights_LDADD = libutils.a -lz +atools_SOURCES = atools.cc +atools_LDADD = libutils.a -lz + phmt_SOURCES = phmt.cc +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ts_SOURCES = ts.cc +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz m_test_SOURCES = m_test.cc -m_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz dict_test_SOURCES = dict_test.cc -dict_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz mfcr_test_SOURCES = mfcr_test.cc -mfcr_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +mfcr_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz weights_test_SOURCES = weights_test.cc -weights_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz crp_test_SOURCES = crp_test.cc -crp_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +crp_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz logval_test_SOURCES = logval_test.cc -logval_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) - -AM_LDFLAGS = libutils.a -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I. +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. ################################################################ diff --git a/utils/alignment_io.cc b/utils/alignment_io.cc index 1d923f7f..460fbd3f 100644 --- a/utils/alignment_io.cc +++ b/utils/alignment_io.cc @@ -7,7 +7,7 @@ static bool is_digit(char x) { return x >= '0' && x <= '9'; } boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) { int max_x = 0; int max_y = 0; - int i = 0; + unsigned i = 0; size_t pos = al.rfind(" ||| "); if (pos != string::npos) { i = pos + 5; } while (i < al.size()) { @@ -65,8 +65,8 @@ boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const st void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) if (alignment(i,j)) { if (need_space) out << ' '; else need_space = true; out << i << '-' << j; @@ -77,8 +77,8 @@ void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) { ostream& out = *o; bool need_space = false; - for (int i = 0; i < alignment.width(); ++i) - for (int j = 0; j < alignment.height(); ++j) { + for (unsigned i = 0; i < alignment.width(); ++i) + for (unsigned j = 0; j < alignment.height(); ++j) { const AlignmentType& aij = alignment(i,j); if (aij != kNONE) { if (need_space) out << ' '; else need_space = true; diff --git a/utils/alignment_io.h b/utils/alignment_io.h index 36bcecd7..63fb916b 100644 --- a/utils/alignment_io.h +++ b/utils/alignment_io.h @@ -16,12 +16,12 @@ struct AlignmentIO { inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { switch (m(i,j)) { case AlignmentIO::kNONE: os << '.'; break; case AlignmentIO::kTRANSLATION: os << '*'; break; @@ -32,7 +32,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::Ali os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; diff --git a/utils/array2d.h b/utils/array2d.h index ee2600d2..1a8e4157 100644 --- a/utils/array2d.h +++ b/utils/array2d.h @@ -15,12 +15,12 @@ class Array2D { typedef typename std::vector<T>::iterator iterator; typedef typename std::vector<T>::const_iterator const_iterator; Array2D() : width_(0), height_(0) {} - Array2D(int w, int h, const T& d = T()) : + Array2D(unsigned w, unsigned h, const T& d = T()) : width_(w), height_(h), data_(w*h, d) {} Array2D(const Array2D& rhs) : width_(rhs.width_), height_(rhs.height_), data_(rhs.data_) {} bool empty() const { return data_.empty(); } - void resize(int w, int h, const T& d = T()) { + void resize(unsigned w, unsigned h, const T& d = T()) { data_.resize(w * h, d); width_ = w; height_ = h; @@ -32,25 +32,25 @@ class Array2D { return *this; } void fill(const T& v) { data_.assign(data_.size(), v); } - int width() const { return width_; } - int height() const { return height_; } - reference operator()(int i, int j) { + unsigned width() const { return width_; } + unsigned height() const { return height_; } + reference operator()(unsigned i, unsigned j) { return data_[offset(i, j)]; } void clear() { data_.clear(); width_=0; height_=0; } - const_reference operator()(int i, int j) const { + const_reference operator()(unsigned i, unsigned j) const { return data_[offset(i, j)]; } - iterator begin_col(int j) { + iterator begin_col(unsigned j) { return data_.begin() + offset(0,j); } - const_iterator begin_col(int j) const { + const_iterator begin_col(unsigned j) const { return data_.begin() + offset(0,j); } - iterator end_col(int j) { + iterator end_col(unsigned j) { return data_.begin() + offset(0,j) + width_; } - const_iterator end_col(int j) const { + const_iterator end_col(unsigned j) const { return data_.begin() + offset(0,j) + width_; } iterator end() { return data_.end(); } @@ -71,14 +71,14 @@ class Array2D { } private: - inline int offset(int i, int j) const { + inline unsigned offset(unsigned i, unsigned j) const { assert(i<width_); assert(j<height_); return i + j * width_; } - int width_; - int height_; + unsigned width_; + unsigned height_; std::vector<T> data_; }; @@ -120,8 +120,8 @@ Array2D<T> operator-(const Array2D<T>& l, const Array2D<T>& r) { template <typename T> inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { - for (int i=0; i<m.width(); ++i) { - for (int j=0; j<m.height(); ++j) + for (unsigned i=0; i<m.width(); ++i) { + for (unsigned j=0; j<m.height(); ++j) os << '\t' << m(i,j); os << '\n'; } @@ -130,17 +130,17 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<T>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (m(i,j) ? '*' : '.'); os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10); os << "\n"; return os; @@ -148,12 +148,12 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<bool>& m) { inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool> >& m) { os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; - for (int i=0; i<m.width(); ++i) { + for (unsigned i=0; i<m.width(); ++i) { os << (i%10); - for (int j=0; j<m.height(); ++j) { + for (unsigned j=0; j<m.height(); ++j) { const std::vector<bool>& ar = m(i,j); for (unsigned k=0; k<ar.size(); ++k) os << (ar[k] ? '*' : '.'); @@ -162,7 +162,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool os << (i%10) << "\n"; } os << ' '; - for (int j=0; j<m.height(); ++j) + for (unsigned j=0; j<m.height(); ++j) os << (j%10) << "\t"; os << "\n"; return os; diff --git a/utils/atools.cc b/utils/atools.cc index bce7822e..24406b71 100644 --- a/utils/atools.cc +++ b/utils/atools.cc @@ -27,7 +27,7 @@ struct Command { x->resize(max(a.width(), b.width()), max(a.height(), b.height())); } static bool Safe(const Array2D<bool>& a, int i, int j) { - if (i >= 0 && j >= 0 && i < a.width() && j < a.height()) + if (i >= 0 && j >= 0 && i < static_cast<int>(a.width()) && j < static_cast<int>(a.height())) return a(i,j); else return false; @@ -43,18 +43,18 @@ struct FMeasureCommand : public Command { bool RequiresTwoOperands() const { return true; } void Apply(const Array2D<bool>& hyp, const Array2D<bool>& ref, Array2D<bool>* x) { (void) x; // AER just computes statistics, not an alignment - int i_len = ref.width(); - int j_len = ref.height(); - for (int i = 0; i < i_len; ++i) { - for (int j = 0; j < j_len; ++j) { + unsigned i_len = ref.width(); + unsigned j_len = ref.height(); + for (unsigned i = 0; i < i_len; ++i) { + for (unsigned j = 0; j < j_len; ++j) { if (ref(i,j)) { ++num_in_ref; if (Safe(hyp, i, j)) ++matches; } } } - for (int i = 0; i < hyp.width(); ++i) - for (int j = 0; j < hyp.height(); ++j) + for (unsigned i = 0; i < hyp.width(); ++i) + for (unsigned j = 0; j < hyp.height(); ++j) if (hyp(i,j)) ++num_predicted; } void Summary() { @@ -97,8 +97,8 @@ struct InvertCommand : public Command { void Apply(const Array2D<bool>& in, const Array2D<bool>&, Array2D<bool>* x) { Array2D<bool>& res = *x; res.resize(in.height(), in.width()); - for (int i = 0; i < in.height(); ++i) - for (int j = 0; j < in.width(); ++j) + for (unsigned i = 0; i < in.height(); ++i) + for (unsigned j = 0; j < in.width(); ++j) res(i, j) = in(j, i); } }; @@ -109,8 +109,8 @@ struct IntersectCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < a.width(); ++i) - for (int j = 0; j < a.height(); ++j) + for (unsigned i = 0; i < a.width(); ++i) + for (unsigned j = 0; j < a.height(); ++j) res(i, j) = Safe(a, i, j) && Safe(b, i, j); } }; @@ -121,8 +121,8 @@ struct UnionCommand : public Command { void Apply(const Array2D<bool>& a, const Array2D<bool>& b, Array2D<bool>* x) { EnsureSize(a, b, x); Array2D<bool>& res = *x; - for (int i = 0; i < res.width(); ++i) - for (int j = 0; j < res.height(); ++j) + for (unsigned i = 0; i < res.width(); ++i) + for (unsigned j = 0; j < res.height(); ++j) res(i, j) = Safe(a, i, j) || Safe(b, i, j); } }; @@ -136,14 +136,14 @@ struct RefineCommand : public Command { } bool RequiresTwoOperands() const { return true; } - void Align(int i, int j) { + void Align(unsigned i, unsigned j) { res_(i, j) = true; is_i_aligned_[i] = true; is_j_aligned_[j] = true; } bool IsNeighborAligned(int i, int j) const { - for (int k = 0; k < neighbors_.size(); ++k) { + for (unsigned k = 0; k < neighbors_.size(); ++k) { const int di = neighbors_[k].first; const int dj = neighbors_[k].second; if (Safe(res_, i + di, j + dj)) @@ -177,8 +177,8 @@ struct RefineCommand : public Command { EnsureSize(a, b, &un_); is_i_aligned_.resize(res_.width(), false); is_j_aligned_.resize(res_.height(), false); - for (int i = 0; i < in_.width(); ++i) - for (int j = 0; j < in_.height(); ++j) { + for (unsigned i = 0; i < in_.width(); ++i) + for (unsigned j = 0; j < in_.height(); ++j) { un_(i, j) = Safe(a, i, j) || Safe(b, i, j); in_(i, j) = Safe(a, i, j) && Safe(b, i, j); if (in_(i, j)) Align(i, j); @@ -188,16 +188,16 @@ struct RefineCommand : public Command { // if they match the constraints determined by pred void Grow(Predicate pred, bool idempotent, const Array2D<bool>& adds) { if (idempotent) { - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) { + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) { if (adds(i, j) && !res_(i, j) && (this->*pred)(i, j)) Align(i, j); } return; } set<pair<int, int> > p; - for (int i = 0; i < adds.width(); ++i) - for (int j = 0; j < adds.height(); ++j) + for (unsigned i = 0; i < adds.width(); ++i) + for (unsigned j = 0; j < adds.height(); ++j) if (adds(i, j) && !res_(i, j)) p.insert(make_pair(i, j)); bool keep_going = !p.empty(); @@ -263,7 +263,7 @@ struct GDFACommand : public DiagCommand { map<string, boost::shared_ptr<Command> > commands; -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +void InitCommandLine(unsigned argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); ostringstream os; os << "Operation to perform:"; diff --git a/utils/ccrp.h b/utils/ccrp.h index 8635b422..1d41a3ef 100644 --- a/utils/ccrp.h +++ b/utils/ccrp.h @@ -232,7 +232,7 @@ class CCRP { if (num_customers() == 0) return; DiscountResampler dr(*this); StrengthResampler sr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { if (has_strength_prior()) { strength_ = slice_sampler1d(sr, strength_, *rng, -discount_ + std::numeric_limits<double>::min(), std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); diff --git a/utils/ccrp_nt.h b/utils/ccrp_nt.h index 6efbfc78..724b11bd 100644 --- a/utils/ccrp_nt.h +++ b/utils/ccrp_nt.h @@ -111,7 +111,7 @@ class CCRP_NoTable { void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { assert(has_alpha_prior()); ConcentrationResampler cr(*this); - for (int iter = 0; iter < nloop; ++iter) { + for (unsigned iter = 0; iter < nloop; ++iter) { alpha_ = slice_sampler1d(cr, alpha_, *rng, 0.0, std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); } diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 3cc48f8e..e86cbdc1 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -30,7 +30,7 @@ // to just set it #define L2_CACHE_LINE 128 -// this should just be a typedef to pair<int,T> on the new c++ +// this should just be a typedef to pair<unsigned,T> on the new c++ // I have to avoid this since I want to use unions and c++-98 // does not let unions have types with constructors in them // this type bypasses default constructors. use with caution! @@ -38,32 +38,32 @@ // does anything template <typename T> struct PairIntT { - const PairIntT& operator=(const std::pair<const int, T>& v) { + const PairIntT& operator=(const std::pair<const unsigned, T>& v) { std::memcpy(this, &v, sizeof(PairIntT)); return *this; } - operator const std::pair<const int, T>&() const { - return *reinterpret_cast<const std::pair<const int, T>*>(this); + operator const std::pair<const unsigned, T>&() const { + return *reinterpret_cast<const std::pair<const unsigned, T>*>(this); } - int& first() { - return reinterpret_cast<std::pair<int, T>*>(this)->first; + unsigned& first() { + return reinterpret_cast<std::pair<unsigned, T>*>(this)->first; } T& second() { - return reinterpret_cast<std::pair<int, T>*>(this)->second; + return reinterpret_cast<std::pair<unsigned, T>*>(this)->second; } - const int& first() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->first; + const unsigned& first() const { + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->first; } const T& second() const { - return reinterpret_cast<const std::pair<int, T>*>(this)->second; + return reinterpret_cast<const std::pair<unsigned, T>*>(this)->second; } private: // very bad way of bypassing the default constructor on T - char data_[sizeof(std::pair<int, T>)]; + char data_[sizeof(std::pair<unsigned, T>)]; }; -BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<int,float>)); +BOOST_STATIC_ASSERT(sizeof(PairIntT<float>) == sizeof(std::pair<unsigned,float>)); -template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)> +template <typename T, unsigned LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15u : 7u)> class FastSparseVector { public: struct const_iterator { @@ -79,17 +79,17 @@ class FastSparseVector { } const bool local_; const PairIntT<T>* local_it_; - typename std::map<int, T>::const_iterator remote_it_; - const std::pair<const int, T>& operator*() const { + typename std::map<unsigned, T>::const_iterator remote_it_; + const std::pair<const unsigned, T>& operator*() const { if (local_) - return *reinterpret_cast<const std::pair<const int, float>*>(local_it_); + return *reinterpret_cast<const std::pair<const unsigned, float>*>(local_it_); else return *remote_it_; } - const std::pair<const int, T>* operator->() const { + const std::pair<const unsigned, T>* operator->() const { if (local_) - return reinterpret_cast<const std::pair<const int, T>*>(local_it_); + return reinterpret_cast<const std::pair<const unsigned, T>*>(local_it_); else return &*remote_it_; } @@ -118,17 +118,17 @@ class FastSparseVector { } FastSparseVector(const FastSparseVector& other) { std::memcpy(this, &other, sizeof(FastSparseVector)); - if (is_remote_) data_.rbmap = new std::map<int, T>(*data_.rbmap); + if (is_remote_) data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); } - FastSparseVector(std::pair<int, T>* first, std::pair<int, T>* last) { + FastSparseVector(std::pair<unsigned, T>* first, std::pair<unsigned, T>* last) { const ptrdiff_t n = last - first; if (n <= LOCAL_MAX) { is_remote_ = false; local_size_ = n; - std::memcpy(data_.local, first, sizeof(std::pair<int, T>) * n); + std::memcpy(data_.local, first, sizeof(std::pair<unsigned, T>) * n); } else { is_remote_ = true; - data_.rbmap = new std::map<int, T>(first, last); + data_.rbmap = new std::map<unsigned, T>(first, last); } } void erase(int k) { @@ -150,31 +150,31 @@ class FastSparseVector { clear(); std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_remote_) - data_.rbmap = new std::map<int, T>(*data_.rbmap); + data_.rbmap = new std::map<unsigned, T>(*data_.rbmap); return *this; } T const& get_singleton() const { assert(size()==1); return begin()->second; } - bool nonzero(int k) const { + bool nonzero(unsigned k) const { return static_cast<bool>(value(k)); } - inline void set_value(int k, const T& v) { + inline void set_value(unsigned k, const T& v) { get_or_create_bin(k) = v; } - inline T& add_value(int k, const T& v) { + inline T& add_value(unsigned k, const T& v) { return get_or_create_bin(k) += v; } - inline T get(int k) const { + inline T get(unsigned k) const { return value(k); } - inline T value(int k) const { + inline T value(unsigned k) const { if (is_remote_) { - typename std::map<int, T>::const_iterator it = data_.rbmap->find(k); + typename std::map<unsigned, T>::const_iterator it = data_.rbmap->find(k); if (it != data_.rbmap->end()) return it->second; } else { - for (int i = 0; i < local_size_; ++i) { + for (unsigned i = 0; i < local_size_; ++i) { const PairIntT<T>& p = data_.local[i]; if (p.first() == k) return p.second(); } @@ -256,8 +256,8 @@ class FastSparseVector { } inline FastSparseVector& operator*=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second *= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -267,8 +267,8 @@ class FastSparseVector { } inline FastSparseVector& operator/=(const T& scalar) { if (is_remote_) { - const typename std::map<int, T>::iterator end = data_.rbmap->end(); - for (typename std::map<int, T>::iterator it = data_.rbmap->begin(); it != end; ++it) + const typename std::map<unsigned, T>::iterator end = data_.rbmap->end(); + for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it) it->second /= scalar; } else { for (int i = 0; i < local_size_; ++i) @@ -300,7 +300,7 @@ class FastSparseVector { T dot(const std::vector<T>& v) const { T res = T(); for (const_iterator it = begin(), e = end(); it != e; ++it) - if (it->first < v.size()) res += it->second * v[it->first]; + if (static_cast<unsigned>(it->first) < v.size()) res += it->second * v[it->first]; return res; } T dot(const FastSparseVector<T>& other) const { @@ -330,11 +330,11 @@ class FastSparseVector { v.resize(i+1); return v[i]; } - inline T& get_or_create_bin(int k) { + inline T& get_or_create_bin(unsigned k) { if (is_remote_) { return (*data_.rbmap)[k]; } else { - for (int i = 0; i < local_size_; ++i) + for (unsigned i = 0; i < local_size_; ++i) if (data_.local[i].first() == k) return data_.local[i].second(); } assert(!is_remote_); @@ -353,17 +353,17 @@ class FastSparseVector { void swap_local_rbmap() { if (is_remote_) { // data is in rbmap, move to local assert(data_.rbmap->size() < LOCAL_MAX); - const std::map<int, T>* m = data_.rbmap; + const std::map<unsigned, T>* m = data_.rbmap; local_size_ = m->size(); int i = 0; - for (typename std::map<int, T>::const_iterator it = m->begin(); + for (typename std::map<unsigned, T>::const_iterator it = m->begin(); it != m->end(); ++it) { data_.local[i] = *it; ++i; } is_remote_ = false; } else { // data is local, move to rbmap - std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]); + std::map<unsigned, T>* m = new std::map<unsigned, T>(&data_.local[0], &data_.local[local_size_]); data_.rbmap = m; is_remote_ = true; } @@ -371,7 +371,7 @@ class FastSparseVector { union { PairIntT<T> local[LOCAL_MAX]; - std::map<int, T>* rbmap; + std::map<unsigned, T>* rbmap; } data_; unsigned char local_size_; bool is_remote_; @@ -399,8 +399,8 @@ class FastSparseVector { void load(Archive & ar, const unsigned int version) { (void) version; this->clear(); - int sz; ar & sz; - for (int i = 0; i < sz; ++i) { + unsigned sz; ar & sz; + for (unsigned i = 0; i < sz; ++i) { std::pair<std::string, T> wire_pair; ar & wire_pair; this->set_value(FD::Convert(wire_pair.first), wire_pair.second); diff --git a/utils/mfcr_test.cc b/utils/mfcr_test.cc index cc886335..29a1a2ce 100644 --- a/utils/mfcr_test.cc +++ b/utils/mfcr_test.cc @@ -4,11 +4,17 @@ #include <cassert> #include <cmath> +#define BOOST_TEST_MODULE MFCRTest +#include <boost/test/unit_test.hpp> +#include <boost/test/floating_point_comparison.hpp> + #include "sampler.h" using namespace std; -void test_exch(MT19937* rng) { +BOOST_AUTO_TEST_CASE(Exchangability) { + MT19937 r; + MT19937* rng = &r; MFCR<2, int> crp(0.5, 3.0); vector<double> lambdas(2); vector<double> p0s(2); @@ -64,9 +70,3 @@ void test_exch(MT19937* rng) { assert(error2 < 0.05); }; -int main(int argc, char** argv) { - MT19937 rng; - test_exch(&rng); - return 0; -} - diff --git a/utils/sampler.h b/utils/sampler.h index 22c873d4..b237c716 100644 --- a/utils/sampler.h +++ b/utils/sampler.h @@ -49,9 +49,10 @@ struct RandomNumberGenerator { size_t SelectSample(const F& a, const F& b, double T = 1.0) { if (T == 1.0) { if (F(this->next()) > (a / (a + b))) return 1; else return 0; - } else { - assert(!"not implemented"); } + std::cerr << "SelectSample with annealing not implemented\n"; + abort(); + return 0; } // T is the annealing temperature, if desired diff --git a/utils/small_vector.h b/utils/small_vector.h index d04d1352..894b1b32 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -316,6 +316,7 @@ inline void swap(SmallVector<T,M> &a,SmallVector<T,M> &b) { } typedef SmallVector<int,2> SmallVectorInt; +typedef SmallVector<unsigned,2> SmallVectorUnsigned; template <class T,int M> void memcpy(void *out,SmallVector<T,M> const& v) { diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 27bb88dd..00e7bd60 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -32,7 +32,7 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(&data[off_objective]) = objective; *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats; char* cur = &data[off_data]; - assert(cur - data == off_data); + assert(static_cast<size_t>(cur - data) == off_data); for (const_iterator it = v.begin(); it != v.end(); ++it) { const string& fname = FD::Convert(it->first); *cur++ = static_cast<char>(fname.size()); // name len @@ -41,10 +41,10 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { *reinterpret_cast<double*>(cur) = it->second; cur += sizeof(double); } - assert(cur - data == off_magic); + assert(static_cast<size_t>(cur - data) == off_magic); *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu; cur += sizeof(unsigned int); - assert(cur - data == tot_size); + assert(static_cast<size_t>(cur - data) == tot_size); b64encode(data, tot_size, out); delete[] data; } diff --git a/utils/stringlib.h b/utils/stringlib.h index 13d14dbf..75772c4d 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -231,7 +231,7 @@ template <class F> void VisitTokens(std::string const& s,F f) { if (0) { std::vector<std::string> ss=SplitOnWhitespace(s); - for (int i=0;i<ss.size();++i) + for (unsigned i=0;i<ss.size();++i) f(ss[i]); return; } diff --git a/utils/tdict.cc b/utils/tdict.cc index de234323..f33bd576 100644 --- a/utils/tdict.cc +++ b/utils/tdict.cc @@ -37,7 +37,7 @@ void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID> std::string TD::GetString(const std::vector<WordID>& str) { ostringstream o; - for (int i=0;i<str.size();++i) { + for (unsigned i=0;i<str.size();++i) { if (i) o << ' '; o << TD::Convert(str[i]); } diff --git a/utils/weights.cc b/utils/weights.cc index 39c18474..f56e2a20 100644 --- a/utils/weights.cc +++ b/utils/weights.cc @@ -45,11 +45,11 @@ void Weights::InitFromFile(const string& filename, } for (int i = buf.size() - 1; i > 0; --i) if (buf[i] == '=' || buf[i] == '\t') { buf[i] = ' '; break; } - int start = 0; + unsigned start = 0; while(start < buf.size() && buf[start] == ' ') ++start; - int end = 0; + unsigned end = 0; while(end < buf.size() && buf[end] != ' ') ++end; - const int fid = FD::Convert(buf.substr(start, end - start)); + const unsigned fid = FD::Convert(buf.substr(start, end - start)); if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); } while(end < buf.size() && buf[end] == ' ') ++end; val = strtod(&buf.c_str()[end], NULL); @@ -73,7 +73,7 @@ void Weights::InitFromFile(const string& filename, } else { // !read_text char buf[6]; in.read(buf, 5); - size_t num_keys; + int num_keys; in.read(reinterpret_cast<char*>(&num_keys), sizeof(size_t)); if (num_keys != FD::NumFeats()) { cerr << "Hash function reports " << FD::NumFeats() << " keys but weights file contains " << num_keys << endl; @@ -102,8 +102,8 @@ void Weights::WriteToFile(const string& fname, if (write_text) { if (extra) { o << "# " << *extra << endl; } o.precision(17); - const int num_feats = FD::NumFeats(); - for (int i = 1; i < num_feats; ++i) { + const unsigned num_feats = FD::NumFeats(); + for (unsigned i = 1; i < num_feats; ++i) { const weight_t val = (i < weights.size() ? weights[i] : 0.0); if (hide_zero_value_features && val == 0.0) continue; o << FD::Convert(i) << ' ' << val << endl; @@ -126,7 +126,7 @@ void Weights::InitSparseVector(const vector<weight_t>& dv, } void Weights::SanityCheck(const vector<weight_t>& w) { - for (int i = 0; i < w.size(); ++i) { + for (unsigned i = 0; i < w.size(); ++i) { assert(!isnan(w[i])); assert(!isinf(w[i])); } @@ -142,7 +142,7 @@ struct FComp { void Weights::ShowLargestFeatures(const vector<weight_t>& w) { vector<int> fnums(w.size()); - for (int i = 0; i < w.size(); ++i) + for (unsigned i = 0; i < w.size(); ++i) fnums[i] = i; int nf = FD::NumFeats(); if (nf > 10) nf = 10; |