From a448faf30fa46c006b1d38c6aee64a7aad29ac5e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 16 May 2012 07:09:16 -0400 Subject: clean up --- configure.ac | 2 ++ utils/array2d.h | 2 +- utils/fast_sparse_vector.h | 4 +++- utils/fdict.h | 2 ++ utils/hash.h | 6 ++++-- utils/perfect_hash.h | 13 ++++++++----- utils/phmt.cc | 2 ++ utils/small_vector.h | 28 ++++++++++++++-------------- utils/sparse_vector.cc | 2 +- 9 files changed, 37 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index 1e853fb6..6d2a8c60 100644 --- a/configure.ac +++ b/configure.ac @@ -130,4 +130,6 @@ then AM_CONDITIONAL([GLC], true) fi +CPPFLAGS="$CPPFLAGS -DHAVE_CONFIG_H" + AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile creg/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) diff --git a/utils/array2d.h b/utils/array2d.h index e63eda0d..ee2600d2 100644 --- a/utils/array2d.h +++ b/utils/array2d.h @@ -155,7 +155,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D& ar = m(i,j); - for (int k=0; k #include +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #include #if HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP @@ -323,7 +325,7 @@ class FastSparseVector { std::memcpy(&data_, t, sizeof(data_)); } private: - static inline T& extend_vector(std::vector &v,int i) { + static inline T& extend_vector(std::vector &v,size_t i) { if (i>=v.size()) v.resize(i+1); return v[i]; diff --git a/utils/fdict.h b/utils/fdict.h index 0a2a9456..71547d2e 100644 --- a/utils/fdict.h +++ b/utils/fdict.h @@ -1,7 +1,9 @@ #ifndef _FDICT_H_ #define _FDICT_H_ +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #include #include diff --git a/utils/hash.h b/utils/hash.h index 2290bc34..31457430 100644 --- a/utils/hash.h +++ b/utils/hash.h @@ -5,7 +5,10 @@ #include "murmur_hash.h" +#ifdef HAVE_CONFIG_H #include "config.h" +#endif + #ifdef HAVE_SPARSEHASH # include # include @@ -130,8 +133,7 @@ bool maybe_add(H &ht,K const& k,typename H::mapped_type const& v) { // ht[k] must not exist (yet) template void add(H &ht,K const& k,typename H::mapped_type const& v) { - bool fresh=maybe_add(ht,k,v); - assert(fresh); + maybe_add(ht,k,v); } diff --git a/utils/perfect_hash.h b/utils/perfect_hash.h index 8ac11f18..29ea48a9 100644 --- a/utils/perfect_hash.h +++ b/utils/perfect_hash.h @@ -1,15 +1,16 @@ #ifndef _PERFECT_HASH_MAP_H_ #define _PERFECT_HASH_MAP_H_ -#include "config.h" +#include +#include -#ifndef HAVE_CMPH -#error libcmph is required to use PerfectHashFunction +#ifdef HAVE_CONFIG_H +#include "config.h" #endif -#include -#include +#ifdef HAVE_CMPH #include "cmph.h" +#endif class PerfectHashFunction : boost::noncopyable { public: @@ -18,7 +19,9 @@ class PerfectHashFunction : boost::noncopyable { size_t operator()(const std::string& key) const; size_t number_of_keys() const; private: +#ifdef HAVE_CMPH cmph_t *mphf_; +#endif }; #endif diff --git a/utils/phmt.cc b/utils/phmt.cc index 48d9f093..b17febf6 100644 --- a/utils/phmt.cc +++ b/utils/phmt.cc @@ -1,4 +1,6 @@ +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #ifndef HAVE_CMPH int main() { diff --git a/utils/small_vector.h b/utils/small_vector.h index b65c3b38..d04d1352 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -50,16 +50,16 @@ class SmallVector { explicit SmallVector(size_t s) { Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i) new(&data_.vals[i]) T(); + for (unsigned i = 0; i < s; ++i) new(&data_.vals[i]) T(); } //TODO: if alloc were raw space, construct here. } SmallVector(size_t s, T const& v) { Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i) data_.vals[i] = v; + for (unsigned i = 0; i < s; ++i) data_.vals[i] = v; } else { - for (int i = 0; i < size_; ++i) data_.ptr[i] = v; + for (unsigned i = 0; i < size_; ++i) data_.ptr[i] = v; } } @@ -69,9 +69,9 @@ class SmallVector { int s=end-begin; Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; + for (unsigned i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; } else - for (int i = 0; i < s; ++i,++begin) data_.ptr[i] = *begin; + for (unsigned i = 0; i < s; ++i,++begin) data_.ptr[i] = *begin; } SmallVector(const Self& o) : size_(o.size_) { @@ -106,7 +106,7 @@ class SmallVector { if (size_ <= SV_MAX) { if (o.size_ <= SV_MAX) { size_ = o.size_; - for (int i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i]; + for (unsigned i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i]; } else { capacity_ = size_ = o.size_; data_.ptr = new T[capacity_]; @@ -116,7 +116,7 @@ class SmallVector { if (o.size_ <= SV_MAX) { delete[] data_.ptr; size_ = o.size_; - for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i]; + for (unsigned i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i]; } else { if (capacity_ < o.size_) { delete[] data_.ptr; @@ -124,7 +124,7 @@ class SmallVector { data_.ptr = new T[capacity_]; } size_ = o.size_; - for (int i = 0; i < size_; ++i) + for (unsigned i = 0; i < size_; ++i) data_.ptr[i] = o.data_.ptr[i]; } } @@ -135,7 +135,7 @@ class SmallVector { if (size_ <= SV_MAX) { // skip if pod? yes, we required pod anyway. no need to destruct #if !SMALL_VECTOR_POD - for (int i=0;i SV_MAX) { T *tmp=data_.ptr; - for (int i = 0; i < s; ++i) data_.vals[i] = tmp[i]; + for (unsigned i = 0; i < s; ++i) data_.vals[i] = tmp[i]; delete[] tmp; size_ = s; return; @@ -233,7 +233,7 @@ public: size_ = s; return; } else { - for (int i = size_; i < s; ++i) + for (unsigned i = size_; i < s; ++i) data_.vals[i] = v; size_ = s; return; @@ -244,7 +244,7 @@ public: if (s > capacity_) ensure_capacity(s); if (s > size_) { - for (int i = size_; i < s; ++i) + for (unsigned i = size_; i < s; ++i) data_.ptr[i] = v; } size_ = s; diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 24da5f39..27bb88dd 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -23,7 +23,7 @@ void Encode(double objective, const SparseVector& v, ostream* out) { for (const_iterator it = v.begin(); it != v.end(); ++it) tot_size += FD::Convert(it->first).size(); // feature names; tot_size += sizeof(double) * num_feats; // gradient - const size_t off_magic = tot_size; + const size_t off_magic = tot_size; (void) off_magic; tot_size += 4; // magic // size_t b64_size = tot_size * 4 / 3; -- cgit v1.2.3 From 75d0f86f8f949e37c600096accf85d5521940f40 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 16 May 2012 17:01:21 -0400 Subject: bjam cleanup --- Jamroot | 4 ++++ decoder/Jamfile | 7 ++++++- decoder/decoder.cc | 12 ++++++------ decoder/hg.h | 6 +++--- decoder/viterbi.h | 18 +++++++++--------- klm/util/Jamfile | 2 +- mteval/Jamfile | 2 +- 7 files changed, 30 insertions(+), 21 deletions(-) diff --git a/Jamroot b/Jamroot index f873db68..6daf7a9e 100644 --- a/Jamroot +++ b/Jamroot @@ -18,6 +18,10 @@ if [ test_header google/dense_hash_map ] || $(with-google-hash) { requirements += HAVE_SPARSEHASH $(with-google-hash) ; } +if [ test_header cmph.h ] || $(with-cmph) { + requirements += HAVE_CMPH $(with-cmph) ; +} + if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serialization ] { requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; } diff --git a/decoder/Jamfile b/decoder/Jamfile index f8112cae..871da4f6 100644 --- a/decoder/Jamfile +++ b/decoder/Jamfile @@ -61,9 +61,14 @@ lib cdec_lib : ../klm/lm//kenlm ..//boost_program_options : . + : : + ..//utils + ..//mteval + ../klm/lm//kenlm + ..//boost_program_options ; -exe cdec : cdec.cc cdec_lib ; +exe cdec : cdec.cc cdec_lib ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : $(TOP)/decoder/test_data ; diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 4ce2ba86..487c7635 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -965,14 +965,14 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { { ReadFile rf(writer.fname_); bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg); - assert(succeeded); + if (!succeeded) abort(); } new_hg.Union(forest); bool succeeded = writer.Write(new_hg, false); - assert(succeeded); + if (!succeeded) abort(); } else { bool succeeded = writer.Write(forest, false); - assert(succeeded); + if (!succeeded) abort(); } } @@ -1052,14 +1052,14 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { { ReadFile rf(writer.fname_); bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg); - assert(succeeded); + if (!succeeded) abort(); } new_hg.Union(forest); bool succeeded = writer.Write(new_hg, false); - assert(succeeded); + if (!succeeded) abort(); } else { bool succeeded = writer.Write(forest, false); - assert(succeeded); + if (!succeeded) abort(); } } if (aligner_mode && !output_training_vector) diff --git a/decoder/hg.h b/decoder/hg.h index f0ddbb76..dfa4ac6d 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -189,7 +189,7 @@ public: o<<'('; show(o,show_mask); if (indent) o<<'\n'; - for (int i=0;irule_ = rule; edge->tail_nodes_ = tail; edge->id_ = eid; - for (int i = 0; i < edge->tail_nodes_.size(); ++i) + for (unsigned i = 0; i < edge->tail_nodes_.size(); ++i) nodes_[edge->tail_nodes_[i]].out_edges_.push_back(edge->id_); return edge; } diff --git a/decoder/viterbi.h b/decoder/viterbi.h index daee3d7a..3092f6da 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -32,16 +32,16 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, WeightType* const cur_node_best_weight = &vit_weight[i]; T* const cur_node_best_result = &vit_result[i]; - const int num_in_edges = cur_node.in_edges_.size(); + const unsigned num_in_edges = cur_node.in_edges_.size(); if (num_in_edges == 0) { *cur_node_best_weight = WeightType(1); continue; } Hypergraph::Edge const* edge_best=0; - for (int j = 0; j < num_in_edges; ++j) { + for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; WeightType score = weight(edge); - for (int k = 0; k < edge.tail_nodes_.size(); ++k) + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) score *= vit_weight[edge.tail_nodes_[k]]; if (!edge_best || *cur_node_best_weight < score) { *cur_node_best_weight = score; @@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, assert(edge_best); Hypergraph::Edge const& edgeb=*edge_best; std::vector antsb(edgeb.tail_nodes_.size()); - for (int k = 0; k < edgeb.tail_nodes_.size(); ++k) + for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k) antsb[k] = &vit_result[edgeb.tail_nodes_[k]]; traverse(edgeb, antsb, cur_node_best_result); } @@ -101,7 +101,7 @@ struct PathLengthTraversal { int* result) const { (void) edge; *result = 1; - for (int i = 0; i < ants.size(); ++i) *result += *ants[i]; + for (unsigned i = 0; i < ants.size(); ++i) *result += *ants[i]; } }; @@ -120,7 +120,7 @@ struct ELengthTraversal { const std::vector& ants, int* result) const { *result = edge.rule_->ELength() - edge.rule_->Arity(); - for (int i = 0; i < ants.size(); ++i) *result += *ants[i]; + for (unsigned i = 0; i < ants.size(); ++i) *result += *ants[i]; } }; @@ -179,8 +179,8 @@ struct ViterbiPathTraversal { void operator()(const Hypergraph::Edge& edge, std::vector const& ants, Result* result) const { - for (int i = 0; i < ants.size(); ++i) - for (int j = 0; j < ants[i]->size(); ++j) + for (unsigned i = 0; i < ants.size(); ++i) + for (unsigned j = 0; j < ants[i]->size(); ++j) result->push_back((*ants[i])[j]); result->push_back(&edge); } @@ -191,7 +191,7 @@ struct FeatureVectorTraversal { void operator()(Hypergraph::Edge const& edge, std::vector const& ants, Result* result) const { - for (int i = 0; i < ants.size(); ++i) + for (unsigned i = 0; i < ants.size(); ++i) *result+=*ants[i]; *result+=edge.feature_values_; } diff --git a/klm/util/Jamfile b/klm/util/Jamfile index 00eefc22..b8c14347 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,4 +1,4 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..///z : .. : : .. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..//z : .. : : .. ; import testing ; diff --git a/mteval/Jamfile b/mteval/Jamfile index 24a95e8f..6260caea 100644 --- a/mteval/Jamfile +++ b/mteval/Jamfile @@ -1,6 +1,6 @@ import testing ; -lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ..//utils : . : : . ; +lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ..//utils : . : : . ..//z ; exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; alias programs : fast_score mbr_kbest ; -- cgit v1.2.3 From 141f566baf82129fd339fa28e1e98a17c6e37dcc Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 17 May 2012 10:45:55 -0400 Subject: Fix JSON parser for unicode, empty feature vectors --- creg/json_feature_map_lexer.ll | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll index cbb6d9a9..f9ce7977 100644 --- a/creg/json_feature_map_lexer.ll +++ b/creg/json_feature_map_lexer.ll @@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] {WS}*{LCB}{WS}* { BEGIN(PREVAL); } +{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector x; + json_fmap_callback(instid, x, json_fmap_callback_extra); + curfeat = 0; + BEGIN(INITIAL);} + \" { BEGIN(STRING); spos=0; } \" { featname[spos] = 0; @@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] \\n { } \\r { } \\t { } -\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort(); +\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16); + spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1; } {WS}*:{WS}* { BEGIN(DOUBLE); } @@ -129,4 +135,3 @@ int main() { JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); } #endif - -- cgit v1.2.3 From ff6cc0d60f32c11fca9ffd81af93bec334728f31 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 17 May 2012 23:48:23 -0400 Subject: jamfiles for creg --- Jamroot | 4 ++-- creg/Jamfile | 6 ++++++ creg/creg.cc | 2 +- training/liblbfgs/Jamfile | 5 +++++ 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 creg/Jamfile create mode 100644 training/liblbfgs/Jamfile diff --git a/Jamroot b/Jamroot index 6daf7a9e..f42a9bc3 100644 --- a/Jamroot +++ b/Jamroot @@ -29,9 +29,9 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa project : requirements $(requirements) ; project : default-build single on release ; -install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec phrasinator//programs ; +install-bin-libs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec creg//creg phrasinator//programs ; -build-projects mteval decoder klm/lm ; +build-projects mteval decoder klm/lm training/liblbfgs creg ; #Compile everything ending with _test.cc into a test and run it. rule all_tests ( targets * : dependencies : properties * ) { diff --git a/creg/Jamfile b/creg/Jamfile new file mode 100644 index 00000000..cfed2388 --- /dev/null +++ b/creg/Jamfile @@ -0,0 +1,6 @@ +import lex ; + +exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : ../training . : ..//z ; + +alias programs : creg ; + diff --git a/creg/creg.cc b/creg/creg.cc index 005ec9ac..b145ac49 100644 --- a/creg/creg.cc +++ b/creg/creg.cc @@ -65,7 +65,7 @@ void ReaderCB(const string& id, const SparseVector& fmap, void* extra) { if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; } const unordered_map::iterator it = rh.id2ind.find(id); if (it == rh.id2ind.end()) { - cerr << "Unlabeled example in line " << rh.lc << endl; + cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl; abort(); } (*rh.xy_pairs)[it->second - 1].x = fmap; diff --git a/training/liblbfgs/Jamfile b/training/liblbfgs/Jamfile new file mode 100644 index 00000000..49c82748 --- /dev/null +++ b/training/liblbfgs/Jamfile @@ -0,0 +1,5 @@ +import testing ; + +lib liblbfgs : lbfgs.c : .. ; + +unit-test ll_test : ll_test.cc liblbfgs : .. ; -- cgit v1.2.3 From bc761e1c2ea93da11a470fd55f9f48303afb8b4c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 17 May 2012 23:53:06 -0400 Subject: Training jamfile --- training/Jamfile | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 training/Jamfile diff --git a/training/Jamfile b/training/Jamfile new file mode 100644 index 00000000..b28a13e3 --- /dev/null +++ b/training/Jamfile @@ -0,0 +1,25 @@ +import testing ; +import option ; + +lib training : + ..//utils + ..//mteval + ..//decoder + ../klm/lm//kenlm + ..//boost_program_options + ttables.cc + : . + : : + ..//decoder + ../klm/lm//kenlm + ..//utils + ..//mteval + ..//boost_program_options + ; + +exe model1 : model1.cc : ../decoder ; + +# // all_tests [ glob *_test.cc ] : cdec_lib : $(TOP)/decoder/test_data ; + +alias programs : model1 ; + -- cgit v1.2.3 From a70d6d3ed83a32d3cdf4bcb36a087426a4ed2c31 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 18 May 2012 00:24:35 -0400 Subject: build with bjam --- Jamroot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jamroot b/Jamroot index f42a9bc3..fff11d56 100644 --- a/Jamroot +++ b/Jamroot @@ -26,7 +26,8 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; } -project : requirements $(requirements) ; +# project : requirements $(requirements) ; +project : requirements $(requirements) darwin:static ; project : default-build single on release ; install-bin-libs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec creg//creg phrasinator//programs ; -- cgit v1.2.3