diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-18 13:16:03 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-18 13:16:03 +0200 |
commit | 22d2f3ce5bd3fae2db9744eb66f7b8c3996265d6 (patch) | |
tree | 5cebab1bd3a0e77b108a19a786fe541cd7ce546c | |
parent | 78a0ee61c2d2d846306b60a8ac862a2d649bcf59 (diff) | |
parent | a70d6d3ed83a32d3cdf4bcb36a087426a4ed2c31 (diff) |
Merge remote-tracking branch 'upstream/master'
-rw-r--r-- | Jamroot | 11 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | creg/Jamfile | 6 | ||||
-rw-r--r-- | creg/creg.cc | 2 | ||||
-rw-r--r-- | creg/json_feature_map_lexer.ll | 9 | ||||
-rw-r--r-- | decoder/Jamfile | 7 | ||||
-rw-r--r-- | decoder/decoder.cc | 12 | ||||
-rw-r--r-- | decoder/hg.h | 6 | ||||
-rw-r--r-- | decoder/viterbi.h | 18 | ||||
-rw-r--r-- | klm/util/Jamfile | 2 | ||||
-rw-r--r-- | mteval/Jamfile | 2 | ||||
-rw-r--r-- | training/Jamfile | 25 | ||||
-rw-r--r-- | training/liblbfgs/Jamfile | 5 | ||||
-rw-r--r-- | utils/array2d.h | 2 | ||||
-rw-r--r-- | utils/fast_sparse_vector.h | 4 | ||||
-rw-r--r-- | utils/fdict.h | 2 | ||||
-rw-r--r-- | utils/hash.h | 6 | ||||
-rw-r--r-- | utils/perfect_hash.h | 13 | ||||
-rw-r--r-- | utils/phmt.cc | 2 | ||||
-rw-r--r-- | utils/small_vector.h | 28 | ||||
-rw-r--r-- | utils/sparse_vector.cc | 2 |
21 files changed, 115 insertions, 51 deletions
@@ -18,16 +18,21 @@ if [ test_header google/dense_hash_map ] || $(with-google-hash) { requirements += <define>HAVE_SPARSEHASH <include>$(with-google-hash) ; } +if [ test_header cmph.h ] || $(with-cmph) { + requirements += <define>HAVE_CMPH <include>$(with-cmph) ; +} + if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serialization ] { requirements += <define>HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; } -project : requirements $(requirements) ; +# project : requirements $(requirements) ; +project : requirements $(requirements) <toolset>darwin:<link>static ; project : default-build <threading>single <warnings>on <variant>release ; -install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec phrasinator//programs ; +install-bin-libs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec creg//creg phrasinator//programs ; -build-projects mteval decoder klm/lm ; +build-projects mteval decoder klm/lm training/liblbfgs creg ; #Compile everything ending with _test.cc into a test and run it. rule all_tests ( targets * : dependencies : properties * ) { diff --git a/configure.ac b/configure.ac index 1e853fb6..6d2a8c60 100644 --- a/configure.ac +++ b/configure.ac @@ -130,4 +130,6 @@ then AM_CONDITIONAL([GLC], true) fi +CPPFLAGS="$CPPFLAGS -DHAVE_CONFIG_H" + AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile creg/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) diff --git a/creg/Jamfile b/creg/Jamfile new file mode 100644 index 00000000..cfed2388 --- /dev/null +++ b/creg/Jamfile @@ -0,0 +1,6 @@ +import lex ; + +exe creg : creg.cc json_feature_map_lexer.ll ..//utils ../training//liblbfgs ..//boost_program_options : <include>../training <include>. : <library>..//z ; + +alias programs : creg ; + diff --git a/creg/creg.cc b/creg/creg.cc index 005ec9ac..b145ac49 100644 --- a/creg/creg.cc +++ b/creg/creg.cc @@ -65,7 +65,7 @@ void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) { if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; } const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id); if (it == rh.id2ind.end()) { - cerr << "Unlabeled example in line " << rh.lc << endl; + cerr << "Unlabeled example in line " << rh.lc << " (key=" << id << ')' << endl; abort(); } (*rh.xy_pairs)[it->second - 1].x = fmap; diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll index cbb6d9a9..f9ce7977 100644 --- a/creg/json_feature_map_lexer.ll +++ b/creg/json_feature_map_lexer.ll @@ -77,6 +77,11 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] <JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); } +<JSON>{WS}*{LCB}{WS}*{RCB}\n* {const SparseVector<float> x; + json_fmap_callback(instid, x, json_fmap_callback_extra); + curfeat = 0; + BEGIN(INITIAL);} + <PREVAL>\" { BEGIN(STRING); spos=0; } <STRING>\" { featname[spos] = 0; @@ -92,7 +97,8 @@ UNESCAPED_CH [^\"\\\b\n\r\f\t] <STRING>\\n { } <STRING>\\r { } <STRING>\\t { } -<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { abort(); +<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { uint16_t hex = strtol(&yytext[2], NULL, 16); + spos += unicode_escape_to_utf8(hex, 0, &featname[spos++])-1; } <JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); } @@ -129,4 +135,3 @@ int main() { JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); } #endif - diff --git a/decoder/Jamfile b/decoder/Jamfile index f8112cae..871da4f6 100644 --- a/decoder/Jamfile +++ b/decoder/Jamfile @@ -61,9 +61,14 @@ lib cdec_lib : ../klm/lm//kenlm ..//boost_program_options : <include>. + : : + <library>..//utils + <library>..//mteval + <library>../klm/lm//kenlm + <library>..//boost_program_options ; -exe cdec : cdec.cc cdec_lib ; +exe cdec : cdec.cc cdec_lib ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : <testing.arg>$(TOP)/decoder/test_data ; diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 4ce2ba86..487c7635 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -965,14 +965,14 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { { ReadFile rf(writer.fname_); bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg); - assert(succeeded); + if (!succeeded) abort(); } new_hg.Union(forest); bool succeeded = writer.Write(new_hg, false); - assert(succeeded); + if (!succeeded) abort(); } else { bool succeeded = writer.Write(forest, false); - assert(succeeded); + if (!succeeded) abort(); } } @@ -1052,14 +1052,14 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { { ReadFile rf(writer.fname_); bool succeeded = HypergraphIO::ReadFromJSON(rf.stream(), &new_hg); - assert(succeeded); + if (!succeeded) abort(); } new_hg.Union(forest); bool succeeded = writer.Write(new_hg, false); - assert(succeeded); + if (!succeeded) abort(); } else { bool succeeded = writer.Write(forest, false); - assert(succeeded); + if (!succeeded) abort(); } } if (aligner_mode && !output_training_vector) diff --git a/decoder/hg.h b/decoder/hg.h index f0ddbb76..dfa4ac6d 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -189,7 +189,7 @@ public: o<<'('; show(o,show_mask); if (indent) o<<'\n'; - for (int i=0;i<tail_nodes_.size();++i) { + for (unsigned i=0;i<tail_nodes_.size();++i) { TEdgeHandle c=re(tail_nodes_[i],i,eh); Edge const* cp=c; if (cp) { @@ -314,7 +314,7 @@ public: private: void index_tails(Edge const& edge) { - for (int i = 0; i < edge.tail_nodes_.size(); ++i) + for (unsigned i = 0; i < edge.tail_nodes_.size(); ++i) nodes_[edge.tail_nodes_[i]].out_edges_.push_back(edge.id_); } public: @@ -348,7 +348,7 @@ public: edge->rule_ = rule; edge->tail_nodes_ = tail; edge->id_ = eid; - for (int i = 0; i < edge->tail_nodes_.size(); ++i) + for (unsigned i = 0; i < edge->tail_nodes_.size(); ++i) nodes_[edge->tail_nodes_[i]].out_edges_.push_back(edge->id_); return edge; } diff --git a/decoder/viterbi.h b/decoder/viterbi.h index daee3d7a..3092f6da 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -32,16 +32,16 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, WeightType* const cur_node_best_weight = &vit_weight[i]; T* const cur_node_best_result = &vit_result[i]; - const int num_in_edges = cur_node.in_edges_.size(); + const unsigned num_in_edges = cur_node.in_edges_.size(); if (num_in_edges == 0) { *cur_node_best_weight = WeightType(1); continue; } Hypergraph::Edge const* edge_best=0; - for (int j = 0; j < num_in_edges; ++j) { + for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; WeightType score = weight(edge); - for (int k = 0; k < edge.tail_nodes_.size(); ++k) + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) score *= vit_weight[edge.tail_nodes_[k]]; if (!edge_best || *cur_node_best_weight < score) { *cur_node_best_weight = score; @@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, assert(edge_best); Hypergraph::Edge const& edgeb=*edge_best; std::vector<const T*> antsb(edgeb.tail_nodes_.size()); - for (int k = 0; k < edgeb.tail_nodes_.size(); ++k) + for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k) antsb[k] = &vit_result[edgeb.tail_nodes_[k]]; traverse(edgeb, antsb, cur_node_best_result); } @@ -101,7 +101,7 @@ struct PathLengthTraversal { int* result) const { (void) edge; *result = 1; - for (int i = 0; i < ants.size(); ++i) *result += *ants[i]; + for (unsigned i = 0; i < ants.size(); ++i) *result += *ants[i]; } }; @@ -120,7 +120,7 @@ struct ELengthTraversal { const std::vector<const int*>& ants, int* result) const { *result = edge.rule_->ELength() - edge.rule_->Arity(); - for (int i = 0; i < ants.size(); ++i) *result += *ants[i]; + for (unsigned i = 0; i < ants.size(); ++i) *result += *ants[i]; } }; @@ -179,8 +179,8 @@ struct ViterbiPathTraversal { void operator()(const Hypergraph::Edge& edge, std::vector<Result const*> const& ants, Result* result) const { - for (int i = 0; i < ants.size(); ++i) - for (int j = 0; j < ants[i]->size(); ++j) + for (unsigned i = 0; i < ants.size(); ++i) + for (unsigned j = 0; j < ants[i]->size(); ++j) result->push_back((*ants[i])[j]); result->push_back(&edge); } @@ -191,7 +191,7 @@ struct FeatureVectorTraversal { void operator()(Hypergraph::Edge const& edge, std::vector<Result const*> const& ants, Result* result) const { - for (int i = 0; i < ants.size(); ++i) + for (unsigned i = 0; i < ants.size(); ++i) *result+=*ants[i]; *result+=edge.feature_values_; } diff --git a/klm/util/Jamfile b/klm/util/Jamfile index 00eefc22..b8c14347 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,4 +1,4 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..///z : <include>.. : : <include>.. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..//z : <include>.. : : <include>.. ; import testing ; diff --git a/mteval/Jamfile b/mteval/Jamfile index 24a95e8f..6260caea 100644 --- a/mteval/Jamfile +++ b/mteval/Jamfile @@ -1,6 +1,6 @@ import testing ; -lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ..//utils : <include>. : : <include>. ; +lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ..//utils : <include>. : : <include>. <library>..//z ; exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; alias programs : fast_score mbr_kbest ; diff --git a/training/Jamfile b/training/Jamfile new file mode 100644 index 00000000..b28a13e3 --- /dev/null +++ b/training/Jamfile @@ -0,0 +1,25 @@ +import testing ; +import option ; + +lib training : + ..//utils + ..//mteval + ..//decoder + ../klm/lm//kenlm + ..//boost_program_options + ttables.cc + : <include>. + : : + <library>..//decoder + <library>../klm/lm//kenlm + <library>..//utils + <library>..//mteval + <library>..//boost_program_options + ; + +exe model1 : model1.cc : <include>../decoder ; + +# // all_tests [ glob *_test.cc ] : cdec_lib : <testing.arg>$(TOP)/decoder/test_data ; + +alias programs : model1 ; + diff --git a/training/liblbfgs/Jamfile b/training/liblbfgs/Jamfile new file mode 100644 index 00000000..49c82748 --- /dev/null +++ b/training/liblbfgs/Jamfile @@ -0,0 +1,5 @@ +import testing ; + +lib liblbfgs : lbfgs.c : <include>.. ; + +unit-test ll_test : ll_test.cc liblbfgs : <include>.. ; diff --git a/utils/array2d.h b/utils/array2d.h index e63eda0d..ee2600d2 100644 --- a/utils/array2d.h +++ b/utils/array2d.h @@ -155,7 +155,7 @@ inline std::ostream& operator<<(std::ostream& os, const Array2D<std::vector<bool os << (i%10); for (int j=0; j<m.height(); ++j) { const std::vector<bool>& ar = m(i,j); - for (int k=0; k<ar.size(); ++k) + for (unsigned k=0; k<ar.size(); ++k) os << (ar[k] ? '*' : '.'); } os << "\t"; diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h index 68caa704..3cc48f8e 100644 --- a/utils/fast_sparse_vector.h +++ b/utils/fast_sparse_vector.h @@ -14,7 +14,9 @@ #include <cassert> #include <vector> +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #include <boost/static_assert.hpp> #if HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP @@ -323,7 +325,7 @@ class FastSparseVector { std::memcpy(&data_, t, sizeof(data_)); } private: - static inline T& extend_vector(std::vector<T> &v,int i) { + static inline T& extend_vector(std::vector<T> &v,size_t i) { if (i>=v.size()) v.resize(i+1); return v[i]; diff --git a/utils/fdict.h b/utils/fdict.h index 0a2a9456..71547d2e 100644 --- a/utils/fdict.h +++ b/utils/fdict.h @@ -1,7 +1,9 @@ #ifndef _FDICT_H_ #define _FDICT_H_ +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #include <iostream> #include <string> diff --git a/utils/hash.h b/utils/hash.h index 2290bc34..31457430 100644 --- a/utils/hash.h +++ b/utils/hash.h @@ -5,7 +5,10 @@ #include "murmur_hash.h" +#ifdef HAVE_CONFIG_H #include "config.h" +#endif + #ifdef HAVE_SPARSEHASH # include <google/dense_hash_map> # include <google/dense_hash_set> @@ -130,8 +133,7 @@ bool maybe_add(H &ht,K const& k,typename H::mapped_type const& v) { // ht[k] must not exist (yet) template <class H,class K> void add(H &ht,K const& k,typename H::mapped_type const& v) { - bool fresh=maybe_add(ht,k,v); - assert(fresh); + maybe_add(ht,k,v); } diff --git a/utils/perfect_hash.h b/utils/perfect_hash.h index 8ac11f18..29ea48a9 100644 --- a/utils/perfect_hash.h +++ b/utils/perfect_hash.h @@ -1,15 +1,16 @@ #ifndef _PERFECT_HASH_MAP_H_ #define _PERFECT_HASH_MAP_H_ -#include "config.h" +#include <vector> +#include <boost/utility.hpp> -#ifndef HAVE_CMPH -#error libcmph is required to use PerfectHashFunction +#ifdef HAVE_CONFIG_H +#include "config.h" #endif -#include <vector> -#include <boost/utility.hpp> +#ifdef HAVE_CMPH #include "cmph.h" +#endif class PerfectHashFunction : boost::noncopyable { public: @@ -18,7 +19,9 @@ class PerfectHashFunction : boost::noncopyable { size_t operator()(const std::string& key) const; size_t number_of_keys() const; private: +#ifdef HAVE_CMPH cmph_t *mphf_; +#endif }; #endif diff --git a/utils/phmt.cc b/utils/phmt.cc index 48d9f093..b17febf6 100644 --- a/utils/phmt.cc +++ b/utils/phmt.cc @@ -1,4 +1,6 @@ +#ifdef HAVE_CONFIG_H #include "config.h" +#endif #ifndef HAVE_CMPH int main() { diff --git a/utils/small_vector.h b/utils/small_vector.h index b65c3b38..d04d1352 100644 --- a/utils/small_vector.h +++ b/utils/small_vector.h @@ -50,16 +50,16 @@ class SmallVector { explicit SmallVector(size_t s) { Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i) new(&data_.vals[i]) T(); + for (unsigned i = 0; i < s; ++i) new(&data_.vals[i]) T(); } //TODO: if alloc were raw space, construct here. } SmallVector(size_t s, T const& v) { Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i) data_.vals[i] = v; + for (unsigned i = 0; i < s; ++i) data_.vals[i] = v; } else { - for (int i = 0; i < size_; ++i) data_.ptr[i] = v; + for (unsigned i = 0; i < size_; ++i) data_.ptr[i] = v; } } @@ -69,9 +69,9 @@ class SmallVector { int s=end-begin; Alloc(s); if (s <= SV_MAX) { - for (int i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; + for (unsigned i = 0; i < s; ++i,++begin) data_.vals[i] = *begin; } else - for (int i = 0; i < s; ++i,++begin) data_.ptr[i] = *begin; + for (unsigned i = 0; i < s; ++i,++begin) data_.ptr[i] = *begin; } SmallVector(const Self& o) : size_(o.size_) { @@ -106,7 +106,7 @@ class SmallVector { if (size_ <= SV_MAX) { if (o.size_ <= SV_MAX) { size_ = o.size_; - for (int i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i]; + for (unsigned i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i]; } else { capacity_ = size_ = o.size_; data_.ptr = new T[capacity_]; @@ -116,7 +116,7 @@ class SmallVector { if (o.size_ <= SV_MAX) { delete[] data_.ptr; size_ = o.size_; - for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i]; + for (unsigned i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i]; } else { if (capacity_ < o.size_) { delete[] data_.ptr; @@ -124,7 +124,7 @@ class SmallVector { data_.ptr = new T[capacity_]; } size_ = o.size_; - for (int i = 0; i < size_; ++i) + for (unsigned i = 0; i < size_; ++i) data_.ptr[i] = o.data_.ptr[i]; } } @@ -135,7 +135,7 @@ class SmallVector { if (size_ <= SV_MAX) { // skip if pod? yes, we required pod anyway. no need to destruct #if !SMALL_VECTOR_POD - for (int i=0;i<size_;++i) data_.vals[i].~T(); + for (unsigned i=0;i<size_;++i) data_.vals[i].~T(); #endif } else delete[] data_.ptr; @@ -166,13 +166,13 @@ private: inline void copy_vals_to_ptr() { capacity_ = SV_MAX * 2; T* tmp = new T[capacity_]; - for (int i = 0; i < SV_MAX; ++i) tmp[i] = data_.vals[i]; + for (unsigned i = 0; i < SV_MAX; ++i) tmp[i] = data_.vals[i]; data_.ptr = tmp; } inline void ptr_to_small() { assert(size_<=SV_MAX); int *tmp=data_.ptr; - for (int i=0;i<size_;++i) + for (unsigned i=0;i<size_;++i) data_.vals[i]=tmp[i]; delete[] tmp; } @@ -224,7 +224,7 @@ public: if (s <= SV_MAX) { if (size_ > SV_MAX) { T *tmp=data_.ptr; - for (int i = 0; i < s; ++i) data_.vals[i] = tmp[i]; + for (unsigned i = 0; i < s; ++i) data_.vals[i] = tmp[i]; delete[] tmp; size_ = s; return; @@ -233,7 +233,7 @@ public: size_ = s; return; } else { - for (int i = size_; i < s; ++i) + for (unsigned i = size_; i < s; ++i) data_.vals[i] = v; size_ = s; return; @@ -244,7 +244,7 @@ public: if (s > capacity_) ensure_capacity(s); if (s > size_) { - for (int i = size_; i < s; ++i) + for (unsigned i = size_; i < s; ++i) data_.ptr[i] = v; } size_ = s; diff --git a/utils/sparse_vector.cc b/utils/sparse_vector.cc index 24da5f39..27bb88dd 100644 --- a/utils/sparse_vector.cc +++ b/utils/sparse_vector.cc @@ -23,7 +23,7 @@ void Encode(double objective, const SparseVector<double>& v, ostream* out) { for (const_iterator it = v.begin(); it != v.end(); ++it) tot_size += FD::Convert(it->first).size(); // feature names; tot_size += sizeof(double) * num_feats; // gradient - const size_t off_magic = tot_size; + const size_t off_magic = tot_size; (void) off_magic; tot_size += 4; // magic // size_t b64_size = tot_size * 4 / 3; |