From f82833385b7cf3c01dc2f92830119dfe3ebc573e Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 16 Aug 2012 17:02:56 -0400 Subject: KenLM update. Remove a couple of segfaults for weird input. Other oddball stuff. --- klm/lm/bhiksha.cc | 1 + klm/lm/bhiksha.hh | 2 +- klm/lm/build_binary.cc | 8 ++++++-- klm/lm/left.hh | 2 +- klm/lm/max_order.cc | 6 ++++++ klm/lm/max_order.hh | 26 ++++++++++++-------------- klm/lm/model.cc | 11 +++++++++-- klm/lm/model.hh | 1 - klm/lm/quantize.hh | 4 ++-- klm/lm/read_arpa.cc | 16 +++++++++++++++- klm/lm/search_trie.cc | 20 ++++++++++---------- klm/lm/state.hh | 10 +++++----- klm/lm/trie.hh | 2 +- klm/lm/trie_sort.cc | 24 ++++++++++++++++-------- klm/lm/trie_sort.hh | 4 ++-- klm/lm/value.hh | 2 +- klm/lm/value_build.hh | 6 +++--- klm/lm/vocab.hh | 4 ++-- klm/util/file.cc | 10 ++++++++++ klm/util/file.hh | 8 +++++++- klm/util/file_piece.cc | 2 +- klm/util/have.hh | 2 +- klm/util/mmap.cc | 16 +--------------- klm/util/string_piece.hh | 5 +++++ 24 files changed, 118 insertions(+), 74 deletions(-) create mode 100644 klm/lm/max_order.cc (limited to 'klm') diff --git a/klm/lm/bhiksha.cc b/klm/lm/bhiksha.cc index cdeafb47..870a4eee 100644 --- a/klm/lm/bhiksha.cc +++ b/klm/lm/bhiksha.cc @@ -1,6 +1,7 @@ #include "lm/bhiksha.hh" #include "lm/config.hh" #include "util/file.hh" +#include "util/exception.hh" #include diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh index 5182ee2e..9734f3ab 100644 --- a/klm/lm/bhiksha.hh +++ b/klm/lm/bhiksha.hh @@ -23,7 +23,7 @@ namespace lm { namespace ngram { -class Config; +struct Config; namespace trie { diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index c4a01cb4..49901c9e 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -25,7 +25,11 @@ void Usage(const char *name) { "-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n" "-w mmap|after determines how writing is done.\n" " mmap maps the binary file and writes to it. 
Default for trie.\n" -" after allocates anonymous memory, builds, and writes. Default for probing.\n\n" +" after allocates anonymous memory, builds, and writes. Default for probing.\n" +"-r \"order1.arpa order2 order3 order4\" adds lower-order rest costs from these\n" +" model files. order1.arpa must be an ARPA file. All others may be ARPA or\n" +" the same data structure as being built. All files must have the same\n" +" vocabulary. For probing, the unigrams must be in the same order.\n\n" "type is either probing or trie. Default is probing.\n\n" "probing uses a probing hash table. It is the fastest but uses the most memory.\n" "-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n" @@ -111,7 +115,7 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { for (long int i = 0; i < length - 2; ++i) std::cout << ' '; std::cout << prefix << "B\n" "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n" - "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r -p " << config.probing_multiplier << "\n" + "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n" "trie " << std::setw(length) << (sizes[2] / divide) << " without quantization\n" "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n" "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n" diff --git a/klm/lm/left.hh b/klm/lm/left.hh index c00af88a..8c27232e 100644 --- a/klm/lm/left.hh +++ b/klm/lm/left.hh @@ -111,7 +111,7 @@ template class RuleScore { return; } - float backoffs[kMaxOrder - 1], backoffs2[kMaxOrder - 1]; + float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1]; float *back = backoffs, *back2 = backoffs2; unsigned char 
next_use = out_.right.length; diff --git a/klm/lm/max_order.cc b/klm/lm/max_order.cc new file mode 100644 index 00000000..94221201 --- /dev/null +++ b/klm/lm/max_order.cc @@ -0,0 +1,6 @@ +#include "lm/max_order.hh" +#include + +int main(int argc, char *argv[]) { + std::cerr << "KenLM was compiled with a maximum supported n-gram order set to " << KENLM_MAX_ORDER << "." << std::endl; +} diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index aff9de27..bc8687cd 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -1,14 +1,12 @@ -#ifndef LM_MAX_ORDER__ -#define LM_MAX_ORDER__ -namespace lm { -namespace ngram { -// If you need higher order, change this and recompile. -// Having this limit means that State can be -// (kMaxOrder - 1) * sizeof(float) bytes instead of -// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead -const unsigned char kMaxOrder = 5; - -} // namespace ngram -} // namespace lm - -#endif // LM_MAX_ORDER__ +/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM. + * If not, this is the default maximum order. + * Having this limit means that State can be + * (kMaxOrder - 1) * sizeof(float) bytes instead of + * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead + */ +#ifndef KENLM_MAX_ORDER +#define KENLM_MAX_ORDER 6 +#endif +#ifndef KENLM_ORDER_MESSAGE +#define KENLM_ORDER_MESSAGE "Edit klm/lm/max_order.hh." 
+#endif diff --git a/klm/lm/model.cc b/klm/lm/model.cc index a2d31ce0..b46333a4 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -5,6 +5,7 @@ #include "lm/search_hashed.hh" #include "lm/search_trie.hh" #include "lm/read_arpa.hh" +#include "util/have.hh" #include "util/murmur_hash.hh" #include @@ -47,7 +48,14 @@ template GenericModel::Ge P::Init(begin_sentence, null_context, vocab_, search_.Order()); } +namespace { +void CheckMaxOrder(size_t order) { + UTIL_THROW_IF(order > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << order << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". " << KENLM_ORDER_MESSAGE); +} +} // namespace + template void GenericModel::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) { + CheckMaxOrder(params.counts.size()); SetupMemory(start, params.counts, config); vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab); search_.LoadedBinary(); @@ -60,8 +68,7 @@ template void GenericModel counts; // File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_. ReadARPACounts(f, counts); - - if (counts.size() > kMaxOrder) UTIL_THROW(FormatLoadException, "This model has order " << counts.size() << ". 
Edit lm/max_order.hh, set kMaxOrder to at least this value, and recompile."); + CheckMaxOrder(counts.size()); if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model."); if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0"); diff --git a/klm/lm/model.hh b/klm/lm/model.hh index be872178..6dee9419 100644 --- a/klm/lm/model.hh +++ b/klm/lm/model.hh @@ -5,7 +5,6 @@ #include "lm/binary_format.hh" #include "lm/config.hh" #include "lm/facade.hh" -#include "lm/max_order.hh" #include "lm/quantize.hh" #include "lm/search_hashed.hh" #include "lm/search_trie.hh" diff --git a/klm/lm/quantize.hh b/klm/lm/quantize.hh index 3e9153e3..abed0112 100644 --- a/klm/lm/quantize.hh +++ b/klm/lm/quantize.hh @@ -17,7 +17,7 @@ namespace lm { namespace ngram { -class Config; +struct Config; /* Store values directly and don't quantize. */ class DontQuantize { @@ -217,7 +217,7 @@ class SeparatelyQuantize { const Bins &LongestTable() const { return longest_; } private: - Bins tables_[kMaxOrder - 1][2]; + Bins tables_[KENLM_MAX_ORDER - 1][2]; Bins longest_; diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 2d9a337d..70727e4c 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -7,9 +7,14 @@ #include #include +#include #include #include +#ifdef WIN32 +#include +#endif + namespace lm { // 1 for '\t', '\n', and ' '. This is stricter than isspace. 
@@ -93,7 +98,16 @@ void ReadBackoff(util::FilePiece &in, float &backoff) { case '\t': backoff = in.ReadFloat(); if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff; - if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff"); + { +#ifdef WIN32 + int float_class = _fpclass(backoff); + UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff); +#else + int float_class = fpclassify(backoff); + UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff); +#endif + } + UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff"); break; case '\n': backoff = ngram::kNoExtensionBackoff; diff --git a/klm/lm/search_trie.cc b/klm/lm/search_trie.cc index 18e80d5a..832cc9f7 100644 --- a/klm/lm/search_trie.cc +++ b/klm/lm/search_trie.cc @@ -180,7 +180,7 @@ const float kBadProb = std::numeric_limits::infinity(); class SRISucks { public: SRISucks() { - for (BackoffMessages *i = messages_; i != messages_ + kMaxOrder - 1; ++i) + for (BackoffMessages *i = messages_; i != messages_ + KENLM_MAX_ORDER - 1; ++i) i->Init(sizeof(ProbPointer) + sizeof(WordIndex) * (i - messages_ + 1)); } @@ -196,7 +196,7 @@ class SRISucks { } void ObtainBackoffs(unsigned char total_order, FILE *unigram_file, RecordReader *reader) { - for (unsigned char i = 0; i < kMaxOrder - 1; ++i) { + for (unsigned char i = 0; i < KENLM_MAX_ORDER - 1; ++i) { it_[i] = values_[i].empty() ? NULL : &*values_[i].begin(); } messages_[0].Apply(it_, unigram_file); @@ -221,10 +221,10 @@ class SRISucks { private: // This used to be one array. Then I needed to separate it by order for quantization to work. 
- std::vector values_[kMaxOrder - 1]; - BackoffMessages messages_[kMaxOrder - 1]; + std::vector values_[KENLM_MAX_ORDER - 1]; + BackoffMessages messages_[KENLM_MAX_ORDER - 1]; - float *it_[kMaxOrder - 1]; + float *it_[KENLM_MAX_ORDER - 1]; }; class FindBlanks { @@ -337,7 +337,7 @@ struct Gram { template class BlankManager { public: BlankManager(unsigned char total_order, Doing &doing) : total_order_(total_order), been_length_(0), doing_(doing) { - for (float *i = basis_; i != basis_ + kMaxOrder - 1; ++i) *i = kBadProb; + for (float *i = basis_; i != basis_ + KENLM_MAX_ORDER - 1; ++i) *i = kBadProb; } void Visit(const WordIndex *to, unsigned char length, float prob) { @@ -373,10 +373,10 @@ template class BlankManager { private: const unsigned char total_order_; - WordIndex been_[kMaxOrder]; + WordIndex been_[KENLM_MAX_ORDER]; unsigned char been_length_; - float basis_[kMaxOrder]; + float basis_[KENLM_MAX_ORDER]; Doing &doing_; }; @@ -470,8 +470,8 @@ void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &c } // namespace template void BuildTrie(SortedFiles &files, std::vector &counts, const Config &config, TrieSearch &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing) { - RecordReader inputs[kMaxOrder - 1]; - RecordReader contexts[kMaxOrder - 1]; + RecordReader inputs[KENLM_MAX_ORDER - 1]; + RecordReader contexts[KENLM_MAX_ORDER - 1]; for (unsigned char i = 2; i <= counts.size(); ++i) { inputs[i-2].Init(files.Full(i), i * sizeof(WordIndex) + (i == counts.size() ? sizeof(Prob) : sizeof(ProbBackoff))); diff --git a/klm/lm/state.hh b/klm/lm/state.hh index c7438414..830e40aa 100644 --- a/klm/lm/state.hh +++ b/klm/lm/state.hh @@ -32,7 +32,7 @@ class State { // Call this before using raw memcmp. 
void ZeroRemaining() { - for (unsigned char i = length; i < kMaxOrder - 1; ++i) { + for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) { words[i] = 0; backoff[i] = 0.0; } @@ -42,8 +42,8 @@ class State { // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD. // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit. - WordIndex words[kMaxOrder - 1]; - float backoff[kMaxOrder - 1]; + WordIndex words[KENLM_MAX_ORDER - 1]; + float backoff[KENLM_MAX_ORDER - 1]; unsigned char length; }; @@ -72,11 +72,11 @@ struct Left { } void ZeroRemaining() { - for (uint64_t * i = pointers + length; i < pointers + kMaxOrder - 1; ++i) + for (uint64_t * i = pointers + length; i < pointers + KENLM_MAX_ORDER - 1; ++i) *i = 0; } - uint64_t pointers[kMaxOrder - 1]; + uint64_t pointers[KENLM_MAX_ORDER - 1]; unsigned char length; bool full; }; diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh index eff93292..034a1414 100644 --- a/klm/lm/trie.hh +++ b/klm/lm/trie.hh @@ -11,7 +11,7 @@ namespace lm { namespace ngram { -class Config; +struct Config; namespace trie { struct NodeRange { diff --git a/klm/lm/trie_sort.cc b/klm/lm/trie_sort.cc index b80fed02..0d83221e 100644 --- a/klm/lm/trie_sort.cc +++ b/klm/lm/trie_sort.cc @@ -148,13 +148,17 @@ template FILE *MergeSortedFiles(FILE *first_file, FILE *second_f } // namespace void RecordReader::Init(FILE *file, std::size_t entry_size) { - rewind(file); - file_ = file; + entry_size_ = entry_size; data_.reset(malloc(entry_size)); UTIL_THROW_IF(!data_.get(), util::ErrnoException, "Failed to malloc read buffer"); - remains_ = true; - entry_size_ = entry_size; - ++*this; + file_ = file; + if (file) { + rewind(file); + remains_ = true; + ++*this; + } else { + remains_ = false; + } } void RecordReader::Overwrite(const void *start, std::size_t amount) { @@ -169,9 +173,13 @@ void RecordReader::Overwrite(const void 
*start, std::size_t amount) { } void RecordReader::Rewind() { - rewind(file_); - remains_ = true; - ++*this; + if (file_) { + rewind(file_); + remains_ = true; + ++*this; + } else { + remains_ = false; + } } SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) { diff --git a/klm/lm/trie_sort.hh b/klm/lm/trie_sort.hh index 3036319d..1e6fce51 100644 --- a/klm/lm/trie_sort.hh +++ b/klm/lm/trie_sort.hh @@ -25,7 +25,7 @@ namespace lm { class PositiveProbWarn; namespace ngram { class SortedVocabulary; -class Config; +struct Config; namespace trie { @@ -107,7 +107,7 @@ class SortedFiles { util::scoped_fd unigram_; - util::scoped_FILE full_[kMaxOrder - 1], context_[kMaxOrder - 1]; + util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1]; }; } // namespace trie diff --git a/klm/lm/value.hh b/klm/lm/value.hh index 85e53f14..ba716713 100644 --- a/klm/lm/value.hh +++ b/klm/lm/value.hh @@ -6,7 +6,7 @@ #include "lm/weights.hh" #include "util/bit_packing.hh" -#include +#include namespace lm { namespace ngram { diff --git a/klm/lm/value_build.hh b/klm/lm/value_build.hh index 687a41a0..461e6a5c 100644 --- a/klm/lm/value_build.hh +++ b/klm/lm/value_build.hh @@ -10,9 +10,9 @@ namespace lm { namespace ngram { -class Config; -class BackoffValue; -class RestValue; +struct Config; +struct BackoffValue; +struct RestValue; class NoRestBuild { public: diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh index c3efcb4a..a25432f9 100644 --- a/klm/lm/vocab.hh +++ b/klm/lm/vocab.hh @@ -13,11 +13,11 @@ #include namespace lm { -class ProbBackoff; +struct ProbBackoff; class EnumerateVocab; namespace ngram { -class Config; +struct Config; namespace detail { uint64_t HashForVocab(const char *str, std::size_t len); diff --git a/klm/util/file.cc b/klm/util/file.cc index 6a3885a7..98f13983 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -44,6 +44,16 @@ int 
OpenReadOrThrow(const char *name) { return ret; } +int CreateOrThrow(const char *name) { + int ret; +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); +#else + UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); +#endif + return ret; +} + uint64_t SizeFile(int fd) { #if defined(_WIN32) || defined(_WIN64) __int64 ret = _filelengthi64(fd); diff --git a/klm/util/file.hh b/klm/util/file.hh index 5c57e2a9..8af1ff4f 100644 --- a/klm/util/file.hh +++ b/klm/util/file.hh @@ -65,7 +65,10 @@ class scoped_FILE { std::FILE *file_; }; +// Open for read only. int OpenReadOrThrow(const char *name); +// Create file if it doesn't exist, truncate if it does. Opened for write. +int CreateOrThrow(const char *name); // Return value for SizeFile when it can't size properly. const uint64_t kBadSize = (uint64_t)-1; @@ -91,10 +94,13 @@ class TempMaker { public: explicit TempMaker(const std::string &prefix); + // These will already be unlinked for you. int Make() const; - std::FILE *MakeFile() const; + // This will force you to close the fd instead of leaving it open. 
+ std::string Name(scoped_fd &opened) const; + private: std::string base_; }; diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index a205995a..19a68728 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -27,7 +27,7 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() { #ifdef HAVE_ZLIB GZException::GZException(gzFile file) { int num; - *this << gzerror( file, &num) << " from zlib"; + *this << gzerror(file, &num) << " from zlib"; } #endif // HAVE_ZLIB diff --git a/klm/util/have.hh b/klm/util/have.hh index b8181e99..1d76a7fc 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -13,7 +13,7 @@ #endif #ifndef HAVE_BOOST -#define HAVE_BOOST +//#define HAVE_BOOST #endif #ifndef HAVE_THREADS diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index 576fd4cc..bc9e3f81 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -19,8 +19,8 @@ #include #include #else -#include #include +#include #endif namespace util { @@ -171,20 +171,6 @@ void *MapZeroedWrite(int fd, std::size_t size) { return MapOrThrow(size, true, kFileFlags, false, fd, 0); } -namespace { - -int CreateOrThrow(const char *name) { - int ret; -#if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); -#else - UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); -#endif - return ret; -} - -} // namespace - void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) { file.reset(CreateOrThrow(name)); try { diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index 5de053aa..be6a643d 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -85,6 +85,11 @@ U_NAMESPACE_BEGIN #include #include +#ifdef WIN32 +#undef max +#undef min +#endif + class StringPiece { public: typedef size_t size_type; -- cgit v1.2.3 From 
e828fab2b485dc0e50d9b9d5c5a599db695ce252 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 3 Sep 2012 17:41:43 +0100 Subject: fix some broken lm code --- klm/lm/build_binary.cc | 1 + klm/util/file_piece.cc | 1 + 2 files changed, 2 insertions(+) (limited to 'klm') diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index 49901c9e..c2ca1101 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -8,6 +8,7 @@ #include #include +#include #ifdef WIN32 #include "util/getopt.hh" diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 19a68728..af341d6d 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 7aa4baf365a80380bebacfc4d4a1ef1b9d757590 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:23:39 +0100 Subject: Update kenlm and build system --- bjam | 4 +- jam-files/fail/Jamroot | 4 + jam-files/sanity.jam | 9 ++ klm/lm/Jamfile | 11 +-- klm/lm/bhiksha.cc | 2 +- klm/lm/bhiksha.hh | 4 +- klm/lm/binary_format.cc | 4 +- klm/lm/binary_format.hh | 4 +- klm/lm/build_binary.cc | 9 +- klm/lm/max_order.hh | 2 +- klm/lm/model.cc | 21 +++-- klm/lm/model.hh | 2 +- klm/lm/partial.hh | 167 ++++++++++++++++++++++++++++++++++ klm/lm/partial_test.cc | 199 +++++++++++++++++++++++++++++++++++++++++ klm/lm/quantize.hh | 8 +- klm/lm/read_arpa.cc | 23 +++-- klm/lm/search_hashed.hh | 6 +- klm/lm/search_trie.hh | 4 +- klm/lm/state.hh | 2 + klm/lm/trie.cc | 4 +- klm/lm/trie.hh | 8 +- klm/lm/vocab.cc | 4 +- klm/lm/vocab.hh | 4 +- klm/util/Jamfile | 14 +-- klm/util/ersatz_progress.cc | 10 +-- klm/util/ersatz_progress.hh | 10 ++- klm/util/exception.cc | 3 + klm/util/exception.hh | 22 +++++ klm/util/file.cc | 7 +- klm/util/file_piece.cc | 1 - klm/util/probing_hash_table.hh | 5 +- 31 files changed, 502 insertions(+), 75 deletions(-) create mode 100644 jam-files/fail/Jamroot create mode 100644 klm/lm/partial.hh create mode 100644 
klm/lm/partial_test.cc (limited to 'klm') diff --git a/bjam b/bjam index d1ac8a55..2b0232c8 100755 --- a/bjam +++ b/bjam @@ -4,8 +4,8 @@ if bjam="$(which bjam 2>/dev/null)" && #exists [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" /dev/null && #bjam in path isn't this script - "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build) - "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough. + "${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null && #The test in jam-files/sanity.jam passes + (cd jam-files/fail && ! "${bjam}") >/dev/null #Returns non-zero on failure then #Delegate to system bjam exec "${bjam}" "$@" diff --git a/jam-files/fail/Jamroot b/jam-files/fail/Jamroot new file mode 100644 index 00000000..c3584d89 --- /dev/null +++ b/jam-files/fail/Jamroot @@ -0,0 +1,4 @@ +actions fail { + false +} +make fail : : fail ; diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 8ccfc65d..086f20ae 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -15,6 +15,13 @@ rule _shell ( cmd : extras * ) { return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ; } +rule shell_or_fail ( cmd ) { + local ret = [ SHELL $(cmd) : exit-status ] ; + if $(ret[2]) != 0 { + exit $(cmd) failed : 1 ; + } +} + cxxflags = [ os.environ "CXXFLAGS" ] ; cflags = [ os.environ "CFLAGS" ] ; ldflags = [ os.environ "LDFLAGS" ] ; @@ -275,3 +282,5 @@ if [ option.get "sanity-test" : : "yes" ] { EXIT "Bad" : 1 ; } } + +use-project /top : . 
; diff --git a/klm/lm/Jamfile b/klm/lm/Jamfile index b1971d88..dd620068 100644 --- a/klm/lm/Jamfile +++ b/klm/lm/Jamfile @@ -2,13 +2,14 @@ lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quant import testing ; -run left_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa ; -run model_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa test_nounk.arpa ; +run left_test.cc ../util//kenutil kenlm /top//boost_unit_test_framework : : test.arpa ; +run model_test.cc ../util//kenutil kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ; exe query : ngram_query.cc kenlm ../util//kenutil ; exe build_binary : build_binary.cc kenlm ../util//kenutil ; +exe kenlm_max_order : max_order.cc : .. ; -install legacy : build_binary query - : $(TOP)/klm/lm EXE on shared:$(TOP)/klm/lm shared:LIB ; +alias programs : query build_binary kenlm_max_order ; -alias programs : build_binary query ; +install legacy : build_binary query kenlm_max_order + : $(TOP)/lm EXE on shared:$(TOP)/lm shared:LIB ; diff --git a/klm/lm/bhiksha.cc b/klm/lm/bhiksha.cc index 870a4eee..088ea98d 100644 --- a/klm/lm/bhiksha.cc +++ b/klm/lm/bhiksha.cc @@ -50,7 +50,7 @@ std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &con } } // namespace -std::size_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) { +uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) { return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */; } diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh index 9734f3ab..8ff88654 100644 --- a/klm/lm/bhiksha.hh +++ b/klm/lm/bhiksha.hh @@ -33,7 +33,7 @@ class DontBhiksha { static void UpdateConfigFromBinary(int /*fd*/, Config &/*config*/) {} - static std::size_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; } + static 
uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; } static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) { return util::RequiredBits(max_next); @@ -67,7 +67,7 @@ class ArrayBhiksha { static void UpdateConfigFromBinary(int fd, Config &config); - static std::size_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); + static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config); diff --git a/klm/lm/binary_format.cc b/klm/lm/binary_format.cc index a56e998e..fd841e59 100644 --- a/klm/lm/binary_format.cc +++ b/klm/lm/binary_format.cc @@ -200,10 +200,10 @@ void SeekPastHeader(int fd, const Parameters &params) { util::SeekOrThrow(fd, TotalHeaderSize(params.counts.size())); } -uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing) { +uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing) { const uint64_t file_size = util::SizeFile(backing.file.get()); // The header is smaller than a page, so we have to map the whole header as well. 
- std::size_t total_map = TotalHeaderSize(params.counts.size()) + memory_size; + std::size_t total_map = util::CheckOverflow(TotalHeaderSize(params.counts.size()) + memory_size); if (file_size != util::kBadSize && static_cast(file_size) < total_map) UTIL_THROW(FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map); diff --git a/klm/lm/binary_format.hh b/klm/lm/binary_format.hh index dd795f62..bf699d5f 100644 --- a/klm/lm/binary_format.hh +++ b/klm/lm/binary_format.hh @@ -70,7 +70,7 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet void SeekPastHeader(int fd, const Parameters &params); -uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing); +uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing); void ComplainAboutARPA(const Config &config, ModelType model_type); @@ -90,7 +90,7 @@ template void LoadLM(const char *file, const Config &config, To &to) new_config.probing_multiplier = params.fixed.probing_multiplier; detail::SeekPastHeader(backing.file.get(), params); To::UpdateConfigFromBinary(backing.file.get(), params.counts, new_config); - std::size_t memory_size = To::Size(params.counts, new_config); + uint64_t memory_size = To::Size(params.counts, new_config); uint8_t *start = detail::SetupBinary(new_config, params, memory_size, backing); to.InitializeFromBinary(start, params, new_config, backing.file.get()); } else { diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index c2ca1101..efe99899 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -8,7 +8,6 @@ #include #include -#include #ifdef WIN32 #include "util/getopt.hh" @@ -86,16 +85,16 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { std::vector counts; util::FilePiece f(file); lm::ReadARPACounts(f, counts); - std::size_t sizes[6]; + uint64_t sizes[6]; sizes[0] = 
ProbingModel::Size(counts, config); sizes[1] = RestProbingModel::Size(counts, config); sizes[2] = TrieModel::Size(counts, config); sizes[3] = QuantTrieModel::Size(counts, config); sizes[4] = ArrayTrieModel::Size(counts, config); sizes[5] = QuantArrayTrieModel::Size(counts, config); - std::size_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); - std::size_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); - std::size_t divide; + uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t)); + uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t)); + uint64_t divide; char prefix; if (min_length < (1 << 10) * 10) { prefix = ' '; diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index bc8687cd..989f8324 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -8,5 +8,5 @@ #define KENLM_MAX_ORDER 6 #endif #ifndef KENLM_ORDER_MESSAGE -#define KENLM_ORDER_MESSAGE "Edit klm/lm/max_order.hh." +#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --kenlm-max-order=6 -a'. Otherwise, edit lm/max_order.hh." 
#endif diff --git a/klm/lm/model.cc b/klm/lm/model.cc index b46333a4..40af8a63 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -12,6 +12,7 @@ #include #include #include +#include namespace lm { namespace ngram { @@ -19,17 +20,18 @@ namespace detail { template const ModelType GenericModel::kModelType = Search::kModelType; -template size_t GenericModel::Size(const std::vector &counts, const Config &config) { +template uint64_t GenericModel::Size(const std::vector &counts, const Config &config) { return VocabularyT::Size(counts[0], config) + Search::Size(counts, config); } template void GenericModel::SetupMemory(void *base, const std::vector &counts, const Config &config) { + size_t goal_size = util::CheckOverflow(Size(counts, config)); uint8_t *start = static_cast(base); size_t allocated = VocabularyT::Size(counts[0], config); vocab_.SetupMemory(start, allocated, counts[0], config); start += allocated; start = search_.SetupMemory(start, counts, config); - if (static_cast(start - static_cast(base)) != Size(counts, config)) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast(base)) << " but Size says they should take " << Size(counts, config)); + if (static_cast(start - static_cast(base)) != goal_size) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast(base)) << " but Size says they should take " << goal_size); } template GenericModel::GenericModel(const char *file, const Config &config) { @@ -49,13 +51,18 @@ template GenericModel::Ge } namespace { -void CheckMaxOrder(size_t order) { - UTIL_THROW_IF(order > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << order << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". " << KENLM_ORDER_MESSAGE); +void CheckCounts(const std::vector &counts) { + UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". 
" << KENLM_ORDER_MESSAGE); + if (sizeof(uint64_t) > sizeof(std::size_t)) { + for (std::vector::const_iterator i = counts.begin(); i != counts.end(); ++i) { + UTIL_THROW_IF(*i > static_cast(std::numeric_limits::max()), util::OverflowException, "This model has " << *i << " " << (i - counts.begin() + 1) << "-grams which is too many for 32-bit machines."); + } + } } } // namespace template void GenericModel::InitializeFromBinary(void *start, const Parameters ¶ms, const Config &config, int fd) { - CheckMaxOrder(params.counts.size()); + CheckCounts(params.counts); SetupMemory(start, params.counts, config); vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab); search_.LoadedBinary(); @@ -68,11 +75,11 @@ template void GenericModel counts; // File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_. ReadARPACounts(f, counts); - CheckMaxOrder(counts.size()); + CheckCounts(counts); if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model."); if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0"); - std::size_t vocab_size = VocabularyT::Size(counts[0], config); + std::size_t vocab_size = util::CheckOverflow(VocabularyT::Size(counts[0], config)); // Setup the binary file for writing the vocab lookup table. The search_ is responsible for growing the binary file to its needs. vocab_.SetupMemory(SetupJustVocab(config, counts.size(), vocab_size, backing_), vocab_size, counts[0], config); diff --git a/klm/lm/model.hh b/klm/lm/model.hh index 6dee9419..13ff864e 100644 --- a/klm/lm/model.hh +++ b/klm/lm/model.hh @@ -41,7 +41,7 @@ template class GenericModel : public base::Mod * does not include small non-mapped control structures, such as this class * itself. 
*/ - static size_t Size(const std::vector &counts, const Config &config = Config()); + static uint64_t Size(const std::vector &counts, const Config &config = Config()); /* Load the model from a file. It may be an ARPA or binary file. Binary * files must have the format expected by this class or you'll get an diff --git a/klm/lm/partial.hh b/klm/lm/partial.hh new file mode 100644 index 00000000..1dede359 --- /dev/null +++ b/klm/lm/partial.hh @@ -0,0 +1,167 @@ +#ifndef LM_PARTIAL__ +#define LM_PARTIAL__ + +#include "lm/return.hh" +#include "lm/state.hh" + +#include + +#include + +namespace lm { +namespace ngram { + +struct ExtendReturn { + float adjust; + bool make_full; + unsigned char next_use; +}; + +template ExtendReturn ExtendLoop( + const Model &model, + unsigned char seen, const WordIndex *add_rbegin, const WordIndex *add_rend, const float *backoff_start, + const uint64_t *pointers, const uint64_t *pointers_end, + uint64_t *&pointers_write, + float *backoff_write) { + unsigned char add_length = add_rend - add_rbegin; + + float backoff_buf[2][KENLM_MAX_ORDER - 1]; + float *backoff_in = backoff_buf[0], *backoff_out = backoff_buf[1]; + std::copy(backoff_start, backoff_start + add_length, backoff_in); + + ExtendReturn value; + value.make_full = false; + value.adjust = 0.0; + value.next_use = add_length; + + unsigned char i = 0; + unsigned char length = pointers_end - pointers; + // pointers_write is NULL means that the existing left state is full, so we should use completed probabilities. + if (pointers_write) { + // Using full context, writing to new left state. 
+ for (; i < length; ++i) { + FullScoreReturn ret(model.ExtendLeft( + add_rbegin, add_rbegin + value.next_use, + backoff_in, + pointers[i], i + seen + 1, + backoff_out, + value.next_use)); + std::swap(backoff_in, backoff_out); + if (ret.independent_left) { + value.adjust += ret.prob; + value.make_full = true; + ++i; + break; + } + value.adjust += ret.rest; + *pointers_write++ = ret.extend_left; + if (value.next_use != add_length) { + value.make_full = true; + ++i; + break; + } + } + } + // Using some of the new context. + for (; i < length && value.next_use; ++i) { + FullScoreReturn ret(model.ExtendLeft( + add_rbegin, add_rbegin + value.next_use, + backoff_in, + pointers[i], i + seen + 1, + backoff_out, + value.next_use)); + std::swap(backoff_in, backoff_out); + value.adjust += ret.prob; + } + float unrest = model.UnRest(pointers + i, pointers_end, i + seen + 1); + // Using none of the new context. + value.adjust += unrest; + + std::copy(backoff_in, backoff_in + value.next_use, backoff_write); + return value; +} + +template float RevealBefore(const Model &model, const Right &reveal, const unsigned char seen, bool reveal_full, Left &left, Right &right) { + assert(seen < reveal.length || reveal_full); + uint64_t *pointers_write = reveal_full ? NULL : left.pointers; + float backoff_buffer[KENLM_MAX_ORDER - 1]; + ExtendReturn value(ExtendLoop( + model, + seen, reveal.words + seen, reveal.words + reveal.length, reveal.backoff + seen, + left.pointers, left.pointers + left.length, + pointers_write, + left.full ? backoff_buffer : (right.backoff + right.length))); + if (reveal_full) { + left.length = 0; + value.make_full = true; + } else { + left.length = pointers_write - left.pointers; + value.make_full |= (left.length == model.Order() - 1); + } + if (left.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i]; + } else { + // If left wasn't full when it came in, put words into right state. 
+ std::copy(reveal.words + seen, reveal.words + seen + value.next_use, right.words + right.length); + right.length += value.next_use; + left.full = value.make_full || (right.length == model.Order() - 1); + } + return value.adjust; +} + +template float RevealAfter(const Model &model, Left &left, Right &right, const Left &reveal, unsigned char seen) { + assert(seen < reveal.length || reveal.full); + uint64_t *pointers_write = left.full ? NULL : (left.pointers + left.length); + ExtendReturn value(ExtendLoop( + model, + seen, right.words, right.words + right.length, right.backoff, + reveal.pointers + seen, reveal.pointers + reveal.length, + pointers_write, + right.backoff)); + if (reveal.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += right.backoff[i]; + right.length = 0; + value.make_full = true; + } else { + right.length = value.next_use; + value.make_full |= (right.length == model.Order() - 1); + } + if (!left.full) { + left.length = pointers_write - left.pointers; + left.full = value.make_full || (left.length == model.Order() - 1); + } + return value.adjust; +} + +template float Subsume(const Model &model, Left &first_left, const Right &first_right, const Left &second_left, Right &second_right, const unsigned int between_length) { + assert(first_right.length < KENLM_MAX_ORDER); + assert(second_left.length < KENLM_MAX_ORDER); + assert(between_length < KENLM_MAX_ORDER - 1); + uint64_t *pointers_write = first_left.full ? NULL : (first_left.pointers + first_left.length); + float backoff_buffer[KENLM_MAX_ORDER - 1]; + ExtendReturn value(ExtendLoop( + model, + between_length, first_right.words, first_right.words + first_right.length, first_right.backoff, + second_left.pointers, second_left.pointers + second_left.length, + pointers_write, + second_left.full ? 
backoff_buffer : (second_right.backoff + second_right.length))); + if (second_left.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i]; + } else { + std::copy(first_right.words, first_right.words + value.next_use, second_right.words + second_right.length); + second_right.length += value.next_use; + value.make_full |= (second_right.length == model.Order() - 1); + } + if (!first_left.full) { + first_left.length = pointers_write - first_left.pointers; + first_left.full = value.make_full || second_left.full || (first_left.length == model.Order() - 1); + } + assert(first_left.length < KENLM_MAX_ORDER); + assert(second_right.length < KENLM_MAX_ORDER); + return value.adjust; +} + +} // namespace ngram +} // namespace lm + +#endif // LM_PARTIAL__ diff --git a/klm/lm/partial_test.cc b/klm/lm/partial_test.cc new file mode 100644 index 00000000..8d309c85 --- /dev/null +++ b/klm/lm/partial_test.cc @@ -0,0 +1,199 @@ +#include "lm/partial.hh" + +#include "lm/left.hh" +#include "lm/model.hh" +#include "util/tokenize_piece.hh" + +#define BOOST_TEST_MODULE PartialTest +#include +#include + +namespace lm { +namespace ngram { +namespace { + +const char *TestLocation() { + if (boost::unit_test::framework::master_test_suite().argc < 2) { + return "test.arpa"; + } + return boost::unit_test::framework::master_test_suite().argv[1]; +} + +Config SilentConfig() { + Config config; + config.arpa_complain = Config::NONE; + config.messages = NULL; + return config; +} + +struct ModelFixture { + ModelFixture() : m(TestLocation(), SilentConfig()) {} + + RestProbingModel m; +}; + +BOOST_FIXTURE_TEST_SUITE(suite, ModelFixture) + +BOOST_AUTO_TEST_CASE(SimpleBefore) { + Left left; + left.full = false; + left.length = 0; + Right right; + right.length = 0; + + Right reveal; + reveal.length = 1; + WordIndex period = m.GetVocabulary().Index("."); + reveal.words[0] = period; + reveal.backoff[0] = -0.845098; + + BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 0, 
false, left, right), 0.001); + BOOST_CHECK_EQUAL(0, left.length); + BOOST_CHECK(!left.full); + BOOST_CHECK_EQUAL(1, right.length); + BOOST_CHECK_EQUAL(period, right.words[0]); + BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001); + + WordIndex more = m.GetVocabulary().Index("more"); + reveal.words[1] = more; + reveal.backoff[1] = -0.4771212; + reveal.length = 2; + BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 1, false, left, right), 0.001); + BOOST_CHECK_EQUAL(0, left.length); + BOOST_CHECK(!left.full); + BOOST_CHECK_EQUAL(2, right.length); + BOOST_CHECK_EQUAL(period, right.words[0]); + BOOST_CHECK_EQUAL(more, right.words[1]); + BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001); + BOOST_CHECK_CLOSE(-0.4771212, right.backoff[1], 0.001); +} + +BOOST_AUTO_TEST_CASE(AlsoWouldConsider) { + WordIndex would = m.GetVocabulary().Index("would"); + WordIndex consider = m.GetVocabulary().Index("consider"); + + ChartState current; + current.left.length = 1; + current.left.pointers[0] = would; + current.left.full = false; + current.right.length = 1; + current.right.words[0] = would; + current.right.backoff[0] = -0.30103; + + Left after; + after.full = false; + after.length = 1; + after.pointers[0] = consider; + + // adjustment for would consider + BOOST_CHECK_CLOSE(-1.687872 - -0.2922095 - 0.30103, RevealAfter(m, current.left, current.right, after, 0), 0.001); + + BOOST_CHECK_EQUAL(2, current.left.length); + BOOST_CHECK_EQUAL(would, current.left.pointers[0]); + BOOST_CHECK_EQUAL(false, current.left.full); + + WordIndex also = m.GetVocabulary().Index("also"); + Right before; + before.length = 1; + before.words[0] = also; + before.backoff[0] = -0.30103; + // r(would) = -0.2922095 [i would], r(would -> consider) = -1.988902 [b(would) + p(consider)] + // p(also -> would) = -2, p(also would -> consider) = -3 + BOOST_CHECK_CLOSE(-2 + 0.2922095 -3 + 1.988902, RevealBefore(m, before, 0, false, current.left, current.right), 0.001); + BOOST_CHECK_EQUAL(0, current.left.length); + 
BOOST_CHECK(current.left.full); + BOOST_CHECK_EQUAL(2, current.right.length); + BOOST_CHECK_EQUAL(would, current.right.words[0]); + BOOST_CHECK_EQUAL(also, current.right.words[1]); +} + +BOOST_AUTO_TEST_CASE(EndSentence) { + WordIndex loin = m.GetVocabulary().Index("loin"); + WordIndex period = m.GetVocabulary().Index("."); + WordIndex eos = m.GetVocabulary().EndSentence(); + + ChartState between; + between.left.length = 1; + between.left.pointers[0] = eos; + between.left.full = true; + between.right.length = 0; + + Right before; + before.words[0] = period; + before.words[1] = loin; + before.backoff[0] = -0.845098; + before.backoff[1] = 0.0; + + before.length = 1; + BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001); + BOOST_CHECK_EQUAL(0, between.left.length); +} + +float ScoreFragment(const RestProbingModel &model, unsigned int *begin, unsigned int *end, ChartState &out) { + RuleScore scorer(model, out); + for (unsigned int *i = begin; i < end; ++i) { + scorer.Terminal(*i); + } + return scorer.Finish(); +} + +void CheckAdjustment(const RestProbingModel &model, float expect, const Right &before_in, bool before_full, ChartState between, const Left &after_in) { + Right before(before_in); + Left after(after_in); + after.full = false; + float got = 0.0; + for (unsigned int i = 1; i < 5; ++i) { + if (before_in.length >= i) { + before.length = i; + got += RevealBefore(model, before, i - 1, false, between.left, between.right); + } + if (after_in.length >= i) { + after.length = i; + got += RevealAfter(model, between.left, between.right, after, i - 1); + } + } + if (after_in.full) { + after.full = true; + got += RevealAfter(model, between.left, between.right, after, after.length); + } + if (before_full) { + got += RevealBefore(model, before, before.length, true, between.left, between.right); + } + // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this. 
+ BOOST_CHECK(fabs(expect - got) < 0.001); +} + +void FullDivide(const RestProbingModel &model, StringPiece str) { + std::vector indices; + for (util::TokenIter i(str, ' '); i; ++i) { + indices.push_back(model.GetVocabulary().Index(*i)); + } + ChartState full_state; + float full = ScoreFragment(model, &indices.front(), &indices.back() + 1, full_state); + + ChartState before_state; + before_state.left.full = false; + RuleScore before_scorer(model, before_state); + float before_score = 0.0; + for (unsigned int before = 0; before < indices.size(); ++before) { + for (unsigned int after = before; after <= indices.size(); ++after) { + ChartState after_state, between_state; + float after_score = ScoreFragment(model, &indices.front() + after, &indices.front() + indices.size(), after_state); + float between_score = ScoreFragment(model, &indices.front() + before, &indices.front() + after, between_state); + CheckAdjustment(model, full - before_score - after_score - between_score, before_state.right, before_state.left.full, between_state, after_state.left); + } + before_scorer.Terminal(indices[before]); + before_score = before_scorer.Finish(); + } +} + +BOOST_AUTO_TEST_CASE(Strings) { + FullDivide(m, "also would consider"); + FullDivide(m, "looking on a little more loin . "); + FullDivide(m, "in biarritz watching considering looking . 
on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown "); +} + +BOOST_AUTO_TEST_SUITE_END() +} // namespace +} // namespace ngram +} // namespace lm diff --git a/klm/lm/quantize.hh b/klm/lm/quantize.hh index abed0112..8ce2378a 100644 --- a/klm/lm/quantize.hh +++ b/klm/lm/quantize.hh @@ -24,7 +24,7 @@ class DontQuantize { public: static const ModelType kModelTypeAdd = static_cast(0); static void UpdateConfigFromBinary(int, const std::vector &, Config &) {} - static std::size_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; } + static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; } static uint8_t MiddleBits(const Config &/*config*/) { return 63; } static uint8_t LongestBits(const Config &/*config*/) { return 31; } @@ -138,9 +138,9 @@ class SeparatelyQuantize { static void UpdateConfigFromBinary(int fd, const std::vector &counts, Config &config); - static std::size_t Size(uint8_t order, const Config &config) { - size_t longest_table = (static_cast(1) << static_cast(config.prob_bits)) * sizeof(float); - size_t middle_table = (static_cast(1) << static_cast(config.backoff_bits)) * sizeof(float) + longest_table; + static uint64_t Size(uint8_t order, const Config &config) { + uint64_t longest_table = (static_cast(1) << static_cast(config.prob_bits)) * sizeof(float); + uint64_t middle_table = (static_cast(1) << static_cast(config.backoff_bits)) * sizeof(float) + longest_table; // unigrams are currently not quantized so no need for a table. 
return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding) */ 8; } diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 70727e4c..174bd3a3 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -2,12 +2,13 @@ #include "lm/blank.hh" +#include #include #include +#include #include #include -#include #include #include @@ -31,6 +32,16 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) { const char kBinaryMagic[] = "mmap lm http://kheafield.com/code"; +// strtoull isn't portable enough :-( +uint64_t ReadCount(const std::string &from) { + std::stringstream stream(from); + uint64_t ret; + stream >> ret; + UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from); + UTIL_THROW_IF(static_cast(stream.tellg()) != from.size(), FormatLoadException, "Extra content in count: '" << from << "'"); + return ret; +} + } // namespace void ReadARPACounts(util::FilePiece &in, std::vector &number) { @@ -52,15 +63,11 @@ void ReadARPACounts(util::FilePiece &in, std::vector &number) { // So strtol doesn't go off the end of line. 
std::string remaining(line.data() + 6, line.size() - 6); char *end_ptr; - unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10); + unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10); if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line); if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line); ++end_ptr; - const char *start = end_ptr; - long int count = std::strtol(start, &end_ptr, 10); - if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count); - if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line); - number.push_back(count); + number.push_back(ReadCount(end_ptr)); } } @@ -103,7 +110,7 @@ void ReadBackoff(util::FilePiece &in, float &backoff) { int float_class = _fpclass(backoff); UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff); #else - int float_class = fpclassify(backoff); + int float_class = std::fpclassify(backoff); UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff); #endif } diff --git a/klm/lm/search_hashed.hh b/klm/lm/search_hashed.hh index 7e8c1220..3bcde921 100644 --- a/klm/lm/search_hashed.hh +++ b/klm/lm/search_hashed.hh @@ -74,8 +74,8 @@ template class HashedSearch { // TODO: move probing_multiplier here with next binary file format update. 
static void UpdateConfigFromBinary(int, const std::vector &, Config &) {} - static std::size_t Size(const std::vector &counts, const Config &config) { - std::size_t ret = Unigram::Size(counts[0]); + static uint64_t Size(const std::vector &counts, const Config &config) { + uint64_t ret = Unigram::Size(counts[0]); for (unsigned char n = 1; n < counts.size() - 1; ++n) { ret += Middle::Size(counts[n], config.probing_multiplier); } @@ -160,7 +160,7 @@ template class HashedSearch { #endif {} - static std::size_t Size(uint64_t count) { + static uint64_t Size(uint64_t count) { return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate } diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh index 10b22ab1..1264baf5 100644 --- a/klm/lm/search_trie.hh +++ b/klm/lm/search_trie.hh @@ -44,8 +44,8 @@ template class TrieSearch { Bhiksha::UpdateConfigFromBinary(fd, config); } - static std::size_t Size(const std::vector &counts, const Config &config) { - std::size_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]); + static uint64_t Size(const std::vector &counts, const Config &config) { + uint64_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]); for (unsigned char i = 1; i < counts.size() - 1; ++i) { ret += Middle::Size(Quant::MiddleBits(config), counts[i], counts[0], counts[i+1], config); } diff --git a/klm/lm/state.hh b/klm/lm/state.hh index 830e40aa..551510a8 100644 --- a/klm/lm/state.hh +++ b/klm/lm/state.hh @@ -47,6 +47,8 @@ class State { unsigned char length; }; +typedef State Right; + inline uint64_t hash_value(const State &state, uint64_t seed = 0) { return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed); } diff --git a/klm/lm/trie.cc b/klm/lm/trie.cc index 0f1ca574..d9895f89 100644 --- a/klm/lm/trie.cc +++ b/klm/lm/trie.cc @@ -36,7 +36,7 @@ bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_ } } // namespace -std::size_t BitPacked::BaseSize(uint64_t entries, uint64_t 
max_vocab, uint8_t remaining_bits) { +uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) { uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits; // Extra entry for next pointer at the end. // +7 then / 8 to round up bits and convert to bytes @@ -57,7 +57,7 @@ void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits) max_vocab_ = max_vocab; } -template std::size_t BitPackedMiddle::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { +template uint64_t BitPackedMiddle::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config)); } diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh index 034a1414..9ea3c546 100644 --- a/klm/lm/trie.hh +++ b/klm/lm/trie.hh @@ -49,7 +49,7 @@ class Unigram { unigram_ = static_cast(start); } - static std::size_t Size(uint64_t count) { + static uint64_t Size(uint64_t count) { // +1 in case unknown doesn't appear. +1 for the final next. return (count + 2) * sizeof(UnigramValue); } @@ -84,7 +84,7 @@ class BitPacked { } protected: - static std::size_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits); + static uint64_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits); void BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits); @@ -99,7 +99,7 @@ class BitPacked { template class BitPackedMiddle : public BitPacked { public: - static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config); + static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config); // next_source need not be initialized. 
BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config); @@ -128,7 +128,7 @@ template class BitPackedMiddle : public BitPacked { class BitPackedLongest : public BitPacked { public: - static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) { + static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) { return BaseSize(entries, max_vocab, quant_bits); } diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc index 5de68f16..398475be 100644 --- a/klm/lm/vocab.cc +++ b/klm/lm/vocab.cc @@ -87,7 +87,7 @@ void WriteWordsWrapper::Write(int fd) { SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {} -std::size_t SortedVocabulary::Size(std::size_t entries, const Config &/*config*/) { +uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) { // Lead with the number of entries. return sizeof(uint64_t) + sizeof(uint64_t) * entries; } @@ -165,7 +165,7 @@ struct ProbingVocabularyHeader { ProbingVocabulary::ProbingVocabulary() : enumerate_(NULL) {} -std::size_t ProbingVocabulary::Size(std::size_t entries, const Config &config) { +uint64_t ProbingVocabulary::Size(uint64_t entries, const Config &config) { return ALIGN8(sizeof(detail::ProbingVocabularyHeader)) + Lookup::Size(entries, config.probing_multiplier); } diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh index a25432f9..074cd446 100644 --- a/klm/lm/vocab.hh +++ b/klm/lm/vocab.hh @@ -62,7 +62,7 @@ class SortedVocabulary : public base::Vocabulary { } // Size for purposes of file writing - static size_t Size(std::size_t entries, const Config &config); + static uint64_t Size(uint64_t entries, const Config &config); // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary. 
WordIndex Bound() const { return bound_; } @@ -129,7 +129,7 @@ class ProbingVocabulary : public base::Vocabulary { return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0; } - static size_t Size(std::size_t entries, const Config &config); + static uint64_t Size(uint64_t entries, const Config &config); // Vocab words are [0, Bound()). WordIndex Bound() const { return bound_; } diff --git a/klm/util/Jamfile b/klm/util/Jamfile index 3ee2c2c2..a939265f 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,10 +1,10 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : .. : : .. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc /top//z : .. : : .. ; import testing ; -unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ; -run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ; -unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test bit_packing_test : bit_packing_test.cc kenutil /top//boost_unit_test_framework ; +run file_piece_test.cc kenutil /top//boost_unit_test_framework : : file_piece.cc ; +unit-test joint_sort_test : joint_sort_test.cc kenutil /top//boost_unit_test_framework ; +unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil /top//boost_unit_test_framework ; +unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil /top//boost_unit_test_framework ; +unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil /top//boost_unit_test_framework ; diff --git 
a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc index 07b14e26..eb635ad8 100644 --- a/klm/util/ersatz_progress.cc +++ b/klm/util/ersatz_progress.cc @@ -9,16 +9,16 @@ namespace util { namespace { const unsigned char kWidth = 100; } -ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} +ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} ErsatzProgress::~ErsatzProgress() { if (out_) Finished(); } -ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) +ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message) : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) { if (!out_) { - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); return; } if (!message.empty()) *out_ << message << '\n'; @@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std void ErsatzProgress::Milestone() { if (!out_) { current_ = 0; return; } if (!complete_) return; - unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); + unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); for (; stones_written_ < stone; ++stones_written_) { (*out_) << '*'; } if (stone == kWidth) { (*out_) << std::endl; - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); out_ = NULL; } else { next_ = std::max(next_, (stone * complete_) / kWidth); diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh index f709dc51..ff4d590f 100644 --- a/klm/util/ersatz_progress.hh +++ b/klm/util/ersatz_progress.hh @@ -4,6 +4,8 @@ #include #include +#include + // Ersatz version of boost::progress so core language model doesn't depend on // boost. Also adds option to print nothing. 
@@ -14,7 +16,7 @@ class ErsatzProgress { ErsatzProgress(); // Null means no output. The null value is useful for passing along the ostream pointer from another caller. - explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); + explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); ~ErsatzProgress(); @@ -23,12 +25,12 @@ class ErsatzProgress { return *this; } - ErsatzProgress &operator+=(std::size_t amount) { + ErsatzProgress &operator+=(uint64_t amount) { if ((current_ += amount) >= next_) Milestone(); return *this; } - void Set(std::size_t to) { + void Set(uint64_t to) { if ((current_ = to) >= next_) Milestone(); Milestone(); } @@ -40,7 +42,7 @@ class ErsatzProgress { private: void Milestone(); - std::size_t current_, next_, complete_; + uint64_t current_, next_, complete_; unsigned char stones_written_; std::ostream *out_; diff --git a/klm/util/exception.cc b/klm/util/exception.cc index c4f8c04c..3806e6de 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() { } EndOfFileException::~EndOfFileException() throw() {} +OverflowException::OverflowException() throw() {} +OverflowException::~OverflowException() throw() {} + } // namespace util diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 6d6a37cb..83f99cd6 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -2,9 +2,12 @@ #define UTIL_EXCEPTION__ #include +#include #include #include +#include + namespace util { template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); @@ -111,6 +114,25 @@ class EndOfFileException : public Exception { ~EndOfFileException() throw(); }; +class OverflowException : public Exception { + public: + OverflowException() throw(); + ~OverflowException() throw(); +}; + +template inline std::size_t CheckOverflowInternal(uint64_t value) { + 
UTIL_THROW_IF(value > static_cast(std::numeric_limits::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code."); + return value; +} + +template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) { + return value; +} + +inline std::size_t CheckOverflow(uint64_t value) { + return CheckOverflowInternal(value); +} + } // namespace util #endif // UTIL_EXCEPTION__ diff --git a/klm/util/file.cc b/klm/util/file.cc index 98f13983..ff5e64c9 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -119,8 +119,13 @@ void FSyncOrThrow(int fd) { } namespace { -void InternalSeek(int fd, off_t off, int whence) { +void InternalSeek(int fd, int64_t off, int whence) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed"); + +#else UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed"); +#endif } } // namespace diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index af341d6d..19a68728 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 3354b68e..770faa7e 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -8,6 +8,7 @@ #include #include +#include namespace util { @@ -42,8 +43,8 @@ template (multiplier * static_cast(entries))); + static uint64_t Size(uint64_t entries, float multiplier) { + uint64_t buckets = std::max(entries + 1, static_cast(multiplier * static_cast(entries))); return buckets * sizeof(Entry); } -- cgit v1.2.3 From 58d7f847cd5b3c56682e834a2d9b897c6943fafc Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:30:16 +0100 Subject: Add search library to cdec (not used yet) --- klm/search/Jamfile | 5 ++ klm/search/arity.hh | 8 ++ klm/search/config.hh | 25 +++++++ klm/search/context.hh | 66 
+++++++++++++++++ klm/search/edge.hh | 54 ++++++++++++++ klm/search/edge_generator.cc | 129 ++++++++++++++++++++++++++++++++ klm/search/edge_generator.hh | 54 ++++++++++++++ klm/search/final.hh | 40 ++++++++++ klm/search/rule.cc | 55 ++++++++++++++ klm/search/rule.hh | 60 +++++++++++++++ klm/search/source.hh | 48 ++++++++++++ klm/search/types.hh | 18 +++++ klm/search/vertex.cc | 48 ++++++++++++ klm/search/vertex.hh | 165 +++++++++++++++++++++++++++++++++++++++++ klm/search/vertex_generator.cc | 99 +++++++++++++++++++++++++ klm/search/vertex_generator.hh | 70 +++++++++++++++++ klm/search/weights.cc | 69 +++++++++++++++++ klm/search/weights.hh | 49 ++++++++++++ klm/search/weights_test.cc | 38 ++++++++++ klm/search/word.hh | 47 ++++++++++++ 20 files changed, 1147 insertions(+) create mode 100644 klm/search/Jamfile create mode 100644 klm/search/arity.hh create mode 100644 klm/search/config.hh create mode 100644 klm/search/context.hh create mode 100644 klm/search/edge.hh create mode 100644 klm/search/edge_generator.cc create mode 100644 klm/search/edge_generator.hh create mode 100644 klm/search/final.hh create mode 100644 klm/search/rule.cc create mode 100644 klm/search/rule.hh create mode 100644 klm/search/source.hh create mode 100644 klm/search/types.hh create mode 100644 klm/search/vertex.cc create mode 100644 klm/search/vertex.hh create mode 100644 klm/search/vertex_generator.cc create mode 100644 klm/search/vertex_generator.hh create mode 100644 klm/search/weights.cc create mode 100644 klm/search/weights.hh create mode 100644 klm/search/weights_test.cc create mode 100644 klm/search/word.hh (limited to 'klm') diff --git a/klm/search/Jamfile b/klm/search/Jamfile new file mode 100644 index 00000000..ac47c249 --- /dev/null +++ b/klm/search/Jamfile @@ -0,0 +1,5 @@ +lib search : weights.cc vertex.cc vertex_generator.cc edge_generator.cc rule.cc ../lm//kenlm ../util//kenutil : : : .. 
; + +import testing ; + +unit-test weights_test : weights_test.cc search /top//boost_unit_test_framework ; diff --git a/klm/search/arity.hh b/klm/search/arity.hh new file mode 100644 index 00000000..09c2c671 --- /dev/null +++ b/klm/search/arity.hh @@ -0,0 +1,8 @@ +#ifndef SEARCH_ARITY__ +#define SEARCH_ARITY__ +namespace search { + +const unsigned int kMaxArity = 2; + +} // namespace search +#endif // SEARCH_ARITY__ diff --git a/klm/search/config.hh b/klm/search/config.hh new file mode 100644 index 00000000..e21e4b7c --- /dev/null +++ b/klm/search/config.hh @@ -0,0 +1,25 @@ +#ifndef SEARCH_CONFIG__ +#define SEARCH_CONFIG__ + +#include "search/weights.hh" +#include "util/string_piece.hh" + +namespace search { + +class Config { + public: + Config(StringPiece weight_str, unsigned int pop_limit) : + weights_(weight_str), pop_limit_(pop_limit) {} + + const Weights &GetWeights() const { return weights_; } + + unsigned int PopLimit() const { return pop_limit_; } + + private: + search::Weights weights_; + unsigned int pop_limit_; +}; + +} // namespace search + +#endif // SEARCH_CONFIG__ diff --git a/klm/search/context.hh b/klm/search/context.hh new file mode 100644 index 00000000..ae248549 --- /dev/null +++ b/klm/search/context.hh @@ -0,0 +1,66 @@ +#ifndef SEARCH_CONTEXT__ +#define SEARCH_CONTEXT__ + +#include "lm/model.hh" +#include "search/config.hh" +#include "search/final.hh" +#include "search/types.hh" +#include "search/vertex.hh" +#include "search/word.hh" +#include "util/exception.hh" + +#include +#include + +#include + +namespace search { + +class Weights; + +class ContextBase { + public: + explicit ContextBase(const Config &config) : pop_limit_(config.PopLimit()), weights_(config.GetWeights()) {} + + Final *NewFinal() { + Final *ret = final_pool_.construct(); + assert(ret); + return ret; + } + + VertexNode *NewVertexNode() { + VertexNode *ret = vertex_node_pool_.construct(); + assert(ret); + return ret; + } + + void DeleteVertexNode(VertexNode *node) { + 
vertex_node_pool_.destroy(node); + } + + unsigned int PopLimit() const { return pop_limit_; } + + const Weights &GetWeights() const { return weights_; } + + private: + boost::object_pool final_pool_; + boost::object_pool vertex_node_pool_; + + unsigned int pop_limit_; + + const Weights &weights_; +}; + +template class Context : public ContextBase { + public: + Context(const Config &config, const Model &model) : ContextBase(config), model_(model) {} + + const Model &LanguageModel() const { return model_; } + + private: + const Model &model_; +}; + +} // namespace search + +#endif // SEARCH_CONTEXT__ diff --git a/klm/search/edge.hh b/klm/search/edge.hh new file mode 100644 index 00000000..4d2a5cbf --- /dev/null +++ b/klm/search/edge.hh @@ -0,0 +1,54 @@ +#ifndef SEARCH_EDGE__ +#define SEARCH_EDGE__ + +#include "lm/state.hh" +#include "search/arity.hh" +#include "search/rule.hh" +#include "search/types.hh" +#include "search/vertex.hh" + +#include + +namespace search { + +class Edge { + public: + Edge() { + end_to_ = to_; + } + + Rule &InitRule() { return rule_; } + + void Add(Vertex &vertex) { + assert(end_to_ - to_ < kMaxArity); + *(end_to_++) = &vertex; + } + + const Vertex &GetVertex(std::size_t index) const { + return *to_[index]; + } + + const Rule &GetRule() const { return rule_; } + + private: + // Rule and pointers to rule arguments. 
+ Rule rule_; + + Vertex *to_[kMaxArity]; + Vertex **end_to_; +}; + +struct PartialEdge { + Score score; + // Terminals + lm::ngram::ChartState between[kMaxArity + 1]; + // Non-terminals + PartialVertex nt[kMaxArity]; + + bool operator<(const PartialEdge &other) const { + return score < other.score; + } +}; + +} // namespace search +#endif // SEARCH_EDGE__ diff --git a/klm/search/edge_generator.cc b/klm/search/edge_generator.cc new file mode 100644 index 00000000..d135899a --- /dev/null +++ b/klm/search/edge_generator.cc @@ -0,0 +1,129 @@ +#include "search/edge_generator.hh" + +#include "lm/left.hh" +#include "lm/partial.hh" +#include "search/context.hh" +#include "search/vertex.hh" +#include "search/vertex_generator.hh" + +#include + +namespace search { + +bool EdgeGenerator::Init(Edge &edge, VertexGenerator &parent) { + from_ = &edge; + for (unsigned int i = 0; i < GetRule().Arity(); ++i) { + if (edge.GetVertex(i).RootPartial().Empty()) return false; + } + PartialEdge &root = *parent.MallocPartialEdge(); + root.score = GetRule().Bound(); + for (unsigned int i = 0; i < GetRule().Arity(); ++i) { + root.nt[i] = edge.GetVertex(i).RootPartial(); + root.score += root.nt[i].Bound(); + } + for (unsigned int i = GetRule().Arity(); i < 2; ++i) { + root.nt[i] = kBlankPartialVertex; + } + for (unsigned int i = 0; i < GetRule().Arity() + 1; ++i) { + root.between[i] = GetRule().Lexical(i); + } + // wtf no clear method? + generate_ = Generate(); + generate_.push(&root); + top_ = root.score; + return true; +} + +namespace { + +template float FastScore(const Context &context, unsigned char victim, unsigned char arity, const PartialEdge &previous, PartialEdge &update) { + memcpy(update.between, previous.between, sizeof(lm::ngram::ChartState) * (arity + 1)); + + float ret = 0.0; + lm::ngram::ChartState *before, *after; + if (victim == 0) { + before = &update.between[0]; + after = &update.between[(arity == 2 && previous.nt[1].Complete()) ? 
2 : 1]; + } else { + assert(victim == 1); + assert(arity == 2); + before = &update.between[previous.nt[0].Complete() ? 0 : 1]; + after = &update.between[2]; + } + const lm::ngram::ChartState &previous_reveal = previous.nt[victim].State(); + const PartialVertex &update_nt = update.nt[victim]; + const lm::ngram::ChartState &update_reveal = update_nt.State(); + float just_after = 0.0; + if ((update_reveal.left.length > previous_reveal.left.length) || (update_reveal.left.full && !previous_reveal.left.full)) { + just_after += lm::ngram::RevealAfter(context.LanguageModel(), before->left, before->right, update_reveal.left, previous_reveal.left.length); + } + if ((update_reveal.right.length > previous_reveal.right.length) || (update_nt.RightFull() && !previous.nt[victim].RightFull())) { + ret += lm::ngram::RevealBefore(context.LanguageModel(), update_reveal.right, previous_reveal.right.length, update_nt.RightFull(), after->left, after->right); + } + if (update_nt.Complete()) { + if (update_reveal.left.full) { + before->left.full = true; + } else { + assert(update_reveal.left.length == update_reveal.right.length); + ret += lm::ngram::Subsume(context.LanguageModel(), before->left, before->right, after->left, after->right, update_reveal.left.length); + } + if (victim == 0) { + update.between[0].right = after->right; + } else { + update.between[2].left = before->left; + } + } + return previous.score + (ret + just_after) * context.GetWeights().LM(); +} + +} // namespace + +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent) { + assert(!generate_.empty()); + PartialEdge &top = *generate_.top(); + generate_.pop(); + unsigned int victim = 0; + unsigned char lowest_length = 255; + for (unsigned int i = 0; i != GetRule().Arity(); ++i) { + if (!top.nt[i].Complete() && top.nt[i].Length() < lowest_length) { + lowest_length = top.nt[i].Length(); + victim = i; + } + } + if (lowest_length == 255) { + // All states report complete. 
+ top.between[0].right = top.between[GetRule().Arity()].right; + parent.NewHypothesis(top.between[0], *from_, top); + top_ = generate_.empty() ? -kScoreInf : generate_.top()->score; + return !generate_.empty(); + } + + unsigned int stay = !victim; + PartialEdge &continuation = *parent.MallocPartialEdge(); + float old_bound = top.nt[victim].Bound(); + // The alternate's score will change because alternate.nt[victim] changes. + bool split = top.nt[victim].Split(continuation.nt[victim]); + // top is now the alternate. + + continuation.nt[stay] = top.nt[stay]; + continuation.score = FastScore(context, victim, GetRule().Arity(), top, continuation); + // TODO: dedupe? + generate_.push(&continuation); + + if (split) { + // We have an alternate. + top.score += top.nt[victim].Bound() - old_bound; + // TODO: dedupe? + generate_.push(&top); + } else { + parent.FreePartialEdge(&top); + } + + top_ = generate_.top()->score; + return true; +} + +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent); +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent); + +} // namespace search diff --git a/klm/search/edge_generator.hh b/klm/search/edge_generator.hh new file mode 100644 index 00000000..e306dc61 --- /dev/null +++ b/klm/search/edge_generator.hh @@ -0,0 +1,54 @@ +#ifndef SEARCH_EDGE_GENERATOR__ +#define SEARCH_EDGE_GENERATOR__ + +#include "search/edge.hh" + +#include + +#include +#include + +namespace lm { +namespace ngram { +class ChartState; +} // namespace ngram +} // namespace lm + +namespace search { + +template class Context; + +class VertexGenerator; + +struct PartialEdgePointerLess : std::binary_function { + bool operator()(const PartialEdge *first, const PartialEdge *second) const { + return *first < *second; + } +}; + +class EdgeGenerator { + public: + // True if it has a hypothesis. 
+ bool Init(Edge &edge, VertexGenerator &parent); + + Score Top() const { + return top_; + } + + template bool Pop(Context &context, VertexGenerator &parent); + + private: + const Rule &GetRule() const { + return from_->GetRule(); + } + + Score top_; + + typedef std::priority_queue, PartialEdgePointerLess> Generate; + Generate generate_; + + Edge *from_; +}; + +} // namespace search +#endif // SEARCH_EDGE_GENERATOR__ diff --git a/klm/search/final.hh b/klm/search/final.hh new file mode 100644 index 00000000..24e6f0a5 --- /dev/null +++ b/klm/search/final.hh @@ -0,0 +1,40 @@ +#ifndef SEARCH_FINAL__ +#define SEARCH_FINAL__ + +#include "search/rule.hh" +#include "search/types.hh" + +#include + +namespace search { + +class Final { + public: + typedef boost::array ChildArray; + + void Reset(Score bound, const Rule &from, const Final &left, const Final &right) { + bound_ = bound; + from_ = &from; + children_[0] = &left; + children_[1] = &right; + } + + const ChildArray &Children() const { return children_; } + + unsigned int ChildCount() const { return from_->Arity(); } + + const Rule &From() const { return *from_; } + + Score Bound() const { return bound_; } + + private: + Score bound_; + + const Rule *from_; + + ChildArray children_; +}; + +} // namespace search + +#endif // SEARCH_FINAL__ diff --git a/klm/search/rule.cc b/klm/search/rule.cc new file mode 100644 index 00000000..a8b993eb --- /dev/null +++ b/klm/search/rule.cc @@ -0,0 +1,55 @@ +#include "search/rule.hh" + +#include "search/context.hh" +#include "search/final.hh" + +#include + +#include + +namespace search { + +template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos) { + additive_ = additive; + Score lm_score = 0.0; + lexical_.clear(); + const lm::WordIndex oov = context.LanguageModel().GetVocabulary().NotFound(); + + for (std::vector::const_iterator word = items_.begin(); ; ++word) { + lexical_.resize(lexical_.size() + 1); + lm::ngram::RuleScore 
scorer(context.LanguageModel(), lexical_.back()); + // TODO: optimize + if (prepend_bos && (word == items_.begin())) { + scorer.BeginSentence(); + } + for (; ; ++word) { + if (word == items_.end()) { + lm_score += scorer.Finish(); + bound_ = additive_ + context.GetWeights().LM() * lm_score; + assert(lexical_.size() == arity_ + 1); + return; + } + if (!word->Terminal()) break; + if (word->Index() == oov) additive_ += context.GetWeights().OOV(); + scorer.Terminal(word->Index()); + } + lm_score += scorer.Finish(); + } +} + +template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); +template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); + +std::ostream &operator<<(std::ostream &o, const Rule &rule) { + const Rule::ItemsRet &items = rule.Items(); + for (Rule::ItemsRet::const_iterator i = items.begin(); i != items.end(); ++i) { + if (i->Terminal()) { + o << i->String() << ' '; + } else { + o << "[] "; + } + } + return o; +} + +} // namespace search diff --git a/klm/search/rule.hh b/klm/search/rule.hh new file mode 100644 index 00000000..79192d40 --- /dev/null +++ b/klm/search/rule.hh @@ -0,0 +1,60 @@ +#ifndef SEARCH_RULE__ +#define SEARCH_RULE__ + +#include "lm/left.hh" +#include "search/arity.hh" +#include "search/types.hh" +#include "search/word.hh" + +#include + +#include +#include + +namespace search { + +template class Context; + +class Rule { + public: + Rule() : arity_(0) {} + + void AppendTerminal(Word w) { items_.push_back(w); } + + void AppendNonTerminal() { + items_.resize(items_.size() + 1); + ++arity_; + } + + template void FinishedAdding(const Context &context, Score additive, bool prepend_bos); + + Score Bound() const { return bound_; } + + Score Additive() const { return additive_; } + + unsigned int Arity() const { return arity_; } + + const lm::ngram::ChartState &Lexical(unsigned int index) const { + return lexical_[index]; + } + + // For printing. 
+ typedef const std::vector ItemsRet; + ItemsRet &Items() const { return items_; } + + private: + Score bound_, additive_; + + unsigned int arity_; + + // TODO: pool? + std::vector items_; + + std::vector lexical_; +}; + +std::ostream &operator<<(std::ostream &o, const Rule &rule); + +} // namespace search + +#endif // SEARCH_RULE__ diff --git a/klm/search/source.hh b/klm/search/source.hh new file mode 100644 index 00000000..11839f7b --- /dev/null +++ b/klm/search/source.hh @@ -0,0 +1,48 @@ +#ifndef SEARCH_SOURCE__ +#define SEARCH_SOURCE__ + +#include "search/types.hh" + +#include +#include + +namespace search { + +template class Source { + public: + Source() : bound_(kScoreInf) {} + + Index Size() const { + return final_.size(); + } + + Score Bound() const { + return bound_; + } + + const Final &operator[](Index index) const { + return *final_[index]; + } + + Score ScoreOrBound(Index index) const { + return Size() > index ? final_[index]->Total() : Bound(); + } + + protected: + void AddFinal(const Final &store) { + final_.push_back(&store); + } + + void SetBound(Score to) { + assert(to <= bound_ + 0.001); + bound_ = to; + } + + private: + std::vector final_; + + Score bound_; +}; + +} // namespace search +#endif // SEARCH_SOURCE__ diff --git a/klm/search/types.hh b/klm/search/types.hh new file mode 100644 index 00000000..9726379f --- /dev/null +++ b/klm/search/types.hh @@ -0,0 +1,18 @@ +#ifndef SEARCH_TYPES__ +#define SEARCH_TYPES__ + +#include + +namespace search { + +typedef float Score; +const Score kScoreInf = INFINITY; + +// This could have been an enum but gcc wants 4 bytes. 
+typedef bool ExtendDirection; +const ExtendDirection kExtendLeft = 0; +const ExtendDirection kExtendRight = 1; + +} // namespace search + +#endif // SEARCH_TYPES__ diff --git a/klm/search/vertex.cc b/klm/search/vertex.cc new file mode 100644 index 00000000..cc53c0dd --- /dev/null +++ b/klm/search/vertex.cc @@ -0,0 +1,48 @@ +#include "search/vertex.hh" + +#include "search/context.hh" + +#include +#include + +#include + +namespace search { + +namespace { + +struct GreaterByBound : public std::binary_function { + bool operator()(const VertexNode *first, const VertexNode *second) const { + return first->Bound() > second->Bound(); + } +}; + +} // namespace + +void VertexNode::SortAndSet(ContextBase &context, VertexNode **parent_ptr) { + if (Complete()) { + assert(end_); + assert(extend_.empty()); + bound_ = end_->Bound(); + return; + } + if (extend_.size() == 1 && parent_ptr) { + *parent_ptr = extend_[0]; + extend_[0]->SortAndSet(context, parent_ptr); + context.DeleteVertexNode(this); + return; + } + for (std::vector::iterator i = extend_.begin(); i != extend_.end(); ++i) { + (*i)->SortAndSet(context, &*i); + } + std::sort(extend_.begin(), extend_.end(), GreaterByBound()); + bound_ = extend_.front()->Bound(); +} + +namespace { +VertexNode kBlankVertexNode; +} // namespace + +PartialVertex kBlankPartialVertex(kBlankVertexNode); + +} // namespace search diff --git a/klm/search/vertex.hh b/klm/search/vertex.hh new file mode 100644 index 00000000..7ef29efc --- /dev/null +++ b/klm/search/vertex.hh @@ -0,0 +1,165 @@ +#ifndef SEARCH_VERTEX__ +#define SEARCH_VERTEX__ + +#include "lm/left.hh" +#include "search/final.hh" +#include "search/types.hh" + +#include + +#include +#include + +#include + +namespace search { + +class ContextBase; + +class Edge; + +class VertexNode { + public: + VertexNode() : end_(NULL) {} + + void InitRoot() { + extend_.clear(); + state_.left.full = false; + state_.left.length = 0; + state_.right.length = 0; + right_full_ = false; + bound_ = -kScoreInf; 
+ end_ = NULL; + } + + lm::ngram::ChartState &MutableState() { return state_; } + bool &MutableRightFull() { return right_full_; } + + void AddExtend(VertexNode *next) { + extend_.push_back(next); + } + + void SetEnd(Final *end) { end_ = end; } + + Final &MutableEnd() { return *end_; } + + void SortAndSet(ContextBase &context, VertexNode **parent_pointer); + + // Should only happen to a root node when the entire vertex is empty. + bool Empty() const { + return !end_ && extend_.empty(); + } + + bool Complete() const { + return end_; + } + + const lm::ngram::ChartState &State() const { return state_; } + bool RightFull() const { return right_full_; } + + Score Bound() const { + return bound_; + } + + unsigned char Length() const { + return state_.left.length + state_.right.length; + } + + // May be NULL. + const Final *End() const { return end_; } + + const VertexNode &operator[](size_t index) const { + return *extend_[index]; + } + + size_t Size() const { + return extend_.size(); + } + + private: + std::vector extend_; + + lm::ngram::ChartState state_; + bool right_full_; + + Score bound_; + Final *end_; +}; + +class PartialVertex { + public: + PartialVertex() {} + + explicit PartialVertex(const VertexNode &back) : back_(&back), index_(0) {} + + bool Empty() const { return back_->Empty(); } + + bool Complete() const { return back_->Complete(); } + + const lm::ngram::ChartState &State() const { return back_->State(); } + bool RightFull() const { return back_->RightFull(); } + + Score Bound() const { return Complete() ? back_->End()->Bound() : (*back_)[index_].Bound(); } + + unsigned char Length() const { return back_->Length(); } + + // Split into continuation and alternative, rendering this the alternative. 
+ bool Split(PartialVertex &continuation) { + assert(!Complete()); + continuation.back_ = &((*back_)[index_]); + continuation.index_ = 0; + if (index_ + 1 < back_->Size()) { + ++index_; + return true; + } + return false; + } + + const Final &End() const { + return *back_->End(); + } + + private: + const VertexNode *back_; + unsigned int index_; +}; + +extern PartialVertex kBlankPartialVertex; + +class Vertex { + public: + Vertex() +#ifdef DEBUG + : finished_adding_(false) +#endif + {} + + void Add(Edge &edge) { +#ifdef DEBUG + assert(!finished_adding_); +#endif + edges_.push_back(&edge); + } + + void FinishedAdding() { +#ifdef DEBUG + assert(!finished_adding_); + finished_adding_ = true; +#endif + } + + PartialVertex RootPartial() const { return PartialVertex(root_); } + + private: + friend class VertexGenerator; + std::vector edges_; + +#ifdef DEBUG + bool finished_adding_; +#endif + + VertexNode root_; +}; + +} // namespace search +#endif // SEARCH_VERTEX__ diff --git a/klm/search/vertex_generator.cc b/klm/search/vertex_generator.cc new file mode 100644 index 00000000..0281fc37 --- /dev/null +++ b/klm/search/vertex_generator.cc @@ -0,0 +1,99 @@ +#include "search/vertex_generator.hh" + +#include "lm/left.hh" +#include "search/context.hh" + +#include + +namespace search { + +template VertexGenerator::VertexGenerator(Context &context, Vertex &gen) : context_(context), edges_(gen.edges_.size()), partial_edge_pool_(sizeof(PartialEdge), context.PopLimit() * 2) { + for (std::size_t i = 0; i < gen.edges_.size(); ++i) { + if (edges_[i].Init(*gen.edges_[i], *this)) + generate_.push(&edges_[i]); + } + gen.root_.InitRoot(); + root_.under = &gen.root_; + to_pop_ = context.PopLimit(); + while (to_pop_ > 0 && !generate_.empty()) { + EdgeGenerator *top = generate_.top(); + generate_.pop(); + if (top->Pop(context, *this)) { + generate_.push(top); + } + } + gen.root_.SortAndSet(context, NULL); +} + +template VertexGenerator::VertexGenerator(Context &context, Vertex &gen); 
+template VertexGenerator::VertexGenerator(Context &context, Vertex &gen); + +namespace { +const uint64_t kCompleteAdd = static_cast(-1); +} // namespace + +void VertexGenerator::NewHypothesis(const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial) { + std::pair got(existing_.insert(std::pair(hash_value(state), NULL))); + if (!got.second) { + // Found it already. + Final &exists = *got.first->second; + if (exists.Bound() < partial.score) { + exists.Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + } + --to_pop_; + return; + } + unsigned char left = 0, right = 0; + Trie *node = &root_; + while (true) { + if (left == state.left.length) { + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, left, true, right, false); + for (; right < state.right.length; ++right) { + node = &FindOrInsert(*node, state.right.words[right], state, left, true, right + 1, false); + } + break; + } + node = &FindOrInsert(*node, state.left.pointers[left], state, left + 1, false, right, false); + left++; + if (right == state.right.length) { + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, left, false, right, true); + for (; left < state.left.length; ++left) { + node = &FindOrInsert(*node, state.left.pointers[left], state, left + 1, false, right, true); + } + break; + } + node = &FindOrInsert(*node, state.right.words[right], state, left, false, right + 1, false); + right++; + } + + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, state.left.length, true, state.right.length, true); + got.first->second = CompleteTransition(*node, state, from, partial); + --to_pop_; +} + +VertexGenerator::Trie &VertexGenerator::FindOrInsert(VertexGenerator::Trie &node, uint64_t added, const lm::ngram::ChartState &state, unsigned char left, bool left_full, unsigned char right, bool right_full) { + VertexGenerator::Trie &next = node.extend[added]; + if (!next.under) { + next.under = context_.NewVertexNode(); 
+ lm::ngram::ChartState &writing = next.under->MutableState(); + writing = state; + writing.left.full &= left_full && state.left.full; + next.under->MutableRightFull() = right_full && state.left.full; + writing.left.length = left; + writing.right.length = right; + node.under->AddExtend(next.under); + } + return next; +} + +Final *VertexGenerator::CompleteTransition(VertexGenerator::Trie &starter, const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial) { + VertexNode &node = *starter.under; + assert(node.State().left.full == state.left.full); + assert(!node.End()); + Final *final = context_.NewFinal(); + final->Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + node.SetEnd(final); + return final; +} + +} // namespace search diff --git a/klm/search/vertex_generator.hh b/klm/search/vertex_generator.hh new file mode 100644 index 00000000..8cdf1420 --- /dev/null +++ b/klm/search/vertex_generator.hh @@ -0,0 +1,70 @@ +#ifndef SEARCH_VERTEX_GENERATOR__ +#define SEARCH_VERTEX_GENERATOR__ + +#include "search/edge.hh" +#include "search/edge_generator.hh" + +#include +#include + +#include + +namespace lm { +namespace ngram { +class ChartState; +} // namespace ngram +} // namespace lm + +namespace search { + +template class Context; +class ContextBase; +class Final; + +class VertexGenerator { + public: + template VertexGenerator(Context &context, Vertex &gen); + + PartialEdge *MallocPartialEdge() { return static_cast(partial_edge_pool_.malloc()); } + void FreePartialEdge(PartialEdge *value) { partial_edge_pool_.free(value); } + + void NewHypothesis(const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial); + + private: + // Parallel structure to VertexNode. 
+ struct Trie { + Trie() : under(NULL) {} + + VertexNode *under; + boost::unordered_map extend; + }; + + Trie &FindOrInsert(Trie &node, uint64_t added, const lm::ngram::ChartState &state, unsigned char left, bool left_full, unsigned char right, bool right_full); + + Final *CompleteTransition(Trie &node, const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial); + + ContextBase &context_; + + std::vector edges_; + + struct LessByTop : public std::binary_function { + bool operator()(const EdgeGenerator *first, const EdgeGenerator *second) const { + return first->Top() < second->Top(); + } + }; + + typedef std::priority_queue, LessByTop> Generate; + Generate generate_; + + Trie root_; + + typedef boost::unordered_map Existing; + Existing existing_; + + int to_pop_; + + boost::pool<> partial_edge_pool_; +}; + +} // namespace search +#endif // SEARCH_VERTEX_GENERATOR__ diff --git a/klm/search/weights.cc b/klm/search/weights.cc new file mode 100644 index 00000000..82ff3f12 --- /dev/null +++ b/klm/search/weights.cc @@ -0,0 +1,69 @@ +#include "search/weights.hh" +#include "util/tokenize_piece.hh" + +#include + +namespace search { + +namespace { +struct Insert { + void operator()(boost::unordered_map &map, StringPiece name, search::Score score) const { + std::string copy(name.data(), name.size()); + map[copy] = score; + } +}; + +struct DotProduct { + search::Score total; + DotProduct() : total(0.0) {} + + void operator()(const boost::unordered_map &map, StringPiece name, search::Score score) { + boost::unordered_map::const_iterator i(FindStringPiece(map, name)); + if (i != map.end()) + total += score * i->second; + } +}; + +template void Parse(StringPiece text, Map &map, Op &op) { + for (util::TokenIter spaces(text, ' '); spaces; ++spaces) { + util::TokenIter equals(*spaces, '='); + UTIL_THROW_IF(!equals, WeightParseException, "Bad weight token " << *spaces); + StringPiece name(*equals); + UTIL_THROW_IF(!++equals, WeightParseException, "Bad weight 
token " << *spaces); + char *end; + // Assumes proper termination. + double value = std::strtod(equals->data(), &end); + UTIL_THROW_IF(end != equals->data() + equals->size(), WeightParseException, "Failed to parse weight" << *equals); + UTIL_THROW_IF(++equals, WeightParseException, "Too many equals in " << *spaces); + op(map, name, value); + } +} + +} // namespace + +Weights::Weights(StringPiece text) { + Insert op; + Parse(text, map_, op); + lm_ = Steal("LanguageModel"); + oov_ = Steal("OOV"); + word_penalty_ = Steal("WordPenalty"); +} + +search::Score Weights::DotNoLM(StringPiece text) const { + DotProduct dot; + Parse(text, map_, dot); + return dot.total; +} + +float Weights::Steal(const std::string &str) { + Map::iterator i(map_.find(str)); + if (i == map_.end()) { + return 0.0; + } else { + float ret = i->second; + map_.erase(i); + return ret; + } +} + +} // namespace search diff --git a/klm/search/weights.hh b/klm/search/weights.hh new file mode 100644 index 00000000..4a4388c7 --- /dev/null +++ b/klm/search/weights.hh @@ -0,0 +1,49 @@ +// For now, the individual features are not kept. +#ifndef SEARCH_WEIGHTS__ +#define SEARCH_WEIGHTS__ + +#include "search/types.hh" +#include "util/exception.hh" +#include "util/string_piece.hh" + +#include + +#include + +namespace search { + +class WeightParseException : public util::Exception { + public: + WeightParseException() {} + ~WeightParseException() throw() {} +}; + +class Weights { + public: + // Parses weights, sets lm_weight_, removes it from map_. + explicit Weights(StringPiece text); + + search::Score DotNoLM(StringPiece text) const; + + search::Score LM() const { return lm_; } + + search::Score OOV() const { return oov_; } + + search::Score WordPenalty() const { return word_penalty_; } + + // Mostly for testing. 
+ const boost::unordered_map &GetMap() const { return map_; } + + private: + float Steal(const std::string &str); + + typedef boost::unordered_map Map; + + Map map_; + + search::Score lm_, oov_, word_penalty_; +}; + +} // namespace search + +#endif // SEARCH_WEIGHTS__ diff --git a/klm/search/weights_test.cc b/klm/search/weights_test.cc new file mode 100644 index 00000000..4811ff06 --- /dev/null +++ b/klm/search/weights_test.cc @@ -0,0 +1,38 @@ +#include "search/weights.hh" + +#define BOOST_TEST_MODULE WeightTest +#include +#include + +namespace search { +namespace { + +#define CHECK_WEIGHT(value, string) \ + i = parsed.find(string); \ + BOOST_REQUIRE(i != parsed.end()); \ + BOOST_CHECK_CLOSE((value), i->second, 0.001); + +BOOST_AUTO_TEST_CASE(parse) { + // These are not real feature weights. + Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); + const boost::unordered_map &parsed = w.GetMap(); + boost::unordered_map::const_iterator i; + CHECK_WEIGHT(0.0, "rarity"); + CHECK_WEIGHT(0.0, "phrase-SGT"); + CHECK_WEIGHT(9.45117, "phrase-TGS"); + CHECK_WEIGHT(2.33833, "lexical-SGT"); + BOOST_CHECK(parsed.end() == parsed.find("lm")); + BOOST_CHECK_CLOSE(3.0, w.LM(), 0.001); + CHECK_WEIGHT(-28.3317, "lexical-TGS"); + CHECK_WEIGHT(5.0, "glue?"); +} + +BOOST_AUTO_TEST_CASE(dot) { + Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); + BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0"), 0.001); + BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0 LanguageModel=10"), 0.001); + BOOST_CHECK_CLOSE(9.45117 * 3.0 + 28.3317 * 17.4, w.DotNoLM("rarity=5 phrase-TGS=3.0 LanguageModel=10 lexical-TGS=-17.4"), 0.001); +} + +} // namespace +} // namespace search diff --git a/klm/search/word.hh b/klm/search/word.hh new file mode 100644 index 00000000..e7a15be9 --- /dev/null 
+++ b/klm/search/word.hh @@ -0,0 +1,47 @@ +#ifndef SEARCH_WORD__ +#define SEARCH_WORD__ + +#include "lm/word_index.hh" + +#include + +#include +#include + +namespace search { + +class Word { + public: + // Construct a non-terminal. + Word() : entry_(NULL) {} + + explicit Word(const std::pair &entry) { + entry_ = &entry; + } + + // Returns true for two non-terminals even if their labels are different (since we don't care about labels). + bool operator==(const Word &other) const { + return entry_ == other.entry_; + } + + bool Terminal() const { return entry_ != NULL; } + + const std::string &String() const { return entry_->first; } + + lm::WordIndex Index() const { return entry_->second; } + + protected: + friend size_t hash_value(const Word &word); + + const std::pair *Entry() const { return entry_; } + + private: + const std::pair *entry_; +}; + +inline size_t hash_value(const Word &word) { + return boost::hash_value(word.Entry()); +} + +} // namespace search +#endif // SEARCH_WORD__ -- cgit v1.2.3 From 2ca3db90bd0a2e9a8619d2ebec7c6ac723838aca Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:49:58 +0100 Subject: Minor build fixes --- Jamroot | 4 ++-- klm/util/have.hh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'klm') diff --git a/Jamroot b/Jamroot index ef426146..738b83e3 100644 --- a/Jamroot +++ b/Jamroot @@ -26,7 +26,7 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; } -project : requirements $(requirements) darwin:static ; +project : requirements $(requirements) darwin:static boost_system ; project : default-build on release ; install-bin-libs dpmert//programs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec phrasinator//programs mira//kbest_mira ; @@ -40,6 +40,6 @@ rule all_tests ( targets * : dependencies : properties * ) { targets ?= [ glob *_test.cc ] ; for t in $(targets) { local base = 
[ MATCH "^(.*).cc$" : $(t) ] ; - unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework : $(properties) ; + unit-test $(base) : $(t) $(dependencies) /top//boost_unit_test_framework : $(properties) ; } } diff --git a/klm/util/have.hh b/klm/util/have.hh index 1d76a7fc..b8181e99 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -13,7 +13,7 @@ #endif #ifndef HAVE_BOOST -//#define HAVE_BOOST +#define HAVE_BOOST #endif #ifndef HAVE_THREADS -- cgit v1.2.3 From c26c35a9bcbb4d42ae50ad0a75c1b5fb59702bd1 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 12 Sep 2012 12:01:26 +0100 Subject: Refactor search so that it knows even less, but keeps track of edge pointers --- klm/lm/word_index.hh | 3 +++ klm/search/context.hh | 1 - klm/search/final.hh | 12 +++++------ klm/search/rule.cc | 32 +++++++++------------------- klm/search/rule.hh | 21 ++++--------------- klm/search/vertex_generator.cc | 4 ++-- klm/search/word.hh | 47 ------------------------------------------ 7 files changed, 25 insertions(+), 95 deletions(-) delete mode 100644 klm/search/word.hh (limited to 'klm') diff --git a/klm/lm/word_index.hh b/klm/lm/word_index.hh index 67841c30..e09557a7 100644 --- a/klm/lm/word_index.hh +++ b/klm/lm/word_index.hh @@ -2,8 +2,11 @@ #ifndef LM_WORD_INDEX__ #define LM_WORD_INDEX__ +#include + namespace lm { typedef unsigned int WordIndex; +const WordIndex kMaxWordIndex = UINT_MAX; } // namespace lm typedef lm::WordIndex LMWordIndex; diff --git a/klm/search/context.hh b/klm/search/context.hh index ae248549..27940053 100644 --- a/klm/search/context.hh +++ b/klm/search/context.hh @@ -6,7 +6,6 @@ #include "search/final.hh" #include "search/types.hh" #include "search/vertex.hh" -#include "search/word.hh" #include "util/exception.hh" #include diff --git a/klm/search/final.hh b/klm/search/final.hh index 24e6f0a5..823b8c1a 100644 --- a/klm/search/final.hh +++ b/klm/search/final.hh @@ -1,18 +1,20 @@ #ifndef SEARCH_FINAL__ #define SEARCH_FINAL__ -#include 
"search/rule.hh" +#include "search/arity.hh" #include "search/types.hh" #include namespace search { +class Edge; + class Final { public: typedef boost::array ChildArray; - void Reset(Score bound, const Rule &from, const Final &left, const Final &right) { + void Reset(Score bound, const Edge &from, const Final &left, const Final &right) { bound_ = bound; from_ = &from; children_[0] = &left; @@ -21,16 +23,14 @@ class Final { const ChildArray &Children() const { return children_; } - unsigned int ChildCount() const { return from_->Arity(); } - - const Rule &From() const { return *from_; } + const Edge &From() const { return *from_; } Score Bound() const { return bound_; } private: Score bound_; - const Rule *from_; + const Edge *from_; ChildArray children_; }; diff --git a/klm/search/rule.cc b/klm/search/rule.cc index a8b993eb..0a941527 100644 --- a/klm/search/rule.cc +++ b/klm/search/rule.cc @@ -9,47 +9,35 @@ namespace search { -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos) { +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos) { additive_ = additive; Score lm_score = 0.0; lexical_.clear(); const lm::WordIndex oov = context.LanguageModel().GetVocabulary().NotFound(); - for (std::vector::const_iterator word = items_.begin(); ; ++word) { + for (std::vector::const_iterator word = words.begin(); ; ++word) { lexical_.resize(lexical_.size() + 1); lm::ngram::RuleScore scorer(context.LanguageModel(), lexical_.back()); // TODO: optimize - if (prepend_bos && (word == items_.begin())) { + if (prepend_bos && (word == words.begin())) { scorer.BeginSentence(); } for (; ; ++word) { - if (word == items_.end()) { + if (word == words.end()) { lm_score += scorer.Finish(); bound_ = additive_ + context.GetWeights().LM() * lm_score; - assert(lexical_.size() == arity_ + 1); + arity_ = lexical_.size() - 1; return; } - if (!word->Terminal()) break; - if (word->Index() == oov) additive_ += 
context.GetWeights().OOV(); - scorer.Terminal(word->Index()); + if (*word == kNonTerminal) break; + if (*word == oov) additive_ += context.GetWeights().OOV(); + scorer.Terminal(*word); } lm_score += scorer.Finish(); } } -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); - -std::ostream &operator<<(std::ostream &o, const Rule &rule) { - const Rule::ItemsRet &items = rule.Items(); - for (Rule::ItemsRet::const_iterator i = items.begin(); i != items.end(); ++i) { - if (i->Terminal()) { - o << i->String() << ' '; - } else { - o << "[] "; - } - } - return o; -} +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); } // namespace search diff --git a/klm/search/rule.hh b/klm/search/rule.hh index 79192d40..920c64a7 100644 --- a/klm/search/rule.hh +++ b/klm/search/rule.hh @@ -2,9 +2,9 @@ #define SEARCH_RULE__ #include "lm/left.hh" +#include "lm/word_index.hh" #include "search/arity.hh" #include "search/types.hh" -#include "search/word.hh" #include @@ -19,14 +19,10 @@ class Rule { public: Rule() : arity_(0) {} - void AppendTerminal(Word w) { items_.push_back(w); } + static const lm::WordIndex kNonTerminal = lm::kMaxWordIndex; - void AppendNonTerminal() { - items_.resize(items_.size() + 1); - ++arity_; - } - - template void FinishedAdding(const Context &context, Score additive, bool prepend_bos); + // Use kNonTerminal for non-terminals. + template void Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); Score Bound() const { return bound_; } @@ -38,23 +34,14 @@ class Rule { return lexical_[index]; } - // For printing. 
- typedef const std::vector ItemsRet; - ItemsRet &Items() const { return items_; } - private: Score bound_, additive_; unsigned int arity_; - // TODO: pool? - std::vector items_; - std::vector lexical_; }; -std::ostream &operator<<(std::ostream &o, const Rule &rule); - } // namespace search #endif // SEARCH_RULE__ diff --git a/klm/search/vertex_generator.cc b/klm/search/vertex_generator.cc index 0281fc37..78948c97 100644 --- a/klm/search/vertex_generator.cc +++ b/klm/search/vertex_generator.cc @@ -38,7 +38,7 @@ void VertexGenerator::NewHypothesis(const lm::ngram::ChartState &state, const Ed // Found it already. Final &exists = *got.first->second; if (exists.Bound() < partial.score) { - exists.Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + exists.Reset(partial.score, from, partial.nt[0].End(), partial.nt[1].End()); } --to_pop_; return; @@ -91,7 +91,7 @@ Final *VertexGenerator::CompleteTransition(VertexGenerator::Trie &starter, const assert(node.State().left.full == state.left.full); assert(!node.End()); Final *final = context_.NewFinal(); - final->Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + final->Reset(partial.score, from, partial.nt[0].End(), partial.nt[1].End()); node.SetEnd(final); return final; } diff --git a/klm/search/word.hh b/klm/search/word.hh deleted file mode 100644 index e7a15be9..00000000 --- a/klm/search/word.hh +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef SEARCH_WORD__ -#define SEARCH_WORD__ - -#include "lm/word_index.hh" - -#include - -#include -#include - -namespace search { - -class Word { - public: - // Construct a non-terminal. - Word() : entry_(NULL) {} - - explicit Word(const std::pair &entry) { - entry_ = &entry; - } - - // Returns true for two non-terminals even if their labels are different (since we don't care about labels). 
- bool operator==(const Word &other) const { - return entry_ == other.entry_; - } - - bool Terminal() const { return entry_ != NULL; } - - const std::string &String() const { return entry_->first; } - - lm::WordIndex Index() const { return entry_->second; } - - protected: - friend size_t hash_value(const Word &word); - - const std::pair *Entry() const { return entry_; } - - private: - const std::pair *entry_; -}; - -inline size_t hash_value(const Word &word) { - return boost::hash_value(word.Entry()); -} - -} // namespace search -#endif // SEARCH_WORD__ -- cgit v1.2.3 From 7f4c0920a290191775e091334581bcc21e6ec9e4 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 12 Sep 2012 15:07:44 +0100 Subject: Add the alone stuff, using a wrapper to the edge class. --- klm/alone/Jamfile | 4 ++ klm/alone/assemble.cc | 76 +++++++++++++++++++++++++++ klm/alone/assemble.hh | 21 ++++++++ klm/alone/graph.hh | 87 +++++++++++++++++++++++++++++++ klm/alone/just_vocab.cc | 14 +++++ klm/alone/labeled_edge.hh | 30 +++++++++++ klm/alone/main.cc | 84 ++++++++++++++++++++++++++++++ klm/alone/read.cc | 118 ++++++++++++++++++++++++++++++++++++++++++ klm/alone/read.hh | 29 +++++++++++ klm/alone/threading.cc | 80 ++++++++++++++++++++++++++++ klm/alone/threading.hh | 129 ++++++++++++++++++++++++++++++++++++++++++++++ klm/alone/vocab.cc | 19 +++++++ klm/alone/vocab.hh | 34 ++++++++++++ 13 files changed, 725 insertions(+) create mode 100644 klm/alone/Jamfile create mode 100644 klm/alone/assemble.cc create mode 100644 klm/alone/assemble.hh create mode 100644 klm/alone/graph.hh create mode 100644 klm/alone/just_vocab.cc create mode 100644 klm/alone/labeled_edge.hh create mode 100644 klm/alone/main.cc create mode 100644 klm/alone/read.cc create mode 100644 klm/alone/read.hh create mode 100644 klm/alone/threading.cc create mode 100644 klm/alone/threading.hh create mode 100644 klm/alone/vocab.cc create mode 100644 klm/alone/vocab.hh (limited to 'klm') diff --git a/klm/alone/Jamfile 
b/klm/alone/Jamfile new file mode 100644 index 00000000..2cc90c05 --- /dev/null +++ b/klm/alone/Jamfile @@ -0,0 +1,4 @@ +lib standalone : assemble.cc read.cc threading.cc vocab.cc ../lm//kenlm ../util//kenutil ../search//search : .. : : .. ../search//search ../lm//kenlm ; + +exe decode : main.cc standalone main.cc : multi:..//boost_thread ; +exe just_vocab : just_vocab.cc standalone : multi:..//boost_thread ; diff --git a/klm/alone/assemble.cc b/klm/alone/assemble.cc new file mode 100644 index 00000000..2ae72ce9 --- /dev/null +++ b/klm/alone/assemble.cc @@ -0,0 +1,76 @@ +#include "alone/assemble.hh" + +#include "alone/labeled_edge.hh" +#include "search/final.hh" + +#include + +namespace alone { + +std::ostream &operator<<(std::ostream &o, const search::Final &final) { + const std::vector &words = static_cast(final.From()).Words(); + if (words.empty()) return o; + const search::Final *const *child = final.Children().data(); + std::vector::const_iterator i(words.begin()); + for (; i != words.end() - 1; ++i) { + if (*i) { + o << **i << ' '; + } else { + o << **child << ' '; + ++child; + } + } + + if (*i) { + if (**i != "") { + o << **i; + } + } else { + o << **child; + } + + return o; +} + +namespace { + +void MakeIndent(std::ostream &o, const char *indent_str, unsigned int level) { + for (unsigned int i = 0; i < level; ++i) + o << indent_str; +} + +void DetailedFinalInternal(std::ostream &o, const search::Final &final, const char *indent_str, unsigned int indent) { + o << "(\n"; + MakeIndent(o, indent_str, indent); + const std::vector &words = static_cast(final.From()).Words(); + const search::Final *const *child = final.Children().data(); + for (std::vector::const_iterator i(words.begin()); i != words.end(); ++i) { + if (*i) { + o << **i; + if (i == words.end() - 1) { + o << '\n'; + MakeIndent(o, indent_str, indent); + } else { + o << ' '; + } + } else { + // One extra indent from the line we're currently on. 
+ o << indent_str; + DetailedFinalInternal(o, **child, indent_str, indent + 1); + for (unsigned int i = 0; i < indent; ++i) o << indent_str; + ++child; + } + } + o << ")=" << final.Bound() << '\n'; +} +} // namespace + +void DetailedFinal(std::ostream &o, const search::Final &final, const char *indent_str) { + DetailedFinalInternal(o, final, indent_str, 0); +} + +void PrintFinal(const search::Final &final) { + std::cout << final << std::endl; +} + +} // namespace alone diff --git a/klm/alone/assemble.hh b/klm/alone/assemble.hh new file mode 100644 index 00000000..e6b0ad5c --- /dev/null +++ b/klm/alone/assemble.hh @@ -0,0 +1,21 @@ +#ifndef ALONE_ASSEMBLE__ +#define ALONE_ASSEMBLE__ + +#include + +namespace search { +class Final; +} // namespace search + +namespace alone { + +std::ostream &operator<<(std::ostream &o, const search::Final &final); + +void DetailedFinal(std::ostream &o, const search::Final &final, const char *indent_str = " "); + +// This isn't called anywhere but makes it easy to print from gdb. 
+void PrintFinal(const search::Final &final); + +} // namespace alone + +#endif // ALONE_ASSEMBLE__ diff --git a/klm/alone/graph.hh b/klm/alone/graph.hh new file mode 100644 index 00000000..788352c9 --- /dev/null +++ b/klm/alone/graph.hh @@ -0,0 +1,87 @@ +#ifndef ALONE_GRAPH__ +#define ALONE_GRAPH__ + +#include "alone/labeled_edge.hh" +#include "search/rule.hh" +#include "search/types.hh" +#include "search/vertex.hh" +#include "util/exception.hh" + +#include +#include +#include + +namespace alone { + +template class FixedAllocator : boost::noncopyable { + public: + FixedAllocator() : current_(NULL), end_(NULL) {} + + void Init(std::size_t count) { + assert(!current_); + array_.reset(new T[count]); + current_ = array_.get(); + end_ = current_ + count; + } + + T &operator[](std::size_t idx) { + return array_.get()[idx]; + } + + T *New() { + T *ret = current_++; + UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end"); + return ret; + } + + std::size_t Size() const { + return end_ - array_.get(); + } + + private: + boost::scoped_array array_; + T *current_, *end_; +}; + +class Graph : boost::noncopyable { + public: + typedef LabeledEdge Edge; + typedef search::Vertex Vertex; + + Graph() {} + + void SetCounts(std::size_t vertices, std::size_t edges) { + vertices_.Init(vertices); + edges_.Init(edges); + } + + Vertex *NewVertex() { + return vertices_.New(); + } + + std::size_t VertexSize() const { return vertices_.Size(); } + + Vertex &MutableVertex(std::size_t index) { + return vertices_[index]; + } + + Edge *NewEdge() { + return edges_.New(); + } + + std::size_t EdgeSize() const { return edges_.Size(); } + + void SetRoot(Vertex *root) { root_ = root; } + + Vertex &Root() { return *root_; } + + private: + FixedAllocator vertices_; + FixedAllocator edges_; + + Vertex *root_; +}; + +} // namespace alone + +#endif // ALONE_GRAPH__ diff --git a/klm/alone/just_vocab.cc b/klm/alone/just_vocab.cc new file mode 100644 index 00000000..35aea5ed --- /dev/null +++ 
b/klm/alone/just_vocab.cc @@ -0,0 +1,14 @@ +#include "alone/read.hh" +#include "util/file_piece.hh" + +#include + +int main() { + util::FilePiece f(0, "stdin", &std::cerr); + while (true) { + try { + alone::JustVocab(f, std::cout); + } catch (const util::EndOfFileException &e) { break; } + std::cout << '\n'; + } +} diff --git a/klm/alone/labeled_edge.hh b/klm/alone/labeled_edge.hh new file mode 100644 index 00000000..94d8cbdf --- /dev/null +++ b/klm/alone/labeled_edge.hh @@ -0,0 +1,30 @@ +#ifndef ALONE_LABELED_EDGE__ +#define ALONE_LABELED_EDGE__ + +#include "search/edge.hh" + +#include +#include + +namespace alone { + +class LabeledEdge : public search::Edge { + public: + LabeledEdge() {} + + void AppendWord(const std::string *word) { + words_.push_back(word); + } + + const std::vector &Words() const { + return words_; + } + + private: + // NULL for non-terminals. + std::vector words_; +}; + +} // namespace alone + +#endif // ALONE_LABELED_EDGE__ diff --git a/klm/alone/main.cc b/klm/alone/main.cc new file mode 100644 index 00000000..7768b89c --- /dev/null +++ b/klm/alone/main.cc @@ -0,0 +1,84 @@ +#include "alone/threading.hh" +#include "search/config.hh" +#include "search/context.hh" +#include "util/exception.hh" +#include "util/file_piece.hh" +#include "util/usage.hh" + +#include + +#include +#include + +namespace alone { + +template void ReadLoop(const std::string &graph_prefix, Control &control) { + for (unsigned int sentence = 0; ; ++sentence) { + std::stringstream name; + name << graph_prefix << '/' << sentence; + std::auto_ptr file; + try { + file.reset(new util::FilePiece(name.str().c_str())); + } catch (const util::ErrnoException &e) { + if (e.Error() == ENOENT) return; + throw; + } + control.Add(file.release()); + } +} + +template void RunWithModelType(const char *graph_prefix, const char *model_file, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { + Model model(model_file); + search::Config config(weight_str, pop_limit); + + if 
(threads > 1) { +#ifdef WITH_THREADS + Controller controller(config, model, threads, std::cout); + ReadLoop(graph_prefix, controller); +#else + UTIL_THROW(util::Exception, "Threading support not compiled in."); +#endif + } else { + InThread controller(config, model, std::cout); + ReadLoop(graph_prefix, controller); + } +} + +void Run(const char *graph_prefix, const char *lm_name, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { + lm::ngram::ModelType model_type; + if (!lm::ngram::RecognizeBinary(lm_name, model_type)) model_type = lm::ngram::PROBING; + switch (model_type) { + case lm::ngram::PROBING: + RunWithModelType(graph_prefix, lm_name, weight_str, pop_limit, threads); + break; + case lm::ngram::REST_PROBING: + RunWithModelType(graph_prefix, lm_name, weight_str, pop_limit, threads); + break; + default: + UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); + } +} + +} // namespace alone + +int main(int argc, char *argv[]) { + if (argc < 5 || argc > 6) { + std::cerr << argv[0] << " graph_prefix lm \"weights\" pop [threads]" << std::endl; + return 1; + } + +#ifdef WITH_THREADS + unsigned thread_count = boost::thread::hardware_concurrency(); +#else + unsigned thread_count = 1; +#endif + if (argc == 6) { + thread_count = boost::lexical_cast(argv[5]); + UTIL_THROW_IF(!thread_count, util::Exception, "Thread count 0"); + } + UTIL_THROW_IF(!thread_count, util::Exception, "Boost doesn't know how many threads there are. 
Pass it on the command line."); + alone::Run(argv[1], argv[2], argv[3], boost::lexical_cast(argv[4]), thread_count); + + util::PrintUsage(std::cerr); + return 0; +} diff --git a/klm/alone/read.cc b/klm/alone/read.cc new file mode 100644 index 00000000..0b20be35 --- /dev/null +++ b/klm/alone/read.cc @@ -0,0 +1,118 @@ +#include "alone/read.hh" + +#include "alone/graph.hh" +#include "alone/vocab.hh" +#include "search/arity.hh" +#include "search/context.hh" +#include "search/weights.hh" +#include "util/file_piece.hh" + +#include +#include + +#include + +namespace alone { + +namespace { + +template Graph::Edge &ReadEdge(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab, bool final) { + Graph::Edge *ret = to.NewEdge(); + + StringPiece got; + + std::vector words; + unsigned long int terminals = 0; + while ("|||" != (got = from.ReadDelimited())) { + if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) { + // non-terminal + char *end_ptr; + unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10); + UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, FormatException, "Bad non-terminal" << got); + UTIL_THROW_IF(child >= to.VertexSize(), FormatException, "Reference to vertex " << child << " but we only have " << to.VertexSize() << " vertices. Is the file in bottom-up format?"); + ret->Add(to.MutableVertex(child)); + words.push_back(lm::kMaxWordIndex); + ret->AppendWord(NULL); + } else { + const std::pair &found = vocab.FindOrAdd(got); + words.push_back(found.second); + ret->AppendWord(&found.first); + ++terminals; + } + } + if (final) { + // This is not counted for the word penalty. + words.push_back(vocab.EndSentence().second); + ret->AppendWord(&vocab.EndSentence().first); + } + // Hard-coded word penalty. 
+ float additive = context.GetWeights().DotNoLM(from.ReadLine()) - context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; + ret->InitRule().Init(context, additive, words, final); + unsigned int arity = ret->GetRule().Arity(); + UTIL_THROW_IF(arity > search::kMaxArity, util::Exception, "Edit search/arity.hh and increase " << search::kMaxArity << " to at least " << arity); + return *ret; +} + +} // namespace + +// TODO: refactor +void JustVocab(util::FilePiece &from, std::ostream &out) { + boost::unordered_set seen; + unsigned long int vertices = from.ReadULong(); + from.ReadULong(); // edges + UTIL_THROW_IF(vertices == 0, FormatException, "Vertex count is zero"); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected newline after counts"); + std::string temp; + for (unsigned long int i = 0; i < vertices; ++i) { + unsigned long int edge_count = from.ReadULong(); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected after edge count"); + for (unsigned long int e = 0; e < edge_count; ++e) { + StringPiece got; + while ("|||" != (got = from.ReadDelimited())) { + if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) continue; + temp.assign(got.data(), got.size()); + if (seen.insert(temp).second) out << temp << ' '; + } + from.ReadLine(); // weights + } + } + // Eat sentence + from.ReadLine(); +} + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab) { + unsigned long int vertices; + try { + vertices = from.ReadULong(); + } catch (const util::EndOfFileException &e) { return false; } + unsigned long int edges = from.ReadULong(); + UTIL_THROW_IF(vertices < 2, FormatException, "Vertex count is " << vertices); + UTIL_THROW_IF(edges == 0, FormatException, "Edge count is " << edges); + --vertices; + --edges; + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected newline after counts"); + to.SetCounts(vertices, edges); + Graph::Vertex *vertex; + for (unsigned long int i = 0; ; ++i) { + 
vertex = to.NewVertex(); + unsigned long int edge_count = from.ReadULong(); + bool root = (i == vertices - 1); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected after edge count"); + for (unsigned long int e = 0; e < edge_count; ++e) { + vertex->Add(ReadEdge(context, from, to, vocab, root)); + } + vertex->FinishedAdding(); + if (root) break; + } + to.SetRoot(vertex); + StringPiece str = from.ReadLine(); + UTIL_THROW_IF("1" != str, FormatException, "Expected one edge to root"); + // The edge + from.ReadLine(); + return true; +} + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); + +} // namespace alone diff --git a/klm/alone/read.hh b/klm/alone/read.hh new file mode 100644 index 00000000..10769a86 --- /dev/null +++ b/klm/alone/read.hh @@ -0,0 +1,29 @@ +#ifndef ALONE_READ__ +#define ALONE_READ__ + +#include "util/exception.hh" + +#include + +namespace util { class FilePiece; } + +namespace search { template class Context; } + +namespace alone { + +class Graph; +class Vocab; + +class FormatException : public util::Exception { + public: + FormatException() {} + ~FormatException() throw() {} +}; + +void JustVocab(util::FilePiece &from, std::ostream &to); + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); + +} // namespace alone + +#endif // ALONE_READ__ diff --git a/klm/alone/threading.cc b/klm/alone/threading.cc new file mode 100644 index 00000000..475386b6 --- /dev/null +++ b/klm/alone/threading.cc @@ -0,0 +1,80 @@ +#include "alone/threading.hh" + +#include "alone/assemble.hh" +#include "alone/graph.hh" +#include "alone/read.hh" +#include "alone/vocab.hh" +#include "lm/model.hh" +#include "search/context.hh" +#include "search/vertex_generator.hh" + +#include +#include +#include + +#include + +namespace alone { +template void Decode(const search::Config &config, 
const Model &model, util::FilePiece *in_ptr, std::ostream &out) { + search::Context context(config, model); + Graph graph; + Vocab vocab(model.GetVocabulary()); + { + boost::scoped_ptr in(in_ptr); + ReadCDec(context, *in, graph, vocab); + } + + for (std::size_t i = 0; i < graph.VertexSize(); ++i) { + search::VertexGenerator(context, graph.MutableVertex(i)); + } + search::PartialVertex top = graph.Root().RootPartial(); + if (top.Empty()) { + out << "NO PATH FOUND"; + } else { + search::PartialVertex continuation; + while (!top.Complete()) { + top.Split(continuation); + top = continuation; + } + out << top.End() << " ||| " << top.End().Bound() << std::endl; + } +} + +template void Decode(const search::Config &config, const lm::ngram::ProbingModel &model, util::FilePiece *in_ptr, std::ostream &out); +template void Decode(const search::Config &config, const lm::ngram::RestProbingModel &model, util::FilePiece *in_ptr, std::ostream &out); + +#ifdef WITH_THREADS +template void DecodeHandler::operator()(Input message) { + std::stringstream assemble; + Decode(config_, model_, message.file, assemble); + Produce(message.sentence_id, assemble.str()); +} + +template void DecodeHandler::Produce(unsigned int sentence_id, const std::string &str) { + Output out; + out.sentence_id = sentence_id; + out.str = new std::string(str); + out_.Produce(out); +} + +void PrintHandler::operator()(Output message) { + unsigned int relative = message.sentence_id - done_; + if (waiting_.size() <= relative) waiting_.resize(relative + 1); + waiting_[relative] = message.str; + for (std::string *lead; !waiting_.empty() && (lead = waiting_[0]); waiting_.pop_front(), ++done_) { + out_ << *lead; + delete lead; + } +} + +template Controller::Controller(const search::Config &config, const Model &model, size_t decode_workers, std::ostream &to) : + sentence_id_(0), + printer_(decode_workers, 1, boost::ref(to), Output::Poison()), + decoder_(3, decode_workers, boost::in_place(boost::ref(config), 
boost::ref(model), boost::ref(printer_.In())), Input::Poison()) {} + +template class Controller; +template class Controller; + +#endif + +} // namespace alone diff --git a/klm/alone/threading.hh b/klm/alone/threading.hh new file mode 100644 index 00000000..0ab0f739 --- /dev/null +++ b/klm/alone/threading.hh @@ -0,0 +1,129 @@ +#ifndef ALONE_THREADING__ +#define ALONE_THREADING__ + +#ifdef WITH_THREADS +#include "util/pcqueue.hh" +#include "util/pool.hh" +#endif + +#include +#include +#include + +namespace util { +class FilePiece; +} // namespace util + +namespace search { +class Config; +template class Context; +} // namespace search + +namespace alone { + +template void Decode(const search::Config &config, const Model &model, util::FilePiece *in_ptr, std::ostream &out); + +class Graph; + +#ifdef WITH_THREADS +struct SentenceID { + unsigned int sentence_id; + bool operator==(const SentenceID &other) const { + return sentence_id == other.sentence_id; + } +}; + +struct Input : public SentenceID { + util::FilePiece *file; + static Input Poison() { + Input ret; + ret.sentence_id = static_cast(-1); + ret.file = NULL; + return ret; + } +}; + +struct Output : public SentenceID { + std::string *str; + static Output Poison() { + Output ret; + ret.sentence_id = static_cast(-1); + ret.str = NULL; + return ret; + } +}; + +template class DecodeHandler { + public: + typedef Input Request; + + DecodeHandler(const search::Config &config, const Model &model, util::PCQueue &out) : config_(config), model_(model), out_(out) {} + + void operator()(Input message); + + private: + void Produce(unsigned int sentence_id, const std::string &str); + + const search::Config &config_; + + const Model &model_; + + util::PCQueue &out_; +}; + +class PrintHandler { + public: + typedef Output Request; + + explicit PrintHandler(std::ostream &o) : out_(o), done_(0) {} + + void operator()(Output message); + + private: + std::ostream &out_; + std::deque waiting_; + unsigned int done_; +}; + +template 
class Controller { + public: + // This config must remain valid. + explicit Controller(const search::Config &config, const Model &model, size_t decode_workers, std::ostream &to); + + // Takes ownership of in. + void Add(util::FilePiece *in) { + Input input; + input.sentence_id = sentence_id_++; + input.file = in; + decoder_.Produce(input); + } + + private: + unsigned int sentence_id_; + + util::Pool printer_; + + util::Pool > decoder_; +}; +#endif + +// Same API as controller. +template class InThread { + public: + InThread(const search::Config &config, const Model &model, std::ostream &to) : config_(config), model_(model), to_(to) {} + + // Takes ownership of in. + void Add(util::FilePiece *in) { + Decode(config_, model_, in, to_); + } + + private: + const search::Config &config_; + + const Model &model_; + + std::ostream &to_; +}; + +} // namespace alone +#endif // ALONE_THREADING__ diff --git a/klm/alone/vocab.cc b/klm/alone/vocab.cc new file mode 100644 index 00000000..ffe55301 --- /dev/null +++ b/klm/alone/vocab.cc @@ -0,0 +1,19 @@ +#include "alone/vocab.hh" + +#include "lm/virtual_interface.hh" +#include "util/string_piece.hh" + +namespace alone { + +Vocab::Vocab(const lm::base::Vocabulary &backing) : backing_(backing), end_sentence_(FindOrAdd("")) {} + +const std::pair &Vocab::FindOrAdd(const StringPiece &str) { + Map::const_iterator i(FindStringPiece(map_, str)); + if (i != map_.end()) return *i; + std::pair to_ins; + to_ins.first.assign(str.data(), str.size()); + to_ins.second = backing_.Index(str); + return *map_.insert(to_ins).first; +} + +} // namespace alone diff --git a/klm/alone/vocab.hh b/klm/alone/vocab.hh new file mode 100644 index 00000000..3ac0f542 --- /dev/null +++ b/klm/alone/vocab.hh @@ -0,0 +1,34 @@ +#ifndef ALONE_VOCAB__ +#define ALONE_VOCAB__ + +#include "lm/word_index.hh" +#include "util/string_piece.hh" + +#include +#include + +#include + +namespace lm { namespace base { class Vocabulary; } } + +namespace alone { + +class Vocab { + 
public: + explicit Vocab(const lm::base::Vocabulary &backing); + + const std::pair &FindOrAdd(const StringPiece &str); + + const std::pair &EndSentence() const { return end_sentence_; } + + private: + typedef boost::unordered_map Map; + Map map_; + + const lm::base::Vocabulary &backing_; + + const std::pair &end_sentence_; +}; + +} // namespace alone +#endif // ALONE_VCOAB__ -- cgit v1.2.3 From 8505fdfdf0bc4ce9acec42e1980a2fdd4f254109 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 11:15:32 +0100 Subject: It compiles. --- decoder/Jamfile | 2 ++ decoder/decoder.cc | 4 +++ decoder/lazy.cc | 78 +++++++++++++++++++++++++++++++++++++-------------- decoder/lazy.h | 5 +++- klm/search/config.hh | 6 ++-- klm/search/weights.cc | 2 ++ klm/search/weights.hh | 17 ++++++----- 7 files changed, 82 insertions(+), 32 deletions(-) (limited to 'klm') diff --git a/decoder/Jamfile b/decoder/Jamfile index da02d063..d778dc7f 100644 --- a/decoder/Jamfile +++ b/decoder/Jamfile @@ -58,10 +58,12 @@ lib decoder : rescore_translator.cc hg_remove_eps.cc hg_union.cc + lazy.cc $(glc) ..//utils ..//mteval ../klm/lm//kenlm + ../klm/search//search ..//boost_program_options : . 
: : diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a69a6d05..3a410cf2 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -38,6 +38,7 @@ #include "sampler.h" #include "forest_writer.h" // TODO this section should probably be handled by an Observer +#include "lazy.h" #include "hg_io.h" #include "aligner.h" @@ -832,6 +833,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (conf.count("show_target_graph")) HypergraphIO::WriteTarget(conf["show_target_graph"].as(), sent_id, forest); + if (conf.count("lazy_search")) + PassToLazy(forest, CurrentWeightVector()); + for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; const vector& cur_weights = *rp.weight_vector; diff --git a/decoder/lazy.cc b/decoder/lazy.cc index f5b61c75..4776c1b8 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -1,15 +1,23 @@ #include "hg.h" #include "lazy.h" +#include "fdict.h" #include "tdict.h" #include "lm/enumerate_vocab.hh" #include "lm/model.hh" +#include "search/config.hh" +#include "search/context.hh" #include "search/edge.hh" #include "search/vertex.hh" +#include "search/vertex_generator.hh" #include "util/exception.hh" +#include #include +#include +#include + namespace { struct MapVocab : public lm::EnumerateVocab { @@ -19,13 +27,13 @@ struct MapVocab : public lm::EnumerateVocab { // Do not call after Lookup. void Add(lm::WordIndex index, const StringPiece &str) { const WordID cdec_id = TD::Convert(str.as_string()); - if (cdec_id >= out_->size()) out_.resize(cdec_id + 1); + if (cdec_id >= out_.size()) out_.resize(cdec_id + 1); out_[cdec_id] = index; } // Assumes Add has been called and will never be called again. lm::WordIndex FromCDec(WordID id) const { - return out_[out.size() > id ? id : 0]; + return out_[out_.size() > id ? 
id : 0]; } private: @@ -34,44 +42,50 @@ struct MapVocab : public lm::EnumerateVocab { class LazyBase { public: - LazyBase() {} + LazyBase(const std::vector &weights) : + cdec_weights_(weights), + config_(search::Weights(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]), 1000) {} virtual ~LazyBase() {} virtual void Search(const Hypergraph &hg) const = 0; - static LazyBase *Load(const char *model_file); + static LazyBase *Load(const char *model_file, const std::vector &weights); protected: - lm::ngram::Config GetConfig() const { + lm::ngram::Config GetConfig() { lm::ngram::Config ret; ret.enumerate_vocab = &vocab_; return ret; } MapVocab vocab_; + + const std::vector &cdec_weights_; + + const search::Config config_; }; template class Lazy : public LazyBase { public: - explicit Lazy(const char *model_file) : m_(model_file, GetConfig()) {} + Lazy(const char *model_file, const std::vector &weights) : LazyBase(weights), m_(model_file, GetConfig()) {} void Search(const Hypergraph &hg) const; private: - void ConvertEdge(const Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; + void ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; const Model m_; }; -static LazyBase *LazyBase::Load(const char *model_file) { +LazyBase *LazyBase::Load(const char *model_file, const std::vector &weights) { lm::ngram::ModelType model_type; - if (!lm::ngram::RecognizeBinary(lm_name, model_type)) model_type = lm::ngram::PROBING; + if (!lm::ngram::RecognizeBinary(model_file, model_type)) model_type = lm::ngram::PROBING; switch (model_type) { case lm::ngram::PROBING: - return new Lazy(model_file); + return new Lazy(model_file, weights); case lm::ngram::REST_PROBING: - return new Lazy(model_file); + return new Lazy(model_file, weights); default: UTIL_THROW(util::Exception, "Sorry this 
lm type isn't supported yet."); } @@ -80,25 +94,41 @@ static LazyBase *LazyBase::Load(const char *model_file) { template void Lazy::Search(const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); + + search::Context context(config_, m_); + for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { - search::Vertex *out_vertex = out_vertices[i]; + search::Vertex &out_vertex = out_vertices[i]; const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; - for (unsigned int j = 0; j < edges.size(); ++j) { + for (unsigned int j = 0; j < down_edges.size(); ++j) { unsigned int edge_index = down_edges[j]; - const Hypergraph::Edge &in_edge = hg.edges_[edge_index]; - search::Edge &out_edge = out_edges[edge_index]; + ConvertEdge(context, i == hg.nodes_.size() - 1, out_vertices.get(), hg.edges_[edge_index], out_edges[edge_index]); + out_vertex.Add(out_edges[edge_index]); } + out_vertex.FinishedAdding(); + search::VertexGenerator(context, out_vertex); + } + search::PartialVertex top = out_vertices[hg.nodes_.size() - 1].RootPartial(); + if (top.Empty()) { + std::cout << "NO PATH FOUND"; + } else { + search::PartialVertex continuation; + while (!top.Complete()) { + top.Split(continuation); + top = continuation; + } + std::cout << top.End().Bound() << std::endl; } } // TODO: get weights into here somehow. 
-template void Lazy::ConvertEdge(const Context &context, bool final, search::Vertices *vertices, const Hypergraph::Edge &in, search::Edge &out) const { - const std::vector &e = in_edge.rule_->e(); +template void Lazy::ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const { + const std::vector &e = in.rule_->e(); std::vector words; unsigned int terminals = 0; for (std::vector::const_iterator word = e.begin(); word != e.end(); ++word) { if (*word <= 0) { - out.Add(vertices[edge.tail_nodes_[-*word]]); + out.Add(vertices[in.tail_nodes_[-*word]]); words.push_back(lm::kMaxWordIndex); } else { ++terminals; @@ -110,13 +140,19 @@ template void Lazy::ConvertEdge(const Context &conte words.push_back(m_.GetVocabulary().EndSentence()); } - float additive = edge.rule_->GetFeatureValues().dot(weight_vector); + float additive = in.rule_->GetFeatureValues().dot(cdec_weights_); + additive -= terminals * context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; out.InitRule().Init(context, additive, words, final); } -} // namespace +boost::scoped_ptr AwfulGlobalLazy; -void PassToLazy(const Hypergraph &hg) { +} // namespace +void PassToLazy(const Hypergraph &hg, const std::vector &weights) { + if (!AwfulGlobalLazy.get()) { + AwfulGlobalLazy.reset(LazyBase::Load("lm", weights)); + } + AwfulGlobalLazy->Search(hg); } diff --git a/decoder/lazy.h b/decoder/lazy.h index aecd030d..3e71a3b0 100644 --- a/decoder/lazy.h +++ b/decoder/lazy.h @@ -1,8 +1,11 @@ #ifndef _LAZY_H_ #define _LAZY_H_ +#include "weights.h" +#include + class Hypergraph; -void PassToLazy(const Hypergraph &hg); +void PassToLazy(const Hypergraph &hg, const std::vector &weights); #endif // _LAZY_H_ diff --git a/klm/search/config.hh b/klm/search/config.hh index e21e4b7c..ef8e2354 100644 --- a/klm/search/config.hh +++ b/klm/search/config.hh @@ -8,15 +8,15 @@ namespace search { class Config { public: - Config(StringPiece weight_str, 
unsigned int pop_limit) : - weights_(weight_str), pop_limit_(pop_limit) {} + Config(const Weights &weights, unsigned int pop_limit) : + weights_(weights), pop_limit_(pop_limit) {} const Weights &GetWeights() const { return weights_; } unsigned int PopLimit() const { return pop_limit_; } private: - search::Weights weights_; + Weights weights_; unsigned int pop_limit_; }; diff --git a/klm/search/weights.cc b/klm/search/weights.cc index 82ff3f12..d65471ad 100644 --- a/klm/search/weights.cc +++ b/klm/search/weights.cc @@ -49,6 +49,8 @@ Weights::Weights(StringPiece text) { word_penalty_ = Steal("WordPenalty"); } +Weights::Weights(Score lm, Score oov, Score word_penalty) : lm_(lm), oov_(oov), word_penalty_(word_penalty) {} + search::Score Weights::DotNoLM(StringPiece text) const { DotProduct dot; Parse(text, map_, dot); diff --git a/klm/search/weights.hh b/klm/search/weights.hh index 4a4388c7..df1c419f 100644 --- a/klm/search/weights.hh +++ b/klm/search/weights.hh @@ -23,25 +23,28 @@ class Weights { // Parses weights, sets lm_weight_, removes it from map_. explicit Weights(StringPiece text); - search::Score DotNoLM(StringPiece text) const; + // Just the three scores we care about adding. + Weights(Score lm, Score oov, Score word_penalty); - search::Score LM() const { return lm_; } + Score DotNoLM(StringPiece text) const; - search::Score OOV() const { return oov_; } + Score LM() const { return lm_; } - search::Score WordPenalty() const { return word_penalty_; } + Score OOV() const { return oov_; } + + Score WordPenalty() const { return word_penalty_; } // Mostly for testing. 
- const boost::unordered_map &GetMap() const { return map_; } + const boost::unordered_map &GetMap() const { return map_; } private: float Steal(const std::string &str); - typedef boost::unordered_map Map; + typedef boost::unordered_map Map; Map map_; - search::Score lm_, oov_, word_penalty_; + Score lm_, oov_, word_penalty_; }; } // namespace search -- cgit v1.2.3 From 5258e0355a3fe54ce29877f5b2a8d5cd5f1737ca Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 03:54:03 -0700 Subject: Fine remove the length check --- klm/lm/read_arpa.cc | 1 - 1 file changed, 1 deletion(-) (limited to 'klm') diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 174bd3a3..b709fef9 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -38,7 +38,6 @@ uint64_t ReadCount(const std::string &from) { uint64_t ret; stream >> ret; UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from); - UTIL_THROW_IF(static_cast(stream.tellg()) != from.size(), FormatLoadException, "Extra content in count: '" << from << "'"); return ret; } -- cgit v1.2.3 From c0d96bd312b1b0ddc18b7ca1a3e066923e03b4b8 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 24 Sep 2012 15:55:40 +0100 Subject: Fix up compilation of standalone --- klm/alone/main.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'klm') diff --git a/klm/alone/main.cc b/klm/alone/main.cc index 7768b89c..e09ab01d 100644 --- a/klm/alone/main.cc +++ b/klm/alone/main.cc @@ -29,7 +29,8 @@ template void ReadLoop(const std::string &graph_prefix, Control template void RunWithModelType(const char *graph_prefix, const char *model_file, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { Model model(model_file); - search::Config config(weight_str, pop_limit); + search::Weights weights(weight_str); + search::Config config(weights, pop_limit); if (threads > 1) { #ifdef WITH_THREADS -- cgit v1.2.3 From 925087356b853e2099c1b60d8b757d7aa02121a9 Mon Sep 17 00:00:00 2001 From: 
Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- Jamroot | 45 - Makefile.am | 6 +- bjam | 23 - configure.ac | 5 - decoder/Jamfile | 81 - decoder/decoder.h | 2 +- decoder/hg.h | 4 +- dpmert/Jamfile | 32 - gi/clda/src/Makefile.am | 6 - gi/clda/src/ccrp.h | 291 -- gi/clda/src/clda.cc | 148 - gi/clda/src/crp.h | 50 - gi/clda/src/slice_sampler.h | 191 - gi/clda/src/timer.h | 20 - gi/evaluation/conditional_entropy.py | 61 - gi/evaluation/confusion_matrix.py | 123 - gi/evaluation/entropy.py | 38 - gi/evaluation/extract_ccg_labels.py | 129 - gi/evaluation/tree.py | 485 --- gi/markov_al/Makefile.am | 6 - gi/markov_al/README | 2 - gi/markov_al/ml.cc | 470 --- gi/morf-segmentation/filter_docs.pl | 24 - gi/morf-segmentation/invalid_vocab.patterns | 6 - gi/morf-segmentation/linestripper.py | 40 - gi/morf-segmentation/morf-pipeline.pl | 486 --- gi/morf-segmentation/morfsegment.py | 50 - gi/morf-segmentation/morftrain.sh | 110 - gi/morf-segmentation/vocabextractor.sh | 40 - gi/pf/Makefile.am | 44 - gi/pf/README | 2 - gi/pf/align-lexonly-pyp.cc | 243 -- gi/pf/align-tl.cc | 339 -- gi/pf/backward.cc | 89 - gi/pf/backward.h | 33 - gi/pf/base_distributions.cc | 241 -- gi/pf/base_distributions.h | 238 -- gi/pf/bayes_lattice_score.cc | 309 -- gi/pf/brat.cc | 543 --- gi/pf/cbgi.cc | 330 -- gi/pf/cfg_wfst_composer.cc | 731 ---- gi/pf/cfg_wfst_composer.h | 46 - gi/pf/conditional_pseg.h | 275 -- gi/pf/condnaive.cc | 298 -- gi/pf/corpus.cc | 62 - gi/pf/corpus.h | 19 - gi/pf/dpnaive.cc | 301 -- gi/pf/guess-translits.pl | 72 - gi/pf/hpyp_tm.cc | 133 - gi/pf/hpyp_tm.h | 38 - gi/pf/itg.cc | 275 -- gi/pf/learn_cfg.cc | 428 --- gi/pf/make-freq-bins.pl | 26 - gi/pf/mh_test.cc | 148 - gi/pf/monotonic_pseg.h | 89 - gi/pf/ngram_base.cc | 69 - gi/pf/ngram_base.h | 25 - gi/pf/nuisance_test.cc | 161 - gi/pf/os_phrase.h | 15 - gi/pf/pf.h | 84 - gi/pf/pf_test.cc | 148 - gi/pf/pfbrat.cc | 543 --- gi/pf/pfdist.cc | 598 --- gi/pf/pfdist.new.cc | 
620 --- gi/pf/pfnaive.cc | 284 -- gi/pf/poisson_uniform_word_model.h | 50 - gi/pf/pyp_lm.cc | 273 -- gi/pf/pyp_tm.cc | 128 - gi/pf/pyp_tm.h | 36 - gi/pf/pyp_word_model.h | 61 - gi/pf/quasi_model2.h | 177 - gi/pf/reachability.cc | 74 - gi/pf/reachability.h | 34 - gi/pf/tied_resampler.h | 122 - gi/pf/tpf.cc | 99 - gi/pf/transliterations.cc | 334 -- gi/pf/transliterations.h | 24 - gi/pf/unigrams.cc | 80 - gi/pf/unigrams.h | 69 - gi/pipeline/OLD.clsp.config | 9 - gi/pipeline/OLD.evaluation-pipeline.pl | 277 -- gi/pipeline/backoff-pipe.pl | 215 -- gi/pipeline/blacklight.config | 9 - gi/pipeline/clsp.config | 10 - gi/pipeline/evaluation-pipeline.pl | 364 -- gi/pipeline/local-gi-pipeline.pl | 465 --- gi/pipeline/lticluster.config | 9 - gi/pipeline/scripts/filter-by-f.pl | 56 - gi/pipeline/scripts/patch-corpus.pl | 65 - gi/pipeline/scripts/refilter.pl | 40 - gi/pipeline/scripts/rekey.pl | 8 - gi/pipeline/scripts/remove-tags-from-contexts.pl | 53 - gi/pipeline/scripts/remove-tags-from-corpus.pl | 44 - gi/pipeline/scripts/sort-by-key.sh | 5 - gi/pipeline/scripts/xfeats.pl | 39 - gi/pipeline/valhalla.config | 9 - gi/posterior-regularisation/Corpus.java | 167 - gi/posterior-regularisation/Lexicon.java | 32 - .../PhraseContextModel.java | 466 --- gi/posterior-regularisation/README | 3 - gi/posterior-regularisation/alphabet.hh | 61 - gi/posterior-regularisation/canned.concordance | 4 - gi/posterior-regularisation/em.cc | 830 ---- gi/posterior-regularisation/invert.hh | 45 - gi/posterior-regularisation/linesearch.py | 58 - gi/posterior-regularisation/log_add.hh | 30 - gi/posterior-regularisation/prjava.jar | 1 - gi/posterior-regularisation/prjava/Makefile | 8 - gi/posterior-regularisation/prjava/build.xml | 38 - .../prjava/lib/commons-math-2.1.jar | Bin 832410 -> 0 bytes .../prjava/lib/jopt-simple-3.2.jar | Bin 53244 -> 0 bytes .../prjava/lib/trove-2.0.2.jar | Bin 737844 -> 0 bytes gi/posterior-regularisation/prjava/src/arr/F.java | 99 - .../prjava/src/data/Corpus.java | 233 -- 
.../prjava/src/hmm/HMM.java | 579 --- .../prjava/src/hmm/HMMObjective.java | 351 -- .../prjava/src/hmm/POS.java | 120 - .../prjava/src/io/FileUtil.java | 48 - .../prjava/src/io/SerializedObjects.java | 83 - .../examples/GeneralizedRosenbrock.java | 110 - .../prjava/src/optimization/examples/x2y2.java | 128 - .../optimization/examples/x2y2WithConstraints.java | 127 - .../AbstractGradientBaseMethod.java | 120 - .../gradientBasedMethods/ConjugateGradient.java | 92 - .../gradientBasedMethods/DebugHelpers.java | 65 - .../gradientBasedMethods/GradientDescent.java | 19 - .../optimization/gradientBasedMethods/LBFGS.java | 234 -- .../gradientBasedMethods/Objective.java | 87 - .../gradientBasedMethods/Optimizer.java | 19 - .../ProjectedAbstractGradientBaseMethod.java | 11 - .../ProjectedGradientDescent.java | 154 - .../gradientBasedMethods/ProjectedObjective.java | 29 - .../gradientBasedMethods/ProjectedOptimizer.java | 10 - .../gradientBasedMethods/stats/OptimizerStats.java | 86 - .../stats/ProjectedOptimizerStats.java | 70 - .../linesearch/ArmijoLineSearchMinimization.java | 102 - ...joLineSearchMinimizationAlongProjectionArc.java | 141 - .../DifferentiableLineSearchObjective.java | 185 - .../linesearch/GenericPickFirstStep.java | 20 - .../linesearch/InterpolationPickFirstStep.java | 25 - .../optimization/linesearch/LineSearchMethod.java | 14 - .../NonNewtonInterpolationPickFirstStep.java | 33 - ...ProjectedDifferentiableLineSearchObjective.java | 137 - .../linesearch/WolfRuleLineSearch.java | 300 -- .../optimization/linesearch/WolfeConditions.java | 45 - .../optimization/projections/BoundsProjection.java | 104 - .../src/optimization/projections/Projection.java | 72 - .../projections/SimplexProjection.java | 127 - .../stopCriteria/CompositeStopingCriteria.java | 33 - .../optimization/stopCriteria/GradientL2Norm.java | 30 - .../stopCriteria/NormalizedGradientL2Norm.java | 48 - .../NormalizedProjectedGradientL2Norm.java | 60 - .../stopCriteria/NormalizedValueDifference.java 
| 54 - .../stopCriteria/ProjectedGradientL2Norm.java | 51 - .../optimization/stopCriteria/StopingCriteria.java | 8 - .../optimization/stopCriteria/ValueDifference.java | 41 - .../src/optimization/util/Interpolation.java | 37 - .../prjava/src/optimization/util/Logger.java | 7 - .../prjava/src/optimization/util/MathUtils.java | 339 -- .../prjava/src/optimization/util/MatrixOutput.java | 28 - .../prjava/src/optimization/util/StaticTools.java | 180 - .../prjava/src/phrase/Agree.java | 204 - .../prjava/src/phrase/Agree2Sides.java | 197 - .../prjava/src/phrase/C2F.java | 216 -- .../prjava/src/phrase/Corpus.java | 288 -- .../prjava/src/phrase/Lexicon.java | 34 - .../prjava/src/phrase/PhraseCluster.java | 540 --- .../prjava/src/phrase/PhraseContextObjective.java | 436 --- .../prjava/src/phrase/PhraseCorpus.java | 193 - .../prjava/src/phrase/PhraseObjective.java | 224 -- .../prjava/src/phrase/Trainer.java | 257 -- .../prjava/src/phrase/VB.java | 419 -- .../prjava/src/test/CorpusTest.java | 60 - .../prjava/src/test/HMMModelStats.java | 105 - .../prjava/src/test/IntDoublePair.java | 23 - .../prjava/src/test/X2y2WithConstraints.java | 131 - .../prjava/src/util/Array.java | 41 - .../prjava/src/util/ArrayMath.java | 186 - .../prjava/src/util/DifferentiableObjective.java | 14 - .../prjava/src/util/DigammaFunction.java | 21 - .../prjava/src/util/FileSystem.java | 21 - .../prjava/src/util/InputOutput.java | 67 - .../prjava/src/util/LogSummer.java | 86 - .../prjava/src/util/MathUtil.java | 148 - .../prjava/src/util/Matrix.java | 16 - .../prjava/src/util/MemoryTracker.java | 47 - .../prjava/src/util/Pair.java | 31 - .../prjava/src/util/Printing.java | 158 - .../prjava/src/util/Sorters.java | 39 - .../prjava/train-PR-cluster.sh | 4 - gi/posterior-regularisation/projected_gradient.cc | 87 - gi/posterior-regularisation/simplex_pg.py | 55 - gi/posterior-regularisation/split-languages.py | 23 - gi/posterior-regularisation/train_pr_agree.py | 400 -- 
gi/posterior-regularisation/train_pr_global.py | 296 -- gi/posterior-regularisation/train_pr_parallel.py | 333 -- gi/pyp-topics/scripts/contexts2documents.py | 37 - gi/pyp-topics/scripts/extract_contexts.py | 144 - gi/pyp-topics/scripts/extract_contexts_test.py | 72 - gi/pyp-topics/scripts/extract_leaves.py | 49 - gi/pyp-topics/scripts/map-documents.py | 20 - gi/pyp-topics/scripts/map-terms.py | 20 - gi/pyp-topics/scripts/run.sh | 13 - gi/pyp-topics/scripts/score-mkcls.py | 61 - gi/pyp-topics/scripts/score-topics.py | 64 - gi/pyp-topics/scripts/spans2labels.py | 137 - gi/pyp-topics/scripts/tokens2classes.py | 27 - gi/pyp-topics/scripts/topics.py | 20 - gi/pyp-topics/src/Makefile.am | 16 - gi/pyp-topics/src/Makefile.mpi | 26 - gi/pyp-topics/src/clock_gettime_stub.c | 141 - gi/pyp-topics/src/contexts_corpus.cc | 164 - gi/pyp-topics/src/contexts_corpus.hh | 90 - gi/pyp-topics/src/contexts_lexer.h | 22 - gi/pyp-topics/src/contexts_lexer.l | 113 - gi/pyp-topics/src/corpus.cc | 104 - gi/pyp-topics/src/corpus.hh | 133 - gi/pyp-topics/src/gammadist.c | 247 -- gi/pyp-topics/src/gammadist.h | 72 - gi/pyp-topics/src/gzstream.cc | 165 - gi/pyp-topics/src/gzstream.hh | 121 - gi/pyp-topics/src/log_add.h | 30 - gi/pyp-topics/src/macros.Linux | 18 - gi/pyp-topics/src/makefile.darwin | 15 - gi/pyp-topics/src/makefile.depend | 4042 -------------------- gi/pyp-topics/src/mpi-corpus.hh | 69 - gi/pyp-topics/src/mpi-pyp-topics.cc | 466 --- gi/pyp-topics/src/mpi-pyp-topics.hh | 106 - gi/pyp-topics/src/mpi-pyp.hh | 447 --- gi/pyp-topics/src/mpi-train-contexts.cc | 201 - gi/pyp-topics/src/mt19937ar.c | 194 - gi/pyp-topics/src/mt19937ar.h | 44 - gi/pyp-topics/src/pyp-topics.cc | 499 --- gi/pyp-topics/src/pyp-topics.hh | 98 - gi/pyp-topics/src/pyp.hh | 566 --- gi/pyp-topics/src/slice-sampler.h | 192 - gi/pyp-topics/src/timing.h | 37 - gi/pyp-topics/src/train-contexts.cc | 174 - gi/pyp-topics/src/train.cc | 135 - gi/pyp-topics/src/utility.h | 962 ----- gi/pyp-topics/src/workers.hh | 275 -- 
gi/scripts/buck2utf8.pl | 87 - jam-files/LICENSE_1_0.txt | 23 - jam-files/boost-build/boost-build.jam | 8 - jam-files/boost-build/bootstrap.jam | 18 - jam-files/boost-build/build-system.jam | 1008 ----- jam-files/boost-build/build/__init__.py | 0 jam-files/boost-build/build/ac.jam | 198 - jam-files/boost-build/build/alias.jam | 73 - jam-files/boost-build/build/alias.py | 63 - jam-files/boost-build/build/build-request.jam | 322 -- jam-files/boost-build/build/build_request.py | 216 -- jam-files/boost-build/build/configure.jam | 237 -- jam-files/boost-build/build/configure.py | 164 - jam-files/boost-build/build/engine.py | 172 - jam-files/boost-build/build/errors.py | 127 - jam-files/boost-build/build/feature.jam | 1335 ------- jam-files/boost-build/build/feature.py | 905 ----- jam-files/boost-build/build/generators.jam | 1408 ------- jam-files/boost-build/build/generators.py | 1089 ------ jam-files/boost-build/build/modifiers.jam | 232 -- jam-files/boost-build/build/project.ann.py | 996 ----- jam-files/boost-build/build/project.jam | 1110 ------ jam-files/boost-build/build/project.py | 1120 ------ jam-files/boost-build/build/property-set.jam | 481 --- jam-files/boost-build/build/property.jam | 788 ---- jam-files/boost-build/build/property.py | 593 --- jam-files/boost-build/build/property_set.py | 449 --- jam-files/boost-build/build/readme.txt | 13 - jam-files/boost-build/build/scanner.jam | 153 - jam-files/boost-build/build/scanner.py | 158 - jam-files/boost-build/build/targets.jam | 1659 -------- jam-files/boost-build/build/targets.py | 1401 ------- jam-files/boost-build/build/toolset.jam | 502 --- jam-files/boost-build/build/toolset.py | 398 -- jam-files/boost-build/build/type.jam | 425 -- jam-files/boost-build/build/type.py | 313 -- jam-files/boost-build/build/version.jam | 161 - jam-files/boost-build/build/virtual-target.jam | 1317 ------- jam-files/boost-build/build/virtual_target.py | 1118 ------ jam-files/boost-build/kernel/boost-build.jam | 5 - 
jam-files/boost-build/kernel/bootstrap.jam | 263 -- jam-files/boost-build/kernel/bootstrap.py | 25 - jam-files/boost-build/kernel/class.jam | 420 -- jam-files/boost-build/kernel/errors.jam | 274 -- jam-files/boost-build/kernel/modules.jam | 354 -- jam-files/boost-build/options/help.jam | 212 - jam-files/boost-build/site-config.jam | 4 - jam-files/boost-build/tools/__init__.py | 0 jam-files/boost-build/tools/acc.jam | 118 - jam-files/boost-build/tools/auto-index.jam | 212 - jam-files/boost-build/tools/bison.jam | 32 - jam-files/boost-build/tools/boostbook-config.jam | 13 - jam-files/boost-build/tools/boostbook.jam | 727 ---- jam-files/boost-build/tools/borland.jam | 220 -- jam-files/boost-build/tools/builtin.jam | 960 ----- jam-files/boost-build/tools/builtin.py | 718 ---- jam-files/boost-build/tools/cast.jam | 91 - jam-files/boost-build/tools/cast.py | 69 - jam-files/boost-build/tools/clang-darwin.jam | 170 - jam-files/boost-build/tools/clang-linux.jam | 196 - jam-files/boost-build/tools/clang.jam | 27 - jam-files/boost-build/tools/common.jam | 994 ----- jam-files/boost-build/tools/common.py | 840 ---- jam-files/boost-build/tools/como-linux.jam | 103 - jam-files/boost-build/tools/como-win.jam | 117 - jam-files/boost-build/tools/como.jam | 29 - jam-files/boost-build/tools/convert.jam | 62 - jam-files/boost-build/tools/cw-config.jam | 34 - jam-files/boost-build/tools/cw.jam | 246 -- jam-files/boost-build/tools/darwin.jam | 568 --- jam-files/boost-build/tools/darwin.py | 57 - jam-files/boost-build/tools/dmc.jam | 134 - jam-files/boost-build/tools/docutils.jam | 84 - jam-files/boost-build/tools/doxproc.py | 859 ----- jam-files/boost-build/tools/doxygen-config.jam | 11 - jam-files/boost-build/tools/doxygen.jam | 776 ---- .../tools/doxygen/windows-paths-check.doxyfile | 3 - .../tools/doxygen/windows-paths-check.hpp | 0 jam-files/boost-build/tools/fop.jam | 69 - jam-files/boost-build/tools/fortran.jam | 55 - jam-files/boost-build/tools/gcc.jam | 1185 ------ 
jam-files/boost-build/tools/gcc.py | 796 ---- jam-files/boost-build/tools/generate.jam | 108 - jam-files/boost-build/tools/gettext.jam | 230 -- jam-files/boost-build/tools/gfortran.jam | 39 - jam-files/boost-build/tools/hp_cxx.jam | 181 - jam-files/boost-build/tools/hpfortran.jam | 35 - jam-files/boost-build/tools/ifort.jam | 44 - jam-files/boost-build/tools/intel-darwin.jam | 220 -- jam-files/boost-build/tools/intel-linux.jam | 250 -- jam-files/boost-build/tools/intel-win.jam | 184 - jam-files/boost-build/tools/intel.jam | 34 - jam-files/boost-build/tools/lex.jam | 33 - jam-files/boost-build/tools/make.jam | 72 - jam-files/boost-build/tools/make.py | 59 - jam-files/boost-build/tools/mc.jam | 44 - jam-files/boost-build/tools/message.jam | 55 - jam-files/boost-build/tools/message.py | 46 - jam-files/boost-build/tools/midl.jam | 142 - jam-files/boost-build/tools/mipspro.jam | 145 - jam-files/boost-build/tools/mpi.jam | 583 --- jam-files/boost-build/tools/msvc-config.jam | 12 - jam-files/boost-build/tools/msvc.jam | 1392 ------- jam-files/boost-build/tools/notfile.jam | 74 - jam-files/boost-build/tools/notfile.py | 51 - jam-files/boost-build/tools/package.jam | 165 - jam-files/boost-build/tools/package.py | 168 - jam-files/boost-build/tools/pathscale.jam | 168 - jam-files/boost-build/tools/pch.jam | 95 - jam-files/boost-build/tools/pch.py | 83 - jam-files/boost-build/tools/pgi.jam | 147 - jam-files/boost-build/tools/python-config.jam | 27 - jam-files/boost-build/tools/python.jam | 1267 ------ jam-files/boost-build/tools/qcc.jam | 236 -- jam-files/boost-build/tools/qt.jam | 17 - jam-files/boost-build/tools/qt3.jam | 209 - jam-files/boost-build/tools/qt4.jam | 724 ---- jam-files/boost-build/tools/quickbook-config.jam | 44 - jam-files/boost-build/tools/quickbook.jam | 361 -- jam-files/boost-build/tools/rc.jam | 156 - jam-files/boost-build/tools/rc.py | 189 - jam-files/boost-build/tools/stage.jam | 524 --- jam-files/boost-build/tools/stage.py | 350 -- 
jam-files/boost-build/tools/stlport.jam | 303 -- jam-files/boost-build/tools/sun.jam | 142 - jam-files/boost-build/tools/symlink.jam | 140 - jam-files/boost-build/tools/symlink.py | 112 - jam-files/boost-build/tools/testing-aux.jam | 210 - jam-files/boost-build/tools/testing.jam | 581 --- jam-files/boost-build/tools/testing.py | 342 -- jam-files/boost-build/tools/types/__init__.py | 18 - jam-files/boost-build/tools/types/asm.jam | 4 - jam-files/boost-build/tools/types/asm.py | 13 - jam-files/boost-build/tools/types/cpp.jam | 86 - jam-files/boost-build/tools/types/cpp.py | 10 - jam-files/boost-build/tools/types/exe.jam | 9 - jam-files/boost-build/tools/types/exe.py | 11 - jam-files/boost-build/tools/types/html.jam | 4 - jam-files/boost-build/tools/types/html.py | 10 - jam-files/boost-build/tools/types/lib.jam | 74 - jam-files/boost-build/tools/types/lib.py | 77 - jam-files/boost-build/tools/types/obj.jam | 9 - jam-files/boost-build/tools/types/obj.py | 11 - jam-files/boost-build/tools/types/objc.jam | 26 - jam-files/boost-build/tools/types/preprocessed.jam | 9 - jam-files/boost-build/tools/types/qt.jam | 10 - jam-files/boost-build/tools/types/register.jam | 39 - jam-files/boost-build/tools/types/rsp.jam | 4 - jam-files/boost-build/tools/types/rsp.py | 10 - jam-files/boost-build/tools/unix.jam | 224 -- jam-files/boost-build/tools/unix.py | 150 - jam-files/boost-build/tools/vacpp.jam | 150 - jam-files/boost-build/tools/whale.jam | 116 - jam-files/boost-build/tools/xlf.jam | 39 - jam-files/boost-build/tools/xsltproc-config.jam | 37 - jam-files/boost-build/tools/xsltproc.jam | 194 - jam-files/boost-build/tools/xsltproc/included.xsl | 11 - jam-files/boost-build/tools/xsltproc/test.xml | 2 - jam-files/boost-build/tools/xsltproc/test.xsl | 12 - jam-files/boost-build/tools/zlib.jam | 92 - jam-files/boost-build/user-config.jam | 92 - jam-files/boost-build/util/__init__.py | 136 - jam-files/boost-build/util/assert.jam | 336 -- jam-files/boost-build/util/container.jam | 339 -- 
jam-files/boost-build/util/doc.jam | 997 ----- jam-files/boost-build/util/indirect.jam | 115 - jam-files/boost-build/util/indirect.py | 15 - jam-files/boost-build/util/logger.py | 46 - jam-files/boost-build/util/numbers.jam | 218 -- jam-files/boost-build/util/option.jam | 109 - jam-files/boost-build/util/option.py | 35 - jam-files/boost-build/util/order.jam | 169 - jam-files/boost-build/util/order.py | 121 - jam-files/boost-build/util/os.jam | 171 - jam-files/boost-build/util/os_j.py | 19 - jam-files/boost-build/util/path.jam | 934 ----- jam-files/boost-build/util/path.py | 904 ----- jam-files/boost-build/util/print.jam | 488 --- jam-files/boost-build/util/regex.jam | 193 - jam-files/boost-build/util/regex.py | 25 - jam-files/boost-build/util/sequence.jam | 335 -- jam-files/boost-build/util/sequence.py | 50 - jam-files/boost-build/util/set.jam | 93 - jam-files/boost-build/util/set.py | 42 - jam-files/boost-build/util/string.jam | 189 - jam-files/boost-build/util/utility.jam | 235 -- jam-files/boost-build/util/utility.py | 155 - jam-files/engine/Jambase | 2473 ------------ jam-files/engine/boost-jam.spec | 64 - jam-files/engine/boost-no-inspect | 1 - jam-files/engine/build.bat | 532 --- jam-files/engine/build.jam | 1070 ------ jam-files/engine/build.sh | 303 -- jam-files/engine/build_vms.com | 105 - jam-files/engine/builtins.c | 2310 ----------- jam-files/engine/builtins.h | 69 - jam-files/engine/bump_version.py | 80 - jam-files/engine/class.c | 141 - jam-files/engine/class.h | 13 - jam-files/engine/command.c | 100 - jam-files/engine/command.h | 61 - jam-files/engine/compile.c | 1424 ------- jam-files/engine/compile.h | 82 - jam-files/engine/debian/changelog | 72 - jam-files/engine/debian/control | 16 - jam-files/engine/debian/copyright | 25 - jam-files/engine/debian/jam.man.sgml | 236 -- jam-files/engine/debian/rules | 73 - jam-files/engine/debug.c | 132 - jam-files/engine/debug.h | 54 - jam-files/engine/execcmd.h | 45 - jam-files/engine/execmac.c | 69 - 
jam-files/engine/execnt.c | 1296 ------- jam-files/engine/execunix.c | 569 --- jam-files/engine/execvms.c | 161 - jam-files/engine/expand.c | 733 ---- jam-files/engine/expand.h | 14 - jam-files/engine/filemac.c | 175 - jam-files/engine/filent.c | 387 -- jam-files/engine/fileos2.c | 138 - jam-files/engine/filesys.c | 83 - jam-files/engine/filesys.h | 60 - jam-files/engine/fileunix.c | 501 --- jam-files/engine/filevms.c | 327 -- jam-files/engine/frames.c | 22 - jam-files/engine/frames.h | 37 - jam-files/engine/glob.c | 152 - jam-files/engine/hash.c | 459 --- jam-files/engine/hash.h | 25 - jam-files/engine/hcache.c | 434 --- jam-files/engine/hcache.h | 18 - jam-files/engine/hdrmacro.c | 137 - jam-files/engine/hdrmacro.h | 14 - jam-files/engine/headers.c | 203 - jam-files/engine/headers.h | 16 - jam-files/engine/jam.c | 632 --- jam-files/engine/jam.h | 579 --- jam-files/engine/jambase.c | 1691 -------- jam-files/engine/jambase.h | 15 - jam-files/engine/jamgram.c | 1830 --------- jam-files/engine/jamgram.h | 140 - jam-files/engine/jamgram.y | 371 -- jam-files/engine/jamgram.yy | 329 -- jam-files/engine/jamgramtab.h | 44 - jam-files/engine/lists.c | 339 -- jam-files/engine/lists.h | 108 - jam-files/engine/make.c | 814 ---- jam-files/engine/make.h | 41 - jam-files/engine/make1.c | 1145 ------ jam-files/engine/md5.c | 381 -- jam-files/engine/md5.h | 91 - jam-files/engine/mem.c | 75 - jam-files/engine/mem.h | 134 - jam-files/engine/mkjambase.c | 123 - jam-files/engine/modules.c | 168 - jam-files/engine/modules.h | 37 - jam-files/engine/modules/order.c | 144 - jam-files/engine/modules/path.c | 32 - jam-files/engine/modules/property-set.c | 110 - jam-files/engine/modules/readme.txt | 3 - jam-files/engine/modules/regex.c | 96 - jam-files/engine/modules/sequence.c | 42 - jam-files/engine/modules/set.c | 41 - jam-files/engine/native.c | 36 - jam-files/engine/native.h | 34 - jam-files/engine/newstr.c | 174 - jam-files/engine/newstr.h | 14 - jam-files/engine/option.c | 94 - 
jam-files/engine/option.h | 23 - jam-files/engine/output.c | 125 - jam-files/engine/output.h | 29 - jam-files/engine/parse.c | 132 - jam-files/engine/parse.h | 59 - jam-files/engine/patchlevel.h | 17 - jam-files/engine/pathmac.c | 252 -- jam-files/engine/pathsys.h | 91 - jam-files/engine/pathunix.c | 457 --- jam-files/engine/pathvms.c | 406 -- jam-files/engine/pwd.c | 66 - jam-files/engine/pwd.h | 10 - jam-files/engine/regexp.c | 1328 ------- jam-files/engine/regexp.h | 32 - jam-files/engine/rules.c | 810 ---- jam-files/engine/rules.h | 280 -- jam-files/engine/scan.c | 418 -- jam-files/engine/scan.h | 56 - jam-files/engine/search.c | 223 -- jam-files/engine/search.h | 11 - jam-files/engine/strings.c | 201 - jam-files/engine/strings.h | 34 - jam-files/engine/subst.c | 94 - jam-files/engine/timestamp.c | 226 -- jam-files/engine/timestamp.h | 12 - jam-files/engine/variable.c | 631 --- jam-files/engine/variable.h | 35 - jam-files/engine/w32_getreg.c | 207 - jam-files/engine/yyacc.c | 268 -- jam-files/sanity.jam | 277 -- klm/lm/Jamfile | 14 - klm/util/Jamfile | 10 - mira/Jamfile | 1 - mteval/Jamfile | 8 - mteval/ns_docscorer.cc | 4 +- mteval/ns_docscorer.h | 2 +- phrasinator/Jamfile | 4 - phrasinator/Makefile.am | 14 - phrasinator/README | 16 - phrasinator/gibbs_train_plm.cc | 309 -- phrasinator/gibbs_train_plm.notables.cc | 335 -- phrasinator/train-phrasinator.pl | 89 - rst_parser/Makefile.am | 20 - rst_parser/arc_factored.cc | 151 - rst_parser/arc_factored.h | 124 - rst_parser/arc_factored_marginals.cc | 58 - rst_parser/arc_ff.cc | 183 - rst_parser/arc_ff.h | 28 - rst_parser/dep_training.cc | 76 - rst_parser/dep_training.h | 19 - rst_parser/global_ff.cc | 44 - rst_parser/global_ff.h | 18 - rst_parser/mst_train.cc | 228 -- rst_parser/picojson.h | 979 ----- rst_parser/random_tree.cc | 36 - rst_parser/rst.cc | 82 - rst_parser/rst.h | 21 - rst_parser/rst_parse.cc | 111 - rst_parser/rst_train.cc | 144 - training/Jamfile | 25 - training/liblbfgs/Jamfile | 5 - utils/Jamfile 
| 32 - utils/Makefile.am | 7 +- utils/ccrp.h | 270 -- utils/ccrp_nt.h | 164 - utils/ccrp_onetable.h | 253 -- utils/crp_table_manager.h | 114 - utils/crp_test.cc | 91 - utils/fast_sparse_vector.h | 2 +- utils/gamma_poisson.h | 33 - utils/mfcr.h | 370 -- utils/mfcr_test.cc | 72 - utils/sampler.h | 2 +- utils/slice_sampler.h | 191 - utils/small_vector.h | 2 +- utils/stringlib.h | 2 +- utils/unigram_pyp_lm.cc | 214 -- 586 files changed, 13 insertions(+), 125976 deletions(-) delete mode 100644 Jamroot delete mode 100755 bjam delete mode 100644 decoder/Jamfile delete mode 100644 dpmert/Jamfile delete mode 100644 gi/clda/src/Makefile.am delete mode 100644 gi/clda/src/ccrp.h delete mode 100644 gi/clda/src/clda.cc delete mode 100644 gi/clda/src/crp.h delete mode 100644 gi/clda/src/slice_sampler.h delete mode 100644 gi/clda/src/timer.h delete mode 100644 gi/evaluation/conditional_entropy.py delete mode 100644 gi/evaluation/confusion_matrix.py delete mode 100644 gi/evaluation/entropy.py delete mode 100644 gi/evaluation/extract_ccg_labels.py delete mode 100644 gi/evaluation/tree.py delete mode 100644 gi/markov_al/Makefile.am delete mode 100644 gi/markov_al/README delete mode 100644 gi/markov_al/ml.cc delete mode 100755 gi/morf-segmentation/filter_docs.pl delete mode 100644 gi/morf-segmentation/invalid_vocab.patterns delete mode 100755 gi/morf-segmentation/linestripper.py delete mode 100755 gi/morf-segmentation/morf-pipeline.pl delete mode 100755 gi/morf-segmentation/morfsegment.py delete mode 100755 gi/morf-segmentation/morftrain.sh delete mode 100755 gi/morf-segmentation/vocabextractor.sh delete mode 100644 gi/pf/Makefile.am delete mode 100644 gi/pf/README delete mode 100644 gi/pf/align-lexonly-pyp.cc delete mode 100644 gi/pf/align-tl.cc delete mode 100644 gi/pf/backward.cc delete mode 100644 gi/pf/backward.h delete mode 100644 gi/pf/base_distributions.cc delete mode 100644 gi/pf/base_distributions.h delete mode 100644 gi/pf/bayes_lattice_score.cc delete mode 100644 
gi/pf/brat.cc delete mode 100644 gi/pf/cbgi.cc delete mode 100644 gi/pf/cfg_wfst_composer.cc delete mode 100644 gi/pf/cfg_wfst_composer.h delete mode 100644 gi/pf/conditional_pseg.h delete mode 100644 gi/pf/condnaive.cc delete mode 100644 gi/pf/corpus.cc delete mode 100644 gi/pf/corpus.h delete mode 100644 gi/pf/dpnaive.cc delete mode 100755 gi/pf/guess-translits.pl delete mode 100644 gi/pf/hpyp_tm.cc delete mode 100644 gi/pf/hpyp_tm.h delete mode 100644 gi/pf/itg.cc delete mode 100644 gi/pf/learn_cfg.cc delete mode 100755 gi/pf/make-freq-bins.pl delete mode 100644 gi/pf/mh_test.cc delete mode 100644 gi/pf/monotonic_pseg.h delete mode 100644 gi/pf/ngram_base.cc delete mode 100644 gi/pf/ngram_base.h delete mode 100644 gi/pf/nuisance_test.cc delete mode 100644 gi/pf/os_phrase.h delete mode 100644 gi/pf/pf.h delete mode 100644 gi/pf/pf_test.cc delete mode 100644 gi/pf/pfbrat.cc delete mode 100644 gi/pf/pfdist.cc delete mode 100644 gi/pf/pfdist.new.cc delete mode 100644 gi/pf/pfnaive.cc delete mode 100644 gi/pf/poisson_uniform_word_model.h delete mode 100644 gi/pf/pyp_lm.cc delete mode 100644 gi/pf/pyp_tm.cc delete mode 100644 gi/pf/pyp_tm.h delete mode 100644 gi/pf/pyp_word_model.h delete mode 100644 gi/pf/quasi_model2.h delete mode 100644 gi/pf/reachability.cc delete mode 100644 gi/pf/reachability.h delete mode 100644 gi/pf/tied_resampler.h delete mode 100644 gi/pf/tpf.cc delete mode 100644 gi/pf/transliterations.cc delete mode 100644 gi/pf/transliterations.h delete mode 100644 gi/pf/unigrams.cc delete mode 100644 gi/pf/unigrams.h delete mode 100644 gi/pipeline/OLD.clsp.config delete mode 100755 gi/pipeline/OLD.evaluation-pipeline.pl delete mode 100644 gi/pipeline/backoff-pipe.pl delete mode 100644 gi/pipeline/blacklight.config delete mode 100644 gi/pipeline/clsp.config delete mode 100755 gi/pipeline/evaluation-pipeline.pl delete mode 100755 gi/pipeline/local-gi-pipeline.pl delete mode 100644 gi/pipeline/lticluster.config delete mode 100755 
gi/pipeline/scripts/filter-by-f.pl delete mode 100755 gi/pipeline/scripts/patch-corpus.pl delete mode 100755 gi/pipeline/scripts/refilter.pl delete mode 100755 gi/pipeline/scripts/rekey.pl delete mode 100755 gi/pipeline/scripts/remove-tags-from-contexts.pl delete mode 100755 gi/pipeline/scripts/remove-tags-from-corpus.pl delete mode 100755 gi/pipeline/scripts/sort-by-key.sh delete mode 100755 gi/pipeline/scripts/xfeats.pl delete mode 100644 gi/pipeline/valhalla.config delete mode 100644 gi/posterior-regularisation/Corpus.java delete mode 100644 gi/posterior-regularisation/Lexicon.java delete mode 100644 gi/posterior-regularisation/PhraseContextModel.java delete mode 100644 gi/posterior-regularisation/README delete mode 100644 gi/posterior-regularisation/alphabet.hh delete mode 100644 gi/posterior-regularisation/canned.concordance delete mode 100644 gi/posterior-regularisation/em.cc delete mode 100644 gi/posterior-regularisation/invert.hh delete mode 100644 gi/posterior-regularisation/linesearch.py delete mode 100644 gi/posterior-regularisation/log_add.hh delete mode 120000 gi/posterior-regularisation/prjava.jar delete mode 100755 gi/posterior-regularisation/prjava/Makefile delete mode 100644 gi/posterior-regularisation/prjava/build.xml delete mode 100644 gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar delete mode 100644 gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar delete mode 100644 gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar delete mode 100644 gi/posterior-regularisation/prjava/src/arr/F.java delete mode 100644 gi/posterior-regularisation/prjava/src/data/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMM.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/POS.java delete mode 100644 gi/posterior-regularisation/prjava/src/io/FileUtil.java delete mode 100644 
gi/posterior-regularisation/prjava/src/io/SerializedObjects.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java 
delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java delete mode 100644 
gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Logger.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/C2F.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Lexicon.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Trainer.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/VB.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/CorpusTest.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/HMMModelStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/IntDoublePair.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java delete mode 100644 
gi/posterior-regularisation/prjava/src/util/Array.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/ArrayMath.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DigammaFunction.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/FileSystem.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/InputOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/LogSummer.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MathUtil.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Matrix.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MemoryTracker.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Pair.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Printing.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Sorters.java delete mode 100755 gi/posterior-regularisation/prjava/train-PR-cluster.sh delete mode 100644 gi/posterior-regularisation/projected_gradient.cc delete mode 100644 gi/posterior-regularisation/simplex_pg.py delete mode 100755 gi/posterior-regularisation/split-languages.py delete mode 100644 gi/posterior-regularisation/train_pr_agree.py delete mode 100644 gi/posterior-regularisation/train_pr_global.py delete mode 100644 gi/posterior-regularisation/train_pr_parallel.py delete mode 100755 gi/pyp-topics/scripts/contexts2documents.py delete mode 100755 gi/pyp-topics/scripts/extract_contexts.py delete mode 100755 gi/pyp-topics/scripts/extract_contexts_test.py delete mode 100755 gi/pyp-topics/scripts/extract_leaves.py delete mode 100755 gi/pyp-topics/scripts/map-documents.py delete mode 100755 gi/pyp-topics/scripts/map-terms.py delete mode 100644 gi/pyp-topics/scripts/run.sh delete mode 100755 gi/pyp-topics/scripts/score-mkcls.py delete mode 100755 gi/pyp-topics/scripts/score-topics.py 
delete mode 100755 gi/pyp-topics/scripts/spans2labels.py delete mode 100755 gi/pyp-topics/scripts/tokens2classes.py delete mode 100755 gi/pyp-topics/scripts/topics.py delete mode 100644 gi/pyp-topics/src/Makefile.am delete mode 100644 gi/pyp-topics/src/Makefile.mpi delete mode 100644 gi/pyp-topics/src/clock_gettime_stub.c delete mode 100644 gi/pyp-topics/src/contexts_corpus.cc delete mode 100644 gi/pyp-topics/src/contexts_corpus.hh delete mode 100644 gi/pyp-topics/src/contexts_lexer.h delete mode 100644 gi/pyp-topics/src/contexts_lexer.l delete mode 100644 gi/pyp-topics/src/corpus.cc delete mode 100644 gi/pyp-topics/src/corpus.hh delete mode 100644 gi/pyp-topics/src/gammadist.c delete mode 100644 gi/pyp-topics/src/gammadist.h delete mode 100644 gi/pyp-topics/src/gzstream.cc delete mode 100644 gi/pyp-topics/src/gzstream.hh delete mode 100644 gi/pyp-topics/src/log_add.h delete mode 100644 gi/pyp-topics/src/macros.Linux delete mode 100644 gi/pyp-topics/src/makefile.darwin delete mode 100644 gi/pyp-topics/src/makefile.depend delete mode 100644 gi/pyp-topics/src/mpi-corpus.hh delete mode 100644 gi/pyp-topics/src/mpi-pyp-topics.cc delete mode 100644 gi/pyp-topics/src/mpi-pyp-topics.hh delete mode 100644 gi/pyp-topics/src/mpi-pyp.hh delete mode 100644 gi/pyp-topics/src/mpi-train-contexts.cc delete mode 100644 gi/pyp-topics/src/mt19937ar.c delete mode 100644 gi/pyp-topics/src/mt19937ar.h delete mode 100644 gi/pyp-topics/src/pyp-topics.cc delete mode 100644 gi/pyp-topics/src/pyp-topics.hh delete mode 100644 gi/pyp-topics/src/pyp.hh delete mode 100644 gi/pyp-topics/src/slice-sampler.h delete mode 100644 gi/pyp-topics/src/timing.h delete mode 100644 gi/pyp-topics/src/train-contexts.cc delete mode 100644 gi/pyp-topics/src/train.cc delete mode 100644 gi/pyp-topics/src/utility.h delete mode 100644 gi/pyp-topics/src/workers.hh delete mode 100755 gi/scripts/buck2utf8.pl delete mode 100644 jam-files/LICENSE_1_0.txt delete mode 100644 jam-files/boost-build/boost-build.jam delete 
mode 100644 jam-files/boost-build/bootstrap.jam delete mode 100644 jam-files/boost-build/build-system.jam delete mode 100644 jam-files/boost-build/build/__init__.py delete mode 100644 jam-files/boost-build/build/ac.jam delete mode 100644 jam-files/boost-build/build/alias.jam delete mode 100644 jam-files/boost-build/build/alias.py delete mode 100644 jam-files/boost-build/build/build-request.jam delete mode 100644 jam-files/boost-build/build/build_request.py delete mode 100644 jam-files/boost-build/build/configure.jam delete mode 100644 jam-files/boost-build/build/configure.py delete mode 100644 jam-files/boost-build/build/engine.py delete mode 100644 jam-files/boost-build/build/errors.py delete mode 100644 jam-files/boost-build/build/feature.jam delete mode 100644 jam-files/boost-build/build/feature.py delete mode 100644 jam-files/boost-build/build/generators.jam delete mode 100644 jam-files/boost-build/build/generators.py delete mode 100644 jam-files/boost-build/build/modifiers.jam delete mode 100644 jam-files/boost-build/build/project.ann.py delete mode 100644 jam-files/boost-build/build/project.jam delete mode 100644 jam-files/boost-build/build/project.py delete mode 100644 jam-files/boost-build/build/property-set.jam delete mode 100644 jam-files/boost-build/build/property.jam delete mode 100644 jam-files/boost-build/build/property.py delete mode 100644 jam-files/boost-build/build/property_set.py delete mode 100644 jam-files/boost-build/build/readme.txt delete mode 100644 jam-files/boost-build/build/scanner.jam delete mode 100644 jam-files/boost-build/build/scanner.py delete mode 100644 jam-files/boost-build/build/targets.jam delete mode 100644 jam-files/boost-build/build/targets.py delete mode 100644 jam-files/boost-build/build/toolset.jam delete mode 100644 jam-files/boost-build/build/toolset.py delete mode 100644 jam-files/boost-build/build/type.jam delete mode 100644 jam-files/boost-build/build/type.py delete mode 100644 
jam-files/boost-build/build/version.jam delete mode 100644 jam-files/boost-build/build/virtual-target.jam delete mode 100644 jam-files/boost-build/build/virtual_target.py delete mode 100644 jam-files/boost-build/kernel/boost-build.jam delete mode 100644 jam-files/boost-build/kernel/bootstrap.jam delete mode 100644 jam-files/boost-build/kernel/bootstrap.py delete mode 100644 jam-files/boost-build/kernel/class.jam delete mode 100644 jam-files/boost-build/kernel/errors.jam delete mode 100644 jam-files/boost-build/kernel/modules.jam delete mode 100644 jam-files/boost-build/options/help.jam delete mode 100644 jam-files/boost-build/site-config.jam delete mode 100644 jam-files/boost-build/tools/__init__.py delete mode 100644 jam-files/boost-build/tools/acc.jam delete mode 100644 jam-files/boost-build/tools/auto-index.jam delete mode 100644 jam-files/boost-build/tools/bison.jam delete mode 100644 jam-files/boost-build/tools/boostbook-config.jam delete mode 100644 jam-files/boost-build/tools/boostbook.jam delete mode 100644 jam-files/boost-build/tools/borland.jam delete mode 100644 jam-files/boost-build/tools/builtin.jam delete mode 100644 jam-files/boost-build/tools/builtin.py delete mode 100644 jam-files/boost-build/tools/cast.jam delete mode 100644 jam-files/boost-build/tools/cast.py delete mode 100644 jam-files/boost-build/tools/clang-darwin.jam delete mode 100644 jam-files/boost-build/tools/clang-linux.jam delete mode 100644 jam-files/boost-build/tools/clang.jam delete mode 100644 jam-files/boost-build/tools/common.jam delete mode 100644 jam-files/boost-build/tools/common.py delete mode 100644 jam-files/boost-build/tools/como-linux.jam delete mode 100644 jam-files/boost-build/tools/como-win.jam delete mode 100644 jam-files/boost-build/tools/como.jam delete mode 100644 jam-files/boost-build/tools/convert.jam delete mode 100644 jam-files/boost-build/tools/cw-config.jam delete mode 100644 jam-files/boost-build/tools/cw.jam delete mode 100644 
jam-files/boost-build/tools/darwin.jam delete mode 100644 jam-files/boost-build/tools/darwin.py delete mode 100644 jam-files/boost-build/tools/dmc.jam delete mode 100644 jam-files/boost-build/tools/docutils.jam delete mode 100644 jam-files/boost-build/tools/doxproc.py delete mode 100644 jam-files/boost-build/tools/doxygen-config.jam delete mode 100644 jam-files/boost-build/tools/doxygen.jam delete mode 100644 jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile delete mode 100644 jam-files/boost-build/tools/doxygen/windows-paths-check.hpp delete mode 100644 jam-files/boost-build/tools/fop.jam delete mode 100644 jam-files/boost-build/tools/fortran.jam delete mode 100644 jam-files/boost-build/tools/gcc.jam delete mode 100644 jam-files/boost-build/tools/gcc.py delete mode 100644 jam-files/boost-build/tools/generate.jam delete mode 100644 jam-files/boost-build/tools/gettext.jam delete mode 100644 jam-files/boost-build/tools/gfortran.jam delete mode 100644 jam-files/boost-build/tools/hp_cxx.jam delete mode 100644 jam-files/boost-build/tools/hpfortran.jam delete mode 100644 jam-files/boost-build/tools/ifort.jam delete mode 100644 jam-files/boost-build/tools/intel-darwin.jam delete mode 100644 jam-files/boost-build/tools/intel-linux.jam delete mode 100644 jam-files/boost-build/tools/intel-win.jam delete mode 100644 jam-files/boost-build/tools/intel.jam delete mode 100644 jam-files/boost-build/tools/lex.jam delete mode 100644 jam-files/boost-build/tools/make.jam delete mode 100644 jam-files/boost-build/tools/make.py delete mode 100644 jam-files/boost-build/tools/mc.jam delete mode 100644 jam-files/boost-build/tools/message.jam delete mode 100644 jam-files/boost-build/tools/message.py delete mode 100644 jam-files/boost-build/tools/midl.jam delete mode 100644 jam-files/boost-build/tools/mipspro.jam delete mode 100644 jam-files/boost-build/tools/mpi.jam delete mode 100644 jam-files/boost-build/tools/msvc-config.jam delete mode 100644 
jam-files/boost-build/tools/msvc.jam delete mode 100644 jam-files/boost-build/tools/notfile.jam delete mode 100644 jam-files/boost-build/tools/notfile.py delete mode 100644 jam-files/boost-build/tools/package.jam delete mode 100644 jam-files/boost-build/tools/package.py delete mode 100644 jam-files/boost-build/tools/pathscale.jam delete mode 100644 jam-files/boost-build/tools/pch.jam delete mode 100644 jam-files/boost-build/tools/pch.py delete mode 100644 jam-files/boost-build/tools/pgi.jam delete mode 100644 jam-files/boost-build/tools/python-config.jam delete mode 100644 jam-files/boost-build/tools/python.jam delete mode 100644 jam-files/boost-build/tools/qcc.jam delete mode 100644 jam-files/boost-build/tools/qt.jam delete mode 100644 jam-files/boost-build/tools/qt3.jam delete mode 100644 jam-files/boost-build/tools/qt4.jam delete mode 100644 jam-files/boost-build/tools/quickbook-config.jam delete mode 100644 jam-files/boost-build/tools/quickbook.jam delete mode 100644 jam-files/boost-build/tools/rc.jam delete mode 100644 jam-files/boost-build/tools/rc.py delete mode 100644 jam-files/boost-build/tools/stage.jam delete mode 100644 jam-files/boost-build/tools/stage.py delete mode 100644 jam-files/boost-build/tools/stlport.jam delete mode 100644 jam-files/boost-build/tools/sun.jam delete mode 100644 jam-files/boost-build/tools/symlink.jam delete mode 100644 jam-files/boost-build/tools/symlink.py delete mode 100644 jam-files/boost-build/tools/testing-aux.jam delete mode 100644 jam-files/boost-build/tools/testing.jam delete mode 100644 jam-files/boost-build/tools/testing.py delete mode 100644 jam-files/boost-build/tools/types/__init__.py delete mode 100644 jam-files/boost-build/tools/types/asm.jam delete mode 100644 jam-files/boost-build/tools/types/asm.py delete mode 100644 jam-files/boost-build/tools/types/cpp.jam delete mode 100644 jam-files/boost-build/tools/types/cpp.py delete mode 100644 jam-files/boost-build/tools/types/exe.jam delete mode 100644 
jam-files/boost-build/tools/types/exe.py delete mode 100644 jam-files/boost-build/tools/types/html.jam delete mode 100644 jam-files/boost-build/tools/types/html.py delete mode 100644 jam-files/boost-build/tools/types/lib.jam delete mode 100644 jam-files/boost-build/tools/types/lib.py delete mode 100644 jam-files/boost-build/tools/types/obj.jam delete mode 100644 jam-files/boost-build/tools/types/obj.py delete mode 100644 jam-files/boost-build/tools/types/objc.jam delete mode 100644 jam-files/boost-build/tools/types/preprocessed.jam delete mode 100644 jam-files/boost-build/tools/types/qt.jam delete mode 100644 jam-files/boost-build/tools/types/register.jam delete mode 100644 jam-files/boost-build/tools/types/rsp.jam delete mode 100644 jam-files/boost-build/tools/types/rsp.py delete mode 100644 jam-files/boost-build/tools/unix.jam delete mode 100644 jam-files/boost-build/tools/unix.py delete mode 100644 jam-files/boost-build/tools/vacpp.jam delete mode 100644 jam-files/boost-build/tools/whale.jam delete mode 100644 jam-files/boost-build/tools/xlf.jam delete mode 100644 jam-files/boost-build/tools/xsltproc-config.jam delete mode 100644 jam-files/boost-build/tools/xsltproc.jam delete mode 100644 jam-files/boost-build/tools/xsltproc/included.xsl delete mode 100644 jam-files/boost-build/tools/xsltproc/test.xml delete mode 100644 jam-files/boost-build/tools/xsltproc/test.xsl delete mode 100644 jam-files/boost-build/tools/zlib.jam delete mode 100644 jam-files/boost-build/user-config.jam delete mode 100644 jam-files/boost-build/util/__init__.py delete mode 100644 jam-files/boost-build/util/assert.jam delete mode 100644 jam-files/boost-build/util/container.jam delete mode 100644 jam-files/boost-build/util/doc.jam delete mode 100644 jam-files/boost-build/util/indirect.jam delete mode 100644 jam-files/boost-build/util/indirect.py delete mode 100644 jam-files/boost-build/util/logger.py delete mode 100644 jam-files/boost-build/util/numbers.jam delete mode 100644 
jam-files/boost-build/util/option.jam delete mode 100644 jam-files/boost-build/util/option.py delete mode 100644 jam-files/boost-build/util/order.jam delete mode 100644 jam-files/boost-build/util/order.py delete mode 100644 jam-files/boost-build/util/os.jam delete mode 100644 jam-files/boost-build/util/os_j.py delete mode 100644 jam-files/boost-build/util/path.jam delete mode 100644 jam-files/boost-build/util/path.py delete mode 100644 jam-files/boost-build/util/print.jam delete mode 100644 jam-files/boost-build/util/regex.jam delete mode 100644 jam-files/boost-build/util/regex.py delete mode 100644 jam-files/boost-build/util/sequence.jam delete mode 100644 jam-files/boost-build/util/sequence.py delete mode 100644 jam-files/boost-build/util/set.jam delete mode 100644 jam-files/boost-build/util/set.py delete mode 100644 jam-files/boost-build/util/string.jam delete mode 100644 jam-files/boost-build/util/utility.jam delete mode 100644 jam-files/boost-build/util/utility.py delete mode 100644 jam-files/engine/Jambase delete mode 100644 jam-files/engine/boost-jam.spec delete mode 100644 jam-files/engine/boost-no-inspect delete mode 100644 jam-files/engine/build.bat delete mode 100644 jam-files/engine/build.jam delete mode 100755 jam-files/engine/build.sh delete mode 100644 jam-files/engine/build_vms.com delete mode 100644 jam-files/engine/builtins.c delete mode 100644 jam-files/engine/builtins.h delete mode 100644 jam-files/engine/bump_version.py delete mode 100644 jam-files/engine/class.c delete mode 100644 jam-files/engine/class.h delete mode 100644 jam-files/engine/command.c delete mode 100644 jam-files/engine/command.h delete mode 100644 jam-files/engine/compile.c delete mode 100644 jam-files/engine/compile.h delete mode 100644 jam-files/engine/debian/changelog delete mode 100644 jam-files/engine/debian/control delete mode 100644 jam-files/engine/debian/copyright delete mode 100644 jam-files/engine/debian/jam.man.sgml delete mode 100755 jam-files/engine/debian/rules 
delete mode 100644 jam-files/engine/debug.c delete mode 100644 jam-files/engine/debug.h delete mode 100644 jam-files/engine/execcmd.h delete mode 100644 jam-files/engine/execmac.c delete mode 100644 jam-files/engine/execnt.c delete mode 100644 jam-files/engine/execunix.c delete mode 100644 jam-files/engine/execvms.c delete mode 100644 jam-files/engine/expand.c delete mode 100644 jam-files/engine/expand.h delete mode 100644 jam-files/engine/filemac.c delete mode 100644 jam-files/engine/filent.c delete mode 100644 jam-files/engine/fileos2.c delete mode 100644 jam-files/engine/filesys.c delete mode 100644 jam-files/engine/filesys.h delete mode 100644 jam-files/engine/fileunix.c delete mode 100644 jam-files/engine/filevms.c delete mode 100644 jam-files/engine/frames.c delete mode 100644 jam-files/engine/frames.h delete mode 100644 jam-files/engine/glob.c delete mode 100644 jam-files/engine/hash.c delete mode 100644 jam-files/engine/hash.h delete mode 100644 jam-files/engine/hcache.c delete mode 100644 jam-files/engine/hcache.h delete mode 100644 jam-files/engine/hdrmacro.c delete mode 100644 jam-files/engine/hdrmacro.h delete mode 100644 jam-files/engine/headers.c delete mode 100644 jam-files/engine/headers.h delete mode 100644 jam-files/engine/jam.c delete mode 100644 jam-files/engine/jam.h delete mode 100644 jam-files/engine/jambase.c delete mode 100644 jam-files/engine/jambase.h delete mode 100644 jam-files/engine/jamgram.c delete mode 100644 jam-files/engine/jamgram.h delete mode 100644 jam-files/engine/jamgram.y delete mode 100644 jam-files/engine/jamgram.yy delete mode 100644 jam-files/engine/jamgramtab.h delete mode 100644 jam-files/engine/lists.c delete mode 100644 jam-files/engine/lists.h delete mode 100644 jam-files/engine/make.c delete mode 100644 jam-files/engine/make.h delete mode 100644 jam-files/engine/make1.c delete mode 100644 jam-files/engine/md5.c delete mode 100644 jam-files/engine/md5.h delete mode 100644 jam-files/engine/mem.c delete mode 100644 
jam-files/engine/mem.h delete mode 100644 jam-files/engine/mkjambase.c delete mode 100644 jam-files/engine/modules.c delete mode 100644 jam-files/engine/modules.h delete mode 100644 jam-files/engine/modules/order.c delete mode 100644 jam-files/engine/modules/path.c delete mode 100644 jam-files/engine/modules/property-set.c delete mode 100644 jam-files/engine/modules/readme.txt delete mode 100644 jam-files/engine/modules/regex.c delete mode 100644 jam-files/engine/modules/sequence.c delete mode 100644 jam-files/engine/modules/set.c delete mode 100644 jam-files/engine/native.c delete mode 100644 jam-files/engine/native.h delete mode 100644 jam-files/engine/newstr.c delete mode 100644 jam-files/engine/newstr.h delete mode 100644 jam-files/engine/option.c delete mode 100644 jam-files/engine/option.h delete mode 100644 jam-files/engine/output.c delete mode 100644 jam-files/engine/output.h delete mode 100644 jam-files/engine/parse.c delete mode 100644 jam-files/engine/parse.h delete mode 100644 jam-files/engine/patchlevel.h delete mode 100644 jam-files/engine/pathmac.c delete mode 100644 jam-files/engine/pathsys.h delete mode 100644 jam-files/engine/pathunix.c delete mode 100644 jam-files/engine/pathvms.c delete mode 100644 jam-files/engine/pwd.c delete mode 100644 jam-files/engine/pwd.h delete mode 100644 jam-files/engine/regexp.c delete mode 100644 jam-files/engine/regexp.h delete mode 100644 jam-files/engine/rules.c delete mode 100644 jam-files/engine/rules.h delete mode 100644 jam-files/engine/scan.c delete mode 100644 jam-files/engine/scan.h delete mode 100644 jam-files/engine/search.c delete mode 100644 jam-files/engine/search.h delete mode 100644 jam-files/engine/strings.c delete mode 100644 jam-files/engine/strings.h delete mode 100644 jam-files/engine/subst.c delete mode 100644 jam-files/engine/timestamp.c delete mode 100644 jam-files/engine/timestamp.h delete mode 100644 jam-files/engine/variable.c delete mode 100644 jam-files/engine/variable.h delete mode 
100644 jam-files/engine/w32_getreg.c delete mode 100644 jam-files/engine/yyacc.c delete mode 100644 jam-files/sanity.jam delete mode 100644 klm/lm/Jamfile delete mode 100644 klm/util/Jamfile delete mode 100644 mira/Jamfile delete mode 100644 mteval/Jamfile delete mode 100644 phrasinator/Jamfile delete mode 100644 phrasinator/Makefile.am delete mode 100644 phrasinator/README delete mode 100644 phrasinator/gibbs_train_plm.cc delete mode 100644 phrasinator/gibbs_train_plm.notables.cc delete mode 100755 phrasinator/train-phrasinator.pl delete mode 100644 rst_parser/Makefile.am delete mode 100644 rst_parser/arc_factored.cc delete mode 100644 rst_parser/arc_factored.h delete mode 100644 rst_parser/arc_factored_marginals.cc delete mode 100644 rst_parser/arc_ff.cc delete mode 100644 rst_parser/arc_ff.h delete mode 100644 rst_parser/dep_training.cc delete mode 100644 rst_parser/dep_training.h delete mode 100644 rst_parser/global_ff.cc delete mode 100644 rst_parser/global_ff.h delete mode 100644 rst_parser/mst_train.cc delete mode 100644 rst_parser/picojson.h delete mode 100644 rst_parser/random_tree.cc delete mode 100644 rst_parser/rst.cc delete mode 100644 rst_parser/rst.h delete mode 100644 rst_parser/rst_parse.cc delete mode 100644 rst_parser/rst_train.cc delete mode 100644 training/Jamfile delete mode 100644 training/liblbfgs/Jamfile delete mode 100644 utils/Jamfile delete mode 100644 utils/ccrp.h delete mode 100644 utils/ccrp_nt.h delete mode 100644 utils/ccrp_onetable.h delete mode 100644 utils/crp_table_manager.h delete mode 100644 utils/crp_test.cc delete mode 100644 utils/gamma_poisson.h delete mode 100644 utils/mfcr.h delete mode 100644 utils/mfcr_test.cc delete mode 100644 utils/slice_sampler.h delete mode 100644 utils/unigram_pyp_lm.cc (limited to 'klm') diff --git a/Jamroot b/Jamroot deleted file mode 100644 index ef426146..00000000 --- a/Jamroot +++ /dev/null @@ -1,45 +0,0 @@ -#cdec compilation with bjam -# -#--with-boost=/usr/include 
-#--with-google-hash=/usr/include so that $with-google-hash/google/dense_hash_map exists -# -#-a forces the build to run from scratch -#-jN parallelizes just like make -# -#Respects CXXFLAGS, CFLAGS, and LDFLAGS environment variables. - -path-constant TOP : . ; -include $(TOP)/jam-files/sanity.jam ; -boost 104400 ; -external-lib z ; - -with-google-hash = [ option.get "with-google-hash" ] ; -if [ test_header sparsehash/dense_hash_map ] || $(with-google-hash) { - requirements += HAVE_SPARSEHASH $(with-google-hash) ; -} - -if [ test_header cmph.h ] || $(with-cmph) { - requirements += HAVE_CMPH $(with-cmph) ; -} - -if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serialization ] { - requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; -} - -project : requirements $(requirements) darwin:static ; -project : default-build on release ; - -install-bin-libs dpmert//programs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec phrasinator//programs mira//kbest_mira ; - -install perl-scripts : dpmert//dpmert.pl : $(bindir) ; - -build-projects mteval decoder dpmert klm/lm training/liblbfgs ; - -#Compile everything ending with _test.cc into a test and run it. 
-rule all_tests ( targets * : dependencies : properties * ) { - targets ?= [ glob *_test.cc ] ; - for t in $(targets) { - local base = [ MATCH "^(.*).cc$" : $(t) ] ; - unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework : $(properties) ; - } -} diff --git a/Makefile.am b/Makefile.am index 24aafd63..c0826532 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,7 +7,6 @@ SUBDIRS = \ klm/util \ klm/lm \ decoder \ - phrasinator \ training \ training/liblbfgs \ mira \ @@ -15,10 +14,7 @@ SUBDIRS = \ dpmert \ pro-train \ rampion \ - minrisk \ - gi/pf \ - gi/markov_al \ - rst_parser + minrisk #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava diff --git a/bjam b/bjam deleted file mode 100755 index d1ac8a55..00000000 --- a/bjam +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -set -e -if - bjam="$(which bjam 2>/dev/null)" && #exists - [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true - ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" /dev/null && #bjam in path isn't this script - "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build) - "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough. -then - #Delegate to system bjam - exec "${bjam}" "$@" -fi - -top="$(dirname "$0")" -if [ ! 
-x "$top"/jam-files/bjam ]; then - pushd "$top/jam-files/engine" - ./build.sh - cp -f bin.*/bjam ../bjam - popd -fi - -export BOOST_BUILD_PATH="$top"/jam-files/boost-build -exec "$top"/jam-files/bjam "$@" diff --git a/configure.ac b/configure.ac index ea9e84fb..07ef9fe1 100644 --- a/configure.ac +++ b/configure.ac @@ -114,7 +114,6 @@ AC_CONFIG_FILES([Makefile]) AC_CONFIG_FILES([utils/Makefile]) AC_CONFIG_FILES([mteval/Makefile]) AC_CONFIG_FILES([decoder/Makefile]) -AC_CONFIG_FILES([phrasinator/Makefile]) AC_CONFIG_FILES([training/Makefile]) AC_CONFIG_FILES([training/liblbfgs/Makefile]) AC_CONFIG_FILES([dpmert/Makefile]) @@ -125,10 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([gi/pyp-topics/src/Makefile]) -AC_CONFIG_FILES([gi/clda/src/Makefile]) -AC_CONFIG_FILES([gi/pf/Makefile]) -AC_CONFIG_FILES([gi/markov_al/Makefile]) AC_CONFIG_FILES([rst_parser/Makefile]) AC_CONFIG_FILES([python/setup.py]) diff --git a/decoder/Jamfile b/decoder/Jamfile deleted file mode 100644 index da02d063..00000000 --- a/decoder/Jamfile +++ /dev/null @@ -1,81 +0,0 @@ -import testing ; -import lex ; -import option ; - -if [ option.get "with-glc" ] { - glc = ff_glc.cc string_util.cc feature-factory.cc ; -} - -lib decoder : - forest_writer.cc - maxtrans_blunsom.cc - cdec_ff.cc - cfg.cc - dwarf.cc - ff_dwarf.cc - rule_lexer.ll - fst_translator.cc - csplit.cc - translator.cc - scfg_translator.cc - hg.cc - hg_io.cc - decoder.cc - hg_intersect.cc - hg_sampler.cc - factored_lexicon_helper.cc - viterbi.cc - lattice.cc - aligner.cc - apply_models.cc - earley_composer.cc - phrasetable_fst.cc - trule.cc - ff.cc - ff_rules.cc - ff_wordset.cc - ff_context.cc - ff_charset.cc - ff_lm.cc - ff_klm.cc - ff_ngrams.cc - ff_spans.cc - ff_ruleshape.cc - ff_wordalign.cc - ff_csplit.cc - ff_tagger.cc - ff_source_syntax.cc - ff_bleu.cc - ff_factory.cc - lexalign.cc - lextrans.cc - tagger.cc - 
bottom_up_parser.cc - phrasebased_translator.cc - JSON_parser.c - json_parse.cc - grammar.cc - rescore_translator.cc - hg_remove_eps.cc - hg_union.cc - $(glc) - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - : . - : : - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - . - ; - -exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; - -all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : $(TOP)/decoder/test_data ; - -install legacy : cdec - : $(TOP)/cdec EXE on shared:$(TOP)/cdec shared:LIB ; - diff --git a/decoder/decoder.h b/decoder/decoder.h index bef2ff5e..79c7a602 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -24,7 +24,7 @@ private: #endif class SentenceMetadata; -struct Hypergraph; +class Hypergraph; struct DecoderImpl; struct DecoderObserver { diff --git a/decoder/hg.h b/decoder/hg.h index 591e98ce..6d67f2fa 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -503,9 +503,9 @@ public: template void visit_edges_topo(V &v) { - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { EdgesVector const& in=nodes_[i].in_edges_; - for (int j=0;j. - : : - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - . 
- ; - -all_tests [ glob *_test.cc ] : dpmert : $(TOP)/dpmert/test_data ; - -exe sentserver : sentserver.c : multi ; -exe sentclient : sentclient.c ; -exe mr_dpmert_generate_mapper_input : mr_dpmert_generate_mapper_input.cc dpmert ..//boost_program_options ; -exe mr_dpmert_map : mr_dpmert_map.cc dpmert ..//boost_program_options ; -exe mr_dpmert_reduce : mr_dpmert_reduce.cc dpmert ..//boost_program_options ; - -alias programs : sentserver sentclient mr_dpmert_generate_mapper_input mr_dpmert_map mr_dpmert_reduce ; diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am deleted file mode 100644 index cdca1f97..00000000 --- a/gi/clda/src/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = clda - -clda_SOURCES = clda.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/clda/src/ccrp.h b/gi/clda/src/ccrp.h deleted file mode 100644 index a7c2825c..00000000 --- a/gi/clda/src/ccrp.h +++ /dev/null @@ -1,291 +0,0 @@ -#ifndef _CCRP_H_ -#define _CCRP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. 
- -template > -class CCRP { - public: - CCRP(double disc, double conc) : - num_tables_(), - num_customers_(), - discount_(disc), - concentration_(conc), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) : - num_tables_(), - num_customers_(), - discount_(d), - concentration_(c), - discount_prior_alpha_(d_alpha), - discount_prior_beta_(d_beta), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double discount() const { return discount_; } - double concentration() const { return concentration_; } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); - } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_locs_.clear(); - } - - unsigned num_tables(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->second.table_counts_.size(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->total_dish_count_; - } - - // returns +1 or 0 indicating whether a new table was opened - int increment(const Dish& dish, const double& p0, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - bool share_table = false; - if (loc.total_dish_count_) { - const double p_empty = (concentration_ + num_tables_ * discount_) * p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - 
share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= (*ti - discount_); - if (r <= 0.0) { - ++(*ti); - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } else { - loc.table_counts_.push_back(1u); - ++num_tables_; - } - ++loc.total_dish_count_; - ++num_customers_; - return (share_table ? 0 : 1); - } - - // returns -1 or 0, indicating whether a table was closed - int decrement(const Dish& dish, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - assert(loc.total_dish_count_); - if (loc.total_dish_count_ == 1) { - dish_locs_.erase(dish); - --num_tables_; - --num_customers_; - return -1; - } else { - int delta = 0; - // sample customer to remove UNIFORMLY. that is, do NOT use the discount - // here. if you do, it will introduce (unwanted) bias! 
- double r = rng->next() * loc.total_dish_count_; - --loc.total_dish_count_; - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= *ti; - if (r <= 0.0) { - if ((--(*ti)) == 0) { - --num_tables_; - delta = -1; - loc.table_counts_.erase(ti); - } - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - --num_customers_; - return delta; - } - } - - double prob(const Dish& dish, const double& p0) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + concentration_; - if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + concentration_); - } else { - return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + concentration_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, concentration_); - } - - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& discount, const double& concentration) const { - double lp = 0.0; - if (has_discount_prior()) - lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, 
concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - lp += lgamma(concentration) - lgamma(concentration + num_customers_) - + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) - - lgamma(concentration / discount); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const DishLocations& cur = it->second; - for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { - lp += lgamma(*ti - discount) - r; - } - } - } else { - assert(!"not implemented yet"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng) { - assert(has_discount_prior() || has_concentration_prior()); - DiscountResampler dr(*this); - ConcentrationResampler cr(*this); - const int niterations = 10; - double gamma_upper = std::numeric_limits::infinity(); - for (int iter = 0; iter < 5; ++iter) { - if (has_concentration_prior()) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - gamma_upper, 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), - 1.0, 0.0, niterations, 100*niterations); - } - } - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - gamma_upper, 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.concentration_); - } - }; - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(crp_.discount_, proposed_concentration); - } - 
}; - - struct DishLocations { - DishLocations() : total_dish_count_() {} - unsigned total_dish_count_; // customers at all tables with this dish - std::list table_counts_; // list<> gives O(1) deletion and insertion, which we want - // .size() is the number of tables for this dish - }; - - void Print(std::ostream* out) const { - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; - for (typename std::list::const_iterator i = it->second.table_counts_.begin(); - i != it->second.table_counts_.end(); ++i) { - (*out) << " " << *i; - } - (*out) << std::endl; - } - } - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return dish_locs_.begin(); - } - const_iterator end() const { - return dish_locs_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - std::tr1::unordered_map dish_locs_; - - double discount_; - double concentration_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; - double discount_prior_beta_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc deleted file mode 100644 index f548997f..00000000 --- a/gi/clda/src/clda.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include -#include -#include -#include - -#include "timer.h" -#include "crp.h" -#include "ccrp.h" -#include "sampler.h" -#include "tdict.h" -const size_t MAX_DOC_LEN_CHARS = 10000000; - -using namespace std; - -void ShowTopWordsForTopic(const map& counts) { - multimap ms; - for (map::const_iterator it = counts.begin(); it != counts.end(); ++it) - 
ms.insert(make_pair(it->second, it->first)); - int cc = 0; - for (multimap::reverse_iterator it = ms.rbegin(); it != ms.rend(); ++it) { - cerr << it->first << ':' << TD::Convert(it->second) << " "; - ++cc; - if (cc==20) break; - } - cerr << endl; -} - -int main(int argc, char** argv) { - if (argc != 3) { - cerr << "Usage: " << argv[0] << " num-classes num-samples\n"; - return 1; - } - const int num_classes = atoi(argv[1]); - const int num_iterations = atoi(argv[2]); - const int burnin_size = num_iterations * 0.9; - if (num_classes < 2) { - cerr << "Must request more than 1 class\n"; - return 1; - } - if (num_iterations < 5) { - cerr << "Must request more than 5 iterations\n"; - return 1; - } - cerr << "CLASSES: " << num_classes << endl; - char* buf = new char[MAX_DOC_LEN_CHARS]; - vector > wji; // w[j][i] - observed word i of doc j - vector > zji; // z[j][i] - topic assignment for word i of doc j - cerr << "READING DOCUMENTS\n"; - while(cin) { - cin.getline(buf, MAX_DOC_LEN_CHARS); - if (buf[0] == 0) continue; - wji.push_back(vector()); - TD::ConvertSentence(buf, &wji.back()); - } - cerr << "READ " << wji.size() << " DOCUMENTS\n"; - MT19937 rng; - cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n"; - zji.resize(wji.size()); - double disc = 0.1; - double beta = 10.0; - double alpha = 50.0; - const double uniform_topic = 1.0 / num_classes; - const double uniform_word = 1.0 / TD::NumWords(); - vector > dr(zji.size(), CCRP(1,1,1,1,disc, beta)); // dr[i] describes the probability of using a topic in document i - vector > wr(num_classes, CCRP(1,1,1,1,disc, alpha)); // wr[k] describes the probability of generating a word in topic k - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - zj.resize(num_words); - for (int i = 0; i < num_words; ++i) { - int random_topic = rng.next() * num_classes; - if (random_topic == num_classes) { --random_topic; } - zj[i] = random_topic; - const int word = 
wj[i]; - dr[j].increment(random_topic, uniform_topic, &rng); - wr[random_topic].increment(word, uniform_word, &rng); - } - } - cerr << "SAMPLING\n"; - vector > t2w(num_classes); - Timer timer; - SampleSet ss; - ss.resize(num_classes); - double total_time = 0; - for (int iter = 0; iter < num_iterations; ++iter) { - cerr << '.'; - if (iter && iter % 10 == 0) { - total_time += timer.Elapsed(); - timer.Reset(); - double llh = 0; -#if 1 - for (int j = 0; j < dr.size(); ++j) - dr[j].resample_hyperparameters(&rng); - for (int j = 0; j < wr.size(); ++j) - wr[j].resample_hyperparameters(&rng); -#endif - - for (int j = 0; j < dr.size(); ++j) - llh += dr[j].log_crp_prob(); - for (int j = 0; j < wr.size(); ++j) - llh += wr[j].log_crp_prob(); - cerr << " [LLH=" << llh << " I=" << iter << "]\n"; - } - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - for (int i = 0; i < num_words; ++i) { - const int word = wj[i]; - const int cur_topic = zj[i]; - dr[j].decrement(cur_topic, &rng); - wr[cur_topic].decrement(word, &rng); - - for (int k = 0; k < num_classes; ++k) { - ss[k]= dr[j].prob(k, uniform_topic) * wr[k].prob(word, uniform_word); - } - const int new_topic = rng.SelectSample(ss); - dr[j].increment(new_topic, uniform_topic, &rng); - wr[new_topic].increment(word, uniform_word, &rng); - zj[i] = new_topic; - if (iter > burnin_size) { - ++t2w[cur_topic][word]; - } - } - } - } - for (int i = 0; i < num_classes; ++i) { - cerr << "---------------------------------\n"; - cerr << " final PYP(" << wr[i].discount() << "," << wr[i].concentration() << ")\n"; - ShowTopWordsForTopic(t2w[i]); - } - cerr << "-------------\n"; -#if 0 - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - zj.resize(num_words); - for (int i = 0; i < num_words; ++i) { - cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") "; - } - cerr << endl; - } 
-#endif - return 0; -} - diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h deleted file mode 100644 index 9d35857e..00000000 --- a/gi/clda/src/crp.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _CRP_H_ -#define _CRP_H_ - -// shamelessly adapted from code by Phil Blunsom and Trevor Cohn - -#include -#include - -#include "prob.h" - -template > -class CRP { - public: - CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {} - void increment(const DishType& dish); - void decrement(const DishType& dish); - void erase(const DishType& dish) { - counts_.erase(dish); - } - inline int count(const DishType& dish) const { - const typename MapType::const_iterator i = counts_.find(dish); - if (i == counts_.end()) return 0; else return i->second; - } - inline prob_t prob(const DishType& dish, const prob_t& p0) const { - return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_); - } - private: - typedef std::tr1::unordered_map MapType; - MapType counts_; - const double alpha_; - const prob_t palpha_; - int total_customers_; -}; - -template -void CRP::increment(const Dish& dish) { - ++counts_[dish]; - ++total_customers_; -} - -template -void CRP::decrement(const Dish& dish) { - typename MapType::iterator i = counts_.find(dish); - assert(i != counts_.end()); - if (--i->second == 0) - counts_.erase(i); - --total_customers_; -} - -#endif diff --git a/gi/clda/src/slice_sampler.h b/gi/clda/src/slice_sampler.h deleted file mode 100644 index aa48a169..00000000 --- a/gi/clda/src/slice_sampler.h +++ /dev/null @@ -1,191 +0,0 @@ -//! slice-sampler.h is an MCMC slice sampler -//! -//! Mark Johnson, 1st August 2008 - -#ifndef SLICE_SAMPLER_H -#define SLICE_SAMPLER_H - -#include -#include -#include -#include -#include - -//! slice_sampler_rfc_type{} returns the value of a user-specified -//! 
function if the argument is within range, or - infinity otherwise -// -template -struct slice_sampler_rfc_type { - F min_x, max_x; - const Fn& f; - U max_nfeval, nfeval; - slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval) - : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { } - - F operator() (F x) { - if (min_x < x && x < max_x) { - assert(++nfeval <= max_nfeval); - F fx = f(x); - assert(std::isfinite(fx)); - return fx; - } - return -std::numeric_limits::infinity(); - } -}; // slice_sampler_rfc_type{} - -//! slice_sampler1d() implements the univariate "range doubling" slice sampler -//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767. -// -template -F slice_sampler1d(const LogF& logF0, //!< log of function to sample - F x, //!< starting point - Uniform01& u01, //!< uniform [0,1) random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - slice_sampler_rfc_type logF(min_x, max_x, logF0, max_nfeval); - - assert(std::isfinite(x)); - - if (w <= 0.0) { // set w to a default width - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1); - } - assert(std::isfinite(w)); - - F logFx = logF(x); - for (U sample = 0; sample < nsamples; ++sample) { - F logY = logFx + log(u01()+1e-100); //! slice logFx at this value - assert(std::isfinite(logY)); - - F xl = x - w*u01(); //! lower bound on slice interval - F logFxl = logF(xl); - F xr = xl + w; //! 
upper bound on slice interval - F logFxr = logF(xr); - - while (logY < logFxl || logY < logFxr) // doubling procedure - if (u01() < 0.5) - logFxl = logF(xl -= xr - xl); - else - logFxr = logF(xr += xr - xl); - - F xl1 = xl; - F xr1 = xr; - while (true) { // shrinking procedure - F x1 = xl1 + u01()*(xr1 - xl1); - if (logY < logF(x1)) { - F xl2 = xl; // acceptance procedure - F xr2 = xr; - bool d = false; - while (xr2 - xl2 > 1.1*w) { - F xm = (xl2 + xr2)/2; - if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm)) - d = true; - if (x1 < xm) - xr2 = xm; - else - xl2 = xm; - if (d && logY >= logF(xl2) && logY >= logF(xr2)) - goto unacceptable; - } - x = x1; - goto acceptable; - } - goto acceptable; - unacceptable: - if (x1 < x) // rest of shrinking procedure - xl1 = x1; - else - xr1 = x1; - } - acceptable: - w = (4*w + (xr1 - xl1))/5; // update width estimate - } - return x; -} - -/* -//! slice_sampler1d() implements a 1-d MCMC slice sampler. -//! It should be correct for unimodal distributions, but -//! not for multimodal ones. -// -template -F slice_sampler1d(const LogP& logP, //!< log of distribution to sample - F x, //!< initial sample - Uniform01& u01, //!< uniform random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - assert(std::isfinite(x)); - if (w <= 0.0) { - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? 
-x : x)/4, 0.1); - } - // TRACE4(x, min_x, max_x, w); - F logPx = logP(x); - assert(std::isfinite(logPx)); - U nfeval = 1; - for (U sample = 0; sample < nsamples; ++sample) { - F x0 = x; - F logU = logPx + log(u01()+1e-100); - assert(std::isfinite(logU)); - F r = u01(); - F xl = std::max(min_x, x - r*w); - F xr = std::min(max_x, x + (1-r)*w); - // TRACE3(x, logPx, logU); - while (xl > min_x && logP(xl) > logU) { - xl -= w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl; - assert(nfeval < max_nfeval); - } - xl = std::max(xl, min_x); - while (xr < max_x && logP(xr) > logU) { - xr += w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl; - assert(nfeval < max_nfeval); - } - xr = std::min(xr, max_x); - while (true) { - r = u01(); - x = r*xl + (1-r)*xr; - assert(std::isfinite(x)); - logPx = logP(x); - // TRACE4(logPx, x, xl, xr); - assert(std::isfinite(logPx)); - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl; - assert(nfeval < max_nfeval); - if (logPx > logU) - break; - else if (x > x0) - xr = x; - else - xl = x; - } - // w = (4*w + (xr-xl))/5; // gradually adjust w - } - // TRACE2(logPx, x); - return x; -} // slice_sampler1d() -*/ - -#endif // SLICE_SAMPLER_H diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h deleted file mode 100644 index 123d9a94..00000000 --- a/gi/clda/src/timer.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef 
_TIMER_STATS_H_ -#define _TIMER_STATS_H_ - -#include - -struct Timer { - Timer() { Reset(); } - void Reset() { - start_t = clock(); - } - double Elapsed() const { - const clock_t end_t = clock(); - const double elapsed = (end_t - start_t) / 1000000.0; - return elapsed; - } - private: - std::clock_t start_t; -}; - -#endif diff --git a/gi/evaluation/conditional_entropy.py b/gi/evaluation/conditional_entropy.py deleted file mode 100644 index 356d3b1d..00000000 --- a/gi/evaluation/conditional_entropy.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input-1 input-2' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:') -slash_threshold = None -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - else: - usage() -if len(args) != 2: - usage() - -ginfile = open(args[0]) -pinfile = open(args[1]) - -# evaluating: H(G | P) = sum_{g,p} p(g,p) log { p(p) / p(g,p) } -# = sum_{g,p} c(g,p)/N { log c(p) - log N - log c(g,p) + log N } -# = 1/N sum_{g,p} c(g,p) { log c(p) - log c(g,p) } -# where G = gold, P = predicted, N = number of events - -N = 0 -gold_frequencies = {} -predict_frequencies = {} -joint_frequencies = {} - -for gline, pline in itertools.izip(ginfile, pinfile): - gparts = gline.split('||| ')[1].split() - pparts = pline.split('||| ')[1].split() - assert len(gparts) == len(pparts) - - for gpart, ppart in zip(gparts, pparts): - gtag = gpart.split(':',1)[1] - ptag = ppart.split(':',1)[1] - - if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold: - joint_frequencies.setdefault((gtag, ptag), 0) - joint_frequencies[gtag,ptag] += 1 - - predict_frequencies.setdefault(ptag, 0) - predict_frequencies[ptag] += 1 - - gold_frequencies.setdefault(gtag, 0) - gold_frequencies[gtag] += 1 - - N += 1 - -hg2p = 0 -hp2g = 0 -for (gtag, ptag), cgp in joint_frequencies.items(): - hp2g += cgp * 
(math.log(predict_frequencies[ptag], 2) - math.log(cgp, 2)) - hg2p += cgp * (math.log(gold_frequencies[gtag], 2) - math.log(cgp, 2)) -hg2p /= N -hp2g /= N - -print 'H(P|G)', hg2p, 'H(G|P)', hp2g, 'VI', hg2p + hp2g diff --git a/gi/evaluation/confusion_matrix.py b/gi/evaluation/confusion_matrix.py deleted file mode 100644 index 2dd7aa47..00000000 --- a/gi/evaluation/confusion_matrix.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] [-p output] [-m] input-1 input-2' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:mp:') -slash_threshold = None -output_fname = None -show_matrix = False -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - elif opt == '-p': - output_fname = arg - elif opt == '-m': - show_matrix = True - else: - usage() -if len(args) != 2 or (not show_matrix and not output_fname): - usage() - -ginfile = open(args[0]) -pinfile = open(args[1]) - -if output_fname: - try: - import Image, ImageDraw - except ImportError: - print >>sys.stderr, "Error: Python Image Library not available. Did you forget to set your PYTHONPATH environment variable?" 
- sys.exit(1) - -N = 0 -gold_frequencies = {} -predict_frequencies = {} -joint_frequencies = {} - -for gline, pline in itertools.izip(ginfile, pinfile): - gparts = gline.split('||| ')[1].split() - pparts = pline.split('||| ')[1].split() - assert len(gparts) == len(pparts) - - for gpart, ppart in zip(gparts, pparts): - gtag = gpart.split(':',1)[1] - ptag = ppart.split(':',1)[1] - - if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold: - joint_frequencies.setdefault((gtag, ptag), 0) - joint_frequencies[gtag,ptag] += 1 - - predict_frequencies.setdefault(ptag, 0) - predict_frequencies[ptag] += 1 - - gold_frequencies.setdefault(gtag, 0) - gold_frequencies[gtag] += 1 - - N += 1 - -# find top tags -gtags = gold_frequencies.items() -gtags.sort(lambda x,y: x[1]-y[1]) -gtags.reverse() -#gtags = gtags[:50] - -preds = predict_frequencies.items() -preds.sort(lambda x,y: x[1]-y[1]) -preds.reverse() - -if show_matrix: - print '%7s %7s' % ('pred', 'cnt'), - for gtag, gcount in gtags: print '%7s' % gtag, - print - print '=' * 80 - - for ptag, pcount in preds: - print '%7s %7d' % (ptag, pcount), - for gtag, gcount in gtags: - print '%7d' % joint_frequencies.get((gtag, ptag), 0), - print - - print '%7s %7d' % ('total', N), - for gtag, gcount in gtags: print '%7d' % gcount, - print - -if output_fname: - offset=10 - - image = Image.new("RGB", (len(preds), len(gtags)), (255, 255, 255)) - #hsl(hue, saturation%, lightness%) - - # re-sort preds to get a better diagonal - ptags=[] - if True: - ptags = map(lambda (p,c): p, preds) - else: - remaining = set(predict_frequencies.keys()) - for y, (gtag, gcount) in enumerate(gtags): - best = (None, 0) - for ptag in remaining: - #pcount = predict_frequencies[ptag] - p = joint_frequencies.get((gtag, ptag), 0)# / float(pcount) - if p > best[1]: best = (ptag, p) - ptags.append(ptag) - remaining.remove(ptag) - if not remaining: break - - print 'Predicted tag ordering:', ' '.join(ptags) - print 'Gold tag ordering:', ' 
'.join(map(lambda (t,c): t, gtags)) - - draw = ImageDraw.Draw(image) - for x, ptag in enumerate(ptags): - pcount = predict_frequencies[ptag] - minval = math.log(offset) - maxval = math.log(pcount + offset) - for y, (gtag, gcount) in enumerate(gtags): - f = math.log(offset + joint_frequencies.get((gtag, ptag), 0)) - z = int(240. * (maxval - f) / float(maxval - minval)) - #print x, y, z, f, maxval - draw.point([(x,y)], fill='hsl(%d, 100%%, 50%%)' % z) - del draw - image.save(output_fname) diff --git a/gi/evaluation/entropy.py b/gi/evaluation/entropy.py deleted file mode 100644 index ec1ef502..00000000 --- a/gi/evaluation/entropy.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input file' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:') -slash_threshold = None -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - else: - usage() -if len(args) != 1: - usage() - -infile = open(args[0]) -N = 0 -frequencies = {} - -for line in infile: - - for part in line.split('||| ')[1].split(): - tag = part.split(':',1)[1] - - if slash_threshold == None or tag.count('/') + tag.count('\\') <= slash_threshold: - frequencies.setdefault(tag, 0) - frequencies[tag] += 1 - N += 1 - -h = 0 -for tag, c in frequencies.items(): - h -= c * (math.log(c, 2) - math.log(N, 2)) -h /= N - -print 'entropy', h diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py deleted file mode 100644 index e0034648..00000000 --- a/gi/evaluation/extract_ccg_labels.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python - -# -# Takes spans input along with treebank and spits out CG style categories for each span. 
-# spans = output from CDEC's extools/extractor with --base_phrase_spans option -# treebank = PTB format, one tree per line -# -# Output is in CDEC labelled-span format -# - -import sys, itertools, tree - -tinfile = open(sys.argv[1]) -einfile = open(sys.argv[2]) - -def number_leaves(node, next=0): - left, right = None, None - for child in node.children: - l, r = number_leaves(child, next) - next = max(next, r+1) - if left == None or l < left: - left = l - if right == None or r > right: - right = r - - #print node, left, right, next - if left == None or right == None: - assert not node.children - left = right = next - - node.left = left - node.right = right - - return left, right - -def ancestor(node, indices): - #print node, node.left, node.right, indices - # returns the deepest node covering all the indices - if min(indices) >= node.left and max(indices) <= node.right: - # try the children - for child in node.children: - x = ancestor(child, indices) - if x: return x - return node - else: - return None - -def frontier(node, indices): - #print 'frontier for node', node, 'indices', indices - if node.left > max(indices) or node.right < min(indices): - #print '\toutside' - return [node] - elif node.children: - #print '\tcovering at least part' - ns = [] - for child in node.children: - n = frontier(child, indices) - ns.extend(n) - return ns - else: - return [node] - -def project_heads(node): - #print 'project_heads', node - is_head = node.data.tag.endswith('-HEAD') - if node.children: - found = 0 - for child in node.children: - x = project_heads(child) - if x: - node.data.tag = x - found += 1 - assert found == 1 - elif is_head: - node.data.tag = node.data.tag[:-len('-HEAD')] - - if is_head: - return node.data.tag - else: - return None - -for tline, eline in itertools.izip(tinfile, einfile): - if tline.strip() != '(())': - if tline.startswith('( '): - tline = tline[2:-1].strip() - tr = tree.parse_PST(tline) - if tr != None: - number_leaves(tr) - #project_heads(tr) # 
assumes Bikel-style head annotation for the input trees - else: - tr = None - - parts = eline.strip().split(" ||| ") - zh, en = parts[:2] - spans = parts[-1] - print '|||', - for span in spans.split(): - sps = span.split(":") - i, j, x, y = map(int, sps[0].split("-")) - - if tr: - a = ancestor(tr, range(x,y)) - try: - fs = frontier(a, range(x,y)) - except: - print >>sys.stderr, "problem with line", tline.strip(), "--", eline.strip() - raise - - #print x, y - #print 'ancestor', a - #print 'frontier', fs - - cat = a.data.tag - for f in fs: - if f.right < x: - cat += '\\' + f.data.tag - else: - break - fs.reverse() - for f in fs: - if f.left >= y: - cat += '/' + f.data.tag - else: - break - else: - cat = 'FAIL' - - print '%d-%d:%s' % (x, y, cat), - print diff --git a/gi/evaluation/tree.py b/gi/evaluation/tree.py deleted file mode 100644 index 702d80b6..00000000 --- a/gi/evaluation/tree.py +++ /dev/null @@ -1,485 +0,0 @@ -import re, sys - -class Symbol: - def __init__(self, nonterm, term=None, var=None): - assert not (term != None and var != None) - self.tag = nonterm - self.token = term - self.variable = var - - def is_variable(self): - return self.variable != None - - def __eq__(self, other): - return self.tag == other.tag and self.token == other.token and self.variable == other.variable - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.tag, self.token, self.variable)) - - def __repr__(self): - return str(self) - - def __cmp__(self, other): - return cmp((self.tag, self.token, self.variable), - (other.tag, other.token, other.variable)) - - def __str__(self): - parts = [] - if False: # DEPENDENCY - if self.token: - parts.append(str(self.token)) - elif self.variable != None: - parts.append('#%d' % self.variable) - if self.tag: - parts.append(str(self.tag)) - return '/'.join(parts) - else: - if self.tag: - parts.append(str(self.tag)) - if self.token: - parts.append(str(self.token)) - elif self.variable != None: - 
parts.append('#%d' % self.variable) - return ' '.join(parts) - -class TreeNode: - def __init__(self, data, children=None, order=-1): - self.data = data - self.children = [] - self.order = order - self.parent = None - if children: self.children = children - - def insert(self, child): - self.children.append(child) - child.parent = self - - def leaves(self): - ls = [] - for node in self.xtraversal(): - if not node.children: - ls.append(node.data) - return ls - - def leaf_nodes(self): - ls = [] - for node in self.xtraversal(): - if not node.children: - ls.append(node) - return ls - - def max_depth(self): - d = 1 - for child in self.children: - d = max(d, 1 + child.max_depth()) - if not self.children and self.data.token: - d = 2 - return d - - def max_width(self): - w = 0 - for child in self.children: - w += child.max_width() - return max(1, w) - - def num_internal_nodes(self): - if self.children: - n = 1 - for child in self.children: - n += child.num_internal_nodes() - return n - elif self.data.token: - return 1 - else: - return 0 - - def postorder_traversal(self, visit): - """ - Postorder traversal; no guarantee that terminals will be read in the - correct order for dep. trees. - """ - for child in self.children: - child.traversal(visit) - visit(self) - - def traversal(self, visit): - """ - Preorder for phrase structure trees, and inorder for dependency trees. - In both cases the terminals will be read off in the correct order. 
- """ - visited_self = False - if self.order <= 0: - visited_self = True - visit(self) - - for i, child in enumerate(self.children): - child.traversal(visit) - if i + 1 == self.order: - visited_self = True - visit(self) - - assert visited_self - - def xpostorder_traversal(self): - for child in self.children: - for node in child.xpostorder_traversal(): - yield node - yield self - - def xtraversal(self): - visited_self = False - if self.order <= 0: - visited_self = True - yield self - - for i, child in enumerate(self.children): - for d in child.xtraversal(): - yield d - - if i + 1 == self.order: - visited_self = True - yield self - - assert visited_self - - def xpostorder_traversal(self): - for i, child in enumerate(self.children): - for d in child.xpostorder_traversal(): - yield d - yield self - - def edges(self): - es = [] - self.traverse_edges(lambda h,c: es.append((h,c))) - return es - - def traverse_edges(self, visit): - for child in self.children: - visit(self.data, child.data) - child.traverse_edges(visit) - - def subtrees(self, include_self=False): - st = [] - if include_self: - stack = [self] - else: - stack = self.children[:] - - while stack: - node = stack.pop() - st.append(node) - stack.extend(node.children) - return st - - def find_parent(self, node): - try: - index = self.children.index(node) - return self, index - except ValueError: - for child in self.children: - if isinstance(child, TreeNode): - r = child.find_parent(node) - if r: return r - return None - - def is_ancestor_of(self, node): - if self == node: - return True - for child in self.children: - if child.is_ancestor_of(child): - return True - return False - - def find(self, node): - if self == node: - return self - for child in self.children: - if isinstance(child, TreeNode): - r = child.find(node) - if r: return r - else: - if child == node: - return r - return None - - def equals_ignorecase(self, other): - if not isinstance(other, TreeNode): - return False - if self.data != other.data: - 
return False - if len(self.children) != len(other.children): - return False - for mc, oc in zip(self.children, other.children): - if isinstance(mc, TreeNode): - if not mc.equals_ignorecase(oc): - return False - else: - if mc.lower() != oc.lower(): - return False - return True - - def node_number(self, numbering, next=0): - if self.order <= 0: - numbering[id(self)] = next - next += 1 - - for i, child in enumerate(self.children): - next = child.node_number(numbering, next) - if i + 1 == self.order: - numbering[id(self)] = next - next += 1 - - return next - - def display_conll(self, out): - numbering = {} - self.node_number(numbering) - next = 0 - self.children[0].traversal(lambda x: \ - out.write('%d\t%s\t%s\t%s\t%s\t_\t%d\tLAB\n' \ - % (numbering[id(x)], x.data.token, x.data.token, - x.data.tag, x.data.tag, numbering[id(x.parent)]))) - out.write('\n') - - def size(self): - sz = 1 - for child in self.children: - sz += child.size() - return sz - - def __eq__(self, other): - if isinstance(other, TreeNode) and self.data == other.data \ - and self.children == other.children: - return True - return False - - def __cmp__(self, other): - if not isinstance(other, TreeNode): return 1 - n = cmp(self.data, other.data) - if n != 0: return n - n = len(self.children) - len(other.children) - if n != 0: return n - for sc, oc in zip(self.children, other.children): - n = cmp(sc, oc) - if n != 0: return n - return 0 - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash((self.data, tuple(self.children))) - - def __repr__(self): - return str(self) - - def __str__(self): - s = '(' - space = False - if self.order <= 0: - s += str(self.data) - space = True - for i, child in enumerate(self.children): - if space: s += ' ' - s += str(child) - space = True - if i+1 == self.order: - s += ' ' + str(self.data) - return s + ')' - -def read_PSTs(fname): - infile = open(fname) - trees = [] - for line in infile: - trees.append(parse_PST(line.strip())) - 
infile.close() - return trees - -def parse_PST_multiline(infile, hash_is_var=True): - buf = '' - num_open = 0 - while True: - line = infile.readline() - if not line: - return None - buf += ' ' + line.rstrip() - num_open += line.count('(') - line.count(')') - if num_open == 0: - break - - return parse_PST(buf, hash_is_var) - -def parse_PST(line, hash_is_var=True): - line = line.rstrip() - if not line or line.lower() == 'null': - return None - - # allow either (a/DT) or (DT a) - #parts_re = re.compile(r'(\(*)([^/)]*)(?:/([^)]*))?(\)*)$') - - # only allow (DT a) - parts_re = re.compile(r'(\(*)([^)]*)(\)*)$') - - root = TreeNode(Symbol('TOP')) - stack = [root] - for part in line.rstrip().split(): - m = parts_re.match(part) - #opening, tok_or_tag, tag, closing = m.groups() - opening, tok_or_tag, closing = m.groups() - tag = None - #print 'token', part, 'bits', m.groups() - for i in opening: - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if tag: - stack[-1].data.tag = tag - if hash_is_var and tok_or_tag.startswith('#'): - stack[-1].data.variable = int(tok_or_tag[1:]) - else: - stack[-1].data.token = tok_or_tag - else: - if stack[-1].data.tag == None: - stack[-1].data.tag = tok_or_tag - else: - if hash_is_var and tok_or_tag.startswith('#'): - try: - stack[-1].data.variable = int(tok_or_tag[1:]) - except ValueError: # it's really a token! 
- #print >>sys.stderr, 'Warning: # used for token:', tok_or_tag - stack[-1].data.token = tok_or_tag - else: - stack[-1].data.token = tok_or_tag - - for i in closing: - stack.pop() - - #assert str(root.children[0]) == line - return root.children[0] - -def read_DTs(fname): - infile = open(fname) - trees = [] - while True: - t = parse_DT(infile) - if t: trees.append(t) - else: break - infile.close() - return trees - -def read_bracketed_DTs(fname): - infile = open(fname) - trees = [] - for line in infile: - trees.append(parse_bracketed_DT(line)) - infile.close() - return trees - -def parse_DT(infile): - tokens = [Symbol('ROOT')] - children = {} - - for line in infile: - parts = line.rstrip().split() - #print parts - if not parts: break - index = len(tokens) - token = parts[1] - tag = parts[3] - parent = int(parts[6]) - if token.startswith('#'): - tokens.append(Symbol(tag, var=int(token[1:]))) - else: - tokens.append(Symbol(tag, token)) - children.setdefault(parent, set()).add(index) - - if len(tokens) == 1: return None - - root = TreeNode(Symbol('ROOT'), [], 0) - schedule = [] - for child in sorted(children[0]): - schedule.append((root, child)) - - while schedule: - parent, index = schedule[0] - del schedule[0] - - node = TreeNode(tokens[index]) - node.order = 0 - parent.insert(node) - - for child in sorted(children.get(index, [])): - schedule.append((node, child)) - if child < index: - node.order += 1 - - return root - -_bracket_split_re = re.compile(r'([(]*)([^)/]*)(?:/([^)]*))?([)]*)') - -def parse_bracketed_DT(line, insert_root=True): - line = line.rstrip() - if not line or line == 'NULL': return None - #print line - - root = TreeNode(Symbol('ROOT')) - stack = [root] - for part in line.rstrip().split(): - m = _bracket_split_re.match(part) - - for c in m.group(1): - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if m.group(3) != None: - if m.group(2).startswith('#'): - stack[-1].data.variable = int(m.group(2)[1:]) - else: - 
stack[-1].data.token = m.group(2) - stack[-1].data.tag = m.group(3) - else: - stack[-1].data.tag = m.group(2) - stack[-1].order = len(stack[-1].children) - # FIXME: also check for vars - - for c in m.group(4): - stack.pop() - - assert len(stack) == 1 - if not insert_root or root.children[0].data.tag == 'ROOT': - return root.children[0] - else: - return root - -_bracket_split_notag_re = re.compile(r'([(]*)([^)/]*)([)]*)') - -def parse_bracketed_untagged_DT(line): - line = line.rstrip() - if not line or line == 'NULL': return None - - root = TreeNode(Symbol('TOP')) - stack = [root] - for part in line.rstrip().split(): - m = _bracket_split_notag_re.match(part) - - for c in m.group(1): - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if stack[-1].data.token == None: - stack[-1].data.token = m.group(2) - stack[-1].order = len(stack[-1].children) - else: - child = TreeNode(Symbol(nonterm=None, term=m.group(2))) - stack[-1].insert(child) - - for c in m.group(3): - stack.pop() - - return root.children[0] diff --git a/gi/markov_al/Makefile.am b/gi/markov_al/Makefile.am deleted file mode 100644 index fe3e3349..00000000 --- a/gi/markov_al/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = ml - -ml_SOURCES = ml.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/markov_al/README b/gi/markov_al/README deleted file mode 100644 index 9c10f7cd..00000000 --- a/gi/markov_al/README +++ /dev/null @@ -1,2 +0,0 @@ -Experimental translation models with Markovian dependencies. 
- diff --git a/gi/markov_al/ml.cc b/gi/markov_al/ml.cc deleted file mode 100644 index 1e71edd6..00000000 --- a/gi/markov_al/ml.cc +++ /dev/null @@ -1,470 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -#include "tdict.h" -#include "filelib.h" -#include "sampler.h" -#include "ccrp_onetable.h" -#include "array2d.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -void PrintTopCustomers(const CCRP_OneTable& crp) { - for (CCRP_OneTable::const_iterator it = crp.begin(); it != crp.end(); ++it) { - cerr << " " << TD::Convert(it->first) << " = " << it->second << endl; - } -} - -void PrintAlignment(const vector& src, const vector& trg, const vector& a) { - cerr << TD::GetString(src) << endl << TD::GetString(trg) << endl; - Array2D al(src.size(), trg.size()); - for (int i = 0; i < a.size(); ++i) - if (a[i] != 255) al(a[i], i) = true; - cerr << al << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct Unigram; -struct Bigram { - Bigram() : trg(), cond() {} - Bigram(WordID 
prev, WordID cur, WordID t) : trg(t) { cond.first = prev; cond.second = cur; } - const pair& ConditioningPair() const { - return cond; - } - WordID& prev_src() { return cond.first; } - WordID& cur_src() { return cond.second; } - const WordID& prev_src() const { return cond.first; } - const WordID& cur_src() const { return cond.second; } - WordID trg; - private: - pair cond; -}; - -struct Unigram { - Unigram() : cur_src(), trg() {} - Unigram(WordID s, WordID t) : cur_src(s), trg(t) {} - WordID cur_src; - WordID trg; -}; - -ostream& operator<<(ostream& os, const Bigram& b) { - os << "( " << TD::Convert(b.trg) << " | " << TD::Convert(b.prev_src()) << " , " << TD::Convert(b.cur_src()) << " )"; - return os; -} - -ostream& operator<<(ostream& os, const Unigram& u) { - os << "( " << TD::Convert(u.trg) << " | " << TD::Convert(u.cur_src) << " )"; - return os; -} - -bool operator==(const Bigram& a, const Bigram& b) { - return a.trg == b.trg && a.cur_src() == b.cur_src() && a.prev_src() == b.prev_src(); -} - -bool operator==(const Unigram& a, const Unigram& b) { - return a.trg == b.trg && a.cur_src == b.cur_src; -} - -size_t hash_value(const Bigram& b) { - size_t h = boost::hash_value(b.prev_src()); - boost::hash_combine(h, boost::hash_value(b.cur_src())); - boost::hash_combine(h, boost::hash_value(b.trg)); - return h; -} - -size_t hash_value(const Unigram& u) { - size_t h = boost::hash_value(u.cur_src); - boost::hash_combine(h, boost::hash_value(u.trg)); - return h; -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); 
- vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UnigramModel { - UnigramModel(size_t src_voc_size, size_t trg_voc_size) : - unigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), - p0(1.0 / trg_voc_size) {} - - void increment(const Bigram& b) { - unigrams[b.cur_src()].increment(b.trg); - } - - void decrement(const Bigram& b) { - unigrams[b.cur_src()].decrement(b.trg); - } - - double prob(const Bigram& b) const { - const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); - return q0; - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < unigrams.size(); ++i) { - const CCRP_OneTable& crp = unigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - return llh; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < unigrams.size(); ++i) - unigrams[i].resample_hyperparameters(rng); - } - - vector > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src) - - const double p0; -}; - -struct BigramModel { - BigramModel(size_t src_voc_size, size_t trg_voc_size) : - unigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), - p0(1.0 / trg_voc_size) {} - - void increment(const Bigram& b) { - BigramMap::iterator it = bigrams.find(b.ConditioningPair()); - if (it == bigrams.end()) { - it = bigrams.insert(make_pair(b.ConditioningPair(), CCRP_OneTable(1,1,1,1))).first; - } - if (it->second.increment(b.trg)) - unigrams[b.cur_src()].increment(b.trg); - } - - void decrement(const Bigram& b) { - BigramMap::iterator it = bigrams.find(b.ConditioningPair()); - assert(it != bigrams.end()); - if 
(it->second.decrement(b.trg)) { - unigrams[b.cur_src()].decrement(b.trg); - if (it->second.num_customers() == 0) - bigrams.erase(it); - } - } - - double prob(const Bigram& b) const { - const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); - const BigramMap::const_iterator it = bigrams.find(b.ConditioningPair()); - if (it == bigrams.end()) return q0; - return it->second.prob(b.trg, q0); - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < unigrams.size(); ++i) { - const CCRP_OneTable& crp = unigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - for (BigramMap::const_iterator it = bigrams.begin(); it != bigrams.end(); ++it) { - const CCRP_OneTable& crp = it->second; - const WordID cur_src = it->first.second; - llh += crp.log_crp_prob(); - for (CCRP_OneTable::const_iterator bit = crp.begin(); bit != crp.end(); ++bit) { - llh += log(unigrams[cur_src].prob(bit->second, p0)); - } - } - return llh; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < unigrams.size(); ++i) - unigrams[i].resample_hyperparameters(rng); - for (BigramMap::iterator it = bigrams.begin(); it != bigrams.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - typedef unordered_map, CCRP_OneTable, boost::hash > > BigramMap; - BigramMap bigrams; // bigrams[(src-1,src)].prob(trg, q0) = p(trg|src,src-1) - vector > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src) - - const double p0; -}; - -struct BigramAlignmentModel { - BigramAlignmentModel(size_t src_voc_size, size_t trg_voc_size) : bigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), p0(1.0 / src_voc_size) {} - void increment(WordID prev, WordID next) { - bigrams[prev].increment(next); // hierarchy? - } - void decrement(WordID prev, WordID next) { - bigrams[prev].decrement(next); // hierarchy? 
- } - double prob(WordID prev, WordID next) { - return bigrams[prev].prob(next, p0); - } - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < bigrams.size(); ++i) { - const CCRP_OneTable& crp = bigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - return llh; - } - - vector > bigrams; // bigrams[prev].prob(next, p0) = p(next|prev) - const double p0; -}; - -struct Alignment { - vector a; -}; - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned samples = conf["samples"].as(); - - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - const size_t corpus_len = corpusf.size(); - const WordID kNULL = TD::Convert(""); - const WordID kBOS = TD::Convert(""); - const WordID kEOS = TD::Convert(""); - Bigram TT(kBOS, TD::Convert("我"), TD::Convert("i")); - Bigram TT2(kBOS, TD::Convert("è¦"), TD::Convert("i")); - - UnigramModel model(vocabf.size(), vocabe.size()); - vector alignments(corpus_len); - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - alg.resize(trg.size()); - int lenp1 = src.size() + 1; - WordID prev_src = kBOS; - for (int j = 0; j < trg.size(); ++j) { - int samp = lenp1 * rng.next(); - --samp; - if (samp < 0) samp = 255; - alg[j] = samp; - WordID cur_src = (samp == 255 ? 
kNULL : src[alg[j]]); - Bigram b(prev_src, cur_src, trg[j]); - model.increment(b); - prev_src = cur_src; - } - Bigram b(prev_src, kEOS, kEOS); - model.increment(b); - } - cerr << "Initial LLH: " << model.LogLikelihood() << endl; - - SampleSet ss; - for (unsigned si = 0; si < 50; ++si) { - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (unsigned j = 0; j < trg.size(); ++j) { - unsigned char& a_j = alg[j]; - WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - Bigram b(prev_src, cur_e_a_j, trg[j]); - //cerr << "DEC: " << b << "\t" << nextb << endl; - model.decrement(b); - ss.clear(); - for (unsigned i = 0; i <= src.size(); ++i) { - const WordID cur_src = (i ? src[i-1] : kNULL); - b.cur_src() = cur_src; - ss.add(model.prob(b)); - } - int sampled_a_j = rng.SelectSample(ss); - a_j = (sampled_a_j ? sampled_a_j - 1 : 255); - cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - b.cur_src() = cur_e_a_j; - //cerr << "INC: " << b << "\t" << nextb << endl; - model.increment(b); - prev_src = cur_e_a_j; - } - } - cerr << '.' 
<< flush; - if (si % 10 == 9) { - cerr << "[LLH prev=" << model.LogLikelihood(); - //model.ResampleHyperparameters(&rng); - cerr << " new=" << model.LogLikelihood() << "]\n"; - //pair xx = make_pair(kBOS, TD::Convert("我")); - //PrintTopCustomers(model.bigrams.find(xx)->second); - cerr << "p(" << TT << ") = " << model.prob(TT) << endl; - cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; - PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); - } - } - { - // MODEL 2 - BigramModel model(vocabf.size(), vocabe.size()); - BigramAlignmentModel amodel(vocabf.size(), vocabe.size()); - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (int j = 0; j < trg.size(); ++j) { - WordID cur_src = (alg[j] == 255 ? kNULL : src[alg[j]]); - Bigram b(prev_src, cur_src, trg[j]); - model.increment(b); - amodel.increment(prev_src, cur_src); - prev_src = cur_src; - } - amodel.increment(prev_src, kEOS); - Bigram b(prev_src, kEOS, kEOS); - model.increment(b); - } - cerr << "Initial LLH: " << model.LogLikelihood() << " " << amodel.LogLikelihood() << endl; - - SampleSet ss; - for (unsigned si = 0; si < samples; ++si) { - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (unsigned j = 0; j < trg.size(); ++j) { - unsigned char& a_j = alg[j]; - WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - Bigram b(prev_src, cur_e_a_j, trg[j]); - WordID next_src = kEOS; - WordID next_trg = kEOS; - if (j < (trg.size() - 1)) { - next_src = (alg[j+1] == 255 ? 
kNULL : src[alg[j + 1]]); - next_trg = trg[j + 1]; - } - Bigram nextb(cur_e_a_j, next_src, next_trg); - //cerr << "DEC: " << b << "\t" << nextb << endl; - model.decrement(b); - model.decrement(nextb); - amodel.decrement(prev_src, cur_e_a_j); - amodel.decrement(cur_e_a_j, next_src); - ss.clear(); - for (unsigned i = 0; i <= src.size(); ++i) { - const WordID cur_src = (i ? src[i-1] : kNULL); - b.cur_src() = cur_src; - ss.add(model.prob(b) * model.prob(nextb) * amodel.prob(prev_src, cur_src) * amodel.prob(cur_src, next_src)); - //cerr << log(ss[ss.size() - 1]) << "\t" << b << endl; - } - int sampled_a_j = rng.SelectSample(ss); - a_j = (sampled_a_j ? sampled_a_j - 1 : 255); - cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - b.cur_src() = cur_e_a_j; - nextb.prev_src() = cur_e_a_j; - //cerr << "INC: " << b << "\t" << nextb << endl; - //exit(1); - model.increment(b); - model.increment(nextb); - amodel.increment(prev_src, cur_e_a_j); - amodel.increment(cur_e_a_j, next_src); - prev_src = cur_e_a_j; - } - } - cerr << '.' 
<< flush; - if (si % 10 == 9) { - cerr << "[LLH prev=" << (model.LogLikelihood() + amodel.LogLikelihood()); - //model.ResampleHyperparameters(&rng); - cerr << " new=" << model.LogLikelihood() << "]\n"; - pair xx = make_pair(kBOS, TD::Convert("我")); - cerr << "p(" << TT << ") = " << model.prob(TT) << endl; - cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; - pair xx2 = make_pair(kBOS, TD::Convert("è¦")); - PrintTopCustomers(model.bigrams.find(xx)->second); - //PrintTopCustomers(amodel.bigrams[TD::Convert("")]); - //PrintTopCustomers(model.unigrams[TD::Convert("")]); - PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); - } - } - } - return 0; -} - diff --git a/gi/morf-segmentation/filter_docs.pl b/gi/morf-segmentation/filter_docs.pl deleted file mode 100755 index a78575da..00000000 --- a/gi/morf-segmentation/filter_docs.pl +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/perl - -#Filters the phrase&cluster document set to retain only documents that correspond to words or morphs, i.e. not crossing word boundaries. - -#Usage: filter_docs.pl [mark] -# STDIN: data in the doc.txt format (i.e. phrase\t blahblah ), most likely from cdec extractor -# STDOUT: the matching subset, same format - -use utf8; -my $letter=qr/\p{L}\p{M}*/; # see http://www.regular-expressions.info/unicode.html - -my $morph=qr/$letter+/; - -my $m = "##"; # marker used to indicate morphemes -if ((scalar @ARGV) >= 1) { - $m = $ARGV[0]; - shift; -} -print STDERR "Using $m to filter for morphemes\n"; - -my $expr = qr/^($morph\Q$m\E)? 
?(\Q$m\E$morph\Q$m\E)* ?(\Q$m\E$morph)?\t/; #\Q and \E bounded sections are escaped -while(<>) { - /$expr/ && print; -} diff --git a/gi/morf-segmentation/invalid_vocab.patterns b/gi/morf-segmentation/invalid_vocab.patterns deleted file mode 100644 index 473ce1b1..00000000 --- a/gi/morf-segmentation/invalid_vocab.patterns +++ /dev/null @@ -1,6 +0,0 @@ -[[:digit:]] -[] !"#$%&()*+,./:;<=>?@[\^_`{|}~] -^'$ --$ -^- -^$ diff --git a/gi/morf-segmentation/linestripper.py b/gi/morf-segmentation/linestripper.py deleted file mode 100755 index 04e9044a..00000000 --- a/gi/morf-segmentation/linestripper.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/python - -import sys - -#linestripper file file maxlen [numlines] - -if len(sys.argv) < 3: - print "linestripper file1 file2 maxlen [numlines]" - print " outputs subset of file1 to stdout, ..of file2 to stderr" - sys.exit(1) - - -f1 = open(sys.argv[1],'r') -f2 = open(sys.argv[2],'r') - -maxlen=int(sys.argv[3]) -numlines = 0 - -if len(sys.argv) > 4: - numlines = int(sys.argv[4]) - -count=0 -for line1 in f1: - line2 = f2.readline() - - w1 = len(line1.strip().split()) - w2 = len(line2.strip().split()) - - if w1 <= maxlen and w2 <= maxlen: - count = count + 1 - sys.stdout.write(line1) - sys.stderr.write(line2) - - if numlines > 0 and count >= numlines: - break - -f1.close() -f2.close() - - diff --git a/gi/morf-segmentation/morf-pipeline.pl b/gi/morf-segmentation/morf-pipeline.pl deleted file mode 100755 index 46eb5b46..00000000 --- a/gi/morf-segmentation/morf-pipeline.pl +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - - -# Preprocessing pipeline to take care of word segmentation -# Learns a segmentation model for each/either side of the parallel corpus using all train/dev/test data -# Applies the segmentation where necessary. -# Learns word alignments on the preprocessed training data. -# Outputs script files used later to score output. 
- - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; - -my $MORF_TRAIN = "$SCRIPT_DIR/morftrain.sh"; -my $MORF_SEGMENT = "$SCRIPT_DIR/morfsegment.py"; - -my $LINESTRIPPER = "$SCRIPT_DIR/linestripper.py"; -my $ALIGNER = "/export/ws10smt/software/berkeleyaligner/berkeleyaligner.jar"; -#java -d64 -Xmx10g -jar $ALIGNER ++word-align.conf >> aligner.log -assert_exec($MORF_TRAIN, $LINESTRIPPER, $MORF_SEGMENT, $ALIGNER); - -my $OUTPUT = './morfwork'; -my $PPL_SRC = 50; -my $PPL_TRG = 50; -my $MARKER = "#"; -my $MAX_WORDS = 40; -my $SENTENCES;# = 100000; -my $SPLIT_TYPE = ""; #possible values: s, t, st, or (empty string) -my $NAME_SHORTCUT; - -usage() unless &GetOptions('max_words=i' => \$MAX_WORDS, - 'output=s' => \$OUTPUT, - 'ppl_src=i' => \$PPL_SRC, - 'ppl_trg=i' => \$PPL_TRG, - 'sentences=i' => \$SENTENCES, - 'marker=s' => \$MARKER, - 'split=s' => \$SPLIT_TYPE, - 'get_name_only' => \$NAME_SHORTCUT, - ); - -usage() unless scalar @ARGV >= 2; - -my %CORPUS; # for (src,trg) it has (orig, name, filtered, final) - -$CORPUS{'src'}{'orig'} = $ARGV[0]; -open F, "<$CORPUS{'src'}{'orig'}" or die "Can't read $CORPUS{'src'}{'orig'}: $!"; close F; -$CORPUS{'src'}{'name'} = get_basename($CORPUS{'src'}{'orig'}); - -$CORPUS{'trg'}{'orig'} = $ARGV[1]; -open F, "<$CORPUS{'trg'}{'orig'}" or die "Can't read $CORPUS{'trg'}{'orig'}: $!"; close F; -$CORPUS{'trg'}{'name'} = get_basename($CORPUS{'trg'}{'orig'}); - -my %DEV; # for (src,trg) has (orig, final.split final.unsplit -if (@ARGV >= 4) { - $DEV{'src'}{'orig'} = $ARGV[2]; - open F, "<$DEV{'src'}{'orig'}" or die "Can't read $DEV{'src'}{'orig'}: $!"; close F; - $DEV{'src'}{'name'} = get_basename($DEV{'src'}{'orig'}); - $DEV{'trg'}{'orig'} = $ARGV[3]; - open F, "<$DEV{'trg'}{'orig'}" or die "Can't read $DEV{'trg'}{'orig'}: $!"; close F; - 
$DEV{'trg'}{'name'} = get_basename($DEV{'trg'}{'orig'}); -} - -my %TEST; # for (src,trg) has (orig, name) -if (@ARGV >= 6) { - $TEST{'src'}{'orig'} = $ARGV[4]; - open F, "<$TEST{'src'}{'orig'}" or die "Can't read $TEST{'src'}{'orig'}: $!"; close F; - $TEST{'src'}{'name'} = get_basename($TEST{'src'}{'orig'}); - $TEST{'trg'}{'orig'} = $ARGV[5]; - open F, "<$TEST{'trg'}{'orig'}" or die "Can't read $TEST{'trg'}{'orig'}: $!"; close F; - $TEST{'trg'}{'name'} = get_basename($TEST{'trg'}{'orig'}); -} - -my $SPLIT_SRC; #use these to check whether that part is being split -my $SPLIT_TRG; - -#OUTPUT WILL GO IN THESE -my $CORPUS_DIR = $OUTPUT . '/' . corpus_dir(); #subsampled corpus -my $MODEL_SRC_DIR = $OUTPUT . '/' . model_dir("src"); #splitting.. -my $MODEL_TRG_DIR = $OUTPUT . '/' . model_dir("trg"); # .. models -my $PROCESSED_DIR = $OUTPUT . '/' . processed_dir(); #segmented copora+alignments -my $ALIGNMENT_DIR = $PROCESSED_DIR . '/alignments'; - -$CORPUS{'src'}{'filtered'} = $CORPUS_DIR . "/$CORPUS{'src'}{'name'}"; -$CORPUS{'trg'}{'filtered'} = $CORPUS_DIR . 
"/$CORPUS{'trg'}{'name'}"; - -print STDERR "Output: $OUTPUT\n"; -print STDERR "Corpus: $CORPUS_DIR\n"; -print STDERR "Model-src: $MODEL_SRC_DIR\n"; -print STDERR "Model-trg: $MODEL_TRG_DIR\n"; -print STDERR "Finaldir: $PROCESSED_DIR\n"; - -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($CORPUS_DIR) or die "Couldn't create output directory $CORPUS_DIR: $!"; -filter_corpus(); - -safemkdir($PROCESSED_DIR); -safemkdir($ALIGNMENT_DIR); - -if ($SPLIT_SRC) { - safemkdir($MODEL_SRC_DIR) or die "Couldn't create output directory $MODEL_SRC_DIR: $!"; - learn_segmentation("src"); - apply_segmentation_side("src", $MODEL_SRC_DIR); -} - -#assume that unsplit hypotheses will be scored against an aritificially split target test set; thus obtain a target splitting model -#TODO: add a flag to override this behaviour -safemkdir($MODEL_TRG_DIR) or die "Couldn't create output directory $MODEL_TRG_DIR: $!"; -learn_segmentation("trg"); -$TEST{'trg'}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}"; -copy($TEST{'trg'}{'orig'}, $TEST{'trg'}{'finalunsplit'}) or die "Could not copy unsegmented test set"; - -if ($SPLIT_TRG) { - apply_segmentation_side("trg", $MODEL_TRG_DIR); - } else { - $TEST{'trg'}{'finalsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}.split"; - apply_segmentation_any($MODEL_TRG_DIR, $TEST{'trg'}{'finalunsplit'}, $TEST{'trg'}{'finalsplit'}); -} - -write_eval_sh("$PROCESSED_DIR/eval-devtest.sh"); - -#copy corpora if they haven't been put in place by splitting operations -place_missing_data_side('src'); -place_missing_data_side('trg'); - -do_align(); - -if ($CORPUS{'src'}{'orig'} && $DEV{'src'}{'orig'} && $TEST{'src'}{'orig'}) { - print STDERR "Putting the config file entry in $PROCESSED_DIR/exp.config\n"; -#format is: - # nlfr100k_unsplit /export/ws10smt/jan/nlfr/morfwork/s100k.w40.sp_0 corpus.nl-fr.al fr-3.lm.gz dev.nl dev.fr test2008.nl eval-devtest.sh - my $line = split_name() . 
" $PROCESSED_DIR corpus.src-trg.al LMFILE.lm.gz"; - $line = $line . " $DEV{'src'}{'name'} $DEV{'trg'}{'name'}"; - $line = $line . " " . get_basename($TEST{'src'}{$SPLIT_SRC ? "finalsplit" : "finalunsplit"}) . " eval-devtest.sh"; - safesystem("echo '$line' > $PROCESSED_DIR/exp.config"); -} - -system("date"); -print STDERR "All done. You now need to train a language model (if target split), put it in the right dir and update the config file.\n\n"; - -############################## BILINGUAL ################################### - -sub filter_corpus { - print STDERR "\n!!!FILTERING TRAINING COPRUS!!!\n"; - if ( -f $CORPUS{'src'}{'filtered'} && -f $CORPUS{'trg'}{'filtered'}) { - print STDERR "$CORPUS{'src'}{'filtered'} and $CORPUS{'trg'}{'filtered'} exist, reusing...\n"; - return; - } - my $args = "$CORPUS{'src'}{'orig'} $CORPUS{'trg'}{'orig'} $MAX_WORDS"; - if ($SENTENCES) { $args = $args . " $SENTENCES"; } - safesystem("$LINESTRIPPER $args 1> $CORPUS{'src'}{'filtered'} 2> $CORPUS{'trg'}{'filtered'}") or die "Failed to filter training corpus for length."; -} - -sub learn_segmentation -{ - my $WHICH = shift; - my $corpus; my $dev; my $test; my $moddir; my $ppl; - - $corpus = $CORPUS{$WHICH}{'filtered'}; - $dev = $DEV{$WHICH}{'orig'}; - $test = $TEST{$WHICH}{'orig'}; - - if ($WHICH eq "src") { - $moddir = $MODEL_SRC_DIR; - $ppl = $PPL_SRC; - } else { - $moddir = $MODEL_TRG_DIR; - $ppl = $PPL_TRG; - } - my $cmd = "cat $corpus"; - if ($dev) { $cmd = "$cmd $dev"; } - if ($test) { $cmd = "$cmd $test"; } - my $tmpfile = "$CORPUS_DIR/all.tmp.gz"; - safesystem("$cmd | $GZIP > $tmpfile") or die "Failed to concatenate data for model learning.."; - assert_marker($tmpfile); - - learn_segmentation_side($tmpfile, $moddir, $ppl, $WHICH); - safesystem("rm $tmpfile"); -} - -sub do_align { - print STDERR "\n!!!WORD ALIGNMENT!!!\n"; - system("date"); - - my $ALIGNMENTS = "$ALIGNMENT_DIR/training.align"; - if ( -f $ALIGNMENTS ) { - print STDERR "$ALIGNMENTS exists, reusing...\n"; - return; 
- } - my $conf_file = "$ALIGNMENT_DIR/word-align.conf"; - - #decorate training files with identifiers to stop the aligner from training on dev and test when rerun in future. - safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'src'}{'name'} corpus.src") or die "Failed to symlink: $!"; - safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'trg'}{'name'} corpus.trg") or die "Failed to symlink: $!"; - - write_wconf($conf_file, $PROCESSED_DIR); - system("java -d64 -Xmx24g -jar $ALIGNER ++$conf_file > $ALIGNMENT_DIR/aligner.log"); - - if (! -f $ALIGNMENTS) { die "Failed to run word alignment.";} - - my $cmd = "paste $PROCESSED_DIR/corpus.src $PROCESSED_DIR/corpus.trg $ALIGNMENTS"; - $cmd = $cmd . " | sed 's/\\t/ \|\|\| /g' > $PROCESSED_DIR/corpus.src-trg.al"; - safesystem($cmd) or die "Failed to paste into aligned corpus file."; - -} - -############################# MONOLINGUAL ################################# - -#copy the necessary data files that weren't place by segmentation -sub place_missing_data_side { - my $side = shift; - - ifne_copy($CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}") ; - - if ($DEV{$side}{'orig'} && ! -f "$PROCESSED_DIR/$DEV{$side}{'name'}") { - $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; - copy($DEV{$side}{'orig'}, $DEV{$side}{'final'}) or die "Copy failed: $!"; - } - - if ($TEST{$side}{'orig'} && ! -f "$PROCESSED_DIR/$TEST{$side}{'name'}" && ! 
$TEST{$side}{'finalunsplit'}) { - $TEST{$side}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}"; - copy($TEST{$side}{'orig'}, $TEST{$side}{'finalunsplit'}) or die "Copy failed: $!"; - } - -} - -sub apply_segmentation_side { - my ($side, $moddir) = @_; - - print STDERR "\n!!!APPLYING SEGMENTATION MODEL ($side)!!!\n"; - apply_segmentation_any($moddir, $CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}"); - if ($DEV{$side}{'orig'}) { - $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; - apply_segmentation_any($moddir, $DEV{$side}{'orig'}, "$DEV{$side}{'final'}"); - } - if ($TEST{$side}{'orig'}) { - $TEST{$side}{'finalsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}.split"; - apply_segmentation_any($moddir, $TEST{$side}{'orig'}, $TEST{$side}{'finalsplit'} ); - } - -} - -sub learn_segmentation_side { - my($INPUT_FILE, $SEGOUT_DIR, $PPL, $LANG) = @_; - - print STDERR "\n!!!LEARNING SEGMENTATION MODEL ($LANG)!!!\n"; - system("date"); - my $SEG_FILE = $SEGOUT_DIR . "/segmentation.ready"; - if ( -f $SEG_FILE) { - print STDERR "$SEG_FILE exists, reusing...\n"; - return; - } - my $cmd = "$MORF_TRAIN $INPUT_FILE $SEGOUT_DIR $PPL \"$MARKER\""; - safesystem($cmd) or die "Failed to learn segmentation model"; -} - -sub apply_segmentation_any { - my($moddir, $datfile, $outfile) = @_; - if ( -f $outfile) { - print STDERR "$outfile exists, reusing...\n"; - return; - } - - my $args = "$moddir/inputvocab.gz $moddir/segmentation.ready \"$MARKER\""; - safesystem("cat $datfile | $MORF_SEGMENT $args &> $outfile") or die "Could not segment $datfile"; -} - -##################### PATH FUNCTIONS ########################## - -sub beautify_numlines { - return ($SENTENCES ? $SENTENCES : "_all"); -} - -sub corpus_dir { - return "s" . beautify_numlines() . ".w" . $MAX_WORDS; -} - -sub model_dir { - my $lang = shift; - if ($lang eq "src") { - return corpus_dir() . ".PPL" . $PPL_SRC . ".src"; - } elsif ($lang eq "trg") { - return corpus_dir() . ".PPL" . $PPL_TRG . 
".trg"; - } else { - return "PPLundef"; - } -} - -sub processed_dir { - return corpus_dir() . "." . split_name(); -} - -########################## HELPER FUNCTIONS ############################ - -sub ifne_copy { - my ($src, $dest) = @_; - if (! -f $dest) { - copy($src, $dest) or die "Copy failed: $!"; - } -} - -sub split_name { - #parses SPLIT_TYPE, which can have the following values - # t|s|ts|st (last 2 are equiv) - # or is undefined when no splitting is done - my $name = ""; - - if ($SPLIT_TYPE) { - $SPLIT_SRC = lc($SPLIT_TYPE) =~ /s/; - $SPLIT_TRG = lc($SPLIT_TYPE) =~ /t/; - $name = $name . ($SPLIT_SRC ? $PPL_SRC : "0"); - $name = $name . "_" . ($SPLIT_TRG ? $PPL_TRG : "0"); - } else { - #no splitting - $name = "0"; - } - - return "sp_" . $name; - -} - -sub usage { - print <> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! $exitcode; - } -} - -sub get_basename -{ - my $x = shift; - $x = `basename $x`; - $x =~ s/\n//; - return $x; -} - -sub assert_marker { - my $file = shift; - my $result = `zcat $file| grep '$MARKER' | wc -l` or die "Cannot read $file: $!"; - print $result; - if (scalar($result) != 0) { die "Data contains marker '$MARKER'; use something else.";} -} -########################### Dynamic config files ############################## - -sub write_wconf { - my ($filename, $train_dir) = @_; - open WCONF, ">$filename" or die "Can't write $filename: $!"; - - print WCONF <$filename" or die "Can't write $filename: $!"; - - print EVALFILE < "\$1.recombined" - -\$EVAL_MAIN "\$1.recombined" $TEST{'trg'}{'finalunsplit'} -EOT - - } else { - print EVALFILE < "\$1.split" - -\$EVAL_MAIN "\$1.split" $TEST{'trg'}{'finalsplit'} - -echo "DIRECT EVALUATION" -echo "--------------------------" -\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalunsplit'} - -EOT - - } - close EVALFILE; - -} - - - - diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py deleted file mode 100755 index 85b9d4fb..00000000 --- 
a/gi/morf-segmentation/morfsegment.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/python - -import sys -import gzip - -#usage: morfsegment.py inputvocab.gz segmentation.ready -# stdin: the data to segment -# stdout: the segmented data - -if len(sys.argv) < 3: - print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]" - print " stdin: the data to segment" - print " stdout: the segmented data" - sys.exit() - -#read index: -split_index={} - -marker="##" - -if len(sys.argv) > 3: - marker=sys.argv[3] - -word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz -seg_vocab=open(sys.argv[2], 'r') #segm.ready.. - -for seg in seg_vocab: - #seg = ver# #wonder\n - #wordline = 1 verwonder\n - word = word_vocab.readline().strip().split(' ') - assert(len(word) == 2) - word = word[1] - seg=seg.strip() - - if seg != word: - split_index[word] = seg - -word_vocab.close() -seg_vocab.close() - -for line in sys.stdin: - words = line.strip().split() - - newsent = [] - for word in words: - splitword = split_index.get(word, word) - newsent.append(splitword) - - print ' '.join(newsent) - diff --git a/gi/morf-segmentation/morftrain.sh b/gi/morf-segmentation/morftrain.sh deleted file mode 100755 index 9004922f..00000000 --- a/gi/morf-segmentation/morftrain.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -if [[ $# -lt 3 ]]; then - echo "Trains a morfessor model and places the result in writedir" - echo - echo "Usage: `basename $0` corpus_input_file writedir [PPL] [marker] [lines]" - echo -e "\tcorpus_input_file contains a sentence per line." - exit 1 -fi - -MORFESSOR_DIR="/export/ws10smt/software/morfessor_catmap0.9.2" -SCRIPT_DIR=$(dirname `readlink -f $0`) - -MORFBINDIR="$MORFESSOR_DIR/bin" -MORFMAKEFILE_TRAIN="$MORFESSOR_DIR/train/Makefile" -VOCABEXT="$SCRIPT_DIR/vocabextractor.sh" - -MARKER="#" - -if [[ ! -f $VOCABEXT ]]; then - echo "$VOCABEXT doesn't exist!" - exit 1 -fi -if [[ ! -f $MORFMAKEFILE_TRAIN ]]; then - echo "$MORFMAKEFILE_TRAIN doesn't exist!" 
- exit 1 -fi - - -CORPUS="$1" -WRITETODIR=$2 - -if [[ ! -f $CORPUS ]]; then - echo "$CORPUS doesn't exist!" - exit 1 -fi - -PPL=10 -LINES=0 -if [[ $# -gt 2 ]]; then - PPL=$3 -fi -if [[ $# -gt 3 ]]; then - MARKER="$4" -fi -if [[ $# -gt 4 ]]; then - LINES=$5 -fi - -mkdir -p $WRITETODIR - -#extract vocabulary to train on -echo "Extracting vocabulary..." -if [[ -f $WRITETODIR/inputvocab.gz ]]; then - echo " ....$WRITETODIR/inputvocab.gz exists, reusing." -else - if [[ $LINES -gt 0 ]]; then - $VOCABEXT $CORPUS $LINES | gzip > $WRITETODIR/inputvocab.gz - else - $VOCABEXT $CORPUS | gzip > $WRITETODIR/inputvocab.gz - fi -fi - - -#train it -echo "Training morf model..." -if [[ -f $WRITETODIR/segmentation.final.gz ]]; then - echo " ....$WRITETODIR/segmentation.final.gz exists, reusing.." -else - OLDPWD=`pwd` - cd $WRITETODIR - - #put the training Makefile in place, with appropriate modifications - sed -e "s/^GZIPPEDINPUTDATA = .*$/GZIPPEDINPUTDATA = inputvocab.gz/" \ - -e "s/^PPLTHRESH = .*$/PPLTHRESH = $PPL/" \ - -e "s;^BINDIR = .*$;BINDIR = $MORFBINDIR;" \ - $MORFMAKEFILE_TRAIN > ./Makefile - - date - make > ./trainmorf.log 2>&1 - cd $OLDPWD - - - echo "Post processing..." - #remove comments, counts and morph types - #mark morphs - - if [[ ! -f $WRITETODIR/segmentation.final.gz ]]; then - echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" - exit 1 - fi - - zcat $WRITETODIR/segmentation.final.gz | \ - awk '$1 !~ /^#/ {print}' | \ - cut -d ' ' --complement -f 1 | \ - sed -e "s/\/...//g" -e "s/ + /$MARKER $MARKER/g" \ - > $WRITETODIR/segmentation.ready - - if [[ ! -f $WRITETODIR/segmentation.ready ]]; then - echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" - exit 1 - fi - - - - echo "Done training." - date -fi -echo "Segmentation model is $WRITETODIR/segmentation.ready." 
- diff --git a/gi/morf-segmentation/vocabextractor.sh b/gi/morf-segmentation/vocabextractor.sh deleted file mode 100755 index 00ae7109..00000000 --- a/gi/morf-segmentation/vocabextractor.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -d=$(dirname `readlink -f $0`) -if [ $# -lt 1 ]; then - echo "Extracts unique words and their frequencies from a subset of a corpus." - echo - echo "Usage: `basename $0` input_file [number_of_lines] > output_file" - echo -e "\tinput_file contains a sentence per line." - echo - echo "Script also removes words from the vocabulary if they contain a digit or a special character. Output is printed to stdout in a format suitable for use with Morfessor." - echo - exit -fi - -srcname=$1 -reallen=0 - -if [[ $# -gt 1 ]]; then - reallen=$2 -fi - -pattern_file=$d/invalid_vocab.patterns - -if [[ ! -f $pattern_file ]]; then - echo "Pattern file missing" - exit 1 -fi - -#this awk strips entries from the vocabulary if they contain invalid characters -#invalid characters are digits and punctuation marks, and words beginning or ending with a dash -#uniq -c extracts the unique words and counts the occurrences - -if [[ $reallen -eq 0 ]]; then - #when a zero is passed, use the whole file - zcat -f $srcname | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//' - -else - zcat -f $srcname | head -n $reallen | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//' -fi - diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am deleted file mode 100644 index 86f8e07b..00000000 --- a/gi/pf/Makefile.am +++ /dev/null @@ -1,44 +0,0 @@ -bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl pf_test bayes_lattice_score - -noinst_LIBRARIES = libpf.a - -libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc hpyp_tm.cc pyp_tm.cc - -bayes_lattice_score_SOURCES = 
bayes_lattice_score.cc -bayes_lattice_score_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -pf_test_SOURCES = pf_test.cc -pf_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -nuisance_test_SOURCES = nuisance_test.cc -nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc -align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_tl_SOURCES = align-tl.cc -align_tl_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -itg_SOURCES = itg.cc - -pyp_lm_SOURCES = pyp_lm.cc - -learn_cfg_SOURCES = learn_cfg.cc - -condnaive_SOURCES = condnaive.cc - -dpnaive_SOURCES = dpnaive.cc - -pfdist_SOURCES = pfdist.cc - -pfnaive_SOURCES = pfnaive.cc - -cbgi_SOURCES = cbgi.cc - -brat_SOURCES = brat.cc - -pfbrat_SOURCES = pfbrat.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/klm - -AM_LDFLAGS = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/pf/README b/gi/pf/README deleted file mode 100644 index 62e47541..00000000 --- a/gi/pf/README +++ /dev/null @@ -1,2 +0,0 @@ -Experimental Bayesian alignment tools. Nothing to see here. 
- diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc deleted file mode 100644 index e7509f57..00000000 --- a/gi/pf/align-lexonly-pyp.cc +++ /dev/null @@ -1,243 +0,0 @@ -#include -#include - -#include -#include - -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "array2d.h" -#include "sampler.h" -#include "corpus.h" -#include "pyp_tm.h" -#include "hpyp_tm.h" -#include "quasi_model2.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("infer_alignment_hyperparameters,I", "Infer alpha and p_null, otherwise fixed values will be assumed") - ("p_null,0", po::value()->default_value(0.08), "probability of aligning to null") - ("align_alpha,a", po::value()->default_value(4.0), "how 'tight' is the bias toward be along the diagonal?") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -MT19937* prng; - -struct LexicalAlignment { - unsigned char src_index; - bool is_transliteration; - vector > derivation; -}; - -struct AlignedSentencePair { - vector src; - vector trg; - vector a; - Array2D 
posterior; -}; - -template -struct Aligner { - Aligner(const vector >& lets, - int vocab_size, - int num_letters, - const po::variables_map& conf, - vector* c) : - corpus(*c), - paj_model(conf["align_alpha"].as(), conf["p_null"].as()), - infer_paj(conf.count("infer_alignment_hyperparameters") > 0), - model(lets, vocab_size, num_letters), - kNULL(TD::Convert("NULL")) { - assert(lets[kNULL].size() == 0); - } - - vector& corpus; - QuasiModel2 paj_model; - const bool infer_paj; - LexicalTranslationModel model; - const WordID kNULL; - - void ResampleHyperparameters() { - model.ResampleHyperparameters(prng); - if (infer_paj) paj_model.ResampleHyperparameters(prng); - } - - void InitializeRandom() { - cerr << "Initializing with random alignments ...\n"; - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - asp.a.resize(asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - unsigned char& a_j = asp.a[j].src_index; - a_j = prng->next() * (1 + asp.src.size()); - const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Increment(f_a_j, asp.trg[j], &*prng); - paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); - } - } - cerr << "Corpus intialized randomly." << endl; - cerr << "LLH = " << Likelihood() << " \t(Amodel=" << paj_model.Likelihood() - << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl; - } - - void ResampleCorpus() { - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - SampleSet ss; ss.resize(asp.src.size() + 1); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - unsigned char& a_j = asp.a[j].src_index; - const WordID e_j = asp.trg[j]; - WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Decrement(f_a_j, e_j, prng); - paj_model.Decrement(a_j, j, asp.src.size(), asp.trg.size()); - - for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { - const WordID prop_f = (prop_a_j ? 
asp.src[prop_a_j - 1] : kNULL); - ss[prop_a_j] = model.Prob(prop_f, e_j); - ss[prop_a_j] *= paj_model.Prob(prop_a_j, j, asp.src.size(), asp.trg.size()); - } - a_j = prng->SelectSample(ss); - f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Increment(f_a_j, e_j, prng); - paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); - } - } - } - - prob_t Likelihood() const { - return model.Likelihood() * paj_model.Likelihood(); - } -}; - -void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { - for (set::const_iterator it = v.begin(); it != v.end(); ++it) { - vector& letters = (*l)[*it]; - if (letters.size()) continue; // if e and f have the same word - - const string& w = TD::Convert(*it); - - size_t cur = 0; - while (cur < w.size()) { - const size_t len = UTF8Len(w[cur]); - letters.push_back(TD::Convert(w.substr(cur, len))); - if (letset) letset->insert(letters.back()); - cur += len; - } - } -} - -void Debug(const AlignedSentencePair& asp) { - cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; - Array2D a(asp.src.size(), asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - assert(asp.a[j].src_index <= asp.src.size()); - if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; - } - cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { - for (unsigned j = 0; j < asp->trg.size(); ++j) - asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { - bool first = true; - for (unsigned j = 0; j < asp.trg.size(); ++j) { - int src_index = -1; - int mc = -1; - for (unsigned i = 0; i <= asp.src.size(); ++i) { - if (asp.posterior(i, j) > mc) { - mc = asp.posterior(i, j); - src_index = i; - } - } - - if (src_index) { - if (first) first = false; else cout << ' '; - cout << (src_index - 1) << '-' << j; - } - } - cout << endl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - if (conf.count("random_seed")) - 
prng = new MT19937(conf["random_seed"].as()); - else - prng = new MT19937; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - vector corpus(corpuse.size()); - for (unsigned i = 0; i < corpuse.size(); ++i) { - corpus[i].src.swap(corpusf[i]); - corpus[i].trg.swap(corpuse[i]); - corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); - } - corpusf.clear(); corpuse.clear(); - - vocabf.insert(TD::Convert("NULL")); - vector > letters(TD::NumWords()); - set letset; - ExtractLetters(vocabe, &letters, &letset); - ExtractLetters(vocabf, &letters, NULL); - letters[TD::Convert("NULL")].clear(); - - //Aligner aligner(letters, vocabe.size(), letset.size(), conf, &corpus); - Aligner aligner(letters, vocabe.size(), letset.size(), conf, &corpus); - aligner.InitializeRandom(); - - const unsigned samples = conf["samples"].as(); - for (int i = 0; i < samples; ++i) { - for (int j = 65; j < 67; ++j) Debug(corpus[j]); - if (i % 10 == 9) { - aligner.ResampleHyperparameters(); - cerr << "LLH = " << aligner.Likelihood() << " \t(Amodel=" << aligner.paj_model.Likelihood() - << " TModel=" << aligner.model.Likelihood() << ") contexts=" << aligner.model.UniqueConditioningContexts() << endl; - } - aligner.ResampleCorpus(); - if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); - } - for (unsigned i = 0; i < corpus.size(); ++i) - WriteAlignments(corpus[i]); - aligner.model.Summary(); - - return 0; -} diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc deleted file mode 100644 index f6608f1d..00000000 --- a/gi/pf/align-tl.cc +++ /dev/null @@ 
-1,339 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "backward.h" -#include "array2d.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "mfcr.h" -#include "corpus.h" -#include "ngram_base.h" -#include "transliterations.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("s2t", po::value(), "character level source-to-target prior transliteration probabilities") - ("t2s", po::value(), "character level target-to-source prior transliteration probabilities") - ("max_src_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in source") - ("max_trg_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in target") - ("expected_src_to_trg_ratio", po::value()->default_value(1.0), "If a word is transliterated, what is the expected length ratio from source to target?") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { 
- cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -struct LexicalAlignment { - unsigned char src_index; - bool is_transliteration; - vector > derivation; -}; - -struct AlignedSentencePair { - vector src; - vector trg; - vector a; - Array2D posterior; -}; - -struct HierarchicalWordBase { - explicit HierarchicalWordBase(const unsigned vocab_e_size) : - base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} - - void ResampleHyperparameters(MT19937* rng) { - r.resample_hyperparameters(rng); - } - - inline double logp0(const vector& s) const { - return Md::log_poisson(s.size(), 7.5) + s.size() * u0; - } - - // return p0 of rule.e_ - prob_t operator()(const TRule& rule) const { - v[0].logeq(logp0(rule.e_)); - return r.prob(rule.e_, v.begin(), l.begin()); - } - - void Increment(const TRule& rule) { - v[0].logeq(logp0(rule.e_)); - if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) { - base *= v[0] * l[0]; - } - } - - void Decrement(const TRule& rule) { - if (r.decrement(rule.e_, &*prng).count) { - base /= prob_t(exp(logp0(rule.e_))); - } - } - - prob_t Likelihood() const { - prob_t p; p.logeq(r.log_crp_prob()); - p *= base; - return p; - } - - void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; - for (MFCR<1,vector >::const_iterator it = r.begin(); it != r.end(); ++it) - cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl; - } - - prob_t base; - MFCR<1,vector > r; - const double u0; - const vector l; - mutable vector v; -}; - -struct BasicLexicalAlignment { - explicit BasicLexicalAlignment(const vector >& lets, - const unsigned words_e, - const unsigned letters_e, - vector* corp) : - letters(lets), - corpus(*corp), - //up0(words_e), - //up0("en.chars.1gram", letters_e), - //up0("en.words.1gram"), 
- up0(letters_e), - //up0("en.chars.2gram"), - tmodel(up0) { - } - - void InstantiateRule(const WordID src, - const WordID trg, - TRule* rule) const { - static const WordID kX = TD::Convert("X") * -1; - rule->lhs_ = kX; - rule->e_ = letters[trg]; - rule->f_ = letters[src]; - } - - void InitializeRandom() { - const WordID kNULL = TD::Convert("NULL"); - cerr << "Initializing with random alignments ...\n"; - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - asp.a.resize(asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - const unsigned char a_j = prng->next() * (1 + asp.src.size()); - const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - TRule r; - InstantiateRule(f_a_j, asp.trg[j], &r); - asp.a[j].is_transliteration = false; - asp.a[j].src_index = a_j; - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); - } - } - cerr << " LLH = " << Likelihood() << endl; - } - - prob_t Likelihood() const { - prob_t p = tmodel.Likelihood(); - p *= up0.Likelihood(); - return p; - } - - void ResampleHyperparemeters() { - tmodel.ResampleHyperparameters(&*prng); - up0.ResampleHyperparameters(&*prng); - cerr << " (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n"; - } - - void ResampleCorpus(); - - const vector >& letters; // spelling dictionary - vector& corpus; - //PhraseConditionalUninformativeBase up0; - //PhraseConditionalUninformativeUnigramBase up0; - //UnigramWordBase up0; - //HierarchicalUnigramBase up0; - HierarchicalWordBase up0; - //CompletelyUniformBase up0; - //FixedNgramBase up0; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - MConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; -}; - -void BasicLexicalAlignment::ResampleCorpus() { - static const WordID kNULL = TD::Convert("NULL"); - for (unsigned i = 0; i < 
corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - SampleSet ss; ss.resize(asp.src.size() + 1); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - TRule r; - unsigned char& a_j = asp.a[j].src_index; - WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.DecrementRule(r, &*prng)) - up0.Decrement(r); - - for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { - const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); - InstantiateRule(prop_f, asp.trg[j], &r); - ss[prop_a_j] = tmodel.RuleProbability(r); - } - a_j = prng->SelectSample(ss); - f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); - } - } - cerr << " LLH = " << Likelihood() << endl; -} - -void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { - for (set::const_iterator it = v.begin(); it != v.end(); ++it) { - vector& letters = (*l)[*it]; - if (letters.size()) continue; // if e and f have the same word - - const string& w = TD::Convert(*it); - - size_t cur = 0; - while (cur < w.size()) { - const size_t len = UTF8Len(w[cur]); - letters.push_back(TD::Convert(w.substr(cur, len))); - if (letset) letset->insert(letters.back()); - cur += len; - } - } -} - -void Debug(const AlignedSentencePair& asp) { - cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; - Array2D a(asp.src.size(), asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) - if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; - cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { - for (unsigned j = 0; j < asp->trg.size(); ++j) - asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { - bool first = true; - for (unsigned j = 0; j < asp.trg.size(); ++j) { - int src_index = -1; - int mc = -1; - for (unsigned i = 0; i <= asp.src.size(); ++i) { - if 
(asp.posterior(i, j) > mc) { - mc = asp.posterior(i, j); - src_index = i; - } - } - - if (src_index) { - if (first) first = false; else cout << ' '; - cout << (src_index - 1) << '-' << j; - } - } - cout << endl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - vector corpus(corpuse.size()); - for (unsigned i = 0; i < corpuse.size(); ++i) { - corpus[i].src.swap(corpusf[i]); - corpus[i].trg.swap(corpuse[i]); - corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); - } - corpusf.clear(); corpuse.clear(); - - vocabf.insert(TD::Convert("NULL")); - vector > letters(TD::NumWords() + 1); - set letset; - ExtractLetters(vocabe, &letters, &letset); - ExtractLetters(vocabf, &letters, NULL); - letters[TD::Convert("NULL")].clear(); - - // TODO configure this - const int max_src_chunk = conf["max_src_chunk"].as(); - const int max_trg_chunk = conf["max_trg_chunk"].as(); - const double s2t_rat = conf["expected_src_to_trg_ratio"].as(); - const BackwardEstimator be(conf["s2t"].as(), conf["t2s"].as()); - Transliterations tl(max_src_chunk, max_trg_chunk, s2t_rat, be); - - cerr << "Initializing transliteration graph structures ...\n"; - for (int i = 0; i < corpus.size(); ++i) { - const vector& src = corpus[i].src; - const vector& trg = corpus[i].trg; - for (int j = 0; j < src.size(); ++j) { - const vector& src_let = 
letters[src[j]]; - for (int k = 0; k < trg.size(); ++k) { - const vector& trg_let = letters[trg[k]]; - tl.Initialize(src[j], src_let, trg[k], trg_let); - //if (src_let.size() < min_trans_src) - // tl.Forbid(src[j], src_let, trg[k], trg_let); - } - } - } - cerr << endl; - tl.GraphSummary(); - - return 0; -} diff --git a/gi/pf/backward.cc b/gi/pf/backward.cc deleted file mode 100644 index b92629fd..00000000 --- a/gi/pf/backward.cc +++ /dev/null @@ -1,89 +0,0 @@ -#include "backward.h" - -#include -#include - -#include "array2d.h" -#include "reachability.h" -#include "base_distributions.h" - -using namespace std; - -BackwardEstimator::BackwardEstimator(const string& s2t, - const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {} - -BackwardEstimator::~BackwardEstimator() { - delete m1; m1 = NULL; - delete m1inv; m1inv = NULL; -} - -float BackwardEstimator::ComputeBackwardProb(const std::vector& src, - const std::vector& trg, - unsigned src_covered, - unsigned trg_covered, - double s2t_ratio) const { - if (src_covered == src.size() || trg_covered == trg.size()) { - assert(src_covered == src.size()); - assert(trg_covered == trg.size()); - return 0; - } - static const WordID kNULL = TD::Convert(""); - const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1)); - // TODO factor in expected length ratio - prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_covered; j < trg.size(); ++j) { - prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12); - for (unsigned i = src_covered; i < src.size(); ++i) - p += (*m1)(src[i], trg[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n"; - assert(!"failed"); - } - p *= uniform_alignment; - e *= p; - } - // TODO factor in expected length ratio - const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0)); - prob_t inv; - 
inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio)); - for (unsigned i = src_covered; i < src.size(); ++i) { - prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12); - for (unsigned j = trg_covered; j < trg.size(); ++j) - p += (*m1inv)(trg[j], src[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n"; - assert(!"failed"); - } - p *= inv_uniform; - inv *= p; - } - return (log(e) + log(inv)) / 2; -} - -void BackwardEstimator::InitializeGrid(const vector& src, - const vector& trg, - const Reachability& r, - double s2t_ratio, - float* grid) const { - queue > q; - q.push(make_pair(0,0)); - Array2D done(src.size()+1, trg.size()+1, false); - //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl; - while(!q.empty()) { - const pair n = q.front(); - q.pop(); - if (done(n.first,n.second)) continue; - done(n.first,n.second) = true; - - float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio); - if (n.first == 0 && n.second == 0) grid[0] = lp; - //cerr << " " << n.first << "," << n.second << "\t" << lp << endl; - - if (n.first == src.size() || n.second == trg.size()) continue; - const vector >& edges = r.valid_deltas[n.first][n.second]; - for (int i = 0; i < edges.size(); ++i) - q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second)); - } - //static int cc = 0; ++cc; if (cc == 80) exit(1); -} - diff --git a/gi/pf/backward.h b/gi/pf/backward.h deleted file mode 100644 index e67eff0c..00000000 --- a/gi/pf/backward.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _BACKWARD_H_ -#define _BACKWARD_H_ - -#include -#include -#include "wordid.h" - -struct Reachability; -struct Model1; - -struct BackwardEstimator { - BackwardEstimator(const std::string& s2t, - const std::string& t2s); - ~BackwardEstimator(); - - void InitializeGrid(const std::vector& src, - const std::vector& trg, - const Reachability& r, - double src2trg_ratio, - float* grid) const; 
- - private: - float ComputeBackwardProb(const std::vector& src, - const std::vector& trg, - unsigned src_covered, - unsigned trg_covered, - double src2trg_ratio) const; - - Model1* m1; - Model1* m1inv; -}; - -#endif diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc deleted file mode 100644 index 57e0bbe1..00000000 --- a/gi/pf/base_distributions.cc +++ /dev/null @@ -1,241 +0,0 @@ -#include "base_distributions.h" - -#include - -#include "filelib.h" - -using namespace std; - -TableLookupBase::TableLookupBase(const string& fname) { - cerr << "TableLookupBase reading from " << fname << " ..." << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - vector le, lf; - TRule x; - x.lhs_ = -TD::Convert("X"); - bool flag = false; - while(getline(in, line)) { - ++lc; - if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; } - else if (lc % 25000 == 0) { cerr << '.' 
<< flush; flag = true; } - tmp.clear(); - TD::ConvertSentence(line, &tmp); - x.f_.clear(); - x.e_.clear(); - size_t pos = 0; - int cc = 0; - while(pos < tmp.size()) { - const WordID cur = tmp[pos++]; - if (cur == kDIV) { - ++cc; - } else if (cc == 0) { - x.f_.push_back(cur); - } else if (cc == 1) { - x.e_.push_back(cur); - } else if (cc == 2) { - table[x].logeq(atof(TD::Convert(cur).c_str())); - ++cc; - } else { - if (flag) cerr << endl; - cerr << "Bad format in " << lc << ": " << line << endl; abort(); - } - } - if (cc != 3) { - if (flag) cerr << endl; - cerr << "Bad format in " << lc << ": " << line << endl; abort(); - } - } - if (flag) cerr << endl; - cerr << " read " << lc << " entries\n"; -} - -prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t p; - p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - //p.logeq(log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) - p *= u(vtrg[i + start_trg]); // draw e_i ~Uniform - return p; -} - -prob_t PhraseConditionalUninformativeBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t p; - //p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - p.logeq(Md::log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) - p *= kUNIFORM_TARGET; // draw e_i ~Uniform - return p; -} - -void Model1::LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; -} - -prob_t PhraseConditionalBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p.is_0()) { - cerr << "Zero! 
" << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - return p; -} - -prob_t PhraseJointBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) - // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); - p *= ptrglen; - p *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform - for (int i = 0; i < elen; ++i) { // for each position i in E - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - return p; -} - -prob_t PhraseJointBase_BiDir::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1)); - - prob_t p1; - p1.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) - // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); - p1 *= ptrglen; - p1 *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform - for (int i = 0; i < elen; ++i) { // for each position i in E - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p1 *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p1.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - - prob_t p2; - p2.logeq(Md::log_poisson(elen, 1.0)); // elen ~Pois(1) - // flen | elen ~Pois(flen + 0.01) - prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01)); - p2 *= psrclen; - p2 *= kUNIFORM_TARGET.pow(elen); // each f in F ~Uniform - for (int i = 0; i < flen; ++i) { // for each position i in E - const WordID src = vsrc[i + start_src]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < elen; ++j) { - const WordID trg = j < 0 ? 0 : vtrg[j + start_trg]; - tp += kM1MIXTURE * invmodel1(trg, src); - tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE; - } - tp *= uniform_trg_alignment; // draw a_i ~uniform - p2 *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p2.is_0()) { - cerr << "Zero! 
" << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - - static const prob_t kHALF(0.5); - return (p1 + p2) * kHALF; -} - -JumpBase::JumpBase() : p(200) { - for (unsigned src_len = 1; src_len < 200; ++src_len) { - map& cpd = p[src_len]; - int min_jump = 1 - src_len; - int max_jump = src_len; - prob_t z; - for (int j = min_jump; j <= max_jump; ++j) { - prob_t& cp = cpd[j]; - if (j < 0) - cp.logeq(Md::log_poisson(1.5-j, 1)); - else if (j > 0) - cp.logeq(Md::log_poisson(j, 1)); - cp.poweq(0.2); - z += cp; - } - for (int j = min_jump; j <= max_jump; ++j) { - cpd[j] /= z; - } - } -} - diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h deleted file mode 100644 index 41b513f8..00000000 --- a/gi/pf/base_distributions.h +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef _BASE_MEASURES_H_ -#define _BASE_MEASURES_H_ - -#include -#include -#include -#include -#include -#include - -#include "unigrams.h" -#include "trule.h" -#include "prob.h" -#include "tdict.h" -#include "sampler.h" -#include "m.h" -#include "os_phrase.h" - -struct Model1 { - explicit Model1(const std::string& fname) : - kNULL(TD::Convert("")), - kZERO() { - LoadModel1(fname); - } - - void LoadModel1(const std::string& fname); - - // returns prob 0 if src or trg is not found - const prob_t& operator()(WordID src, WordID trg) const { - if (src == 0) src = kNULL; - if (src < ttable.size()) { - const std::map& cpd = ttable[src]; - const std::map::const_iterator it = cpd.find(trg); - if (it != cpd.end()) - return it->second; - } - return kZERO; - } - - const WordID kNULL; - const prob_t kZERO; - std::vector > ttable; -}; - -struct PoissonUniformUninformativeBase { - explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} - prob_t operator()(const TRule& r) const { - prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0)); - prob_t q = kUNIFORM; q.poweq(r.e_.size()); - p *= q; - return p; - } - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void 
Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const prob_t kUNIFORM; -}; - -struct CompletelyUniformBase { - explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} - prob_t operator()(const TRule&) const { - return kUNIFORM; - } - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const prob_t kUNIFORM; -}; - -struct UnigramWordBase { - explicit UnigramWordBase(const std::string& fname) : un(fname) {} - prob_t operator()(const TRule& r) const { - return un(r.e_); - } - const UnigramWordModel un; -}; - -struct RuleHasher { - size_t operator()(const TRule& r) const { - return hash_value(r); - } -}; - -struct TableLookupBase { - TableLookupBase(const std::string& fname); - - prob_t operator()(const TRule& rule) const { - const std::tr1::unordered_map::const_iterator it = table.find(rule); - if (it == table.end()) { - std::cerr << rule << " not found\n"; - abort(); - } - return it->second; - } - - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - void Summary() const {} - - std::tr1::unordered_map table; -}; - -struct PhraseConditionalUninformativeBase { - explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) : - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const 
prob_t kUNIFORM_TARGET; -}; - -struct PhraseConditionalUninformativeUnigramBase { - explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {} - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const UnigramModel u; -}; - -struct PhraseConditionalBase { - explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) : - model1(m1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase { - explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) : - model1(m1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_SOURCE(1.0 / vocab_f_size), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ , rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t 
kUNIFORM_SOURCE; - const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase_BiDir { - explicit PhraseJointBase_BiDir(const Model1& m1, - const Model1& im1, - const double m1mixture, - const unsigned vocab_e_size, - const unsigned vocab_f_size) : - model1(m1), - invmodel1(im1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_SOURCE(1.0 / vocab_f_size), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ , rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const Model1& invmodel1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_SOURCE; - const prob_t kUNIFORM_TARGET; -}; - -// base distribution for jump size multinomials -// basically p(0) = 0 and then, p(1) is max, and then -// you drop as you move to the max jump distance -struct JumpBase { - JumpBase(); - - const prob_t& operator()(int jump, unsigned src_len) const { - assert(jump != 0); - const std::map::const_iterator it = p[src_len].find(jump); - assert(it != p[src_len].end()); - return it->second; - } - std::vector > p; -}; - - -#endif diff --git a/gi/pf/bayes_lattice_score.cc b/gi/pf/bayes_lattice_score.cc deleted file mode 100644 index 70cb8dc2..00000000 --- a/gi/pf/bayes_lattice_score.cc +++ /dev/null @@ -1,309 +0,0 @@ -#include -#include - -#include -#include -#include - -#include "inside_outside.h" -#include "hg.h" -#include "hg_io.h" -#include "bottom_up_parser.h" -#include "fdict.h" -#include "grammar.h" -#include "m.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; 
-namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -unsigned ReadCorpus(const string& filename, - vector* e, - set* vocab_e) { - e->clear(); - vocab_e->clear(); - ReadFile rf(filename); - istream* in = rf.stream(); - assert(*in); - string line; - unsigned toks = 0; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(Lattice()); - Lattice& le = e->back(); - LatticeTools::ConvertTextOrPLF(line, & le); - for (unsigned i = 0; i < le.size(); ++i) - for (unsigned j = 0; j < le[i].size(); ++j) - vocab_e->insert(le[i][j].label); - toks += le.size(); - } - return toks; -} - -struct BaseModel { - explicit BaseModel(unsigned tc) : - unif(1.0 / tc), p(prob_t::One()) {} - prob_t prob(const TRule& r) const { - return unif; - } - void increment(const TRule& r, MT19937* rng) { - p *= prob(r); - } - void decrement(const TRule& r, MT19937* rng) { - p /= prob(r); - } - prob_t Likelihood() const { - return p; - } - const prob_t unif; - prob_t p; -}; - -struct 
UnigramModel { - explicit UnigramModel(unsigned tc) : base(tc), crp(1,1,1,1), glue(1,1,1,1) {} - BaseModel base; - CCRP crp; - CCRP glue; - - prob_t Prob(const TRule& r) const { - if (r.Arity() != 0) { - return glue.prob(r, prob_t(0.5)); - } - return crp.prob(r, base.prob(r)); - } - - int Increment(const TRule& r, MT19937* rng) { - if (r.Arity() != 0) { - glue.increment(r, 0.5, rng); - return 0; - } else { - if (crp.increment(r, base.prob(r), rng)) { - base.increment(r, rng); - return 1; - } - return 0; - } - } - - int Decrement(const TRule& r, MT19937* rng) { - if (r.Arity() != 0) { - glue.decrement(r, rng); - return 0; - } else { - if (crp.decrement(r, rng)) { - base.decrement(r, rng); - return -1; - } - return 0; - } - } - - prob_t Likelihood() const { - prob_t p; - p.logeq(crp.log_crp_prob() + glue.log_crp_prob()); - p *= base.Likelihood(); - return p; - } - - void ResampleHyperparameters(MT19937* rng) { - crp.resample_hyperparameters(rng); - glue.resample_hyperparameters(rng); - cerr << " d=" << crp.discount() << ", s=" << crp.strength() << "\t STOP d=" << glue.discount() << ", s=" << glue.strength() << endl; - } -}; - -UnigramModel* plm; - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector* sampled_deriv) { - vector node_probs; - Inside(hg, &node_probs); - queue q; - q.push(hg.nodes_.size() - 2); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - //prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = edge.edge_prob_; - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) 
<< " ||| " << edge.rule_->AsString() << endl; - //z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } -// for (unsigned i = 0; i < sampled_deriv->size(); ++i) { -// cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; -// } -} - -void IncrementDerivation(const Hypergraph& hg, const vector& d, UnigramModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector& d, UnigramModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -prob_t TotalProb(const Hypergraph& hg) { - return Inside(hg); -} - -void IncrementLatticePath(const Hypergraph& hg, const vector& d, Lattice* pl) { - Lattice& lat = *pl; - for (int i = 0; i < d.size(); ++i) { - const Hypergraph::Edge& edge = hg.edges_[d[i]]; - if (edge.rule_->Arity() != 0) continue; - WordID sym = edge.rule_->e_[0]; - vector& las = lat[edge.i_]; - int dist = edge.j_ - edge.i_; - assert(dist > 0); - for (int j = 0; j < las.size(); ++j) { - if (las[j].dist2next == dist && - las[j].label == sym) { - las[j].cost += 1; - } - } - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - vector grammars(2); - grammars[0].reset(new GlueGrammar("S","X")); - const unsigned samples = conf["samples"].as(); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector corpuse; - 
set vocabe; - cerr << "Reading corpus...\n"; - const unsigned toks = ReadCorpus(conf["input"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " lattices\t (" << vocabe.size() << " word types)\n"; - UnigramModel lm(vocabe.size()); - vector hgs(corpuse.size()); - vector > derivs(corpuse.size()); - for (int i = 0; i < corpuse.size(); ++i) { - grammars[1].reset(new PassThroughGrammar(corpuse[i], "X")); - ExhaustiveBottomUpParser parser("S", grammars); - bool res = parser.Parse(corpuse[i], &hgs[i]); // exhaustive parse - assert(res); - } - - double csamples = 0; - for (int SS=0; SS < samples; ++SS) { - const bool is_last = ((samples - 1) == SS); - prob_t dlh = prob_t::One(); - bool record_sample = (SS > (samples * 1 / 3) && (SS % 5 == 3)); - if (record_sample) csamples++; - for (int ci = 0; ci < corpuse.size(); ++ci) { - Lattice& lat = corpuse[ci]; - Hypergraph& hg = hgs[ci]; - vector& d = derivs[ci]; - if (!is_last) DecrementDerivation(hg, d, &lm, &rng); - for (unsigned i = 0; i < hg.edges_.size(); ++i) { - TRule& r = *hg.edges_[i].rule_; - if (r.Arity() != 0) - hg.edges_[i].edge_prob_ = prob_t::One(); - else - hg.edges_[i].edge_prob_ = lm.Prob(r); - } - if (!is_last) { - d.clear(); - SampleDerivation(hg, &rng, &d); - IncrementDerivation(hg, derivs[ci], &lm, &rng); - } else { - prob_t p = TotalProb(hg); - dlh *= p; - cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; - } - if (record_sample) IncrementLatticePath(hg, derivs[ci], &lat); - } - double llh = log(lm.Likelihood()); - cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; - if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); - if (is_last) { - double z = log(dlh); - cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; - } - } - cerr << lm.crp << endl; - cerr << lm.glue << endl; - for (int i = 0; i < corpuse.size(); ++i) { - for (int j = 0; j < 
corpuse[i].size(); ++j) - for (int k = 0; k < corpuse[i][j].size(); ++k) { - corpuse[i][j][k].cost /= csamples; - corpuse[i][j][k].cost += 1e-3; - corpuse[i][j][k].cost = log(corpuse[i][j][k].cost); - } - cout << HypergraphIO::AsPLF(corpuse[i]) << endl; - } - return 0; -} - diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/brat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { - explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size), - kNULL(TD::Convert("")) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - LoadModel1(model1fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // return logp0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - const int flen = rule.f_.size(); - const int elen = rule.e_.size(); - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = rule.e_[i]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
kNULL : rule.f_[j]; - const map::const_iterator it = ttable[src].find(trg); - if (it != ttable[src].end()) { - tp += kM1MIXTURE * it->second; - } - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - return p; - } - - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; - const WordID kNULL; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(3),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(3),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* 
vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UniphraseLM { - UniphraseLM(const vector >& corpus, - const set& vocab, - const po::variables_map& conf) : - phrases_(1,1), - gen_(1,1), - corpus_(corpus), - uniform_word_(1.0 / vocab.size()), - gen_p0_(0.5), - p_end_(0.5), - use_poisson_(conf.count("poisson_length") > 0) {} - - void ResampleHyperparameters(MT19937* rng) { - phrases_.resample_hyperparameters(rng); - gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.alpha(); - } - - CCRP_NoTable > phrases_; - CCRP_NoTable gen_; - vector > z_; // z_[i] is there a phrase boundary after the ith word - const vector >& corpus_; - const double uniform_word_; - const double gen_p0_; - const double p_end_; // in base length distribution, p of the end of a phrase - const bool use_poisson_; -}; - -struct Reachability { - boost::multi_array edges; // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring? 
- boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; - }; - - struct NState { - NState() : next_src_covered(), next_trg_covered() {} - NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {} - int next_src_covered; - int next_trg_covered; - }; - - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - assert(a[srclen][trglen].size() > 0); - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); -// typedef boost::multi_array, 2> narray_type; -// narray_type b(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } -// const NState nstate(i,j); - for (int k = 0; k < prevs.size(); ++k) 
{ - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta = i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; -// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; - assert(max_src_delta[0][0] > 0); - //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; - //for (int i = 0; i < b[0][0].size(); ++i) { - // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; - //} - } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { - explicit FSTState(int src_size) : - trg_covered_(), - src_covered_(), - src_coverage_(src_size) {} - - FSTState(short trg_covered, short src_covered, const vector& src_coverage, const vector& src_prefix) : - trg_covered_(trg_covered), - src_covered_(src_covered), - src_coverage_(src_coverage), - src_prefix_(src_prefix) { - if (src_coverage_.size() == src_covered) { - assert(src_prefix.size() == 0); - } - } - - // if we extend by the word at src_position, what are - // the next states that are reachable and lie on a valid - // path to the final state? 
- vector Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { - assert(src_position < src_coverage_.size()); - if (src_coverage_[src_position]) { - cerr << "Trying to extend " << *this << " with position " << src_position << endl; - abort(); - } - vector ncvg = src_coverage_; - ncvg[src_position] = true; - - vector res; - const int trg_remaining = trg_len - trg_covered_; - if (trg_remaining <= 0) { - cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; - abort(); - } - const int src_remaining = src_len - src_covered_; - if (src_remaining <= 0) { - cerr << "Source appears to have been covered: " << *this << endl; - abort(); - } - - for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { - if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { - int nc = src_prefix_.size() + 1 + src_covered_; - res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector())); - } - } - - if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { - vector nsp = src_prefix_; - nsp.push_back(src_position); - res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); - } - - if (res.size() == 0) { - cerr << *this << " can't be extended!\n"; - abort(); - } - return res; - } - - short trg_covered_, src_covered_; - vector src_coverage_; - vector src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { - if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; - if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; - if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; - return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { - os << "[" << q.trg_covered_ << " : "; - for (int i = 0; i < q.src_coverage_.size(); ++i) - os << q.src_coverage_[i]; - os << " : <"; - for (int i = 0; i < q.src_prefix_.size(); ++i) { - if (i != 
0) os << ' '; - os << q.src_prefix_[i]; - } - return os << ">]"; -} - -struct MyModel { - MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} - typedef unordered_map, CCRP_NoTable, boost::hash > > SrcToRuleCRPMap; - - void DecrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - it->second.decrement(rule); - if (it->second.num_customers() == 0) rules.erase(it); - } - - void IncrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) { - CCRP_NoTable crp(1,1); - it = rules.insert(make_pair(rule.f_, crp)).first; - } - it->second.increment(rule); - } - - // conditioned on rule.f_ - prob_t RuleConditionalProbability(const TRule& rule) const { - const prob_t base = rp0(rule); - SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) { - return base; - } else { - const double lp = it->second.logprob(rule, log(base)); - prob_t q; q.logeq(lp); - return q; - } - } - - const ConditionalBase& rp0; - SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { - MyFST(const vector& ssrc, const vector& strg, MyModel* m) : - src(ssrc), trg(strg), - r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), - model(m) { - FSTState in(src.size()); - cerr << " INIT: " << in << endl; - init = GetNode(in); - for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; - in.src_covered_ = src.size(); - in.trg_covered_ = trg.size(); - cerr << "FINAL: " << in << endl; - final = GetNode(in); - } - virtual const WFSTNode* Final() const; - virtual const WFSTNode* Initial() const; - - const WFSTNode* GetNode(const FSTState& q); - map > m; - const vector& src; - const vector& trg; - Reachability r; - const WFSTNode* init; - const WFSTNode* final; - MyModel* model; -}; - -struct MyNode : public WFSTNode { - MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} - virtual vector > ExtendInput(unsigned srcindex) const; - const 
FSTState state; - mutable MyFST* container; -}; - -vector > MyNode::ExtendInput(unsigned srcindex) const { - cerr << "EXTEND " << state << " with " << srcindex << endl; - vector ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); - vector > res(ext.size()); - for (unsigned i = 0; i < ext.size(); ++i) { - res[i].first = container->GetNode(ext[i]); - if (ext[i].src_prefix_.size() == 0) { - const unsigned trg_from = state.trg_covered_; - const unsigned trg_to = ext[i].trg_covered_; - const unsigned prev_prfx_size = state.src_prefix_.size(); - res[i].second.reset(new TRule); - res[i].second->lhs_ = -TD::Convert("X"); - vector& src = res[i].second->f_; - vector& trg = res[i].second->e_; - src.resize(prev_prfx_size + 1); - for (unsigned j = 0; j < prev_prfx_size; ++j) - src[j] = container->src[state.src_prefix_[j]]; - src[prev_prfx_size] = container->src[srcindex]; - for (unsigned j = trg_from; j < trg_to; ++j) - trg.push_back(container->trg[j]); - res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); - } - } - return res; -} - -const WFSTNode* MyFST::GetNode(const FSTState& q) { - boost::shared_ptr& res = m[q]; - if (!res) { - res.reset(new MyNode(q, this)); - } - return &*res; -} - -const WFSTNode* MyFST::Final() const { - return final; -} - -const WFSTNode* MyFST::Initial() const { - return init; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - 
ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - ConditionalBase lp0(conf["model1_interpolation_weight"].as(), - vocabe.size(), - conf["model1"].as()); - MyModel m(lp0); - - TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0"); - m.IncrementRule(x); - TRule y("[X] ||| nY dyN ||| gave ||| 0"); - m.IncrementRule(y); - - - MyFST fst(corpusf[0], corpuse[0], &m); - ifstream in("./kimura.g"); - assert(in); - CFG_WFSTComposer comp(fst); - Hypergraph hg; - bool succeed = comp.Compose(&in, &hg); - hg.PrintGraphviz(); - if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } - -#if 0 - ifstream in2("./amnabooks.g"); - assert(in2); - MyFST fst2(corpusf[1], corpuse[1], &m); - CFG_WFSTComposer comp2(fst2); - Hypergraph hg2; - bool succeed2 = comp2.Compose(&in2, &hg2); - if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } -#endif - - SparseVector w; w.set_value(FD::Convert("Proposal"), 1.0); - hg.Reweight(w); - cerr << ViterbiFTree(hg) << endl; - return 0; -} - diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc deleted file mode 100644 index 97f1ba34..00000000 --- a/gi/pf/cbgi.cc +++ /dev/null @@ -1,330 +0,0 @@ -#include -#include -#include - -#include -#include - -#include "sampler.h" -#include "filelib.h" -#include "hg_io.h" -#include "hg.h" -#include "ccrp_nt.h" -#include "trule.h" -#include "inside_outside.h" - -using namespace std; -using namespace std::tr1; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -double log_decay(unsigned x, const double& b) { - assert(b > 1.0); - assert(x > 0); - return log(b - 1) - x 
* log(b); -} - -struct SimpleBase { - SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : - uniform_e(-log(esize)), - uniform_f(-log(fsize)), - uniform_nt(-log(ntsize)) { - } - - // binomial coefficient - static double choose(unsigned n, unsigned k) { - return exp(lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)); - } - - // count the number of patterns of terminals and NTs in the rule, given elen and flen - static double log_number_of_patterns(const unsigned flen, const unsigned elen) { - static vector > counts; - if (elen >= counts.size()) counts.resize(elen + 1); - if (flen >= counts[elen].size()) counts[elen].resize(flen + 1); - double& count = counts[elen][flen]; - if (count) return log(count); - const unsigned max_arity = min(elen, flen); - for (unsigned a = 0; a <= max_arity; ++a) - count += choose(elen, a) * choose(flen, a); - return log(count); - } - - // return logp0 of rule | LHS - double operator()(const TRule& rule) const { - const unsigned flen = rule.f_.size(); - const unsigned elen = rule.e_.size(); -#if 0 - double p = 0; - p += log_poisson(flen, 0.5); // flen ~Pois(0.5) - p += log_poisson(elen, flen); // elen | flen ~Pois(flen) - p -= log_number_of_patterns(flen, elen); // pattern | flen,elen ~Uniform - for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS - if (rule.f_[i] <= 0) // according to pattern - p += uniform_nt; // draw NT ~Uniform - else - p += uniform_f; // draw f terminal ~Uniform - } - p -= lgamma(rule.Arity() + 1); // draw permutation ~Uniform - for (unsigned i = 0; i < elen; ++i) { // for each position in e-RHS - if (rule.e_[i] > 0) // according to pattern - p += uniform_e; // draw e|f term ~Uniform - // TODO this should prob be model 1 - } -#else - double p = 0; - bool is_abstract = rule.f_[0] <= 0; - p += log(0.5); - if (is_abstract) { - if (flen == 2) p += log(0.99); else p += log(0.01); - } else { - p += log_decay(flen, 3); - } - - for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS 
- if (rule.f_[i] <= 0) // according to pattern - p += uniform_nt; // draw NT ~Uniform - else - p += uniform_f; // draw f terminal ~Uniform - } -#endif - return p; - } - const double uniform_e; - const double uniform_f; - const double uniform_nt; - vector arities; -}; - -MT19937* rng = NULL; - -template -struct MHSamplerEdgeProb { - MHSamplerEdgeProb(const Hypergraph& hg, - const map >& rdp, - const Base& logp0, - const bool exclude_multiword_terminals) : edge_probs(hg.edges_.size()) { - for (int i = 0; i < edge_probs.size(); ++i) { - const TRule& rule = *hg.edges_[i].rule_; - const map >::const_iterator it = rdp.find(rule.lhs_); - assert(it != rdp.end()); - const CCRP_NoTable& crp = it->second; - edge_probs[i].logeq(crp.logprob(rule, logp0(rule))); - if (exclude_multiword_terminals && rule.f_[0] > 0 && rule.f_.size() > 1) - edge_probs[i] = prob_t::Zero(); - } - } - inline prob_t operator()(const Hypergraph::Edge& e) const { - return edge_probs[e.id_]; - } - prob_t DerivationProb(const vector& d) const { - prob_t p = prob_t::One(); - for (unsigned i = 0; i < d.size(); ++i) - p *= edge_probs[d[i]]; - return p; - } - vector edge_probs; -}; - -template -struct ModelAndData { - ModelAndData() : - base_lh(prob_t::One()), - logp0(10000, 10000), - mh_samples(), - mh_rejects() {} - - void SampleCorpus(const string& hgpath, int i); - void ResampleHyperparameters() { - for (map >::iterator it = rules.begin(); it != rules.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - CCRP_NoTable& RuleCRP(int lhs) { - map >::iterator it = rules.find(lhs); - if (it == rules.end()) { - rules.insert(make_pair(lhs, CCRP_NoTable(1,1))); - it = rules.find(lhs); - } - return it->second; - } - - void IncrementRule(const TRule& rule) { - CCRP_NoTable& crp = RuleCRP(rule.lhs_); - if (crp.increment(rule)) { - prob_t p; p.logeq(logp0(rule)); - base_lh *= p; - } - } - - void DecrementRule(const TRule& rule) { - CCRP_NoTable& crp = RuleCRP(rule.lhs_); - if (crp.decrement(rule)) { - prob_t 
p; p.logeq(logp0(rule)); - base_lh /= p; - } - } - - void DecrementDerivation(const Hypergraph& hg, const vector& d) { - for (unsigned i = 0; i < d.size(); ++i) { - const TRule& rule = *hg.edges_[d[i]].rule_; - DecrementRule(rule); - } - } - - void IncrementDerivation(const Hypergraph& hg, const vector& d) { - for (unsigned i = 0; i < d.size(); ++i) { - const TRule& rule = *hg.edges_[d[i]].rule_; - IncrementRule(rule); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (map >::const_iterator it = rules.begin(); it != rules.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - p *= base_lh; - return p; - } - - void ResampleDerivation(const Hypergraph& hg, vector* sampled_derivation); - - map > rules; // [lhs] -> distribution over RHSs - prob_t base_lh; - SimpleBase logp0; - vector > samples; // sampled derivations - unsigned int mh_samples; - unsigned int mh_rejects; -}; - -template -void ModelAndData::SampleCorpus(const string& hgpath, int n) { - vector hgs(n); hgs.clear(); - boost::unordered_map acc; - map tot; - for (int i = 0; i < n; ++i) { - ostringstream os; - os << hgpath << '/' << i << ".json.gz"; - if (!FileExists(os.str())) continue; - hgs.push_back(Hypergraph()); - ReadFile rf(os.str()); - HypergraphIO::ReadFromJSON(rf.stream(), &hgs.back()); - } - cerr << "Read " << hgs.size() << " alignment hypergraphs.\n"; - samples.resize(hgs.size()); - const unsigned SAMPLES = 2000; - const unsigned burnin = 3 * SAMPLES / 4; - const unsigned every = 20; - for (unsigned s = 0; s < SAMPLES; ++s) { - if (s % 10 == 0) { - if (s > 0) { cerr << endl; ResampleHyperparameters(); } - cerr << "[" << s << " LLH=" << log(Likelihood()) << " REJECTS=" << ((double)mh_rejects / mh_samples) << " LHS's=" << rules.size() << " base=" << log(base_lh) << "] "; - } - cerr << '.'; - for (unsigned i = 0; i < hgs.size(); ++i) { - ResampleDerivation(hgs[i], &samples[i]); - if (s > burnin && s % every == 0) { - for (unsigned j = 0; j < 
samples[i].size(); ++j) { - const TRule& rule = *hgs[i].edges_[samples[i][j]].rule_; - ++acc[rule]; - ++tot[rule.lhs_]; - } - } - } - } - cerr << endl; - for (boost::unordered_map::iterator it = acc.begin(); it != acc.end(); ++it) { - cout << it->first << " MyProb=" << log(it->second)-log(tot[it->first.lhs_]) << endl; - } -} - -template -void ModelAndData::ResampleDerivation(const Hypergraph& hg, vector* sampled_deriv) { - vector cur; - cur.swap(*sampled_deriv); - - const prob_t p_cur = Likelihood(); - DecrementDerivation(hg, cur); - if (cur.empty()) { - // first iteration, create restaurants - for (int i = 0; i < hg.edges_.size(); ++i) - RuleCRP(hg.edges_[i].rule_->lhs_); - } - MHSamplerEdgeProb wf(hg, rules, logp0, cur.empty()); -// MHSamplerEdgeProb wf(hg, rules, logp0, false); - const prob_t q_cur = wf.DerivationProb(cur); - vector node_probs; - Inside >(hg, &node_probs, wf); - queue q; - q.push(hg.nodes_.size() - 3); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = wf.edge_probs[edge.id_]; // edge proposal prob - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; - z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - 
sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } - IncrementDerivation(hg, *sampled_deriv); - -// cerr << "sampled derivation contains " << sampled_deriv->size() << " edges\n"; -// cerr << "DERIV:\n"; -// for (int i = 0; i < sampled_deriv->size(); ++i) { -// cerr << " " << hg.edges_[(*sampled_deriv)[i]].rule_->AsString() << endl; -// } - - if (cur.empty()) return; // accept first sample - - ++mh_samples; - // only need to do MH if proposal is different to current state - if (cur != *sampled_deriv) { - const prob_t q_prop = wf.DerivationProb(*sampled_deriv); - const prob_t p_prop = Likelihood(); - if (!rng->AcceptMetropolisHastings(p_prop, p_cur, q_prop, q_cur)) { - ++mh_rejects; - DecrementDerivation(hg, *sampled_deriv); - IncrementDerivation(hg, cur); - swap(cur, *sampled_deriv); - } - } -} - -int main(int argc, char** argv) { - rng = new MT19937; - ModelAndData m; - m.SampleCorpus("./hgs", 50); - // m.SampleCorpus("./btec/hgs", 5000); - return 0; -} - diff --git a/gi/pf/cfg_wfst_composer.cc b/gi/pf/cfg_wfst_composer.cc deleted file mode 100644 index 21d5ec5b..00000000 --- a/gi/pf/cfg_wfst_composer.cc +++ /dev/null @@ -1,731 +0,0 @@ -#include "cfg_wfst_composer.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include "fast_lexical_cast.hpp" - -#include "phrasetable_fst.h" -#include "sparse_vector.h" -#include "tdict.h" -#include "hg.h" -#include "hg_remove_eps.h" - -namespace po = boost::program_options; -using namespace std; -using namespace std::tr1; - -WFSTNode::~WFSTNode() {} -WFST::~WFST() {} - -// Define the following macro if you want to see lots of debugging output -// when you run the chart parser -#undef DEBUG_CHART_PARSER - -// A few constants used by the chart parser /////////////// -static const int kMAX_NODES = 2000000; -static const string kPHRASE_STRING = 
"X"; -static bool constants_need_init = true; -static WordID kUNIQUE_START; -static WordID kPHRASE; -static TRulePtr kX1X2; -static TRulePtr kX1; -static WordID kEPS; -static TRulePtr kEPSRule; - -static void InitializeConstants() { - if (constants_need_init) { - kPHRASE = TD::Convert(kPHRASE_STRING) * -1; - kUNIQUE_START = TD::Convert("S") * -1; - kX1X2.reset(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]")); - kX1.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); - kEPSRule.reset(new TRule("[X] ||| ||| ")); - kEPS = TD::Convert(""); - constants_need_init = false; - } -} -//////////////////////////////////////////////////////////// - -class EGrammarNode { - friend bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest); - friend void AddGrammarRule(const string& r, map* g); - public: -#ifdef DEBUG_CHART_PARSER - string hint; -#endif - EGrammarNode() : is_some_rule_complete(false), is_root(false) {} - const map& GetTerminals() const { return tptr; } - const map& GetNonTerminals() const { return ntptr; } - bool HasNonTerminals() const { return (!ntptr.empty()); } - bool HasTerminals() const { return (!tptr.empty()); } - bool RuleCompletes() const { - return (is_some_rule_complete || (ntptr.empty() && tptr.empty())); - } - bool GrammarContinues() const { - return !(ntptr.empty() && tptr.empty()); - } - bool IsRoot() const { - return is_root; - } - // these are the features associated with the rule from the start - // node up to this point. If you use these features, you must - // not Extend() this rule. 
- const SparseVector& GetCFGProductionFeatures() const { - return input_features; - } - - const EGrammarNode* Extend(const WordID& t) const { - if (t < 0) { - map::const_iterator it = ntptr.find(t); - if (it == ntptr.end()) return NULL; - return &it->second; - } else { - map::const_iterator it = tptr.find(t); - if (it == tptr.end()) return NULL; - return &it->second; - } - } - - private: - map tptr; - map ntptr; - SparseVector input_features; - bool is_some_rule_complete; - bool is_root; -}; -typedef map EGrammar; // indexed by the rule LHS - -// edges are immutable once created -struct Edge { -#ifdef DEBUG_CHART_PARSER - static int id_count; - const int id; -#endif - const WordID cat; // lhs side of rule proved/being proved - const EGrammarNode* const dot; // dot position - const WFSTNode* const q; // start of span - const WFSTNode* const r; // end of span - const Edge* const active_parent; // back pointer, NULL for PREDICT items - const Edge* const passive_parent; // back pointer, NULL for SCAN and PREDICT items - TRulePtr tps; // translations - boost::shared_ptr > features; // features from CFG rule - - bool IsPassive() const { - // when a rule is completed, this value will be set - return static_cast(features); - } - bool IsActive() const { return !IsPassive(); } - bool IsInitial() const { - return !(active_parent || passive_parent); - } - bool IsCreatedByScan() const { - return active_parent && !passive_parent && !dot->IsRoot(); - } - bool IsCreatedByPredict() const { - return dot->IsRoot(); - } - bool IsCreatedByComplete() const { - return active_parent && passive_parent; - } - - // constructor for PREDICT - Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(NULL), passive_parent(NULL), tps() {} - Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r, const Edge* act_parent) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - 
cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(act_parent), passive_parent(NULL), tps() {} - - // constructors for SCAN - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const TRulePtr& translations) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations) {} - - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const TRulePtr& translations, - const SparseVector& feats) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations), - features(new SparseVector(feats)) {} - - // constructors for COMPLETE - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const Edge *pas_par) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps() { - assert(pas_par->IsPassive()); - assert(act_par->IsActive()); - } - - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const Edge *pas_par, const SparseVector& feats) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps(), - features(new SparseVector(feats)) { - assert(pas_par->IsPassive()); - assert(act_par->IsActive()); - } - - // constructor for COMPLETE query - Edge(const WFSTNode* _r) : -#ifdef DEBUG_CHART_PARSER - id(0), -#endif - cat(0), dot(NULL), q(NULL), - r(_r), active_parent(NULL), passive_parent(NULL), tps() {} - // constructor for MERGE quere - Edge(const WFSTNode* _q, int) : -#ifdef DEBUG_CHART_PARSER - id(0), -#endif - cat(0), dot(NULL), q(_q), - r(NULL), active_parent(NULL), passive_parent(NULL), tps() {} -}; -#ifdef DEBUG_CHART_PARSER -int Edge::id_count = 0; -#endif - 
-ostream& operator<<(ostream& os, const Edge& e) { - string type = "PREDICT"; - if (e.IsCreatedByScan()) - type = "SCAN"; - else if (e.IsCreatedByComplete()) - type = "COMPLETE"; - os << "[" -#ifdef DEBUG_CHART_PARSER - << '(' << e.id << ") " -#else - << '(' << &e << ") " -#endif - << "q=" << e.q << ", r=" << e.r - << ", cat="<< TD::Convert(e.cat*-1) << ", dot=" - << e.dot -#ifdef DEBUG_CHART_PARSER - << e.dot->hint -#endif - << (e.IsActive() ? ", Active" : ", Passive") - << ", " << type; -#ifdef DEBUG_CHART_PARSER - if (e.active_parent) { os << ", act.parent=(" << e.active_parent->id << ')'; } - if (e.passive_parent) { os << ", psv.parent=(" << e.passive_parent->id << ')'; } -#endif - if (e.tps) { os << ", tps=" << e.tps->AsString(); } - return os << ']'; -} - -struct Traversal { - const Edge* const edge; // result from the active / passive combination - const Edge* const active; - const Edge* const passive; - Traversal(const Edge* me, const Edge* a, const Edge* p) : edge(me), active(a), passive(p) {} -}; - -struct UniqueTraversalHash { - size_t operator()(const Traversal* t) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(t->active); - x = ((x << 5) + x) ^ reinterpret_cast(t->passive); - x = ((x << 5) + x) ^ t->edge->IsActive(); - return x; - } -}; - -struct UniqueTraversalEquals { - size_t operator()(const Traversal* a, const Traversal* b) const { - return (a->passive == b->passive && a->active == b->active && a->edge->IsActive() == b->edge->IsActive()); - } -}; - -struct UniqueEdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - if (e->IsActive()) { - x = ((x << 5) + x) ^ reinterpret_cast(e->dot); - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - x = ((x << 5) + x) ^ static_cast(e->cat); - x += 13; - } else { // with passive edges, we don't care about the dot - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - x = ((x << 5) + x) ^ 
static_cast(e->cat); - } - return x; - } -}; - -struct UniqueEdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - if (a->IsActive() != b->IsActive()) return false; - if (a->IsActive()) { - return (a->cat == b->cat) && (a->dot == b->dot) && (a->q == b->q) && (a->r == b->r); - } else { - return (a->cat == b->cat) && (a->q == b->q) && (a->r == b->r); - } - } -}; - -struct REdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - return x; - } -}; - -struct REdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - return (a->r == b->r); - } -}; - -struct QEdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - return x; - } -}; - -struct QEdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - return (a->q == b->q); - } -}; - -struct EdgeQueue { - queue q; - EdgeQueue() {} - void clear() { while(!q.empty()) q.pop(); } - bool HasWork() const { return !q.empty(); } - const Edge* Next() { const Edge* res = q.front(); q.pop(); return res; } - void AddEdge(const Edge* s) { q.push(s); } -}; - -class CFG_WFSTComposerImpl { - public: - CFG_WFSTComposerImpl(WordID start_cat, - const WFSTNode* q_0, - const WFSTNode* q_final) : start_cat_(start_cat), q_0_(q_0), q_final_(q_final) {} - - // returns false if the intersection is empty - bool Compose(const EGrammar& g, Hypergraph* forest) { - goal_node = NULL; - EGrammar::const_iterator sit = g.find(start_cat_); - forest->ReserveNodes(kMAX_NODES); - assert(sit != g.end()); - Edge* init = new Edge(start_cat_, &sit->second, q_0_); - assert(IncorporateNewEdge(init)); - while (exp_agenda.HasWork() || agenda.HasWork()) { - while(exp_agenda.HasWork()) { - const Edge* edge = exp_agenda.Next(); - FinishEdge(edge, forest); - } - if (agenda.HasWork()) { - const Edge* edge = agenda.Next(); -#ifdef DEBUG_CHART_PARSER - cerr << "processing (" << edge->id << ')' << endl; 
-#endif - if (edge->IsActive()) { - if (edge->dot->HasTerminals()) - DoScan(edge); - if (edge->dot->HasNonTerminals()) { - DoMergeWithPassives(edge); - DoPredict(edge, g); - } - } else { - DoComplete(edge); - } - } - } - if (goal_node) { - forest->PruneUnreachable(goal_node->id_); - RemoveEpsilons(forest, kEPS); - } - FreeAll(); - return goal_node; - } - - void FreeAll() { - for (int i = 0; i < free_list_.size(); ++i) - delete free_list_[i]; - free_list_.clear(); - for (int i = 0; i < traversal_free_list_.size(); ++i) - delete traversal_free_list_[i]; - traversal_free_list_.clear(); - all_traversals.clear(); - exp_agenda.clear(); - agenda.clear(); - tps2node.clear(); - edge2node.clear(); - all_edges.clear(); - passive_edges.clear(); - active_edges.clear(); - } - - ~CFG_WFSTComposerImpl() { - FreeAll(); - } - - // returns the total number of edges created during composition - int EdgesCreated() const { - return free_list_.size(); - } - - private: - void DoScan(const Edge* edge) { - // here, we assume that the FST will potentially have many more outgoing - // edges than the grammar, which will be just a couple. If you want to - // efficiently handle the case where both are relatively large, this code - // will need to change how the intersection is done. The best general - // solution would probably be the Baeza-Yates double binary search. 
- - const EGrammarNode* dot = edge->dot; - const WFSTNode* r = edge->r; - const map& terms = dot->GetTerminals(); - for (map::const_iterator git = terms.begin(); - git != terms.end(); ++git) { - - if (!(TD::Convert(git->first)[0] >= '0' && TD::Convert(git->first)[0] <= '9')) { - std::cerr << "TERMINAL SYMBOL: " << TD::Convert(git->first) << endl; - abort(); - } - std::vector > extensions = r->ExtendInput(atoi(TD::Convert(git->first).c_str())); - for (unsigned nsi = 0; nsi < extensions.size(); ++nsi) { - const WFSTNode* next_r = extensions[nsi].first; - const EGrammarNode* next_dot = &git->second; - const bool grammar_continues = next_dot->GrammarContinues(); - const bool rule_completes = next_dot->RuleCompletes(); - if (extensions[nsi].second) - cerr << "!!! " << extensions[nsi].second->AsString() << endl; - // cerr << " rule completes: " << rule_completes << " after consuming " << TD::Convert(git->first) << endl; - assert(grammar_continues || rule_completes); - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - if (rule_completes) - IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second, input_features)); - if (grammar_continues) - IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second)); - } - } - } - - void DoPredict(const Edge* edge, const EGrammar& g) { - const EGrammarNode* dot = edge->dot; - const map& non_terms = dot->GetNonTerminals(); - for (map::const_iterator git = non_terms.begin(); - git != non_terms.end(); ++git) { - const WordID nt_to_predict = git->first; - //cerr << edge->id << " -- " << TD::Convert(nt_to_predict*-1) << endl; - EGrammar::const_iterator egi = g.find(nt_to_predict); - if (egi == g.end()) { - cerr << "[ERROR] Can't find any grammar rules with a LHS of type " - << TD::Convert(-1*nt_to_predict) << '!' 
<< endl; - continue; - } - assert(edge->IsActive()); - const EGrammarNode* new_dot = &egi->second; - Edge* new_edge = new Edge(nt_to_predict, new_dot, edge->r, edge); - IncorporateNewEdge(new_edge); - } - } - - void DoComplete(const Edge* passive) { -#ifdef DEBUG_CHART_PARSER - cerr << " complete: " << *passive << endl; -#endif - const WordID completed_nt = passive->cat; - const WFSTNode* q = passive->q; - const WFSTNode* next_r = passive->r; - const Edge query(q); - const pair::iterator, - unordered_multiset::iterator > p = - active_edges.equal_range(&query); - for (unordered_multiset::iterator it = p.first; - it != p.second; ++it) { - const Edge* active = *it; -#ifdef DEBUG_CHART_PARSER - cerr << " pos: " << *active << endl; -#endif - const EGrammarNode* next_dot = active->dot->Extend(completed_nt); - if (!next_dot) continue; - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - // add up to 2 rules - if (next_dot->RuleCompletes()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); - if (next_dot->GrammarContinues()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); - } - } - - void DoMergeWithPassives(const Edge* active) { - // edge is active, has non-terminals, we need to find the passives that can extend it - assert(active->IsActive()); - assert(active->dot->HasNonTerminals()); -#ifdef DEBUG_CHART_PARSER - cerr << " merge active with passives: ACT=" << *active << endl; -#endif - const Edge query(active->r, 1); - const pair::iterator, - unordered_multiset::iterator > p = - passive_edges.equal_range(&query); - for (unordered_multiset::iterator it = p.first; - it != p.second; ++it) { - const Edge* passive = *it; - const EGrammarNode* next_dot = active->dot->Extend(passive->cat); - if (!next_dot) continue; - const WFSTNode* next_r = passive->r; - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - if 
(next_dot->RuleCompletes()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); - if (next_dot->GrammarContinues()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); - } - } - - // take ownership of edge memory, add to various indexes, etc - // returns true if this edge is new - bool IncorporateNewEdge(Edge* edge) { - free_list_.push_back(edge); - if (edge->passive_parent && edge->active_parent) { - Traversal* t = new Traversal(edge, edge->active_parent, edge->passive_parent); - traversal_free_list_.push_back(t); - if (all_traversals.find(t) != all_traversals.end()) { - return false; - } else { - all_traversals.insert(t); - } - } - exp_agenda.AddEdge(edge); - return true; - } - - bool FinishEdge(const Edge* edge, Hypergraph* hg) { - bool is_new = false; - if (all_edges.find(edge) == all_edges.end()) { -#ifdef DEBUG_CHART_PARSER - cerr << *edge << " is NEW\n"; -#endif - all_edges.insert(edge); - is_new = true; - if (edge->IsPassive()) passive_edges.insert(edge); - if (edge->IsActive()) active_edges.insert(edge); - agenda.AddEdge(edge); - } else { -#ifdef DEBUG_CHART_PARSER - cerr << *edge << " is NOT NEW.\n"; -#endif - } - AddEdgeToTranslationForest(edge, hg); - return is_new; - } - - // build the translation forest - void AddEdgeToTranslationForest(const Edge* edge, Hypergraph* hg) { - assert(hg->nodes_.size() < kMAX_NODES); - Hypergraph::Node* tps = NULL; - // first add any target language rules - if (edge->tps) { - Hypergraph::Node*& node = tps2node[(size_t)edge->tps.get()]; - if (!node) { - // cerr << "Creating phrases for " << edge->tps << endl; - const TRulePtr& rule = edge->tps; - node = hg->AddNode(kPHRASE); - Hypergraph::Edge* hg_edge = hg->AddEdge(rule, Hypergraph::TailNodeVector()); - hg_edge->feature_values_ += rule->GetFeatureValues(); - hg->ConnectEdgeToHeadNode(hg_edge, node); - } - tps = node; - } - Hypergraph::Node*& head_node = edge2node[edge]; - if 
(!head_node) - head_node = hg->AddNode(kPHRASE); - if (edge->cat == start_cat_ && edge->q == q_0_ && edge->r == q_final_ && edge->IsPassive()) { - assert(goal_node == NULL || goal_node == head_node); - goal_node = head_node; - } - Hypergraph::TailNodeVector tail; - SparseVector extra; - if (edge->IsCreatedByPredict()) { - // extra.set_value(FD::Convert("predict"), 1); - } else if (edge->IsCreatedByScan()) { - tail.push_back(edge2node[edge->active_parent]->id_); - if (tps) { - tail.push_back(tps->id_); - } - //extra.set_value(FD::Convert("scan"), 1); - } else if (edge->IsCreatedByComplete()) { - tail.push_back(edge2node[edge->active_parent]->id_); - tail.push_back(edge2node[edge->passive_parent]->id_); - //extra.set_value(FD::Convert("complete"), 1); - } else { - assert(!"unexpected edge type!"); - } - //cerr << head_node->id_ << "<--" << *edge << endl; - -#ifdef DEBUG_CHART_PARSER - for (int i = 0; i < tail.size(); ++i) - if (tail[i] == head_node->id_) { - cerr << "ERROR: " << *edge << "\n i=" << i << endl; - if (i == 1) { cerr << "\tP: " << *edge->passive_parent << endl; } - if (i == 0) { cerr << "\tA: " << *edge->active_parent << endl; } - assert(!"self-loop found!"); - } -#endif - Hypergraph::Edge* hg_edge = NULL; - if (tail.size() == 0) { - hg_edge = hg->AddEdge(kEPSRule, tail); - } else if (tail.size() == 1) { - hg_edge = hg->AddEdge(kX1, tail); - } else if (tail.size() == 2) { - hg_edge = hg->AddEdge(kX1X2, tail); - } - if (edge->features) - hg_edge->feature_values_ += *edge->features; - hg_edge->feature_values_ += extra; - hg->ConnectEdgeToHeadNode(hg_edge, head_node); - } - - Hypergraph::Node* goal_node; - EdgeQueue exp_agenda; - EdgeQueue agenda; - unordered_map tps2node; - unordered_map edge2node; - unordered_set all_traversals; - unordered_set all_edges; - unordered_multiset passive_edges; - unordered_multiset active_edges; - vector free_list_; - vector traversal_free_list_; - const WordID start_cat_; - const WFSTNode* const q_0_; - const WFSTNode* const 
q_final_; -}; - -#ifdef DEBUG_CHART_PARSER -static string TrimRule(const string& r) { - size_t start = r.find(" |||") + 5; - size_t end = r.rfind(" |||"); - return r.substr(start, end - start); -} -#endif - -void AddGrammarRule(const string& r, EGrammar* g) { - const size_t pos = r.find(" ||| "); - if (pos == string::npos || r[0] != '[') { - cerr << "Bad rule: " << r << endl; - return; - } - const size_t rpos = r.rfind(" ||| "); - string feats; - string rs = r; - if (rpos != pos) { - feats = r.substr(rpos + 5); - rs = r.substr(0, rpos); - } - string rhs = rs.substr(pos + 5); - string trule = rs + " ||| " + rhs + " ||| " + feats; - TRule tr(trule); - cerr << "X: " << tr.e_[0] << endl; -#ifdef DEBUG_CHART_PARSER - string hint_last_rule; -#endif - EGrammarNode* cur = &(*g)[tr.GetLHS()]; - cur->is_root = true; - for (int i = 0; i < tr.FLength(); ++i) { - WordID sym = tr.f()[i]; -#ifdef DEBUG_CHART_PARSER - hint_last_rule = TD::Convert(sym < 0 ? -sym : sym); - cur->hint += " <@@> (*" + hint_last_rule + ") " + TrimRule(tr.AsString()); -#endif - if (sym < 0) - cur = &cur->ntptr[sym]; - else - cur = &cur->tptr[sym]; - } -#ifdef DEBUG_CHART_PARSER - cur->hint += " <@@> (" + hint_last_rule + "*) " + TrimRule(tr.AsString()); -#endif - cur->is_some_rule_complete = true; - cur->input_features = tr.GetFeatureValues(); -} - -CFG_WFSTComposer::~CFG_WFSTComposer() { - delete pimpl_; -} - -CFG_WFSTComposer::CFG_WFSTComposer(const WFST& wfst) { - InitializeConstants(); - pimpl_ = new CFG_WFSTComposerImpl(kUNIQUE_START, wfst.Initial(), wfst.Final()); -} - -bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest) { - // first, convert the src forest into an EGrammar - EGrammar g; - const int nedges = src_forest.edges_.size(); - const int nnodes = src_forest.nodes_.size(); - vector cats(nnodes); - bool assign_cats = false; - for (int i = 0; i < nnodes; ++i) - if (assign_cats) { - cats[i] = TD::Convert("CAT_" + boost::lexical_cast(i)) * -1; - } else { - 
cats[i] = src_forest.nodes_[i].cat_; - } - // construct the grammar - for (int i = 0; i < nedges; ++i) { - const Hypergraph::Edge& edge = src_forest.edges_[i]; - const vector& src = edge.rule_->f(); - EGrammarNode* cur = &g[cats[edge.head_node_]]; - cur->is_root = true; - int ntc = 0; - for (int j = 0; j < src.size(); ++j) { - WordID sym = src[j]; - if (sym <= 0) { - sym = cats[edge.tail_nodes_[ntc]]; - ++ntc; - cur = &cur->ntptr[sym]; - } else { - cur = &cur->tptr[sym]; - } - } - cur->is_some_rule_complete = true; - cur->input_features = edge.feature_values_; - } - EGrammarNode& goal_rule = g[kUNIQUE_START]; - assert((goal_rule.ntptr.size() == 1 && goal_rule.tptr.size() == 0) || - (goal_rule.ntptr.size() == 0 && goal_rule.tptr.size() == 1)); - - return pimpl_->Compose(g, trg_forest); -} - -bool CFG_WFSTComposer::Compose(istream* in, Hypergraph* trg_forest) { - EGrammar g; - while(*in) { - string line; - getline(*in, line); - if (line.empty()) continue; - AddGrammarRule(line, &g); - } - - return pimpl_->Compose(g, trg_forest); -} diff --git a/gi/pf/cfg_wfst_composer.h b/gi/pf/cfg_wfst_composer.h deleted file mode 100644 index cf47f459..00000000 --- a/gi/pf/cfg_wfst_composer.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef _CFG_WFST_COMPOSER_H_ -#define _CFG_WFST_COMPOSER_H_ - -#include -#include -#include - -#include "trule.h" -#include "wordid.h" - -class CFG_WFSTComposerImpl; -class Hypergraph; - -struct WFSTNode { - virtual ~WFSTNode(); - // returns the next states reachable by consuming srcindex (which identifies a word) - // paired with the output string generated by taking that transition. 
- virtual std::vector > ExtendInput(unsigned srcindex) const = 0; -}; - -struct WFST { - virtual ~WFST(); - virtual const WFSTNode* Final() const = 0; - virtual const WFSTNode* Initial() const = 0; -}; - -class CFG_WFSTComposer { - public: - ~CFG_WFSTComposer(); - explicit CFG_WFSTComposer(const WFST& wfst); - bool Compose(const Hypergraph& in_forest, Hypergraph* trg_forest); - - // reads the grammar from a file. There must be a single top-level - // S -> X rule. Anything else is possible. Format is: - // [S] ||| [SS,1] - // [SS] ||| [NP,1] [VP,2] ||| Feature1=0.2 Feature2=-2.3 - // [SS] ||| [VP,1] [NP,2] ||| Feature1=0.8 - // [NP] ||| [DET,1] [N,2] ||| Feature3=2 - // ... - bool Compose(std::istream* grammar_file, Hypergraph* trg_forest); - - private: - CFG_WFSTComposerImpl* pimpl_; -}; - -#endif diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h deleted file mode 100644 index 81ddb206..00000000 --- a/gi/pf/conditional_pseg.h +++ /dev/null @@ -1,275 +0,0 @@ -#ifndef _CONDITIONAL_PSEG_H_ -#define _CONDITIONAL_PSEG_H_ - -#include -#include -#include -#include - -#include "m.h" -#include "prob.h" -#include "ccrp_nt.h" -#include "mfcr.h" -#include "trule.h" -#include "base_distributions.h" -#include "tdict.h" - -template -struct MConditionalTranslationModel { - explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0), d(0.5), strength(1.0), lambdas(1, prob_t::One()), p0s(1) {} - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; - for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl; - } - } - - double log_likelihood(const 
double& dd, const double& aa) const { - if (aa <= -dd) return -std::numeric_limits::infinity(); - //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); - double llh = Md::log_beta_density(dd, 1, 1) + - Md::log_gamma_density(dd + aa, 1, 1); - typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::const_iterator it; - for (it = r.begin(); it != r.end(); ++it) - llh += it->second.log_crp_prob(dd, aa); - return llh; - } - - struct DiscountResampler { - DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {} - const MConditionalTranslationModel& m_; - double operator()(const double& proposed_discount) const { - return m_.log_likelihood(proposed_discount, m_.strength); - } - }; - - struct AlphaResampler { - AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {} - const MConditionalTranslationModel& m_; - double operator()(const double& proposed_strength) const { - return m_.log_likelihood(m_.d, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng) { - typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::iterator it; -#if 1 - for (it = r.begin(); it != r.end(); ++it) { - it->second.resample_hyperparameters(rng); - } -#else - const unsigned nloop = 5; - const unsigned niterations = 10; - DiscountResampler dr(*this); - AlphaResampler ar(*this); - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - d = slice_sampler1d(dr, d, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - strength = slice_sampler1d(ar, strength, *rng, -d, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl; 
- for (it = r.begin(); it != r.end(); ++it) { - it->second.set_discount(d); - it->second.set_strength(strength); - } -#endif - } - - int DecrementRule(const TRule& rule, MT19937* rng) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - const TableCount delta = it->second.decrement(rule, rng); - if (delta.count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return delta.count; - } - - int IncrementRule(const TRule& rule, MT19937* rng) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - //it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first; - it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1,1,1,1,0.6, -0.12))).first; - } - p0s[0] = rp0(rule); - TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); - return delta.count; - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = r.find(rule.f_); - if (it == r.end()) { - p = rp0(rule); - } else { - p0s[0] = rp0(rule); - p = it->second.prob(rule, p0s.begin(), lambdas.begin()); - } - return p; - } - - prob_t Likelihood() const { - prob_t p; p.logeq(log_likelihood(d, strength)); - return p; - } - - const ConditionalBaseMeasure& rp0; - typedef std::tr1::unordered_map, - MFCR<1, TRule>, - boost::hash > > RuleModelHash; - RuleModelHash r; - double d, strength; - std::vector lambdas; - mutable std::vector p0s; -}; - -template -struct ConditionalTranslationModel { - explicit ConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0) {} - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second << 
'\t' << i2->first << std::endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - int DecrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - int count = it->second.decrement(rule); - if (count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return count; - } - - int IncrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - it = r.insert(make_pair(rule.f_, CCRP_NoTable(1.0, 1.0, 8.0))).first; - } - int count = it->second.increment(rule); - return count; - } - - void IncrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = r.find(rule.f_); - if (it == r.end()) { - p.logeq(log(rp0(rule))); - } else { - p.logeq(it->second.logprob(rule, log(rp0(rule)))); - } - return p; - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - p *= rp0(i2->first); - } - return p; - } - - const ConditionalBaseMeasure& rp0; - typedef std::tr1::unordered_map, - CCRP_NoTable, - boost::hash > > RuleModelHash; - RuleModelHash r; -}; - -template -struct ConditionalParallelSegementationModel { - explicit ConditionalParallelSegementationModel(ConditionalBaseMeasure& rcp0) : - tmodel(rcp0), base(prob_t::One()), aligns(1,1) {} - - ConditionalTranslationModel tmodel; - - void DecrementRule(const TRule& rule) { - tmodel.DecrementRule(rule); - } - - 
void IncrementRule(const TRule& rule) { - tmodel.IncrementRule(rule); - } - - void IncrementRulesAndAlignments(const std::vector& rules) { - tmodel.IncrementRules(rules); - for (int i = 0; i < rules.size(); ++i) { - IncrementAlign(rules[i]->f_.size()); - } - } - - void DecrementRulesAndAlignments(const std::vector& rules) { - tmodel.DecrementRules(rules); - for (int i = 0; i < rules.size(); ++i) { - DecrementAlign(rules[i]->f_.size()); - } - } - - prob_t RuleProbability(const TRule& rule) const { - return tmodel.RuleProbability(rule); - } - - void IncrementAlign(unsigned span) { - if (aligns.increment(span)) { - // TODO - } - } - - void DecrementAlign(unsigned span) { - if (aligns.decrement(span)) { - // TODO - } - } - - prob_t AlignProbability(unsigned span) const { - prob_t p; - p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0))); - return p; - } - - prob_t Likelihood() const { - prob_t p; p.logeq(aligns.log_crp_prob()); - p *= base; - p *= tmodel.Likelihood(); - return p; - } - - prob_t base; - CCRP_NoTable aligns; -}; - -#endif - diff --git a/gi/pf/condnaive.cc b/gi/pf/condnaive.cc deleted file mode 100644 index 419731ac..00000000 --- a/gi/pf/condnaive.cc +++ /dev/null @@ -1,298 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - 
("max_src_phrase",po::value()->default_value(4),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(4),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -struct ModelAndData { - explicit ModelAndData(ConditionalParallelSegementationModel& m, const vector >& ce, const vector >& cf, const set& ve, const set& vf) : - model(m), - rng(&*prng), - corpuse(ce), - corpusf(cf), - vocabe(ve), - vocabf(vf), - mh_samples(), - mh_rejects(), - kX(-TD::Convert("X")), - derivations(corpuse.size()) {} - - void ResampleHyperparameters() { - } - - void InstantiateRule(const pair& from, - const pair& to, - const vector& sentf, - const vector& sente, - TRule* rule) const { - rule->f_.clear(); - rule->e_.clear(); - rule->lhs_ = kX; - for (short i = from.first; i < to.first; ++i) - rule->f_.push_back(sentf[i]); - for (short i = from.second; i < to.second; ++i) - rule->e_.push_back(sente[i]); - } - - void DecrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; 
- for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.DecrementRule(x); - model.DecrementAlign(x.f_.size()); - } - } - - void PrintDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - cerr << i << '/' << (d.size() - 1) << ": " << x << endl; - } - } - - void IncrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.IncrementRule(x); - model.IncrementAlign(x.f_.size()); - } - } - - prob_t Likelihood() const { - return model.Likelihood(); - } - - prob_t DerivationProposalProbability(const vector >& d, const vector& sentf, const vector& sente) const { - prob_t p = prob_t::One(); - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - p *= model.RuleProbability(x); - p *= model.AlignProbability(x.f_.size()); - } - return p; - } - - void Sample(); - - ConditionalParallelSegementationModel& model; - MT19937* rng; - const vector >& corpuse, corpusf; - const set& vocabe, vocabf; - unsigned mh_samples, mh_rejects; - const int kX; - vector > > derivations; -}; - -void ModelAndData::Sample() { - unsigned MAXK = kMAX_SRC_PHRASE; - unsigned MAXL = kMAX_TRG_PHRASE; - TRule x; - x.lhs_ = -TD::Convert("X"); - - for (int samples = 0; samples < 1000; ++samples) { - if (samples % 1 == 0 && samples > 0) { - //ResampleHyperparameters(); - cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; - for (int i = 0; i < 10; ++i) { - cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; - PrintDerivation(derivations[i], corpusf[i], corpuse[i]); - } - static TRule xx("[X] ||| w n ||| s h ||| 
X=0"); - const CCRP_NoTable& dcrp = model.tmodel.r.find(xx.f_)->second; - for (CCRP_NoTable::const_iterator it = dcrp.begin(); it != dcrp.end(); ++it) { - cerr << "\t" << it->second << "\t" << it->first << endl; - } - } - cerr << '.' << flush; - for (int s = 0; s < corpuse.size(); ++s) { - const vector& sentf = corpusf[s]; - const vector& sente = corpuse[s]; -// cerr << " CUSTOMERS: " << rules.num_customers() << endl; -// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - - vector >& deriv = derivations[s]; - const prob_t p_cur = Likelihood(); - DecrementDerivation(deriv, sentf, sente); - - boost::multi_array a(boost::extents[sentf.size() + 1][sente.size() + 1]); - boost::multi_array trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); - a[0][0] = prob_t::One(); - for (int i = 0; i < sentf.size(); ++i) { - for (int j = 0; j < sente.size(); ++j) { - const prob_t src_a = a[i][j]; - x.f_.clear(); - for (int k = 1; k <= MAXK; ++k) { - if (i + k > sentf.size()) break; - x.f_.push_back(sentf[i + k - 1]); - x.e_.clear(); - const prob_t p_span = model.AlignProbability(k); // prob of consuming this much source - for (int l = 1; l <= MAXL; ++l) { - if (j + l > sente.size()) break; - x.e_.push_back(sente[j + l - 1]); - trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * p_span; - a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; - } - } - } - } -// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; - const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - - vector > newderiv; - int cur_i = sentf.size(); - int cur_j = sente.size(); - while(cur_i > 0 && cur_j > 0) { - newderiv.push_back(pair(cur_i, cur_j)); -// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; - SampleSet ss; - vector > nexts; - for (int k = 1; k <= MAXK; ++k) { - const int hyp_i = cur_i - k; - if (hyp_i < 0) break; - for (int l = 1; l <= MAXL; ++l) { - const int hyp_j = cur_j - l; - if (hyp_j < 0) break; - 
const prob_t& inside = a[hyp_i][hyp_j]; - if (inside == prob_t::Zero()) continue; - const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; - if (transp == prob_t::Zero()) continue; - const prob_t p = inside * transp; - ss.add(p); - nexts.push_back(pair(hyp_i, hyp_j)); -// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl; - } - } -// cerr << " sample set has " << nexts.size() << " elements.\n"; - const int selected = rng->SelectSample(ss); - cur_i = nexts[selected].first; - cur_j = nexts[selected].second; - } - newderiv.push_back(pair(0,0)); - const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); - IncrementDerivation(newderiv, sentf, sente); -// cerr << "SANITY: " << q_new << " " <(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - Model1 m1(conf["model1"].as()); - - PhraseConditionalBase pcb0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - ConditionalParallelSegementationModel x(pcb0); - - ModelAndData posterior(x, corpuse, corpusf, vocabe, vocabf); - posterior.Sample(); - - TRule r1("[X] ||| x ||| l e ||| X=0"); - TRule r2("[X] ||| A ||| a d ||| X=0"); - TRule r3("[X] ||| n ||| e r ||| X=0"); - TRule r4("[X] ||| x A n ||| b l a g ||| X=0"); - - PhraseConditionalUninformativeBase u0(vocabe.size()); 
- - cerr << (pcb0(r1)*pcb0(r2)*pcb0(r3)) << endl; - cerr << (u0(r4)) << endl; - - return 0; -} - diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc deleted file mode 100644 index cb6e4ed7..00000000 --- a/gi/pf/corpus.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "corpus.h" - -#include -#include -#include - -#include "tdict.h" -#include "filelib.h" - -using namespace std; - -namespace corpus { - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - ReadFile rf(filename); - istream* in = rf.stream(); - assert(*in); - string line; - unsigned lc = 0; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(getline(*in, line)) { - ++lc; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { - isf = false; - } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - if (cur == kDIV) { - cerr << "ERROR in " << lc << ": " << line << endl << endl; - abort(); - } - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } -} - -} - diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h deleted file mode 100644 index e7febdb7..00000000 --- a/gi/pf/corpus.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _CORPUS_H_ -#define _CORPUS_H_ - -#include -#include -#include -#include "wordid.h" - -namespace corpus { - -void ReadParallelCorpus(const std::string& filename, - std::vector >* f, - std::vector >* e, - std::set* vocab_f, - std::set* vocab_e); - -} - -#endif diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc deleted file mode 100644 index 75ccad72..00000000 --- a/gi/pf/dpnaive.cc +++ /dev/null @@ -1,301 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_distributions.h" -#include 
"monotonic_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(4),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(4),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -template -struct ModelAndData { - explicit ModelAndData(MonotonicParallelSegementationModel& m, const Base& b, const vector >& ce, const vector >& cf, const set& ve, 
const set& vf) : - model(m), - rng(&*prng), - p0(b), - baseprob(prob_t::One()), - corpuse(ce), - corpusf(cf), - vocabe(ve), - vocabf(vf), - mh_samples(), - mh_rejects(), - kX(-TD::Convert("X")), - derivations(corpuse.size()) {} - - void ResampleHyperparameters() { - } - - void InstantiateRule(const pair& from, - const pair& to, - const vector& sentf, - const vector& sente, - TRule* rule) const { - rule->f_.clear(); - rule->e_.clear(); - rule->lhs_ = kX; - for (short i = from.first; i < to.first; ++i) - rule->f_.push_back(sentf[i]); - for (short i = from.second; i < to.second; ++i) - rule->e_.push_back(sente[i]); - } - - void DecrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.DecrementRule(x); - model.DecrementContinue(); - } - model.DecrementStop(); - } - - void PrintDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - cerr << i << '/' << (d.size() - 1) << ": " << x << endl; - } - } - - void IncrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.IncrementRule(x); - model.IncrementContinue(); - } - model.IncrementStop(); - } - - prob_t Likelihood() const { - return model.Likelihood(); - } - - prob_t DerivationProposalProbability(const vector >& d, const vector& sentf, const vector& sente) const { - prob_t p = model.StopProbability(); - if (d.size() < 2) return p; - TRule x; - const prob_t p_cont = model.ContinueProbability(); - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - p *= p_cont; - p *= model.RuleProbability(x); - } - return p; - } - - void 
Sample(); - - MonotonicParallelSegementationModel& model; - MT19937* rng; - const Base& p0; - prob_t baseprob; // cached value of generating the table table labels from p0 - // this can't be used if we go to a hierarchical prior! - const vector >& corpuse, corpusf; - const set& vocabe, vocabf; - unsigned mh_samples, mh_rejects; - const int kX; - vector > > derivations; -}; - -template -void ModelAndData::Sample() { - unsigned MAXK = kMAX_SRC_PHRASE; - unsigned MAXL = kMAX_TRG_PHRASE; - TRule x; - x.lhs_ = -TD::Convert("X"); - for (int samples = 0; samples < 1000; ++samples) { - if (samples % 1 == 0 && samples > 0) { - //ResampleHyperparameters(); - cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; - for (int i = 0; i < 10; ++i) { - cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; - PrintDerivation(derivations[i], corpusf[i], corpuse[i]); - } - } - cerr << '.' << flush; - for (int s = 0; s < corpuse.size(); ++s) { - const vector& sentf = corpusf[s]; - const vector& sente = corpuse[s]; -// cerr << " CUSTOMERS: " << rules.num_customers() << endl; -// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - - vector >& deriv = derivations[s]; - const prob_t p_cur = Likelihood(); - DecrementDerivation(deriv, sentf, sente); - - boost::multi_array a(boost::extents[sentf.size() + 1][sente.size() + 1]); - boost::multi_array trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); - a[0][0] = prob_t::One(); - const prob_t q_stop = model.StopProbability(); - const prob_t q_cont = model.ContinueProbability(); - for (int i = 0; i < sentf.size(); ++i) { - for (int j = 0; j < sente.size(); ++j) { - const prob_t src_a = a[i][j]; - x.f_.clear(); - for (int k = 1; k <= MAXK; ++k) { - if (i + k > sentf.size()) break; - x.f_.push_back(sentf[i + k - 1]); - x.e_.clear(); - for (int l = 1; l <= MAXL; ++l) { - if (j + l > 
sente.size()) break; - x.e_.push_back(sente[j + l - 1]); - const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size()); - const prob_t& cp = stop_now ? q_stop : q_cont; - trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp; - a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; - } - } - } - } -// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; - const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - - vector > newderiv; - int cur_i = sentf.size(); - int cur_j = sente.size(); - while(cur_i > 0 && cur_j > 0) { - newderiv.push_back(pair(cur_i, cur_j)); -// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; - SampleSet ss; - vector > nexts; - for (int k = 1; k <= MAXK; ++k) { - const int hyp_i = cur_i - k; - if (hyp_i < 0) break; - for (int l = 1; l <= MAXL; ++l) { - const int hyp_j = cur_j - l; - if (hyp_j < 0) break; - const prob_t& inside = a[hyp_i][hyp_j]; - if (inside == prob_t::Zero()) continue; - const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; - if (transp == prob_t::Zero()) continue; - const prob_t p = inside * transp; - ss.add(p); - nexts.push_back(pair(hyp_i, hyp_j)); -// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl; - } - } -// cerr << " sample set has " << nexts.size() << " elements.\n"; - const int selected = rng->SelectSample(ss); - cur_i = nexts[selected].first; - cur_j = nexts[selected].second; - } - newderiv.push_back(pair(0,0)); - const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); - IncrementDerivation(newderiv, sentf, sente); -// cerr << "SANITY: " << q_new << " " <(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (!conf.count("inverse_model1")) { - cerr << argv[0] << "Please use --inverse_model1 to specify inverse model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - 
prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); -// PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MonotonicParallelSegementationModel m(alp0); - - ModelAndData posterior(m, alp0, corpuse, corpusf, vocabe, vocabf); - posterior.Sample(); - - return 0; -} - diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl deleted file mode 100755 index d00c2168..00000000 --- a/gi/pf/guess-translits.pl +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use utf8; - -my $MIN_PMI = -3; - -my %fs; -my %es; -my %ef; - -die "Usage: $0 < input.utf8.txt\n" if scalar @ARGV > 0; - -binmode(STDIN,":utf8"); -binmode(STDOUT,":utf8"); -binmode(STDERR,":utf8"); - -my $tot = 0; -print STDERR "Reading alignments from STDIN ...\n"; -while() { - chomp; - my ($fsent, $esent, $alsent) = split / \|\|\| /; - die "Format should be 'foreign sentence ||| english sentence ||| 0-0 1-1 ...'\n" unless defined $fsent && defined $esent && defined $alsent; - - my @fws = split /\s+/, $fsent; - my @ews = split /\s+/, $esent; - my @as = split /\s+/, $alsent; - my %a2b; - my %b2a; - for my $ap (@as) { - my ($a,$b) = split /-/, $ap; - die "BAD INPUT: $_\n" unless defined $a && defined $b; - $a2b{$a}->{$b} = 1; - $b2a{$b}->{$a} = 1; - } - for my $a (keys 
%a2b) { - my $bref = $a2b{$a}; - next unless scalar keys %$bref < 2; - my $b = (keys %$bref)[0]; - next unless scalar keys %{$b2a{$b}} < 2; - my $f = $fws[$a]; - next unless defined $f; - next unless length($f) > 3; - my $e = $ews[$b]; - next unless defined $e; - next unless length($e) > 3; - - $ef{$f}->{$e}++; - $es{$e}++; - $fs{$f}++; - $tot++; - } -} -my $ltot = log($tot); -my $num = 0; -print STDERR "Extracting pairs for PMI > $MIN_PMI ...\n"; -for my $f (keys %fs) { - my $logf = log($fs{$f}); - my $esref = $ef{$f}; - for my $e (keys %$esref) { - my $loge = log($es{$e}); - my $ef = $esref->{$e}; - my $logef = log($ef); - my $pmi = $logef - ($loge + $logf); - next if $pmi < $MIN_PMI; - my @flets = split //, $f; - my @elets = split //, $e; - print "@flets ||| @elets\n"; - $num++; - } -} -print STDERR "Extracted $num pairs.\n"; -print STDERR "Recommend running:\n ../../training/model1 -v -d -t -99999 output.txt\n"; diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc deleted file mode 100644 index f362d3f8..00000000 --- a/gi/pf/hpyp_tm.cc +++ /dev/null @@ -1,133 +0,0 @@ -#include "hpyp_tm.h" - -#include -#include -#include - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { - FreqBinner(const std::string& fname) { fd_.Load(fname); } - unsigned NumberOfBins() const { return fd_.Max() + 1; } - unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } - FreqDict fd_; -}; - -template -struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : - base(*b), - binner(bnr), - btr(binner ? 
binner->NumberOfBins() + 1u : 2u) {} - - void Summary() const { - cerr << "Number of conditioning contexts: " << r.size() << endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; - for (CCRP >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - cerr << " " << i2->second << endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - btr.ResampleHyperparameters(rng); - } - - prob_t Prob(const WordID src, const vector& trglets) const { - RuleModelHash::const_iterator it = r.find(src); - if (it == r.end()) { - return base(trglets); - } else { - return it->second.prob(trglets, base(trglets)); - } - } - - void Increment(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - if (it == r.end()) { - it = r.insert(make_pair(src, CCRP >(0.5,1.0))).first; - static const WordID kNULL = TD::Convert("NULL"); - unsigned bin = (src == kNULL ? 
0 : 1); - if (binner && bin) { bin = binner->Bin(src) + 1; } - btr.Add(bin, &it->second); - } - if (it->second.increment(trglets, base(trglets), rng)) - base.Increment(trglets, rng); - } - - void Decrement(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - assert(it != r.end()); - if (it->second.decrement(trglets, rng)) { - base.Decrement(trglets, rng); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - return p; - } - - unsigned UniqueConditioningContexts() const { - return r.size(); - } - - // TODO tie PYP hyperparameters based on source word frequency bins - Base& base; - const Binner* binner; - BinTiedResampler > > btr; - typedef unordered_map > > RuleModelHash; - RuleModelHash r; -}; - -HPYPLexicalTranslation::HPYPLexicalTranslation(const vector >& lets, - const unsigned vocab_size, - const unsigned num_letters) : - letters(lets), - base(vocab_size, num_letters, 5), - up0(new PYPWordModel(&base)), - tmodel(new ConditionalPYPWordModel >(up0, new FreqBinner("10k.freq"))), - kX(-TD::Convert("X")) {} - -void HPYPLexicalTranslation::Summary() const { - tmodel->Summary(); - up0->Summary(); -} - -prob_t HPYPLexicalTranslation::Likelihood() const { - prob_t p = up0->Likelihood(); - p *= tmodel->Likelihood(); - return p; -} - -void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { - tmodel->ResampleHyperparameters(rng); - up0->ResampleHyperparameters(rng); -} - -unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const { - return tmodel->UniqueConditioningContexts(); -} - -prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const { - return tmodel->Prob(src, letters[trg]); -} - -void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { - tmodel->Increment(src, letters[trg], rng); -} - -void 
HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { - tmodel->Decrement(src, letters[trg], rng); -} - diff --git a/gi/pf/hpyp_tm.h b/gi/pf/hpyp_tm.h deleted file mode 100644 index af3215ba..00000000 --- a/gi/pf/hpyp_tm.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef HPYP_LEX_TRANS -#define HPYP_LEX_TRANS - -#include -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template struct PYPWordModel; -template struct ConditionalPYPWordModel; - -struct HPYPLexicalTranslation { - explicit HPYPLexicalTranslation(const std::vector >& lets, - const unsigned vocab_size, - const unsigned num_letters); - - prob_t Likelihood() const; - - void ResampleHyperparameters(MT19937* rng); - prob_t Prob(WordID src, WordID trg) const; // return p(trg | src) - void Summary() const; - void Increment(WordID src, WordID trg, MT19937* rng); - void Decrement(WordID src, WordID trg, MT19937* rng); - unsigned UniqueConditioningContexts() const; - - private: - const std::vector >& letters; // spelling dictionary - PoissonUniformWordModel base; // "generator" of English types - PYPWordModel* up0; // model English lexicon - ConditionalPYPWordModel, FreqBinner>* tmodel; // translation distributions - // (model English word | French word) - const WordID kX; -}; - -#endif diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc deleted file mode 100644 index 29ec3860..00000000 --- a/gi/pf/itg.cc +++ /dev/null @@ -1,275 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -ostream& operator<<(ostream& os, const vector& p) { - os << '['; - for (int i = 0; i < p.size(); ++i) - os << (i==0 ? 
"" : " ") << TD::Convert(p[i]); - return os << ']'; -} - -struct UnigramModel { - explicit UnigramModel(const string& fname, unsigned vocab_size, double p0null = 0.05) : - use_uniform_(fname.size() == 0), - p0null_(p0null), - uniform_((1.0 - p0null) / vocab_size), - probs_(TD::NumWords() + 1) { - if (fname.size() > 0) LoadUnigrams(fname); - probs_[0] = p0null_; - } - -// -// \data\ -// ngram 1=9295 -// -// \1-grams: -// -3.191193 " - - void LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - const WordID w = TD::Convert(line.substr(pos + 1)); - line[pos] = 0; - float p = atof(&line[0]); - const prob_t pnon_null(1.0 - p0null_.as_float()); - if (w < probs_.size()) probs_[w].logeq(p * log(10) + log(pnon_null)); else abort(); - } - } - - const prob_t& operator()(const WordID& w) const { - if (!w) return p0null_; - if (use_uniform_) return uniform_; - return probs_[w]; - } - - const bool use_uniform_; - const prob_t p0null_; - const prob_t uniform_; - vector probs_; -}; - -struct Model1 { - explicit Model1(const string& fname) : - kNULL(TD::Convert("")), - kZERO() { - LoadModel1(fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // returns prob 0 if src or trg is not found! - const prob_t& operator()(WordID src, WordID trg) const { - if (src == 0) src = kNULL; - if (src < ttable.size()) { - const map& cpd = ttable[src]; - const map::const_iterator it = cpd.find(trg); - if (it != cpd.end()) - return it->second; - } - return kZERO; - } - - const WordID kNULL; - const prob_t kZERO; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(25),"Number of particles") - ("input,i",po::value(),"Read parallel data from") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("src_unigram,u",po::value()->default_value(""),"Source unigram distribution; empty for uniform") - ("trg_unigram,U",po::value()->default_value(""),"Target unigram distribution; empty for uniform") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line 
options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - TD::Convert(""); - TD::Convert(""); - TD::Convert(""); - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - 
prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - UnigramModel src_unigram(conf["src_unigram"].as(), vocabf.size()); - UnigramModel trg_unigram(conf["trg_unigram"].as(), vocabe.size()); - const prob_t kHALF(0.5); - - const string kEMPTY = "NULL"; - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - for (int si = 0; si < conf["samples"].as(); ++si) { - cerr << '.' << flush; - for (int ci = 0; ci < corpusf.size(); ++ci) { - const vector& trg = corpuse[ci]; - const vector& src = corpusf[ci]; - for (int i = 0; i <= trg.size(); ++i) { - const WordID e_i = i > 0 ? trg[i-1] : 0; - for (int j = 0; j <= src.size(); ++j) { - const WordID f_j = j > 0 ? src[j-1] : 0; - if (e_i == 0 && f_j == 0) continue; - prob_t je = kHALF * src_unigram(f_j) * m1(f_j,e_i) + kHALF * trg_unigram(e_i) * invm1(e_i,f_j); - cerr << "p( " << (e_i ? TD::Convert(e_i) : kEMPTY) << " , " << (f_j ? 
TD::Convert(f_j) : kEMPTY) << " ) = " << log(je) << endl; - if (e_i && f_j) - cout << "[X] ||| " << TD::Convert(f_j) << " ||| " << TD::Convert(e_i) << " ||| LogProb=" << log(je) << endl; - } - } - } - } -} - diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc deleted file mode 100644 index 1d5126e4..00000000 --- a/gi/pf/learn_cfg.cc +++ /dev/null @@ -1,428 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "inside_outside.h" -#include "hg.h" -#include "bottom_up_parser.h" -#include "fdict.h" -#include "grammar.h" -#include "m.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; -vector nt_vocab; -vector nt_id_to_index; -static unsigned kMAX_RULE_SIZE = 0; -static unsigned kMAX_ARITY = 0; -static bool kALLOW_MIXED = true; // allow rules with mixed terminals and NTs -static bool kHIERARCHICAL_PRIOR = false; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_rule_size,m", po::value()->default_value(0), "Maximum rule size (0 for unlimited)") - ("max_arity,a", po::value()->default_value(0), "Maximum number of nonterminals in a rule (0 for unlimited)") - ("no_mixed_rules,M", "Do not mix terminals and nonterminals in a rule RHS") - ("nonterminals,n", po::value()->default_value(1), "Size of nonterminal vocabulary") - ("hierarchical_prior,h", "Use hierarchical prior") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description 
dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -unsigned ReadCorpus(const string& filename, - vector >* e, - set* vocab_e) { - e->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - unsigned toks = 0; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - vector& le = e->back(); - TD::ConvertSentence(line, &le); - for (unsigned i = 0; i < le.size(); ++i) - vocab_e->insert(le[i]); - toks += le.size(); - } - if (in != &cin) delete in; - return toks; -} - -struct Grid { - // a b c d e - // 0 - 0 - - - vector grid; -}; - -struct BaseRuleModel { - explicit BaseRuleModel(unsigned term_size, - unsigned nonterm_size = 1) : - unif_term(1.0 / term_size), - unif_nonterm(1.0 / nonterm_size) {} - prob_t operator()(const TRule& r) const { - prob_t p; p.logeq(Md::log_poisson(1.0, r.f_.size())); - const prob_t term_prob((2.0 + 0.01*r.f_.size()) / (r.f_.size() + 2)); - const prob_t nonterm_prob(1.0 - term_prob.as_float()); - for (unsigned i = 0; i < r.f_.size(); ++i) { - if (r.f_[i] <= 0) { // nonterminal - if (kALLOW_MIXED) p *= nonterm_prob; - p *= unif_nonterm; - } else { // terminal - if (kALLOW_MIXED) p *= term_prob; - p *= unif_term; - } - } - return p; - } - const prob_t unif_term, unif_nonterm; -}; - -struct HieroLMModel { - explicit HieroLMModel(unsigned vocab_size, unsigned num_nts = 1) : - base(vocab_size, num_nts), - q0(1,1,1,1), - nts(num_nts, CCRP(1,1,1,1)) {} - - prob_t Prob(const TRule& r) const { - 
return nts[nt_id_to_index[-r.lhs_]].prob(r, p0(r)); - } - - inline prob_t p0(const TRule& r) const { - if (kHIERARCHICAL_PRIOR) - return q0.prob(r, base(r)); - else - return base(r); - } - - int Increment(const TRule& r, MT19937* rng) { - const int delta = nts[nt_id_to_index[-r.lhs_]].increment(r, p0(r), rng); - if (kHIERARCHICAL_PRIOR && delta) - q0.increment(r, base(r), rng); - return delta; - // return x.increment(r); - } - - int Decrement(const TRule& r, MT19937* rng) { - const int delta = nts[nt_id_to_index[-r.lhs_]].decrement(r, rng); - if (kHIERARCHICAL_PRIOR && delta) - q0.decrement(r, rng); - return delta; - //return x.decrement(r); - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (unsigned i = 0; i < nts.size(); ++i) { - prob_t q; q.logeq(nts[i].log_crp_prob()); - p *= q; - for (CCRP::const_iterator it = nts[i].begin(); it != nts[i].end(); ++it) { - prob_t tp = p0(it->first); - tp.poweq(it->second.num_tables()); - p *= tp; - } - } - if (kHIERARCHICAL_PRIOR) { - prob_t q; q.logeq(q0.log_crp_prob()); - p *= q; - for (CCRP::const_iterator it = q0.begin(); it != q0.end(); ++it) { - prob_t tp = base(it->first); - tp.poweq(it->second.num_tables()); - p *= tp; - } - } - //for (CCRP_OneTable::const_iterator it = x.begin(); it != x.end(); ++it) - // p *= base(it->first); - return p; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < nts.size(); ++i) - nts[i].resample_hyperparameters(rng); - if (kHIERARCHICAL_PRIOR) { - q0.resample_hyperparameters(rng); - cerr << "[base d=" << q0.discount() << ", s=" << q0.strength() << "]"; - } - cerr << " d=" << nts[0].discount() << ", s=" << nts[0].strength() << endl; - } - - const BaseRuleModel base; - CCRP q0; - vector > nts; - //CCRP_OneTable x; -}; - -vector tofreelist; - -HieroLMModel* plm; - -struct NPGrammarIter : public GrammarIter, public RuleBin { - NPGrammarIter() : arity() { tofreelist.push_back(this); } - NPGrammarIter(const TRulePtr& inr, const int a, int symbol) : 
arity(a) { - if (inr) { - r.reset(new TRule(*inr)); - } else { - r.reset(new TRule); - } - TRule& rr = *r; - rr.lhs_ = nt_vocab[0]; - rr.f_.push_back(symbol); - rr.e_.push_back(symbol < 0 ? (1-int(arity)) : symbol); - tofreelist.push_back(this); - } - inline static unsigned NextArity(int cur_a, int symbol) { - return cur_a + (symbol <= 0 ? 1 : 0); - } - virtual int GetNumRules() const { - if (r) return nt_vocab.size(); else return 0; - } - virtual TRulePtr GetIthRule(int i) const { - if (i == 0) return r; - TRulePtr nr(new TRule(*r)); - nr->lhs_ = nt_vocab[i]; - return nr; - } - virtual int Arity() const { - return arity; - } - virtual const RuleBin* GetRules() const { - if (!r) return NULL; else return this; - } - virtual const GrammarIter* Extend(int symbol) const { - const int next_arity = NextArity(arity, symbol); - if (kMAX_ARITY && next_arity > kMAX_ARITY) - return NULL; - if (!kALLOW_MIXED && r) { - bool t1 = r->f_.front() <= 0; - bool t2 = symbol <= 0; - if (t1 != t2) return NULL; - } - if (!kMAX_RULE_SIZE || !r || (r->f_.size() < kMAX_RULE_SIZE)) - return new NPGrammarIter(r, next_arity, symbol); - else - return NULL; - } - const unsigned char arity; - TRulePtr r; -}; - -struct NPGrammar : public Grammar { - virtual const GrammarIter* GetRoot() const { - return new NPGrammarIter; - } -}; - -prob_t TotalProb(const Hypergraph& hg) { - return Inside(hg); -} - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector* sampled_deriv) { - vector node_probs; - Inside(hg, &node_probs); - queue q; - q.push(hg.nodes_.size() - 2); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - //prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - 
const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = edge.edge_prob_; - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; - //z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } - for (unsigned i = 0; i < sampled_deriv->size(); ++i) { - cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; - } -} - -void IncrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - nt_vocab.resize(conf["nonterminals"].as()); - assert(nt_vocab.size() > 0); - assert(nt_vocab.size() < 26); - { - string nt = "X"; - for (unsigned i = 0; i < nt_vocab.size(); ++i) { - if (nt_vocab.size() > 1) nt[0] = ('A' + i); - int pid = TD::Convert(nt); - nt_vocab[i] = -pid; - if (pid >= nt_id_to_index.size()) { - nt_id_to_index.resize(pid + 1, -1); - } - nt_id_to_index[pid] = i; - } - } - vector grammars; - grammars.push_back(GrammarPtr(new NPGrammar)); - - const unsigned samples = conf["samples"].as(); - kMAX_RULE_SIZE = conf["max_rule_size"].as(); - if (kMAX_RULE_SIZE == 1) { - cerr << "Invalid maximum rule 
size: must be 0 or >1\n"; - return 1; - } - kMAX_ARITY = conf["max_arity"].as(); - if (kMAX_ARITY == 1) { - cerr << "Invalid maximum arity: must be 0 or >1\n"; - return 1; - } - kALLOW_MIXED = !conf.count("no_mixed_rules"); - - kHIERARCHICAL_PRIOR = conf.count("hierarchical_prior"); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector > corpuse; - set vocabe; - cerr << "Reading corpus...\n"; - const unsigned toks = ReadCorpus(conf["input"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - HieroLMModel lm(vocabe.size(), nt_vocab.size()); - - plm = &lm; - ExhaustiveBottomUpParser parser(TD::Convert(-nt_vocab[0]), grammars); - - Hypergraph hg; - const int kGoal = -TD::Convert("Goal"); - const int kLP = FD::Convert("LogProb"); - SparseVector v; v.set_value(kLP, 1.0); - vector > derivs(corpuse.size()); - vector cl(corpuse.size()); - for (int ci = 0; ci < corpuse.size(); ++ci) { - vector& src = corpuse[ci]; - Lattice& lat = cl[ci]; - lat.resize(src.size()); - for (unsigned i = 0; i < src.size(); ++i) - lat[i].push_back(LatticeArc(src[i], 0.0, 1)); - } - for (int SS=0; SS < samples; ++SS) { - const bool is_last = ((samples - 1) == SS); - prob_t dlh = prob_t::One(); - for (int ci = 0; ci < corpuse.size(); ++ci) { - const vector& src = corpuse[ci]; - const Lattice& lat = cl[ci]; - cerr << TD::GetString(src) << endl; - hg.clear(); - parser.Parse(lat, &hg); // exhaustive parse - vector& d = derivs[ci]; - if (!is_last) DecrementDerivation(hg, d, &lm, &rng); - for (unsigned i = 0; i < hg.edges_.size(); ++i) { - TRule& r = *hg.edges_[i].rule_; - if (r.lhs_ == kGoal) - hg.edges_[i].edge_prob_ = prob_t::One(); - else - hg.edges_[i].edge_prob_ = lm.Prob(r); - } - if (!is_last) { - d.clear(); - SampleDerivation(hg, &rng, &d); - IncrementDerivation(hg, derivs[ci], &lm, &rng); - } else { - prob_t p = 
TotalProb(hg); - dlh *= p; - cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; - } - if (tofreelist.size() > 200000) { - cerr << "Freeing ... "; - for (unsigned i = 0; i < tofreelist.size(); ++i) - delete tofreelist[i]; - tofreelist.clear(); - cerr << "Freed.\n"; - } - } - double llh = log(lm.Likelihood()); - cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; - if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); - if (is_last) { - double z = log(dlh); - cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; - } - } - for (unsigned i = 0; i < nt_vocab.size(); ++i) - cerr << lm.nts[i] << endl; - return 0; -} - diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl deleted file mode 100755 index fdcd3555..00000000 --- a/gi/pf/make-freq-bins.pl +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $BASE = 6; -my $CUTOFF = 3; - -my %d; -my $num = 0; -while(<>){ - chomp; - my @words = split /\s+/; - for my $w (@words) {$d{$w}++; $num++;} -} - -my @vocab = sort {$d{$b} <=> $d{$a}} keys %d; - -for (my $i=0; $i -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - - Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP(0.8, 0.5)) {} - - double p0(int x) const { - assert(x > 0); - assert(x < 5); - return 1.0/4.0; - } - - double llh() const { - double lh = bp + base.log_crp_prob(); - for (int ctx = 1; ctx < 5; ++ctx) - lh += ccrps[ctx].log_crp_prob(); - return lh; - } - - double prob(int ctx, int x) const { - assert(ctx > 0 && ctx < 5); - return ccrps[ctx].prob(x, base.prob(x, p0(x))); - } - - void increment(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { - if (base.increment(x, p0(x), &rng)) { - bp += log(1.0 / 4.0); - } - } - } - - // this is just a biased 
estimate - double est_base_prob(int x) { - return (x + 1) * x / 40.0; - } - - void increment_is(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - SampleSet ss; - const int PARTICLES = 25; - vector > s1s(PARTICLES, CCRP(0.5,0.5)); - vector > sbs(PARTICLES, CCRP(0.5,0.5)); - vector sp0s(PARTICLES); - - CCRP s1 = ccrps[ctx]; - CCRP sb = base; - double sp0 = bp; - for (int pp = 0; pp < PARTICLES; ++pp) { - if (pp > 0) { - ccrps[ctx] = s1; - base = sb; - bp = sp0; - } - - double q = 1; - double gamma = 1; - double est_p = est_base_prob(x); - //base.prob(x, p0(x)) + rng.next() * 0.1; - if (ccrps[ctx].increment(x, est_p, &rng, &q)) { - gamma = q * base.prob(x, p0(x)); - q *= est_p; - if (verbose) cerr << "(DP-base draw) "; - double qq = -1; - if (base.increment(x, p0(x), &rng, &qq)) { - if (verbose) cerr << "(G0 draw) "; - bp += log(p0(x)); - qq *= p0(x); - } - } else { gamma = q; } - double w = gamma / q; - if (verbose) - cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; - ss.add(w); - s1s[pp] = ccrps[ctx]; - sbs[pp] = base; - sp0s[pp] = bp; - } - int ps = rng.SelectSample(ss); - ccrps[ctx] = s1s[ps]; - base = sbs[ps]; - bp = sp0s[ps]; - if (verbose) { - cerr << "SELECTED: " << ps << endl; - static int cc = 0; cc++; if (cc ==10) exit(1); - } - } - - void decrement(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].decrement(x, &rng)) { - if (base.decrement(x, &rng)) { - bp -= log(p0(x)); - } - } - } - - double bp; - CCRP base; - vector > ccrps; - -}; - -int main(int argc, char** argv) { - if (argc > 1) { verbose = true; } - vector counts(15, 0); - vector tcounts(15, 0); - int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; - double tlh = 0; - double tt = 0; - for (int n = 0; n < 1000; ++n) { - if (n % 10 == 0) cerr << '.'; - if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; - Model m; - for (int *x = points; *x; x += 2) - m.increment(x[0], x[1]); - - for (int j = 0; j < 24; ++j) { - for (int *x = 
points; *x; x += 2) { - if (rng.next() < 0.8) { - m.decrement(x[0], x[1]); - m.increment_is(x[0], x[1]); - } - } - } - counts[m.base.num_customers()]++; - tcounts[m.base.num_tables()]++; - tlh += m.llh(); - tt += 1.0; - } - cerr << "mean LLH = " << (tlh / tt) << endl; - for (int i = 0; i < 15; ++i) - cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h deleted file mode 100644 index 10d171fe..00000000 --- a/gi/pf/monotonic_pseg.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _MONOTONIC_PSEG_H_ -#define _MONOTONIC_PSEG_H_ - -#include - -#include "prob.h" -#include "ccrp_nt.h" -#include "trule.h" -#include "base_distributions.h" - -template -struct MonotonicParallelSegementationModel { - explicit MonotonicParallelSegementationModel(BaseMeasure& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRulesAndStops(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - if (rules.size()) IncrementContinue(rules.size() - 1); - IncrementStop(); - } - - void DecrementRulesAndStops(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - if (rules.size()) { - DecrementContinue(rules.size() - 1); - DecrementStop(); - } - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - q.logeq(stop.log_crp_prob()); - p *= q; - return p; - } - - void IncrementStop() { - stop.increment(true); - } - - void IncrementContinue(int n = 1) { - for (int i = 0; i < n; ++i) - stop.increment(false); - } - - void DecrementStop() { - 
stop.decrement(true); - } - - void DecrementContinue(int n = 1) { - for (int i = 0; i < n; ++i) - stop.decrement(false); - } - - prob_t StopProbability() const { - return prob_t(stop.prob(true, 0.5)); - } - - prob_t ContinueProbability() const { - return prob_t(stop.prob(false, 0.5)); - } - - const BaseMeasure& rp0; - prob_t base; - CCRP_NoTable rules; - CCRP_NoTable stop; -}; - -#endif - diff --git a/gi/pf/ngram_base.cc b/gi/pf/ngram_base.cc deleted file mode 100644 index 1299f06f..00000000 --- a/gi/pf/ngram_base.cc +++ /dev/null @@ -1,69 +0,0 @@ -#include "ngram_base.h" - -#include "lm/model.hh" -#include "tdict.h" - -using namespace std; - -namespace { -struct GICSVMapper : public lm::EnumerateVocab { - GICSVMapper(vector* out) : out_(out), kLM_UNKNOWN_TOKEN(0) { out_->clear(); } - void Add(lm::WordIndex index, const StringPiece &str) { - const WordID cdec_id = TD::Convert(str.as_string()); - if (cdec_id >= out_->size()) - out_->resize(cdec_id + 1, kLM_UNKNOWN_TOKEN); - (*out_)[cdec_id] = index; - } - vector* out_; - const lm::WordIndex kLM_UNKNOWN_TOKEN; -}; -} - -struct FixedNgramBaseImpl { - FixedNgramBaseImpl(const string& param) { - GICSVMapper vm(&cdec2klm_map_); - lm::ngram::Config conf; - conf.enumerate_vocab = &vm; - cerr << "Reading character LM from " << param << endl; - model = new lm::ngram::ProbingModel(param.c_str(), conf); - order = model->Order(); - kEOS = MapWord(TD::Convert("")); - assert(kEOS > 0); - } - - lm::WordIndex MapWord(const WordID w) const { - if (w < cdec2klm_map_.size()) return cdec2klm_map_[w]; - return 0; - } - - ~FixedNgramBaseImpl() { delete model; } - - prob_t StringProbability(const vector& s) const { - lm::ngram::State state = model->BeginSentenceState(); - double prob = 0; - for (unsigned i = 0; i < s.size(); ++i) { - const lm::ngram::State scopy(state); - prob += model->Score(scopy, MapWord(s[i]), state); - } - const lm::ngram::State scopy(state); - prob += model->Score(scopy, kEOS, state); - prob_t p; p.logeq(prob * 
log(10)); - return p; - } - - lm::ngram::ProbingModel* model; - unsigned order; - vector cdec2klm_map_; - lm::WordIndex kEOS; -}; - -FixedNgramBase::~FixedNgramBase() { delete impl; } - -FixedNgramBase::FixedNgramBase(const string& lmfname) { - impl = new FixedNgramBaseImpl(lmfname); -} - -prob_t FixedNgramBase::StringProbability(const vector& s) const { - return impl->StringProbability(s); -} - diff --git a/gi/pf/ngram_base.h b/gi/pf/ngram_base.h deleted file mode 100644 index 4ea999f3..00000000 --- a/gi/pf/ngram_base.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NGRAM_BASE_H_ -#define _NGRAM_BASE_H_ - -#include -#include -#include "trule.h" -#include "wordid.h" -#include "prob.h" - -struct FixedNgramBaseImpl; -struct FixedNgramBase { - FixedNgramBase(const std::string& lmfname); - ~FixedNgramBase(); - prob_t StringProbability(const std::vector& s) const; - - prob_t operator()(const TRule& rule) const { - return StringProbability(rule.e_); - } - - private: - FixedNgramBaseImpl* impl; - -}; - -#endif diff --git a/gi/pf/nuisance_test.cc b/gi/pf/nuisance_test.cc deleted file mode 100644 index fc0af9cb..00000000 --- a/gi/pf/nuisance_test.cc +++ /dev/null @@ -1,161 +0,0 @@ -#include "ccrp.h" - -#include -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -ostream& operator<<(ostream&os, const vector& v) { - os << '[' << v[0]; - if (v.size() == 2) os << ' ' << v[1]; - return os << ']'; -} - -struct Base { - Base() : llh(), v(2), v1(1), v2(1), crp(0.25, 0.5) {} - inline double p0(const vector& x) const { - double p = 0.75; - if (x.size() == 2) p = 0.25; - p *= 1.0 / 3.0; - if (x.size() == 2) p *= 1.0 / 3.0; - return p; - } - double est_deriv_prob(int a, int b, int seg) const { - assert(a > 0 && a < 4); // a \in {1,2,3} - assert(b > 0 && b < 4); // b \in {1,2,3} - assert(seg == 0 || seg == 1); // seg \in {0,1} - if (seg == 0) { - v[0] = a; - v[1] = b; - return crp.prob(v, p0(v)); - } else { - v1[0] = a; - v2[0] = b; - 
return crp.prob(v1, p0(v1)) * crp.prob(v2, p0(v2)); - } - } - double est_marginal_prob(int a, int b) const { - return est_deriv_prob(a,b,0) + est_deriv_prob(a,b,1); - } - int increment(int a, int b, double* pw = NULL) { - double p1 = est_deriv_prob(a, b, 0); - double p2 = est_deriv_prob(a, b, 1); - //p1 = 0.5; p2 = 0.5; - int seg = rng.SelectSample(p1,p2); - double tmp = 0; - if (!pw) pw = &tmp; - double& w = *pw; - if (seg == 0) { - v[0] = a; - v[1] = b; - w = crp.prob(v, p0(v)) / p1; - if (crp.increment(v, p0(v), &rng)) { - llh += log(p0(v)); - } - } else { - v1[0] = a; - w = crp.prob(v1, p0(v1)) / p2; - if (crp.increment(v1, p0(v1), &rng)) { - llh += log(p0(v1)); - } - v2[0] = b; - w *= crp.prob(v2, p0(v2)); - if (crp.increment(v2, p0(v2), &rng)) { - llh += log(p0(v2)); - } - } - return seg; - } - void increment(int a, int b, int seg) { - if (seg == 0) { - v[0] = a; - v[1] = b; - if (crp.increment(v, p0(v), &rng)) { - llh += log(p0(v)); - } - } else { - v1[0] = a; - if (crp.increment(v1, p0(v1), &rng)) { - llh += log(p0(v1)); - } - v2[0] = b; - if (crp.increment(v2, p0(v2), &rng)) { - llh += log(p0(v2)); - } - } - } - void decrement(int a, int b, int seg) { - if (seg == 0) { - v[0] = a; - v[1] = b; - if (crp.decrement(v, &rng)) { - llh -= log(p0(v)); - } - } else { - v1[0] = a; - if (crp.decrement(v1, &rng)) { - llh -= log(p0(v1)); - } - v2[0] = b; - if (crp.decrement(v2, &rng)) { - llh -= log(p0(v2)); - } - } - } - double log_likelihood() const { - return llh + crp.log_crp_prob(); - } - double llh; - mutable vector v, v1, v2; - CCRP > crp; -}; - -int main(int argc, char** argv) { - double tl = 0; - const int ITERS = 1000; - const int PARTICLES = 20; - const int DATAPOINTS = 50; - WordID x = TD::Convert("souvenons"); - WordID y = TD::Convert("remember"); - vector src; TD::ConvertSentence("s o u v e n o n s", &src); - vector trg; TD::ConvertSentence("r e m e m b e r", &trg); -// Transliterations xx; -// xx.Initialize(x, src, y, trg); -// return 1; - - for (int j 
= 0; j < ITERS; ++j) { - Base b; - vector segs(DATAPOINTS); - SampleSet ss; - vector sss; - for (int i = 0; i < DATAPOINTS; i++) { - ss.clear(); - sss.clear(); - int x = ((i / 10) % 3) + 1; - int y = (i % 3) + 1; - //double ep = b.est_marginal_prob(x,y); - //cerr << "est p(" << x << "," << y << ") = " << ep << endl; - for (int n = 0; n < PARTICLES; ++n) { - double w; - int seg = b.increment(x,y,&w); - //cerr << seg << " w=" << w << endl; - ss.add(w); - sss.push_back(seg); - b.decrement(x,y,seg); - } - int seg = sss[rng.SelectSample(ss)]; - b.increment(x, y, seg); - //cerr << "Selected: " << seg << endl; - //return 1; - segs[i] = seg; - } - tl += b.log_likelihood(); - } - cerr << "LLH=" << tl / ITERS << endl; -} - diff --git a/gi/pf/os_phrase.h b/gi/pf/os_phrase.h deleted file mode 100644 index dfe40cb1..00000000 --- a/gi/pf/os_phrase.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _OS_PHRASE_H_ -#define _OS_PHRASE_H_ - -#include -#include -#include "tdict.h" - -inline std::ostream& operator<<(std::ostream& os, const std::vector& p) { - os << '['; - for (int i = 0; i < p.size(); ++i) - os << (i==0 ? 
"" : " ") << TD::Convert(p[i]); - return os << ']'; -} - -#endif diff --git a/gi/pf/pf.h b/gi/pf/pf.h deleted file mode 100644 index ede7cda8..00000000 --- a/gi/pf/pf.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef _PF_H_ -#define _PF_H_ - -#include -#include -#include "sampler.h" -#include "prob.h" - -template -struct ParticleRenormalizer { - void operator()(std::vector* pv) const { - if (pv->empty()) return; - prob_t z = prob_t::Zero(); - for (unsigned i = 0; i < pv->size(); ++i) - z += (*pv)[i].weight; - assert(z > prob_t::Zero()); - for (unsigned i = 0; i < pv->size(); ++i) - (*pv)[i].weight /= z; - } -}; - -template -struct MultinomialResampleFilter { - explicit MultinomialResampleFilter(MT19937* rng) : rng_(rng) {} - - void operator()(std::vector* pv) { - if (pv->empty()) return; - std::vector& ps = *pv; - SampleSet ss; - for (int i = 0; i < ps.size(); ++i) - ss.add(ps[i].weight); - std::vector nps; nps.reserve(ps.size()); - const prob_t uniform_weight(1.0 / ps.size()); - for (int i = 0; i < ps.size(); ++i) { - nps.push_back(ps[rng_->SelectSample(ss)]); - nps[i].weight = uniform_weight; - } - nps.swap(ps); - } - - private: - MT19937* rng_; -}; - -template -struct SystematicResampleFilter { - explicit SystematicResampleFilter(MT19937* rng) : rng_(rng), renorm_() {} - - void operator()(std::vector* pv) { - if (pv->empty()) return; - renorm_(pv); - std::vector& ps = *pv; - std::vector nps; nps.reserve(ps.size()); - double lower = 0, upper = 0; - const double skip = 1.0 / ps.size(); - double u_j = rng_->next() * skip; - //std::cerr << "u_0: " << u_j << std::endl; - int j = 0; - for (unsigned i = 0; i < ps.size(); ++i) { - upper += ps[i].weight.as_float(); - //std::cerr << "lower: " << lower << " upper: " << upper << std::endl; - // how many children does ps[i] have? 
- while (u_j < lower) { u_j += skip; ++j; } - while (u_j >= lower && u_j <= upper) { - assert(j < ps.size()); - nps.push_back(ps[i]); - u_j += skip; - //std::cerr << " add u_j=" << u_j << std::endl; - ++j; - } - lower = upper; - } - //std::cerr << ps.size() << " " << nps.size() << "\n"; - assert(ps.size() == nps.size()); - //exit(1); - ps.swap(nps); - } - - private: - MT19937* rng_; - ParticleRenormalizer renorm_; -}; - -#endif diff --git a/gi/pf/pf_test.cc b/gi/pf/pf_test.cc deleted file mode 100644 index 296e7285..00000000 --- a/gi/pf/pf_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "ccrp.h" - -#include -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - - Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP(0.8, 0.5)) {} - - double p0(int x) const { - assert(x > 0); - assert(x < 5); - return 1.0/4.0; - } - - double llh() const { - double lh = bp + base.log_crp_prob(); - for (int ctx = 1; ctx < 5; ++ctx) - lh += ccrps[ctx].log_crp_prob(); - return lh; - } - - double prob(int ctx, int x) const { - assert(ctx > 0 && ctx < 5); - return ccrps[ctx].prob(x, base.prob(x, p0(x))); - } - - void increment(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { - if (base.increment(x, p0(x), &rng)) { - bp += log(1.0 / 4.0); - } - } - } - - // this is just a biased estimate - double est_base_prob(int x) { - return (x + 1) * x / 40.0; - } - - void increment_is(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - SampleSet ss; - const int PARTICLES = 25; - vector > s1s(PARTICLES, CCRP(0.5,0.5)); - vector > sbs(PARTICLES, CCRP(0.5,0.5)); - vector sp0s(PARTICLES); - - CCRP s1 = ccrps[ctx]; - CCRP sb = base; - double sp0 = bp; - for (int pp = 0; pp < PARTICLES; ++pp) { - if (pp > 0) { - ccrps[ctx] = s1; - base = sb; - bp = sp0; - } - - double q = 1; - double gamma = 1; - double est_p = est_base_prob(x); - //base.prob(x, p0(x)) + 
rng.next() * 0.1; - if (ccrps[ctx].increment(x, est_p, &rng, &q)) { - gamma = q * base.prob(x, p0(x)); - q *= est_p; - if (verbose) cerr << "(DP-base draw) "; - double qq = -1; - if (base.increment(x, p0(x), &rng, &qq)) { - if (verbose) cerr << "(G0 draw) "; - bp += log(p0(x)); - qq *= p0(x); - } - } else { gamma = q; } - double w = gamma / q; - if (verbose) - cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; - ss.add(w); - s1s[pp] = ccrps[ctx]; - sbs[pp] = base; - sp0s[pp] = bp; - } - int ps = rng.SelectSample(ss); - ccrps[ctx] = s1s[ps]; - base = sbs[ps]; - bp = sp0s[ps]; - if (verbose) { - cerr << "SELECTED: " << ps << endl; - static int cc = 0; cc++; if (cc ==10) exit(1); - } - } - - void decrement(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].decrement(x, &rng)) { - if (base.decrement(x, &rng)) { - bp -= log(p0(x)); - } - } - } - - double bp; - CCRP base; - vector > ccrps; - -}; - -int main(int argc, char** argv) { - if (argc > 1) { verbose = true; } - vector counts(15, 0); - vector tcounts(15, 0); - int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; - double tlh = 0; - double tt = 0; - for (int n = 0; n < 1000; ++n) { - if (n % 10 == 0) cerr << '.'; - if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; - Model m; - for (int *x = points; *x; x += 2) - m.increment(x[0], x[1]); - - for (int j = 0; j < 24; ++j) { - for (int *x = points; *x; x += 2) { - if (rng.next() < 0.8) { - m.decrement(x[0], x[1]); - m.increment_is(x[0], x[1]); - } - } - } - counts[m.base.num_customers()]++; - tcounts[m.base.num_tables()]++; - tlh += m.llh(); - tt += 1.0; - } - cerr << "mean LLH = " << (tlh / tt) << endl; - for (int i = 0; i < 15; ++i) - cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/pfbrat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include -#include -#include - 
-#include -#include -#include -#include - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { - explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size), - kNULL(TD::Convert("")) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - LoadModel1(model1fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // return logp0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - const int flen = rule.f_.size(); - const int elen = rule.e_.size(); - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = rule.e_[i]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
kNULL : rule.f_[j]; - const map::const_iterator it = ttable[src].find(trg); - if (it != ttable[src].end()) { - tp += kM1MIXTURE * it->second; - } - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - return p; - } - - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; - const WordID kNULL; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(3),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(3),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* 
vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UniphraseLM { - UniphraseLM(const vector >& corpus, - const set& vocab, - const po::variables_map& conf) : - phrases_(1,1), - gen_(1,1), - corpus_(corpus), - uniform_word_(1.0 / vocab.size()), - gen_p0_(0.5), - p_end_(0.5), - use_poisson_(conf.count("poisson_length") > 0) {} - - void ResampleHyperparameters(MT19937* rng) { - phrases_.resample_hyperparameters(rng); - gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.alpha(); - } - - CCRP_NoTable > phrases_; - CCRP_NoTable gen_; - vector > z_; // z_[i] is there a phrase boundary after the ith word - const vector >& corpus_; - const double uniform_word_; - const double gen_p0_; - const double p_end_; // in base length distribution, p of the end of a phrase - const bool use_poisson_; -}; - -struct Reachability { - boost::multi_array edges; // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring? 
- boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; - }; - - struct NState { - NState() : next_src_covered(), next_trg_covered() {} - NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {} - int next_src_covered; - int next_trg_covered; - }; - - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - assert(a[srclen][trglen].size() > 0); - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); -// typedef boost::multi_array, 2> narray_type; -// narray_type b(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } -// const NState nstate(i,j); - for (int k = 0; k < prevs.size(); ++k) 
{ - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta = i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; -// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; - assert(max_src_delta[0][0] > 0); - //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; - //for (int i = 0; i < b[0][0].size(); ++i) { - // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; - //} - } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { - explicit FSTState(int src_size) : - trg_covered_(), - src_covered_(), - src_coverage_(src_size) {} - - FSTState(short trg_covered, short src_covered, const vector& src_coverage, const vector& src_prefix) : - trg_covered_(trg_covered), - src_covered_(src_covered), - src_coverage_(src_coverage), - src_prefix_(src_prefix) { - if (src_coverage_.size() == src_covered) { - assert(src_prefix.size() == 0); - } - } - - // if we extend by the word at src_position, what are - // the next states that are reachable and lie on a valid - // path to the final state? 
- vector Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { - assert(src_position < src_coverage_.size()); - if (src_coverage_[src_position]) { - cerr << "Trying to extend " << *this << " with position " << src_position << endl; - abort(); - } - vector ncvg = src_coverage_; - ncvg[src_position] = true; - - vector res; - const int trg_remaining = trg_len - trg_covered_; - if (trg_remaining <= 0) { - cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; - abort(); - } - const int src_remaining = src_len - src_covered_; - if (src_remaining <= 0) { - cerr << "Source appears to have been covered: " << *this << endl; - abort(); - } - - for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { - if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { - int nc = src_prefix_.size() + 1 + src_covered_; - res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector())); - } - } - - if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { - vector nsp = src_prefix_; - nsp.push_back(src_position); - res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); - } - - if (res.size() == 0) { - cerr << *this << " can't be extended!\n"; - abort(); - } - return res; - } - - short trg_covered_, src_covered_; - vector src_coverage_; - vector src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { - if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; - if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; - if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; - return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { - os << "[" << q.trg_covered_ << " : "; - for (int i = 0; i < q.src_coverage_.size(); ++i) - os << q.src_coverage_[i]; - os << " : <"; - for (int i = 0; i < q.src_prefix_.size(); ++i) { - if (i != 
0) os << ' '; - os << q.src_prefix_[i]; - } - return os << ">]"; -} - -struct MyModel { - MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} - typedef unordered_map, CCRP_NoTable, boost::hash > > SrcToRuleCRPMap; - - void DecrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - it->second.decrement(rule); - if (it->second.num_customers() == 0) rules.erase(it); - } - - void IncrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) { - CCRP_NoTable crp(1,1); - it = rules.insert(make_pair(rule.f_, crp)).first; - } - it->second.increment(rule); - } - - // conditioned on rule.f_ - prob_t RuleConditionalProbability(const TRule& rule) const { - const prob_t base = rp0(rule); - SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) { - return base; - } else { - const double lp = it->second.logprob(rule, log(base)); - prob_t q; q.logeq(lp); - return q; - } - } - - const ConditionalBase& rp0; - SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { - MyFST(const vector& ssrc, const vector& strg, MyModel* m) : - src(ssrc), trg(strg), - r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), - model(m) { - FSTState in(src.size()); - cerr << " INIT: " << in << endl; - init = GetNode(in); - for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; - in.src_covered_ = src.size(); - in.trg_covered_ = trg.size(); - cerr << "FINAL: " << in << endl; - final = GetNode(in); - } - virtual const WFSTNode* Final() const; - virtual const WFSTNode* Initial() const; - - const WFSTNode* GetNode(const FSTState& q); - map > m; - const vector& src; - const vector& trg; - Reachability r; - const WFSTNode* init; - const WFSTNode* final; - MyModel* model; -}; - -struct MyNode : public WFSTNode { - MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} - virtual vector > ExtendInput(unsigned srcindex) const; - const 
FSTState state; - mutable MyFST* container; -}; - -vector > MyNode::ExtendInput(unsigned srcindex) const { - cerr << "EXTEND " << state << " with " << srcindex << endl; - vector ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); - vector > res(ext.size()); - for (unsigned i = 0; i < ext.size(); ++i) { - res[i].first = container->GetNode(ext[i]); - if (ext[i].src_prefix_.size() == 0) { - const unsigned trg_from = state.trg_covered_; - const unsigned trg_to = ext[i].trg_covered_; - const unsigned prev_prfx_size = state.src_prefix_.size(); - res[i].second.reset(new TRule); - res[i].second->lhs_ = -TD::Convert("X"); - vector& src = res[i].second->f_; - vector& trg = res[i].second->e_; - src.resize(prev_prfx_size + 1); - for (unsigned j = 0; j < prev_prfx_size; ++j) - src[j] = container->src[state.src_prefix_[j]]; - src[prev_prfx_size] = container->src[srcindex]; - for (unsigned j = trg_from; j < trg_to; ++j) - trg.push_back(container->trg[j]); - res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); - } - } - return res; -} - -const WFSTNode* MyFST::GetNode(const FSTState& q) { - boost::shared_ptr& res = m[q]; - if (!res) { - res.reset(new MyNode(q, this)); - } - return &*res; -} - -const WFSTNode* MyFST::Final() const { - return final; -} - -const WFSTNode* MyFST::Initial() const { - return init; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - 
ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - ConditionalBase lp0(conf["model1_interpolation_weight"].as(), - vocabe.size(), - conf["model1"].as()); - MyModel m(lp0); - - TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0"); - m.IncrementRule(x); - TRule y("[X] ||| nY dyN ||| gave ||| 0"); - m.IncrementRule(y); - - - MyFST fst(corpusf[0], corpuse[0], &m); - ifstream in("./kimura.g"); - assert(in); - CFG_WFSTComposer comp(fst); - Hypergraph hg; - bool succeed = comp.Compose(&in, &hg); - hg.PrintGraphviz(); - if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } - -#if 0 - ifstream in2("./amnabooks.g"); - assert(in2); - MyFST fst2(corpusf[1], corpuse[1], &m); - CFG_WFSTComposer comp2(fst2); - Hypergraph hg2; - bool succeed2 = comp2.Compose(&in2, &hg2); - if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } -#endif - - SparseVector w; w.set_value(FD::Convert("Proposal"), 1.0); - hg.Reweight(w); - cerr << ViterbiFTree(hg) << endl; - return 0; -} - diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc deleted file mode 100644 index a3e46064..00000000 --- a/gi/pf/pfdist.cc +++ /dev/null @@ -1,598 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "pf.h" -#include "base_distributions.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, 
po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(30),"Number of particles") - ("filter_frequency,f",po::value()->default_value(5),"Number of time steps between filterings") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); 
- if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -#if 0 -struct MyConditionalModel { - MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - prob_t srcp0(const vector& src) const { - prob_t p(1.0 / 3000.0); - p.poweq(src.size()); - prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); - p *= lenp; - return p; - } - - void DecrementRule(const TRule& rule) { - const RuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - if (it->second.decrement(rule)) { - base /= (*rp0)(rule); - if (it->second.num_customers() == 0) - rules.erase(it); - } - if (src_phrases.decrement(rule.f_)) - base /= srcp0(rule.f_); - } - - void IncrementRule(const TRule& rule) { - RuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) - it = rules.insert(make_pair(rule.f_, CCRP_NoTable(1,1))).first; - if (it->second.increment(rule)) { - base *= (*rp0)(rule); - } - if (src_phrases.increment(rule.f_)) - base *= srcp0(rule.f_); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if 
(src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - const prob_t p0 = (*rp0)(rule); - prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); - const RuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) return srcp * p0; - const double lp = it->second.logprob(rule, log(p0)); - prob_t q; q.logeq(lp); - return q * srcp; - } - - prob_t Likelihood() const { - prob_t p = base; - for (RuleCRPMap::const_iterator it = rules.begin(); - it != rules.end(); ++it) { - prob_t cl; cl.logeq(it->second.log_crp_prob()); - p *= cl; - } - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseConditionalBase* rp0; - prob_t base; - typedef unordered_map, CCRP_NoTable, boost::hash > > RuleCRPMap; - RuleCRPMap rules; - CCRP_NoTable > src_phrases; - vector > src_jumps; -}; - -#endif - -struct MyJointModel { - MyJointModel(PhraseJointBase& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRules(const vector& rules) 
{ - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseJointBase& rp0; - prob_t base; - CCRP_NoTable rules; - vector > src_jumps; -}; - -struct BackwardEstimate { - BackwardEstimate(const Model1& m1, const vector& src, const vector& trg) : - model1_(m1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); 
return e; } - vector r(src_.size() + 1); r.clear(); - r.push_back(0); // NULL word - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - } - return e; - } - const Model1& model1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(Md::log_poisson(r.size(), 
trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} - prob_t weight; - prob_t gamma_last; - vector src_jumps; - vector rules; - vector src_cv; - int src_cov; - int trg_cov; - int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? 
'1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - const unsigned rejuv_freq = conf["filter_frequency"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - -#if 0 - PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - MyConditionalModel m(lp0); -#else - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MyJointModel m(lp0); -#endif - - MultinomialResampleFilter filter(&rng); - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - 
reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRules(ps[ci].rules); - m.DecrementJumps(ps[ci].src_jumps, src.size()); - - //BackwardEstimate be(m1, src, trg); - BackwardEstimateSym be(m1, invm1, src, trg); - const Reachability& r = reaches[ci]; - vector lps(particles); - - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - p.src_cv.resize(src.size(), false); - } - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - filter(&lps); - - // loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - int first_uncovered = src.size(); - int last_uncovered = -1; - for (int i = 0; i < src.size(); ++i) { - const bool is_uncovered = !p.src_cv[i]; - if (i < first_uncovered && is_uncovered) first_uncovered = i; - if (is_uncovered && i > last_uncovered) last_uncovered = i; - } - assert(last_uncovered > -1); - assert(first_uncovered < src.size()); - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - const int last_possible_start = last_uncovered - src_len + 1; - assert(last_possible_start >= 0); - //cerr << src_len << "," << trg_len << " is allowed. 
E=" << TD::GetString(x.e_) << endl; - //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl; - for (int i = first_uncovered; i <= last_possible_start; ++i) { - if (p.src_cv[i]) continue; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - int gap_add = 0; - bool bad = false; - prob_t jp = prob_t::One(); - int prev_pos = p.prev_pos; - for (int j = 0; j < src_len; ++j) { - if ((j + i + gap_add) == src.size()) { bad = true; break; } - while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } - if ((j + i + gap_add) == src.size()) { bad = true; break; } - np.src_cv[i + j + gap_add] = true; - x.f_.push_back(src[i + j + gap_add]); - jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); - int jump = i + j + gap_add - prev_pos; - assert(jump != 0); - np.src_jumps.push_back(jump); - prev_pos = i + j + gap_add; - } - if (bad) continue; - np.prev_pos = prev_pos; - np.src_cov += x.f_.size(); - np.trg_cov += x.e_.size(); - if (x.f_.size() != src_len) continue; - prob_t rp = m.RuleProbability(x); - np.gamma_last = rp * jp; - const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - - const bool completed = (p.trg_cov == trg.size()); - if (completed) { - int last_jump = src.size() - p.prev_pos; - assert(last_jump > 0); - p.src_jumps.push_back(last_jump); - p.weight *= m.JumpProbability(last_jump, src.size()); - } - } - } - } - cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - //m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << 
p.rules.back()->AsString() << endl; - cerr << p << endl; - } - } // loop over particles (pi = 0 .. particles) - if (done_nothing) all_complete = true; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRules(lps[sampled].rules); - m.IncrementJumps(lps[sampled].src_jumps, src.size()); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - for (int sni = 0; sni < 5; ++sni) { - for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } - } - } - return 0; -} - diff --git a/gi/pf/pfdist.new.cc b/gi/pf/pfdist.new.cc deleted file mode 100644 index 3169eb75..00000000 --- a/gi/pf/pfdist.new.cc +++ /dev/null @@ -1,620 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_measures.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -shared_ptr prng; - -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(25),"Number of particles") - ("input,i",po::value(),"Read 
parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - 
vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -#if 0 -struct MyConditionalModel { - MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - prob_t srcp0(const vector& src) const { - prob_t p(1.0 / 3000.0); - p.poweq(src.size()); - prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); - p *= lenp; - return p; - } - - void DecrementRule(const TRule& rule) { - const RuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - if (it->second.decrement(rule)) { - base /= (*rp0)(rule); - if (it->second.num_customers() == 0) - rules.erase(it); - } - if (src_phrases.decrement(rule.f_)) - base /= srcp0(rule.f_); - } - - void IncrementRule(const TRule& rule) { - RuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) - it = rules.insert(make_pair(rule.f_, CCRP_NoTable(1,1))).first; - if (it->second.increment(rule)) { - base *= (*rp0)(rule); - } - if (src_phrases.increment(rule.f_)) - base *= srcp0(rule.f_); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // 
p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - const prob_t p0 = (*rp0)(rule); - prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); - const RuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) return srcp * p0; - const double lp = it->second.logprob(rule, log(p0)); - prob_t q; q.logeq(lp); - return q * srcp; - } - - prob_t Likelihood() const { - prob_t p = base; - for (RuleCRPMap::const_iterator it = rules.begin(); - it != rules.end(); ++it) { - prob_t cl; cl.logeq(it->second.log_crp_prob()); - p *= cl; - } - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseConditionalBase* rp0; - prob_t base; - typedef unordered_map, CCRP_NoTable, boost::hash > > RuleCRPMap; - RuleCRPMap rules; - CCRP_NoTable > src_phrases; - vector > src_jumps; -}; - -#endif - -struct MyJointModel { - MyJointModel(PhraseJointBase& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void 
DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseJointBase& rp0; - prob_t base; - CCRP_NoTable rules; - vector > src_jumps; -}; - -struct BackwardEstimate { - BackwardEstimate(const Model1& m1, const vector& src, const vector& trg) : - model1_(m1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - r.push_back(0); // NULL word - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < 
trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - } - return e; - } - const Model1& model1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 
0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} - prob_t weight; - prob_t gamma_last; - vector src_jumps; - vector rules; - vector src_cv; - int src_cov; - int trg_cov; - int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? '1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -void FilterCrapParticlesAndReweight(vector* pps) { - vector& ps = *pps; - SampleSet ss; - for (int i = 0; i < ps.size(); ++i) - ss.add(ps[i].weight); - vector nps; nps.reserve(ps.size()); - const prob_t uniform_weight(1.0 / ps.size()); - for (int i = 0; i < ps.size(); ++i) { - nps.push_back(ps[prng->SelectSample(ss)]); - nps[i].weight = uniform_weight; - } - nps.swap(ps); -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - 
prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - -#if 0 - PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - MyConditionalModel m(lp0); -#else - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MyJointModel m(lp0); -#endif - - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRules(ps[ci].rules); - m.DecrementJumps(ps[ci].src_jumps, src.size()); - - //BackwardEstimate be(m1, src, trg); - BackwardEstimateSym be(m1, invm1, src, trg); - const Reachability& r = reaches[ci]; - vector lps(particles); - - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - p.src_cv.resize(src.size(), false); - } - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - FilterCrapParticlesAndReweight(&lps); - - 
// loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - const int rejuv_freq = 1; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - int first_uncovered = src.size(); - int last_uncovered = -1; - for (int i = 0; i < src.size(); ++i) { - const bool is_uncovered = !p.src_cv[i]; - if (i < first_uncovered && is_uncovered) first_uncovered = i; - if (is_uncovered && i > last_uncovered) last_uncovered = i; - } - assert(last_uncovered > -1); - assert(first_uncovered < src.size()); - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - const int last_possible_start = last_uncovered - src_len + 1; - assert(last_possible_start >= 0); - //cerr << src_len << "," << trg_len << " is allowed. 
E=" << TD::GetString(x.e_) << endl; - //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl; - for (int i = first_uncovered; i <= last_possible_start; ++i) { - if (p.src_cv[i]) continue; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - int gap_add = 0; - bool bad = false; - prob_t jp = prob_t::One(); - int prev_pos = p.prev_pos; - for (int j = 0; j < src_len; ++j) { - if ((j + i + gap_add) == src.size()) { bad = true; break; } - while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } - if ((j + i + gap_add) == src.size()) { bad = true; break; } - np.src_cv[i + j + gap_add] = true; - x.f_.push_back(src[i + j + gap_add]); - jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); - int jump = i + j + gap_add - prev_pos; - assert(jump != 0); - np.src_jumps.push_back(jump); - prev_pos = i + j + gap_add; - } - if (bad) continue; - np.prev_pos = prev_pos; - np.src_cov += x.f_.size(); - np.trg_cov += x.e_.size(); - if (x.f_.size() != src_len) continue; - prob_t rp = m.RuleProbability(x); - np.gamma_last = rp * jp; - const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - - const bool completed = (p.trg_cov == trg.size()); - if (completed) { - int last_jump = src.size() - p.prev_pos; - assert(last_jump > 0); - p.src_jumps.push_back(last_jump); - p.weight *= m.JumpProbability(last_jump, src.size()); - } - } - } - } - cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - //m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << 
p.rules.back()->AsString() << endl; - cerr << p << endl; - } - } // loop over particles (pi = 0 .. particles) - if (done_nothing) all_complete = true; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRules(lps[sampled].rules); - m.IncrementJumps(lps[sampled].src_jumps, src.size()); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - for (int sni = 0; sni < 5; ++sni) { - for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } - } - } - return 0; -} - diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc deleted file mode 100644 index 958ec4e2..00000000 --- a/gi/pf/pfnaive.cc +++ /dev/null @@ -1,284 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "pf.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" -#include "corpus.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(30),"Number of particles") - ("filter_frequency,f",po::value()->default_value(5),"Number of time steps between filterings") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - 
("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const { - assert(src_cov <= src_.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = src_cov; i < src_.size(); ++i) - r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); 
- if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov() {} - prob_t weight; - prob_t gamma_last; - vector rules; - int src_cov; - int trg_cov; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? 
'1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - const unsigned rejuv_freq = conf["filter_frequency"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MonotonicParallelSegementationModel m(alp0); - TRule xx("[X] ||| ms. kimura ||| MS. KIMURA ||| X=0"); - cerr << xx << endl << lp0(xx) << " " << alp0(xx) << endl; - TRule xx12("[X] ||| . ||| PHARMACY . ||| X=0"); - TRule xx21("[X] ||| pharmacy . ||| . ||| X=0"); -// TRule xx22("[X] ||| . ||| . ||| X=0"); - TRule xx22("[X] ||| . ||| THE . 
||| X=0"); - cerr << xx12 << "\t" << lp0(xx12) << " " << alp0(xx12) << endl; - cerr << xx21 << "\t" << lp0(xx21) << " " << alp0(xx21) << endl; - cerr << xx22 << "\t" << lp0(xx22) << " " << alp0(xx22) << endl; - - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - SystematicResampleFilter filter(&rng); - // MultinomialResampleFilter filter(&rng); - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRulesAndStops(ps[ci].rules); - const prob_t q_stop = m.StopProbability(); - const prob_t q_cont = m.ContinueProbability(); - cerr << "P(stop)=" << q_stop << "\tP(continue)=" < lps(particles); - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - filter(&lps); - - // loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - int i = p.src_cov; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - for (int j = 0; j < src_len; ++j) - x.f_.push_back(src[i + j]); - np.src_cov += 
x.f_.size(); - np.trg_cov += x.e_.size(); - const bool stop_now = (np.src_cov == src_len && np.trg_cov == trg_len); - prob_t rp = m.RuleProbability(x) * (stop_now ? q_stop : q_cont); - np.gamma_last = rp; - const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - } - } - //cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - //m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; - //cerr << p << endl; - } - } // loop over particles (pi = 0 .. particles) - if (done_nothing) all_complete = true; - prob_t wv = prob_t::Zero(); - for (int pp = 0; pp < lps.size(); ++pp) - wv += lps[pp].weight; - for (int pp = 0; pp < lps.size(); ++pp) - lps[pp].weight /= wv; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRulesAndStops(lps[sampled].rules); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - } - return 0; -} - diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h deleted file mode 100644 index 76204a0e..00000000 --- a/gi/pf/poisson_uniform_word_model.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _POISSON_UNIFORM_WORD_MODEL_H_ -#define _POISSON_UNIFORM_WORD_MODEL_H_ - -#include -#include -#include "prob.h" -#include "m.h" - -// len ~ Poisson(lambda) -// for (1..len) -// e_i ~ Uniform({Vocabulary}) -struct PoissonUniformWordModel { - explicit 
PoissonUniformWordModel(const unsigned vocab_size, - const unsigned alphabet_size, - const double mean_len = 5) : - lh(prob_t::One()), - v0(-std::log(vocab_size)), - u0(-std::log(alphabet_size)), - mean_length(mean_len) {} - - void ResampleHyperparameters(MT19937*) {} - - inline prob_t operator()(const std::vector& s) const { - prob_t p; - p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); - //p.logeq(v0); - return p; - } - - inline void Increment(const std::vector& w, MT19937*) { - lh *= (*this)(w); - } - - inline void Decrement(const std::vector& w, MT19937 *) { - lh /= (*this)(w); - } - - inline prob_t Likelihood() const { return lh; } - - void Summary() const {} - - private: - - prob_t lh; // keeps track of the draws from the base distribution - const double v0; // uniform log prob of generating a word - const double u0; // uniform log prob of generating a letter - const double mean_length; // mean length of a word in the base distribution -}; - -#endif diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc deleted file mode 100644 index 605d8206..00000000 --- a/gi/pf/pyp_lm.cc +++ /dev/null @@ -1,273 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "gamma_poisson.h" -#include "corpus_tools.h" -#include "m.h" -#include "tdict.h" -#include "sampler.h" -#include "ccrp.h" -#include "tied_resampler.h" - -// A not very memory-efficient implementation of an N-gram LM based on PYPs -// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model -// based on Pitman-Yor Processes. In Proc. ACL. 
- -// I use templates to handle the recursive formalation of the prior, so -// the order of the model has to be specified here, at compile time: -#define kORDER 3 - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,n",po::value()->default_value(300),"Number of samples") - ("train,i",po::value(),"Training data file") - ("test,T",po::value(),"Test data file") - ("discount_prior_a,a",po::value()->default_value(1.0), "discount ~ Beta(a,b): a=this") - ("discount_prior_b,b",po::value()->default_value(1.0), "discount ~ Beta(a,b): b=this") - ("strength_prior_s,s",po::value()->default_value(1.0), "strength ~ Gamma(s,r): s=this") - ("strength_prior_r,r",po::value()->default_value(1.0), "strength ~ Gamma(s,r): r=this") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("train") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -// uniform distribution over a fixed vocabulary -struct UniformVocabulary { - UniformVocabulary(unsigned vs, double, double, double, double) : p0(1.0 / vs), draws() {} - void increment(WordID, const vector&, MT19937*) { ++draws; } - void decrement(WordID, const vector&, MT19937*) { --draws; assert(draws >= 0); } - double prob(WordID, const 
vector&) const { return p0; } - void resample_hyperparameters(MT19937*) {} - double log_likelihood() const { return draws * log(p0); } - const double p0; - int draws; -}; - -// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS. -// Journal of Statistical Planning and Inference 14 (1986) 311-322 -struct PoissonLengthUniformCharWordModel { - explicit PoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : plen(5,5), uc(-log(95)), llh() {} - void increment(WordID w, const vector& v, MT19937*) { - llh += log(prob(w, v)); // this isn't quite right - plen.increment(TD::Convert(w).size() - 1); - } - void decrement(WordID w, const vector& v, MT19937*) { - plen.decrement(TD::Convert(w).size() - 1); - llh -= log(prob(w, v)); // this isn't quite right - } - double prob(WordID w, const vector&) const { - const unsigned len = TD::Convert(w).size(); - return plen.prob(len - 1) * exp(uc * len); - } - double log_likelihood() const { return llh; } - void resample_hyperparameters(MT19937*) {} - GammaPoisson plen; - const double uc; - double llh; -}; - -struct PYPAdaptedPoissonLengthUniformCharWordModel { - explicit PYPAdaptedPoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : - base(vocab_size,1,1,1,1), - crp(1,1,1,1) {} - void increment(WordID w, const vector& v, MT19937* rng) { - double p0 = base.prob(w, v); - if (crp.increment(w, p0, rng)) - base.increment(w, v, rng); - } - void decrement(WordID w, const vector& v, MT19937* rng) { - if (crp.decrement(w, rng)) - base.decrement(w, v, rng); - } - double prob(WordID w, const vector& v) const { - double p0 = base.prob(w, v); - return crp.prob(w, p0); - } - double log_likelihood() const { return crp.log_crp_prob() + base.log_likelihood(); } - void resample_hyperparameters(MT19937* rng) { crp.resample_hyperparameters(rng); } - PoissonLengthUniformCharWordModel base; - CCRP crp; -}; - -template struct PYPLM; - -#if 1 -template<> struct PYPLM<0> 
: public UniformVocabulary { - PYPLM(unsigned vs, double a, double b, double c, double d) : - UniformVocabulary(vs, a, b, c, d) {} -}; -#else -#if 0 -template<> struct PYPLM<0> : public PoissonLengthUniformCharWordModel { - PYPLM(unsigned vs, double a, double b, double c, double d) : - PoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#else -template<> struct PYPLM<0> : public PYPAdaptedPoissonLengthUniformCharWordModel { - PYPLM(unsigned vs, double a, double b, double c, double d) : - PYPAdaptedPoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#endif -#endif - -// represents an N-gram LM -template struct PYPLM { - PYPLM(unsigned vs, double da, double db, double ss, double sr) : - backoff(vs, da, db, ss, sr), - tr(da, db, ss, sr, 0.8, 1.0), - lookup(N-1) {} - void increment(WordID w, const vector& context, MT19937* rng) { - const double bo = backoff.prob(w, context); - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); - if (it == p.end()) { - it = p.insert(make_pair(lookup, CCRP(0.5,1))).first; - tr.Add(&it->second); // add to resampler - } - if (it->second.increment(w, bo, rng)) - backoff.increment(w, context, rng); - } - void decrement(WordID w, const vector& context, MT19937* rng) { - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); - assert(it != p.end()); - if (it->second.decrement(w, rng)) - backoff.decrement(w, context, rng); - } - double prob(WordID w, const vector& context) const { - const double bo = backoff.prob(w, context); - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename unordered_map, CCRP, boost::hash > >::const_iterator it = p.find(lookup); - if (it == p.end()) return bo; - return it->second.prob(w, bo); - } - - double log_likelihood() const { - double llh = 
backoff.log_likelihood(); - typename unordered_map, CCRP, boost::hash > >::const_iterator it; - for (it = p.begin(); it != p.end(); ++it) - llh += it->second.log_crp_prob(); - llh += tr.LogLikelihood(); - return llh; - } - - void resample_hyperparameters(MT19937* rng) { - tr.ResampleHyperparameters(rng); - backoff.resample_hyperparameters(rng); - } - - PYPLM backoff; - TiedResampler > tr; - double discount_a, discount_b, strength_s, strength_r; - double d, strength; - mutable vector lookup; // thread-local - unordered_map, CCRP, boost::hash > > p; -}; - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - const unsigned samples = conf["samples"].as(); - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector > corpuse; - set vocabe; - const WordID kEOS = TD::Convert(""); - cerr << "Reading corpus...\n"; - CorpusTools::ReadFromFile(conf["train"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - vector > test; - if (conf.count("test")) - CorpusTools::ReadFromFile(conf["test"].as(), &test); - else - test = corpuse; - PYPLM lm(vocabe.size(), - conf["discount_prior_a"].as(), - conf["discount_prior_b"].as(), - conf["strength_prior_s"].as(), - conf["strength_prior_r"].as()); - vector ctx(kORDER - 1, TD::Convert("")); - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpuse.size(); ++ci) { - ctx.resize(kORDER - 1); - const vector& s = corpuse[ci]; - for (int i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? s[i] : kEOS); - if (SS > 0) lm.decrement(w, ctx, &rng); - lm.increment(w, ctx, &rng); - ctx.push_back(w); - } - } - if (SS % 10 == 9) { - cerr << " [LLH=" << lm.log_likelihood() << "]" << endl; - if (SS % 30 == 29) lm.resample_hyperparameters(&rng); - } else { cerr << '.' 
<< flush; } - } - double llh = 0; - unsigned cnt = 0; - unsigned oovs = 0; - for (int ci = 0; ci < test.size(); ++ci) { - ctx.resize(kORDER - 1); - const vector& s = test[ci]; - for (int i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? s[i] : kEOS); - double lp = log(lm.prob(w, ctx)) / log(2); - if (i < s.size() && vocabe.count(w) == 0) { - cerr << "**OOV "; - ++oovs; - lp = 0; - } - cerr << "p(" << TD::Convert(w) << " |"; - for (int j = ctx.size() + 1 - kORDER; j < ctx.size(); ++j) - cerr << ' ' << TD::Convert(ctx[j]); - cerr << ") = " << lp << endl; - ctx.push_back(w); - llh -= lp; - cnt++; - } - } - cerr << " Log_10 prob: " << (-llh * log(2) / log(10)) << endl; - cerr << " Count: " << cnt << endl; - cerr << " OOVs: " << oovs << endl; - cerr << "Cross-entropy: " << (llh / cnt) << endl; - cerr << " Perplexity: " << pow(2, llh / cnt) << endl; - return 0; -} - - diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc deleted file mode 100644 index 37b9a604..00000000 --- a/gi/pf/pyp_tm.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "pyp_tm.h" - -#include -#include -#include - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { - FreqBinner(const std::string& fname) { fd_.Load(fname); } - unsigned NumberOfBins() const { return fd_.Max() + 1; } - unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } - FreqDict fd_; -}; - -template -struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : - base(*b), - binner(bnr), - btr(binner ? 
binner->NumberOfBins() + 1u : 2u) {} - - void Summary() const { - cerr << "Number of conditioning contexts: " << r.size() << endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; - for (CCRP >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - cerr << " " << i2->second << '\t' << TD::GetString(i2->first) << endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - btr.ResampleHyperparameters(rng); - } - - prob_t Prob(const WordID src, const vector& trglets) const { - RuleModelHash::const_iterator it = r.find(src); - if (it == r.end()) { - return base(trglets); - } else { - return it->second.prob(trglets, base(trglets)); - } - } - - void Increment(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - if (it == r.end()) { - it = r.insert(make_pair(src, CCRP >(0.5,1.0))).first; - static const WordID kNULL = TD::Convert("NULL"); - unsigned bin = (src == kNULL ? 
0 : 1); - if (binner && bin) { bin = binner->Bin(src) + 1; } - btr.Add(bin, &it->second); - } - if (it->second.increment(trglets, base(trglets), rng)) - base.Increment(trglets, rng); - } - - void Decrement(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - assert(it != r.end()); - if (it->second.decrement(trglets, rng)) { - base.Decrement(trglets, rng); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - return p; - } - - unsigned UniqueConditioningContexts() const { - return r.size(); - } - - // TODO tie PYP hyperparameters based on source word frequency bins - Base& base; - const Binner* binner; - BinTiedResampler > > btr; - typedef unordered_map > > RuleModelHash; - RuleModelHash r; -}; - -PYPLexicalTranslation::PYPLexicalTranslation(const vector >& lets, - const unsigned vocab_size, - const unsigned num_letters) : - letters(lets), - base(vocab_size, num_letters, 5), - tmodel(new ConditionalPYPWordModel(&base, new FreqBinner("10k.freq"))), - kX(-TD::Convert("X")) {} - -void PYPLexicalTranslation::Summary() const { - tmodel->Summary(); -} - -prob_t PYPLexicalTranslation::Likelihood() const { - return tmodel->Likelihood() * base.Likelihood(); -} - -void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { - tmodel->ResampleHyperparameters(rng); -} - -unsigned PYPLexicalTranslation::UniqueConditioningContexts() const { - return tmodel->UniqueConditioningContexts(); -} - -prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const { - return tmodel->Prob(src, letters[trg]); -} - -void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { - tmodel->Increment(src, letters[trg], rng); -} - -void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { - tmodel->Decrement(src, letters[trg], rng); -} - diff --git 
a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h deleted file mode 100644 index 2b076a25..00000000 --- a/gi/pf/pyp_tm.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef PYP_LEX_TRANS -#define PYP_LEX_TRANS - -#include -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template struct ConditionalPYPWordModel; - -struct PYPLexicalTranslation { - explicit PYPLexicalTranslation(const std::vector >& lets, - const unsigned vocab_size, - const unsigned num_letters); - - prob_t Likelihood() const; - - void ResampleHyperparameters(MT19937* rng); - prob_t Prob(WordID src, WordID trg) const; // return p(trg | src) - void Summary() const; - void Increment(WordID src, WordID trg, MT19937* rng); - void Decrement(WordID src, WordID trg, MT19937* rng); - unsigned UniqueConditioningContexts() const; - - private: - const std::vector >& letters; // spelling dictionary - PoissonUniformWordModel base; // "generator" of English types - ConditionalPYPWordModel* tmodel; // translation distributions - // (model English word | French word) - const WordID kX; -}; - -#endif diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h deleted file mode 100644 index 0bebb751..00000000 --- a/gi/pf/pyp_word_model.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _PYP_WORD_MODEL_H_ -#define _PYP_WORD_MODEL_H_ - -#include -#include -#include -#include "prob.h" -#include "ccrp.h" -#include "m.h" -#include "tdict.h" -#include "os_phrase.h" - -// PYP(d,s,poisson-uniform) represented as a CRP -template -struct PYPWordModel { - explicit PYPWordModel(Base* b) : - base(*b), - r(1,1,1,1,0.66,50.0) - {} - - void ResampleHyperparameters(MT19937* rng) { - r.resample_hyperparameters(rng); - std::cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n"; - } - - inline prob_t operator()(const std::vector& s) const { - return r.prob(s, base(s)); - } - - inline void Increment(const std::vector& s, MT19937* rng) { - if 
(r.increment(s, base(s), rng)) - base.Increment(s, rng); - } - - inline void Decrement(const std::vector& s, MT19937 *rng) { - if (r.decrement(s, rng)) - base.Decrement(s, rng); - } - - inline prob_t Likelihood() const { - prob_t p; p.logeq(r.log_crp_prob()); - p *= base.Likelihood(); - return p; - } - - void Summary() const { - std::cerr << "PYPWordModel: generations=" << r.num_customers() - << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << std::endl; - for (typename CCRP >::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << " " << it->second - << TD::GetString(it->first) << std::endl; - } - } - - private: - - Base& base; // keeps track of the draws from the base distribution - CCRP > r; -}; - -#endif diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h deleted file mode 100644 index 4075affe..00000000 --- a/gi/pf/quasi_model2.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef _QUASI_MODEL2_H_ -#define _QUASI_MODEL2_H_ - -#include -#include -#include -#include "boost/functional.hpp" -#include "prob.h" -#include "array2d.h" -#include "slice_sampler.h" -#include "m.h" -#include "have_64_bits.h" - -struct AlignmentObservation { - AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} - AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) : - src_len(sl), trg_len(tl), j(tw), a_j(sw) {} - unsigned short src_len; - unsigned short trg_len; - unsigned short j; - unsigned short a_j; -}; - -#ifdef HAVE_64_BITS -inline size_t hash_value(const AlignmentObservation& o) { - return reinterpret_cast(o); -} -inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) { - return hash_value(a) == hash_value(b); -} -#else -inline size_t hash_value(const AlignmentObservation& o) { - size_t h = 1; - boost::hash_combine(h, o.src_len); - boost::hash_combine(h, o.trg_len); - boost::hash_combine(h, o.j); - boost::hash_combine(h, o.a_j); - return h; -} -#endif - -struct QuasiModel2 { - explicit QuasiModel2(double 
alpha, double pnull = 0.1) : - alpha_(alpha), - pnull_(pnull), - pnotnull_(1 - pnull) {} - - // a_j = 0 => NULL; src_len does *not* include null - prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const { - if (!a_j) return pnull_; - return pnotnull_ * - prob_t(UnnormalizedProb(a_j, j, src_len, trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len)); - } - - void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { - assert(a_j <= src_len); - assert(j < trg_len); - ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)]; - } - - void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { - const AlignmentObservation ao(src_len, trg_len, j, a_j); - int &cc = obs_[ao]; - assert(cc > 0); - --cc; - if (!cc) obs_.erase(ao); - } - - struct PNullResampler { - PNullResampler(const QuasiModel2& m) : m_(m) {} - const QuasiModel2& m_; - double operator()(const double& proposed_pnull) const { - return log(m_.Likelihood(m_.alpha_, proposed_pnull)); - } - }; - - struct AlphaResampler { - AlphaResampler(const QuasiModel2& m) : m_(m) {} - const QuasiModel2& m_; - double operator()(const double& proposed_alpha) const { - return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); - } - }; - - void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - const PNullResampler dr(*this); - const AlphaResampler ar(*this); - for (unsigned i = 0; i < nloop; ++i) { - double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, - 1.0, 0.0, niterations, 100*niterations); - pnull_ = prob_t(pnull); - alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" - << pnull_.as_float() << ") = " << Likelihood() << std::endl; - zcache_.clear(); - } - - prob_t Likelihood() const { - return Likelihood(alpha_, pnull_.as_float()); - } - - prob_t 
Likelihood(double alpha, double ppnull) const { - const prob_t pnull(ppnull); - const prob_t pnotnull(1 - ppnull); - - prob_t p; - p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure - assert(!p.is_0()); - prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); - assert(!prob_of_ppnull.is_0()); - p *= prob_of_ppnull; - for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { - const AlignmentObservation& ao = it->first; - if (ao.a_j) { - prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); - prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); - prob_t pa(u / z); - pa *= pnotnull; - pa.poweq(it->second); - p *= pa; - } else { - p *= pnull.pow(it->second); - } - } - return p; - } - - private: - static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - prob_t p; - p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); - return p; - } - - static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - prob_t z = prob_t::Zero(); - for (int a_j = 1; a_j <= src_len; ++a_j) - z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); - return z; - } - - static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); - } - - static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - double z = 0; - for (int a_j = 1; a_j <= src_len; ++a_j) - z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha); - return z; - } - - const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const { - if (src_len >= zcache_.size()) - zcache_.resize(src_len + 1); - if (trg_len >= zcache_[src_len].size()) - zcache_[src_len].resize(trg_len + 1); - std::vector& zv = zcache_[src_len][trg_len]; - if (zv.size() == 0) - zv.resize(trg_len); - double& z = 
zv[j]; - if (!z) - z = ComputeZ(j, src_len, trg_len, alpha_); - return z; - } - - double alpha_; - prob_t pnull_; - prob_t pnotnull_; - mutable std::vector > > zcache_; - typedef std::tr1::unordered_map > ObsCount; - ObsCount obs_; -}; - -#endif diff --git a/gi/pf/reachability.cc b/gi/pf/reachability.cc deleted file mode 100644 index 7d0d04ac..00000000 --- a/gi/pf/reachability.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "reachability.h" - -#include -#include - -using namespace std; - -struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; -}; - -void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - //cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - if (a[srclen][trglen].empty()) { - cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n"; - nodes = 0; - return; - } - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - nodes = 0; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } - for (int k = 0; k < prevs.size(); ++k) { - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta 
= i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - valid_deltas[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(make_pair(src_delta,j - prevs[k].prev_trg_covered)); - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - assert(max_src_delta[0][0] > 0); - nodes = 0; - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (valid_deltas[i][j].size() > 0) { - node_addresses[i][j] = nodes++; - } else { - node_addresses[i][j] = -1; - } - } - } - cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") has " << valid_deltas[0][0].size() << " out edges in its root node, " << nodes << " nodes in total, and outside estimate matrix will require " << sizeof(float)*nodes << " bytes\n"; - } - diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h deleted file mode 100644 index 1e22c76a..00000000 --- a/gi/pf/reachability.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _REACHABILITY_H_ -#define _REACHABILITY_H_ - -#include "boost/multi_array.hpp" - -// determines minimum and maximum lengths of outgoing edges from all -// coverage positions such that the alignment path respects src and -// trg maximum phrase sizes -// -// runs in O(n^2 * src_max * trg_max) time but should be relatively fast -// -// currently forbids 0 -> n and n -> 0 alignments - -struct Reachability { - unsigned nodes; - boost::multi_array edges; // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring? 
- boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - boost::multi_array node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes") - boost::multi_array >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - nodes(), - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]), - node_addresses(boost::extents[srclen][trglen]), - valid_deltas(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len); -}; - -#endif diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h deleted file mode 100644 index a4f4af36..00000000 --- a/gi/pf/tied_resampler.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _TIED_RESAMPLER_H_ -#define _TIED_RESAMPLER_H_ - -#include -#include -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -template -struct TiedResampler { - explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) : - d_alpha(da), - d_beta(db), - s_shape(ss), - s_rate(sr), - discount(d), - strength(s) {} - - void Add(CRP* crp) { - crps.insert(crp); - crp->set_discount(discount); - crp->set_strength(strength); - assert(!crp->has_discount_prior()); - assert(!crp->has_strength_prior()); - } - - void Remove(CRP* crp) { - crps.erase(crp); - } - - size_t size() const { - return crps.size(); - } - - double LogLikelihood(double d, double s) const { - if (s <= -d) return -std::numeric_limits::infinity(); - double llh = Md::log_beta_density(d, d_alpha, d_beta) + - Md::log_gamma_density(d + s, s_shape, s_rate); - for (typename 
std::set::iterator it = crps.begin(); it != crps.end(); ++it) - llh += (*it)->log_crp_prob(d, s); - return llh; - } - - double LogLikelihood() const { - return LogLikelihood(discount, strength); - } - - struct DiscountResampler { - DiscountResampler(const TiedResampler& m) : m_(m) {} - const TiedResampler& m_; - double operator()(const double& proposed_discount) const { - return m_.LogLikelihood(proposed_discount, m_.strength); - } - }; - - struct AlphaResampler { - AlphaResampler(const TiedResampler& m) : m_(m) {} - const TiedResampler& m_; - double operator()(const double& proposed_strength) const { - return m_.LogLikelihood(m_.discount, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - if (size() == 0) { std::cerr << "EMPTY - not resampling\n"; return; } - const DiscountResampler dr(*this); - const AlphaResampler ar(*this); - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - discount = slice_sampler1d(dr, discount, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "TiedCRPs(d=" << discount << ",s=" - << strength << ") = " << LogLikelihood(discount, strength) << std::endl; - for (typename std::set::iterator it = crps.begin(); it != crps.end(); ++it) - (*it)->set_hyperparameters(discount, strength); - } - private: - std::set crps; - const double d_alpha, d_beta, s_shape, s_rate; - double discount, strength; -}; - -// split according to some criterion -template -struct BinTiedResampler { - explicit BinTiedResampler(unsigned nbins) : - 
resamplers(nbins, TiedResampler(1,1,1,1)) {} - - void Add(unsigned bin, CRP* crp) { - resamplers[bin].Add(crp); - } - - void Remove(unsigned bin, CRP* crp) { - resamplers[bin].Remove(crp); - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < resamplers.size(); ++i) { - std::cerr << "BIN " << i << " (" << resamplers[i].size() << " CRPs): " << std::flush; - resamplers[i].ResampleHyperparameters(rng); - } - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < resamplers.size(); ++i) - llh += resamplers[i].LogLikelihood(); - return llh; - } - - private: - std::vector > resamplers; -}; - -#endif diff --git a/gi/pf/tpf.cc b/gi/pf/tpf.cc deleted file mode 100644 index 7348d21c..00000000 --- a/gi/pf/tpf.cc +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include -#include - -#include "sampler.h" - -using namespace std; -using namespace tr1; - -shared_ptr prng; - -struct Particle { - Particle() : weight(prob_t::One()) {} - vector states; - prob_t weight; - prob_t gamma_last; -}; - -ostream& operator<<(ostream& os, const Particle& p) { - os << "["; - for (int i = 0; i < p.states.size(); ++i) os << p.states[i] << ' '; - os << "| w=" << log(p.weight) << ']'; - return os; -} - -void Rejuvenate(vector& pps) { - SampleSet ss; - vector nps(pps.size()); - for (int i = 0; i < pps.size(); ++i) { -// cerr << pps[i] << endl; - ss.add(pps[i].weight); - } -// cerr << "REJUVINATING...\n"; - for (int i = 0; i < pps.size(); ++i) { - nps[i] = pps[prng->SelectSample(ss)]; - nps[i].weight = prob_t(1.0 / pps.size()); -// cerr << nps[i] << endl; - } - nps.swap(pps); -// exit(1); -} - -int main(int argc, char** argv) { - const unsigned particles = 100; - prng.reset(new MT19937); - MT19937& rng = *prng; - - // q(a) = 0.8 - // q(b) = 0.8 - // q(c) = 0.4 - SampleSet ssq; - ssq.add(0.4); - ssq.add(0.6); - ssq.add(0); - double qz = 1; - - // p(a) = 0.2 - // p(b) = 0.8 - vector p(3); - p[0] = 0.2; - p[1] = 0.8; - p[2] = 0; - - vector counts(3); - int 
tot = 0; - - vector pps(particles); - SampleSet ppss; - int LEN = 12; - int PP = 1; - while (pps[0].states.size() < LEN) { - for (int pi = 0; pi < particles; ++pi) { - Particle& prt = pps[pi]; - - bool redo = true; - const Particle savedp = prt; - while (redo) { - redo = false; - for (int i = 0; i < PP; ++i) { - int s = rng.SelectSample(ssq); - double gamma_last = p[s]; - if (!gamma_last) { redo = true; break; } - double q = ssq[s] / qz; - prt.states.push_back(s); - prt.weight *= prob_t(gamma_last / q); - } - if (redo) { prt = savedp; continue; } - } - } - Rejuvenate(pps); - } - ppss.clear(); - for (int i = 0; i < particles; ++i) { ppss.add(pps[i].weight); } - int sp = rng.SelectSample(ppss); - cerr << pps[sp] << endl; - - return 0; -} - diff --git a/gi/pf/transliterations.cc b/gi/pf/transliterations.cc deleted file mode 100644 index b2996f65..00000000 --- a/gi/pf/transliterations.cc +++ /dev/null @@ -1,334 +0,0 @@ -#include "transliterations.h" - -#include -#include - -#include "boost/shared_ptr.hpp" - -#include "backward.h" -#include "filelib.h" -#include "tdict.h" -#include "trule.h" -#include "filelib.h" -#include "ccrp_nt.h" -#include "m.h" -#include "reachability.h" - -using namespace std; -using namespace std::tr1; - -struct TruncatedConditionalLengthModel { - TruncatedConditionalLengthModel(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : - plens(max_src_size+1, vector(max_trg_size+1, 0.0)) { - for (unsigned i = 1; i <= max_src_size; ++i) { - prob_t z = prob_t::Zero(); - for (unsigned j = 1; j <= max_trg_size; ++j) - z += (plens[i][j] = prob_t(0.01 + exp(Md::log_poisson(j, i * expected_src_to_trg_ratio)))); - for (unsigned j = 1; j <= max_trg_size; ++j) - plens[i][j] /= z; - //for (unsigned j = 1; j <= max_trg_size; ++j) - // cerr << "P(trg_len=" << j << " | src_len=" << i << ") = " << plens[i][j] << endl; - } - } - - // return p(tlen | slen) for *chunks* not full words - inline const prob_t& operator()(int slen, int tlen) 
const { - return plens[slen][tlen]; - } - - vector > plens; -}; - -struct CondBaseDist { - CondBaseDist(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : - tclm(max_src_size, max_trg_size, expected_src_to_trg_ratio) {} - - prob_t operator()(const vector& src, unsigned sf, unsigned st, - const vector& trg, unsigned tf, unsigned tt) const { - prob_t p = tclm(st - sf, tt - tf); // target len | source length ~ TCLM(source len) - assert(!"not impl"); - return p; - } - inline prob_t operator()(const vector& src, const vector& trg) const { - return (*this)(src, 0, src.size(), trg, 0, trg.size()); - } - TruncatedConditionalLengthModel tclm; -}; - -// represents transliteration phrase probabilities, e.g. -// p( a l - | A l ) , p( o | A w ) , ... -struct TransliterationChunkConditionalModel { - explicit TransliterationChunkConditionalModel(const CondBaseDist& pp0) : - d(0.0), - strength(1.0), - rp0(pp0) { - } - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second << '\t' << i2->first << std::endl; - } - } - - int DecrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - int count = it->second.decrement(rule); - if (count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return count; - } - - int IncrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - it = r.insert(make_pair(rule.f_, CCRP_NoTable(strength))).first; - } - int count = it->second.increment(rule); - return count; - } - - void IncrementRules(const std::vector& rules) { - for (int i = 0; i < 
rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = r.find(rule.f_); - if (it == r.end()) { - p = rp0(rule.f_, rule.e_); - } else { - p = it->second.prob(rule, rp0(rule.f_, rule.e_)); - } - return p; - } - - double LogLikelihood(const double& dd, const double& aa) const { - if (aa <= -dd) return -std::numeric_limits::infinity(); - //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); - double llh = //Md::log_beta_density(dd, 1, 1) + - Md::log_gamma_density(dd + aa, 1, 1); - std::tr1::unordered_map, CCRP_NoTable, boost::hash > >::const_iterator it; - for (it = r.begin(); it != r.end(); ++it) - llh += it->second.log_crp_prob(aa); - return llh; - } - - struct AlphaResampler { - AlphaResampler(const TransliterationChunkConditionalModel& m) : m_(m) {} - const TransliterationChunkConditionalModel& m_; - double operator()(const double& proposed_strength) const { - return m_.LogLikelihood(m_.d, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng) { - std::tr1::unordered_map, CCRP_NoTable, boost::hash > >::iterator it; - //const unsigned nloop = 5; - const unsigned niterations = 10; - //DiscountResampler dr(*this); - AlphaResampler ar(*this); -#if 0 - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - d = slice_sampler1d(dr, d, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } -#endif - strength = slice_sampler1d(ar, strength, *rng, -d, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "CTMModel(alpha=" << 
strength << ") = " << LogLikelihood(d, strength) << std::endl; - for (it = r.begin(); it != r.end(); ++it) { -#if 0 - it->second.set_discount(d); -#endif - it->second.set_alpha(strength); - } - } - - prob_t Likelihood() const { - prob_t p; p.logeq(LogLikelihood(d, strength)); - return p; - } - - const CondBaseDist& rp0; - typedef std::tr1::unordered_map, - CCRP_NoTable, - boost::hash > > RuleModelHash; - RuleModelHash r; - double d, strength; -}; - -struct GraphStructure { - GraphStructure() : r() {} - // leak memory - these are basically static - const Reachability* r; - bool IsReachable() const { return r->nodes > 0; } -}; - -struct ProbabilityEstimates { - ProbabilityEstimates() : gs(), backward() {} - explicit ProbabilityEstimates(const GraphStructure& g) : - gs(&g), backward() { - if (g.r->nodes > 0) - backward = new float[g.r->nodes]; - } - // leak memory, these are static - - // returns an estimate of the marginal probability - double MarginalEstimate() const { - if (!backward) return 0; - return backward[0]; - } - - // returns an backward estimate - double Backward(int src_covered, int trg_covered) const { - if (!backward) return 0; - int ind = gs->r->node_addresses[src_covered][trg_covered]; - if (ind < 0) return 0; - return backward[ind]; - } - - prob_t estp; - float* backward; - private: - const GraphStructure* gs; -}; - -struct TransliterationsImpl { - TransliterationsImpl(int max_src, int max_trg, double sr, const BackwardEstimator& b) : - cp0(max_src, max_trg, sr), - tccm(cp0), - be(b), - kMAX_SRC_CHUNK(max_src), - kMAX_TRG_CHUNK(max_trg), - kS2T_RATIO(sr), - tot_pairs(), tot_mem() { - } - const CondBaseDist cp0; - TransliterationChunkConditionalModel tccm; - const BackwardEstimator& be; - - void Initialize(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - const size_t src_len = src_lets.size(); - const size_t trg_len = trg_lets.size(); - - // init graph structure - if (src_len >= graphs.size()) graphs.resize(src_len + 1); - 
if (trg_len >= graphs[src_len].size()) graphs[src_len].resize(trg_len + 1); - GraphStructure& gs = graphs[src_len][trg_len]; - if (!gs.r) { - double rat = exp(fabs(log(trg_len / (src_len * kS2T_RATIO)))); - if (rat > 1.5 || (rat > 2.4 && src_len < 6)) { - cerr << " ** Forbidding transliterations of size " << src_len << "," << trg_len << ": " << rat << endl; - gs.r = new Reachability(src_len, trg_len, 0, 0); - } else { - gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK); - } - } - - const Reachability& r = *gs.r; - - // init backward estimates - if (src >= ests.size()) ests.resize(src + 1); - unordered_map::iterator it = ests[src].find(trg); - if (it != ests[src].end()) return; // already initialized - - it = ests[src].insert(make_pair(trg, ProbabilityEstimates(gs))).first; - ProbabilityEstimates& est = it->second; - if (!gs.r->nodes) return; // not derivable subject to length constraints - - be.InitializeGrid(src_lets, trg_lets, r, kS2T_RATIO, est.backward); - cerr << TD::GetString(src_lets) << " ||| " << TD::GetString(trg_lets) << " ||| " << (est.backward[0] / trg_lets.size()) << endl; - tot_pairs++; - tot_mem += sizeof(float) * gs.r->nodes; - } - - void Forbid(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - const size_t src_len = src_lets.size(); - const size_t trg_len = trg_lets.size(); - // TODO - } - - prob_t EstimateProbability(WordID s, const vector& src, WordID t, const vector& trg) const { - assert(src.size() < graphs.size()); - const vector& tv = graphs[src.size()]; - assert(trg.size() < tv.size()); - const GraphStructure& gs = tv[trg.size()]; - if (gs.r->nodes == 0) - return prob_t::Zero(); - const unordered_map::const_iterator it = ests[s].find(t); - assert(it != ests[s].end()); - return it->second.estp; - } - - void GraphSummary() const { - double to = 0; - double tn = 0; - double tt = 0; - for (int i = 0; i < graphs.size(); ++i) { - const vector& vt = graphs[i]; - for (int j = 0; j < vt.size(); ++j) 
{ - const GraphStructure& gs = vt[j]; - if (!gs.r) continue; - tt++; - for (int k = 0; k < i; ++k) { - for (int l = 0; l < j; ++l) { - size_t c = gs.r->valid_deltas[k][l].size(); - if (c) { - tn += 1; - to += c; - } - } - } - } - } - cerr << " Average nodes = " << (tn / tt) << endl; - cerr << "Average out-degree = " << (to / tn) << endl; - cerr << " Unique structures = " << tt << endl; - cerr << " Unique pairs = " << tot_pairs << endl; - cerr << " BEs size = " << (tot_mem / (1024.0*1024.0)) << " MB" << endl; - } - - const int kMAX_SRC_CHUNK; - const int kMAX_TRG_CHUNK; - const double kS2T_RATIO; - unsigned tot_pairs; - size_t tot_mem; - vector > graphs; // graphs[src_len][trg_len] - vector > ests; // ests[src][trg] -}; - -Transliterations::Transliterations(int max_src, int max_trg, double sr, const BackwardEstimator& be) : - pimpl_(new TransliterationsImpl(max_src, max_trg, sr, be)) {} -Transliterations::~Transliterations() { delete pimpl_; } - -void Transliterations::Initialize(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - pimpl_->Initialize(src, src_lets, trg, trg_lets); -} - -prob_t Transliterations::EstimateProbability(WordID s, const vector& src, WordID t, const vector& trg) const { - return pimpl_->EstimateProbability(s, src,t, trg); -} - -void Transliterations::Forbid(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - pimpl_->Forbid(src, src_lets, trg, trg_lets); -} - -void Transliterations::GraphSummary() const { - pimpl_->GraphSummary(); -} - diff --git a/gi/pf/transliterations.h b/gi/pf/transliterations.h deleted file mode 100644 index 49d14684..00000000 --- a/gi/pf/transliterations.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _TRANSLITERATIONS_H_ -#define _TRANSLITERATIONS_H_ - -#include -#include "wordid.h" -#include "prob.h" - -struct BackwardEstimator; -struct TransliterationsImpl; -struct Transliterations { - // max_src and max_trg indicate how big the transliteration phrases can be - // see 
reachability.h for information about filter_ratio - explicit Transliterations(int max_src, int max_trg, double s2t_rat, const BackwardEstimator& be); - ~Transliterations(); - void Initialize(WordID src, const std::vector& src_lets, WordID trg, const std::vector& trg_lets); - void Forbid(WordID src, const std::vector& src_lets, WordID trg, const std::vector& trg_lets); - void GraphSummary() const; - prob_t EstimateProbability(WordID s, const std::vector& src, WordID t, const std::vector& trg) const; - private: - TransliterationsImpl* pimpl_; -}; - -#endif - diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc deleted file mode 100644 index 40829775..00000000 --- a/gi/pf/unigrams.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "unigrams.h" - -#include -#include - -#include "stringlib.h" -#include "filelib.h" - -using namespace std; - -void UnigramModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - const WordID w = TD::Convert(line.substr(pos + 1)); - line[pos] = 0; - float p = atof(&line[0]); - if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n"; - } -} - -void UnigramWordModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - size_t cur = pos + 1; - vector w; - while (cur < line.size()) { - const size_t len = UTF8Len(line[cur]); - w.push_back(TD::Convert(line.substr(cur, len))); - cur += len; - } - line[pos] = 0; - float p = atof(&line[0]); - probs_[w].logeq(p * log(10.0)); - } -} - diff --git a/gi/pf/unigrams.h b/gi/pf/unigrams.h deleted file mode 100644 index 1660d1ed..00000000 --- a/gi/pf/unigrams.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _UNIGRAMS_H_ -#define _UNIGRAMS_H_ - -#include -#include -#include -#include - -#include "wordid.h" -#include "prob.h" -#include "tdict.h" - -struct UnigramModel { - explicit UnigramModel(const std::string& fname, unsigned vocab_size) : - use_uniform_(fname.size() == 0), - uniform_(1.0 / vocab_size), - probs_() { - if (fname.size() > 0) { - probs_.resize(TD::NumWords() + 1); - LoadUnigrams(fname); - } - } - - const prob_t& operator()(const WordID& w) const { - assert(w); - if (use_uniform_) return uniform_; - return probs_[w]; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - std::vector probs_; -}; - - -// reads an ARPA unigram file and converts words like 'cat' into a string 'c a t' -struct UnigramWordModel { - explicit UnigramWordModel(const std::string& fname) : - use_uniform_(false), - uniform_(1.0), - probs_() { - LoadUnigrams(fname); - } - - explicit UnigramWordModel(const 
unsigned vocab_size) : - use_uniform_(true), - uniform_(1.0 / vocab_size), - probs_() {} - - const prob_t& operator()(const std::vector& s) const { - if (use_uniform_) return uniform_; - const VectorProbHash::const_iterator it = probs_.find(s); - assert(it != probs_.end()); - return it->second; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - typedef std::tr1::unordered_map, prob_t, boost::hash > > VectorProbHash; - VectorProbHash probs_; -}; - -#endif diff --git a/gi/pipeline/OLD.clsp.config b/gi/pipeline/OLD.clsp.config deleted file mode 100644 index cd0f9d65..00000000 --- a/gi/pipeline/OLD.clsp.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM xfeats.grammar dev dev-refs test1 testt-eval.sh ... -btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz xgrammar/grammar.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al -aren /export/ws10smt/data/arabic-english corpus.ar-en.al -uren /export/ws10smt/data/urdu-english corpus.ur-en.al -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/OLD.evaluation-pipeline.pl b/gi/pipeline/OLD.evaluation-pipeline.pl deleted file mode 100755 index 49c303eb..00000000 --- a/gi/pipeline/OLD.evaluation-pipeline.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my @DEFAULT_FEATS = qw( - LogRuleCount SingletonRule LexE2F LexF2E WordPenalty - LogFCount LanguageModel Glue GlueTop PassThrough SingletonF -); - -my %init_weights = qw( - LogRuleCount 0.2 - LexE2F -0.3 - LexF2E -0.3 - LogFCount 0.1 - WordPenalty 
-1.5 - LanguageModel 1.2 - Glue -1.0 - GlueTop 0.00001 - PassThrough -10.0 - SingletonRule -0.1 - X_EGivenF -0.3 - X_FGivenE -0.3 - X_LogECount -1 - X_LogFCount -0.1 - X_LogRuleCount 0.3 - X_SingletonE -0.1 - X_SingletonF -0.1 - X_SingletonRule -0.5 -); - -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTSCORE = "$EXTOOLS/filter_score_grammar"; -my $ADDXFEATS = "$SCRIPT_DIR/scripts/xfeats.pl"; -assert_exec($CDEC, $PARALLELIZE, $FILTSCORE, $DISTVEST, $ADDXFEATS); - -my $config = "$SCRIPT_DIR/OLD.clsp.config"; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my %xgrammars; -print STDERR " LANGUAGE PAIRS:"; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - s/^\s+//; - s/\s+$//; - my ($name, $path, $corpus, $lm, $xgrammar, $dev, $devref, @xtests) = split /\s+/; - $paths{$name} = $path; - $corpora{$name} = $corpus; - $lms{$name} = $lm; - $xgrammars{$name} = $xgrammar; - $devs{$name} = $dev; - $devrefs{$name} = $devref; - $tests{$name} = $xtests[0]; - $testevals{$name} = $xtests[1]; - print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $XFEATS; -my $EXTRA_FILTER = ''; -my $dataDir = '/export/ws10smt/data'; -if (GetOptions( - "data=s" => \$dataDir, - "xfeats" => \$XFEATS, -) == 0 || @ARGV!=2 || $help) { - print_help(); - exit; -} -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR " CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" 
unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . $paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR " GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR " LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -die "Can't find test: $test\n" unless -f $test; -assert_exec($teval); - -if ($XFEATS) { - my $xgram = mydircat($corpdir, $xgrammars{$lp}); - die "Can't find x-grammar: $xgram" unless -f $xgram; - $EXTRA_FILTER = "$ADDXFEATS $xgram |"; - print STDERR "ADDING X-FEATS FROM $xgram\n"; -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -`mkdir -p $outdir`; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f $tuned_weights) { - print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { - my $cmd = "$DISTVEST --ref-files=$drefs 
--source-file=$dev --weights $weights $devini"; - print STDERR "MERT COMMAND: $cmd\n"; - `rm -rf $outdir/vest 2> /dev/null`; - chdir $outdir or die "Can't chdir to $outdir: $!"; - $weights = `$cmd`; - die "MERT reported non-zero exit code" unless $? == 0; - chomp $weights; - safesystem($tuned_weights, "cp $weights $tuned_weights"); - print STDERR "TUNED WEIGHTS: $tuned_weights\n"; - die "$tuned_weights is missing!" unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { - my ($file, @extras) = @_; - open F, ">$file" or die "Can't write $file: $!"; - my @feats = (@DEFAULT_FEATS, @extras); - if ($XFEATS) { - my @xfeats = qw( - X_LogRuleCount X_LogECount X_LogFCount X_EGivenF X_FGivenE X_SingletonRule X_SingletonE X_SingletonF - ); - @feats = (@feats, @xfeats); - } - for my $feat (@feats) { - my $r = rand(1.6); - my $w = $init_weights{$feat} * $r; - if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } - print F "$feat $w\n"; - } - close F; -} - -sub filter { - my ($grammar, $set, $name, $outdir) = @_; - my $outgrammar = mydircat($outdir, "$name.scfg.gz"); - if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { - my $cmd = "gunzip -c $grammar | $FILTSCORE -c $CORPUS -t $set | $EXTRA_FILTER gzip > $outgrammar"; - safesystem($outgrammar, $cmd) or die "Can't filter and score grammar!"; - } - return $outgrammar; -} - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } 
- my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { - my ($filename, $grammar_path) = (@_); - open CDECINI, ">$filename" or die "Can't write $filename: $!"; - print CDECINI <> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! $exitcode; - } -} - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - diff --git a/gi/pipeline/backoff-pipe.pl b/gi/pipeline/backoff-pipe.pl deleted file mode 100644 index ac103c8b..00000000 --- a/gi/pipeline/backoff-pipe.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my @grammars; -my $OUTPUTPREFIX = './giwork/bo.hier.grammar'; -safemkdir($OUTPUTPREFIX); -my $backoff_levels = 1; -my $glue_levels = 1; - -usage() unless &GetOptions('grmr=s@' => \ @grammars, - 'outprefix=s' => \ $OUTPUTPREFIX, - 'bo-lvls=i' => \ $backoff_levels, - 'glue-lvls=i' => \ $glue_levels, -); - -my $OUTDIR = $OUTPUTPREFIX . '/hier'; -print STDERR "@grammars\n"; - - -my %grmr = (); -foreach my $grammar (@grammars) { - $grammar =~ m/\/[^\/]*\.t(\d+)\.[^\/]*/; - $grmr{$1} = $grammar; -} - -my @index = sort keys %grmr; -$OUTDIR = $OUTDIR . join('-',@index); -safemkdir($OUTDIR); -my $BACKOFF_GRMR = $OUTDIR . '/backoff.hier.gz'; -safesystem("echo \"\" | gzip > $BACKOFF_GRMR"); -my $GLUE_GRMR = $OUTDIR . '/glue.hier.gz'; -safesystem("echo \"\" | gzip > $GLUE_GRMR"); -my $joinedgrammars = $OUTDIR . 
'/grammar.hier.gz'; - -join_grammars(); - -for my $i (0..(scalar @index)-2) { - my $freqs = extract_freqs($index[$i], $index[$i+1]); - if ($i < $backoff_levels) { - create_backoff_rules($index[$i],$index[$i+1],$freqs); - } - if ($i < $glue_levels) { - add_glue_rules($index[$i]); - } -} - -output_grammar_info(); - - -sub usage { - print <> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! $exitcode; - } -} - - -sub join_grammars { - print STDERR "\n!!! JOINING GRAMMARS\n"; - if(-e $joinedgrammars) { - print STDERR "$joinedgrammars exists, reusing...\n"; - return; - } - safesystem("echo \"\" | gzip > $joinedgrammars"); - foreach my $i (@index) { - my $g = $grmr{$i}; - safesystem("zcat $g | sed -r -e 's/X([0-9]+)/X$i\\1/g' - | gzip > $g.2.gz"); - safesystem("zcat $joinedgrammars $g.2.gz | gzip > $joinedgrammars.2.gz"); - safesystem("mv $joinedgrammars.2.gz $joinedgrammars"); - } -} - - -sub extract_freqs { - my($grmr1,$grmr2) = @_; - print STDERR "\n!!!EXTRACTING FREQUENCIES: $grmr1->$grmr2\n"; - my $IN_COARSE = substr($grmr{$grmr1},0,index($grmr{$grmr1},".grammar/")) . "/labeled_spans.txt"; - my $IN_FINE = substr($grmr{$grmr2},0,index($grmr{$grmr2},".grammar/")) . 
"/labeled_spans.txt"; - my $OUT_SPANS = "$OUTDIR/labeled_spans.hier$grmr1-$grmr2.txt"; - my $FREQS = "$OUTDIR/label_freq.hier$grmr1-$grmr2.txt"; - if(-e $OUT_SPANS && -e $FREQS) { - print STDERR "$OUT_SPANS exists, reusing...\n"; - print STDERR "$FREQS exists, reusing...\n"; - return $FREQS; - } - - safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); - - my %FREQ_HIER = (); - my %finehier = (); - - open SPANS, $OUT_SPANS or die $!; - while () { - my ($tmp, $coarse, $fine) = split /\|\|\|/; - my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; - my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; - - foreach my $i (0..(scalar @coarse_spans)-1) { - my $coarse_cat = $coarse_spans[$i]; - my $fine_cat = $fine_spans[$i]; - - $FREQ_HIER{$coarse_cat}{$fine_cat}++; - } - } - close SPANS; - foreach (values %FREQ_HIER) { - my $coarse_freq = $_; - my $total = 0; - $total+=$_ for (values %{ $coarse_freq }); - $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); - } - open FREQS, ">", $FREQS or die $!; - foreach my $coarse_cat (keys %FREQ_HIER) { - print FREQS "$coarse_cat |||"; - foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { - my $freq = $FREQ_HIER{$coarse_cat}{$fine_cat}; - print FREQS " $fine_cat:$freq"; - if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $freq) { - $finehier{$fine_cat} = $coarse_cat; - } - } - print FREQS "\n"; - } -# foreach my $fine_cat (keys %finehier) { -# print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -# } - close FREQS; - return $FREQS; -} - - -sub create_backoff_rules { - print STDERR "\n!!! 
CREATING BACKOFF RULES\n"; - my ($grmr1, $grmr2, $freq) = @_; - my $OUTFILE = "$OUTDIR/backoff.hier$grmr1-$grmr2.txt"; - if(-e $OUTFILE) { - print STDERR "$OUTFILE exists, reusing...\n"; - return; - } - open FREQS, $freq or die $!; - open TMP, ">", $OUTFILE or die $!; - while () { - my $line = $_; - $line = m/^(\d+) \|\|\| (.+)$/; - my $coarse = $1; - $line = $2; - my @finefreq = $line =~ m/(\d+):(\S+)/g; - for(my $i = 0; $i < scalar @finefreq; $i+=2) { - my $finecat = $finefreq[$i]; - my $finefreq = $finefreq[$i+1]; - print TMP "[X$grmr1$coarse] ||| [X$grmr2$finecat,1]\t[1] ||| BackoffRule=$finefreq A=0-0\n"; - } - } - close TMP; - close FREQS; - safesystem("zcat $BACKOFF_GRMR | cat - $OUTFILE | gzip > $BACKOFF_GRMR.2.gz"); - safesystem("mv $BACKOFF_GRMR.2.gz $BACKOFF_GRMR"); -} - -sub add_glue_rules { - print STDERR "\n!!! CREATING GLUE RULES\n"; - my ($grmr) = @_; - my $OUTFILE = "$OUTDIR/glue.$grmr.gz"; - if (-e $OUTFILE) { - print STDERR "$OUTFILE exists, reusing...\n"; - return; - } - open TMP, ">", $OUTFILE or die $!; - for my $i (0..($grmr-1)) { - print TMP "[S] ||| [S,1] [X$grmr$i,2] ||| [1] [2] ||| Glue=1\n"; - print TMP "[S] ||| [X$grmr$i,1] ||| [1] ||| GlueTop=1\n"; - } - close TMP; - safesystem("zcat $GLUE_GRMR | cat - $OUTFILE | gzip > $GLUE_GRMR.2.gz"); - safesystem("mv $GLUE_GRMR.2.gz $GLUE_GRMR"); -} - -sub output_grammar_info { - print STDERR "\n!!! GRAMMAR INFORMATION\n"; - print STDOUT "GRAMMAR: \t$joinedgrammars\n"; - print STDOUT "GLUE: \t$GLUE_GRMR\n"; - print STDOUT "BACKOFF: \t$BACKOFF_GRMR\n"; -} diff --git a/gi/pipeline/blacklight.config b/gi/pipeline/blacklight.config deleted file mode 100644 index fc59a604..00000000 --- a/gi/pipeline/blacklight.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/usr/users/0/cdyer/ws10smt/data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /usr/users/0/cdyer/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config deleted file mode 100644 index c23d409f..00000000 --- a/gi/pipeline/clsp.config +++ /dev/null @@ -1,10 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/export/ws10smt/data -btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /export/ws10smt/data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl deleted file mode 100755 index 4b4529d9..00000000 --- a/gi/pipeline/evaluation-pipeline.pl +++ /dev/null @@ -1,364 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } -use LocalConfig; - -my $JOBS = 15; -my $PMEM = "9G"; -my $NUM_TRANSLATIONS = 50; -my $GOAL = "S"; - -# featurize_grammar may add multiple features from a single feature extractor -# the key in this map is the extractor name, the value is a list of the extracted features -my $feat_map = { - "LogRuleCount" => [ "LogRuleCount", "SingletonRule" ] , -# "XFeatures" => [ "XFE","XEF" ] , - "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE"], # ,"XE_Singleton","XF_Singleton"] , - "LabelledRuleConditionals" => [ "LabelledFE","LabelledEF" ] , - "LexProb" => [ "LexE2F", "LexF2E" ] , - "BackoffRule" => [ "BackoffRule" ] , - "RulePenalty" => [ "RulePenalty" ] , - "LHSProb" => [ "LHSProb" ] , - "LabellingShape" => [ "LabellingShape" ] , - "GenerativeProb" => [ "GenerativeProb" ] , -}; - 
-my %init_weights = qw( - EGivenF -0.735245 - FGivenE -0.219391 - Glue -0.306709 - GlueTop 0.0473331 - LanguageModel 2.40403 - LexE2F -0.266989 - LexF2E -0.550373 - LogECount -0.129853 - LogFCount -0.194037 - LogRuleCount 0.256706 - BackoffRule 0.5 - XFE -0.256706 - XEF -0.256706 - XF_Singleton -0.05 - XE_Singleton -0.8 - LabelledFE -0.256706 - LabelledEF -0.256706 - PassThrough -0.9304905 - SingletonE -3.04161 - SingletonF 0.0714027 - SingletonRule -0.889377 - WordPenalty -1.99495 - RulePenalty -0.1 - LabellingShape -0.1 - LHSProb -0.1 - GenerativeProb -0.1 -); - - -# these features are included by default -my @DEFAULT_FEATS = qw( PassThrough Glue GlueTop LanguageModel WordPenalty ); - - - -my $FILTERBYF = "$SCRIPT_DIR/scripts/filter-by-f.pl"; -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTER = "$EXTOOLS/filter_grammar"; -my $FEATURIZE = "$EXTOOLS/featurize_grammar"; -assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST, $FILTERBYF); - -my $numtopics = 25; - -my $config = "$SCRIPT_DIR/" . (lc environment_name()) . '.config'; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my $datadir; -print STDERR " LANGUAGE PAIRS:"; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - s/^\s+//; - s/\s+$//; - if (! 
defined $datadir) { $datadir = $_; next; } - my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/; - $paths{$name} = $path; - $corpora{$name} = $corpus; - $lms{$name} = $lm; - $devs{$name} = $dev; - $devrefs{$name} = $devref; - $tests{$name} = $xtests[0]; - $testevals{$name} = $xtests[1]; - print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $FEATURIZER_OPTS = ''; -my $dataDir = '/export/ws10smt/data'; -my @features; -my $bkoffgram; -my $gluegram; -my $oovgram; -my $usefork; -my $lmorder = 3; -my $density; -if (GetOptions( - "backoff-grammar=s" => \$bkoffgram, - "density-prune=f" => \$density, - "glue-grammar=s" => \$gluegram, - "oov-grammar=s" => \$oovgram, - "data=s" => \$dataDir, - "pmem=s" => \$PMEM, - "n=i" => \$NUM_TRANSLATIONS, - "features=s@" => \@features, - "use-fork" => \$usefork, - "jobs=i" => \$JOBS, - "out-dir=s" => \$outdir, - "lmorder=i" => \$lmorder, - "goal=s" => \$GOAL, -) == 0 || @ARGV!=2 || $help) { - print_help(); - exit; -} -my $DENSITY_PRUNE = ''; -if ($density) { - $DENSITY_PRUNE = "--density-prune $density"; -} -if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; } -my @fkeys = keys %$feat_map; -die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0; -my @xfeats; -for my $feat (@features) { - my $rs = $feat_map->{$feat}; - if (!defined $rs) { die "DON'T KNOW ABOUT FEATURE $feat\n"; } - my @xfs = @$rs; - @xfeats = (@xfeats, @xfs); - $FEATURIZER_OPTS .= " -f $feat" unless $feat eq "BackoffRule"; -} -print STDERR "X-FEATS: @xfeats\n"; - -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR " CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . 
$paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR " GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR " LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -#die "Can't find test: $test\n" unless -f $test; -#assert_exec($teval); - -`mkdir -p $outdir`; - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights, @xfeats); - -my $bkoff_grmr; -my $glue_grmr; -if($bkoffgram) { - print STDERR "Placing backoff grammar…\n"; - $bkoff_grmr = mydircat($outdir, "backoff.scfg.gz"); - print STDERR "cp $bkoffgram $bkoff_grmr\n"; - safesystem(undef,"cp $bkoffgram $bkoff_grmr"); -} -if($gluegram) { - print STDERR "Placing glue grammar…\n"; - $glue_grmr = mydircat($outdir, "glue.bo.scfg.gz"); - print STDERR "cp $gluegram $glue_grmr\n"; - safesystem(undef,"cp $gluegram $glue_grmr"); -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f 
$tuned_weights) { - print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { - my $cmd = "$DISTVEST $usefork $DENSITY_PRUNE --decode-nodes $JOBS --pmem=$PMEM --ref-files=$drefs --source-file=$dev --weights $weights $devini"; - print STDERR "MERT COMMAND: $cmd\n"; - `rm -rf $outdir/vest 2> /dev/null`; - chdir $outdir or die "Can't chdir to $outdir: $!"; - $weights = `$cmd`; - die "MERT reported non-zero exit code" unless $? == 0; - chomp $weights; - safesystem($tuned_weights, "cp $weights $tuned_weights"); - print STDERR "TUNED WEIGHTS: $tuned_weights\n"; - die "$tuned_weights is missing!" unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { - my ($file, @extras) = @_; - if (-f $file) { - print STDERR "$file exists - REUSING!\n"; - return; - } - open F, ">$file" or die "Can't write $file: $!"; - my @feats = (@DEFAULT_FEATS, @extras); - for my $feat (@feats) { - my $r = rand(0.4) + 0.8; - my $w = $init_weights{$feat} * $r; - if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } - print F "$feat $w\n"; - } - close F; -} - -sub filter { - my ($grammar, $set, $name, $outdir) = @_; - my $out1 = mydircat($outdir, "$name.filt.gz"); - my $out2 = mydircat($outdir, "$name.f_feat.gz"); - my $outgrammar = mydircat($outdir, "$name.scfg.gz"); - if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { - my $cmd = "gunzip -c $grammar | $FILTER -t $set | gzip > $out1"; - 
safesystem($out1, $cmd) or die "Filtering failed."; - $cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2"; - safesystem($out2, $cmd) or die "Featurizing failed"; - $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar"; - safesystem($outgrammar, $cmd) or die "Secondary filtering failed"; - } - return $outgrammar; -} - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } - my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { - my ($filename, $grammar_path) = (@_); - open CDECINI, ">$filename" or die "Can't write $filename: $!"; - my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz"); - my $oov = ($oovgram ? "$oovgram" : "$datadir/oov.scfg.gz"); - print CDECINI <> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! $exitcode; - } -} - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl deleted file mode 100755 index e31167a2..00000000 --- a/gi/pipeline/local-gi-pipeline.pl +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; -my $BASE_PHRASE_MAX_SIZE = 10; -my $COMPLETE_CACHE = 1; -my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors -my $NUM_TOPICS = 50; -my $NUM_TOPICS_COARSE; -my $NUM_TOPICS_FINE = $NUM_TOPICS; -my $NUM_SAMPLES = 1000; -my $CONTEXT_SIZE = 1; -my $BIDIR = 0; -my $TOPICS_CONFIG = "pyp-topics.conf"; -my $LANGUAGE = 
"target"; -my $LABEL_THRESHOLD = "0"; -my $PRESERVE_PHRASES; - -my $MODEL = "pyp"; -my $NUM_ITERS = 100; -my $PR_SCALE_P = 0; -my $PR_SCALE_C = 0; -my $PR_FLAGS = ""; -my $MORFMARK = ""; - -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $PYPTOOLS = "$SCRIPT_DIR/../pyp-topics/src"; -die "Can't find pyp-topics: $PYPTOOLS" unless -e $PYPTOOLS && -d $PYPTOOLS; -my $PYPSCRIPTS = "$SCRIPT_DIR/../pyp-topics/scripts"; -die "Can't find pyp-topics: $PYPSCRIPTS" unless -e $PYPSCRIPTS && -d $PYPSCRIPTS; -my $PRTOOLS = "$SCRIPT_DIR/../posterior-regularisation"; -die "Can't find posterior-regularisation: $PRTOOLS" unless -e $PRTOOLS && -d $PRTOOLS; -my $REDUCER = "$EXTOOLS/mr_stripe_rule_reduce"; -my $C2D = "$PYPSCRIPTS/contexts2documents.py"; -my $S2L = "$PYPSCRIPTS/spans2labels.py"; -my $SPLIT = "$SCRIPT_DIR/../posterior-regularisation/split-languages.py"; - -my $PREM_TRAIN="$PRTOOLS/prjava/train-PR-cluster.sh"; - -my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh"; -my $PATCH_CORPUS = "$SCRIPT_DIR/scripts/patch-corpus.pl"; -my $REMOVE_TAGS_CORPUS = "$SCRIPT_DIR/scripts/remove-tags-from-corpus.pl"; -my $REMOVE_TAGS_CONTEXT = "$SCRIPT_DIR/scripts/remove-tags-from-contexts.pl"; -my $EXTRACTOR = "$EXTOOLS/extractor"; -my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train"; -my $MORF_DOC_FILTER = "$SCRIPT_DIR/../morf-segmentation/filter_docs.pl"; - -assert_exec($PATCH_CORPUS, $SORT_KEYS, $REDUCER, $EXTRACTOR, - $S2L, $C2D, $TOPIC_TRAIN, $SPLIT, $REMOVE_TAGS_CONTEXT, $REMOVE_TAGS_CORPUS, $MORF_DOC_FILTER); - -my $BACKOFF_GRAMMAR; -my $DEFAULT_CAT; -my $HIER_CAT; -my %FREQ_HIER = (); -my $TAGGED_CORPUS; - -my $NAME_SHORTCUT; - -my $OUTPUT = './giwork'; -usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE, - 'backoff_grammar' => \$BACKOFF_GRAMMAR, - 'output=s' => \$OUTPUT, - 'model=s' => \$MODEL, - 'topics=i' => \$NUM_TOPICS_FINE, - 'coarse_topics=i' => \$NUM_TOPICS_COARSE, - 'trg_context=i' 
=> \$CONTEXT_SIZE, - 'samples=i' => \$NUM_SAMPLES, - 'label_threshold=f' => \$LABEL_THRESHOLD, - 'use_default_cat' => \$DEFAULT_CAT, - 'topics-config=s' => \$TOPICS_CONFIG, - 'iterations=i' => \$NUM_ITERS, - 'pr-scale-phrase=f' => \$PR_SCALE_P, - 'pr-scale-context=f' => \$PR_SCALE_C, - 'pr-flags=s' => \$PR_FLAGS, - 'tagged_corpus=s' => \$TAGGED_CORPUS, - 'language=s' => \$LANGUAGE, - 'get_name_only' => \$NAME_SHORTCUT, - 'preserve_phrases' => \$PRESERVE_PHRASES, - 'morf=s' => \$MORFMARK, - ); -if ($NAME_SHORTCUT) { - $NUM_TOPICS = $NUM_TOPICS_FINE; - print STDERR labeled_dir(); - exit 0; -} -usage() unless scalar @ARGV == 1; -my $CORPUS = $ARGV[0]; -open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F; - -$NUM_TOPICS = $NUM_TOPICS_FINE; - -$HIER_CAT = ( $NUM_TOPICS_COARSE ? 1 : 0 ); - -print STDERR " Output: $OUTPUT\n"; -my $DATA_DIR = $OUTPUT . '/corpora'; -my $LEX_NAME = "corpus.f_e_a.$LANGUAGE.lex"; -my $CORPUS_LEX = $DATA_DIR . '/' . $LEX_NAME; # corpus used to extract rules -my $CORPUS_CLUSTER = $DATA_DIR . "/corpus.f_e_a.$LANGUAGE.cluster"; # corpus used for clustering (often identical) - -my $CONTEXT_DIR = $OUTPUT . '/' . context_dir(); -my $CLUSTER_DIR = $OUTPUT . '/' . cluster_dir(); -my $LABELED_DIR = $OUTPUT . '/' . labeled_dir(); -my $CLUSTER_DIR_C; -my $CLUSTER_DIR_F; -my $LABELED_DIR_C; -my $LABELED_DIR_F; -if($HIER_CAT) { - $CLUSTER_DIR_F = $CLUSTER_DIR; - $LABELED_DIR_F = $LABELED_DIR; - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR_C = $OUTPUT . '/' . cluster_dir(); - $LABELED_DIR_C = $OUTPUT . '/' . labeled_dir(); - $NUM_TOPICS = $NUM_TOPICS_FINE; -} -my $GRAMMAR_DIR = $OUTPUT . '/' . 
grammar_dir(); -print STDERR " Context: $CONTEXT_DIR\n Cluster: $CLUSTER_DIR\n Labeled: $LABELED_DIR\n Grammar: $GRAMMAR_DIR\n"; -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($DATA_DIR) or die "Couldn't create output directory $DATA_DIR: $!"; -safemkdir($CONTEXT_DIR) or die "Couldn't create output directory $CONTEXT_DIR: $!"; -safemkdir($CLUSTER_DIR) or die "Couldn't create output directory $CLUSTER_DIR: $!"; -if($HIER_CAT) { - safemkdir($CLUSTER_DIR_C) or die "Couldn't create output directory $CLUSTER_DIR_C: $!"; - safemkdir($LABELED_DIR_C) or die "Couldn't create output directory $LABELED_DIR_C: $!"; -} -safemkdir($LABELED_DIR) or die "Couldn't create output directory $LABELED_DIR: $!"; -safemkdir($GRAMMAR_DIR) or die "Couldn't create output directory $GRAMMAR_DIR: $!"; -if(-e $TOPICS_CONFIG) { - copy($TOPICS_CONFIG, $CLUSTER_DIR) or die "Copy failed: $!"; -} - -setup_data(); - -if (lc($MODEL) eq "blagree") { - extract_bilingual_context(); -} else { - extract_context(); -} - -if (lc($MODEL) eq "pyp") { - if($HIER_CAT) { - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR = $CLUSTER_DIR_C; - topic_train(); - $NUM_TOPICS = $NUM_TOPICS_FINE; - $CLUSTER_DIR = $CLUSTER_DIR_F; - topic_train(); - } else { - topic_train(); - } -} elsif (lc($MODEL) =~ /pr|em|agree/) { - prem_train(); -} else { die "Unsupported model type: $MODEL. 
Must be one of PYP or PREM.\n"; } -if($HIER_CAT) { - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR = $CLUSTER_DIR_C; - $LABELED_DIR = $LABELED_DIR_C; - label_spans_with_topics(); - $NUM_TOPICS = $NUM_TOPICS_FINE; - $CLUSTER_DIR = $CLUSTER_DIR_F; - $LABELED_DIR = $LABELED_DIR_F; - label_spans_with_topics(); - extract_freqs(); -} else { - label_spans_with_topics(); -} -my $res; -if ($BIDIR) { - $res = grammar_extract_bidir(); -} else { - $res = grammar_extract(); -} -print STDERR "\n!!!COMPLETE!!!\n"; -print STDERR "GRAMMAR: $res\nYou should probably run: $SCRIPT_DIR/evaluation-pipeline.pl LANGPAIR giwork/ct1s0.L10.PYP.t4.s20.grammar/grammar.gz -f FEAT1 -f FEAT2\n\n"; -exit 0; - -sub setup_data { - print STDERR "\n!!!PREPARE CORPORA!!!\n"; - if (-f $CORPUS_LEX && $CORPUS_CLUSTER) { - print STDERR "$CORPUS_LEX and $CORPUS_CLUSTER exist, reusing...\n"; - return; - } - copy($CORPUS, $CORPUS_LEX); - if ($TAGGED_CORPUS) { - die "Can't find $TAGGED_CORPUS" unless -f $TAGGED_CORPUS; - my $opt=""; - $opt = "-s" if ($LANGUAGE eq "source"); - $opt = $opt . " -a" if ($PRESERVE_PHRASES); - my $cmd="$PATCH_CORPUS $opt $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER"; - safesystem($cmd) or die "Failed to extract contexts."; - } else { - symlink($LEX_NAME, $CORPUS_CLUSTER); - } -} - -sub context_dir { - return "ct${CONTEXT_SIZE}s0.L$BASE_PHRASE_MAX_SIZE.l$LANGUAGE"; -} - -sub cluster_dir { - if (lc($MODEL) eq "pyp") { - return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES"; - } elsif (lc($MODEL) eq "em") { - return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_ITERS"; - } elsif (lc($MODEL) eq "pr") { - return context_dir() . ".PR.t$NUM_TOPICS.i$NUM_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C"; - } elsif (lc($MODEL) eq "agree") { - return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_ITERS"; - } elsif (lc($MODEL) eq "blagree") { - return context_dir() . ".BLAGREE.t$NUM_TOPICS.i$NUM_ITERS"; - } -} - -sub labeled_dir { - if (lc($MODEL) eq "pyp" && $LABEL_THRESHOLD ne "0") { - return cluster_dir() . 
"_lt$LABEL_THRESHOLD"; - } else { - return cluster_dir(); - } -} - -sub grammar_dir { - # TODO add grammar config options -- adjacent NTs, etc - if($HIER_CAT) { - return cluster_dir() . ".hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.grammar"; - } else { - return labeled_dir() . ".grammar"; - } -} - - - -sub safemkdir { - my $dir = shift; - if (-d $dir) { return 1; } - return mkdir($dir); -} - -sub usage { - print < $CLUSTER_DIR/clusters.txt") or die "Failed to unzip"; - safesystem("$EXTRACTOR --base_phrase_spans -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -S $CONTEXT_SIZE | $S2L $CLUSTER_DIR/clusters.txt $CONTEXT_SIZE $LABEL_THRESHOLD $extra > $OUT_SPANS") or die "Failed to label spans"; - unlink("$CLUSTER_DIR/clusters.txt") or warn "Failed to remove $CLUSTER_DIR/clusters.txt"; - safesystem("paste -d ' ' $CORPUS_LEX $OUT_SPANS | sed 's/ *||| *\$//' > $LABELED_DIR/corpus.src_trg_al_label") or die "Couldn't paste"; - } -} - -sub extract_freqs { - print STDERR "\n!!!EXTRACTING FREQUENCIES\n"; - my $IN_COARSE = "$LABELED_DIR_C/labeled_spans.txt"; - my $IN_FINE = "$LABELED_DIR_F/labeled_spans.txt"; - my $OUT_SPANS = "$LABELED_DIR_F/labeled_spans.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; - my $FREQS = "$LABELED_DIR_F/label_freq.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; - my $COARSE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1c/g\'"; #' - my $FINE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1f/g\'"; #' - my %finehier = (); - if (-e $OUT_SPANS) { - print STDERR "$OUT_SPANS exists, reusing...\n"; - } else { - safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); - } - open SPANS, $OUT_SPANS or die $!; - while () { - my ($tmp, $coarse, $fine) = split /\|\|\|/; - my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; - my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; - - foreach my $i (0..(scalar @coarse_spans)-1) { - my $coarse_cat = $coarse_spans[$i]; - my $fine_cat = $fine_spans[$i]; - - $FREQ_HIER{$coarse_cat}{$fine_cat}++; - } - } - close SPANS; - foreach (values 
%FREQ_HIER) { - my $coarse_freq = $_; - my $total = 0; - $total+=$_ for (values %{ $coarse_freq }); - $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); - } - open FREQS, ">", $FREQS or die $!; - foreach my $coarse_cat (keys %FREQ_HIER) { - print FREQS "$coarse_cat |||"; - foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { - my $res = $FREQ_HIER{$coarse_cat}{$fine_cat}; - print FREQS " $fine_cat:$res"; - if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $res) { - $finehier{$fine_cat} = $coarse_cat; - } - } - print FREQS "\n"; - } -# foreach my $fine_cat (keys %finehier) { -# print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -# } - close FREQS; - $CLUSTER_DIR = $CLUSTER_DIR_F; -} - -sub grammar_extract { - my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; - print STDERR "\n!!!EXTRACTING GRAMMAR\n"; - my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.gz"; - if (-e $OUTGRAMMAR) { - print STDERR "$OUTGRAMMAR exists, reusing...\n"; - } else { - my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : ""); - my $DEFAULT_CAT_ARG = ($DEFAULT_CAT ? "-d X" : ""); - safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -t $NUM_TOPICS $BACKOFF_ARG $DEFAULT_CAT_ARG | $SORT_KEYS | $REDUCER -p | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; - } - return $OUTGRAMMAR; -} - -sub grammar_extract_bidir { -#gzcat ex.output.gz | ./mr_stripe_rule_reduce -p -b | sort -t $'\t' -k 1 | ./mr_stripe_rule_reduce | gzip > phrase-table.gz - my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; - print STDERR "\n!!!EXTRACTING GRAMMAR\n"; - my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.bidir.gz"; - if (-e $OUTGRAMMAR) { - print STDERR "$OUTGRAMMAR exists, reusing...\n"; - } else { - my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? 
"-g" : ""); - safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -b -t $NUM_TOPICS $BACKOFF_ARG | $SORT_KEYS | $REDUCER -p -b | $SORT_KEYS | $REDUCER | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; - } - return $OUTGRAMMAR; -} - -sub safesystem { - print STDERR "Executing: @_\n"; - system(@_); - if ($? == -1) { - print STDERR "ERROR: Failed to execute: @_\n $!\n"; - exit(1); - } - elsif ($? & 127) { - printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - exit(1); - } - else { - my $exitcode = $? >> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! $exitcode; - } -} - diff --git a/gi/pipeline/lticluster.config b/gi/pipeline/lticluster.config deleted file mode 100644 index 3e23c8cb..00000000 --- a/gi/pipeline/lticluster.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/home/cdyer/ws10smt-data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /home/cdyer/ws10smt-data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl deleted file mode 100755 index 0cef0606..00000000 --- a/gi/pipeline/scripts/filter-by-f.pl +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my $REKEY="$SCRIPT_DIR/rekey.pl"; -my $REFILTER="$SCRIPT_DIR/refilter.pl"; -my $SORT="$SCRIPT_DIR/sort-by-key.sh"; -assert_exec($REKEY, $REFILTER, $SORT); - - -die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3; -my $translations = shift @ARGV; -die "Need number: $translations" unless $translations > 0; -die unless $ARGV[0] =~ /\.gz$/; -die unless $ARGV[1] =~ /\.gz$/; -die if $ARGV[0] eq $ARGV[1]; -die "Can't find $ARGV[0]" unless -f $ARGV[0]; - -my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]"; -safesystem($ARGV[1], $cmd) or die "Filtering failed"; -exit 0; - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - -sub safesystem { - my $output = shift @_; - print STDERR "Executing: @_\n"; - 
system(@_); - if ($? == -1) { - print STDERR "ERROR: Failed to execute: @_\n $!\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - exit(1); - } - elsif ($? & 127) { - printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - exit(1); - } - else { - my $exitcode = $? >> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! $exitcode; - } -} - diff --git a/gi/pipeline/scripts/patch-corpus.pl b/gi/pipeline/scripts/patch-corpus.pl deleted file mode 100755 index c0eec43e..00000000 --- a/gi/pipeline/scripts/patch-corpus.pl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $PATCH = shift @ARGV; -my $TGT = 1; -my $APPEND; -while ($PATCH eq "-s" || $PATCH eq "-a") { - if ($PATCH eq "-s") { - undef $TGT; - } else { - $APPEND = 1; - } - $PATCH = shift @ARGV; -} - -die "Usage: $0 [-s] [-a] tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH; - -open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!"; -my $first=

; close P; -my @fields = split / \|\|\| /, $first; -die "Bad format!" if (scalar @fields > 2); - -if (scalar @fields != 1) { - # TODO support this - die "Patching source and target not supported yet!"; -} - -my $line = 0; -open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!"; -while(my $pline =

) { - chomp $pline; - $line++; - my $line = <>; - die "Too few lines in lexical corpus!" unless $line; - chomp $line; - @fields = split / \|\|\| /, $line; - my @pwords = split /\s+/, $pline; - if ($TGT) { - my @lwords = split /\s+/, $fields[1]; - die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords); - if ($APPEND) { - foreach my $i (0..(scalar @pwords-1)) { - $lwords[$i] = $lwords[$i] . '_' . $pwords[$i]; - } - $fields[1] = join ' ', @lwords; - } else { - $fields[1] = $pline; - } - } else { # source side - my @lwords = split /\s+/, $fields[0]; - die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords); - if ($APPEND) { - foreach my $i (0..(scalar @pwords-1)) { - $lwords[$i] = $lwords[$i] . '_' . $pwords[$i]; - } - $fields[0] = join ' ', @lwords; - } else { - $fields[0] = $pline; - } - } - print join ' ||| ', @fields; - print "\n"; -} - - diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl deleted file mode 100755 index a783eb4e..00000000 --- a/gi/pipeline/scripts/refilter.pl +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $NUM_TRANSLATIONS = shift @ARGV; -unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; } -print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n"; - -my $pk = ''; -my %dict; -while(<>) { - s/^(.+)\t//; - my $key = $1; - if ($key ne $pk) { - if ($pk) { - emit_dict(); - } - %dict = (); - $pk = $key; - } - my ($lhs, $f, $e, $s) = split / \|\|\| /; - my $score = 0; - if ($s =~ /XEF=([^ ]+)/) { - $score += $1; - } else { die; } - if ($s =~ /GenerativeProb=([^ ]+)/) { - $score += ($1 / 10); - } else { die; } - $dict{"$lhs ||| $f ||| $e ||| $s"} = $score; -} -emit_dict(); - -sub emit_dict { - my $cc = 0; - for my $k (sort { $dict{$a} <=> $dict{$b} } keys %dict) { - print "$k"; - $cc++; - if ($cc >= $NUM_TRANSLATIONS) { last; } - } -} - diff --git a/gi/pipeline/scripts/rekey.pl b/gi/pipeline/scripts/rekey.pl deleted file mode 100755 index 
31eb86b8..00000000 --- a/gi/pipeline/scripts/rekey.pl +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/perl - -while(<>) { - my ($lhs, $f, $e, $s) = split / \|\|\| /; - $f =~ s/\[X[0-9]+\]/\[X\]/g; - print "$f\t$_"; -} - diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl deleted file mode 100755 index 20698816..00000000 --- a/gi/pipeline/scripts/remove-tags-from-contexts.pl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $PHRASE = 'tok'; -my $CONTEXT = 'tag'; - -die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus" - unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - my @top = split /\t/, $line; - die unless (scalar @top == 2); - - my @pwords = split /\s+/, $top[0]; - foreach my $token (@pwords) { - #print $token . "\n"; - my @parts = split /_(?!.*_)/, $token; - die unless (scalar @parts == 2); - if ($PHRASE eq "tok") { - $token = $parts[0] - } elsif ($PHRASE eq "tag") { - $token = $parts[1] - } - } - - my @fields = split / \|\|\| /, $top[1]; - foreach my $i (0..((scalar @fields) / 2 - 1)) { - #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n"; - my @cwords = split /\s+/, $fields[2*$i]; - foreach my $token (@cwords) { - #print $i . ": " . $token . 
"\n"; - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - if ($CONTEXT eq "tok") { - $token = $parts[0] - } elsif ($CONTEXT eq "tag") { - $token = $parts[1] - } - } - } - $fields[2*$i] = join ' ', @cwords; - } - - print join ' ', @pwords; - print "\t"; - print join ' ||| ', @fields; - print "\n"; -} diff --git a/gi/pipeline/scripts/remove-tags-from-corpus.pl b/gi/pipeline/scripts/remove-tags-from-corpus.pl deleted file mode 100755 index be3e97c0..00000000 --- a/gi/pipeline/scripts/remove-tags-from-corpus.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $LANGUAGE = shift @ARGV; -$LANGUAGE = 'target' unless ($LANGUAGE); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - - my @fields = split / \|\|\| /, $line; - - if ($LANGUAGE eq "source" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[0]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[0] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - if ($LANGUAGE eq "target" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[1]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[1] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - print join ' ||| ', @fields; - print "\n"; -} diff --git a/gi/pipeline/scripts/sort-by-key.sh b/gi/pipeline/scripts/sort-by-key.sh deleted file mode 100755 index 7ae33e03..00000000 --- a/gi/pipeline/scripts/sort-by-key.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -export LANG=C -sort -t $'\t' -k 1 -T /tmp -S 6000000000 - diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl deleted file mode 100755 index dc578513..00000000 --- a/gi/pipeline/scripts/xfeats.pl +++ /dev/null @@ -1,39 +0,0 @@ 
-#!/usr/bin/perl -w -use strict; - -die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0; - -my $xgrammar = shift @ARGV; -die "Can't find $xgrammar" unless -f $xgrammar; -my $fh; -if ($xgrammar =~ /\.gz$/) { - open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!"; -} else { - open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!"; -} -print STDERR "Reading X-feats from $xgrammar...\n"; -my %dict; -while(<$fh>) { - chomp; - my ($lhs, $f, $e, $feats) = split / \|\|\| /; - my $xfeats; - my $cc = 0; - my @xfeats = (); - while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) { - push @xfeats, "X_$1=$2"; - } - #print "$lhs ||| $f ||| $e ||| @xfeats\n"; - $dict{"$lhs ||| $f ||| $e"} = "@xfeats"; -} -close $fh; - -print STDERR "Add features...\n"; -while(<>) { - chomp; - my ($lhs, $f, $e) = split / \|\|\| /; - $f=~ s/\[[^]]+,([12])\]/\[X,$1\]/g; - my $xfeats = $dict{"[X] ||| $f ||| $e"}; - die "Can't find x features for: $_\n" unless $xfeats; - print "$_ $xfeats\n"; -} - diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config deleted file mode 100644 index e00a8485..00000000 --- a/gi/pipeline/valhalla.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/home/chris/ws10smt/data -btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al -aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al -uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java deleted file mode 100644 index 07b27387..00000000 --- a/gi/posterior-regularisation/Corpus.java +++ /dev/null @@ -1,167 +0,0 @@ -import gnu.trove.TIntArrayList; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - -public class Corpus -{ - private Lexicon tokenLexicon = new Lexicon(); - private Lexicon phraseLexicon = new Lexicon(); - private Lexicon contextLexicon = new Lexicon(); - private List edges = new ArrayList(); - private List> phraseToContext = new ArrayList>(); - private List> contextToPhrase = new ArrayList>(); - - public class Edge - { - Edge(int phraseId, int contextId, int count) - { - this.phraseId = phraseId; - this.contextId = contextId; - this.count = count; - } - public int getPhraseId() - { - return phraseId; - } - public TIntArrayList getPhrase() - { - return phraseLexicon.lookup(phraseId); - } - public String getPhraseString() - { - StringBuffer b = new StringBuffer(); - for (int tid: getPhrase().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getContextId() - { - return contextId; - } - public TIntArrayList getContext() - { - return contextLexicon.lookup(contextId); - } - public String getContextString() - { - StringBuffer b = new StringBuffer(); - for (int tid: 
getContext().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getCount() - { - return count; - } - private int phraseId; - private int contextId; - private int count; - } - - List getEdges() - { - return edges; - } - - int getNumEdges() - { - return edges.size(); - } - - int getNumPhrases() - { - return phraseLexicon.size(); - } - - List getEdgesForPhrase(int phraseId) - { - return phraseToContext.get(phraseId); - } - - int getNumContexts() - { - return contextLexicon.size(); - } - - List getEdgesForContext(int contextId) - { - return contextToPhrase.get(contextId); - } - - int getNumTokens() - { - return tokenLexicon.size(); - } - - static Corpus readFromFile(Reader in) throws IOException - { - Corpus c = new Corpus(); - - // read in line-by-line - BufferedReader bin = new BufferedReader(in); - String line; - Pattern separator = Pattern.compile(" \\|\\|\\| "); - - while ((line = bin.readLine()) != null) - { - // split into phrase and contexts - StringTokenizer st = new StringTokenizer(line, "\t"); - assert (st.hasMoreTokens()); - String phraseToks = st.nextToken(); - assert (st.hasMoreTokens()); - String rest = st.nextToken(); - assert (!st.hasMoreTokens()); - - // process phrase - st = new StringTokenizer(phraseToks, " "); - TIntArrayList ptoks = new TIntArrayList(); - while (st.hasMoreTokens()) - ptoks.add(c.tokenLexicon.insert(st.nextToken())); - int phraseId = c.phraseLexicon.insert(ptoks); - if (phraseId == c.phraseToContext.size()) - c.phraseToContext.add(new ArrayList()); - - // process contexts - String[] parts = separator.split(rest); - assert (parts.length % 2 == 0); - for (int i = 0; i < parts.length; i += 2) - { - // process pairs of strings - context and count - TIntArrayList ctx = new TIntArrayList(); - String ctxString = parts[i]; - String countString = parts[i + 1]; - StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " "); - while 
(ctxStrtok.hasMoreTokens()) - { - String token = ctxStrtok.nextToken(); - if (!token.equals("")) - ctx.add(c.tokenLexicon.insert(token)); - } - int contextId = c.contextLexicon.insert(ctx); - if (contextId == c.contextToPhrase.size()) - c.contextToPhrase.add(new ArrayList()); - - assert (countString.startsWith("C=")); - Edge e = c.new Edge(phraseId, contextId, - Integer.parseInt(countString.substring(2).trim())); - c.edges.add(e); - - // index the edge for fast phrase, context lookup - c.phraseToContext.get(phraseId).add(e); - c.contextToPhrase.get(contextId).add(e); - } - } - - return c; - } -} diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java deleted file mode 100644 index 9f0245ee..00000000 --- a/gi/posterior-regularisation/Lexicon.java +++ /dev/null @@ -1,32 +0,0 @@ -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class Lexicon -{ - public int insert(T word) - { - Integer i = wordToIndex.get(word); - if (i == null) - { - i = indexToWord.size(); - wordToIndex.put(word, i); - indexToWord.add(word); - } - return i; - } - - public T lookup(int index) - { - return indexToWord.get(index); - } - - public int size() - { - return indexToWord.size(); - } - - private Map wordToIndex = new HashMap(); - private List indexToWord = new ArrayList(); -} \ No newline at end of file diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java deleted file mode 100644 index 85bcfb89..00000000 --- a/gi/posterior-regularisation/PhraseContextModel.java +++ /dev/null @@ -1,466 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for tonight ? ||| C=1 ||| seats for ? ||| C=1 ||| i see ? ||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... 
which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating q(t) = P(t|p,c) for all x,c -// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t) -// - PR uses alternate e-step, which first optimizes lambda -// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc] -// where -// q(t|p,c) propto p(t,c|p) exp( -phi_ptc ) -// Then q is used to obtain expectations for vanilla M-step. - -// Sexing it up: -// - learn p-specific conditionals P(t|p) -// - or generate phrase internals, e.g., generate edge words from -// different distribution to central words -// - agreement between phrase->context model and context->phrase model - -import java.io.*; -import optimization.gradientBasedMethods.*; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.NormalizedProjectedGradientL2Norm; -import optimization.stopCriteria.NormalizedValueDifference; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; -import optimization.util.MathUtils; -import java.util.*; -import java.util.regex.*; -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; -import static java.lang.Math.*; - -class 
PhraseContextModel -{ - // model/optimisation configuration parameters - int numTags; - boolean posteriorRegularisation = true; - double constraintScale = 3; // FIXME: make configurable - - // copied from L1LMax in depparsing code - final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10; - final int maxZoomEvals = 10, maxExtrapolationIters = 200; - int maxProjectionIterations = 200; - int minOccurrencesForProjection = 0; - - // book keeping - int numPositions; - Random rng = new Random(); - - // training set - Corpus training; - - // model parameters (learnt) - double emissions[][][]; // position in 0 .. 3 x tag x word Pr(word | tag, position) - double prior[][]; // phrase x tag Pr(tag | phrase) - double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers - - PhraseContextModel(Corpus training, int tags) - { - this.training = training; - this.numTags = tags; - assert (!training.getEdges().isEmpty()); - assert (numTags > 1); - - // now initialise emissions - numPositions = training.getEdges().get(0).getContext().size(); - assert (numPositions > 0); - - emissions = new double[numPositions][numTags][training.getNumTokens()]; - prior = new double[training.getNumEdges()][numTags]; - if (posteriorRegularisation) - lambda = new double[training.getNumEdges() * numTags]; - - for (double[][] emissionTW : emissions) - { - for (double[] emissionW : emissionTW) - { - randomise(emissionW); -// for (int i = 0; i < emissionW.length; ++i) -// emissionW[i] = i+1; -// normalise(emissionW); - } - } - - for (double[] priorTag : prior) - { - randomise(priorTag); -// for (int i = 0; i < priorTag.length; ++i) -// priorTag[i] = i+1; -// normalise(priorTag); - } - } - - void expectationMaximisation(int numIterations) - { - double lastLlh = Double.NEGATIVE_INFINITY; - - for (int iteration = 0; iteration < numIterations; ++iteration) - { - double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()]; - double priorCounts[][] 
= new double[training.getNumPhrases()][numTags]; - - // E-step - double llh = 0; - if (posteriorRegularisation) - { - EStepDualObjective objective = new EStepDualObjective(); - - // copied from x2y2withconstraints -// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); -// OptimizerStats stats = new OptimizerStats(); -// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); -// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); -// compositeStop.add(new ProjectedGradientL2Norm(0.001)); -// compositeStop.add(new ValueDifference(0.001)); -// optimizer.setMaxIterations(50); -// boolean succeed = optimizer.optimize(objective,stats,compositeStop); - - // copied from depparser l1lmaxobjective - ProjectedOptimizerStats stats = new ProjectedOptimizerStats(); - GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1); - LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2); - ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch); - optimizer.setMaxIterations(maxProjectionIterations); - CompositeStopingCriteria stop = new CompositeStopingCriteria(); - stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision)); - stop.add(new NormalizedValueDifference(stoppingPrecision)); - boolean succeed = optimizer.optimize(objective, stats, stop); - - System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); - //System.out.println("Solution: " + objective.parameters); - if (!succeed) - System.out.println("Failed to optimize"); - //System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - - //lambda = objective.getParameters(); - llh = objective.primal(); - - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - for (int t = 0; t < numTags; t++) - { - 
double p = objective.q.get(i).get(j).get(t); - priorCounts[i][t] += e.getCount() * p; - TIntArrayList tokens = e.getContext(); - for (int k = 0; k < tokens.size(); ++k) - emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p; - } - } - } - } - else - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - priorCounts[i][t] += e.getCount() * probs[t]; - for (int k = 0; k < tokens.size(); ++k) - emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t]; - } - } - } - } - - // M-step: normalise - for (double[][] emissionTW : emissionsCounts) - for (double[] emissionW : emissionTW) - normalise(emissionW); - - for (double[] priorTag : priorCounts) - normalise(priorTag); - - emissions = emissionsCounts; - prior = priorCounts; - - System.out.println("Iteration " + iteration + " llh " + llh); - -// if (llh - lastLlh < 1e-4) -// break; -// else -// lastLlh = llh; - } - } - - static double normalise(double probs[]) - { - double z = 0; - for (double p : probs) - z += p; - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - return z; - } - - void randomise(double probs[]) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - z += probs[i]; - } - - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - static int argmax(double probs[]) - { - double m = Double.NEGATIVE_INFINITY; - int mi = -1; - for (int i = 0; i < probs.length; ++i) - { - if (probs[i] > m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - - double[] posterior(int phraseId, Corpus.Edge e) // unnormalised - { - double probs[] = new double[numTags]; - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - 
probs[t] = prior[phraseId][t]; - for (int k = 0; k < tokens.size(); ++k) - probs[t] *= emissions[k][t][tokens.get(k)]; - } - return probs; - } - - void displayPosterior() - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (Corpus.Edge e: edges) - { - double probs[] = posterior(i, e); - normalise(probs); - - // emit phrase - System.out.print(e.getPhraseString()); - System.out.print("\t"); - System.out.print(e.getContextString()); - System.out.print("||| C=" + e.getCount() + " |||"); - - int t = argmax(probs); - System.out.print(" " + t + " ||| " + probs[t]); - // for (int t = 0; t < numTags; ++t) - // System.out.print(" " + probs[t]); - System.out.println(); - } - } - } - - public static void main(String[] args) - { - assert (args.length >= 2); - try - { - Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0]))); - PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1])); - model.expectationMaximisation(Integer.parseInt(args[2])); - model.displayPosterior(); - } - catch (IOException e) - { - System.out.println("Failed to read input file: " + args[0]); - e.printStackTrace(); - } - } - - class EStepDualObjective extends ProjectedObjective - { - List> conditionals; // phrase id x context # x tag - precomputed - List> q; // ditto, but including exp(-lambda) terms - double objective = 0; // log(z) - // Objective.gradient = d log(z) / d lambda = E_q[phi] - double llh = 0; - - public EStepDualObjective() - { - super(); - // compute conditionals p(context, tag | phrase) for all training instances - conditionals = new ArrayList>(training.getNumPhrases()); - q = new ArrayList>(training.getNumPhrases()); - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - conditionals.add(new ArrayList(edges.size())); - q.add(new ArrayList(edges.size())); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - 
double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - conditionals.get(i).add(new TDoubleArrayList(probs)); - q.get(i).add(new TDoubleArrayList(probs)); - } - } - - gradient = new double[training.getNumEdges()*numTags]; - setInitialParameters(lambda); - computeObjectiveAndGradient(); - } - - @Override - public double[] projectPoint(double[] point) - { - SimplexProjection p = new SimplexProjection(constraintScale); - - double[] newPoint = point.clone(); - int edgeIndex = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - for (int t = 0; t < numTags; t++) - { - double[] subPoint = new double[edges.size()]; - for (int j = 0; j < edges.size(); ++j) - subPoint[j] = point[edgeIndex+j*numTags+t]; - - p.project(subPoint); - for (int j = 0; j < edges.size(); ++j) - newPoint[edgeIndex+j*numTags+t] = subPoint[j]; - } - - edgeIndex += edges.size() * numTags; - } -// System.out.println("Proj from: " + Arrays.toString(point)); -// System.out.println("Proj to: " + Arrays.toString(newPoint)); - return newPoint; - } - - @Override - public void setParameters(double[] params) - { - super.setParameters(params); - computeObjectiveAndGradient(); - } - - @Override - public double[] getGradient() - { - gradientCalls += 1; - return gradient; - } - - @Override - public double getValue() - { - functionCalls += 1; - return objective; - } - - public void computeObjectiveAndGradient() - { - int edgeIndex = 0; - objective = 0; - Arrays.fill(gradient, 0); - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - - double z = 0; - for (int t = 0; t < numTags; t++) - { - double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]); - q.get(i).get(j).set(t, v); - z += v; - } - objective += log(z) * e.getCount(); - - for (int t = 0; t < numTags; t++) - { - 
double v = q.get(i).get(j).get(t) / z; - q.get(i).get(j).set(t, v); - gradient[edgeIndex+t] -= e.getCount() * v; - } - - edgeIndex += numTags; - } - } -// System.out.println("computeObjectiveAndGradient logz=" + objective); -// System.out.println("lambda= " + Arrays.toString(parameters)); -// System.out.println("gradient=" + Arrays.toString(gradient)); - } - - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append(getClass().getCanonicalName()).append(" with "); - sb.append(parameters.length).append(" parameters and "); - sb.append(training.getNumPhrases() * numTags).append(" constraints"); - return sb.toString(); - } - - double primal() - { - // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct] - // kl = sum_Y q(Y) log q(Y) / p(Y|X) - // = sum_Y q(Y) { -lambda . phi(Y) - log Z } - // = -log Z - lambda . E_q[phi] - // = -objective + lambda . gradient - - double kl = -objective + MathUtils.dotProduct(parameters, gradient); - double l1lmax = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int t = 0; t < numTags; t++) - { - double lmax = Double.NEGATIVE_INFINITY; - for (int j = 0; j < edges.size(); ++j) - lmax = max(lmax, q.get(i).get(j).get(t)); - l1lmax += lmax; - } - } - - return llh + kl + constraintScale * l1lmax; - } - } -} diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README deleted file mode 100644 index a3d54ffc..00000000 --- a/gi/posterior-regularisation/README +++ /dev/null @@ -1,3 +0,0 @@ - 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance - 559 wc -l btec.concordance - 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l diff --git a/gi/posterior-regularisation/alphabet.hh 
b/gi/posterior-regularisation/alphabet.hh deleted file mode 100644 index 1db928da..00000000 --- a/gi/posterior-regularisation/alphabet.hh +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _alphabet_hh -#define _alphabet_hh - -#include -#include -#include -#include -#include - -// Alphabet: indexes a set of types -template -class Alphabet: protected std::map -{ -public: - Alphabet() {}; - - bool empty() const { return std::map::empty(); } - int size() const { return std::map::size(); } - - int operator[](const T &k) const - { - typename std::map::const_iterator cit = find(k); - if (cit != std::map::end()) - return cit->second; - else - return -1; - } - - int lookup(const T &k) const { return (*this)[k]; } - - int insert(const T &k) - { - int sz = size(); - assert((unsigned) sz == _items.size()); - - std::pair::iterator, bool> - ins = std::map::insert(make_pair(k, sz)); - - if (ins.second) - _items.push_back(k); - - return ins.first->second; - } - - const T &type(int i) const - { - assert(i >= 0); - assert(i < size()); - return _items[i]; - } - - std::ostream &display(std::ostream &out, int i) const - { - return out << type(i); - } - -private: - std::vector _items; -}; - -#endif diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance deleted file mode 100644 index 710973ff..00000000 --- a/gi/posterior-regularisation/canned.concordance +++ /dev/null @@ -1,4 +0,0 @@ -a 0 0 0 0 ||| C=1 ||| 1 1 1 1 ||| C=1 ||| 2 2 2 2 ||| C=1 -b 0 0 0 0 ||| C=1 ||| 1 1 1 1 ||| C=1 -c 2 2 2 2 ||| C=1 ||| 4 4 4 4 ||| C=1 ||| 5 5 5 5 ||| C=1 -d 4 4 4 4 ||| C=1 ||| 5 5 5 5 ||| C=1 diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc deleted file mode 100644 index f6c9fd68..00000000 --- a/gi/posterior-regularisation/em.cc +++ /dev/null @@ -1,830 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for tonight ? ||| C=1 ||| seats for ? ||| C=1 ||| i see ? 
||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating P(t|p,c) for all x,c -// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t) - -// Sexing it up: -// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries -// - improve the generation of phrase internals, e.g., generate edge words from -// different distribution to central words - -#include "alphabet.hh" -#include "log_add.hh" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace std::tr1; - -const int numTags = 5; -const int numIterations = 100; -const bool posterior_regularisation = true; -const double PHRASE_VIOLATION_WEIGHT = 10; -const double CONTEXT_VIOLATION_WEIGHT = 0; -const bool includePhraseProb = false; - -// Data structures: -Alphabet lexicon; -typedef vector Phrase; -typedef tuple Context; -Alphabet phrases; -Alphabet contexts; - -typedef map ContextCounts; -typedef map PhraseCounts; -typedef map PhraseToContextCounts; -typedef map ContextToPhraseCounts; - -PhraseToContextCounts concordancePhraseToContexts; -ContextToPhraseCounts concordanceContextToPhrases; - -typedef vector Dist; -typedef vector ConditionalDist; -Dist prior; // class -> P(class) -vector probCtx; // word -> class -> P(word | class), for each position of context word -ConditionalDist probPhrase; // class -> P(word | class) -Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter - -mt19937 randomGenerator((size_t) time(NULL)); -uniform_real uniDist(0.0, 1e-1); -variate_generator< mt19937, uniform_real > rng(randomGenerator, 
uniDist); - -void addRandomNoise(Dist &d); -void normalise(Dist &d); -void addTo(Dist &d, const Dist &e); -int argmax(const Dist &d); - -map > lambda_indices; - -Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0); -template -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation); -//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0); -double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data); -void optimise_lambda(double delta, double gamma, vector &lambda); -double expected_violation_phrases(const double *lambda); -double expected_violation_contexts(const double *lambda); -double primal_kl_divergence(const double *lambda); -double dual(const double *lambda); -void print_primal_dual(const double *lambda, double delta, double gamma); - -ostream &operator<<(ostream &, const Phrase &); -ostream &operator<<(ostream &, const Context &); -ostream &operator<<(ostream &, const Dist &); -ostream &operator<<(ostream &, const ConditionalDist &); - -int -main(int argc, char *argv[]) -{ - randomGenerator.seed(time(NULL)); - - int edges = 0; - istream &input = cin; - while (input.good()) - { - // read the phrase - string phraseString; - Phrase phrase; - getline(input, phraseString, '\t'); - istringstream pinput(phraseString); - string token; - while (pinput >> token) - phrase.push_back(lexicon.insert(token)); - int phraseId = phrases.insert(phrase); - - // read the rest, storing each context - string remainder; - getline(input, remainder, '\n'); - istringstream rinput(remainder); - Context context(-1, -1, -1, -1); - int index = 0; - while (rinput >> token) - { - if (token != "|||" && token != "") - { - if (index < 4) - { - // eugh! 
damn templates - switch (index) - { - case 0: get<0>(context) = lexicon.insert(token); break; - case 1: get<1>(context) = lexicon.insert(token); break; - case 2: get<2>(context) = lexicon.insert(token); break; - case 3: get<3>(context) = lexicon.insert(token); break; - default: assert(false); - } - index += 1; - } - else if (token.find("C=") == 0) - { - int contextId = contexts.insert(context); - int count = atoi(token.substr(strlen("C=")).c_str()); - concordancePhraseToContexts[phraseId][contextId] += count; - concordanceContextToPhrases[contextId][phraseId] += count; - index = 0; - context = Context(-1, -1, -1, -1); - edges += 1; - } - } - } - - // trigger EOF - input >> ws; - } - - cout << "Read in " << phrases.size() << " phrases" - << " and " << contexts.size() << " contexts" - << " and " << edges << " edges" - << " and " << lexicon.size() << " word types\n"; - - // FIXME: filter out low count phrases and low count contexts (based on individual words?) - // now populate model parameters with uniform + random noise - prior.resize(numTags, 1.0); - addRandomNoise(prior); - normalise(prior); - - probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0))); - if (includePhraseProb) - probPhrase.resize(numTags, Dist(lexicon.size(), 1.0)); - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - { - addRandomNoise(probCtx[j][t]); - normalise(probCtx[j][t]); - } - if (includePhraseProb) - { - addRandomNoise(probPhrase[t]); - normalise(probPhrase[t]); - } - } - if (includePhraseProb) - { - probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5 - addRandomNoise(probPhraseLength); - } - - cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << endl; - - vector lambda; - - // now do EM training - for (int iteration = 0; iteration < numIterations; ++iteration) - { - cout << "EM iteration " << iteration << 
endl; - - if (posterior_regularisation) - optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda); - //cout << "\tlambda " << lambda << endl; - - Dist countsPrior(numTags, 0.0); - vector countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10))); - ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10)); - Dist countsPhraseLength(numTags, 0.0); - Dist nPhrases(numTags, 0.0); - - double llh = 0; - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - - // e-step: estimate latent class probs; compile (class,word) stats for m-step - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double z = 0; - Dist tagCounts; - if (!posterior_regularisation) - tagCounts = conditional_probs(phrase, context, &z); - else - tagCounts = penalised_conditionals(phrase, context, lambda, &z); - - llh += log(z) * ccit->second; - addTo(countsPrior, tagCounts); // FIXME: times ccit->secon - - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - countsCtx[j][t][get<0>(context)] += tagCounts[t] * ccit->second; - - if (includePhraseProb) - { - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - countsPhrase[t][*pit] += tagCounts[t] * ccit->second; - countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second; - nPhrases[t] += tagCounts[t] * ccit->second; - } - } - } - } - - cout << "M-step\n"; - - // m-step: normalise prior and (class,word) stats and assign to model parameters - normalise(countsPrior); - prior = countsPrior; - for (int t = 0; t < numTags; ++t) - { - //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n"; - for (int j = 0; j < 4; ++j) - normalise(countsCtx[j][t]); - if (includePhraseProb) - { - normalise(countsPhrase[t]); - 
countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t]; - } - } - probCtx = countsCtx; - if (includePhraseProb) - { - probPhrase = countsPhrase; - probPhraseLength = countsPhraseLength; - } - - double *larray = new double[lambda.size()]; - copy(lambda.begin(), lambda.end(), larray); - print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT); - delete [] larray; - - //cout << "\tllh " << llh << endl; - //cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << "\n"; - } - - // output class membership - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist tagCounts = conditional_probs(phrase, context, 0); - cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n"; - } - } - - return 0; -} - -void addRandomNoise(Dist &d) -{ - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit += rng(); -} - -void normalise(Dist &d) -{ - double z = 0; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - z += *dit; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit /= z; -} - -void addTo(Dist &d, const Dist &e) -{ - assert(d.size() == e.size()); - for (int i = 0; i < (int) d.size(); ++i) - d[i] += e[i]; -} - -int argmax(const Dist &d) -{ - double best = d[0]; - int index = 0; - for (int i = 1; i < (int) d.size(); ++i) - { - if (d[i] > best) - { - best = d[i]; - index = i; - } - } - return index; -} - -ostream &operator<<(ostream &out, const Phrase &phrase) -{ - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - lexicon.display(((pit == phrase.begin()) 
? out : out << " "), *pit); - return out; -} - -ostream &operator<<(ostream &out, const Context &context) -{ - lexicon.display(out, get<0>(context)); - lexicon.display(out << " ", get<1>(context)); - lexicon.display(out << " ", get<2>(context)); - lexicon.display(out << " ", get<3>(context)); - return out; -} - -ostream &operator<<(ostream &out, const Dist &dist) -{ - for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : " ") << *dit; - return out; -} - -ostream &operator<<(ostream &out, const ConditionalDist &dist) -{ - for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : "; ") << *dit; - return out; -} - -// FIXME: slow - just use the phrase index, context index to do the mapping -// (n.b. it's a sparse setup, not just equal to 3d array index) -int -lambda_index(const Phrase &phrase, const Context &context, int tag) -{ - return lambda_indices[phrase][context] + tag; -} - -template -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation) -{ - Dist d = conditional_probs(phrase, context, 0); - - double z = 0; - for (int t = 0; t < numTags; ++t) - { - d[t] *= exp(-lambda[lambda_index(phrase, context, t)]); - z += d[t]; - } - - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - d[t] /= z; - - return d; -} - -Dist -conditional_probs(const Phrase &phrase, const Context &context, double *normalisation) -{ - Dist tagCounts(numTags, 0.0); - double z = 0; - for (int t = 0; t < numTags; ++t) - { - double prob = prior[t]; - prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] * - probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]); - - if (includePhraseProb) - { - prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t]; - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - prob *= 
probPhrase[t][*pit]; - } - - tagCounts[t] = prob; - z += prob; - } - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - tagCounts[t] /= z; - - return tagCounts; -} - -double -penalised_log_likelihood(int n, const double *lambda, double *grad, void *) -{ - // return log Z(lambda, theta) over the corpus - // where theta are the global parameters (prior, probCtx*, probPhrase*) - // and lambda are lagrange multipliers for the posterior sparsity constraints - // - // this is formulated as: - // f = log Z(lambda) = sum_i log ( sum_i p_theta(t_i|p_i,c_i) exp [-lambda_{t_i,p_i,c_i}] ) - // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1) - // - // with derivative: - // f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) } - // { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) } - - //cout << "penalised_log_likelihood with lambda "; - //copy(lambda, lambda+n, ostream_iterator(cout, " ")); - //cout << "\n"; - - double f = 0; - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double z = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - - f += ccit->second * log(z); - //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl; - //cout << "\t\tscores: " << scores << "\n"; - - if (grad) - { - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); // FIXME: redundant lookups - assert(grad[i] == 0.0); - grad[i] = - ccit->second * scores[t]; - } - } - } - } - - //cout << "penalised_log_likelihood returning " << f; - //if (grad) - //{ 
- //cout << "\ngradient: "; - //copy(grad, grad+n, ostream_iterator(cout, " ")); - //} - //cout << "\n"; - - return f; -} - -typedef struct -{ - // one of p or c should be set to -1, in which case it will be marginalised out - // i.e. sum_p' lambda_{p'ct} <= threshold - // or sum_c' lambda_{pc't} <= threshold - int p, c, t, threshold; -} constraint_data; - -double -constraint_and_gradient(int n, const double *lambda, double *grad, void *data) -{ - constraint_data *d = (constraint_data *) data; - assert(d->t >= 0); - assert(d->threshold >= 0); - - //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl; - //cout << "\tlambda "; - //copy(lambda, lambda+n, ostream_iterator(cout, " ")); - //cout << "\n"; - - // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl; - - if (d->p >= 0) - { - assert(d->c < 0); - // sum_c lambda_pct <= delta [a.k.a. threshold] - // => sum_c lambda_pct - delta <= 0 - // derivative_pct = { 1, if p and t match; 0, otherwise } - - double val = -d->threshold; - - const Phrase &phrase = phrases.type(d->p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p); - assert(pcit != concordancePhraseToContexts.end()); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } - else - { - assert(d->c >= 0); - assert(d->p < 0); - // sum_p lambda_pct <= gamma [a.k.a. 
threshold] - // => sum_p lambda_pct - gamma <= 0 - // derivative_pct = { 1, if c and t match; 0, otherwise } - - double val = -d->threshold; - - const Context &context = contexts.type(d->c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c); - assert(cpit != concordanceContextToPhrases.end()); - for (PhraseCounts::iterator pcit = cpit->second.begin(); - pcit != cpit->second.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } -} - -void -optimise_lambda(double delta, double gamma, vector &lambdav) -{ - int num_lambdas = lambdav.size(); - if (lambda_indices.empty() || lambdav.empty()) - { - lambda_indices.clear(); - lambdav.clear(); - - int i = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - lambda_indices[phrase][context] = i; - i += numTags; - } - } - num_lambdas = i; - lambdav.resize(num_lambdas); - } - //cout << "optimise_lambda: #langrange multipliers " << num_lambdas << endl; - - // FIXME: better to work with an implicit representation to save memory usage - int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? 
contexts.size() : 0)) * numTags; - //cout << "optimise_lambda: #constraints " << num_constraints << endl; - constraint_data *data = new constraint_data[num_constraints]; - int i = 0; - if (delta > 0) - { - for (int p = 0; p < phrases.size(); ++p) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = p; - d.c = -1; - d.t = t; - d.threshold = delta; - } - } - } - - if (gamma > 0) - { - for (int c = 0; c < contexts.size(); ++c) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = -1; - d.c = c; - d.t = t; - d.threshold = gamma; - } - } - } - assert(i == num_constraints); - - double lambda[num_lambdas]; - double lb[num_lambdas], ub[num_lambdas]; - for (i = 0; i < num_lambdas; ++i) - { - lambda[i] = lambdav[i]; // starting value - lb[i] = 0; // lower bound - if (delta <= 0) // upper bound - ub[i] = gamma; - else if (gamma <= 0) - ub[i] = delta; - else - assert(false); - } - - //print_primal_dual(lambda, delta, gamma); - - double minf; - int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL, - num_constraints, constraint_and_gradient, data, sizeof(constraint_data), - lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0); - //cout << "optimise error code " << error_code << endl; - - //print_primal_dual(lambda, delta, gamma); - - delete [] data; - - if (error_code < 0) - cout << "WARNING: optimisation failed with error code: " << error_code << endl; - //else - //{ - //cout << "success; minf " << minf << endl; - //print_primal_dual(lambda, delta, gamma); - //} - - lambdav = vector(&lambda[0], &lambda[0] + num_lambdas); -} - -// FIXME: inefficient - cache the scores -double -expected_violation_phrases(const double *lambda) -{ - // sum_pt max_c E_q[phi_pct] - double violation = 0; - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = 
concordancePhraseToContexts.find(p); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: inefficient - cache the scores -double -expected_violation_contexts(const double *lambda) -{ - // sum_ct max_p E_q[phi_pct] - double violation = 0; - - for (int c = 0; c < contexts.size(); ++c) - { - const Context &context = contexts.type(c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (PhraseCounts::iterator pit = cpit->second.begin(); - pit != cpit->second.end(); ++pit) - { - const Phrase &phrase = phrases.type(pit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: possibly inefficient -double -primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms -{ - double llh = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double z = 0; - Dist scores = conditional_probs(phrase, context, &z); - llh += ccit->second * log(z); - } - } - return llh; -} - -// FIXME: inefficient - cache the scores -double -primal_kl_divergence(const double *lambda) -{ - // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) } - // = sum_y q(y) { log p(y | x) - lambda . 
phi(x, y) - log Z - log p(y | x) } - // = sum_y q(y) { - lambda . phi(x, y) } - log Z - // and q(y) factors with each edge, ditto for Z - - double feature_sum = 0, log_z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double local_z = 0; - double local_f = 0; - Dist d = conditional_probs(phrase, context, 0); - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); - double s = d[t] * exp(-lambda[i]); - local_f += lambda[i] * s; - local_z += s; - } - - log_z += ccit->second * log(local_z); - feature_sum += ccit->second * (local_f / local_z); - } - } - - return -feature_sum - log_z; -} - -// FIXME: inefficient - cache the scores -double -dual(const double *lambda) -{ - // return log(Z) = - log { sum_y p(y | x) exp( - lambda . phi(x, y) } - // n.b. 
have flipped the sign as we're minimising - - double z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double lz = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - z += lz * ccit->second; - } - } - return log(z); -} - -void -print_primal_dual(const double *lambda, double delta, double gamma) -{ - double likelihood = primal_likelihood(); - double kl = primal_kl_divergence(lambda); - double sum_pt = expected_violation_phrases(lambda); - double sum_ct = expected_violation_contexts(lambda); - //double d = dual(lambda); - - cout << "\tllh=" << likelihood - << " kl=" << kl - << " violations phrases=" << sum_pt - << " contexts=" << sum_ct - //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct) - //<< " dual=" << d - << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct) - << endl; -} diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh deleted file mode 100644 index d06356e9..00000000 --- a/gi/posterior-regularisation/invert.hh +++ /dev/null @@ -1,45 +0,0 @@ -// The following code inverts the matrix input using LU-decomposition with -// backsubstitution of unit vectors. Reference: Numerical Recipies in C, 2nd -// ed., by Press, Teukolsky, Vetterling & Flannery. -// Code written by Fredrik Orderud. -// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion - -#ifndef INVERT_MATRIX_HPP -#define INVERT_MATRIX_HPP - -// REMEMBER to update "lu.hpp" header includes from boost-CVS -#include -#include -#include -#include -#include -#include - -namespace ublas = boost::numeric::ublas; - -/* Matrix inversion routine. 
- Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ -template -bool invert_matrix(const ublas::matrix& input, ublas::matrix& inverse) -{ - using namespace boost::numeric::ublas; - typedef permutation_matrix pmatrix; - // create a working copy of the input - matrix A(input); - // create a permutation matrix for the LU-factorization - pmatrix pm(A.size1()); - - // perform LU-factorization - int res = lu_factorize(A,pm); - if( res != 0 ) return false; - - // create identity matrix of "inverse" - inverse.assign(ublas::identity_matrix(A.size1())); - - // backsubstitute to get the inverse - lu_substitute(A, pm, inverse); - - return true; -} - -#endif //INVERT_MATRIX_HPP diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py deleted file mode 100644 index 5a3f2e9c..00000000 --- a/gi/posterior-regularisation/linesearch.py +++ /dev/null @@ -1,58 +0,0 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py - -from scipy.optimize import minpack2 -import numpy - -import __builtin__ -pymin = __builtin__.min - -def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, - args=(), c1=1e-4, c2=0.9, amax=50): - - fc = 0 - gc = 0 - phi0 = old_fval - derphi0 = numpy.dot(gfk,pk) - alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0) - # trevor: added this test - alpha1 = pymin(alpha1,amax) - - if isinstance(myfprime,type(())): - eps = myfprime[1] - fprime = myfprime[0] - newargs = (f,eps) + args - gradient = False - else: - fprime = myfprime - newargs = args - gradient = True - - xtol = 1e-14 - amin = 1e-8 - isave = numpy.zeros((2,), numpy.intc) - dsave = numpy.zeros((13,), float) - task = 'START' - fval = old_fval - gval = gfk - - while 1: - stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2, - xtol, task, amin, amax,isave,dsave) - #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave - #print 'returns', stp,fval,derphi,task - - if task[:2] == 
'FG': - alpha1 = stp - fval = f(xk+stp*pk,*args) - fc += 1 - gval = fprime(xk+stp*pk,*newargs) - if gradient: gc += 1 - else: fc += len(xk) + 1 - phi0 = fval - derphi0 = numpy.dot(gval,pk) - else: - break - - if task[:5] == 'ERROR' or task[1:4] == 'WARN': - stp = None # failed - return stp, fc, gc, fval, old_fval, gval diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh deleted file mode 100644 index e0620c5a..00000000 --- a/gi/posterior-regularisation/log_add.hh +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef log_add_hh -#define log_add_hh - -#include -#include -#include -#include - -template -struct Log -{ - static T zero() { return -std::numeric_limits::infinity(); } - - static T add(T l1, T l2) - { - if (l1 == zero()) return l2; - if (l1 > l2) - return l1 + std::log(1 + exp(l2 - l1)); - else - return l2 + std::log(1 + exp(l1 - l2)); - } - - static T subtract(T l1, T l2) - { - //std::assert(l1 >= l2); - return l1 + log(1 - exp(l2 - l1)); - } -}; - -#endif diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar deleted file mode 120000 index da8bf761..00000000 --- a/gi/posterior-regularisation/prjava.jar +++ /dev/null @@ -1 +0,0 @@ -prjava/prjava-20100708.jar \ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile deleted file mode 100755 index bd3bfca0..00000000 --- a/gi/posterior-regularisation/prjava/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -all: - ant dist - -check: - echo no tests - -clean: - ant clean diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml deleted file mode 100644 index 7222b3c8..00000000 --- a/gi/posterior-regularisation/prjava/build.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar 
b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar deleted file mode 100644 index 43b4b369..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar deleted file mode 100644 index 56373621..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar deleted file mode 100644 index 3e59fbf3..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java deleted file mode 100644 index be0a6ed6..00000000 --- a/gi/posterior-regularisation/prjava/src/arr/F.java +++ /dev/null @@ -1,99 +0,0 @@ -package arr; - -import java.util.Arrays; -import java.util.Random; - -public class F { - public static Random rng = new Random(); - - public static void randomise(double probs[]) - { - randomise(probs, true); - } - - public static void randomise(double probs[], boolean normalise) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - if (normalise) - z += probs[i]; - } - - if (normalise) - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - public static void uniform(double probs[]) - { - for (int i = 0; i < probs.length; ++i) - probs[i] = 1.0 / probs.length; - } - - public static void l1normalize(double [] a){ - double sum=0; - for(int i=0;i m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java deleted file mode 100644 index 425ede11..00000000 --- 
a/gi/posterior-regularisation/prjava/src/data/Corpus.java +++ /dev/null @@ -1,233 +0,0 @@ -package data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Scanner; - -public class Corpus { - - public static final String alphaFilename="../posdata/corpus.alphabet"; - public static final String tagalphaFilename="../posdata/corpus.tag.alphabet"; - -// public static final String START_SYM=""; - public static final String END_SYM=""; - public static final String NUM_TOK=""; - - public static final String UNK_TOK=""; - - private ArrayListsent; - private ArrayListdata; - - public ArrayListtag; - public ArrayListtagData; - - public static boolean convertNumTok=true; - - private HashMapfreq; - public HashMapvocab; - - public HashMaptagVocab; - private int tagV; - - private int V; - - public static void main(String[] args) { - Corpus c=new Corpus("../posdata/en_test.conll"); - System.out.println( - Arrays.toString(c.get(0)) - ); - System.out.println( - Arrays.toString(c.getInt(0)) - ); - - System.out.println( - Arrays.toString(c.get(1)) - ); - System.out.println( - Arrays.toString(c.getInt(1)) - ); - } - - public Corpus(String filename,HashMapdict){ - V=0; - tagV=0; - freq=new HashMap(); - tagVocab=new HashMap(); - vocab=dict; - - sent=new ArrayList(); - tag=new ArrayList(); - - Scanner sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - // s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - sent.add(s.toArray(new String[0])); - s=new ArrayList(); - // s.add(START_SYM); - continue; - } - String tok=toks[1].toLowerCase(); - s.add(tok); - } - sc.close(); - - buildData(); - } - - public Corpus(String filename){ - V=0; - freq=new HashMap(); - vocab=new HashMap(); - tagVocab=new HashMap(); - - sent=new ArrayList(); - tag=new ArrayList(); - - System.out.println("Reading:"+filename); - - Scanner 
sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - ArrayListtags=new ArrayList(); - //s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - tags.add(END_SYM); - if(s.size()>2){ - sent.add(s.toArray(new String[0])); - tag.add(tags.toArray(new String [0])); - } - s=new ArrayList(); - tags=new ArrayList(); - // s.add(START_SYM); - continue; - } - - String tok=toks[1].toLowerCase(); - if(convertNumTok && tok.matches(".*\\d.*")){ - tok=NUM_TOK; - } - s.add(tok); - - if(toks.length>3){ - tok=toks[3].toLowerCase(); - }else{ - tok="_"; - } - tags.add(tok); - - } - sc.close(); - - for(int i=0;i(); - for(int i=0;i(); - for(int i=0;i2){ - vocab.put(key, V); - V++; - } - } - io.SerializedObjects.writeSerializedObject(vocab, alphaFilename); - io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename); - } - - private void addTag(String tag){ - Integer i=tagVocab.get(tag); - if(i==null){ - tagVocab.put(tag, tagV); - tagV++; - } - } - -} diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java deleted file mode 100644 index 17a4679f..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java +++ /dev/null @@ -1,579 +0,0 @@ -package hmm; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Scanner; - -public class HMM { - - - //trans[i][j]=prob of going FROM i to j - double [][]trans; - double [][]emit; - double []pi; - int [][]data; - int [][]tagdata; - - double logtrans[][]; - - public HMMObjective o; - - public static void main(String[] args) { - - } - - public HMM(int n_state,int n_emit,int [][]data){ - trans=new double [n_state][n_state]; - emit=new double[n_state][n_emit]; - pi=new double [n_state]; - System.out.println(" random initial parameters"); - fillRand(trans); - 
fillRand(emit); - fillRand(pi); - - this.data=data; - - } - - private void fillRand(double [][] a){ - for(int i=0;i=0;n--){ - for(int i=0;imaxprob){ - maxprob=p[seq.length-1][i]; - maxIdx=i; - } - } - int ans[]=new int [seq.length]; - ans[seq.length-1]=maxIdx; - for(int i=seq.length-2;i>=0;i--){ - ans[i]=backp[i+1][ans[i+1]]; - } - return ans; - } - - public double l1norm(double a[]){ - double norm=0; - for(int i=0;i s=new ArrayList(); - int state=sample(pi); - int sym=sample(emit[state]); - while(sym!=terminalSym){ - s.add(sym); - state=sample(trans[state]); - sym=sample(emit[state]); - } - - int ans[]=new int [s.size()]; - for(int i=0;i=r){ - return i; - } - } - return p.length-1; - } - - public void train(int tagdata[][]){ - double trans_exp_cnt[][]=new double [trans.length][trans.length]; - double emit_exp_cnt[][]=new double[trans.length][emit[0].length]; - double start_exp_cnt[]=new double[trans.length]; - - for(int i=0;imaxwt[i][d[sentNum][n]]){ - maxwt[i][d[sentNum][n]]=py; - } - - } - } - - //the last state - int len=post.length; - for(int i=0;imaxwt[i][d[sentNum][len-1]]){ - maxwt[i][d[sentNum][len-1]]=py; - } - - } - - } - - } - -}//end of class diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java deleted file mode 100644 index 70b6c966..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java +++ /dev/null @@ -1,351 +0,0 @@ -package hmm; - -import gnu.trove.TIntArrayList; -import optimization.gradientBasedMethods.ProjectedGradientDescent; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.projections.SimplexProjection; -import 
optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; - -public class HMMObjective extends ProjectedObjective{ - - - private static final double GRAD_DIFF = 3; - public static double INIT_STEP_SIZE=10; - public static double VAL_DIFF=1000; - - private HMM hmm; - double[] newPoint ; - - //posterior[sent num][tok num][tag]=index into lambda - private int posteriorMap[][][]; - //projection[word][tag].get(occurence)=index into lambda - private TIntArrayList projectionMap[][]; - - //Size of the simplex - public double scale=10; - private SimplexProjection projection; - - private int wordFreq[]; - private static int MIN_FREQ=10; - private int numWordsToProject=0; - - private int n_param; - - public double loglikelihood; - - public HMMObjective(HMM h){ - hmm=h; - - countWords(); - buildMap(); - - gradient=new double [n_param]; - projection = new SimplexProjection(scale); - newPoint = new double[n_param]; - setInitialParameters(new double[n_param]); - - } - - /**@brief counts word frequency in the corpus - * - */ - private void countWords(){ - wordFreq=new int [hmm.emit[0].length]; - for(int i=0;i

- %(title)s - - Headers - - - Classes - - - Index - -
-''' % self.args ) - self.section = { - 'headers' : self._getChild('library-reference',id='%(id)s.headers' % self.args), - 'classes' : self._getChild('index',id='%(id)s.classes' % self.args), - 'index' : self._getChild('index',id='%(id)s.index' % self.args) - } - #~ Remove the index sections if we aren't generating it. - if not self.args['index']: - self.section['classes'].parentNode.removeChild(self.section['classes']) - self.section['classes'].unlink() - del self.section['classes'] - self.section['index'].parentNode.removeChild(self.section['index']) - self.section['index'].unlink() - del self.section['index'] - #~ The symbols, per Doxygen notion, that we translated. - self.symbols = {} - #~ Map of Doxygen IDs and BoostBook IDs, so we can translate as needed. - self.idmap = {} - #~ Marks generation, to prevent redoing it. - self.generated = False - - #~ Add an Doxygen generated XML document to the content we are translating. - def addDox( self, document ): - self._translateNode(document.documentElement) - - #~ Turns the internal XML tree into an output UTF-8 string. - def tostring( self ): - self._generate() - #~ return self.boostbook.toprettyxml(' ') - return self.boostbook.toxml('utf-8') - - #~ Does post-processing on the partial generated content to generate additional info - #~ now that we have the complete source documents. - def _generate( self ): - if not self.generated: - self.generated = True - symbols = self.symbols.keys() - symbols.sort() - #~ Populate the header section. 
- for symbol in symbols: - if self.symbols[symbol]['kind'] in ('header'): - self.section['headers'].appendChild(self.symbols[symbol]['dom']) - for symbol in symbols: - if self.symbols[symbol]['kind'] not in ('namespace', 'header'): - container = self._resolveContainer(self.symbols[symbol], - self.symbols[self.symbols[symbol]['header']]['dom']) - if container.nodeName != 'namespace': - ## The current BoostBook to Docbook translation doesn't - ## respect, nor assign, IDs to inner types of any kind. - ## So nuke the ID entry so as not create bogus links. - del self.idmap[self.symbols[symbol]['id']] - container.appendChild(self.symbols[symbol]['dom']) - self._rewriteIDs(self.boostbook.documentElement) - - #~ Rewrite the various IDs from Doxygen references to the newly created - #~ BoostBook references. - def _rewriteIDs( self, node ): - if node.nodeName in ('link'): - if (self.idmap.has_key(node.getAttribute('linkend'))): - #~ A link, and we have someplace to repoint it at. - node.setAttribute('linkend',self.idmap[node.getAttribute('linkend')]) - else: - #~ A link, but we don't have a generated target for it. - node.removeAttribute('linkend') - elif hasattr(node,'hasAttribute') and node.hasAttribute('id') and self.idmap.has_key(node.getAttribute('id')): - #~ Simple ID, and we have a translation. - node.setAttribute('id',self.idmap[node.getAttribute('id')]) - #~ Recurse, and iterate, depth-first traversal which turns out to be - #~ left-to-right and top-to-bottom for the document. 
- if node.firstChild: - self._rewriteIDs(node.firstChild) - if node.nextSibling: - self._rewriteIDs(node.nextSibling) - - def _resolveContainer( self, cpp, root ): - container = root - for ns in cpp['namespace']: - node = self._getChild('namespace',name=ns,root=container) - if not node: - node = container.appendChild( - self._createNode('namespace',name=ns)) - container = node - for inner in cpp['name'].split('::'): - node = self._getChild(name=inner,root=container) - if not node: - break - container = node - return container - - def _setID( self, id, name ): - self.idmap[id] = name.replace('::','.').replace('/','.') - #~ print '--| setID:',id,'::',self.idmap[id] - - #~ Translate a given node within a given context. - #~ The translation dispatches to a local method of the form - #~ "_translate[_context0,...,_contextN]", and the keyword args are - #~ passed along. If there is no translation handling method we - #~ return None. - def _translateNode( self, *context, **kwargs ): - node = None - names = [ ] - for c in context: - if c: - if not isinstance(c,xml.dom.Node): - suffix = '_'+c.replace('-','_') - else: - suffix = '_'+c.nodeName.replace('-','_') - node = c - names.append('_translate') - names = map(lambda x: x+suffix,names) - if node: - for name in names: - if hasattr(self,name): - return getattr(self,name)(node,**kwargs) - return None - - #~ Translates the children of the given parent node, appending the results - #~ to the indicated target. For nodes not translated by the translation method - #~ it copies the child over and recurses on that child to translate any - #~ possible interior nodes. Hence this will translate the entire subtree. 
- def _translateChildren( self, parent, **kwargs ): - target = kwargs['target'] - for n in parent.childNodes: - child = self._translateNode(n,target=target) - if child: - target.appendChild(child) - else: - child = n.cloneNode(False) - if hasattr(child,'data'): - child.data = re.sub(r'\s+',' ',child.data) - target.appendChild(child) - self._translateChildren(n,target=child) - - #~ Translate the given node as a description, into the description subnode - #~ of the target. If no description subnode is present in the target it - #~ is created. - def _translateDescription( self, node, target=None, tag='description', **kwargs ): - description = self._getChild(tag,root=target) - if not description: - description = target.appendChild(self._createNode(tag)) - self._translateChildren(node,target=description) - return description - - #~ Top level translation of: ..., - #~ translates the children. - def _translate_doxygen( self, node ): - #~ print '_translate_doxygen:', node.nodeName - result = [] - for n in node.childNodes: - newNode = self._translateNode(n) - if newNode: - result.append(newNode) - return result - - #~ Top level translation of: - #~ - #~ - #~ - #~ ... - #~ - #~ ... - #~ - #~ ... - #~ - #~ builds the class and symbol sections, if requested. - def _translate_doxygenindex( self, node ): - #~ print '_translate_doxygenindex:', node.nodeName - if self.args['index']: - entries = [] - classes = [] - #~ Accumulate all the index entries we care about. 
- for n in node.childNodes: - if n.nodeName == 'compound': - if n.getAttribute('kind') not in ('file','dir','define'): - cpp = self._cppName(self._getChildData('name',root=n)) - entry = { - 'name' : cpp['name'], - 'compoundname' : cpp['compoundname'], - 'id' : n.getAttribute('refid') - } - if n.getAttribute('kind') in ('class','struct'): - classes.append(entry) - entries.append(entry) - for m in n.childNodes: - if m.nodeName == 'member': - cpp = self._cppName(self._getChildData('name',root=m)) - entry = { - 'name' : cpp['name'], - 'compoundname' : cpp['compoundname'], - 'id' : n.getAttribute('refid') - } - if hasattr(m,'getAttribute') and m.getAttribute('kind') in ('class','struct'): - classes.append(entry) - entries.append(entry) - #~ Put them in a sensible order. - entries.sort(lambda x,y: cmp(x['name'].lower(),y['name'].lower())) - classes.sort(lambda x,y: cmp(x['name'].lower(),y['name'].lower())) - #~ And generate the BoostBook for them. - self._translate_index_(entries,target=self.section['index']) - self._translate_index_(classes,target=self.section['classes']) - return None - - #~ Translate a set of index entries in the BoostBook output. The output - #~ is grouped into groups of the first letter of the entry names. 
- def _translate_index_(self, entries, target=None, **kwargs ): - i = 0 - targetID = target.getAttribute('id') - while i < len(entries): - dividerKey = entries[i]['name'][0].upper() - divider = target.appendChild(self._createNode('indexdiv',id=targetID+'.'+dividerKey)) - divider.appendChild(self._createText('title',dividerKey)) - while i < len(entries) and dividerKey == entries[i]['name'][0].upper(): - iename = entries[i]['name'] - ie = divider.appendChild(self._createNode('indexentry')) - ie = ie.appendChild(self._createText('primaryie',iename)) - while i < len(entries) and entries[i]['name'] == iename: - ie.appendChild(self.boostbook.createTextNode(' (')) - ie.appendChild(self._createText( - 'link',entries[i]['compoundname'],linkend=entries[i]['id'])) - ie.appendChild(self.boostbook.createTextNode(')')) - i += 1 - - #~ Translate a ..., - #~ by retranslating with the "kind" of compounddef. - def _translate_compounddef( self, node, target=None, **kwargs ): - return self._translateNode(node,node.getAttribute('kind')) - - #~ Translate a .... For - #~ namespaces we just collect the information for later use as there is no - #~ currently namespaces are not included in the BoostBook format. In the future - #~ it might be good to generate a namespace index. 
- def _translate_compounddef_namespace( self, node, target=None, **kwargs ): - namespace = { - 'id' : node.getAttribute('id'), - 'kind' : 'namespace', - 'name' : self._getChildData('compoundname',root=node), - 'brief' : self._getChildData('briefdescription',root=node), - 'detailed' : self._getChildData('detaileddescription',root=node), - 'parsed' : False - } - if self.symbols.has_key(namespace['name']): - if not self.symbols[namespace['name']]['parsed']: - self.symbols[namespace['name']]['parsed'] = True - #~ for n in node.childNodes: - #~ if hasattr(n,'getAttribute'): - #~ self._translateNode(n,n.getAttribute('kind'),target=target,**kwargs) - else: - self.symbols[namespace['name']] = namespace - #~ self._setID(namespace['id'],namespace['name']) - return None - - #~ Translate a ..., which - #~ forwards to the kind=struct as they are the same. - def _translate_compounddef_class( self, node, target=None, **kwargs ): - return self._translate_compounddef_struct(node,tag='class',target=target,**kwargs) - - #~ Translate a ... into: - #~
- #~ - #~ ... - #~ - #~
- def _translate_compounddef_struct( self, node, tag='struct', target=None, **kwargs ): - result = None - includes = self._getChild('includes',root=node) - if includes: - ## Add the header into the output table. - self._translate_compounddef_includes_(includes,includes,**kwargs) - ## Compounds are the declared symbols, classes, types, etc. - ## We add them to the symbol table, along with the partial DOM for them - ## so that they can be organized into the output later. - compoundname = self._getChildData('compoundname',root=node) - compoundname = self._cppName(compoundname) - self._setID(node.getAttribute('id'),compoundname['compoundname']) - struct = self._createNode(tag,name=compoundname['name'].split('::')[-1]) - self.symbols[compoundname['compoundname']] = { - 'header' : includes.firstChild.data, - 'namespace' : compoundname['namespace'], - 'id' : node.getAttribute('id'), - 'kind' : tag, - 'name' : compoundname['name'], - 'dom' : struct - } - ## Add the children which will be the members of the struct. - for n in node.childNodes: - self._translateNode(n,target=struct,scope=compoundname['compoundname']) - result = struct - return result - - #~ Translate a ..., - def _translate_compounddef_includes_( self, node, target=None, **kwargs ): - name = node.firstChild.data - if not self.symbols.has_key(name): - self._setID(node.getAttribute('refid'),name) - self.symbols[name] = { - 'kind' : 'header', - 'id' : node.getAttribute('refid'), - 'dom' : self._createNode('header', - id=node.getAttribute('refid'), - name=name) - } - return None - - #~ Translate a ... into: - #~ - #~ ... - #~ - def _translate_basecompoundref( self, ref, target=None, **kwargs ): - inherit = target.appendChild(self._createNode('inherit', - access=ref.getAttribute('prot'))) - self._translateChildren(ref,target=inherit) - return - - #~ Translate: - #~ - #~ - #~ ... - #~ ... - #~ ... - #~ ... - #~ - #~ ... 
- #~ - #~ Into: - #~ - def _translate_templateparamlist( self, templateparamlist, target=None, **kwargs ): - template = target.appendChild(self._createNode('template')) - for param in templateparamlist.childNodes: - if param.nodeName == 'param': - type = self._getChildData('type',root=param) - defval = self._getChild('defval',root=param) - paramKind = None - if type in ('class','typename'): - paramKind = 'template-type-parameter' - else: - paramKind = 'template-nontype-parameter' - templateParam = template.appendChild( - self._createNode(paramKind, - name=self._getChildData('declname',root=param))) - if paramKind == 'template-nontype-parameter': - template_type = templateParam.appendChild(self._createNode('type')) - self._translate_type( - self._getChild('type',root=param),target=template_type) - if defval: - value = self._getChildData('ref',root=defval.firstChild) - if not value: - value = self._getData(defval) - templateParam.appendChild(self._createText('default',value)) - return template - - #~ Translate: - #~ ... - #~ Into: - #~ ... - def _translate_briefdescription( self, brief, target=None, **kwargs ): - self._translateDescription(brief,target=target,**kwargs) - return self._translateDescription(brief,target=target,tag='purpose',**kwargs) - - #~ Translate: - #~ ... - #~ Into: - #~ ... - def _translate_detaileddescription( self, detailed, target=None, **kwargs ): - return self._translateDescription(detailed,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ With kind specific translation. - def _translate_sectiondef( self, sectiondef, target=None, **kwargs ): - self._translateNode(sectiondef,sectiondef.getAttribute('kind'),target=target,**kwargs) - - #~ Translate non-function sections. - def _translate_sectiondef_x_( self, sectiondef, target=None, **kwargs ): - for n in sectiondef.childNodes: - if hasattr(n,'getAttribute'): - self._translateNode(n,n.getAttribute('kind'),target=target,**kwargs) - return None - - #~ Translate: - #~ ... 
- def _translate_sectiondef_public_type( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_x_(sectiondef,target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_public_attrib( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_x_(sectiondef,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ All the various function group translations end up here for which - #~ they are translated into: - #~ - #~ ... - #~ - def _translate_sectiondef_func_( self, sectiondef, name='functions', target=None, **kwargs ): - members = target.appendChild(self._createNode('method-group',name=name)) - for n in sectiondef.childNodes: - if hasattr(n,'getAttribute'): - self._translateNode(n,n.getAttribute('kind'),target=members,**kwargs) - return members - - #~ Translate: - #~ ... - def _translate_sectiondef_public_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='public member functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_public_static_func( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_func_(sectiondef, - name='public static functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_protected_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='protected member functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_private_static_func( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_func_(sectiondef, - name='private static functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_private_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='private member functions',target=target,**kwargs) - - #~ Translate: - #~
...
...
- def _translate_sectiondef_user_defined( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name=self._getChildData('header', root=sectiondef),target=target,**kwargs) - - #~ Translate: - #~ - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_typedef( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - typedef = target.appendChild(self._createNode('typedef', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - typedef_type = typedef.appendChild(self._createNode('type')) - self._translate_type(self._getChild('type',root=memberdef),target=typedef_type) - return typedef - - #~ Translate: - #~ - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_function( self, memberdef, target=None, scope=None, **kwargs ): - name = self._getChildData('name',root=memberdef) - self._setID(memberdef.getAttribute('id'),scope+'::'+name) - ## Check if we have some specific kind of method. - if name == scope.split('::')[-1]: - kind = 'constructor' - target = target.parentNode - elif name == '~'+scope.split('::')[-1]: - kind = 'destructor' - target = target.parentNode - elif name == 'operator=': - kind = 'copy-assignment' - target = target.parentNode - else: - kind = 'method' - method = target.appendChild(self._createNode(kind, - # id=memberdef.getAttribute('id'), - name=name, - cv=' '.join([ - if_attribute(memberdef,'const','const','').strip() - ]), - specifiers=' '.join([ - if_attribute(memberdef,'static','static',''), - if_attribute(memberdef,'explicit','explicit',''), - if_attribute(memberdef,'inline','inline','') - ]).strip() - )) - ## We iterate the children to translate each part of the function. - for n in memberdef.childNodes: - self._translateNode(memberdef,'function',n,target=method) - return method - - #~ Translate: - #~ ... 
- def _translate_memberdef_function_templateparamlist( - self, templateparamlist, target=None, **kwargs ): - return self._translate_templateparamlist(templateparamlist,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ To: - #~ ...? - def _translate_memberdef_function_type( self, resultType, target=None, **kwargs ): - methodType = self._createNode('type') - self._translate_type(resultType,target=methodType) - if methodType.hasChildNodes(): - target.appendChild(methodType) - return methodType - - #~ Translate: - #~ ... - def _translate_memberdef_function_briefdescription( self, description, target=None, **kwargs ): - result = self._translateDescription(description,target=target,**kwargs) - ## For functions if we translate the brief docs to the purpose they end up - ## right above the regular description. And since we just added the brief to that - ## on the previous line, don't bother with the repetition. - # result = self._translateDescription(description,target=target,tag='purpose',**kwargs) - return result - - #~ Translate: - #~ ... - def _translate_memberdef_function_detaileddescription( self, description, target=None, **kwargs ): - return self._translateDescription(description,target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_memberdef_function_inbodydescription( self, description, target=None, **kwargs ): - return self._translateDescription(description,target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_memberdef_function_param( self, param, target=None, **kwargs ): - return self._translate_param(param,target=target,**kwargs) - - #~ Translate: - #~ - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... 
- #~ - def _translate_memberdef_variable( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - data_member = target.appendChild(self._createNode('data-member', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - data_member_type = data_member.appendChild(self._createNode('type')) - self._translate_type(self._getChild('type',root=memberdef),target=data_member_type) - - #~ Translate: - #~ - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_enum( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - enum = target.appendChild(self._createNode('enum', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - for n in memberdef.childNodes: - self._translateNode(memberdef,'enum',n,target=enum,scope=scope,**kwargs) - return enum - - #~ Translate: - #~ - #~ - #~ ... - #~ ... - #~ - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_enum_enumvalue( self, enumvalue, target=None, scope=None, **kwargs ): - self._setID(enumvalue.getAttribute('id'), - scope+'::'+self._getChildData('name',root=enumvalue)) - value = target.appendChild(self._createNode('enumvalue', - id=enumvalue.getAttribute('id'), - name=self._getChildData('name',root=enumvalue))) - initializer = self._getChild('initializer',root=enumvalue) - if initializer: - self._translateChildren(initializer, - target=target.appendChild(self._createNode('default'))) - return value - - #~ Translate: - #~ - #~ ... - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ ... 
- #~ - def _translate_param( self, param, target=None, **kwargs): - parameter = target.appendChild(self._createNode('parameter', - name=self._getChildData('declname',root=param))) - paramtype = parameter.appendChild(self._createNode('paramtype')) - self._translate_type(self._getChild('type',root=param),target=paramtype) - defval = self._getChild('defval',root=param) - if defval: - self._translateChildren(self._getChild('defval',root=param),target=parameter) - return parameter - - #~ Translate: - #~ ... - def _translate_ref( self, ref, **kwargs ): - return self._translateNode(ref,ref.getAttribute('kindref')) - - #~ Translate: - #~ ... - #~ To: - #~ ... - def _translate_ref_compound( self, ref, **kwargs ): - result = self._createNode('link',linkend=ref.getAttribute('refid')) - classname = result.appendChild(self._createNode('classname')) - self._translateChildren(ref,target=classname) - return result - - #~ Translate: - #~ ... - #~ To: - #~ ... - def _translate_ref_member( self, ref, **kwargs ): - result = self._createNode('link',linkend=ref.getAttribute('refid')) - self._translateChildren(ref,target=result) - return result - - #~ Translate: - #~ ... - def _translate_type( self, type, target=None, **kwargs ): - result = self._translateChildren(type,target=target,**kwargs) - #~ Filter types to clean up various readability problems, most notably - #~ with really long types. 
- xml = target.toxml('utf-8'); - if ( - xml.startswith('boost::mpl::') or - xml.startswith('BOOST_PP_') or - re.match('boost::(lazy_)?(enable|disable)_if',xml) - ): - while target.firstChild: - target.removeChild(target.firstChild) - target.appendChild(self._createText('emphasis','unspecified')) - return result - - def _getChild( self, tag = None, id = None, name = None, root = None ): - if not root: - root = self.boostbook.documentElement - for n in root.childNodes: - found = True - if tag and found: - found = found and tag == n.nodeName - if id and found: - if n.hasAttribute('id'): - found = found and n.getAttribute('id') == id - else: - found = found and n.hasAttribute('id') and n.getAttribute('id') == id - if name and found: - found = found and n.hasAttribute('name') and n.getAttribute('name') == name - if found: - #~ print '--|', n - return n - return None - - def _getChildData( self, tag, **kwargs ): - return self._getData(self._getChild(tag,**kwargs),**kwargs) - - def _getData( self, node, **kwargs ): - if node: - text = self._getChild('#text',root=node) - if text: - return text.data.strip() - return '' - - def _cppName( self, type ): - parts = re.search('^([^<]+)[<]?(.*)[>]?$',type.strip().strip(':')) - result = { - 'compoundname' : parts.group(1), - 'namespace' : parts.group(1).split('::')[0:-1], - 'name' : parts.group(1).split('::')[-1], - 'specialization' : parts.group(2) - } - if result['namespace'] and len(result['namespace']) > 0: - namespace = '::'.join(result['namespace']) - while ( - len(result['namespace']) > 0 and ( - not self.symbols.has_key(namespace) or - self.symbols[namespace]['kind'] != 'namespace') - ): - result['name'] = result['namespace'].pop()+'::'+result['name'] - namespace = '::'.join(result['namespace']) - return result - - def _createNode( self, tag, **kwargs ): - result = self.boostbook.createElement(tag) - for k in kwargs.keys(): - if kwargs[k] != '': - if k == 'id': - result.setAttribute('id',kwargs[k]) - else: - 
result.setAttribute(k,kwargs[k]) - return result - - def _createText( self, tag, data, **kwargs ): - result = self._createNode(tag,**kwargs) - data = data.strip() - if len(data) > 0: - result.appendChild(self.boostbook.createTextNode(data)) - return result - - -def main( xmldir=None, output=None, id=None, title=None, index=False ): - #~ print '--- main: xmldir = %s, output = %s' % (xmldir,output) - - input = glob.glob( os.path.abspath( os.path.join( xmldir, "*.xml" ) ) ) - input.sort - translator = Doxygen2BoostBook(id=id, title=title, index=index) - #~ Feed in the namespaces first to build up the set of namespaces - #~ and definitions so that lookup is unambiguous when reading in the definitions. - namespace_files = filter( - lambda x: - os.path.basename(x).startswith('namespace'), - input) - decl_files = filter( - lambda x: - not os.path.basename(x).startswith('namespace') and not os.path.basename(x).startswith('_'), - input) - for dox in namespace_files: - #~ print '--|',os.path.basename(dox) - translator.addDox(xml.dom.minidom.parse(dox)) - for dox in decl_files: - #~ print '--|',os.path.basename(dox) - translator.addDox(xml.dom.minidom.parse(dox)) - - if output: - output = open(output,'w') - else: - output = sys.stdout - if output: - output.write(translator.tostring()) - - -main( **get_args() ) diff --git a/jam-files/boost-build/tools/doxygen-config.jam b/jam-files/boost-build/tools/doxygen-config.jam deleted file mode 100644 index 2cd2ccae..00000000 --- a/jam-files/boost-build/tools/doxygen-config.jam +++ /dev/null @@ -1,11 +0,0 @@ -#~ Copyright 2005, 2006 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Doxygen tools. To use, just import this module. - -import toolset : using ; - -ECHO "warning: doxygen-config.jam is deprecated. Use 'using doxygen ;' instead." 
; - -using doxygen ; diff --git a/jam-files/boost-build/tools/doxygen.jam b/jam-files/boost-build/tools/doxygen.jam deleted file mode 100644 index 8394848d..00000000 --- a/jam-files/boost-build/tools/doxygen.jam +++ /dev/null @@ -1,776 +0,0 @@ -# Copyright 2003, 2004 Douglas Gregor -# Copyright 2003, 2004, 2005 Vladimir Prus -# Copyright 2006 Rene Rivera -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines rules to handle generation of various outputs from source -# files documented with doxygen comments. The supported transformations are: -# -# * Source -> Doxygen XML -> BoostBook XML -# * Source -> Doxygen HTML -# -# The type of transformation is selected based on the target requested. For -# BoostBook XML, the default, specifying a target with an ".xml" suffix, or an -# empty suffix, will produce a .xml and .boostbook. For Doxygen -# HTML specifying a target with an ".html" suffix will produce a directory -# with the Doxygen html files, and a .html file redirecting to -# that directory. - -import "class" : new ; -import targets ; -import feature ; -import property ; -import generators ; -import boostbook ; -import type ; -import path ; -import print ; -import regex ; -import stage ; -import project ; -import xsltproc ; -import make ; -import os ; -import toolset : flags ; -import alias ; -import common ; -import modules ; -import project ; -import utility ; -import errors ; - - -# Use to specify extra configuration paramters. These get translated -# into a doxyfile which configures the building of the docs. -feature.feature doxygen:param : : free ; - -# Specify the "boost.doxygen.header.prefix" XSLT option. -feature.feature prefix : : free ; - -# Specify the "boost.doxygen.reftitle" XSLT option. -feature.feature reftitle : : free ; - -# Which processor to use for various translations from Doxygen. 
-feature.feature doxygen.processor : xsltproc doxproc : propagated implicit ; - -# To generate, or not, index sections. -feature.feature doxygen.doxproc.index : no yes : propagated incidental ; - -# The ID for the resulting BoostBook reference section. -feature.feature doxygen.doxproc.id : : free ; - -# The title for the resulting BoostBook reference section. -feature.feature doxygen.doxproc.title : : free ; - -# Location for images when generating XML -feature.feature doxygen:xml-imagedir : : free ; - -# Indicates whether the entire directory should be deleted -feature.feature doxygen.rmdir : off on : optional incidental ; - -# Doxygen configuration input file. -type.register DOXYFILE : doxyfile ; - -# Doxygen XML multi-file output. -type.register DOXYGEN_XML_MULTIFILE : xml-dir : XML ; - -# Doxygen XML coallesed output. -type.register DOXYGEN_XML : doxygen : XML ; - -# Doxygen HTML multifile directory. -type.register DOXYGEN_HTML_MULTIFILE : html-dir : HTML ; - -# Redirection HTML file to HTML multifile directory. -type.register DOXYGEN_HTML : : HTML ; - -type.register DOXYGEN_XML_IMAGES : doxygen-xml-images ; - -# Initialize the Doxygen module. Parameters are: -# name: the name of the 'doxygen' executable. If not specified, the name -# 'doxygen' will be used -# -rule init ( name ? ) -{ - if ! 
$(.initialized) - { - .initialized = true ; - - .doxproc = [ modules.binding $(__name__) ] ; - .doxproc = $(.doxproc:D)/doxproc.py ; - - generators.register-composing doxygen.headers-to-doxyfile - : H HPP CPP : DOXYFILE ; - generators.register-standard doxygen.run - : DOXYFILE : DOXYGEN_XML_MULTIFILE ; - generators.register-standard doxygen.xml-dir-to-boostbook - : DOXYGEN_XML_MULTIFILE : BOOSTBOOK : doxproc ; - generators.register-standard doxygen.xml-to-boostbook - : DOXYGEN_XML : BOOSTBOOK : xsltproc ; - generators.register-standard doxygen.collect - : DOXYGEN_XML_MULTIFILE : DOXYGEN_XML ; - generators.register-standard doxygen.run - : DOXYFILE : DOXYGEN_HTML_MULTIFILE ; - generators.register-standard doxygen.html-redirect - : DOXYGEN_HTML_MULTIFILE : DOXYGEN_HTML ; - generators.register-standard doxygen.copy-latex-pngs - : DOXYGEN_HTML : DOXYGEN_XML_IMAGES ; - - IMPORT $(__name__) : doxygen : : doxygen ; - } - - if $(name) - { - modify-config ; - .doxygen = $(name) ; - check-doxygen ; - } - - if ! $(.doxygen) - { - check-doxygen ; - } -} - -rule freeze-config ( ) -{ - if ! $(.initialized) - { - errors.user-error "doxygen must be initialized before it can be used." ; - } - if ! $(.config-frozen) - { - .config-frozen = true ; - - if [ .is-cygwin ] - { - .is-cygwin = true ; - } - } -} - -rule modify-config ( ) -{ - if $(.config-frozen) - { - errors.user-error "Cannot change doxygen after it has been used." 
; - } -} - -rule check-doxygen ( ) -{ - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using doxygen ":" $(.doxygen) ; - } - local extra-paths ; - if [ os.name ] = NT - { - local ProgramFiles = [ modules.peek : ProgramFiles ] ; - if $(ProgramFiles) - { - extra-paths = "$(ProgramFiles:J= )" ; - } - else - { - extra-paths = "C:\\Program Files" ; - } - } - .doxygen = [ common.get-invocation-command doxygen : - doxygen : $(.doxygen) : $(extra-paths) ] ; -} - -rule name ( ) -{ - freeze-config ; - return $(.doxygen) ; -} - -rule .is-cygwin ( ) -{ - if [ os.on-windows ] - { - local file = [ path.make [ modules.binding $(__name__) ] ] ; - local dir = [ path.native - [ path.join [ path.parent $(file) ] doxygen ] ] ; - local command = - "cd \"$(dir)\" && \"$(.doxygen)\" windows-paths-check.doxyfile 2>&1" ; - result = [ SHELL $(command) ] ; - if [ MATCH "(Parsing file /)" : $(result) ] - { - return true ; - } - } -} - -# Runs Doxygen on the given Doxygen configuration file (the source) to generate -# the Doxygen files. The output is dumped according to the settings in the -# Doxygen configuration file, not according to the target! Because of this, we -# essentially "touch" the target file, in effect making it look like we have -# really written something useful to it. Anyone that uses this action must deal -# with this behavior. -# -actions doxygen-action -{ - $(RM) "$(*.XML)" & "$(NAME:E=doxygen)" "$(>)" && echo "Stamped" > "$(<)" -} - - -# Runs the Python doxproc XML processor. 
-# -actions doxproc -{ - python "$(DOXPROC)" "--xmldir=$(>)" "--output=$(<)" "$(OPTIONS)" "--id=$(ID)" "--title=$(TITLE)" -} - - -rule translate-path ( path ) -{ - freeze-config ; - if [ os.on-windows ] - { - if [ os.name ] = CYGWIN - { - if $(.is-cygwin) - { - return $(path) ; - } - else - { - return $(path:W) ; - } - } - else - { - if $(.is-cygwin) - { - match = [ MATCH ^(.):(.*) : $(path) ] ; - if $(match) - { - return /cygdrive/$(match[1])$(match[2]:T) ; - } - else - { - return $(path:T) ; - } - } - else - { - return $(path) ; - } - } - } - else - { - return $(path) ; - } -} - - -# Generates a doxygen configuration file (doxyfile) given a set of C++ sources -# and a property list that may contain features. -# -rule headers-to-doxyfile ( target : sources * : properties * ) -{ - local text "# Generated by Boost.Build version 2" ; - - local output-dir ; - - # Translate into command line flags. - for local param in [ feature.get-values : $(properties) ] - { - local namevalue = [ regex.match ([^=]*)=(.*) : $(param) ] ; - if $(namevalue[1]) = OUTPUT_DIRECTORY - { - output-dir = [ translate-path - [ utility.unquote $(namevalue[2]) ] ] ; - text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ; - } - else - { - text += "$(namevalue[1]) = $(namevalue[2])" ; - } - } - - if ! $(output-dir) - { - output-dir = [ translate-path [ on $(target) return $(LOCATE) ] ] ; - text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ; - } - - local headers = ; - for local header in $(sources:G=) - { - header = [ translate-path $(header) ] ; - headers += \"$(header)\" ; - } - - # Doxygen generates LaTex by default. So disable it unconditionally, or at - # least until someone needs, and hence writes support for, LaTex output. - text += "GENERATE_LATEX = NO" ; - text += "INPUT = $(headers:J= )" ; - print.output $(target) plain ; - print.text $(text) : true ; -} - - -# Run Doxygen. See doxygen-action for a description of the strange properties of -# this rule. 
-# -rule run ( target : source : properties * ) -{ - freeze-config ; - if on in $(properties) - { - local output-dir = - [ path.make - [ MATCH OUTPUT_DIRECTORY=\"?([^\"]*) : - $(properties) ] ] ; - local html-dir = - [ path.make - [ MATCH HTML_OUTPUT=(.*) : - $(properties) ] ] ; - if $(output-dir) && $(html-dir) && - [ path.glob $(output-dir) : $(html-dir) ] - { - HTMLDIR on $(target) = - [ path.native [ path.join $(output-dir) $(html-dir) ] ] ; - rm-htmldir $(target) ; - } - } - doxygen-action $(target) : $(source) ; - NAME on $(target) = $(.doxygen) ; - RM on $(target) = [ modules.peek common : RM ] ; - *.XML on $(target) = - [ path.native - [ path.join - [ path.make [ on $(target) return $(LOCATE) ] ] - $(target:B:S=) - *.xml ] ] ; -} - -if [ os.name ] = NT -{ - RMDIR = rmdir /s /q ; -} -else -{ - RMDIR = rm -rf ; -} - -actions quietly rm-htmldir -{ - $(RMDIR) $(HTMLDIR) -} - -# The rules below require Boost.Book stylesheets, so we need some code to check -# that the boostbook module has actualy been initialized. -# -rule check-boostbook ( ) -{ - if ! [ modules.peek boostbook : .initialized ] - { - ECHO "error: the boostbook module is not initialized" ; - ECHO "error: you've attempted to use the 'doxygen' toolset, " ; - ECHO "error: which requires Boost.Book," ; - ECHO "error: but never initialized Boost.Book." ; - EXIT "error: Hint: add 'using boostbook ;' to your user-config.jam" ; - } -} - - -# Collect the set of Doxygen XML files into a single XML source file that can be -# handled by an XSLT processor. The source is completely ignored (see -# doxygen-action), because this action picks up the Doxygen XML index file -# xml/index.xml. This is because we can not teach Doxygen to act like a NORMAL -# program and take a "-o output.xml" argument (grrrr). The target of the -# collection will be a single Doxygen XML file. 
-# -rule collect ( target : source : properties * ) -{ - check-boostbook ; - local collect-xsl-dir - = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen collect ] ] ; - local source-path - = [ path.make [ on $(source) return $(LOCATE) ] ] ; - local collect-path - = [ path.root [ path.join $(source-path) $(source:B) ] [ path.pwd ] ] ; - local native-path - = [ path.native $(collect-path) ] ; - local real-source - = [ path.native [ path.join $(collect-path) index.xml ] ] ; - xsltproc.xslt $(target) : $(real-source) $(collect-xsl-dir:S=.xsl) - : doxygen.xml.path=$(native-path) ; -} - - -# Translate Doxygen XML into BoostBook. -# -rule xml-to-boostbook ( target : source : properties * ) -{ - check-boostbook ; - local xsl-dir = [ boostbook.xsl-dir ] ; - local d2b-xsl = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen - doxygen2boostbook.xsl ] ] ; - - local xslt-properties = $(properties) ; - for local prefix in [ feature.get-values : $(properties) ] - { - xslt-properties += "boost.doxygen.header.prefix=$(prefix)" ; - } - for local title in [ feature.get-values : $(properties) ] - { - xslt-properties += "boost.doxygen.reftitle=$(title)" ; - } - - xsltproc.xslt $(target) : $(source) $(d2b-xsl) : $(xslt-properties) ; -} - - -flags doxygen.xml-dir-to-boostbook OPTIONS yes : --enable-index ; -flags doxygen.xml-dir-to-boostbook ID ; -flags doxygen.xml-dir-to-boostbook TITLE ; - - -rule xml-dir-to-boostbook ( target : source : properties * ) -{ - DOXPROC on $(target) = $(.doxproc) ; - - LOCATE on $(source:S=) = [ on $(source) return $(LOCATE) ] ; - - doxygen.doxproc $(target) : $(source:S=) ; -} - - -# Generate the HTML redirect to HTML dir index.html file. -# -rule html-redirect ( target : source : properties * ) -{ - local uri = "$(target:B)/index.html" ; - print.output $(target) plain ; - print.text -" - - - - - - - - - Automatic redirection failed, please go to $(uri). 
- - -" - : true ; -} - -rule copy-latex-pngs ( target : source : requirements * ) -{ - local directory = [ path.native - [ feature.get-values : - $(requirements) ] ] ; - - local location = [ on $(target) return $(LOCATE) ] ; - - local pdf-location = - [ path.native - [ path.join - [ path.make $(location) ] - [ path.make $(directory) ] ] ] ; - local html-location = - [ path.native - [ path.join - . - html - [ path.make $(directory) ] ] ] ; - - common.MkDir $(pdf-location) ; - common.MkDir $(html-location) ; - - DEPENDS $(target) : $(pdf-location) $(html-location) ; - - if [ os.name ] = NT - { - CP on $(target) = copy /y ; - FROM on $(target) = \\*.png ; - TOHTML on $(target) = .\\html\\$(directory) ; - TOPDF on $(target) = \\$(directory) ; - } - else - { - CP on $(target) = cp ; - FROM on $(target) = /*.png ; - TOHTML on $(target) = ./html/$(directory) ; - TOPDF on $(target) = $(target:D)/$(directory) ; - } -} - -actions copy-latex-pngs -{ - $(CP) $(>:S=)$(FROM) $(TOHTML) - $(CP) $(>:S=)$(FROM) $(<:D)$(TOPDF) - echo "Stamped" > "$(<)" -} - -# building latex images for doxygen XML depends -# on latex, dvips, and ps being in your PATH. -# This is true for most Unix installs, but -# not on Win32, where you will need to install -# MkTex and Ghostscript and add these tools -# to your path. - -actions check-latex -{ - latex -version >$(<) -} - -actions check-dvips -{ - dvips -version >$(<) -} - -if [ os.name ] = "NT" -{ - actions check-gs - { - gswin32c -version >$(<) - } -} -else -{ - actions check-gs - { - gs -version >$(<) - } -} - -rule check-tools ( ) -{ - if ! $(.check-tools-targets) - { - # Find the root project. 
- local root-project = [ project.current ] ; - root-project = [ $(root-project).project-module ] ; - while - [ project.attribute $(root-project) parent-module ] && - [ project.attribute $(root-project) parent-module ] != user-config - { - root-project = - [ project.attribute $(root-project) parent-module ] ; - } - - .latex.check = [ new file-target latex.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-latex ] - : - ] ; - .dvips.check = [ new file-target dvips.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-dvips ] - : - ] ; - .gs.check = [ new file-target gs.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-gs ] - : - ] ; - .check-tools-targets = $(.latex.check) $(.dvips.check) $(.gs.check) ; - } - return $(.check-tools-targets) ; -} - -project.initialize $(__name__) ; -project doxygen ; - -class doxygen-check-tools-target-class : basic-target -{ - import doxygen ; - rule construct ( name : sources * : property-set ) - { - return [ property-set.empty ] [ doxygen.check-tools ] ; - } -} - -local project = [ project.current ] ; - -targets.main-target-alternative - [ new doxygen-check-tools-target-class check-tools : $(project) - : [ targets.main-target-sources : check-tools : no-renaming ] - : [ targets.main-target-requirements : $(project) ] - : [ targets.main-target-default-build : $(project) ] - : [ targets.main-target-usage-requirements : $(project) ] - ] ; - -# User-level rule to generate BoostBook XML from a set of headers via Doxygen. -# -rule doxygen ( target : sources * : requirements * : default-build * : usage-requirements * ) -{ - freeze-config ; - local project = [ project.current ] ; - - if $(target:S) = .html - { - # Build an HTML directory from the sources. - local html-location = [ feature.get-values : $(requirements) ] ; - local output-dir ; - if [ $(project).get build-dir ] - { - # Explicitly specified build dir. Add html at the end. 
- output-dir = [ path.join [ $(project).build-dir ] $(html-location:E=html) ] ; - } - else - { - # Trim 'bin' from implicit build dir, for no other reason that backward - # compatibility. - output-dir = [ path.join [ path.parent [ $(project).build-dir ] ] - $(html-location:E=html) ] ; - } - output-dir = [ path.root $(output-dir) [ path.pwd ] ] ; - local output-dir-native = [ path.native $(output-dir) ] ; - requirements = [ property.change $(requirements) : ] ; - - ## The doxygen configuration file. - targets.main-target-alternative - [ new typed-target $(target:S=.tag) : $(project) : DOXYFILE - : [ targets.main-target-sources $(sources) : $(target:S=.tag) ] - : [ targets.main-target-requirements $(requirements) - GENERATE_HTML=YES - GENERATE_XML=NO - "OUTPUT_DIRECTORY=\"$(output-dir-native)\"" - HTML_OUTPUT=$(target:B) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.tag) ; - - ## The html directory to generate by running doxygen. - targets.main-target-alternative - [ new typed-target $(target:S=.dir) : $(project) : DOXYGEN_HTML_MULTIFILE - : $(target:S=.tag) - : [ targets.main-target-requirements $(requirements) - "OUTPUT_DIRECTORY=\"$(output-dir-native)\"" - HTML_OUTPUT=$(target:B) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.dir) ; - - ## The redirect html file into the generated html. - targets.main-target-alternative - [ new typed-target $(target) : $(project) : DOXYGEN_HTML - : $(target:S=.dir) - : [ targets.main-target-requirements $(requirements) - $(output-dir) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - } - else - { - # Build a BoostBook XML file from the sources. 
- local location-xml = [ feature.get-values : $(requirements) ] ; - requirements = [ property.change $(requirements) : ] ; - local target-xml = $(target:B=$(target:B)-xml) ; - - # Check whether we need to build images - local images-location = - [ feature.get-values : $(requirements) ] ; - if $(images-location) - { - doxygen $(target).doxygen-xml-images.html : $(sources) - : $(requirements) - on - QUIET=YES - WARNINGS=NO - WARN_IF_UNDOCUMENTED=NO - /doxygen//check-tools ; - $(project).mark-target-as-explicit - $(target).doxygen-xml-images.html ; - - targets.main-target-alternative - [ new typed-target $(target).doxygen-xml-images - : $(project) : DOXYGEN_XML_IMAGES - : $(target).doxygen-xml-images.html - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) - : $(project) ] - ] ; - - $(project).mark-target-as-explicit - $(target).doxygen-xml-images ; - - if ! [ regex.match "^(.*/)$" : $(images-location) ] - { - images-location = $(images-location)/ ; - } - - requirements += - $(target).doxygen-xml-images - boost.doxygen.formuladir=$(images-location) ; - } - - ## The doxygen configuration file. - targets.main-target-alternative - [ new typed-target $(target-xml:S=.tag) : $(project) : DOXYFILE - : [ targets.main-target-sources $(sources) : $(target-xml:S=.tag) ] - : [ targets.main-target-requirements $(requirements) - GENERATE_HTML=NO - GENERATE_XML=YES - XML_OUTPUT=$(target-xml) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml:S=.tag) ; - - ## The Doxygen XML directory of the processed source files. 
- targets.main-target-alternative - [ new typed-target $(target-xml:S=.dir) : $(project) : DOXYGEN_XML_MULTIFILE - : $(target-xml:S=.tag) - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml:S=.dir) ; - - ## The resulting BoostBook file is generated by the processor tool. The - ## tool can be either the xsltproc plus accompanying XSL scripts. Or it - ## can be the python doxproc.py script. - targets.main-target-alternative - [ new typed-target $(target-xml) : $(project) : BOOSTBOOK - : $(target-xml:S=.dir) - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml) ; - - targets.main-target-alternative - [ new install-target-class $(target:S=.xml) : $(project) - : $(target-xml) - : [ targets.main-target-requirements $(requirements) - $(location-xml:E=.) 
- $(target:S=.xml) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.xml) ; - - targets.main-target-alternative - [ new alias-target-class $(target) : $(project) - : - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) - $(target:S=.xml) - : $(project) ] - ] ; - } -} diff --git a/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile b/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile deleted file mode 100644 index 9b969df9..00000000 --- a/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile +++ /dev/null @@ -1,3 +0,0 @@ -INPUT = windows-paths-check.hpp -GENERATE_HTML = NO -GENERATE_LATEX = NO diff --git a/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp b/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp deleted file mode 100644 index e69de29b..00000000 diff --git a/jam-files/boost-build/tools/fop.jam b/jam-files/boost-build/tools/fop.jam deleted file mode 100644 index c24b8725..00000000 --- a/jam-files/boost-build/tools/fop.jam +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2003-2004 Doug Gregor and Dave Abrahams. Distributed -# under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) -# -# This module defines rules to handle generation of PDF and -# PostScript files from XSL Formatting Objects via Apache FOP - -import generators ; -import common ; -import boostbook ; - -generators.register-standard fop.render.pdf : FO : PDF ; -generators.register-standard fop.render.ps : FO : PS ; - -# Initializes the fop toolset. -# -rule init ( fop-command ? : java-home ? : java ? 
) -{ - local has-command = $(.has-command) ; - - if $(fop-command) - { - .has-command = true ; - } - - if $(fop-command) || ! $(has-command) - { - fop-command = [ common.get-invocation-command fop : fop : $(fop-command) - : [ modules.peek : FOP_DIR ] ] ; - } - - if $(fop-command) - { - .FOP_COMMAND = $(fop-command) ; - } - - if $(java-home) || $(java) - { - .FOP_SETUP = ; - - - # JAVA_HOME is the location that java was installed to. - - if $(java-home) - { - .FOP_SETUP += [ common.variable-setting-command JAVA_HOME : $(java-home) ] ; - } - - # JAVACMD is the location that of the java executable, useful for a - # non-standard java installation, where the executable isn't at - # $JAVA_HOME/bin/java. - - if $(java) - { - .FOP_SETUP += [ common.variable-setting-command JAVACMD : $(java) ] ; - } - } -} - -actions render.pdf -{ - $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) $(<) -} - -actions render.ps -{ - $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) -ps $(<) -} diff --git a/jam-files/boost-build/tools/fortran.jam b/jam-files/boost-build/tools/fortran.jam deleted file mode 100644 index 37665825..00000000 --- a/jam-files/boost-build/tools/fortran.jam +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# -# This file contains common settings for all fortran tools -# - -import "class" : new ; -import feature : feature ; - -import type ; -import generators ; -import common ; - -type.register FORTRAN : f F for f77 ; -type.register FORTRAN90 : f90 F90 ; - -feature fortran : : free ; -feature fortran90 : : free ; - -class fortran-compiling-generator : generator -{ - rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * ) - { - generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ; - } -} - -rule register-fortran-compiler ( id : source-types + : target-types + : requirements * : optional-properties * ) -{ - local g = [ new fortran-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ; - generators.register $(g) ; -} - -class fortran90-compiling-generator : generator -{ - rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * ) - { - generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ; - } -} - -rule register-fortran90-compiler ( id : source-types + : target-types + : requirements * : optional-properties * ) -{ - local g = [ new fortran90-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ; - generators.register $(g) ; -} - -# FIXME: this is ugly, should find a better way (we'd want client code to -# register all generators as "generator.some-rule", not with "some-module.some-rule".) 
-IMPORT $(__name__) : register-fortran-compiler : : generators.register-fortran-compiler ; -IMPORT $(__name__) : register-fortran90-compiler : : generators.register-fortran90-compiler ; diff --git a/jam-files/boost-build/tools/gcc.jam b/jam-files/boost-build/tools/gcc.jam deleted file mode 100644 index f7b0da54..00000000 --- a/jam-files/boost-build/tools/gcc.jam +++ /dev/null @@ -1,1185 +0,0 @@ -# Copyright 2001 David Abrahams. -# Copyright 2002-2006 Rene Rivera. -# Copyright 2002-2003 Vladimir Prus. -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov. -# Copyright 2007 Roland Schwarz -# Copyright 2007 Boris Gubenko. -# -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import pch ; -import property ; -import property-set ; -import toolset ; -import type ; -import rc ; -import regex ; -import set ; -import unix ; -import fortran ; - - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - - -feature.extend toolset : gcc ; -# feature.subfeature toolset gcc : flavor : : optional ; - -toolset.inherit-generators gcc : unix : unix.link unix.link.dll ; -toolset.inherit-flags gcc : unix ; -toolset.inherit-rules gcc : unix ; - -generators.override gcc.prebuilt : builtin.prebuilt ; -generators.override gcc.searched-lib-generator : searched-lib-generator ; - -# Make gcc toolset object files use the "o" suffix on all platforms. -type.set-generated-target-suffix OBJ : gcc : o ; -type.set-generated-target-suffix OBJ : gcc windows : o ; -type.set-generated-target-suffix OBJ : gcc cygwin : o ; - -# Initializes the gcc toolset for the given version. If necessary, command may -# be used to specify where the compiler is located. 
The parameter 'options' is a -# space-delimited list of options, each one specified as -# option-value. Valid option names are: cxxflags, linkflags and -# linker-type. Accepted linker-type values are aix, darwin, gnu, hpux, osf or -# sun and the default value will be selected based on the current OS. -# Example: -# using gcc : 3.4 : : foo bar sun ; -# -# The compiler command to use is detected in a three step manner: -# 1) If an explicit command is specified by the user, it will be used and must available. -# 2) If only a certain version is specified, it is enforced: -# - either a command 'g++-VERSION' must be available -# - or the default command 'g++' must be available and match the exact version. -# 3) Without user-provided restrictions use default 'g++' -rule init ( version ? : command * : options * ) -{ - #1): use user-provided command - local tool-command = ; - if $(command) - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ : $(command) ] ; - if ! $(tool-command) - { - errors.error "toolset gcc initialization:" : - "provided command '$(command)' not found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - #2): enforce user-provided version - else if $(version) - { - tool-command = [ common.get-invocation-command-nodefault gcc : "g++-$(version[1])" ] ; - - #2.1) fallback: check whether "g++" reports the requested version - if ! $(tool-command) - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ ] ; - if $(tool-command) - { - local tool-command-string = $(tool-command:J=" ") ; - local tool-version = [ MATCH "^([0-9.]+)" : [ SHELL "$(tool-command-string) -dumpversion" ] ] ; - if $(tool-version) != $(version) - { - # Permit a match betwen two-digit version specified by the user - # (e.g. 4.4) and 3-digit version reported by gcc. 
- # Since only two digits are present in binary name anyway, - # insisting that user specify 3-digit version when - # configuring Boost.Build while it's not required on - # command like would be strange. - local stripped = [ MATCH "^([0-9]+\.[0-9]+).*" : $(tool-version) ] ; - if $(stripped) != $(version) - { - errors.error "toolset gcc initialization:" : - "version '$(version)' requested but 'g++-$(version)' not found and version '$(tool-version)' of default '$(tool-command)' does not match" : - "initialized from" [ errors.nearest-user-location ] ; - tool-command = ; - } - # Use full 3-digit version to be compatible with the 'using gcc ;' case - version = $(tool-version) ; - } - } - else - { - errors.error "toolset gcc initialization:" : - "version '$(version)' requested but neither 'g++-$(version)' nor default 'g++' found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - } - #3) default: no command and no version specified, try using default command "g++" - else - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ ] ; - if ! $(tool-command) - { - errors.error "toolset gcc initialization:" : - "no command provided, default command 'g++' not found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - - - # Information about the gcc command... - # The command. - local command = $(tool-command) ; - # The root directory of the tool install. - local root = [ feature.get-values : $(options) ] ; - # The bin directory where to find the command to execute. - local bin ; - # The flavor of compiler. - local flavor = [ feature.get-values : $(options) ] ; - # Autodetect the root and bin dir if not given. - if $(command) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - } - # The 'command' variable can have multiple elements. When calling - # the SHELL builtin we need a single string. - local command-string = $(command:J=" ") ; - # Autodetect the version and flavor if not given. 
- if $(command) - { - local machine = [ MATCH "^([^ ]+)" - : [ SHELL "$(command-string) -dumpmachine" ] ] ; - version ?= [ MATCH "^([0-9.]+)" - : [ SHELL "$(command-string) -dumpversion" ] ] ; - switch $(machine:L) - { - case *mingw* : flavor ?= mingw ; - } - } - - local condition ; - if $(flavor) - { - condition = [ common.check-init-parameters gcc - : version $(version) - : flavor $(flavor) - ] ; - } - else - { - condition = [ common.check-init-parameters gcc - : version $(version) - ] ; - condition = $(condition) ; #/ ; - } - - common.handle-options gcc : $(condition) : $(command) : $(options) ; - - local linker = [ feature.get-values : $(options) ] ; - # The logic below should actually be keyed on - if ! $(linker) - { - if [ os.name ] = OSF - { - linker = osf ; - } - else if [ os.name ] = HPUX - { - linker = hpux ; - } - else if [ os.name ] = AIX - { - linker = aix ; - } - else if [ os.name ] = SOLARIS - { - linker = sun ; - } - else - { - linker = gnu ; - } - } - init-link-flags gcc $(linker) $(condition) ; - - - # If gcc is installed in non-standard location, we'd need to add - # LD_LIBRARY_PATH when running programs created with it (for unit-test/run - # rules). - if $(command) - { - # On multilib 64-bit boxes, there are both 32-bit and 64-bit libraries - # and all must be added to LD_LIBRARY_PATH. The linker will pick the - # right onces. Note that we don't provide a clean way to build 32-bit - # binary with 64-bit compiler, but user can always pass -m32 manually. - local lib_path = $(root)/bin $(root)/lib $(root)/lib32 $(root)/lib64 ; - if $(.debug-configuration) - { - ECHO notice: using gcc libraries :: $(condition) :: $(lib_path) ; - } - toolset.flags gcc.link RUN_PATH $(condition) : $(lib_path) ; - } - - # If it's not a system gcc install we should adjust the various programs as - # needed to prefer using the install specific versions. This is essential - # for correct use of MinGW and for cross-compiling. 
- - local nl = " -" ; - - # - The archive builder. - local archiver = [ common.get-invocation-command gcc - : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" : [ SHELL "$(command-string) -print-prog-name=ar" ] ] ] - : [ feature.get-values : $(options) ] - : $(bin) - : search-path ] ; - toolset.flags gcc.archive .AR $(condition) : $(archiver[1]) ; - if $(.debug-configuration) - { - ECHO notice: using gcc archiver :: $(condition) :: $(archiver[1]) ; - } - - # - Ranlib - local ranlib = [ common.get-invocation-command gcc - : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" : [ SHELL "$(command-string) -print-prog-name=ranlib" ] ] ] - : [ feature.get-values : $(options) ] - : $(bin) - : search-path ] ; - toolset.flags gcc.archive .RANLIB $(condition) : $(ranlib[1]) ; - if $(.debug-configuration) - { - ECHO notice: using gcc ranlib :: $(condition) :: $(ranlib[1]) ; - } - - - # - The resource compiler. - local rc = - [ common.get-invocation-command-nodefault gcc - : windres : [ feature.get-values : $(options) ] : $(bin) : search-path ] ; - local rc-type = - [ feature.get-values : $(options) ] ; - rc-type ?= windres ; - if ! $(rc) - { - # If we can't find an RC compiler we fallback to a null RC compiler that - # creates empty object files. This allows the same Jamfiles to work - # across the board. The null RC uses the assembler to create the empty - # objects, so configure that. - rc = [ common.get-invocation-command gcc : as : : $(bin) : search-path ] ; - rc-type = null ; - } - rc.configure $(rc) : $(condition) : $(rc-type) ; -} - -if [ os.name ] = NT -{ - # This causes single-line command invocation to not go through .bat files, - # thus avoiding command-line length limitations. 
- JAMSHELL = % ; -} - -generators.register-c-compiler gcc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : gcc ; -generators.register-c-compiler gcc.compile.c.preprocess : C : PREPROCESSED_C : gcc ; -generators.register-c-compiler gcc.compile.c++ : CPP : OBJ : gcc ; -generators.register-c-compiler gcc.compile.c : C : OBJ : gcc ; -generators.register-c-compiler gcc.compile.asm : ASM : OBJ : gcc ; -generators.register-fortran-compiler gcc.compile.fortran : FORTRAN FORTRAN90 : OBJ : gcc ; - -# pch support - -# The compiler looks for a precompiled header in each directory just before it -# looks for the include file in that directory. The name searched for is the -# name specified in the #include directive with ".gch" suffix appended. The -# logic in gcc-pch-generator will make sure that BASE_PCH suffix is appended to -# full name of the header. - -type.set-generated-target-suffix PCH : gcc : gch ; - -# GCC-specific pch generator. -class gcc-pch-generator : pch-generator -{ - import project ; - import property-set ; - import type ; - - rule run-pch ( project name ? : property-set : sources + ) - { - # Find the header in sources. Ignore any CPP sources. - local header ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] H ] - { - header = $(s) ; - } - } - - # Error handling: Base header file name should be the same as the base - # precompiled header name. 
- local header-name = [ $(header).name ] ; - local header-basename = $(header-name:B) ; - if $(header-basename) != $(name) - { - local location = [ $(project).project-module ] ; - errors.user-error "in" $(location)": pch target name `"$(name)"' should be the same as the base name of header file `"$(header-name)"'" ; - } - - local pch-file = [ generator.run $(project) $(name) : $(property-set) - : $(header) ] ; - - # return result of base class and pch-file property as usage-requirements - return - [ property-set.create $(pch-file) -Winvalid-pch ] - $(pch-file) - ; - } - - # Calls the base version specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. - rule generated-targets ( sources + : property-set : project name ? ) - { - name = [ $(sources[1]).name ] ; - return [ generator.generated-targets $(sources) - : $(property-set) : $(project) $(name) ] ; - } -} - -# Note: the 'H' source type will catch both '.h' header and '.hpp' header. The -# latter have HPP type, but HPP type is derived from H. The type of compilation -# is determined entirely by the destination type. -generators.register [ new gcc-pch-generator gcc.compile.c.pch : H : C_PCH : on gcc ] ; -generators.register [ new gcc-pch-generator gcc.compile.c++.pch : H : CPP_PCH : on gcc ] ; - -# Override default do-nothing generators. -generators.override gcc.compile.c.pch : pch.default-c-pch-generator ; -generators.override gcc.compile.c++.pch : pch.default-cpp-pch-generator ; - -toolset.flags gcc.compile PCH_FILE on : ; - -# Declare flags and action for compilation. 
-toolset.flags gcc.compile OPTIONS off : -O0 ; -toolset.flags gcc.compile OPTIONS speed : -O3 ; -toolset.flags gcc.compile OPTIONS space : -Os ; - -toolset.flags gcc.compile OPTIONS off : -fno-inline ; -toolset.flags gcc.compile OPTIONS on : -Wno-inline ; -toolset.flags gcc.compile OPTIONS full : -finline-functions -Wno-inline ; - -toolset.flags gcc.compile OPTIONS off : -w ; -toolset.flags gcc.compile OPTIONS on : -Wall ; -toolset.flags gcc.compile OPTIONS all : -Wall -pedantic ; -toolset.flags gcc.compile OPTIONS on : -Werror ; - -toolset.flags gcc.compile OPTIONS on : -g ; -toolset.flags gcc.compile OPTIONS on : -pg ; -toolset.flags gcc.compile OPTIONS off : -fno-rtti ; - -rule setup-fpic ( targets * : sources * : properties * ) -{ - local link = [ feature.get-values link : $(properties) ] ; - if $(link) = shared - { - local target = [ feature.get-values target-os : $(properties) ] ; - - # This logic will add -fPIC for all compilations: - # - # lib a : a.cpp b ; - # obj b : b.cpp ; - # exe c : c.cpp a d ; - # obj d : d.cpp ; - # - # This all is fine, except that 'd' will be compiled with -fPIC even though - # it is not needed, as 'd' is used only in exe. However, it is hard to - # detect where a target is going to be used. Alternatively, we can set -fPIC - # only when main target type is LIB but than 'b' would be compiled without - # -fPIC which would lead to link errors on x86-64. So, compile everything - # with -fPIC. - # - # Yet another alternative would be to create a propagated - # feature and set it when building shared libraries, but that would be hard - # to implement and would increase the target path length even more. - - # On Windows, fPIC is default, specifying -fPIC explicitly leads to - # a warning. 
- if $(target) != cygwin && $(target) != windows - { - OPTIONS on $(targets) += -fPIC ; - } - } -} - -rule setup-address-model ( targets * : sources * : properties * ) -{ - local model = [ feature.get-values address-model : $(properties) ] ; - if $(model) - { - local option ; - local os = [ feature.get-values target-os : $(properties) ] ; - if $(os) = aix - { - if $(model) = 32 - { - option = -maix32 ; - } - else - { - option = -maix64 ; - } - } - else if $(os) = hpux - { - if $(model) = 32 - { - option = -milp32 ; - } - else - { - option = -mlp64 ; - } - } - else - { - if $(model) = 32 - { - option = -m32 ; - } - else if $(model) = 64 - { - option = -m64 ; - } - # For darwin, the model can be 32_64. darwin.jam will handle that - # on its own. - } - OPTIONS on $(targets) += $(option) ; - } -} - - -# FIXME: this should not use os.name. -if [ os.name ] != NT && [ os.name ] != OSF && [ os.name ] != HPUX && [ os.name ] != AIX -{ - # OSF does have an option called -soname but it does not seem to work as - # expected, therefore it has been disabled. - HAVE_SONAME = "" ; - SONAME_OPTION = -h ; -} - -# HPUX, for some reason, seem to use '+h', not '-h'. 
-if [ os.name ] = HPUX -{ - HAVE_SONAME = "" ; - SONAME_OPTION = +h ; -} - -toolset.flags gcc.compile USER_OPTIONS ; -toolset.flags gcc.compile.c++ USER_OPTIONS ; -toolset.flags gcc.compile DEFINES ; -toolset.flags gcc.compile INCLUDES ; -toolset.flags gcc.compile.c++ TEMPLATE_DEPTH ; -toolset.flags gcc.compile.fortran USER_OPTIONS ; - -rule compile.c++.pch ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c++.pch -{ - "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.c.pch ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c.pch -{ - "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.c++.preprocess ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. - if ! 
$(>:S) in .cc .cp .cxx .cpp .c++ .C - { - LANG on $(<) = "-x c++" ; - } - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.c.preprocess ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. - #if $(>:S) != .c - #{ - LANG on $(<) = "-x c" ; - #} - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.c++ ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. - if ! $(>:S) in .cc .cp .cxx .cpp .c++ .C - { - LANG on $(<) = "-x c++" ; - } - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; - - # Here we want to raise the template-depth parameter value to something - # higher than the default value of 17. Note that we could do this using the - # feature.set-default rule but we do not want to set the default value for - # all toolsets as well. - # - # TODO: This 'modified default' has been inherited from some 'older Boost - # Build implementation' and has most likely been added to make some Boost - # library parts compile correctly. We should see what exactly prompted this - # and whether we can get around the problem more locally. - local template-depth = [ on $(<) return $(TEMPLATE_DEPTH) ] ; - if ! 
$(template-depth) - { - TEMPLATE_DEPTH on $(<) = 128 ; - } -} - -rule compile.c ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. - #if $(>:S) != .c - #{ - LANG on $(<) = "-x c" ; - #} - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.fortran ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c++ bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<:W)" "$(>:W)" -} - -actions compile.c bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++.preprocess bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>:W)" -E >"$(<:W)" -} - -actions compile.c.preprocess bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>)" -E >$(<) -} - -actions compile.fortran -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.asm ( targets * : sources * : properties * ) -{ - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - LANG on $(<) = "-x 
assembler-with-cpp" ; -} - -actions compile.asm -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# The class which check that we don't try to use the static -# property while creating or using shared library, since it's not supported by -# gcc/libc. -class gcc-linking-generator : unix-linking-generator -{ - rule run ( project name ? : property-set : sources + ) - { - # TODO: Replace this with the use of a target-os property. - local no-static-link = ; - if [ modules.peek : UNIX ] - { - switch [ modules.peek : JAMUNAME ] - { - case * : no-static-link = true ; - } - } - - local properties = [ $(property-set).raw ] ; - local reason ; - if $(no-static-link) && static in $(properties) - { - if shared in $(properties) - { - reason = - "On gcc, DLL can't be build with 'static'." ; - } - else if [ type.is-derived $(self.target-types[1]) EXE ] - { - for local s in $(sources) - { - local type = [ $(s).type ] ; - if $(type) && [ type.is-derived $(type) SHARED_LIB ] - { - reason = - "On gcc, using DLLS together with the" - "static options is not possible " ; - } - } - } - } - if $(reason) - { - ECHO warning: - $(reason) ; - ECHO warning: - "It is suggested to use 'static' together" - "with 'static'." ; - return ; - } - else - { - local generated-targets = [ unix-linking-generator.run $(project) - $(name) : $(property-set) : $(sources) ] ; - return $(generated-targets) ; - } - } -} - -# The set of permissible input types is different on mingw. -# So, define two sets of generators, with mingw generators -# selected when target-os=windows. 
- -local g ; -g = [ new gcc-linking-generator gcc.mingw.link - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : EXE - : gcc windows ] ; -$(g).set-rule-name gcc.link ; -generators.register $(g) ; - -g = [ new gcc-linking-generator gcc.mingw.link.dll - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : IMPORT_LIB SHARED_LIB - : gcc windows ] ; -$(g).set-rule-name gcc.link.dll ; -generators.register $(g) ; - -generators.register - [ new gcc-linking-generator gcc.link - : LIB OBJ - : EXE - : gcc ] ; -generators.register - [ new gcc-linking-generator gcc.link.dll - : LIB OBJ - : SHARED_LIB - : gcc ] ; - -generators.override gcc.mingw.link : gcc.link ; -generators.override gcc.mingw.link.dll : gcc.link.dll ; - -# Cygwin is similar to msvc and mingw in that it uses import libraries. -# While in simple cases, it can directly link to a shared library, -# it is believed to be slower, and not always possible. Define cygwin-specific -# generators here. - -g = [ new gcc-linking-generator gcc.cygwin.link - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : EXE - : gcc cygwin ] ; -$(g).set-rule-name gcc.link ; -generators.register $(g) ; - -g = [ new gcc-linking-generator gcc.cygwin.link.dll - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : IMPORT_LIB SHARED_LIB - : gcc cygwin ] ; -$(g).set-rule-name gcc.link.dll ; -generators.register $(g) ; - -generators.override gcc.cygwin.link : gcc.link ; -generators.override gcc.cygwin.link.dll : gcc.link.dll ; - -# Declare flags for linking. -# First, the common flags. 
-toolset.flags gcc.link OPTIONS on : -g ; -toolset.flags gcc.link OPTIONS on : -pg ; -toolset.flags gcc.link USER_OPTIONS ; -toolset.flags gcc.link LINKPATH ; -toolset.flags gcc.link FINDLIBS-ST ; -toolset.flags gcc.link FINDLIBS-SA ; -toolset.flags gcc.link LIBRARIES ; - -toolset.flags gcc.link.dll .IMPLIB-COMMAND windows : "-Wl,--out-implib," ; -toolset.flags gcc.link.dll .IMPLIB-COMMAND cygwin : "-Wl,--out-implib," ; - -# For static we made sure there are no dynamic libraries in the -# link. On HP-UX not all system libraries exist as archived libraries (for -# example, there is no libunwind.a), so, on this platform, the -static option -# cannot be specified. -if [ os.name ] != HPUX -{ - toolset.flags gcc.link OPTIONS static : -static ; -} - -# Now, the vendor specific flags. -# The parameter linker can be either aix, darwin, gnu, hpux, osf or sun. -rule init-link-flags ( toolset linker condition ) -{ - switch $(linker) - { - case aix : - { - # - # On AIX we *have* to use the native linker. - # - # Using -brtl, the AIX linker will look for libraries with both the .a - # and .so extensions, such as libfoo.a and libfoo.so. Without -brtl, the - # AIX linker looks only for libfoo.a. Note that libfoo.a is an archived - # file that may contain shared objects and is different from static libs - # as on Linux. - # - # The -bnoipath strips the prepending (relative) path of libraries from - # the loader section in the target library or executable. Hence, during - # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded - # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without - # this option, the prepending (relative) path + library name is - # hard-coded in the loader section, causing *only* this path to be - # searched during load-time. Note that the AIX linker does not have an - # -soname equivalent, this is as close as it gets. - # - # The above options are definately for AIX 5.x, and most likely also for - # AIX 4.x and AIX 6.x. 
For details about the AIX linker see: - # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf - # - - toolset.flags $(toolset).link OPTIONS : -Wl,-brtl -Wl,-bnoipath - : unchecked ; - } - - case darwin : - { - # On Darwin, the -s option to ld does not work unless we pass -static, - # and passing -static unconditionally is a bad idea. So, don't pass -s. - # at all, darwin.jam will use separate 'strip' invocation. - toolset.flags $(toolset).link RPATH $(condition) : : unchecked ; - toolset.flags $(toolset).link RPATH_LINK $(condition) : : unchecked ; - } - - case gnu : - { - # Strip the binary when no debugging is needed. We use --strip-all flag - # as opposed to -s since icc (intel's compiler) is generally - # option-compatible with and inherits from the gcc toolset, but does not - # support -s. - toolset.flags $(toolset).link OPTIONS $(condition)/on : -Wl,--strip-all : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : : unchecked ; - toolset.flags $(toolset).link RPATH_LINK $(condition) : : unchecked ; - toolset.flags $(toolset).link START-GROUP $(condition) : -Wl,--start-group : unchecked ; - toolset.flags $(toolset).link END-GROUP $(condition) : -Wl,--end-group : unchecked ; - - # gnu ld has the ability to change the search behaviour for libraries - # referenced by -l switch. These modifiers are -Bstatic and -Bdynamic - # and change search for -l switches that follow them. The following list - # shows the tried variants. - # The search stops at the first variant that has a match. 
- # *nix: -Bstatic -lxxx - # libxxx.a - # - # *nix: -Bdynamic -lxxx - # libxxx.so - # libxxx.a - # - # windows (mingw,cygwin) -Bstatic -lxxx - # libxxx.a - # xxx.lib - # - # windows (mingw,cygwin) -Bdynamic -lxxx - # libxxx.dll.a - # xxx.dll.a - # libxxx.a - # xxx.lib - # cygxxx.dll (*) - # libxxx.dll - # xxx.dll - # libxxx.a - # - # (*) This is for cygwin - # Please note that -Bstatic and -Bdynamic are not a guarantee that a - # static or dynamic lib indeed gets linked in. The switches only change - # search patterns! - - # On *nix mixing shared libs with static runtime is not a good idea. - toolset.flags $(toolset).link FINDLIBS-ST-PFX $(condition)/shared - : -Wl,-Bstatic : unchecked ; - toolset.flags $(toolset).link FINDLIBS-SA-PFX $(condition)/shared - : -Wl,-Bdynamic : unchecked ; - - # On windows allow mixing of static and dynamic libs with static - # runtime. - toolset.flags $(toolset).link FINDLIBS-ST-PFX $(condition)/static/windows - : -Wl,-Bstatic : unchecked ; - toolset.flags $(toolset).link FINDLIBS-SA-PFX $(condition)/static/windows - : -Wl,-Bdynamic : unchecked ; - toolset.flags $(toolset).link OPTIONS $(condition)/static/windows - : -Wl,-Bstatic : unchecked ; - } - - case hpux : - { - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link OPTIONS $(condition)/shared - : -fPIC : unchecked ; - } - - case osf : - { - # No --strip-all, just -s. - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : - : unchecked ; - # This does not supports -R. - toolset.flags $(toolset).link RPATH_OPTION $(condition) : -rpath - : unchecked ; - # -rpath-link is not supported at all. 
- } - - case sun : - { - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : - : unchecked ; - # Solaris linker does not have a separate -rpath-link, but allows to use - # -L for the same purpose. - toolset.flags $(toolset).link LINKPATH $(condition) : - : unchecked ; - - # This permits shared libraries with non-PIC code on Solaris. - # VP, 2004/09/07: Now that we have -fPIC hardcode in link.dll, the - # following is not needed. Whether -fPIC should be hardcoded, is a - # separate question. - # AH, 2004/10/16: it is still necessary because some tests link against - # static libraries that were compiled without PIC. - toolset.flags $(toolset).link OPTIONS $(condition)/shared - : -mimpure-text : unchecked ; - } - - case * : - { - errors.user-error - "$(toolset) initialization: invalid linker '$(linker)'" : - "The value '$(linker)' specified for is not recognized." : - "Possible values are 'aix', 'darwin', 'gnu', 'hpux', 'osf' or 'sun'" ; - } - } -} - -# Enclose the RPATH variable on 'targets' in (double) quotes, -# unless it's already enclosed in single quotes. -# This special casing is done because it's common to pass -# '$ORIGIN' to linker -- and it has to have single quotes -# to prevent expansion by shell -- and if we add double -# quotes then preventing properties of single quotes disappear. -rule quote-rpath ( targets * ) -{ - local r = [ on $(targets[1]) return $(RPATH) ] ; - if ! [ MATCH "('.*')" : $(r) ] - { - r = "\"$(r)\"" ; - } - RPATH on $(targets) = $(r) ; -} - -# Declare actions for linking. -rule link ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. 
For now, serialize only gcc links, it might be a - # good idea to serialize all links. - JAM_SEMAPHORE on $(targets) = gcc-link-semaphore ; - quote-rpath $(targets) ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS) - -} - -# Default value. Mostly for the sake of intel-linux that inherits from gcc, but -# does not have the same logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble as on Linux, 'ar' is -# always available. -.AR = ar ; -.RANLIB = ranlib ; - -toolset.flags gcc.archive AROPTIONS ; - -rule archive ( targets * : sources * : properties * ) -{ - # Always remove archive and start again. Here is the rationale from - # - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. I moved a1.c to - # a2.c, updated my Jamfiles and rebuilt. My program was crashing with absurd - # errors. After some debugging I traced it back to the fact that a1.o was - # *still* in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM onto the - # archive action. That makes archives fail to build on NT when they have - # many files because it will no longer execute the action directly and blow - # the line length limit. Instead we remove the file in a different action, - # just before building the archive. - # - local clean.a = $(targets[1])(clean) ; - TEMPORARY $(clean.a) ; - NOCARE $(clean.a) ; - LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ; - DEPENDS $(clean.a) : $(sources) ; - DEPENDS $(targets) : $(clean.a) ; - common.RmTemps $(clean.a) : $(targets) ; -} - -# Declare action for creating static libraries. -# The letter 'r' means to add files to the archive with replacement. 
Since we -# remove archive, we don't care about replacement, but there's no option "add -# without replacement". -# The letter 'c' suppresses the warning in case the archive does not exists yet. -# That warning is produced only on some platforms, for whatever reasons. -actions piecemeal archive -{ - "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)" - "$(.RANLIB)" "$(<)" -} - -rule link.dll ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = gcc-link-semaphore ; - quote-rpath $(targets) ; -} - -# Differs from 'link' above only by -shared. -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) "$(.IMPLIB-COMMAND)$(<[1])" -o "$(<[-1])" $(HAVE_SONAME)-Wl,$(SONAME_OPTION)$(SPACE)-Wl,$(<[-1]:D=) -shared $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS) -} - -rule setup-threading ( targets * : sources * : properties * ) -{ - local threading = [ feature.get-values threading : $(properties) ] ; - if $(threading) = multi - { - local target = [ feature.get-values target-os : $(properties) ] ; - local option ; - local libs ; - - switch $(target) - { - case windows : - { - option = -mthreads ; - } - case cygwin : - { - option = -mthreads ; - } - case solaris : - { - option = -pthreads ; - libs = rt ; - } - case beos : - { - # BeOS has no threading options, so do not set anything here. - } - case *bsd : - { - option = -pthread ; - # There is no -lrt on BSD. - } - case sgi : - { - # gcc on IRIX does not support multi-threading so do not set anything - # here. - } - case darwin : - { - # Darwin has no threading options so do not set anything here. 
- } - case * : - { - option = -pthread ; - libs = rt ; - } - } - - if $(option) - { - OPTIONS on $(targets) += $(option) ; - } - if $(libs) - { - FINDLIBS-SA on $(targets) += $(libs) ; - } - } -} - -local rule cpu-flags ( toolset variable : architecture : instruction-set + : values + : default ? ) -{ - if $(default) - { - toolset.flags $(toolset) $(variable) - $(architecture)/ - : $(values) ; - } - toolset.flags $(toolset) $(variable) - /$(instruction-set) - $(architecture)/$(instruction-set) - : $(values) ; -} - -# Set architecture/instruction-set options. -# -# x86 and compatible -# The 'native' option appeared in gcc 4.2 so we cannot safely use it -# as default. Use conservative i386 instead. -cpu-flags gcc OPTIONS : x86 : native : -march=native ; -cpu-flags gcc OPTIONS : x86 : i386 : -march=i386 : default ; -cpu-flags gcc OPTIONS : x86 : i486 : -march=i486 ; -cpu-flags gcc OPTIONS : x86 : i586 : -march=i586 ; -cpu-flags gcc OPTIONS : x86 : i686 : -march=i686 ; -cpu-flags gcc OPTIONS : x86 : pentium : -march=pentium ; -cpu-flags gcc OPTIONS : x86 : pentium-mmx : -march=pentium-mmx ; -cpu-flags gcc OPTIONS : x86 : pentiumpro : -march=pentiumpro ; -cpu-flags gcc OPTIONS : x86 : pentium2 : -march=pentium2 ; -cpu-flags gcc OPTIONS : x86 : pentium3 : -march=pentium3 ; -cpu-flags gcc OPTIONS : x86 : pentium3m : -march=pentium3m ; -cpu-flags gcc OPTIONS : x86 : pentium-m : -march=pentium-m ; -cpu-flags gcc OPTIONS : x86 : pentium4 : -march=pentium4 ; -cpu-flags gcc OPTIONS : x86 : pentium4m : -march=pentium4m ; -cpu-flags gcc OPTIONS : x86 : prescott : -march=prescott ; -cpu-flags gcc OPTIONS : x86 : nocona : -march=nocona ; -cpu-flags gcc OPTIONS : x86 : core2 : -march=core2 ; -cpu-flags gcc OPTIONS : x86 : k6 : -march=k6 ; -cpu-flags gcc OPTIONS : x86 : k6-2 : -march=k6-2 ; -cpu-flags gcc OPTIONS : x86 : k6-3 : -march=k6-3 ; -cpu-flags gcc OPTIONS : x86 : athlon : -march=athlon ; -cpu-flags gcc OPTIONS : x86 : athlon-tbird : -march=athlon-tbird ; -cpu-flags gcc 
OPTIONS : x86 : athlon-4 : -march=athlon-4 ; -cpu-flags gcc OPTIONS : x86 : athlon-xp : -march=athlon-xp ; -cpu-flags gcc OPTIONS : x86 : athlon-mp : -march=athlon-mp ; -## -cpu-flags gcc OPTIONS : x86 : k8 : -march=k8 ; -cpu-flags gcc OPTIONS : x86 : opteron : -march=opteron ; -cpu-flags gcc OPTIONS : x86 : athlon64 : -march=athlon64 ; -cpu-flags gcc OPTIONS : x86 : athlon-fx : -march=athlon-fx ; -cpu-flags gcc OPTIONS : x86 : winchip-c6 : -march=winchip-c6 ; -cpu-flags gcc OPTIONS : x86 : winchip2 : -march=winchip2 ; -cpu-flags gcc OPTIONS : x86 : c3 : -march=c3 ; -cpu-flags gcc OPTIONS : x86 : c3-2 : -march=c3-2 ; -# Sparc -cpu-flags gcc OPTIONS : sparc : c3 : -mcpu=c3 : default ; -cpu-flags gcc OPTIONS : sparc : v7 : -mcpu=v7 ; -cpu-flags gcc OPTIONS : sparc : cypress : -mcpu=cypress ; -cpu-flags gcc OPTIONS : sparc : v8 : -mcpu=v8 ; -cpu-flags gcc OPTIONS : sparc : supersparc : -mcpu=supersparc ; -cpu-flags gcc OPTIONS : sparc : sparclite : -mcpu=sparclite ; -cpu-flags gcc OPTIONS : sparc : hypersparc : -mcpu=hypersparc ; -cpu-flags gcc OPTIONS : sparc : sparclite86x : -mcpu=sparclite86x ; -cpu-flags gcc OPTIONS : sparc : f930 : -mcpu=f930 ; -cpu-flags gcc OPTIONS : sparc : f934 : -mcpu=f934 ; -cpu-flags gcc OPTIONS : sparc : sparclet : -mcpu=sparclet ; -cpu-flags gcc OPTIONS : sparc : tsc701 : -mcpu=tsc701 ; -cpu-flags gcc OPTIONS : sparc : v9 : -mcpu=v9 ; -cpu-flags gcc OPTIONS : sparc : ultrasparc : -mcpu=ultrasparc ; -cpu-flags gcc OPTIONS : sparc : ultrasparc3 : -mcpu=ultrasparc3 ; -# RS/6000 & PowerPC -cpu-flags gcc OPTIONS : power : 403 : -mcpu=403 ; -cpu-flags gcc OPTIONS : power : 505 : -mcpu=505 ; -cpu-flags gcc OPTIONS : power : 601 : -mcpu=601 ; -cpu-flags gcc OPTIONS : power : 602 : -mcpu=602 ; -cpu-flags gcc OPTIONS : power : 603 : -mcpu=603 ; -cpu-flags gcc OPTIONS : power : 603e : -mcpu=603e ; -cpu-flags gcc OPTIONS : power : 604 : -mcpu=604 ; -cpu-flags gcc OPTIONS : power : 604e : -mcpu=604e ; -cpu-flags gcc OPTIONS : power : 620 : -mcpu=620 
; -cpu-flags gcc OPTIONS : power : 630 : -mcpu=630 ; -cpu-flags gcc OPTIONS : power : 740 : -mcpu=740 ; -cpu-flags gcc OPTIONS : power : 7400 : -mcpu=7400 ; -cpu-flags gcc OPTIONS : power : 7450 : -mcpu=7450 ; -cpu-flags gcc OPTIONS : power : 750 : -mcpu=750 ; -cpu-flags gcc OPTIONS : power : 801 : -mcpu=801 ; -cpu-flags gcc OPTIONS : power : 821 : -mcpu=821 ; -cpu-flags gcc OPTIONS : power : 823 : -mcpu=823 ; -cpu-flags gcc OPTIONS : power : 860 : -mcpu=860 ; -cpu-flags gcc OPTIONS : power : 970 : -mcpu=970 ; -cpu-flags gcc OPTIONS : power : 8540 : -mcpu=8540 ; -cpu-flags gcc OPTIONS : power : power : -mcpu=power ; -cpu-flags gcc OPTIONS : power : power2 : -mcpu=power2 ; -cpu-flags gcc OPTIONS : power : power3 : -mcpu=power3 ; -cpu-flags gcc OPTIONS : power : power4 : -mcpu=power4 ; -cpu-flags gcc OPTIONS : power : power5 : -mcpu=power5 ; -cpu-flags gcc OPTIONS : power : powerpc : -mcpu=powerpc ; -cpu-flags gcc OPTIONS : power : powerpc64 : -mcpu=powerpc64 ; -cpu-flags gcc OPTIONS : power : rios : -mcpu=rios ; -cpu-flags gcc OPTIONS : power : rios1 : -mcpu=rios1 ; -cpu-flags gcc OPTIONS : power : rios2 : -mcpu=rios2 ; -cpu-flags gcc OPTIONS : power : rsc : -mcpu=rsc ; -cpu-flags gcc OPTIONS : power : rs64a : -mcpu=rs64 ; -# AIX variant of RS/6000 & PowerPC -toolset.flags gcc AROPTIONS 64/aix : "-X 64" ; diff --git a/jam-files/boost-build/tools/gcc.py b/jam-files/boost-build/tools/gcc.py deleted file mode 100644 index 2a3e675e..00000000 --- a/jam-files/boost-build/tools/gcc.py +++ /dev/null @@ -1,796 +0,0 @@ -# Status: being ported by Steven Watanabe -# Base revision: 47077 -# TODO: common.jam needs to be ported -# TODO: generators.jam needs to have register_c_compiler. -# -# Copyright 2001 David Abrahams. -# Copyright 2002-2006 Rene Rivera. -# Copyright 2002-2003 Vladimir Prus. -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov. -# Copyright 2007 Roland Schwarz -# Copyright 2007 Boris Gubenko. 
-# Copyright 2008 Steven Watanabe -# -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import os -import subprocess -import re - -import bjam - -from b2.tools import unix, common, rc, pch, builtin -from b2.build import feature, type, toolset, generators -from b2.util.utility import os_name, on_windows -from b2.manager import get_manager -from b2.build.generators import Generator -from b2.build.toolset import flags -from b2.util.utility import to_seq - -__debug = None - -def debug(): - global __debug - if __debug is None: - __debug = "--debug-configuration" in bjam.variable("ARGV") - return __debug - -feature.extend('toolset', ['gcc']) - - -toolset.inherit_generators('gcc', [], 'unix', ['unix.link', 'unix.link.dll']) -toolset.inherit_flags('gcc', 'unix') -toolset.inherit_rules('gcc', 'unix') - -generators.override('gcc.prebuilt', 'builtin.prebuilt') -generators.override('gcc.searched-lib-generator', 'searched-lib-generator') - -# Target naming is determined by types/lib.jam and the settings below this -# comment. -# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (mingw): -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (cygwin) i.e. cygwin -# libxxx.a static library -# xxx.dll DLL -# libxxx.dll.a import library -# -# Note: user can always override by using the @rule -# This settings have been choosen, so that mingw -# is in line with msvc naming conventions. For -# cygwin the cygwin naming convention has been choosen. 
- -# Make the "o" suffix used for gcc toolset on all -# platforms -type.set_generated_target_suffix('OBJ', ['gcc'], 'o') -type.set_generated_target_suffix('STATIC_LIB', ['gcc', 'cygwin'], 'a') - -type.set_generated_target_suffix('IMPORT_LIB', ['gcc', 'cygwin'], 'dll.a') -type.set_generated_target_prefix('IMPORT_LIB', ['gcc', 'cygwin'], 'lib') - -__machine_match = re.compile('^([^ ]+)') -__version_match = re.compile('^([0-9.]+)') - -def init(version = None, command = None, options = None): - """ - Initializes the gcc toolset for the given version. If necessary, command may - be used to specify where the compiler is located. The parameter 'options' is a - space-delimited list of options, each one specified as - option-value. Valid option names are: cxxflags, linkflags and - linker-type. Accepted linker-type values are gnu, darwin, osf, hpux or sun - and the default value will be selected based on the current OS. - Example: - using gcc : 3.4 : : foo bar sun ; - """ - - options = to_seq(options) - command = to_seq(command) - - # Information about the gcc command... - # The command. - command = to_seq(common.get_invocation_command('gcc', 'g++', command)) - # The root directory of the tool install. - root = feature.get_values('', options) ; - # The bin directory where to find the command to execute. - bin = None - # The flavor of compiler. - flavor = feature.get_values('', options) - # Autodetect the root and bin dir if not given. - if command: - if not bin: - bin = common.get_absolute_tool_path(command[-1]) - if not root: - root = os.path.dirname(bin) - # Autodetect the version and flavor if not given. 
- if command: - machine_info = subprocess.Popen(command + ['-dumpmachine'], stdout=subprocess.PIPE).communicate()[0] - machine = __machine_match.search(machine_info).group(1) - - version_info = subprocess.Popen(command + ['-dumpversion'], stdout=subprocess.PIPE).communicate()[0] - version = __version_match.search(version_info).group(1) - if not flavor and machine.find('mingw') != -1: - flavor = 'mingw' - - condition = None - if flavor: - condition = common.check_init_parameters('gcc', None, - ('version', version), - ('flavor', flavor)) - else: - condition = common.check_init_parameters('gcc', None, - ('version', version)) - - if command: - command = command[0] - - common.handle_options('gcc', condition, command, options) - - linker = feature.get_values('', options) - if not linker: - if os_name() == 'OSF': - linker = 'osf' - elif os_name() == 'HPUX': - linker = 'hpux' ; - else: - linker = 'gnu' - - init_link_flags('gcc', linker, condition) - - # If gcc is installed in non-standard location, we'd need to add - # LD_LIBRARY_PATH when running programs created with it (for unit-test/run - # rules). - if command: - # On multilib 64-bit boxes, there are both 32-bit and 64-bit libraries - # and all must be added to LD_LIBRARY_PATH. The linker will pick the - # right onces. Note that we don't provide a clean way to build 32-bit - # binary with 64-bit compiler, but user can always pass -m32 manually. - lib_path = [os.path.join(root, 'bin'), - os.path.join(root, 'lib'), - os.path.join(root, 'lib32'), - os.path.join(root, 'lib64')] - if debug(): - print 'notice: using gcc libraries ::', condition, '::', lib_path - toolset.flags('gcc.link', 'RUN_PATH', condition, lib_path) - - # If it's not a system gcc install we should adjust the various programs as - # needed to prefer using the install specific versions. This is essential - # for correct use of MinGW and for cross-compiling. - - # - The archive builder. 
- archiver = common.get_invocation_command('gcc', - 'ar', feature.get_values('', options), [bin], path_last=True) - toolset.flags('gcc.archive', '.AR', condition, [archiver]) - if debug(): - print 'notice: using gcc archiver ::', condition, '::', archiver - - # - The resource compiler. - rc_command = common.get_invocation_command_nodefault('gcc', - 'windres', feature.get_values('', options), [bin], path_last=True) - rc_type = feature.get_values('', options) - - if not rc_type: - rc_type = 'windres' - - if not rc_command: - # If we can't find an RC compiler we fallback to a null RC compiler that - # creates empty object files. This allows the same Jamfiles to work - # across the board. The null RC uses the assembler to create the empty - # objects, so configure that. - rc_command = common.get_invocation_command('gcc', 'as', [], [bin], path_last=True) - rc_type = 'null' - rc.configure(rc_command, condition, '' + rc_type) - -###if [ os.name ] = NT -###{ -### # This causes single-line command invocation to not go through .bat files, -### # thus avoiding command-line length limitations. -### JAMSHELL = % ; -###} - -#FIXME: when register_c_compiler is moved to -# generators, these should be updated -builtin.register_c_compiler('gcc.compile.c++', ['CPP'], ['OBJ'], ['gcc']) -builtin.register_c_compiler('gcc.compile.c', ['C'], ['OBJ'], ['gcc']) -builtin.register_c_compiler('gcc.compile.asm', ['ASM'], ['OBJ'], ['gcc']) - -# pch support - -# The compiler looks for a precompiled header in each directory just before it -# looks for the include file in that directory. The name searched for is the -# name specified in the #include directive with ".gch" suffix appended. The -# logic in gcc-pch-generator will make sure that BASE_PCH suffix is appended to -# full name of the header. - -type.set_generated_target_suffix('PCH', ['gcc'], 'gch') - -# GCC-specific pch generator. 
-class GccPchGenerator(pch.PchGenerator): - - # Inherit the __init__ method - - def run_pch(self, project, name, prop_set, sources): - # Find the header in sources. Ignore any CPP sources. - header = None - for s in sources: - if type.is_derived(s.type, 'H'): - header = s - - # Error handling: Base header file name should be the same as the base - # precompiled header name. - header_name = header.name - header_basename = os.path.basename(header_name).rsplit('.', 1)[0] - if header_basename != name: - location = project.project_module - ###FIXME: - raise Exception() - ### errors.user-error "in" $(location)": pch target name `"$(name)"' should be the same as the base name of header file `"$(header-name)"'" ; - - pch_file = Generator.run(self, project, name, prop_set, [header]) - - # return result of base class and pch-file property as usage-requirements - # FIXME: what about multiple results from generator.run? - return (property_set.create('' + pch_file[0], '-Winvalid-pch'), - pch_file) - - # Calls the base version specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. - def generated_targets(self, sources, prop_set, project, name = None): - name = sources[0].name - return Generator.generated_targets(self, sources, - prop_set, project, name) - -# Note: the 'H' source type will catch both '.h' header and '.hpp' header. The -# latter have HPP type, but HPP type is derived from H. The type of compilation -# is determined entirely by the destination type. -generators.register(GccPchGenerator('gcc.compile.c.pch', False, ['H'], ['C_PCH'], ['on', 'gcc' ])) -generators.register(GccPchGenerator('gcc.compile.c++.pch', False, ['H'], ['CPP_PCH'], ['on', 'gcc' ])) - -# Override default do-nothing generators. 
-generators.override('gcc.compile.c.pch', 'pch.default-c-pch-generator') -generators.override('gcc.compile.c++.pch', 'pch.default-cpp-pch-generator') - -flags('gcc.compile', 'PCH_FILE', ['on'], ['']) - -# Declare flags and action for compilation -flags('gcc.compile', 'OPTIONS', ['off'], ['-O0']) -flags('gcc.compile', 'OPTIONS', ['speed'], ['-O3']) -flags('gcc.compile', 'OPTIONS', ['space'], ['-Os']) - -flags('gcc.compile', 'OPTIONS', ['off'], ['-fno-inline']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Wno-inline']) -flags('gcc.compile', 'OPTIONS', ['full'], ['-finline-functions', '-Wno-inline']) - -flags('gcc.compile', 'OPTIONS', ['off'], ['-w']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Wall']) -flags('gcc.compile', 'OPTIONS', ['all'], ['-Wall', '-pedantic']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Werror']) - -flags('gcc.compile', 'OPTIONS', ['on'], ['-g']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-pg']) -flags('gcc.compile', 'OPTIONS', ['off'], ['-fno-rtti']) - -# On cygwin and mingw, gcc generates position independent code by default, and -# warns if -fPIC is specified. This might not be the right way of checking if -# we're using cygwin. For example, it's possible to run cygwin gcc from NT -# shell, or using crosscompiling. But we'll solve that problem when it's time. -# In that case we'll just add another parameter to 'init' and move this login -# inside 'init'. -if not os_name () in ['CYGWIN', 'NT']: - # This logic will add -fPIC for all compilations: - # - # lib a : a.cpp b ; - # obj b : b.cpp ; - # exe c : c.cpp a d ; - # obj d : d.cpp ; - # - # This all is fine, except that 'd' will be compiled with -fPIC even though - # it's not needed, as 'd' is used only in exe. However, it's hard to detect - # where a target is going to be used. Alternative, we can set -fPIC only - # when main target type is LIB but than 'b' will be compiled without -fPIC. - # In x86-64 that will lead to link errors. So, compile everything with - # -fPIC. 
- # - # Yet another alternative would be to create propagated - # feature, and set it when building shared libraries, but that's hard to - # implement and will increase target path length even more. - flags('gcc.compile', 'OPTIONS', ['shared'], ['-fPIC']) - -if os_name() != 'NT' and os_name() != 'OSF' and os_name() != 'HPUX': - # OSF does have an option called -soname but it doesn't seem to work as - # expected, therefore it has been disabled. - HAVE_SONAME = '' - SONAME_OPTION = '-h' - - -flags('gcc.compile', 'USER_OPTIONS', [], ['']) -flags('gcc.compile.c++', 'USER_OPTIONS',[], ['']) -flags('gcc.compile', 'DEFINES', [], ['']) -flags('gcc.compile', 'INCLUDES', [], ['']) - -engine = get_manager().engine() - -engine.register_action('gcc.compile.c++.pch', - '"$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"') - -engine.register_action('gcc.compile.c.pch', - '"$(CONFIG_COMMAND)" -x c-header $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"') - - -def gcc_compile_cpp(targets, sources, properties): - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. - extension = os.path.splitext (sources [0]) [1] - lang = '' - if not extension in ['.cc', '.cp', '.cxx', '.cpp', '.c++', '.C']: - lang = '-x c++' - get_manager().engine().set_target_variable (targets, 'LANG', lang) - engine.add_dependency(targets, bjam.call('get-target-variable', targets, 'PCH_FILE')) - -def gcc_compile_c(targets, sources, properties): - engine = get_manager().engine() - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. 
- #if $(>:S) != .c - #{ - engine.set_target_variable (targets, 'LANG', '-x c') - #} - engine.add_dependency(targets, bjam.call('get-target-variable', targets, 'PCH_FILE')) - -engine.register_action( - 'gcc.compile.c++', - '"$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-128 $(OPTIONS) ' + - '$(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" ' + - '-c -o "$(<:W)" "$(>:W)"', - function=gcc_compile_cpp, - bound_list=['PCH_FILE']) - -engine.register_action( - 'gcc.compile.c', - '"$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) ' + - '-I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)"', - function=gcc_compile_c, - bound_list=['PCH_FILE']) - -def gcc_compile_asm(targets, sources, properties): - get_manager().engine().set_target_variable(targets, 'LANG', '-x assembler-with-cpp') - -engine.register_action( - 'gcc.compile.asm', - '"$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"', - function=gcc_compile_asm) - - -class GccLinkingGenerator(unix.UnixLinkingGenerator): - """ - The class which check that we don't try to use the static - property while creating or using shared library, since it's not supported by - gcc/libc. - """ - def run(self, project, name, ps, sources): - # TODO: Replace this with the use of a target-os property. - - no_static_link = False - if bjam.variable('UNIX'): - no_static_link = True; - ##FIXME: what does this mean? -## { -## switch [ modules.peek : JAMUNAME ] -## { -## case * : no-static-link = true ; -## } -## } - - reason = None - if no_static_link and ps.get('runtime-link') == 'static': - if ps.get('link') == 'shared': - reason = "On gcc, DLL can't be build with 'static'." 
- elif type.is_derived(self.target_types[0], 'EXE'): - for s in sources: - source_type = s.type() - if source_type and type.is_derived(source_type, 'SHARED_LIB'): - reason = "On gcc, using DLLS together with the " +\ - "static options is not possible " - if reason: - print 'warning:', reason - print 'warning:',\ - "It is suggested to use 'static' together",\ - "with 'static'." ; - return - else: - generated_targets = unix.UnixLinkingGenerator.run(self, project, - name, ps, sources) - return generated_targets - -if on_windows(): - flags('gcc.link.dll', '.IMPLIB-COMMAND', [], ['-Wl,--out-implib,']) - generators.register( - GccLinkingGenerator('gcc.link', True, - ['OBJ', 'SEARCHED_LIB', 'STATIC_LIB', 'IMPORT_LIB'], - [ 'EXE' ], - [ 'gcc' ])) - generators.register( - GccLinkingGenerator('gcc.link.dll', True, - ['OBJ', 'SEARCHED_LIB', 'STATIC_LIB', 'IMPORT_LIB'], - ['IMPORT_LIB', 'SHARED_LIB'], - ['gcc'])) -else: - generators.register( - GccLinkingGenerator('gcc.link', True, - ['LIB', 'OBJ'], - ['EXE'], - ['gcc'])) - generators.register( - GccLinkingGenerator('gcc.link.dll', True, - ['LIB', 'OBJ'], - ['SHARED_LIB'], - ['gcc'])) - -# Declare flags for linking. -# First, the common flags. -flags('gcc.link', 'OPTIONS', ['on'], ['-g']) -flags('gcc.link', 'OPTIONS', ['on'], ['-pg']) -flags('gcc.link', 'USER_OPTIONS', [], ['']) -flags('gcc.link', 'LINKPATH', [], ['']) -flags('gcc.link', 'FINDLIBS-ST', [], ['']) -flags('gcc.link', 'FINDLIBS-SA', [], ['']) -flags('gcc.link', 'LIBRARIES', [], ['']) - -# For static we made sure there are no dynamic libraries in the -# link. On HP-UX not all system libraries exist as archived libraries (for -# example, there is no libunwind.a), so, on this platform, the -static option -# cannot be specified. -if os_name() != 'HPUX': - flags('gcc.link', 'OPTIONS', ['static'], ['-static']) - -# Now, the vendor specific flags. -# The parameter linker can be either gnu, darwin, osf, hpux or sun. 
-def init_link_flags(toolset, linker, condition): - """ - Now, the vendor specific flags. - The parameter linker can be either gnu, darwin, osf, hpux or sun. - """ - toolset_link = toolset + '.link' - if linker == 'gnu': - # Strip the binary when no debugging is needed. We use --strip-all flag - # as opposed to -s since icc (intel's compiler) is generally - # option-compatible with and inherits from the gcc toolset, but does not - # support -s. - - # FIXME: what does unchecked translate to? - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,--strip-all']) # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - flags(toolset_link, 'RPATH_LINK', condition, ['']) # : unchecked ; - flags(toolset_link, 'START-GROUP', condition, ['-Wl,--start-group'])# : unchecked ; - flags(toolset_link, 'END-GROUP', condition, ['-Wl,--end-group']) # : unchecked ; - - # gnu ld has the ability to change the search behaviour for libraries - # referenced by -l switch. These modifiers are -Bstatic and -Bdynamic - # and change search for -l switches that follow them. The following list - # shows the tried variants. - # The search stops at the first variant that has a match. - # *nix: -Bstatic -lxxx - # libxxx.a - # - # *nix: -Bdynamic -lxxx - # libxxx.so - # libxxx.a - # - # windows (mingw,cygwin) -Bstatic -lxxx - # libxxx.a - # xxx.lib - # - # windows (mingw,cygwin) -Bdynamic -lxxx - # libxxx.dll.a - # xxx.dll.a - # libxxx.a - # xxx.lib - # cygxxx.dll (*) - # libxxx.dll - # xxx.dll - # libxxx.a - # - # (*) This is for cygwin - # Please note that -Bstatic and -Bdynamic are not a guarantee that a - # static or dynamic lib indeed gets linked in. The switches only change - # search patterns! - - # On *nix mixing shared libs with static runtime is not a good idea. 
- flags(toolset_link, 'FINDLIBS-ST-PFX', - map(lambda x: x + '/shared', condition), - ['-Wl,-Bstatic']) # : unchecked ; - flags(toolset_link, 'FINDLIBS-SA-PFX', - map(lambda x: x + '/shared', condition), - ['-Wl,-Bdynamic']) # : unchecked ; - - # On windows allow mixing of static and dynamic libs with static - # runtime. - flags(toolset_link, 'FINDLIBS-ST-PFX', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bstatic']) # : unchecked ; - flags(toolset_link, 'FINDLIBS-SA-PFX', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bdynamic']) # : unchecked ; - flags(toolset_link, 'OPTIONS', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bstatic']) # : unchecked ; - - elif linker == 'darwin': - # On Darwin, the -s option to ld does not work unless we pass -static, - # and passing -static unconditionally is a bad idea. So, don't pass -s. - # at all, darwin.jam will use separate 'strip' invocation. - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - flags(toolset_link, 'RPATH_LINK', condition, ['']) # : unchecked ; - - elif linker == 'osf': - # No --strip-all, just -s. - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,-s']) - # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - # This does not supports -R. - flags(toolset_link, 'RPATH_OPTION', condition, ['-rpath']) # : unchecked ; - # -rpath-link is not supported at all. - - elif linker == 'sun': - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,-s']) - # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - # Solaris linker does not have a separate -rpath-link, but allows to use - # -L for the same purpose. - flags(toolset_link, 'LINKPATH', condition, ['']) # : unchecked ; - - # This permits shared libraries with non-PIC code on Solaris. - # VP, 2004/09/07: Now that we have -fPIC hardcode in link.dll, the - # following is not needed. 
Whether -fPIC should be hardcoded, is a - # separate question. - # AH, 2004/10/16: it is still necessary because some tests link against - # static libraries that were compiled without PIC. - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/shared', condition), ['-mimpure-text']) - # : unchecked ; - - elif linker == 'hpux': - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), - ['-Wl,-s']) # : unchecked ; - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/shared', condition), - ['-fPIC']) # : unchecked ; - - else: - # FIXME: - errors.user_error( - "$(toolset) initialization: invalid linker '$(linker)' " + - "The value '$(linker)' specified for is not recognized. " + - "Possible values are 'gnu', 'darwin', 'osf', 'hpux' or 'sun'") - -# Declare actions for linking. -def gcc_link(targets, sources, properties): - engine = get_manager().engine() - engine.set_target_variable(targets, 'SPACE', ' ') - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. For now, serialize only gcc links, it might be a - # good idea to serialize all links. - engine.set_target_variable(targets, 'JAM_SEMAPHORE', 'gcc-link-semaphore') - -engine.register_action( - 'gcc.link', - '"$(CONFIG_COMMAND)" -L"$(LINKPATH)" ' + - '-Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" ' + - '-Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" ' + - '$(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) ' + - '-l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) ' + - '$(OPTIONS) $(USER_OPTIONS)', - function=gcc_link, - bound_list=['LIBRARIES']) - -# Default value. Mostly for the sake of intel-linux that inherits from gcc, but -# does not have the same logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble as on Linux, 'ar' is -# always available. 
-__AR = 'ar' - -flags('gcc.archive', 'AROPTIONS', [], ['']) - -def gcc_archive(targets, sources, properties): - # Always remove archive and start again. Here's rationale from - # - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. I moved a1.c to - # a2.c, updated my Jamfiles and rebuilt. My program was crashing with absurd - # errors. After some debugging I traced it back to the fact that a1.o was - # *still* in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM onto the - # archive action. That makes archives fail to build on NT when they have - # many files because it will no longer execute the action directly and blow - # the line length limit. Instead we remove the file in a different action, - # just before building the archive. - clean = targets[0] + '(clean)' - bjam.call('TEMPORARY', clean) - bjam.call('NOCARE', clean) - engine = get_manager().engine() - engine.set_target_variable('LOCATE', clean, bjam.call('get-target-variable', targets, 'LOCATE')) - engine.add_dependency(clean, sources) - engine.add_dependency(targets, clean) - engine.set_update_action('common.RmTemps', clean, targets) - -# Declare action for creating static libraries. -# The letter 'r' means to add files to the archive with replacement. Since we -# remove archive, we don't care about replacement, but there's no option "add -# without replacement". -# The letter 'c' suppresses the warning in case the archive does not exists yet. -# That warning is produced only on some platforms, for whatever reasons. 
-engine.register_action('gcc.archive', - '"$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)"', - function=gcc_archive, - flags=['piecemeal']) - -def gcc_link_dll(targets, sources, properties): - engine = get_manager().engine() - engine.set_target_variable(targets, 'SPACE', ' ') - engine.set_target_variable(targets, 'JAM_SEMAPHORE', 'gcc-link-semaphore') - engine.set_target_variable(targets, "HAVE_SONAME", HAVE_SONAME) - engine.set_target_variable(targets, "SONAME_OPTION", SONAME_OPTION) - -engine.register_action( - 'gcc.link.dll', - # Differ from 'link' above only by -shared. - '"$(CONFIG_COMMAND)" -L"$(LINKPATH)" ' + - '-Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" ' + - '"$(.IMPLIB-COMMAND)$(<[1])" -o "$(<[-1])" ' + - '$(HAVE_SONAME)-Wl,$(SONAME_OPTION)$(SPACE)-Wl,$(<[-1]:D=) ' + - '-shared $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) ' + - '-l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) ' + - '$(OPTIONS) $(USER_OPTIONS)', - function = gcc_link_dll, - bound_list=['LIBRARIES']) - -# Set up threading support. It's somewhat contrived, so perform it at the end, -# to avoid cluttering other code. - -if on_windows(): - flags('gcc', 'OPTIONS', ['multi'], ['-mthreads']) -elif bjam.variable('UNIX'): - jamuname = bjam.variable('JAMUNAME') - host_os_name = jamuname[0] - if host_os_name.startswith('SunOS'): - flags('gcc', 'OPTIONS', ['multi'], ['-pthreads']) - flags('gcc', 'FINDLIBS-SA', [], ['rt']) - elif host_os_name == 'BeOS': - # BeOS has no threading options, don't set anything here. - pass - elif host_os_name.endswith('BSD'): - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - # there is no -lrt on BSD - elif host_os_name == 'DragonFly': - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - # there is no -lrt on BSD - DragonFly is a FreeBSD variant, - # which anoyingly doesn't say it's a *BSD. - elif host_os_name == 'IRIX': - # gcc on IRIX does not support multi-threading, don't set anything here. 
- pass - elif host_os_name == 'Darwin': - # Darwin has no threading options, don't set anything here. - pass - else: - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - flags('gcc', 'FINDLIBS-SA', [], ['rt']) - -def cpu_flags(toolset, variable, architecture, instruction_set, values, default=None): - #FIXME: for some reason this fails. Probably out of date feature code -## if default: -## flags(toolset, variable, -## ['' + architecture + '/'], -## values) - flags(toolset, variable, - #FIXME: same as above - [##'/' + instruction_set, - '' + architecture + '/' + instruction_set], - values) - -# Set architecture/instruction-set options. -# -# x86 and compatible -flags('gcc', 'OPTIONS', ['x86/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['x86/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i386', ['-march=i386'], default=True) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i486', ['-march=i486']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i586', ['-march=i586']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i686', ['-march=i686']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium', ['-march=pentium']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium-mmx', ['-march=pentium-mmx']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentiumpro', ['-march=pentiumpro']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium2', ['-march=pentium2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium3', ['-march=pentium3']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium3m', ['-march=pentium3m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium-m', ['-march=pentium-m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium4', ['-march=pentium4']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium4m', ['-march=pentium4m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'prescott', ['-march=prescott']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'nocona', ['-march=nocona']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6', ['-march=k6']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6-2', ['-march=k6-2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6-3', ['-march=k6-3']) -cpu_flags('gcc', 'OPTIONS', 
'x86', 'athlon', ['-march=athlon']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-tbird', ['-march=athlon-tbird']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-4', ['-march=athlon-4']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-xp', ['-march=athlon-xp']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-mp', ['-march=athlon-mp']) -## -cpu_flags('gcc', 'OPTIONS', 'x86', 'k8', ['-march=k8']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'opteron', ['-march=opteron']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon64', ['-march=athlon64']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-fx', ['-march=athlon-fx']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'winchip-c6', ['-march=winchip-c6']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'winchip2', ['-march=winchip2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'c3', ['-march=c3']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'c3-2', ['-march=c3-2']) -# Sparc -flags('gcc', 'OPTIONS', ['sparc/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['sparc/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'c3', ['-mcpu=c3'], default=True) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v7', ['-mcpu=v7']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'cypress', ['-mcpu=cypress']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v8', ['-mcpu=v8']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'supersparc', ['-mcpu=supersparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclite', ['-mcpu=sparclite']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'hypersparc', ['-mcpu=hypersparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclite86x', ['-mcpu=sparclite86x']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'f930', ['-mcpu=f930']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'f934', ['-mcpu=f934']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclet', ['-mcpu=sparclet']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'tsc701', ['-mcpu=tsc701']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v9', ['-mcpu=v9']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'ultrasparc', ['-mcpu=ultrasparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'ultrasparc3', ['-mcpu=ultrasparc3']) -# RS/6000 & 
PowerPC -flags('gcc', 'OPTIONS', ['power/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['power/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'power', '403', ['-mcpu=403']) -cpu_flags('gcc', 'OPTIONS', 'power', '505', ['-mcpu=505']) -cpu_flags('gcc', 'OPTIONS', 'power', '601', ['-mcpu=601']) -cpu_flags('gcc', 'OPTIONS', 'power', '602', ['-mcpu=602']) -cpu_flags('gcc', 'OPTIONS', 'power', '603', ['-mcpu=603']) -cpu_flags('gcc', 'OPTIONS', 'power', '603e', ['-mcpu=603e']) -cpu_flags('gcc', 'OPTIONS', 'power', '604', ['-mcpu=604']) -cpu_flags('gcc', 'OPTIONS', 'power', '604e', ['-mcpu=604e']) -cpu_flags('gcc', 'OPTIONS', 'power', '620', ['-mcpu=620']) -cpu_flags('gcc', 'OPTIONS', 'power', '630', ['-mcpu=630']) -cpu_flags('gcc', 'OPTIONS', 'power', '740', ['-mcpu=740']) -cpu_flags('gcc', 'OPTIONS', 'power', '7400', ['-mcpu=7400']) -cpu_flags('gcc', 'OPTIONS', 'power', '7450', ['-mcpu=7450']) -cpu_flags('gcc', 'OPTIONS', 'power', '750', ['-mcpu=750']) -cpu_flags('gcc', 'OPTIONS', 'power', '801', ['-mcpu=801']) -cpu_flags('gcc', 'OPTIONS', 'power', '821', ['-mcpu=821']) -cpu_flags('gcc', 'OPTIONS', 'power', '823', ['-mcpu=823']) -cpu_flags('gcc', 'OPTIONS', 'power', '860', ['-mcpu=860']) -cpu_flags('gcc', 'OPTIONS', 'power', '970', ['-mcpu=970']) -cpu_flags('gcc', 'OPTIONS', 'power', '8540', ['-mcpu=8540']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power', ['-mcpu=power']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power2', ['-mcpu=power2']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power3', ['-mcpu=power3']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power4', ['-mcpu=power4']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power5', ['-mcpu=power5']) -cpu_flags('gcc', 'OPTIONS', 'power', 'powerpc', ['-mcpu=powerpc']) -cpu_flags('gcc', 'OPTIONS', 'power', 'powerpc64', ['-mcpu=powerpc64']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios', ['-mcpu=rios']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios1', ['-mcpu=rios1']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios2', ['-mcpu=rios2']) -cpu_flags('gcc', 
'OPTIONS', 'power', 'rsc', ['-mcpu=rsc']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rs64a', ['-mcpu=rs64']) -# AIX variant of RS/6000 & PowerPC -flags('gcc', 'OPTIONS', ['power/32/aix'], ['-maix32']) -flags('gcc', 'OPTIONS', ['power/64/aix'], ['-maix64']) -flags('gcc', 'AROPTIONS', ['power/64/aix'], ['-X 64']) diff --git a/jam-files/boost-build/tools/generate.jam b/jam-files/boost-build/tools/generate.jam deleted file mode 100644 index 6732fa35..00000000 --- a/jam-files/boost-build/tools/generate.jam +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Declares main target 'generate' used to produce targets by calling a -# user-provided rule that takes and produces virtual targets. - -import "class" : new ; -import errors ; -import feature ; -import project ; -import property ; -import property-set ; -import targets ; -import regex ; - - -feature.feature generating-rule : : free ; - - -class generated-target-class : basic-target -{ - import errors ; - import indirect ; - import virtual-target ; - - rule __init__ ( name : project : sources * : requirements * - : default-build * : usage-requirements * ) - { - basic-target.__init__ $(name) : $(project) : $(sources) - : $(requirements) : $(default-build) : $(usage-requirements) ; - - if ! [ $(self.requirements).get ] - { - errors.user-error "The generate rule requires the " - "property to be set" ; - } - } - - rule construct ( name : sources * : property-set ) - { - local result ; - local gr = [ $(property-set).get ] ; - - # FIXME: this is a copy-paste from virtual-target.jam. We should add a - # utility rule to call a rule like this. - local rule-name = [ MATCH ^@(.*) : $(gr) ] ; - if $(rule-name) - { - if $(gr[2]) - { - local target-name = [ full-name ] ; - errors.user-error "Multiple properties" - "encountered for target $(target-name)." 
; - } - - result = [ indirect.call $(rule-name) $(self.project) $(name) - : $(property-set) : $(sources) ] ; - - if ! $(result) - { - ECHO "warning: Unable to construct" [ full-name ] ; - } - } - - local ur ; - local targets ; - - if $(result) - { - if [ class.is-a $(result[1]) : property-set ] - { - ur = $(result[1]) ; - targets = $(result[2-]) ; - } - else - { - ur = [ property-set.empty ] ; - targets = $(result) ; - } - } - # FIXME: the following loop should be doable using sequence.transform or - # some similar utility rule. - local rt ; - for local t in $(targets) - { - rt += [ virtual-target.register $(t) ] ; - } - return $(ur) $(rt) ; - } -} - - -rule generate ( name : sources * : requirements * : default-build * - : usage-requirements * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new generated-target-class $(name) : $(project) - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ] - ] ; -} - -IMPORT $(__name__) : generate : : generate ; diff --git a/jam-files/boost-build/tools/gettext.jam b/jam-files/boost-build/tools/gettext.jam deleted file mode 100644 index 99a43ffe..00000000 --- a/jam-files/boost-build/tools/gettext.jam +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module support GNU gettext internationalization utilities. -# -# It provides two main target rules: 'gettext.catalog', used for -# creating machine-readable catalogs from translations files, and -# 'gettext.update', used for update translation files from modified -# sources. 
-# -# To add i18n support to your application you should follow these -# steps. -# -# - Decide on a file name which will contain translations and -# what main target name will be used to update it. For example:: -# -# gettext.update update-russian : russian.po a.cpp my_app ; -# -# - Create the initial translation file by running:: -# -# bjam update-russian -# -# - Edit russian.po. For example, you might change fields like LastTranslator. -# -# - Create a main target for final message catalog:: -# -# gettext.catalog russian : russian.po ; -# -# The machine-readable catalog will be updated whenever you update -# "russian.po". The "russian.po" file will be updated only on explicit -# request. When you're ready to update translations, you should -# -# - Run:: -# -# bjam update-russian -# -# - Edit "russian.po" in appropriate editor. -# -# The next bjam run will convert "russian.po" into machine-readable form. -# -# By default, translations are marked by 'i18n' call. The 'gettext.keyword' -# feature can be used to alter this. - - -import targets ; -import property-set ; -import virtual-target ; -import "class" : new ; -import project ; -import type ; -import generators ; -import errors ; -import feature : feature ; -import toolset : flags ; -import regex ; - -.path = "" ; - -# Initializes the gettext module. -rule init ( path ? # Path where all tools are located. If not specified, - # they should be in PATH. - ) -{ - if $(.initialized) && $(.path) != $(path) - { - errors.error "Attempt to reconfigure with different path" ; - } - .initialized = true ; - if $(path) - { - .path = $(path)/ ; - } -} - -# Creates a main target 'name', which, when updated, will cause -# file 'existing-translation' to be updated with translations -# extracted from 'sources'. It's possible to specify main target -# in sources --- it which case all target from dependency graph -# of those main targets will be scanned, provided they are of -# appropricate type. 
The 'gettext.types' feature can be used to -# control the types. -# -# The target will be updated only if explicitly requested on the -# command line. -rule update ( name : existing-translation sources + : requirements * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(name) : $(project) : gettext.UPDATE : - $(existing-translation) $(sources) - : [ targets.main-target-requirements $(requirements) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(name) ; -} - - -# The human editable source, containing translation. -type.register gettext.PO : po ; -# The machine readable message catalog. -type.register gettext.catalog : mo ; -# Intermediate type produce by extracting translations from -# sources. -type.register gettext.POT : pot ; -# Pseudo type used to invoke update-translations generator -type.register gettext.UPDATE ; - -# Identifies the keyword that should be used when scanning sources. -# Default: i18n -feature gettext.keyword : : free ; -# Contains space-separated list of sources types which should be scanned. -# Default: "C CPP" -feature gettext.types : : free ; - -generators.register-standard gettext.compile : gettext.PO : gettext.catalog ; - -class update-translations-generator : generator -{ - import regex : split ; - import property-set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - # The rule should be called with at least two sources. The first source - # is the translation (.po) file to update. The remaining sources are targets - # which should be scanned for new messages. All sources files for those targets - # will be found and passed to the 'xgettext' utility, which extracts the - # messages for localization. Those messages will be merged to the .po file. - rule run ( project name ? : property-set : sources * : multiple ? 
) - { - local types = [ $(property-set).get ] ; - types ?= "C CPP" ; - types = [ regex.split $(types) " " ] ; - - local keywords = [ $(property-set).get ] ; - property-set = [ property-set.create $(keywords:G=) ] ; - - # First deterime the list of sources that must be scanned for - # messages. - local all-sources ; - # CONSIDER: I'm not sure if the logic should be the same as for 'stage': - # i.e. following dependency properties as well. - for local s in $(sources[2-]) - { - all-sources += [ virtual-target.traverse $(s) : : include-sources ] ; - } - local right-sources ; - for local s in $(all-sources) - { - if [ $(s).type ] in $(types) - { - right-sources += $(s) ; - } - } - - local .constructed ; - if $(right-sources) - { - # Create the POT file, which will contain list of messages extracted - # from the sources. - local extract = - [ new action $(right-sources) : gettext.extract : $(property-set) ] ; - local new-messages = [ new file-target $(name) : gettext.POT - : $(project) : $(extract) ] ; - - # Create a notfile target which will update the existing translation file - # with new messages. - local a = [ new action $(sources[1]) $(new-messages) - : gettext.update-po-dispatch ] ; - local r = [ new notfile-target $(name) : $(project) : $(a) ] ; - .constructed = [ virtual-target.register $(r) ] ; - } - else - { - errors.error "No source could be scanned by gettext tools" ; - } - return $(.constructed) ; - } -} -generators.register [ new update-translations-generator gettext.update : : gettext.UPDATE ] ; - -flags gettext.extract KEYWORD ; -actions extract -{ - $(.path)xgettext -k$(KEYWORD:E=i18n) -o $(<) $(>) -} - -# Does realy updating of po file. The tricky part is that -# we're actually updating one of the sources: -# $(<) is the NOTFILE target we're updating -# $(>[1]) is the PO file to be really updated. -# $(>[2]) is the PO file created from sources. 
-# -# When file to be updated does not exist (during the -# first run), we need to copy the file created from sources. -# In all other cases, we need to update the file. -rule update-po-dispatch -{ - NOCARE $(>[1]) ; - gettext.create-po $(<) : $(>) ; - gettext.update-po $(<) : $(>) ; - _ on $(<) = " " ; - ok on $(<) = "" ; - EXISTING_PO on $(<) = $(>[1]) ; -} - -# Due to fancy interaction of existing and updated, this rule can be called with -# one source, in which case we copy the lonely source into EXISTING_PO, or with -# two sources, in which case the action body expands to nothing. I'd really like -# to have "missing" action modifier. -actions quietly existing updated create-po bind EXISTING_PO -{ - cp$(_)"$(>[1])"$(_)"$(EXISTING_PO)"$($(>[2]:E=ok)) -} - -actions updated update-po bind EXISTING_PO -{ - $(.path)msgmerge$(_)-U$(_)"$(EXISTING_PO)"$(_)"$(>[1])" -} - -actions gettext.compile -{ - $(.path)msgfmt -o $(<) $(>) -} - -IMPORT $(__name__) : update : : gettext.update ; diff --git a/jam-files/boost-build/tools/gfortran.jam b/jam-files/boost-build/tools/gfortran.jam deleted file mode 100644 index 0aa69b85..00000000 --- a/jam-files/boost-build/tools/gfortran.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? 
: command * : options * ) -{ -} - -# Declare flags and action for compilation -flags gfortran OPTIONS ; - -flags gfortran OPTIONS off : -O0 ; -flags gfortran OPTIONS speed : -O3 ; -flags gfortran OPTIONS space : -Os ; - -flags gfortran OPTIONS on : -g ; -flags gfortran OPTIONS on : -pg ; - -flags gfortran OPTIONS shared/LIB : -fPIC ; - -flags gfortran DEFINES ; -flags gfortran INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - gcc -Wall $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler gfortran.compile.fortran : FORTRAN FORTRAN90 : OBJ ; diff --git a/jam-files/boost-build/tools/hp_cxx.jam b/jam-files/boost-build/tools/hp_cxx.jam deleted file mode 100644 index 86cd783e..00000000 --- a/jam-files/boost-build/tools/hp_cxx.jam +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2001 David Abrahams. -# Copyright 2004, 2005 Markus Schoepflin. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# -# HP CXX compiler -# See http://h30097.www3.hp.com/cplus/?jumpid=reg_R1002_USEN -# -# -# Notes on this toolset: -# -# - Because of very subtle issues with the default ansi mode, strict_ansi mode -# is used for compilation. One example of things that don't work correctly in -# the default ansi mode is overload resolution of function templates when -# mixed with non-template functions. -# -# - For template instantiation "-timplicit_local" is used. Previously, -# "-tlocal" has been tried to avoid the need for a template repository -# but this doesn't work with manually instantiated templates. "-tweak" -# has not been used to avoid the stream of warning messages issued by -# ar or ld when creating a library or linking an application. -# -# - Debug symbols are generated with "-g3", as this works both in debug and -# release mode. 
When compiling C++ code without optimization, we additionally -# use "-gall", which generates full symbol table information for all classes, -# structs, and unions. As this turns off optimization, it can't be used when -# optimization is needed. -# - -import feature generators common ; -import toolset : flags ; - -feature.extend toolset : hp_cxx ; -feature.extend c++abi : cxxarm ; - -# Inherit from Unix toolset to get library ordering magic. -toolset.inherit hp_cxx : unix ; - -generators.override hp_cxx.prebuilt : builtin.lib-generator ; -generators.override hp_cxx.prebuilt : builtin.prebuilt ; -generators.override hp_cxx.searched-lib-generator : searched-lib-generator ; - - -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters hp_cxx : version $(version) ] ; - - local command = [ common.get-invocation-command hp_cxx : cxx : $(command) ] ; - - if $(command) - { - local root = [ common.get-absolute-tool-path $(command[-1]) ] ; - - if $(root) - { - flags hp_cxx .root $(condition) : "\"$(root)\"/" ; - } - } - # If we can't find 'cxx' anyway, at least show 'cxx' in the commands - command ?= cxx ; - - common.handle-options hp_cxx : $(condition) : $(command) : $(options) ; -} - -generators.register-c-compiler hp_cxx.compile.c++ : CPP : OBJ : hp_cxx ; -generators.register-c-compiler hp_cxx.compile.c : C : OBJ : hp_cxx ; - - - -# No static linking as far as I can tell. -# flags cxx LINKFLAGS static : -bstatic ; -flags hp_cxx.compile OPTIONS on : -g3 ; -flags hp_cxx.compile OPTIONS off/on : -gall ; -flags hp_cxx.link OPTIONS on : -g ; -flags hp_cxx.link OPTIONS off : -s ; - -flags hp_cxx.compile OPTIONS off : -O0 ; -flags hp_cxx.compile OPTIONS speed/on : -O2 ; -flags hp_cxx.compile OPTIONS speed : -O2 ; - -# This (undocumented) macro needs to be defined to get all C function -# overloads required by the C++ standard. 
-flags hp_cxx.compile.c++ OPTIONS : -D__CNAME_OVERLOADS ; - -# Added for threading support -flags hp_cxx.compile OPTIONS multi : -pthread ; -flags hp_cxx.link OPTIONS multi : -pthread ; - -flags hp_cxx.compile OPTIONS space/on : size ; -flags hp_cxx.compile OPTIONS space : -O1 ; -flags hp_cxx.compile OPTIONS off : -inline none ; - -# The compiler versions tried (up to V6.5-040) hang when compiling Boost code -# with full inlining enabled. So leave it at the default level for now. -# -# flags hp_cxx.compile OPTIONS full : -inline all ; - -flags hp_cxx.compile OPTIONS on : -pg ; -flags hp_cxx.link OPTIONS on : -pg ; - -# Selection of the object model. This flag is needed on both the C++ compiler -# and linker command line. - -# Unspecified ABI translates to '-model ansi' as most -# standard-conforming. -flags hp_cxx.compile.c++ OPTIONS : -model ansi : : hack-hack ; -flags hp_cxx.compile.c++ OPTIONS cxxarm : -model arm ; -flags hp_cxx.link OPTIONS : -model ansi : : hack-hack ; -flags hp_cxx.link OPTIONS cxxarm : -model arm ; - -# Display a descriptive tag together with each compiler message. This tag can -# be used by the user to explicitely suppress the compiler message. -flags hp_cxx.compile OPTIONS : -msg_display_tag ; - -flags hp_cxx.compile OPTIONS ; -flags hp_cxx.compile.c++ OPTIONS ; -flags hp_cxx.compile DEFINES ; -flags hp_cxx.compile INCLUDES ; -flags hp_cxx.link OPTIONS ; - -flags hp_cxx.link LIBPATH ; -flags hp_cxx.link LIBRARIES ; -flags hp_cxx.link FINDLIBS-ST ; -flags hp_cxx.link FINDLIBS-SA ; - -flags hp_cxx.compile.c++ TEMPLATE_DEPTH ; - -actions link bind LIBRARIES -{ - $(CONFIG_COMMAND) -noimplicit_include $(OPTIONS) -o "$(<)" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lrt -lm -} - -# When creating dynamic libraries, we don't want to be warned about unresolved -# symbols, therefore all unresolved symbols are marked as expected by -# '-expect_unresolved *'. This also mirrors the behaviour of the GNU tool -# chain. 
- -actions link.dll bind LIBRARIES -{ - $(CONFIG_COMMAND) -shared -expect_unresolved \* -noimplicit_include $(OPTIONS) -o "$(<[1])" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lm -} - - -# Note: Relaxed ANSI mode (-std) is used for compilation because in strict ANSI -# C89 mode (-std1) the compiler doesn't accept C++ comments in C files. As -std -# is the default, no special flag is needed. -actions compile.c -{ - $(.root:E=)cc -c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)" -} - -# Note: The compiler is forced to compile the files as C++ (-x cxx) because -# otherwise it will silently ignore files with no file extension. -# -# Note: We deliberately don't suppress any warnings on the compiler command -# line, the user can always do this in a customized toolset later on. - -rule compile.c++ -{ - # We preprocess the TEMPLATE_DEPTH command line option here because we found - # no way to do it correctly in the actual action code. There we either get - # the -pending_instantiations parameter when no c++-template-depth property - # has been specified or we get additional quotes around - # "-pending_instantiations ". - local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ; - TEMPLATE_DEPTH on $(1) = "-pending_instantiations "$(template-depth) ; -} - -actions compile.c++ -{ - $(CONFIG_COMMAND) -x cxx -c -std strict_ansi -nopure_cname -noimplicit_include -timplicit_local -ptr "$(<[1]:D)/cxx_repository" $(OPTIONS) $(TEMPLATE_DEPTH) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)" -} - -# Always create archive from scratch. See the gcc toolet for rationale. 
-RM = [ common.rm-command ] ; -actions together piecemeal archive -{ - $(RM) "$(<)" - ar rc $(<) $(>) -} diff --git a/jam-files/boost-build/tools/hpfortran.jam b/jam-files/boost-build/tools/hpfortran.jam deleted file mode 100644 index 96e8d18b..00000000 --- a/jam-files/boost-build/tools/hpfortran.jam +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? : command * : options * ) -{ -} - -# Declare flags and action for compilation -flags hpfortran OPTIONS off : -O0 ; -flags hpfortran OPTIONS speed : -O3 ; -flags hpfortran OPTIONS space : -O1 ; - -flags hpfortran OPTIONS on : -g ; -flags hpfortran OPTIONS on : -pg ; - -flags hpfortran DEFINES ; -flags hpfortran INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - f77 +DD64 $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler hpfortran.compile.fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/ifort.jam b/jam-files/boost-build/tools/ifort.jam deleted file mode 100644 index eb7c1988..00000000 --- a/jam-files/boost-build/tools/ifort.jam +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? 
: command * : options * ) -{ -} - -# Declare flags and action for compilation -flags ifort OPTIONS ; - -flags ifort OPTIONS off : /Od ; -flags ifort OPTIONS speed : /O3 ; -flags ifort OPTIONS space : /O1 ; - -flags ifort OPTIONS on : /debug:full ; -flags ifort OPTIONS on : /Qprof_gen ; - -flags ifort.compile FFLAGS off/shared : /MD ; -flags ifort.compile FFLAGS on/shared : /MDd ; -flags ifort.compile FFLAGS off/static/single : /ML ; -flags ifort.compile FFLAGS on/static/single : /MLd ; -flags ifort.compile FFLAGS off/static/multi : /MT ; -flags ifort.compile FFLAGS on/static/multi : /MTd ; - -flags ifort DEFINES ; -flags ifort INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - ifort $(FFLAGS) $(OPTIONS) /names:lowercase /D$(DEFINES) /I"$(INCLUDES)" /c /object:"$(<)" "$(>)" -} - -generators.register-fortran-compiler ifort.compile.fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/intel-darwin.jam b/jam-files/boost-build/tools/intel-darwin.jam deleted file mode 100644 index aa0fd8fb..00000000 --- a/jam-files/boost-build/tools/intel-darwin.jam +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Copyright Noel Belcourt 2007. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -import intel ; -import feature : feature ; -import os ; -import toolset ; -import toolset : flags ; -import gcc ; -import common ; -import errors ; -import generators ; - -feature.extend-subfeature toolset intel : platform : darwin ; - -toolset.inherit-generators intel-darwin - intel darwin - : gcc - # Don't inherit PCH generators. They were not tested, and probably - # don't work for this compiler. 
- : gcc.mingw.link gcc.mingw.link.dll gcc.compile.c.pch gcc.compile.c++.pch - ; - -generators.override intel-darwin.prebuilt : builtin.lib-generator ; -generators.override intel-darwin.prebuilt : builtin.prebuilt ; -generators.override intel-darwin.searched-lib-generator : searched-lib-generator ; - -toolset.inherit-rules intel-darwin : gcc ; -toolset.inherit-flags intel-darwin : gcc - : off on full space - off all on - x86/32 - x86/64 - ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# vectorization diagnostics -feature vectorize : off on full ; - -# Initializes the intel-darwin toolset -# version in mandatory -# name (default icc) is used to invoke the specified intel complier -# compile and link options allow you to specify addition command line options for each version -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters intel-darwin - : version $(version) ] ; - - command = [ common.get-invocation-command intel-darwin : icc - : $(command) : /opt/intel_cc_80/bin ] ; - - common.handle-options intel-darwin : $(condition) : $(command) : $(options) ; - - gcc.init-link-flags intel-darwin darwin $(condition) ; - - # handle - # local library-path = [ feature.get-values : $(options) ] ; - # flags intel-darwin.link USER_OPTIONS $(condition) : [ feature.get-values : $(options) ] ; - - local root = [ feature.get-values : $(options) ] ; - local bin ; - if $(command) || $(root) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - - if $(root) - { - # Libraries required to run the executable may be in either - # $(root)/lib (10.1 and earlier) - # or - # $(root)/lib/architecture-name (11.0 and later: - local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ; - if $(.debug-configuration) - { - ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ; - } - flags intel-darwin.link RUN_PATH $(condition) : $(lib_path) ; - } - } - - 
local m = [ MATCH (..).* : $(version) ] ; - local n = [ MATCH (.)\\. : $(m) ] ; - if $(n) { - m = $(n) ; - } - - local major = $(m) ; - - if $(major) = "9" { - flags intel-darwin.compile OPTIONS $(condition)/off : -Ob0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -Ob1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -Ob2 ; - flags intel-darwin.compile OPTIONS $(condition)/off : -vec-report0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -vec-report1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -vec-report5 ; - flags intel-darwin.link OPTIONS $(condition)/static : -static -static-libcxa -lstdc++ -lpthread ; - flags intel-darwin.link OPTIONS $(condition)/shared : -shared-libcxa -lstdc++ -lpthread ; - } - else { - flags intel-darwin.compile OPTIONS $(condition)/off : -inline-level=0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -inline-level=1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -inline-level=2 ; - flags intel-darwin.compile OPTIONS $(condition)/off : -vec-report0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -vec-report1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -vec-report5 ; - flags intel-darwin.link OPTIONS $(condition)/static : -static -static-intel -lstdc++ -lpthread ; - flags intel-darwin.link OPTIONS $(condition)/shared : -shared-intel -lstdc++ -lpthread ; - } - - local minor = [ MATCH ".*\\.(.).*" : $(version) ] ; - - # wchar_t char_traits workaround for compilers older than 10.2 - if $(major) = "9" || ( $(major) = "10" && ( $(minor) = "0" || $(minor) = "1" ) ) { - flags intel-darwin.compile DEFINES $(condition) : __WINT_TYPE__=int : unchecked ; - } -} - -SPACE = " " ; - -flags intel-darwin.compile OPTIONS ; -flags intel-darwin.compile OPTIONS ; -# flags intel-darwin.compile INCLUDES ; - -flags intel-darwin.compile OPTIONS space : -O1 ; # no specific space optimization flag in icc - -# -cpu-type-em64t = prescott nocona ; -flags intel-darwin.compile 
OPTIONS $(cpu-type-em64t)/32 : -m32 ; # -mcmodel=small ; -flags intel-darwin.compile OPTIONS $(cpu-type-em64t)/64 : -m64 ; # -mcmodel=large ; - -flags intel-darwin.compile.c OPTIONS off : -w0 ; -flags intel-darwin.compile.c OPTIONS on : -w1 ; -flags intel-darwin.compile.c OPTIONS all : -w2 ; - -flags intel-darwin.compile.c++ OPTIONS off : -w0 ; -flags intel-darwin.compile.c++ OPTIONS on : -w1 ; -flags intel-darwin.compile.c++ OPTIONS all : -w2 ; - -actions compile.c -{ - "$(CONFIG_COMMAND)" -xc $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -xc++ $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -flags intel-darwin ARFLAGS ; - -# Default value. Mostly for the sake of intel-linux -# that inherits from gcc, but does not has the same -# logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble -# as on Linux, 'ar' is always available. -.AR = ar ; - -rule archive ( targets * : sources * : properties * ) -{ - # Always remove archive and start again. Here's rationale from - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. - # I moved a1.c to a2.c, updated my Jamfiles and rebuilt. - # My program was crashing with absurd errors. - # After some debugging I traced it back to the fact that a1.o was *still* - # in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM - # onto the archive action. That makes archives fail to build on NT - # when they have many files because it will no longer execute the - # action directly and blow the line length limit. Instead we - # remove the file in a different action, just before the building - # of the archive. 
- # - local clean.a = $(targets[1])(clean) ; - TEMPORARY $(clean.a) ; - NOCARE $(clean.a) ; - LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ; - DEPENDS $(clean.a) : $(sources) ; - DEPENDS $(targets) : $(clean.a) ; - common.RmTemps $(clean.a) : $(targets) ; -} - -actions piecemeal archive -{ - "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)" - "ranlib" -cs "$(<)" -} - -flags intel-darwin.link USER_OPTIONS ; - -# Declare actions for linking -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since - # running N links in parallel is just slower. - JAM_SEMAPHORE on $(targets) = intel-darwin-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" -single_module -dynamiclib -install_name "$(<[1]:D=)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) -} diff --git a/jam-files/boost-build/tools/intel-linux.jam b/jam-files/boost-build/tools/intel-linux.jam deleted file mode 100644 index d9164add..00000000 --- a/jam-files/boost-build/tools/intel-linux.jam +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (c) 2003 Michael Stevens -# Copyright (c) 2011 Bryce Lelbach -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset ; -import feature ; -import toolset : flags ; - -import intel ; -import gcc ; -import common ; -import errors ; -import generators ; -import type ; -import numbers ; - -feature.extend-subfeature toolset intel : platform : linux ; - -toolset.inherit-generators intel-linux - intel linux : gcc : gcc.mingw.link gcc.mingw.link.dll ; -generators.override intel-linux.prebuilt : builtin.lib-generator ; -generators.override intel-linux.prebuilt : builtin.prebuilt ; -generators.override intel-linux.searched-lib-generator : searched-lib-generator ; - -# Override default do-nothing generators. -generators.override intel-linux.compile.c.pch : pch.default-c-pch-generator ; -generators.override intel-linux.compile.c++.pch : pch.default-cpp-pch-generator ; - -type.set-generated-target-suffix PCH : intel linux : pchi ; - -toolset.inherit-rules intel-linux : gcc ; -toolset.inherit-flags intel-linux : gcc - : off on full - space speed - off all on - ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Initializes the intel-linux toolset -# version in mandatory -# name (default icpc) is used to invoke the specified intel-linux complier -# compile and link options allow you to specify addition command line options for each version -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters intel-linux - : version $(version) ] ; - - if $(.debug-configuration) - { - ECHO "notice: intel-linux version is" $(version) ; - } - - local default_path ; - - # Intel C++ Composer XE 2011 for Linux, aka Intel C++ Compiler XE 12.0, - # aka intel-linux-12.0. In this version, Intel thankfully decides to install - # to a sane 'intel' folder in /opt. - if [ MATCH "(12[.]0|12)" : $(version) ] - { default_path = /opt/intel/bin ; } - # Intel C++ Compiler 11.1. 
- else if [ MATCH "(11[.]1)" : $(version) ] - { default_path = /opt/intel_cce_11.1.064.x86_64/bin ; } - # Intel C++ Compiler 11.0. - else if [ MATCH "(11[.]0|11)" : $(version) ] - { default_path = /opt/intel_cce_11.0.074.x86_64/bin ; } - # Intel C++ Compiler 10.1. - else if [ MATCH "(10[.]1)" : $(version) ] - { default_path = /opt/intel_cce_10.1.013_x64/bin ; } - # Intel C++ Compiler 9.1. - else if [ MATCH "(9[.]1)" : $(version) ] - { default_path = /opt/intel_cc_91/bin ; } - # Intel C++ Compiler 9.0. - else if [ MATCH "(9[.]0|9)" : $(version) ] - { default_path = /opt/intel_cc_90/bin ; } - # Intel C++ Compiler 8.1. - else if [ MATCH "(8[.]1)" : $(version) ] - { default_path = /opt/intel_cc_81/bin ; } - # Intel C++ Compiler 8.0 - this used to be the default, so now it's the - # fallback. - else - { default_path = /opt/intel_cc_80/bin ; } - - if $(.debug-configuration) - { - ECHO "notice: default search path for intel-linux is" $(default_path) ; - } - - command = [ common.get-invocation-command intel-linux : icpc - : $(command) : $(default_path) ] ; - - common.handle-options intel-linux : $(condition) : $(command) : $(options) ; - - gcc.init-link-flags intel-linux gnu $(condition) ; - - local root = [ feature.get-values : $(options) ] ; - local bin ; - if $(command) || $(root) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - - local command-string = $(command:J=" ") ; - local version-output = [ SHELL "$(command-string) --version" ] ; - local real-version = [ MATCH "([0-9.]+)" : $(version-output) ] ; - local major = [ MATCH "([0-9]+).*" : $(real-version) ] ; - - # If we failed to determine major version, use the behaviour for - # the current compiler. 
- if $(major) && [ numbers.less $(major) 10 ] - { - flags intel-linux.compile OPTIONS $(condition)/off : "-Ob0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-Ob1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-Ob2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-O1" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - else if $(major) && [ numbers.less $(major) 11 ] - { - flags intel-linux.compile OPTIONS $(condition)/off : "-inline-level=0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-inline-level=1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-inline-level=2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-O1" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - else # newer version of intel do have -Os (at least 11+, don't know about 10) - { - flags intel-linux.compile OPTIONS $(condition)/off : "-inline-level=0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-inline-level=1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-inline-level=2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-Os" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - - if $(root) - { - # Libraries required to run the executable may be in either - # $(root)/lib (10.1 and earlier) - # or - # $(root)/lib/architecture-name (11.0 and later: - local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ; - if $(.debug-configuration) - { - ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ; - } - flags intel-linux.link RUN_PATH $(condition) : $(lib_path) ; - } - } -} - -SPACE = " " ; - -flags intel-linux.compile OPTIONS off : -w0 ; -flags intel-linux.compile OPTIONS on : -w1 ; -flags intel-linux.compile OPTIONS all : -w2 ; - -rule compile.c++ ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) 
; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -actions compile.c++ bind PCH_FILE -{ - "$(CONFIG_COMMAND)" -c -xc++ $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)" -} - -rule compile.c ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -actions compile.c bind PCH_FILE -{ - "$(CONFIG_COMMAND)" -c -xc $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)" -} - -rule compile.c++.pch ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; -} -# -# Compiling a pch first deletes any existing *.pchi file, as Intel's compiler -# won't over-write an existing pch: instead it creates filename$1.pchi, filename$2.pchi -# etc - which appear not to do anything except take up disk space :-( -# -actions compile.c++.pch -{ - rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)" -} - -actions compile.fortran -{ - "ifort" -c $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.c.pch ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c.pch -{ - rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)" -} - -rule 
link ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = intel-linux-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS) -} - -rule link.dll ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = intel-linux-link-semaphore ; -} - -# Differ from 'link' above only by -shared. -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS) -} - - - diff --git a/jam-files/boost-build/tools/intel-win.jam b/jam-files/boost-build/tools/intel-win.jam deleted file mode 100644 index 691b5dce..00000000 --- a/jam-files/boost-build/tools/intel-win.jam +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# Importing common is needed because the rules we inherit here depend on it. -# That is nasty. 
-import common ; -import errors ; -import feature ; -import intel ; -import msvc ; -import os ; -import toolset ; -import generators ; -import type ; - -feature.extend-subfeature toolset intel : platform : win ; - -toolset.inherit-generators intel-win intel win : msvc ; -toolset.inherit-flags intel-win : msvc : : YLOPTION ; -toolset.inherit-rules intel-win : msvc ; - -# Override default do-nothing generators. -generators.override intel-win.compile.c.pch : pch.default-c-pch-generator ; -generators.override intel-win.compile.c++.pch : pch.default-cpp-pch-generator ; -generators.override intel-win.compile.rc : rc.compile.resource ; -generators.override intel-win.compile.mc : mc.compile ; - -toolset.flags intel-win.compile PCH_SOURCE on : ; - -toolset.add-requirements intel-win,shared:multi ; - -# Initializes the intel toolset for windows -rule init ( version ? : # the compiler version - command * : # the command to invoke the compiler itself - options * # Additional option: - # either 'vc6', 'vc7', 'vc7.1' - # or 'native'(default). - ) -{ - local compatibility = - [ feature.get-values : $(options) ] ; - local condition = [ common.check-init-parameters intel-win - : version $(version) : compatibility $(compatibility) ] ; - - command = [ common.get-invocation-command intel-win : icl.exe : - $(command) ] ; - - common.handle-options intel-win : $(condition) : $(command) : $(options) ; - - local root ; - if $(command) - { - root = [ common.get-absolute-tool-path $(command[-1]) ] ; - root = $(root)/ ; - } - - local setup ; - setup = [ GLOB $(root) : iclvars_*.bat ] ; - if ! 
$(setup) - { - setup = $(root)/iclvars.bat ; - } - setup = "call \""$(setup)"\" > nul " ; - - if [ os.name ] = NT - { - setup = $(setup)" -" ; - } - else - { - setup = "cmd /S /C "$(setup)" \"&&\" " ; - } - - toolset.flags intel-win.compile .CC $(condition) : $(setup)icl ; - toolset.flags intel-win.link .LD $(condition) : $(setup)xilink ; - toolset.flags intel-win.archive .LD $(condition) : $(setup)xilink /lib ; - toolset.flags intel-win.link .MT $(condition) : $(setup)mt -nologo ; - toolset.flags intel-win.compile .MC $(condition) : $(setup)mc ; - toolset.flags intel-win.compile .RC $(condition) : $(setup)rc ; - - local m = [ MATCH (.).* : $(version) ] ; - local major = $(m[1]) ; - - local C++FLAGS ; - - C++FLAGS += /nologo ; - - # Reduce the number of spurious error messages - C++FLAGS += /Qwn5 /Qwd985 ; - - # Enable ADL - C++FLAGS += -Qoption,c,--arg_dep_lookup ; #"c" works for C++, too - - # Disable Microsoft "secure" overloads in Dinkumware libraries since they - # cause compile errors with Intel versions 9 and 10. - C++FLAGS += -D_SECURE_SCL=0 ; - - if $(major) > 5 - { - C++FLAGS += /Zc:forScope ; # Add support for correct for loop scoping. - } - - # Add options recognized only by intel7 and above. - if $(major) >= 7 - { - C++FLAGS += /Qansi_alias ; - } - - if $(compatibility) = vc6 - { - C++FLAGS += - # Emulate VC6 - /Qvc6 - - # No wchar_t support in vc6 dinkum library. Furthermore, in vc6 - # compatibility-mode, wchar_t is not a distinct type from unsigned - # short. - -DBOOST_NO_INTRINSIC_WCHAR_T - ; - } - else - { - if $(major) > 5 - { - # Add support for wchar_t - C++FLAGS += /Zc:wchar_t - # Tell the dinkumware library about it. 
- -D_NATIVE_WCHAR_T_DEFINED - ; - } - } - - if $(compatibility) && $(compatibility) != native - { - C++FLAGS += /Q$(base-vc) ; - } - else - { - C++FLAGS += - -Qoption,cpp,--arg_dep_lookup - # The following options were intended to disable the Intel compiler's - # 'bug-emulation' mode, but were later reported to be causing ICE with - # Intel-Win 9.0. It is not yet clear which options can be safely used. - # -Qoption,cpp,--const_string_literals - # -Qoption,cpp,--new_for_init - # -Qoption,cpp,--no_implicit_typename - # -Qoption,cpp,--no_friend_injection - # -Qoption,cpp,--no_microsoft_bugs - ; - } - - toolset.flags intel-win CFLAGS $(condition) : $(C++FLAGS) ; - # By default, when creating PCH, intel adds 'i' to the explicitly - # specified name of the PCH file. Of course, Boost.Build is not - # happy when compiler produces not the file it was asked for. - # The option below stops this behaviour. - toolset.flags intel-win CFLAGS : -Qpchi- ; - - if ! $(compatibility) - { - # If there's no backend version, assume 7.1. - compatibility = vc7.1 ; - } - - local extract-version = [ MATCH ^vc(.*) : $(compatibility) ] ; - if ! $(extract-version) - { - errors.user-error "Invalid value for compatibility option:" - $(compatibility) ; - } - - # Depending on the settings, running of tests require some runtime DLLs. - toolset.flags intel-win RUN_PATH $(condition) : $(root) ; - - msvc.configure-version-specific intel-win : $(extract-version[1]) : $(condition) ; -} - -toolset.flags intel-win.link LIBRARY_OPTION intel : "" ; - -toolset.flags intel-win YLOPTION ; - diff --git a/jam-files/boost-build/tools/intel.jam b/jam-files/boost-build/tools/intel.jam deleted file mode 100644 index 67038aa2..00000000 --- a/jam-files/boost-build/tools/intel.jam +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# This is a generic 'intel' toolset. Depending on the current -# system, it forwards either to 'intel-linux' or 'intel-win' -# modules. - -import feature ; -import os ; -import toolset ; - -feature.extend toolset : intel ; -feature.subfeature toolset intel : platform : : propagated link-incompatible ; - -rule init ( * : * ) -{ - if [ os.name ] = LINUX - { - toolset.using intel-linux : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else if [ os.name ] = MACOSX - { - toolset.using intel-darwin : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else - { - toolset.using intel-win : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } -} diff --git a/jam-files/boost-build/tools/lex.jam b/jam-files/boost-build/tools/lex.jam deleted file mode 100644 index 75d64131..00000000 --- a/jam-files/boost-build/tools/lex.jam +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2003 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -import type ; -import generators ; -import feature ; -import property ; - - -feature.feature flex.prefix : : free ; -type.register LEX : l ; -type.register LEX++ : ll ; -generators.register-standard lex.lex : LEX : C ; -generators.register-standard lex.lex : LEX++ : CPP ; - -rule init ( ) -{ -} - -rule lex ( target : source : properties * ) -{ - local r = [ property.select flex.prefix : $(properties) ] ; - if $(r) - { - PREFIX on $(<) = $(r:G=) ; - } -} - -actions lex -{ - flex -P$(PREFIX) -o$(<) $(>) -} diff --git a/jam-files/boost-build/tools/make.jam b/jam-files/boost-build/tools/make.jam deleted file mode 100644 index 08567285..00000000 --- a/jam-files/boost-build/tools/make.jam +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2003 Douglas Gregor -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'make' main target rule. - -import "class" : new ; -import errors : error ; -import project ; -import property ; -import property-set ; -import regex ; -import targets ; - - -class make-target-class : basic-target -{ - import type regex virtual-target ; - import "class" : new ; - - rule __init__ ( name : project : sources * : requirements * - : default-build * : usage-requirements * ) - { - basic-target.__init__ $(name) : $(project) : $(sources) : - $(requirements) : $(default-build) : $(usage-requirements) ; - } - - rule construct ( name : source-targets * : property-set ) - { - local action-name = [ $(property-set).get ] ; - # 'm' will always be set -- we add '@' ourselves in the 'make' rule - # below. 
- local m = [ MATCH ^@(.*) : $(action-name) ] ; - - local a = [ new action $(source-targets) : $(m[1]) : $(property-set) ] ; - local t = [ new file-target $(self.name) exact : [ type.type - $(self.name) ] : $(self.project) : $(a) ] ; - return [ property-set.empty ] [ virtual-target.register $(t) ] ; - } -} - - -# Declares the 'make' main target. -# -rule make ( target-name : sources * : generating-rule + : requirements * : - usage-requirements * ) -{ - local project = [ project.current ] ; - - # The '@' sign causes the feature.jam module to qualify rule name with the - # module name of current project, if needed. - local m = [ MATCH ^(@).* : $(generating-rule) ] ; - if ! $(m) - { - generating-rule = @$(generating-rule) ; - } - requirements += $(generating-rule) ; - - targets.main-target-alternative - [ new make-target-class $(target-name) : $(project) - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) : - $(project) ] ] ; -} - - -IMPORT $(__name__) : make : : make ; diff --git a/jam-files/boost-build/tools/make.py b/jam-files/boost-build/tools/make.py deleted file mode 100644 index 10baa1cb..00000000 --- a/jam-files/boost-build/tools/make.py +++ /dev/null @@ -1,59 +0,0 @@ -# Status: ported. -# Base revision: 64068 - -# Copyright 2003 Dave Abrahams -# Copyright 2003 Douglas Gregor -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'make' main target rule. 
- -from b2.build.targets import BasicTarget -from b2.build.virtual_target import Action, FileTarget -from b2.build import type -from b2.manager import get_manager -import b2.build.property_set - - -class MakeTarget(BasicTarget): - - def construct(self, name, source_targets, property_set): - - action_name = property_set.get("")[0] - action = Action(get_manager(), source_targets, action_name[1:], property_set) - target = FileTarget(self.name(), type.type(self.name()), - self.project(), action, exact=True) - return [ b2.build.property_set.empty(), - [self.project().manager().virtual_targets().register(target)]] - -def make (target_name, sources, generating_rule, - requirements=None, usage_requirements=None): - - target_name = target_name[0] - generating_rule = generating_rule[0] - if generating_rule[0] != '@': - generating_rule = '@' + generating_rule - - if not requirements: - requirements = [] - - - requirements.append("%s" % generating_rule) - - m = get_manager() - targets = m.targets() - project = m.projects().current() - engine = m.engine() - engine.register_bjam_action(generating_rule) - - targets.main_target_alternative(MakeTarget( - target_name, project, - targets.main_target_sources(sources, target_name), - targets.main_target_requirements(requirements, project), - targets.main_target_default_build([], project), - targets.main_target_usage_requirements(usage_requirements or [], project))) - -get_manager().projects().add_rule("make", make) - diff --git a/jam-files/boost-build/tools/mc.jam b/jam-files/boost-build/tools/mc.jam deleted file mode 100644 index 57837773..00000000 --- a/jam-files/boost-build/tools/mc.jam +++ /dev/null @@ -1,44 +0,0 @@ -#~ Copyright 2005 Alexey Pakhunov. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Support for Microsoft message compiler tool. 
-# Notes: -# - there's just message compiler tool, there's no tool for -# extracting message strings from sources -# - This file allows to use Microsoft message compiler -# with any toolset. In msvc.jam, there's more specific -# message compiling action. - -import common ; -import generators ; -import feature : feature get-values ; -import toolset : flags ; -import type ; -import rc ; - -rule init ( ) -{ -} - -type.register MC : mc ; - - -# Command line options -feature mc-input-encoding : ansi unicode : free ; -feature mc-output-encoding : unicode ansi : free ; -feature mc-set-customer-bit : no yes : free ; - -flags mc.compile MCFLAGS ansi : -a ; -flags mc.compile MCFLAGS unicode : -u ; -flags mc.compile MCFLAGS ansi : -A ; -flags mc.compile MCFLAGS unicode : -U ; -flags mc.compile MCFLAGS no : ; -flags mc.compile MCFLAGS yes : -c ; - -generators.register-standard mc.compile : MC : H RC ; - -actions compile -{ - mc $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)" -} diff --git a/jam-files/boost-build/tools/message.jam b/jam-files/boost-build/tools/message.jam deleted file mode 100644 index 212d8542..00000000 --- a/jam-files/boost-build/tools/message.jam +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2008 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines main target type 'message', that prints a message when built for the -# first time. - -import project ; -import "class" : new ; -import targets ; -import property-set ; - -class message-target-class : basic-target -{ - rule __init__ ( name-and-dir : project : * ) - { - basic-target.__init__ $(name-and-dir) : $(project) ; - self.3 = $(3) ; - self.4 = $(4) ; - self.5 = $(5) ; - self.6 = $(6) ; - self.7 = $(7) ; - self.8 = $(8) ; - self.9 = $(9) ; - self.built = ; - } - - rule construct ( name : source-targets * : property-set ) - { - if ! 
$(self.built) - { - for i in 3 4 5 6 7 8 9 - { - if $(self.$(i)) - { - ECHO $(self.$(i)) ; - } - } - self.built = 1 ; - } - - return [ property-set.empty ] ; - } -} - - -rule message ( name : * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new message-target-class $(name) : $(project) - : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) ] ; -} -IMPORT $(__name__) : message : : message ; \ No newline at end of file diff --git a/jam-files/boost-build/tools/message.py b/jam-files/boost-build/tools/message.py deleted file mode 100644 index cc0b946f..00000000 --- a/jam-files/boost-build/tools/message.py +++ /dev/null @@ -1,46 +0,0 @@ -# Status: ported. -# Base revision: 64488. -# -# Copyright 2008, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines main target type 'message', that prints a message when built for the -# first time. - -import b2.build.targets as targets -import b2.build.property_set as property_set - -from b2.manager import get_manager - -class MessageTargetClass(targets.BasicTarget): - - def __init__(self, name, project, *args): - - targets.BasicTarget.__init__(self, name, project, []) - self.args = args - self.built = False - - def construct(self, name, sources, ps): - - if not self.built: - for arg in self.args: - if type(arg) == type([]): - arg = " ".join(arg) - print arg - self.built = True - - return (property_set.empty(), []) - -def message(name, *args): - - if type(name) == type([]): - name = name[0] - - t = get_manager().targets() - - project = get_manager().projects().current() - - return t.main_target_alternative(MessageTargetClass(*((name, project) + args))) - -get_manager().projects().add_rule("message", message) diff --git a/jam-files/boost-build/tools/midl.jam b/jam-files/boost-build/tools/midl.jam deleted file mode 100644 index 0aa5dda3..00000000 --- 
a/jam-files/boost-build/tools/midl.jam +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2005 Alexey Pakhunov. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Microsoft Interface Definition Language (MIDL) related routines - -import common ; -import generators ; -import feature : feature get-values ; -import os ; -import scanner ; -import toolset : flags ; -import type ; - -rule init ( ) -{ -} - -type.register IDL : idl ; - -# A type library (.tlb) is generated by MIDL compiler and can be included -# to resources of an application (.rc). In order to be found by a resource -# compiler its target type should be derived from 'H' - otherwise -# the property '' will be ignored. -type.register MSTYPELIB : tlb : H ; - - -# Register scanner for MIDL files -class midl-scanner : scanner -{ - import path property-set regex scanner type virtual-target ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - self.includes = $(includes) ; - - # List of quoted strings - self.re-strings = "[ \t]*\"([^\"]*)\"([ \t]*,[ \t]*\"([^\"]*)\")*[ \t]*" ; - - # 'import' and 'importlib' directives - self.re-import = "import"$(self.re-strings)"[ \t]*;" ; - self.re-importlib = "importlib[ \t]*[(]"$(self.re-strings)"[)][ \t]*;" ; - - # C preprocessor 'include' directive - self.re-include-angle = "#[ \t]*include[ \t]*<(.*)>" ; - self.re-include-quoted = "#[ \t]*include[ \t]*\"(.*)\"" ; - } - - rule pattern ( ) - { - # Match '#include', 'import' and 'importlib' directives - return "((#[ \t]*include|import(lib)?).+(<(.*)>|\"(.*)\").+)" ; - } - - rule process ( target : matches * : binding ) - { - local included-angle = [ regex.transform $(matches) : $(self.re-include-angle) : 1 ] ; - local included-quoted = [ regex.transform $(matches) : $(self.re-include-quoted) : 1 ] ; - local imported = [ regex.transform $(matches) : $(self.re-import) : 1 3 ] ; - local 
imported_tlbs = [ regex.transform $(matches) : $(self.re-importlib) : 1 3 ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. - local g2 = $(g)"#"$(b) ; - - included-angle = $(included-angle:G=$(g)) ; - included-quoted = $(included-quoted:G=$(g2)) ; - imported = $(imported:G=$(g2)) ; - imported_tlbs = $(imported_tlbs:G=$(g2)) ; - - local all = $(included-angle) $(included-quoted) $(imported) ; - - INCLUDES $(target) : $(all) ; - DEPENDS $(target) : $(imported_tlbs) ; - NOCARE $(all) $(imported_tlbs) ; - SEARCH on $(included-angle) = $(self.includes:G=) ; - SEARCH on $(included-quoted) = $(b) $(self.includes:G=) ; - SEARCH on $(imported) = $(b) $(self.includes:G=) ; - SEARCH on $(imported_tlbs) = $(b) $(self.includes:G=) ; - - scanner.propagate - [ type.get-scanner CPP : [ property-set.create $(self.includes) ] ] : - $(included-angle) $(included-quoted) : $(target) ; - - scanner.propagate $(__name__) : $(imported) : $(target) ; - } -} - -scanner.register midl-scanner : include ; -type.set-scanner IDL : midl-scanner ; - - -# Command line options -feature midl-stubless-proxy : yes no : propagated ; -feature midl-robust : yes no : propagated ; - -flags midl.compile.idl MIDLFLAGS yes : /Oicf ; -flags midl.compile.idl MIDLFLAGS no : /Oic ; -flags midl.compile.idl MIDLFLAGS yes : /robust ; -flags midl.compile.idl MIDLFLAGS no : /no_robust ; - -# Architecture-specific options -architecture-x86 = x86 ; -address-model-32 = 32 ; -address-model-64 = 64 ; - -flags midl.compile.idl MIDLFLAGS $(architecture-x86)/$(address-model-32) : /win32 ; -flags midl.compile.idl MIDLFLAGS 
$(architecture-x86)/64 : /x64 ; -flags midl.compile.idl MIDLFLAGS ia64/$(address-model-64) : /ia64 ; - - -flags midl.compile.idl DEFINES ; -flags midl.compile.idl UNDEFS ; -flags midl.compile.idl INCLUDES ; - - -generators.register-c-compiler midl.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) ; - - -# MIDL does not always generate '%_proxy.c' and '%_dlldata.c'. This behavior -# depends on contents of the source IDL file. Calling TOUCH_FILE below ensures -# that both files will be created so bjam will not try to recreate them -# constantly. -TOUCH_FILE = [ common.file-touch-command ] ; - -actions compile.idl -{ - midl /nologo @"@($(<[1]:W).rsp:E=$(nl)"$(>:W)" $(nl)-D$(DEFINES) $(nl)"-I$(INCLUDES)" $(nl)-U$(UNDEFS) $(nl)$(MIDLFLAGS) $(nl)/tlb "$(<[1]:W)" $(nl)/h "$(<[2]:W)" $(nl)/iid "$(<[3]:W)" $(nl)/proxy "$(<[4]:W)" $(nl)/dlldata "$(<[5]:W)")" - $(TOUCH_FILE) "$(<[4]:W)" - $(TOUCH_FILE) "$(<[5]:W)" -} diff --git a/jam-files/boost-build/tools/mipspro.jam b/jam-files/boost-build/tools/mipspro.jam deleted file mode 100644 index 417eaefc..00000000 --- a/jam-files/boost-build/tools/mipspro.jam +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright Noel Belcourt 2007. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import fortran ; -import type ; -import common ; - -feature.extend toolset : mipspro ; -toolset.inherit mipspro : unix ; -generators.override mipspro.prebuilt : builtin.lib-generator ; -generators.override mipspro.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.sgi.com/products/software/irix/tools/ - -rule init ( version ? 
: command * : options * ) -{ - local condition = [ - common.check-init-parameters mipspro : version $(version) ] ; - - command = [ common.get-invocation-command mipspro : CC : $(command) ] ; - - common.handle-options mipspro : $(condition) : $(command) : $(options) ; - - command_c = $(command_c[1--2]) $(command[-1]:B=cc) ; - - toolset.flags mipspro CONFIG_C_COMMAND $(condition) : $(command_c) ; - - # fortran support - local command = [ - common.get-invocation-command mipspro : f77 : $(command) : $(install_dir) ] ; - - command_f = $(command_f[1--2]) $(command[-1]:B=f77) ; - toolset.flags mipspro CONFIG_F_COMMAND $(condition) : $(command_f) ; - - # set link flags - flags mipspro.link FINDLIBS-ST : [ - feature.get-values : $(options) ] : unchecked ; - - flags mipspro.link FINDLIBS-SA : [ - feature.get-values : $(options) ] : unchecked ; -} - -# Declare generators -generators.register-c-compiler mipspro.compile.c : C : OBJ : mipspro ; -generators.register-c-compiler mipspro.compile.c++ : CPP : OBJ : mipspro ; -generators.register-fortran-compiler mipspro.compile.fortran : FORTRAN : OBJ : mipspro ; - -cpu-arch-32 = - / - /32 ; - -cpu-arch-64 = - /64 ; - -flags mipspro.compile OPTIONS $(cpu-arch-32) : -n32 ; -flags mipspro.compile OPTIONS $(cpu-arch-64) : -64 ; - -# Declare flags and actions for compilation -flags mipspro.compile OPTIONS on : -g ; -# flags mipspro.compile OPTIONS on : -xprofile=tcov ; -flags mipspro.compile OPTIONS off : -w ; -flags mipspro.compile OPTIONS on : -ansiW -diag_suppress 1429 ; # suppress long long is nonstandard warning -flags mipspro.compile OPTIONS all : -fullwarn ; -flags mipspro.compile OPTIONS speed : -Ofast ; -flags mipspro.compile OPTIONS space : -O2 ; -flags mipspro.compile OPTIONS : -LANG:std ; -flags mipspro.compile.c++ OPTIONS off : -INLINE:none ; -flags mipspro.compile.c++ OPTIONS ; -flags mipspro.compile DEFINES ; -flags mipspro.compile INCLUDES ; - - -flags mipspro.compile.fortran OPTIONS ; - -actions compile.c -{ - 
"$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags mipspro.link OPTIONS on : -g ; -# Strip the binary when no debugging is needed -# flags mipspro.link OPTIONS off : -s ; -# flags mipspro.link OPTIONS on : -xprofile=tcov ; -# flags mipspro.link OPTIONS multi : -mt ; - -flags mipspro.link OPTIONS $(cpu-arch-32) : -n32 ; -flags mipspro.link OPTIONS $(cpu-arch-64) : -64 ; - -flags mipspro.link OPTIONS speed : -Ofast ; -flags mipspro.link OPTIONS space : -O2 ; -flags mipspro.link OPTIONS ; -flags mipspro.link LINKPATH ; -flags mipspro.link FINDLIBS-ST ; -flags mipspro.link FINDLIBS-SA ; -flags mipspro.link FINDLIBS-SA multi : pthread ; -flags mipspro.link LIBRARIES ; -flags mipspro.link LINK-RUNTIME static : static ; -flags mipspro.link LINK-RUNTIME shared : dynamic ; -flags mipspro.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -lm -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Declare action for creating static libraries -actions piecemeal archive -{ - ar -cr "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/mpi.jam b/jam-files/boost-build/tools/mpi.jam 
deleted file mode 100644 index 0fe490be..00000000 --- a/jam-files/boost-build/tools/mpi.jam +++ /dev/null @@ -1,583 +0,0 @@ -# Support for the Message Passing Interface (MPI) -# -# (C) Copyright 2005, 2006 Trustees of Indiana University -# (C) Copyright 2005 Douglas Gregor -# -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.) -# -# Authors: Douglas Gregor -# Andrew Lumsdaine -# -# ==== MPI Configuration ==== -# -# For many users, MPI support can be enabled simply by adding the following -# line to your user-config.jam file: -# -# using mpi ; -# -# This should auto-detect MPI settings based on the MPI wrapper compiler in -# your path, e.g., "mpic++". If the wrapper compiler is not in your path, or -# has a different name, you can pass the name of the wrapper compiler as the -# first argument to the mpi module: -# -# using mpi : /opt/mpich2-1.0.4/bin/mpiCC ; -# -# If your MPI implementation does not have a wrapper compiler, or the MPI -# auto-detection code does not work with your MPI's wrapper compiler, -# you can pass MPI-related options explicitly via the second parameter to the -# mpi module: -# -# using mpi : : lammpio lammpi++ -# mpi lam -# dl ; -# -# To see the results of MPI auto-detection, pass "--debug-configuration" on -# the bjam command line. -# -# The (optional) fourth argument configures Boost.MPI for running -# regression tests. These parameters specify the executable used to -# launch jobs (default: "mpirun") followed by any necessary arguments -# to this to run tests and tell the program to expect the number of -# processors to follow (default: "-np"). 
With the default parameters, -# for instance, the test harness will execute, e.g., -# -# mpirun -np 4 all_gather_test -# -# ==== Linking Against the MPI Libraries === -# -# To link against the MPI libraries, import the "mpi" module and add the -# following requirement to your target: -# -# /mpi//mpi -# -# Since MPI support is not always available, you should check -# "mpi.configured" before trying to link against the MPI libraries. - -import "class" : new ; -import common ; -import feature : feature ; -import generators ; -import os ; -import project ; -import property ; -import testing ; -import toolset ; -import type ; -import path ; - -# Make this module a project -project.initialize $(__name__) ; -project mpi ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Assuming the first part of the command line is the given prefix -# followed by some non-empty value, remove the first argument. Returns -# either nothing (if there was no prefix or no value) or a pair -# -# value rest-of-cmdline -# -# This is a subroutine of cmdline_to_features -rule add_feature ( prefix name cmdline ) -{ - local match = [ MATCH "^$(prefix)([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - - # If there was no value associated with the prefix, abort - if ! $(match) { - return ; - } - - local value = $(match[1]) ; - - if [ MATCH " +" : $(value) ] { - value = "\"$(value)\"" ; - } - - return "<$(name)>$(value)" $(match[2]) ; -} - -# Strip any end-of-line characters off the given string and return the -# result. -rule strip-eol ( string ) -{ - local match = [ MATCH "^(([A-Za-z0-9~`\.!@#$%^&*()_+={};:'\",.<>/?\\| -]|[|])*).*$" : $(string) ] ; - - if $(match) - { - return $(match[1]) ; - } - else - { - return $(string) ; - } -} - -# Split a command-line into a set of features. Certain kinds of -# compiler flags are recognized (e.g., -I, -D, -L, -l) and replaced -# with their Boost.Build equivalents (e.g., , , -# , ). 
All other arguments are introduced -# using the features in the unknown-features parameter, because we -# don't know how to deal with them. For instance, if your compile and -# correct. The incoming command line should be a string starting with -# an executable (e.g., g++ -I/include/path") and may contain any -# number of command-line arguments thereafter. The result is a list of -# features corresponding to the given command line, ignoring the -# executable. -rule cmdline_to_features ( cmdline : unknown-features ? ) -{ - local executable ; - local features ; - local otherflags ; - local result ; - - unknown-features ?= ; - - # Pull the executable out of the command line. At this point, the - # executable is just thrown away. - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - executable = $(match[1]) ; - cmdline = $(match[2]) ; - - # List the prefix/feature pairs that we will be able to transform. - # Every kind of parameter not mentioned here will be placed in both - # cxxflags and linkflags, because we don't know where they should go. - local feature_kinds-D = "define" ; - local feature_kinds-I = "include" ; - local feature_kinds-L = "library-path" ; - local feature_kinds-l = "find-shared-library" ; - - while $(cmdline) { - - # Check for one of the feature prefixes we know about. If we - # find one (and the associated value is nonempty), convert it - # into a feature. - local match = [ MATCH "^(-.)(.*)" : $(cmdline) ] ; - local matched ; - if $(match) && $(match[2]) { - local prefix = $(match[1]) ; - if $(feature_kinds$(prefix)) { - local name = $(feature_kinds$(prefix)) ; - local add = [ add_feature $(prefix) $(name) $(cmdline) ] ; - - if $(add) { - - if $(add[1]) = pthread - { - # Uhm. It's not really nice that this MPI implementation - # uses -lpthread as opposed to -pthread. We do want to - # set multi, instead of -lpthread. 
- result += "multi" ; - MPI_EXTRA_REQUIREMENTS += "multi" ; - } - else - { - result += $(add[1]) ; - } - - cmdline = $(add[2]) ; - matched = yes ; - } - } - } - - # If we haven't matched a feature prefix, just grab the command-line - # argument itself. If we can map this argument to a feature - # (e.g., -pthread -> multi), then do so; otherwise, - # and add it to the list of "other" flags that we don't - # understand. - if ! $(matched) { - match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - local value = $(match[1]) ; - cmdline = $(match[2]) ; - - # Check for multithreading support - if $(value) = "-pthread" || $(value) = "-pthreads" - { - result += "multi" ; - - # DPG: This is a hack intended to work around a BBv2 bug where - # requirements propagated from libraries are not checked for - # conflicts when BBv2 determines which "common" properties to - # apply to a target. In our case, the single property - # gets propagated from the common properties to Boost.MPI - # targets, even though multi is in the usage - # requirements of /mpi//mpi. - MPI_EXTRA_REQUIREMENTS += "multi" ; - } - else if [ MATCH "(.*[a-zA-Z0-9<>?-].*)" : $(value) ] { - otherflags += $(value) ; - } - } - } - - # If there are other flags that we don't understand, add them to the - # result as both and - if $(otherflags) { - for unknown in $(unknown-features) - { - result += "$(unknown)$(otherflags:J= )" ; - } - } - - return $(result) ; -} - -# Determine if it is safe to execute the given shell command by trying -# to execute it and determining whether the exit code is zero or -# not. Returns true for an exit code of zero, false otherwise. -local rule safe-shell-command ( cmdline ) -{ - local result = [ SHELL "$(cmdline) > /dev/null 2>/dev/null; if [ "$?" -eq "0" ]; then echo SSCOK; fi" ] ; - return [ MATCH ".*(SSCOK).*" : $(result) ] ; -} - -# Initialize the MPI module. -rule init ( mpicxx ? : options * : mpirun-with-options * ) -{ - if ! 
$(options) && $(.debug-configuration) - { - ECHO "===============MPI Auto-configuration===============" ; - } - - if ! $(mpicxx) && [ os.on-windows ] - { - # Try to auto-configure to the Microsoft Compute Cluster Pack - local cluster_pack_path_native = "C:\\Program Files\\Microsoft Compute Cluster Pack" ; - local cluster_pack_path = [ path.make $(cluster_pack_path_native) ] ; - if [ GLOB $(cluster_pack_path_native)\\Include : mpi.h ] - { - if $(.debug-configuration) - { - ECHO "Found Microsoft Compute Cluster Pack: $(cluster_pack_path_native)" ; - } - - # Pick up either the 32-bit or 64-bit library, depending on which address - # model the user has selected. Default to 32-bit. - options = $(cluster_pack_path)/Include - 64:$(cluster_pack_path)/Lib/amd64 - $(cluster_pack_path)/Lib/i386 - msmpi - msvc:_SECURE_SCL=0 - ; - - # Setup the "mpirun" equivalent (mpiexec) - .mpirun = "\"$(cluster_pack_path_native)\\Bin\\mpiexec.exe"\" ; - .mpirun_flags = -n ; - } - else if $(.debug-configuration) - { - ECHO "Did not find Microsoft Compute Cluster Pack in $(cluster_pack_path_native)." ; - } - } - - if ! $(options) - { - # Try to auto-detect options based on the wrapper compiler - local command = [ common.get-invocation-command mpi : mpic++ : $(mpicxx) ] ; - - if ! $(mpicxx) && ! $(command) - { - # Try "mpiCC", which is used by MPICH - command = [ common.get-invocation-command mpi : mpiCC ] ; - } - - if ! $(mpicxx) && ! $(command) - { - # Try "mpicxx", which is used by OpenMPI and MPICH2 - command = [ common.get-invocation-command mpi : mpicxx ] ; - } - - local result ; - local compile_flags ; - local link_flags ; - - if ! $(command) - { - # Do nothing: we'll complain later - } - # OpenMPI and newer versions of LAM-MPI have -showme:compile and - # -showme:link. 
- else if [ safe-shell-command "$(command) -showme:compile" ] && - [ safe-shell-command "$(command) -showme:link" ] - { - if $(.debug-configuration) - { - ECHO "Found recent LAM-MPI or Open MPI wrapper compiler: $(command)" ; - } - - compile_flags = [ SHELL "$(command) -showme:compile" ] ; - link_flags = [ SHELL "$(command) -showme:link" ] ; - - # Prepend COMPILER as the executable name, to match the format of - # other compilation commands. - compile_flags = "COMPILER $(compile_flags)" ; - link_flags = "COMPILER $(link_flags)" ; - } - # Look for LAM-MPI's -showme - else if [ safe-shell-command "$(command) -showme" ] - { - if $(.debug-configuration) - { - ECHO "Found older LAM-MPI wrapper compiler: $(command)" ; - } - - result = [ SHELL "$(command) -showme" ] ; - } - # Look for MPICH - else if [ safe-shell-command "$(command) -show" ] - { - if $(.debug-configuration) - { - ECHO "Found MPICH wrapper compiler: $(command)" ; - } - compile_flags = [ SHELL "$(command) -compile_info" ] ; - link_flags = [ SHELL "$(command) -link_info" ] ; - } - # Sun HPC and Ibm POE - else if [ SHELL "$(command) -v 2>/dev/null" ] - { - compile_flags = [ SHELL "$(command) -c -v -xtarget=native64 2>/dev/null" ] ; - - local back = [ MATCH "--------------------(.*)" : $(compile_flags) ] ; - if $(back) - { - # Sun HPC - if $(.debug-configuration) - { - ECHO "Found Sun MPI wrapper compiler: $(command)" ; - } - - compile_flags = [ MATCH "(.*)--------------------" : $(back) ] ; - compile_flags = [ MATCH "(.*)-v" : $(compile_flags) ] ; - link_flags = [ SHELL "$(command) -v -xtarget=native64 2>/dev/null" ] ; - link_flags = [ MATCH "--------------------(.*)" : $(link_flags) ] ; - link_flags = [ MATCH "(.*)--------------------" : $(link_flags) ] ; - - # strip out -v from compile options - local front = [ MATCH "(.*)-v" : $(link_flags) ] ; - local back = [ MATCH "-v(.*)" : $(link_flags) ] ; - link_flags = "$(front) $(back)" ; - front = [ MATCH "(.*)-xtarget=native64" : $(link_flags) ] ; - back = [ 
MATCH "-xtarget=native64(.*)" : $(link_flags) ] ; - link_flags = "$(front) $(back)" ; - } - else - { - # Ibm POE - if $(.debug-configuration) - { - ECHO "Found IBM MPI wrapper compiler: $(command)" ; - } - - # - compile_flags = [ SHELL "$(command) -c -v 2>/dev/null" ] ; - compile_flags = [ MATCH "(.*)exec: export.*" : $(compile_flags) ] ; - local front = [ MATCH "(.*)-v" : $(compile_flags) ] ; - local back = [ MATCH "-v(.*)" : $(compile_flags) ] ; - compile_flags = "$(front) $(back)" ; - front = [ MATCH "(.*)-c" : $(compile_flags) ] ; - back = [ MATCH "-c(.*)" : $(compile_flags) ] ; - compile_flags = "$(front) $(back)" ; - link_flags = $(compile_flags) ; - - # get location of mpif.h from mpxlf - local f_flags = [ SHELL "mpxlf -v 2>/dev/null" ] ; - f_flags = [ MATCH "(.*)exec: export.*" : $(f_flags) ] ; - front = [ MATCH "(.*)-v" : $(f_flags) ] ; - back = [ MATCH "-v(.*)" : $(f_flags) ] ; - f_flags = "$(front) $(back)" ; - f_flags = [ MATCH "xlf_r(.*)" : $(f_flags) ] ; - f_flags = [ MATCH "-F:mpxlf_r(.*)" : $(f_flags) ] ; - compile_flags = [ strip-eol $(compile_flags) ] ; - compile_flags = "$(compile_flags) $(f_flags)" ; - } - } - - if $(result) || $(compile_flags) && $(link_flags) - { - if $(result) - { - result = [ strip-eol $(result) ] ; - options = [ cmdline_to_features $(result) ] ; - } - else - { - compile_flags = [ strip-eol $(compile_flags) ] ; - link_flags = [ strip-eol $(link_flags) ] ; - - # Separately process compilation and link features, then combine - # them at the end. - local compile_features = [ cmdline_to_features $(compile_flags) - : "" ] ; - local link_features = [ cmdline_to_features $(link_flags) - : "" ] ; - options = $(compile_features) $(link_features) ; - } - - # If requested, display MPI configuration information. 
- if $(.debug-configuration) - { - if $(result) - { - ECHO " Wrapper compiler command line: $(result)" ; - } - else - { - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" - : $(compile_flags) ] ; - ECHO "MPI compilation flags: $(match[2])" ; - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" - : $(link_flags) ] ; - ECHO "MPI link flags: $(match[2])" ; - } - } - } - else - { - if $(command) - { - ECHO "MPI auto-detection failed: unknown wrapper compiler $(command)" ; - ECHO "Please report this error to the Boost mailing list: http://www.boost.org" ; - } - else if $(mpicxx) - { - ECHO "MPI auto-detection failed: unable to find wrapper compiler $(mpicxx)" ; - } - else - { - ECHO "MPI auto-detection failed: unable to find wrapper compiler `mpic++' or `mpiCC'" ; - } - ECHO "You will need to manually configure MPI support." ; - } - - } - - # Find mpirun (or its equivalent) and its flags - if ! $(.mpirun) - { - .mpirun = - [ common.get-invocation-command mpi : mpirun : $(mpirun-with-options[1]) ] ; - .mpirun_flags = $(mpirun-with-options[2-]) ; - .mpirun_flags ?= -np ; - } - - if $(.debug-configuration) - { - if $(options) - { - echo "MPI build features: " ; - ECHO $(options) ; - } - - if $(.mpirun) - { - echo "MPI launcher: $(.mpirun) $(.mpirun_flags)" ; - } - - ECHO "====================================================" ; - } - - if $(options) - { - .configured = true ; - - # Set up the "mpi" alias - alias mpi : : : : $(options) ; - } -} - -# States whether MPI has bee configured -rule configured ( ) -{ - return $(.configured) ; -} - -# Returs the "extra" requirements needed to build MPI. These requirements are -# part of the /mpi//mpi library target, but they need to be added to anything -# that uses MPI directly to work around bugs in BBv2's propagation of -# requirements. 
-rule extra-requirements ( ) -{ - return $(MPI_EXTRA_REQUIREMENTS) ; -} - -# Support for testing; borrowed from Python -type.register RUN_MPI_OUTPUT ; -type.register RUN_MPI : : TEST ; - -class mpi-test-generator : generator -{ - import property-set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - self.composing = true ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - # Generate an executable from the sources. This is the executable we will run. - local executable = - [ generators.construct $(project) $(name) : EXE : $(property-set) : $(sources) ] ; - - result = - [ construct-result $(executable[2-]) : $(project) $(name)-run : $(property-set) ] ; - } -} - -# Use mpi-test-generator to generate MPI tests from sources -generators.register - [ new mpi-test-generator mpi.capture-output : : RUN_MPI_OUTPUT ] ; - -generators.register-standard testing.expect-success - : RUN_MPI_OUTPUT : RUN_MPI ; - -# The number of processes to spawn when executing an MPI test. -feature mpi:processes : : free incidental ; - -# The flag settings on testing.capture-output do not -# apply to mpi.capture output at the moment. -# Redo this explicitly. -toolset.flags mpi.capture-output ARGS ; -rule capture-output ( target : sources * : properties * ) -{ - # Use the standard capture-output rule to run the tests - testing.capture-output $(target) : $(sources[1]) : $(properties) ; - - # Determine the number of processes we should run on. - local num_processes = [ property.select : $(properties) ] ; - num_processes = $(num_processes:G=) ; - - # serialize the MPI tests to avoid overloading systems - JAM_SEMAPHORE on $(target) = mpi-run-semaphore ; - - # We launch MPI processes using the "mpirun" equivalent specified by the user. - LAUNCHER on $(target) = - [ on $(target) return $(.mpirun) $(.mpirun_flags) $(num_processes) ] ; -} - -# Creates a set of test cases to be run through the MPI launcher. 
The name, sources, -# and requirements are the same as for any other test generator. However, schedule is -# a list of numbers, which indicates how many processes each test run will use. For -# example, passing 1 2 7 will run the test with 1 process, then 2 processes, then 7 -# 7 processes. The name provided is just the base name: the actual tests will be -# the name followed by a hypen, then the number of processes. -rule mpi-test ( name : sources * : requirements * : schedule * ) -{ - sources ?= $(name).cpp ; - schedule ?= 1 2 3 4 7 8 13 17 ; - - local result ; - for processes in $(schedule) - { - result += [ testing.make-test - run-mpi : $(sources) /boost/mpi//boost_mpi - : $(requirements) msvc:static $(processes) : $(name)-$(processes) ] ; - } - return $(result) ; -} diff --git a/jam-files/boost-build/tools/msvc-config.jam b/jam-files/boost-build/tools/msvc-config.jam deleted file mode 100644 index 6c71e3b0..00000000 --- a/jam-files/boost-build/tools/msvc-config.jam +++ /dev/null @@ -1,12 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for VisualStudio toolset. To use, just import this module. - -import toolset : using ; - -ECHO "warning: msvc-config.jam is deprecated. Use 'using msvc : all ;' instead." ; - -using msvc : all ; - diff --git a/jam-files/boost-build/tools/msvc.jam b/jam-files/boost-build/tools/msvc.jam deleted file mode 100644 index e33a66d2..00000000 --- a/jam-files/boost-build/tools/msvc.jam +++ /dev/null @@ -1,1392 +0,0 @@ -# Copyright (c) 2003 David Abrahams. -# Copyright (c) 2005 Vladimir Prus. -# Copyright (c) 2005 Alexey Pakhunov. -# Copyright (c) 2006 Bojan Resnik. -# Copyright (c) 2006 Ilya Sokolov. -# Copyright (c) 2007 Rene Rivera -# Copyright (c) 2008 Jurko Gospodnetic -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -################################################################################ -# -# MSVC Boost Build toolset module. -# -------------------------------- -# -# All toolset versions need to have their location either auto-detected or -# explicitly specified except for the special 'default' version that expects the -# environment to find the needed tools or report an error. -# -################################################################################ - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import mc ; -import midl ; -import os ; -import path ; -import pch ; -import property ; -import rc ; -import toolset ; -import type ; - - -type.register MANIFEST : manifest ; -feature.feature embed-manifest : on off : incidental propagated ; - -type.register PDB : pdb ; - -################################################################################ -# -# Public rules. -# -################################################################################ - -# Initialize a specific toolset version configuration. As the result, path to -# compiler and, possible, program names are set up, and will be used when that -# version of compiler is requested. For example, you might have: -# -# using msvc : 6.5 : cl.exe ; -# using msvc : 7.0 : Y:/foo/bar/cl.exe ; -# -# The version parameter may be ommited: -# -# using msvc : : Z:/foo/bar/cl.exe ; -# -# The following keywords have special meanings when specified as versions: -# - all - all detected but not yet used versions will be marked as used -# with their default options. -# - default - this is an equivalent to an empty version. -# -# Depending on a supplied version, detected configurations and presence 'cl.exe' -# in the path different results may be achieved. 
The following table describes -# the possible scenarios: -# -# Nothing "x.y" -# Passed Nothing "x.y" detected, detected, -# version detected detected cl.exe in path cl.exe in path -# -# default Error Use "x.y" Create "default" Use "x.y" -# all None Use all None Use all -# x.y - Use "x.y" - Use "x.y" -# a.b Error Error Create "a.b" Create "a.b" -# -# "x.y" - refers to a detected version; -# "a.b" - refers to an undetected version. -# -# FIXME: Currently the command parameter and the property parameter -# seem to overlap in duties. Remove this duplication. This seems to be related -# to why someone started preparing to replace init with configure rules. -# -rule init ( - # The msvc version being configured. When omitted the tools invoked when no - # explicit version is given will be configured. - version ? - - # The command used to invoke the compiler. If not specified: - # - if version is given, default location for that version will be - # searched - # - # - if version is not given, default locations for MSVC 9.0, 8.0, 7.1, 7.0 - # and 6.* will be searched - # - # - if compiler is not found in the default locations, PATH will be - # searched. - : command * - - # Options may include: - # - # All options shared by multiple toolset types as handled by the - # common.handle-options() rule, e.g. , , , - # & . - # - # - # - # - # - # - # - # Exact tool names to be used by this msvc toolset configuration. - # - # - # Command through which to pipe the output of running the compiler. - # For example to pass the output to STLfilt. - # - # - # Global setup command to invoke before running any of the msvc tools. - # It will be passed additional option parameters depending on the actual - # target platform. - # - # - # - # - # Platform specific setup command to invoke before running any of the - # msvc tools used when builing a target for a specific platform, e.g. - # when building a 32 or 64 bit executable. 
- : options * -) -{ - if $(command) - { - options += $(command) ; - } - configure $(version) : $(options) ; -} - - -# 'configure' is a newer version of 'init'. The parameter 'command' is passed as -# a part of the 'options' list. See the 'init' rule comment for more detailed -# information. -# -rule configure ( version ? : options * ) -{ - switch $(version) - { - case "all" : - if $(options) - { - errors.error "MSVC toolset configuration: options should be" - "empty when '$(version)' is specified." ; - } - - # Configure (i.e. mark as used) all registered versions. - local all-versions = [ $(.versions).all ] ; - if ! $(all-versions) - { - if $(.debug-configuration) - { - ECHO "notice: [msvc-cfg] Asked to configure all registered" - "msvc toolset versions when there are none currently" - "registered." ; - } - } - else - { - for local v in $(all-versions) - { - # Note that there is no need to skip already configured - # versions here as this will request configure-really rule - # to configure the version using default options which will - # in turn cause it to simply do nothing in case the version - # has already been configured. - configure-really $(v) ; - } - } - - case "default" : - configure-really : $(options) ; - - case * : - configure-really $(version) : $(options) ; - } -} - - -# Sets up flag definitions dependent on the compiler version used. -# - 'version' is the version of compiler in N.M format. -# - 'conditions' is the property set to be used as flag conditions. -# - 'toolset' is the toolset for which flag settings are to be defined. -# This makes the rule reusable for other msvc-option-compatible compilers. -# -rule configure-version-specific ( toolset : version : conditions ) -{ - toolset.push-checking-for-flags-module unchecked ; - # Starting with versions 7.0, the msvc compiler have the /Zc:forScope and - # /Zc:wchar_t options that improve C++ standard conformance, but those - # options are off by default. 
If we are sure that the msvc version is at - # 7.*, add those options explicitly. We can be sure either if user specified - # version 7.* explicitly or if we auto-detected the version ourselves. - if ! [ MATCH ^(6\\.) : $(version) ] - { - toolset.flags $(toolset).compile CFLAGS $(conditions) : /Zc:forScope /Zc:wchar_t ; - toolset.flags $(toolset).compile.c++ C++FLAGS $(conditions) : /wd4675 ; - - # Explicitly disable the 'function is deprecated' warning. Some msvc - # versions have a bug, causing them to emit the deprecation warning even - # with /W0. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off : /wd4996 ; - - if [ MATCH ^([78]\\.) : $(version) ] - { - # 64-bit compatibility warning deprecated since 9.0, see - # http://msdn.microsoft.com/en-us/library/yt4xw8fh.aspx - toolset.flags $(toolset).compile CFLAGS $(conditions)/all : /Wp64 ; - } - } - - # - # Processor-specific optimization. - # - - if [ MATCH ^([67]) : $(version) ] - { - # 8.0 deprecates some of the options. - toolset.flags $(toolset).compile CFLAGS $(conditions)/speed $(conditions)/space : /Ogiy /Gs ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/speed : /Ot ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/space : /Os ; - - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/ : /GB ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/i386 : /G3 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/i486 : /G4 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g5) : /G5 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g6) : /G6 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g7) : /G7 ; - - # Improve floating-point accuracy. Otherwise, some of C++ Boost's "math" - # tests will fail. 
- toolset.flags $(toolset).compile CFLAGS $(conditions) : /Op ; - - # 7.1 and below have single-threaded static RTL. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off/static/single : /ML ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/on/static/single : /MLd ; - } - else - { - # 8.0 and above adds some more options. - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/ : /favor:blend ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/$(.cpu-type-em64t) : /favor:EM64T ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/$(.cpu-type-amd64) : /favor:AMD64 ; - - # 8.0 and above only has multi-threaded static RTL. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off/static/single : /MT ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/on/static/single : /MTd ; - - # Specify target machine type so the linker will not need to guess. - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-amd64) : /MACHINE:X64 ; - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-i386) : /MACHINE:X86 ; - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-ia64) : /MACHINE:IA64 ; - - # Make sure that manifest will be generated even if there is no - # dependencies to put there. - toolset.flags $(toolset).link LINKFLAGS $(conditions)/off : /MANIFEST ; - } - toolset.pop-checking-for-flags-module ; -} - - -# Registers this toolset including all of its flags, features & generators. Does -# nothing on repeated calls. -# -rule register-toolset ( ) -{ - if ! msvc in [ feature.values toolset ] - { - register-toolset-really ; - } -} - - -# Declare action for creating static libraries. If library exists, remove it -# before adding files. See -# http://article.gmane.org/gmane.comp.lib.boost.build/4241 for rationale. -if [ os.name ] in NT -{ - # The 'DEL' command would issue a message to stdout if the file does not - # exist, so need a check. 
- actions archive - { - if exist "$(<[1])" DEL "$(<[1])" - $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } -} -else -{ - actions archive - { - $(.RM) "$(<[1])" - $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } -} - - -# For the assembler the following options are turned on by default: -# -# -Zp4 align structures to 4 bytes -# -Cp preserve case of user identifiers -# -Cx preserve case in publics, externs -# -actions compile.asm -{ - $(.ASM) -c -Zp4 -Cp -Cx -D$(DEFINES) $(ASMFLAGS) $(USER_ASMFLAGS) -Fo "$(<:W)" "$(>:W)" -} - - -rule compile.c ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets) : -TC ; - compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c.preprocess ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets) : -TC ; - preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c.pch ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets[1]) : -TC ; - get-rspline $(targets[2]) : -TC ; - local pch-source = [ on $(<) return $(PCH_SOURCE) ] ; - if $(pch-source) - { - DEPENDS $(<) : $(pch-source) ; - compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ; - } - else - { - compile-c-c++-pch $(targets) : $(sources) ; - } -} - -toolset.flags msvc YLOPTION : "-Yl" ; - -# Action for running the C/C++ compiler without using precompiled headers. -# -# WARNING: Synchronize any changes this in action with intel-win -# -# Notes regarding PDB generation, for when we use on/database -# -# 1. 
PDB_CFLAG is only set for on/database, ensuring that the /Fd flag is dropped if PDB_CFLAG is empty -# -# 2. When compiling executables's source files, PDB_NAME is set on a per-source file basis by rule compile-c-c++. -# The linker will pull these into the executable's PDB -# -# 3. When compiling library's source files, PDB_NAME is updated to .pdb for each source file by rule archive, -# as in this case the compiler must be used to create a single PDB for our library. -# -actions compile-c-c++ bind PDB_NAME -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -Fo"$(<[1]:W)" $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" $(.CC.FILTER) -} - -actions preprocess-c-c++ bind PDB_NAME -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -E $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" >"$(<[1]:W)" -} - -rule compile-c-c++ ( targets + : sources * ) -{ - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ; - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ; - PDB_NAME on $(<) = $(<:S=.pdb) ; -} - -rule preprocess-c-c++ ( targets + : sources * ) -{ - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ; - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ; - PDB_NAME on $(<) = $(<:S=.pdb) ; -} - -# Action for running the C/C++ compiler using precompiled headers. In addition -# to whatever else it needs to compile, this action also adds a temporary source -# .cpp file used to compile the precompiled headers themselves. -# -# The global .escaped-double-quote variable is used to avoid messing up Emacs -# syntax highlighting in the messy N-quoted code below. -actions compile-c-c++-pch -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" "@($(<[1]:W).cpp:E=#include $(.escaped-double-quote)$(>[1]:D=)$(.escaped-double-quote)$(.nl))" $(.CC.FILTER) -} - - -# Action for running the C/C++ compiler using precompiled headers. 
An already -# built source file for compiling the precompiled headers is expected to be -# given as one of the source parameters. -actions compile-c-c++-pch-s -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" $(.CC.FILTER) -} - - -rule compile.c++ ( targets + : sources * : properties * ) -{ - get-rspline $(targets) : -TP ; - compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - -rule compile.c++.preprocess ( targets + : sources * : properties * ) -{ - get-rspline $(targets) : -TP ; - preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c++.pch ( targets + : sources * : properties * ) -{ - get-rspline $(targets[1]) : -TP ; - get-rspline $(targets[2]) : -TP ; - local pch-source = [ on $(<) return $(PCH_SOURCE) ] ; - if $(pch-source) - { - DEPENDS $(<) : $(pch-source) ; - compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ; - } - else - { - compile-c-c++-pch $(targets) : $(sources) ; - } -} - - -# See midl.jam for details. 
-# -actions compile.idl -{ - $(.IDL) /nologo @"@($(<[1]:W).rsp:E=$(.nl)"$(>:W)" $(.nl)-D$(DEFINES) $(.nl)"-I$(INCLUDES:W)" $(.nl)-U$(UNDEFS) $(.nl)$(MIDLFLAGS) $(.nl)/tlb "$(<[1]:W)" $(.nl)/h "$(<[2]:W)" $(.nl)/iid "$(<[3]:W)" $(.nl)/proxy "$(<[4]:W)" $(.nl)/dlldata "$(<[5]:W)")" - $(.TOUCH_FILE) "$(<[4]:W)" - $(.TOUCH_FILE) "$(<[5]:W)" -} - - -actions compile.mc -{ - $(.MC) $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)" -} - - -actions compile.rc -{ - $(.RC) -l 0x409 -U$(UNDEFS) -D$(DEFINES) -I"$(INCLUDES:W)" -fo "$(<:W)" "$(>:W)" -} - - -rule link ( targets + : sources * : properties * ) -{ - if on in $(properties) - { - msvc.manifest $(targets) : $(sources) : $(properties) ; - } -} - -rule link.dll ( targets + : sources * : properties * ) -{ - DEPENDS $(<) : [ on $(<) return $(DEF_FILE) ] ; - if on in $(properties) - { - msvc.manifest.dll $(targets) : $(sources) : $(properties) ; - } -} - -# Incremental linking a DLL causes no end of problems: if the actual exports do -# not change, the import .lib file is never updated. Therefore, the .lib is -# always out-of-date and gets rebuilt every time. I am not sure that incremental -# linking is such a great idea in general, but in this case I am sure we do not -# want it. - -# Windows manifest is a new way to specify dependencies on managed DotNet -# assemblies and Windows native DLLs. The manifests are embedded as resources -# and are useful in any PE target (both DLL and EXE). 
- -if [ os.name ] in NT -{ - actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% - } - - actions manifest - { - if exist "$(<[1]).manifest" ( - $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);1" - ) - } - - actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% - } - - actions manifest.dll - { - if exist "$(<[1]).manifest" ( - $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);2" - ) - } -} -else -{ - actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } - - actions manifest - { - if test -e "$(<[1]).manifest"; then - $(.MT) -manifest "$(<[1]:W).manifest" "-outputresource:$(<[1]:W);1" - fi - } - - actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } - - actions manifest.dll - { - if test -e "$(<[1]).manifest"; then - $(.MT) -manifest 
"$(<[1]:W).manifest" "-outputresource:$(<[1]:W);2" - fi - } -} - -# this rule sets up the pdb file that will be used when generating static -# libraries and the debug-store option is database, so that the compiler -# puts all debug info into a single .pdb file named after the library -# -# Poking at source targets this way is probably not clean, but it's the -# easiest approach. -rule archive ( targets + : sources * : properties * ) -{ - PDB_NAME on $(>) = $(<:S=.pdb) ; -} - -################################################################################ -# -# Classes. -# -################################################################################ - -class msvc-pch-generator : pch-generator -{ - import property-set ; - - rule run-pch ( project name ? : property-set : sources * ) - { - # Searching for the header and source file in the sources. - local pch-header ; - local pch-source ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] H ] - { - pch-header = $(s) ; - } - else if - [ type.is-derived [ $(s).type ] CPP ] || - [ type.is-derived [ $(s).type ] C ] - { - pch-source = $(s) ; - } - } - - if ! $(pch-header) - { - errors.user-error "can not build pch without pch-header" ; - } - - # If we do not have the PCH source - that is fine. We will just create a - # temporary .cpp file in the action. - - local generated = [ generator.run $(project) $(name) - : [ property-set.create - # Passing of is a dirty trick, needed because - # non-composing generators with multiple inputs are subtly - # broken. 
For more detailed information see: - # https://zigzag.cs.msu.su:7813/boost.build/ticket/111 - $(pch-source) - [ $(property-set).raw ] ] - : $(pch-header) ] ; - - local pch-file ; - for local g in $(generated) - { - if [ type.is-derived [ $(g).type ] PCH ] - { - pch-file = $(g) ; - } - } - - return [ property-set.create $(pch-header) - $(pch-file) ] $(generated) ; - } -} - - -################################################################################ -# -# Local rules. -# -################################################################################ - -# Detects versions listed as '.known-versions' by checking registry information, -# environment variables & default paths. Supports both native Windows and -# Cygwin. -# -local rule auto-detect-toolset-versions ( ) -{ - if [ os.name ] in NT CYGWIN - { - # Get installation paths from the registry. - for local i in $(.known-versions) - { - if $(.version-$(i)-reg) - { - local vc-path ; - for local x in "" "Wow6432Node\\" - { - vc-path += [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\"$(x)"\\Microsoft\\"$(.version-$(i)-reg) - : "ProductDir" ] ; - } - - if $(vc-path) - { - vc-path = [ path.join [ path.make-NT $(vc-path[1]) ] "bin" ] ; - register-configuration $(i) : [ path.native $(vc-path[1]) ] ; - } - } - } - } - - # Check environment and default installation paths. - for local i in $(.known-versions) - { - if ! $(i) in [ $(.versions).all ] - { - register-configuration $(i) : [ default-path $(i) ] ; - } - } -} - - -# Worker rule for toolset version configuration. Takes an explicit version id or -# nothing in case it should configure the default toolset version (the first -# registered one or a new 'default' one in case no toolset versions have been -# registered yet). -# -local rule configure-really ( version ? : options * ) -{ - local v = $(version) ; - - # Decide what the 'default' version is. - if ! $(v) - { - # Take the first registered (i.e. auto-detected) version. 
- version = [ $(.versions).all ] ; - version = $(version[1]) ; - v = $(version) ; - - # Note: 'version' can still be empty at this point if no versions have - # been auto-detected. - version ?= "default" ; - } - - # Version alias -> real version number. - if $(.version-alias-$(version)) - { - version = $(.version-alias-$(version)) ; - } - - # Check whether the selected configuration is already in use. - if $(version) in [ $(.versions).used ] - { - # Allow multiple 'toolset.using' calls for the same configuration if the - # identical sets of options are used. - if $(options) && ( $(options) != [ $(.versions).get $(version) : options ] ) - { - errors.error "MSVC toolset configuration: Toolset version" - "'$(version)' already configured." ; - } - } - else - { - # Register a new configuration. - $(.versions).register $(version) ; - - # Add user-supplied to auto-detected options. - options = [ $(.versions).get $(version) : options ] $(options) ; - - # Mark the configuration as 'used'. - $(.versions).use $(version) ; - - # Generate conditions and save them. - local conditions = [ common.check-init-parameters msvc : version $(v) ] - ; - - $(.versions).set $(version) : conditions : $(conditions) ; - - local command = [ feature.get-values : $(options) ] ; - - # If version is specified, we try to search first in default paths, and - # only then in PATH. - command = [ common.get-invocation-command msvc : cl.exe : $(command) : - [ default-paths $(version) ] : $(version) ] ; - - common.handle-options msvc : $(conditions) : $(command) : $(options) ; - - if ! $(version) - { - # Even if version is not explicitly specified, try to detect the - # version from the path. - # FIXME: We currently detect both Microsoft Visual Studio 9.0 and - # 9.0express as 9.0 here. 
- if [ MATCH "(Microsoft Visual Studio 10)" : $(command) ] - { - version = 10.0 ; - } - else if [ MATCH "(Microsoft Visual Studio 9)" : $(command) ] - { - version = 9.0 ; - } - else if [ MATCH "(Microsoft Visual Studio 8)" : $(command) ] - { - version = 8.0 ; - } - else if [ MATCH "(NET 2003[\/\\]VC7)" : $(command) ] - { - version = 7.1 ; - } - else if [ MATCH "(Microsoft Visual C\\+\\+ Toolkit 2003)" : - $(command) ] - { - version = 7.1toolkit ; - } - else if [ MATCH "(.NET[\/\\]VC7)" : $(command) ] - { - version = 7.0 ; - } - else - { - version = 6.0 ; - } - } - - # Generate and register setup command. - - local below-8.0 = [ MATCH ^([67]\\.) : $(version) ] ; - - local cpu = i386 amd64 ia64 ; - if $(below-8.0) - { - cpu = i386 ; - } - - local setup-amd64 ; - local setup-i386 ; - local setup-ia64 ; - - if $(command) - { - # TODO: Note that if we specify a non-existant toolset version then - # this rule may find and use a corresponding compiler executable - # belonging to an incorrect toolset version. For example, if you - # have only MSVC 7.1 installed, have its executable on the path and - # specify you want Boost Build to use MSVC 9.0, then you want Boost - # Build to report an error but this may cause it to silently use the - # MSVC 7.1 compiler even though it thinks it is using the msvc-9.0 - # toolset version. - command = [ common.get-absolute-tool-path $(command[-1]) ] ; - } - - if $(command) - { - local parent = [ path.make $(command) ] ; - parent = [ path.parent $(parent) ] ; - parent = [ path.native $(parent) ] ; - - # Setup will be used if the command name has been specified. If - # setup is not specified explicitly then a default setup script will - # be used instead. Setup scripts may be global or arhitecture/ - # /platform/cpu specific. Setup options are used only in case of - # global setup scripts. - - # Default setup scripts provided with different VC distributions: - # - # VC 7.1 had only the vcvars32.bat script specific to 32 bit i386 - # builds. 
It was located in the bin folder for the regular version - # and in the root folder for the free VC 7.1 tools. - # - # Later 8.0 & 9.0 versions introduce separate platform specific - # vcvars*.bat scripts (e.g. 32 bit, 64 bit AMD or 64 bit Itanium) - # located in or under the bin folder. Most also include a global - # vcvarsall.bat helper script located in the root folder which runs - # one of the aforementioned vcvars*.bat scripts based on the options - # passed to it. So far only the version coming with some PlatformSDK - # distributions does not include this top level script but to - # support those we need to fall back to using the worker scripts - # directly in case the top level script can not be found. - - local global-setup = [ feature.get-values : $(options) ] ; - global-setup = $(global-setup[1]) ; - if ! $(below-8.0) - { - global-setup ?= [ locate-default-setup $(command) : $(parent) : - vcvarsall.bat ] ; - } - - local default-setup-amd64 = vcvarsx86_amd64.bat ; - local default-setup-i386 = vcvars32.bat ; - local default-setup-ia64 = vcvarsx86_ia64.bat ; - - # http://msdn2.microsoft.com/en-us/library/x4d2c09s(VS.80).aspx and - # http://msdn2.microsoft.com/en-us/library/x4d2c09s(vs.90).aspx - # mention an x86_IPF option, that seems to be a documentation bug - # and x86_ia64 is the correct option. - local default-global-setup-options-amd64 = x86_amd64 ; - local default-global-setup-options-i386 = x86 ; - local default-global-setup-options-ia64 = x86_ia64 ; - - # When using 64-bit Windows, and targeting 64-bit, it is possible to - # use a native 64-bit compiler, selected by the "amd64" & "ia64" - # parameters to vcvarsall.bat. There are two variables we can use -- - # PROCESSOR_ARCHITECTURE and PROCESSOR_IDENTIFIER. The first is - # 'x86' when running 32-bit Windows, no matter which processor is - # used, and 'AMD64' on 64-bit windows on x86 (either AMD64 or EM64T) - # Windows. 
- # - if [ MATCH ^(AMD64) : [ os.environ PROCESSOR_ARCHITECTURE ] ] - { - default-global-setup-options-amd64 = amd64 ; - } - # TODO: The same 'native compiler usage' should be implemented for - # the Itanium platform by using the "ia64" parameter. For this - # though we need someone with access to this platform who can find - # out how to correctly detect this case. - else if $(somehow-detect-the-itanium-platform) - { - default-global-setup-options-ia64 = ia64 ; - } - - local setup-prefix = "call " ; - local setup-suffix = " >nul"$(.nl) ; - if ! [ os.name ] in NT - { - setup-prefix = "cmd.exe /S /C call " ; - setup-suffix = " \">nul\" \"&&\" " ; - } - - for local c in $(cpu) - { - local setup-options ; - - setup-$(c) = [ feature.get-values : $(options) ] ; - - if ! $(setup-$(c))-is-not-empty - { - if $(global-setup)-is-not-empty - { - setup-$(c) = $(global-setup) ; - - # If needed we can easily add using configuration flags - # here for overriding which options get passed to the - # global setup command for which target platform: - # setup-options = [ feature.get-values : $(options) ] ; - - setup-options ?= $(default-global-setup-options-$(c)) ; - } - else - { - setup-$(c) = [ locate-default-setup $(command) : $(parent) : $(default-setup-$(c)) ] ; - } - } - - # Cygwin to Windows path translation. - setup-$(c) = "\""$(setup-$(c):W)"\"" ; - - # Append setup options to the setup name and add the final setup - # prefix & suffix. - setup-options ?= "" ; - setup-$(c) = $(setup-prefix)$(setup-$(c):J=" ")" "$(setup-options:J=" ")$(setup-suffix) ; - } - } - - # Get tool names (if any) and finish setup. 
- - compiler = [ feature.get-values : $(options) ] ; - compiler ?= cl ; - - linker = [ feature.get-values : $(options) ] ; - linker ?= link ; - - resource-compiler = [ feature.get-values : $(options) ] ; - resource-compiler ?= rc ; - - # Turn on some options for i386 assembler - # -coff generate COFF format object file (compatible with cl.exe output) - local default-assembler-amd64 = ml64 ; - local default-assembler-i386 = "ml -coff" ; - local default-assembler-ia64 = ias ; - - assembler = [ feature.get-values : $(options) ] ; - - idl-compiler = [ feature.get-values : $(options) ] ; - idl-compiler ?= midl ; - - mc-compiler = [ feature.get-values : $(options) ] ; - mc-compiler ?= mc ; - - manifest-tool = [ feature.get-values : $(options) ] ; - manifest-tool ?= mt ; - - local cc-filter = [ feature.get-values : $(options) ] ; - - for local c in $(cpu) - { - # Setup script is not required in some configurations. - setup-$(c) ?= "" ; - - local cpu-conditions = $(conditions)/$(.cpu-arch-$(c)) ; - - if $(.debug-configuration) - { - for local cpu-condition in $(cpu-conditions) - { - ECHO "notice: [msvc-cfg] condition: '$(cpu-condition)', setup: '$(setup-$(c))'" ; - } - } - - local cpu-assembler = $(assembler) ; - cpu-assembler ?= $(default-assembler-$(c)) ; - - toolset.flags msvc.compile .CC $(cpu-conditions) : $(setup-$(c))$(compiler) /Zm800 -nologo ; - toolset.flags msvc.compile .RC $(cpu-conditions) : $(setup-$(c))$(resource-compiler) ; - toolset.flags msvc.compile .ASM $(cpu-conditions) : $(setup-$(c))$(cpu-assembler) -nologo ; - toolset.flags msvc.link .LD $(cpu-conditions) : $(setup-$(c))$(linker) /NOLOGO /INCREMENTAL:NO ; - toolset.flags msvc.archive .LD $(cpu-conditions) : $(setup-$(c))$(linker) /lib /NOLOGO ; - toolset.flags msvc.compile .IDL $(cpu-conditions) : $(setup-$(c))$(idl-compiler) ; - toolset.flags msvc.compile .MC $(cpu-conditions) : $(setup-$(c))$(mc-compiler) ; - - toolset.flags msvc.link .MT $(cpu-conditions) : $(setup-$(c))$(manifest-tool) -nologo ; 
- - if $(cc-filter) - { - toolset.flags msvc .CC.FILTER $(cpu-conditions) : "|" $(cc-filter) ; - } - } - - # Set version-specific flags. - configure-version-specific msvc : $(version) : $(conditions) ; - } -} - - -# Returns the default installation path for the given version. -# -local rule default-path ( version ) -{ - # Use auto-detected path if possible. - local path = [ feature.get-values : [ $(.versions).get $(version) - : options ] ] ; - - if $(path) - { - path = $(path:D) ; - } - else - { - # Check environment. - if $(.version-$(version)-env) - { - local vc-path = [ os.environ $(.version-$(version)-env) ] ; - if $(vc-path) - { - vc-path = [ path.make $(vc-path) ] ; - vc-path = [ path.join $(vc-path) $(.version-$(version)-envpath) ] ; - vc-path = [ path.native $(vc-path) ] ; - - path = $(vc-path) ; - } - } - - # Check default path. - if ! $(path) && $(.version-$(version)-path) - { - path = [ path.native [ path.join $(.ProgramFiles) $(.version-$(version)-path) ] ] ; - } - } - - return $(path) ; -} - - -# Returns either the default installation path (if 'version' is not empty) or -# list of all known default paths (if no version is given) -# -local rule default-paths ( version ? ) -{ - local possible-paths ; - - if $(version) - { - possible-paths += [ default-path $(version) ] ; - } - else - { - for local i in $(.known-versions) - { - possible-paths += [ default-path $(i) ] ; - } - } - - return $(possible-paths) ; -} - - -rule get-rspline ( target : lang-opt ) -{ - CC_RSPLINE on $(target) = [ on $(target) return $(lang-opt) -U$(UNDEFS) - $(CFLAGS) $(C++FLAGS) $(OPTIONS) -c $(.nl)-D$(DEFINES) - $(.nl)\"-I$(INCLUDES:W)\" ] ; -} - -class msvc-linking-generator : linking-generator -{ - # Calls the base version. If necessary, also create a target for the - # manifest file.specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. 
- rule generated-targets ( sources + : property-set : project name ? ) - { - local result = [ linking-generator.generated-targets $(sources) - : $(property-set) : $(project) $(name) ] ; - - if $(result) - { - local name-main = [ $(result[0]).name ] ; - local action = [ $(result[0]).action ] ; - - if [ $(property-set).get ] = "on" - { - # We force exact name on PDB. The reason is tagging -- the tag rule may - # reasonably special case some target types, like SHARED_LIB. The tag rule - # will not catch PDB, and it cannot even easily figure if PDB is paired with - # SHARED_LIB or EXE or something else. Because PDB always get the - # same name as the main target, with .pdb as extension, just force it. - local target = [ class.new file-target $(name-main:S=.pdb) exact : PDB : $(project) : $(action) ] ; - local registered-target = [ virtual-target.register $(target) ] ; - if $(target) != $(registered-target) - { - $(action).replace-targets $(target) : $(registered-target) ; - } - result += $(registered-target) ; - } - - if [ $(property-set).get ] = "off" - { - # Manifest is evil target. It has .manifest appened to the name of - # main target, including extension. E.g. a.exe.manifest. We use 'exact' - # name because to achieve this effect. - local target = [ class.new file-target $(name-main).manifest exact : MANIFEST : $(project) : $(action) ] ; - local registered-target = [ virtual-target.register $(target) ] ; - if $(target) != $(registered-target) - { - $(action).replace-targets $(target) : $(registered-target) ; - } - result += $(registered-target) ; - } - } - return $(result) ; - } -} - - - -# Unsafe worker rule for the register-toolset() rule. Must not be called -# multiple times. -# -local rule register-toolset-really ( ) -{ - feature.extend toolset : msvc ; - - # Intel and msvc supposedly have link-compatible objects. - feature.subfeature toolset msvc : vendor : intel : propagated optional ; - - # Inherit MIDL flags. 
- toolset.inherit-flags msvc : midl ; - - # Inherit MC flags. - toolset.inherit-flags msvc : mc ; - - # Dynamic runtime comes only in MT flavour. - toolset.add-requirements - msvc,shared:multi ; - - # Declare msvc toolset specific features. - { - feature.feature debug-store : object database : propagated ; - feature.feature pch-source : : dependency free ; - } - - # Declare generators. - { - # TODO: Is it possible to combine these? Make the generators - # non-composing so that they do not convert each source into a separate - # .rsp file. - generators.register [ new msvc-linking-generator - msvc.link : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : EXE : msvc ] ; - generators.register [ new msvc-linking-generator - msvc.link.dll : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : SHARED_LIB IMPORT_LIB : msvc ] ; - - generators.register-archiver msvc.archive : OBJ : STATIC_LIB : msvc ; - generators.register-c-compiler msvc.compile.c++ : CPP : OBJ : msvc ; - generators.register-c-compiler msvc.compile.c : C : OBJ : msvc ; - generators.register-c-compiler msvc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : msvc ; - generators.register-c-compiler msvc.compile.c.preprocess : C : PREPROCESSED_C : msvc ; - - # Using 'register-c-compiler' adds the build directory to INCLUDES. - generators.register-c-compiler msvc.compile.rc : RC : OBJ(%_res) : msvc ; - generators.override msvc.compile.rc : rc.compile.resource ; - generators.register-standard msvc.compile.asm : ASM : OBJ : msvc ; - - generators.register-c-compiler msvc.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) : msvc ; - generators.override msvc.compile.idl : midl.compile.idl ; - - generators.register-standard msvc.compile.mc : MC : H RC : msvc ; - generators.override msvc.compile.mc : mc.compile ; - - # Note: the 'H' source type will catch both '.h' and '.hpp' headers as - # the latter have their HPP type derived from H. The type of compilation - # is determined entirely by the destination type. 
- generators.register [ new msvc-pch-generator msvc.compile.c.pch : H : C_PCH OBJ : on msvc ] ; - generators.register [ new msvc-pch-generator msvc.compile.c++.pch : H : CPP_PCH OBJ : on msvc ] ; - - generators.override msvc.compile.c.pch : pch.default-c-pch-generator ; - generators.override msvc.compile.c++.pch : pch.default-cpp-pch-generator ; - } - - toolset.flags msvc.compile PCH_FILE on : ; - toolset.flags msvc.compile PCH_SOURCE on : ; - toolset.flags msvc.compile PCH_HEADER on : ; - - # - # Declare flags for compilation. - # - - toolset.flags msvc.compile CFLAGS speed : /O2 ; - toolset.flags msvc.compile CFLAGS space : /O1 ; - - toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/$(.cpu-type-itanium) : /G1 ; - toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/$(.cpu-type-itanium2) : /G2 ; - - toolset.flags msvc.compile CFLAGS on/object : /Z7 ; - toolset.flags msvc.compile CFLAGS on/database : /Zi ; - toolset.flags msvc.compile CFLAGS off : /Od ; - toolset.flags msvc.compile CFLAGS off : /Ob0 ; - toolset.flags msvc.compile CFLAGS on : /Ob1 ; - toolset.flags msvc.compile CFLAGS full : /Ob2 ; - - toolset.flags msvc.compile CFLAGS on : /W3 ; - toolset.flags msvc.compile CFLAGS off : /W0 ; - toolset.flags msvc.compile CFLAGS all : /W4 ; - toolset.flags msvc.compile CFLAGS on : /WX ; - - toolset.flags msvc.compile C++FLAGS on/off/off : /EHs ; - toolset.flags msvc.compile C++FLAGS on/off/on : /EHsc ; - toolset.flags msvc.compile C++FLAGS on/on/off : /EHa ; - toolset.flags msvc.compile C++FLAGS on/on/on : /EHac ; - - # By default 8.0 enables rtti support while prior versions disabled it. We - # simply enable or disable it explicitly so we do not have to depend on this - # default behaviour. 
- toolset.flags msvc.compile CFLAGS on : /GR ; - toolset.flags msvc.compile CFLAGS off : /GR- ; - toolset.flags msvc.compile CFLAGS off/shared : /MD ; - toolset.flags msvc.compile CFLAGS on/shared : /MDd ; - - toolset.flags msvc.compile CFLAGS off/static/multi : /MT ; - toolset.flags msvc.compile CFLAGS on/static/multi : /MTd ; - - toolset.flags msvc.compile OPTIONS : ; - toolset.flags msvc.compile.c++ OPTIONS : ; - - toolset.flags msvc.compile PDB_CFLAG on/database : /Fd ; - - toolset.flags msvc.compile DEFINES ; - toolset.flags msvc.compile UNDEFS ; - toolset.flags msvc.compile INCLUDES ; - - # Declare flags for the assembler. - toolset.flags msvc.compile.asm USER_ASMFLAGS ; - - toolset.flags msvc.compile.asm ASMFLAGS on : "/Zi /Zd" ; - - toolset.flags msvc.compile.asm ASMFLAGS on : /W3 ; - toolset.flags msvc.compile.asm ASMFLAGS off : /W0 ; - toolset.flags msvc.compile.asm ASMFLAGS all : /W4 ; - toolset.flags msvc.compile.asm ASMFLAGS on : /WX ; - - toolset.flags msvc.compile.asm DEFINES ; - - # Declare flags for linking. - { - toolset.flags msvc.link PDB_LINKFLAG on/database : /PDB: ; # not used yet - toolset.flags msvc.link LINKFLAGS on : /DEBUG ; - toolset.flags msvc.link DEF_FILE ; - - # The linker disables the default optimizations when using /DEBUG so we - # have to enable them manually for release builds with debug symbols. 
- toolset.flags msvc LINKFLAGS on/off : /OPT:REF,ICF ; - - toolset.flags msvc LINKFLAGS console : /subsystem:console ; - toolset.flags msvc LINKFLAGS gui : /subsystem:windows ; - toolset.flags msvc LINKFLAGS wince : /subsystem:windowsce ; - toolset.flags msvc LINKFLAGS native : /subsystem:native ; - toolset.flags msvc LINKFLAGS auto : /subsystem:posix ; - - toolset.flags msvc.link OPTIONS ; - toolset.flags msvc.link LINKPATH ; - - toolset.flags msvc.link FINDLIBS_ST ; - toolset.flags msvc.link FINDLIBS_SA ; - toolset.flags msvc.link LIBRARY_OPTION msvc : "" : unchecked ; - toolset.flags msvc.link LIBRARIES_MENTIONED_BY_FILE : ; - } - - toolset.flags msvc.archive AROPTIONS ; -} - - -# Locates the requested setup script under the given folder and returns its full -# path or nothing in case the script can not be found. In case multiple scripts -# are found only the first one is returned. -# -# TODO: There used to exist a code comment for the msvc.init rule stating that -# we do not correctly detect the location of the vcvars32.bat setup script for -# the free VC7.1 tools in case user explicitly provides a path. This should be -# tested or simply remove this whole comment in case this toolset version is no -# longer important. -# -local rule locate-default-setup ( command : parent : setup-name ) -{ - local result = [ GLOB $(command) $(parent) : $(setup-name) ] ; - if $(result[1]) - { - return $(result[1]) ; - } -} - - -# Validates given path, registers found configuration and prints debug -# information about it. -# -local rule register-configuration ( version : path ? 
) -{ - if $(path) - { - local command = [ GLOB $(path) : cl.exe ] ; - - if $(command) - { - if $(.debug-configuration) - { - ECHO "notice: [msvc-cfg] msvc-$(version) detected, command: '$(command)'" ; - } - - $(.versions).register $(version) ; - $(.versions).set $(version) : options : $(command) ; - } - } -} - - -################################################################################ -# -# Startup code executed when loading this module. -# -################################################################################ - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Miscellaneous constants. -.RM = [ common.rm-command ] ; -.nl = " -" ; -.ProgramFiles = [ path.make [ common.get-program-files-dir ] ] ; -.escaped-double-quote = "\"" ; -.TOUCH_FILE = [ common.file-touch-command ] ; - -# List of all registered configurations. -.versions = [ new configurations ] ; - -# Supported CPU architectures. -.cpu-arch-i386 = - / - /32 - x86/ - x86/32 ; - -.cpu-arch-amd64 = - /64 - x86/64 ; - -.cpu-arch-ia64 = - ia64/ - ia64/64 ; - - -# Supported CPU types (only Itanium optimization options are supported from -# VC++ 2005 on). See -# http://msdn2.microsoft.com/en-us/library/h66s5s0e(vs.90).aspx for more -# detailed information. -.cpu-type-g5 = i586 pentium pentium-mmx ; -.cpu-type-g6 = i686 pentiumpro pentium2 pentium3 pentium3m pentium-m k6 - k6-2 k6-3 winchip-c6 winchip2 c3 c3-2 ; -.cpu-type-em64t = prescott nocona conroe conroe-xe conroe-l allendale mermon - mermon-xe kentsfield kentsfield-xe penryn wolfdale - yorksfield nehalem ; -.cpu-type-amd64 = k8 opteron athlon64 athlon-fx ; -.cpu-type-g7 = pentium4 pentium4m athlon athlon-tbird athlon-4 athlon-xp - athlon-mp $(.cpu-type-em64t) $(.cpu-type-amd64) ; -.cpu-type-itanium = itanium itanium1 merced ; -.cpu-type-itanium2 = itanium2 mckinley ; - - -# Known toolset versions, in order of preference. 
-.known-versions = 10.0 10.0express 9.0 9.0express 8.0 8.0express 7.1 7.1toolkit 7.0 6.0 ; - -# Version aliases. -.version-alias-6 = 6.0 ; -.version-alias-6.5 = 6.0 ; -.version-alias-7 = 7.0 ; -.version-alias-8 = 8.0 ; -.version-alias-9 = 9.0 ; -.version-alias-10 = 10.0 ; - -# Names of registry keys containing the Visual C++ installation path (relative -# to "HKEY_LOCAL_MACHINE\SOFTWARE\\Microsoft"). -.version-6.0-reg = "VisualStudio\\6.0\\Setup\\Microsoft Visual C++" ; -.version-7.0-reg = "VisualStudio\\7.0\\Setup\\VC" ; -.version-7.1-reg = "VisualStudio\\7.1\\Setup\\VC" ; -.version-8.0-reg = "VisualStudio\\8.0\\Setup\\VC" ; -.version-8.0express-reg = "VCExpress\\8.0\\Setup\\VC" ; -.version-9.0-reg = "VisualStudio\\9.0\\Setup\\VC" ; -.version-9.0express-reg = "VCExpress\\9.0\\Setup\\VC" ; -.version-10.0-reg = "VisualStudio\\10.0\\Setup\\VC" ; -.version-10.0express-reg = "VCExpress\\10.0\\Setup\\VC" ; - -# Visual C++ Toolkit 2003 does not store its installation path in the registry. -# The environment variable 'VCToolkitInstallDir' and the default installation -# path will be checked instead. -.version-7.1toolkit-path = "Microsoft Visual C++ Toolkit 2003" "bin" ; -.version-7.1toolkit-env = VCToolkitInstallDir ; - -# Path to the folder containing "cl.exe" relative to the value of the -# corresponding environment variable. -.version-7.1toolkit-envpath = "bin" ; - - -# Auto-detect all the available msvc installations on the system. -auto-detect-toolset-versions ; - - -# And finally trigger the actual Boost Build toolset registration. -register-toolset ; diff --git a/jam-files/boost-build/tools/notfile.jam b/jam-files/boost-build/tools/notfile.jam deleted file mode 100644 index 97a5b0e8..00000000 --- a/jam-files/boost-build/tools/notfile.jam +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2005 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import generators ; -import project ; -import targets ; -import toolset ; -import type ; - - -type.register NOTFILE_MAIN ; - - -class notfile-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - local action ; - local action-name = [ $(property-set).get ] ; - - local m = [ MATCH ^@(.*) : $(action-name) ] ; - - if $(m) - { - action = [ new action $(sources) : $(m[1]) - : $(property-set) ] ; - } - else - { - action = [ new action $(sources) : notfile.run - : $(property-set) ] ; - } - return [ virtual-target.register - [ new notfile-target $(name) : $(project) : $(action) ] ] ; - } -} - - -generators.register [ new notfile-generator notfile.main : : NOTFILE_MAIN ] ; - - -toolset.flags notfile.run ACTION : ; - - -actions run -{ - $(ACTION) -} - - -rule notfile ( target-name : action + : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - requirements += $(action) ; - - targets.main-target-alternative - [ new typed-target $(target-name) : $(project) : NOTFILE_MAIN - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - -IMPORT $(__name__) : notfile : : notfile ; diff --git a/jam-files/boost-build/tools/notfile.py b/jam-files/boost-build/tools/notfile.py deleted file mode 100644 index afbf68fb..00000000 --- a/jam-files/boost-build/tools/notfile.py +++ /dev/null @@ -1,51 +0,0 @@ -# Status: ported. -# Base revision: 64429. -# -# Copyright (c) 2005-2010 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - - -import b2.build.type as type -import b2.build.generators as generators -import b2.build.virtual_target as virtual_target -import b2.build.toolset as toolset -import b2.build.targets as targets - -from b2.manager import get_manager -from b2.util import bjam_signature - -type.register("NOTFILE_MAIN") - -class NotfileGenerator(generators.Generator): - - def run(self, project, name, ps, sources): - pass - action_name = ps.get('action')[0] - if action_name[0] == '@': - action = virtual_target.Action(get_manager(), sources, action_name[1:], ps) - else: - action = virtual_target.Action(get_manager(), sources, "notfile.run", ps) - - return [get_manager().virtual_targets().register( - virtual_target.NotFileTarget(name, project, action))] - -generators.register(NotfileGenerator("notfile.main", False, [], ["NOTFILE_MAIN"])) - -toolset.flags("notfile.run", "ACTION", [], [""]) - -get_manager().engine().register_action("notfile.run", "$(ACTION)") - -@bjam_signature((["target_name"], ["action"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"])) -def notfile(target_name, action, sources, requirements, default_build): - - requirements.append("" + action) - - return targets.create_typed_metatarget(target_name, "NOTFILE_MAIN", sources, requirements, - default_build, []) - - -get_manager().projects().add_rule("notfile", notfile) diff --git a/jam-files/boost-build/tools/package.jam b/jam-files/boost-build/tools/package.jam deleted file mode 100644 index 198c2231..00000000 --- a/jam-files/boost-build/tools/package.jam +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2005 Vladimir Prus. -# Copyright 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Provides mechanism for installing whole packages into a specific directory -# structure. This is opposed to the 'install' rule, that installs a number of -# targets to a single directory, and does not care about directory structure at -# all. - -# Example usage: -# -# package.install boost : -# : -# : -# : -# ; -# -# This will install binaries, libraries and headers to the 'proper' location, -# given by command line options --prefix, --exec-prefix, --bindir, --libdir and -# --includedir. -# -# The rule is just a convenient wrapper, avoiding the need to define several -# 'install' targets. -# -# The only install-related feature is . It will apply to -# headers only and if present, paths of headers relatively to source root will -# be retained after installing. If it is not specified, then "." is assumed, so -# relative paths in headers are always preserved. - -import "class" : new ; -import option ; -import project ; -import feature ; -import property ; -import stage ; -import targets ; -import modules ; - -feature.feature install-default-prefix : : free incidental ; - -rule install ( name package-name ? : requirements * : binaries * : libraries * : headers * ) -{ - package-name ?= $(name) ; - if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ] - { - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of libdir/includir that - # is specified via options in config files. - option.set bindir : ; - option.set libdir : ; - option.set includedir : ; - } - - # If is not specified, all headers are installed to - # prefix/include, no matter what their relative path is. Sometimes that is - # what is needed. 
- local install-source-root = [ property.select : - $(requirements) ] ; - install-source-root = $(install-source-root:G=) ; - requirements = [ property.change $(requirements) : ] ; - - local install-header-subdir = [ property.select : - $(requirements) ] ; - install-header-subdir = /$(install-header-subdir:G=) ; - install-header-subdir ?= "" ; - requirements = [ property.change $(requirements) : ] - ; - - # First, figure out all locations. Use the default if no prefix option - # given. - local prefix = [ get-prefix $(name) : $(requirements) ] ; - - # Architecture dependent files. - local exec-locate = [ option.get exec-prefix : $(prefix) ] ; - - # Binaries. - local bin-locate = [ option.get bindir : $(prefix)/bin ] ; - - # Object code libraries. - local lib-locate = [ option.get libdir : $(prefix)/lib ] ; - - # Source header files. - local include-locate = [ option.get includedir : $(prefix)/include ] ; - - stage.install $(name)-bin : $(binaries) : $(requirements) - $(bin-locate) ; - alias $(name)-lib : $(name)-lib-shared $(name)-lib-static ; - - # Since the install location of shared libraries differs on universe - # and cygwin, use target alternatives to make different targets. - # We should have used indirection conditioanl requirements, but it's - # awkward to pass bin-locate and lib-locate from there to another rule. - alias $(name)-lib-shared : $(name)-lib-shared-universe ; - alias $(name)-lib-shared : $(name)-lib-shared-cygwin : cygwin ; - - # For shared libraries, we install both explicitly specified one and the - # shared libraries that the installed executables depend on. - stage.install $(name)-lib-shared-universe : $(binaries) $(libraries) : $(requirements) - $(lib-locate) on SHARED_LIB ; - stage.install $(name)-lib-shared-cygwin : $(binaries) $(libraries) : $(requirements) - $(bin-locate) on SHARED_LIB ; - - # For static libraries, we do not care about executable dependencies, since - # static libraries are already incorporated into them. 
- stage.install $(name)-lib-static : $(libraries) : $(requirements) - $(lib-locate) on STATIC_LIB ; - stage.install $(name)-headers : $(headers) : $(requirements) - $(include-locate)$(install-header-subdir) - $(install-source-root) ; - alias $(name) : $(name)-bin $(name)-lib $(name)-headers ; - - local c = [ project.current ] ; - local project-module = [ $(c).project-module ] ; - module $(project-module) - { - explicit $(1)-bin $(1)-lib $(1)-headers $(1) $(1)-lib-shared $(1)-lib-static - $(1)-lib-shared-universe $(1)-lib-shared-cygwin ; - } -} - -rule install-data ( target-name : package-name : data * : requirements * ) -{ - package-name ?= target-name ; - if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ] - { - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of datarootdir - option.set datarootdir : ; - } - - local prefix = [ get-prefix $(package-name) : $(requirements) ] ; - local datadir = [ option.get datarootdir : $(prefix)/share ] ; - - stage.install $(target-name) - : $(data) - : $(requirements) $(datadir)/$(package-name) - ; - - local c = [ project.current ] ; - local project-module = [ $(c).project-module ] ; - module $(project-module) - { - explicit $(1) ; - } -} - -local rule get-prefix ( package-name : requirements * ) -{ - local prefix = [ option.get prefix : [ property.select - : $(requirements) ] ] ; - prefix = $(prefix:G=) ; - requirements = [ property.change $(requirements) : - ] ; - # Or some likely defaults if neither is given. - if ! 
$(prefix) - { - if [ modules.peek : NT ] { prefix = C:\\$(package-name) ; } - else if [ modules.peek : UNIX ] { prefix = /usr/local ; } - } - return $(prefix) ; -} - diff --git a/jam-files/boost-build/tools/package.py b/jam-files/boost-build/tools/package.py deleted file mode 100644 index aa081b4f..00000000 --- a/jam-files/boost-build/tools/package.py +++ /dev/null @@ -1,168 +0,0 @@ -# Status: ported -# Base revision: 64488 -# -# Copyright (c) 2005, 2010 Vladimir Prus. -# Copyright 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Provides mechanism for installing whole packages into a specific directory -# structure. This is opposed to the 'install' rule, that installs a number of -# targets to a single directory, and does not care about directory structure at -# all. - -# Example usage: -# -# package.install boost : -# : -# : -# : -# ; -# -# This will install binaries, libraries and headers to the 'proper' location, -# given by command line options --prefix, --exec-prefix, --bindir, --libdir and -# --includedir. -# -# The rule is just a convenient wrapper, avoiding the need to define several -# 'install' targets. -# -# The only install-related feature is . It will apply to -# headers only and if present, paths of headers relatively to source root will -# be retained after installing. If it is not specified, then "." is assumed, so -# relative paths in headers are always preserved. 
- -import b2.build.feature as feature -import b2.build.property as property -import b2.util.option as option -import b2.tools.stage as stage - -from b2.build.alias import alias - -from b2.manager import get_manager - -from b2.util import bjam_signature -from b2.util.utility import ungrist - - -import os - -feature.feature("install-default-prefix", [], ["free", "incidental"]) - -@bjam_signature((["name", "package_name", "?"], ["requirements", "*"], - ["binaries", "*"], ["libraries", "*"], ["headers", "*"])) -def install(name, package_name=None, requirements=[], binaries=[], libraries=[], headers=[]): - - requirements = requirements[:] - binaries = binaries[:] - libraries - - if not package_name: - package_name = name - - if option.get("prefix"): - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of libdir/includir that - # is specified via options in config files. - option.set("bindir", None) - option.set("libdir", None) - option.set("includedir", None) - - # If is not specified, all headers are installed to - # prefix/include, no matter what their relative path is. Sometimes that is - # what is needed. - install_source_root = property.select('install-source-root', requirements) - if install_source_root: - requirements = property.change(requirements, 'install-source-root', None) - - install_header_subdir = property.select('install-header-subdir', requirements) - if install_header_subdir: - install_header_subdir = ungrist(install_header_subdir[0]) - requirements = property.change(requirements, 'install-header-subdir', None) - - # First, figure out all locations. Use the default if no prefix option - # given. - prefix = get_prefix(name, requirements) - - # Architecture dependent files. - exec_locate = option.get("exec-prefix", prefix) - - # Binaries. - bin_locate = option.get("bindir", os.path.join(prefix, "bin")) - - # Object code libraries. 
- lib_locate = option.get("libdir", os.path.join(prefix, "lib")) - - # Source header files. - include_locate = option.get("includedir", os.path.join(prefix, "include")) - - stage.install(name + "-bin", binaries, requirements + ["" + bin_locate]) - - alias(name + "-lib", [name + "-lib-shared", name + "-lib-static"]) - - # Since the install location of shared libraries differs on universe - # and cygwin, use target alternatives to make different targets. - # We should have used indirection conditioanl requirements, but it's - # awkward to pass bin-locate and lib-locate from there to another rule. - alias(name + "-lib-shared", [name + "-lib-shared-universe"]) - alias(name + "-lib-shared", [name + "-lib-shared-cygwin"], ["cygwin"]) - - # For shared libraries, we install both explicitly specified one and the - # shared libraries that the installed executables depend on. - stage.install(name + "-lib-shared-universe", binaries + libraries, - requirements + ["" + lib_locate, "on", - "SHARED_LIB"]) - stage.install(name + "-lib-shared-cygwin", binaries + libraries, - requirements + ["" + bin_locate, "on", - "SHARED_LIB"]) - - # For static libraries, we do not care about executable dependencies, since - # static libraries are already incorporated into them. 
- stage.install(name + "-lib-static", libraries, requirements + - ["" + lib_locate, "on", "STATIC_LIB"]) - stage.install(name + "-headers", headers, requirements \ - + ["" + os.path.join(include_locate, s) for s in install_header_subdir] - + install_source_root) - - alias(name, [name + "-bin", name + "-lib", name + "-headers"]) - - pt = get_manager().projects().current() - - for subname in ["bin", "lib", "headers", "lib-shared", "lib-static", "lib-shared-universe", "lib-shared-cygwin"]: - pt.mark_targets_as_explicit([name + "-" + subname]) - -@bjam_signature((["target_name"], ["package_name"], ["data", "*"], ["requirements", "*"])) -def install_data(target_name, package_name, data, requirements): - if not package_name: - package_name = target_name - - if option.get("prefix"): - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of datarootdir - option.set("datarootdir", None) - - prefix = get_prefix(package_name, requirements) - datadir = option.get("datarootdir", os.path.join(prefix, "share")) - - stage.install(target_name, data, - requirements + ["" + os.path.join(datadir, package_name)]) - - get_manager().projects().current().mark_targets_as_explicit([target_name]) - -def get_prefix(package_name, requirements): - - specified = property.select("install-default-prefix", requirements) - if specified: - specified = ungrist(specified[0]) - prefix = option.get("prefix", specified) - requirements = property.change(requirements, "install-default-prefix", None) - # Or some likely defaults if neither is given. 
- if not prefix: - if os.name == "nt": - prefix = "C:\\" + package_name - elif os.name == "posix": - prefix = "/usr/local" - - return prefix - diff --git a/jam-files/boost-build/tools/pathscale.jam b/jam-files/boost-build/tools/pathscale.jam deleted file mode 100644 index 454e3454..00000000 --- a/jam-files/boost-build/tools/pathscale.jam +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2006 Noel Belcourt -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import toolset : flags ; -import feature ; -import type ; -import common ; -import fortran ; - -feature.extend toolset : pathscale ; -toolset.inherit pathscale : unix ; -generators.override pathscale.prebuilt : builtin.prebuilt ; -generators.override pathscale.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.pathscale.com/docs.html - -rule init ( version ? 
: command * : options * ) -{ - command = [ common.get-invocation-command pathscale : pathCC : $(command) - : /opt/ekopath/bin ] ; - - # Determine the version - local command-string = $(command:J=" ") ; - if $(command) - { - version ?= [ MATCH "^([0-9.]+)" - : [ SHELL "$(command-string) -dumpversion" ] ] ; - } - - local condition = [ common.check-init-parameters pathscale - : version $(version) ] ; - - common.handle-options pathscale : $(condition) : $(command) : $(options) ; - - toolset.flags pathscale.compile.fortran90 OPTIONS $(condition) : - [ feature.get-values : $(options) ] : unchecked ; - - command_c = $(command_c[1--2]) $(command[-1]:B=pathcc) ; - - toolset.flags pathscale CONFIG_C_COMMAND $(condition) : $(command_c) ; - - # fortran support - local f-command = [ common.get-invocation-command pathscale : pathf90 : $(command) ] ; - local command_f = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ; - local command_f90 = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ; - - toolset.flags pathscale CONFIG_F_COMMAND $(condition) : $(command_f) ; - toolset.flags pathscale CONFIG_F90_COMMAND $(condition) : $(command_f90) ; - - # always link lib rt to resolve clock_gettime() - flags pathscale.link FINDLIBS-SA : rt : unchecked ; -} - -# Declare generators -generators.register-c-compiler pathscale.compile.c : C : OBJ : pathscale ; -generators.register-c-compiler pathscale.compile.c++ : CPP : OBJ : pathscale ; -generators.register-fortran-compiler pathscale.compile.fortran : FORTRAN : OBJ : pathscale ; -generators.register-fortran90-compiler pathscale.compile.fortran90 : FORTRAN90 : OBJ : pathscale ; - -# Declare flags and actions for compilation -flags pathscale.compile OPTIONS off : -O0 ; -flags pathscale.compile OPTIONS speed : -O3 ; -flags pathscale.compile OPTIONS space : -Os ; - -flags pathscale.compile OPTIONS off : -noinline ; -flags pathscale.compile OPTIONS on : -inline ; -flags pathscale.compile OPTIONS full : -inline ; - -flags pathscale.compile OPTIONS off : 
-woffall ; -flags pathscale.compile OPTIONS on : -Wall ; -flags pathscale.compile OPTIONS all : -Wall -pedantic ; -flags pathscale.compile OPTIONS on : -Werror ; - -flags pathscale.compile OPTIONS on : -ggdb ; -flags pathscale.compile OPTIONS on : -pg ; -flags pathscale.compile OPTIONS shared : -fPIC ; -flags pathscale.compile OPTIONS 32 : -m32 ; -flags pathscale.compile OPTIONS 64 : -m64 ; - -flags pathscale.compile USER_OPTIONS ; -flags pathscale.compile.c++ USER_OPTIONS ; -flags pathscale.compile DEFINES ; -flags pathscale.compile INCLUDES ; - -flags pathscale.compile.fortran USER_OPTIONS ; -flags pathscale.compile.fortran90 USER_OPTIONS ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.fortran90 ( targets * : sources * : properties * ) -{ - # the space rule inserts spaces between targets and it's necessary - SPACE on $(targets) = " " ; - # Serialize execution of the compile.fortran90 action - # F90 source must be compiled in a particular order so we - # serialize the build as a parallel F90 compile might fail - JAM_SEMAPHORE on $(targets) = pathscale-f90-semaphore ; -} - -actions compile.fortran90 -{ - "$(CONFIG_F90_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -module $(<[1]:D) -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags pathscale.link OPTIONS on : -ggdb -rdynamic ; -# Strip the binary when no debugging is needed -flags pathscale.link OPTIONS off : -g0 ; -flags pathscale.link OPTIONS on : -pg ; -flags pathscale.link USER_OPTIONS ; -flags pathscale.link LINKPATH ; -flags pathscale.link FINDLIBS-ST ; -flags pathscale.link FINDLIBS-SA ; -flags pathscale.link 
FINDLIBS-SA multi : pthread ; -flags pathscale.link LIBRARIES ; -flags pathscale.link LINK-RUNTIME static : static ; -flags pathscale.link LINK-RUNTIME shared : dynamic ; -flags pathscale.link RPATH ; -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags pathscale.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) -} - -# Declare action for creating static libraries -# "$(CONFIG_COMMAND)" -ar -o "$(<)" "$(>)" -actions piecemeal archive -{ - ar $(ARFLAGS) ru "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/pch.jam b/jam-files/boost-build/tools/pch.jam deleted file mode 100644 index 0c6e98fa..00000000 --- a/jam-files/boost-build/tools/pch.jam +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##### Using Precompiled Headers (Quick Guide) ##### -# -# Make precompiled mypch.hpp: -# -# import pch ; -# -# cpp-pch mypch -# : # sources -# mypch.hpp -# : # requiremnts -# msvc:mypch.cpp -# ; -# -# Add cpp-pch to sources: -# -# exe hello -# : main.cpp hello.cpp mypch -# ; - -import "class" : new ; -import type ; -import feature ; -import generators ; - -type.register PCH : pch ; - -type.register C_PCH : : PCH ; -type.register CPP_PCH : : PCH ; - -# Control precompiled header (PCH) generation. -feature.feature pch : - on - off - : propagated ; - - -feature.feature pch-header : : free dependency ; -feature.feature pch-file : : free dependency ; - -# Base PCH generator. The 'run' method has the logic to prevent this generator -# from being run unless it's being used for a top-level PCH target. -class pch-generator : generator -{ - import property-set ; - - rule action-class ( ) - { - return compile-action ; - } - - rule run ( project name ? : property-set : sources + ) - { - if ! $(name) - { - # Unless this generator is invoked as the top-most generator for a - # main target, fail. This allows using 'H' type as input type for - # this generator, while preventing Boost.Build to try this generator - # when not explicitly asked for. - # - # One bad example is msvc, where pch generator produces both PCH - # target and OBJ target, so if there's any header generated (like by - # bison, or by msidl), we'd try to use pch generator to get OBJ from - # that H, which is completely wrong. By restricting this generator - # only to pch main target, such problem is solved. - } - else - { - local r = [ run-pch $(project) $(name) - : [ $(property-set).add-raw BOOST_BUILD_PCH_ENABLED ] - : $(sources) ] ; - return [ generators.add-usage-requirements $(r) - : BOOST_BUILD_PCH_ENABLED ] ; - } - } - - # This rule must be overridden by the derived classes. - rule run-pch ( project name ? 
: property-set : sources + ) - { - } -} - - -# NOTE: requirements are empty, default pch generator can be applied when -# pch=off. -generators.register - [ new dummy-generator pch.default-c-pch-generator : : C_PCH ] ; -generators.register - [ new dummy-generator pch.default-cpp-pch-generator : : CPP_PCH ] ; diff --git a/jam-files/boost-build/tools/pch.py b/jam-files/boost-build/tools/pch.py deleted file mode 100644 index 21d3db09..00000000 --- a/jam-files/boost-build/tools/pch.py +++ /dev/null @@ -1,83 +0,0 @@ -# Status: Being ported by Steven Watanabe -# Base revision: 47077 -# -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov -# Copyright (c) 2008 Steven Watanabe -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##### Using Precompiled Headers (Quick Guide) ##### -# -# Make precompiled mypch.hpp: -# -# import pch ; -# -# cpp-pch mypch -# : # sources -# mypch.hpp -# : # requiremnts -# msvc:mypch.cpp -# ; -# -# Add cpp-pch to sources: -# -# exe hello -# : main.cpp hello.cpp mypch -# ; - -from b2.build import type, feature, generators - -type.register('PCH', ['pch']) -type.register('C_PCH', [], 'PCH') -type.register('CPP_PCH', [], 'PCH') - -# Control precompiled header (PCH) generation. -feature.feature('pch', - ['on', 'off'], - ['propagated']) - -feature.feature('pch-header', [], ['free', 'dependency']) -feature.feature('pch-file', [], ['free', 'dependency']) - -class PchGenerator(generators.Generator): - """ - Base PCH generator. The 'run' method has the logic to prevent this generator - from being run unless it's being used for a top-level PCH target. - """ - def action_class(self): - return 'compile-action' - - def run(self, project, name, prop_set, sources): - if not name: - # Unless this generator is invoked as the top-most generator for a - # main target, fail. 
This allows using 'H' type as input type for - # this generator, while preventing Boost.Build to try this generator - # when not explicitly asked for. - # - # One bad example is msvc, where pch generator produces both PCH - # target and OBJ target, so if there's any header generated (like by - # bison, or by msidl), we'd try to use pch generator to get OBJ from - # that H, which is completely wrong. By restricting this generator - # only to pch main target, such problem is solved. - pass - else: - r = self.run_pch(project, name, - prop_set.add_raw('BOOST_BUILD_PCH_ENABLED'), - sources) - return generators.add_usage_requirements( - r, ['BOOST_BUILD_PCH_ENABLED']) - - # This rule must be overridden by the derived classes. - def run_pch(self, project, name, prop_set, sources): - pass - -#FIXME: dummy-generator in builtins.jam needs to be ported. -# NOTE: requirements are empty, default pch generator can be applied when -# pch=off. -###generators.register( -### [ new dummy-generator pch.default-c-pch-generator : : C_PCH ] ; -###generators.register -### [ new dummy-generator pch.default-cpp-pch-generator : : CPP_PCH ] ; diff --git a/jam-files/boost-build/tools/pgi.jam b/jam-files/boost-build/tools/pgi.jam deleted file mode 100644 index 3a35c644..00000000 --- a/jam-files/boost-build/tools/pgi.jam +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright Noel Belcourt 2007. -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import fortran ; -import type ; -import common ; -import gcc ; - -feature.extend toolset : pgi ; -toolset.inherit pgi : unix ; -generators.override pgi.prebuilt : builtin.lib-generator ; -generators.override pgi.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.pgroup.com/resources/docs.htm - -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters pgi : version $(version) ] ; - - local l_command = [ common.get-invocation-command pgi : pgCC : $(command) ] ; - - common.handle-options pgi : $(condition) : $(l_command) : $(options) ; - - command_c = $(command_c[1--2]) $(l_command[-1]:B=cc) ; - - toolset.flags pgi CONFIG_C_COMMAND $(condition) : $(command_c) ; - - flags pgi.compile DEFINES $(condition) : - [ feature.get-values : $(options) ] : unchecked ; - - # IOV_MAX support - flags pgi.compile DEFINES $(condition) : __need_IOV_MAX : unchecked ; - - # set link flags - flags pgi.link FINDLIBS-ST : [ - feature.get-values : $(options) ] : unchecked ; - - # always link lib rt to resolve clock_gettime() - flags pgi.link FINDLIBS-SA : rt [ - feature.get-values : $(options) ] : unchecked ; - - gcc.init-link-flags pgi gnu $(condition) ; -} - -# Declare generators -generators.register-c-compiler pgi.compile.c : C : OBJ : pgi ; -generators.register-c-compiler pgi.compile.c++ : CPP : OBJ : pgi ; -generators.register-fortran-compiler pgi.compile.fortran : FORTRAN : OBJ : pgi ; - -# Declare flags and actions for compilation -flags pgi.compile OPTIONS : -Kieee ; -flags pgi.compile OPTIONS shared : -fpic -fPIC ; -flags pgi.compile OPTIONS on : -gopt ; -flags pgi.compile OPTIONS on : -xprofile=tcov ; -flags pgi.compile OPTIONS speed : -fast -Mx,8,0x10000000 ; -flags pgi.compile 
OPTIONS space : -xO2 -xspace ; -# flags pgi.compile OPTIONS multi : -mt ; - -flags pgi.compile OPTIONS off : -Minform=severe ; -flags pgi.compile OPTIONS on : -Minform=warn ; - -flags pgi.compile.c++ OPTIONS off : -INLINE:none ; - -flags pgi.compile OPTIONS ; -flags pgi.compile.c++ OPTIONS ; -flags pgi.compile DEFINES ; -flags pgi.compile INCLUDES ; - -flags pgi.compile.fortran OPTIONS ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags pgi.link OPTIONS on : -gopt ; -# Strip the binary when no debugging is needed -flags pgi.link OPTIONS off : -s ; -flags pgi.link OPTIONS on : -xprofile=tcov ; -flags pgi.link OPTIONS ; -flags pgi.link OPTIONS shared : -fpic -fPIC ; -flags pgi.link LINKPATH ; -flags pgi.link FINDLIBS-ST ; -flags pgi.link FINDLIBS-SA ; -flags pgi.link FINDLIBS-SA multi : pthread rt ; -flags pgi.link LIBRARIES ; -flags pgi.link LINK-RUNTIME static : static ; -flags pgi.link LINK-RUNTIME shared : dynamic ; -flags pgi.link RPATH ; - -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags pgi.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -# reddish can only link statically and, somehow, the presence of -Bdynamic on the link line -# marks the executable as a dynamically linked exec even though no dynamic libraries are supplied. -# Yod on redstorm refuses to load an executable that is dynamically linked. -# removing the dynamic link options should get us where we need to be on redstorm. 
-# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bstatic -l$(FINDLIBS-ST) -Bdynamic -l$(FINDLIBS-SA) -B$(LINK-RUNTIME) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -shared -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" -Wl,-h -Wl,$(<[1]:D=) "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -actions updated together piecemeal pgi.archive -{ - ar -rc$(ARFLAGS:E=) "$(<)" "$(>)" -} - diff --git a/jam-files/boost-build/tools/python-config.jam b/jam-files/boost-build/tools/python-config.jam deleted file mode 100644 index 40aa825b..00000000 --- a/jam-files/boost-build/tools/python-config.jam +++ /dev/null @@ -1,27 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Python tools and librries. To use, just import this module. 
- -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - for local R in 2.4 2.3 2.2 - { - local python-path = [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\$(R)\\InstallPath" ] ; - local python-version = $(R) ; - - if $(python-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using python ":" $(python-version) ":" $(python-path) ; - } - using python : $(python-version) : $(python-path) ; - } - } -} diff --git a/jam-files/boost-build/tools/python.jam b/jam-files/boost-build/tools/python.jam deleted file mode 100644 index 97a9f9a5..00000000 --- a/jam-files/boost-build/tools/python.jam +++ /dev/null @@ -1,1267 +0,0 @@ -# Copyright 2004 Vladimir Prus. -# Distributed under the Boost Software License, Version 1.0. (See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -# Support for Python and the the Boost.Python library. -# -# This module defines -# -# - a project 'python' with a target 'python' in it, that corresponds to the -# python library -# -# - a main target rule 'python-extension' which can be used to build a python -# extension. -# -# Extensions that use Boost.Python must explicitly link to it. - -import type ; -import testing ; -import generators ; -import project ; -import errors ; -import targets ; -import "class" : new ; -import os ; -import common ; -import toolset ; -import regex ; -import numbers ; -import string ; -import property ; -import sequence ; -import path ; -import feature ; -import set ; -import builtin ; -import version ; - - -# Make this module a project. -project.initialize $(__name__) ; -project python ; - -# Save the project so that if 'init' is called several times we define new -# targets in the python project, not in whatever project we were called by. -.project = [ project.current ] ; - -# Dynamic linker lib. Necessary to specify it explicitly on some platforms. -lib dl ; -# This contains 'openpty' function need by python. 
Again, on some system need to -# pass this to linker explicitly. -lib util ; -# Python uses pthread symbols. -lib pthread ; -# Extra library needed by phtread on some platforms. -lib rt ; - -# The pythonpath feature specifies additional elements for the PYTHONPATH -# environment variable, set by run-pyd. For example, pythonpath can be used to -# access Python modules that are part of the product being built, but are not -# installed in the development system's default paths. -feature.feature pythonpath : : free optional path ; - -# Initializes the Python toolset. Note that all parameters are optional. -# -# - version -- the version of Python to use. Should be in Major.Minor format, -# for example 2.3. Do not include the subminor version. -# -# - cmd-or-prefix: Preferably, a command that invokes a Python interpreter. -# Alternatively, the installation prefix for Python libraries and includes. If -# empty, will be guessed from the version, the platform's installation -# patterns, and the python executables that can be found in PATH. -# -# - includes: the include path to Python headers. If empty, will be guessed. -# -# - libraries: the path to Python library binaries. If empty, will be guessed. -# On MacOS/Darwin, you can also pass the path of the Python framework. -# -# - condition: if specified, should be a set of properties that are matched -# against the build configuration when Boost.Build selects a Python -# configuration to use. -# -# - extension-suffix: A string to append to the name of extension modules before -# the true filename extension. Ordinarily we would just compute this based on -# the value of the feature. However ubuntu's python-dbg -# package uses the windows convention of appending _d to debug-build extension -# modules. We have no way of detecting ubuntu, or of probing python for the -# "_d" requirement, and if you configure and build python using -# --with-pydebug, you'll be using the standard *nix convention. 
Defaults to "" -# (or "_d" when targeting windows and is set). -# -# Example usage: -# -# using python : 2.3 ; -# using python : 2.3 : /usr/local/bin/python ; -# -rule init ( version ? : cmd-or-prefix ? : includes * : libraries ? - : condition * : extension-suffix ? ) -{ - project.push-current $(.project) ; - - debug-message Configuring python... ; - for local v in version cmd-or-prefix includes libraries condition - { - if $($(v)) - { - debug-message " user-specified "$(v): \"$($(v))\" ; - } - } - - configure $(version) : $(cmd-or-prefix) : $(includes) : $(libraries) : $(condition) : $(extension-suffix) ; - - project.pop-current ; -} - -# A simpler version of SHELL that grabs stderr as well as stdout, but returns -# nothing if there was an error. -# -local rule shell-cmd ( cmd ) -{ - debug-message running command '$(cmd)" 2>&1"' ; - x = [ SHELL $(cmd)" 2>&1" : exit-status ] ; - if $(x[2]) = 0 - { - return $(x[1]) ; - } - else - { - return ; - } -} - - -# Try to identify Cygwin symlinks. Invoking such a file directly as an NT -# executable from a native Windows build of bjam would be fatal to the bjam -# process. One /can/ invoke them through sh.exe or bash.exe, if you can prove -# that those are not also symlinks. ;-) -# -# If a symlink is found returns non-empty; we try to extract the target of the -# symlink from the file and return that. -# -# Note: 1. only works on NT 2. path is a native path. -local rule is-cygwin-symlink ( path ) -{ - local is-symlink = ; - - # Look for a file with the given path having the S attribute set, as cygwin - # symlinks do. /-C means "do not use thousands separators in file sizes." - local dir-listing = [ shell-cmd "DIR /-C /A:S \""$(path)"\"" ] ; - - if $(dir-listing) - { - # Escape any special regex characters in the base part of the path. - local base-pat = [ regex.escape $(path:D=) : ].[()*+?|\\$^ : \\ ] ; - - # Extract the file's size from the directory listing. 
- local size-of-system-file = [ MATCH "([0-9]+) "$(base-pat) : $(dir-listing) : 1 ] ; - - # If the file has a reasonably small size, look for the special symlink - # identification text. - if $(size-of-system-file) && [ numbers.less $(size-of-system-file) 1000 ] - { - local link = [ SHELL "FIND /OFF \"!\" \""$(path)"\" 2>&1" ] ; - if $(link[2]) != 0 - { - local nl = " - -" ; - is-symlink = [ MATCH ".*!([^"$(nl)"]*)" : $(link[1]) : 1 ] ; - if $(is-symlink) - { - is-symlink = [ *nix-path-to-native $(is-symlink) ] ; - is-symlink = $(is-symlink:R=$(path:D)) ; - } - - } - } - } - return $(is-symlink) ; -} - - -# Append ext to each member of names that does not contain '.'. -# -local rule default-extension ( names * : ext * ) -{ - local result ; - for local n in $(names) - { - switch $(n) - { - case *.* : result += $(n) ; - case * : result += $(n)$(ext) ; - } - } - return $(result) ; -} - - -# Tries to determine whether invoking "cmd" would actually attempt to launch a -# cygwin symlink. -# -# Note: only works on NT. -# -local rule invokes-cygwin-symlink ( cmd ) -{ - local dirs = $(cmd:D) ; - if ! $(dirs) - { - dirs = . [ os.executable-path ] ; - } - local base = [ default-extension $(cmd:D=) : .exe .cmd .bat ] ; - local paths = [ GLOB $(dirs) : $(base) ] ; - if $(paths) - { - # Make sure we have not run into a Cygwin symlink. Invoking such a file - # as an NT executable would be fatal for the bjam process. - return [ is-cygwin-symlink $(paths[1]) ] ; - } -} - - -local rule debug-message ( message * ) -{ - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO notice: [python-cfg] $(message) ; - } -} - - -# Like W32_GETREG, except prepend HKEY_CURRENT_USER\SOFTWARE and -# HKEY_LOCAL_MACHINE\SOFTWARE to the first argument, returning the first result -# found. Also accounts for the fact that on 64-bit machines, 32-bit software has -# its own area, under SOFTWARE\Wow6432node. -# -local rule software-registry-value ( path : data ? 
) -{ - local result ; - for local root in HKEY_CURRENT_USER HKEY_LOCAL_MACHINE - { - for local x64elt in "" Wow6432node\\ # Account for 64-bit windows - { - if ! $(result) - { - result = [ W32_GETREG $(root)\\SOFTWARE\\$(x64elt)$(path) : $(data) ] ; - } - } - - } - return $(result) ; -} - - -.windows-drive-letter-re = ^([A-Za-z]):[\\/](.*) ; -.cygwin-drive-letter-re = ^/cygdrive/([a-z])/(.*) ; - -.working-directory = [ PWD ] ; -.working-drive-letter = [ SUBST $(.working-directory) $(.windows-drive-letter-re) $1 ] ; -.working-drive-letter ?= [ SUBST $(.working-directory) $(.cygwin-drive-letter-re) $1 ] ; - - -local rule windows-to-cygwin-path ( path ) -{ - # If path is rooted with a drive letter, rewrite it using the /cygdrive - # mountpoint. - local p = [ SUBST $(path:T) $(.windows-drive-letter-re) /cygdrive/$1/$2 ] ; - - # Else if path is rooted without a drive letter, use the working directory. - p ?= [ SUBST $(path:T) ^/(.*) /cygdrive/$(.working-drive-letter:L)/$2 ] ; - - # Else return the path unchanged. - return $(p:E=$(path:T)) ; -} - - -# :W only works in Cygwin builds of bjam. This one works on NT builds as well. -# -local rule cygwin-to-windows-path ( path ) -{ - path = $(path:R="") ; # strip any trailing slash - - local drive-letter = [ SUBST $(path) $(.cygwin-drive-letter-re) $1:/$2 ] ; - if $(drive-letter) - { - path = $(drive-letter) ; - } - else if $(path:R=/x) = $(path) # already rooted? - { - # Look for a cygwin mount that includes each head sequence in $(path). - local head = $(path) ; - local tail = "" ; - - while $(head) - { - local root = [ software-registry-value - "Cygnus Solutions\\Cygwin\\mounts v2\\"$(head) : native ] ; - - if $(root) - { - path = $(tail:R=$(root)) ; - head = ; - } - tail = $(tail:R=$(head:D=)) ; - - if $(head) = / - { - head = ; - } - else - { - head = $(head:D) ; - } - } - } - return [ regex.replace $(path:R="") / \\ ] ; -} - - -# Convert a *nix path to native. 
-# -local rule *nix-path-to-native ( path ) -{ - if [ os.name ] = NT - { - path = [ cygwin-to-windows-path $(path) ] ; - } - return $(path) ; -} - - -# Convert an NT path to native. -# -local rule windows-path-to-native ( path ) -{ - if [ os.name ] = NT - { - return $(path) ; - } - else - { - return [ windows-to-cygwin-path $(path) ] ; - } -} - - -# Return nonempty if path looks like a windows path, i.e. it starts with a drive -# letter or contains backslashes. -# -local rule guess-windows-path ( path ) -{ - return [ SUBST $(path) ($(.windows-drive-letter-re)|.*([\\]).*) $1 ] ; -} - - -local rule path-to-native ( paths * ) -{ - local result ; - - for local p in $(paths) - { - if [ guess-windows-path $(p) ] - { - result += [ windows-path-to-native $(p) ] ; - } - else - { - result += [ *nix-path-to-native $(p:T) ] ; - } - } - return $(result) ; -} - - -# Validate the version string and extract the major/minor part we care about. -# -local rule split-version ( version ) -{ - local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ; - if ! $(major-minor[2]) || $(major-minor[3]) - { - ECHO "Warning: \"using python\" expects a two part (major, minor) version number; got" $(version) instead ; - - # Add a zero to account for the missing digit if necessary. - major-minor += 0 ; - } - - return $(major-minor[1]) $(major-minor[2]) ; -} - - -# Build a list of versions from 3.0 down to 1.5. Because bjam can not enumerate -# registry sub-keys, we have no way of finding a version with a 2-digit minor -# version, e.g. 2.10 -- let us hope that never happens. -# -.version-countdown = ; -for local v in [ numbers.range 15 30 ] -{ - .version-countdown = [ SUBST $(v) (.)(.*) $1.$2 ] $(.version-countdown) ; -} - - -local rule windows-installed-pythons ( version ? 
) -{ - version ?= $(.version-countdown) ; - local interpreters ; - - for local v in $(version) - { - local install-path = [ - software-registry-value "Python\\PythonCore\\"$(v)"\\InstallPath" ] ; - - if $(install-path) - { - install-path = [ windows-path-to-native $(install-path) ] ; - debug-message Registry indicates Python $(v) installed at \"$(install-path)\" ; - } - - interpreters += $(:E=python:R=$(install-path)) ; - } - return $(interpreters) ; -} - - -local rule darwin-installed-pythons ( version ? ) -{ - version ?= $(.version-countdown) ; - - local prefix - = [ GLOB /System/Library/Frameworks /Library/Frameworks - : Python.framework ] ; - - return $(prefix)/Versions/$(version)/bin/python ; -} - - -# Assume "python-cmd" invokes a python interpreter and invoke it to extract all -# the information we care about from its "sys" module. Returns void if -# unsuccessful. -# -local rule probe ( python-cmd ) -{ - # Avoid invoking a Cygwin symlink on NT. - local skip-symlink ; - if [ os.name ] = NT - { - skip-symlink = [ invokes-cygwin-symlink $(python-cmd) ] ; - } - - if $(skip-symlink) - { - debug-message -------------------------------------------------------------------- ; - debug-message \"$(python-cmd)\" would attempt to invoke a Cygwin symlink, ; - debug-message causing a bjam built for Windows to hang. ; - debug-message ; - debug-message If you intend to target a Cygwin build of Python, please ; - debug-message replace the path to the link with the path to a real executable ; - debug-message (guessing: \"$(skip-symlink)\") "in" your 'using python' line ; - debug-message "in" user-config.jam or site-config.jam. Do not forget to escape ; - debug-message backslashes ; - debug-message -------------------------------------------------------------------- ; - } - else - { - # Prepare a List of Python format strings and expressions that can be - # used to print the constants we want from the sys module. 
- - # We do not really want sys.version since that is a complicated string, - # so get the information from sys.version_info instead. - local format = "version=%d.%d" ; - local exprs = "version_info[0]" "version_info[1]" ; - - for local s in $(sys-elements[2-]) - { - format += $(s)=%s ; - exprs += $(s) ; - } - - # Invoke Python and ask it for all those values. - if [ version.check-jam-version 3 1 17 ] || ( [ os.name ] != NT ) - { - # Prior to version 3.1.17 Boost Jam's SHELL command did not support - # quoted commands correctly on Windows. This means that on that - # platform we do not support using a Python command interpreter - # executable whose path contains a space character. - python-cmd = \"$(python-cmd)\" ; - } - local full-cmd = - $(python-cmd)" -c \"from sys import *; print('"$(format:J=\\n)"' % ("$(exprs:J=,)"))\"" ; - - local output = [ shell-cmd $(full-cmd) ] ; - if $(output) - { - # Parse the output to get all the results. - local nl = " - -" ; - for s in $(sys-elements) - { - # These variables are expected to be declared local in the - # caller, so Jam's dynamic scoping will set their values there. - sys.$(s) = [ SUBST $(output) \\<$(s)=([^$(nl)]+) $1 ] ; - } - } - return $(output) ; - } -} - - -# Make sure the "libraries" and "includes" variables (in an enclosing scope) -# have a value based on the information given. -# -local rule compute-default-paths ( target-os : version ? : prefix ? : - exec-prefix ? ) -{ - exec-prefix ?= $(prefix) ; - - if $(target-os) = windows - { - # The exec_prefix is where you're supposed to look for machine-specific - # libraries. - local default-library-path = $(exec-prefix)\\libs ; - local default-include-path = $(:E=Include:R=$(prefix)) ; - - # If the interpreter was found in a directory called "PCBuild" or - # "PCBuild8," assume we're looking at a Python built from the source - # distro, and go up one additional level to the default root. 
Otherwise, - # the default root is the directory where the interpreter was found. - - # We ask Python itself what the executable path is in case of - # intermediate symlinks or shell scripts. - local executable-dir = $(sys.executable:D) ; - - if [ MATCH ^(PCBuild) : $(executable-dir:D=) ] - { - debug-message "This Python appears to reside in a source distribution;" ; - debug-message "prepending \""$(executable-dir)"\" to default library search path" ; - - default-library-path = $(executable-dir) $(default-library-path) ; - - default-include-path = $(:E=PC:R=$(executable-dir:D)) $(default-include-path) ; - - debug-message "and \""$(default-include-path[1])"\" to default #include path" ; - } - - libraries ?= $(default-library-path) ; - includes ?= $(default-include-path) ; - } - else - { - includes ?= $(prefix)/include/python$(version) ; - - local lib = $(exec-prefix)/lib ; - libraries ?= $(lib)/python$(version)/config $(lib) ; - } -} - -# The version of the python interpreter to use. -feature.feature python : : propagated ; -feature.feature python.interpreter : : free ; - -toolset.flags python.capture-output PYTHON : ; - -# -# Support for Python configured --with-pydebug -# -feature.feature python-debugging : off on : propagated ; -builtin.variant debug-python : debug : on ; - - -# Return a list of candidate commands to try when looking for a Python -# interpreter. prefix is expected to be a native path. -# -local rule candidate-interpreters ( version ? : prefix ? : target-os ) -{ - local bin-path = bin ; - if $(target-os) = windows - { - # On Windows, look in the root directory itself and, to work with the - # result of a build-from-source, the PCBuild directory. 
- bin-path = PCBuild8 PCBuild "" ; - } - - bin-path = $(bin-path:R=$(prefix)) ; - - if $(target-os) in windows darwin - { - return # Search: - $(:E=python:R=$(bin-path)) # Relative to the prefix, if any - python # In the PATH - [ $(target-os)-installed-pythons $(version) ] # Standard install locations - ; - } - else - { - # Search relative to the prefix, or if none supplied, in PATH. - local unversioned = $(:E=python:R=$(bin-path:E=)) ; - - # If a version was specified, look for a python with that specific - # version appended before looking for one called, simply, "python" - return $(unversioned)$(version) $(unversioned) ; - } -} - - -# Compute system library dependencies for targets linking with static Python -# libraries. -# -# On many systems, Python uses libraries such as pthreads or libdl. Since static -# libraries carry no library dependency information of their own that the linker -# can extract, these extra dependencies have to be given explicitly on the link -# line of the client. The information about these dependencies is packaged into -# the "python" target below. -# -# Even where Python itself uses pthreads, it never allows extension modules to -# be entered concurrently (unless they explicitly give up the interpreter lock). -# Therefore, extension modules do not need the efficiency overhead of threadsafe -# code as produced by multi, and we handle libpthread along with -# other libraries here. Note: this optimization is based on an assumption that -# the compiler generates link-compatible code in both the single- and -# multi-threaded cases, and that system libraries do not change their ABIs -# either. -# -# Returns a list of usage-requirements that link to the necessary system -# libraries. -# -local rule system-library-dependencies ( target-os ) -{ - switch $(target-os) - { - case s[uo][nl]* : # solaris, sun, sunos - # Add a librt dependency for the gcc toolset on SunOS (the sun - # toolset adds -lrt unconditionally). 
While this appears to - # duplicate the logic already in gcc.jam, it does not as long as - # we are not forcing multi. - - # On solaris 10, distutils.sysconfig.get_config_var('LIBS') yields - # '-lresolv -lsocket -lnsl -lrt -ldl'. However, that does not seem - # to be the right list for extension modules. For example, on my - # installation, adding -ldl causes at least one test to fail because - # the library can not be found and removing it causes no failures. - - # Apparently, though, we need to add -lrt for gcc. - return gcc:rt ; - - case osf : return pthread gcc:rt ; - - case qnx* : return ; - case darwin : return ; - case windows : return ; - - case hpux : return rt ; - case *bsd : return pthread gcc:util ; - - case aix : return pthread dl ; - - case * : return pthread dl - gcc:util linux:util ; - } -} - - -# Declare a target to represent Python's library. -# -local rule declare-libpython-target ( version ? : requirements * ) -{ - # Compute the representation of Python version in the name of Python's - # library file. - local lib-version = $(version) ; - if windows in $(requirements) - { - local major-minor = [ split-version $(version) ] ; - lib-version = $(major-minor:J="") ; - if on in $(requirements) - { - lib-version = $(lib-version)_d ; - } - } - - if ! $(lib-version) - { - ECHO *** warning: could not determine Python version, which will ; - ECHO *** warning: probably prevent us from linking with the python ; - ECHO *** warning: library. Consider explicitly passing the version ; - ECHO *** warning: to 'using python'. ; - } - - # Declare it. - lib python.lib : : python$(lib-version) $(requirements) ; -} - - -# Implementation of init. -local rule configure ( version ? : cmd-or-prefix ? : includes * : libraries ? : - condition * : extension-suffix ? 
) -{ - local prefix ; - local exec-prefix ; - local cmds-to-try ; - local interpreter-cmd ; - - local target-os = [ feature.get-values target-os : $(condition) ] ; - target-os ?= [ feature.defaults target-os ] ; - target-os = $(target-os:G=) ; - - if $(target-os) = windows && on in $(condition) - { - extension-suffix ?= _d ; - } - extension-suffix ?= "" ; - - # Normalize and dissect any version number. - local major-minor ; - if $(version) - { - major-minor = [ split-version $(version) ] ; - version = $(major-minor:J=.) ; - } - - local cmds-to-try ; - - if ! $(cmd-or-prefix) || [ GLOB $(cmd-or-prefix) : * ] - { - # If the user did not pass a command, whatever we got was a prefix. - prefix = $(cmd-or-prefix) ; - cmds-to-try = [ candidate-interpreters $(version) : $(prefix) : $(target-os) ] ; - } - else - { - # Work with the command the user gave us. - cmds-to-try = $(cmd-or-prefix) ; - - # On Windows, do not nail down the interpreter command just yet in case - # the user specified something that turns out to be a cygwin symlink, - # which could bring down bjam if we invoke it. - if $(target-os) != windows - { - interpreter-cmd = $(cmd-or-prefix) ; - } - } - - # Values to use in case we can not really find anything in the system. - local fallback-cmd = $(cmds-to-try[1]) ; - local fallback-version ; - - # Anything left to find or check? - if ! ( $(interpreter-cmd) && $(includes) && $(libraries) ) - { - # Values to be extracted from python's sys module. These will be set by - # the probe rule, above, using Jam's dynamic scoping. - local sys-elements = version platform prefix exec_prefix executable ; - local sys.$(sys-elements) ; - - # Compute the string Python's sys.platform needs to match. If not - # targeting Windows or cygwin we will assume only native builds can - # possibly run, so we will not require a match and we leave sys.platform - # blank. 
- local platform ; - switch $(target-os) - { - case windows : platform = win32 ; - case cygwin : platform = cygwin ; - } - - while $(cmds-to-try) - { - # Pop top command. - local cmd = $(cmds-to-try[1]) ; - cmds-to-try = $(cmds-to-try[2-]) ; - - debug-message Checking interpreter command \"$(cmd)\"... ; - if [ probe $(cmd) ] - { - fallback-version ?= $(sys.version) ; - - # Check for version/platform validity. - for local x in version platform - { - if $($(x)) && $($(x)) != $(sys.$(x)) - { - debug-message ...$(x) "mismatch (looking for" - $($(x)) but found $(sys.$(x))")" ; - cmd = ; - } - } - - if $(cmd) - { - debug-message ...requested configuration matched! ; - - exec-prefix = $(sys.exec_prefix) ; - - compute-default-paths $(target-os) : $(sys.version) : - $(sys.prefix) : $(sys.exec_prefix) ; - - version = $(sys.version) ; - interpreter-cmd ?= $(cmd) ; - cmds-to-try = ; # All done. - } - } - else - { - debug-message ...does not invoke a working interpreter ; - } - } - } - - # Anything left to compute? - if $(includes) && $(libraries) - { - .configured = true ; - } - else - { - version ?= $(fallback-version) ; - version ?= 2.5 ; - exec-prefix ?= $(prefix) ; - compute-default-paths $(target-os) : $(version) : $(prefix:E=) ; - } - - if ! $(interpreter-cmd) - { - fallback-cmd ?= python ; - debug-message No working Python interpreter found. ; - if [ os.name ] != NT || ! 
[ invokes-cygwin-symlink $(fallback-cmd) ] - { - interpreter-cmd = $(fallback-cmd) ; - debug-message falling back to \"$(interpreter-cmd)\" ; - } - } - - includes = [ path-to-native $(includes) ] ; - libraries = [ path-to-native $(libraries) ] ; - - debug-message "Details of this Python configuration:" ; - debug-message " interpreter command:" \"$(interpreter-cmd:E=)\" ; - debug-message " include path:" \"$(includes:E=)\" ; - debug-message " library path:" \"$(libraries:E=)\" ; - if $(target-os) = windows - { - debug-message " DLL search path:" \"$(exec-prefix:E=)\" ; - } - - # - # End autoconfiguration sequence. - # - local target-requirements = $(condition) ; - - # Add the version, if any, to the target requirements. - if $(version) - { - if ! $(version) in [ feature.values python ] - { - feature.extend python : $(version) ; - } - target-requirements += $(version:E=default) ; - } - - target-requirements += $(target-os) ; - - # See if we can find a framework directory on darwin. - local framework-directory ; - if $(target-os) = darwin - { - # Search upward for the framework directory. - local framework-directory = $(libraries[-1]) ; - while $(framework-directory:D=) && $(framework-directory:D=) != Python.framework - { - framework-directory = $(framework-directory:D) ; - } - - if $(framework-directory:D=) = Python.framework - { - debug-message framework directory is \"$(framework-directory)\" ; - } - else - { - debug-message "no framework directory found; using library path" ; - framework-directory = ; - } - } - - local dll-path = $(libraries) ; - - # Make sure that we can find the Python DLL on Windows. - if ( $(target-os) = windows ) && $(exec-prefix) - { - dll-path += $(exec-prefix) ; - } - - # - # Prepare usage requirements. 
- # - local usage-requirements = [ system-library-dependencies $(target-os) ] ; - usage-requirements += $(includes) $(interpreter-cmd) ; - if on in $(condition) - { - if $(target-os) = windows - { - # In pyconfig.h, Py_DEBUG is set if _DEBUG is set. If we define - # Py_DEBUG we will get multiple definition warnings. - usage-requirements += _DEBUG ; - } - else - { - usage-requirements += Py_DEBUG ; - } - } - - # Global, but conditional, requirements to give access to the interpreter - # for general utilities, like other toolsets, that run Python scripts. - toolset.add-requirements - $(target-requirements:J=,):$(interpreter-cmd) ; - - # Register the right suffix for extensions. - register-extension-suffix $(extension-suffix) : $(target-requirements) ; - - # - # Declare the "python" target. This should really be called - # python_for_embedding. - # - - if $(framework-directory) - { - alias python - : - : $(target-requirements) - : - : $(usage-requirements) $(framework-directory) - ; - } - else - { - declare-libpython-target $(version) : $(target-requirements) ; - - # This is an evil hack. On, Windows, when Python is embedded, nothing - # seems to set up sys.path to include Python's standard library - # (http://article.gmane.org/gmane.comp.python.general/544986). The evil - # here, aside from the workaround necessitated by Python's bug, is that: - # - # a. we're guessing the location of the python standard library from the - # location of pythonXX.lib - # - # b. we're hijacking the property to get the - # environment variable set up, and the user may want to use it for - # something else (e.g. launch the debugger). 
- local set-PYTHONPATH ; - if $(target-os) = windows - { - set-PYTHONPATH = [ common.prepend-path-variable-command PYTHONPATH : - $(libraries:D)/Lib ] ; - } - - alias python - : - : $(target-requirements) - : - # Why python.lib must be listed here instead of along with the - # system libs is a mystery, but if we do not do it, on cygwin, - # -lpythonX.Y never appears in the command line (although it does on - # linux). - : $(usage-requirements) - $(set-PYTHONPATH) - $(libraries) python.lib - ; - } - - # On *nix, we do not want to link either Boost.Python or Python extensions - # to libpython, because the Python interpreter itself provides all those - # symbols. If we linked to libpython, we would get duplicate symbols. So - # declare two targets -- one for building extensions and another for - # embedding. - # - # Unlike most *nix systems, Mac OS X's linker does not permit undefined - # symbols when linking a shared library. So, we still need to link against - # the Python framework, even when building extensions. Note that framework - # builds of Python always use shared libraries, so we do not need to worry - # about duplicate Python symbols. - if $(target-os) in windows cygwin darwin - { - alias python_for_extensions : python : $(target-requirements) ; - } - # On AIX we need Python extensions and Boost.Python to import symbols from - # the Python interpreter. Dynamic libraries opened with dlopen() do not - # inherit the symbols from the Python interpreter. 
- else if $(target-os) = aix - { - alias python_for_extensions - : - : $(target-requirements) - : - : $(usage-requirements) -Wl,-bI:$(libraries[1])/python.exp - ; - } - else - { - alias python_for_extensions - : - : $(target-requirements) - : - : $(usage-requirements) - ; - } -} - - -rule configured ( ) -{ - return $(.configured) ; -} - - -type.register PYTHON_EXTENSION : : SHARED_LIB ; - - -local rule register-extension-suffix ( root : condition * ) -{ - local suffix ; - - switch [ feature.get-values target-os : $(condition) ] - { - case windows : suffix = pyd ; - case cygwin : suffix = dll ; - case hpux : - { - if [ feature.get-values python : $(condition) ] in 1.5 1.6 2.0 2.1 2.2 2.3 2.4 - { - suffix = sl ; - } - else - { - suffix = so ; - } - } - case * : suffix = so ; - } - - type.set-generated-target-suffix PYTHON_EXTENSION : $(condition) : <$(root).$(suffix)> ; -} - - -# Unset 'lib' prefix for PYTHON_EXTENSION -type.set-generated-target-prefix PYTHON_EXTENSION : : "" ; - - -rule python-extension ( name : sources * : requirements * : default-build * : - usage-requirements * ) -{ - if [ configured ] - { - requirements += /python//python_for_extensions ; - } - requirements += true ; - - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(name) : $(project) : PYTHON_EXTENSION - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - -IMPORT python : python-extension : : python-extension ; - -rule py2to3 -{ - common.copy $(>) $(<) ; - 2to3 $(<) ; -} - -actions 2to3 -{ - 2to3 -wn "$(<)" - 2to3 -dwn "$(<)" -} - - -# Support for testing. 
-type.register PY : py ; -type.register RUN_PYD_OUTPUT ; -type.register RUN_PYD : : TEST ; - - -class python-test-generator : generator -{ - import set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - self.composing = true ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - local pyversion = [ $(property-set).get ] ; - local python ; - local other-pythons ; - - # Make new target that converting Python source by 2to3 when running with Python 3. - local rule make-2to3-source ( source ) - { - if $(pyversion) >= 3.0 - { - local a = [ new action $(source) : python.py2to3 : $(property-set) ] ; - local t = [ utility.basename [ $(s).name ] ] ; - local p = [ new file-target $(t) : PY : $(project) : $(a) ] ; - return $(p) ; - } - else - { - return $(source) ; - } - } - - for local s in $(sources) - { - if [ $(s).type ] = PY - { - if ! $(python) - { - # First Python source ends up on command line. - python = [ make-2to3-source $(s) ] ; - - } - else - { - # Other Python sources become dependencies. - other-pythons += [ make-2to3-source $(s) ] ; - } - } - } - - local extensions ; - for local s in $(sources) - { - if [ $(s).type ] = PYTHON_EXTENSION - { - extensions += $(s) ; - } - } - - local libs ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] LIB ] - && ! $(s) in $(extensions) - { - libs += $(s) ; - } - } - - local new-sources ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] CPP ] - { - local name = [ utility.basename [ $(s).name ] ] ; - if $(name) = [ utility.basename [ $(python).name ] ] - { - name = $(name)_ext ; - } - local extension = [ generators.construct $(project) $(name) : - PYTHON_EXTENSION : $(property-set) : $(s) $(libs) ] ; - - # The important part of usage requirements returned from - # PYTHON_EXTENSION generator are xdll-path properties that will - # allow us to find the python extension at runtime. 
- property-set = [ $(property-set).add $(extension[1]) ] ; - - # Ignore usage requirements. We're a top-level generator and - # nobody is going to use what we generate. - new-sources += $(extension[2-]) ; - } - } - - property-set = [ $(property-set).add-raw $(other-pythons) ] ; - - result = [ construct-result $(python) $(extensions) $(new-sources) : - $(project) $(name) : $(property-set) ] ; - } -} - - -generators.register - [ new python-test-generator python.capture-output : : RUN_PYD_OUTPUT ] ; - -generators.register-standard testing.expect-success - : RUN_PYD_OUTPUT : RUN_PYD ; - - -# There are two different ways of spelling OS names. One is used for [ os.name ] -# and the other is used for the and properties. Until that -# is remedied, this sets up a crude mapping from the latter to the former, that -# will work *for the purposes of cygwin/NT cross-builds only*. Could not think -# of a better name than "translate". -# -.translate-os-windows = NT ; -.translate-os-cygwin = CYGWIN ; -local rule translate-os ( src-os ) -{ - local x = $(.translate-os-$(src-os)) [ os.name ] ; - return $(x[1]) ; -} - - -# Extract the path to a single ".pyd" source. This is used to build the -# PYTHONPATH for running bpl tests. -# -local rule pyd-pythonpath ( source ) -{ - return [ on $(source) return $(LOCATE) $(SEARCH) ] ; -} - - -# The flag settings on testing.capture-output do not apply to python.capture -# output at the moment. Redo this explicitly. -toolset.flags python.capture-output ARGS ; - - -rule capture-output ( target : sources * : properties * ) -{ - # Setup up a proper DLL search path. Here, $(sources[1]) is a python module - # and $(sources[2]) is a DLL. Only $(sources[1]) is passed to - # testing.capture-output, so RUN_PATH variable on $(sources[2]) is not - # consulted. Move it over explicitly. 
- RUN_PATH on $(sources[1]) = [ on $(sources[2-]) return $(RUN_PATH) ] ; - - PYTHONPATH = [ sequence.transform pyd-pythonpath : $(sources[2-]) ] ; - PYTHONPATH += [ feature.get-values pythonpath : $(properties) ] ; - - # After test is run, we remove the Python module, but not the Python script. - testing.capture-output $(target) : $(sources[1]) : $(properties) : - $(sources[2-]) ; - - # PYTHONPATH is different; it will be interpreted by whichever Python is - # invoked and so must follow path rules for the target os. The only OSes - # where we can run python for other OSes currently are NT and CYGWIN so we - # only need to handle those cases. - local target-os = [ feature.get-values target-os : $(properties) ] ; - # Oddly, host-os is not in properties, so grab the default value. - local host-os = [ feature.defaults host-os ] ; - host-os = $(host-os:G=) ; - if $(target-os) != $(host-os) - { - PYTHONPATH = [ sequence.transform $(host-os)-to-$(target-os)-path : - $(PYTHONPATH) ] ; - } - local path-separator = [ os.path-separator [ translate-os $(target-os) ] ] ; - local set-PYTHONPATH = [ common.variable-setting-command PYTHONPATH : - $(PYTHONPATH:J=$(path-separator)) ] ; - LAUNCHER on $(target) = $(set-PYTHONPATH) [ on $(target) return \"$(PYTHON)\" ] ; -} - - -rule bpl-test ( name : sources * : requirements * ) -{ - local s ; - sources ?= $(name).py $(name).cpp ; - return [ testing.make-test run-pyd : $(sources) /boost/python//boost_python - : $(requirements) : $(name) ] ; -} - - -IMPORT $(__name__) : bpl-test : : bpl-test ; diff --git a/jam-files/boost-build/tools/qcc.jam b/jam-files/boost-build/tools/qcc.jam deleted file mode 100644 index 4f2a4fc1..00000000 --- a/jam-files/boost-build/tools/qcc.jam +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) 2001 David Abrahams. -# Copyright (c) 2002-2003 Rene Rivera. -# Copyright (c) 2002-2003 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import property ; -import set ; -import toolset ; -import type ; -import unix ; - -feature.extend toolset : qcc ; - -toolset.inherit-generators qcc : unix : unix.link unix.link.dll ; -generators.override builtin.lib-generator : qcc.prebuilt ; -toolset.inherit-flags qcc : unix ; -toolset.inherit-rules qcc : unix ; - -# Initializes the qcc toolset for the given version. If necessary, command may -# be used to specify where the compiler is located. The parameter 'options' is a -# space-delimited list of options, each one being specified as -# option-value. Valid option names are: cxxflags, linkflags and -# linker-type. Accepted values for linker-type are gnu and sun, gnu being the -# default. -# -# Example: -# using qcc : 3.4 : : foo bar sun ; -# -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters qcc : version $(version) ] ; - local command = [ common.get-invocation-command qcc : QCC : $(command) ] ; - common.handle-options qcc : $(condition) : $(command) : $(options) ; -} - - -generators.register-c-compiler qcc.compile.c++ : CPP : OBJ : qcc ; -generators.register-c-compiler qcc.compile.c : C : OBJ : qcc ; -generators.register-c-compiler qcc.compile.asm : ASM : OBJ : qcc ; - - -# Declare flags for compilation. -toolset.flags qcc.compile OPTIONS on : -gstabs+ ; - -# Declare flags and action for compilation. 
-toolset.flags qcc.compile OPTIONS off : -O0 ; -toolset.flags qcc.compile OPTIONS speed : -O3 ; -toolset.flags qcc.compile OPTIONS space : -Os ; - -toolset.flags qcc.compile OPTIONS off : -Wc,-fno-inline ; -toolset.flags qcc.compile OPTIONS on : -Wc,-Wno-inline ; -toolset.flags qcc.compile OPTIONS full : -Wc,-finline-functions -Wc,-Wno-inline ; - -toolset.flags qcc.compile OPTIONS off : -w ; -toolset.flags qcc.compile OPTIONS all : -Wc,-Wall ; -toolset.flags qcc.compile OPTIONS on : -Wc,-Werror ; - -toolset.flags qcc.compile OPTIONS on : -p ; - -toolset.flags qcc.compile OPTIONS ; -toolset.flags qcc.compile.c++ OPTIONS ; -toolset.flags qcc.compile DEFINES ; -toolset.flags qcc.compile INCLUDES ; - -toolset.flags qcc.compile OPTIONS shared : -shared ; - -toolset.flags qcc.compile.c++ TEMPLATE_DEPTH ; - - -rule compile.c++ -{ - # Here we want to raise the template-depth parameter value to something - # higher than the default value of 17. Note that we could do this using the - # feature.set-default rule but we do not want to set the default value for - # all toolsets as well. - # - # TODO: This 'modified default' has been inherited from some 'older Boost - # Build implementation' and has most likely been added to make some Boost - # library parts compile correctly. We should see what exactly prompted this - # and whether we can get around the problem more locally. - local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ; - if ! 
$(template-depth) - { - TEMPLATE_DEPTH on $(1) = 128 ; - } -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -Wc,-ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.asm -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - - -# The class checking that we do not try to use the static property -# while creating or using a shared library, since it is not supported by qcc/ -# /libc. -# -class qcc-linking-generator : unix-linking-generator -{ - rule generated-targets ( sources + : property-set : project name ? ) - { - if static in [ $(property-set).raw ] - { - local m ; - if [ id ] = "qcc.link.dll" - { - m = "on qcc, DLL can't be build with static" ; - } - if ! $(m) - { - for local s in $(sources) - { - local type = [ $(s).type ] ; - if $(type) && [ type.is-derived $(type) SHARED_LIB ] - { - m = "on qcc, using DLLS together with the static options is not possible " ; - } - } - } - if $(m) - { - errors.user-error $(m) : "It is suggested to use" - "static together with static." ; - } - } - - return [ unix-linking-generator.generated-targets - $(sources) : $(property-set) : $(project) $(name) ] ; - } -} - -generators.register [ new qcc-linking-generator qcc.link : LIB OBJ : EXE - : qcc ] ; - -generators.register [ new qcc-linking-generator qcc.link.dll : LIB OBJ - : SHARED_LIB : qcc ] ; - -generators.override qcc.prebuilt : builtin.prebuilt ; -generators.override qcc.searched-lib-generator : searched-lib-generator ; - - -# Declare flags for linking. -# First, the common flags. 
-toolset.flags qcc.link OPTIONS on : -gstabs+ ; -toolset.flags qcc.link OPTIONS on : -p ; -toolset.flags qcc.link OPTIONS ; -toolset.flags qcc.link LINKPATH ; -toolset.flags qcc.link FINDLIBS-ST ; -toolset.flags qcc.link FINDLIBS-SA ; -toolset.flags qcc.link LIBRARIES ; - -toolset.flags qcc.link FINDLIBS-SA : m ; - -# For static we made sure there are no dynamic libraries in the -# link. -toolset.flags qcc.link OPTIONS static : -static ; - -# Assuming this is just like with gcc. -toolset.flags qcc.link RPATH : : unchecked ; -toolset.flags qcc.link RPATH_LINK : : unchecked ; - - -# Declare actions for linking. -# -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. For now, serialize only qcc links while it might - # be a good idea to serialize all links. - JAM_SEMAPHORE on $(targets) = qcc-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS) -} - - -# Always remove archive and start again. Here is the rationale from Andre Hentz: -# I had a file, say a1.c, that was included into liba.a. I moved a1.c to a2.c, -# updated my Jamfiles and rebuilt. My program was crashing with absurd errors. -# After some debugging I traced it back to the fact that a1.o was *still* in -# liba.a -RM = [ common.rm-command ] ; -if [ os.name ] = NT -{ - RM = "if exist \"$(<[1])\" DEL \"$(<[1])\"" ; -} - - -# Declare action for creating static libraries. The 'r' letter means to add -# files to the archive with replacement. Since we remove the archive, we do not -# care about replacement, but there is no option to "add without replacement". -# The 'c' letter suppresses warnings in case the archive does not exists yet. 
-# That warning is produced only on some platforms, for whatever reasons. -# -actions piecemeal archive -{ - $(RM) "$(<)" - ar rc "$(<)" "$(>)" -} - - -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = qcc-link-semaphore ; -} - - -# Differ from 'link' above only by -shared. -# -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" $(HAVE_SONAME)-Wl,-h$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS) -} diff --git a/jam-files/boost-build/tools/qt.jam b/jam-files/boost-build/tools/qt.jam deleted file mode 100644 index 8aa7ca26..00000000 --- a/jam-files/boost-build/tools/qt.jam +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2006 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Forwarning toolset file to Qt GUI library. Forwards to the toolset file -# for the current version of Qt. - -import qt4 ; - -rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * ) -{ - qt4.init $(prefix) : $(full_bin) : $(full_inc) : $(full_lib) : $(version) : $(condition) ; -} - - diff --git a/jam-files/boost-build/tools/qt3.jam b/jam-files/boost-build/tools/qt3.jam deleted file mode 100644 index f82cf0ac..00000000 --- a/jam-files/boost-build/tools/qt3.jam +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Support for the Qt GUI library version 3 -# (http://www.trolltech.com/products/qt3/index.html). -# For new developments, it is recommended to use Qt4 via the qt4 Boost.Build -# module. 
- -import modules ; -import feature ; -import errors ; -import type ; -import "class" : new ; -import generators ; -import project ; -import toolset : flags ; - -# Convert this module into a project, so that we can declare targets here. -project.initialize $(__name__) ; -project qt3 ; - - -# Initialized the QT support module. The 'prefix' parameter tells where QT is -# installed. When not given, environmental variable QTDIR should be set. -# -rule init ( prefix ? ) -{ - if ! $(prefix) - { - prefix = [ modules.peek : QTDIR ] ; - if ! $(prefix) - { - errors.error - "QT installation prefix not given and QTDIR variable is empty" ; - } - } - - if $(.initialized) - { - if $(prefix) != $(.prefix) - { - errors.error - "Attempt the reinitialize QT with different installation prefix" ; - } - } - else - { - .initialized = true ; - .prefix = $(prefix) ; - - generators.register-standard qt3.moc : H : CPP(moc_%) : qt3 ; - # Note: the OBJ target type here is fake, take a look at - # qt4.jam/uic-h-generator for explanations that apply in this case as - # well. - generators.register [ new moc-h-generator-qt3 - qt3.moc.cpp : MOCCABLE_CPP : OBJ : qt3 ] ; - - # The UI type is defined in types/qt.jam, and UIC_H is only used in - # qt.jam, but not in qt4.jam, so define it here. - type.register UIC_H : : H ; - - generators.register-standard qt3.uic-h : UI : UIC_H : qt3 ; - - # The following generator is used to convert UI files to CPP. It creates - # UIC_H from UI, and constructs CPP from UI/UIC_H. In addition, it also - # returns UIC_H target, so that it can be mocced. - class qt::uic-cpp-generator : generator - { - rule __init__ ( ) - { - generator.__init__ qt3.uic-cpp : UI UIC_H : CPP : qt3 ; - } - - rule run ( project name ? : properties * : sources + ) - { - # Consider this: - # obj test : test_a.cpp : off ; - # - # This generator will somehow be called in this case, and, - # will fail -- which is okay. 
However, if there are - # properties they will be converted to sources, so the size of - # 'sources' will be more than 1. In this case, the base generator - # will just crash -- and that's not good. Just use a quick test - # here. - - local result ; - if ! $(sources[2]) - { - # Construct CPP as usual - result = [ generator.run $(project) $(name) - : $(properties) : $(sources) ] ; - - # If OK, process UIC_H with moc. It's pretty clear that - # the object generated with UIC will have Q_OBJECT macro. - if $(result) - { - local action = [ $(result[1]).action ] ; - local sources = [ $(action).sources ] ; - local mocced = [ generators.construct $(project) $(name) - : CPP : $(properties) : $(sources[2]) ] ; - result += $(mocced[2-]) ; - } - } - - return $(result) ; - } - } - - generators.register [ new qt::uic-cpp-generator ] ; - - # Finally, declare prebuilt target for QT library. - local usage-requirements = - $(.prefix)/include - $(.prefix)/lib - $(.prefix)/lib - qt3 - ; - lib qt : : qt-mt multi : : $(usage-requirements) ; - lib qt : : qt single : : $(usage-requirements) ; - } -} - -class moc-h-generator-qt3 : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP - { - name = [ $(sources[1]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt3.moc.cpp : - $(property-set) ] ; - - local target = [ - new file-target $(name) : MOC : $(project) : $(a) ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that the dependency from - # sources to this generated header is detected -- if Jam does not - # know about this target, it won't do anything. 
- DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } - } -} - - -# Query the installation directory. This is needed in at least two scenarios. -# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt -# plugins to the Qt-Tree. -# -rule directory -{ - return $(.prefix) ; -} - -# -f forces moc to include the processed source file. Without it, it would think -# that .qpp is not a header and would not include it from the generated file. -# -actions moc -{ - $(.prefix)/bin/moc -f $(>) -o $(<) -} - -# When moccing .cpp files, we don't need -f, otherwise generated code will -# include .cpp and we'll get duplicated symbols. -# -actions moc.cpp -{ - $(.prefix)/bin/moc $(>) -o $(<) -} - - -space = " " ; - -# Sometimes it's required to make 'plugins' available during uic invocation. To -# help with this we add paths to all dependency libraries to uic commane line. -# The intention is that it's possible to write -# -# exe a : ... a.ui ... : some_plugin ; -# -# and have everything work. We'd add quite a bunch of unrelated paths but it -# won't hurt. -# -flags qt3.uic-h LIBRARY_PATH ; -actions uic-h -{ - $(.prefix)/bin/uic $(>) -o $(<) -L$(space)$(LIBRARY_PATH) -} - - -flags qt3.uic-cpp LIBRARY_PATH ; -# The second target is uic-generated header name. It's placed in build dir, but -# we want to include it using only basename. -actions uic-cpp -{ - $(.prefix)/bin/uic $(>[1]) -i $(>[2]:D=) -o $(<) -L$(space)$(LIBRARY_PATH) -} diff --git a/jam-files/boost-build/tools/qt4.jam b/jam-files/boost-build/tools/qt4.jam deleted file mode 100644 index 71d1b762..00000000 --- a/jam-files/boost-build/tools/qt4.jam +++ /dev/null @@ -1,724 +0,0 @@ -# Copyright 2002-2006 Vladimir Prus -# Copyright 2005 Alo Sarv -# Copyright 2005-2009 Juergen Hunold -# -# Distributed under the Boost Software License, Version 1.0. 
(See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -# Qt4 library support module -# -# The module attempts to auto-detect QT installation location from QTDIR -# environment variable; failing that, installation location can be passed as -# argument: -# -# toolset.using qt4 : /usr/local/Trolltech/Qt-4.0.0 ; -# -# The module supports code generation from .ui and .qrc files, as well as -# running the moc preprocessor on headers. Note that you must list all your -# moc-able headers in sources. -# -# Example: -# -# exe myapp : myapp.cpp myapp.h myapp.ui myapp.qrc -# /qt4//QtGui /qt4//QtNetwork ; -# -# It's also possible to run moc on cpp sources: -# -# import cast ; -# -# exe myapp : myapp.cpp [ cast _ moccable-cpp : myapp.cpp ] /qt4//QtGui ; -# -# When moccing source file myapp.cpp you need to include "myapp.moc" from -# myapp.cpp. When moccing .h files, the output of moc will be automatically -# compiled and linked in, you don't need any includes. -# -# This is consistent with Qt guidelines: -# http://doc.trolltech.com/4.0/moc.html - -import modules ; -import feature ; -import errors ; -import type ; -import "class" : new ; -import generators ; -import project ; -import toolset : flags ; -import os ; -import virtual-target ; -import scanner ; - -# Qt3Support control feature -# -# Qt4 configure defaults to build Qt4 libraries with Qt3Support. -# The autodetection is missing, so we default to disable Qt3Support. -# This prevents the user from inadvertedly using a deprecated API. -# -# The Qt3Support library can be activated by adding -# "on" to requirements -# -# Use "on:QT3_SUPPORT_WARNINGS" -# to get warnings about deprecated Qt3 support funtions and classes. -# Files ported by the "qt3to4" conversion tool contain _tons_ of -# warnings, so this define is not set as default. -# -# Todo: Detect Qt3Support from Qt's configure data. -# Or add more auto-configuration (like python). 
-feature.feature qt3support : off on : propagated link-incompatible ; - -# The Qt version used for requirements -# Valid are 4.4 or 4.5.0 -# Auto-detection via qmake sets 'major.minor.patch' -feature.feature qt : : propagated ; - -project.initialize $(__name__) ; -project qt ; - -# Save the project so that we tolerate 'import + using' combo. -.project = [ project.current ] ; - -# Helper utils for easy debug output -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = TRUE ; -} - -local rule debug-message ( message * ) -{ - if $(.debug-configuration) = TRUE - { - ECHO notice: [qt4-cfg] $(message) ; - } -} - -# Capture qmake output line by line -local rule read-output ( content ) -{ - local lines ; - local nl = " -" ; - local << = "([^$(nl)]*)[$(nl)](.*)" ; - local line+ = [ MATCH "$(<<)" : "$(content)" ] ; - while $(line+) - { - lines += $(line+[1]) ; - line+ = [ MATCH "$(<<)" : "$(line+[2])" ] ; - } - return $(lines) ; -} - -# Capture Qt version from qmake -local rule check-version ( bin_prefix ) -{ - full-cmd = $(bin_prefix)"/qmake -v" ; - debug-message Running '$(full-cmd)' ; - local output = [ SHELL $(full-cmd) ] ; - for line in [ read-output $(output) ] - { - # Parse the output to get all the results. - if [ MATCH "QMake" : $(line) ] - { - # Skip first line of output - } - else - { - temp = [ MATCH "([0-9]*)\\.([0-9]*)\\.([0-9]*)" : $(line) ] ; - } - } - return $(temp) ; -} - -# Validate the version string and extract the major/minor part we care about. -# -local rule split-version ( version ) -{ - local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ; - if ! $(major-minor[2]) || $(major-minor[3]) - { - ECHO "Warning: 'using qt' expects a two part (major, minor) version number; got" $(version) instead ; - - # Add a zero to account for the missing digit if necessary. - major-minor += 0 ; - } - - return $(major-minor[1]) $(major-minor[2]) ; -} - -# Initialize the QT support module. 
-# Parameters: -# - 'prefix' parameter tells where Qt is installed. -# - 'full_bin' optional full path to Qt binaries (qmake,moc,uic,rcc) -# - 'full_inc' optional full path to Qt top-level include directory -# - 'full_lib' optional full path to Qt library directory -# - 'version' optional version of Qt, else autodetected via 'qmake -v' -# - 'condition' optional requirements -rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * ) -{ - project.push-current $(.project) ; - - debug-message "==== Configuring Qt ... ====" ; - for local v in version cmd-or-prefix includes libraries condition - { - if $($(v)) - { - debug-message " user-specified "$(v): '$($(v))' ; - } - } - - # Needed as default value - .prefix = $(prefix) ; - - # pre-build paths to detect reinitializations changes - local inc_prefix lib_prefix bin_prefix ; - if $(full_inc) - { - inc_prefix = $(full_inc) ; - } - else - { - inc_prefix = $(prefix)/include ; - } - if $(full_lib) - { - lib_prefix = $(full_lib) ; - } - else - { - lib_prefix = $(prefix)/lib ; - } - if $(full_bin) - { - bin_prefix = $(full_bin) ; - } - else - { - bin_prefix = $(prefix)/bin ; - } - - # Globally needed variables - .incprefix = $(inc_prefix) ; - .libprefix = $(lib_prefix) ; - .binprefix = $(bin_prefix) ; - - if ! $(.initialized) - { - # Make sure this is initialised only once - .initialized = true ; - - # Generates cpp files from header files using "moc" tool - generators.register-standard qt4.moc : H : CPP(moc_%) : qt4 ; - - # The OBJ result type is a fake, 'H' will be really produced. See - # comments on the generator class, defined below the 'init' function. - generators.register [ new uic-generator qt4.uic : UI : OBJ : - qt4 ] ; - - # The OBJ result type is a fake here too. - generators.register [ new moc-h-generator - qt4.moc.inc : MOCCABLE_CPP : OBJ : qt4 ] ; - - generators.register [ new moc-inc-generator - qt4.moc.inc : MOCCABLE_H : OBJ : qt4 ] ; - - # Generates .cpp files from .qrc files. 
- generators.register-standard qt4.rcc : QRC : CPP(qrc_%) ; - - # dependency scanner for wrapped files. - type.set-scanner QRC : qrc-scanner ; - - # Save value of first occuring prefix - .PREFIX = $(prefix) ; - } - - if $(version) - { - major-minor = [ split-version $(version) ] ; - version = $(major-minor:J=.) ; - } - else - { - version = [ check-version $(bin_prefix) ] ; - if $(version) - { - version = $(version:J=.) ; - } - debug-message Detected version '$(version)' ; - } - - local target-requirements = $(condition) ; - - # Add the version, if any, to the target requirements. - if $(version) - { - if ! $(version) in [ feature.values qt ] - { - feature.extend qt : $(version) ; - } - target-requirements += $(version:E=default) ; - } - - local target-os = [ feature.get-values target-os : $(condition) ] ; - if ! $(target-os) - { - target-os ?= [ feature.defaults target-os ] ; - target-os = $(target-os:G=) ; - target-requirements += $(target-os) ; - } - - # Build exact requirements for the tools - local tools-requirements = $(target-requirements:J=/) ; - - debug-message "Details of this Qt configuration:" ; - debug-message " prefix: " '$(prefix:E=)' ; - debug-message " binary path: " '$(bin_prefix:E=)' ; - debug-message " include path:" '$(inc_prefix:E=)' ; - debug-message " library path:" '$(lib_prefix:E=)' ; - debug-message " target requirements:" '$(target-requirements)' ; - debug-message " tool requirements: " '$(tools-requirements)' ; - - # setup the paths for the tools - toolset.flags qt4.moc .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - toolset.flags qt4.rcc .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - toolset.flags qt4.uic .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - - # TODO: 2009-02-12: Better support for directories - # Most likely needed are separate getters for: include,libraries,binaries and sources. - toolset.flags qt4.directory .PREFIX $(tools-requirements) : $(prefix) ; - - # Test for a buildable Qt. 
- if [ glob $(.prefix)/Jamroot ] - { - .bjam-qt = true - - # this will declare QtCore (and qtmain on windows) - add-shared-library QtCore ; - } - else - # Setup common pre-built Qt. - # Special setup for QtCore on which everything depends - { - local usage-requirements = - $(.incprefix) - $(.libprefix) - $(.libprefix) - multi - qt4 ; - - local suffix ; - - # Since Qt-4.2, debug versions on unix have to be built - # separately and therefore have no suffix. - .suffix_version = "" ; - .suffix_debug = "" ; - - # Control flag for auto-configuration of the debug libraries. - # This setup requires Qt 'configure -debug-and-release'. - # Only available on some platforms. - # ToDo: 2009-02-12: Maybe throw this away and - # require separate setup with debug as condition. - .have_separate_debug = FALSE ; - - # Setup other platforms - if $(target-os) in windows cygwin - { - .have_separate_debug = TRUE ; - - # On NT, the libs have "4" suffix, and "d" suffix in debug builds. - .suffix_version = "4" ; - .suffix_debug = "d" ; - - # On Windows we must link against the qtmain library - lib qtmain - : # sources - : # requirements - qtmain$(.suffix_debug) - debug - $(target-requirements) - ; - - lib qtmain - : # sources - : # requirements - qtmain - $(target-requirements) - ; - } - else if $(target-os) = darwin - { - # On MacOS X, both debug and release libraries are available. 
- .suffix_debug = "_debug" ; - - .have_separate_debug = TRUE ; - - alias qtmain ; - } - else - { - alias qtmain : : $(target-requirements) ; - } - - lib QtCore : qtmain - : # requirements - QtCore$(.suffix_version) - $(target-requirements) - : # default-build - : # usage-requirements - QT_CORE_LIB - QT_NO_DEBUG - $(.incprefix)/QtCore - $(usage-requirements) - ; - - if $(.have_separate_debug) = TRUE - { - debug-message Configure debug libraries with suffix '$(.suffix_debug)' ; - - lib QtCore : $(main) - : # requirements - QtCore$(.suffix_debug)$(.suffix_version) - debug - $(target-requirements) - : # default-build - : # usage-requirements - QT_CORE_LIB - $(.incprefix)/QtCore - $(usage-requirements) - ; - } - } - - # Initialising the remaining libraries is canonical - # parameters 'module' : 'depends-on' : 'usage-define' : 'requirements' : 'include' - # 'include' only for non-canonical include paths. - add-shared-library QtGui : QtCore : QT_GUI_LIB : $(target-requirements) ; - add-shared-library QtNetwork : QtCore : QT_NETWORK_LIB : $(target-requirements) ; - add-shared-library QtSql : QtCore : QT_SQL_LIB : $(target-requirements) ; - add-shared-library QtXml : QtCore : QT_XML_LIB : $(target-requirements) ; - - add-shared-library Qt3Support : QtGui QtNetwork QtXml QtSql - : QT_QT3SUPPORT_LIB QT3_SUPPORT - : on $(target-requirements) ; - - # Dummy target to enable "off" and - # "/qt//Qt3Support" at the same time. This enables quick - # switching from one to the other for test/porting purposes. 
- alias Qt3Support : : off $(target-requirements) ; - - # OpenGl Support - add-shared-library QtOpenGL : QtGui : QT_OPENGL_LIB : $(target-requirements) ; - - # SVG-Support (Qt 4.1) - add-shared-library QtSvg : QtXml QtOpenGL : QT_SVG_LIB : $(target-requirements) ; - - # Test-Support (Qt 4.1) - add-shared-library QtTest : QtCore : : $(target-requirements) ; - - # Qt designer library - add-shared-library QtDesigner : QtGui QtXml : : $(target-requirements) ; - add-shared-library QtDesignerComponents : QtGui QtXml : : $(target-requirements) ; - - # Support for dynamic Widgets (Qt 4.1) - add-static-library QtUiTools : QtGui QtXml : $(target-requirements) ; - - # DBus-Support (Qt 4.2) - add-shared-library QtDBus : QtXml : : $(target-requirements) ; - - # Script-Engine (Qt 4.3) - add-shared-library QtScript : QtGui QtXml : QT_SCRIPT_LIB : $(target-requirements) ; - - # Tools for the Script-Engine (Qt 4.5) - add-shared-library QtScriptTools : QtScript : QT_SCRIPTTOOLS_LIB : $(target-requirements) ; - - # WebKit (Qt 4.4) - add-shared-library QtWebKit : QtGui : QT_WEBKIT_LIB : $(target-requirements) ; - - # Phonon Multimedia (Qt 4.4) - add-shared-library phonon : QtGui QtXml : QT_PHONON_LIB : $(target-requirements) ; - - # Multimedia engine (Qt 4.6) - add-shared-library QtMultimedia : QtGui : QT_MULTIMEDIA_LIB : $(target-requirements) ; - - # XmlPatterns-Engine (Qt 4.4) - add-shared-library QtXmlPatterns : QtNetwork : QT_XMLPATTERNS_LIB : $(target-requirements) ; - - # Help-Engine (Qt 4.4) - add-shared-library QtHelp : QtGui QtSql QtXml : : $(target-requirements) ; - add-shared-library QtCLucene : QCore QtSql QtXml : : $(target-requirements) ; - - # QML-Engine (Qt 4.7) - add-shared-library QtDeclarative : QtGui QtXml : : $(target-requirements) ; - - # AssistantClient Support - # Compat library removed in 4.7.0 - # Pre-4.4 help system, use QtHelp for new programs - if $(version) < "4.7" - { - add-shared-library QtAssistantClient : QtGui : : $(target-requirements) : 
QtAssistant ; - } - debug-message "==== Configured Qt-$(version) ====" ; - - project.pop-current ; -} - -rule initialized ( ) -{ - return $(.initialized) ; -} - - - -# This custom generator is needed because in QT4, UI files are translated only -# into H files, and no C++ files are created. Further, the H files need not be -# passed via MOC. The header is used only via inclusion. If we define a standard -# UI -> H generator, Boost.Build will run MOC on H, and then compile the -# resulting cpp. It will give a warning, since output from moc will be empty. -# -# This generator is declared with a UI -> OBJ signature, so it gets invoked when -# linking generator tries to convert sources to OBJ, but it produces target of -# type H. This is non-standard, but allowed. That header won't be mocced. -# -class uic-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(name) - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - } - - local a = [ new action $(sources[1]) : qt4.uic : $(property-set) ] ; - - # The 'ui_' prefix is to match qmake's default behavior. - local target = [ new file-target ui_$(name) : H : $(project) : $(a) ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, we - # need the target to be seen by bjam, so that dependency from sources to - # this generated header is detected -- if jam does not know about this - # target, it won't do anything. - DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } -} - - -class moc-h-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! 
$(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt4.moc.inc : - $(property-set) ] ; - - local target = [ new file-target $(name) : MOC : $(project) : $(a) - ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that dependency from - # sources to this generated header is detected -- if jam does not - # know about this target, it won't do anything. - DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } - } -} - - -class moc-inc-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_H - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt4.moc.inc : - $(property-set) ] ; - - local target = [ new file-target moc_$(name) : CPP : $(project) : - $(a) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that dependency from - # sources to this generated header is detected -- if jam does not - # know about this target, it won't do anything. - DEPENDS all : [ $(target).actualize ] ; - - return [ virtual-target.register $(target) ] ; - } - } -} - - -# Query the installation directory. This is needed in at least two scenarios. -# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt -# plugins to the Qt-Tree. -# -rule directory -{ - return $(.PREFIX) ; -} - -# Add a shared Qt library. 
-rule add-shared-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? ) -{ - add-library $(lib-name) : $(.suffix_version) : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ; -} - -# Add a static Qt library. -rule add-static-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? ) -{ - add-library $(lib-name) : : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ; -} - -# Add a Qt library. -# Static libs are unversioned, whereas shared libs have the major number as suffix. -# Creates both release and debug versions on platforms where both are enabled by Qt configure. -# Flags: -# - lib-name Qt library Name -# - version Qt major number used as shared library suffix (QtCore4.so) -# - depends-on other Qt libraries -# - usage-defines those are set by qmake, so set them when using this library -# - requirements addional requirements -# - include non-canonical include path. The canonical path is $(.incprefix)/$(lib-name). -rule add-library ( lib-name : version ? : depends-on * : usage-defines * : requirements * : include ? ) -{ - if $(.bjam-qt) - { - # Import Qt module - # Eveything will be setup there - alias $(lib-name) - : $(.prefix)//$(lib-name) - : - : - : qt4 ; - } - else - { - local real_include ; - real_include ?= $(include) ; - real_include ?= $(lib-name) ; - - lib $(lib-name) - : # sources - $(depends-on) - : # requirements - $(lib-name)$(version) - $(requirements) - : # default-build - : # usage-requirements - $(usage-defines) - $(.incprefix)/$(real_include) - ; - - if $(.have_separate_debug) = TRUE - { - lib $(lib-name) - : # sources - $(depends-on) - : # requirements - $(lib-name)$(.suffix_debug)$(version) - $(requirements) - debug - : # default-build - : # usage-requirements - $(usage-defines) - $(.incprefix)/$(real_include) - ; - } - } - - # Make library explicit so that a simple qt4 will not bring in everything. 
- # And some components like QtDBus/Phonon may not be available on all platforms. - explicit $(lib-name) ; -} - -# Use $(.BINPREFIX[-1]) for the paths as several tools-requirements can match. -# The exact match is the last one. - -# Get and from current toolset. -flags qt4.moc INCLUDES ; -flags qt4.moc DEFINES ; - -# need a newline for expansion of DEFINES and INCLUDES in the response file. -.nl = " -" ; - -# Processes headers to create Qt MetaObject information. Qt4-moc has its -# c++-parser, so pass INCLUDES and DEFINES. -# We use response file with one INCLUDE/DEFINE per line -# -actions moc -{ - $(.BINPREFIX[-1])/moc -f $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))" -} - -# When moccing files for include only, we don't need -f, otherwise the generated -# code will include the .cpp and we'll get duplicated symbols. -# -actions moc.inc -{ - $(.BINPREFIX[-1])/moc $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))" -} - - -# Generates source files from resource files. -# -actions rcc -{ - $(.BINPREFIX[-1])/rcc $(>) -name $(>:B) -o $(<) -} - - -# Generates user-interface source from .ui files. -# -actions uic -{ - $(.BINPREFIX[-1])/uic $(>) -o $(<) -} - - -# Scanner for .qrc files. Look for the CDATA section of the tag. Ignore -# the "alias" attribute. See http://doc.trolltech.com/qt/resources.html for -# detailed documentation of the Qt Resource System. -# -class qrc-scanner : common-scanner -{ - rule pattern ( ) - { - return "(.*)" ; - } -} - - -# Wrapped files are "included". -scanner.register qrc-scanner : include ; diff --git a/jam-files/boost-build/tools/quickbook-config.jam b/jam-files/boost-build/tools/quickbook-config.jam deleted file mode 100644 index e983a78a..00000000 --- a/jam-files/boost-build/tools/quickbook-config.jam +++ /dev/null @@ -1,44 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. 
-#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for BoostBook tools. To use, just import this module. - -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - local boost-dir = ; - for local R in snapshot cvs 1.33.0 - { - boost-dir += [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Boost.org\\$(R)" - : "InstallRoot" ] ; - } - local quickbook-path = [ GLOB "$(boost-dir)\\bin" "\\Boost\\bin" : quickbook.exe ] ; - quickbook-path = $(quickbook-path[1]) ; - - if $(quickbook-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using quickbook ":" $(quickbook-path) ; - } - using quickbook : $(quickbook-path) ; - } -} -else -{ - local quickbook-path = [ GLOB "/usr/local/bin" "/usr/bin" "/opt/bin" : quickbook ] ; - quickbook-path = $(quickbook-path[1]) ; - - if $(quickbook-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using quickbook ":" $(quickbook-path) ; - } - using quickbook : $(quickbook-path) ; - } -} diff --git a/jam-files/boost-build/tools/quickbook.jam b/jam-files/boost-build/tools/quickbook.jam deleted file mode 100644 index 6de2d42f..00000000 --- a/jam-files/boost-build/tools/quickbook.jam +++ /dev/null @@ -1,361 +0,0 @@ -# -# Copyright (c) 2005 João Abecasis -# Copyright (c) 2005 Vladimir Prus -# Copyright (c) 2006 Rene Rivera -# -# Distributed under the Boost Software License, Version 1.0. (See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) -# - -# This toolset defines a generator to translate QuickBook to BoostBook. It can -# be used to generate nice (!) user documentation in different formats -# (pdf/html/...), from a single text file with simple markup. -# -# The toolset defines the QUICKBOOK type (file extension 'qbk') and -# a QUICKBOOK to XML (BOOSTBOOK) generator. 
-# -# -# =========================================================================== -# Q & A -# =========================================================================== -# -# If you don't know what this is all about, some Q & A will hopefully get you -# up to speed with QuickBook and this toolset. -# -# -# What is QuickBook ? -# -# QuickBook is a WikiWiki style documentation tool geared towards C++ -# documentation using simple rules and markup for simple formatting tasks. -# QuickBook extends the WikiWiki concept. Like the WikiWiki, QuickBook -# documents are simple text files. A single QuickBook document can -# generate a fully linked set of nice HTML and PostScript/PDF documents -# complete with images and syntax-colorized source code. -# -# -# Where can I get QuickBook ? -# -# Quickbook can be found in Boost's repository, under the tools/quickbook -# directory it was added there on Jan 2005, some time after the release of -# Boost v1.32.0 and has been an integral part of the Boost distribution -# since v1.33. -# -# Here's a link to the SVN repository: -# https://svn.boost.org/svn/boost/trunk/tools/quickbook -# -# And to QuickBook's QuickBook-generated docs: -# http://www.boost.org/doc/libs/release/tools/quickbook/index.html -# -# -# How do I use QuickBook and this toolset in my projects ? -# -# The minimal example is: -# -# using boostbook ; -# import quickbook ; -# -# boostbook my_docs : my_docs_source.qbk ; -# -# where my_docs is a target name and my_docs_source.qbk is a QuickBook -# file. The documentation format to be generated is determined by the -# boostbook toolset. By default html documentation should be generated, -# but you should check BoostBook's docs to be sure. -# -# -# What do I need ? -# -# You should start by setting up the BoostBook toolset. Please refer to -# boostbook.jam and the BoostBook documentation for information on how to -# do this. -# -# A QuickBook executable is also needed. 
The toolset will generate this -# executable if it can find the QuickBook sources. The following -# directories will be searched: -# -# BOOST_ROOT/tools/quickbook/ -# BOOST_BUILD_PATH/../../quickbook/ -# -# (BOOST_ROOT and BOOST_BUILD_PATH are environment variables) -# -# If QuickBook sources are not found the toolset will then try to use -# the shell command 'quickbook'. -# -# -# How do I provide a custom QuickBook executable ? -# -# You may put the following in your user-config.jam or site-config.jam: -# -# using quickbook : /path/to/quickbook ; -# -# or, if 'quickbook' can be found in your PATH, -# -# using quickbook : quickbook ; -# -# -# For convenience three alternatives are tried to get a QuickBook executable: -# -# 1. If the user points us to the a QuickBook executable, that is used. -# -# 2. Otherwise, we search for the QuickBook sources and compile QuickBook -# using the default toolset. -# -# 3. As a last resort, we rely on the shell for finding 'quickbook'. -# - -import boostbook ; -import "class" : new ; -import feature ; -import generators ; -import toolset ; -import type ; -import scanner ; -import project ; -import targets ; -import build-system ; -import path ; -import common ; -import errors ; - -# The one and only QUICKBOOK type! -type.register QUICKBOOK : qbk ; - -# shell command to run QuickBook -# targets to build QuickBook from sources. -feature.feature : : free ; -feature.feature : : free dependency ; -feature.feature : : free ; -feature.feature : : free ; -feature.feature : : free ; - - -# quickbook-binary-generator handles generation of the QuickBook executable, by -# marking it as a dependency for QuickBook docs. -# -# If the user supplied the QuickBook command that will be used. -# -# Otherwise we search some sensible places for the QuickBook sources and compile -# from scratch using the default toolset. -# -# As a last resort we rely on the shell to find 'quickbook'. 
-# -class quickbook-binary-generator : generator -{ - import modules path targets quickbook ; - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - quickbook.freeze-config ; - # QuickBook invocation command and dependencies. - local quickbook-binary = [ modules.peek quickbook : .quickbook-binary ] ; - local quickbook-binary-dependencies ; - - if ! $(quickbook-binary) - { - # If the QuickBook source directory was found, mark its main target - # as a dependency for the current project. Otherwise, try to find - # 'quickbook' in user's PATH - local quickbook-dir = [ modules.peek quickbook : .quickbook-dir ] ; - if $(quickbook-dir) - { - # Get the main-target in QuickBook directory. - local quickbook-main-target = [ targets.resolve-reference $(quickbook-dir) : $(project) ] ; - - # The first element are actual targets, the second are - # properties found in target-id. We do not care about these - # since we have passed the id ourselves. - quickbook-main-target = - [ $(quickbook-main-target[1]).main-target quickbook ] ; - - quickbook-binary-dependencies = - [ $(quickbook-main-target).generate [ $(property-set).propagated ] ] ; - - # Ignore usage-requirements returned as first element. - quickbook-binary-dependencies = $(quickbook-binary-dependencies[2-]) ; - - # Some toolsets generate extra targets (e.g. RSP). We must mark - # all targets as dependencies for the project, but we will only - # use the EXE target for quickbook-to-boostbook translation. - for local target in $(quickbook-binary-dependencies) - { - if [ $(target).type ] = EXE - { - quickbook-binary = - [ path.native - [ path.join - [ $(target).path ] - [ $(target).name ] - ] - ] ; - } - } - } - } - - # Add $(quickbook-binary-dependencies) as a dependency of the current - # project and set it as the feature for the - # quickbook-to-boostbook rule, below. 
- property-set = [ $(property-set).add-raw - $(quickbook-binary-dependencies) - $(quickbook-binary) - $(quickbook-binary-dependencies) - ] ; - - return [ generator.run $(project) $(name) : $(property-set) : $(sources) : $(multiple) ] ; - } -} - - -# Define a scanner for tracking QBK include dependencies. -# -class qbk-scanner : common-scanner -{ - rule pattern ( ) - { - return "\\[[ ]*include[ ]+([^]]+)\\]" - "\\[[ ]*include:[a-zA-Z0-9_]+[ ]+([^]]+)\\]" - "\\[[ ]*import[ ]+([^]]+)\\]" ; - } -} - - -scanner.register qbk-scanner : include ; - -type.set-scanner QUICKBOOK : qbk-scanner ; - - -# Initialization of toolset. -# -# Parameters: -# command ? -> path to QuickBook executable. -# -# When command is not supplied toolset will search for QuickBook directory and -# compile the executable from source. If that fails we still search the path for -# 'quickbook'. -# -rule init ( - command ? # path to the QuickBook executable. - ) -{ - if $(command) - { - if $(.config-frozen) - { - errors.user-error "quickbook: configuration cannot be changed after it has been used." ; - } - .command = $(command) ; - } -} - -rule freeze-config ( ) -{ - if ! $(.config-frozen) - { - .config-frozen = true ; - - # QuickBook invocation command and dependencies. - - .quickbook-binary = $(.command) ; - - if $(.quickbook-binary) - { - # Use user-supplied command. - .quickbook-binary = [ common.get-invocation-command quickbook : quickbook : $(.quickbook-binary) ] ; - } - else - { - # Search for QuickBook sources in sensible places, like - # $(BOOST_ROOT)/tools/quickbook - # $(BOOST_BUILD_PATH)/../../quickbook - - # And build quickbook executable from sources. - - local boost-root = [ modules.peek : BOOST_ROOT ] ; - local boost-build-path = [ build-system.location ] ; - - if $(boost-root) - { - .quickbook-dir += [ path.join $(boost-root) tools ] ; - } - - if $(boost-build-path) - { - .quickbook-dir += $(boost-build-path)/../.. 
; - } - - .quickbook-dir = [ path.glob $(.quickbook-dir) : quickbook ] ; - - # If the QuickBook source directory was found, mark its main target - # as a dependency for the current project. Otherwise, try to find - # 'quickbook' in user's PATH - if $(.quickbook-dir) - { - .quickbook-dir = [ path.make $(.quickbook-dir[1]) ] ; - } - else - { - ECHO "QuickBook warning: The path to the quickbook executable was" ; - ECHO " not provided. Additionally, couldn't find QuickBook" ; - ECHO " sources searching in" ; - ECHO " * BOOST_ROOT/tools/quickbook" ; - ECHO " * BOOST_BUILD_PATH/../../quickbook" ; - ECHO " Will now try to find a precompiled executable by searching" ; - ECHO " the PATH for 'quickbook'." ; - ECHO " To disable this warning in the future, or to completely" ; - ECHO " avoid compilation of quickbook, you can explicitly set the" ; - ECHO " path to a quickbook executable command in user-config.jam" ; - ECHO " or site-config.jam with the call" ; - ECHO " using quickbook : /path/to/quickbook ;" ; - - # As a last resort, search for 'quickbook' command in path. Note - # that even if the 'quickbook' command is not found, - # get-invocation-command will still return 'quickbook' and might - # generate an error while generating the virtual-target. - - .quickbook-binary = [ common.get-invocation-command quickbook : quickbook ] ; - } - } - } -} - - -generators.register [ new quickbook-binary-generator quickbook.quickbook-to-boostbook : QUICKBOOK : XML ] ; - - -# shell command to run QuickBook -# targets to build QuickBook from sources. 
-toolset.flags quickbook.quickbook-to-boostbook QB-COMMAND ; -toolset.flags quickbook.quickbook-to-boostbook QB-DEPENDENCIES ; -toolset.flags quickbook.quickbook-to-boostbook INCLUDES ; -toolset.flags quickbook.quickbook-to-boostbook QB-DEFINES ; -toolset.flags quickbook.quickbook-to-boostbook QB-INDENT ; -toolset.flags quickbook.quickbook-to-boostbook QB-LINE-WIDTH ; - - -rule quickbook-to-boostbook ( target : source : properties * ) -{ - # Signal dependency of quickbook sources on - # upon invocation of quickbook-to-boostbook. - DEPENDS $(target) : [ on $(target) return $(QB-DEPENDENCIES) ] ; -} - - -actions quickbook-to-boostbook -{ - "$(QB-COMMAND)" -I"$(INCLUDES)" -D"$(QB-DEFINES)" --indent="$(QB-INDENT)" --linewidth="$(QB-LINE-WIDTH)" --output-file="$(1)" "$(2)" -} - - -# Declare a main target to convert a quickbook source into a boostbook XML file. -# -rule to-boostbook ( target-name : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(target-name) : $(project) : XML - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} diff --git a/jam-files/boost-build/tools/rc.jam b/jam-files/boost-build/tools/rc.jam deleted file mode 100644 index 9964d339..00000000 --- a/jam-files/boost-build/tools/rc.jam +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (C) Andre Hentz 2003. Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. -# -# Copyright (c) 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import type ; -import generators ; -import feature ; -import errors ; -import scanner ; -import toolset : flags ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -type.register RC : rc ; - -rule init ( ) -{ -} - -# Configures a new resource compilation command specific to a condition, -# usually a toolset selection condition. The possible options are: -# -# * (rc|windres) - Indicates the type of options the command -# accepts. -# -# Even though the arguments are all optional, only when a command, condition, -# and at minimum the rc-type option are given will the command be configured. -# This is so that callers don't have to check auto-configuration values -# before calling this. And still get the functionality of build failures when -# the resource compiler can't be found. -# -rule configure ( command ? : condition ? : options * ) -{ - local rc-type = [ feature.get-values : $(options) ] ; - - if $(command) && $(condition) && $(rc-type) - { - flags rc.compile.resource .RC $(condition) : $(command) ; - flags rc.compile.resource .RC_TYPE $(condition) : $(rc-type:L) ; - flags rc.compile.resource DEFINES ; - flags rc.compile.resource INCLUDES ; - if $(.debug-configuration) - { - ECHO notice: using rc compiler :: $(condition) :: $(command) ; - } - } -} - -rule compile.resource ( target : sources * : properties * ) -{ - local rc-type = [ on $(target) return $(.RC_TYPE) ] ; - rc-type ?= null ; - compile.resource.$(rc-type) $(target) : $(sources[1]) ; -} - -actions compile.resource.rc -{ - "$(.RC)" -l 0x409 "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -fo "$(<)" "$(>)" -} - -actions compile.resource.windres -{ - "$(.RC)" "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -o "$(<)" -i "$(>)" -} - -actions quietly compile.resource.null -{ - as /dev/null -o "$(<)" -} - -# Since it's a common 
practice to write -# exe hello : hello.cpp hello.rc -# we change the name of object created from RC file, to -# avoid conflict with hello.cpp. -# The reason we generate OBJ and not RES, is that gcc does not -# seem to like RES files, but works OK with OBJ. -# See http://article.gmane.org/gmane.comp.lib.boost.build/5643/ -# -# Using 'register-c-compiler' adds the build directory to INCLUDES -generators.register-c-compiler rc.compile.resource : RC : OBJ(%_res) ; - -# Register scanner for resources -class res-scanner : scanner -{ - import regex virtual-target path scanner ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - self.includes = $(includes) ; - } - - rule pattern ( ) - { - return "(([^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+([^ \"]+|\"[^\"]+\"))|(#include[ ]*(<[^<]+>|\"[^\"]+\")))" ; - } - - rule process ( target : matches * : binding ) - { - local angle = [ regex.transform $(matches) : "#include[ ]*<([^<]+)>" ] ; - local quoted = [ regex.transform $(matches) : "#include[ ]*\"([^\"]+)\"" ] ; - local res = [ regex.transform $(matches) : "[^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+(([^ \"]+)|\"([^\"]+)\")" : 3 4 ] ; - - # Icons and other includes may referenced as - # - # IDR_MAINFRAME ICON "res\\icon.ico" - # - # so we have to replace double backslashes to single ones. - res = [ regex.replace-list $(res) : "\\\\\\\\" : "/" ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. 
- # We don't need this extra information for angle includes, - # since they should not depend on including file (we can't - # get literal "." in include path). - local g2 = $(g)"#"$(b) ; - - angle = $(angle:G=$(g)) ; - quoted = $(quoted:G=$(g2)) ; - res = $(res:G=$(g2)) ; - - local all = $(angle) $(quoted) ; - - INCLUDES $(target) : $(all) ; - DEPENDS $(target) : $(res) ; - NOCARE $(all) $(res) ; - SEARCH on $(angle) = $(self.includes:G=) ; - SEARCH on $(quoted) = $(b) $(self.includes:G=) ; - SEARCH on $(res) = $(b) $(self.includes:G=) ; - - # Just propagate current scanner to includes, in a hope - # that includes do not change scanners. - scanner.propagate $(__name__) : $(angle) $(quoted) : $(target) ; - } -} - -scanner.register res-scanner : include ; -type.set-scanner RC : res-scanner ; diff --git a/jam-files/boost-build/tools/rc.py b/jam-files/boost-build/tools/rc.py deleted file mode 100644 index 0b82d231..00000000 --- a/jam-files/boost-build/tools/rc.py +++ /dev/null @@ -1,189 +0,0 @@ -# Status: being ported by Steven Watanabe -# Base revision: 47077 -# -# Copyright (C) Andre Hentz 2003. Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. -# -# Copyright (c) 2006 Rene Rivera. -# -# Copyright (c) 2008 Steven Watanabe -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##import type ; -##import generators ; -##import feature ; -##import errors ; -##import scanner ; -##import toolset : flags ; - -from b2.build import type, toolset, generators, scanner, feature -from b2.tools import builtin -from b2.util import regex -from b2.build.toolset import flags -from b2.manager import get_manager - -__debug = None - -def debug(): - global __debug - if __debug is None: - __debug = "--debug-configuration" in bjam.variable("ARGV") - return __debug - -type.register('RC', ['rc']) - -def init(): - pass - -def configure (command = None, condition = None, options = None): - """ - Configures a new resource compilation command specific to a condition, - usually a toolset selection condition. The possible options are: - - * (rc|windres) - Indicates the type of options the command - accepts. - - Even though the arguments are all optional, only when a command, condition, - and at minimum the rc-type option are given will the command be configured. - This is so that callers don't have to check auto-configuration values - before calling this. And still get the functionality of build failures when - the resource compiler can't be found. - """ - rc_type = feature.get_values('', options) - if rc_type: - assert(len(rc_type) == 1) - rc_type = rc_type[0] - - if command and condition and rc_type: - flags('rc.compile.resource', '.RC', condition, command) - flags('rc.compile.resource', '.RC_TYPE', condition, rc_type.lower()) - flags('rc.compile.resource', 'DEFINES', [], ['']) - flags('rc.compile.resource', 'INCLUDES', [], ['']) - if debug(): - print 'notice: using rc compiler ::', condition, '::', command - -engine = get_manager().engine() - -class RCAction: - """Class representing bjam action defined from Python. 
- The function must register the action to execute.""" - - def __init__(self, action_name, function): - self.action_name = action_name - self.function = function - - def __call__(self, targets, sources, property_set): - if self.function: - self.function(targets, sources, property_set) - -# FIXME: What is the proper way to dispatch actions? -def rc_register_action(action_name, function = None): - global engine - if engine.actions.has_key(action_name): - raise "Bjam action %s is already defined" % action_name - engine.actions[action_name] = RCAction(action_name, function) - -def rc_compile_resource(targets, sources, properties): - rc_type = bjam.call('get-target-variable', targets, '.RC_TYPE') - global engine - engine.set_update_action('rc.compile.resource.' + rc_type, targets, sources, properties) - -rc_register_action('rc.compile.resource', rc_compile_resource) - - -engine.register_action( - 'rc.compile.resource.rc', - '"$(.RC)" -l 0x409 "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -fo "$(<)" "$(>)"') - -engine.register_action( - 'rc.compile.resource.windres', - '"$(.RC)" "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -o "$(<)" -i "$(>)"') - -# FIXME: this was originally declared quietly -engine.register_action( - 'compile.resource.null', - 'as /dev/null -o "$(<)"') - -# Since it's a common practice to write -# exe hello : hello.cpp hello.rc -# we change the name of object created from RC file, to -# avoid conflict with hello.cpp. -# The reason we generate OBJ and not RES, is that gcc does not -# seem to like RES files, but works OK with OBJ. 
-# See http://article.gmane.org/gmane.comp.lib.boost.build/5643/ -# -# Using 'register-c-compiler' adds the build directory to INCLUDES -# FIXME: switch to generators -builtin.register_c_compiler('rc.compile.resource', ['RC'], ['OBJ(%_res)'], []) - -__angle_include_re = "#include[ ]*<([^<]+)>" - -# Register scanner for resources -class ResScanner(scanner.Scanner): - - def __init__(self, includes): - scanner.__init__ ; - self.includes = includes - - def pattern(self): - return "(([^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)" +\ - "[ ]+([^ \"]+|\"[^\"]+\"))|(#include[ ]*(<[^<]+>|\"[^\"]+\")))" ; - - def process(self, target, matches, binding): - - angle = regex.transform(matches, "#include[ ]*<([^<]+)>") - quoted = regex.transform(matches, "#include[ ]*\"([^\"]+)\"") - res = regex.transform(matches, - "[^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)" +\ - "[ ]+(([^ \"]+)|\"([^\"]+)\")", [3, 4]) - - # Icons and other includes may referenced as - # - # IDR_MAINFRAME ICON "res\\icon.ico" - # - # so we have to replace double backslashes to single ones. - res = [ re.sub(r'\\\\', '/', match) for match in res ] - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - g = bjam.call('get-target-variable', target, 'HDRGRIST') - b = os.path.normalize_path(os.path.dirname(binding)) - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. - # We don't need this extra information for angle includes, - # since they should not depend on including file (we can't - # get literal "." in include path). 
- g2 = g + "#" + b - - g = "<" + g + ">" - g2 = "<" + g2 + ">" - angle = [g + x for x in angle] - quoted = [g2 + x for x in quoted] - res = [g2 + x for x in res] - - all = angle + quoted - - bjam.call('mark-included', target, all) - - engine = get_manager().engine() - - engine.add_dependency(target, res) - bjam.call('NOCARE', all + res) - engine.set_target_variable(angle, 'SEARCH', ungrist(self.includes)) - engine.set_target_variable(quoted, 'SEARCH', b + ungrist(self.includes)) - engine.set_target_variable(res, 'SEARCH', b + ungrist(self.includes)) ; - - # Just propagate current scanner to includes, in a hope - # that includes do not change scanners. - get_manager().scanners().propagate(self, angle + quoted) - -scanner.register(ResScanner, 'include') -type.set_scanner('RC', ResScanner) diff --git a/jam-files/boost-build/tools/stage.jam b/jam-files/boost-build/tools/stage.jam deleted file mode 100644 index 296e7558..00000000 --- a/jam-files/boost-build/tools/stage.jam +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2005, 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'install' rule, used to copy a set of targets to a -# single location. - -import targets ; -import "class" : new ; -import errors ; -import type ; -import generators ; -import feature ; -import project ; -import virtual-target ; -import path ; -import types/register ; - - -feature.feature : off on : incidental ; -feature.feature : : free incidental ; -feature.feature : : free path ; -feature.feature : : free incidental ; - -# If 'on', version symlinks for shared libraries will not be created. Affects -# Unix builds only. 
-feature.feature : on : optional incidental ; - - -class install-target-class : basic-target -{ - import feature ; - import project ; - import type ; - import errors ; - import generators ; - import path ; - import stage ; - import "class" : new ; - import property ; - import property-set ; - - rule __init__ ( name-and-dir : project : sources * : requirements * : default-build * ) - { - basic-target.__init__ $(name-and-dir) : $(project) : $(sources) : - $(requirements) : $(default-build) ; - } - - # If is not set, sets it based on the project data. - # - rule update-location ( property-set ) - { - local loc = [ $(property-set).get ] ; - if ! $(loc) - { - loc = [ path.root $(self.name) [ $(self.project).get location ] ] ; - property-set = [ $(property-set).add-raw $(loc:G=) ] ; - } - - return $(property-set) ; - } - - # Takes a target that is installed and a property set which is used when - # installing. - # - rule adjust-properties ( target : build-property-set ) - { - local ps-raw ; - local a = [ $(target).action ] ; - if $(a) - { - local ps = [ $(a).properties ] ; - ps-raw = [ $(ps).raw ] ; - - # Unless true is in properties, which can happen - # only if the user has explicitly requested it, nuke all - # properties. - if [ $(build-property-set).get ] != true - { - ps-raw = [ property.change $(ps-raw) : ] ; - } - - # If any properties were specified for installing, add - # them. - local l = [ $(build-property-set).get ] ; - ps-raw += $(l:G=) ; - - # Also copy feature from current build set, to be used - # for relinking. - local l = [ $(build-property-set).get ] ; - ps-raw += $(l:G=) ; - - # Remove the feature on original targets. - ps-raw = [ property.change $(ps-raw) : ] ; - - # And . If stage target has another stage target in - # sources, then we shall get virtual targets with the - # property set. 
- ps-raw = [ property.change $(ps-raw) : ] ; - } - - local d = [ $(build-property-set).get ] ; - ps-raw += $(d:G=) ; - - local d = [ $(build-property-set).get ] ; - ps-raw += $(d:G=) ; - - local ns = [ $(build-property-set).get ] ; - ps-raw += $(ns:G=) ; - - local d = [ $(build-property-set).get ] ; - # Make the path absolute: we shall use it to compute relative paths and - # making the path absolute will help. - if $(d) - { - d = [ path.root $(d) [ path.pwd ] ] ; - ps-raw += $(d:G=) ; - } - - if $(ps-raw) - { - return [ property-set.create $(ps-raw) ] ; - } - else - { - return [ property-set.empty ] ; - } - } - - rule construct ( name : source-targets * : property-set ) - { - source-targets = [ targets-to-stage $(source-targets) : - $(property-set) ] ; - - property-set = [ update-location $(property-set) ] ; - - local ename = [ $(property-set).get ] ; - - if $(ename) && $(source-targets[2]) - { - errors.error "When property is used in 'install', only one" - "source is allowed" ; - } - - local result ; - for local i in $(source-targets) - { - local staged-targets ; - - local new-properties = [ adjust-properties $(i) : - $(property-set) ] ; - - # See if something special should be done when staging this type. It - # is indicated by the presence of a special "INSTALLED_" type. - local t = [ $(i).type ] ; - if $(t) && [ type.registered INSTALLED_$(t) ] - { - if $(ename) - { - errors.error "In 'install': property specified with target that requires relinking." ; - } - else - { - local targets = [ generators.construct $(self.project) - $(name) : INSTALLED_$(t) : $(new-properties) : $(i) ] ; - staged-targets += $(targets[2-]) ; - } - } - else - { - staged-targets = [ stage.copy-file $(self.project) $(ename) : - $(i) : $(new-properties) ] ; - } - - if ! 
$(staged-targets) - { - errors.error "Unable to generate staged version of " [ $(source).str ] ; - } - - for t in $(staged-targets) - { - result += [ virtual-target.register $(t) ] ; - } - } - - return [ property-set.empty ] $(result) ; - } - - # Given the list of source targets explicitly passed to 'stage', returns the - # list of targets which must be staged. - # - rule targets-to-stage ( source-targets * : property-set ) - { - local result ; - - # Traverse the dependencies, if needed. - if [ $(property-set).get ] = "on" - { - source-targets = [ collect-targets $(source-targets) ] ; - } - - # Filter the target types, if needed. - local included-types = [ $(property-set).get ] ; - for local r in $(source-targets) - { - local ty = [ $(r).type ] ; - if $(ty) - { - # Do not stage searched libs. - if $(ty) != SEARCHED_LIB - { - if $(included-types) - { - if [ include-type $(ty) : $(included-types) ] - { - result += $(r) ; - } - } - else - { - result += $(r) ; - } - } - } - else if ! $(included-types) - { - # Don't install typeless target if there is an explicit list of - # allowed types. - result += $(r) ; - } - } - - return $(result) ; - } - - # CONSIDER: figure out why we can not use virtual-target.traverse here. - # - rule collect-targets ( targets * ) - { - # Find subvariants - local s ; - for local t in $(targets) - { - s += [ $(t).creating-subvariant ] ; - } - s = [ sequence.unique $(s) ] ; - - local result = [ new set ] ; - $(result).add $(targets) ; - - for local i in $(s) - { - $(i).all-referenced-targets $(result) ; - } - local result2 ; - for local r in [ $(result).list ] - { - if $(r:G) != - { - result2 += $(r:G=) ; - } - } - DELETE_MODULE $(result) ; - result = [ sequence.unique $(result2) ] ; - } - - # Returns true iff 'type' is subtype of some element of 'types-to-include'. - # - local rule include-type ( type : types-to-include * ) - { - local found ; - while $(types-to-include) && ! 
$(found) - { - if [ type.is-subtype $(type) $(types-to-include[1]) ] - { - found = true ; - } - types-to-include = $(types-to-include[2-]) ; - } - - return $(found) ; - } -} - - -# Creates a copy of target 'source'. The 'properties' object should have a -# property which specifies where the target must be placed. -# -rule copy-file ( project name ? : source : properties ) -{ - name ?= [ $(source).name ] ; - local relative ; - - local new-a = [ new non-scanning-action $(source) : common.copy : - $(properties) ] ; - local source-root = [ $(properties).get ] ; - if $(source-root) - { - # Get the real path of the target. We probably need to strip relative - # path from the target name at construction. - local path = [ $(source).path ] ; - path = [ path.root $(name:D) $(path) ] ; - # Make the path absolute. Otherwise, it would be hard to compute the - # relative path. The 'source-root' is already absolute, see the - # 'adjust-properties' method above. - path = [ path.root $(path) [ path.pwd ] ] ; - - relative = [ path.relative-to $(source-root) $(path) ] ; - } - - # Note: Using $(name:D=$(relative)) might be faster here, but then we would - # need to explicitly check that relative is not ".", otherwise we might get - # paths like '/boost/.', try to create it and mkdir would obviously - # fail. - name = [ path.join $(relative) $(name:D=) ] ; - - return [ new file-target $(name) exact : [ $(source).type ] : $(project) : - $(new-a) ] ; -} - - -rule symlink ( name : project : source : properties ) -{ - local a = [ new action $(source) : symlink.ln : $(properties) ] ; - return [ new file-target $(name) exact : [ $(source).type ] : $(project) : - $(a) ] ; -} - - -rule relink-file ( project : source : property-set ) -{ - local action = [ $(source).action ] ; - local cloned-action = [ virtual-target.clone-action $(action) : $(project) : - "" : $(property-set) ] ; - return [ $(cloned-action).targets ] ; -} - - -# Declare installed version of the EXE type. 
Generator for this type will cause -# relinking to the new location. -type.register INSTALLED_EXE : : EXE ; - - -class installed-exe-generator : generator -{ - import type ; - import property-set ; - import modules ; - import stage ; - - rule __init__ ( ) - { - generator.__init__ install-exe : EXE : INSTALLED_EXE ; - } - - rule run ( project name ? : property-set : source : multiple ? ) - { - local need-relink ; - - if [ $(property-set).get ] in NT CYGWIN || - [ $(property-set).get ] in windows cygwin - { - } - else - { - # See if the dll-path properties are not changed during - # install. If so, copy, don't relink. - local a = [ $(source).action ] ; - local p = [ $(a).properties ] ; - local original = [ $(p).get ] ; - local current = [ $(property-set).get ] ; - - if $(current) != $(original) - { - need-relink = true ; - } - } - - - if $(need-relink) - { - return [ stage.relink-file $(project) - : $(source) : $(property-set) ] ; - } - else - { - return [ stage.copy-file $(project) - : $(source) : $(property-set) ] ; - } - } -} - - -generators.register [ new installed-exe-generator ] ; - - -# Installing a shared link on Unix might cause a creation of versioned symbolic -# links. -type.register INSTALLED_SHARED_LIB : : SHARED_LIB ; - - -class installed-shared-lib-generator : generator -{ - import type ; - import property-set ; - import modules ; - import stage ; - - rule __init__ ( ) - { - generator.__init__ install-shared-lib : SHARED_LIB - : INSTALLED_SHARED_LIB ; - } - - rule run ( project name ? : property-set : source : multiple ? ) - { - if [ $(property-set).get ] in NT CYGWIN || - [ $(property-set).get ] in windows cygwin - { - local copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - return [ virtual-target.register $(copied) ] ; - } - else - { - local a = [ $(source).action ] ; - local copied ; - if ! $(a) - { - # Non-derived file, just copy. 
- copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - } - else - { - local cp = [ $(a).properties ] ; - local current-dll-path = [ $(cp).get ] ; - local new-dll-path = [ $(property-set).get ] ; - - if $(current-dll-path) != $(new-dll-path) - { - # Rpath changed, need to relink. - copied = [ stage.relink-file $(project) : $(source) : - $(property-set) ] ; - } - else - { - copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - } - } - - copied = [ virtual-target.register $(copied) ] ; - - local result = $(copied) ; - # If the name is in the form NNN.XXX.YYY.ZZZ, where all 'X', 'Y' and - # 'Z' are numbers, we need to create NNN.XXX and NNN.XXX.YYY - # symbolic links. - local m = [ MATCH (.*)\\.([0123456789]+)\\.([0123456789]+)\\.([0123456789]+)$ - : [ $(copied).name ] ] ; - if $(m) - { - # Symlink without version at all is used to make - # -lsome_library work. - result += [ stage.symlink $(m[1]) : $(project) : $(copied) : - $(property-set) ] ; - - # Symlinks of some libfoo.N and libfoo.N.M are used so that - # library can found at runtime, if libfoo.N.M.X has soname of - # libfoo.N. That happens when the library makes some binary - # compatibility guarantees. If not, it is possible to skip those - # symlinks. - local suppress = - [ $(property-set).get ] ; - - if $(suppress) != "on" - { - result += [ stage.symlink $(m[1]).$(m[2]) : $(project) - : $(copied) : $(property-set) ] ; - result += [ stage.symlink $(m[1]).$(m[2]).$(m[3]) : $(project) - : $(copied) : $(property-set) ] ; - } - } - - return $(result) ; - } - } -} - -generators.register [ new installed-shared-lib-generator ] ; - - -# Main target rule for 'install'. -# -rule install ( name : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - # Unless the user has explicitly asked us to hardcode dll paths, add - # false in requirements, to override default value. - if ! 
true in $(requirements) - { - requirements += false ; - } - - if in $(requirements:G) - { - errors.user-error - "The property is not allowed for the 'install' rule" ; - } - - targets.main-target-alternative - [ new install-target-class $(name) : $(project) - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - - -IMPORT $(__name__) : install : : install ; -IMPORT $(__name__) : install : : stage ; diff --git a/jam-files/boost-build/tools/stage.py b/jam-files/boost-build/tools/stage.py deleted file mode 100644 index 25eccbe5..00000000 --- a/jam-files/boost-build/tools/stage.py +++ /dev/null @@ -1,350 +0,0 @@ -# Status: ported. -# Base revision 64444. -# -# Copyright 2003 Dave Abrahams -# Copyright 2005, 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'install' rule, used to copy a set of targets to a -# single location. - -import b2.build.feature as feature -import b2.build.targets as targets -import b2.build.property as property -import b2.build.property_set as property_set -import b2.build.generators as generators -import b2.build.virtual_target as virtual_target - -from b2.manager import get_manager -from b2.util.sequence import unique -from b2.util import bjam_signature - -import b2.build.type - -import os.path -import re -import types - -feature.feature('install-dependencies', ['off', 'on'], ['incidental']) -feature.feature('install-type', [], ['free', 'incidental']) -feature.feature('install-source-root', [], ['free', 'path']) -feature.feature('so-version', [], ['free', 'incidental']) - -# If 'on', version symlinks for shared libraries will not be created. Affects -# Unix builds only. 
-feature.feature('install-no-version-symlinks', ['on'], ['optional', 'incidental']) - -class InstallTargetClass(targets.BasicTarget): - - def update_location(self, ps): - """If is not set, sets it based on the project data.""" - - loc = ps.get('location') - if not loc: - loc = os.path.join(self.project().get('location'), self.name()) - ps = ps.add_raw(["" + loc]) - - return ps - - def adjust_properties(self, target, build_ps): - a = target.action() - properties = [] - if a: - ps = a.properties() - properties = ps.all() - - # Unless true is in properties, which can happen - # only if the user has explicitly requested it, nuke all - # properties. - - if build_ps.get('hardcode-dll-paths') != ['true']: - properties = [p for p in properties if p.feature().name() != 'dll-path'] - - # If any properties were specified for installing, add - # them. - properties.extend(build_ps.get_properties('dll-path')) - - # Also copy feature from current build set, to be used - # for relinking. - properties.extend(build_ps.get_properties('linkflags')) - - # Remove the feature on original targets. - # And . If stage target has another stage target in - # sources, then we shall get virtual targets with the - # property set. - properties = [p for p in properties - if not p.feature().name() in ['tag', 'location']] - - properties.extend(build_ps.get_properties('dependency')) - - properties.extend(build_ps.get_properties('location')) - - - properties.extend(build_ps.get_properties('install-no-version-symlinks')) - - d = build_ps.get_properties('install-source-root') - - # Make the path absolute: we shall use it to compute relative paths and - # making the path absolute will help. 
- if d: - p = d[0] - properties.append(property.Property(p.feature(), os.path.abspath(p.value()))) - - return property_set.create(properties) - - - def construct(self, name, source_targets, ps): - - source_targets = self.targets_to_stage(source_targets, ps) - ps = self.update_location(ps) - - ename = ps.get('name') - if ename: - ename = ename[0] - if ename and len(source_targets) > 1: - get_manager().errors()("When property is used in 'install', only one source is allowed") - - result = [] - - for i in source_targets: - - staged_targets = [] - new_ps = self.adjust_properties(i, ps) - - # See if something special should be done when staging this type. It - # is indicated by the presence of a special "INSTALLED_" type. - t = i.type() - if t and b2.build.type.registered("INSTALLED_" + t): - - if ename: - get_manager().errors()("In 'install': property specified with target that requires relinking.") - else: - (r, targets) = generators.construct(self.project(), name, "INSTALLED_" + t, - new_ps, [i]) - assert isinstance(r, property_set.PropertySet) - staged_targets.extend(targets) - - else: - staged_targets.append(copy_file(self.project(), ename, i, new_ps)) - - if not staged_targets: - get_manager().errors()("Unable to generate staged version of " + i) - - result.extend(get_manager().virtual_targets().register(t) for t in staged_targets) - - return (property_set.empty(), result) - - def targets_to_stage(self, source_targets, ps): - """Given the list of source targets explicitly passed to 'stage', returns the - list of targets which must be staged.""" - - result = [] - - # Traverse the dependencies, if needed. - if ps.get('install-dependencies') == ['on']: - source_targets = self.collect_targets(source_targets) - - # Filter the target types, if needed. - included_types = ps.get('install-type') - for r in source_targets: - ty = r.type() - if ty: - # Do not stage searched libs. 
- if ty != "SEARCHED_LIB": - if included_types: - if self.include_type(ty, included_types): - result.append(r) - else: - result.append(r) - elif not included_types: - # Don't install typeless target if there is an explicit list of - # allowed types. - result.append(r) - - return result - - # CONSIDER: figure out why we can not use virtual-target.traverse here. - # - def collect_targets(self, targets): - - s = [t.creating_subvariant() for t in targets] - s = unique(s) - - result = set(targets) - for i in s: - i.all_referenced_targets(result) - - result2 = [] - for r in result: - if isinstance(r, property.Property): - - if r.feature().name() != 'use': - result2.append(r.value()) - else: - result2.append(r) - result2 = unique(result2) - return result2 - - # Returns true iff 'type' is subtype of some element of 'types-to-include'. - # - def include_type(self, type, types_to_include): - return any(b2.build.type.is_subtype(type, ti) for ti in types_to_include) - -# Creates a copy of target 'source'. The 'properties' object should have a -# property which specifies where the target must be placed. -# -def copy_file(project, name, source, ps): - - if not name: - name = source.name() - - relative = "" - - new_a = virtual_target.NonScanningAction([source], "common.copy", ps) - source_root = ps.get('install-source-root') - if source_root: - source_root = source_root[0] - # Get the real path of the target. We probably need to strip relative - # path from the target name at construction. - path = os.path.join(source.path(), os.path.dirname(name)) - # Make the path absolute. Otherwise, it would be hard to compute the - # relative path. The 'source-root' is already absolute, see the - # 'adjust-properties' method above. 
- path = os.path.abspath(path) - - relative = os.path.relpath(path, source_root) - - name = os.path.join(relative, os.path.basename(name)) - return virtual_target.FileTarget(name, source.type(), project, new_a, exact=True) - -def symlink(name, project, source, ps): - a = virtual_target.Action([source], "symlink.ln", ps) - return virtual_target.FileTarget(name, source.type(), project, a, exact=True) - -def relink_file(project, source, ps): - action = source.action() - cloned_action = virtual_target.clone_action(action, project, "", ps) - targets = cloned_action.targets() - # We relink only on Unix, where exe or shared lib is always a single file. - assert len(targets) == 1 - return targets[0] - - -# Declare installed version of the EXE type. Generator for this type will cause -# relinking to the new location. -b2.build.type.register('INSTALLED_EXE', [], 'EXE') - -class InstalledExeGenerator(generators.Generator): - - def __init__(self): - generators.Generator.__init__(self, "install-exe", False, ['EXE'], ['INSTALLED_EXE']) - - def run(self, project, name, ps, source): - - need_relink = False; - - if ps.get('os') in ['NT', 'CYGWIN'] or ps.get('target-os') in ['windows', 'cygwin']: - # Never relink - pass - else: - # See if the dll-path properties are not changed during - # install. If so, copy, don't relink. - need_relink = ps.get('dll-path') != source[0].action().properties().get('dll-path') - - if need_relink: - return [relink_file(project, source, ps)] - else: - return [copy_file(project, None, source[0], ps)] - -generators.register(InstalledExeGenerator()) - - -# Installing a shared link on Unix might cause a creation of versioned symbolic -# links. 
-b2.build.type.register('INSTALLED_SHARED_LIB', [], 'SHARED_LIB') - -class InstalledSharedLibGenerator(generators.Generator): - - def __init__(self): - generators.Generator.__init__(self, 'install-shared-lib', False, ['SHARED_LIB'], ['INSTALLED_SHARED_LIB']) - - def run(self, project, name, ps, source): - - source = source[0] - if ps.get('os') in ['NT', 'CYGWIN'] or ps.get('target-os') in ['windows', 'cygwin']: - copied = copy_file(project, None, source, ps) - return [get_manager().virtual_targets().register(copied)] - else: - a = source.action() - if not a: - # Non-derived file, just copy. - copied = copy_file(project, source, ps) - else: - - need_relink = ps.get('dll-path') != source.action().properties().get('dll-path') - - if need_relink: - # Rpath changed, need to relink. - copied = relink_file(project, source, ps) - else: - copied = copy_file(project, None, source, ps) - - result = [get_manager().virtual_targets().register(copied)] - # If the name is in the form NNN.XXX.YYY.ZZZ, where all 'X', 'Y' and - # 'Z' are numbers, we need to create NNN.XXX and NNN.XXX.YYY - # symbolic links. - m = re.match("(.*)\\.([0123456789]+)\\.([0123456789]+)\\.([0123456789]+)$", - copied.name()); - if m: - # Symlink without version at all is used to make - # -lsome_library work. - result.append(symlink(m.group(1), project, copied, ps)) - - # Symlinks of some libfoo.N and libfoo.N.M are used so that - # library can found at runtime, if libfoo.N.M.X has soname of - # libfoo.N. That happens when the library makes some binary - # compatibility guarantees. If not, it is possible to skip those - # symlinks. - if ps.get('install-no-version-symlinks') != ['on']: - - result.append(symlink(m.group(1) + '.' + m.group(2), project, copied, ps)) - result.append(symlink(m.group(1) + '.' + m.group(2) + '.' + m.group(3), - project, copied, ps)) - - return result - -generators.register(InstalledSharedLibGenerator()) - - -# Main target rule for 'install'. 
-# -@bjam_signature((["name"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"], ["usage_requirements", "*"])) -def install(name, sources, requirements=[], default_build=[], usage_requirements=[]): - - requirements = requirements[:] - # Unless the user has explicitly asked us to hardcode dll paths, add - # false in requirements, to override default value. - if not 'true' in requirements: - requirements.append('false') - - if any(r.startswith('') for r in requirements): - get_manager().errors()("The property is not allowed for the 'install' rule") - - from b2.manager import get_manager - t = get_manager().targets() - - project = get_manager().projects().current() - - return t.main_target_alternative( - InstallTargetClass(name, project, - t.main_target_sources(sources, name), - t.main_target_requirements(requirements, project), - t.main_target_default_build(default_build, project), - t.main_target_usage_requirements(usage_requirements, project))) - -get_manager().projects().add_rule("install", install) -get_manager().projects().add_rule("stage", install) - diff --git a/jam-files/boost-build/tools/stlport.jam b/jam-files/boost-build/tools/stlport.jam deleted file mode 100644 index 62eebda5..00000000 --- a/jam-files/boost-build/tools/stlport.jam +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright Gennadiy Rozental -# Copyright 2006 Rene Rivera -# Copyright 2003, 2004, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# The STLPort is usable by means of 'stdlib' feature. When -# stdlib=stlport is specified, default version of STLPort will be used, -# while stdlib=stlport-4.5 will use specific version. -# The subfeature value 'hostios' means to use host compiler's iostreams. 
-# -# The specific version of stlport is selected by features: -# The feature selects between static and shared library -# The on selects STLPort with debug symbols -# and stl debugging. -# There's no way to use STLPort with debug symbols but without -# stl debugging. - -# TODO: must implement selection of different STLPort installations based -# on used toolset. -# Also, finish various flags: -# -# This is copied from V1 toolset, "+" means "implemented" -#+flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_OWN_IOSTREAMS=1 _STLP_HAS_NO_NEW_IOSTREAMS=1 ; -#+flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_EXTENSIONS=1 ; -# flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_ANACHRONISMS=1 ; -# flags $(CURR_TOOLSET) DEFINES global : _STLP_VENDOR_GLOBAL_CSTD=1 ; -# flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_EXCEPTIONS=1 ; -# flags $(CURR_TOOLSET) DEFINES on : _STLP_DEBUG_ALLOC=1 ; -#+flags $(CURR_TOOLSET) DEFINES debug : _STLP_DEBUG=1 _STLP_DEBUG_UNINITIALIZED=1 ; -#+flags $(CURR_TOOLSET) DEFINES dynamic : _STLP_USE_DYNAMIC_LIB=1 ; - - -import feature : feature subfeature ; -import project ; -import "class" : new ; -import targets ; -import property-set ; -import common ; -import type ; - -# Make this module into a project. -project.initialize $(__name__) ; -project stlport ; - -# The problem: how to request to use host compiler's iostreams? -# -# Solution 1: Global 'stlport-iostream' feature. -# That's ugly. Subfeature make more sense for stlport-specific thing. -# Solution 2: Use subfeature with two values, one of which ("use STLPort iostream") -# is default. -# The problem is that such subfeature will appear in target paths, and that's ugly -# Solution 3: Use optional subfeature with only one value. 
- -feature.extend stdlib : stlport ; -feature.compose stlport : /stlport//stlport ; - -# STLport iostreams or native iostreams -subfeature stdlib stlport : iostream : hostios : optional propagated ; - -# STLport extensions -subfeature stdlib stlport : extensions : noext : optional propagated ; - -# STLport anachronisms -- NOT YET SUPPORTED -# subfeature stdlib stlport : anachronisms : on off ; - -# STLport debug allocation -- NOT YET SUPPORTED -#subfeature stdlib stlport : debug-alloc : off on ; - -# Declare a special target class to handle the creation of search-lib-target -# instances for STLport. We need a special class, because otherwise we'll have -# - declare prebuilt targets for all possible toolsets. And by the time 'init' -# is called we don't even know the list of toolsets that are registered -# - when host iostreams are used, we really should produce nothing. It would -# be hard/impossible to achieve this using prebuilt targets. - -class stlport-target-class : basic-target -{ - import feature project type errors generators ; - import set : difference ; - - rule __init__ ( project : headers ? : libraries * : version ? ) - { - basic-target.__init__ stlport : $(project) ; - self.headers = $(headers) ; - self.libraries = $(libraries) ; - self.version = $(version) ; - self.version.5 = [ MATCH "^(5[.][0123456789]+).*" : $(version) ] ; - - local requirements ; - requirements += $(self.version) ; - self.requirements = [ property-set.create $(requirements) ] ; - } - - rule generate ( property-set ) - { - # Since this target is built with stlport, it will also - # have /stlport//stlport in requirements, which will - # cause a loop in main target references. Remove that property - # manually. 
- - property-set = [ property-set.create - [ difference - [ $(property-set).raw ] : - /stlport//stlport - stlport - ] - ] ; - return [ basic-target.generate $(property-set) ] ; - } - - rule construct ( name : source-targets * : property-set ) - { - # Deduce the name of stlport library, based on toolset and - # debug setting. - local raw = [ $(property-set).raw ] ; - local hostios = [ feature.get-values : $(raw) ] ; - local toolset = [ feature.get-values : $(raw) ] ; - - if $(self.version.5) - { - # Version 5.x - - # STLport host IO streams no longer supported. So we always - # need libraries. - - # name: stlport(stl)?[dg]?(_static)?.M.R - local name = stlport ; - if [ feature.get-values : $(raw) ] = "on" - { - name += stl ; - switch $(toolset) - { - case gcc* : name += g ; - case darwin* : name += g ; - case * : name += d ; - } - } - - if [ feature.get-values : $(raw) ] = "static" - { - name += _static ; - } - - # Starting with version 5.2.0, the STLport static libraries no longer - # include a version number in their name - local version.pre.5.2 = [ MATCH "^(5[.][01]+).*" : $(version) ] ; - if $(version.pre.5.2) || [ feature.get-values : $(raw) ] != "static" - { - name += .$(self.version.5) ; - } - - name = $(name:J=) ; - - if [ feature.get-values : $(raw) ] = "on" - { - #~ Allow explicitly asking to install the STLport lib by - #~ refering to it directly: /stlport//stlport/on - #~ This allows for install packaging of all libs one might need for - #~ a standalone distribution. 
- import path : make : path-make ; - local runtime-link - = [ feature.get-values : $(raw) ] ; - local lib-file.props - = [ property-set.create $(raw) $(runtime-link) ] ; - local lib-file.prefix - = [ type.generated-target-prefix $(runtime-link:U)_LIB : $(lib-file.props) ] ; - local lib-file.suffix - = [ type.generated-target-suffix $(runtime-link:U)_LIB : $(lib-file.props) ] ; - lib-file.prefix - ?= "" "lib" ; - lib-file.suffix - ?= "" ; - local lib-file - = [ GLOB $(self.libraries) [ modules.peek : PATH ] : - $(lib-file.prefix)$(name).$(lib-file.suffix) ] ; - lib-file - = [ new file-reference [ path-make $(lib-file[1]) ] : $(self.project) ] ; - lib-file - = [ $(lib-file).generate "" ] ; - local lib-file.requirements - = [ targets.main-target-requirements - [ $(lib-file.props).raw ] $(lib-file[-1]) - : $(self.project) ] ; - return [ generators.construct $(self.project) $(name) : LIB : $(lib-file.requirements) ] ; - } - else - { - #~ Otherwise, it's just a regular usage of the library. - return [ generators.construct - $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ; - } - } - else if ! $(hostios) && $(toolset) != msvc - { - # We don't need libraries if host istreams are used. For - # msvc, automatic library selection will be used. - - # name: stlport_(_stldebug)? - local name = stlport ; - name = $(name)_$(toolset) ; - if [ feature.get-values : $(raw) ] = "on" - { - name = $(name)_stldebug ; - } - - return [ generators.construct - $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ; - } - else - { - return [ property-set.empty ] ; - } - } - - rule compute-usage-requirements ( subvariant ) - { - local usage-requirements = - $(self.headers) - $(self.libraries) - $(self.libraries) - ; - - local rproperties = [ $(subvariant).build-properties ] ; - # CONSIDER: should this "if" sequence be replaced with - # some use of 'property-map' class? 
- if [ $(rproperties).get ] = "on" - { - usage-requirements += - _STLP_DEBUG=1 - _STLP_DEBUG_UNINITIALIZED=1 ; - } - if [ $(rproperties).get ] = "shared" - { - usage-requirements += - _STLP_USE_DYNAMIC_LIB=1 ; - } - if [ $(rproperties).get ] = noext - { - usage-requirements += - _STLP_NO_EXTENSIONS=1 ; - } - if [ $(rproperties).get ] = hostios - { - usage-requirements += - _STLP_NO_OWN_IOSTREAMS=1 - _STLP_HAS_NO_NEW_IOSTREAMS=1 ; - } - if $(self.version.5) - { - # Version 5.x - if [ $(rproperties).get ] = "single" - { - # Since STLport5 doesn't normally support single-thread - # we force STLport5 into the multi-thread mode. Hence - # getting what other libs provide of single-thread code - # linking against a multi-thread lib. - usage-requirements += - _STLP_THREADS=1 ; - } - } - - return [ property-set.create $(usage-requirements) ] ; - } -} - -rule stlport-target ( headers ? : libraries * : version ? ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new stlport-target-class $(project) : $(headers) : $(libraries) - : $(version) - ] ; -} - -local .version-subfeature-defined ; - -# Initialize stlport support. -rule init ( - version ? : - headers : # Location of header files - libraries * # Location of libraries, lib and bin subdirs of STLport. - ) -{ - # FIXME: need to use common.check-init-parameters here. - # At the moment, that rule always tries to define subfeature - # of the 'toolset' feature, while we need to define subfeature - # of stlport, so tweaks to check-init-parameters are needed. - if $(version) - { - if ! $(.version-subfeature-defined) - { - feature.subfeature stdlib stlport : version : : propagated ; - .version-subfeature-defined = true ; - } - feature.extend-subfeature stdlib stlport : version : $(version) ; - } - - # Declare the main target for this STLPort version. 
- stlport-target $(headers) : $(libraries) : $(version) ; -} - diff --git a/jam-files/boost-build/tools/sun.jam b/jam-files/boost-build/tools/sun.jam deleted file mode 100644 index 0ca927d3..00000000 --- a/jam-files/boost-build/tools/sun.jam +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) Christopher Currie 2003. Permission to copy, use, -# modify, sell and distribute this software is granted provided this -# copyright notice appears in all copies. This software is provided -# "as is" without express or implied warranty, and with no claim as -# to its suitability for any purpose. - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import type ; -import common ; - -feature.extend toolset : sun ; -toolset.inherit sun : unix ; -generators.override sun.prebuilt : builtin.lib-generator ; -generators.override sun.prebuilt : builtin.prebuilt ; -generators.override sun.searched-lib-generator : searched-lib-generator ; - -feature.extend stdlib : sun-stlport ; -feature.compose sun-stlport - : -library=stlport4 -library=stlport4 - ; - -rule init ( version ? : command * : options * ) -{ - local condition = [ - common.check-init-parameters sun : version $(version) ] ; - - command = [ common.get-invocation-command sun : CC - : $(command) : "/opt/SUNWspro/bin" ] ; - - # Even if the real compiler is not found, put CC to - # command line so that user see command line that would have being executed. 
- command ?= CC ; - - common.handle-options sun : $(condition) : $(command) : $(options) ; - - command_c = $(command[1--2]) $(command[-1]:B=cc) ; - - toolset.flags sun CONFIG_C_COMMAND $(condition) : $(command_c) ; -} - -# Declare generators -generators.register-c-compiler sun.compile.c : C : OBJ : sun ; -generators.register-c-compiler sun.compile.c++ : CPP : OBJ : sun ; - -# Declare flags and actions for compilation -flags sun.compile OPTIONS on : -g ; -flags sun.compile OPTIONS on : -xprofile=tcov ; -flags sun.compile OPTIONS speed : -xO4 ; -flags sun.compile OPTIONS space : -xO2 -xspace ; -flags sun.compile OPTIONS multi : -mt ; -flags sun.compile OPTIONS off : -erroff ; -flags sun.compile OPTIONS on : -erroff=%none ; -flags sun.compile OPTIONS all : -erroff=%none ; -flags sun.compile OPTIONS on : -errwarn ; - -flags sun.compile.c++ OPTIONS off : +d ; - -# The -m32 and -m64 options are supported starting -# with Sun Studio 12. On earlier compilers, the -# 'address-model' feature is not supported and should not -# be used. Instead, use -xarch=generic64 command line -# option. -# See http://svn.boost.org/trac/boost/ticket/1186 -# for details. -flags sun OPTIONS 32 : -m32 ; -flags sun OPTIONS 64 : -m64 ; -# On sparc, there's a difference between -Kpic -# and -KPIC. The first is slightly more efficient, -# but has the limits on the size of GOT table. -# For minimal fuss on user side, we use -KPIC here. -# See http://svn.boost.org/trac/boost/ticket/1186#comment:6 -# for detailed explanation. 
-flags sun OPTIONS shared : -KPIC ; - -flags sun.compile OPTIONS ; -flags sun.compile.c++ OPTIONS ; -flags sun.compile DEFINES ; -flags sun.compile INCLUDES ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags sun.link OPTIONS on : -g ; -# Strip the binary when no debugging is needed -flags sun.link OPTIONS off : -s ; -flags sun.link OPTIONS on : -xprofile=tcov ; -flags sun.link OPTIONS multi : -mt ; -flags sun.link OPTIONS ; -flags sun.link LINKPATH ; -flags sun.link FINDLIBS-ST ; -flags sun.link FINDLIBS-SA ; -flags sun.link LIBRARIES ; -flags sun.link LINK-RUNTIME static : static ; -flags sun.link LINK-RUNTIME shared : dynamic ; -flags sun.link RPATH ; -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags sun.link RPATH ; - -# The POSIX real-time library is always needed (nanosleep, clock_gettime etc.) 
-flags sun.link FINDLIBS-SA : rt ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Declare action for creating static libraries -actions piecemeal archive -{ - "$(CONFIG_COMMAND)" -xar -o "$(<)" "$(>)" -} - diff --git a/jam-files/boost-build/tools/symlink.jam b/jam-files/boost-build/tools/symlink.jam deleted file mode 100644 index b33e8260..00000000 --- a/jam-files/boost-build/tools/symlink.jam +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004, 2005 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines the "symlink" special target. 'symlink' targets make symbolic links -# to the sources. - -import targets modules path class os feature project property-set ; - -.count = 0 ; - -feature.feature symlink-location : project-relative build-relative : incidental ; - -# The class representing "symlink" targets. -# -class symlink-targets : basic-target -{ - import numbers modules class property project path ; - - rule __init__ ( - project - : targets * - : sources * - ) - { - # Generate a fake name for now. Need unnamed targets eventually. 
- local c = [ modules.peek symlink : .count ] ; - modules.poke symlink : .count : [ numbers.increment $(c) ] ; - local fake-name = symlink#$(c) ; - - basic-target.__init__ $(fake-name) : $(project) : $(sources) ; - - # Remember the targets to map the sources onto. Pad or truncate - # to fit the sources given. - self.targets = ; - for local source in $(sources) - { - if $(targets) - { - self.targets += $(targets[1]) ; - targets = $(targets[2-]) ; - } - else - { - self.targets += $(source) ; - } - } - - # The virtual targets corresponding to the given targets. - self.virtual-targets = ; - } - - rule construct ( name : source-targets * : property-set ) - { - local i = 1 ; - for local t in $(source-targets) - { - local s = $(self.targets[$(i)]) ; - local a = [ class.new action $(t) : symlink.ln : $(property-set) ] ; - local vt = [ class.new file-target $(s:D=) - : [ $(t).type ] : $(self.project) : $(a) ] ; - - # Place the symlink in the directory relative to the project - # location, instead of placing it in the build directory. - if [ property.select : [ $(property-set).raw ] ] = project-relative - { - $(vt).set-path [ path.root $(s:D) [ $(self.project).get location ] ] ; - } - - self.virtual-targets += $(vt) ; - i = [ numbers.increment $(i) ] ; - } - return [ property-set.empty ] $(self.virtual-targets) ; - } -} - -# Creates a symbolic link from a set of targets to a set of sources. -# The targets and sources map one to one. The symlinks generated are -# limited to be the ones given as the sources. That is, the targets -# are either padded or trimmed to equate to the sources. The padding -# is done with the name of the corresponding source. For example:: -# -# symlink : one two ; -# -# Is equal to:: -# -# symlink one two : one two ; -# -# Names for symlink are relative to the project location. They cannot -# include ".." path components. 
-rule symlink ( - targets * - : sources * - ) -{ - local project = [ project.current ] ; - - return [ targets.main-target-alternative - [ class.new symlink-targets $(project) : $(targets) : - # Note: inline targets are not supported for symlink, intentionally, - # since it's used to linking existing non-local targets. - $(sources) ] ] ; -} - -rule ln -{ - local os ; - if [ modules.peek : UNIX ] { os = UNIX ; } - else { os ?= [ os.name ] ; } - # Remember the path to make the link relative to where the symlink is located. - local path-to-source = [ path.relative-to - [ path.make [ on $(<) return $(LOCATE) ] ] - [ path.make [ on $(>) return $(LOCATE) ] ] ] ; - if $(path-to-source) = . - { - PATH_TO_SOURCE on $(<) = "" ; - } - else - { - PATH_TO_SOURCE on $(<) = [ path.native $(path-to-source) ] ; - } - ln-$(os) $(<) : $(>) ; -} - -actions ln-UNIX -{ - ln -f -s '$(>:D=:R=$(PATH_TO_SOURCE))' '$(<)' -} - -# there is a way to do this; we fall back to a copy for now -actions ln-NT -{ - echo "NT symlinks not supported yet, making copy" - del /f /q "$(<)" 2>nul >nul - copy "$(>)" "$(<)" $(NULL_OUT) -} - -IMPORT $(__name__) : symlink : : symlink ; diff --git a/jam-files/boost-build/tools/symlink.py b/jam-files/boost-build/tools/symlink.py deleted file mode 100644 index 6345ded6..00000000 --- a/jam-files/boost-build/tools/symlink.py +++ /dev/null @@ -1,112 +0,0 @@ -# Status: ported. -# Base revision: 64488. - -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004, 2005 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines the "symlink" special target. 'symlink' targets make symbolic links -# to the sources. 
- -import b2.build.feature as feature -import b2.build.targets as targets -import b2.build.property_set as property_set -import b2.build.virtual_target as virtual_target -import b2.build.targets - -from b2.manager import get_manager - -import bjam - -import os - - -feature.feature("symlink-location", ["project-relative", "build-relative"], ["incidental"]) - -class SymlinkTarget(targets.BasicTarget): - - _count = 0 - - def __init__(self, project, targets, sources): - - # Generate a fake name for now. Need unnamed targets eventually. - fake_name = "symlink#%s" % SymlinkTarget._count - SymlinkTarget._count = SymlinkTarget._count + 1 - - b2.build.targets.BasicTarget.__init__(self, fake_name, project, sources) - - # Remember the targets to map the sources onto. Pad or truncate - # to fit the sources given. - assert len(targets) <= len(sources) - self.targets = targets[:] + sources[len(targets):] - - # The virtual targets corresponding to the given targets. - self.virtual_targets = [] - - def construct(self, name, source_targets, ps): - i = 0 - for t in source_targets: - s = self.targets[i] - a = virtual_target.Action(self.manager(), [t], "symlink.ln", ps) - vt = virtual_target.FileTarget(os.path.basename(s), t.type(), self.project(), a) - - # Place the symlink in the directory relative to the project - # location, instead of placing it in the build directory. - if not ps.get('symlink-location') == "project-relative": - vt.set_path(os.path.join(self.project().get('location'), os.path.dirname(s))) - - vt = get_manager().virtual_targets().register(vt) - self.virtual_targets.append(vt) - i = i + 1 - - return (property_set.empty(), self.virtual_targets) - -# Creates a symbolic link from a set of targets to a set of sources. -# The targets and sources map one to one. The symlinks generated are -# limited to be the ones given as the sources. That is, the targets -# are either padded or trimmed to equate to the sources. 
The padding -# is done with the name of the corresponding source. For example:: -# -# symlink : one two ; -# -# Is equal to:: -# -# symlink one two : one two ; -# -# Names for symlink are relative to the project location. They cannot -# include ".." path components. -def symlink(targets, sources): - - from b2.manager import get_manager - t = get_manager().targets() - p = get_manager().projects().current() - - return t.main_target_alternative( - SymlinkTarget(p, targets, - # Note: inline targets are not supported for symlink, intentionally, - # since it's used to linking existing non-local targets. - sources)) - - -def setup_ln(targets, sources, ps): - - source_path = bjam.call("get-target-variable", sources[0], "LOCATE")[0] - target_path = bjam.call("get-target-variable", targets[0], "LOCATE")[0] - rel = os.path.relpath(source_path, target_path) - if rel == ".": - bjam.call("set-target-variable", targets, "PATH_TO_SOURCE", "") - else: - bjam.call("set-target-variable", targets, "PATH_TO_SOURCE", rel) - -if os.name == 'nt': - ln_action = """echo "NT symlinks not supported yet, making copy" -del /f /q "$(<)" 2>nul >nul -copy "$(>)" "$(<)" $(NULL_OUT)""" -else: - ln_action = "ln -f -s '$(>:D=:R=$(PATH_TO_SOURCE))' '$(<)'" - -get_manager().engine().register_action("symlink.ln", ln_action, function=setup_ln) - -get_manager().projects().add_rule("symlink", symlink) diff --git a/jam-files/boost-build/tools/testing-aux.jam b/jam-files/boost-build/tools/testing-aux.jam deleted file mode 100644 index 525dafd0..00000000 --- a/jam-files/boost-build/tools/testing-aux.jam +++ /dev/null @@ -1,210 +0,0 @@ -# This module is imported by testing.py. The definitions here are -# too tricky to do in Python - -# Causes the 'target' to exist after bjam invocation if and only if all the -# dependencies were successfully built. 
-# -rule expect-success ( target : dependency + : requirements * ) -{ - **passed** $(target) : $(sources) ; -} -IMPORT testing : expect-success : : testing.expect-success ; - -# Causes the 'target' to exist after bjam invocation if and only if all some of -# the dependencies were not successfully built. -# -rule expect-failure ( target : dependency + : properties * ) -{ - local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ; - local marker = $(dependency:G=$(grist)*fail) ; - (failed-as-expected) $(marker) ; - FAIL_EXPECTED $(dependency) ; - LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ; - RMOLD $(marker) ; - DEPENDS $(marker) : $(dependency) ; - DEPENDS $(target) : $(marker) ; - **passed** $(target) : $(marker) ; -} -IMPORT testing : expect-failure : : testing.expect-failure ; - -# The rule/action combination used to report successful passing of a test. -# -rule **passed** -{ - # Force deletion of the target, in case any dependencies failed to build. - RMOLD $(<) ; -} - - -# Used to create test files signifying passed tests. -# -actions **passed** -{ - echo passed > "$(<)" -} - - -# Used to create replacement object files that do not get created during tests -# that are expected to fail. -# -actions (failed-as-expected) -{ - echo failed as expected > "$(<)" -} - -# Runs executable 'sources' and stores stdout in file 'target'. Unless -# --preserve-test-targets command line option has been specified, removes the -# executable. The 'target-to-remove' parameter controls what should be removed: -# - if 'none', does not remove anything, ever -# - if empty, removes 'source' -# - if non-empty and not 'none', contains a list of sources to remove. -# -rule capture-output ( target : source : properties * : targets-to-remove * ) -{ - output-file on $(target) = $(target:S=.output) ; - LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ; - - # The INCLUDES kill a warning about independent target... 
- INCLUDES $(target) : $(target:S=.output) ; - # but it also puts .output into dependency graph, so we must tell jam it is - # OK if it cannot find the target or updating rule. - NOCARE $(target:S=.output) ; - - # This has two-fold effect. First it adds input files to the dependendency - # graph, preventing a warning. Second, it causes input files to be bound - # before target is created. Therefore, they are bound using SEARCH setting - # on them and not LOCATE setting of $(target), as in other case (due to jam - # bug). - DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ; - - if $(targets-to-remove) = none - { - targets-to-remove = ; - } - else if ! $(targets-to-remove) - { - targets-to-remove = $(source) ; - } - - if [ on $(target) return $(REMOVE_TEST_TARGETS) ] - { - TEMPORARY $(targets-to-remove) ; - # Set a second action on target that will be executed after capture - # output action. The 'RmTemps' rule has the 'ignore' modifier so it is - # always considered succeeded. This is needed for 'run-fail' test. For - # that test the target will be marked with FAIL_EXPECTED, and without - # 'ignore' successful execution will be negated and be reported as - # failure. With 'ignore' we do not detect a case where removing files - # fails, but it is not likely to happen. - RmTemps $(target) : $(targets-to-remove) ; - } -} - - -if [ os.name ] = NT -{ - .STATUS = %status% ; - .SET_STATUS = "set status=%ERRORLEVEL%" ; - .RUN_OUTPUT_NL = "echo." ; - .STATUS_0 = "%status% EQU 0 (" ; - .STATUS_NOT_0 = "%status% NEQ 0 (" ; - .VERBOSE = "%verbose% EQU 1 (" ; - .ENDIF = ")" ; - .SHELL_SET = "set " ; - .CATENATE = type ; - .CP = copy ; -} -else -{ - .STATUS = "$status" ; - .SET_STATUS = "status=$?" 
; - .RUN_OUTPUT_NL = "echo" ; - .STATUS_0 = "test $status -eq 0 ; then" ; - .STATUS_NOT_0 = "test $status -ne 0 ; then" ; - .VERBOSE = "test $verbose -eq 1 ; then" ; - .ENDIF = "fi" ; - .SHELL_SET = "" ; - .CATENATE = cat ; - .CP = cp ; -} - - -.VERBOSE_TEST = 0 ; -if --verbose-test in [ modules.peek : ARGV ] -{ - .VERBOSE_TEST = 1 ; -} - - -.RM = [ common.rm-command ] ; - - -actions capture-output bind INPUT_FILES output-file -{ - $(PATH_SETUP) - $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1 - $(.SET_STATUS) - $(.RUN_OUTPUT_NL) >> "$(output-file)" - echo EXIT STATUS: $(.STATUS) >> "$(output-file)" - if $(.STATUS_0) - $(.CP) "$(output-file)" "$(<)" - $(.ENDIF) - $(.SHELL_SET)verbose=$(.VERBOSE_TEST) - if $(.STATUS_NOT_0) - $(.SHELL_SET)verbose=1 - $(.ENDIF) - if $(.VERBOSE) - echo ====== BEGIN OUTPUT ====== - $(.CATENATE) "$(output-file)" - echo ====== END OUTPUT ====== - $(.ENDIF) - exit $(.STATUS) -} - -IMPORT testing : capture-output : : testing.capture-output ; - - -actions quietly updated ignore piecemeal together RmTemps -{ - $(.RM) "$(>)" -} - - -.MAKE_FILE = [ common.file-creation-command ] ; - -actions unit-test -{ - $(PATH_SETUP) - $(LAUNCHER) $(>) $(ARGS) && $(.MAKE_FILE) $(<) -} - -rule record-time ( target : source : start end user system ) -{ - local src-string = [$(source:G=:J=",")"] " ; - USER_TIME on $(target) += $(src-string)$(user) ; - SYSTEM_TIME on $(target) += $(src-string)$(system) ; -} - -# Calling this rule requests that Boost Build time how long it taks to build the -# 'source' target and display the results both on the standard output and in the -# 'target' file. -# -rule time ( target : source : properties * ) -{ - # Set up rule for recording timing information. - __TIMING_RULE__ on $(source) = testing.record-time $(target) ; - - # Make sure that the source is rebuilt any time we need to retrieve that - # information. 
- REBUILDS $(target) : $(source) ; -} - - -actions time -{ - echo user: $(USER_TIME) - echo system: $(SYSTEM_TIME) - - echo user: $(USER_TIME)" seconds" > "$(<)" - echo system: $(SYSTEM_TIME)" seconds" >> "$(<)" -} diff --git a/jam-files/boost-build/tools/testing.jam b/jam-files/boost-build/tools/testing.jam deleted file mode 100644 index c42075b7..00000000 --- a/jam-files/boost-build/tools/testing.jam +++ /dev/null @@ -1,581 +0,0 @@ -# Copyright 2005 Dave Abrahams -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module implements regression testing framework. It declares a number of -# main target rules which perform some action and, if the results are OK, -# creates an output file. -# -# The exact list of rules is: -# 'compile' -- creates .test file if compilation of sources was -# successful. -# 'compile-fail' -- creates .test file if compilation of sources failed. -# 'run' -- creates .test file is running of executable produced from -# sources was successful. Also leaves behind .output file -# with the output from program run. -# 'run-fail' -- same as above, but .test file is created if running fails. -# -# In all cases, presence of .test file is an indication that the test passed. -# For more convenient reporting, you might want to use C++ Boost regression -# testing utilities (see http://www.boost.org/more/regression.html). -# -# For historical reason, a 'unit-test' rule is available which has the same -# syntax as 'exe' and behaves just like 'run'. - -# Things to do: -# - Teach compiler_status handle Jamfile.v2. -# Notes: -# - is not implemented, since it is Como-specific, and it is not -# clear how to implement it -# - std::locale-support is not implemented (it is used in one test). 
- - -import alias ; -import "class" ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import path ; -import project ; -import property ; -import property-set ; -import regex ; -import sequence ; -import targets ; -import toolset ; -import type ; -import virtual-target ; - - -rule init ( ) -{ -} - - -# Feature controling the command used to lanch test programs. -feature.feature testing.launcher : : free optional ; - -feature.feature test-info : : free incidental ; -feature.feature testing.arg : : free incidental ; -feature.feature testing.input-file : : free dependency ; - -feature.feature preserve-test-targets : on off : incidental propagated ; - -# Register target types. -type.register TEST : test ; -type.register COMPILE : : TEST ; -type.register COMPILE_FAIL : : TEST ; -type.register RUN_OUTPUT : run ; -type.register RUN : : TEST ; -type.register RUN_FAIL : : TEST ; -type.register LINK_FAIL : : TEST ; -type.register LINK : : TEST ; -type.register UNIT_TEST : passed : TEST ; - - -# Declare the rules which create main targets. While the 'type' module already -# creates rules with the same names for us, we need extra convenience: default -# name of main target, so write our own versions. - -# Helper rule. Create a test target, using basename of first source if no target -# name is explicitly passed. Remembers the created target in a global variable. -# -rule make-test ( target-type : sources + : requirements * : target-name ? ) -{ - target-name ?= $(sources[1]:D=:S=) ; - - # Having periods (".") in the target name is problematic because the typed - # generator will strip the suffix and use the bare name for the file - # targets. Even though the location-prefix averts problems most times it - # does not prevent ambiguity issues when referring to the test targets. For - # example when using the XML log output. So we rename the target to remove - # the periods, and provide an alias for users. 
- local real-name = [ regex.replace $(target-name) "[.]" "~" ] ; - - local project = [ project.current ] ; - # The forces the build system for generate paths in the - # form '$build_dir/array1.test/gcc/debug'. This is necessary to allow - # post-processing tools to work. - local t = [ targets.create-typed-target [ type.type-from-rule-name - $(target-type) ] : $(project) : $(real-name) : $(sources) : - $(requirements) $(real-name).test ] ; - - # The alias to the real target, per period replacement above. - if $(real-name) != $(target-name) - { - alias $(target-name) : $(t) ; - } - - # Remember the test (for --dump-tests). A good way would be to collect all - # given a project. This has some technical problems: e.g. we can not call - # this dump from a Jamfile since projects referred by 'build-project' are - # not available until the whole Jamfile has been loaded. - .all-tests += $(t) ; - return $(t) ; -} - - -# Note: passing more that one cpp file here is known to fail. Passing a cpp file -# and a library target works. -# -rule compile ( sources + : requirements * : target-name ? ) -{ - return [ make-test compile : $(sources) : $(requirements) : $(target-name) ] - ; -} - - -rule compile-fail ( sources + : requirements * : target-name ? ) -{ - return [ make-test compile-fail : $(sources) : $(requirements) : - $(target-name) ] ; -} - - -rule link ( sources + : requirements * : target-name ? ) -{ - return [ make-test link : $(sources) : $(requirements) : $(target-name) ] ; -} - - -rule link-fail ( sources + : requirements * : target-name ? ) -{ - return [ make-test link-fail : $(sources) : $(requirements) : $(target-name) - ] ; -} - - -rule handle-input-files ( input-files * ) -{ - if $(input-files[2]) - { - # Check that sorting made when creating property-set instance will not - # change the ordering. 
- if [ sequence.insertion-sort $(input-files) ] != $(input-files) - { - errors.user-error "Names of input files must be sorted alphabetically" - : "due to internal limitations" ; - } - } - return $(input-files) ; -} - - -rule run ( sources + : args * : input-files * : requirements * : target-name ? : - default-build * ) -{ - requirements += $(args:J=" ") ; - requirements += [ handle-input-files $(input-files) ] ; - return [ make-test run : $(sources) : $(requirements) : $(target-name) ] ; -} - - -rule run-fail ( sources + : args * : input-files * : requirements * : - target-name ? : default-build * ) -{ - requirements += $(args:J=" ") ; - requirements += [ handle-input-files $(input-files) ] ; - return [ make-test run-fail : $(sources) : $(requirements) : $(target-name) - ] ; -} - - -# Use 'test-suite' as a synonym for 'alias', for backward compatibility. -IMPORT : alias : : test-suite ; - - -# For all main targets in 'project-module', which are typed targets with type -# derived from 'TEST', produce some interesting information. -# -rule dump-tests -{ - for local t in $(.all-tests) - { - dump-test $(t) ; - } -} - - -# Given a project location in normalized form (slashes are forward), compute the -# name of the Boost library. -# -local rule get-library-name ( path ) -{ - # Path is in normalized form, so all slashes are forward. - local match1 = [ MATCH /(tools|libs)/(.*)/(test|example) : $(path) ] ; - local match2 = [ MATCH /(tools|libs)/(.*)$ : $(path) ] ; - local match3 = [ MATCH (/status$) : $(path) ] ; - - if $(match1) { return $(match1[2]) ; } - else if $(match2) { return $(match2[2]) ; } - else if $(match3) { return "" ; } - else if --dump-tests in [ modules.peek : ARGV ] - { - # The 'run' rule and others might be used outside boost. In that case, - # just return the path, since the 'library name' makes no sense. - return $(path) ; - } -} - - -# Was an XML dump requested? 
-.out-xml = [ MATCH --out-xml=(.*) : [ modules.peek : ARGV ] ] ; - - -# Takes a target (instance of 'basic-target') and prints -# - its type -# - its name -# - comments specified via the property -# - relative location of all source from the project root. -# -rule dump-test ( target ) -{ - local type = [ $(target).type ] ; - local name = [ $(target).name ] ; - local project = [ $(target).project ] ; - - local project-root = [ $(project).get project-root ] ; - local library = [ get-library-name [ path.root [ $(project).get location ] - [ path.pwd ] ] ] ; - if $(library) - { - name = $(library)/$(name) ; - } - - local sources = [ $(target).sources ] ; - local source-files ; - for local s in $(sources) - { - if [ class.is-a $(s) : file-reference ] - { - local location = [ path.root [ path.root [ $(s).name ] - [ $(s).location ] ] [ path.pwd ] ] ; - - source-files += [ path.relative-to [ path.root $(project-root) - [ path.pwd ] ] $(location) ] ; - } - } - - local target-name = [ $(project).get location ] // [ $(target).name ] .test - ; - target-name = $(target-name:J=) ; - - local r = [ $(target).requirements ] ; - # Extract values of the feature. - local test-info = [ $(r).get ] ; - - # If the user requested XML output on the command-line, add the test info to - # that XML file rather than dumping them to stdout. - if $(.out-xml) - { - local nl = " -" ; - .contents on $(.out-xml) += - "$(nl) " - "$(nl) " - "$(nl) " - "$(nl) " - "$(nl) " - ; - } - else - { - # Format them into a single string of quoted strings. - test-info = \"$(test-info:J=\"\ \")\" ; - - ECHO boost-test($(type)) \"$(name)\" [$(test-info)] ":" - \"$(source-files)\" ; - } -} - - -# Register generators. Depending on target type, either 'expect-success' or -# 'expect-failure' rule will be used. 
-generators.register-standard testing.expect-success : OBJ : COMPILE ; -generators.register-standard testing.expect-failure : OBJ : COMPILE_FAIL ; -generators.register-standard testing.expect-success : RUN_OUTPUT : RUN ; -generators.register-standard testing.expect-failure : RUN_OUTPUT : RUN_FAIL ; -generators.register-standard testing.expect-failure : EXE : LINK_FAIL ; -generators.register-standard testing.expect-success : EXE : LINK ; - -# Generator which runs an EXE and captures output. -generators.register-standard testing.capture-output : EXE : RUN_OUTPUT ; - -# Generator which creates a target if sources run successfully. Differs from RUN -# in that run output is not captured. The reason why it exists is that the 'run' -# rule is much better for automated testing, but is not user-friendly (see -# http://article.gmane.org/gmane.comp.lib.boost.build/6353). -generators.register-standard testing.unit-test : EXE : UNIT_TEST ; - - -# The action rules called by generators. - -# Causes the 'target' to exist after bjam invocation if and only if all the -# dependencies were successfully built. -# -rule expect-success ( target : dependency + : requirements * ) -{ - **passed** $(target) : $(sources) ; -} - - -# Causes the 'target' to exist after bjam invocation if and only if all some of -# the dependencies were not successfully built. -# -rule expect-failure ( target : dependency + : properties * ) -{ - local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ; - local marker = $(dependency:G=$(grist)*fail) ; - (failed-as-expected) $(marker) ; - FAIL_EXPECTED $(dependency) ; - LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ; - RMOLD $(marker) ; - DEPENDS $(marker) : $(dependency) ; - DEPENDS $(target) : $(marker) ; - **passed** $(target) : $(marker) ; -} - - -# The rule/action combination used to report successful passing of a test. -# -rule **passed** -{ - # Dump all the tests, if needed. 
We do it here, since dump should happen - # only after all Jamfiles have been read, and there is no such place - # currently defined (but there should be). - if ! $(.dumped-tests) && ( --dump-tests in [ modules.peek : ARGV ] ) - { - .dumped-tests = true ; - dump-tests ; - } - - # Force deletion of the target, in case any dependencies failed to build. - RMOLD $(<) ; -} - - -# Used to create test files signifying passed tests. -# -actions **passed** -{ - echo passed > "$(<)" -} - - -# Used to create replacement object files that do not get created during tests -# that are expected to fail. -# -actions (failed-as-expected) -{ - echo failed as expected > "$(<)" -} - - -rule run-path-setup ( target : source : properties * ) -{ - # For testing, we need to make sure that all dynamic libraries needed by the - # test are found. So, we collect all paths from dependency libraries (via - # xdll-path property) and add whatever explicit dll-path user has specified. - # The resulting paths are added to the environment on each test invocation. - local dll-paths = [ feature.get-values : $(properties) ] ; - dll-paths += [ feature.get-values : $(properties) ] ; - dll-paths += [ on $(source) return $(RUN_PATH) ] ; - dll-paths = [ sequence.unique $(dll-paths) ] ; - if $(dll-paths) - { - dll-paths = [ sequence.transform path.native : $(dll-paths) ] ; - PATH_SETUP on $(target) = [ common.prepend-path-variable-command - [ os.shared-library-path-variable ] : $(dll-paths) ] ; - } -} - - -local argv = [ modules.peek : ARGV ] ; - -toolset.flags testing.capture-output ARGS ; -toolset.flags testing.capture-output INPUT_FILES ; -toolset.flags testing.capture-output LAUNCHER ; - - -# Runs executable 'sources' and stores stdout in file 'target'. Unless -# --preserve-test-targets command line option has been specified, removes the -# executable. 
The 'target-to-remove' parameter controls what should be removed: -# - if 'none', does not remove anything, ever -# - if empty, removes 'source' -# - if non-empty and not 'none', contains a list of sources to remove. -# -rule capture-output ( target : source : properties * : targets-to-remove * ) -{ - output-file on $(target) = $(target:S=.output) ; - LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ; - - # The INCLUDES kill a warning about independent target... - INCLUDES $(target) : $(target:S=.output) ; - # but it also puts .output into dependency graph, so we must tell jam it is - # OK if it cannot find the target or updating rule. - NOCARE $(target:S=.output) ; - - # This has two-fold effect. First it adds input files to the dependendency - # graph, preventing a warning. Second, it causes input files to be bound - # before target is created. Therefore, they are bound using SEARCH setting - # on them and not LOCATE setting of $(target), as in other case (due to jam - # bug). - DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ; - - if $(targets-to-remove) = none - { - targets-to-remove = ; - } - else if ! $(targets-to-remove) - { - targets-to-remove = $(source) ; - } - - run-path-setup $(target) : $(source) : $(properties) ; - - if [ feature.get-values preserve-test-targets : $(properties) ] = off - { - TEMPORARY $(targets-to-remove) ; - # Set a second action on target that will be executed after capture - # output action. The 'RmTemps' rule has the 'ignore' modifier so it is - # always considered succeeded. This is needed for 'run-fail' test. For - # that test the target will be marked with FAIL_EXPECTED, and without - # 'ignore' successful execution will be negated and be reported as - # failure. With 'ignore' we do not detect a case where removing files - # fails, but it is not likely to happen. 
- RmTemps $(target) : $(targets-to-remove) ; - } -} - - -if [ os.name ] = NT -{ - .STATUS = %status% ; - .SET_STATUS = "set status=%ERRORLEVEL%" ; - .RUN_OUTPUT_NL = "echo." ; - .STATUS_0 = "%status% EQU 0 (" ; - .STATUS_NOT_0 = "%status% NEQ 0 (" ; - .VERBOSE = "%verbose% EQU 1 (" ; - .ENDIF = ")" ; - .SHELL_SET = "set " ; - .CATENATE = type ; - .CP = copy ; -} -else -{ - .STATUS = "$status" ; - .SET_STATUS = "status=$?" ; - .RUN_OUTPUT_NL = "echo" ; - .STATUS_0 = "test $status -eq 0 ; then" ; - .STATUS_NOT_0 = "test $status -ne 0 ; then" ; - .VERBOSE = "test $verbose -eq 1 ; then" ; - .ENDIF = "fi" ; - .SHELL_SET = "" ; - .CATENATE = cat ; - .CP = cp ; -} - - -.VERBOSE_TEST = 0 ; -if --verbose-test in [ modules.peek : ARGV ] -{ - .VERBOSE_TEST = 1 ; -} - - -.RM = [ common.rm-command ] ; - - -actions capture-output bind INPUT_FILES output-file -{ - $(PATH_SETUP) - $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1 - $(.SET_STATUS) - $(.RUN_OUTPUT_NL) >> "$(output-file)" - echo EXIT STATUS: $(.STATUS) >> "$(output-file)" - if $(.STATUS_0) - $(.CP) "$(output-file)" "$(<)" - $(.ENDIF) - $(.SHELL_SET)verbose=$(.VERBOSE_TEST) - if $(.STATUS_NOT_0) - $(.SHELL_SET)verbose=1 - $(.ENDIF) - if $(.VERBOSE) - echo ====== BEGIN OUTPUT ====== - $(.CATENATE) "$(output-file)" - echo ====== END OUTPUT ====== - $(.ENDIF) - exit $(.STATUS) -} - - -actions quietly updated ignore piecemeal together RmTemps -{ - $(.RM) "$(>)" -} - - -.MAKE_FILE = [ common.file-creation-command ] ; - -toolset.flags testing.unit-test LAUNCHER ; -toolset.flags testing.unit-test ARGS ; - - -rule unit-test ( target : source : properties * ) -{ - run-path-setup $(target) : $(source) : $(properties) ; -} - - -actions unit-test -{ - $(PATH_SETUP) - $(LAUNCHER) $(>) $(ARGS) && $(.MAKE_FILE) $(<) -} - - -IMPORT $(__name__) : compile compile-fail run run-fail link link-fail - : : compile compile-fail run run-fail link link-fail ; - - -type.register TIME : time ; -generators.register-standard 
testing.time : : TIME ; - - -rule record-time ( target : source : start end user system ) -{ - local src-string = [$(source:G=:J=",")"] " ; - USER_TIME on $(target) += $(src-string)$(user) ; - SYSTEM_TIME on $(target) += $(src-string)$(system) ; -} - - -IMPORT testing : record-time : : testing.record-time ; - - -# Calling this rule requests that Boost Build time how long it taks to build the -# 'source' target and display the results both on the standard output and in the -# 'target' file. -# -rule time ( target : source : properties * ) -{ - # Set up rule for recording timing information. - __TIMING_RULE__ on $(source) = testing.record-time $(target) ; - - # Make sure that the source is rebuilt any time we need to retrieve that - # information. - REBUILDS $(target) : $(source) ; -} - - -actions time -{ - echo user: $(USER_TIME) - echo system: $(SYSTEM_TIME) - - echo user: $(USER_TIME)" seconds" > "$(<)" - echo system: $(SYSTEM_TIME)" seconds" >> "$(<)" -} diff --git a/jam-files/boost-build/tools/testing.py b/jam-files/boost-build/tools/testing.py deleted file mode 100644 index 3b53500c..00000000 --- a/jam-files/boost-build/tools/testing.py +++ /dev/null @@ -1,342 +0,0 @@ -# Status: ported, except for --out-xml -# Base revision: 64488 -# -# Copyright 2005 Dave Abrahams -# Copyright 2002, 2003, 2004, 2005, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module implements regression testing framework. It declares a number of -# main target rules which perform some action and, if the results are OK, -# creates an output file. -# -# The exact list of rules is: -# 'compile' -- creates .test file if compilation of sources was -# successful. -# 'compile-fail' -- creates .test file if compilation of sources failed. -# 'run' -- creates .test file is running of executable produced from -# sources was successful. 
Also leaves behind .output file -# with the output from program run. -# 'run-fail' -- same as above, but .test file is created if running fails. -# -# In all cases, presence of .test file is an indication that the test passed. -# For more convenient reporting, you might want to use C++ Boost regression -# testing utilities (see http://www.boost.org/more/regression.html). -# -# For historical reason, a 'unit-test' rule is available which has the same -# syntax as 'exe' and behaves just like 'run'. - -# Things to do: -# - Teach compiler_status handle Jamfile.v2. -# Notes: -# - is not implemented, since it is Como-specific, and it is not -# clear how to implement it -# - std::locale-support is not implemented (it is used in one test). - -import b2.build.feature as feature -import b2.build.type as type -import b2.build.targets as targets -import b2.build.generators as generators -import b2.build.toolset as toolset -import b2.tools.common as common -import b2.util.option as option -import b2.build_system as build_system - - - -from b2.manager import get_manager -from b2.util import stem, bjam_signature -from b2.util.sequence import unique - -import bjam - -import re -import os.path -import sys - -def init(): - pass - -# Feature controling the command used to lanch test programs. -feature.feature("testing.launcher", [], ["free", "optional"]) - -feature.feature("test-info", [], ["free", "incidental"]) -feature.feature("testing.arg", [], ["free", "incidental"]) -feature.feature("testing.input-file", [], ["free", "dependency"]) - -feature.feature("preserve-test-targets", ["on", "off"], ["incidental", "propagated"]) - -# Register target types. 
-type.register("TEST", ["test"]) -type.register("COMPILE", [], "TEST") -type.register("COMPILE_FAIL", [], "TEST") - -type.register("RUN_OUTPUT", ["run"]) -type.register("RUN", [], "TEST") -type.register("RUN_FAIL", [], "TEST") - -type.register("LINK", [], "TEST") -type.register("LINK_FAIL", [], "TEST") -type.register("UNIT_TEST", ["passed"], "TEST") - -__all_tests = [] - -# Declare the rules which create main targets. While the 'type' module already -# creates rules with the same names for us, we need extra convenience: default -# name of main target, so write our own versions. - -# Helper rule. Create a test target, using basename of first source if no target -# name is explicitly passed. Remembers the created target in a global variable. -def make_test(target_type, sources, requirements, target_name=None): - - if not target_name: - target_name = stem(os.path.basename(sources[0])) - - # Having periods (".") in the target name is problematic because the typed - # generator will strip the suffix and use the bare name for the file - # targets. Even though the location-prefix averts problems most times it - # does not prevent ambiguity issues when referring to the test targets. For - # example when using the XML log output. So we rename the target to remove - # the periods, and provide an alias for users. - real_name = target_name.replace(".", "~") - - project = get_manager().projects().current() - # The forces the build system for generate paths in the - # form '$build_dir/array1.test/gcc/debug'. This is necessary to allow - # post-processing tools to work. - t = get_manager().targets().create_typed_target( - type.type_from_rule_name(target_type), project, real_name, sources, - requirements + ["" + real_name + ".test"], [], []) - - # The alias to the real target, per period replacement above. - if real_name != target_name: - get_manager().projects().project_rules().all_names_["alias"]( - target_name, [t]) - - # Remember the test (for --dump-tests). 
A good way would be to collect all - # given a project. This has some technical problems: e.g. we can not call - # this dump from a Jamfile since projects referred by 'build-project' are - # not available until the whole Jamfile has been loaded. - __all_tests.append(t) - return t - - -# Note: passing more that one cpp file here is known to fail. Passing a cpp file -# and a library target works. -# -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def compile(sources, requirements, target_name=None): - return make_test("compile", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def compile_fail(sources, requirements, target_name=None): - return make_test("compile-fail", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def link(sources, requirements, target_name=None): - return make_test("link", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def link_fail(sources, requirements, target_name=None): - return make_test("link-fail", sources, requirements, target_name) - -def handle_input_files(input_files): - if len(input_files) > 1: - # Check that sorting made when creating property-set instance will not - # change the ordering. 
- if sorted(input_files) != input_files: - get_manager().errors()("Names of input files must be sorted alphabetically\n" + - "due to internal limitations") - return ["" + f for f in input_files] - -@bjam_signature((["sources", "*"], ["args", "*"], ["input_files", "*"], - ["requirements", "*"], ["target_name", "?"], - ["default_build", "*"])) -def run(sources, args, input_files, requirements, target_name=None, default_build=[]): - if args: - requirements.append("" + " ".join(args)) - requirements.extend(handle_input_files(input_files)) - return make_test("run", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["args", "*"], ["input_files", "*"], - ["requirements", "*"], ["target_name", "?"], - ["default_build", "*"])) -def run_fail(sources, args, input_files, requirements, target_name=None, default_build=[]): - if args: - requirements.append("" + " ".join(args)) - requirements.extend(handle_input_files(input_files)) - return make_test("run-fail", sources, requirements, target_name) - -# Register all the rules -for name in ["compile", "compile-fail", "link", "link-fail", "run", "run-fail"]: - get_manager().projects().add_rule(name, getattr(sys.modules[__name__], name.replace("-", "_"))) - -# Use 'test-suite' as a synonym for 'alias', for backward compatibility. -from b2.build.alias import alias -get_manager().projects().add_rule("test-suite", alias) - -# For all main targets in 'project-module', which are typed targets with type -# derived from 'TEST', produce some interesting information. -# -def dump_tests(): - for t in __all_tests: - dump_test(t) - -# Given a project location in normalized form (slashes are forward), compute the -# name of the Boost library. 
-# -__ln1 = re.compile("/(tools|libs)/(.*)/(test|example)") -__ln2 = re.compile("/(tools|libs)/(.*)$") -__ln3 = re.compile("(/status$)") -def get_library_name(path): - - path = path.replace("\\", "/") - match1 = __ln1.match(path) - match2 = __ln2.match(path) - match3 = __ln3.match(path) - - if match1: - return match1.group(2) - elif match2: - return match2.group(2) - elif match3: - return "" - elif option.get("dump-tests", False, True): - # The 'run' rule and others might be used outside boost. In that case, - # just return the path, since the 'library name' makes no sense. - return path - -# Was an XML dump requested? -__out_xml = option.get("out-xml", False, True) - -# Takes a target (instance of 'basic-target') and prints -# - its type -# - its name -# - comments specified via the property -# - relative location of all source from the project root. -# -def dump_test(target): - type = target.type() - name = target.name() - project = target.project() - - project_root = project.get('project-root') - library = get_library_name(os.path.abspath(project.get('location'))) - if library: - name = library + "/" + name - - sources = target.sources() - source_files = [] - for s in sources: - if isinstance(s, targets.FileReference): - location = os.path.abspath(os.path.join(s.location(), s.name())) - source_files.append(os.path.relpath(location, os.path.abspath(project_root))) - - target_name = project.get('location') + "//" + target.name() + ".test" - - test_info = target.requirements().get('test-info') - test_info = " ".join('"' + ti + '"' for ti in test_info) - - # If the user requested XML output on the command-line, add the test info to - # that XML file rather than dumping them to stdout. 
- #if $(.out-xml) - #{ -# local nl = " -#" ; -# .contents on $(.out-xml) += -# "$(nl) " -# "$(nl) " -# "$(nl) " -# "$(nl) " -# "$(nl) " -# ; -# } -# else - - source_files = " ".join('"' + s + '"' for s in source_files) - if test_info: - print 'boost-test(%s) "%s" [%s] : %s' % (type, name, test_info, source_files) - else: - print 'boost-test(%s) "%s" : %s' % (type, name, source_files) - -# Register generators. Depending on target type, either 'expect-success' or -# 'expect-failure' rule will be used. -generators.register_standard("testing.expect-success", ["OBJ"], ["COMPILE"]) -generators.register_standard("testing.expect-failure", ["OBJ"], ["COMPILE_FAIL"]) -generators.register_standard("testing.expect-success", ["RUN_OUTPUT"], ["RUN"]) -generators.register_standard("testing.expect-failure", ["RUN_OUTPUT"], ["RUN_FAIL"]) -generators.register_standard("testing.expect-success", ["EXE"], ["LINK"]) -generators.register_standard("testing.expect-failure", ["EXE"], ["LINK_FAIL"]) - -# Generator which runs an EXE and captures output. -generators.register_standard("testing.capture-output", ["EXE"], ["RUN_OUTPUT"]) - -# Generator which creates a target if sources run successfully. Differs from RUN -# in that run output is not captured. The reason why it exists is that the 'run' -# rule is much better for automated testing, but is not user-friendly (see -# http://article.gmane.org/gmane.comp.lib.boost.build/6353). -generators.register_standard("testing.unit-test", ["EXE"], ["UNIT_TEST"]) - -# FIXME: if those calls are after bjam.call, then bjam will crash -# when toolset.flags calls bjam.caller. 
-toolset.flags("testing.capture-output", "ARGS", [], [""]) -toolset.flags("testing.capture-output", "INPUT_FILES", [], [""]) -toolset.flags("testing.capture-output", "LAUNCHER", [], [""]) - -toolset.flags("testing.unit-test", "LAUNCHER", [], [""]) -toolset.flags("testing.unit-test", "ARGS", [], [""]) - -type.register("TIME", ["time"]) -generators.register_standard("testing.time", [], ["TIME"]) - - -# The following code sets up actions for this module. It's pretty convoluted, -# but the basic points is that we most of actions are defined by Jam code -# contained in testing-aux.jam, which we load into Jam module named 'testing' - -def run_path_setup(target, sources, ps): - - # For testing, we need to make sure that all dynamic libraries needed by the - # test are found. So, we collect all paths from dependency libraries (via - # xdll-path property) and add whatever explicit dll-path user has specified. - # The resulting paths are added to the environment on each test invocation. - dll_paths = ps.get('dll-path') - dll_paths.extend(ps.get('xdll-path')) - dll_paths.extend(bjam.call("get-target-variable", sources, "RUN_PATH")) - dll_paths = unique(dll_paths) - if dll_paths: - bjam.call("set-target-variable", target, "PATH_SETUP", - common.prepend_path_variable_command( - common.shared_library_path_variable(), dll_paths)) - -def capture_output_setup(target, sources, ps): - run_path_setup(target, sources, ps) - - if ps.get('preserve-test-targets') == ['off']: - bjam.call("set-target-variable", target, "REMOVE_TEST_TARGETS", "1") - -get_manager().engine().register_bjam_action("testing.capture-output", - capture_output_setup) - - -path = os.path.dirname(get_manager().projects().loaded_tool_module_path_[__name__]) -import b2.util.os_j -get_manager().projects().project_rules()._import_rule("testing", "os.name", - b2.util.os_j.name) -import b2.tools.common -get_manager().projects().project_rules()._import_rule("testing", "common.rm-command", - b2.tools.common.rm_command) 
-get_manager().projects().project_rules()._import_rule("testing", "common.file-creation-command", - b2.tools.common.file_creation_command) - -bjam.call("load", "testing", os.path.join(path, "testing-aux.jam")) - - -for name in ["expect-success", "expect-failure", "time"]: - get_manager().engine().register_bjam_action("testing." + name) - -get_manager().engine().register_bjam_action("testing.unit-test", - run_path_setup) - -if option.get("dump-tests", False, True): - build_system.add_pre_build_hook(dump_tests) diff --git a/jam-files/boost-build/tools/types/__init__.py b/jam-files/boost-build/tools/types/__init__.py deleted file mode 100644 index f972b714..00000000 --- a/jam-files/boost-build/tools/types/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -__all__ = [ - 'asm', - 'cpp', - 'exe', - 'html', - 'lib', - 'obj', - 'rsp', -] - -def register_all (): - for i in __all__: - m = __import__ (__name__ + '.' + i) - reg = i + '.register ()' - #exec (reg) - -# TODO: (PF) I thought these would be imported automatically. Anyone knows why they aren't? -register_all () diff --git a/jam-files/boost-build/tools/types/asm.jam b/jam-files/boost-build/tools/types/asm.jam deleted file mode 100644 index a340db36..00000000 --- a/jam-files/boost-build/tools/types/asm.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright Craig Rodrigues 2005. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type ASM : s S asm ; diff --git a/jam-files/boost-build/tools/types/asm.py b/jam-files/boost-build/tools/types/asm.py deleted file mode 100644 index b4e1c30e..00000000 --- a/jam-files/boost-build/tools/types/asm.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright Craig Rodrigues 2005. -# Copyright (c) 2008 Steven Watanabe -# -# Distributed under the Boost -# Software License, Version 1.0. 
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register(): - type.register_type('ASM', ['s', 'S', 'asm']) - -register() diff --git a/jam-files/boost-build/tools/types/cpp.jam b/jam-files/boost-build/tools/types/cpp.jam deleted file mode 100644 index 3159cdd7..00000000 --- a/jam-files/boost-build/tools/types/cpp.jam +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright David Abrahams 2004. -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Copyright 2010 Rene Rivera -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) -import type ; -import scanner ; - -class c-scanner : scanner -{ - import path ; - import regex ; - import scanner ; - import sequence ; - import virtual-target ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - for local i in $(includes) - { - self.includes += [ sequence.transform path.native - : [ regex.split $(i:G=) "&&" ] ] ; - } - } - - rule pattern ( ) - { - return "#[ \t]*include[ ]*(<(.*)>|\"(.*)\")" ; - } - - rule process ( target : matches * : binding ) - { - local angle = [ regex.transform $(matches) : "<(.*)>" ] ; - angle = [ sequence.transform path.native : $(angle) ] ; - local quoted = [ regex.transform $(matches) : "\"(.*)\"" ] ; - quoted = [ sequence.transform path.native : $(quoted) ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. When a target is - # directly created from virtual target this extra information is - # unnecessary. But in other cases, it allows us to distinguish between - # two headers of the same name included from different places. 
We do not - # need this extra information for angle includes, since they should not - # depend on including file (we can not get literal "." in include path). - local g2 = $(g)"#"$(b) ; - - angle = $(angle:G=$(g)) ; - quoted = $(quoted:G=$(g2)) ; - - local all = $(angle) $(quoted) ; - - INCLUDES $(target) : $(all) ; - NOCARE $(all) ; - SEARCH on $(angle) = $(self.includes:G=) ; - SEARCH on $(quoted) = $(b) $(self.includes:G=) ; - - # Just propagate the current scanner to includes in hope that includes - # do not change scanners. - scanner.propagate $(__name__) : $(angle) $(quoted) : $(target) ; - - ISFILE $(angle) $(quoted) ; - } -} - -scanner.register c-scanner : include ; - -type.register CPP : cpp cxx cc ; -type.register H : h ; -type.register HPP : hpp : H ; -type.register C : c ; - -# It most cases where a CPP file or a H file is a source of some action, we -# should rebuild the result if any of files included by CPP/H are changed. One -# case when this is not needed is installation, which is handled specifically. -type.set-scanner CPP : c-scanner ; -type.set-scanner C : c-scanner ; -# One case where scanning of H/HPP files is necessary is PCH generation -- if -# any header included by HPP being precompiled changes, we need to recompile the -# header. -type.set-scanner H : c-scanner ; -type.set-scanner HPP : c-scanner ; diff --git a/jam-files/boost-build/tools/types/cpp.py b/jam-files/boost-build/tools/types/cpp.py deleted file mode 100644 index 7b56111c..00000000 --- a/jam-files/boost-build/tools/types/cpp.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. 
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('CPP', ['cpp', 'cxx', 'cc']) - -register () diff --git a/jam-files/boost-build/tools/types/exe.jam b/jam-files/boost-build/tools/types/exe.jam deleted file mode 100644 index 47109513..00000000 --- a/jam-files/boost-build/tools/types/exe.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register EXE ; -type.set-generated-target-suffix EXE : windows : "exe" ; -type.set-generated-target-suffix EXE : cygwin : "exe" ; diff --git a/jam-files/boost-build/tools/types/exe.py b/jam-files/boost-build/tools/types/exe.py deleted file mode 100644 index a4935e24..00000000 --- a/jam-files/boost-build/tools/types/exe.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('EXE', ['exe'], None, ['NT', 'CYGWIN']) - type.register_type ('EXE', [], None, []) - -register () diff --git a/jam-files/boost-build/tools/types/html.jam b/jam-files/boost-build/tools/types/html.jam deleted file mode 100644 index 5cd337d0..00000000 --- a/jam-files/boost-build/tools/types/html.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. 
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type HTML : html ; diff --git a/jam-files/boost-build/tools/types/html.py b/jam-files/boost-build/tools/types/html.py deleted file mode 100644 index 63af4d90..00000000 --- a/jam-files/boost-build/tools/types/html.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('HTML', ['html']) - -register () diff --git a/jam-files/boost-build/tools/types/lib.jam b/jam-files/boost-build/tools/types/lib.jam deleted file mode 100644 index 854ab8fd..00000000 --- a/jam-files/boost-build/tools/types/lib.jam +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; # for set-generated-target-suffix -import os ; - -# The following naming scheme is used for libraries. -# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (msvc) -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (mingw): -# libxxx.a static library -# libxxx.dll DLL -# libxxx.dll.a import library -# -# On cygwin i.e. cygwin -# libxxx.a static library -# cygxxx.dll DLL -# libxxx.dll.a import library -# - -type.register LIB ; - -# FIXME: should not register both extensions on both platforms. -type.register STATIC_LIB : a lib : LIB ; - -# The 'lib' prefix is used everywhere -type.set-generated-target-prefix STATIC_LIB : : lib ; - -# Use '.lib' suffix for windows -type.set-generated-target-suffix STATIC_LIB : windows : lib ; - -# Except with gcc. 
-type.set-generated-target-suffix STATIC_LIB : gcc windows : a ; - -# Use xxx.lib for import libs -type IMPORT_LIB : : STATIC_LIB ; -type.set-generated-target-prefix IMPORT_LIB : : "" ; -type.set-generated-target-suffix IMPORT_LIB : : lib ; - -# Except with gcc (mingw or cygwin), where use libxxx.dll.a -type.set-generated-target-prefix IMPORT_LIB : gcc : lib ; -type.set-generated-target-suffix IMPORT_LIB : gcc : dll.a ; - -type.register SHARED_LIB : so dll dylib : LIB ; - -# Both mingw and cygwin use libxxx.dll naming scheme. -# On Linux, use "lib" prefix -type.set-generated-target-prefix SHARED_LIB : : lib ; -# But don't use it on windows -type.set-generated-target-prefix SHARED_LIB : windows : "" ; -# But use it again on mingw -type.set-generated-target-prefix SHARED_LIB : gcc windows : lib ; -# And use 'cyg' on cygwin -type.set-generated-target-prefix SHARED_LIB : cygwin : cyg ; - - -type.set-generated-target-suffix SHARED_LIB : windows : dll ; -type.set-generated-target-suffix SHARED_LIB : cygwin : dll ; -type.set-generated-target-suffix SHARED_LIB : darwin : dylib ; - -type SEARCHED_LIB : : LIB ; -# This is needed so that when we create a target of SEARCHED_LIB -# type, there's no prefix or suffix automatically added. -type.set-generated-target-prefix SEARCHED_LIB : : "" ; -type.set-generated-target-suffix SEARCHED_LIB : : "" ; diff --git a/jam-files/boost-build/tools/types/lib.py b/jam-files/boost-build/tools/types/lib.py deleted file mode 100644 index d0ec1fb5..00000000 --- a/jam-files/boost-build/tools/types/lib.py +++ /dev/null @@ -1,77 +0,0 @@ -# Status: ported -# Base revision: 64456. -# Copyright David Abrahams 2004. -# Copyright Vladimir Prus 2010. -# Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import b2.build.type as type - -# The following naming scheme is used for libraries. 
-# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (msvc) -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (mingw): -# libxxx.a static library -# libxxx.dll DLL -# libxxx.dll.a import library -# -# On cygwin i.e. cygwin -# libxxx.a static library -# cygxxx.dll DLL -# libxxx.dll.a import library -# - -type.register('LIB') - -# FIXME: should not register both extensions on both platforms. -type.register('STATIC_LIB', ['a', 'lib'], 'LIB') - -# The 'lib' prefix is used everywhere -type.set_generated_target_prefix('STATIC_LIB', [], 'lib') - -# Use '.lib' suffix for windows -type.set_generated_target_suffix('STATIC_LIB', ['windows'], 'lib') - -# Except with gcc. -type.set_generated_target_suffix('STATIC_LIB', ['gcc', 'windows'], 'a') - -# Use xxx.lib for import libs -type.register('IMPORT_LIB', [], 'STATIC_LIB') -type.set_generated_target_prefix('IMPORT_LIB', [], '') -type.set_generated_target_suffix('IMPORT_LIB', [], 'lib') - -# Except with gcc (mingw or cygwin), where use libxxx.dll.a -type.set_generated_target_prefix('IMPORT_LIB', ['gcc'], 'lib') -type.set_generated_target_suffix('IMPORT_LIB', ['gcc'], 'dll.a') - -type.register('SHARED_LIB', ['so', 'dll', 'dylib'], 'LIB') - -# Both mingw and cygwin use libxxx.dll naming scheme. 
-# On Linux, use "lib" prefix -type.set_generated_target_prefix('SHARED_LIB', [], 'lib') -# But don't use it on windows -type.set_generated_target_prefix('SHARED_LIB', ['windows'], '') -# But use it again on mingw -type.set_generated_target_prefix('SHARED_LIB', ['gcc', 'windows'], 'lib') -# And use 'cyg' on cygwin -type.set_generated_target_prefix('SHARED_LIB', ['cygwin'], 'cyg') - - -type.set_generated_target_suffix('SHARED_LIB', ['windows'], 'dll') -type.set_generated_target_suffix('SHARED_LIB', ['cygwin'], 'dll') -type.set_generated_target_suffix('SHARED_LIB', ['darwin'], 'dylib') - -type.register('SEARCHED_LIB', [], 'LIB') -# This is needed so that when we create a target of SEARCHED_LIB -# type, there's no prefix or suffix automatically added. -type.set_generated_target_prefix('SEARCHED_LIB', [], '') -type.set_generated_target_suffix('SEARCHED_LIB', [], '') diff --git a/jam-files/boost-build/tools/types/obj.jam b/jam-files/boost-build/tools/types/obj.jam deleted file mode 100644 index 6afbcaa6..00000000 --- a/jam-files/boost-build/tools/types/obj.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register OBJ : o obj ; -type.set-generated-target-suffix OBJ : windows : obj ; -type.set-generated-target-suffix OBJ : cygwin : obj ; diff --git a/jam-files/boost-build/tools/types/obj.py b/jam-files/boost-build/tools/types/obj.py deleted file mode 100644 index e61e99a8..00000000 --- a/jam-files/boost-build/tools/types/obj.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. 
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('OBJ', ['obj'], None, ['NT', 'CYGWIN']) - type.register_type ('OBJ', ['o']) - -register () diff --git a/jam-files/boost-build/tools/types/objc.jam b/jam-files/boost-build/tools/types/objc.jam deleted file mode 100644 index 709cbd0c..00000000 --- a/jam-files/boost-build/tools/types/objc.jam +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright Rene Rivera 2008, 2010. -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -import type ; -import scanner ; -import types/cpp ; - -class objc-scanner : c-scanner -{ - rule __init__ ( includes * ) - { - c-scanner.__init__ $(includes) ; - } - - rule pattern ( ) - { - return "#[ \t]*include|import[ ]*(<(.*)>|\"(.*)\")" ; - } -} - -scanner.register objc-scanner : include ; - -type.register OBJECTIVE_C : m ; -type.register OBJECTIVE_CPP : mm ; -type.set-scanner OBJECTIVE_C : objc-scanner ; -type.set-scanner OBJECTIVE_CPP : objc-scanner ; diff --git a/jam-files/boost-build/tools/types/preprocessed.jam b/jam-files/boost-build/tools/types/preprocessed.jam deleted file mode 100644 index c9187ba6..00000000 --- a/jam-files/boost-build/tools/types/preprocessed.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Steven Watanabe 2011 -# Distributed under the Boost Software License Version 1.0. (See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register PREPROCESSED_C : i : C ; -type.register PREPROCESSED_CPP : ii : CPP ; diff --git a/jam-files/boost-build/tools/types/qt.jam b/jam-files/boost-build/tools/types/qt.jam deleted file mode 100644 index 6d1dfbd4..00000000 --- a/jam-files/boost-build/tools/types/qt.jam +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vladimir Prus 2005. 
Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -type UI : ui ; -type QRC : qrc ; -type MOCCABLE_CPP ; -type MOCCABLE_H ; -# Result of running moc. -type MOC : moc : H ; diff --git a/jam-files/boost-build/tools/types/register.jam b/jam-files/boost-build/tools/types/register.jam deleted file mode 100644 index 203992ca..00000000 --- a/jam-files/boost-build/tools/types/register.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -# This module's job is to automatically import all the type -# registration modules in its directory. -import type os path modules ; - -# Register the given type on the specified OSes, or on remaining OSes -# if os is not specified. This rule is injected into each of the type -# modules for the sake of convenience. -local rule type ( type : suffixes * : base-type ? : os * ) -{ - if ! [ type.registered $(type) ] - { - if ( ! 
$(os) ) || [ os.name ] in $(os) - { - type.register $(type) : $(suffixes) : $(base-type) ; - } - } -} - -.this-module's-file = [ modules.binding $(__name__) ] ; -.this-module's-dir = [ path.parent $(.this-module's-file) ] ; -.sibling-jamfiles = [ path.glob $(.this-module's-dir) : *.jam ] ; -.sibling-modules = [ MATCH ^(.*)\.jam$ : $(.sibling-jamfiles) ] ; - -# A loop over all modules in this directory -for m in $(.sibling-modules) -{ - m = [ path.basename $(m) ] ; - m = types/$(m) ; - - # Inject the type rule into the new module - IMPORT $(__name__) : type : $(m) : type ; - import $(m) ; -} - - diff --git a/jam-files/boost-build/tools/types/rsp.jam b/jam-files/boost-build/tools/types/rsp.jam deleted file mode 100644 index bdf8a7c9..00000000 --- a/jam-files/boost-build/tools/types/rsp.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type RSP : rsp ; diff --git a/jam-files/boost-build/tools/types/rsp.py b/jam-files/boost-build/tools/types/rsp.py deleted file mode 100644 index ccb379e9..00000000 --- a/jam-files/boost-build/tools/types/rsp.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('RSP', ['rsp']) - -register () diff --git a/jam-files/boost-build/tools/unix.jam b/jam-files/boost-build/tools/unix.jam deleted file mode 100644 index 75949851..00000000 --- a/jam-files/boost-build/tools/unix.jam +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2004 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# This file implements linking semantic common to all unixes. On unix, static -# libraries must be specified in a fixed order on the linker command line. Generators -# declared there store information about the order and use it property. - -import feature ; -import "class" : new ; -import generators ; -import type ; -import set ; -import order ; -import builtin ; - -class unix-linking-generator : linking-generator -{ - import property-set ; - import type ; - import unix ; - - rule __init__ ( id - composing ? : # Specify if generator is composing. The generator will be - # composing if non-empty string is passed, or parameter is - # not given. To make generator non-composing, pass empty - # string ("") - source-types + : target-types + : - requirements * ) - { - composing ?= true ; - generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) : - $(requirements) ; - } - - rule run ( project name ? : property-set : sources + ) - { - local result = [ linking-generator.run $(project) $(name) : $(property-set) - : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - } - - rule generated-targets ( sources + : property-set : project name ? ) - { - local sources2 ; - local libraries ; - for local l in $(sources) - { - if [ type.is-derived [ $(l).type ] LIB ] - { - libraries += $(l) ; - } - else - { - sources2 += $(l) ; - } - } - - sources = $(sources2) [ unix.order-libraries $(libraries) ] ; - - return [ linking-generator.generated-targets $(sources) : $(property-set) - : $(project) $(name) ] ; - } - -} - -class unix-archive-generator : archive-generator -{ - import unix ; - - rule __init__ ( id composing ? 
: source-types + : target-types + : - requirements * ) - { - composing ?= true ; - archive-generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) : - $(requirements) ; - } - - rule run ( project name ? : property-set : sources + ) - { - local result = [ archive-generator.run $(project) $(name) : $(property-set) - : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - - } -} - -class unix-searched-lib-generator : searched-lib-generator -{ - import unix ; - rule __init__ ( * : * ) - { - generator.__init__ - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule optional-properties ( ) - { - return $(self.requirements) ; - } - - rule run ( project name ? : property-set : sources * ) - { - local result = [ searched-lib-generator.run $(project) $(name) - : $(property-set) : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - } -} - -class unix-prebuilt-lib-generator : generator -{ - import unix ; - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? 
: property-set : sources * ) - { - local f = [ $(property-set).get ] ; - unix.set-library-order-aux $(f) : $(sources) ; - return $(f) $(sources) ; - } -} - -generators.register - [ new unix-prebuilt-lib-generator unix.prebuilt : : LIB - : unix ] ; - -generators.override unix.prebuilt : builtin.lib-generator ; - - -# Declare generators -generators.register [ new unix-linking-generator unix.link : LIB OBJ : EXE - : unix ] ; - -generators.register [ new unix-archive-generator unix.archive : OBJ : STATIC_LIB - : unix ] ; - -generators.register [ new unix-linking-generator unix.link.dll : LIB OBJ : SHARED_LIB - : unix ] ; - -generators.register [ new unix-searched-lib-generator - unix.searched-lib-generator : : SEARCHED_LIB : unix ] ; - - -# The derived toolset must specify their own actions. -actions link { -} - -actions link.dll { -} - -actions archive { -} - -actions searched-lib-generator { -} - -actions prebuilt { -} - - - - - -.order = [ new order ] ; - -rule set-library-order-aux ( from * : to * ) -{ - for local f in $(from) - { - for local t in $(to) - { - if $(f) != $(t) - { - $(.order).add-pair $(f) $(t) ; - } - } - } -} - -rule set-library-order ( sources * : property-set : result * ) -{ - local used-libraries ; - local deps = [ $(property-set).dependency ] ; - for local l in $(sources) $(deps:G=) - { - if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ] - { - used-libraries += $(l) ; - } - } - - local created-libraries ; - for local l in $(result) - { - if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ] - { - created-libraries += $(l) ; - } - } - - created-libraries = [ set.difference $(created-libraries) : $(used-libraries) ] ; - set-library-order-aux $(created-libraries) : $(used-libraries) ; -} - -rule order-libraries ( libraries * ) -{ - local r = [ $(.order).order $(libraries) ] ; - return $(r) ; -} - \ No newline at end of file diff --git a/jam-files/boost-build/tools/unix.py b/jam-files/boost-build/tools/unix.py deleted file mode 100644 
index d409c2e4..00000000 --- a/jam-files/boost-build/tools/unix.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2004 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -""" This file implements linking semantics common to all unixes. On unix, static - libraries must be specified in a fixed order on the linker command line. Generators - declared there store information about the order and use it properly. -""" - -import builtin -from b2.build import generators, type -from b2.util.utility import * -from b2.util import set, sequence - -class UnixLinkingGenerator (builtin.LinkingGenerator): - - def __init__ (self, id, composing, source_types, target_types, requirements): - builtin.LinkingGenerator.__init__ (self, id, composing, source_types, target_types, requirements) - - def run (self, project, name, prop_set, sources): - result = builtin.LinkingGenerator.run (self, project, name, prop_set, sources) - if result: - set_library_order (project.manager (), sources, prop_set, result [1]) - - return result - - def generated_targets (self, sources, prop_set, project, name): - sources2 = [] - libraries = [] - for l in sources: - if type.is_derived (l.type (), 'LIB'): - libraries.append (l) - - else: - sources2.append (l) - - sources = sources2 + order_libraries (libraries) - - return builtin.LinkingGenerator.generated_targets (self, sources, prop_set, project, name) - - -class UnixArchiveGenerator (builtin.ArchiveGenerator): - def __init__ (self, id, composing, source_types, target_types_and_names, requirements): - builtin.ArchiveGenerator.__init__ (self, id, composing, source_types, target_types_and_names, requirements) - - def run (self, project, name, prop_set, sources): - result = builtin.ArchiveGenerator.run(self, project, name, prop_set, sources) - set_library_order(project.manager(), sources, prop_set, result) - return 
result - -class UnixSearchedLibGenerator (builtin.SearchedLibGenerator): - - def __init__ (self): - builtin.SearchedLibGenerator.__init__ (self) - - def optional_properties (self): - return self.requirements () - - def run (self, project, name, prop_set, sources, multiple): - result = SearchedLibGenerator.run (project, name, prop_set, sources, multiple) - - set_library_order (sources, prop_set, result) - - return result - -class UnixPrebuiltLibGenerator (generators.Generator): - def __init__ (self, id, composing, source_types, target_types_and_names, requirements): - generators.Generator.__init__ (self, id, composing, source_types, target_types_and_names, requirements) - - def run (self, project, name, prop_set, sources, multiple): - f = prop_set.get ('') - set_library_order_aux (f, sources) - return (f, sources) - -### # The derived toolset must specify their own rules and actions. -# FIXME: restore? -# action.register ('unix.prebuilt', None, None) - - -generators.register (UnixPrebuiltLibGenerator ('unix.prebuilt', False, [], ['LIB'], ['', 'unix'])) - - - - - -### # Declare generators -### generators.register [ new UnixLinkingGenerator unix.link : LIB OBJ : EXE -### : unix ] ; -generators.register (UnixArchiveGenerator ('unix.archive', True, ['OBJ'], ['STATIC_LIB'], ['unix'])) - -### generators.register [ new UnixLinkingGenerator unix.link.dll : LIB OBJ : SHARED_LIB -### : unix ] ; -### -### generators.register [ new UnixSearchedLibGenerator -### unix.SearchedLibGenerator : : SEARCHED_LIB : unix ] ; -### -### -### # The derived toolset must specify their own actions. -### actions link { -### } -### -### actions link.dll { -### } - -def unix_archive (manager, targets, sources, properties): - pass - -# FIXME: restore? 
-#action.register ('unix.archive', unix_archive, ['']) - -### actions searched-lib-generator { -### } -### -### actions prebuilt { -### } - - -from b2.util.order import Order -__order = Order () - -def set_library_order_aux (from_libs, to_libs): - for f in from_libs: - for t in to_libs: - if f != t: - __order.add_pair (f, t) - -def set_library_order (manager, sources, prop_set, result): - used_libraries = [] - deps = prop_set.dependency () - - sources.extend(d.value() for d in deps) - sources = sequence.unique(sources) - - for l in sources: - if l.type () and type.is_derived (l.type (), 'LIB'): - used_libraries.append (l) - - created_libraries = [] - for l in result: - if l.type () and type.is_derived (l.type (), 'LIB'): - created_libraries.append (l) - - created_libraries = set.difference (created_libraries, used_libraries) - set_library_order_aux (created_libraries, used_libraries) - -def order_libraries (libraries): - return __order.order (libraries) - diff --git a/jam-files/boost-build/tools/vacpp.jam b/jam-files/boost-build/tools/vacpp.jam deleted file mode 100644 index f4080fc0..00000000 --- a/jam-files/boost-build/tools/vacpp.jam +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Copyright Toon Knapen 2004. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# -# Boost.Build V2 toolset for the IBM XL C++ compiler -# - -import toolset : flags ; -import feature ; -import common ; -import generators ; -import os ; - -feature.extend toolset : vacpp ; -toolset.inherit vacpp : unix ; -generators.override vacpp.prebuilt : builtin.prebuilt ; -generators.override vacpp.searched-lib-generator : searched-lib-generator ; - -# Configure the vacpp toolset -rule init ( version ? 
: command * : options * ) -{ - local condition = [ - common.check-init-parameters vacpp : version $(version) ] ; - - command = [ common.get-invocation-command vacpp : xlC - : $(command) : "/usr/vacpp/bin/xlC" ] ; - - common.handle-options vacpp : $(condition) : $(command) : $(options) ; -} - -# Declare generators -generators.register-c-compiler vacpp.compile.c : C : OBJ : vacpp ; -generators.register-c-compiler vacpp.compile.c++ : CPP : OBJ : vacpp ; - -# Allow C++ style comments in C files -flags vacpp CFLAGS : -qcpluscmt ; - -# Declare flags -flags vacpp CFLAGS off : -qNOOPTimize ; -flags vacpp CFLAGS speed : -O3 -qstrict ; -flags vacpp CFLAGS space : -O2 -qcompact ; - -# Discretionary inlining (not recommended) -flags vacpp CFLAGS off : -qnoinline ; -flags vacpp CFLAGS on : -qinline ; -#flags vacpp CFLAGS full : -qinline ; -flags vacpp CFLAGS full : ; - -# Exception handling -flags vacpp C++FLAGS off : -qnoeh ; -flags vacpp C++FLAGS on : -qeh ; - -# Run-time Type Identification -flags vacpp C++FLAGS off : -qnortti ; -flags vacpp C++FLAGS on : -qrtti ; - -# Enable 64-bit memory addressing model -flags vacpp CFLAGS 64 : -q64 ; -flags vacpp LINKFLAGS 64 : -q64 ; -flags vacpp ARFLAGS aix/64 : -X 64 ; - -# Use absolute path when generating debug information -flags vacpp CFLAGS on : -g -qfullpath ; -flags vacpp LINKFLAGS on : -g -qfullpath ; -flags vacpp LINKFLAGS off : -s ; - -if [ os.name ] = AIX -{ - flags vacpp.compile C++FLAGS : -qfuncsect ; - - # The -bnoipath strips the prepending (relative) path of libraries from - # the loader section in the target library or executable. Hence, during - # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded - # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without - # this option, the prepending (relative) path + library name is - # hard-coded in the loader section, causing *only* this path to be - # searched during load-time. 
Note that the AIX linker does not have an - # -soname equivalent, this is as close as it gets. - # - # The above options are definately for AIX 5.x, and most likely also for - # AIX 4.x and AIX 6.x. For details about the AIX linker see: - # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf - # - flags vacpp.link LINKFLAGS shared : -bnoipath ; - - # Run-time linking - flags vacpp.link EXE-LINKFLAGS shared : -brtl ; -} -else -{ - # Linux PPC - flags vacpp.compile CFLAGS shared : -qpic=large ; - flags vacpp FINDLIBS : rt ; -} - -# Profiling -flags vacpp CFLAGS on : -pg ; -flags vacpp LINKFLAGS on : -pg ; - -flags vacpp.compile OPTIONS ; -flags vacpp.compile.c++ OPTIONS ; -flags vacpp DEFINES ; -flags vacpp UNDEFS ; -flags vacpp HDRS ; -flags vacpp STDHDRS ; -flags vacpp.link OPTIONS ; -flags vacpp ARFLAGS ; - -flags vacpp LIBPATH ; -flags vacpp NEEDLIBS ; -flags vacpp FINDLIBS ; -flags vacpp FINDLIBS ; - -# Select the compiler name according to the threading model. -flags vacpp VA_C_COMPILER single : xlc ; -flags vacpp VA_C_COMPILER multi : xlc_r ; -flags vacpp VA_CXX_COMPILER single : xlC ; -flags vacpp VA_CXX_COMPILER multi : xlC_r ; - -SPACE = " " ; - -flags vacpp.link.dll HAVE_SONAME linux : "" ; - -actions vacpp.link bind NEEDLIBS -{ - $(VA_CXX_COMPILER) $(EXE-LINKFLAGS) $(LINKFLAGS) -o "$(<[1])" -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS) -} - -actions vacpp.link.dll bind NEEDLIBS -{ - xlC_r -G $(LINKFLAGS) -o "$(<[1])" $(HAVE_SONAME)-Wl,-soname$(SPACE)-Wl,$(<[-1]:D=) -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS) -} - -actions vacpp.compile.c -{ - $(VA_C_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" -} - -actions vacpp.compile.c++ -{ - $(VA_CXX_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) 
$(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" -} - -actions updated together piecemeal vacpp.archive -{ - ar $(ARFLAGS) ru "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/whale.jam b/jam-files/boost-build/tools/whale.jam deleted file mode 100644 index 9335ff0c..00000000 --- a/jam-files/boost-build/tools/whale.jam +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) Vladimir Prus 2002-2005. - -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# This module implements support for Whale/Dolphin/WD parser/lexer tools. -# See http://www.cs.queensu.ca/home/okhotin/whale/ for details. -# -# There are three interesting target types: -# - WHL (the parser sources), that are converted to CPP and H -# - DLP (the lexer sources), that are converted to CPP and H -# - WD (combined parser/lexer sources), that are converted to WHL + DLP - -import type ; -import generators ; -import path ; -import "class" : new ; -import errors ; - -rule init ( path # path the Whale/Dolphin/WD binaries - ) -{ - if $(.configured) && $(.path) != $(path) - { - errors.user-error "Attempt to reconfigure Whale support" : - "Previously configured with path \"$(.path:E=)\"" : - "Now configuring with path \"$(path:E=)\"" ; - - } - .configured = true ; - .path = $(path) ; - - .whale = [ path.join $(path) whale ] ; - .dolphin = [ path.join $(path) dolphin ] ; - .wd = [ path.join $(path) wd ] ; -} - - -# Declare the types. -type.register WHL : whl ; -type.register DLP : dlp ; -type.register WHL_LR0 : lr0 ; -type.register WD : wd ; - -# Declare standard generators. -generators.register-standard whale.whale : WHL : CPP H H(%_symbols) ; -generators.register-standard whale.dolphin : DLP : CPP H ; -generators.register-standard whale.wd : WD : WHL(%_parser) DLP(%_lexer) ; - -# The conversions defines above a ambiguious when we generated CPP from WD. 
-# We can either go via WHL type, or via DLP type. -# The following custom generator handles this by running both conversions. - -class wd-to-cpp : generator -{ - rule __init__ ( * : * : * ) - { - generator.__init__ $(1) : $(2) : $(3) ; - } - - rule run ( project name ? : property-set : source * ) - { - if ! $(source[2]) - { - local new-sources ; - if ! [ $(source).type ] in WHL DLP - { - local r1 = [ generators.construct $(project) $(name) - : WHL : $(property-set) : $(source) ] ; - local r2 = [ generators.construct $(project) $(name) - : DLP : $(property-set) : $(source) ] ; - - new-sources = [ sequence.unique $(r1[2-]) $(r2[2-]) ] ; - } - else - { - new-sources = $(source) ; - } - - local result ; - for local i in $(new-sources) - { - local t = [ generators.construct $(project) $(name) : CPP - : $(property-set) : $(i) ] ; - result += $(t[2-]) ; - } - return $(result) ; - } - } - -} - - -generators.override whale.wd-to-cpp : whale.whale ; -generators.override whale.wd-to-cpp : whale.dolphin ; - - -generators.register [ new wd-to-cpp whale.wd-to-cpp : : CPP ] ; - - -actions whale -{ - $(.whale) -d $(<[1]:D) $(>) -} - -actions dolphin -{ - $(.dolphin) -d $(<[1]:D) $(>) -} - -actions wd -{ - $(.wd) -d $(<[1]:D) -g $(>) -} - diff --git a/jam-files/boost-build/tools/xlf.jam b/jam-files/boost-build/tools/xlf.jam deleted file mode 100644 index e7fcc608..00000000 --- a/jam-files/boost-build/tools/xlf.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# -# toolset configuration for the IBM Fortran compiler (xlf) -# - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? 
: command * : options * ) -{ -} - -# Declare flags and action for compilation -flags xlf OPTIONS off : -O0 ; -flags xlf OPTIONS speed : -O3 ; -flags xlf OPTIONS space : -Os ; - -flags xlf OPTIONS on : -g ; -flags xlf OPTIONS on : -pg ; - -flags xlf DEFINES ; -flags xlf INCLUDES ; - -rule compile-fortran -{ -} - -actions compile-fortran -{ - xlf $(OPTIONS) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler xlf.compile-fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/xsltproc-config.jam b/jam-files/boost-build/tools/xsltproc-config.jam deleted file mode 100644 index de54a2eb..00000000 --- a/jam-files/boost-build/tools/xsltproc-config.jam +++ /dev/null @@ -1,37 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Python tools and librries. To use, just import this module. - -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - local xsltproc-path = [ GLOB [ modules.peek : PATH ] "C:\\Boost\\bin" : xsltproc\.exe ] ; - xsltproc-path = $(xsltproc-path[1]) ; - - if $(xsltproc-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using xsltproc ":" $(xsltproc-path) ; - } - using xsltproc : $(xsltproc-path) ; - } -} -else -{ - local xsltproc-path = [ GLOB [ modules.peek : PATH ] : xsltproc ] ; - xsltproc-path = $(xsltproc-path[1]) ; - - if $(xsltproc-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using xsltproc ":" $(xsltproc-path) ; - } - using xsltproc : $(xsltproc-path) ; - } -} diff --git a/jam-files/boost-build/tools/xsltproc.jam b/jam-files/boost-build/tools/xsltproc.jam deleted file mode 100644 index 96f5170b..00000000 --- a/jam-files/boost-build/tools/xsltproc.jam +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (C) 2003 Doug Gregor. 
Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. - -# This module defines rules to apply an XSLT stylesheet to an XML file using the -# xsltproc driver, part of libxslt. -# -# Note: except for 'init', this modules does not provide any rules for end -# users. - -import feature ; -import regex ; -import sequence ; -import common ; -import os ; -import modules ; -import path ; -import errors ; - -feature.feature xsl:param : : free ; -feature.feature xsl:path : : free ; -feature.feature catalog : : free ; - - -# Initialize xsltproc support. The parameters are: -# xsltproc: The xsltproc executable -# -rule init ( xsltproc ? ) -{ - if $(xsltproc) - { - modify-config ; - .xsltproc = $(xsltproc) ; - check-xsltproc ; - } -} - -rule freeze-config ( ) -{ - if ! $(.config-frozen) - { - .config-frozen = true ; - .xsltproc ?= [ modules.peek : XSLTPROC ] ; - .xsltproc ?= xsltproc ; - check-xsltproc ; - .is-cygwin = [ .is-cygwin $(.xsltproc) ] ; - } -} - -rule modify-config -{ - if $(.config-frozen) - { - errors.user-error "xsltproc: Cannot change xsltproc command after it has been used." ; - } -} - -rule check-xsltproc ( ) -{ - if $(.xsltproc) - { - local status = [ SHELL "\"$(.xsltproc)\" -V" : no-output : exit-status ] ; - if $(status[2]) != "0" - { - errors.user-error "xsltproc: Could not run \"$(.xsltproc)\" -V." ; - } - } -} - -# Returns a non-empty string if a cygwin xsltproc binary was specified. 
-rule is-cygwin ( ) -{ - freeze-config ; - return $(.is-cygwin) ; -} - -rule .is-cygwin ( xsltproc ) -{ - if [ os.on-windows ] - { - local file = [ path.make [ modules.binding $(__name__) ] ] ; - local dir = [ path.native - [ path.join [ path.parent $(file) ] xsltproc ] ] ; - if [ os.name ] = CYGWIN - { - dir = $(dir:W) ; - } - local command = - "\"$(xsltproc)\" \"$(dir)\\test.xsl\" \"$(dir)\\test.xml\" 2>&1" ; - local status = [ SHELL $(command) : no-output : exit-status ] ; - if $(status[2]) != "0" - { - return true ; - } - } -} - -rule compute-xslt-flags ( target : properties * ) -{ - local flags ; - - # Raw flags. - flags += [ feature.get-values : $(properties) ] ; - - # Translate into command line flags. - for local param in [ feature.get-values : $(properties) ] - { - local namevalue = [ regex.split $(param) "=" ] ; - flags += --stringparam $(namevalue[1]) \"$(namevalue[2])\" ; - } - - # Translate . - for local path in [ feature.get-values : $(properties) ] - { - flags += --path \"$(path:G=)\" ; - } - - # Take care of implicit dependencies. - local other-deps ; - for local dep in [ feature.get-values : $(properties) ] - { - other-deps += [ $(dep:G=).creating-subvariant ] ; - } - - local implicit-target-directories ; - for local dep in [ sequence.unique $(other-deps) ] - { - implicit-target-directories += [ $(dep).all-target-directories ] ; - } - - for local dir in $(implicit-target-directories) - { - flags += --path \"$(dir:T)\" ; - } - - return $(flags) ; -} - - -local rule .xsltproc ( target : source stylesheet : properties * : dirname ? : action ) -{ - freeze-config ; - STYLESHEET on $(target) = $(stylesheet) ; - FLAGS on $(target) += [ compute-xslt-flags $(target) : $(properties) ] ; - NAME on $(target) = $(.xsltproc) ; - - for local catalog in [ feature.get-values : $(properties) ] - { - CATALOG = [ common.variable-setting-command XML_CATALOG_FILES : $(catalog:T) ] ; - } - - if [ os.on-windows ] && ! 
[ is-cygwin ] - { - action = $(action).windows ; - } - - $(action) $(target) : $(source) ; -} - - -rule xslt ( target : source stylesheet : properties * ) -{ - return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) : : xslt-xsltproc ] ; -} - - -rule xslt-dir ( target : source stylesheet : properties * : dirname ) -{ - return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) : $(dirname) : xslt-xsltproc-dir ] ; -} - -actions xslt-xsltproc.windows -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:W)" "$(>:W)" -} - - -actions xslt-xsltproc bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:T)" "$(>:T)" -} - - -actions xslt-xsltproc-dir.windows bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:W)" "$(>:W)" -} - - -actions xslt-xsltproc-dir bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:T)" "$(>:T)" -} diff --git a/jam-files/boost-build/tools/xsltproc/included.xsl b/jam-files/boost-build/tools/xsltproc/included.xsl deleted file mode 100644 index ef86394a..00000000 --- a/jam-files/boost-build/tools/xsltproc/included.xsl +++ /dev/null @@ -1,11 +0,0 @@ - - - - diff --git a/jam-files/boost-build/tools/xsltproc/test.xml b/jam-files/boost-build/tools/xsltproc/test.xml deleted file mode 100644 index 57c8ba18..00000000 --- a/jam-files/boost-build/tools/xsltproc/test.xml +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/jam-files/boost-build/tools/xsltproc/test.xsl b/jam-files/boost-build/tools/xsltproc/test.xsl deleted file mode 100644 index a142c91d..00000000 --- a/jam-files/boost-build/tools/xsltproc/test.xsl +++ /dev/null @@ -1,12 +0,0 @@ - - - - - diff --git a/jam-files/boost-build/tools/zlib.jam b/jam-files/boost-build/tools/zlib.jam deleted file mode 100644 index f9138fd5..00000000 --- a/jam-files/boost-build/tools/zlib.jam +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 
(c) 2010 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Supports the zlib library -# -# After 'using zlib', the following targets are available: -# -# /zlib//zlib -- The zlib library - - -# In addition to direct purpose of supporting zlib, this module also -# serves as canonical example of how third-party condiguration works -# in Boost.Build. The operation is as follows -# -# - For each 'using zlib : condition ... : ...' we create a target alternative -# for zlib, with the specified condition. -# - There's one target alternative for 'zlib' with no specific condition -# properties. -# -# Two invocations of 'using zlib' with the same condition but different -# properties are not permitted, e.g.: -# -# using zlib : condition windows : include foo ; -# using zlib : condition windows : include bar ; -# -# is in error. One exception is for empty condition, 'using' without any -# parameters is overridable. That is: -# -# using zlib ; -# using zlib : include foo ; -# -# Is OK then the first 'using' is ignored. Likewise if the order of the statements -# is reversed. -# -# When 'zlib' target is built, a target alternative is selected as usual for -# Boost.Build. The selected alternative is a custom target class, which: -# -# - calls ac.find-include-path to find header path. If explicit path is provided -# in 'using', only that path is checked, and if no header is found there, error -# is emitted. Otherwise, we check a directory specified using ZLIB_INCLUDE -# environment variable, and failing that, in standard directories. -# [TODO: document sysroot handling] -# - calls ac.find-library to find the library, in an identical fashion. 
-# - -import project ; -import ac ; -import errors ; -import "class" : new ; -import targets ; - -project.initialize $(__name__) ; -project = [ project.current ] ; -project zlib ; - -header = zlib.h ; -names = z zlib zll zdll ; - -.default-alternative = [ new ac-library zlib : $(project) ] ; -$(.default-alternative).set-header $(header) ; -$(.default-alternative).set-default-names $(names) ; -targets.main-target-alternative $(.default-alternative) ; - -rule init ( * : * ) -{ - if ! $(condition) - { - # Special case the no-condition case so that 'using' without parameters - # can mix with more specific 'using'. - $(.default-alternative).reconfigure $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else - { - # FIXME: consider if we should allow overriding definitions for a given - # condition -- e.g. project-config.jam might want to override whatever is - # in user-config.jam. - local mt = [ new ac-library zlib : $(project) - : $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - $(mt).set-header $(header) ; - $(mt).set-default-names $(names) ; - targets.main-target-alternative $(mt) ; - } -} - - - - - - diff --git a/jam-files/boost-build/user-config.jam b/jam-files/boost-build/user-config.jam deleted file mode 100644 index fbbf13fd..00000000 --- a/jam-files/boost-build/user-config.jam +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2003, 2005 Douglas Gregor -# Copyright 2004 John Maddock -# Copyright 2002, 2003, 2004, 2007 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This file is used to configure your Boost.Build installation. You can modify -# this file in place, or you can place it in a permanent location so that it -# does not get overwritten should you get a new version of Boost.Build. 
See: -# -# http://www.boost.org/boost-build2/doc/html/bbv2/overview/configuration.html -# -# for documentation about possible permanent locations. - -# This file specifies which toolsets (C++ compilers), libraries, and other -# tools are available. Often, you should be able to just uncomment existing -# example lines and adjust them to taste. The complete list of supported tools, -# and configuration instructions can be found at: -# -# http://boost.org/boost-build2/doc/html/bbv2/reference/tools.html -# - -# This file uses Jam language syntax to describe available tools. Mostly, -# there are 'using' lines, that contain the name of the used tools, and -# parameters to pass to those tools -- where paremeters are separated by -# semicolons. Important syntax notes: -# -# - Both ':' and ';' must be separated from other tokens by whitespace -# - The '\' symbol is a quote character, so when specifying Windows paths you -# should use '/' or '\\' instead. -# -# More details about the syntax can be found at: -# -# http://boost.org/boost-build2/doc/html/bbv2/advanced.html#bbv2.advanced.jam_language -# - -# ------------------ -# GCC configuration. -# ------------------ - -# Configure gcc (default version). -# using gcc ; - -# Configure specific gcc version, giving alternative name to use. -# using gcc : 3.2 : g++-3.2 ; - - -# ------------------- -# MSVC configuration. -# ------------------- - -# Configure msvc (default version, searched for in standard locations and PATH). -# using msvc ; - -# Configure specific msvc version (searched for in standard locations and PATH). -# using msvc : 8.0 ; - - -# ---------------------- -# Borland configuration. -# ---------------------- -# using borland ; - - -# ---------------------- -# STLPort configuration. -# ---------------------- - -# Configure specifying location of STLPort headers. Libraries must be either -# not needed or available to the compiler by default. 
-# using stlport : : /usr/include/stlport ; - -# Configure specifying location of both headers and libraries explicitly. -# using stlport : : /usr/include/stlport /usr/lib ; - - -# ----------------- -# QT configuration. -# ----------------- - -# Configure assuming QTDIR gives the installation prefix. -# using qt ; - -# Configure with an explicit installation prefix. -# using qt : /usr/opt/qt ; - -# --------------------- -# Python configuration. -# --------------------- - -# Configure specific Python version. -# using python : 3.1 : /usr/bin/python3 : /usr/include/python3.1 : /usr/lib ; diff --git a/jam-files/boost-build/util/__init__.py b/jam-files/boost-build/util/__init__.py deleted file mode 100644 index f80fe70e..00000000 --- a/jam-files/boost-build/util/__init__.py +++ /dev/null @@ -1,136 +0,0 @@ - -import bjam -import re -import types - -# Decorator the specifies bjam-side prototype for a Python function -def bjam_signature(s): - - def wrap(f): - f.bjam_signature = s - return f - - return wrap - -def metatarget(f): - - f.bjam_signature = (["name"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"], ["usage_requirements", "*"]) - return f - -class cached(object): - - def __init__(self, function): - self.function = function - self.cache = {} - - def __call__(self, *args): - try: - return self.cache[args] - except KeyError: - v = self.function(*args) - self.cache[args] = v - return v - - def __get__(self, instance, type): - return types.MethodType(self, instance, type) - -def unquote(s): - if s and s[0] == '"' and s[-1] == '"': - return s[1:-1] - else: - return s - -_extract_jamfile_and_rule = re.compile("(Jamfile<.*>)%(.*)") - -def qualify_jam_action(action_name, context_module): - - if action_name.startswith("###"): - # Callable exported from Python. 
Don't touch - return action_name - elif _extract_jamfile_and_rule.match(action_name): - # Rule is already in indirect format - return action_name - else: - ix = action_name.find('.') - if ix != -1 and action_name[:ix] == context_module: - return context_module + '%' + action_name[ix+1:] - - return context_module + '%' + action_name - - -def set_jam_action(name, *args): - - m = _extract_jamfile_and_rule.match(name) - if m: - args = ("set-update-action-in-module", m.group(1), m.group(2)) + args - else: - args = ("set-update-action", name) + args - - return bjam.call(*args) - - -def call_jam_function(name, *args): - - m = _extract_jamfile_and_rule.match(name) - if m: - args = ("call-in-module", m.group(1), m.group(2)) + args - return bjam.call(*args) - else: - return bjam.call(*((name,) + args)) - -__value_id = 0 -__python_to_jam = {} -__jam_to_python = {} - -def value_to_jam(value, methods=False): - """Makes a token to refer to a Python value inside Jam language code. - - The token is merely a string that can be passed around in Jam code and - eventually passed back. For example, we might want to pass PropertySet - instance to a tag function and it might eventually call back - to virtual_target.add_suffix_and_prefix, passing the same instance. - - For values that are classes, we'll also make class methods callable - from Jam. - - Note that this is necessary to make a bit more of existing Jamfiles work. - This trick should not be used to much, or else the performance benefits of - Python port will be eaten. - """ - - global __value_id - - r = __python_to_jam.get(value, None) - if r: - return r - - exported_name = '###_' + str(__value_id) - __value_id = __value_id + 1 - __python_to_jam[value] = exported_name - __jam_to_python[exported_name] = value - - if methods and type(value) == types.InstanceType: - for field_name in dir(value): - field = getattr(value, field_name) - if callable(field) and not field_name.startswith("__"): - bjam.import_rule("", exported_name + "." 
+ field_name, field) - - return exported_name - -def record_jam_to_value_mapping(jam_value, python_value): - __jam_to_python[jam_value] = python_value - -def jam_to_value_maybe(jam_value): - - if type(jam_value) == type(""): - return __jam_to_python.get(jam_value, jam_value) - else: - return jam_value - -def stem(filename): - i = filename.find('.') - if i != -1: - return filename[0:i] - else: - return filename diff --git a/jam-files/boost-build/util/assert.jam b/jam-files/boost-build/util/assert.jam deleted file mode 100644 index abedad52..00000000 --- a/jam-files/boost-build/util/assert.jam +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright 2001, 2002, 2003 Dave Abrahams -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -import errors ; -import modules ; - - -################################################################################ -# -# Private implementation details. -# -################################################################################ - -# Rule added as a replacement for the regular Jam = operator but which does not -# ignore trailing empty string elements. -# -local rule exact-equal-test ( lhs * : rhs * ) -{ - local lhs_extended = $(lhs) xxx ; - local rhs_extended = $(rhs) xxx ; - if $(lhs_extended) = $(rhs_extended) - { - return true ; - } -} - - -# Two lists are considered set-equal if they contain the same elements, ignoring -# duplicates and ordering. -# -local rule set-equal-test ( set1 * : set2 * ) -{ - if ( $(set1) in $(set2) ) && ( $(set2) in $(set1) ) - { - return true ; - } -} - - -################################################################################ -# -# Public interface. -# -################################################################################ - -# Assert the equality of A and B, ignoring trailing empty string elements. 
-# -rule equal ( a * : b * ) -{ - if $(a) != $(b) - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\" - (ignoring trailing empty strings) ; - } -} - - -# Assert that the result of calling RULE-NAME on the given arguments has a false -# logical value (is either an empty list or all empty strings). -# -rule false ( rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) - : $(7) : $(8) : $(9) ] ; - } - - if $(result) - { - errors.error-skip-frames 3 assertion failure: Expected false result from - "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) : - $(5) : $(6) : $(7) : $(8) : $(9) ] "]" : Got: "[" \"$(result)\" "]" ; - } -} - - -# Assert that ELEMENT is present in LIST. -# -rule "in" ( element : list * ) -{ - if ! $(element) in $(list) - { - errors.error-skip-frames 3 assertion failure: Expected \"$(element)\" in - "[" \"$(list)\" "]" ; - } -} - - -# Assert the inequality of A and B, ignoring trailing empty string elements. -# -rule not-equal ( a * : b * ) -{ - if $(a) = $(b) - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\" - (ignoring trailing empty strings) ; - } -} - - -# Assert that ELEMENT is not present in LIST. -# -rule not-in ( element : list * ) -{ - if $(element) in $(list) - { - errors.error-skip-frames 3 assertion failure: Did not expect - \"$(element)\" in "[" \"$(list)\" "]" ; - } -} - - -# Assert the inequality of A and B as sets. -# -rule not-set-equal ( a * : b * ) -{ - if [ set-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]" - and "[" \"$(b)\" "]" to not be equal as sets ; - } -} - - -# Assert that A and B are not exactly equal, not ignoring trailing empty string -# elements. 
-# -rule not-exact-equal ( a * : b * ) -{ - if [ exact-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\" ; - } -} - - -# Assert that EXPECTED is the result of calling RULE-NAME with the given -# arguments. -# -rule result ( expected * : rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) - : $(8) : $(9) ] ; - } - - if ! [ exact-equal-test $(result) : $(expected) ] - { - errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [ - errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : - $(9) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "[" - \"$(result)\" "]" ; - } -} - - -# Assert that EXPECTED is set-equal (i.e. duplicates and ordering are ignored) -# to the result of calling RULE-NAME with the given arguments. Note that rules -# called this way may accept at most 8 parameters. -# -rule result-set-equal ( expected * : rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) - : $(8) : $(9) ] ; - } - - if ! [ set-equal-test $(result) : $(expected) ] - { - errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [ - errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : - $(9) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "[" - \"$(result)\" "]" ; - } -} - - -# Assert the equality of A and B as sets. -# -rule set-equal ( a * : b * ) -{ - if ! [ set-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]" - and "[" \"$(b)\" "]" to be equal as sets ; - } -} - - -# Assert that the result of calling RULE-NAME on the given arguments has a true -# logical value (is neither an empty list nor all empty strings). 
-# -rule true ( rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) - : $(7) : $(8) : $(9) ] ; - } - - if ! $(result) - { - errors.error-skip-frames 3 assertion failure: Expected true result from - "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) : - $(5) : $(6) : $(7) : $(8) : $(9) ] "]" ; - } -} - - -# Assert the exact equality of A and B, not ignoring trailing empty string -# elements. -# -rule exact-equal ( a * : b * ) -{ - if ! [ exact-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\" ; - } -} - - -# Assert that the given variable is not an empty list. -# -rule variable-not-empty ( name ) -{ - local value = [ modules.peek [ CALLER_MODULE ] : $(name) ] ; - if ! $(value)-is-not-empty - { - errors.error-skip-frames 3 assertion failure: Expected variable - \"$(name)\" not to be an empty list ; - } -} - - -rule __test__ ( ) -{ - # Helper rule used to avoid test duplication related to different list - # equality test rules. - # - local rule run-equality-test ( equality-assert : ignore-trailing-empty-strings ? ) - { - local not-equality-assert = not-$(equality-assert) ; - - # When the given equality test is expected to ignore trailing empty - # strings some of the test results should be inverted. 
- local not-equality-assert-i = not-$(equality-assert) ; - if $(ignore-trailing-empty-strings) - { - not-equality-assert-i = $(equality-assert) ; - } - - $(equality-assert) : ; - $(equality-assert) "" "" : "" "" ; - $(not-equality-assert-i) : "" "" ; - $(equality-assert) x : x ; - $(not-equality-assert) : x ; - $(not-equality-assert) "" : x ; - $(not-equality-assert) "" "" : x ; - $(not-equality-assert-i) x : x "" ; - $(equality-assert) x "" : x "" ; - $(not-equality-assert) x : "" x ; - $(equality-assert) "" x : "" x ; - - $(equality-assert) 1 2 3 : 1 2 3 ; - $(not-equality-assert) 1 2 3 : 3 2 1 ; - $(not-equality-assert) 1 2 3 : 1 5 3 ; - $(not-equality-assert) 1 2 3 : 1 "" 3 ; - $(not-equality-assert) 1 2 3 : 1 1 2 3 ; - $(not-equality-assert) 1 2 3 : 1 2 2 3 ; - $(not-equality-assert) 1 2 3 : 5 6 7 ; - - # Extra variables used here just to make sure Boost Jam or Boost Build - # do not handle lists with empty strings differently depending on - # whether they are literals or stored in variables. 
- - local empty = ; - local empty-strings = "" "" ; - local x-empty-strings = x "" "" ; - local empty-strings-x = "" "" x ; - - $(equality-assert) : $(empty) ; - $(not-equality-assert-i) "" : $(empty) ; - $(not-equality-assert-i) "" "" : $(empty) ; - $(not-equality-assert-i) : $(empty-strings) ; - $(not-equality-assert-i) "" : $(empty-strings) ; - $(equality-assert) "" "" : $(empty-strings) ; - $(equality-assert) $(empty) : $(empty) ; - $(equality-assert) $(empty-strings) : $(empty-strings) ; - $(not-equality-assert-i) $(empty) : $(empty-strings) ; - $(equality-assert) $(x-empty-strings) : $(x-empty-strings) ; - $(equality-assert) $(empty-strings-x) : $(empty-strings-x) ; - $(not-equality-assert) $(empty-strings-x) : $(x-empty-strings) ; - $(not-equality-assert-i) x : $(x-empty-strings) ; - $(not-equality-assert) x : $(empty-strings-x) ; - $(not-equality-assert-i) x : $(x-empty-strings) ; - $(not-equality-assert-i) x "" : $(x-empty-strings) ; - $(equality-assert) x "" "" : $(x-empty-strings) ; - $(not-equality-assert) x : $(empty-strings-x) ; - $(not-equality-assert) "" x : $(empty-strings-x) ; - $(equality-assert) "" "" x : $(empty-strings-x) ; - } - - - # --------------- - # Equality tests. - # --------------- - - run-equality-test equal : ignore-trailing-empty-strings ; - run-equality-test exact-equal ; - - - # ------------------------- - # assert.set-equal() tests. 
- # ------------------------- - - set-equal : ; - not-set-equal "" "" : ; - set-equal "" "" : "" ; - set-equal "" "" : "" "" ; - set-equal a b c : a b c ; - set-equal a b c : b c a ; - set-equal a b c a : a b c ; - set-equal a b c : a b c a ; - not-set-equal a b c : a b c d ; - not-set-equal a b c d : a b c ; -} diff --git a/jam-files/boost-build/util/container.jam b/jam-files/boost-build/util/container.jam deleted file mode 100644 index dd496393..00000000 --- a/jam-files/boost-build/util/container.jam +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Various container classes. - -# Base for container objects. This lets us construct recursive structures. That -# is containers with containers in them, specifically so we can tell literal -# values from node values. -# -class node -{ - rule __init__ ( - value ? # Optional value to set node to initially. - ) - { - self.value = $(value) ; - } - - # Set the value of this node, passing nothing will clear it. - # - rule set ( value * ) - { - self.value = $(value) ; - } - - # Get the value of this node. - # - rule get ( ) - { - return $(self.value) ; - } -} - - -# A simple vector. Interface mimics the C++ std::vector and std::list, with the -# exception that indices are one (1) based to follow Jam standard. -# -# TODO: Possibly add assertion checks. -# -class vector : node -{ - import numbers ; - import utility ; - import sequence ; - - rule __init__ ( - values * # Initial contents of vector. - ) - { - node.__init__ ; - self.value = $(values) ; - } - - # Get the value of the first element. - # - rule front ( ) - { - return $(self.value[1]) ; - } - - # Get the value of the last element. 
- # - rule back ( ) - { - return $(self.value[-1]) ; - } - - # Get the value of the element at the given index, one based. Access to - # elements of recursive structures is supported directly. Specifying - # additional index values recursively accesses the elements as containers. - # For example: [ $(v).at 1 : 2 ] would retrieve the second element of our - # first element, assuming the first element is a container. - # - rule at ( - index # The element index, one based. - : * # Additional indices to access recursively. - ) - { - local r = $(self.value[$(index)]) ; - if $(2) - { - r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - } - return $(r) ; - } - - # Get the value contained in the given element. This has the same - # functionality and interface as "at" but in addition gets the value of the - # referenced element, assuming it is a "node". - # - rule get-at ( - index # The element index, one based. - : * # Additional indices to access recursively. - ) - { - local r = $(self.value[$(index)]) ; - if $(2) - { - r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - } - return [ $(r).get ] ; - } - - # Insert the given value into the front of the vector pushing the rest of - # the elements back. - # - rule push-front ( - value # Value to become first element. - ) - { - self.value = $(value) $(self.value) ; - } - - # Remove the front element from the vector. Does not return the value. No - # effect if vector is empty. - # - rule pop-front ( ) - { - self.value = $(self.value[2-]) ; - } - - # Add the given value at the end of the vector. - # - rule push-back ( - value # Value to become back element. - ) - { - self.value += $(value) ; - } - - # Remove the back element from the vector. Does not return the value. No - # effect if vector is empty. - # - rule pop-back ( ) - { - self.value = $(self.value[1--2]) ; - } - - # Insert the given value at the given index, one based. 
The values at and to - # the right of the index are pushed back to make room for the new value. - # If the index is passed the end of the vector the element is added to the - # end. - # - rule insert ( - index # The index to insert at, one based. - : value # The value to insert. - ) - { - local left = $(self.value[1-$(index)]) ; - local right = $(self.value[$(index)-]) ; - if $(right)-is-not-empty - { - left = $(left[1--2]) ; - } - self.value = $(left) $(value) $(right) ; - } - - # Remove one or more elements from the vector. The range is inclusive, and - # not specifying an end is equivalent to the [start, start] range. - # - rule erase ( - start # Index of first element to remove. - end ? # Optional, index of last element to remove. - ) - { - end ?= $(start) ; - local left = $(self.value[1-$(start)]) ; - left = $(left[1--2]) ; - local right = $(self.value[$(end)-]) ; - right = $(right[2-]) ; - self.value = $(left) $(right) ; - } - - # Remove all elements from the vector. - # - rule clear ( ) - { - self.value = ; - } - - # The number of elements in the vector. - # - rule size ( ) - { - return [ sequence.length $(self.value) ] ; - } - - # Returns "true" if there are NO elements in the vector, empty otherwise. - # - rule empty ( ) - { - if ! $(self.value)-is-not-empty - { - return true ; - } - } - - # Returns the textual representation of content. - # - rule str ( ) - { - return "[" [ sequence.transform utility.str : $(self.value) ] "]" ; - } - - # Sorts the vector inplace, calling 'utility.less' for comparisons. - # - rule sort ( ) - { - self.value = [ sequence.insertion-sort $(self.value) : utility.less ] ; - } - - # Returns true if content is equal to the content of other vector. Uses - # 'utility.equal' for comparison. - # - rule equal ( another ) - { - local mismatch ; - local size = [ size ] ; - if $(size) = [ $(another).size ] - { - for local i in [ numbers.range 1 $(size) ] - { - if ! 
[ utility.equal [ at $(i) ] [ $(another).at $(i) ] ] - { - mismatch = true ; - } - } - } - else - { - mismatch = true ; - } - - if ! $(mismatch) - { - return true ; - } - } -} - - -rule __test__ ( ) -{ - import assert ; - import "class" : new ; - - local v1 = [ new vector ] ; - assert.true $(v1).equal $(v1) ; - assert.true $(v1).empty ; - assert.result 0 : $(v1).size ; - assert.result "[" "]" : $(v1).str ; - $(v1).push-back b ; - $(v1).push-front a ; - assert.result "[" a b "]" : $(v1).str ; - assert.result a : $(v1).front ; - assert.result b : $(v1).back ; - $(v1).insert 2 : d ; - $(v1).insert 2 : c ; - $(v1).insert 4 : f ; - $(v1).insert 4 : e ; - $(v1).pop-back ; - assert.result 5 : $(v1).size ; - assert.result d : $(v1).at 3 ; - $(v1).pop-front ; - assert.result c : $(v1).front ; - assert.false $(v1).empty ; - $(v1).erase 3 4 ; - assert.result 2 : $(v1).size ; - - local v2 = [ new vector q w e r t y ] ; - assert.result 6 : $(v2).size ; - $(v1).push-back $(v2) ; - assert.result 3 : $(v1).size ; - local v2-alias = [ $(v1).back ] ; - assert.result e : $(v2-alias).at 3 ; - $(v1).clear ; - assert.true $(v1).empty ; - assert.false $(v2-alias).empty ; - $(v2).pop-back ; - assert.result t : $(v2-alias).back ; - - local v3 = [ new vector ] ; - $(v3).push-back [ new vector 1 2 3 4 5 ] ; - $(v3).push-back [ new vector a b c ] ; - assert.result "[" "[" 1 2 3 4 5 "]" "[" a b c "]" "]" : $(v3).str ; - $(v3).push-back [ new vector [ new vector x y z ] [ new vector 7 8 9 ] ] ; - assert.result 1 : $(v3).at 1 : 1 ; - assert.result b : $(v3).at 2 : 2 ; - assert.result a b c : $(v3).get-at 2 ; - assert.result 7 8 9 : $(v3).get-at 3 : 2 ; - - local v4 = [ new vector 4 3 6 ] ; - $(v4).sort ; - assert.result 3 4 6 : $(v4).get ; - assert.false $(v4).equal $(v3) ; - - local v5 = [ new vector 3 4 6 ] ; - assert.true $(v4).equal $(v5) ; - # Check that vectors of different sizes are considered non-equal. 
- $(v5).pop-back ; - assert.false $(v4).equal $(v5) ; - - local v6 = [ new vector [ new vector 1 2 3 ] ] ; - assert.true $(v6).equal [ new vector [ new vector 1 2 3 ] ] ; - - local v7 = [ new vector 111 222 333 ] ; - assert.true $(v7).equal $(v7) ; - $(v7).insert 4 : 444 ; - assert.result 111 222 333 444 : $(v7).get ; - $(v7).insert 999 : xxx ; - assert.result 111 222 333 444 xxx : $(v7).get ; - - local v8 = [ new vector "" "" "" ] ; - assert.true $(v8).equal $(v8) ; - assert.false $(v8).empty ; - assert.result 3 : $(v8).size ; - assert.result "" : $(v8).at 1 ; - assert.result "" : $(v8).at 2 ; - assert.result "" : $(v8).at 3 ; - assert.result : $(v8).at 4 ; - $(v8).insert 2 : 222 ; - assert.result 4 : $(v8).size ; - assert.result "" 222 "" "" : $(v8).get ; - $(v8).insert 999 : "" ; - assert.result 5 : $(v8).size ; - assert.result "" 222 "" "" "" : $(v8).get ; - $(v8).insert 999 : xxx ; - assert.result 6 : $(v8).size ; - assert.result "" 222 "" "" "" xxx : $(v8).get ; - - # Regression test for a bug causing vector.equal to compare only the first - # and the last element in the given vectors. - local v9 = [ new vector 111 xxx 222 ] ; - local v10 = [ new vector 111 yyy 222 ] ; - assert.false $(v9).equal $(v10) ; -} diff --git a/jam-files/boost-build/util/doc.jam b/jam-files/boost-build/util/doc.jam deleted file mode 100644 index a7515588..00000000 --- a/jam-files/boost-build/util/doc.jam +++ /dev/null @@ -1,997 +0,0 @@ -# Copyright 2002, 2005 Dave Abrahams -# Copyright 2002, 2003, 2006 Rene Rivera -# Copyright 2003 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Documentation system, handles --help requests. -# It defines rules that attach documentation to modules, rules, and variables. -# Collects and generates documentation for the various parts of the build -# system. The documentation is collected from comments integrated into the code. 
- -import modules ; -import print ; -import set ; -import container ; -import "class" ; -import sequence ; -import path ; - - -# The type of output to generate. -# "console" is formated text echoed to the console (the default); -# "text" is formated text appended to the output file; -# "html" is HTML output to the file. -# -help-output = console ; - - -# The file to output documentation to when generating "text" or "html" help. -# This is without extension as the extension is determined by the type of -# output. -# -help-output-file = help ; - -# Whether to include local rules in help output. -# -.option.show-locals ?= ; - -# When showing documentation for a module, whether to also generate -# automatically the detailed docs for each item in the module. -# -.option.detailed ?= ; - -# Generate debug output as the help is generated and modules are parsed. -# -.option.debug ?= ; - -# Enable or disable a documentation option. -# -local rule set-option ( - option # The option name. - : value ? # Enabled (non-empty), or disabled (empty) -) -{ - .option.$(option) = $(value) ; -} - - -# Set the type of output. -# -local rule set-output ( type ) -{ - help-output = $(type) ; -} - - -# Set the output to a file. -# -local rule set-output-file ( file ) -{ - help-output-file = $(file) ; -} - - -# Extracts the brief comment from a complete comment. The brief comment is the -# first sentence. -# -local rule brief-comment ( - docs * # The comment documentation. -) -{ - local d = $(docs:J=" ") ; - local p = [ MATCH ".*([.])$" : $(d) ] ; - if ! $(p) { d = $(d)"." ; } - d = $(d)" " ; - local m = [ MATCH "^([^.]+[.])(.*)" : $(d) ] ; - local brief = $(m[1]) ; - while $(m[2]) && [ MATCH "^([^ ])" : $(m[2]) ] - { - m = [ MATCH "^([^.]+[.])(.*)" : $(m[2]) ] ; - brief += $(m[1]) ; - } - return $(brief:J="") ; -} - - -# Specifies the documentation for the current module. -# -local rule set-module-doc ( - module-name ? # The name of the module to document. 
- : docs * # The documentation for the module. -) -{ - module-name ?= * ; - - $(module-name).brief = [ brief-comment $(docs) ] ; - $(module-name).docs = $(docs) ; - - if ! $(module-name) in $(documented-modules) - { - documented-modules += $(module-name) ; - } -} - - -# Specifies the documentation for the current module. -# -local rule set-module-copyright ( - module-name ? # The name of the module to document. - : copyright * # The copyright for the module. -) -{ - module-name ?= * ; - - $(module-name).copy-brief = [ brief-comment $(copyright) ] ; - $(module-name).copy-docs = $(docs) ; - - if ! $(module-name) in $(documented-modules) - { - documented-modules += $(module-name) ; - } -} - - -# Specifies the documentation for a rule in the current module. If called in the -# global module, this documents a global rule. -# -local rule set-rule-doc ( - name # The name of the rule. - module-name ? # The name of the module to document. - is-local ? # Whether the rule is local to the module. - : docs * # The documentation for the rule. -) -{ - module-name ?= * ; - - $(module-name).$(name).brief = [ brief-comment $(docs) ] ; - $(module-name).$(name).docs = $(docs) ; - $(module-name).$(name).is-local = $(is-local) ; - - if ! $(name) in $($(module-name).rules) - { - $(module-name).rules += $(name) ; - } -} - - -# Specify a class, will turn a rule into a class. -# -local rule set-class-doc ( - name # The name of the class. - module-name ? # The name of the module to document. - : super-name ? # The super class name. 
-) -{ - module-name ?= * ; - - $(module-name).$(name).is-class = true ; - $(module-name).$(name).super-name = $(super-name) ; - $(module-name).$(name).class-rules = - [ MATCH "^($(name)[.].*)" : $($(module-name).rules) ] ; - $(module-name).$($(module-name).$(name).class-rules).is-class-rule = true ; - - $(module-name).classes += $(name) ; - $(module-name).class-rules += $($(module-name).$(name).class-rules) ; - $(module-name).rules = - [ set.difference $($(module-name).rules) : - $(name) $($(module-name).$(name).class-rules) ] ; -} - - -# Set the argument call signature of a rule. -# -local rule set-rule-arguments-signature ( - name # The name of the rule. - module-name ? # The name of the module to document. - : signature * # The arguments signature. -) -{ - module-name ?= * ; - - $(module-name).$(name).signature = $(signature) ; -} - - -# Specifies the documentation for an argument of a rule. -# -local rule set-argument-doc ( - name # The name of the argument. - qualifier # Argument syntax qualifier, "*", "+", etc. - rule-name # The name of the rule. - module-name ? # THe optional name of the module. - : docs * # The documentation. -) -{ - module-name ?= * ; - - $(module-name).$(rule-name).args.$(name).qualifier = $(qualifier) ; - $(module-name).$(rule-name).args.$(name).docs = $(docs) ; - - if ! $(name) in $($(module-name).$(rule-name).args) - { - $(module-name).$(rule-name).args += $(name) ; - } -} - - -# Specifies the documentation for a variable in the current module. If called in -# the global module, the global variable is documented. -# -local rule set-variable-doc ( - name # The name of the variable. - default # The default value. - initial # The initial value. - module-name ? # The name of the module to document. - : docs * # The documentation for the variable. 
-) -{ - module-name ?= * ; - - $(module-name).$(name).brief = [ brief-comment $(docs) ] ; - $(module-name).$(name).default = $(default) ; - $(module-name).$(name).initial = $(initial) ; - $(module-name).$(name).docs = $(docs) ; - - if ! $(name) in $($(module-name).variables) - { - $(module-name).variables += $(name) ; - } -} - - -# Generates a general description of the documentation and help system. -# -local rule print-help-top ( ) -{ - print.section "General command line usage" ; - - print.text " bjam [options] [properties] [targets] - - Options, properties and targets can be specified in any order. - " ; - - print.section "Important Options" ; - - print.list-start ; - print.list-item "--clean Remove targets instead of building" ; - print.list-item "-a Rebuild everything" ; - print.list-item "-n Don't execute the commands, only print them" ; - print.list-item "-d+2 Show commands as they are executed" ; - print.list-item "-d0 Supress all informational messages" ; - print.list-item "-q Stop at first error" ; - print.list-item "--debug-configuration Diagnose configuration" ; - print.list-item "--debug-building Report which targets are built with what properties" ; - print.list-item "--debug-generator Diagnose generator search/execution" ; - print.list-end ; - - print.section "Further Help" - The following options can be used to obtain additional documentation. - ; - - print.list-start ; - print.list-item "--help-options Print more obscure command line options." ; - print.list-item "--help-internal Boost.Build implementation details." ; - print.list-item "--help-doc-options Implementation details doc formatting." ; - print.list-end ; -} - - -# Generate Jam/Boost.Jam command usage information. -# -local rule print-help-usage ( ) -{ - print.section "Boost.Jam Usage" - "bjam [ options... ] targets..." - ; - print.list-start ; - print.list-item -a; - Build all targets, even if they are current. 
; - print.list-item -fx; - Read '"x"' as the Jamfile for building instead of searching for the - Boost.Build system. ; - print.list-item -jx; - Run up to '"x"' commands concurrently. ; - print.list-item -n; - Do not execute build commands. Instead print out the commands as they - would be executed if building. ; - print.list-item -ox; - Output the used build commands to file '"x"'. ; - print.list-item -q; - Quit as soon as a build failure is encountered. Without this option - Boost.Jam will continue building as many targets as it can. - print.list-item -sx=y; - Sets a Jam variable '"x"' to the value '"y"', overriding any value that - variable would have from the environment. ; - print.list-item -tx; - Rebuild the target '"x"', even if it is up-to-date. ; - print.list-item -v; - Display the version of bjam. ; - print.list-item --x; - Any option not explicitly handled by Boost.Jam remains available to - build scripts using the '"ARGV"' variable. ; - print.list-item -dn; - Enables output of diagnostic messages. The debug level '"n"' and all - below it are enabled by this option. ; - print.list-item -d+n; - Enables output of diagnostic messages. Only the output for debug level - '"n"' is enabled. ; - print.list-end ; - print.section "Debug Levels" - Each debug level shows a different set of information. Usually with - higher levels producing more verbose information. The following levels - are supported: ; - print.list-start ; - print.list-item 0; - Turn off all diagnostic output. Only errors are reported. ; - print.list-item 1; - Show the actions taken for building targets, as they are executed. ; - print.list-item 2; - Show "quiet" actions and display all action text, as they are executed. ; - print.list-item 3; - Show dependency analysis, and target/source timestamps/paths. ; - print.list-item 4; - Show arguments of shell invocations. ; - print.list-item 5; - Show rule invocations and variable expansions. 
; - print.list-item 6; - Show directory/header file/archive scans, and attempts at binding to targets. ; - print.list-item 7; - Show variable settings. ; - print.list-item 8; - Show variable fetches, variable expansions, and evaluation of '"if"' expressions. ; - print.list-item 9; - Show variable manipulation, scanner tokens, and memory usage. ; - print.list-item 10; - Show execution times for rules. ; - print.list-item 11; - Show parsing progress of Jamfiles. ; - print.list-item 12; - Show graph for target dependencies. ; - print.list-item 13; - Show changes in target status (fate). ; - print.list-end ; -} - - -# Generates description of options controlling the help system. This -# automatically reads the options as all variables in the doc module of the form -# ".option.*". -# -local rule print-help-options ( - module-name # The doc module. -) -{ - print.section "Help Options" - These are all the options available for enabling or disabling to control - the help system in various ways. Options can be enabled or disabled with - '"--help-enable-