From 4d48a6d19521b24d9ac0987ce9a472d9ba574c4b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 24 Jun 2012 16:42:56 +0200 Subject: RuleTargetBigramFeatures, parallelize.rb --- decoder/cdec_ff.cc | 3 +- decoder/ff_rules.cc | 48 ++++++++++++++++++++--- decoder/ff_rules.h | 19 ++++++++- dtrain/parallelize/parallelize.rb | 79 ++++++++++++++++++++++++++++++++++++++ dtrain/parallelize/test/cdec.ini | 22 +++++++++++ dtrain/parallelize/test/dtrain.ini | 15 ++++++++ dtrain/parallelize/test/in | 10 +++++ dtrain/parallelize/test/refs | 10 +++++ 8 files changed, 198 insertions(+), 8 deletions(-) create mode 100755 dtrain/parallelize/parallelize.rb create mode 100644 dtrain/parallelize/test/cdec.ini create mode 100644 dtrain/parallelize/test/dtrain.ini create mode 100644 dtrain/parallelize/test/in create mode 100644 dtrain/parallelize/test/refs diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b516c386..d64bdada 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -47,8 +47,9 @@ void register_feature_functions() { ff_registry.Register("RuleIdentityFeatures", new FFFactory()); ff_registry.Register("SourceSyntaxFeatures", new FFFactory); ff_registry.Register("SourceSpanSizeFeatures", new FFFactory); - ff_registry.Register("RuleNgramFeatures", new FFFactory()); ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory()); + ff_registry.Register("RuleSourceBigramFeatures", new FFFactory()); + ff_registry.Register("RuleTargetBigramFeatures", new FFFactory()); ff_registry.Register("KLanguageModel", new KLanguageModelFactory()); ff_registry.Register("NonLatinCount", new FFFactory); ff_registry.Register("RuleShape", new FFFactory); diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index bd4c4cc0..3d0e514a 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -66,15 +66,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->add_value(it->second, 1); } -RuleNgramFeatures::RuleNgramFeatures(const std::string& param) { +RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) { } -void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { // std::map > rule2_feats_.clear(); } -void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const vector& ant_contexts, SparseVector* features, @@ -92,14 +92,52 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, assert(w > 0); const string& cur = TD::Convert(w); ostringstream os; - os << "RB:" << prev << '_' << cur; + os << "RBS:" << prev << '_' << cur; const int fid = FD::Convert(Escape(os.str())); if (fid <= 0) return; f.add_value(fid, 1.0); prev = cur; } ostringstream os; - os << "RB:" << prev << '_' << ""; + os << "RBS:" << prev << '_' << ""; + f.set_value(FD::Convert(Escape(os.str())), 1.0); + } + (*features) += it->second; +} + +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) { +} + +void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { + rule2_feats_.clear(); +} + +void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const { + map >::iterator it = rule2_feats_.find(edge.rule_.get()); + if 
(it == rule2_feats_.end()) { + const TRule& rule = *edge.rule_; + it = rule2_feats_.insert(make_pair(&rule, SparseVector())).first; + SparseVector& f = it->second; + string prev = ""; + for (int i = 0; i < rule.e_.size(); ++i) { + WordID w = rule.e_[i]; + if (w < 0) w = -w; + if (w == 0) return; + const string& cur = TD::Convert(w); + ostringstream os; + os << "RBT:" << prev << '_' << cur; + const int fid = FD::Convert(Escape(os.str())); + if (fid <= 0) return; + f.add_value(fid, 1.0); + prev = cur; + } + ostringstream os; + os << "RBT:" << prev << '_' << ""; f.set_value(FD::Convert(Escape(os.str())), 1.0); } (*features) += it->second; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..08b168b0 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -22,9 +22,24 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map rule2_fid_; }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction { public: - RuleNgramFeatures(const std::string& param); + RuleSourceBigramFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + mutable std::map > rule2_feats_; +}; + +class RuleTargetBigramFeatures : public FeatureFunction { + public: + RuleTargetBigramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, diff --git a/dtrain/parallelize/parallelize.rb b/dtrain/parallelize/parallelize.rb new file mode 100755 index 00000000..e88d9eef --- /dev/null +++ b/dtrain/parallelize/parallelize.rb @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby + + +if ARGV.size != 5 + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb <#shards> \n" + exit +end + +dtrain_bin = '/home/pks/mt/cdec-dtrain/dtrain/dtrain_local_new_f' +ruby = '/usr/bin/ruby' +lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' +lplp_args = 'l2 select_k 100000' +gzip = '/bin/gzip' + +num_shards = ARGV[0].to_i +input = ARGV[1] +refs = ARGV[2] +epochs = ARGV[3].to_i +ini = ARGV[4] + + +`mkdir work` + +def make_shards(input, refs, num_shards) + lc = `wc -l #{input}`.split.first.to_i + shard_sz = lc / num_shards + leftover = lc % num_shards + in_f = File.new input, 'r' + refs_f = File.new refs, 'r' + shard_in_files = [] + shard_refs_files = [] + 0.upto(num_shards-1) { |shard| + shard_in = File.new "work/shard.#{shard}.in", 'w+' + shard_refs = File.new "work/shard.#{shard}.refs", 'w+' + 0.upto(shard_sz-1) { |i| + shard_in.write in_f.gets + shard_refs.write refs_f.gets + } + shard_in_files << shard_in + shard_refs_files << shard_refs + } + while leftover > 0 + shard_in_files[-1].write in_f.gets + shard_refs_files[-1].write refs_f.gets + leftover -= 1 + end + (shard_in_files + shard_refs_files).each do |f| f.close end + in_f.close + refs_f.close +end + +make_shards input, refs, num_shards + +0.upto(epochs-1) { |epoch| + pids = [] + input_weights = '' + if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end + weights_files = [] + 0.upto(num_shards-1) { |shard| + pids << Kernel.fork { + `#{dtrain_bin} -c #{ini}\ + --input work/shard.#{shard}.in\ + --refs work/shard.#{shard}.refs #{input_weights}\ + --output work/weights.#{shard}.#{epoch}\ + &> 
work/out.#{shard}.#{epoch}` + } + weights_files << "work/weights.#{shard}.#{epoch}" + } + pids.each { |pid| Process.wait(pid) } + cat = File.new('work/weights_cat', 'w+') + weights_files.each { |f| cat.write File.new(f, 'r').read } + cat.close + `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` +} + +`rm work/weights_cat` +`#{gzip} work/*` + diff --git a/dtrain/parallelize/test/cdec.ini b/dtrain/parallelize/test/cdec.ini new file mode 100644 index 00000000..72e99dc5 --- /dev/null +++ b/dtrain/parallelize/test/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/dtrain/parallelize/test/dtrain.ini b/dtrain/parallelize/test/dtrain.ini new file mode 100644 index 00000000..03f9d240 --- /dev/null +++ b/dtrain/parallelize/test/dtrain.ini @@ -0,0 +1,15 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +decoder_config=cdec.ini diff --git a/dtrain/parallelize/test/in b/dtrain/parallelize/test/in new file mode 100644 index 00000000..a312809f --- /dev/null +++ b/dtrain/parallelize/test/in @@ -0,0 +1,10 @@ +barack obama erhält als vierter us @-@ präsident den frieden nobelpreis +der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . +darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . +der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . +zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . +das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . +nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . +diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . +das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . 
+der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/dtrain/parallelize/test/refs b/dtrain/parallelize/test/refs new file mode 100644 index 00000000..4d3128cb --- /dev/null +++ b/dtrain/parallelize/test/refs @@ -0,0 +1,10 @@ +barack obama becomes the fourth american president to receive the nobel peace prize +the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . +he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . +the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . +first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . +the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . +then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . +he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . +the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . +the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . -- cgit v1.2.3 From d04c516536db996e5fe5b94b8e5dea4ce2e04b4a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 24 Jun 2012 16:43:54 +0200 Subject: move --- dtrain/parallelize.rb | 79 +++++++++++++++++++++++++++++++++++++++ dtrain/parallelize/parallelize.rb | 79 --------------------------------------- 2 files changed, 79 insertions(+), 79 deletions(-) create mode 100755 dtrain/parallelize.rb delete mode 100755 dtrain/parallelize/parallelize.rb diff --git a/dtrain/parallelize.rb b/dtrain/parallelize.rb new file mode 100755 index 00000000..e88d9eef --- /dev/null +++ b/dtrain/parallelize.rb @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby + + +if ARGV.size != 5 + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb <#shards> \n" + exit +end + +dtrain_bin = '/home/pks/mt/cdec-dtrain/dtrain/dtrain_local_new_f' +ruby = '/usr/bin/ruby' +lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' +lplp_args = 'l2 select_k 100000' +gzip = '/bin/gzip' + +num_shards = ARGV[0].to_i +input = ARGV[1] +refs = ARGV[2] +epochs = ARGV[3].to_i +ini = ARGV[4] + + +`mkdir work` + +def make_shards(input, refs, num_shards) + lc = `wc -l #{input}`.split.first.to_i + shard_sz = lc / num_shards + leftover = lc % num_shards + in_f = File.new input, 'r' + refs_f = File.new refs, 'r' + shard_in_files = [] + shard_refs_files = [] + 0.upto(num_shards-1) { |shard| + shard_in = File.new "work/shard.#{shard}.in", 'w+' + shard_refs = File.new "work/shard.#{shard}.refs", 'w+' + 0.upto(shard_sz-1) { |i| + shard_in.write in_f.gets + shard_refs.write refs_f.gets + } + shard_in_files << shard_in + shard_refs_files << shard_refs + } + while leftover > 0 + shard_in_files[-1].write in_f.gets + shard_refs_files[-1].write refs_f.gets + leftover -= 1 + end + (shard_in_files + shard_refs_files).each do |f| f.close end + 
in_f.close + refs_f.close +end + +make_shards input, refs, num_shards + +0.upto(epochs-1) { |epoch| + pids = [] + input_weights = '' + if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end + weights_files = [] + 0.upto(num_shards-1) { |shard| + pids << Kernel.fork { + `#{dtrain_bin} -c #{ini}\ + --input work/shard.#{shard}.in\ + --refs work/shard.#{shard}.refs #{input_weights}\ + --output work/weights.#{shard}.#{epoch}\ + &> work/out.#{shard}.#{epoch}` + } + weights_files << "work/weights.#{shard}.#{epoch}" + } + pids.each { |pid| Process.wait(pid) } + cat = File.new('work/weights_cat', 'w+') + weights_files.each { |f| cat.write File.new(f, 'r').read } + cat.close + `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` +} + +`rm work/weights_cat` +`#{gzip} work/*` + diff --git a/dtrain/parallelize/parallelize.rb b/dtrain/parallelize/parallelize.rb deleted file mode 100755 index e88d9eef..00000000 --- a/dtrain/parallelize/parallelize.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env ruby - - -if ARGV.size != 5 - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb <#shards> \n" - exit -end - -dtrain_bin = '/home/pks/mt/cdec-dtrain/dtrain/dtrain_local_new_f' -ruby = '/usr/bin/ruby' -lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' -lplp_args = 'l2 select_k 100000' -gzip = '/bin/gzip' - -num_shards = ARGV[0].to_i -input = ARGV[1] -refs = ARGV[2] -epochs = ARGV[3].to_i -ini = ARGV[4] - - -`mkdir work` - -def make_shards(input, refs, num_shards) - lc = `wc -l #{input}`.split.first.to_i - shard_sz = lc / num_shards - leftover = lc % num_shards - in_f = File.new input, 'r' - refs_f = File.new refs, 'r' - shard_in_files = [] - shard_refs_files = [] - 0.upto(num_shards-1) { |shard| - shard_in = File.new "work/shard.#{shard}.in", 'w+' - shard_refs = File.new "work/shard.#{shard}.refs", 'w+' - 0.upto(shard_sz-1) { |i| - shard_in.write in_f.gets - shard_refs.write refs_f.gets - } - shard_in_files << shard_in - shard_refs_files << shard_refs - } - while leftover > 0 - shard_in_files[-1].write in_f.gets - shard_refs_files[-1].write refs_f.gets - leftover -= 1 - end - (shard_in_files + shard_refs_files).each do |f| f.close end - in_f.close - refs_f.close -end - -make_shards input, refs, num_shards - -0.upto(epochs-1) { |epoch| - pids = [] - input_weights = '' - if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end - weights_files = [] - 0.upto(num_shards-1) { |shard| - pids << Kernel.fork { - `#{dtrain_bin} -c #{ini}\ - --input work/shard.#{shard}.in\ - --refs work/shard.#{shard}.refs #{input_weights}\ - --output work/weights.#{shard}.#{epoch}\ - &> work/out.#{shard}.#{epoch}` - } - weights_files << "work/weights.#{shard}.#{epoch}" - } - pids.each { |pid| Process.wait(pid) } - cat = File.new('work/weights_cat', 'w+') - weights_files.each { |f| cat.write File.new(f, 'r').read } - cat.close - `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` -} - -`rm work/weights_cat` -`#{gzip} work/*` - -- cgit v1.2.3 From 3a94ac22e5c60aa205f2b3dadf81b0666500e0c3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 8 Jul 2012 14:26:37 +0200 Subject: parallel. 
test --- dtrain/test/parallelize/cdec.ini | 22 ++++++++++++++++++++++ dtrain/test/parallelize/dtrain.ini | 15 +++++++++++++++ dtrain/test/parallelize/in | 10 ++++++++++ dtrain/test/parallelize/refs | 10 ++++++++++ 4 files changed, 57 insertions(+) create mode 100644 dtrain/test/parallelize/cdec.ini create mode 100644 dtrain/test/parallelize/dtrain.ini create mode 100644 dtrain/test/parallelize/in create mode 100644 dtrain/test/parallelize/refs diff --git a/dtrain/test/parallelize/cdec.ini b/dtrain/test/parallelize/cdec.ini new file mode 100644 index 00000000..72e99dc5 --- /dev/null +++ b/dtrain/test/parallelize/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/dtrain/test/parallelize/dtrain.ini b/dtrain/test/parallelize/dtrain.ini new file mode 100644 index 00000000..03f9d240 --- /dev/null +++ b/dtrain/test/parallelize/dtrain.ini @@ -0,0 +1,15 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +decoder_config=cdec.ini diff --git a/dtrain/test/parallelize/in b/dtrain/test/parallelize/in new file mode 100644 index 00000000..a312809f --- /dev/null +++ b/dtrain/test/parallelize/in @@ -0,0 +1,10 @@ +barack obama erhält als vierter us @-@ präsident den frieden nobelpreis +der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . +darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . +der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . +zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . +das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . +nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . +diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . +das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . 
+der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/dtrain/test/parallelize/refs b/dtrain/test/parallelize/refs new file mode 100644 index 00000000..4d3128cb --- /dev/null +++ b/dtrain/test/parallelize/refs @@ -0,0 +1,10 @@ +barack obama becomes the fourth american president to receive the nobel peace prize +the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . +he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . +the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . +first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . +the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . +then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . +he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . +the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . +the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . -- cgit v1.2.3 From bb9309432fd35e95cf88b630853a928a3e3228c3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 1 Aug 2012 18:27:51 +0200 Subject: Makefile.am, dtrain output, python build --- .gitignore | 1 + Makefile.am | 2 +- dtrain/test/example/cdec.ini | 3 +- dtrain/test/example/dtrain.ini | 2 +- dtrain/test/example/expected-output | 148 +++++++++++++++++------------------- python/setup.py | 2 +- 6 files changed, 75 insertions(+), 83 deletions(-) diff --git a/.gitignore b/.gitignore index 571360ed..4c40fe81 100644 --- a/.gitignore +++ b/.gitignore @@ -174,3 +174,4 @@ phrasinator/gibbs_train_plm phrasinator/gibbs_train_plm_notables previous.sh dist +python/build/ diff --git a/Makefile.am b/Makefile.am index 4df72cff..332542c4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,4 +25,4 @@ SUBDIRS = \ AUTOMAKE_OPTIONS = foreign ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -D_GLIBCXX_PARALLEL +AM_CPPFLAGS = -D_GLIBCXX_PARALLEL -march=native -mtune=native -O2 -pipe -fomit-frame-pointer -Wall diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini index 6642107f..d5955f0e 100644 --- a/dtrain/test/example/cdec.ini +++ b/dtrain/test/example/cdec.ini @@ -17,7 +17,8 @@ feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz #feature_function=NonLatinCount #feature_function=OutputIndicator feature_function=RuleIdentityFeatures -feature_function=RuleNgramFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures feature_function=RuleShape #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index c8ac7c3f..8338b2d3 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -5,7 +5,7 @@ 
decoder_config=test/example/cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=10 # stop epoch after 10 inputs +stop_after=100 # stop epoch after 10 inputs # interesting stuff epochs=3 # run over input 3 times diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output index 25d2c069..43798484 100644 --- a/dtrain/test/example/expected-output +++ b/dtrain/test/example/expected-output @@ -1,21 +1,10 @@ cdec cfg 'test/example/cdec.ini' -feature: WordPenalty (no config parameters) -State is 0 bytes for feature WordPenalty -feature: KLanguageModel (with config parameters 'test/example/nc-wmt11.en.srilm.gz') Loading the LM will be faster if you build a binary file. Reading test/example/nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** -Loaded 5-gram KLM from test/example/nc-wmt11.en.srilm.gz (MapSize=49581) -State is 98 bytes for feature KLanguageModel test/example/nc-wmt11.en.srilm.gz -feature: RuleIdentityFeatures (no config parameters) -State is 0 bytes for feature RuleIdentityFeatures -feature: RuleNgramFeatures (no config parameters) -State is 0 bytes for feature RuleNgramFeatures -feature: RuleShape (no config parameters) Example feature: Shape_S00000_T00000 -State is 0 bytes for feature RuleShape -Seeding random number sequence to 1072059181 +Seeding random number sequence to 2108658507 dtrain Parameters: @@ -33,93 +22,94 @@ Parameters: pair threshold 0 select weights 'VOID' l1 reg 0 'none' + max pairs 4294967295 cdec cfg 'test/example/cdec.ini' input 'test/example/nc-wmt11.1k.gz' output '-' - stop_after 10 + stop_after 100 (a dot represents 10 inputs) Iteration #1 of 3. - . 10 -Stopping after 10 input sentences. + .......... 100 +Stopping after 100 input sentences. WEIGHTS - Glue = -0.0293 - WordPenalty = +0.049075 - LanguageModel = +0.24345 - LanguageModel_OOV = -0.2029 - PhraseModel_0 = +0.0084102 - PhraseModel_1 = +0.021729 - PhraseModel_2 = +0.014922 - PhraseModel_3 = +0.104 - PhraseModel_4 = -0.14308 - PhraseModel_5 = +0.0247 - PhraseModel_6 = -0.012 - PassThrough = -0.2161 + Glue = -0.236 + WordPenalty = +0.056111 + LanguageModel = +0.71011 + LanguageModel_OOV = -0.489 + PhraseModel_0 = -0.21332 + PhraseModel_1 = -0.13038 + PhraseModel_2 = +0.085148 + PhraseModel_3 = -0.16982 + PhraseModel_4 = -0.026332 + PhraseModel_5 = +0.2133 + PhraseModel_6 = +0.1002 + PassThrough = -0.5541 --- - 1best avg score: 0.16872 (+0.16872) - 1best avg model score: -1.8276 (-1.8276) - avg # pairs: 1121.1 - avg # rank err: 555.6 + 1best avg score: 0.16928 (+0.16928) + 1best avg model score: 2.4454 (+2.4454) + avg # pairs: 1616.2 + avg # rank err: 769.6 avg # margin viol: 0 - non0 feature count: 277 - avg list sz: 77.2 - avg f count: 90.96 -(time 0.1 min, 0.6 s/S) + non0 feature count: 4068 + avg list sz: 96.65 + avg f count: 118.01 +(time 1.3 min, 0.79 s/S) Iteration #2 of 3. - . 10 + .......... 
100 WEIGHTS - Glue = -0.3526 - WordPenalty = +0.067576 - LanguageModel = +1.155 - LanguageModel_OOV = -0.2728 - PhraseModel_0 = -0.025529 - PhraseModel_1 = +0.095869 - PhraseModel_2 = +0.094567 - PhraseModel_3 = +0.12482 - PhraseModel_4 = -0.36533 - PhraseModel_5 = +0.1068 - PhraseModel_6 = -0.1517 - PassThrough = -0.286 + Glue = -0.1721 + WordPenalty = -0.14132 + LanguageModel = +0.56023 + LanguageModel_OOV = -0.6786 + PhraseModel_0 = +0.14155 + PhraseModel_1 = +0.34218 + PhraseModel_2 = +0.22954 + PhraseModel_3 = -0.24762 + PhraseModel_4 = -0.25848 + PhraseModel_5 = -0.0453 + PhraseModel_6 = -0.0264 + PassThrough = -0.7436 --- - 1best avg score: 0.18394 (+0.015221) - 1best avg model score: 3.205 (+5.0326) - avg # pairs: 1168.3 - avg # rank err: 594.8 + 1best avg score: 0.19585 (+0.02657) + 1best avg model score: -16.311 (-18.757) + avg # pairs: 1475.8 + avg # rank err: 668.48 avg # margin viol: 0 - non0 feature count: 543 - avg list sz: 77.5 - avg f count: 85.916 -(time 0.083 min, 0.5 s/S) + non0 feature count: 6300 + avg list sz: 96.08 + avg f count: 114.92 +(time 1.3 min, 0.76 s/S) Iteration #3 of 3. - . 10 + .......... 100 WEIGHTS - Glue = -0.392 - WordPenalty = +0.071963 - LanguageModel = +0.81266 - LanguageModel_OOV = -0.4177 - PhraseModel_0 = -0.2649 - PhraseModel_1 = -0.17931 - PhraseModel_2 = +0.038261 - PhraseModel_3 = +0.20261 - PhraseModel_4 = -0.42621 - PhraseModel_5 = +0.3198 - PhraseModel_6 = -0.1437 - PassThrough = -0.4309 + Glue = -0.1577 + WordPenalty = -0.086902 + LanguageModel = +0.30136 + LanguageModel_OOV = -0.7848 + PhraseModel_0 = +0.11743 + PhraseModel_1 = +0.11142 + PhraseModel_2 = -0.0053865 + PhraseModel_3 = -0.18731 + PhraseModel_4 = -0.67144 + PhraseModel_5 = +0.1236 + PhraseModel_6 = -0.2665 + PassThrough = -0.8498 --- - 1best avg score: 0.2962 (+0.11225) - 1best avg model score: -36.274 (-39.479) - avg # pairs: 1109.6 - avg # rank err: 515.9 + 1best avg score: 0.20034 (+0.0044978) + 1best avg model score: -7.2775 (+9.0336) + avg # pairs: 1578.6 + avg # rank err: 705.77 avg # margin viol: 0 - non0 feature count: 741 - avg list sz: 77 - avg f count: 88.982 -(time 0.083 min, 0.5 s/S) + non0 feature count: 7313 + avg list sz: 96.84 + avg f count: 124.48 +(time 1.5 min, 0.9 s/S) Writing weights file to '-' ... done --- -Best iteration: 3 [SCORE 'stupid_bleu'=0.2962]. -This took 0.26667 min. +Best iteration: 3 [SCORE 'stupid_bleu'=0.20034]. +This took 4.0833 min. diff --git a/python/setup.py b/python/setup.py index 7be976e8..cced4c1a 100644 --- a/python/setup.py +++ b/python/setup.py @@ -6,7 +6,7 @@ import glob INC = ['..', 'src/', '../decoder', '../utils', '../mteval'] LIB = ['../decoder', '../utils', '../mteval', '../training', '../klm/lm', '../klm/util'] -LINK_ARGS = [] +LINK_ARGS = ['-lz'] # Detect Boost BOOST_ROOT = os.getenv('BOOST_ROOT') -- cgit v1.2.3 From 104e23dd0b0795abab4565228537438481dc5a5b Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 9 Sep 2012 21:56:05 +0100 Subject: Fix dpmert deps --- dpmert/Jamfile | 1 + 1 file changed, 1 insertion(+) diff --git a/dpmert/Jamfile b/dpmert/Jamfile index bc4b079b..647caac8 100644 --- a/dpmert/Jamfile +++ b/dpmert/Jamfile @@ -16,6 +16,7 @@ lib dpmert : : : ..//utils ..//mteval + ..//decoder ../klm/lm//kenlm ..//boost_program_options . 
-- cgit v1.2.3 From 8882e9ebe158aef382bb5544559ef7f2a553db62 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:23:39 +0100 Subject: Update kenlm and build system --- bjam | 4 +- jam-files/fail/Jamroot | 4 + jam-files/sanity.jam | 9 ++ klm/lm/Jamfile | 11 +-- klm/lm/bhiksha.cc | 2 +- klm/lm/bhiksha.hh | 4 +- klm/lm/binary_format.cc | 4 +- klm/lm/binary_format.hh | 4 +- klm/lm/build_binary.cc | 9 +- klm/lm/max_order.hh | 2 +- klm/lm/model.cc | 21 +++-- klm/lm/model.hh | 2 +- klm/lm/partial.hh | 167 ++++++++++++++++++++++++++++++++++ klm/lm/partial_test.cc | 199 +++++++++++++++++++++++++++++++++++++++++ klm/lm/quantize.hh | 8 +- klm/lm/read_arpa.cc | 23 +++-- klm/lm/search_hashed.hh | 6 +- klm/lm/search_trie.hh | 4 +- klm/lm/state.hh | 2 + klm/lm/trie.cc | 4 +- klm/lm/trie.hh | 8 +- klm/lm/vocab.cc | 4 +- klm/lm/vocab.hh | 4 +- klm/util/Jamfile | 14 +-- klm/util/ersatz_progress.cc | 10 +-- klm/util/ersatz_progress.hh | 10 ++- klm/util/exception.cc | 3 + klm/util/exception.hh | 22 +++++ klm/util/file.cc | 7 +- klm/util/file_piece.cc | 1 - klm/util/probing_hash_table.hh | 5 +- 31 files changed, 502 insertions(+), 75 deletions(-) create mode 100644 jam-files/fail/Jamroot create mode 100644 klm/lm/partial.hh create mode 100644 klm/lm/partial_test.cc diff --git a/bjam b/bjam index d1ac8a55..2b0232c8 100755 --- a/bjam +++ b/bjam @@ -4,8 +4,8 @@ if bjam="$(which bjam 2>/dev/null)" && #exists [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" /dev/null && #bjam in path isn't this script - "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build) - "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough. + "${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null && #The test in jam-files/sanity.jam passes + (cd jam-files/fail && ! "${bjam}") >/dev/null #Returns non-zero on failure then #Delegate to system bjam exec "${bjam}" "$@" diff --git a/jam-files/fail/Jamroot b/jam-files/fail/Jamroot new file mode 100644 index 00000000..c3584d89 --- /dev/null +++ b/jam-files/fail/Jamroot @@ -0,0 +1,4 @@ +actions fail { + false +} +make fail : : fail ; diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 8ccfc65d..086f20ae 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -15,6 +15,13 @@ rule _shell ( cmd : extras * ) { return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ; } +rule shell_or_fail ( cmd ) { + local ret = [ SHELL $(cmd) : exit-status ] ; + if $(ret[2]) != 0 { + exit $(cmd) failed : 1 ; + } +} + cxxflags = [ os.environ "CXXFLAGS" ] ; cflags = [ os.environ "CFLAGS" ] ; ldflags = [ os.environ "LDFLAGS" ] ; @@ -275,3 +282,5 @@ if [ option.get "sanity-test" : : "yes" ] { EXIT "Bad" : 1 ; } } + +use-project /top : . 
; diff --git a/klm/lm/Jamfile b/klm/lm/Jamfile index b1971d88..dd620068 100644 --- a/klm/lm/Jamfile +++ b/klm/lm/Jamfile @@ -2,13 +2,14 @@ lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quant import testing ; -run left_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa ; -run model_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa test_nounk.arpa ; +run left_test.cc ../util//kenutil kenlm /top//boost_unit_test_framework : : test.arpa ; +run model_test.cc ../util//kenutil kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ; exe query : ngram_query.cc kenlm ../util//kenutil ; exe build_binary : build_binary.cc kenlm ../util//kenutil ; +exe kenlm_max_order : max_order.cc : .. ; -install legacy : build_binary query - : $(TOP)/klm/lm EXE on shared:$(TOP)/klm/lm shared:LIB ; +alias programs : query build_binary kenlm_max_order ; -alias programs : build_binary query ; +install legacy : build_binary query kenlm_max_order + : $(TOP)/lm EXE on shared:$(TOP)/lm shared:LIB ; diff --git a/klm/lm/bhiksha.cc b/klm/lm/bhiksha.cc index 870a4eee..088ea98d 100644 --- a/klm/lm/bhiksha.cc +++ b/klm/lm/bhiksha.cc @@ -50,7 +50,7 @@ std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &con } } // namespace -std::size_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) { +uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) { return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */; } diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh index 9734f3ab..8ff88654 100644 --- a/klm/lm/bhiksha.hh +++ b/klm/lm/bhiksha.hh @@ -33,7 +33,7 @@ class DontBhiksha { static void UpdateConfigFromBinary(int /*fd*/, Config &/*config*/) {} - static std::size_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; } + static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; } static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) { return util::RequiredBits(max_next); @@ -67,7 +67,7 @@ class ArrayBhiksha { static void UpdateConfigFromBinary(int fd, Config &config); - static std::size_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); + static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config); diff --git a/klm/lm/binary_format.cc b/klm/lm/binary_format.cc index a56e998e..fd841e59 100644 --- a/klm/lm/binary_format.cc +++ b/klm/lm/binary_format.cc @@ -200,10 +200,10 @@ void SeekPastHeader(int fd, const Parameters ¶ms) { util::SeekOrThrow(fd, TotalHeaderSize(params.counts.size())); } -uint8_t *SetupBinary(const Config &config, const Parameters ¶ms, std::size_t memory_size, Backing &backing) { +uint8_t *SetupBinary(const Config &config, const Parameters ¶ms, uint64_t memory_size, Backing &backing) { const uint64_t file_size = util::SizeFile(backing.file.get()); // The header is smaller than a page, so we have to map the whole header as well. 
- std::size_t total_map = TotalHeaderSize(params.counts.size()) + memory_size; + std::size_t total_map = util::CheckOverflow(TotalHeaderSize(params.counts.size()) + memory_size); if (file_size != util::kBadSize && static_cast(file_size) < total_map) UTIL_THROW(FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map); diff --git a/klm/lm/binary_format.hh b/klm/lm/binary_format.hh index dd795f62..bf699d5f 100644 --- a/klm/lm/binary_format.hh +++ b/klm/lm/binary_format.hh @@ -70,7 +70,7 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet void SeekPastHeader(int fd, const Parameters ¶ms); -uint8_t *SetupBinary(const Config &config, const Parameters ¶ms, std::size_t memory_size, Backing &backing); +uint8_t *SetupBinary(const Config &config, const Parameters ¶ms, uint64_t memory_size, Backing &backing); void ComplainAboutARPA(const Config &config, ModelType model_type); @@ -90,7 +90,7 @@ template void LoadLM(const char *file, const Config &config, To &to) new_config.probing_multiplier = params.fixed.probing_multiplier; detail::SeekPastHeader(backing.file.get(), params); To::UpdateConfigFromBinary(backing.file.get(), params.counts, new_config); - std::size_t memory_size = To::Size(params.counts, new_config); + uint64_t memory_size = To::Size(params.counts, new_config); uint8_t *start = detail::SetupBinary(new_config, params, memory_size, backing); to.InitializeFromBinary(start, params, new_config, backing.file.get()); } else { diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc index c2ca1101..efe99899 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary.cc @@ -8,7 +8,6 @@ #include #include -#include #ifdef WIN32 #include "util/getopt.hh" @@ -86,16 +85,16 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) { std::vector counts; util::FilePiece f(file); lm::ReadARPACounts(f, counts); - std::size_t sizes[6]; + uint64_t sizes[6]; sizes[0] = ProbingModel::Size(counts, config); sizes[1] = RestProbingModel::Size(counts, config); sizes[2] = TrieModel::Size(counts, config); sizes[3] = QuantTrieModel::Size(counts, config); sizes[4] = ArrayTrieModel::Size(counts, config); sizes[5] = QuantArrayTrieModel::Size(counts, config); - std::size_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); - std::size_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(size_t)); - std::size_t divide; + uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t)); + uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t)); + uint64_t divide; char prefix; if (min_length < (1 << 10) * 10) { prefix = ' '; diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index bc8687cd..989f8324 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -8,5 +8,5 @@ #define KENLM_MAX_ORDER 6 #endif #ifndef KENLM_ORDER_MESSAGE -#define KENLM_ORDER_MESSAGE "Edit klm/lm/max_order.hh." +#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --kenlm-max-order=6 -a'. Otherwise, edit lm/max_order.hh." 
#endif diff --git a/klm/lm/model.cc b/klm/lm/model.cc index b46333a4..40af8a63 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -12,6 +12,7 @@ #include #include #include +#include namespace lm { namespace ngram { @@ -19,17 +20,18 @@ namespace detail { template const ModelType GenericModel::kModelType = Search::kModelType; -template size_t GenericModel::Size(const std::vector &counts, const Config &config) { +template uint64_t GenericModel::Size(const std::vector &counts, const Config &config) { return VocabularyT::Size(counts[0], config) + Search::Size(counts, config); } template void GenericModel::SetupMemory(void *base, const std::vector &counts, const Config &config) { + size_t goal_size = util::CheckOverflow(Size(counts, config)); uint8_t *start = static_cast(base); size_t allocated = VocabularyT::Size(counts[0], config); vocab_.SetupMemory(start, allocated, counts[0], config); start += allocated; start = search_.SetupMemory(start, counts, config); - if (static_cast(start - static_cast(base)) != Size(counts, config)) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast(base)) << " but Size says they should take " << Size(counts, config)); + if (static_cast(start - static_cast(base)) != goal_size) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast(base)) << " but Size says they should take " << goal_size); } template GenericModel::GenericModel(const char *file, const Config &config) { @@ -49,13 +51,18 @@ template GenericModel::Ge } namespace { -void CheckMaxOrder(size_t order) { - UTIL_THROW_IF(order > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << order << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". " << KENLM_ORDER_MESSAGE); +void CheckCounts(const std::vector &counts) { + UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". " << KENLM_ORDER_MESSAGE); + if (sizeof(uint64_t) > sizeof(std::size_t)) { + for (std::vector::const_iterator i = counts.begin(); i != counts.end(); ++i) { + UTIL_THROW_IF(*i > static_cast(std::numeric_limits::max()), util::OverflowException, "This model has " << *i << " " << (i - counts.begin() + 1) << "-grams which is too many for 32-bit machines."); + } + } } } // namespace template void GenericModel::InitializeFromBinary(void *start, const Parameters ¶ms, const Config &config, int fd) { - CheckMaxOrder(params.counts.size()); + CheckCounts(params.counts); SetupMemory(start, params.counts, config); vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab); search_.LoadedBinary(); @@ -68,11 +75,11 @@ template void GenericModel counts; // File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_. ReadARPACounts(f, counts); - CheckMaxOrder(counts.size()); + CheckCounts(counts); if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model."); if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0"); - std::size_t vocab_size = VocabularyT::Size(counts[0], config); + std::size_t vocab_size = util::CheckOverflow(VocabularyT::Size(counts[0], config)); // Setup the binary file for writing the vocab lookup table. The search_ is responsible for growing the binary file to its needs. 
vocab_.SetupMemory(SetupJustVocab(config, counts.size(), vocab_size, backing_), vocab_size, counts[0], config); diff --git a/klm/lm/model.hh b/klm/lm/model.hh index 6dee9419..13ff864e 100644 --- a/klm/lm/model.hh +++ b/klm/lm/model.hh @@ -41,7 +41,7 @@ template class GenericModel : public base::Mod * does not include small non-mapped control structures, such as this class * itself. */ - static size_t Size(const std::vector &counts, const Config &config = Config()); + static uint64_t Size(const std::vector &counts, const Config &config = Config()); /* Load the model from a file. It may be an ARPA or binary file. Binary * files must have the format expected by this class or you'll get an diff --git a/klm/lm/partial.hh b/klm/lm/partial.hh new file mode 100644 index 00000000..1dede359 --- /dev/null +++ b/klm/lm/partial.hh @@ -0,0 +1,167 @@ +#ifndef LM_PARTIAL__ +#define LM_PARTIAL__ + +#include "lm/return.hh" +#include "lm/state.hh" + +#include + +#include + +namespace lm { +namespace ngram { + +struct ExtendReturn { + float adjust; + bool make_full; + unsigned char next_use; +}; + +template ExtendReturn ExtendLoop( + const Model &model, + unsigned char seen, const WordIndex *add_rbegin, const WordIndex *add_rend, const float *backoff_start, + const uint64_t *pointers, const uint64_t *pointers_end, + uint64_t *&pointers_write, + float *backoff_write) { + unsigned char add_length = add_rend - add_rbegin; + + float backoff_buf[2][KENLM_MAX_ORDER - 1]; + float *backoff_in = backoff_buf[0], *backoff_out = backoff_buf[1]; + std::copy(backoff_start, backoff_start + add_length, backoff_in); + + ExtendReturn value; + value.make_full = false; + value.adjust = 0.0; + value.next_use = add_length; + + unsigned char i = 0; + unsigned char length = pointers_end - pointers; + // pointers_write is NULL means that the existing left state is full, so we should use completed probabilities. + if (pointers_write) { + // Using full context, writing to new left state. + for (; i < length; ++i) { + FullScoreReturn ret(model.ExtendLeft( + add_rbegin, add_rbegin + value.next_use, + backoff_in, + pointers[i], i + seen + 1, + backoff_out, + value.next_use)); + std::swap(backoff_in, backoff_out); + if (ret.independent_left) { + value.adjust += ret.prob; + value.make_full = true; + ++i; + break; + } + value.adjust += ret.rest; + *pointers_write++ = ret.extend_left; + if (value.next_use != add_length) { + value.make_full = true; + ++i; + break; + } + } + } + // Using some of the new context. + for (; i < length && value.next_use; ++i) { + FullScoreReturn ret(model.ExtendLeft( + add_rbegin, add_rbegin + value.next_use, + backoff_in, + pointers[i], i + seen + 1, + backoff_out, + value.next_use)); + std::swap(backoff_in, backoff_out); + value.adjust += ret.prob; + } + float unrest = model.UnRest(pointers + i, pointers_end, i + seen + 1); + // Using none of the new context. + value.adjust += unrest; + + std::copy(backoff_in, backoff_in + value.next_use, backoff_write); + return value; +} + +template float RevealBefore(const Model &model, const Right &reveal, const unsigned char seen, bool reveal_full, Left &left, Right &right) { + assert(seen < reveal.length || reveal_full); + uint64_t *pointers_write = reveal_full ? NULL : left.pointers; + float backoff_buffer[KENLM_MAX_ORDER - 1]; + ExtendReturn value(ExtendLoop( + model, + seen, reveal.words + seen, reveal.words + reveal.length, reveal.backoff + seen, + left.pointers, left.pointers + left.length, + pointers_write, + left.full ? 
backoff_buffer : (right.backoff + right.length))); + if (reveal_full) { + left.length = 0; + value.make_full = true; + } else { + left.length = pointers_write - left.pointers; + value.make_full |= (left.length == model.Order() - 1); + } + if (left.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i]; + } else { + // If left wasn't full when it came in, put words into right state. + std::copy(reveal.words + seen, reveal.words + seen + value.next_use, right.words + right.length); + right.length += value.next_use; + left.full = value.make_full || (right.length == model.Order() - 1); + } + return value.adjust; +} + +template float RevealAfter(const Model &model, Left &left, Right &right, const Left &reveal, unsigned char seen) { + assert(seen < reveal.length || reveal.full); + uint64_t *pointers_write = left.full ? NULL : (left.pointers + left.length); + ExtendReturn value(ExtendLoop( + model, + seen, right.words, right.words + right.length, right.backoff, + reveal.pointers + seen, reveal.pointers + reveal.length, + pointers_write, + right.backoff)); + if (reveal.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += right.backoff[i]; + right.length = 0; + value.make_full = true; + } else { + right.length = value.next_use; + value.make_full |= (right.length == model.Order() - 1); + } + if (!left.full) { + left.length = pointers_write - left.pointers; + left.full = value.make_full || (left.length == model.Order() - 1); + } + return value.adjust; +} + +template float Subsume(const Model &model, Left &first_left, const Right &first_right, const Left &second_left, Right &second_right, const unsigned int between_length) { + assert(first_right.length < KENLM_MAX_ORDER); + assert(second_left.length < KENLM_MAX_ORDER); + assert(between_length < KENLM_MAX_ORDER - 1); + uint64_t *pointers_write = first_left.full ? NULL : (first_left.pointers + first_left.length); + float backoff_buffer[KENLM_MAX_ORDER - 1]; + ExtendReturn value(ExtendLoop( + model, + between_length, first_right.words, first_right.words + first_right.length, first_right.backoff, + second_left.pointers, second_left.pointers + second_left.length, + pointers_write, + second_left.full ? 
backoff_buffer : (second_right.backoff + second_right.length))); + if (second_left.full) { + for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i]; + } else { + std::copy(first_right.words, first_right.words + value.next_use, second_right.words + second_right.length); + second_right.length += value.next_use; + value.make_full |= (second_right.length == model.Order() - 1); + } + if (!first_left.full) { + first_left.length = pointers_write - first_left.pointers; + first_left.full = value.make_full || second_left.full || (first_left.length == model.Order() - 1); + } + assert(first_left.length < KENLM_MAX_ORDER); + assert(second_right.length < KENLM_MAX_ORDER); + return value.adjust; +} + +} // namespace ngram +} // namespace lm + +#endif // LM_PARTIAL__ diff --git a/klm/lm/partial_test.cc b/klm/lm/partial_test.cc new file mode 100644 index 00000000..8d309c85 --- /dev/null +++ b/klm/lm/partial_test.cc @@ -0,0 +1,199 @@ +#include "lm/partial.hh" + +#include "lm/left.hh" +#include "lm/model.hh" +#include "util/tokenize_piece.hh" + +#define BOOST_TEST_MODULE PartialTest +#include +#include + +namespace lm { +namespace ngram { +namespace { + +const char *TestLocation() { + if (boost::unit_test::framework::master_test_suite().argc < 2) { + return "test.arpa"; + } + return boost::unit_test::framework::master_test_suite().argv[1]; +} + +Config SilentConfig() { + Config config; + config.arpa_complain = Config::NONE; + config.messages = NULL; + return config; +} + +struct ModelFixture { + ModelFixture() : m(TestLocation(), SilentConfig()) {} + + RestProbingModel m; +}; + +BOOST_FIXTURE_TEST_SUITE(suite, ModelFixture) + +BOOST_AUTO_TEST_CASE(SimpleBefore) { + Left left; + left.full = false; + left.length = 0; + Right right; + right.length = 0; + + Right reveal; + reveal.length = 1; + WordIndex period = m.GetVocabulary().Index("."); + reveal.words[0] = period; + reveal.backoff[0] = -0.845098; + + BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 0, false, left, right), 0.001); + BOOST_CHECK_EQUAL(0, left.length); + BOOST_CHECK(!left.full); + BOOST_CHECK_EQUAL(1, right.length); + BOOST_CHECK_EQUAL(period, right.words[0]); + BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001); + + WordIndex more = m.GetVocabulary().Index("more"); + reveal.words[1] = more; + reveal.backoff[1] = -0.4771212; + reveal.length = 2; + BOOST_CHECK_CLOSE(0.0, RevealBefore(m, reveal, 1, false, left, right), 0.001); + BOOST_CHECK_EQUAL(0, left.length); + BOOST_CHECK(!left.full); + BOOST_CHECK_EQUAL(2, right.length); + BOOST_CHECK_EQUAL(period, right.words[0]); + BOOST_CHECK_EQUAL(more, right.words[1]); + BOOST_CHECK_CLOSE(-0.845098, right.backoff[0], 0.001); + BOOST_CHECK_CLOSE(-0.4771212, right.backoff[1], 0.001); +} + +BOOST_AUTO_TEST_CASE(AlsoWouldConsider) { + WordIndex would = m.GetVocabulary().Index("would"); + WordIndex consider = m.GetVocabulary().Index("consider"); + + ChartState current; + current.left.length = 1; + current.left.pointers[0] = would; + current.left.full = false; + current.right.length = 1; + current.right.words[0] = would; + current.right.backoff[0] = -0.30103; + + Left after; + after.full = false; + after.length = 1; + after.pointers[0] = consider; + + // adjustment for would consider + BOOST_CHECK_CLOSE(-1.687872 - -0.2922095 - 0.30103, RevealAfter(m, current.left, current.right, after, 0), 0.001); + + BOOST_CHECK_EQUAL(2, current.left.length); + BOOST_CHECK_EQUAL(would, current.left.pointers[0]); + BOOST_CHECK_EQUAL(false, current.left.full); + + WordIndex also = 
m.GetVocabulary().Index("also"); + Right before; + before.length = 1; + before.words[0] = also; + before.backoff[0] = -0.30103; + // r(would) = -0.2922095 [i would], r(would -> consider) = -1.988902 [b(would) + p(consider)] + // p(also -> would) = -2, p(also would -> consider) = -3 + BOOST_CHECK_CLOSE(-2 + 0.2922095 -3 + 1.988902, RevealBefore(m, before, 0, false, current.left, current.right), 0.001); + BOOST_CHECK_EQUAL(0, current.left.length); + BOOST_CHECK(current.left.full); + BOOST_CHECK_EQUAL(2, current.right.length); + BOOST_CHECK_EQUAL(would, current.right.words[0]); + BOOST_CHECK_EQUAL(also, current.right.words[1]); +} + +BOOST_AUTO_TEST_CASE(EndSentence) { + WordIndex loin = m.GetVocabulary().Index("loin"); + WordIndex period = m.GetVocabulary().Index("."); + WordIndex eos = m.GetVocabulary().EndSentence(); + + ChartState between; + between.left.length = 1; + between.left.pointers[0] = eos; + between.left.full = true; + between.right.length = 0; + + Right before; + before.words[0] = period; + before.words[1] = loin; + before.backoff[0] = -0.845098; + before.backoff[1] = 0.0; + + before.length = 1; + BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001); + BOOST_CHECK_EQUAL(0, between.left.length); +} + +float ScoreFragment(const RestProbingModel &model, unsigned int *begin, unsigned int *end, ChartState &out) { + RuleScore scorer(model, out); + for (unsigned int *i = begin; i < end; ++i) { + scorer.Terminal(*i); + } + return scorer.Finish(); +} + +void CheckAdjustment(const RestProbingModel &model, float expect, const Right &before_in, bool before_full, ChartState between, const Left &after_in) { + Right before(before_in); + Left after(after_in); + after.full = false; + float got = 0.0; + for (unsigned int i = 1; i < 5; ++i) { + if (before_in.length >= i) { + before.length = i; + got += RevealBefore(model, before, i - 1, false, between.left, between.right); + } + if (after_in.length >= i) { + after.length = i; + got += RevealAfter(model, between.left, between.right, after, i - 1); + } + } + if (after_in.full) { + after.full = true; + got += RevealAfter(model, between.left, between.right, after, after.length); + } + if (before_full) { + got += RevealBefore(model, before, before.length, true, between.left, between.right); + } + // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this. 
+ BOOST_CHECK(fabs(expect - got) < 0.001); +} + +void FullDivide(const RestProbingModel &model, StringPiece str) { + std::vector indices; + for (util::TokenIter i(str, ' '); i; ++i) { + indices.push_back(model.GetVocabulary().Index(*i)); + } + ChartState full_state; + float full = ScoreFragment(model, &indices.front(), &indices.back() + 1, full_state); + + ChartState before_state; + before_state.left.full = false; + RuleScore before_scorer(model, before_state); + float before_score = 0.0; + for (unsigned int before = 0; before < indices.size(); ++before) { + for (unsigned int after = before; after <= indices.size(); ++after) { + ChartState after_state, between_state; + float after_score = ScoreFragment(model, &indices.front() + after, &indices.front() + indices.size(), after_state); + float between_score = ScoreFragment(model, &indices.front() + before, &indices.front() + after, between_state); + CheckAdjustment(model, full - before_score - after_score - between_score, before_state.right, before_state.left.full, between_state, after_state.left); + } + before_scorer.Terminal(indices[before]); + before_score = before_scorer.Finish(); + } +} + +BOOST_AUTO_TEST_CASE(Strings) { + FullDivide(m, "also would consider"); + FullDivide(m, "looking on a little more loin . "); + FullDivide(m, "in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown "); +} + +BOOST_AUTO_TEST_SUITE_END() +} // namespace +} // namespace ngram +} // namespace lm diff --git a/klm/lm/quantize.hh b/klm/lm/quantize.hh index abed0112..8ce2378a 100644 --- a/klm/lm/quantize.hh +++ b/klm/lm/quantize.hh @@ -24,7 +24,7 @@ class DontQuantize { public: static const ModelType kModelTypeAdd = static_cast(0); static void UpdateConfigFromBinary(int, const std::vector &, Config &) {} - static std::size_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; } + static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; } static uint8_t MiddleBits(const Config &/*config*/) { return 63; } static uint8_t LongestBits(const Config &/*config*/) { return 31; } @@ -138,9 +138,9 @@ class SeparatelyQuantize { static void UpdateConfigFromBinary(int fd, const std::vector &counts, Config &config); - static std::size_t Size(uint8_t order, const Config &config) { - size_t longest_table = (static_cast(1) << static_cast(config.prob_bits)) * sizeof(float); - size_t middle_table = (static_cast(1) << static_cast(config.backoff_bits)) * sizeof(float) + longest_table; + static uint64_t Size(uint8_t order, const Config &config) { + uint64_t longest_table = (static_cast(1) << static_cast(config.prob_bits)) * sizeof(float); + uint64_t middle_table = (static_cast(1) << static_cast(config.backoff_bits)) * sizeof(float) + longest_table; // unigrams are currently not quantized so no need for a table. 
return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding) */ 8; } diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 70727e4c..174bd3a3 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -2,12 +2,13 @@ #include "lm/blank.hh" +#include #include #include +#include #include #include -#include #include #include @@ -31,6 +32,16 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) { const char kBinaryMagic[] = "mmap lm http://kheafield.com/code"; +// strtoull isn't portable enough :-( +uint64_t ReadCount(const std::string &from) { + std::stringstream stream(from); + uint64_t ret; + stream >> ret; + UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from); + UTIL_THROW_IF(static_cast(stream.tellg()) != from.size(), FormatLoadException, "Extra content in count: '" << from << "'"); + return ret; +} + } // namespace void ReadARPACounts(util::FilePiece &in, std::vector &number) { @@ -52,15 +63,11 @@ void ReadARPACounts(util::FilePiece &in, std::vector &number) { // So strtol doesn't go off the end of line. std::string remaining(line.data() + 6, line.size() - 6); char *end_ptr; - unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10); + unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10); if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line); if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line); ++end_ptr; - const char *start = end_ptr; - long int count = std::strtol(start, &end_ptr, 10); - if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count); - if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line); - number.push_back(count); + number.push_back(ReadCount(end_ptr)); } } @@ -103,7 +110,7 @@ void ReadBackoff(util::FilePiece &in, float &backoff) { int float_class = _fpclass(backoff); UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff); #else - int float_class = fpclassify(backoff); + int float_class = std::fpclassify(backoff); UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff); #endif } diff --git a/klm/lm/search_hashed.hh b/klm/lm/search_hashed.hh index 7e8c1220..3bcde921 100644 --- a/klm/lm/search_hashed.hh +++ b/klm/lm/search_hashed.hh @@ -74,8 +74,8 @@ template class HashedSearch { // TODO: move probing_multiplier here with next binary file format update. 
static void UpdateConfigFromBinary(int, const std::vector &, Config &) {} - static std::size_t Size(const std::vector &counts, const Config &config) { - std::size_t ret = Unigram::Size(counts[0]); + static uint64_t Size(const std::vector &counts, const Config &config) { + uint64_t ret = Unigram::Size(counts[0]); for (unsigned char n = 1; n < counts.size() - 1; ++n) { ret += Middle::Size(counts[n], config.probing_multiplier); } @@ -160,7 +160,7 @@ template class HashedSearch { #endif {} - static std::size_t Size(uint64_t count) { + static uint64_t Size(uint64_t count) { return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate } diff --git a/klm/lm/search_trie.hh b/klm/lm/search_trie.hh index 10b22ab1..1264baf5 100644 --- a/klm/lm/search_trie.hh +++ b/klm/lm/search_trie.hh @@ -44,8 +44,8 @@ template class TrieSearch { Bhiksha::UpdateConfigFromBinary(fd, config); } - static std::size_t Size(const std::vector &counts, const Config &config) { - std::size_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]); + static uint64_t Size(const std::vector &counts, const Config &config) { + uint64_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]); for (unsigned char i = 1; i < counts.size() - 1; ++i) { ret += Middle::Size(Quant::MiddleBits(config), counts[i], counts[0], counts[i+1], config); } diff --git a/klm/lm/state.hh b/klm/lm/state.hh index 830e40aa..551510a8 100644 --- a/klm/lm/state.hh +++ b/klm/lm/state.hh @@ -47,6 +47,8 @@ class State { unsigned char length; }; +typedef State Right; + inline uint64_t hash_value(const State &state, uint64_t seed = 0) { return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed); } diff --git a/klm/lm/trie.cc b/klm/lm/trie.cc index 0f1ca574..d9895f89 100644 --- a/klm/lm/trie.cc +++ b/klm/lm/trie.cc @@ -36,7 +36,7 @@ bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_ } } // namespace -std::size_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) { +uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) { uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits; // Extra entry for next pointer at the end. // +7 then / 8 to round up bits and convert to bytes @@ -57,7 +57,7 @@ void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits) max_vocab_ = max_vocab; } -template std::size_t BitPackedMiddle::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { +template uint64_t BitPackedMiddle::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) { return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config)); } diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh index 034a1414..9ea3c546 100644 --- a/klm/lm/trie.hh +++ b/klm/lm/trie.hh @@ -49,7 +49,7 @@ class Unigram { unigram_ = static_cast(start); } - static std::size_t Size(uint64_t count) { + static uint64_t Size(uint64_t count) { // +1 in case unknown doesn't appear. +1 for the final next. 
return (count + 2) * sizeof(UnigramValue); } @@ -84,7 +84,7 @@ class BitPacked { } protected: - static std::size_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits); + static uint64_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits); void BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits); @@ -99,7 +99,7 @@ class BitPacked { template class BitPackedMiddle : public BitPacked { public: - static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config); + static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config); // next_source need not be initialized. BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config); @@ -128,7 +128,7 @@ template class BitPackedMiddle : public BitPacked { class BitPackedLongest : public BitPacked { public: - static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) { + static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) { return BaseSize(entries, max_vocab, quant_bits); } diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc index 5de68f16..398475be 100644 --- a/klm/lm/vocab.cc +++ b/klm/lm/vocab.cc @@ -87,7 +87,7 @@ void WriteWordsWrapper::Write(int fd) { SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {} -std::size_t SortedVocabulary::Size(std::size_t entries, const Config &/*config*/) { +uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) { // Lead with the number of entries. return sizeof(uint64_t) + sizeof(uint64_t) * entries; } @@ -165,7 +165,7 @@ struct ProbingVocabularyHeader { ProbingVocabulary::ProbingVocabulary() : enumerate_(NULL) {} -std::size_t ProbingVocabulary::Size(std::size_t entries, const Config &config) { +uint64_t ProbingVocabulary::Size(uint64_t entries, const Config &config) { return ALIGN8(sizeof(detail::ProbingVocabularyHeader)) + Lookup::Size(entries, config.probing_multiplier); } diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh index a25432f9..074cd446 100644 --- a/klm/lm/vocab.hh +++ b/klm/lm/vocab.hh @@ -62,7 +62,7 @@ class SortedVocabulary : public base::Vocabulary { } // Size for purposes of file writing - static size_t Size(std::size_t entries, const Config &config); + static uint64_t Size(uint64_t entries, const Config &config); // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary. WordIndex Bound() const { return bound_; } @@ -129,7 +129,7 @@ class ProbingVocabulary : public base::Vocabulary { return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0; } - static size_t Size(std::size_t entries, const Config &config); + static uint64_t Size(uint64_t entries, const Config &config); // Vocab words are [0, Bound()). WordIndex Bound() const { return bound_; } diff --git a/klm/util/Jamfile b/klm/util/Jamfile index 3ee2c2c2..a939265f 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,10 +1,10 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : .. : : .. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc /top//z : .. : : .. 
; import testing ; -unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ; -run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ; -unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test bit_packing_test : bit_packing_test.cc kenutil /top//boost_unit_test_framework ; +run file_piece_test.cc kenutil /top//boost_unit_test_framework : : file_piece.cc ; +unit-test joint_sort_test : joint_sort_test.cc kenutil /top//boost_unit_test_framework ; +unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil /top//boost_unit_test_framework ; +unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil /top//boost_unit_test_framework ; +unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil /top//boost_unit_test_framework ; diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc index 07b14e26..eb635ad8 100644 --- a/klm/util/ersatz_progress.cc +++ b/klm/util/ersatz_progress.cc @@ -9,16 +9,16 @@ namespace util { namespace { const unsigned char kWidth = 100; } -ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} +ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} ErsatzProgress::~ErsatzProgress() { if (out_) Finished(); } -ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) +ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message) : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) { if (!out_) { - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); return; } if (!message.empty()) *out_ << message << '\n'; @@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std void ErsatzProgress::Milestone() { if (!out_) { current_ = 0; return; } if (!complete_) return; - unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); + unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); for (; stones_written_ < stone; ++stones_written_) { (*out_) << '*'; } if (stone == kWidth) { (*out_) << std::endl; - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); out_ = NULL; } else { next_ = std::max(next_, (stone * complete_) / kWidth); diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh index f709dc51..ff4d590f 100644 --- a/klm/util/ersatz_progress.hh +++ b/klm/util/ersatz_progress.hh @@ -4,6 +4,8 @@ #include #include +#include + // Ersatz version of boost::progress so core language model doesn't depend on // boost. Also adds option to print nothing. @@ -14,7 +16,7 @@ class ErsatzProgress { ErsatzProgress(); // Null means no output. The null value is useful for passing along the ostream pointer from another caller. 
- explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); + explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); ~ErsatzProgress(); @@ -23,12 +25,12 @@ class ErsatzProgress { return *this; } - ErsatzProgress &operator+=(std::size_t amount) { + ErsatzProgress &operator+=(uint64_t amount) { if ((current_ += amount) >= next_) Milestone(); return *this; } - void Set(std::size_t to) { + void Set(uint64_t to) { if ((current_ = to) >= next_) Milestone(); Milestone(); } @@ -40,7 +42,7 @@ class ErsatzProgress { private: void Milestone(); - std::size_t current_, next_, complete_; + uint64_t current_, next_, complete_; unsigned char stones_written_; std::ostream *out_; diff --git a/klm/util/exception.cc b/klm/util/exception.cc index c4f8c04c..3806e6de 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() { } EndOfFileException::~EndOfFileException() throw() {} +OverflowException::OverflowException() throw() {} +OverflowException::~OverflowException() throw() {} + } // namespace util diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 6d6a37cb..83f99cd6 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -2,9 +2,12 @@ #define UTIL_EXCEPTION__ #include +#include #include #include +#include + namespace util { template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); @@ -111,6 +114,25 @@ class EndOfFileException : public Exception { ~EndOfFileException() throw(); }; +class OverflowException : public Exception { + public: + OverflowException() throw(); + ~OverflowException() throw(); +}; + +template inline std::size_t CheckOverflowInternal(uint64_t value) { + UTIL_THROW_IF(value > static_cast(std::numeric_limits::max()), OverflowException, "Integer overflow detected. 
This model is too big for 32-bit code."); + return value; +} + +template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) { + return value; +} + +inline std::size_t CheckOverflow(uint64_t value) { + return CheckOverflowInternal(value); +} + } // namespace util #endif // UTIL_EXCEPTION__ diff --git a/klm/util/file.cc b/klm/util/file.cc index 98f13983..ff5e64c9 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -119,8 +119,13 @@ void FSyncOrThrow(int fd) { } namespace { -void InternalSeek(int fd, off_t off, int whence) { +void InternalSeek(int fd, int64_t off, int whence) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed"); + +#else UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed"); +#endif } } // namespace diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index af341d6d..19a68728 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 3354b68e..770faa7e 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -8,6 +8,7 @@ #include #include +#include namespace util { @@ -42,8 +43,8 @@ template (multiplier * static_cast(entries))); + static uint64_t Size(uint64_t entries, float multiplier) { + uint64_t buckets = std::max(entries + 1, static_cast(multiplier * static_cast(entries))); return buckets * sizeof(Entry); } -- cgit v1.2.3 From 45c9efc7d558dbe160056f02e74df1fee5d2d0e5 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:30:16 +0100 Subject: Add search library to cdec (not used yet) --- klm/search/Jamfile | 5 ++ klm/search/arity.hh | 8 ++ klm/search/config.hh | 25 +++++++ klm/search/context.hh | 66 +++++++++++++++++ klm/search/edge.hh | 54 ++++++++++++++ klm/search/edge_generator.cc | 129 ++++++++++++++++++++++++++++++++ klm/search/edge_generator.hh | 54 ++++++++++++++ klm/search/final.hh | 40 ++++++++++ klm/search/rule.cc | 55 ++++++++++++++ klm/search/rule.hh | 60 +++++++++++++++ klm/search/source.hh | 48 ++++++++++++ klm/search/types.hh | 18 +++++ klm/search/vertex.cc | 48 ++++++++++++ klm/search/vertex.hh | 165 +++++++++++++++++++++++++++++++++++++++++ klm/search/vertex_generator.cc | 99 +++++++++++++++++++++++++ klm/search/vertex_generator.hh | 70 +++++++++++++++++ klm/search/weights.cc | 69 +++++++++++++++++ klm/search/weights.hh | 49 ++++++++++++ klm/search/weights_test.cc | 38 ++++++++++ klm/search/word.hh | 47 ++++++++++++ 20 files changed, 1147 insertions(+) create mode 100644 klm/search/Jamfile create mode 100644 klm/search/arity.hh create mode 100644 klm/search/config.hh create mode 100644 klm/search/context.hh create mode 100644 klm/search/edge.hh create mode 100644 klm/search/edge_generator.cc create mode 100644 klm/search/edge_generator.hh create mode 100644 klm/search/final.hh create mode 100644 klm/search/rule.cc create mode 100644 klm/search/rule.hh create mode 100644 klm/search/source.hh create mode 100644 klm/search/types.hh create mode 100644 klm/search/vertex.cc create mode 100644 klm/search/vertex.hh create mode 100644 klm/search/vertex_generator.cc create mode 100644 klm/search/vertex_generator.hh create mode 100644 klm/search/weights.cc create mode 100644 klm/search/weights.hh create mode 100644 klm/search/weights_test.cc create mode 100644 klm/search/word.hh diff --git a/klm/search/Jamfile 
b/klm/search/Jamfile new file mode 100644 index 00000000..ac47c249 --- /dev/null +++ b/klm/search/Jamfile @@ -0,0 +1,5 @@ +lib search : weights.cc vertex.cc vertex_generator.cc edge_generator.cc rule.cc ../lm//kenlm ../util//kenutil : : : .. ; + +import testing ; + +unit-test weights_test : weights_test.cc search /top//boost_unit_test_framework ; diff --git a/klm/search/arity.hh b/klm/search/arity.hh new file mode 100644 index 00000000..09c2c671 --- /dev/null +++ b/klm/search/arity.hh @@ -0,0 +1,8 @@ +#ifndef SEARCH_ARITY__ +#define SEARCH_ARITY__ +namespace search { + +const unsigned int kMaxArity = 2; + +} // namespace search +#endif // SEARCH_ARITY__ diff --git a/klm/search/config.hh b/klm/search/config.hh new file mode 100644 index 00000000..e21e4b7c --- /dev/null +++ b/klm/search/config.hh @@ -0,0 +1,25 @@ +#ifndef SEARCH_CONFIG__ +#define SEARCH_CONFIG__ + +#include "search/weights.hh" +#include "util/string_piece.hh" + +namespace search { + +class Config { + public: + Config(StringPiece weight_str, unsigned int pop_limit) : + weights_(weight_str), pop_limit_(pop_limit) {} + + const Weights &GetWeights() const { return weights_; } + + unsigned int PopLimit() const { return pop_limit_; } + + private: + search::Weights weights_; + unsigned int pop_limit_; +}; + +} // namespace search + +#endif // SEARCH_CONFIG__ diff --git a/klm/search/context.hh b/klm/search/context.hh new file mode 100644 index 00000000..ae248549 --- /dev/null +++ b/klm/search/context.hh @@ -0,0 +1,66 @@ +#ifndef SEARCH_CONTEXT__ +#define SEARCH_CONTEXT__ + +#include "lm/model.hh" +#include "search/config.hh" +#include "search/final.hh" +#include "search/types.hh" +#include "search/vertex.hh" +#include "search/word.hh" +#include "util/exception.hh" + +#include +#include + +#include + +namespace search { + +class Weights; + +class ContextBase { + public: + explicit ContextBase(const Config &config) : pop_limit_(config.PopLimit()), weights_(config.GetWeights()) {} + + Final *NewFinal() { + Final *ret = final_pool_.construct(); + assert(ret); + return ret; + } + + VertexNode *NewVertexNode() { + VertexNode *ret = vertex_node_pool_.construct(); + assert(ret); + return ret; + } + + void DeleteVertexNode(VertexNode *node) { + vertex_node_pool_.destroy(node); + } + + unsigned int PopLimit() const { return pop_limit_; } + + const Weights &GetWeights() const { return weights_; } + + private: + boost::object_pool final_pool_; + boost::object_pool vertex_node_pool_; + + unsigned int pop_limit_; + + const Weights &weights_; +}; + +template class Context : public ContextBase { + public: + Context(const Config &config, const Model &model) : ContextBase(config), model_(model) {} + + const Model &LanguageModel() const { return model_; } + + private: + const Model &model_; +}; + +} // namespace search + +#endif // SEARCH_CONTEXT__ diff --git a/klm/search/edge.hh b/klm/search/edge.hh new file mode 100644 index 00000000..4d2a5cbf --- /dev/null +++ b/klm/search/edge.hh @@ -0,0 +1,54 @@ +#ifndef SEARCH_EDGE__ +#define SEARCH_EDGE__ + +#include "lm/state.hh" +#include "search/arity.hh" +#include "search/rule.hh" +#include "search/types.hh" +#include "search/vertex.hh" + +#include + +namespace search { + +class Edge { + public: + Edge() { + end_to_ = to_; + } + + Rule &InitRule() { return rule_; } + + void Add(Vertex &vertex) { + assert(end_to_ - to_ < kMaxArity); + *(end_to_++) = &vertex; + } + + const Vertex &GetVertex(std::size_t index) const { + return *to_[index]; + } + + const Rule &GetRule() const { return rule_; } + + private: + // 
Rule and pointers to rule arguments. + Rule rule_; + + Vertex *to_[kMaxArity]; + Vertex **end_to_; +}; + +struct PartialEdge { + Score score; + // Terminals + lm::ngram::ChartState between[kMaxArity + 1]; + // Non-terminals + PartialVertex nt[kMaxArity]; + + bool operator<(const PartialEdge &other) const { + return score < other.score; + } +}; + +} // namespace search +#endif // SEARCH_EDGE__ diff --git a/klm/search/edge_generator.cc b/klm/search/edge_generator.cc new file mode 100644 index 00000000..d135899a --- /dev/null +++ b/klm/search/edge_generator.cc @@ -0,0 +1,129 @@ +#include "search/edge_generator.hh" + +#include "lm/left.hh" +#include "lm/partial.hh" +#include "search/context.hh" +#include "search/vertex.hh" +#include "search/vertex_generator.hh" + +#include + +namespace search { + +bool EdgeGenerator::Init(Edge &edge, VertexGenerator &parent) { + from_ = &edge; + for (unsigned int i = 0; i < GetRule().Arity(); ++i) { + if (edge.GetVertex(i).RootPartial().Empty()) return false; + } + PartialEdge &root = *parent.MallocPartialEdge(); + root.score = GetRule().Bound(); + for (unsigned int i = 0; i < GetRule().Arity(); ++i) { + root.nt[i] = edge.GetVertex(i).RootPartial(); + root.score += root.nt[i].Bound(); + } + for (unsigned int i = GetRule().Arity(); i < 2; ++i) { + root.nt[i] = kBlankPartialVertex; + } + for (unsigned int i = 0; i < GetRule().Arity() + 1; ++i) { + root.between[i] = GetRule().Lexical(i); + } + // wtf no clear method? + generate_ = Generate(); + generate_.push(&root); + top_ = root.score; + return true; +} + +namespace { + +template float FastScore(const Context &context, unsigned char victim, unsigned char arity, const PartialEdge &previous, PartialEdge &update) { + memcpy(update.between, previous.between, sizeof(lm::ngram::ChartState) * (arity + 1)); + + float ret = 0.0; + lm::ngram::ChartState *before, *after; + if (victim == 0) { + before = &update.between[0]; + after = &update.between[(arity == 2 && previous.nt[1].Complete()) ? 2 : 1]; + } else { + assert(victim == 1); + assert(arity == 2); + before = &update.between[previous.nt[0].Complete() ? 
0 : 1]; + after = &update.between[2]; + } + const lm::ngram::ChartState &previous_reveal = previous.nt[victim].State(); + const PartialVertex &update_nt = update.nt[victim]; + const lm::ngram::ChartState &update_reveal = update_nt.State(); + float just_after = 0.0; + if ((update_reveal.left.length > previous_reveal.left.length) || (update_reveal.left.full && !previous_reveal.left.full)) { + just_after += lm::ngram::RevealAfter(context.LanguageModel(), before->left, before->right, update_reveal.left, previous_reveal.left.length); + } + if ((update_reveal.right.length > previous_reveal.right.length) || (update_nt.RightFull() && !previous.nt[victim].RightFull())) { + ret += lm::ngram::RevealBefore(context.LanguageModel(), update_reveal.right, previous_reveal.right.length, update_nt.RightFull(), after->left, after->right); + } + if (update_nt.Complete()) { + if (update_reveal.left.full) { + before->left.full = true; + } else { + assert(update_reveal.left.length == update_reveal.right.length); + ret += lm::ngram::Subsume(context.LanguageModel(), before->left, before->right, after->left, after->right, update_reveal.left.length); + } + if (victim == 0) { + update.between[0].right = after->right; + } else { + update.between[2].left = before->left; + } + } + return previous.score + (ret + just_after) * context.GetWeights().LM(); +} + +} // namespace + +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent) { + assert(!generate_.empty()); + PartialEdge &top = *generate_.top(); + generate_.pop(); + unsigned int victim = 0; + unsigned char lowest_length = 255; + for (unsigned int i = 0; i != GetRule().Arity(); ++i) { + if (!top.nt[i].Complete() && top.nt[i].Length() < lowest_length) { + lowest_length = top.nt[i].Length(); + victim = i; + } + } + if (lowest_length == 255) { + // All states report complete. + top.between[0].right = top.between[GetRule().Arity()].right; + parent.NewHypothesis(top.between[0], *from_, top); + top_ = generate_.empty() ? -kScoreInf : generate_.top()->score; + return !generate_.empty(); + } + + unsigned int stay = !victim; + PartialEdge &continuation = *parent.MallocPartialEdge(); + float old_bound = top.nt[victim].Bound(); + // The alternate's score will change because alternate.nt[victim] changes. + bool split = top.nt[victim].Split(continuation.nt[victim]); + // top is now the alternate. + + continuation.nt[stay] = top.nt[stay]; + continuation.score = FastScore(context, victim, GetRule().Arity(), top, continuation); + // TODO: dedupe? + generate_.push(&continuation); + + if (split) { + // We have an alternate. + top.score += top.nt[victim].Bound() - old_bound; + // TODO: dedupe? 
+ generate_.push(&top); + } else { + parent.FreePartialEdge(&top); + } + + top_ = generate_.top()->score; + return true; +} + +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent); +template bool EdgeGenerator::Pop(Context &context, VertexGenerator &parent); + +} // namespace search diff --git a/klm/search/edge_generator.hh b/klm/search/edge_generator.hh new file mode 100644 index 00000000..e306dc61 --- /dev/null +++ b/klm/search/edge_generator.hh @@ -0,0 +1,54 @@ +#ifndef SEARCH_EDGE_GENERATOR__ +#define SEARCH_EDGE_GENERATOR__ + +#include "search/edge.hh" + +#include + +#include +#include + +namespace lm { +namespace ngram { +class ChartState; +} // namespace ngram +} // namespace lm + +namespace search { + +template class Context; + +class VertexGenerator; + +struct PartialEdgePointerLess : std::binary_function { + bool operator()(const PartialEdge *first, const PartialEdge *second) const { + return *first < *second; + } +}; + +class EdgeGenerator { + public: + // True if it has a hypothesis. + bool Init(Edge &edge, VertexGenerator &parent); + + Score Top() const { + return top_; + } + + template bool Pop(Context &context, VertexGenerator &parent); + + private: + const Rule &GetRule() const { + return from_->GetRule(); + } + + Score top_; + + typedef std::priority_queue, PartialEdgePointerLess> Generate; + Generate generate_; + + Edge *from_; +}; + +} // namespace search +#endif // SEARCH_EDGE_GENERATOR__ diff --git a/klm/search/final.hh b/klm/search/final.hh new file mode 100644 index 00000000..24e6f0a5 --- /dev/null +++ b/klm/search/final.hh @@ -0,0 +1,40 @@ +#ifndef SEARCH_FINAL__ +#define SEARCH_FINAL__ + +#include "search/rule.hh" +#include "search/types.hh" + +#include + +namespace search { + +class Final { + public: + typedef boost::array ChildArray; + + void Reset(Score bound, const Rule &from, const Final &left, const Final &right) { + bound_ = bound; + from_ = &from; + children_[0] = &left; + children_[1] = &right; + } + + const ChildArray &Children() const { return children_; } + + unsigned int ChildCount() const { return from_->Arity(); } + + const Rule &From() const { return *from_; } + + Score Bound() const { return bound_; } + + private: + Score bound_; + + const Rule *from_; + + ChildArray children_; +}; + +} // namespace search + +#endif // SEARCH_FINAL__ diff --git a/klm/search/rule.cc b/klm/search/rule.cc new file mode 100644 index 00000000..a8b993eb --- /dev/null +++ b/klm/search/rule.cc @@ -0,0 +1,55 @@ +#include "search/rule.hh" + +#include "search/context.hh" +#include "search/final.hh" + +#include + +#include + +namespace search { + +template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos) { + additive_ = additive; + Score lm_score = 0.0; + lexical_.clear(); + const lm::WordIndex oov = context.LanguageModel().GetVocabulary().NotFound(); + + for (std::vector::const_iterator word = items_.begin(); ; ++word) { + lexical_.resize(lexical_.size() + 1); + lm::ngram::RuleScore scorer(context.LanguageModel(), lexical_.back()); + // TODO: optimize + if (prepend_bos && (word == items_.begin())) { + scorer.BeginSentence(); + } + for (; ; ++word) { + if (word == items_.end()) { + lm_score += scorer.Finish(); + bound_ = additive_ + context.GetWeights().LM() * lm_score; + assert(lexical_.size() == arity_ + 1); + return; + } + if (!word->Terminal()) break; + if (word->Index() == oov) additive_ += context.GetWeights().OOV(); + scorer.Terminal(word->Index()); + } + lm_score += scorer.Finish(); + } +} + +template void 
Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); +template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); + +std::ostream &operator<<(std::ostream &o, const Rule &rule) { + const Rule::ItemsRet &items = rule.Items(); + for (Rule::ItemsRet::const_iterator i = items.begin(); i != items.end(); ++i) { + if (i->Terminal()) { + o << i->String() << ' '; + } else { + o << "[] "; + } + } + return o; +} + +} // namespace search diff --git a/klm/search/rule.hh b/klm/search/rule.hh new file mode 100644 index 00000000..79192d40 --- /dev/null +++ b/klm/search/rule.hh @@ -0,0 +1,60 @@ +#ifndef SEARCH_RULE__ +#define SEARCH_RULE__ + +#include "lm/left.hh" +#include "search/arity.hh" +#include "search/types.hh" +#include "search/word.hh" + +#include + +#include +#include + +namespace search { + +template class Context; + +class Rule { + public: + Rule() : arity_(0) {} + + void AppendTerminal(Word w) { items_.push_back(w); } + + void AppendNonTerminal() { + items_.resize(items_.size() + 1); + ++arity_; + } + + template void FinishedAdding(const Context &context, Score additive, bool prepend_bos); + + Score Bound() const { return bound_; } + + Score Additive() const { return additive_; } + + unsigned int Arity() const { return arity_; } + + const lm::ngram::ChartState &Lexical(unsigned int index) const { + return lexical_[index]; + } + + // For printing. + typedef const std::vector ItemsRet; + ItemsRet &Items() const { return items_; } + + private: + Score bound_, additive_; + + unsigned int arity_; + + // TODO: pool? + std::vector items_; + + std::vector lexical_; +}; + +std::ostream &operator<<(std::ostream &o, const Rule &rule); + +} // namespace search + +#endif // SEARCH_RULE__ diff --git a/klm/search/source.hh b/klm/search/source.hh new file mode 100644 index 00000000..11839f7b --- /dev/null +++ b/klm/search/source.hh @@ -0,0 +1,48 @@ +#ifndef SEARCH_SOURCE__ +#define SEARCH_SOURCE__ + +#include "search/types.hh" + +#include +#include + +namespace search { + +template class Source { + public: + Source() : bound_(kScoreInf) {} + + Index Size() const { + return final_.size(); + } + + Score Bound() const { + return bound_; + } + + const Final &operator[](Index index) const { + return *final_[index]; + } + + Score ScoreOrBound(Index index) const { + return Size() > index ? final_[index]->Total() : Bound(); + } + + protected: + void AddFinal(const Final &store) { + final_.push_back(&store); + } + + void SetBound(Score to) { + assert(to <= bound_ + 0.001); + bound_ = to; + } + + private: + std::vector final_; + + Score bound_; +}; + +} // namespace search +#endif // SEARCH_SOURCE__ diff --git a/klm/search/types.hh b/klm/search/types.hh new file mode 100644 index 00000000..9726379f --- /dev/null +++ b/klm/search/types.hh @@ -0,0 +1,18 @@ +#ifndef SEARCH_TYPES__ +#define SEARCH_TYPES__ + +#include + +namespace search { + +typedef float Score; +const Score kScoreInf = INFINITY; + +// This could have been an enum but gcc wants 4 bytes. 
+typedef bool ExtendDirection; +const ExtendDirection kExtendLeft = 0; +const ExtendDirection kExtendRight = 1; + +} // namespace search + +#endif // SEARCH_TYPES__ diff --git a/klm/search/vertex.cc b/klm/search/vertex.cc new file mode 100644 index 00000000..cc53c0dd --- /dev/null +++ b/klm/search/vertex.cc @@ -0,0 +1,48 @@ +#include "search/vertex.hh" + +#include "search/context.hh" + +#include +#include + +#include + +namespace search { + +namespace { + +struct GreaterByBound : public std::binary_function { + bool operator()(const VertexNode *first, const VertexNode *second) const { + return first->Bound() > second->Bound(); + } +}; + +} // namespace + +void VertexNode::SortAndSet(ContextBase &context, VertexNode **parent_ptr) { + if (Complete()) { + assert(end_); + assert(extend_.empty()); + bound_ = end_->Bound(); + return; + } + if (extend_.size() == 1 && parent_ptr) { + *parent_ptr = extend_[0]; + extend_[0]->SortAndSet(context, parent_ptr); + context.DeleteVertexNode(this); + return; + } + for (std::vector::iterator i = extend_.begin(); i != extend_.end(); ++i) { + (*i)->SortAndSet(context, &*i); + } + std::sort(extend_.begin(), extend_.end(), GreaterByBound()); + bound_ = extend_.front()->Bound(); +} + +namespace { +VertexNode kBlankVertexNode; +} // namespace + +PartialVertex kBlankPartialVertex(kBlankVertexNode); + +} // namespace search diff --git a/klm/search/vertex.hh b/klm/search/vertex.hh new file mode 100644 index 00000000..7ef29efc --- /dev/null +++ b/klm/search/vertex.hh @@ -0,0 +1,165 @@ +#ifndef SEARCH_VERTEX__ +#define SEARCH_VERTEX__ + +#include "lm/left.hh" +#include "search/final.hh" +#include "search/types.hh" + +#include + +#include +#include + +#include + +namespace search { + +class ContextBase; + +class Edge; + +class VertexNode { + public: + VertexNode() : end_(NULL) {} + + void InitRoot() { + extend_.clear(); + state_.left.full = false; + state_.left.length = 0; + state_.right.length = 0; + right_full_ = false; + bound_ = -kScoreInf; + end_ = NULL; + } + + lm::ngram::ChartState &MutableState() { return state_; } + bool &MutableRightFull() { return right_full_; } + + void AddExtend(VertexNode *next) { + extend_.push_back(next); + } + + void SetEnd(Final *end) { end_ = end; } + + Final &MutableEnd() { return *end_; } + + void SortAndSet(ContextBase &context, VertexNode **parent_pointer); + + // Should only happen to a root node when the entire vertex is empty. + bool Empty() const { + return !end_ && extend_.empty(); + } + + bool Complete() const { + return end_; + } + + const lm::ngram::ChartState &State() const { return state_; } + bool RightFull() const { return right_full_; } + + Score Bound() const { + return bound_; + } + + unsigned char Length() const { + return state_.left.length + state_.right.length; + } + + // May be NULL. + const Final *End() const { return end_; } + + const VertexNode &operator[](size_t index) const { + return *extend_[index]; + } + + size_t Size() const { + return extend_.size(); + } + + private: + std::vector extend_; + + lm::ngram::ChartState state_; + bool right_full_; + + Score bound_; + Final *end_; +}; + +class PartialVertex { + public: + PartialVertex() {} + + explicit PartialVertex(const VertexNode &back) : back_(&back), index_(0) {} + + bool Empty() const { return back_->Empty(); } + + bool Complete() const { return back_->Complete(); } + + const lm::ngram::ChartState &State() const { return back_->State(); } + bool RightFull() const { return back_->RightFull(); } + + Score Bound() const { return Complete() ? 
back_->End()->Bound() : (*back_)[index_].Bound(); } + + unsigned char Length() const { return back_->Length(); } + + // Split into continuation and alternative, rendering this the alternative. + bool Split(PartialVertex &continuation) { + assert(!Complete()); + continuation.back_ = &((*back_)[index_]); + continuation.index_ = 0; + if (index_ + 1 < back_->Size()) { + ++index_; + return true; + } + return false; + } + + const Final &End() const { + return *back_->End(); + } + + private: + const VertexNode *back_; + unsigned int index_; +}; + +extern PartialVertex kBlankPartialVertex; + +class Vertex { + public: + Vertex() +#ifdef DEBUG + : finished_adding_(false) +#endif + {} + + void Add(Edge &edge) { +#ifdef DEBUG + assert(!finished_adding_); +#endif + edges_.push_back(&edge); + } + + void FinishedAdding() { +#ifdef DEBUG + assert(!finished_adding_); + finished_adding_ = true; +#endif + } + + PartialVertex RootPartial() const { return PartialVertex(root_); } + + private: + friend class VertexGenerator; + std::vector edges_; + +#ifdef DEBUG + bool finished_adding_; +#endif + + VertexNode root_; +}; + +} // namespace search +#endif // SEARCH_VERTEX__ diff --git a/klm/search/vertex_generator.cc b/klm/search/vertex_generator.cc new file mode 100644 index 00000000..0281fc37 --- /dev/null +++ b/klm/search/vertex_generator.cc @@ -0,0 +1,99 @@ +#include "search/vertex_generator.hh" + +#include "lm/left.hh" +#include "search/context.hh" + +#include + +namespace search { + +template VertexGenerator::VertexGenerator(Context &context, Vertex &gen) : context_(context), edges_(gen.edges_.size()), partial_edge_pool_(sizeof(PartialEdge), context.PopLimit() * 2) { + for (std::size_t i = 0; i < gen.edges_.size(); ++i) { + if (edges_[i].Init(*gen.edges_[i], *this)) + generate_.push(&edges_[i]); + } + gen.root_.InitRoot(); + root_.under = &gen.root_; + to_pop_ = context.PopLimit(); + while (to_pop_ > 0 && !generate_.empty()) { + EdgeGenerator *top = generate_.top(); + generate_.pop(); + if (top->Pop(context, *this)) { + generate_.push(top); + } + } + gen.root_.SortAndSet(context, NULL); +} + +template VertexGenerator::VertexGenerator(Context &context, Vertex &gen); +template VertexGenerator::VertexGenerator(Context &context, Vertex &gen); + +namespace { +const uint64_t kCompleteAdd = static_cast(-1); +} // namespace + +void VertexGenerator::NewHypothesis(const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial) { + std::pair got(existing_.insert(std::pair(hash_value(state), NULL))); + if (!got.second) { + // Found it already. 
+ Final &exists = *got.first->second; + if (exists.Bound() < partial.score) { + exists.Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + } + --to_pop_; + return; + } + unsigned char left = 0, right = 0; + Trie *node = &root_; + while (true) { + if (left == state.left.length) { + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, left, true, right, false); + for (; right < state.right.length; ++right) { + node = &FindOrInsert(*node, state.right.words[right], state, left, true, right + 1, false); + } + break; + } + node = &FindOrInsert(*node, state.left.pointers[left], state, left + 1, false, right, false); + left++; + if (right == state.right.length) { + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, left, false, right, true); + for (; left < state.left.length; ++left) { + node = &FindOrInsert(*node, state.left.pointers[left], state, left + 1, false, right, true); + } + break; + } + node = &FindOrInsert(*node, state.right.words[right], state, left, false, right + 1, false); + right++; + } + + node = &FindOrInsert(*node, kCompleteAdd - state.left.full, state, state.left.length, true, state.right.length, true); + got.first->second = CompleteTransition(*node, state, from, partial); + --to_pop_; +} + +VertexGenerator::Trie &VertexGenerator::FindOrInsert(VertexGenerator::Trie &node, uint64_t added, const lm::ngram::ChartState &state, unsigned char left, bool left_full, unsigned char right, bool right_full) { + VertexGenerator::Trie &next = node.extend[added]; + if (!next.under) { + next.under = context_.NewVertexNode(); + lm::ngram::ChartState &writing = next.under->MutableState(); + writing = state; + writing.left.full &= left_full && state.left.full; + next.under->MutableRightFull() = right_full && state.left.full; + writing.left.length = left; + writing.right.length = right; + node.under->AddExtend(next.under); + } + return next; +} + +Final *VertexGenerator::CompleteTransition(VertexGenerator::Trie &starter, const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial) { + VertexNode &node = *starter.under; + assert(node.State().left.full == state.left.full); + assert(!node.End()); + Final *final = context_.NewFinal(); + final->Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + node.SetEnd(final); + return final; +} + +} // namespace search diff --git a/klm/search/vertex_generator.hh b/klm/search/vertex_generator.hh new file mode 100644 index 00000000..8cdf1420 --- /dev/null +++ b/klm/search/vertex_generator.hh @@ -0,0 +1,70 @@ +#ifndef SEARCH_VERTEX_GENERATOR__ +#define SEARCH_VERTEX_GENERATOR__ + +#include "search/edge.hh" +#include "search/edge_generator.hh" + +#include +#include + +#include + +namespace lm { +namespace ngram { +class ChartState; +} // namespace ngram +} // namespace lm + +namespace search { + +template class Context; +class ContextBase; +class Final; + +class VertexGenerator { + public: + template VertexGenerator(Context &context, Vertex &gen); + + PartialEdge *MallocPartialEdge() { return static_cast(partial_edge_pool_.malloc()); } + void FreePartialEdge(PartialEdge *value) { partial_edge_pool_.free(value); } + + void NewHypothesis(const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial); + + private: + // Parallel structure to VertexNode. 
+ struct Trie { + Trie() : under(NULL) {} + + VertexNode *under; + boost::unordered_map extend; + }; + + Trie &FindOrInsert(Trie &node, uint64_t added, const lm::ngram::ChartState &state, unsigned char left, bool left_full, unsigned char right, bool right_full); + + Final *CompleteTransition(Trie &node, const lm::ngram::ChartState &state, const Edge &from, const PartialEdge &partial); + + ContextBase &context_; + + std::vector edges_; + + struct LessByTop : public std::binary_function { + bool operator()(const EdgeGenerator *first, const EdgeGenerator *second) const { + return first->Top() < second->Top(); + } + }; + + typedef std::priority_queue, LessByTop> Generate; + Generate generate_; + + Trie root_; + + typedef boost::unordered_map Existing; + Existing existing_; + + int to_pop_; + + boost::pool<> partial_edge_pool_; +}; + +} // namespace search +#endif // SEARCH_VERTEX_GENERATOR__ diff --git a/klm/search/weights.cc b/klm/search/weights.cc new file mode 100644 index 00000000..82ff3f12 --- /dev/null +++ b/klm/search/weights.cc @@ -0,0 +1,69 @@ +#include "search/weights.hh" +#include "util/tokenize_piece.hh" + +#include + +namespace search { + +namespace { +struct Insert { + void operator()(boost::unordered_map &map, StringPiece name, search::Score score) const { + std::string copy(name.data(), name.size()); + map[copy] = score; + } +}; + +struct DotProduct { + search::Score total; + DotProduct() : total(0.0) {} + + void operator()(const boost::unordered_map &map, StringPiece name, search::Score score) { + boost::unordered_map::const_iterator i(FindStringPiece(map, name)); + if (i != map.end()) + total += score * i->second; + } +}; + +template void Parse(StringPiece text, Map &map, Op &op) { + for (util::TokenIter spaces(text, ' '); spaces; ++spaces) { + util::TokenIter equals(*spaces, '='); + UTIL_THROW_IF(!equals, WeightParseException, "Bad weight token " << *spaces); + StringPiece name(*equals); + UTIL_THROW_IF(!++equals, WeightParseException, "Bad weight token " << *spaces); + char *end; + // Assumes proper termination. + double value = std::strtod(equals->data(), &end); + UTIL_THROW_IF(end != equals->data() + equals->size(), WeightParseException, "Failed to parse weight" << *equals); + UTIL_THROW_IF(++equals, WeightParseException, "Too many equals in " << *spaces); + op(map, name, value); + } +} + +} // namespace + +Weights::Weights(StringPiece text) { + Insert op; + Parse(text, map_, op); + lm_ = Steal("LanguageModel"); + oov_ = Steal("OOV"); + word_penalty_ = Steal("WordPenalty"); +} + +search::Score Weights::DotNoLM(StringPiece text) const { + DotProduct dot; + Parse(text, map_, dot); + return dot.total; +} + +float Weights::Steal(const std::string &str) { + Map::iterator i(map_.find(str)); + if (i == map_.end()) { + return 0.0; + } else { + float ret = i->second; + map_.erase(i); + return ret; + } +} + +} // namespace search diff --git a/klm/search/weights.hh b/klm/search/weights.hh new file mode 100644 index 00000000..4a4388c7 --- /dev/null +++ b/klm/search/weights.hh @@ -0,0 +1,49 @@ +// For now, the individual features are not kept. +#ifndef SEARCH_WEIGHTS__ +#define SEARCH_WEIGHTS__ + +#include "search/types.hh" +#include "util/exception.hh" +#include "util/string_piece.hh" + +#include + +#include + +namespace search { + +class WeightParseException : public util::Exception { + public: + WeightParseException() {} + ~WeightParseException() throw() {} +}; + +class Weights { + public: + // Parses weights, sets lm_weight_, removes it from map_. 
+ explicit Weights(StringPiece text); + + search::Score DotNoLM(StringPiece text) const; + + search::Score LM() const { return lm_; } + + search::Score OOV() const { return oov_; } + + search::Score WordPenalty() const { return word_penalty_; } + + // Mostly for testing. + const boost::unordered_map &GetMap() const { return map_; } + + private: + float Steal(const std::string &str); + + typedef boost::unordered_map Map; + + Map map_; + + search::Score lm_, oov_, word_penalty_; +}; + +} // namespace search + +#endif // SEARCH_WEIGHTS__ diff --git a/klm/search/weights_test.cc b/klm/search/weights_test.cc new file mode 100644 index 00000000..4811ff06 --- /dev/null +++ b/klm/search/weights_test.cc @@ -0,0 +1,38 @@ +#include "search/weights.hh" + +#define BOOST_TEST_MODULE WeightTest +#include +#include + +namespace search { +namespace { + +#define CHECK_WEIGHT(value, string) \ + i = parsed.find(string); \ + BOOST_REQUIRE(i != parsed.end()); \ + BOOST_CHECK_CLOSE((value), i->second, 0.001); + +BOOST_AUTO_TEST_CASE(parse) { + // These are not real feature weights. + Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); + const boost::unordered_map &parsed = w.GetMap(); + boost::unordered_map::const_iterator i; + CHECK_WEIGHT(0.0, "rarity"); + CHECK_WEIGHT(0.0, "phrase-SGT"); + CHECK_WEIGHT(9.45117, "phrase-TGS"); + CHECK_WEIGHT(2.33833, "lexical-SGT"); + BOOST_CHECK(parsed.end() == parsed.find("lm")); + BOOST_CHECK_CLOSE(3.0, w.LM(), 0.001); + CHECK_WEIGHT(-28.3317, "lexical-TGS"); + CHECK_WEIGHT(5.0, "glue?"); +} + +BOOST_AUTO_TEST_CASE(dot) { + Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); + BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0"), 0.001); + BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0 LanguageModel=10"), 0.001); + BOOST_CHECK_CLOSE(9.45117 * 3.0 + 28.3317 * 17.4, w.DotNoLM("rarity=5 phrase-TGS=3.0 LanguageModel=10 lexical-TGS=-17.4"), 0.001); +} + +} // namespace +} // namespace search diff --git a/klm/search/word.hh b/klm/search/word.hh new file mode 100644 index 00000000..e7a15be9 --- /dev/null +++ b/klm/search/word.hh @@ -0,0 +1,47 @@ +#ifndef SEARCH_WORD__ +#define SEARCH_WORD__ + +#include "lm/word_index.hh" + +#include + +#include +#include + +namespace search { + +class Word { + public: + // Construct a non-terminal. + Word() : entry_(NULL) {} + + explicit Word(const std::pair &entry) { + entry_ = &entry; + } + + // Returns true for two non-terminals even if their labels are different (since we don't care about labels). 
+ bool operator==(const Word &other) const { + return entry_ == other.entry_; + } + + bool Terminal() const { return entry_ != NULL; } + + const std::string &String() const { return entry_->first; } + + lm::WordIndex Index() const { return entry_->second; } + + protected: + friend size_t hash_value(const Word &word); + + const std::pair *Entry() const { return entry_; } + + private: + const std::pair *entry_; +}; + +inline size_t hash_value(const Word &word) { + return boost::hash_value(word.Entry()); +} + +} // namespace search +#endif // SEARCH_WORD__ -- cgit v1.2.3 From dea088773b024d6d5c65eab2883910483f99bc0a Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:49:58 +0100 Subject: Minor build fixes --- Jamroot | 4 ++-- klm/util/have.hh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Jamroot b/Jamroot index ef426146..738b83e3 100644 --- a/Jamroot +++ b/Jamroot @@ -26,7 +26,7 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; } -project : requirements $(requirements) darwin:static ; +project : requirements $(requirements) darwin:static boost_system ; project : default-build on release ; install-bin-libs dpmert//programs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec phrasinator//programs mira//kbest_mira ; @@ -40,6 +40,6 @@ rule all_tests ( targets * : dependencies : properties * ) { targets ?= [ glob *_test.cc ] ; for t in $(targets) { local base = [ MATCH "^(.*).cc$" : $(t) ] ; - unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework : $(properties) ; + unit-test $(base) : $(t) $(dependencies) /top//boost_unit_test_framework : $(properties) ; } } diff --git a/klm/util/have.hh b/klm/util/have.hh index 1d76a7fc..b8181e99 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -13,7 +13,7 @@ #endif #ifndef HAVE_BOOST -//#define HAVE_BOOST +#define HAVE_BOOST #endif #ifndef HAVE_THREADS -- cgit v1.2.3 From 143ba7317dcaee3058d66f9e6558316f88f95212 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 12 Sep 2012 12:01:26 +0100 Subject: Refactor search so that it knows even less, but keeps track of edge pointers --- klm/lm/word_index.hh | 3 +++ klm/search/context.hh | 1 - klm/search/final.hh | 12 +++++------ klm/search/rule.cc | 32 +++++++++------------------- klm/search/rule.hh | 21 ++++--------------- klm/search/vertex_generator.cc | 4 ++-- klm/search/word.hh | 47 ------------------------------------------ 7 files changed, 25 insertions(+), 95 deletions(-) delete mode 100644 klm/search/word.hh diff --git a/klm/lm/word_index.hh b/klm/lm/word_index.hh index 67841c30..e09557a7 100644 --- a/klm/lm/word_index.hh +++ b/klm/lm/word_index.hh @@ -2,8 +2,11 @@ #ifndef LM_WORD_INDEX__ #define LM_WORD_INDEX__ +#include + namespace lm { typedef unsigned int WordIndex; +const WordIndex kMaxWordIndex = UINT_MAX; } // namespace lm typedef lm::WordIndex LMWordIndex; diff --git a/klm/search/context.hh b/klm/search/context.hh index ae248549..27940053 100644 --- a/klm/search/context.hh +++ b/klm/search/context.hh @@ -6,7 +6,6 @@ #include "search/final.hh" #include "search/types.hh" #include "search/vertex.hh" -#include "search/word.hh" #include "util/exception.hh" #include diff --git a/klm/search/final.hh b/klm/search/final.hh index 24e6f0a5..823b8c1a 100644 --- a/klm/search/final.hh +++ b/klm/search/final.hh @@ -1,18 +1,20 @@ #ifndef SEARCH_FINAL__ #define SEARCH_FINAL__ -#include "search/rule.hh" +#include 
"search/arity.hh" #include "search/types.hh" #include namespace search { +class Edge; + class Final { public: typedef boost::array ChildArray; - void Reset(Score bound, const Rule &from, const Final &left, const Final &right) { + void Reset(Score bound, const Edge &from, const Final &left, const Final &right) { bound_ = bound; from_ = &from; children_[0] = &left; @@ -21,16 +23,14 @@ class Final { const ChildArray &Children() const { return children_; } - unsigned int ChildCount() const { return from_->Arity(); } - - const Rule &From() const { return *from_; } + const Edge &From() const { return *from_; } Score Bound() const { return bound_; } private: Score bound_; - const Rule *from_; + const Edge *from_; ChildArray children_; }; diff --git a/klm/search/rule.cc b/klm/search/rule.cc index a8b993eb..0a941527 100644 --- a/klm/search/rule.cc +++ b/klm/search/rule.cc @@ -9,47 +9,35 @@ namespace search { -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos) { +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos) { additive_ = additive; Score lm_score = 0.0; lexical_.clear(); const lm::WordIndex oov = context.LanguageModel().GetVocabulary().NotFound(); - for (std::vector::const_iterator word = items_.begin(); ; ++word) { + for (std::vector::const_iterator word = words.begin(); ; ++word) { lexical_.resize(lexical_.size() + 1); lm::ngram::RuleScore scorer(context.LanguageModel(), lexical_.back()); // TODO: optimize - if (prepend_bos && (word == items_.begin())) { + if (prepend_bos && (word == words.begin())) { scorer.BeginSentence(); } for (; ; ++word) { - if (word == items_.end()) { + if (word == words.end()) { lm_score += scorer.Finish(); bound_ = additive_ + context.GetWeights().LM() * lm_score; - assert(lexical_.size() == arity_ + 1); + arity_ = lexical_.size() - 1; return; } - if (!word->Terminal()) break; - if (word->Index() == oov) additive_ += context.GetWeights().OOV(); - scorer.Terminal(word->Index()); + if (*word == kNonTerminal) break; + if (*word == oov) additive_ += context.GetWeights().OOV(); + scorer.Terminal(*word); } lm_score += scorer.Finish(); } } -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); -template void Rule::FinishedAdding(const Context &context, Score additive, bool prepend_bos); - -std::ostream &operator<<(std::ostream &o, const Rule &rule) { - const Rule::ItemsRet &items = rule.Items(); - for (Rule::ItemsRet::const_iterator i = items.begin(); i != items.end(); ++i) { - if (i->Terminal()) { - o << i->String() << ' '; - } else { - o << "[] "; - } - } - return o; -} +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); +template void Rule::Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); } // namespace search diff --git a/klm/search/rule.hh b/klm/search/rule.hh index 79192d40..920c64a7 100644 --- a/klm/search/rule.hh +++ b/klm/search/rule.hh @@ -2,9 +2,9 @@ #define SEARCH_RULE__ #include "lm/left.hh" +#include "lm/word_index.hh" #include "search/arity.hh" #include "search/types.hh" -#include "search/word.hh" #include @@ -19,14 +19,10 @@ class Rule { public: Rule() : arity_(0) {} - void AppendTerminal(Word w) { items_.push_back(w); } + static const lm::WordIndex kNonTerminal = lm::kMaxWordIndex; - void AppendNonTerminal() { - items_.resize(items_.size() + 1); - ++arity_; - } - - template void FinishedAdding(const Context &context, 
Score additive, bool prepend_bos); + // Use kNonTerminal for non-terminals. + template void Init(const Context &context, Score additive, const std::vector &words, bool prepend_bos); Score Bound() const { return bound_; } @@ -38,23 +34,14 @@ class Rule { return lexical_[index]; } - // For printing. - typedef const std::vector ItemsRet; - ItemsRet &Items() const { return items_; } - private: Score bound_, additive_; unsigned int arity_; - // TODO: pool? - std::vector items_; - std::vector lexical_; }; -std::ostream &operator<<(std::ostream &o, const Rule &rule); - } // namespace search #endif // SEARCH_RULE__ diff --git a/klm/search/vertex_generator.cc b/klm/search/vertex_generator.cc index 0281fc37..78948c97 100644 --- a/klm/search/vertex_generator.cc +++ b/klm/search/vertex_generator.cc @@ -38,7 +38,7 @@ void VertexGenerator::NewHypothesis(const lm::ngram::ChartState &state, const Ed // Found it already. Final &exists = *got.first->second; if (exists.Bound() < partial.score) { - exists.Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + exists.Reset(partial.score, from, partial.nt[0].End(), partial.nt[1].End()); } --to_pop_; return; @@ -91,7 +91,7 @@ Final *VertexGenerator::CompleteTransition(VertexGenerator::Trie &starter, const assert(node.State().left.full == state.left.full); assert(!node.End()); Final *final = context_.NewFinal(); - final->Reset(partial.score, from.GetRule(), partial.nt[0].End(), partial.nt[1].End()); + final->Reset(partial.score, from, partial.nt[0].End(), partial.nt[1].End()); node.SetEnd(final); return final; } diff --git a/klm/search/word.hh b/klm/search/word.hh deleted file mode 100644 index e7a15be9..00000000 --- a/klm/search/word.hh +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef SEARCH_WORD__ -#define SEARCH_WORD__ - -#include "lm/word_index.hh" - -#include - -#include -#include - -namespace search { - -class Word { - public: - // Construct a non-terminal. - Word() : entry_(NULL) {} - - explicit Word(const std::pair &entry) { - entry_ = &entry; - } - - // Returns true for two non-terminals even if their labels are different (since we don't care about labels). - bool operator==(const Word &other) const { - return entry_ == other.entry_; - } - - bool Terminal() const { return entry_ != NULL; } - - const std::string &String() const { return entry_->first; } - - lm::WordIndex Index() const { return entry_->second; } - - protected: - friend size_t hash_value(const Word &word); - - const std::pair *Entry() const { return entry_; } - - private: - const std::pair *entry_; -}; - -inline size_t hash_value(const Word &word) { - return boost::hash_value(word.Entry()); -} - -} // namespace search -#endif // SEARCH_WORD__ -- cgit v1.2.3 From 173910593bf6bf3dc52902f99a683560d8c73942 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 12 Sep 2012 15:07:44 +0100 Subject: Add the alone stuff, using a wrapper to the edge class. 
--- klm/alone/Jamfile | 4 ++ klm/alone/assemble.cc | 76 +++++++++++++++++++++++++++ klm/alone/assemble.hh | 21 ++++++++ klm/alone/graph.hh | 87 +++++++++++++++++++++++++++++++ klm/alone/just_vocab.cc | 14 +++++ klm/alone/labeled_edge.hh | 30 +++++++++++ klm/alone/main.cc | 84 ++++++++++++++++++++++++++++++ klm/alone/read.cc | 118 ++++++++++++++++++++++++++++++++++++++++++ klm/alone/read.hh | 29 +++++++++++ klm/alone/threading.cc | 80 ++++++++++++++++++++++++++++ klm/alone/threading.hh | 129 ++++++++++++++++++++++++++++++++++++++++++++++ klm/alone/vocab.cc | 19 +++++++ klm/alone/vocab.hh | 34 ++++++++++++ 13 files changed, 725 insertions(+) create mode 100644 klm/alone/Jamfile create mode 100644 klm/alone/assemble.cc create mode 100644 klm/alone/assemble.hh create mode 100644 klm/alone/graph.hh create mode 100644 klm/alone/just_vocab.cc create mode 100644 klm/alone/labeled_edge.hh create mode 100644 klm/alone/main.cc create mode 100644 klm/alone/read.cc create mode 100644 klm/alone/read.hh create mode 100644 klm/alone/threading.cc create mode 100644 klm/alone/threading.hh create mode 100644 klm/alone/vocab.cc create mode 100644 klm/alone/vocab.hh diff --git a/klm/alone/Jamfile b/klm/alone/Jamfile new file mode 100644 index 00000000..2cc90c05 --- /dev/null +++ b/klm/alone/Jamfile @@ -0,0 +1,4 @@ +lib standalone : assemble.cc read.cc threading.cc vocab.cc ../lm//kenlm ../util//kenutil ../search//search : .. : : .. ../search//search ../lm//kenlm ; + +exe decode : main.cc standalone main.cc : multi:..//boost_thread ; +exe just_vocab : just_vocab.cc standalone : multi:..//boost_thread ; diff --git a/klm/alone/assemble.cc b/klm/alone/assemble.cc new file mode 100644 index 00000000..2ae72ce9 --- /dev/null +++ b/klm/alone/assemble.cc @@ -0,0 +1,76 @@ +#include "alone/assemble.hh" + +#include "alone/labeled_edge.hh" +#include "search/final.hh" + +#include + +namespace alone { + +std::ostream &operator<<(std::ostream &o, const search::Final &final) { + const std::vector &words = static_cast(final.From()).Words(); + if (words.empty()) return o; + const search::Final *const *child = final.Children().data(); + std::vector::const_iterator i(words.begin()); + for (; i != words.end() - 1; ++i) { + if (*i) { + o << **i << ' '; + } else { + o << **child << ' '; + ++child; + } + } + + if (*i) { + if (**i != "") { + o << **i; + } + } else { + o << **child; + } + + return o; +} + +namespace { + +void MakeIndent(std::ostream &o, const char *indent_str, unsigned int level) { + for (unsigned int i = 0; i < level; ++i) + o << indent_str; +} + +void DetailedFinalInternal(std::ostream &o, const search::Final &final, const char *indent_str, unsigned int indent) { + o << "(\n"; + MakeIndent(o, indent_str, indent); + const std::vector &words = static_cast(final.From()).Words(); + const search::Final *const *child = final.Children().data(); + for (std::vector::const_iterator i(words.begin()); i != words.end(); ++i) { + if (*i) { + o << **i; + if (i == words.end() - 1) { + o << '\n'; + MakeIndent(o, indent_str, indent); + } else { + o << ' '; + } + } else { + // One extra indent from the line we're currently on. 
+ o << indent_str; + DetailedFinalInternal(o, **child, indent_str, indent + 1); + for (unsigned int i = 0; i < indent; ++i) o << indent_str; + ++child; + } + } + o << ")=" << final.Bound() << '\n'; +} +} // namespace + +void DetailedFinal(std::ostream &o, const search::Final &final, const char *indent_str) { + DetailedFinalInternal(o, final, indent_str, 0); +} + +void PrintFinal(const search::Final &final) { + std::cout << final << std::endl; +} + +} // namespace alone diff --git a/klm/alone/assemble.hh b/klm/alone/assemble.hh new file mode 100644 index 00000000..e6b0ad5c --- /dev/null +++ b/klm/alone/assemble.hh @@ -0,0 +1,21 @@ +#ifndef ALONE_ASSEMBLE__ +#define ALONE_ASSEMBLE__ + +#include + +namespace search { +class Final; +} // namespace search + +namespace alone { + +std::ostream &operator<<(std::ostream &o, const search::Final &final); + +void DetailedFinal(std::ostream &o, const search::Final &final, const char *indent_str = " "); + +// This isn't called anywhere but makes it easy to print from gdb. +void PrintFinal(const search::Final &final); + +} // namespace alone + +#endif // ALONE_ASSEMBLE__ diff --git a/klm/alone/graph.hh b/klm/alone/graph.hh new file mode 100644 index 00000000..788352c9 --- /dev/null +++ b/klm/alone/graph.hh @@ -0,0 +1,87 @@ +#ifndef ALONE_GRAPH__ +#define ALONE_GRAPH__ + +#include "alone/labeled_edge.hh" +#include "search/rule.hh" +#include "search/types.hh" +#include "search/vertex.hh" +#include "util/exception.hh" + +#include +#include +#include + +namespace alone { + +template class FixedAllocator : boost::noncopyable { + public: + FixedAllocator() : current_(NULL), end_(NULL) {} + + void Init(std::size_t count) { + assert(!current_); + array_.reset(new T[count]); + current_ = array_.get(); + end_ = current_ + count; + } + + T &operator[](std::size_t idx) { + return array_.get()[idx]; + } + + T *New() { + T *ret = current_++; + UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end"); + return ret; + } + + std::size_t Size() const { + return end_ - array_.get(); + } + + private: + boost::scoped_array array_; + T *current_, *end_; +}; + +class Graph : boost::noncopyable { + public: + typedef LabeledEdge Edge; + typedef search::Vertex Vertex; + + Graph() {} + + void SetCounts(std::size_t vertices, std::size_t edges) { + vertices_.Init(vertices); + edges_.Init(edges); + } + + Vertex *NewVertex() { + return vertices_.New(); + } + + std::size_t VertexSize() const { return vertices_.Size(); } + + Vertex &MutableVertex(std::size_t index) { + return vertices_[index]; + } + + Edge *NewEdge() { + return edges_.New(); + } + + std::size_t EdgeSize() const { return edges_.Size(); } + + void SetRoot(Vertex *root) { root_ = root; } + + Vertex &Root() { return *root_; } + + private: + FixedAllocator vertices_; + FixedAllocator edges_; + + Vertex *root_; +}; + +} // namespace alone + +#endif // ALONE_GRAPH__ diff --git a/klm/alone/just_vocab.cc b/klm/alone/just_vocab.cc new file mode 100644 index 00000000..35aea5ed --- /dev/null +++ b/klm/alone/just_vocab.cc @@ -0,0 +1,14 @@ +#include "alone/read.hh" +#include "util/file_piece.hh" + +#include + +int main() { + util::FilePiece f(0, "stdin", &std::cerr); + while (true) { + try { + alone::JustVocab(f, std::cout); + } catch (const util::EndOfFileException &e) { break; } + std::cout << '\n'; + } +} diff --git a/klm/alone/labeled_edge.hh b/klm/alone/labeled_edge.hh new file mode 100644 index 00000000..94d8cbdf --- /dev/null +++ b/klm/alone/labeled_edge.hh @@ -0,0 +1,30 @@ +#ifndef ALONE_LABELED_EDGE__ +#define 
ALONE_LABELED_EDGE__ + +#include "search/edge.hh" + +#include +#include + +namespace alone { + +class LabeledEdge : public search::Edge { + public: + LabeledEdge() {} + + void AppendWord(const std::string *word) { + words_.push_back(word); + } + + const std::vector &Words() const { + return words_; + } + + private: + // NULL for non-terminals. + std::vector words_; +}; + +} // namespace alone + +#endif // ALONE_LABELED_EDGE__ diff --git a/klm/alone/main.cc b/klm/alone/main.cc new file mode 100644 index 00000000..7768b89c --- /dev/null +++ b/klm/alone/main.cc @@ -0,0 +1,84 @@ +#include "alone/threading.hh" +#include "search/config.hh" +#include "search/context.hh" +#include "util/exception.hh" +#include "util/file_piece.hh" +#include "util/usage.hh" + +#include + +#include +#include + +namespace alone { + +template void ReadLoop(const std::string &graph_prefix, Control &control) { + for (unsigned int sentence = 0; ; ++sentence) { + std::stringstream name; + name << graph_prefix << '/' << sentence; + std::auto_ptr file; + try { + file.reset(new util::FilePiece(name.str().c_str())); + } catch (const util::ErrnoException &e) { + if (e.Error() == ENOENT) return; + throw; + } + control.Add(file.release()); + } +} + +template void RunWithModelType(const char *graph_prefix, const char *model_file, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { + Model model(model_file); + search::Config config(weight_str, pop_limit); + + if (threads > 1) { +#ifdef WITH_THREADS + Controller controller(config, model, threads, std::cout); + ReadLoop(graph_prefix, controller); +#else + UTIL_THROW(util::Exception, "Threading support not compiled in."); +#endif + } else { + InThread controller(config, model, std::cout); + ReadLoop(graph_prefix, controller); + } +} + +void Run(const char *graph_prefix, const char *lm_name, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { + lm::ngram::ModelType model_type; + if (!lm::ngram::RecognizeBinary(lm_name, model_type)) model_type = lm::ngram::PROBING; + switch (model_type) { + case lm::ngram::PROBING: + RunWithModelType(graph_prefix, lm_name, weight_str, pop_limit, threads); + break; + case lm::ngram::REST_PROBING: + RunWithModelType(graph_prefix, lm_name, weight_str, pop_limit, threads); + break; + default: + UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); + } +} + +} // namespace alone + +int main(int argc, char *argv[]) { + if (argc < 5 || argc > 6) { + std::cerr << argv[0] << " graph_prefix lm \"weights\" pop [threads]" << std::endl; + return 1; + } + +#ifdef WITH_THREADS + unsigned thread_count = boost::thread::hardware_concurrency(); +#else + unsigned thread_count = 1; +#endif + if (argc == 6) { + thread_count = boost::lexical_cast(argv[5]); + UTIL_THROW_IF(!thread_count, util::Exception, "Thread count 0"); + } + UTIL_THROW_IF(!thread_count, util::Exception, "Boost doesn't know how many threads there are. 
Pass it on the command line."); + alone::Run(argv[1], argv[2], argv[3], boost::lexical_cast(argv[4]), thread_count); + + util::PrintUsage(std::cerr); + return 0; +} diff --git a/klm/alone/read.cc b/klm/alone/read.cc new file mode 100644 index 00000000..0b20be35 --- /dev/null +++ b/klm/alone/read.cc @@ -0,0 +1,118 @@ +#include "alone/read.hh" + +#include "alone/graph.hh" +#include "alone/vocab.hh" +#include "search/arity.hh" +#include "search/context.hh" +#include "search/weights.hh" +#include "util/file_piece.hh" + +#include +#include + +#include + +namespace alone { + +namespace { + +template Graph::Edge &ReadEdge(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab, bool final) { + Graph::Edge *ret = to.NewEdge(); + + StringPiece got; + + std::vector words; + unsigned long int terminals = 0; + while ("|||" != (got = from.ReadDelimited())) { + if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) { + // non-terminal + char *end_ptr; + unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10); + UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, FormatException, "Bad non-terminal" << got); + UTIL_THROW_IF(child >= to.VertexSize(), FormatException, "Reference to vertex " << child << " but we only have " << to.VertexSize() << " vertices. Is the file in bottom-up format?"); + ret->Add(to.MutableVertex(child)); + words.push_back(lm::kMaxWordIndex); + ret->AppendWord(NULL); + } else { + const std::pair &found = vocab.FindOrAdd(got); + words.push_back(found.second); + ret->AppendWord(&found.first); + ++terminals; + } + } + if (final) { + // This is not counted for the word penalty. + words.push_back(vocab.EndSentence().second); + ret->AppendWord(&vocab.EndSentence().first); + } + // Hard-coded word penalty. + float additive = context.GetWeights().DotNoLM(from.ReadLine()) - context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; + ret->InitRule().Init(context, additive, words, final); + unsigned int arity = ret->GetRule().Arity(); + UTIL_THROW_IF(arity > search::kMaxArity, util::Exception, "Edit search/arity.hh and increase " << search::kMaxArity << " to at least " << arity); + return *ret; +} + +} // namespace + +// TODO: refactor +void JustVocab(util::FilePiece &from, std::ostream &out) { + boost::unordered_set seen; + unsigned long int vertices = from.ReadULong(); + from.ReadULong(); // edges + UTIL_THROW_IF(vertices == 0, FormatException, "Vertex count is zero"); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected newline after counts"); + std::string temp; + for (unsigned long int i = 0; i < vertices; ++i) { + unsigned long int edge_count = from.ReadULong(); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected after edge count"); + for (unsigned long int e = 0; e < edge_count; ++e) { + StringPiece got; + while ("|||" != (got = from.ReadDelimited())) { + if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) continue; + temp.assign(got.data(), got.size()); + if (seen.insert(temp).second) out << temp << ' '; + } + from.ReadLine(); // weights + } + } + // Eat sentence + from.ReadLine(); +} + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab) { + unsigned long int vertices; + try { + vertices = from.ReadULong(); + } catch (const util::EndOfFileException &e) { return false; } + unsigned long int edges = from.ReadULong(); + UTIL_THROW_IF(vertices < 2, FormatException, "Vertex count is " << vertices); + UTIL_THROW_IF(edges == 0, FormatException, "Edge count is " << 
edges); + --vertices; + --edges; + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected newline after counts"); + to.SetCounts(vertices, edges); + Graph::Vertex *vertex; + for (unsigned long int i = 0; ; ++i) { + vertex = to.NewVertex(); + unsigned long int edge_count = from.ReadULong(); + bool root = (i == vertices - 1); + UTIL_THROW_IF('\n' != from.get(), FormatException, "Expected after edge count"); + for (unsigned long int e = 0; e < edge_count; ++e) { + vertex->Add(ReadEdge(context, from, to, vocab, root)); + } + vertex->FinishedAdding(); + if (root) break; + } + to.SetRoot(vertex); + StringPiece str = from.ReadLine(); + UTIL_THROW_IF("1" != str, FormatException, "Expected one edge to root"); + // The edge + from.ReadLine(); + return true; +} + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); + +} // namespace alone diff --git a/klm/alone/read.hh b/klm/alone/read.hh new file mode 100644 index 00000000..10769a86 --- /dev/null +++ b/klm/alone/read.hh @@ -0,0 +1,29 @@ +#ifndef ALONE_READ__ +#define ALONE_READ__ + +#include "util/exception.hh" + +#include + +namespace util { class FilePiece; } + +namespace search { template class Context; } + +namespace alone { + +class Graph; +class Vocab; + +class FormatException : public util::Exception { + public: + FormatException() {} + ~FormatException() throw() {} +}; + +void JustVocab(util::FilePiece &from, std::ostream &to); + +template bool ReadCDec(search::Context &context, util::FilePiece &from, Graph &to, Vocab &vocab); + +} // namespace alone + +#endif // ALONE_READ__ diff --git a/klm/alone/threading.cc b/klm/alone/threading.cc new file mode 100644 index 00000000..475386b6 --- /dev/null +++ b/klm/alone/threading.cc @@ -0,0 +1,80 @@ +#include "alone/threading.hh" + +#include "alone/assemble.hh" +#include "alone/graph.hh" +#include "alone/read.hh" +#include "alone/vocab.hh" +#include "lm/model.hh" +#include "search/context.hh" +#include "search/vertex_generator.hh" + +#include +#include +#include + +#include + +namespace alone { +template void Decode(const search::Config &config, const Model &model, util::FilePiece *in_ptr, std::ostream &out) { + search::Context context(config, model); + Graph graph; + Vocab vocab(model.GetVocabulary()); + { + boost::scoped_ptr in(in_ptr); + ReadCDec(context, *in, graph, vocab); + } + + for (std::size_t i = 0; i < graph.VertexSize(); ++i) { + search::VertexGenerator(context, graph.MutableVertex(i)); + } + search::PartialVertex top = graph.Root().RootPartial(); + if (top.Empty()) { + out << "NO PATH FOUND"; + } else { + search::PartialVertex continuation; + while (!top.Complete()) { + top.Split(continuation); + top = continuation; + } + out << top.End() << " ||| " << top.End().Bound() << std::endl; + } +} + +template void Decode(const search::Config &config, const lm::ngram::ProbingModel &model, util::FilePiece *in_ptr, std::ostream &out); +template void Decode(const search::Config &config, const lm::ngram::RestProbingModel &model, util::FilePiece *in_ptr, std::ostream &out); + +#ifdef WITH_THREADS +template void DecodeHandler::operator()(Input message) { + std::stringstream assemble; + Decode(config_, model_, message.file, assemble); + Produce(message.sentence_id, assemble.str()); +} + +template void DecodeHandler::Produce(unsigned int sentence_id, const std::string &str) { + Output out; + out.sentence_id = sentence_id; + out.str = new std::string(str); 
+ out_.Produce(out); +} + +void PrintHandler::operator()(Output message) { + unsigned int relative = message.sentence_id - done_; + if (waiting_.size() <= relative) waiting_.resize(relative + 1); + waiting_[relative] = message.str; + for (std::string *lead; !waiting_.empty() && (lead = waiting_[0]); waiting_.pop_front(), ++done_) { + out_ << *lead; + delete lead; + } +} + +template Controller::Controller(const search::Config &config, const Model &model, size_t decode_workers, std::ostream &to) : + sentence_id_(0), + printer_(decode_workers, 1, boost::ref(to), Output::Poison()), + decoder_(3, decode_workers, boost::in_place(boost::ref(config), boost::ref(model), boost::ref(printer_.In())), Input::Poison()) {} + +template class Controller; +template class Controller; + +#endif + +} // namespace alone diff --git a/klm/alone/threading.hh b/klm/alone/threading.hh new file mode 100644 index 00000000..0ab0f739 --- /dev/null +++ b/klm/alone/threading.hh @@ -0,0 +1,129 @@ +#ifndef ALONE_THREADING__ +#define ALONE_THREADING__ + +#ifdef WITH_THREADS +#include "util/pcqueue.hh" +#include "util/pool.hh" +#endif + +#include +#include +#include + +namespace util { +class FilePiece; +} // namespace util + +namespace search { +class Config; +template class Context; +} // namespace search + +namespace alone { + +template void Decode(const search::Config &config, const Model &model, util::FilePiece *in_ptr, std::ostream &out); + +class Graph; + +#ifdef WITH_THREADS +struct SentenceID { + unsigned int sentence_id; + bool operator==(const SentenceID &other) const { + return sentence_id == other.sentence_id; + } +}; + +struct Input : public SentenceID { + util::FilePiece *file; + static Input Poison() { + Input ret; + ret.sentence_id = static_cast(-1); + ret.file = NULL; + return ret; + } +}; + +struct Output : public SentenceID { + std::string *str; + static Output Poison() { + Output ret; + ret.sentence_id = static_cast(-1); + ret.str = NULL; + return ret; + } +}; + +template class DecodeHandler { + public: + typedef Input Request; + + DecodeHandler(const search::Config &config, const Model &model, util::PCQueue &out) : config_(config), model_(model), out_(out) {} + + void operator()(Input message); + + private: + void Produce(unsigned int sentence_id, const std::string &str); + + const search::Config &config_; + + const Model &model_; + + util::PCQueue &out_; +}; + +class PrintHandler { + public: + typedef Output Request; + + explicit PrintHandler(std::ostream &o) : out_(o), done_(0) {} + + void operator()(Output message); + + private: + std::ostream &out_; + std::deque waiting_; + unsigned int done_; +}; + +template class Controller { + public: + // This config must remain valid. + explicit Controller(const search::Config &config, const Model &model, size_t decode_workers, std::ostream &to); + + // Takes ownership of in. + void Add(util::FilePiece *in) { + Input input; + input.sentence_id = sentence_id_++; + input.file = in; + decoder_.Produce(input); + } + + private: + unsigned int sentence_id_; + + util::Pool printer_; + + util::Pool > decoder_; +}; +#endif + +// Same API as controller. +template class InThread { + public: + InThread(const search::Config &config, const Model &model, std::ostream &to) : config_(config), model_(model), to_(to) {} + + // Takes ownership of in. 
+ void Add(util::FilePiece *in) { + Decode(config_, model_, in, to_); + } + + private: + const search::Config &config_; + + const Model &model_; + + std::ostream &to_; +}; + +} // namespace alone +#endif // ALONE_THREADING__ diff --git a/klm/alone/vocab.cc b/klm/alone/vocab.cc new file mode 100644 index 00000000..ffe55301 --- /dev/null +++ b/klm/alone/vocab.cc @@ -0,0 +1,19 @@ +#include "alone/vocab.hh" + +#include "lm/virtual_interface.hh" +#include "util/string_piece.hh" + +namespace alone { + +Vocab::Vocab(const lm::base::Vocabulary &backing) : backing_(backing), end_sentence_(FindOrAdd("")) {} + +const std::pair &Vocab::FindOrAdd(const StringPiece &str) { + Map::const_iterator i(FindStringPiece(map_, str)); + if (i != map_.end()) return *i; + std::pair to_ins; + to_ins.first.assign(str.data(), str.size()); + to_ins.second = backing_.Index(str); + return *map_.insert(to_ins).first; +} + +} // namespace alone diff --git a/klm/alone/vocab.hh b/klm/alone/vocab.hh new file mode 100644 index 00000000..3ac0f542 --- /dev/null +++ b/klm/alone/vocab.hh @@ -0,0 +1,34 @@ +#ifndef ALONE_VOCAB__ +#define ALONE_VOCAB__ + +#include "lm/word_index.hh" +#include "util/string_piece.hh" + +#include +#include + +#include + +namespace lm { namespace base { class Vocabulary; } } + +namespace alone { + +class Vocab { + public: + explicit Vocab(const lm::base::Vocabulary &backing); + + const std::pair &FindOrAdd(const StringPiece &str); + + const std::pair &EndSentence() const { return end_sentence_; } + + private: + typedef boost::unordered_map Map; + Map map_; + + const lm::base::Vocabulary &backing_; + + const std::pair &end_sentence_; +}; + +} // namespace alone +#endif // ALONE_VCOAB__ -- cgit v1.2.3 From f794c881ccaf4aa0646300dea1e1f6e7a307e019 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 12 Sep 2012 18:36:03 +0100 Subject: Partially written bridge to lazy --- decoder/lazy.cc | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ decoder/lazy.h | 8 ++++ 2 files changed, 130 insertions(+) create mode 100644 decoder/lazy.cc create mode 100644 decoder/lazy.h diff --git a/decoder/lazy.cc b/decoder/lazy.cc new file mode 100644 index 00000000..f5b61c75 --- /dev/null +++ b/decoder/lazy.cc @@ -0,0 +1,122 @@ +#include "hg.h" +#include "lazy.h" +#include "tdict.h" + +#include "lm/enumerate_vocab.hh" +#include "lm/model.hh" +#include "search/edge.hh" +#include "search/vertex.hh" +#include "util/exception.hh" + +#include + +namespace { + +struct MapVocab : public lm::EnumerateVocab { + public: + MapVocab() {} + + // Do not call after Lookup. + void Add(lm::WordIndex index, const StringPiece &str) { + const WordID cdec_id = TD::Convert(str.as_string()); + if (cdec_id >= out_->size()) out_.resize(cdec_id + 1); + out_[cdec_id] = index; + } + + // Assumes Add has been called and will never be called again. + lm::WordIndex FromCDec(WordID id) const { + return out_[out.size() > id ? 
id : 0]; + } + + private: + std::vector out_; +}; + +class LazyBase { + public: + LazyBase() {} + + virtual ~LazyBase() {} + + virtual void Search(const Hypergraph &hg) const = 0; + + static LazyBase *Load(const char *model_file); + + protected: + lm::ngram::Config GetConfig() const { + lm::ngram::Config ret; + ret.enumerate_vocab = &vocab_; + return ret; + } + + MapVocab vocab_; +}; + +template class Lazy : public LazyBase { + public: + explicit Lazy(const char *model_file) : m_(model_file, GetConfig()) {} + + void Search(const Hypergraph &hg) const; + + private: + void ConvertEdge(const Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; + + const Model m_; +}; + +static LazyBase *LazyBase::Load(const char *model_file) { + lm::ngram::ModelType model_type; + if (!lm::ngram::RecognizeBinary(lm_name, model_type)) model_type = lm::ngram::PROBING; + switch (model_type) { + case lm::ngram::PROBING: + return new Lazy(model_file); + case lm::ngram::REST_PROBING: + return new Lazy(model_file); + default: + UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); + } +} + +template void Lazy::Search(const Hypergraph &hg) const { + boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); + boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); + for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { + search::Vertex *out_vertex = out_vertices[i]; + const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; + for (unsigned int j = 0; j < edges.size(); ++j) { + unsigned int edge_index = down_edges[j]; + const Hypergraph::Edge &in_edge = hg.edges_[edge_index]; + search::Edge &out_edge = out_edges[edge_index]; + } + } +} + +// TODO: get weights into here somehow. +template void Lazy::ConvertEdge(const Context &context, bool final, search::Vertices *vertices, const Hypergraph::Edge &in, search::Edge &out) const { + const std::vector &e = in_edge.rule_->e(); + std::vector words; + unsigned int terminals = 0; + for (std::vector::const_iterator word = e.begin(); word != e.end(); ++word) { + if (*word <= 0) { + out.Add(vertices[edge.tail_nodes_[-*word]]); + words.push_back(lm::kMaxWordIndex); + } else { + ++terminals; + words.push_back(vocab_.FromCDec(*word)); + } + } + + if (final) { + words.push_back(m_.GetVocabulary().EndSentence()); + } + + float additive = edge.rule_->GetFeatureValues().dot(weight_vector); + + out.InitRule().Init(context, additive, words, final); +} + +} // namespace + +void PassToLazy(const Hypergraph &hg) { + +} diff --git a/decoder/lazy.h b/decoder/lazy.h new file mode 100644 index 00000000..aecd030d --- /dev/null +++ b/decoder/lazy.h @@ -0,0 +1,8 @@ +#ifndef _LAZY_H_ +#define _LAZY_H_ + +class Hypergraph; + +void PassToLazy(const Hypergraph &hg); + +#endif // _LAZY_H_ -- cgit v1.2.3 From 816f33174b2c2938a3df9c75b2834da6f5184a3e Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 11:15:32 +0100 Subject: It compiles. 
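The diff below wires the bridge up so it compiles. The central conversion is in ConvertEdge: cdec stores a rule's target side as integers where values less than or equal to zero index the non-terminal tail nodes and positive values are terminal word ids, and the bridge turns that into one LM word index per slot, with a sentinel marking non-terminals and a running terminal count used for the word penalty. A minimal sketch of that mapping, with invented stand-in names (the real code uses MapVocab::FromCDec, lm::kMaxWordIndex, and Hypergraph::Edge):

#include <climits>
#include <iostream>
#include <vector>

typedef unsigned int SketchWordIndex;
const SketchWordIndex kSketchNonTerminal = UINT_MAX;  // stand-in for lm::kMaxWordIndex

// Hypothetical cdec-id to LM-vocabulary-id lookup; the patch uses MapVocab::FromCDec.
SketchWordIndex SketchLookup(int cdec_id) {
  return static_cast<SketchWordIndex>(cdec_id);
}

// Map a cdec target side to LM word indices, counting terminals for the word penalty.
std::vector<SketchWordIndex> ConvertTarget(const std::vector<int>& target,
                                           unsigned int* terminals) {
  std::vector<SketchWordIndex> words;
  *terminals = 0;
  for (std::vector<int>::const_iterator w = target.begin(); w != target.end(); ++w) {
    if (*w <= 0) {
      // Non-terminal: this slot is filled later by tail node -*w of the hypergraph edge.
      words.push_back(kSketchNonTerminal);
    } else {
      ++*terminals;
      words.push_back(SketchLookup(*w));
    }
  }
  return words;
}

int main() {
  // e.g. a rule whose target side is "[X,1] said [X,2]": ids 0 and -1 are the
  // two non-terminal slots, 42 is a terminal word id.
  std::vector<int> target;
  target.push_back(0);
  target.push_back(42);
  target.push_back(-1);
  unsigned int terminals;
  std::vector<SketchWordIndex> words = ConvertTarget(target, &terminals);
  std::cout << "terminals=" << terminals << " slots=" << words.size() << '\n';
  return 0;
}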
--- decoder/Jamfile | 2 ++ decoder/decoder.cc | 4 +++ decoder/lazy.cc | 78 +++++++++++++++++++++++++++++++++++++-------------- decoder/lazy.h | 5 +++- klm/search/config.hh | 6 ++-- klm/search/weights.cc | 2 ++ klm/search/weights.hh | 17 ++++++----- 7 files changed, 82 insertions(+), 32 deletions(-) diff --git a/decoder/Jamfile b/decoder/Jamfile index da02d063..d778dc7f 100644 --- a/decoder/Jamfile +++ b/decoder/Jamfile @@ -58,10 +58,12 @@ lib decoder : rescore_translator.cc hg_remove_eps.cc hg_union.cc + lazy.cc $(glc) ..//utils ..//mteval ../klm/lm//kenlm + ../klm/search//search ..//boost_program_options : . : : diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a69a6d05..3a410cf2 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -38,6 +38,7 @@ #include "sampler.h" #include "forest_writer.h" // TODO this section should probably be handled by an Observer +#include "lazy.h" #include "hg_io.h" #include "aligner.h" @@ -832,6 +833,9 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (conf.count("show_target_graph")) HypergraphIO::WriteTarget(conf["show_target_graph"].as(), sent_id, forest); + if (conf.count("lazy_search")) + PassToLazy(forest, CurrentWeightVector()); + for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; const vector& cur_weights = *rp.weight_vector; diff --git a/decoder/lazy.cc b/decoder/lazy.cc index f5b61c75..4776c1b8 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -1,15 +1,23 @@ #include "hg.h" #include "lazy.h" +#include "fdict.h" #include "tdict.h" #include "lm/enumerate_vocab.hh" #include "lm/model.hh" +#include "search/config.hh" +#include "search/context.hh" #include "search/edge.hh" #include "search/vertex.hh" +#include "search/vertex_generator.hh" #include "util/exception.hh" +#include #include +#include +#include + namespace { struct MapVocab : public lm::EnumerateVocab { @@ -19,13 +27,13 @@ struct MapVocab : public lm::EnumerateVocab { // Do not call after Lookup. void Add(lm::WordIndex index, const StringPiece &str) { const WordID cdec_id = TD::Convert(str.as_string()); - if (cdec_id >= out_->size()) out_.resize(cdec_id + 1); + if (cdec_id >= out_.size()) out_.resize(cdec_id + 1); out_[cdec_id] = index; } // Assumes Add has been called and will never be called again. lm::WordIndex FromCDec(WordID id) const { - return out_[out.size() > id ? id : 0]; + return out_[out_.size() > id ? 
id : 0]; } private: @@ -34,44 +42,50 @@ struct MapVocab : public lm::EnumerateVocab { class LazyBase { public: - LazyBase() {} + LazyBase(const std::vector &weights) : + cdec_weights_(weights), + config_(search::Weights(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]), 1000) {} virtual ~LazyBase() {} virtual void Search(const Hypergraph &hg) const = 0; - static LazyBase *Load(const char *model_file); + static LazyBase *Load(const char *model_file, const std::vector &weights); protected: - lm::ngram::Config GetConfig() const { + lm::ngram::Config GetConfig() { lm::ngram::Config ret; ret.enumerate_vocab = &vocab_; return ret; } MapVocab vocab_; + + const std::vector &cdec_weights_; + + const search::Config config_; }; template class Lazy : public LazyBase { public: - explicit Lazy(const char *model_file) : m_(model_file, GetConfig()) {} + Lazy(const char *model_file, const std::vector &weights) : LazyBase(weights), m_(model_file, GetConfig()) {} void Search(const Hypergraph &hg) const; private: - void ConvertEdge(const Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; + void ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; const Model m_; }; -static LazyBase *LazyBase::Load(const char *model_file) { +LazyBase *LazyBase::Load(const char *model_file, const std::vector &weights) { lm::ngram::ModelType model_type; - if (!lm::ngram::RecognizeBinary(lm_name, model_type)) model_type = lm::ngram::PROBING; + if (!lm::ngram::RecognizeBinary(model_file, model_type)) model_type = lm::ngram::PROBING; switch (model_type) { case lm::ngram::PROBING: - return new Lazy(model_file); + return new Lazy(model_file, weights); case lm::ngram::REST_PROBING: - return new Lazy(model_file); + return new Lazy(model_file, weights); default: UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); } @@ -80,25 +94,41 @@ static LazyBase *LazyBase::Load(const char *model_file) { template void Lazy::Search(const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); + + search::Context context(config_, m_); + for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { - search::Vertex *out_vertex = out_vertices[i]; + search::Vertex &out_vertex = out_vertices[i]; const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; - for (unsigned int j = 0; j < edges.size(); ++j) { + for (unsigned int j = 0; j < down_edges.size(); ++j) { unsigned int edge_index = down_edges[j]; - const Hypergraph::Edge &in_edge = hg.edges_[edge_index]; - search::Edge &out_edge = out_edges[edge_index]; + ConvertEdge(context, i == hg.nodes_.size() - 1, out_vertices.get(), hg.edges_[edge_index], out_edges[edge_index]); + out_vertex.Add(out_edges[edge_index]); } + out_vertex.FinishedAdding(); + search::VertexGenerator(context, out_vertex); + } + search::PartialVertex top = out_vertices[hg.nodes_.size() - 1].RootPartial(); + if (top.Empty()) { + std::cout << "NO PATH FOUND"; + } else { + search::PartialVertex continuation; + while (!top.Complete()) { + top.Split(continuation); + top = continuation; + } + std::cout << top.End().Bound() << std::endl; } } // TODO: get weights into here somehow. 
-template void Lazy::ConvertEdge(const Context &context, bool final, search::Vertices *vertices, const Hypergraph::Edge &in, search::Edge &out) const { - const std::vector &e = in_edge.rule_->e(); +template void Lazy::ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const { + const std::vector &e = in.rule_->e(); std::vector words; unsigned int terminals = 0; for (std::vector::const_iterator word = e.begin(); word != e.end(); ++word) { if (*word <= 0) { - out.Add(vertices[edge.tail_nodes_[-*word]]); + out.Add(vertices[in.tail_nodes_[-*word]]); words.push_back(lm::kMaxWordIndex); } else { ++terminals; @@ -110,13 +140,19 @@ template void Lazy::ConvertEdge(const Context &conte words.push_back(m_.GetVocabulary().EndSentence()); } - float additive = edge.rule_->GetFeatureValues().dot(weight_vector); + float additive = in.rule_->GetFeatureValues().dot(cdec_weights_); + additive -= terminals * context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; out.InitRule().Init(context, additive, words, final); } -} // namespace +boost::scoped_ptr AwfulGlobalLazy; -void PassToLazy(const Hypergraph &hg) { +} // namespace +void PassToLazy(const Hypergraph &hg, const std::vector &weights) { + if (!AwfulGlobalLazy.get()) { + AwfulGlobalLazy.reset(LazyBase::Load("lm", weights)); + } + AwfulGlobalLazy->Search(hg); } diff --git a/decoder/lazy.h b/decoder/lazy.h index aecd030d..3e71a3b0 100644 --- a/decoder/lazy.h +++ b/decoder/lazy.h @@ -1,8 +1,11 @@ #ifndef _LAZY_H_ #define _LAZY_H_ +#include "weights.h" +#include + class Hypergraph; -void PassToLazy(const Hypergraph &hg); +void PassToLazy(const Hypergraph &hg, const std::vector &weights); #endif // _LAZY_H_ diff --git a/klm/search/config.hh b/klm/search/config.hh index e21e4b7c..ef8e2354 100644 --- a/klm/search/config.hh +++ b/klm/search/config.hh @@ -8,15 +8,15 @@ namespace search { class Config { public: - Config(StringPiece weight_str, unsigned int pop_limit) : - weights_(weight_str), pop_limit_(pop_limit) {} + Config(const Weights &weights, unsigned int pop_limit) : + weights_(weights), pop_limit_(pop_limit) {} const Weights &GetWeights() const { return weights_; } unsigned int PopLimit() const { return pop_limit_; } private: - search::Weights weights_; + Weights weights_; unsigned int pop_limit_; }; diff --git a/klm/search/weights.cc b/klm/search/weights.cc index 82ff3f12..d65471ad 100644 --- a/klm/search/weights.cc +++ b/klm/search/weights.cc @@ -49,6 +49,8 @@ Weights::Weights(StringPiece text) { word_penalty_ = Steal("WordPenalty"); } +Weights::Weights(Score lm, Score oov, Score word_penalty) : lm_(lm), oov_(oov), word_penalty_(word_penalty) {} + search::Score Weights::DotNoLM(StringPiece text) const { DotProduct dot; Parse(text, map_, dot); diff --git a/klm/search/weights.hh b/klm/search/weights.hh index 4a4388c7..df1c419f 100644 --- a/klm/search/weights.hh +++ b/klm/search/weights.hh @@ -23,25 +23,28 @@ class Weights { // Parses weights, sets lm_weight_, removes it from map_. explicit Weights(StringPiece text); - search::Score DotNoLM(StringPiece text) const; + // Just the three scores we care about adding. 
+ Weights(Score lm, Score oov, Score word_penalty); - search::Score LM() const { return lm_; } + Score DotNoLM(StringPiece text) const; - search::Score OOV() const { return oov_; } + Score LM() const { return lm_; } - search::Score WordPenalty() const { return word_penalty_; } + Score OOV() const { return oov_; } + + Score WordPenalty() const { return word_penalty_; } // Mostly for testing. - const boost::unordered_map &GetMap() const { return map_; } + const boost::unordered_map &GetMap() const { return map_; } private: float Steal(const std::string &str); - typedef boost::unordered_map Map; + typedef boost::unordered_map Map; Map map_; - search::Score lm_, oov_, word_penalty_; + Score lm_, oov_, word_penalty_; }; } // namespace search -- cgit v1.2.3 From a548c46f3d636093d26f44d5b7642c98def6f351 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 03:54:03 -0700 Subject: Fine remove the length check --- klm/lm/read_arpa.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 174bd3a3..b709fef9 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -38,7 +38,6 @@ uint64_t ReadCount(const std::string &from) { uint64_t ret; stream >> ret; UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from); - UTIL_THROW_IF(static_cast(stream.tellg()) != from.size(), FormatLoadException, "Extra content in count: '" << from << "'"); return ret; } -- cgit v1.2.3 From 18c5b12399eb078dbb8c764a205caf9c610f9a2f Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 04:28:30 -0700 Subject: Allow lm file name, print weights --- decoder/decoder.cc | 3 ++- decoder/lazy.cc | 10 +++++++--- decoder/lazy.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 3a410cf2..525c6ba6 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -416,6 +416,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation") ("show_cfg_search_space", "Show the search space as a CFG") ("show_target_graph", po::value(), "Directory to write the target hypergraphs to") + ("lazy_search", po::value(), "Run lazy search with this language model file") ("coarse_to_fine_beam_prune", po::value(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)") ("ctf_beam_widen", po::value()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found") ("ctf_num_widenings", po::value()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -834,7 +835,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { HypergraphIO::WriteTarget(conf["show_target_graph"].as(), sent_id, forest); if (conf.count("lazy_search")) - PassToLazy(forest, CurrentWeightVector()); + PassToLazy(conf["lazy_search"].as().c_str(), CurrentWeightVector(), forest); for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; diff --git a/decoder/lazy.cc b/decoder/lazy.cc index 4776c1b8..58a9e08a 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -44,7 +44,9 @@ class LazyBase { public: LazyBase(const std::vector &weights) : cdec_weights_(weights), - config_(search::Weights(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]), 1000) {} + 
config_(search::Weights(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]), 1000) { + std::cerr << "Weights KLanguageModel " << config_.GetWeights().LM() << " KLanguageModel_OOV " << config_.GetWeights().OOV() << " WordPenalty " << config_.GetWeights().WordPenalty() << std::endl; + } virtual ~LazyBase() {} @@ -95,6 +97,7 @@ template void Lazy::Search(const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); + search::Context context(config_, m_); for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { @@ -141,6 +144,7 @@ template void Lazy::ConvertEdge(const search::ContextGetFeatureValues().dot(cdec_weights_); + UTIL_THROW_IF(isnan(additive), util::Exception, "Bad dot product"); additive -= terminals * context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; out.InitRule().Init(context, additive, words, final); @@ -150,9 +154,9 @@ boost::scoped_ptr AwfulGlobalLazy; } // namespace -void PassToLazy(const Hypergraph &hg, const std::vector &weights) { +void PassToLazy(const char *model_file, const std::vector &weights, const Hypergraph &hg) { if (!AwfulGlobalLazy.get()) { - AwfulGlobalLazy.reset(LazyBase::Load("lm", weights)); + AwfulGlobalLazy.reset(LazyBase::Load(model_file, weights)); } AwfulGlobalLazy->Search(hg); } diff --git a/decoder/lazy.h b/decoder/lazy.h index 3e71a3b0..d1f030d1 100644 --- a/decoder/lazy.h +++ b/decoder/lazy.h @@ -6,6 +6,6 @@ class Hypergraph; -void PassToLazy(const Hypergraph &hg, const std::vector &weights); +void PassToLazy(const char *model_file, const std::vector &weights, const Hypergraph &hg); #endif // _LAZY_H_ -- cgit v1.2.3 From 8b824d1b940dc8b19e627dc476fdacfa92d75d34 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 13:19:11 +0100 Subject: Apparently cdec works on Boost 1.42.0 --- Jamroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jamroot b/Jamroot index 738b83e3..8f6629ea 100644 --- a/Jamroot +++ b/Jamroot @@ -10,7 +10,7 @@ path-constant TOP : . 
; include $(TOP)/jam-files/sanity.jam ; -boost 104400 ; +boost 104200 ; external-lib z ; with-google-hash = [ option.get "with-google-hash" ] ; -- cgit v1.2.3 From 24f29a96f5ab8c9a4f71bf53ee100d0ad2761694 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 14:15:03 +0100 Subject: Fix word penalty --- decoder/lazy.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/decoder/lazy.cc b/decoder/lazy.cc index 58a9e08a..9d69dac6 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -93,11 +93,21 @@ LazyBase *LazyBase::Load(const char *model_file, const std::vector &we } } +void PrintFinal(const Hypergraph &hg, const search::Edge *edge_base, const search::Final &final) { + const std::vector &words = hg.edges_[&final.From() - edge_base].rule_->e(); + boost::array::const_iterator child(final.Children().begin()); + for (std::vector::const_iterator i = words.begin(); i != words.end(); ++i) { + if (*i > 0) { + std::cout << TD::Convert(*i) << ' '; + } else { + PrintFinal(hg, edge_base, **child++); + } + } +} + template void Lazy::Search(const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); - - search::Context context(config_, m_); for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { @@ -120,7 +130,8 @@ template void Lazy::Search(const Hypergraph &hg) const { top.Split(continuation); top = continuation; } - std::cout << top.End().Bound() << std::endl; + PrintFinal(hg, out_edges.get(), top.End()); + std::cout << "||| " << top.End().Bound() << std::endl; } } @@ -145,7 +156,7 @@ template void Lazy::ConvertEdge(const search::ContextGetFeatureValues().dot(cdec_weights_); UTIL_THROW_IF(isnan(additive), util::Exception, "Bad dot product"); - additive -= terminals * context.GetWeights().WordPenalty() * static_cast(terminals) / M_LN10; + additive -= static_cast(terminals) * context.GetWeights().WordPenalty() / M_LN10; out.InitRule().Init(context, additive, words, final); } -- cgit v1.2.3 From ac3011136c455db79fcc06707908ce729195d510 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 06:36:28 -0700 Subject: Fix compilation with static --- dpmert/Jamfile | 4 ++-- jam-files/sanity.jam | 2 +- mira/Jamfile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dpmert/Jamfile b/dpmert/Jamfile index 647caac8..6f6a3d40 100644 --- a/dpmert/Jamfile +++ b/dpmert/Jamfile @@ -9,14 +9,14 @@ lib dpmert : mert_geometry.cc ..//utils ..//mteval - ..//decoder + ../decoder//decoder ../klm/lm//kenlm ..//boost_program_options : . : : ..//utils ..//mteval - ..//decoder + ../decoder//decoder ../klm/lm//kenlm ..//boost_program_options . diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index 086f20ae..738316f8 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -63,7 +63,7 @@ requirements = ; FORCE-STATIC = [ option.get "static" : : "yes" ] ; if $(FORCE-STATIC) { - requirements += static ; + requirements += static static ; } #Determine if a library can be compiled statically. 
diff --git a/mira/Jamfile b/mira/Jamfile index 8825b887..a483e57d 100644 --- a/mira/Jamfile +++ b/mira/Jamfile @@ -1 +1 @@ -exe kbest_mira : kbest_mira.cc ..//decoder ; +exe kbest_mira : kbest_mira.cc ../decoder//decoder ; -- cgit v1.2.3 From 9dd62d50667a6b34198ef6919042f010fd2e5c8e Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 13 Sep 2012 07:53:16 -0700 Subject: Steal cubepruning_pop_limit command line argument --- decoder/decoder.cc | 2 +- decoder/lazy.cc | 19 ++++++++++--------- decoder/lazy.h | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 525c6ba6..83077a68 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -835,7 +835,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { HypergraphIO::WriteTarget(conf["show_target_graph"].as(), sent_id, forest); if (conf.count("lazy_search")) - PassToLazy(conf["lazy_search"].as().c_str(), CurrentWeightVector(), forest); + PassToLazy(conf["lazy_search"].as().c_str(), CurrentWeightVector(), pop_limit, forest); for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; diff --git a/decoder/lazy.cc b/decoder/lazy.cc index 9d69dac6..0f12a1ff 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -44,13 +44,13 @@ class LazyBase { public: LazyBase(const std::vector &weights) : cdec_weights_(weights), - config_(search::Weights(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]), 1000) { - std::cerr << "Weights KLanguageModel " << config_.GetWeights().LM() << " KLanguageModel_OOV " << config_.GetWeights().OOV() << " WordPenalty " << config_.GetWeights().WordPenalty() << std::endl; + weights_(weights[FD::Convert("KLanguageModel")], weights[FD::Convert("KLanguageModel_OOV")], weights[FD::Convert("WordPenalty")]) { + std::cerr << "Weights KLanguageModel " << weights_.LM() << " KLanguageModel_OOV " << weights_.OOV() << " WordPenalty " << weights_.WordPenalty() << std::endl; } virtual ~LazyBase() {} - virtual void Search(const Hypergraph &hg) const = 0; + virtual void Search(unsigned int pop_limit, const Hypergraph &hg) const = 0; static LazyBase *Load(const char *model_file, const std::vector &weights); @@ -65,14 +65,14 @@ class LazyBase { const std::vector &cdec_weights_; - const search::Config config_; + const search::Weights weights_; }; template class Lazy : public LazyBase { public: Lazy(const char *model_file, const std::vector &weights) : LazyBase(weights), m_(model_file, GetConfig()) {} - void Search(const Hypergraph &hg) const; + void Search(unsigned int pop_limit, const Hypergraph &hg) const; private: void ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::Edge &out) const; @@ -105,10 +105,11 @@ void PrintFinal(const Hypergraph &hg, const search::Edge *edge_base, const searc } } -template void Lazy::Search(const Hypergraph &hg) const { +template void Lazy::Search(unsigned int pop_limit, const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); boost::scoped_array out_edges(new search::Edge[hg.edges_.size()]); - search::Context context(config_, m_); + search::Config config(weights_, pop_limit); + search::Context context(config, m_); for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { search::Vertex &out_vertex = out_vertices[i]; @@ -165,9 +166,9 @@ boost::scoped_ptr AwfulGlobalLazy; } // namespace -void 
PassToLazy(const char *model_file, const std::vector &weights, const Hypergraph &hg) { +void PassToLazy(const char *model_file, const std::vector &weights, unsigned int pop_limit, const Hypergraph &hg) { if (!AwfulGlobalLazy.get()) { AwfulGlobalLazy.reset(LazyBase::Load(model_file, weights)); } - AwfulGlobalLazy->Search(hg); + AwfulGlobalLazy->Search(pop_limit, hg); } diff --git a/decoder/lazy.h b/decoder/lazy.h index d1f030d1..94895b19 100644 --- a/decoder/lazy.h +++ b/decoder/lazy.h @@ -6,6 +6,6 @@ class Hypergraph; -void PassToLazy(const char *model_file, const std::vector &weights, const Hypergraph &hg); +void PassToLazy(const char *model_file, const std::vector &weights, unsigned int pop_limit, const Hypergraph &hg); #endif // _LAZY_H_ -- cgit v1.2.3 From 876d2b6cdada8422e8fac40a058ce68bd9c6e631 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 14 Sep 2012 06:26:17 -0700 Subject: Skip top node, just integrate and with the top edges. --- decoder/lazy.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/decoder/lazy.cc b/decoder/lazy.cc index 0f12a1ff..c4138d7b 100644 --- a/decoder/lazy.cc +++ b/decoder/lazy.cc @@ -111,18 +111,18 @@ template void Lazy::Search(unsigned int pop_limit, const Hy search::Config config(weights_, pop_limit); search::Context context(config, m_); - for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { + for (unsigned int i = 0; i < hg.nodes_.size() - 1; ++i) { search::Vertex &out_vertex = out_vertices[i]; const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; for (unsigned int j = 0; j < down_edges.size(); ++j) { unsigned int edge_index = down_edges[j]; - ConvertEdge(context, i == hg.nodes_.size() - 1, out_vertices.get(), hg.edges_[edge_index], out_edges[edge_index]); + ConvertEdge(context, i == hg.nodes_.size() - 2, out_vertices.get(), hg.edges_[edge_index], out_edges[edge_index]); out_vertex.Add(out_edges[edge_index]); } out_vertex.FinishedAdding(); search::VertexGenerator(context, out_vertex); } - search::PartialVertex top = out_vertices[hg.nodes_.size() - 1].RootPartial(); + search::PartialVertex top = out_vertices[hg.nodes_.size() - 2].RootPartial(); if (top.Empty()) { std::cout << "NO PATH FOUND"; } else { @@ -168,6 +168,7 @@ boost::scoped_ptr AwfulGlobalLazy; void PassToLazy(const char *model_file, const std::vector &weights, unsigned int pop_limit, const Hypergraph &hg) { if (!AwfulGlobalLazy.get()) { + std::cerr << "Pop limit " << pop_limit << std::endl; AwfulGlobalLazy.reset(LazyBase::Load(model_file, weights)); } AwfulGlobalLazy->Search(pop_limit, hg); -- cgit v1.2.3 From dfed8371aefe3ed552a755732f7e7a5126a40629 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 24 Sep 2012 15:55:40 +0100 Subject: Fix up compilation of standalone --- klm/alone/main.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/klm/alone/main.cc b/klm/alone/main.cc index 7768b89c..e09ab01d 100644 --- a/klm/alone/main.cc +++ b/klm/alone/main.cc @@ -29,7 +29,8 @@ template void ReadLoop(const std::string &graph_prefix, Control template void RunWithModelType(const char *graph_prefix, const char *model_file, StringPiece weight_str, unsigned int pop_limit, unsigned int threads) { Model model(model_file); - search::Config config(weight_str, pop_limit); + search::Weights weights(weight_str); + search::Config config(weights, pop_limit); if (threads > 1) { #ifdef WITH_THREADS -- cgit v1.2.3 From 4ee3f6368fb5698a07af98c158ebc75f819ec5e4 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 1 Oct 2012 09:42:43 -0700 
Subject: Add memory size logging --- decoder/cdec.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index c671af57..25d3b6af 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -4,6 +4,7 @@ #include "decoder.h" #include "ff_register.h" #include "verbose.h" +#include "util/usage.hh" using namespace std; @@ -38,6 +39,7 @@ int main(int argc, char** argv) { cout << FD::Convert(i) << endl; } } + util::PrintUsage(std::cerr); return 0; } -- cgit v1.2.3 From 0870d4a1f5e14cc7daf553b180d599f09f6614a2 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 1 Oct 2012 22:01:07 -0400 Subject: mbr fix for non-deduped lists --- mteval/mbr_kbest.cc | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/mteval/mbr_kbest.cc b/mteval/mbr_kbest.cc index 2bd31566..2519bc01 100644 --- a/mteval/mbr_kbest.cc +++ b/mteval/mbr_kbest.cc @@ -1,7 +1,9 @@ #include #include +#include #include +#include #include "prob.h" #include "tdict.h" @@ -10,6 +12,7 @@ #include "stringlib.h" using namespace std; +using namespace std::tr1; namespace po = boost::program_options; @@ -31,27 +34,33 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } } +struct ScoreComparer { + bool operator()(const pair, prob_t>& a, const pair, prob_t>& b) const { + return a.second > b.second; + } +}; + struct LossComparer { bool operator()(const pair, prob_t>& a, const pair, prob_t>& b) const { return a.second < b.second; } }; -bool ReadKBestList(istream* in, string* sent_id, vector, prob_t> >* list) { +bool ReadKBestList(const double mbr_scale, istream* in, string* sent_id, vector, prob_t> >* list) { static string cache_id; static pair, prob_t> cache_pair; list->clear(); string cur_id; + unordered_map, unsigned, boost::hash > > sent2id; if (cache_pair.first.size() > 0) { list->push_back(cache_pair); + sent2id[cache_pair.first] = 0; cur_id = cache_id; cache_pair.first.clear(); } string line; string tstr; - while(*in) { - getline(*in, line); - if (line.empty()) continue; + while(getline(*in, line)) { size_t p1 = line.find(" ||| "); if (p1 == string::npos) { cerr << "Bad format: " << line << endl; abort(); } size_t p2 = line.find(" ||| ", p1 + 4); @@ -59,16 +68,25 @@ bool ReadKBestList(istream* in, string* sent_id, vector, pro size_t p3 = line.rfind(" ||| "); cache_id = line.substr(0, p1); tstr = line.substr(p1 + 5, p2 - p1 - 5); - double val = strtod(line.substr(p3 + 5).c_str(), NULL); + double val = strtod(line.substr(p3 + 5).c_str(), NULL) * mbr_scale; TD::ConvertSentence(tstr, &cache_pair.first); cache_pair.second.logeq(val); if (cur_id.empty()) cur_id = cache_id; if (cur_id == cache_id) { - list->push_back(cache_pair); + unordered_map, unsigned, boost::hash > >::iterator it = + sent2id.find(cache_pair.first); + if (it == sent2id.end()) { + sent2id.insert(make_pair(cache_pair.first, unsigned(list->size()))); + list->push_back(cache_pair); + } else { + (*list)[it->second].second += cache_pair.second; + // cerr << "Cruch: " << line << "\n newp=" << (*list)[it->second].second << endl; + } *sent_id = cur_id; cache_pair.first.clear(); } else { break; } } + sort(list->begin(), list->end(), ScoreComparer()); return !list->empty(); } @@ -87,14 +105,14 @@ int main(int argc, char** argv) { vector, prob_t> > list; ReadFile rf(file); string sent_id; - while(ReadKBestList(rf.stream(), &sent_id, &list)) { + while(ReadKBestList(mbr_scale, rf.stream(), &sent_id, &list)) { vector joints(list.size()); - const prob_t max_score = pow(list.front().second, 
mbr_scale); + const prob_t max_score = list.front().second; prob_t marginal = prob_t::Zero(); for (int i = 0 ; i < list.size(); ++i) { - const prob_t joint = pow(list[i].second, mbr_scale) / max_score; + const prob_t joint = list[i].second / max_score; joints[i] = joint; - // cerr << "list[" << i << "] joint=" << log(joint) << endl; + //cerr << "list[" << i << "] joint=" << log(joint) << endl; marginal += joint; } int mbr_idx = -1; -- cgit v1.2.3 From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- Jamroot | 45 - Makefile.am | 6 +- bjam | 23 - configure.ac | 5 - decoder/Jamfile | 81 - decoder/decoder.h | 2 +- decoder/hg.h | 4 +- dpmert/Jamfile | 32 - gi/clda/src/Makefile.am | 6 - gi/clda/src/ccrp.h | 291 -- gi/clda/src/clda.cc | 148 - gi/clda/src/crp.h | 50 - gi/clda/src/slice_sampler.h | 191 - gi/clda/src/timer.h | 20 - gi/evaluation/conditional_entropy.py | 61 - gi/evaluation/confusion_matrix.py | 123 - gi/evaluation/entropy.py | 38 - gi/evaluation/extract_ccg_labels.py | 129 - gi/evaluation/tree.py | 485 --- gi/markov_al/Makefile.am | 6 - gi/markov_al/README | 2 - gi/markov_al/ml.cc | 470 --- gi/morf-segmentation/filter_docs.pl | 24 - gi/morf-segmentation/invalid_vocab.patterns | 6 - gi/morf-segmentation/linestripper.py | 40 - gi/morf-segmentation/morf-pipeline.pl | 486 --- gi/morf-segmentation/morfsegment.py | 50 - gi/morf-segmentation/morftrain.sh | 110 - gi/morf-segmentation/vocabextractor.sh | 40 - gi/pf/Makefile.am | 44 - gi/pf/README | 2 - gi/pf/align-lexonly-pyp.cc | 243 -- gi/pf/align-tl.cc | 339 -- gi/pf/backward.cc | 89 - gi/pf/backward.h | 33 - gi/pf/base_distributions.cc | 241 -- gi/pf/base_distributions.h | 238 -- gi/pf/bayes_lattice_score.cc | 309 -- gi/pf/brat.cc | 543 --- gi/pf/cbgi.cc | 330 -- gi/pf/cfg_wfst_composer.cc | 731 ---- gi/pf/cfg_wfst_composer.h | 46 - gi/pf/conditional_pseg.h | 275 -- gi/pf/condnaive.cc | 298 -- gi/pf/corpus.cc | 62 - gi/pf/corpus.h | 19 - gi/pf/dpnaive.cc | 301 -- gi/pf/guess-translits.pl | 72 - gi/pf/hpyp_tm.cc | 133 - gi/pf/hpyp_tm.h | 38 - gi/pf/itg.cc | 275 -- gi/pf/learn_cfg.cc | 428 --- gi/pf/make-freq-bins.pl | 26 - gi/pf/mh_test.cc | 148 - gi/pf/monotonic_pseg.h | 89 - gi/pf/ngram_base.cc | 69 - gi/pf/ngram_base.h | 25 - gi/pf/nuisance_test.cc | 161 - gi/pf/os_phrase.h | 15 - gi/pf/pf.h | 84 - gi/pf/pf_test.cc | 148 - gi/pf/pfbrat.cc | 543 --- gi/pf/pfdist.cc | 598 --- gi/pf/pfdist.new.cc | 620 --- gi/pf/pfnaive.cc | 284 -- gi/pf/poisson_uniform_word_model.h | 50 - gi/pf/pyp_lm.cc | 273 -- gi/pf/pyp_tm.cc | 128 - gi/pf/pyp_tm.h | 36 - gi/pf/pyp_word_model.h | 61 - gi/pf/quasi_model2.h | 177 - gi/pf/reachability.cc | 74 - gi/pf/reachability.h | 34 - gi/pf/tied_resampler.h | 122 - gi/pf/tpf.cc | 99 - gi/pf/transliterations.cc | 334 -- gi/pf/transliterations.h | 24 - gi/pf/unigrams.cc | 80 - gi/pf/unigrams.h | 69 - gi/pipeline/OLD.clsp.config | 9 - gi/pipeline/OLD.evaluation-pipeline.pl | 277 -- gi/pipeline/backoff-pipe.pl | 215 -- gi/pipeline/blacklight.config | 9 - gi/pipeline/clsp.config | 10 - gi/pipeline/evaluation-pipeline.pl | 364 -- gi/pipeline/local-gi-pipeline.pl | 465 --- gi/pipeline/lticluster.config | 9 - gi/pipeline/scripts/filter-by-f.pl | 56 - gi/pipeline/scripts/patch-corpus.pl | 65 - gi/pipeline/scripts/refilter.pl | 40 - gi/pipeline/scripts/rekey.pl | 8 - gi/pipeline/scripts/remove-tags-from-contexts.pl | 53 - gi/pipeline/scripts/remove-tags-from-corpus.pl | 44 - 
gi/pipeline/scripts/sort-by-key.sh | 5 - gi/pipeline/scripts/xfeats.pl | 39 - gi/pipeline/valhalla.config | 9 - gi/posterior-regularisation/Corpus.java | 167 - gi/posterior-regularisation/Lexicon.java | 32 - .../PhraseContextModel.java | 466 --- gi/posterior-regularisation/README | 3 - gi/posterior-regularisation/alphabet.hh | 61 - gi/posterior-regularisation/canned.concordance | 4 - gi/posterior-regularisation/em.cc | 830 ---- gi/posterior-regularisation/invert.hh | 45 - gi/posterior-regularisation/linesearch.py | 58 - gi/posterior-regularisation/log_add.hh | 30 - gi/posterior-regularisation/prjava.jar | 1 - gi/posterior-regularisation/prjava/Makefile | 8 - gi/posterior-regularisation/prjava/build.xml | 38 - .../prjava/lib/commons-math-2.1.jar | Bin 832410 -> 0 bytes .../prjava/lib/jopt-simple-3.2.jar | Bin 53244 -> 0 bytes .../prjava/lib/trove-2.0.2.jar | Bin 737844 -> 0 bytes gi/posterior-regularisation/prjava/src/arr/F.java | 99 - .../prjava/src/data/Corpus.java | 233 -- .../prjava/src/hmm/HMM.java | 579 --- .../prjava/src/hmm/HMMObjective.java | 351 -- .../prjava/src/hmm/POS.java | 120 - .../prjava/src/io/FileUtil.java | 48 - .../prjava/src/io/SerializedObjects.java | 83 - .../examples/GeneralizedRosenbrock.java | 110 - .../prjava/src/optimization/examples/x2y2.java | 128 - .../optimization/examples/x2y2WithConstraints.java | 127 - .../AbstractGradientBaseMethod.java | 120 - .../gradientBasedMethods/ConjugateGradient.java | 92 - .../gradientBasedMethods/DebugHelpers.java | 65 - .../gradientBasedMethods/GradientDescent.java | 19 - .../optimization/gradientBasedMethods/LBFGS.java | 234 -- .../gradientBasedMethods/Objective.java | 87 - .../gradientBasedMethods/Optimizer.java | 19 - .../ProjectedAbstractGradientBaseMethod.java | 11 - .../ProjectedGradientDescent.java | 154 - .../gradientBasedMethods/ProjectedObjective.java | 29 - .../gradientBasedMethods/ProjectedOptimizer.java | 10 - .../gradientBasedMethods/stats/OptimizerStats.java | 86 - .../stats/ProjectedOptimizerStats.java | 70 - .../linesearch/ArmijoLineSearchMinimization.java | 102 - ...joLineSearchMinimizationAlongProjectionArc.java | 141 - .../DifferentiableLineSearchObjective.java | 185 - .../linesearch/GenericPickFirstStep.java | 20 - .../linesearch/InterpolationPickFirstStep.java | 25 - .../optimization/linesearch/LineSearchMethod.java | 14 - .../NonNewtonInterpolationPickFirstStep.java | 33 - ...ProjectedDifferentiableLineSearchObjective.java | 137 - .../linesearch/WolfRuleLineSearch.java | 300 -- .../optimization/linesearch/WolfeConditions.java | 45 - .../optimization/projections/BoundsProjection.java | 104 - .../src/optimization/projections/Projection.java | 72 - .../projections/SimplexProjection.java | 127 - .../stopCriteria/CompositeStopingCriteria.java | 33 - .../optimization/stopCriteria/GradientL2Norm.java | 30 - .../stopCriteria/NormalizedGradientL2Norm.java | 48 - .../NormalizedProjectedGradientL2Norm.java | 60 - .../stopCriteria/NormalizedValueDifference.java | 54 - .../stopCriteria/ProjectedGradientL2Norm.java | 51 - .../optimization/stopCriteria/StopingCriteria.java | 8 - .../optimization/stopCriteria/ValueDifference.java | 41 - .../src/optimization/util/Interpolation.java | 37 - .../prjava/src/optimization/util/Logger.java | 7 - .../prjava/src/optimization/util/MathUtils.java | 339 -- .../prjava/src/optimization/util/MatrixOutput.java | 28 - .../prjava/src/optimization/util/StaticTools.java | 180 - .../prjava/src/phrase/Agree.java | 204 - .../prjava/src/phrase/Agree2Sides.java | 197 - 
.../prjava/src/phrase/C2F.java | 216 -- .../prjava/src/phrase/Corpus.java | 288 -- .../prjava/src/phrase/Lexicon.java | 34 - .../prjava/src/phrase/PhraseCluster.java | 540 --- .../prjava/src/phrase/PhraseContextObjective.java | 436 --- .../prjava/src/phrase/PhraseCorpus.java | 193 - .../prjava/src/phrase/PhraseObjective.java | 224 -- .../prjava/src/phrase/Trainer.java | 257 -- .../prjava/src/phrase/VB.java | 419 -- .../prjava/src/test/CorpusTest.java | 60 - .../prjava/src/test/HMMModelStats.java | 105 - .../prjava/src/test/IntDoublePair.java | 23 - .../prjava/src/test/X2y2WithConstraints.java | 131 - .../prjava/src/util/Array.java | 41 - .../prjava/src/util/ArrayMath.java | 186 - .../prjava/src/util/DifferentiableObjective.java | 14 - .../prjava/src/util/DigammaFunction.java | 21 - .../prjava/src/util/FileSystem.java | 21 - .../prjava/src/util/InputOutput.java | 67 - .../prjava/src/util/LogSummer.java | 86 - .../prjava/src/util/MathUtil.java | 148 - .../prjava/src/util/Matrix.java | 16 - .../prjava/src/util/MemoryTracker.java | 47 - .../prjava/src/util/Pair.java | 31 - .../prjava/src/util/Printing.java | 158 - .../prjava/src/util/Sorters.java | 39 - .../prjava/train-PR-cluster.sh | 4 - gi/posterior-regularisation/projected_gradient.cc | 87 - gi/posterior-regularisation/simplex_pg.py | 55 - gi/posterior-regularisation/split-languages.py | 23 - gi/posterior-regularisation/train_pr_agree.py | 400 -- gi/posterior-regularisation/train_pr_global.py | 296 -- gi/posterior-regularisation/train_pr_parallel.py | 333 -- gi/pyp-topics/scripts/contexts2documents.py | 37 - gi/pyp-topics/scripts/extract_contexts.py | 144 - gi/pyp-topics/scripts/extract_contexts_test.py | 72 - gi/pyp-topics/scripts/extract_leaves.py | 49 - gi/pyp-topics/scripts/map-documents.py | 20 - gi/pyp-topics/scripts/map-terms.py | 20 - gi/pyp-topics/scripts/run.sh | 13 - gi/pyp-topics/scripts/score-mkcls.py | 61 - gi/pyp-topics/scripts/score-topics.py | 64 - gi/pyp-topics/scripts/spans2labels.py | 137 - gi/pyp-topics/scripts/tokens2classes.py | 27 - gi/pyp-topics/scripts/topics.py | 20 - gi/pyp-topics/src/Makefile.am | 16 - gi/pyp-topics/src/Makefile.mpi | 26 - gi/pyp-topics/src/clock_gettime_stub.c | 141 - gi/pyp-topics/src/contexts_corpus.cc | 164 - gi/pyp-topics/src/contexts_corpus.hh | 90 - gi/pyp-topics/src/contexts_lexer.h | 22 - gi/pyp-topics/src/contexts_lexer.l | 113 - gi/pyp-topics/src/corpus.cc | 104 - gi/pyp-topics/src/corpus.hh | 133 - gi/pyp-topics/src/gammadist.c | 247 -- gi/pyp-topics/src/gammadist.h | 72 - gi/pyp-topics/src/gzstream.cc | 165 - gi/pyp-topics/src/gzstream.hh | 121 - gi/pyp-topics/src/log_add.h | 30 - gi/pyp-topics/src/macros.Linux | 18 - gi/pyp-topics/src/makefile.darwin | 15 - gi/pyp-topics/src/makefile.depend | 4042 -------------------- gi/pyp-topics/src/mpi-corpus.hh | 69 - gi/pyp-topics/src/mpi-pyp-topics.cc | 466 --- gi/pyp-topics/src/mpi-pyp-topics.hh | 106 - gi/pyp-topics/src/mpi-pyp.hh | 447 --- gi/pyp-topics/src/mpi-train-contexts.cc | 201 - gi/pyp-topics/src/mt19937ar.c | 194 - gi/pyp-topics/src/mt19937ar.h | 44 - gi/pyp-topics/src/pyp-topics.cc | 499 --- gi/pyp-topics/src/pyp-topics.hh | 98 - gi/pyp-topics/src/pyp.hh | 566 --- gi/pyp-topics/src/slice-sampler.h | 192 - gi/pyp-topics/src/timing.h | 37 - gi/pyp-topics/src/train-contexts.cc | 174 - gi/pyp-topics/src/train.cc | 135 - gi/pyp-topics/src/utility.h | 962 ----- gi/pyp-topics/src/workers.hh | 275 -- gi/scripts/buck2utf8.pl | 87 - jam-files/LICENSE_1_0.txt | 23 - jam-files/boost-build/boost-build.jam | 8 - 
jam-files/boost-build/bootstrap.jam | 18 - jam-files/boost-build/build-system.jam | 1008 ----- jam-files/boost-build/build/__init__.py | 0 jam-files/boost-build/build/ac.jam | 198 - jam-files/boost-build/build/alias.jam | 73 - jam-files/boost-build/build/alias.py | 63 - jam-files/boost-build/build/build-request.jam | 322 -- jam-files/boost-build/build/build_request.py | 216 -- jam-files/boost-build/build/configure.jam | 237 -- jam-files/boost-build/build/configure.py | 164 - jam-files/boost-build/build/engine.py | 172 - jam-files/boost-build/build/errors.py | 127 - jam-files/boost-build/build/feature.jam | 1335 ------- jam-files/boost-build/build/feature.py | 905 ----- jam-files/boost-build/build/generators.jam | 1408 ------- jam-files/boost-build/build/generators.py | 1089 ------ jam-files/boost-build/build/modifiers.jam | 232 -- jam-files/boost-build/build/project.ann.py | 996 ----- jam-files/boost-build/build/project.jam | 1110 ------ jam-files/boost-build/build/project.py | 1120 ------ jam-files/boost-build/build/property-set.jam | 481 --- jam-files/boost-build/build/property.jam | 788 ---- jam-files/boost-build/build/property.py | 593 --- jam-files/boost-build/build/property_set.py | 449 --- jam-files/boost-build/build/readme.txt | 13 - jam-files/boost-build/build/scanner.jam | 153 - jam-files/boost-build/build/scanner.py | 158 - jam-files/boost-build/build/targets.jam | 1659 -------- jam-files/boost-build/build/targets.py | 1401 ------- jam-files/boost-build/build/toolset.jam | 502 --- jam-files/boost-build/build/toolset.py | 398 -- jam-files/boost-build/build/type.jam | 425 -- jam-files/boost-build/build/type.py | 313 -- jam-files/boost-build/build/version.jam | 161 - jam-files/boost-build/build/virtual-target.jam | 1317 ------- jam-files/boost-build/build/virtual_target.py | 1118 ------ jam-files/boost-build/kernel/boost-build.jam | 5 - jam-files/boost-build/kernel/bootstrap.jam | 263 -- jam-files/boost-build/kernel/bootstrap.py | 25 - jam-files/boost-build/kernel/class.jam | 420 -- jam-files/boost-build/kernel/errors.jam | 274 -- jam-files/boost-build/kernel/modules.jam | 354 -- jam-files/boost-build/options/help.jam | 212 - jam-files/boost-build/site-config.jam | 4 - jam-files/boost-build/tools/__init__.py | 0 jam-files/boost-build/tools/acc.jam | 118 - jam-files/boost-build/tools/auto-index.jam | 212 - jam-files/boost-build/tools/bison.jam | 32 - jam-files/boost-build/tools/boostbook-config.jam | 13 - jam-files/boost-build/tools/boostbook.jam | 727 ---- jam-files/boost-build/tools/borland.jam | 220 -- jam-files/boost-build/tools/builtin.jam | 960 ----- jam-files/boost-build/tools/builtin.py | 718 ---- jam-files/boost-build/tools/cast.jam | 91 - jam-files/boost-build/tools/cast.py | 69 - jam-files/boost-build/tools/clang-darwin.jam | 170 - jam-files/boost-build/tools/clang-linux.jam | 196 - jam-files/boost-build/tools/clang.jam | 27 - jam-files/boost-build/tools/common.jam | 994 ----- jam-files/boost-build/tools/common.py | 840 ---- jam-files/boost-build/tools/como-linux.jam | 103 - jam-files/boost-build/tools/como-win.jam | 117 - jam-files/boost-build/tools/como.jam | 29 - jam-files/boost-build/tools/convert.jam | 62 - jam-files/boost-build/tools/cw-config.jam | 34 - jam-files/boost-build/tools/cw.jam | 246 -- jam-files/boost-build/tools/darwin.jam | 568 --- jam-files/boost-build/tools/darwin.py | 57 - jam-files/boost-build/tools/dmc.jam | 134 - jam-files/boost-build/tools/docutils.jam | 84 - jam-files/boost-build/tools/doxproc.py | 859 ----- 
jam-files/boost-build/tools/doxygen-config.jam | 11 - jam-files/boost-build/tools/doxygen.jam | 776 ---- .../tools/doxygen/windows-paths-check.doxyfile | 3 - .../tools/doxygen/windows-paths-check.hpp | 0 jam-files/boost-build/tools/fop.jam | 69 - jam-files/boost-build/tools/fortran.jam | 55 - jam-files/boost-build/tools/gcc.jam | 1185 ------ jam-files/boost-build/tools/gcc.py | 796 ---- jam-files/boost-build/tools/generate.jam | 108 - jam-files/boost-build/tools/gettext.jam | 230 -- jam-files/boost-build/tools/gfortran.jam | 39 - jam-files/boost-build/tools/hp_cxx.jam | 181 - jam-files/boost-build/tools/hpfortran.jam | 35 - jam-files/boost-build/tools/ifort.jam | 44 - jam-files/boost-build/tools/intel-darwin.jam | 220 -- jam-files/boost-build/tools/intel-linux.jam | 250 -- jam-files/boost-build/tools/intel-win.jam | 184 - jam-files/boost-build/tools/intel.jam | 34 - jam-files/boost-build/tools/lex.jam | 33 - jam-files/boost-build/tools/make.jam | 72 - jam-files/boost-build/tools/make.py | 59 - jam-files/boost-build/tools/mc.jam | 44 - jam-files/boost-build/tools/message.jam | 55 - jam-files/boost-build/tools/message.py | 46 - jam-files/boost-build/tools/midl.jam | 142 - jam-files/boost-build/tools/mipspro.jam | 145 - jam-files/boost-build/tools/mpi.jam | 583 --- jam-files/boost-build/tools/msvc-config.jam | 12 - jam-files/boost-build/tools/msvc.jam | 1392 ------- jam-files/boost-build/tools/notfile.jam | 74 - jam-files/boost-build/tools/notfile.py | 51 - jam-files/boost-build/tools/package.jam | 165 - jam-files/boost-build/tools/package.py | 168 - jam-files/boost-build/tools/pathscale.jam | 168 - jam-files/boost-build/tools/pch.jam | 95 - jam-files/boost-build/tools/pch.py | 83 - jam-files/boost-build/tools/pgi.jam | 147 - jam-files/boost-build/tools/python-config.jam | 27 - jam-files/boost-build/tools/python.jam | 1267 ------ jam-files/boost-build/tools/qcc.jam | 236 -- jam-files/boost-build/tools/qt.jam | 17 - jam-files/boost-build/tools/qt3.jam | 209 - jam-files/boost-build/tools/qt4.jam | 724 ---- jam-files/boost-build/tools/quickbook-config.jam | 44 - jam-files/boost-build/tools/quickbook.jam | 361 -- jam-files/boost-build/tools/rc.jam | 156 - jam-files/boost-build/tools/rc.py | 189 - jam-files/boost-build/tools/stage.jam | 524 --- jam-files/boost-build/tools/stage.py | 350 -- jam-files/boost-build/tools/stlport.jam | 303 -- jam-files/boost-build/tools/sun.jam | 142 - jam-files/boost-build/tools/symlink.jam | 140 - jam-files/boost-build/tools/symlink.py | 112 - jam-files/boost-build/tools/testing-aux.jam | 210 - jam-files/boost-build/tools/testing.jam | 581 --- jam-files/boost-build/tools/testing.py | 342 -- jam-files/boost-build/tools/types/__init__.py | 18 - jam-files/boost-build/tools/types/asm.jam | 4 - jam-files/boost-build/tools/types/asm.py | 13 - jam-files/boost-build/tools/types/cpp.jam | 86 - jam-files/boost-build/tools/types/cpp.py | 10 - jam-files/boost-build/tools/types/exe.jam | 9 - jam-files/boost-build/tools/types/exe.py | 11 - jam-files/boost-build/tools/types/html.jam | 4 - jam-files/boost-build/tools/types/html.py | 10 - jam-files/boost-build/tools/types/lib.jam | 74 - jam-files/boost-build/tools/types/lib.py | 77 - jam-files/boost-build/tools/types/obj.jam | 9 - jam-files/boost-build/tools/types/obj.py | 11 - jam-files/boost-build/tools/types/objc.jam | 26 - jam-files/boost-build/tools/types/preprocessed.jam | 9 - jam-files/boost-build/tools/types/qt.jam | 10 - jam-files/boost-build/tools/types/register.jam | 39 - jam-files/boost-build/tools/types/rsp.jam | 4 - 
jam-files/boost-build/tools/types/rsp.py | 10 - jam-files/boost-build/tools/unix.jam | 224 -- jam-files/boost-build/tools/unix.py | 150 - jam-files/boost-build/tools/vacpp.jam | 150 - jam-files/boost-build/tools/whale.jam | 116 - jam-files/boost-build/tools/xlf.jam | 39 - jam-files/boost-build/tools/xsltproc-config.jam | 37 - jam-files/boost-build/tools/xsltproc.jam | 194 - jam-files/boost-build/tools/xsltproc/included.xsl | 11 - jam-files/boost-build/tools/xsltproc/test.xml | 2 - jam-files/boost-build/tools/xsltproc/test.xsl | 12 - jam-files/boost-build/tools/zlib.jam | 92 - jam-files/boost-build/user-config.jam | 92 - jam-files/boost-build/util/__init__.py | 136 - jam-files/boost-build/util/assert.jam | 336 -- jam-files/boost-build/util/container.jam | 339 -- jam-files/boost-build/util/doc.jam | 997 ----- jam-files/boost-build/util/indirect.jam | 115 - jam-files/boost-build/util/indirect.py | 15 - jam-files/boost-build/util/logger.py | 46 - jam-files/boost-build/util/numbers.jam | 218 -- jam-files/boost-build/util/option.jam | 109 - jam-files/boost-build/util/option.py | 35 - jam-files/boost-build/util/order.jam | 169 - jam-files/boost-build/util/order.py | 121 - jam-files/boost-build/util/os.jam | 171 - jam-files/boost-build/util/os_j.py | 19 - jam-files/boost-build/util/path.jam | 934 ----- jam-files/boost-build/util/path.py | 904 ----- jam-files/boost-build/util/print.jam | 488 --- jam-files/boost-build/util/regex.jam | 193 - jam-files/boost-build/util/regex.py | 25 - jam-files/boost-build/util/sequence.jam | 335 -- jam-files/boost-build/util/sequence.py | 50 - jam-files/boost-build/util/set.jam | 93 - jam-files/boost-build/util/set.py | 42 - jam-files/boost-build/util/string.jam | 189 - jam-files/boost-build/util/utility.jam | 235 -- jam-files/boost-build/util/utility.py | 155 - jam-files/engine/Jambase | 2473 ------------ jam-files/engine/boost-jam.spec | 64 - jam-files/engine/boost-no-inspect | 1 - jam-files/engine/build.bat | 532 --- jam-files/engine/build.jam | 1070 ------ jam-files/engine/build.sh | 303 -- jam-files/engine/build_vms.com | 105 - jam-files/engine/builtins.c | 2310 ----------- jam-files/engine/builtins.h | 69 - jam-files/engine/bump_version.py | 80 - jam-files/engine/class.c | 141 - jam-files/engine/class.h | 13 - jam-files/engine/command.c | 100 - jam-files/engine/command.h | 61 - jam-files/engine/compile.c | 1424 ------- jam-files/engine/compile.h | 82 - jam-files/engine/debian/changelog | 72 - jam-files/engine/debian/control | 16 - jam-files/engine/debian/copyright | 25 - jam-files/engine/debian/jam.man.sgml | 236 -- jam-files/engine/debian/rules | 73 - jam-files/engine/debug.c | 132 - jam-files/engine/debug.h | 54 - jam-files/engine/execcmd.h | 45 - jam-files/engine/execmac.c | 69 - jam-files/engine/execnt.c | 1296 ------- jam-files/engine/execunix.c | 569 --- jam-files/engine/execvms.c | 161 - jam-files/engine/expand.c | 733 ---- jam-files/engine/expand.h | 14 - jam-files/engine/filemac.c | 175 - jam-files/engine/filent.c | 387 -- jam-files/engine/fileos2.c | 138 - jam-files/engine/filesys.c | 83 - jam-files/engine/filesys.h | 60 - jam-files/engine/fileunix.c | 501 --- jam-files/engine/filevms.c | 327 -- jam-files/engine/frames.c | 22 - jam-files/engine/frames.h | 37 - jam-files/engine/glob.c | 152 - jam-files/engine/hash.c | 459 --- jam-files/engine/hash.h | 25 - jam-files/engine/hcache.c | 434 --- jam-files/engine/hcache.h | 18 - jam-files/engine/hdrmacro.c | 137 - jam-files/engine/hdrmacro.h | 14 - jam-files/engine/headers.c | 203 - 
jam-files/engine/headers.h | 16 - jam-files/engine/jam.c | 632 --- jam-files/engine/jam.h | 579 --- jam-files/engine/jambase.c | 1691 -------- jam-files/engine/jambase.h | 15 - jam-files/engine/jamgram.c | 1830 --------- jam-files/engine/jamgram.h | 140 - jam-files/engine/jamgram.y | 371 -- jam-files/engine/jamgram.yy | 329 -- jam-files/engine/jamgramtab.h | 44 - jam-files/engine/lists.c | 339 -- jam-files/engine/lists.h | 108 - jam-files/engine/make.c | 814 ---- jam-files/engine/make.h | 41 - jam-files/engine/make1.c | 1145 ------ jam-files/engine/md5.c | 381 -- jam-files/engine/md5.h | 91 - jam-files/engine/mem.c | 75 - jam-files/engine/mem.h | 134 - jam-files/engine/mkjambase.c | 123 - jam-files/engine/modules.c | 168 - jam-files/engine/modules.h | 37 - jam-files/engine/modules/order.c | 144 - jam-files/engine/modules/path.c | 32 - jam-files/engine/modules/property-set.c | 110 - jam-files/engine/modules/readme.txt | 3 - jam-files/engine/modules/regex.c | 96 - jam-files/engine/modules/sequence.c | 42 - jam-files/engine/modules/set.c | 41 - jam-files/engine/native.c | 36 - jam-files/engine/native.h | 34 - jam-files/engine/newstr.c | 174 - jam-files/engine/newstr.h | 14 - jam-files/engine/option.c | 94 - jam-files/engine/option.h | 23 - jam-files/engine/output.c | 125 - jam-files/engine/output.h | 29 - jam-files/engine/parse.c | 132 - jam-files/engine/parse.h | 59 - jam-files/engine/patchlevel.h | 17 - jam-files/engine/pathmac.c | 252 -- jam-files/engine/pathsys.h | 91 - jam-files/engine/pathunix.c | 457 --- jam-files/engine/pathvms.c | 406 -- jam-files/engine/pwd.c | 66 - jam-files/engine/pwd.h | 10 - jam-files/engine/regexp.c | 1328 ------- jam-files/engine/regexp.h | 32 - jam-files/engine/rules.c | 810 ---- jam-files/engine/rules.h | 280 -- jam-files/engine/scan.c | 418 -- jam-files/engine/scan.h | 56 - jam-files/engine/search.c | 223 -- jam-files/engine/search.h | 11 - jam-files/engine/strings.c | 201 - jam-files/engine/strings.h | 34 - jam-files/engine/subst.c | 94 - jam-files/engine/timestamp.c | 226 -- jam-files/engine/timestamp.h | 12 - jam-files/engine/variable.c | 631 --- jam-files/engine/variable.h | 35 - jam-files/engine/w32_getreg.c | 207 - jam-files/engine/yyacc.c | 268 -- jam-files/sanity.jam | 277 -- klm/lm/Jamfile | 14 - klm/util/Jamfile | 10 - mira/Jamfile | 1 - mteval/Jamfile | 8 - mteval/ns_docscorer.cc | 4 +- mteval/ns_docscorer.h | 2 +- phrasinator/Jamfile | 4 - phrasinator/Makefile.am | 14 - phrasinator/README | 16 - phrasinator/gibbs_train_plm.cc | 309 -- phrasinator/gibbs_train_plm.notables.cc | 335 -- phrasinator/train-phrasinator.pl | 89 - rst_parser/Makefile.am | 20 - rst_parser/arc_factored.cc | 151 - rst_parser/arc_factored.h | 124 - rst_parser/arc_factored_marginals.cc | 58 - rst_parser/arc_ff.cc | 183 - rst_parser/arc_ff.h | 28 - rst_parser/dep_training.cc | 76 - rst_parser/dep_training.h | 19 - rst_parser/global_ff.cc | 44 - rst_parser/global_ff.h | 18 - rst_parser/mst_train.cc | 228 -- rst_parser/picojson.h | 979 ----- rst_parser/random_tree.cc | 36 - rst_parser/rst.cc | 82 - rst_parser/rst.h | 21 - rst_parser/rst_parse.cc | 111 - rst_parser/rst_train.cc | 144 - training/Jamfile | 25 - training/liblbfgs/Jamfile | 5 - utils/Jamfile | 32 - utils/Makefile.am | 7 +- utils/ccrp.h | 270 -- utils/ccrp_nt.h | 164 - utils/ccrp_onetable.h | 253 -- utils/crp_table_manager.h | 114 - utils/crp_test.cc | 91 - utils/fast_sparse_vector.h | 2 +- utils/gamma_poisson.h | 33 - utils/mfcr.h | 370 -- utils/mfcr_test.cc | 72 - utils/sampler.h | 2 +- utils/slice_sampler.h | 191 - 
utils/small_vector.h | 2 +- utils/stringlib.h | 2 +- utils/unigram_pyp_lm.cc | 214 -- 586 files changed, 13 insertions(+), 125976 deletions(-) delete mode 100644 Jamroot delete mode 100755 bjam delete mode 100644 decoder/Jamfile delete mode 100644 dpmert/Jamfile delete mode 100644 gi/clda/src/Makefile.am delete mode 100644 gi/clda/src/ccrp.h delete mode 100644 gi/clda/src/clda.cc delete mode 100644 gi/clda/src/crp.h delete mode 100644 gi/clda/src/slice_sampler.h delete mode 100644 gi/clda/src/timer.h delete mode 100644 gi/evaluation/conditional_entropy.py delete mode 100644 gi/evaluation/confusion_matrix.py delete mode 100644 gi/evaluation/entropy.py delete mode 100644 gi/evaluation/extract_ccg_labels.py delete mode 100644 gi/evaluation/tree.py delete mode 100644 gi/markov_al/Makefile.am delete mode 100644 gi/markov_al/README delete mode 100644 gi/markov_al/ml.cc delete mode 100755 gi/morf-segmentation/filter_docs.pl delete mode 100644 gi/morf-segmentation/invalid_vocab.patterns delete mode 100755 gi/morf-segmentation/linestripper.py delete mode 100755 gi/morf-segmentation/morf-pipeline.pl delete mode 100755 gi/morf-segmentation/morfsegment.py delete mode 100755 gi/morf-segmentation/morftrain.sh delete mode 100755 gi/morf-segmentation/vocabextractor.sh delete mode 100644 gi/pf/Makefile.am delete mode 100644 gi/pf/README delete mode 100644 gi/pf/align-lexonly-pyp.cc delete mode 100644 gi/pf/align-tl.cc delete mode 100644 gi/pf/backward.cc delete mode 100644 gi/pf/backward.h delete mode 100644 gi/pf/base_distributions.cc delete mode 100644 gi/pf/base_distributions.h delete mode 100644 gi/pf/bayes_lattice_score.cc delete mode 100644 gi/pf/brat.cc delete mode 100644 gi/pf/cbgi.cc delete mode 100644 gi/pf/cfg_wfst_composer.cc delete mode 100644 gi/pf/cfg_wfst_composer.h delete mode 100644 gi/pf/conditional_pseg.h delete mode 100644 gi/pf/condnaive.cc delete mode 100644 gi/pf/corpus.cc delete mode 100644 gi/pf/corpus.h delete mode 100644 gi/pf/dpnaive.cc delete mode 100755 gi/pf/guess-translits.pl delete mode 100644 gi/pf/hpyp_tm.cc delete mode 100644 gi/pf/hpyp_tm.h delete mode 100644 gi/pf/itg.cc delete mode 100644 gi/pf/learn_cfg.cc delete mode 100755 gi/pf/make-freq-bins.pl delete mode 100644 gi/pf/mh_test.cc delete mode 100644 gi/pf/monotonic_pseg.h delete mode 100644 gi/pf/ngram_base.cc delete mode 100644 gi/pf/ngram_base.h delete mode 100644 gi/pf/nuisance_test.cc delete mode 100644 gi/pf/os_phrase.h delete mode 100644 gi/pf/pf.h delete mode 100644 gi/pf/pf_test.cc delete mode 100644 gi/pf/pfbrat.cc delete mode 100644 gi/pf/pfdist.cc delete mode 100644 gi/pf/pfdist.new.cc delete mode 100644 gi/pf/pfnaive.cc delete mode 100644 gi/pf/poisson_uniform_word_model.h delete mode 100644 gi/pf/pyp_lm.cc delete mode 100644 gi/pf/pyp_tm.cc delete mode 100644 gi/pf/pyp_tm.h delete mode 100644 gi/pf/pyp_word_model.h delete mode 100644 gi/pf/quasi_model2.h delete mode 100644 gi/pf/reachability.cc delete mode 100644 gi/pf/reachability.h delete mode 100644 gi/pf/tied_resampler.h delete mode 100644 gi/pf/tpf.cc delete mode 100644 gi/pf/transliterations.cc delete mode 100644 gi/pf/transliterations.h delete mode 100644 gi/pf/unigrams.cc delete mode 100644 gi/pf/unigrams.h delete mode 100644 gi/pipeline/OLD.clsp.config delete mode 100755 gi/pipeline/OLD.evaluation-pipeline.pl delete mode 100644 gi/pipeline/backoff-pipe.pl delete mode 100644 gi/pipeline/blacklight.config delete mode 100644 gi/pipeline/clsp.config delete mode 100755 gi/pipeline/evaluation-pipeline.pl delete mode 100755 
gi/pipeline/local-gi-pipeline.pl delete mode 100644 gi/pipeline/lticluster.config delete mode 100755 gi/pipeline/scripts/filter-by-f.pl delete mode 100755 gi/pipeline/scripts/patch-corpus.pl delete mode 100755 gi/pipeline/scripts/refilter.pl delete mode 100755 gi/pipeline/scripts/rekey.pl delete mode 100755 gi/pipeline/scripts/remove-tags-from-contexts.pl delete mode 100755 gi/pipeline/scripts/remove-tags-from-corpus.pl delete mode 100755 gi/pipeline/scripts/sort-by-key.sh delete mode 100755 gi/pipeline/scripts/xfeats.pl delete mode 100644 gi/pipeline/valhalla.config delete mode 100644 gi/posterior-regularisation/Corpus.java delete mode 100644 gi/posterior-regularisation/Lexicon.java delete mode 100644 gi/posterior-regularisation/PhraseContextModel.java delete mode 100644 gi/posterior-regularisation/README delete mode 100644 gi/posterior-regularisation/alphabet.hh delete mode 100644 gi/posterior-regularisation/canned.concordance delete mode 100644 gi/posterior-regularisation/em.cc delete mode 100644 gi/posterior-regularisation/invert.hh delete mode 100644 gi/posterior-regularisation/linesearch.py delete mode 100644 gi/posterior-regularisation/log_add.hh delete mode 120000 gi/posterior-regularisation/prjava.jar delete mode 100755 gi/posterior-regularisation/prjava/Makefile delete mode 100644 gi/posterior-regularisation/prjava/build.xml delete mode 100644 gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar delete mode 100644 gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar delete mode 100644 gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar delete mode 100644 gi/posterior-regularisation/prjava/src/arr/F.java delete mode 100644 gi/posterior-regularisation/prjava/src/data/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMM.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/hmm/POS.java delete mode 100644 gi/posterior-regularisation/prjava/src/io/FileUtil.java delete mode 100644 gi/posterior-regularisation/prjava/src/io/SerializedObjects.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/GeneralizedRosenbrock.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/examples/x2y2WithConstraints.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/AbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ConjugateGradient.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/DebugHelpers.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/GradientDescent.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/LBFGS.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Objective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/Optimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedAbstractGradientBaseMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedGradientDescent.java delete mode 100644 
gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/ProjectedOptimizer.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/OptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/gradientBasedMethods/stats/ProjectedOptimizerStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimization.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ArmijoLineSearchMinimizationAlongProjectionArc.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/DifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/GenericPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/InterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/LineSearchMethod.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/NonNewtonInterpolationPickFirstStep.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/ProjectedDifferentiableLineSearchObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfRuleLineSearch.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/linesearch/WolfeConditions.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/BoundsProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/Projection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/projections/SimplexProjection.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/CompositeStopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/GradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/NormalizedValueDifference.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ProjectedGradientL2Norm.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/StopingCriteria.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/stopCriteria/ValueDifference.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Interpolation.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/Logger.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MathUtils.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/MatrixOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/optimization/util/StaticTools.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Agree2Sides.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/C2F.java delete mode 100644 
gi/posterior-regularisation/prjava/src/phrase/Corpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Lexicon.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCluster.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseContextObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/Trainer.java delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/VB.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/CorpusTest.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/HMMModelStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/IntDoublePair.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Array.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/ArrayMath.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DifferentiableObjective.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/DigammaFunction.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/FileSystem.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/InputOutput.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/LogSummer.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MathUtil.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Matrix.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/MemoryTracker.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Pair.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Printing.java delete mode 100644 gi/posterior-regularisation/prjava/src/util/Sorters.java delete mode 100755 gi/posterior-regularisation/prjava/train-PR-cluster.sh delete mode 100644 gi/posterior-regularisation/projected_gradient.cc delete mode 100644 gi/posterior-regularisation/simplex_pg.py delete mode 100755 gi/posterior-regularisation/split-languages.py delete mode 100644 gi/posterior-regularisation/train_pr_agree.py delete mode 100644 gi/posterior-regularisation/train_pr_global.py delete mode 100644 gi/posterior-regularisation/train_pr_parallel.py delete mode 100755 gi/pyp-topics/scripts/contexts2documents.py delete mode 100755 gi/pyp-topics/scripts/extract_contexts.py delete mode 100755 gi/pyp-topics/scripts/extract_contexts_test.py delete mode 100755 gi/pyp-topics/scripts/extract_leaves.py delete mode 100755 gi/pyp-topics/scripts/map-documents.py delete mode 100755 gi/pyp-topics/scripts/map-terms.py delete mode 100644 gi/pyp-topics/scripts/run.sh delete mode 100755 gi/pyp-topics/scripts/score-mkcls.py delete mode 100755 gi/pyp-topics/scripts/score-topics.py delete mode 100755 gi/pyp-topics/scripts/spans2labels.py delete mode 100755 gi/pyp-topics/scripts/tokens2classes.py delete mode 100755 gi/pyp-topics/scripts/topics.py delete mode 100644 gi/pyp-topics/src/Makefile.am delete mode 100644 gi/pyp-topics/src/Makefile.mpi delete mode 100644 gi/pyp-topics/src/clock_gettime_stub.c delete mode 100644 gi/pyp-topics/src/contexts_corpus.cc delete mode 100644 gi/pyp-topics/src/contexts_corpus.hh delete mode 100644 gi/pyp-topics/src/contexts_lexer.h delete mode 100644 gi/pyp-topics/src/contexts_lexer.l delete mode 100644 
gi/pyp-topics/src/corpus.cc delete mode 100644 gi/pyp-topics/src/corpus.hh delete mode 100644 gi/pyp-topics/src/gammadist.c delete mode 100644 gi/pyp-topics/src/gammadist.h delete mode 100644 gi/pyp-topics/src/gzstream.cc delete mode 100644 gi/pyp-topics/src/gzstream.hh delete mode 100644 gi/pyp-topics/src/log_add.h delete mode 100644 gi/pyp-topics/src/macros.Linux delete mode 100644 gi/pyp-topics/src/makefile.darwin delete mode 100644 gi/pyp-topics/src/makefile.depend delete mode 100644 gi/pyp-topics/src/mpi-corpus.hh delete mode 100644 gi/pyp-topics/src/mpi-pyp-topics.cc delete mode 100644 gi/pyp-topics/src/mpi-pyp-topics.hh delete mode 100644 gi/pyp-topics/src/mpi-pyp.hh delete mode 100644 gi/pyp-topics/src/mpi-train-contexts.cc delete mode 100644 gi/pyp-topics/src/mt19937ar.c delete mode 100644 gi/pyp-topics/src/mt19937ar.h delete mode 100644 gi/pyp-topics/src/pyp-topics.cc delete mode 100644 gi/pyp-topics/src/pyp-topics.hh delete mode 100644 gi/pyp-topics/src/pyp.hh delete mode 100644 gi/pyp-topics/src/slice-sampler.h delete mode 100644 gi/pyp-topics/src/timing.h delete mode 100644 gi/pyp-topics/src/train-contexts.cc delete mode 100644 gi/pyp-topics/src/train.cc delete mode 100644 gi/pyp-topics/src/utility.h delete mode 100644 gi/pyp-topics/src/workers.hh delete mode 100755 gi/scripts/buck2utf8.pl delete mode 100644 jam-files/LICENSE_1_0.txt delete mode 100644 jam-files/boost-build/boost-build.jam delete mode 100644 jam-files/boost-build/bootstrap.jam delete mode 100644 jam-files/boost-build/build-system.jam delete mode 100644 jam-files/boost-build/build/__init__.py delete mode 100644 jam-files/boost-build/build/ac.jam delete mode 100644 jam-files/boost-build/build/alias.jam delete mode 100644 jam-files/boost-build/build/alias.py delete mode 100644 jam-files/boost-build/build/build-request.jam delete mode 100644 jam-files/boost-build/build/build_request.py delete mode 100644 jam-files/boost-build/build/configure.jam delete mode 100644 jam-files/boost-build/build/configure.py delete mode 100644 jam-files/boost-build/build/engine.py delete mode 100644 jam-files/boost-build/build/errors.py delete mode 100644 jam-files/boost-build/build/feature.jam delete mode 100644 jam-files/boost-build/build/feature.py delete mode 100644 jam-files/boost-build/build/generators.jam delete mode 100644 jam-files/boost-build/build/generators.py delete mode 100644 jam-files/boost-build/build/modifiers.jam delete mode 100644 jam-files/boost-build/build/project.ann.py delete mode 100644 jam-files/boost-build/build/project.jam delete mode 100644 jam-files/boost-build/build/project.py delete mode 100644 jam-files/boost-build/build/property-set.jam delete mode 100644 jam-files/boost-build/build/property.jam delete mode 100644 jam-files/boost-build/build/property.py delete mode 100644 jam-files/boost-build/build/property_set.py delete mode 100644 jam-files/boost-build/build/readme.txt delete mode 100644 jam-files/boost-build/build/scanner.jam delete mode 100644 jam-files/boost-build/build/scanner.py delete mode 100644 jam-files/boost-build/build/targets.jam delete mode 100644 jam-files/boost-build/build/targets.py delete mode 100644 jam-files/boost-build/build/toolset.jam delete mode 100644 jam-files/boost-build/build/toolset.py delete mode 100644 jam-files/boost-build/build/type.jam delete mode 100644 jam-files/boost-build/build/type.py delete mode 100644 jam-files/boost-build/build/version.jam delete mode 100644 jam-files/boost-build/build/virtual-target.jam delete mode 100644 
jam-files/boost-build/build/virtual_target.py delete mode 100644 jam-files/boost-build/kernel/boost-build.jam delete mode 100644 jam-files/boost-build/kernel/bootstrap.jam delete mode 100644 jam-files/boost-build/kernel/bootstrap.py delete mode 100644 jam-files/boost-build/kernel/class.jam delete mode 100644 jam-files/boost-build/kernel/errors.jam delete mode 100644 jam-files/boost-build/kernel/modules.jam delete mode 100644 jam-files/boost-build/options/help.jam delete mode 100644 jam-files/boost-build/site-config.jam delete mode 100644 jam-files/boost-build/tools/__init__.py delete mode 100644 jam-files/boost-build/tools/acc.jam delete mode 100644 jam-files/boost-build/tools/auto-index.jam delete mode 100644 jam-files/boost-build/tools/bison.jam delete mode 100644 jam-files/boost-build/tools/boostbook-config.jam delete mode 100644 jam-files/boost-build/tools/boostbook.jam delete mode 100644 jam-files/boost-build/tools/borland.jam delete mode 100644 jam-files/boost-build/tools/builtin.jam delete mode 100644 jam-files/boost-build/tools/builtin.py delete mode 100644 jam-files/boost-build/tools/cast.jam delete mode 100644 jam-files/boost-build/tools/cast.py delete mode 100644 jam-files/boost-build/tools/clang-darwin.jam delete mode 100644 jam-files/boost-build/tools/clang-linux.jam delete mode 100644 jam-files/boost-build/tools/clang.jam delete mode 100644 jam-files/boost-build/tools/common.jam delete mode 100644 jam-files/boost-build/tools/common.py delete mode 100644 jam-files/boost-build/tools/como-linux.jam delete mode 100644 jam-files/boost-build/tools/como-win.jam delete mode 100644 jam-files/boost-build/tools/como.jam delete mode 100644 jam-files/boost-build/tools/convert.jam delete mode 100644 jam-files/boost-build/tools/cw-config.jam delete mode 100644 jam-files/boost-build/tools/cw.jam delete mode 100644 jam-files/boost-build/tools/darwin.jam delete mode 100644 jam-files/boost-build/tools/darwin.py delete mode 100644 jam-files/boost-build/tools/dmc.jam delete mode 100644 jam-files/boost-build/tools/docutils.jam delete mode 100644 jam-files/boost-build/tools/doxproc.py delete mode 100644 jam-files/boost-build/tools/doxygen-config.jam delete mode 100644 jam-files/boost-build/tools/doxygen.jam delete mode 100644 jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile delete mode 100644 jam-files/boost-build/tools/doxygen/windows-paths-check.hpp delete mode 100644 jam-files/boost-build/tools/fop.jam delete mode 100644 jam-files/boost-build/tools/fortran.jam delete mode 100644 jam-files/boost-build/tools/gcc.jam delete mode 100644 jam-files/boost-build/tools/gcc.py delete mode 100644 jam-files/boost-build/tools/generate.jam delete mode 100644 jam-files/boost-build/tools/gettext.jam delete mode 100644 jam-files/boost-build/tools/gfortran.jam delete mode 100644 jam-files/boost-build/tools/hp_cxx.jam delete mode 100644 jam-files/boost-build/tools/hpfortran.jam delete mode 100644 jam-files/boost-build/tools/ifort.jam delete mode 100644 jam-files/boost-build/tools/intel-darwin.jam delete mode 100644 jam-files/boost-build/tools/intel-linux.jam delete mode 100644 jam-files/boost-build/tools/intel-win.jam delete mode 100644 jam-files/boost-build/tools/intel.jam delete mode 100644 jam-files/boost-build/tools/lex.jam delete mode 100644 jam-files/boost-build/tools/make.jam delete mode 100644 jam-files/boost-build/tools/make.py delete mode 100644 jam-files/boost-build/tools/mc.jam delete mode 100644 jam-files/boost-build/tools/message.jam delete mode 100644 
jam-files/boost-build/tools/message.py delete mode 100644 jam-files/boost-build/tools/midl.jam delete mode 100644 jam-files/boost-build/tools/mipspro.jam delete mode 100644 jam-files/boost-build/tools/mpi.jam delete mode 100644 jam-files/boost-build/tools/msvc-config.jam delete mode 100644 jam-files/boost-build/tools/msvc.jam delete mode 100644 jam-files/boost-build/tools/notfile.jam delete mode 100644 jam-files/boost-build/tools/notfile.py delete mode 100644 jam-files/boost-build/tools/package.jam delete mode 100644 jam-files/boost-build/tools/package.py delete mode 100644 jam-files/boost-build/tools/pathscale.jam delete mode 100644 jam-files/boost-build/tools/pch.jam delete mode 100644 jam-files/boost-build/tools/pch.py delete mode 100644 jam-files/boost-build/tools/pgi.jam delete mode 100644 jam-files/boost-build/tools/python-config.jam delete mode 100644 jam-files/boost-build/tools/python.jam delete mode 100644 jam-files/boost-build/tools/qcc.jam delete mode 100644 jam-files/boost-build/tools/qt.jam delete mode 100644 jam-files/boost-build/tools/qt3.jam delete mode 100644 jam-files/boost-build/tools/qt4.jam delete mode 100644 jam-files/boost-build/tools/quickbook-config.jam delete mode 100644 jam-files/boost-build/tools/quickbook.jam delete mode 100644 jam-files/boost-build/tools/rc.jam delete mode 100644 jam-files/boost-build/tools/rc.py delete mode 100644 jam-files/boost-build/tools/stage.jam delete mode 100644 jam-files/boost-build/tools/stage.py delete mode 100644 jam-files/boost-build/tools/stlport.jam delete mode 100644 jam-files/boost-build/tools/sun.jam delete mode 100644 jam-files/boost-build/tools/symlink.jam delete mode 100644 jam-files/boost-build/tools/symlink.py delete mode 100644 jam-files/boost-build/tools/testing-aux.jam delete mode 100644 jam-files/boost-build/tools/testing.jam delete mode 100644 jam-files/boost-build/tools/testing.py delete mode 100644 jam-files/boost-build/tools/types/__init__.py delete mode 100644 jam-files/boost-build/tools/types/asm.jam delete mode 100644 jam-files/boost-build/tools/types/asm.py delete mode 100644 jam-files/boost-build/tools/types/cpp.jam delete mode 100644 jam-files/boost-build/tools/types/cpp.py delete mode 100644 jam-files/boost-build/tools/types/exe.jam delete mode 100644 jam-files/boost-build/tools/types/exe.py delete mode 100644 jam-files/boost-build/tools/types/html.jam delete mode 100644 jam-files/boost-build/tools/types/html.py delete mode 100644 jam-files/boost-build/tools/types/lib.jam delete mode 100644 jam-files/boost-build/tools/types/lib.py delete mode 100644 jam-files/boost-build/tools/types/obj.jam delete mode 100644 jam-files/boost-build/tools/types/obj.py delete mode 100644 jam-files/boost-build/tools/types/objc.jam delete mode 100644 jam-files/boost-build/tools/types/preprocessed.jam delete mode 100644 jam-files/boost-build/tools/types/qt.jam delete mode 100644 jam-files/boost-build/tools/types/register.jam delete mode 100644 jam-files/boost-build/tools/types/rsp.jam delete mode 100644 jam-files/boost-build/tools/types/rsp.py delete mode 100644 jam-files/boost-build/tools/unix.jam delete mode 100644 jam-files/boost-build/tools/unix.py delete mode 100644 jam-files/boost-build/tools/vacpp.jam delete mode 100644 jam-files/boost-build/tools/whale.jam delete mode 100644 jam-files/boost-build/tools/xlf.jam delete mode 100644 jam-files/boost-build/tools/xsltproc-config.jam delete mode 100644 jam-files/boost-build/tools/xsltproc.jam delete mode 100644 jam-files/boost-build/tools/xsltproc/included.xsl delete mode 
100644 jam-files/boost-build/tools/xsltproc/test.xml delete mode 100644 jam-files/boost-build/tools/xsltproc/test.xsl delete mode 100644 jam-files/boost-build/tools/zlib.jam delete mode 100644 jam-files/boost-build/user-config.jam delete mode 100644 jam-files/boost-build/util/__init__.py delete mode 100644 jam-files/boost-build/util/assert.jam delete mode 100644 jam-files/boost-build/util/container.jam delete mode 100644 jam-files/boost-build/util/doc.jam delete mode 100644 jam-files/boost-build/util/indirect.jam delete mode 100644 jam-files/boost-build/util/indirect.py delete mode 100644 jam-files/boost-build/util/logger.py delete mode 100644 jam-files/boost-build/util/numbers.jam delete mode 100644 jam-files/boost-build/util/option.jam delete mode 100644 jam-files/boost-build/util/option.py delete mode 100644 jam-files/boost-build/util/order.jam delete mode 100644 jam-files/boost-build/util/order.py delete mode 100644 jam-files/boost-build/util/os.jam delete mode 100644 jam-files/boost-build/util/os_j.py delete mode 100644 jam-files/boost-build/util/path.jam delete mode 100644 jam-files/boost-build/util/path.py delete mode 100644 jam-files/boost-build/util/print.jam delete mode 100644 jam-files/boost-build/util/regex.jam delete mode 100644 jam-files/boost-build/util/regex.py delete mode 100644 jam-files/boost-build/util/sequence.jam delete mode 100644 jam-files/boost-build/util/sequence.py delete mode 100644 jam-files/boost-build/util/set.jam delete mode 100644 jam-files/boost-build/util/set.py delete mode 100644 jam-files/boost-build/util/string.jam delete mode 100644 jam-files/boost-build/util/utility.jam delete mode 100644 jam-files/boost-build/util/utility.py delete mode 100644 jam-files/engine/Jambase delete mode 100644 jam-files/engine/boost-jam.spec delete mode 100644 jam-files/engine/boost-no-inspect delete mode 100644 jam-files/engine/build.bat delete mode 100644 jam-files/engine/build.jam delete mode 100755 jam-files/engine/build.sh delete mode 100644 jam-files/engine/build_vms.com delete mode 100644 jam-files/engine/builtins.c delete mode 100644 jam-files/engine/builtins.h delete mode 100644 jam-files/engine/bump_version.py delete mode 100644 jam-files/engine/class.c delete mode 100644 jam-files/engine/class.h delete mode 100644 jam-files/engine/command.c delete mode 100644 jam-files/engine/command.h delete mode 100644 jam-files/engine/compile.c delete mode 100644 jam-files/engine/compile.h delete mode 100644 jam-files/engine/debian/changelog delete mode 100644 jam-files/engine/debian/control delete mode 100644 jam-files/engine/debian/copyright delete mode 100644 jam-files/engine/debian/jam.man.sgml delete mode 100755 jam-files/engine/debian/rules delete mode 100644 jam-files/engine/debug.c delete mode 100644 jam-files/engine/debug.h delete mode 100644 jam-files/engine/execcmd.h delete mode 100644 jam-files/engine/execmac.c delete mode 100644 jam-files/engine/execnt.c delete mode 100644 jam-files/engine/execunix.c delete mode 100644 jam-files/engine/execvms.c delete mode 100644 jam-files/engine/expand.c delete mode 100644 jam-files/engine/expand.h delete mode 100644 jam-files/engine/filemac.c delete mode 100644 jam-files/engine/filent.c delete mode 100644 jam-files/engine/fileos2.c delete mode 100644 jam-files/engine/filesys.c delete mode 100644 jam-files/engine/filesys.h delete mode 100644 jam-files/engine/fileunix.c delete mode 100644 jam-files/engine/filevms.c delete mode 100644 jam-files/engine/frames.c delete mode 100644 jam-files/engine/frames.h delete mode 100644 
jam-files/engine/glob.c delete mode 100644 jam-files/engine/hash.c delete mode 100644 jam-files/engine/hash.h delete mode 100644 jam-files/engine/hcache.c delete mode 100644 jam-files/engine/hcache.h delete mode 100644 jam-files/engine/hdrmacro.c delete mode 100644 jam-files/engine/hdrmacro.h delete mode 100644 jam-files/engine/headers.c delete mode 100644 jam-files/engine/headers.h delete mode 100644 jam-files/engine/jam.c delete mode 100644 jam-files/engine/jam.h delete mode 100644 jam-files/engine/jambase.c delete mode 100644 jam-files/engine/jambase.h delete mode 100644 jam-files/engine/jamgram.c delete mode 100644 jam-files/engine/jamgram.h delete mode 100644 jam-files/engine/jamgram.y delete mode 100644 jam-files/engine/jamgram.yy delete mode 100644 jam-files/engine/jamgramtab.h delete mode 100644 jam-files/engine/lists.c delete mode 100644 jam-files/engine/lists.h delete mode 100644 jam-files/engine/make.c delete mode 100644 jam-files/engine/make.h delete mode 100644 jam-files/engine/make1.c delete mode 100644 jam-files/engine/md5.c delete mode 100644 jam-files/engine/md5.h delete mode 100644 jam-files/engine/mem.c delete mode 100644 jam-files/engine/mem.h delete mode 100644 jam-files/engine/mkjambase.c delete mode 100644 jam-files/engine/modules.c delete mode 100644 jam-files/engine/modules.h delete mode 100644 jam-files/engine/modules/order.c delete mode 100644 jam-files/engine/modules/path.c delete mode 100644 jam-files/engine/modules/property-set.c delete mode 100644 jam-files/engine/modules/readme.txt delete mode 100644 jam-files/engine/modules/regex.c delete mode 100644 jam-files/engine/modules/sequence.c delete mode 100644 jam-files/engine/modules/set.c delete mode 100644 jam-files/engine/native.c delete mode 100644 jam-files/engine/native.h delete mode 100644 jam-files/engine/newstr.c delete mode 100644 jam-files/engine/newstr.h delete mode 100644 jam-files/engine/option.c delete mode 100644 jam-files/engine/option.h delete mode 100644 jam-files/engine/output.c delete mode 100644 jam-files/engine/output.h delete mode 100644 jam-files/engine/parse.c delete mode 100644 jam-files/engine/parse.h delete mode 100644 jam-files/engine/patchlevel.h delete mode 100644 jam-files/engine/pathmac.c delete mode 100644 jam-files/engine/pathsys.h delete mode 100644 jam-files/engine/pathunix.c delete mode 100644 jam-files/engine/pathvms.c delete mode 100644 jam-files/engine/pwd.c delete mode 100644 jam-files/engine/pwd.h delete mode 100644 jam-files/engine/regexp.c delete mode 100644 jam-files/engine/regexp.h delete mode 100644 jam-files/engine/rules.c delete mode 100644 jam-files/engine/rules.h delete mode 100644 jam-files/engine/scan.c delete mode 100644 jam-files/engine/scan.h delete mode 100644 jam-files/engine/search.c delete mode 100644 jam-files/engine/search.h delete mode 100644 jam-files/engine/strings.c delete mode 100644 jam-files/engine/strings.h delete mode 100644 jam-files/engine/subst.c delete mode 100644 jam-files/engine/timestamp.c delete mode 100644 jam-files/engine/timestamp.h delete mode 100644 jam-files/engine/variable.c delete mode 100644 jam-files/engine/variable.h delete mode 100644 jam-files/engine/w32_getreg.c delete mode 100644 jam-files/engine/yyacc.c delete mode 100644 jam-files/sanity.jam delete mode 100644 klm/lm/Jamfile delete mode 100644 klm/util/Jamfile delete mode 100644 mira/Jamfile delete mode 100644 mteval/Jamfile delete mode 100644 phrasinator/Jamfile delete mode 100644 phrasinator/Makefile.am delete mode 100644 phrasinator/README delete mode 100644 
phrasinator/gibbs_train_plm.cc delete mode 100644 phrasinator/gibbs_train_plm.notables.cc delete mode 100755 phrasinator/train-phrasinator.pl delete mode 100644 rst_parser/Makefile.am delete mode 100644 rst_parser/arc_factored.cc delete mode 100644 rst_parser/arc_factored.h delete mode 100644 rst_parser/arc_factored_marginals.cc delete mode 100644 rst_parser/arc_ff.cc delete mode 100644 rst_parser/arc_ff.h delete mode 100644 rst_parser/dep_training.cc delete mode 100644 rst_parser/dep_training.h delete mode 100644 rst_parser/global_ff.cc delete mode 100644 rst_parser/global_ff.h delete mode 100644 rst_parser/mst_train.cc delete mode 100644 rst_parser/picojson.h delete mode 100644 rst_parser/random_tree.cc delete mode 100644 rst_parser/rst.cc delete mode 100644 rst_parser/rst.h delete mode 100644 rst_parser/rst_parse.cc delete mode 100644 rst_parser/rst_train.cc delete mode 100644 training/Jamfile delete mode 100644 training/liblbfgs/Jamfile delete mode 100644 utils/Jamfile delete mode 100644 utils/ccrp.h delete mode 100644 utils/ccrp_nt.h delete mode 100644 utils/ccrp_onetable.h delete mode 100644 utils/crp_table_manager.h delete mode 100644 utils/crp_test.cc delete mode 100644 utils/gamma_poisson.h delete mode 100644 utils/mfcr.h delete mode 100644 utils/mfcr_test.cc delete mode 100644 utils/slice_sampler.h delete mode 100644 utils/unigram_pyp_lm.cc diff --git a/Jamroot b/Jamroot deleted file mode 100644 index ef426146..00000000 --- a/Jamroot +++ /dev/null @@ -1,45 +0,0 @@ -#cdec compilation with bjam -# -#--with-boost=/usr/include -#--with-google-hash=/usr/include so that $with-google-hash/google/dense_hash_map exists -# -#-a forces the build to run from scratch -#-jN parallelizes just like make -# -#Respects CXXFLAGS, CFLAGS, and LDFLAGS environment variables. - -path-constant TOP : . ; -include $(TOP)/jam-files/sanity.jam ; -boost 104400 ; -external-lib z ; - -with-google-hash = [ option.get "with-google-hash" ] ; -if [ test_header sparsehash/dense_hash_map ] || $(with-google-hash) { - requirements += HAVE_SPARSEHASH $(with-google-hash) ; -} - -if [ test_header cmph.h ] || $(with-cmph) { - requirements += HAVE_CMPH $(with-cmph) ; -} - -if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serialization ] { - requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; -} - -project : requirements $(requirements) darwin:static ; -project : default-build on release ; - -install-bin-libs dpmert//programs utils//programs mteval//programs klm/lm//programs training//liblbfgs decoder//cdec phrasinator//programs mira//kbest_mira ; - -install perl-scripts : dpmert//dpmert.pl : $(bindir) ; - -build-projects mteval decoder dpmert klm/lm training/liblbfgs ; - -#Compile everything ending with _test.cc into a test and run it. 
-rule all_tests ( targets * : dependencies : properties * ) { - targets ?= [ glob *_test.cc ] ; - for t in $(targets) { - local base = [ MATCH "^(.*).cc$" : $(t) ] ; - unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework : $(properties) ; - } -} diff --git a/Makefile.am b/Makefile.am index 24aafd63..c0826532 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,7 +7,6 @@ SUBDIRS = \ klm/util \ klm/lm \ decoder \ - phrasinator \ training \ training/liblbfgs \ mira \ @@ -15,10 +14,7 @@ SUBDIRS = \ dpmert \ pro-train \ rampion \ - minrisk \ - gi/pf \ - gi/markov_al \ - rst_parser + minrisk #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava diff --git a/bjam b/bjam deleted file mode 100755 index d1ac8a55..00000000 --- a/bjam +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -set -e -if - bjam="$(which bjam 2>/dev/null)" && #exists - [ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true - ! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" /dev/null && #bjam in path isn't this script - "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build) - "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough. -then - #Delegate to system bjam - exec "${bjam}" "$@" -fi - -top="$(dirname "$0")" -if [ ! -x "$top"/jam-files/bjam ]; then - pushd "$top/jam-files/engine" - ./build.sh - cp -f bin.*/bjam ../bjam - popd -fi - -export BOOST_BUILD_PATH="$top"/jam-files/boost-build -exec "$top"/jam-files/bjam "$@" diff --git a/configure.ac b/configure.ac index ea9e84fb..07ef9fe1 100644 --- a/configure.ac +++ b/configure.ac @@ -114,7 +114,6 @@ AC_CONFIG_FILES([Makefile]) AC_CONFIG_FILES([utils/Makefile]) AC_CONFIG_FILES([mteval/Makefile]) AC_CONFIG_FILES([decoder/Makefile]) -AC_CONFIG_FILES([phrasinator/Makefile]) AC_CONFIG_FILES([training/Makefile]) AC_CONFIG_FILES([training/liblbfgs/Makefile]) AC_CONFIG_FILES([dpmert/Makefile]) @@ -125,10 +124,6 @@ AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([gi/pyp-topics/src/Makefile]) -AC_CONFIG_FILES([gi/clda/src/Makefile]) -AC_CONFIG_FILES([gi/pf/Makefile]) -AC_CONFIG_FILES([gi/markov_al/Makefile]) AC_CONFIG_FILES([rst_parser/Makefile]) AC_CONFIG_FILES([python/setup.py]) diff --git a/decoder/Jamfile b/decoder/Jamfile deleted file mode 100644 index da02d063..00000000 --- a/decoder/Jamfile +++ /dev/null @@ -1,81 +0,0 @@ -import testing ; -import lex ; -import option ; - -if [ option.get "with-glc" ] { - glc = ff_glc.cc string_util.cc feature-factory.cc ; -} - -lib decoder : - forest_writer.cc - maxtrans_blunsom.cc - cdec_ff.cc - cfg.cc - dwarf.cc - ff_dwarf.cc - rule_lexer.ll - fst_translator.cc - csplit.cc - translator.cc - scfg_translator.cc - hg.cc - hg_io.cc - decoder.cc - hg_intersect.cc - hg_sampler.cc - factored_lexicon_helper.cc - viterbi.cc - lattice.cc - aligner.cc - apply_models.cc - earley_composer.cc - phrasetable_fst.cc - trule.cc - ff.cc - ff_rules.cc - ff_wordset.cc - ff_context.cc - ff_charset.cc - ff_lm.cc - ff_klm.cc - ff_ngrams.cc - ff_spans.cc - ff_ruleshape.cc - ff_wordalign.cc - ff_csplit.cc - ff_tagger.cc - ff_source_syntax.cc - ff_bleu.cc - ff_factory.cc - lexalign.cc - lextrans.cc - tagger.cc - bottom_up_parser.cc - phrasebased_translator.cc - JSON_parser.c - json_parse.cc - grammar.cc - rescore_translator.cc - hg_remove_eps.cc - hg_union.cc - $(glc) - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - : . 
- : : - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - . - ; - -exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; - -all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : $(TOP)/decoder/test_data ; - -install legacy : cdec - : $(TOP)/cdec EXE on shared:$(TOP)/cdec shared:LIB ; - diff --git a/decoder/decoder.h b/decoder/decoder.h index bef2ff5e..79c7a602 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -24,7 +24,7 @@ private: #endif class SentenceMetadata; -struct Hypergraph; +class Hypergraph; struct DecoderImpl; struct DecoderObserver { diff --git a/decoder/hg.h b/decoder/hg.h index 591e98ce..6d67f2fa 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -503,9 +503,9 @@ public: template void visit_edges_topo(V &v) { - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { EdgesVector const& in=nodes_[i].in_edges_; - for (int j=0;j. - : : - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - . - ; - -all_tests [ glob *_test.cc ] : dpmert : $(TOP)/dpmert/test_data ; - -exe sentserver : sentserver.c : multi ; -exe sentclient : sentclient.c ; -exe mr_dpmert_generate_mapper_input : mr_dpmert_generate_mapper_input.cc dpmert ..//boost_program_options ; -exe mr_dpmert_map : mr_dpmert_map.cc dpmert ..//boost_program_options ; -exe mr_dpmert_reduce : mr_dpmert_reduce.cc dpmert ..//boost_program_options ; - -alias programs : sentserver sentclient mr_dpmert_generate_mapper_input mr_dpmert_map mr_dpmert_reduce ; diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am deleted file mode 100644 index cdca1f97..00000000 --- a/gi/clda/src/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = clda - -clda_SOURCES = clda.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -AM_LDFLAGS = $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/clda/src/ccrp.h b/gi/clda/src/ccrp.h deleted file mode 100644 index a7c2825c..00000000 --- a/gi/clda/src/ccrp.h +++ /dev/null @@ -1,291 +0,0 @@ -#ifndef _CCRP_H_ -#define _CCRP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sampler.h" -#include "slice_sampler.h" - -// Chinese restaurant process (Pitman-Yor parameters) with table tracking. 
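
For reference while these headers are removed: gi/clda/src/ccrp.h (and the utils/ccrp.h copy deleted in the same patch) implements the standard Pitman-Yor Chinese restaurant process, and its prob() method computes the usual predictive probability. Below is a minimal Python 3 sketch of that formula only; it is an editorial illustration, not part of the diff, and every name in it (pyp_crp_prob, customers, tables, d, alpha) is invented for clarity.

    # Sketch of the predictive probability computed by CCRP::prob() above:
    # P(dish) = (c_w - d*t_w + (alpha + d*T) * p0) / (N + alpha)
    # where c_w/t_w are customers/tables for this dish, T total tables,
    # N total customers, d the discount, alpha the concentration.
    def pyp_crp_prob(dish, p0, customers, tables, num_customers, num_tables, d, alpha):
        c_w = customers.get(dish, 0)            # customers already eating this dish
        t_w = tables.get(dish, 0)               # tables already serving this dish
        new_table_mass = (alpha + d * num_tables) * p0   # mass for opening a new table
        return (c_w - d * t_w + new_table_mass) / (num_customers + alpha)

For an unseen dish both counts are zero and the expression reduces to the "new table" branch in the C++ code.
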
- -template > -class CCRP { - public: - CCRP(double disc, double conc) : - num_tables_(), - num_customers_(), - discount_(disc), - concentration_(conc), - discount_prior_alpha_(std::numeric_limits::quiet_NaN()), - discount_prior_beta_(std::numeric_limits::quiet_NaN()), - concentration_prior_shape_(std::numeric_limits::quiet_NaN()), - concentration_prior_rate_(std::numeric_limits::quiet_NaN()) {} - - CCRP(double d_alpha, double d_beta, double c_shape, double c_rate, double d = 0.1, double c = 10.0) : - num_tables_(), - num_customers_(), - discount_(d), - concentration_(c), - discount_prior_alpha_(d_alpha), - discount_prior_beta_(d_beta), - concentration_prior_shape_(c_shape), - concentration_prior_rate_(c_rate) {} - - double discount() const { return discount_; } - double concentration() const { return concentration_; } - - bool has_discount_prior() const { - return !std::isnan(discount_prior_alpha_); - } - - bool has_concentration_prior() const { - return !std::isnan(concentration_prior_shape_); - } - - void clear() { - num_tables_ = 0; - num_customers_ = 0; - dish_locs_.clear(); - } - - unsigned num_tables(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->second.table_counts_.size(); - } - - unsigned num_customers() const { - return num_customers_; - } - - unsigned num_customers(const Dish& dish) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - if (it == dish_locs_.end()) return 0; - return it->total_dish_count_; - } - - // returns +1 or 0 indicating whether a new table was opened - int increment(const Dish& dish, const double& p0, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - bool share_table = false; - if (loc.total_dish_count_) { - const double p_empty = (concentration_ + num_tables_ * discount_) * p0; - const double p_share = (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - share_table = rng->SelectSample(p_empty, p_share); - } - if (share_table) { - double r = rng->next() * (loc.total_dish_count_ - loc.table_counts_.size() * discount_); - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= (*ti - discount_); - if (r <= 0.0) { - ++(*ti); - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - } else { - loc.table_counts_.push_back(1u); - ++num_tables_; - } - ++loc.total_dish_count_; - ++num_customers_; - return (share_table ? 0 : 1); - } - - // returns -1 or 0, indicating whether a table was closed - int decrement(const Dish& dish, MT19937* rng) { - DishLocations& loc = dish_locs_[dish]; - assert(loc.total_dish_count_); - if (loc.total_dish_count_ == 1) { - dish_locs_.erase(dish); - --num_tables_; - --num_customers_; - return -1; - } else { - int delta = 0; - // sample customer to remove UNIFORMLY. that is, do NOT use the discount - // here. if you do, it will introduce (unwanted) bias! 
- double r = rng->next() * loc.total_dish_count_; - --loc.total_dish_count_; - for (typename std::list::iterator ti = loc.table_counts_.begin(); - ti != loc.table_counts_.end(); ++ti) { - r -= *ti; - if (r <= 0.0) { - if ((--(*ti)) == 0) { - --num_tables_; - delta = -1; - loc.table_counts_.erase(ti); - } - break; - } - } - if (r > 0.0) { - std::cerr << "Serious error: r=" << r << std::endl; - Print(&std::cerr); - assert(r <= 0.0); - } - --num_customers_; - return delta; - } - } - - double prob(const Dish& dish, const double& p0) const { - const typename std::tr1::unordered_map::const_iterator it = dish_locs_.find(dish); - const double r = num_tables_ * discount_ + concentration_; - if (it == dish_locs_.end()) { - return r * p0 / (num_customers_ + concentration_); - } else { - return (it->second.total_dish_count_ - discount_ * it->second.table_counts_.size() + r * p0) / - (num_customers_ + concentration_); - } - } - - double log_crp_prob() const { - return log_crp_prob(discount_, concentration_); - } - - static double log_beta_density(const double& x, const double& alpha, const double& beta) { - assert(x > 0.0); - assert(x < 1.0); - assert(alpha > 0.0); - assert(beta > 0.0); - const double lp = (alpha-1)*log(x)+(beta-1)*log(1-x)+lgamma(alpha+beta)-lgamma(alpha)-lgamma(beta); - return lp; - } - - static double log_gamma_density(const double& x, const double& shape, const double& rate) { - assert(x >= 0.0); - assert(shape > 0.0); - assert(rate > 0.0); - const double lp = (shape-1)*log(x) - shape*log(rate) - x/rate - lgamma(shape); - return lp; - } - - // taken from http://en.wikipedia.org/wiki/Chinese_restaurant_process - // does not include P_0's - double log_crp_prob(const double& discount, const double& concentration) const { - double lp = 0.0; - if (has_discount_prior()) - lp = log_beta_density(discount, discount_prior_alpha_, discount_prior_beta_); - if (has_concentration_prior()) - lp += log_gamma_density(concentration, concentration_prior_shape_, concentration_prior_rate_); - assert(lp <= 0.0); - if (num_customers_) { - if (discount > 0.0) { - const double r = lgamma(1.0 - discount); - lp += lgamma(concentration) - lgamma(concentration + num_customers_) - + num_tables_ * log(discount) + lgamma(concentration / discount + num_tables_) - - lgamma(concentration / discount); - assert(std::isfinite(lp)); - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - const DishLocations& cur = it->second; - for (std::list::const_iterator ti = cur.table_counts_.begin(); ti != cur.table_counts_.end(); ++ti) { - lp += lgamma(*ti - discount) - r; - } - } - } else { - assert(!"not implemented yet"); - } - } - assert(std::isfinite(lp)); - return lp; - } - - void resample_hyperparameters(MT19937* rng) { - assert(has_discount_prior() || has_concentration_prior()); - DiscountResampler dr(*this); - ConcentrationResampler cr(*this); - const int niterations = 10; - double gamma_upper = std::numeric_limits::infinity(); - for (int iter = 0; iter < 5; ++iter) { - if (has_concentration_prior()) { - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - gamma_upper, 0.0, niterations, 100*niterations); - } - if (has_discount_prior()) { - discount_ = slice_sampler1d(dr, discount_, *rng, std::numeric_limits::min(), - 1.0, 0.0, niterations, 100*niterations); - } - } - concentration_ = slice_sampler1d(cr, concentration_, *rng, 0.0, - gamma_upper, 0.0, niterations, 100*niterations); - } - - struct DiscountResampler { - DiscountResampler(const CCRP& 
crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_discount) const { - return crp_.log_crp_prob(proposed_discount, crp_.concentration_); - } - }; - - struct ConcentrationResampler { - ConcentrationResampler(const CCRP& crp) : crp_(crp) {} - const CCRP& crp_; - double operator()(const double& proposed_concentration) const { - return crp_.log_crp_prob(crp_.discount_, proposed_concentration); - } - }; - - struct DishLocations { - DishLocations() : total_dish_count_() {} - unsigned total_dish_count_; // customers at all tables with this dish - std::list table_counts_; // list<> gives O(1) deletion and insertion, which we want - // .size() is the number of tables for this dish - }; - - void Print(std::ostream* out) const { - for (typename std::tr1::unordered_map::const_iterator it = dish_locs_.begin(); - it != dish_locs_.end(); ++it) { - (*out) << it->first << " (" << it->second.total_dish_count_ << " on " << it->second.table_counts_.size() << " tables): "; - for (typename std::list::const_iterator i = it->second.table_counts_.begin(); - i != it->second.table_counts_.end(); ++i) { - (*out) << " " << *i; - } - (*out) << std::endl; - } - } - - typedef typename std::tr1::unordered_map::const_iterator const_iterator; - const_iterator begin() const { - return dish_locs_.begin(); - } - const_iterator end() const { - return dish_locs_.end(); - } - - unsigned num_tables_; - unsigned num_customers_; - std::tr1::unordered_map dish_locs_; - - double discount_; - double concentration_; - - // optional beta prior on discount_ (NaN if no prior) - double discount_prior_alpha_; - double discount_prior_beta_; - - // optional gamma prior on concentration_ (NaN if no prior) - double concentration_prior_shape_; - double concentration_prior_rate_; -}; - -template -std::ostream& operator<<(std::ostream& o, const CCRP& c) { - c.Print(&o); - return o; -} - -#endif diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc deleted file mode 100644 index f548997f..00000000 --- a/gi/clda/src/clda.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include -#include -#include -#include - -#include "timer.h" -#include "crp.h" -#include "ccrp.h" -#include "sampler.h" -#include "tdict.h" -const size_t MAX_DOC_LEN_CHARS = 10000000; - -using namespace std; - -void ShowTopWordsForTopic(const map& counts) { - multimap ms; - for (map::const_iterator it = counts.begin(); it != counts.end(); ++it) - ms.insert(make_pair(it->second, it->first)); - int cc = 0; - for (multimap::reverse_iterator it = ms.rbegin(); it != ms.rend(); ++it) { - cerr << it->first << ':' << TD::Convert(it->second) << " "; - ++cc; - if (cc==20) break; - } - cerr << endl; -} - -int main(int argc, char** argv) { - if (argc != 3) { - cerr << "Usage: " << argv[0] << " num-classes num-samples\n"; - return 1; - } - const int num_classes = atoi(argv[1]); - const int num_iterations = atoi(argv[2]); - const int burnin_size = num_iterations * 0.9; - if (num_classes < 2) { - cerr << "Must request more than 1 class\n"; - return 1; - } - if (num_iterations < 5) { - cerr << "Must request more than 5 iterations\n"; - return 1; - } - cerr << "CLASSES: " << num_classes << endl; - char* buf = new char[MAX_DOC_LEN_CHARS]; - vector > wji; // w[j][i] - observed word i of doc j - vector > zji; // z[j][i] - topic assignment for word i of doc j - cerr << "READING DOCUMENTS\n"; - while(cin) { - cin.getline(buf, MAX_DOC_LEN_CHARS); - if (buf[0] == 0) continue; - wji.push_back(vector()); - TD::ConvertSentence(buf, &wji.back()); - } - cerr << "READ " << wji.size() << " 
DOCUMENTS\n"; - MT19937 rng; - cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n"; - zji.resize(wji.size()); - double disc = 0.1; - double beta = 10.0; - double alpha = 50.0; - const double uniform_topic = 1.0 / num_classes; - const double uniform_word = 1.0 / TD::NumWords(); - vector > dr(zji.size(), CCRP(1,1,1,1,disc, beta)); // dr[i] describes the probability of using a topic in document i - vector > wr(num_classes, CCRP(1,1,1,1,disc, alpha)); // wr[k] describes the probability of generating a word in topic k - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - zj.resize(num_words); - for (int i = 0; i < num_words; ++i) { - int random_topic = rng.next() * num_classes; - if (random_topic == num_classes) { --random_topic; } - zj[i] = random_topic; - const int word = wj[i]; - dr[j].increment(random_topic, uniform_topic, &rng); - wr[random_topic].increment(word, uniform_word, &rng); - } - } - cerr << "SAMPLING\n"; - vector > t2w(num_classes); - Timer timer; - SampleSet ss; - ss.resize(num_classes); - double total_time = 0; - for (int iter = 0; iter < num_iterations; ++iter) { - cerr << '.'; - if (iter && iter % 10 == 0) { - total_time += timer.Elapsed(); - timer.Reset(); - double llh = 0; -#if 1 - for (int j = 0; j < dr.size(); ++j) - dr[j].resample_hyperparameters(&rng); - for (int j = 0; j < wr.size(); ++j) - wr[j].resample_hyperparameters(&rng); -#endif - - for (int j = 0; j < dr.size(); ++j) - llh += dr[j].log_crp_prob(); - for (int j = 0; j < wr.size(); ++j) - llh += wr[j].log_crp_prob(); - cerr << " [LLH=" << llh << " I=" << iter << "]\n"; - } - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - for (int i = 0; i < num_words; ++i) { - const int word = wj[i]; - const int cur_topic = zj[i]; - dr[j].decrement(cur_topic, &rng); - wr[cur_topic].decrement(word, &rng); - - for (int k = 0; k < num_classes; ++k) { - ss[k]= dr[j].prob(k, uniform_topic) * wr[k].prob(word, uniform_word); - } - const int new_topic = rng.SelectSample(ss); - dr[j].increment(new_topic, uniform_topic, &rng); - wr[new_topic].increment(word, uniform_word, &rng); - zj[i] = new_topic; - if (iter > burnin_size) { - ++t2w[cur_topic][word]; - } - } - } - } - for (int i = 0; i < num_classes; ++i) { - cerr << "---------------------------------\n"; - cerr << " final PYP(" << wr[i].discount() << "," << wr[i].concentration() << ")\n"; - ShowTopWordsForTopic(t2w[i]); - } - cerr << "-------------\n"; -#if 0 - for (int j = 0; j < zji.size(); ++j) { - const size_t num_words = wji[j].size(); - vector& zj = zji[j]; - const vector& wj = wji[j]; - zj.resize(num_words); - for (int i = 0; i < num_words; ++i) { - cerr << TD::Convert(wji[j][i]) << '(' << zj[i] << ") "; - } - cerr << endl; - } -#endif - return 0; -} - diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h deleted file mode 100644 index 9d35857e..00000000 --- a/gi/clda/src/crp.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _CRP_H_ -#define _CRP_H_ - -// shamelessly adapted from code by Phil Blunsom and Trevor Cohn - -#include -#include - -#include "prob.h" - -template > -class CRP { - public: - CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {} - void increment(const DishType& dish); - void decrement(const DishType& dish); - void erase(const DishType& dish) { - counts_.erase(dish); - } - inline int count(const DishType& dish) const { - const typename MapType::const_iterator i = counts_.find(dish); - 
if (i == counts_.end()) return 0; else return i->second; - } - inline prob_t prob(const DishType& dish, const prob_t& p0) const { - return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_); - } - private: - typedef std::tr1::unordered_map MapType; - MapType counts_; - const double alpha_; - const prob_t palpha_; - int total_customers_; -}; - -template -void CRP::increment(const Dish& dish) { - ++counts_[dish]; - ++total_customers_; -} - -template -void CRP::decrement(const Dish& dish) { - typename MapType::iterator i = counts_.find(dish); - assert(i != counts_.end()); - if (--i->second == 0) - counts_.erase(i); - --total_customers_; -} - -#endif diff --git a/gi/clda/src/slice_sampler.h b/gi/clda/src/slice_sampler.h deleted file mode 100644 index aa48a169..00000000 --- a/gi/clda/src/slice_sampler.h +++ /dev/null @@ -1,191 +0,0 @@ -//! slice-sampler.h is an MCMC slice sampler -//! -//! Mark Johnson, 1st August 2008 - -#ifndef SLICE_SAMPLER_H -#define SLICE_SAMPLER_H - -#include -#include -#include -#include -#include - -//! slice_sampler_rfc_type{} returns the value of a user-specified -//! function if the argument is within range, or - infinity otherwise -// -template -struct slice_sampler_rfc_type { - F min_x, max_x; - const Fn& f; - U max_nfeval, nfeval; - slice_sampler_rfc_type(F min_x, F max_x, const Fn& f, U max_nfeval) - : min_x(min_x), max_x(max_x), f(f), max_nfeval(max_nfeval), nfeval(0) { } - - F operator() (F x) { - if (min_x < x && x < max_x) { - assert(++nfeval <= max_nfeval); - F fx = f(x); - assert(std::isfinite(fx)); - return fx; - } - return -std::numeric_limits::infinity(); - } -}; // slice_sampler_rfc_type{} - -//! slice_sampler1d() implements the univariate "range doubling" slice sampler -//! described in Neal (2003) "Slice Sampling", The Annals of Statistics 31(3), 705-767. -// -template -F slice_sampler1d(const LogF& logF0, //!< log of function to sample - F x, //!< starting point - Uniform01& u01, //!< uniform [0,1) random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - slice_sampler_rfc_type logF(min_x, max_x, logF0, max_nfeval); - - assert(std::isfinite(x)); - - if (w <= 0.0) { // set w to a default width - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? -x : x)/4, (F) 0.1); - } - assert(std::isfinite(w)); - - F logFx = logF(x); - for (U sample = 0; sample < nsamples; ++sample) { - F logY = logFx + log(u01()+1e-100); //! slice logFx at this value - assert(std::isfinite(logY)); - - F xl = x - w*u01(); //! lower bound on slice interval - F logFxl = logF(xl); - F xr = xl + w; //! 
upper bound on slice interval - F logFxr = logF(xr); - - while (logY < logFxl || logY < logFxr) // doubling procedure - if (u01() < 0.5) - logFxl = logF(xl -= xr - xl); - else - logFxr = logF(xr += xr - xl); - - F xl1 = xl; - F xr1 = xr; - while (true) { // shrinking procedure - F x1 = xl1 + u01()*(xr1 - xl1); - if (logY < logF(x1)) { - F xl2 = xl; // acceptance procedure - F xr2 = xr; - bool d = false; - while (xr2 - xl2 > 1.1*w) { - F xm = (xl2 + xr2)/2; - if ((x < xm && x1 >= xm) || (x >= xm && x1 < xm)) - d = true; - if (x1 < xm) - xr2 = xm; - else - xl2 = xm; - if (d && logY >= logF(xl2) && logY >= logF(xr2)) - goto unacceptable; - } - x = x1; - goto acceptable; - } - goto acceptable; - unacceptable: - if (x1 < x) // rest of shrinking procedure - xl1 = x1; - else - xr1 = x1; - } - acceptable: - w = (4*w + (xr1 - xl1))/5; // update width estimate - } - return x; -} - -/* -//! slice_sampler1d() implements a 1-d MCMC slice sampler. -//! It should be correct for unimodal distributions, but -//! not for multimodal ones. -// -template -F slice_sampler1d(const LogP& logP, //!< log of distribution to sample - F x, //!< initial sample - Uniform01& u01, //!< uniform random number generator - F min_x = -std::numeric_limits::infinity(), //!< minimum value of support - F max_x = std::numeric_limits::infinity(), //!< maximum value of support - F w = 0.0, //!< guess at initial width - unsigned nsamples=1, //!< number of samples to draw - unsigned max_nfeval=200) //!< max number of function evaluations -{ - typedef unsigned U; - assert(std::isfinite(x)); - if (w <= 0.0) { - if (min_x > -std::numeric_limits::infinity() && max_x < std::numeric_limits::infinity()) - w = (max_x - min_x)/4; - else - w = std::max(((x < 0.0) ? -x : x)/4, 0.1); - } - // TRACE4(x, min_x, max_x, w); - F logPx = logP(x); - assert(std::isfinite(logPx)); - U nfeval = 1; - for (U sample = 0; sample < nsamples; ++sample) { - F x0 = x; - F logU = logPx + log(u01()+1e-100); - assert(std::isfinite(logU)); - F r = u01(); - F xl = std::max(min_x, x - r*w); - F xr = std::min(max_x, x + (1-r)*w); - // TRACE3(x, logPx, logU); - while (xl > min_x && logP(xl) > logU) { - xl -= w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << std::endl; - assert(nfeval < max_nfeval); - } - xl = std::max(xl, min_x); - while (xr < max_x && logP(xr) > logU) { - xr += w; - w *= 2; - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xr = " << xr << std::endl; - assert(nfeval < max_nfeval); - } - xr = std::min(xr, max_x); - while (true) { - r = u01(); - x = r*xl + (1-r)*xr; - assert(std::isfinite(x)); - logPx = logP(x); - // TRACE4(logPx, x, xl, xr); - assert(std::isfinite(logPx)); - ++nfeval; - if (nfeval >= max_nfeval) - std::cerr << "## Error: nfeval = " << nfeval << ", max_nfeval = " << max_nfeval << ", sample = " << sample << ", nsamples = " << nsamples << ", r = " << r << ", w = " << w << ", xl = " << xl << ", xr = " << xr << ", x = " << x << std::endl; - assert(nfeval < max_nfeval); - if (logPx > logU) - break; - else if (x > x0) - xr = x; - else - xl = x; - } - // w = (4*w + (xr-xl))/5; // gradually adjust w - } - // TRACE2(logPx, x); - return x; -} // slice_sampler1d() -*/ - -#endif // 
SLICE_SAMPLER_H diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h deleted file mode 100644 index 123d9a94..00000000 --- a/gi/clda/src/timer.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _TIMER_STATS_H_ -#define _TIMER_STATS_H_ - -#include - -struct Timer { - Timer() { Reset(); } - void Reset() { - start_t = clock(); - } - double Elapsed() const { - const clock_t end_t = clock(); - const double elapsed = (end_t - start_t) / 1000000.0; - return elapsed; - } - private: - std::clock_t start_t; -}; - -#endif diff --git a/gi/evaluation/conditional_entropy.py b/gi/evaluation/conditional_entropy.py deleted file mode 100644 index 356d3b1d..00000000 --- a/gi/evaluation/conditional_entropy.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input-1 input-2' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:') -slash_threshold = None -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - else: - usage() -if len(args) != 2: - usage() - -ginfile = open(args[0]) -pinfile = open(args[1]) - -# evaluating: H(G | P) = sum_{g,p} p(g,p) log { p(p) / p(g,p) } -# = sum_{g,p} c(g,p)/N { log c(p) - log N - log c(g,p) + log N } -# = 1/N sum_{g,p} c(g,p) { log c(p) - log c(g,p) } -# where G = gold, P = predicted, N = number of events - -N = 0 -gold_frequencies = {} -predict_frequencies = {} -joint_frequencies = {} - -for gline, pline in itertools.izip(ginfile, pinfile): - gparts = gline.split('||| ')[1].split() - pparts = pline.split('||| ')[1].split() - assert len(gparts) == len(pparts) - - for gpart, ppart in zip(gparts, pparts): - gtag = gpart.split(':',1)[1] - ptag = ppart.split(':',1)[1] - - if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold: - joint_frequencies.setdefault((gtag, ptag), 0) - joint_frequencies[gtag,ptag] += 1 - - predict_frequencies.setdefault(ptag, 0) - predict_frequencies[ptag] += 1 - - gold_frequencies.setdefault(gtag, 0) - gold_frequencies[gtag] += 1 - - N += 1 - -hg2p = 0 -hp2g = 0 -for (gtag, ptag), cgp in joint_frequencies.items(): - hp2g += cgp * (math.log(predict_frequencies[ptag], 2) - math.log(cgp, 2)) - hg2p += cgp * (math.log(gold_frequencies[gtag], 2) - math.log(cgp, 2)) -hg2p /= N -hp2g /= N - -print 'H(P|G)', hg2p, 'H(G|P)', hp2g, 'VI', hg2p + hp2g diff --git a/gi/evaluation/confusion_matrix.py b/gi/evaluation/confusion_matrix.py deleted file mode 100644 index 2dd7aa47..00000000 --- a/gi/evaluation/confusion_matrix.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] [-p output] [-m] input-1 input-2' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:mp:') -slash_threshold = None -output_fname = None -show_matrix = False -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - elif opt == '-p': - output_fname = arg - elif opt == '-m': - show_matrix = True - else: - usage() -if len(args) != 2 or (not show_matrix and not output_fname): - usage() - -ginfile = open(args[0]) -pinfile = open(args[1]) - -if output_fname: - try: - import Image, ImageDraw - except ImportError: - print >>sys.stderr, "Error: Python Image Library not available. Did you forget to set your PYTHONPATH environment variable?" 
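
The conditional_entropy.py script deleted just above scores an induced tagging against gold tags via H(G|P), H(P|G), and their sum (variation of information). As a reference, here is a minimal Python 3 sketch of the same quantities (the original is Python 2); it is not part of the patch and the function name and variables are illustrative.

    # H(G|P) = (1/N) * sum_{g,p} c(g,p) * (log2 c(p) - log2 c(g,p)), and
    # symmetrically for H(P|G); VI = H(G|P) + H(P|G).
    import math
    from collections import Counter

    def conditional_entropies(pairs):
        """pairs: iterable of (gold_tag, predicted_tag) events, one per token."""
        joint = Counter(pairs)
        gold = Counter(g for g, _ in joint.elements())
        pred = Counter(p for _, p in joint.elements())
        n = sum(joint.values())
        h_g_given_p = sum(c * (math.log2(pred[p]) - math.log2(c))
                          for (g, p), c in joint.items()) / n
        h_p_given_g = sum(c * (math.log2(gold[g]) - math.log2(c))
                          for (g, p), c in joint.items()) / n
        return h_g_given_p, h_p_given_g, h_g_given_p + h_p_given_g
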
- sys.exit(1) - -N = 0 -gold_frequencies = {} -predict_frequencies = {} -joint_frequencies = {} - -for gline, pline in itertools.izip(ginfile, pinfile): - gparts = gline.split('||| ')[1].split() - pparts = pline.split('||| ')[1].split() - assert len(gparts) == len(pparts) - - for gpart, ppart in zip(gparts, pparts): - gtag = gpart.split(':',1)[1] - ptag = ppart.split(':',1)[1] - - if slash_threshold == None or gtag.count('/') + gtag.count('\\') <= slash_threshold: - joint_frequencies.setdefault((gtag, ptag), 0) - joint_frequencies[gtag,ptag] += 1 - - predict_frequencies.setdefault(ptag, 0) - predict_frequencies[ptag] += 1 - - gold_frequencies.setdefault(gtag, 0) - gold_frequencies[gtag] += 1 - - N += 1 - -# find top tags -gtags = gold_frequencies.items() -gtags.sort(lambda x,y: x[1]-y[1]) -gtags.reverse() -#gtags = gtags[:50] - -preds = predict_frequencies.items() -preds.sort(lambda x,y: x[1]-y[1]) -preds.reverse() - -if show_matrix: - print '%7s %7s' % ('pred', 'cnt'), - for gtag, gcount in gtags: print '%7s' % gtag, - print - print '=' * 80 - - for ptag, pcount in preds: - print '%7s %7d' % (ptag, pcount), - for gtag, gcount in gtags: - print '%7d' % joint_frequencies.get((gtag, ptag), 0), - print - - print '%7s %7d' % ('total', N), - for gtag, gcount in gtags: print '%7d' % gcount, - print - -if output_fname: - offset=10 - - image = Image.new("RGB", (len(preds), len(gtags)), (255, 255, 255)) - #hsl(hue, saturation%, lightness%) - - # re-sort preds to get a better diagonal - ptags=[] - if True: - ptags = map(lambda (p,c): p, preds) - else: - remaining = set(predict_frequencies.keys()) - for y, (gtag, gcount) in enumerate(gtags): - best = (None, 0) - for ptag in remaining: - #pcount = predict_frequencies[ptag] - p = joint_frequencies.get((gtag, ptag), 0)# / float(pcount) - if p > best[1]: best = (ptag, p) - ptags.append(ptag) - remaining.remove(ptag) - if not remaining: break - - print 'Predicted tag ordering:', ' '.join(ptags) - print 'Gold tag ordering:', ' '.join(map(lambda (t,c): t, gtags)) - - draw = ImageDraw.Draw(image) - for x, ptag in enumerate(ptags): - pcount = predict_frequencies[ptag] - minval = math.log(offset) - maxval = math.log(pcount + offset) - for y, (gtag, gcount) in enumerate(gtags): - f = math.log(offset + joint_frequencies.get((gtag, ptag), 0)) - z = int(240. 
* (maxval - f) / float(maxval - minval)) - #print x, y, z, f, maxval - draw.point([(x,y)], fill='hsl(%d, 100%%, 50%%)' % z) - del draw - image.save(output_fname) diff --git a/gi/evaluation/entropy.py b/gi/evaluation/entropy.py deleted file mode 100644 index ec1ef502..00000000 --- a/gi/evaluation/entropy.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -import sys, math, itertools, getopt - -def usage(): - print >>sys.stderr, 'Usage:', sys.argv[0], '[-s slash_threshold] input file' - sys.exit(0) - -optlist, args = getopt.getopt(sys.argv[1:], 'hs:') -slash_threshold = None -for opt, arg in optlist: - if opt == '-s': - slash_threshold = int(arg) - else: - usage() -if len(args) != 1: - usage() - -infile = open(args[0]) -N = 0 -frequencies = {} - -for line in infile: - - for part in line.split('||| ')[1].split(): - tag = part.split(':',1)[1] - - if slash_threshold == None or tag.count('/') + tag.count('\\') <= slash_threshold: - frequencies.setdefault(tag, 0) - frequencies[tag] += 1 - N += 1 - -h = 0 -for tag, c in frequencies.items(): - h -= c * (math.log(c, 2) - math.log(N, 2)) -h /= N - -print 'entropy', h diff --git a/gi/evaluation/extract_ccg_labels.py b/gi/evaluation/extract_ccg_labels.py deleted file mode 100644 index e0034648..00000000 --- a/gi/evaluation/extract_ccg_labels.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python - -# -# Takes spans input along with treebank and spits out CG style categories for each span. -# spans = output from CDEC's extools/extractor with --base_phrase_spans option -# treebank = PTB format, one tree per line -# -# Output is in CDEC labelled-span format -# - -import sys, itertools, tree - -tinfile = open(sys.argv[1]) -einfile = open(sys.argv[2]) - -def number_leaves(node, next=0): - left, right = None, None - for child in node.children: - l, r = number_leaves(child, next) - next = max(next, r+1) - if left == None or l < left: - left = l - if right == None or r > right: - right = r - - #print node, left, right, next - if left == None or right == None: - assert not node.children - left = right = next - - node.left = left - node.right = right - - return left, right - -def ancestor(node, indices): - #print node, node.left, node.right, indices - # returns the deepest node covering all the indices - if min(indices) >= node.left and max(indices) <= node.right: - # try the children - for child in node.children: - x = ancestor(child, indices) - if x: return x - return node - else: - return None - -def frontier(node, indices): - #print 'frontier for node', node, 'indices', indices - if node.left > max(indices) or node.right < min(indices): - #print '\toutside' - return [node] - elif node.children: - #print '\tcovering at least part' - ns = [] - for child in node.children: - n = frontier(child, indices) - ns.extend(n) - return ns - else: - return [node] - -def project_heads(node): - #print 'project_heads', node - is_head = node.data.tag.endswith('-HEAD') - if node.children: - found = 0 - for child in node.children: - x = project_heads(child) - if x: - node.data.tag = x - found += 1 - assert found == 1 - elif is_head: - node.data.tag = node.data.tag[:-len('-HEAD')] - - if is_head: - return node.data.tag - else: - return None - -for tline, eline in itertools.izip(tinfile, einfile): - if tline.strip() != '(())': - if tline.startswith('( '): - tline = tline[2:-1].strip() - tr = tree.parse_PST(tline) - if tr != None: - number_leaves(tr) - #project_heads(tr) # assumes Bikel-style head annotation for the input trees - else: - tr = None - - parts = 
eline.strip().split(" ||| ") - zh, en = parts[:2] - spans = parts[-1] - print '|||', - for span in spans.split(): - sps = span.split(":") - i, j, x, y = map(int, sps[0].split("-")) - - if tr: - a = ancestor(tr, range(x,y)) - try: - fs = frontier(a, range(x,y)) - except: - print >>sys.stderr, "problem with line", tline.strip(), "--", eline.strip() - raise - - #print x, y - #print 'ancestor', a - #print 'frontier', fs - - cat = a.data.tag - for f in fs: - if f.right < x: - cat += '\\' + f.data.tag - else: - break - fs.reverse() - for f in fs: - if f.left >= y: - cat += '/' + f.data.tag - else: - break - else: - cat = 'FAIL' - - print '%d-%d:%s' % (x, y, cat), - print diff --git a/gi/evaluation/tree.py b/gi/evaluation/tree.py deleted file mode 100644 index 702d80b6..00000000 --- a/gi/evaluation/tree.py +++ /dev/null @@ -1,485 +0,0 @@ -import re, sys - -class Symbol: - def __init__(self, nonterm, term=None, var=None): - assert not (term != None and var != None) - self.tag = nonterm - self.token = term - self.variable = var - - def is_variable(self): - return self.variable != None - - def __eq__(self, other): - return self.tag == other.tag and self.token == other.token and self.variable == other.variable - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.tag, self.token, self.variable)) - - def __repr__(self): - return str(self) - - def __cmp__(self, other): - return cmp((self.tag, self.token, self.variable), - (other.tag, other.token, other.variable)) - - def __str__(self): - parts = [] - if False: # DEPENDENCY - if self.token: - parts.append(str(self.token)) - elif self.variable != None: - parts.append('#%d' % self.variable) - if self.tag: - parts.append(str(self.tag)) - return '/'.join(parts) - else: - if self.tag: - parts.append(str(self.tag)) - if self.token: - parts.append(str(self.token)) - elif self.variable != None: - parts.append('#%d' % self.variable) - return ' '.join(parts) - -class TreeNode: - def __init__(self, data, children=None, order=-1): - self.data = data - self.children = [] - self.order = order - self.parent = None - if children: self.children = children - - def insert(self, child): - self.children.append(child) - child.parent = self - - def leaves(self): - ls = [] - for node in self.xtraversal(): - if not node.children: - ls.append(node.data) - return ls - - def leaf_nodes(self): - ls = [] - for node in self.xtraversal(): - if not node.children: - ls.append(node) - return ls - - def max_depth(self): - d = 1 - for child in self.children: - d = max(d, 1 + child.max_depth()) - if not self.children and self.data.token: - d = 2 - return d - - def max_width(self): - w = 0 - for child in self.children: - w += child.max_width() - return max(1, w) - - def num_internal_nodes(self): - if self.children: - n = 1 - for child in self.children: - n += child.num_internal_nodes() - return n - elif self.data.token: - return 1 - else: - return 0 - - def postorder_traversal(self, visit): - """ - Postorder traversal; no guarantee that terminals will be read in the - correct order for dep. trees. - """ - for child in self.children: - child.traversal(visit) - visit(self) - - def traversal(self, visit): - """ - Preorder for phrase structure trees, and inorder for dependency trees. - In both cases the terminals will be read off in the correct order. 
- """ - visited_self = False - if self.order <= 0: - visited_self = True - visit(self) - - for i, child in enumerate(self.children): - child.traversal(visit) - if i + 1 == self.order: - visited_self = True - visit(self) - - assert visited_self - - def xpostorder_traversal(self): - for child in self.children: - for node in child.xpostorder_traversal(): - yield node - yield self - - def xtraversal(self): - visited_self = False - if self.order <= 0: - visited_self = True - yield self - - for i, child in enumerate(self.children): - for d in child.xtraversal(): - yield d - - if i + 1 == self.order: - visited_self = True - yield self - - assert visited_self - - def xpostorder_traversal(self): - for i, child in enumerate(self.children): - for d in child.xpostorder_traversal(): - yield d - yield self - - def edges(self): - es = [] - self.traverse_edges(lambda h,c: es.append((h,c))) - return es - - def traverse_edges(self, visit): - for child in self.children: - visit(self.data, child.data) - child.traverse_edges(visit) - - def subtrees(self, include_self=False): - st = [] - if include_self: - stack = [self] - else: - stack = self.children[:] - - while stack: - node = stack.pop() - st.append(node) - stack.extend(node.children) - return st - - def find_parent(self, node): - try: - index = self.children.index(node) - return self, index - except ValueError: - for child in self.children: - if isinstance(child, TreeNode): - r = child.find_parent(node) - if r: return r - return None - - def is_ancestor_of(self, node): - if self == node: - return True - for child in self.children: - if child.is_ancestor_of(child): - return True - return False - - def find(self, node): - if self == node: - return self - for child in self.children: - if isinstance(child, TreeNode): - r = child.find(node) - if r: return r - else: - if child == node: - return r - return None - - def equals_ignorecase(self, other): - if not isinstance(other, TreeNode): - return False - if self.data != other.data: - return False - if len(self.children) != len(other.children): - return False - for mc, oc in zip(self.children, other.children): - if isinstance(mc, TreeNode): - if not mc.equals_ignorecase(oc): - return False - else: - if mc.lower() != oc.lower(): - return False - return True - - def node_number(self, numbering, next=0): - if self.order <= 0: - numbering[id(self)] = next - next += 1 - - for i, child in enumerate(self.children): - next = child.node_number(numbering, next) - if i + 1 == self.order: - numbering[id(self)] = next - next += 1 - - return next - - def display_conll(self, out): - numbering = {} - self.node_number(numbering) - next = 0 - self.children[0].traversal(lambda x: \ - out.write('%d\t%s\t%s\t%s\t%s\t_\t%d\tLAB\n' \ - % (numbering[id(x)], x.data.token, x.data.token, - x.data.tag, x.data.tag, numbering[id(x.parent)]))) - out.write('\n') - - def size(self): - sz = 1 - for child in self.children: - sz += child.size() - return sz - - def __eq__(self, other): - if isinstance(other, TreeNode) and self.data == other.data \ - and self.children == other.children: - return True - return False - - def __cmp__(self, other): - if not isinstance(other, TreeNode): return 1 - n = cmp(self.data, other.data) - if n != 0: return n - n = len(self.children) - len(other.children) - if n != 0: return n - for sc, oc in zip(self.children, other.children): - n = cmp(sc, oc) - if n != 0: return n - return 0 - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash((self.data, tuple(self.children))) - - 
def __repr__(self): - return str(self) - - def __str__(self): - s = '(' - space = False - if self.order <= 0: - s += str(self.data) - space = True - for i, child in enumerate(self.children): - if space: s += ' ' - s += str(child) - space = True - if i+1 == self.order: - s += ' ' + str(self.data) - return s + ')' - -def read_PSTs(fname): - infile = open(fname) - trees = [] - for line in infile: - trees.append(parse_PST(line.strip())) - infile.close() - return trees - -def parse_PST_multiline(infile, hash_is_var=True): - buf = '' - num_open = 0 - while True: - line = infile.readline() - if not line: - return None - buf += ' ' + line.rstrip() - num_open += line.count('(') - line.count(')') - if num_open == 0: - break - - return parse_PST(buf, hash_is_var) - -def parse_PST(line, hash_is_var=True): - line = line.rstrip() - if not line or line.lower() == 'null': - return None - - # allow either (a/DT) or (DT a) - #parts_re = re.compile(r'(\(*)([^/)]*)(?:/([^)]*))?(\)*)$') - - # only allow (DT a) - parts_re = re.compile(r'(\(*)([^)]*)(\)*)$') - - root = TreeNode(Symbol('TOP')) - stack = [root] - for part in line.rstrip().split(): - m = parts_re.match(part) - #opening, tok_or_tag, tag, closing = m.groups() - opening, tok_or_tag, closing = m.groups() - tag = None - #print 'token', part, 'bits', m.groups() - for i in opening: - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if tag: - stack[-1].data.tag = tag - if hash_is_var and tok_or_tag.startswith('#'): - stack[-1].data.variable = int(tok_or_tag[1:]) - else: - stack[-1].data.token = tok_or_tag - else: - if stack[-1].data.tag == None: - stack[-1].data.tag = tok_or_tag - else: - if hash_is_var and tok_or_tag.startswith('#'): - try: - stack[-1].data.variable = int(tok_or_tag[1:]) - except ValueError: # it's really a token! 
- #print >>sys.stderr, 'Warning: # used for token:', tok_or_tag - stack[-1].data.token = tok_or_tag - else: - stack[-1].data.token = tok_or_tag - - for i in closing: - stack.pop() - - #assert str(root.children[0]) == line - return root.children[0] - -def read_DTs(fname): - infile = open(fname) - trees = [] - while True: - t = parse_DT(infile) - if t: trees.append(t) - else: break - infile.close() - return trees - -def read_bracketed_DTs(fname): - infile = open(fname) - trees = [] - for line in infile: - trees.append(parse_bracketed_DT(line)) - infile.close() - return trees - -def parse_DT(infile): - tokens = [Symbol('ROOT')] - children = {} - - for line in infile: - parts = line.rstrip().split() - #print parts - if not parts: break - index = len(tokens) - token = parts[1] - tag = parts[3] - parent = int(parts[6]) - if token.startswith('#'): - tokens.append(Symbol(tag, var=int(token[1:]))) - else: - tokens.append(Symbol(tag, token)) - children.setdefault(parent, set()).add(index) - - if len(tokens) == 1: return None - - root = TreeNode(Symbol('ROOT'), [], 0) - schedule = [] - for child in sorted(children[0]): - schedule.append((root, child)) - - while schedule: - parent, index = schedule[0] - del schedule[0] - - node = TreeNode(tokens[index]) - node.order = 0 - parent.insert(node) - - for child in sorted(children.get(index, [])): - schedule.append((node, child)) - if child < index: - node.order += 1 - - return root - -_bracket_split_re = re.compile(r'([(]*)([^)/]*)(?:/([^)]*))?([)]*)') - -def parse_bracketed_DT(line, insert_root=True): - line = line.rstrip() - if not line or line == 'NULL': return None - #print line - - root = TreeNode(Symbol('ROOT')) - stack = [root] - for part in line.rstrip().split(): - m = _bracket_split_re.match(part) - - for c in m.group(1): - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if m.group(3) != None: - if m.group(2).startswith('#'): - stack[-1].data.variable = int(m.group(2)[1:]) - else: - stack[-1].data.token = m.group(2) - stack[-1].data.tag = m.group(3) - else: - stack[-1].data.tag = m.group(2) - stack[-1].order = len(stack[-1].children) - # FIXME: also check for vars - - for c in m.group(4): - stack.pop() - - assert len(stack) == 1 - if not insert_root or root.children[0].data.tag == 'ROOT': - return root.children[0] - else: - return root - -_bracket_split_notag_re = re.compile(r'([(]*)([^)/]*)([)]*)') - -def parse_bracketed_untagged_DT(line): - line = line.rstrip() - if not line or line == 'NULL': return None - - root = TreeNode(Symbol('TOP')) - stack = [root] - for part in line.rstrip().split(): - m = _bracket_split_notag_re.match(part) - - for c in m.group(1): - node = TreeNode(Symbol(None)) - stack[-1].insert(node) - stack.append(node) - - if stack[-1].data.token == None: - stack[-1].data.token = m.group(2) - stack[-1].order = len(stack[-1].children) - else: - child = TreeNode(Symbol(nonterm=None, term=m.group(2))) - stack[-1].insert(child) - - for c in m.group(3): - stack.pop() - - return root.children[0] diff --git a/gi/markov_al/Makefile.am b/gi/markov_al/Makefile.am deleted file mode 100644 index fe3e3349..00000000 --- a/gi/markov_al/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = ml - -ml_SOURCES = ml.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -AM_LDFLAGS = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/markov_al/README b/gi/markov_al/README deleted file mode 100644 index 
9c10f7cd..00000000 --- a/gi/markov_al/README +++ /dev/null @@ -1,2 +0,0 @@ -Experimental translation models with Markovian dependencies. - diff --git a/gi/markov_al/ml.cc b/gi/markov_al/ml.cc deleted file mode 100644 index 1e71edd6..00000000 --- a/gi/markov_al/ml.cc +++ /dev/null @@ -1,470 +0,0 @@ -#include -#include - -#include -#include -#include -#include - -#include "tdict.h" -#include "filelib.h" -#include "sampler.h" -#include "ccrp_onetable.h" -#include "array2d.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -void PrintTopCustomers(const CCRP_OneTable& crp) { - for (CCRP_OneTable::const_iterator it = crp.begin(); it != crp.end(); ++it) { - cerr << " " << TD::Convert(it->first) << " = " << it->second << endl; - } -} - -void PrintAlignment(const vector& src, const vector& trg, const vector& a) { - cerr << TD::GetString(src) << endl << TD::GetString(trg) << endl; - Array2D al(src.size(), trg.size()); - for (int i = 0; i < a.size(); ++i) - if (a[i] != 255) al(a[i], i) = true; - cerr << al << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct Unigram; -struct Bigram { - Bigram() : trg(), cond() {} - Bigram(WordID prev, WordID cur, WordID t) : trg(t) { cond.first = prev; cond.second = cur; } - const pair& ConditioningPair() const { - return cond; - } - WordID& prev_src() { return cond.first; } - WordID& cur_src() { return cond.second; } - const WordID& prev_src() const { return cond.first; } - const WordID& cur_src() const { return cond.second; } - WordID trg; - private: - pair cond; -}; - -struct Unigram { - Unigram() : cur_src(), trg() {} - Unigram(WordID s, WordID t) : cur_src(s), trg(t) {} - WordID cur_src; - WordID trg; -}; - -ostream& operator<<(ostream& os, const Bigram& b) { - os << "( " << TD::Convert(b.trg) << " | " << TD::Convert(b.prev_src()) << " , " << TD::Convert(b.cur_src()) << " )"; - return os; -} - -ostream& operator<<(ostream& os, const Unigram& u) { - os << "( " << TD::Convert(u.trg) << " | " << TD::Convert(u.cur_src) << " )"; - return os; -} - -bool operator==(const Bigram& a, const Bigram& b) { - return a.trg == b.trg && a.cur_src() == b.cur_src() && a.prev_src() == b.prev_src(); -} - -bool operator==(const Unigram& a, const Unigram& b) { - return a.trg == b.trg && a.cur_src == b.cur_src; -} - -size_t hash_value(const Bigram& b) { - size_t h = boost::hash_value(b.prev_src()); - boost::hash_combine(h, boost::hash_value(b.cur_src())); - boost::hash_combine(h, boost::hash_value(b.trg)); - return h; -} - -size_t hash_value(const Unigram& u) { - size_t h = boost::hash_value(u.cur_src); - 
boost::hash_combine(h, boost::hash_value(u.trg)); - return h; -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UnigramModel { - UnigramModel(size_t src_voc_size, size_t trg_voc_size) : - unigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), - p0(1.0 / trg_voc_size) {} - - void increment(const Bigram& b) { - unigrams[b.cur_src()].increment(b.trg); - } - - void decrement(const Bigram& b) { - unigrams[b.cur_src()].decrement(b.trg); - } - - double prob(const Bigram& b) const { - const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); - return q0; - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < unigrams.size(); ++i) { - const CCRP_OneTable& crp = unigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - return llh; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < unigrams.size(); ++i) - unigrams[i].resample_hyperparameters(rng); - } - - vector > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src) - - const double p0; -}; - -struct BigramModel { - BigramModel(size_t src_voc_size, size_t trg_voc_size) : - unigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), - p0(1.0 / trg_voc_size) {} - - void increment(const Bigram& b) { - BigramMap::iterator it = bigrams.find(b.ConditioningPair()); - if (it == bigrams.end()) { - it = bigrams.insert(make_pair(b.ConditioningPair(), CCRP_OneTable(1,1,1,1))).first; - } - if (it->second.increment(b.trg)) - unigrams[b.cur_src()].increment(b.trg); - } - - void decrement(const Bigram& b) { - BigramMap::iterator it = bigrams.find(b.ConditioningPair()); - assert(it != bigrams.end()); - if (it->second.decrement(b.trg)) { - unigrams[b.cur_src()].decrement(b.trg); - if (it->second.num_customers() == 0) - bigrams.erase(it); - } - } - - double prob(const Bigram& b) const { - const double q0 = unigrams[b.cur_src()].prob(b.trg, p0); - const BigramMap::const_iterator it = bigrams.find(b.ConditioningPair()); - if (it == bigrams.end()) return q0; - return it->second.prob(b.trg, q0); - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < unigrams.size(); ++i) { - const CCRP_OneTable& crp = unigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - for (BigramMap::const_iterator it = bigrams.begin(); it != bigrams.end(); ++it) { - const CCRP_OneTable& crp = it->second; - const WordID cur_src = it->first.second; - llh += crp.log_crp_prob(); - for (CCRP_OneTable::const_iterator bit = crp.begin(); bit != crp.end(); ++bit) { - llh += log(unigrams[cur_src].prob(bit->second, p0)); - } 
- } - return llh; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < unigrams.size(); ++i) - unigrams[i].resample_hyperparameters(rng); - for (BigramMap::iterator it = bigrams.begin(); it != bigrams.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - typedef unordered_map, CCRP_OneTable, boost::hash > > BigramMap; - BigramMap bigrams; // bigrams[(src-1,src)].prob(trg, q0) = p(trg|src,src-1) - vector > unigrams; // unigrams[src].prob(trg, p0) = p(trg|src) - - const double p0; -}; - -struct BigramAlignmentModel { - BigramAlignmentModel(size_t src_voc_size, size_t trg_voc_size) : bigrams(TD::NumWords() + 1, CCRP_OneTable(1,1,1,1)), p0(1.0 / src_voc_size) {} - void increment(WordID prev, WordID next) { - bigrams[prev].increment(next); // hierarchy? - } - void decrement(WordID prev, WordID next) { - bigrams[prev].decrement(next); // hierarchy? - } - double prob(WordID prev, WordID next) { - return bigrams[prev].prob(next, p0); - } - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < bigrams.size(); ++i) { - const CCRP_OneTable& crp = bigrams[i]; - if (crp.num_customers() > 0) { - llh += crp.log_crp_prob(); - llh += crp.num_tables() * log(p0); - } - } - return llh; - } - - vector > bigrams; // bigrams[prev].prob(next, p0) = p(next|prev) - const double p0; -}; - -struct Alignment { - vector a; -}; - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned samples = conf["samples"].as(); - - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - const size_t corpus_len = corpusf.size(); - const WordID kNULL = TD::Convert(""); - const WordID kBOS = TD::Convert(""); - const WordID kEOS = TD::Convert(""); - Bigram TT(kBOS, TD::Convert("我"), TD::Convert("i")); - Bigram TT2(kBOS, TD::Convert("要"), TD::Convert("i")); - - UnigramModel model(vocabf.size(), vocabe.size()); - vector alignments(corpus_len); - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - alg.resize(trg.size()); - int lenp1 = src.size() + 1; - WordID prev_src = kBOS; - for (int j = 0; j < trg.size(); ++j) { - int samp = lenp1 * rng.next(); - --samp; - if (samp < 0) samp = 255; - alg[j] = samp; - WordID cur_src = (samp == 255 ? kNULL : src[alg[j]]); - Bigram b(prev_src, cur_src, trg[j]); - model.increment(b); - prev_src = cur_src; - } - Bigram b(prev_src, kEOS, kEOS); - model.increment(b); - } - cerr << "Initial LLH: " << model.LogLikelihood() << endl; - - SampleSet ss; - for (unsigned si = 0; si < 50; ++si) { - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (unsigned j = 0; j < trg.size(); ++j) { - unsigned char& a_j = alg[j]; - WordID cur_e_a_j = (a_j == 255 ? 
kNULL : src[a_j]); - Bigram b(prev_src, cur_e_a_j, trg[j]); - //cerr << "DEC: " << b << "\t" << nextb << endl; - model.decrement(b); - ss.clear(); - for (unsigned i = 0; i <= src.size(); ++i) { - const WordID cur_src = (i ? src[i-1] : kNULL); - b.cur_src() = cur_src; - ss.add(model.prob(b)); - } - int sampled_a_j = rng.SelectSample(ss); - a_j = (sampled_a_j ? sampled_a_j - 1 : 255); - cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - b.cur_src() = cur_e_a_j; - //cerr << "INC: " << b << "\t" << nextb << endl; - model.increment(b); - prev_src = cur_e_a_j; - } - } - cerr << '.' << flush; - if (si % 10 == 9) { - cerr << "[LLH prev=" << model.LogLikelihood(); - //model.ResampleHyperparameters(&rng); - cerr << " new=" << model.LogLikelihood() << "]\n"; - //pair xx = make_pair(kBOS, TD::Convert("我")); - //PrintTopCustomers(model.bigrams.find(xx)->second); - cerr << "p(" << TT << ") = " << model.prob(TT) << endl; - cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; - PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); - } - } - { - // MODEL 2 - BigramModel model(vocabf.size(), vocabe.size()); - BigramAlignmentModel amodel(vocabf.size(), vocabe.size()); - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (int j = 0; j < trg.size(); ++j) { - WordID cur_src = (alg[j] == 255 ? kNULL : src[alg[j]]); - Bigram b(prev_src, cur_src, trg[j]); - model.increment(b); - amodel.increment(prev_src, cur_src); - prev_src = cur_src; - } - amodel.increment(prev_src, kEOS); - Bigram b(prev_src, kEOS, kEOS); - model.increment(b); - } - cerr << "Initial LLH: " << model.LogLikelihood() << " " << amodel.LogLikelihood() << endl; - - SampleSet ss; - for (unsigned si = 0; si < samples; ++si) { - for (unsigned ci = 0; ci < corpus_len; ++ci) { - const vector& src = corpusf[ci]; - const vector& trg = corpuse[ci]; - vector& alg = alignments[ci].a; - WordID prev_src = kBOS; - for (unsigned j = 0; j < trg.size(); ++j) { - unsigned char& a_j = alg[j]; - WordID cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - Bigram b(prev_src, cur_e_a_j, trg[j]); - WordID next_src = kEOS; - WordID next_trg = kEOS; - if (j < (trg.size() - 1)) { - next_src = (alg[j+1] == 255 ? kNULL : src[alg[j + 1]]); - next_trg = trg[j + 1]; - } - Bigram nextb(cur_e_a_j, next_src, next_trg); - //cerr << "DEC: " << b << "\t" << nextb << endl; - model.decrement(b); - model.decrement(nextb); - amodel.decrement(prev_src, cur_e_a_j); - amodel.decrement(cur_e_a_j, next_src); - ss.clear(); - for (unsigned i = 0; i <= src.size(); ++i) { - const WordID cur_src = (i ? src[i-1] : kNULL); - b.cur_src() = cur_src; - ss.add(model.prob(b) * model.prob(nextb) * amodel.prob(prev_src, cur_src) * amodel.prob(cur_src, next_src)); - //cerr << log(ss[ss.size() - 1]) << "\t" << b << endl; - } - int sampled_a_j = rng.SelectSample(ss); - a_j = (sampled_a_j ? sampled_a_j - 1 : 255); - cur_e_a_j = (a_j == 255 ? kNULL : src[a_j]); - b.cur_src() = cur_e_a_j; - nextb.prev_src() = cur_e_a_j; - //cerr << "INC: " << b << "\t" << nextb << endl; - //exit(1); - model.increment(b); - model.increment(nextb); - amodel.increment(prev_src, cur_e_a_j); - amodel.increment(cur_e_a_j, next_src); - prev_src = cur_e_a_j; - } - } - cerr << '.' 
<< flush; - if (si % 10 == 9) { - cerr << "[LLH prev=" << (model.LogLikelihood() + amodel.LogLikelihood()); - //model.ResampleHyperparameters(&rng); - cerr << " new=" << model.LogLikelihood() << "]\n"; - pair xx = make_pair(kBOS, TD::Convert("我")); - cerr << "p(" << TT << ") = " << model.prob(TT) << endl; - cerr << "p(" << TT2 << ") = " << model.prob(TT2) << endl; - pair xx2 = make_pair(kBOS, TD::Convert("要")); - PrintTopCustomers(model.bigrams.find(xx)->second); - //PrintTopCustomers(amodel.bigrams[TD::Convert("")]); - //PrintTopCustomers(model.unigrams[TD::Convert("")]); - PrintAlignment(corpusf[0], corpuse[0], alignments[0].a); - } - } - } - return 0; -} - diff --git a/gi/morf-segmentation/filter_docs.pl b/gi/morf-segmentation/filter_docs.pl deleted file mode 100755 index a78575da..00000000 --- a/gi/morf-segmentation/filter_docs.pl +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/perl - -#Filters the phrase&cluster document set to retain only documents that correspond to words or morphs, i.e. not crossing word boundaries. - -#Usage: filter_docs.pl [mark] -# STDIN: data in the doc.txt format (i.e. phrase\t blahblah ), most likely from cdec extractor -# STDOUT: the matching subset, same format - -use utf8; -my $letter=qr/\p{L}\p{M}*/; # see http://www.regular-expressions.info/unicode.html - -my $morph=qr/$letter+/; - -my $m = "##"; # marker used to indicate morphemes -if ((scalar @ARGV) >= 1) { - $m = $ARGV[0]; - shift; -} -print STDERR "Using $m to filter for morphemes\n"; - -my $expr = qr/^($morph\Q$m\E)? ?(\Q$m\E$morph\Q$m\E)* ?(\Q$m\E$morph)?\t/; #\Q and \E bounded sections are escaped -while(<>) { - /$expr/ && print; -} diff --git a/gi/morf-segmentation/invalid_vocab.patterns b/gi/morf-segmentation/invalid_vocab.patterns deleted file mode 100644 index 473ce1b1..00000000 --- a/gi/morf-segmentation/invalid_vocab.patterns +++ /dev/null @@ -1,6 +0,0 @@ -[[:digit:]] -[] !"#$%&()*+,./:;<=>?@[\^_`{|}~] -^'$ --$ -^- -^$ diff --git a/gi/morf-segmentation/linestripper.py b/gi/morf-segmentation/linestripper.py deleted file mode 100755 index 04e9044a..00000000 --- a/gi/morf-segmentation/linestripper.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/python - -import sys - -#linestripper file file maxlen [numlines] - -if len(sys.argv) < 3: - print "linestripper file1 file2 maxlen [numlines]" - print " outputs subset of file1 to stdout, ..of file2 to stderr" - sys.exit(1) - - -f1 = open(sys.argv[1],'r') -f2 = open(sys.argv[2],'r') - -maxlen=int(sys.argv[3]) -numlines = 0 - -if len(sys.argv) > 4: - numlines = int(sys.argv[4]) - -count=0 -for line1 in f1: - line2 = f2.readline() - - w1 = len(line1.strip().split()) - w2 = len(line2.strip().split()) - - if w1 <= maxlen and w2 <= maxlen: - count = count + 1 - sys.stdout.write(line1) - sys.stderr.write(line2) - - if numlines > 0 and count >= numlines: - break - -f1.close() -f2.close() - - diff --git a/gi/morf-segmentation/morf-pipeline.pl b/gi/morf-segmentation/morf-pipeline.pl deleted file mode 100755 index 46eb5b46..00000000 --- a/gi/morf-segmentation/morf-pipeline.pl +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - - -# Preprocessing pipeline to take care of word segmentation -# Learns a segmentation model for each/either side of the parallel corpus using all train/dev/test data -# Applies the segmentation where necessary. -# Learns word alignments on the preprocessed training data. -# Outputs script files used later to score output. 
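
[Editor's note, not part of the original patch] The header comment above summarizes the stages of the removed morf-pipeline.pl driver: length-filter the parallel training corpus, learn a Morfessor segmentation model for whichever sides are to be split, apply that segmentation where needed, and finally word-align the preprocessed training data. The following is a minimal Python sketch of how those stages chain together, given purely as an illustration: preprocess() and its defaults are hypothetical, and the commands mirror the calls made by the Perl code that follows (linestripper.py, morftrain.sh, morfsegment.py, the Berkeley aligner); the real script also pools dev/test data when training the segmentation model.

# Illustrative sketch only -- preprocess() and its defaults are hypothetical;
# the individual commands mirror the deleted morf-pipeline.pl.
import subprocess

def run(cmd):
    """Run a shell command and fail loudly, like safesystem() in the Perl script."""
    subprocess.check_call(cmd, shell=True)

def preprocess(src, trg, split="st", max_words=40, ppl=50, marker="#"):
    # 1. Length-filter the parallel training corpus (linestripper.py writes the
    #    source subset to stdout and the target subset to stderr).
    run(f"./linestripper.py {src} {trg} {max_words} 1> corpus.src 2> corpus.trg")
    # 2. Train a Morfessor segmentation model for each side that should be split
    #    ('s' = source, 't' = target, 'st' = both), then apply it.
    for side, corpus in (("s", "corpus.src"), ("t", "corpus.trg")):
        if side in split:
            run(f"gzip -c {corpus} > all.{side}.gz")
            run(f"./morftrain.sh all.{side}.gz model.{side} {ppl} '{marker}'")
            run(f"cat {corpus} | ./morfsegment.py model.{side}/inputvocab.gz "
                f"model.{side}/segmentation.ready '{marker}' > {corpus}.split")
    # 3. Word-align the (possibly segmented) training data with the Berkeley
    #    aligner; the real script then pastes src/trg/alignments into one file.
    run("java -d64 -Xmx24g -jar berkeleyaligner.jar ++word-align.conf")

if __name__ == "__main__":
    preprocess("train.src", "train.trg")
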
- - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; - -my $MORF_TRAIN = "$SCRIPT_DIR/morftrain.sh"; -my $MORF_SEGMENT = "$SCRIPT_DIR/morfsegment.py"; - -my $LINESTRIPPER = "$SCRIPT_DIR/linestripper.py"; -my $ALIGNER = "/export/ws10smt/software/berkeleyaligner/berkeleyaligner.jar"; -#java -d64 -Xmx10g -jar $ALIGNER ++word-align.conf >> aligner.log -assert_exec($MORF_TRAIN, $LINESTRIPPER, $MORF_SEGMENT, $ALIGNER); - -my $OUTPUT = './morfwork'; -my $PPL_SRC = 50; -my $PPL_TRG = 50; -my $MARKER = "#"; -my $MAX_WORDS = 40; -my $SENTENCES;# = 100000; -my $SPLIT_TYPE = ""; #possible values: s, t, st, or (empty string) -my $NAME_SHORTCUT; - -usage() unless &GetOptions('max_words=i' => \$MAX_WORDS, - 'output=s' => \$OUTPUT, - 'ppl_src=i' => \$PPL_SRC, - 'ppl_trg=i' => \$PPL_TRG, - 'sentences=i' => \$SENTENCES, - 'marker=s' => \$MARKER, - 'split=s' => \$SPLIT_TYPE, - 'get_name_only' => \$NAME_SHORTCUT, - ); - -usage() unless scalar @ARGV >= 2; - -my %CORPUS; # for (src,trg) it has (orig, name, filtered, final) - -$CORPUS{'src'}{'orig'} = $ARGV[0]; -open F, "<$CORPUS{'src'}{'orig'}" or die "Can't read $CORPUS{'src'}{'orig'}: $!"; close F; -$CORPUS{'src'}{'name'} = get_basename($CORPUS{'src'}{'orig'}); - -$CORPUS{'trg'}{'orig'} = $ARGV[1]; -open F, "<$CORPUS{'trg'}{'orig'}" or die "Can't read $CORPUS{'trg'}{'orig'}: $!"; close F; -$CORPUS{'trg'}{'name'} = get_basename($CORPUS{'trg'}{'orig'}); - -my %DEV; # for (src,trg) has (orig, final.split final.unsplit -if (@ARGV >= 4) { - $DEV{'src'}{'orig'} = $ARGV[2]; - open F, "<$DEV{'src'}{'orig'}" or die "Can't read $DEV{'src'}{'orig'}: $!"; close F; - $DEV{'src'}{'name'} = get_basename($DEV{'src'}{'orig'}); - $DEV{'trg'}{'orig'} = $ARGV[3]; - open F, "<$DEV{'trg'}{'orig'}" or die "Can't read $DEV{'trg'}{'orig'}: $!"; close F; - $DEV{'trg'}{'name'} = get_basename($DEV{'trg'}{'orig'}); -} - -my %TEST; # for (src,trg) has (orig, name) -if (@ARGV >= 6) { - $TEST{'src'}{'orig'} = $ARGV[4]; - open F, "<$TEST{'src'}{'orig'}" or die "Can't read $TEST{'src'}{'orig'}: $!"; close F; - $TEST{'src'}{'name'} = get_basename($TEST{'src'}{'orig'}); - $TEST{'trg'}{'orig'} = $ARGV[5]; - open F, "<$TEST{'trg'}{'orig'}" or die "Can't read $TEST{'trg'}{'orig'}: $!"; close F; - $TEST{'trg'}{'name'} = get_basename($TEST{'trg'}{'orig'}); -} - -my $SPLIT_SRC; #use these to check whether that part is being split -my $SPLIT_TRG; - -#OUTPUT WILL GO IN THESE -my $CORPUS_DIR = $OUTPUT . '/' . corpus_dir(); #subsampled corpus -my $MODEL_SRC_DIR = $OUTPUT . '/' . model_dir("src"); #splitting.. -my $MODEL_TRG_DIR = $OUTPUT . '/' . model_dir("trg"); # .. models -my $PROCESSED_DIR = $OUTPUT . '/' . processed_dir(); #segmented copora+alignments -my $ALIGNMENT_DIR = $PROCESSED_DIR . '/alignments'; - -$CORPUS{'src'}{'filtered'} = $CORPUS_DIR . "/$CORPUS{'src'}{'name'}"; -$CORPUS{'trg'}{'filtered'} = $CORPUS_DIR . 
"/$CORPUS{'trg'}{'name'}"; - -print STDERR "Output: $OUTPUT\n"; -print STDERR "Corpus: $CORPUS_DIR\n"; -print STDERR "Model-src: $MODEL_SRC_DIR\n"; -print STDERR "Model-trg: $MODEL_TRG_DIR\n"; -print STDERR "Finaldir: $PROCESSED_DIR\n"; - -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($CORPUS_DIR) or die "Couldn't create output directory $CORPUS_DIR: $!"; -filter_corpus(); - -safemkdir($PROCESSED_DIR); -safemkdir($ALIGNMENT_DIR); - -if ($SPLIT_SRC) { - safemkdir($MODEL_SRC_DIR) or die "Couldn't create output directory $MODEL_SRC_DIR: $!"; - learn_segmentation("src"); - apply_segmentation_side("src", $MODEL_SRC_DIR); -} - -#assume that unsplit hypotheses will be scored against an aritificially split target test set; thus obtain a target splitting model -#TODO: add a flag to override this behaviour -safemkdir($MODEL_TRG_DIR) or die "Couldn't create output directory $MODEL_TRG_DIR: $!"; -learn_segmentation("trg"); -$TEST{'trg'}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}"; -copy($TEST{'trg'}{'orig'}, $TEST{'trg'}{'finalunsplit'}) or die "Could not copy unsegmented test set"; - -if ($SPLIT_TRG) { - apply_segmentation_side("trg", $MODEL_TRG_DIR); - } else { - $TEST{'trg'}{'finalsplit'} = "$PROCESSED_DIR/$TEST{'trg'}{'name'}.split"; - apply_segmentation_any($MODEL_TRG_DIR, $TEST{'trg'}{'finalunsplit'}, $TEST{'trg'}{'finalsplit'}); -} - -write_eval_sh("$PROCESSED_DIR/eval-devtest.sh"); - -#copy corpora if they haven't been put in place by splitting operations -place_missing_data_side('src'); -place_missing_data_side('trg'); - -do_align(); - -if ($CORPUS{'src'}{'orig'} && $DEV{'src'}{'orig'} && $TEST{'src'}{'orig'}) { - print STDERR "Putting the config file entry in $PROCESSED_DIR/exp.config\n"; -#format is: - # nlfr100k_unsplit /export/ws10smt/jan/nlfr/morfwork/s100k.w40.sp_0 corpus.nl-fr.al fr-3.lm.gz dev.nl dev.fr test2008.nl eval-devtest.sh - my $line = split_name() . " $PROCESSED_DIR corpus.src-trg.al LMFILE.lm.gz"; - $line = $line . " $DEV{'src'}{'name'} $DEV{'trg'}{'name'}"; - $line = $line . " " . get_basename($TEST{'src'}{$SPLIT_SRC ? "finalsplit" : "finalunsplit"}) . " eval-devtest.sh"; - safesystem("echo '$line' > $PROCESSED_DIR/exp.config"); -} - -system("date"); -print STDERR "All done. You now need to train a language model (if target split), put it in the right dir and update the config file.\n\n"; - -############################## BILINGUAL ################################### - -sub filter_corpus { - print STDERR "\n!!!FILTERING TRAINING COPRUS!!!\n"; - if ( -f $CORPUS{'src'}{'filtered'} && -f $CORPUS{'trg'}{'filtered'}) { - print STDERR "$CORPUS{'src'}{'filtered'} and $CORPUS{'trg'}{'filtered'} exist, reusing...\n"; - return; - } - my $args = "$CORPUS{'src'}{'orig'} $CORPUS{'trg'}{'orig'} $MAX_WORDS"; - if ($SENTENCES) { $args = $args . 
" $SENTENCES"; } - safesystem("$LINESTRIPPER $args 1> $CORPUS{'src'}{'filtered'} 2> $CORPUS{'trg'}{'filtered'}") or die "Failed to filter training corpus for length."; -} - -sub learn_segmentation -{ - my $WHICH = shift; - my $corpus; my $dev; my $test; my $moddir; my $ppl; - - $corpus = $CORPUS{$WHICH}{'filtered'}; - $dev = $DEV{$WHICH}{'orig'}; - $test = $TEST{$WHICH}{'orig'}; - - if ($WHICH eq "src") { - $moddir = $MODEL_SRC_DIR; - $ppl = $PPL_SRC; - } else { - $moddir = $MODEL_TRG_DIR; - $ppl = $PPL_TRG; - } - my $cmd = "cat $corpus"; - if ($dev) { $cmd = "$cmd $dev"; } - if ($test) { $cmd = "$cmd $test"; } - my $tmpfile = "$CORPUS_DIR/all.tmp.gz"; - safesystem("$cmd | $GZIP > $tmpfile") or die "Failed to concatenate data for model learning.."; - assert_marker($tmpfile); - - learn_segmentation_side($tmpfile, $moddir, $ppl, $WHICH); - safesystem("rm $tmpfile"); -} - -sub do_align { - print STDERR "\n!!!WORD ALIGNMENT!!!\n"; - system("date"); - - my $ALIGNMENTS = "$ALIGNMENT_DIR/training.align"; - if ( -f $ALIGNMENTS ) { - print STDERR "$ALIGNMENTS exists, reusing...\n"; - return; - } - my $conf_file = "$ALIGNMENT_DIR/word-align.conf"; - - #decorate training files with identifiers to stop the aligner from training on dev and test when rerun in future. - safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'src'}{'name'} corpus.src") or die "Failed to symlink: $!"; - safesystem("cd $PROCESSED_DIR && ln -s $CORPUS{'trg'}{'name'} corpus.trg") or die "Failed to symlink: $!"; - - write_wconf($conf_file, $PROCESSED_DIR); - system("java -d64 -Xmx24g -jar $ALIGNER ++$conf_file > $ALIGNMENT_DIR/aligner.log"); - - if (! -f $ALIGNMENTS) { die "Failed to run word alignment.";} - - my $cmd = "paste $PROCESSED_DIR/corpus.src $PROCESSED_DIR/corpus.trg $ALIGNMENTS"; - $cmd = $cmd . " | sed 's/\\t/ \|\|\| /g' > $PROCESSED_DIR/corpus.src-trg.al"; - safesystem($cmd) or die "Failed to paste into aligned corpus file."; - -} - -############################# MONOLINGUAL ################################# - -#copy the necessary data files that weren't place by segmentation -sub place_missing_data_side { - my $side = shift; - - ifne_copy($CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}") ; - - if ($DEV{$side}{'orig'} && ! -f "$PROCESSED_DIR/$DEV{$side}{'name'}") { - $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; - copy($DEV{$side}{'orig'}, $DEV{$side}{'final'}) or die "Copy failed: $!"; - } - - if ($TEST{$side}{'orig'} && ! -f "$PROCESSED_DIR/$TEST{$side}{'name'}" && ! $TEST{$side}{'finalunsplit'}) { - $TEST{$side}{'finalunsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}"; - copy($TEST{$side}{'orig'}, $TEST{$side}{'finalunsplit'}) or die "Copy failed: $!"; - } - -} - -sub apply_segmentation_side { - my ($side, $moddir) = @_; - - print STDERR "\n!!!APPLYING SEGMENTATION MODEL ($side)!!!\n"; - apply_segmentation_any($moddir, $CORPUS{$side}{'filtered'}, "$PROCESSED_DIR/$CORPUS{$side}{'name'}"); - if ($DEV{$side}{'orig'}) { - $DEV{$side}{'final'} = "$PROCESSED_DIR/$DEV{$side}{'name'}"; - apply_segmentation_any($moddir, $DEV{$side}{'orig'}, "$DEV{$side}{'final'}"); - } - if ($TEST{$side}{'orig'}) { - $TEST{$side}{'finalsplit'} = "$PROCESSED_DIR/$TEST{$side}{'name'}.split"; - apply_segmentation_any($moddir, $TEST{$side}{'orig'}, $TEST{$side}{'finalsplit'} ); - } - -} - -sub learn_segmentation_side { - my($INPUT_FILE, $SEGOUT_DIR, $PPL, $LANG) = @_; - - print STDERR "\n!!!LEARNING SEGMENTATION MODEL ($LANG)!!!\n"; - system("date"); - my $SEG_FILE = $SEGOUT_DIR . 
"/segmentation.ready"; - if ( -f $SEG_FILE) { - print STDERR "$SEG_FILE exists, reusing...\n"; - return; - } - my $cmd = "$MORF_TRAIN $INPUT_FILE $SEGOUT_DIR $PPL \"$MARKER\""; - safesystem($cmd) or die "Failed to learn segmentation model"; -} - -sub apply_segmentation_any { - my($moddir, $datfile, $outfile) = @_; - if ( -f $outfile) { - print STDERR "$outfile exists, reusing...\n"; - return; - } - - my $args = "$moddir/inputvocab.gz $moddir/segmentation.ready \"$MARKER\""; - safesystem("cat $datfile | $MORF_SEGMENT $args &> $outfile") or die "Could not segment $datfile"; -} - -##################### PATH FUNCTIONS ########################## - -sub beautify_numlines { - return ($SENTENCES ? $SENTENCES : "_all"); -} - -sub corpus_dir { - return "s" . beautify_numlines() . ".w" . $MAX_WORDS; -} - -sub model_dir { - my $lang = shift; - if ($lang eq "src") { - return corpus_dir() . ".PPL" . $PPL_SRC . ".src"; - } elsif ($lang eq "trg") { - return corpus_dir() . ".PPL" . $PPL_TRG . ".trg"; - } else { - return "PPLundef"; - } -} - -sub processed_dir { - return corpus_dir() . "." . split_name(); -} - -########################## HELPER FUNCTIONS ############################ - -sub ifne_copy { - my ($src, $dest) = @_; - if (! -f $dest) { - copy($src, $dest) or die "Copy failed: $!"; - } -} - -sub split_name { - #parses SPLIT_TYPE, which can have the following values - # t|s|ts|st (last 2 are equiv) - # or is undefined when no splitting is done - my $name = ""; - - if ($SPLIT_TYPE) { - $SPLIT_SRC = lc($SPLIT_TYPE) =~ /s/; - $SPLIT_TRG = lc($SPLIT_TYPE) =~ /t/; - $name = $name . ($SPLIT_SRC ? $PPL_SRC : "0"); - $name = $name . "_" . ($SPLIT_TRG ? $PPL_TRG : "0"); - } else { - #no splitting - $name = "0"; - } - - return "sp_" . $name; - -} - -sub usage { - print <> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! 
$exitcode; - } -} - -sub get_basename -{ - my $x = shift; - $x = `basename $x`; - $x =~ s/\n//; - return $x; -} - -sub assert_marker { - my $file = shift; - my $result = `zcat $file| grep '$MARKER' | wc -l` or die "Cannot read $file: $!"; - print $result; - if (scalar($result) != 0) { die "Data contains marker '$MARKER'; use something else.";} -} -########################### Dynamic config files ############################## - -sub write_wconf { - my ($filename, $train_dir) = @_; - open WCONF, ">$filename" or die "Can't write $filename: $!"; - - print WCONF <$filename" or die "Can't write $filename: $!"; - - print EVALFILE < "\$1.recombined" - -\$EVAL_MAIN "\$1.recombined" $TEST{'trg'}{'finalunsplit'} -EOT - - } else { - print EVALFILE < "\$1.split" - -\$EVAL_MAIN "\$1.split" $TEST{'trg'}{'finalsplit'} - -echo "DIRECT EVALUATION" -echo "--------------------------" -\$EVAL_MAIN "\$1" $TEST{'trg'}{'finalunsplit'} - -EOT - - } - close EVALFILE; - -} - - - - diff --git a/gi/morf-segmentation/morfsegment.py b/gi/morf-segmentation/morfsegment.py deleted file mode 100755 index 85b9d4fb..00000000 --- a/gi/morf-segmentation/morfsegment.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/python - -import sys -import gzip - -#usage: morfsegment.py inputvocab.gz segmentation.ready -# stdin: the data to segment -# stdout: the segmented data - -if len(sys.argv) < 3: - print "usage: morfsegment.py inputvocab.gz segmentation.ready [marker]" - print " stdin: the data to segment" - print " stdout: the segmented data" - sys.exit() - -#read index: -split_index={} - -marker="##" - -if len(sys.argv) > 3: - marker=sys.argv[3] - -word_vocab=gzip.open(sys.argv[1], 'rb') #inputvocab.gz -seg_vocab=open(sys.argv[2], 'r') #segm.ready.. - -for seg in seg_vocab: - #seg = ver# #wonder\n - #wordline = 1 verwonder\n - word = word_vocab.readline().strip().split(' ') - assert(len(word) == 2) - word = word[1] - seg=seg.strip() - - if seg != word: - split_index[word] = seg - -word_vocab.close() -seg_vocab.close() - -for line in sys.stdin: - words = line.strip().split() - - newsent = [] - for word in words: - splitword = split_index.get(word, word) - newsent.append(splitword) - - print ' '.join(newsent) - diff --git a/gi/morf-segmentation/morftrain.sh b/gi/morf-segmentation/morftrain.sh deleted file mode 100755 index 9004922f..00000000 --- a/gi/morf-segmentation/morftrain.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash - -if [[ $# -lt 3 ]]; then - echo "Trains a morfessor model and places the result in writedir" - echo - echo "Usage: `basename $0` corpus_input_file writedir [PPL] [marker] [lines]" - echo -e "\tcorpus_input_file contains a sentence per line." - exit 1 -fi - -MORFESSOR_DIR="/export/ws10smt/software/morfessor_catmap0.9.2" -SCRIPT_DIR=$(dirname `readlink -f $0`) - -MORFBINDIR="$MORFESSOR_DIR/bin" -MORFMAKEFILE_TRAIN="$MORFESSOR_DIR/train/Makefile" -VOCABEXT="$SCRIPT_DIR/vocabextractor.sh" - -MARKER="#" - -if [[ ! -f $VOCABEXT ]]; then - echo "$VOCABEXT doesn't exist!" - exit 1 -fi -if [[ ! -f $MORFMAKEFILE_TRAIN ]]; then - echo "$MORFMAKEFILE_TRAIN doesn't exist!" - exit 1 -fi - - -CORPUS="$1" -WRITETODIR=$2 - -if [[ ! -f $CORPUS ]]; then - echo "$CORPUS doesn't exist!" - exit 1 -fi - -PPL=10 -LINES=0 -if [[ $# -gt 2 ]]; then - PPL=$3 -fi -if [[ $# -gt 3 ]]; then - MARKER="$4" -fi -if [[ $# -gt 4 ]]; then - LINES=$5 -fi - -mkdir -p $WRITETODIR - -#extract vocabulary to train on -echo "Extracting vocabulary..." -if [[ -f $WRITETODIR/inputvocab.gz ]]; then - echo " ....$WRITETODIR/inputvocab.gz exists, reusing." 
-else - if [[ $LINES -gt 0 ]]; then - $VOCABEXT $CORPUS $LINES | gzip > $WRITETODIR/inputvocab.gz - else - $VOCABEXT $CORPUS | gzip > $WRITETODIR/inputvocab.gz - fi -fi - - -#train it -echo "Training morf model..." -if [[ -f $WRITETODIR/segmentation.final.gz ]]; then - echo " ....$WRITETODIR/segmentation.final.gz exists, reusing.." -else - OLDPWD=`pwd` - cd $WRITETODIR - - #put the training Makefile in place, with appropriate modifications - sed -e "s/^GZIPPEDINPUTDATA = .*$/GZIPPEDINPUTDATA = inputvocab.gz/" \ - -e "s/^PPLTHRESH = .*$/PPLTHRESH = $PPL/" \ - -e "s;^BINDIR = .*$;BINDIR = $MORFBINDIR;" \ - $MORFMAKEFILE_TRAIN > ./Makefile - - date - make > ./trainmorf.log 2>&1 - cd $OLDPWD - - - echo "Post processing..." - #remove comments, counts and morph types - #mark morphs - - if [[ ! -f $WRITETODIR/segmentation.final.gz ]]; then - echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" - exit 1 - fi - - zcat $WRITETODIR/segmentation.final.gz | \ - awk '$1 !~ /^#/ {print}' | \ - cut -d ' ' --complement -f 1 | \ - sed -e "s/\/...//g" -e "s/ + /$MARKER $MARKER/g" \ - > $WRITETODIR/segmentation.ready - - if [[ ! -f $WRITETODIR/segmentation.ready ]]; then - echo "Failed to learn segmentation model: $WRITETODIR/segmentation.final.gz not written" - exit 1 - fi - - - - echo "Done training." - date -fi -echo "Segmentation model is $WRITETODIR/segmentation.ready." - diff --git a/gi/morf-segmentation/vocabextractor.sh b/gi/morf-segmentation/vocabextractor.sh deleted file mode 100755 index 00ae7109..00000000 --- a/gi/morf-segmentation/vocabextractor.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -d=$(dirname `readlink -f $0`) -if [ $# -lt 1 ]; then - echo "Extracts unique words and their frequencies from a subset of a corpus." - echo - echo "Usage: `basename $0` input_file [number_of_lines] > output_file" - echo -e "\tinput_file contains a sentence per line." - echo - echo "Script also removes words from the vocabulary if they contain a digit or a special character. Output is printed to stdout in a format suitable for use with Morfessor." - echo - exit -fi - -srcname=$1 -reallen=0 - -if [[ $# -gt 1 ]]; then - reallen=$2 -fi - -pattern_file=$d/invalid_vocab.patterns - -if [[ ! 
-f $pattern_file ]]; then - echo "Pattern file missing" - exit 1 -fi - -#this awk strips entries from the vocabulary if they contain invalid characters -#invalid characters are digits and punctuation marks, and words beginning or ending with a dash -#uniq -c extracts the unique words and counts the occurrences - -if [[ $reallen -eq 0 ]]; then - #when a zero is passed, use the whole file - zcat -f $srcname | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//' - -else - zcat -f $srcname | head -n $reallen | sed 's/ /\n/g' | egrep -v -f $pattern_file | sort | uniq -c | sed 's/^ *//' -fi - diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am deleted file mode 100644 index 86f8e07b..00000000 --- a/gi/pf/Makefile.am +++ /dev/null @@ -1,44 +0,0 @@ -bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl pf_test bayes_lattice_score - -noinst_LIBRARIES = libpf.a - -libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc hpyp_tm.cc pyp_tm.cc - -bayes_lattice_score_SOURCES = bayes_lattice_score.cc -bayes_lattice_score_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -pf_test_SOURCES = pf_test.cc -pf_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -nuisance_test_SOURCES = nuisance_test.cc -nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc -align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -align_tl_SOURCES = align-tl.cc -align_tl_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz - -itg_SOURCES = itg.cc - -pyp_lm_SOURCES = pyp_lm.cc - -learn_cfg_SOURCES = learn_cfg.cc - -condnaive_SOURCES = condnaive.cc - -dpnaive_SOURCES = dpnaive.cc - -pfdist_SOURCES = pfdist.cc - -pfnaive_SOURCES = pfnaive.cc - -cbgi_SOURCES = cbgi.cc - -brat_SOURCES = brat.cc - -pfbrat_SOURCES = pfbrat.cc - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops -I$(top_srcdir)/utils $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/klm - -AM_LDFLAGS = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a $(top_srcdir)/utils/libutils.a -lz diff --git a/gi/pf/README b/gi/pf/README deleted file mode 100644 index 62e47541..00000000 --- a/gi/pf/README +++ /dev/null @@ -1,2 +0,0 @@ -Experimental Bayesian alignment tools. Nothing to see here. 
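
[Editor's note, not part of the original patch] The vocabulary extraction step removed above (vocabextractor.sh together with invalid_vocab.patterns) amounts to splitting the corpus into tokens, dropping any token that contains a digit or punctuation or begins/ends with a dash, and emitting "count word" lines in the format Morfessor expects. A rough Python equivalent follows purely as an illustration: extract_vocab() is a hypothetical name, and the regular expressions only approximate the POSIX patterns in invalid_vocab.patterns rather than reproduce them exactly.

# Illustrative sketch only: approximates the deleted vocabextractor.sh;
# extract_vocab() is a hypothetical helper, not part of the original patch.
import re
import sys
from collections import Counter

# Rough stand-ins for invalid_vocab.patterns: digits, punctuation/special
# characters, a lone apostrophe, leading/trailing dashes, empty tokens.
INVALID = [
    re.compile(r"[0-9]"),
    re.compile(r'[\[\] !"#$%&()*+,./:;<=>?@\\^_`{|}~]'),
    re.compile(r"^'$"),
    re.compile(r"-$"),
    re.compile(r"^-"),
    re.compile(r"^$"),
]

def extract_vocab(lines, max_lines=0):
    """Count unique tokens, skipping any token that matches an invalid pattern."""
    counts = Counter()
    for i, line in enumerate(lines):
        if max_lines and i >= max_lines:
            break
        for tok in line.split():
            if any(p.search(tok) for p in INVALID):
                continue
            counts[tok] += 1
    return counts

if __name__ == "__main__":
    counts = extract_vocab(sys.stdin)
    # Same "count word" layout that `sort | uniq -c | sed 's/^ *//'` produces.
    for word in sorted(counts):
        print(f"{counts[word]} {word}")
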
- diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc deleted file mode 100644 index e7509f57..00000000 --- a/gi/pf/align-lexonly-pyp.cc +++ /dev/null @@ -1,243 +0,0 @@ -#include -#include - -#include -#include - -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "array2d.h" -#include "sampler.h" -#include "corpus.h" -#include "pyp_tm.h" -#include "hpyp_tm.h" -#include "quasi_model2.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("infer_alignment_hyperparameters,I", "Infer alpha and p_null, otherwise fixed values will be assumed") - ("p_null,0", po::value()->default_value(0.08), "probability of aligning to null") - ("align_alpha,a", po::value()->default_value(4.0), "how 'tight' is the bias toward be along the diagonal?") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -MT19937* prng; - -struct LexicalAlignment { - unsigned char src_index; - bool is_transliteration; - vector > derivation; -}; - -struct AlignedSentencePair { - vector src; - vector trg; - vector a; - Array2D posterior; -}; - -template -struct Aligner { - Aligner(const vector >& lets, - int vocab_size, - int num_letters, - const po::variables_map& conf, - vector* c) : - corpus(*c), - paj_model(conf["align_alpha"].as(), conf["p_null"].as()), - infer_paj(conf.count("infer_alignment_hyperparameters") > 0), - model(lets, vocab_size, num_letters), - kNULL(TD::Convert("NULL")) { - assert(lets[kNULL].size() == 0); - } - - vector& corpus; - QuasiModel2 paj_model; - const bool infer_paj; - LexicalTranslationModel model; - const WordID kNULL; - - void ResampleHyperparameters() { - model.ResampleHyperparameters(prng); - if (infer_paj) paj_model.ResampleHyperparameters(prng); - } - - void InitializeRandom() { - cerr << "Initializing with random alignments ...\n"; - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - asp.a.resize(asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - unsigned char& a_j = asp.a[j].src_index; - a_j = prng->next() * (1 + asp.src.size()); - const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Increment(f_a_j, asp.trg[j], &*prng); - paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); - } - } - cerr << "Corpus intialized randomly." 
<< endl; - cerr << "LLH = " << Likelihood() << " \t(Amodel=" << paj_model.Likelihood() - << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl; - } - - void ResampleCorpus() { - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - SampleSet ss; ss.resize(asp.src.size() + 1); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - unsigned char& a_j = asp.a[j].src_index; - const WordID e_j = asp.trg[j]; - WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Decrement(f_a_j, e_j, prng); - paj_model.Decrement(a_j, j, asp.src.size(), asp.trg.size()); - - for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { - const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); - ss[prop_a_j] = model.Prob(prop_f, e_j); - ss[prop_a_j] *= paj_model.Prob(prop_a_j, j, asp.src.size(), asp.trg.size()); - } - a_j = prng->SelectSample(ss); - f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - model.Increment(f_a_j, e_j, prng); - paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size()); - } - } - } - - prob_t Likelihood() const { - return model.Likelihood() * paj_model.Likelihood(); - } -}; - -void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { - for (set::const_iterator it = v.begin(); it != v.end(); ++it) { - vector& letters = (*l)[*it]; - if (letters.size()) continue; // if e and f have the same word - - const string& w = TD::Convert(*it); - - size_t cur = 0; - while (cur < w.size()) { - const size_t len = UTF8Len(w[cur]); - letters.push_back(TD::Convert(w.substr(cur, len))); - if (letset) letset->insert(letters.back()); - cur += len; - } - } -} - -void Debug(const AlignedSentencePair& asp) { - cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; - Array2D a(asp.src.size(), asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - assert(asp.a[j].src_index <= asp.src.size()); - if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; - } - cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { - for (unsigned j = 0; j < asp->trg.size(); ++j) - asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { - bool first = true; - for (unsigned j = 0; j < asp.trg.size(); ++j) { - int src_index = -1; - int mc = -1; - for (unsigned i = 0; i <= asp.src.size(); ++i) { - if (asp.posterior(i, j) > mc) { - mc = asp.posterior(i, j); - src_index = i; - } - } - - if (src_index) { - if (first) first = false; else cout << ' '; - cout << (src_index - 1) << '-' << j; - } - } - cout << endl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - if (conf.count("random_seed")) - prng = new MT19937(conf["random_seed"].as()); - else - prng = new MT19937; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - vector corpus(corpuse.size()); - for (unsigned i = 0; i < corpuse.size(); ++i) { - corpus[i].src.swap(corpusf[i]); - corpus[i].trg.swap(corpuse[i]); - corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); - } - corpusf.clear(); corpuse.clear(); - - 
vocabf.insert(TD::Convert("NULL")); - vector > letters(TD::NumWords()); - set letset; - ExtractLetters(vocabe, &letters, &letset); - ExtractLetters(vocabf, &letters, NULL); - letters[TD::Convert("NULL")].clear(); - - //Aligner aligner(letters, vocabe.size(), letset.size(), conf, &corpus); - Aligner aligner(letters, vocabe.size(), letset.size(), conf, &corpus); - aligner.InitializeRandom(); - - const unsigned samples = conf["samples"].as(); - for (int i = 0; i < samples; ++i) { - for (int j = 65; j < 67; ++j) Debug(corpus[j]); - if (i % 10 == 9) { - aligner.ResampleHyperparameters(); - cerr << "LLH = " << aligner.Likelihood() << " \t(Amodel=" << aligner.paj_model.Likelihood() - << " TModel=" << aligner.model.Likelihood() << ") contexts=" << aligner.model.UniqueConditioningContexts() << endl; - } - aligner.ResampleCorpus(); - if (i > (samples / 5) && (i % 6 == 5)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]); - } - for (unsigned i = 0; i < corpus.size(); ++i) - WriteAlignments(corpus[i]); - aligner.model.Summary(); - - return 0; -} diff --git a/gi/pf/align-tl.cc b/gi/pf/align-tl.cc deleted file mode 100644 index f6608f1d..00000000 --- a/gi/pf/align-tl.cc +++ /dev/null @@ -1,339 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "backward.h" -#include "array2d.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "stringlib.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "mfcr.h" -#include "corpus.h" -#include "ngram_base.h" -#include "transliterations.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("s2t", po::value(), "character level source-to-target prior transliteration probabilities") - ("t2s", po::value(), "character level target-to-source prior transliteration probabilities") - ("max_src_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in source") - ("max_trg_chunk", po::value()->default_value(4), "Maximum size of translitered chunk in target") - ("expected_src_to_trg_ratio", po::value()->default_value(1.0), "If a word is transliterated, what is the expected length ratio from source to target?") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -struct LexicalAlignment { - unsigned char src_index; - bool is_transliteration; - vector > derivation; -}; - -struct AlignedSentencePair { - vector src; - vector trg; - vector a; - Array2D posterior; -}; - -struct HierarchicalWordBase { - explicit 
HierarchicalWordBase(const unsigned vocab_e_size) : - base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {} - - void ResampleHyperparameters(MT19937* rng) { - r.resample_hyperparameters(rng); - } - - inline double logp0(const vector& s) const { - return Md::log_poisson(s.size(), 7.5) + s.size() * u0; - } - - // return p0 of rule.e_ - prob_t operator()(const TRule& rule) const { - v[0].logeq(logp0(rule.e_)); - return r.prob(rule.e_, v.begin(), l.begin()); - } - - void Increment(const TRule& rule) { - v[0].logeq(logp0(rule.e_)); - if (r.increment(rule.e_, v.begin(), l.begin(), &*prng).count) { - base *= v[0] * l[0]; - } - } - - void Decrement(const TRule& rule) { - if (r.decrement(rule.e_, &*prng).count) { - base /= prob_t(exp(logp0(rule.e_))); - } - } - - prob_t Likelihood() const { - prob_t p; p.logeq(r.log_crp_prob()); - p *= base; - return p; - } - - void Summary() const { - cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",s=" << r.strength() << ')' << endl; - for (MFCR<1,vector >::const_iterator it = r.begin(); it != r.end(); ++it) - cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl; - } - - prob_t base; - MFCR<1,vector > r; - const double u0; - const vector l; - mutable vector v; -}; - -struct BasicLexicalAlignment { - explicit BasicLexicalAlignment(const vector >& lets, - const unsigned words_e, - const unsigned letters_e, - vector* corp) : - letters(lets), - corpus(*corp), - //up0(words_e), - //up0("en.chars.1gram", letters_e), - //up0("en.words.1gram"), - up0(letters_e), - //up0("en.chars.2gram"), - tmodel(up0) { - } - - void InstantiateRule(const WordID src, - const WordID trg, - TRule* rule) const { - static const WordID kX = TD::Convert("X") * -1; - rule->lhs_ = kX; - rule->e_ = letters[trg]; - rule->f_ = letters[src]; - } - - void InitializeRandom() { - const WordID kNULL = TD::Convert("NULL"); - cerr << "Initializing with random alignments ...\n"; - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - asp.a.resize(asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - const unsigned char a_j = prng->next() * (1 + asp.src.size()); - const WordID f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); - TRule r; - InstantiateRule(f_a_j, asp.trg[j], &r); - asp.a[j].is_transliteration = false; - asp.a[j].src_index = a_j; - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); - } - } - cerr << " LLH = " << Likelihood() << endl; - } - - prob_t Likelihood() const { - prob_t p = tmodel.Likelihood(); - p *= up0.Likelihood(); - return p; - } - - void ResampleHyperparemeters() { - tmodel.ResampleHyperparameters(&*prng); - up0.ResampleHyperparameters(&*prng); - cerr << " (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n"; - } - - void ResampleCorpus(); - - const vector >& letters; // spelling dictionary - vector& corpus; - //PhraseConditionalUninformativeBase up0; - //PhraseConditionalUninformativeUnigramBase up0; - //UnigramWordBase up0; - //HierarchicalUnigramBase up0; - HierarchicalWordBase up0; - //CompletelyUniformBase up0; - //FixedNgramBase up0; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - MConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; - //ConditionalTranslationModel tmodel; -}; - -void BasicLexicalAlignment::ResampleCorpus() { - static const WordID kNULL = TD::Convert("NULL"); - for (unsigned i = 0; i < corpus.size(); ++i) { - AlignedSentencePair& asp = corpus[i]; - SampleSet ss; ss.resize(asp.src.size() + 1); - for (unsigned j = 0; j < asp.trg.size(); ++j) { - TRule r; - unsigned char& a_j = asp.a[j].src_index; - WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.DecrementRule(r, &*prng)) - up0.Decrement(r); - - for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) { - const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL); - InstantiateRule(prop_f, asp.trg[j], &r); - ss[prop_a_j] = tmodel.RuleProbability(r); - } - a_j = prng->SelectSample(ss); - f_a_j = (a_j ? 
asp.src[a_j - 1] : kNULL); - InstantiateRule(f_a_j, asp.trg[j], &r); - if (tmodel.IncrementRule(r, &*prng)) - up0.Increment(r); - } - } - cerr << " LLH = " << Likelihood() << endl; -} - -void ExtractLetters(const set& v, vector >* l, set* letset = NULL) { - for (set::const_iterator it = v.begin(); it != v.end(); ++it) { - vector& letters = (*l)[*it]; - if (letters.size()) continue; // if e and f have the same word - - const string& w = TD::Convert(*it); - - size_t cur = 0; - while (cur < w.size()) { - const size_t len = UTF8Len(w[cur]); - letters.push_back(TD::Convert(w.substr(cur, len))); - if (letset) letset->insert(letters.back()); - cur += len; - } - } -} - -void Debug(const AlignedSentencePair& asp) { - cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl; - Array2D a(asp.src.size(), asp.trg.size()); - for (unsigned j = 0; j < asp.trg.size(); ++j) - if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true; - cerr << a << endl; -} - -void AddSample(AlignedSentencePair* asp) { - for (unsigned j = 0; j < asp->trg.size(); ++j) - asp->posterior(asp->a[j].src_index, j)++; -} - -void WriteAlignments(const AlignedSentencePair& asp) { - bool first = true; - for (unsigned j = 0; j < asp.trg.size(); ++j) { - int src_index = -1; - int mc = -1; - for (unsigned i = 0; i <= asp.src.size(); ++i) { - if (asp.posterior(i, j) > mc) { - mc = asp.posterior(i, j); - src_index = i; - } - } - - if (src_index) { - if (first) first = false; else cout << ' '; - cout << (src_index - 1) << '-' << j; - } - } - cout << endl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - vector corpus(corpuse.size()); - for (unsigned i = 0; i < corpuse.size(); ++i) { - corpus[i].src.swap(corpusf[i]); - corpus[i].trg.swap(corpuse[i]); - corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size()); - } - corpusf.clear(); corpuse.clear(); - - vocabf.insert(TD::Convert("NULL")); - vector > letters(TD::NumWords() + 1); - set letset; - ExtractLetters(vocabe, &letters, &letset); - ExtractLetters(vocabf, &letters, NULL); - letters[TD::Convert("NULL")].clear(); - - // TODO configure this - const int max_src_chunk = conf["max_src_chunk"].as(); - const int max_trg_chunk = conf["max_trg_chunk"].as(); - const double s2t_rat = conf["expected_src_to_trg_ratio"].as(); - const BackwardEstimator be(conf["s2t"].as(), conf["t2s"].as()); - Transliterations tl(max_src_chunk, max_trg_chunk, s2t_rat, be); - - cerr << "Initializing transliteration graph structures ...\n"; - for (int i = 0; i < corpus.size(); ++i) { - const vector& src = corpus[i].src; - const vector& trg = corpus[i].trg; - for (int j = 0; j < src.size(); ++j) { - const vector& src_let = letters[src[j]]; - for (int k = 0; k < trg.size(); ++k) { - const vector& trg_let = letters[trg[k]]; - tl.Initialize(src[j], src_let, trg[k], trg_let); - //if (src_let.size() < min_trans_src) - // tl.Forbid(src[j], src_let, trg[k], 
trg_let); - } - } - } - cerr << endl; - tl.GraphSummary(); - - return 0; -} diff --git a/gi/pf/backward.cc b/gi/pf/backward.cc deleted file mode 100644 index b92629fd..00000000 --- a/gi/pf/backward.cc +++ /dev/null @@ -1,89 +0,0 @@ -#include "backward.h" - -#include -#include - -#include "array2d.h" -#include "reachability.h" -#include "base_distributions.h" - -using namespace std; - -BackwardEstimator::BackwardEstimator(const string& s2t, - const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {} - -BackwardEstimator::~BackwardEstimator() { - delete m1; m1 = NULL; - delete m1inv; m1inv = NULL; -} - -float BackwardEstimator::ComputeBackwardProb(const std::vector& src, - const std::vector& trg, - unsigned src_covered, - unsigned trg_covered, - double s2t_ratio) const { - if (src_covered == src.size() || trg_covered == trg.size()) { - assert(src_covered == src.size()); - assert(trg_covered == trg.size()); - return 0; - } - static const WordID kNULL = TD::Convert(""); - const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1)); - // TODO factor in expected length ratio - prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_covered; j < trg.size(); ++j) { - prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12); - for (unsigned i = src_covered; i < src.size(); ++i) - p += (*m1)(src[i], trg[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n"; - assert(!"failed"); - } - p *= uniform_alignment; - e *= p; - } - // TODO factor in expected length ratio - const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0)); - prob_t inv; - inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio)); - for (unsigned i = src_covered; i < src.size(); ++i) { - prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12); - for (unsigned j = trg_covered; j < trg.size(); ++j) - p += (*m1inv)(trg[j], src[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n"; - assert(!"failed"); - } - p *= inv_uniform; - inv *= p; - } - return (log(e) + log(inv)) / 2; -} - -void BackwardEstimator::InitializeGrid(const vector& src, - const vector& trg, - const Reachability& r, - double s2t_ratio, - float* grid) const { - queue > q; - q.push(make_pair(0,0)); - Array2D done(src.size()+1, trg.size()+1, false); - //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl; - while(!q.empty()) { - const pair n = q.front(); - q.pop(); - if (done(n.first,n.second)) continue; - done(n.first,n.second) = true; - - float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio); - if (n.first == 0 && n.second == 0) grid[0] = lp; - //cerr << " " << n.first << "," << n.second << "\t" << lp << endl; - - if (n.first == src.size() || n.second == trg.size()) continue; - const vector >& edges = r.valid_deltas[n.first][n.second]; - for (int i = 0; i < edges.size(); ++i) - q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second)); - } - //static int cc = 0; ++cc; if (cc == 80) exit(1); -} - diff --git a/gi/pf/backward.h b/gi/pf/backward.h deleted file mode 100644 index e67eff0c..00000000 --- a/gi/pf/backward.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _BACKWARD_H_ -#define _BACKWARD_H_ - -#include -#include -#include "wordid.h" - -struct Reachability; -struct Model1; - -struct BackwardEstimator { - BackwardEstimator(const std::string& s2t, - 
const std::string& t2s); - ~BackwardEstimator(); - - void InitializeGrid(const std::vector& src, - const std::vector& trg, - const Reachability& r, - double src2trg_ratio, - float* grid) const; - - private: - float ComputeBackwardProb(const std::vector& src, - const std::vector& trg, - unsigned src_covered, - unsigned trg_covered, - double src2trg_ratio) const; - - Model1* m1; - Model1* m1inv; -}; - -#endif diff --git a/gi/pf/base_distributions.cc b/gi/pf/base_distributions.cc deleted file mode 100644 index 57e0bbe1..00000000 --- a/gi/pf/base_distributions.cc +++ /dev/null @@ -1,241 +0,0 @@ -#include "base_distributions.h" - -#include - -#include "filelib.h" - -using namespace std; - -TableLookupBase::TableLookupBase(const string& fname) { - cerr << "TableLookupBase reading from " << fname << " ..." << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - vector le, lf; - TRule x; - x.lhs_ = -TD::Convert("X"); - bool flag = false; - while(getline(in, line)) { - ++lc; - if (lc % 1000000 == 0) { cerr << " [" << lc << ']' << endl; flag = false; } - else if (lc % 25000 == 0) { cerr << '.' << flush; flag = true; } - tmp.clear(); - TD::ConvertSentence(line, &tmp); - x.f_.clear(); - x.e_.clear(); - size_t pos = 0; - int cc = 0; - while(pos < tmp.size()) { - const WordID cur = tmp[pos++]; - if (cur == kDIV) { - ++cc; - } else if (cc == 0) { - x.f_.push_back(cur); - } else if (cc == 1) { - x.e_.push_back(cur); - } else if (cc == 2) { - table[x].logeq(atof(TD::Convert(cur).c_str())); - ++cc; - } else { - if (flag) cerr << endl; - cerr << "Bad format in " << lc << ": " << line << endl; abort(); - } - } - if (cc != 3) { - if (flag) cerr << endl; - cerr << "Bad format in " << lc << ": " << line << endl; abort(); - } - } - if (flag) cerr << endl; - cerr << " read " << lc << " entries\n"; -} - -prob_t PhraseConditionalUninformativeUnigramBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t p; - p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - //p.logeq(log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) - p *= u(vtrg[i + start_trg]); // draw e_i ~Uniform - return p; -} - -prob_t PhraseConditionalUninformativeBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t p; - //p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - p.logeq(Md::log_poisson(elen, 1)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) - p *= kUNIFORM_TARGET; // draw e_i ~Uniform - return p; -} - -void Model1::LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; -} - -prob_t PhraseConditionalBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(Md::log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - return p; -} - -prob_t PhraseJointBase::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) - // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); - p *= ptrglen; - p *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform - for (int i = 0; i < elen; ++i) { // for each position i in E - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - return p; -} - -prob_t PhraseJointBase_BiDir::p0(const vector& vsrc, - const vector& vtrg, - int start_src, int start_trg) const { - const int flen = vsrc.size() - start_src; - const int elen = vtrg.size() - start_trg; - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1)); - - prob_t p1; - p1.logeq(Md::log_poisson(flen, 1.0)); // flen ~Pois(1) - // elen | flen ~Pois(flen + 0.01) - prob_t ptrglen; ptrglen.logeq(Md::log_poisson(elen, flen + 0.01)); - p1 *= ptrglen; - p1 *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform - for (int i = 0; i < elen; ++i) { // for each position i in E - const WordID trg = vtrg[i + start_trg]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
0 : vsrc[j + start_src]; - tp += kM1MIXTURE * model1(src, trg); - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p1 *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p1.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - - prob_t p2; - p2.logeq(Md::log_poisson(elen, 1.0)); // elen ~Pois(1) - // flen | elen ~Pois(flen + 0.01) - prob_t psrclen; psrclen.logeq(Md::log_poisson(flen, elen + 0.01)); - p2 *= psrclen; - p2 *= kUNIFORM_TARGET.pow(elen); // each f in F ~Uniform - for (int i = 0; i < flen; ++i) { // for each position i in E - const WordID src = vsrc[i + start_src]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < elen; ++j) { - const WordID trg = j < 0 ? 0 : vtrg[j + start_trg]; - tp += kM1MIXTURE * invmodel1(trg, src); - tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE; - } - tp *= uniform_trg_alignment; // draw a_i ~uniform - p2 *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - if (p2.is_0()) { - cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; - abort(); - } - - static const prob_t kHALF(0.5); - return (p1 + p2) * kHALF; -} - -JumpBase::JumpBase() : p(200) { - for (unsigned src_len = 1; src_len < 200; ++src_len) { - map& cpd = p[src_len]; - int min_jump = 1 - src_len; - int max_jump = src_len; - prob_t z; - for (int j = min_jump; j <= max_jump; ++j) { - prob_t& cp = cpd[j]; - if (j < 0) - cp.logeq(Md::log_poisson(1.5-j, 1)); - else if (j > 0) - cp.logeq(Md::log_poisson(j, 1)); - cp.poweq(0.2); - z += cp; - } - for (int j = min_jump; j <= max_jump; ++j) { - cpd[j] /= z; - } - } -} - diff --git a/gi/pf/base_distributions.h b/gi/pf/base_distributions.h deleted file mode 100644 index 41b513f8..00000000 --- a/gi/pf/base_distributions.h +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef _BASE_MEASURES_H_ -#define _BASE_MEASURES_H_ - -#include -#include -#include -#include -#include -#include - -#include "unigrams.h" -#include "trule.h" -#include "prob.h" -#include "tdict.h" -#include "sampler.h" -#include "m.h" -#include "os_phrase.h" - -struct Model1 { - explicit Model1(const std::string& fname) : - kNULL(TD::Convert("")), - kZERO() { - LoadModel1(fname); - } - - void LoadModel1(const std::string& fname); - - // returns prob 0 if src or trg is not found - const prob_t& operator()(WordID src, WordID trg) const { - if (src == 0) src = kNULL; - if (src < ttable.size()) { - const std::map& cpd = ttable[src]; - const std::map::const_iterator it = cpd.find(trg); - if (it != cpd.end()) - return it->second; - } - return kZERO; - } - - const WordID kNULL; - const prob_t kZERO; - std::vector > ttable; -}; - -struct PoissonUniformUninformativeBase { - explicit PoissonUniformUninformativeBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} - prob_t operator()(const TRule& r) const { - prob_t p; p.logeq(Md::log_poisson(r.e_.size(), 1.0)); - prob_t q = kUNIFORM; q.poweq(r.e_.size()); - p *= q; - return p; - } - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const prob_t kUNIFORM; -}; - -struct CompletelyUniformBase { - explicit CompletelyUniformBase(const unsigned ves) : kUNIFORM(1.0 / ves) {} - prob_t operator()(const TRule&) const { - return kUNIFORM; - } - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const prob_t kUNIFORM; -}; - -struct 
UnigramWordBase { - explicit UnigramWordBase(const std::string& fname) : un(fname) {} - prob_t operator()(const TRule& r) const { - return un(r.e_); - } - const UnigramWordModel un; -}; - -struct RuleHasher { - size_t operator()(const TRule& r) const { - return hash_value(r); - } -}; - -struct TableLookupBase { - TableLookupBase(const std::string& fname); - - prob_t operator()(const TRule& rule) const { - const std::tr1::unordered_map::const_iterator it = table.find(rule); - if (it == table.end()) { - std::cerr << rule << " not found\n"; - abort(); - } - return it->second; - } - - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - void Summary() const {} - - std::tr1::unordered_map table; -}; - -struct PhraseConditionalUninformativeBase { - explicit PhraseConditionalUninformativeBase(const unsigned vocab_e_size) : - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - void Summary() const {} - void ResampleHyperparameters(MT19937*) {} - void Increment(const TRule&) {} - void Decrement(const TRule&) {} - prob_t Likelihood() const { return prob_t::One(); } - const prob_t kUNIFORM_TARGET; -}; - -struct PhraseConditionalUninformativeUnigramBase { - explicit PhraseConditionalUninformativeUnigramBase(const std::string& file, const unsigned vocab_e_size) : u(file, vocab_e_size) {} - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const UnigramModel u; -}; - -struct PhraseConditionalBase { - explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) : - model1(m1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase { - explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) : - model1(m1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_SOURCE(1.0 / vocab_f_size), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ , rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_SOURCE; - const prob_t kUNIFORM_TARGET; -}; - -struct PhraseJointBase_BiDir { - explicit 
PhraseJointBase_BiDir(const Model1& m1, - const Model1& im1, - const double m1mixture, - const unsigned vocab_e_size, - const unsigned vocab_f_size) : - model1(m1), - invmodel1(im1), - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_SOURCE(1.0 / vocab_f_size), - kUNIFORM_TARGET(1.0 / vocab_e_size) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - } - - // return p0 of rule.e_ , rule.f_ - prob_t operator()(const TRule& rule) const { - return p0(rule.f_, rule.e_, 0, 0); - } - - prob_t p0(const std::vector& vsrc, const std::vector& vtrg, int start_src, int start_trg) const; - - const Model1& model1; - const Model1& invmodel1; - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_SOURCE; - const prob_t kUNIFORM_TARGET; -}; - -// base distribution for jump size multinomials -// basically p(0) = 0 and then, p(1) is max, and then -// you drop as you move to the max jump distance -struct JumpBase { - JumpBase(); - - const prob_t& operator()(int jump, unsigned src_len) const { - assert(jump != 0); - const std::map::const_iterator it = p[src_len].find(jump); - assert(it != p[src_len].end()); - return it->second; - } - std::vector > p; -}; - - -#endif diff --git a/gi/pf/bayes_lattice_score.cc b/gi/pf/bayes_lattice_score.cc deleted file mode 100644 index 70cb8dc2..00000000 --- a/gi/pf/bayes_lattice_score.cc +++ /dev/null @@ -1,309 +0,0 @@ -#include -#include - -#include -#include -#include - -#include "inside_outside.h" -#include "hg.h" -#include "hg_io.h" -#include "bottom_up_parser.h" -#include "fdict.h" -#include "grammar.h" -#include "m.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -unsigned ReadCorpus(const string& filename, - vector* e, - set* vocab_e) { - e->clear(); - vocab_e->clear(); - ReadFile rf(filename); - istream* in = rf.stream(); - assert(*in); - string line; - unsigned toks = 0; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(Lattice()); - Lattice& le = e->back(); - LatticeTools::ConvertTextOrPLF(line, & le); - for (unsigned i = 0; i < le.size(); ++i) - for (unsigned j = 0; j < le[i].size(); ++j) - vocab_e->insert(le[i][j].label); - toks += le.size(); - } - return toks; -} - -struct BaseModel { - explicit BaseModel(unsigned 
tc) : - unif(1.0 / tc), p(prob_t::One()) {} - prob_t prob(const TRule& r) const { - return unif; - } - void increment(const TRule& r, MT19937* rng) { - p *= prob(r); - } - void decrement(const TRule& r, MT19937* rng) { - p /= prob(r); - } - prob_t Likelihood() const { - return p; - } - const prob_t unif; - prob_t p; -}; - -struct UnigramModel { - explicit UnigramModel(unsigned tc) : base(tc), crp(1,1,1,1), glue(1,1,1,1) {} - BaseModel base; - CCRP crp; - CCRP glue; - - prob_t Prob(const TRule& r) const { - if (r.Arity() != 0) { - return glue.prob(r, prob_t(0.5)); - } - return crp.prob(r, base.prob(r)); - } - - int Increment(const TRule& r, MT19937* rng) { - if (r.Arity() != 0) { - glue.increment(r, 0.5, rng); - return 0; - } else { - if (crp.increment(r, base.prob(r), rng)) { - base.increment(r, rng); - return 1; - } - return 0; - } - } - - int Decrement(const TRule& r, MT19937* rng) { - if (r.Arity() != 0) { - glue.decrement(r, rng); - return 0; - } else { - if (crp.decrement(r, rng)) { - base.decrement(r, rng); - return -1; - } - return 0; - } - } - - prob_t Likelihood() const { - prob_t p; - p.logeq(crp.log_crp_prob() + glue.log_crp_prob()); - p *= base.Likelihood(); - return p; - } - - void ResampleHyperparameters(MT19937* rng) { - crp.resample_hyperparameters(rng); - glue.resample_hyperparameters(rng); - cerr << " d=" << crp.discount() << ", s=" << crp.strength() << "\t STOP d=" << glue.discount() << ", s=" << glue.strength() << endl; - } -}; - -UnigramModel* plm; - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector* sampled_deriv) { - vector node_probs; - Inside(hg, &node_probs); - queue q; - q.push(hg.nodes_.size() - 2); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - //prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = edge.edge_prob_; - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; - //z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } -// for (unsigned i = 0; i < sampled_deriv->size(); ++i) { -// cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; -// } -} - -void IncrementDerivation(const Hypergraph& hg, const vector& d, UnigramModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector& d, UnigramModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -prob_t TotalProb(const Hypergraph& hg) { - return Inside(hg); -} - -void IncrementLatticePath(const Hypergraph& hg, const vector& d, Lattice* pl) { - Lattice& lat 
= *pl; - for (int i = 0; i < d.size(); ++i) { - const Hypergraph::Edge& edge = hg.edges_[d[i]]; - if (edge.rule_->Arity() != 0) continue; - WordID sym = edge.rule_->e_[0]; - vector& las = lat[edge.i_]; - int dist = edge.j_ - edge.i_; - assert(dist > 0); - for (int j = 0; j < las.size(); ++j) { - if (las[j].dist2next == dist && - las[j].label == sym) { - las[j].cost += 1; - } - } - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - vector grammars(2); - grammars[0].reset(new GlueGrammar("S","X")); - const unsigned samples = conf["samples"].as(); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector corpuse; - set vocabe; - cerr << "Reading corpus...\n"; - const unsigned toks = ReadCorpus(conf["input"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " lattices\t (" << vocabe.size() << " word types)\n"; - UnigramModel lm(vocabe.size()); - vector hgs(corpuse.size()); - vector > derivs(corpuse.size()); - for (int i = 0; i < corpuse.size(); ++i) { - grammars[1].reset(new PassThroughGrammar(corpuse[i], "X")); - ExhaustiveBottomUpParser parser("S", grammars); - bool res = parser.Parse(corpuse[i], &hgs[i]); // exhaustive parse - assert(res); - } - - double csamples = 0; - for (int SS=0; SS < samples; ++SS) { - const bool is_last = ((samples - 1) == SS); - prob_t dlh = prob_t::One(); - bool record_sample = (SS > (samples * 1 / 3) && (SS % 5 == 3)); - if (record_sample) csamples++; - for (int ci = 0; ci < corpuse.size(); ++ci) { - Lattice& lat = corpuse[ci]; - Hypergraph& hg = hgs[ci]; - vector& d = derivs[ci]; - if (!is_last) DecrementDerivation(hg, d, &lm, &rng); - for (unsigned i = 0; i < hg.edges_.size(); ++i) { - TRule& r = *hg.edges_[i].rule_; - if (r.Arity() != 0) - hg.edges_[i].edge_prob_ = prob_t::One(); - else - hg.edges_[i].edge_prob_ = lm.Prob(r); - } - if (!is_last) { - d.clear(); - SampleDerivation(hg, &rng, &d); - IncrementDerivation(hg, derivs[ci], &lm, &rng); - } else { - prob_t p = TotalProb(hg); - dlh *= p; - cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; - } - if (record_sample) IncrementLatticePath(hg, derivs[ci], &lat); - } - double llh = log(lm.Likelihood()); - cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; - if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); - if (is_last) { - double z = log(dlh); - cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; - } - } - cerr << lm.crp << endl; - cerr << lm.glue << endl; - for (int i = 0; i < corpuse.size(); ++i) { - for (int j = 0; j < corpuse[i].size(); ++j) - for (int k = 0; k < corpuse[i][j].size(); ++k) { - corpuse[i][j][k].cost /= csamples; - corpuse[i][j][k].cost += 1e-3; - corpuse[i][j][k].cost = log(corpuse[i][j][k].cost); - } - cout << HypergraphIO::AsPLF(corpuse[i]) << endl; - } - return 0; -} - diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/brat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - 
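The base distributions deleted above, and the ConditionalBase that follows in brat.cc, all build on the same two ingredients: a Poisson length model and a per-word mixture of Model 1 with a uniform target distribution. A minimal, self-contained sketch of the uninformative variant (no Model 1 table, made-up vocabulary size) might look like this; it is an illustration, not the deleted code:

#include <cmath>
#include <cstdio>

// log Pois(x; lambda) = x*log(lambda) - log(x!) - lambda
double log_poisson(unsigned x, double lambda) {
  return std::log(lambda) * x - std::lgamma(x + 1.0) - lambda;
}

// log p0(e | f) with a purely uniform per-word model:
//   |e| ~ Pois(|f| + 0.01), each e_i ~ Uniform over a target vocab of size V
double log_p0_uniform(unsigned flen, unsigned elen, unsigned vocab_e_size) {
  return log_poisson(elen, flen + 0.01) -
         elen * std::log(static_cast<double>(vocab_e_size));
}

int main() {
  // e.g. a 3-word source phrase generating a 4-word target phrase,
  // with a placeholder target vocabulary of 50000 types
  std::printf("log p0 = %g\n", log_p0_uniform(3, 4, 50000));
  return 0;
}

With the Model 1 mixture switched back in, the per-word term becomes a sum over source positions (plus NULL), as spelled out in the deleted PhraseConditionalBase::p0.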
-static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { - explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size), - kNULL(TD::Convert("")) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - LoadModel1(model1fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // return logp0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - const int flen = rule.f_.size(); - const int elen = rule.e_.size(); - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = rule.e_[i]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
kNULL : rule.f_[j]; - const map::const_iterator it = ttable[src].find(trg); - if (it != ttable[src].end()) { - tp += kM1MIXTURE * it->second; - } - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - return p; - } - - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; - const WordID kNULL; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(3),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(3),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UniphraseLM { - UniphraseLM(const vector >& corpus, - const set& vocab, - const po::variables_map& conf) : - phrases_(1,1), - gen_(1,1), - corpus_(corpus), - uniform_word_(1.0 / vocab.size()), - gen_p0_(0.5), - p_end_(0.5), - use_poisson_(conf.count("poisson_length") > 0) {} - - void ResampleHyperparameters(MT19937* rng) { - phrases_.resample_hyperparameters(rng); - gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.alpha(); - } - - CCRP_NoTable > phrases_; - CCRP_NoTable gen_; - vector > z_; // z_[i] is there a phrase boundary after the ith word - const vector >& corpus_; - const double uniform_word_; - const double gen_p0_; - const double p_end_; // in base length distribution, p of the end of 
a phrase - const bool use_poisson_; -}; - -struct Reachability { - boost::multi_array edges; // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring? - boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; - }; - - struct NState { - NState() : next_src_covered(), next_trg_covered() {} - NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {} - int next_src_covered; - int next_trg_covered; - }; - - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - assert(a[srclen][trglen].size() > 0); - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); -// typedef boost::multi_array, 2> narray_type; -// narray_type b(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } -// const NState nstate(i,j); - for (int k = 0; k < prevs.size(); ++k) { - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta = i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; -// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; - assert(max_src_delta[0][0] > 0); - //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; - //for (int i = 0; i < b[0][0].size(); ++i) { - // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; - //} - } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { - explicit FSTState(int src_size) : - trg_covered_(), - src_covered_(), - src_coverage_(src_size) {} - - FSTState(short trg_covered, short src_covered, const vector& src_coverage, const vector& src_prefix) : - trg_covered_(trg_covered), - src_covered_(src_covered), - src_coverage_(src_coverage), - src_prefix_(src_prefix) { - if (src_coverage_.size() == src_covered) { - assert(src_prefix.size() == 0); - } - } - - // if we 
extend by the word at src_position, what are - // the next states that are reachable and lie on a valid - // path to the final state? - vector Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { - assert(src_position < src_coverage_.size()); - if (src_coverage_[src_position]) { - cerr << "Trying to extend " << *this << " with position " << src_position << endl; - abort(); - } - vector ncvg = src_coverage_; - ncvg[src_position] = true; - - vector res; - const int trg_remaining = trg_len - trg_covered_; - if (trg_remaining <= 0) { - cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; - abort(); - } - const int src_remaining = src_len - src_covered_; - if (src_remaining <= 0) { - cerr << "Source appears to have been covered: " << *this << endl; - abort(); - } - - for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { - if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { - int nc = src_prefix_.size() + 1 + src_covered_; - res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector())); - } - } - - if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { - vector nsp = src_prefix_; - nsp.push_back(src_position); - res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); - } - - if (res.size() == 0) { - cerr << *this << " can't be extended!\n"; - abort(); - } - return res; - } - - short trg_covered_, src_covered_; - vector src_coverage_; - vector src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { - if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; - if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; - if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; - return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { - os << "[" << q.trg_covered_ << " : "; - for (int i = 0; i < q.src_coverage_.size(); ++i) - os << q.src_coverage_[i]; - os << " : <"; - for (int i = 0; i < q.src_prefix_.size(); ++i) { - if (i != 0) os << ' '; - os << q.src_prefix_[i]; - } - return os << ">]"; -} - -struct MyModel { - MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} - typedef unordered_map, CCRP_NoTable, boost::hash > > SrcToRuleCRPMap; - - void DecrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - it->second.decrement(rule); - if (it->second.num_customers() == 0) rules.erase(it); - } - - void IncrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) { - CCRP_NoTable crp(1,1); - it = rules.insert(make_pair(rule.f_, crp)).first; - } - it->second.increment(rule); - } - - // conditioned on rule.f_ - prob_t RuleConditionalProbability(const TRule& rule) const { - const prob_t base = rp0(rule); - SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) { - return base; - } else { - const double lp = it->second.logprob(rule, log(base)); - prob_t q; q.logeq(lp); - return q; - } - } - - const ConditionalBase& rp0; - SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { - MyFST(const vector& ssrc, const vector& strg, MyModel* m) : - src(ssrc), trg(strg), - r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), - model(m) { - FSTState in(src.size()); - cerr << " INIT: " << in << endl; - init = GetNode(in); - for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; 
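MyModel keeps one CCRP_NoTable per source side f, so RuleConditionalProbability reduces to the familiar Chinese restaurant predictive (count + alpha * p0) / (n + alpha). A stripped-down sketch of that bookkeeping, using plain strings in place of TRule and a fixed placeholder p0 (nothing here reflects the real CCRP_NoTable interface):

#include <cstdio>
#include <map>
#include <string>

// Minimal CRP-style rule model for one conditioning context:
// p(rule) = (count(rule) + alpha * p0(rule)) / (n + alpha)
struct TinyCRP {
  double alpha = 1.0;
  int n = 0;                          // total customers in this restaurant
  std::map<std::string, int> counts;  // rule -> count

  void increment(const std::string& r) { ++counts[r]; ++n; }
  void decrement(const std::string& r) {
    auto it = counts.find(r);
    if (it != counts.end() && it->second > 0) { --it->second; --n; }
  }
  double prob(const std::string& r, double p0) const {
    auto it = counts.find(r);
    const int c = (it == counts.end()) ? 0 : it->second;
    return (c + alpha * p0) / (n + alpha);
  }
};

int main() {
  TinyCRP crp;
  const double p0 = 1e-4;  // placeholder base probability
  crp.increment("at the convent");
  crp.increment("at the convent");
  crp.increment("gave");
  std::printf("p(seen)   = %g\n", crp.prob("at the convent", p0));
  std::printf("p(unseen) = %g\n", crp.prob("in the garden", p0));
  return 0;
}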
- in.src_covered_ = src.size(); - in.trg_covered_ = trg.size(); - cerr << "FINAL: " << in << endl; - final = GetNode(in); - } - virtual const WFSTNode* Final() const; - virtual const WFSTNode* Initial() const; - - const WFSTNode* GetNode(const FSTState& q); - map > m; - const vector& src; - const vector& trg; - Reachability r; - const WFSTNode* init; - const WFSTNode* final; - MyModel* model; -}; - -struct MyNode : public WFSTNode { - MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} - virtual vector > ExtendInput(unsigned srcindex) const; - const FSTState state; - mutable MyFST* container; -}; - -vector > MyNode::ExtendInput(unsigned srcindex) const { - cerr << "EXTEND " << state << " with " << srcindex << endl; - vector ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); - vector > res(ext.size()); - for (unsigned i = 0; i < ext.size(); ++i) { - res[i].first = container->GetNode(ext[i]); - if (ext[i].src_prefix_.size() == 0) { - const unsigned trg_from = state.trg_covered_; - const unsigned trg_to = ext[i].trg_covered_; - const unsigned prev_prfx_size = state.src_prefix_.size(); - res[i].second.reset(new TRule); - res[i].second->lhs_ = -TD::Convert("X"); - vector& src = res[i].second->f_; - vector& trg = res[i].second->e_; - src.resize(prev_prfx_size + 1); - for (unsigned j = 0; j < prev_prfx_size; ++j) - src[j] = container->src[state.src_prefix_[j]]; - src[prev_prfx_size] = container->src[srcindex]; - for (unsigned j = trg_from; j < trg_to; ++j) - trg.push_back(container->trg[j]); - res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); - } - } - return res; -} - -const WFSTNode* MyFST::GetNode(const FSTState& q) { - boost::shared_ptr& res = m[q]; - if (!res) { - res.reset(new MyNode(q, this)); - } - return &*res; -} - -const WFSTNode* MyFST::Final() const { - return final; -} - -const WFSTNode* MyFST::Initial() const { - return init; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - ConditionalBase lp0(conf["model1_interpolation_weight"].as(), - vocabe.size(), - conf["model1"].as()); - MyModel m(lp0); - - TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0"); - m.IncrementRule(x); - TRule y("[X] ||| nY dyN ||| gave ||| 0"); - m.IncrementRule(y); - - - MyFST fst(corpusf[0], corpuse[0], &m); - ifstream in("./kimura.g"); - assert(in); - CFG_WFSTComposer comp(fst); - Hypergraph hg; - bool succeed = comp.Compose(&in, &hg); - hg.PrintGraphviz(); - if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } - -#if 0 - ifstream in2("./amnabooks.g"); - assert(in2); - MyFST 
fst2(corpusf[1], corpuse[1], &m); - CFG_WFSTComposer comp2(fst2); - Hypergraph hg2; - bool succeed2 = comp2.Compose(&in2, &hg2); - if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } -#endif - - SparseVector w; w.set_value(FD::Convert("Proposal"), 1.0); - hg.Reweight(w); - cerr << ViterbiFTree(hg) << endl; - return 0; -} - diff --git a/gi/pf/cbgi.cc b/gi/pf/cbgi.cc deleted file mode 100644 index 97f1ba34..00000000 --- a/gi/pf/cbgi.cc +++ /dev/null @@ -1,330 +0,0 @@ -#include -#include -#include - -#include -#include - -#include "sampler.h" -#include "filelib.h" -#include "hg_io.h" -#include "hg.h" -#include "ccrp_nt.h" -#include "trule.h" -#include "inside_outside.h" - -using namespace std; -using namespace std::tr1; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -double log_decay(unsigned x, const double& b) { - assert(b > 1.0); - assert(x > 0); - return log(b - 1) - x * log(b); -} - -struct SimpleBase { - SimpleBase(unsigned esize, unsigned fsize, unsigned ntsize = 144) : - uniform_e(-log(esize)), - uniform_f(-log(fsize)), - uniform_nt(-log(ntsize)) { - } - - // binomial coefficient - static double choose(unsigned n, unsigned k) { - return exp(lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)); - } - - // count the number of patterns of terminals and NTs in the rule, given elen and flen - static double log_number_of_patterns(const unsigned flen, const unsigned elen) { - static vector > counts; - if (elen >= counts.size()) counts.resize(elen + 1); - if (flen >= counts[elen].size()) counts[elen].resize(flen + 1); - double& count = counts[elen][flen]; - if (count) return log(count); - const unsigned max_arity = min(elen, flen); - for (unsigned a = 0; a <= max_arity; ++a) - count += choose(elen, a) * choose(flen, a); - return log(count); - } - - // return logp0 of rule | LHS - double operator()(const TRule& rule) const { - const unsigned flen = rule.f_.size(); - const unsigned elen = rule.e_.size(); -#if 0 - double p = 0; - p += log_poisson(flen, 0.5); // flen ~Pois(0.5) - p += log_poisson(elen, flen); // elen | flen ~Pois(flen) - p -= log_number_of_patterns(flen, elen); // pattern | flen,elen ~Uniform - for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS - if (rule.f_[i] <= 0) // according to pattern - p += uniform_nt; // draw NT ~Uniform - else - p += uniform_f; // draw f terminal ~Uniform - } - p -= lgamma(rule.Arity() + 1); // draw permutation ~Uniform - for (unsigned i = 0; i < elen; ++i) { // for each position in e-RHS - if (rule.e_[i] > 0) // according to pattern - p += uniform_e; // draw e|f term ~Uniform - // TODO this should prob be model 1 - } -#else - double p = 0; - bool is_abstract = rule.f_[0] <= 0; - p += log(0.5); - if (is_abstract) { - if (flen == 2) p += log(0.99); else p += log(0.01); - } else { - p += log_decay(flen, 3); - } - - for (unsigned i = 0; i < flen; ++i) { // for each position in f-RHS - if (rule.f_[i] <= 0) // according to pattern - p += uniform_nt; // draw NT ~Uniform - else - p += uniform_f; // draw f terminal ~Uniform - } -#endif - return p; - } - const double uniform_e; - const double uniform_f; - const double uniform_nt; - vector arities; -}; - -MT19937* rng = NULL; - -template -struct MHSamplerEdgeProb { - MHSamplerEdgeProb(const Hypergraph& hg, - const map >& rdp, - const Base& logp0, - const bool exclude_multiword_terminals) : edge_probs(hg.edges_.size()) { - for (int i = 0; i < edge_probs.size(); ++i) { - const TRule& 
rule = *hg.edges_[i].rule_; - const map >::const_iterator it = rdp.find(rule.lhs_); - assert(it != rdp.end()); - const CCRP_NoTable& crp = it->second; - edge_probs[i].logeq(crp.logprob(rule, logp0(rule))); - if (exclude_multiword_terminals && rule.f_[0] > 0 && rule.f_.size() > 1) - edge_probs[i] = prob_t::Zero(); - } - } - inline prob_t operator()(const Hypergraph::Edge& e) const { - return edge_probs[e.id_]; - } - prob_t DerivationProb(const vector& d) const { - prob_t p = prob_t::One(); - for (unsigned i = 0; i < d.size(); ++i) - p *= edge_probs[d[i]]; - return p; - } - vector edge_probs; -}; - -template -struct ModelAndData { - ModelAndData() : - base_lh(prob_t::One()), - logp0(10000, 10000), - mh_samples(), - mh_rejects() {} - - void SampleCorpus(const string& hgpath, int i); - void ResampleHyperparameters() { - for (map >::iterator it = rules.begin(); it != rules.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - CCRP_NoTable& RuleCRP(int lhs) { - map >::iterator it = rules.find(lhs); - if (it == rules.end()) { - rules.insert(make_pair(lhs, CCRP_NoTable(1,1))); - it = rules.find(lhs); - } - return it->second; - } - - void IncrementRule(const TRule& rule) { - CCRP_NoTable& crp = RuleCRP(rule.lhs_); - if (crp.increment(rule)) { - prob_t p; p.logeq(logp0(rule)); - base_lh *= p; - } - } - - void DecrementRule(const TRule& rule) { - CCRP_NoTable& crp = RuleCRP(rule.lhs_); - if (crp.decrement(rule)) { - prob_t p; p.logeq(logp0(rule)); - base_lh /= p; - } - } - - void DecrementDerivation(const Hypergraph& hg, const vector& d) { - for (unsigned i = 0; i < d.size(); ++i) { - const TRule& rule = *hg.edges_[d[i]].rule_; - DecrementRule(rule); - } - } - - void IncrementDerivation(const Hypergraph& hg, const vector& d) { - for (unsigned i = 0; i < d.size(); ++i) { - const TRule& rule = *hg.edges_[d[i]].rule_; - IncrementRule(rule); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (map >::const_iterator it = rules.begin(); it != rules.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - p *= base_lh; - return p; - } - - void ResampleDerivation(const Hypergraph& hg, vector* sampled_derivation); - - map > rules; // [lhs] -> distribution over RHSs - prob_t base_lh; - SimpleBase logp0; - vector > samples; // sampled derivations - unsigned int mh_samples; - unsigned int mh_rejects; -}; - -template -void ModelAndData::SampleCorpus(const string& hgpath, int n) { - vector hgs(n); hgs.clear(); - boost::unordered_map acc; - map tot; - for (int i = 0; i < n; ++i) { - ostringstream os; - os << hgpath << '/' << i << ".json.gz"; - if (!FileExists(os.str())) continue; - hgs.push_back(Hypergraph()); - ReadFile rf(os.str()); - HypergraphIO::ReadFromJSON(rf.stream(), &hgs.back()); - } - cerr << "Read " << hgs.size() << " alignment hypergraphs.\n"; - samples.resize(hgs.size()); - const unsigned SAMPLES = 2000; - const unsigned burnin = 3 * SAMPLES / 4; - const unsigned every = 20; - for (unsigned s = 0; s < SAMPLES; ++s) { - if (s % 10 == 0) { - if (s > 0) { cerr << endl; ResampleHyperparameters(); } - cerr << "[" << s << " LLH=" << log(Likelihood()) << " REJECTS=" << ((double)mh_rejects / mh_samples) << " LHS's=" << rules.size() << " base=" << log(base_lh) << "] "; - } - cerr << '.'; - for (unsigned i = 0; i < hgs.size(); ++i) { - ResampleDerivation(hgs[i], &samples[i]); - if (s > burnin && s % every == 0) { - for (unsigned j = 0; j < samples[i].size(); ++j) { - const TRule& rule = *hgs[i].edges_[samples[i][j]].rule_; - ++acc[rule]; - ++tot[rule.lhs_]; 
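SampleCorpus drives the same per-node step that ResampleDerivation spells out below: at each hypergraph node, one incoming edge is drawn with probability proportional to its proposal score times the inside scores of its tail nodes. A small sketch of that single draw, with plain doubles standing in for prob_t and std::mt19937 for MT19937 (types and names here are illustrative only):

#include <cstdio>
#include <random>
#include <vector>

struct InEdge {
  double weight;            // proposal score of the edge itself
  std::vector<int> tails;   // tail node ids
};

// Draw one incoming edge: p(j) is proportional to w_j * prod_k inside[tail_k].
int SampleIncomingEdge(const std::vector<InEdge>& in_edges,
                       const std::vector<double>& inside,
                       std::mt19937& rng) {
  std::vector<double> scores;
  scores.reserve(in_edges.size());
  for (const InEdge& e : in_edges) {
    double s = e.weight;
    for (int t : e.tails) s *= inside[t];
    scores.push_back(s);
  }
  std::discrete_distribution<int> pick(scores.begin(), scores.end());
  return pick(rng);
}

int main() {
  std::mt19937 rng(1);
  std::vector<double> inside = {1.0, 0.5, 0.25};          // toy inside scores
  std::vector<InEdge> in_edges = {{0.2, {0, 1}}, {0.6, {2}}};
  std::printf("sampled edge %d\n", SampleIncomingEdge(in_edges, inside, rng));
  return 0;
}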
- } - } - } - } - cerr << endl; - for (boost::unordered_map::iterator it = acc.begin(); it != acc.end(); ++it) { - cout << it->first << " MyProb=" << log(it->second)-log(tot[it->first.lhs_]) << endl; - } -} - -template -void ModelAndData::ResampleDerivation(const Hypergraph& hg, vector* sampled_deriv) { - vector cur; - cur.swap(*sampled_deriv); - - const prob_t p_cur = Likelihood(); - DecrementDerivation(hg, cur); - if (cur.empty()) { - // first iteration, create restaurants - for (int i = 0; i < hg.edges_.size(); ++i) - RuleCRP(hg.edges_[i].rule_->lhs_); - } - MHSamplerEdgeProb wf(hg, rules, logp0, cur.empty()); -// MHSamplerEdgeProb wf(hg, rules, logp0, false); - const prob_t q_cur = wf.DerivationProb(cur); - vector node_probs; - Inside >(hg, &node_probs, wf); - queue q; - q.push(hg.nodes_.size() - 3); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = wf.edge_probs[edge.id_]; // edge proposal prob - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; - z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } - IncrementDerivation(hg, *sampled_deriv); - -// cerr << "sampled derivation contains " << sampled_deriv->size() << " edges\n"; -// cerr << "DERIV:\n"; -// for (int i = 0; i < sampled_deriv->size(); ++i) { -// cerr << " " << hg.edges_[(*sampled_deriv)[i]].rule_->AsString() << endl; -// } - - if (cur.empty()) return; // accept first sample - - ++mh_samples; - // only need to do MH if proposal is different to current state - if (cur != *sampled_deriv) { - const prob_t q_prop = wf.DerivationProb(*sampled_deriv); - const prob_t p_prop = Likelihood(); - if (!rng->AcceptMetropolisHastings(p_prop, p_cur, q_prop, q_cur)) { - ++mh_rejects; - DecrementDerivation(hg, *sampled_deriv); - IncrementDerivation(hg, cur); - swap(cur, *sampled_deriv); - } - } -} - -int main(int argc, char** argv) { - rng = new MT19937; - ModelAndData m; - m.SampleCorpus("./hgs", 50); - // m.SampleCorpus("./btec/hgs", 5000); - return 0; -} - diff --git a/gi/pf/cfg_wfst_composer.cc b/gi/pf/cfg_wfst_composer.cc deleted file mode 100644 index 21d5ec5b..00000000 --- a/gi/pf/cfg_wfst_composer.cc +++ /dev/null @@ -1,731 +0,0 @@ -#include "cfg_wfst_composer.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include "fast_lexical_cast.hpp" - -#include "phrasetable_fst.h" -#include "sparse_vector.h" -#include "tdict.h" -#include "hg.h" -#include "hg_remove_eps.h" - -namespace po = boost::program_options; -using namespace std; -using namespace std::tr1; - -WFSTNode::~WFSTNode() {} 
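ResampleDerivation above corrects its proposal with a standard Metropolis-Hastings test, accepting the new derivation with probability min(1, p_prop * q_cur / (p_cur * q_prop)). A sketch of just that acceptance decision, done in log space rather than with prob_t and MT19937::AcceptMetropolisHastings:

#include <cmath>
#include <cstdio>
#include <random>

// Accept with probability min(1, (p_prop * q_cur) / (p_cur * q_prop)),
// computed in log space to avoid underflow.
bool AcceptMH(double log_p_prop, double log_p_cur,
              double log_q_prop, double log_q_cur, std::mt19937& rng) {
  const double log_ratio = (log_p_prop + log_q_cur) - (log_p_cur + log_q_prop);
  if (log_ratio >= 0.0) return true;  // uphill moves are always accepted
  std::uniform_real_distribution<double> u(0.0, 1.0);
  return std::log(u(rng)) < log_ratio;
}

int main() {
  std::mt19937 rng(1);
  int accepted = 0;
  for (int i = 0; i < 10000; ++i)
    accepted += AcceptMH(-10.0, -9.0, -2.0, -2.0, rng);  // ratio = e^{-1}
  // expect an acceptance rate near exp(-1) = 0.368
  std::printf("acceptance rate ~ %.3f\n", accepted / 10000.0);
  return 0;
}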
-WFST::~WFST() {} - -// Define the following macro if you want to see lots of debugging output -// when you run the chart parser -#undef DEBUG_CHART_PARSER - -// A few constants used by the chart parser /////////////// -static const int kMAX_NODES = 2000000; -static const string kPHRASE_STRING = "X"; -static bool constants_need_init = true; -static WordID kUNIQUE_START; -static WordID kPHRASE; -static TRulePtr kX1X2; -static TRulePtr kX1; -static WordID kEPS; -static TRulePtr kEPSRule; - -static void InitializeConstants() { - if (constants_need_init) { - kPHRASE = TD::Convert(kPHRASE_STRING) * -1; - kUNIQUE_START = TD::Convert("S") * -1; - kX1X2.reset(new TRule("[X] ||| [X,1] [X,2] ||| [X,1] [X,2]")); - kX1.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); - kEPSRule.reset(new TRule("[X] ||| ||| ")); - kEPS = TD::Convert(""); - constants_need_init = false; - } -} -//////////////////////////////////////////////////////////// - -class EGrammarNode { - friend bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest); - friend void AddGrammarRule(const string& r, map* g); - public: -#ifdef DEBUG_CHART_PARSER - string hint; -#endif - EGrammarNode() : is_some_rule_complete(false), is_root(false) {} - const map& GetTerminals() const { return tptr; } - const map& GetNonTerminals() const { return ntptr; } - bool HasNonTerminals() const { return (!ntptr.empty()); } - bool HasTerminals() const { return (!tptr.empty()); } - bool RuleCompletes() const { - return (is_some_rule_complete || (ntptr.empty() && tptr.empty())); - } - bool GrammarContinues() const { - return !(ntptr.empty() && tptr.empty()); - } - bool IsRoot() const { - return is_root; - } - // these are the features associated with the rule from the start - // node up to this point. If you use these features, you must - // not Extend() this rule. 
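EGrammarNode, whose definition continues below, stores the grammar as a trie over rule right-hand sides: terminals and nonterminals live in separate child maps, Extend() walks one symbol at a time, and a flag marks nodes where some rule completes. A reduced sketch of that structure, using bare ints for symbols with negative values as nonterminals (the WordID convention in the deleted code); identifiers here are made up:

#include <cstdio>
#include <map>

// Grammar as a trie over rule right-hand sides; a node is "complete" if some
// rule ends there. Negative symbols are nonterminals, positive are terminals.
struct GrammarTrie {
  std::map<int, GrammarTrie> nt_children;  // nonterminal edges
  std::map<int, GrammarTrie> t_children;   // terminal edges
  bool rule_complete = false;

  // Walk one symbol; returns nullptr if no rule continues this way.
  const GrammarTrie* Extend(int sym) const {
    const auto& kids = (sym < 0) ? nt_children : t_children;
    auto it = kids.find(sym);
    return (it == kids.end()) ? nullptr : &it->second;
  }

  // Insert a rule RHS of length n.
  void Add(const int* rhs, int n) {
    GrammarTrie* cur = this;
    for (int i = 0; i < n; ++i) {
      auto& kids = (rhs[i] < 0) ? cur->nt_children : cur->t_children;
      cur = &kids[rhs[i]];
    }
    cur->rule_complete = true;
  }
};

int main() {
  GrammarTrie root;
  const int rhs1[] = {-1, -2};   // e.g. [X] -> [X,1] [X,2]
  const int rhs2[] = {7, -1};    // e.g. [X] -> w [X,1], with 7 a toy terminal id
  root.Add(rhs1, 2);
  root.Add(rhs2, 2);
  const GrammarTrie* a = root.Extend(-1);
  const GrammarTrie* b = a ? a->Extend(-2) : nullptr;
  std::printf("[X,1] [X,2]: %s\n",
              (b && b->rule_complete) ? "rule completes" : "no rule");
  return 0;
}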
- const SparseVector& GetCFGProductionFeatures() const { - return input_features; - } - - const EGrammarNode* Extend(const WordID& t) const { - if (t < 0) { - map::const_iterator it = ntptr.find(t); - if (it == ntptr.end()) return NULL; - return &it->second; - } else { - map::const_iterator it = tptr.find(t); - if (it == tptr.end()) return NULL; - return &it->second; - } - } - - private: - map tptr; - map ntptr; - SparseVector input_features; - bool is_some_rule_complete; - bool is_root; -}; -typedef map EGrammar; // indexed by the rule LHS - -// edges are immutable once created -struct Edge { -#ifdef DEBUG_CHART_PARSER - static int id_count; - const int id; -#endif - const WordID cat; // lhs side of rule proved/being proved - const EGrammarNode* const dot; // dot position - const WFSTNode* const q; // start of span - const WFSTNode* const r; // end of span - const Edge* const active_parent; // back pointer, NULL for PREDICT items - const Edge* const passive_parent; // back pointer, NULL for SCAN and PREDICT items - TRulePtr tps; // translations - boost::shared_ptr > features; // features from CFG rule - - bool IsPassive() const { - // when a rule is completed, this value will be set - return static_cast(features); - } - bool IsActive() const { return !IsPassive(); } - bool IsInitial() const { - return !(active_parent || passive_parent); - } - bool IsCreatedByScan() const { - return active_parent && !passive_parent && !dot->IsRoot(); - } - bool IsCreatedByPredict() const { - return dot->IsRoot(); - } - bool IsCreatedByComplete() const { - return active_parent && passive_parent; - } - - // constructor for PREDICT - Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(NULL), passive_parent(NULL), tps() {} - Edge(WordID c, const EGrammarNode* d, const WFSTNode* q_and_r, const Edge* act_parent) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(q_and_r), r(q_and_r), active_parent(act_parent), passive_parent(NULL), tps() {} - - // constructors for SCAN - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const TRulePtr& translations) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations) {} - - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const TRulePtr& translations, - const SparseVector& feats) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(NULL), tps(translations), - features(new SparseVector(feats)) {} - - // constructors for COMPLETE - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const Edge *pas_par) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps() { - assert(pas_par->IsPassive()); - assert(act_par->IsActive()); - } - - Edge(WordID c, const EGrammarNode* d, const WFSTNode* i, const WFSTNode* j, - const Edge* act_par, const Edge *pas_par, const SparseVector& feats) : -#ifdef DEBUG_CHART_PARSER - id(++id_count), -#endif - cat(c), dot(d), q(i), r(j), active_parent(act_par), passive_parent(pas_par), tps(), - features(new SparseVector(feats)) { - assert(pas_par->IsPassive()); - assert(act_par->IsActive()); - } - - // constructor for COMPLETE 
query - Edge(const WFSTNode* _r) : -#ifdef DEBUG_CHART_PARSER - id(0), -#endif - cat(0), dot(NULL), q(NULL), - r(_r), active_parent(NULL), passive_parent(NULL), tps() {} - // constructor for MERGE quere - Edge(const WFSTNode* _q, int) : -#ifdef DEBUG_CHART_PARSER - id(0), -#endif - cat(0), dot(NULL), q(_q), - r(NULL), active_parent(NULL), passive_parent(NULL), tps() {} -}; -#ifdef DEBUG_CHART_PARSER -int Edge::id_count = 0; -#endif - -ostream& operator<<(ostream& os, const Edge& e) { - string type = "PREDICT"; - if (e.IsCreatedByScan()) - type = "SCAN"; - else if (e.IsCreatedByComplete()) - type = "COMPLETE"; - os << "[" -#ifdef DEBUG_CHART_PARSER - << '(' << e.id << ") " -#else - << '(' << &e << ") " -#endif - << "q=" << e.q << ", r=" << e.r - << ", cat="<< TD::Convert(e.cat*-1) << ", dot=" - << e.dot -#ifdef DEBUG_CHART_PARSER - << e.dot->hint -#endif - << (e.IsActive() ? ", Active" : ", Passive") - << ", " << type; -#ifdef DEBUG_CHART_PARSER - if (e.active_parent) { os << ", act.parent=(" << e.active_parent->id << ')'; } - if (e.passive_parent) { os << ", psv.parent=(" << e.passive_parent->id << ')'; } -#endif - if (e.tps) { os << ", tps=" << e.tps->AsString(); } - return os << ']'; -} - -struct Traversal { - const Edge* const edge; // result from the active / passive combination - const Edge* const active; - const Edge* const passive; - Traversal(const Edge* me, const Edge* a, const Edge* p) : edge(me), active(a), passive(p) {} -}; - -struct UniqueTraversalHash { - size_t operator()(const Traversal* t) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(t->active); - x = ((x << 5) + x) ^ reinterpret_cast(t->passive); - x = ((x << 5) + x) ^ t->edge->IsActive(); - return x; - } -}; - -struct UniqueTraversalEquals { - size_t operator()(const Traversal* a, const Traversal* b) const { - return (a->passive == b->passive && a->active == b->active && a->edge->IsActive() == b->edge->IsActive()); - } -}; - -struct UniqueEdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - if (e->IsActive()) { - x = ((x << 5) + x) ^ reinterpret_cast(e->dot); - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - x = ((x << 5) + x) ^ static_cast(e->cat); - x += 13; - } else { // with passive edges, we don't care about the dot - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - x = ((x << 5) + x) ^ static_cast(e->cat); - } - return x; - } -}; - -struct UniqueEdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - if (a->IsActive() != b->IsActive()) return false; - if (a->IsActive()) { - return (a->cat == b->cat) && (a->dot == b->dot) && (a->q == b->q) && (a->r == b->r); - } else { - return (a->cat == b->cat) && (a->q == b->q) && (a->r == b->r); - } - } -}; - -struct REdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(e->r); - return x; - } -}; - -struct REdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - return (a->r == b->r); - } -}; - -struct QEdgeHash { - size_t operator()(const Edge* e) const { - size_t x = 5381; - x = ((x << 5) + x) ^ reinterpret_cast(e->q); - return x; - } -}; - -struct QEdgeEquals { - bool operator()(const Edge* a, const Edge* b) const { - return (a->q == b->q); - } -}; - -struct EdgeQueue { - queue q; - EdgeQueue() {} - void clear() { while(!q.empty()) q.pop(); } - bool HasWork() const { return !q.empty(); } - const Edge* Next() { const Edge* res = q.front(); q.pop(); return 
res; } - void AddEdge(const Edge* s) { q.push(s); } -}; - -class CFG_WFSTComposerImpl { - public: - CFG_WFSTComposerImpl(WordID start_cat, - const WFSTNode* q_0, - const WFSTNode* q_final) : start_cat_(start_cat), q_0_(q_0), q_final_(q_final) {} - - // returns false if the intersection is empty - bool Compose(const EGrammar& g, Hypergraph* forest) { - goal_node = NULL; - EGrammar::const_iterator sit = g.find(start_cat_); - forest->ReserveNodes(kMAX_NODES); - assert(sit != g.end()); - Edge* init = new Edge(start_cat_, &sit->second, q_0_); - assert(IncorporateNewEdge(init)); - while (exp_agenda.HasWork() || agenda.HasWork()) { - while(exp_agenda.HasWork()) { - const Edge* edge = exp_agenda.Next(); - FinishEdge(edge, forest); - } - if (agenda.HasWork()) { - const Edge* edge = agenda.Next(); -#ifdef DEBUG_CHART_PARSER - cerr << "processing (" << edge->id << ')' << endl; -#endif - if (edge->IsActive()) { - if (edge->dot->HasTerminals()) - DoScan(edge); - if (edge->dot->HasNonTerminals()) { - DoMergeWithPassives(edge); - DoPredict(edge, g); - } - } else { - DoComplete(edge); - } - } - } - if (goal_node) { - forest->PruneUnreachable(goal_node->id_); - RemoveEpsilons(forest, kEPS); - } - FreeAll(); - return goal_node; - } - - void FreeAll() { - for (int i = 0; i < free_list_.size(); ++i) - delete free_list_[i]; - free_list_.clear(); - for (int i = 0; i < traversal_free_list_.size(); ++i) - delete traversal_free_list_[i]; - traversal_free_list_.clear(); - all_traversals.clear(); - exp_agenda.clear(); - agenda.clear(); - tps2node.clear(); - edge2node.clear(); - all_edges.clear(); - passive_edges.clear(); - active_edges.clear(); - } - - ~CFG_WFSTComposerImpl() { - FreeAll(); - } - - // returns the total number of edges created during composition - int EdgesCreated() const { - return free_list_.size(); - } - - private: - void DoScan(const Edge* edge) { - // here, we assume that the FST will potentially have many more outgoing - // edges than the grammar, which will be just a couple. If you want to - // efficiently handle the case where both are relatively large, this code - // will need to change how the intersection is done. The best general - // solution would probably be the Baeza-Yates double binary search. - - const EGrammarNode* dot = edge->dot; - const WFSTNode* r = edge->r; - const map& terms = dot->GetTerminals(); - for (map::const_iterator git = terms.begin(); - git != terms.end(); ++git) { - - if (!(TD::Convert(git->first)[0] >= '0' && TD::Convert(git->first)[0] <= '9')) { - std::cerr << "TERMINAL SYMBOL: " << TD::Convert(git->first) << endl; - abort(); - } - std::vector > extensions = r->ExtendInput(atoi(TD::Convert(git->first).c_str())); - for (unsigned nsi = 0; nsi < extensions.size(); ++nsi) { - const WFSTNode* next_r = extensions[nsi].first; - const EGrammarNode* next_dot = &git->second; - const bool grammar_continues = next_dot->GrammarContinues(); - const bool rule_completes = next_dot->RuleCompletes(); - if (extensions[nsi].second) - cerr << "!!! 
" << extensions[nsi].second->AsString() << endl; - // cerr << " rule completes: " << rule_completes << " after consuming " << TD::Convert(git->first) << endl; - assert(grammar_continues || rule_completes); - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - if (rule_completes) - IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second, input_features)); - if (grammar_continues) - IncorporateNewEdge(new Edge(edge->cat, next_dot, edge->q, next_r, edge, extensions[nsi].second)); - } - } - } - - void DoPredict(const Edge* edge, const EGrammar& g) { - const EGrammarNode* dot = edge->dot; - const map& non_terms = dot->GetNonTerminals(); - for (map::const_iterator git = non_terms.begin(); - git != non_terms.end(); ++git) { - const WordID nt_to_predict = git->first; - //cerr << edge->id << " -- " << TD::Convert(nt_to_predict*-1) << endl; - EGrammar::const_iterator egi = g.find(nt_to_predict); - if (egi == g.end()) { - cerr << "[ERROR] Can't find any grammar rules with a LHS of type " - << TD::Convert(-1*nt_to_predict) << '!' << endl; - continue; - } - assert(edge->IsActive()); - const EGrammarNode* new_dot = &egi->second; - Edge* new_edge = new Edge(nt_to_predict, new_dot, edge->r, edge); - IncorporateNewEdge(new_edge); - } - } - - void DoComplete(const Edge* passive) { -#ifdef DEBUG_CHART_PARSER - cerr << " complete: " << *passive << endl; -#endif - const WordID completed_nt = passive->cat; - const WFSTNode* q = passive->q; - const WFSTNode* next_r = passive->r; - const Edge query(q); - const pair::iterator, - unordered_multiset::iterator > p = - active_edges.equal_range(&query); - for (unordered_multiset::iterator it = p.first; - it != p.second; ++it) { - const Edge* active = *it; -#ifdef DEBUG_CHART_PARSER - cerr << " pos: " << *active << endl; -#endif - const EGrammarNode* next_dot = active->dot->Extend(completed_nt); - if (!next_dot) continue; - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - // add up to 2 rules - if (next_dot->RuleCompletes()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); - if (next_dot->GrammarContinues()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); - } - } - - void DoMergeWithPassives(const Edge* active) { - // edge is active, has non-terminals, we need to find the passives that can extend it - assert(active->IsActive()); - assert(active->dot->HasNonTerminals()); -#ifdef DEBUG_CHART_PARSER - cerr << " merge active with passives: ACT=" << *active << endl; -#endif - const Edge query(active->r, 1); - const pair::iterator, - unordered_multiset::iterator > p = - passive_edges.equal_range(&query); - for (unordered_multiset::iterator it = p.first; - it != p.second; ++it) { - const Edge* passive = *it; - const EGrammarNode* next_dot = active->dot->Extend(passive->cat); - if (!next_dot) continue; - const WFSTNode* next_r = passive->r; - const SparseVector& input_features = next_dot->GetCFGProductionFeatures(); - if (next_dot->RuleCompletes()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive, input_features)); - if (next_dot->GrammarContinues()) - IncorporateNewEdge(new Edge(active->cat, next_dot, active->q, next_r, active, passive)); - } - } - - // take ownership of edge memory, add to various indexes, etc - // returns true if this edge is new - bool IncorporateNewEdge(Edge* edge) { - free_list_.push_back(edge); - if (edge->passive_parent && 
edge->active_parent) { - Traversal* t = new Traversal(edge, edge->active_parent, edge->passive_parent); - traversal_free_list_.push_back(t); - if (all_traversals.find(t) != all_traversals.end()) { - return false; - } else { - all_traversals.insert(t); - } - } - exp_agenda.AddEdge(edge); - return true; - } - - bool FinishEdge(const Edge* edge, Hypergraph* hg) { - bool is_new = false; - if (all_edges.find(edge) == all_edges.end()) { -#ifdef DEBUG_CHART_PARSER - cerr << *edge << " is NEW\n"; -#endif - all_edges.insert(edge); - is_new = true; - if (edge->IsPassive()) passive_edges.insert(edge); - if (edge->IsActive()) active_edges.insert(edge); - agenda.AddEdge(edge); - } else { -#ifdef DEBUG_CHART_PARSER - cerr << *edge << " is NOT NEW.\n"; -#endif - } - AddEdgeToTranslationForest(edge, hg); - return is_new; - } - - // build the translation forest - void AddEdgeToTranslationForest(const Edge* edge, Hypergraph* hg) { - assert(hg->nodes_.size() < kMAX_NODES); - Hypergraph::Node* tps = NULL; - // first add any target language rules - if (edge->tps) { - Hypergraph::Node*& node = tps2node[(size_t)edge->tps.get()]; - if (!node) { - // cerr << "Creating phrases for " << edge->tps << endl; - const TRulePtr& rule = edge->tps; - node = hg->AddNode(kPHRASE); - Hypergraph::Edge* hg_edge = hg->AddEdge(rule, Hypergraph::TailNodeVector()); - hg_edge->feature_values_ += rule->GetFeatureValues(); - hg->ConnectEdgeToHeadNode(hg_edge, node); - } - tps = node; - } - Hypergraph::Node*& head_node = edge2node[edge]; - if (!head_node) - head_node = hg->AddNode(kPHRASE); - if (edge->cat == start_cat_ && edge->q == q_0_ && edge->r == q_final_ && edge->IsPassive()) { - assert(goal_node == NULL || goal_node == head_node); - goal_node = head_node; - } - Hypergraph::TailNodeVector tail; - SparseVector extra; - if (edge->IsCreatedByPredict()) { - // extra.set_value(FD::Convert("predict"), 1); - } else if (edge->IsCreatedByScan()) { - tail.push_back(edge2node[edge->active_parent]->id_); - if (tps) { - tail.push_back(tps->id_); - } - //extra.set_value(FD::Convert("scan"), 1); - } else if (edge->IsCreatedByComplete()) { - tail.push_back(edge2node[edge->active_parent]->id_); - tail.push_back(edge2node[edge->passive_parent]->id_); - //extra.set_value(FD::Convert("complete"), 1); - } else { - assert(!"unexpected edge type!"); - } - //cerr << head_node->id_ << "<--" << *edge << endl; - -#ifdef DEBUG_CHART_PARSER - for (int i = 0; i < tail.size(); ++i) - if (tail[i] == head_node->id_) { - cerr << "ERROR: " << *edge << "\n i=" << i << endl; - if (i == 1) { cerr << "\tP: " << *edge->passive_parent << endl; } - if (i == 0) { cerr << "\tA: " << *edge->active_parent << endl; } - assert(!"self-loop found!"); - } -#endif - Hypergraph::Edge* hg_edge = NULL; - if (tail.size() == 0) { - hg_edge = hg->AddEdge(kEPSRule, tail); - } else if (tail.size() == 1) { - hg_edge = hg->AddEdge(kX1, tail); - } else if (tail.size() == 2) { - hg_edge = hg->AddEdge(kX1X2, tail); - } - if (edge->features) - hg_edge->feature_values_ += *edge->features; - hg_edge->feature_values_ += extra; - hg->ConnectEdgeToHeadNode(hg_edge, head_node); - } - - Hypergraph::Node* goal_node; - EdgeQueue exp_agenda; - EdgeQueue agenda; - unordered_map tps2node; - unordered_map edge2node; - unordered_set all_traversals; - unordered_set all_edges; - unordered_multiset passive_edges; - unordered_multiset active_edges; - vector free_list_; - vector traversal_free_list_; - const WordID start_cat_; - const WFSTNode* const q_0_; - const WFSTNode* const q_final_; -}; - -#ifdef 
DEBUG_CHART_PARSER -static string TrimRule(const string& r) { - size_t start = r.find(" |||") + 5; - size_t end = r.rfind(" |||"); - return r.substr(start, end - start); -} -#endif - -void AddGrammarRule(const string& r, EGrammar* g) { - const size_t pos = r.find(" ||| "); - if (pos == string::npos || r[0] != '[') { - cerr << "Bad rule: " << r << endl; - return; - } - const size_t rpos = r.rfind(" ||| "); - string feats; - string rs = r; - if (rpos != pos) { - feats = r.substr(rpos + 5); - rs = r.substr(0, rpos); - } - string rhs = rs.substr(pos + 5); - string trule = rs + " ||| " + rhs + " ||| " + feats; - TRule tr(trule); - cerr << "X: " << tr.e_[0] << endl; -#ifdef DEBUG_CHART_PARSER - string hint_last_rule; -#endif - EGrammarNode* cur = &(*g)[tr.GetLHS()]; - cur->is_root = true; - for (int i = 0; i < tr.FLength(); ++i) { - WordID sym = tr.f()[i]; -#ifdef DEBUG_CHART_PARSER - hint_last_rule = TD::Convert(sym < 0 ? -sym : sym); - cur->hint += " <@@> (*" + hint_last_rule + ") " + TrimRule(tr.AsString()); -#endif - if (sym < 0) - cur = &cur->ntptr[sym]; - else - cur = &cur->tptr[sym]; - } -#ifdef DEBUG_CHART_PARSER - cur->hint += " <@@> (" + hint_last_rule + "*) " + TrimRule(tr.AsString()); -#endif - cur->is_some_rule_complete = true; - cur->input_features = tr.GetFeatureValues(); -} - -CFG_WFSTComposer::~CFG_WFSTComposer() { - delete pimpl_; -} - -CFG_WFSTComposer::CFG_WFSTComposer(const WFST& wfst) { - InitializeConstants(); - pimpl_ = new CFG_WFSTComposerImpl(kUNIQUE_START, wfst.Initial(), wfst.Final()); -} - -bool CFG_WFSTComposer::Compose(const Hypergraph& src_forest, Hypergraph* trg_forest) { - // first, convert the src forest into an EGrammar - EGrammar g; - const int nedges = src_forest.edges_.size(); - const int nnodes = src_forest.nodes_.size(); - vector cats(nnodes); - bool assign_cats = false; - for (int i = 0; i < nnodes; ++i) - if (assign_cats) { - cats[i] = TD::Convert("CAT_" + boost::lexical_cast(i)) * -1; - } else { - cats[i] = src_forest.nodes_[i].cat_; - } - // construct the grammar - for (int i = 0; i < nedges; ++i) { - const Hypergraph::Edge& edge = src_forest.edges_[i]; - const vector& src = edge.rule_->f(); - EGrammarNode* cur = &g[cats[edge.head_node_]]; - cur->is_root = true; - int ntc = 0; - for (int j = 0; j < src.size(); ++j) { - WordID sym = src[j]; - if (sym <= 0) { - sym = cats[edge.tail_nodes_[ntc]]; - ++ntc; - cur = &cur->ntptr[sym]; - } else { - cur = &cur->tptr[sym]; - } - } - cur->is_some_rule_complete = true; - cur->input_features = edge.feature_values_; - } - EGrammarNode& goal_rule = g[kUNIQUE_START]; - assert((goal_rule.ntptr.size() == 1 && goal_rule.tptr.size() == 0) || - (goal_rule.ntptr.size() == 0 && goal_rule.tptr.size() == 1)); - - return pimpl_->Compose(g, trg_forest); -} - -bool CFG_WFSTComposer::Compose(istream* in, Hypergraph* trg_forest) { - EGrammar g; - while(*in) { - string line; - getline(*in, line); - if (line.empty()) continue; - AddGrammarRule(line, &g); - } - - return pimpl_->Compose(g, trg_forest); -} diff --git a/gi/pf/cfg_wfst_composer.h b/gi/pf/cfg_wfst_composer.h deleted file mode 100644 index cf47f459..00000000 --- a/gi/pf/cfg_wfst_composer.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef _CFG_WFST_COMPOSER_H_ -#define _CFG_WFST_COMPOSER_H_ - -#include -#include -#include - -#include "trule.h" -#include "wordid.h" - -class CFG_WFSTComposerImpl; -class Hypergraph; - -struct WFSTNode { - virtual ~WFSTNode(); - // returns the next states reachable by consuming srcindex (which identifies a word) - // paired with the output string 
generated by taking that transition. - virtual std::vector > ExtendInput(unsigned srcindex) const = 0; -}; - -struct WFST { - virtual ~WFST(); - virtual const WFSTNode* Final() const = 0; - virtual const WFSTNode* Initial() const = 0; -}; - -class CFG_WFSTComposer { - public: - ~CFG_WFSTComposer(); - explicit CFG_WFSTComposer(const WFST& wfst); - bool Compose(const Hypergraph& in_forest, Hypergraph* trg_forest); - - // reads the grammar from a file. There must be a single top-level - // S -> X rule. Anything else is possible. Format is: - // [S] ||| [SS,1] - // [SS] ||| [NP,1] [VP,2] ||| Feature1=0.2 Feature2=-2.3 - // [SS] ||| [VP,1] [NP,2] ||| Feature1=0.8 - // [NP] ||| [DET,1] [N,2] ||| Feature3=2 - // ... - bool Compose(std::istream* grammar_file, Hypergraph* trg_forest); - - private: - CFG_WFSTComposerImpl* pimpl_; -}; - -#endif diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h deleted file mode 100644 index 81ddb206..00000000 --- a/gi/pf/conditional_pseg.h +++ /dev/null @@ -1,275 +0,0 @@ -#ifndef _CONDITIONAL_PSEG_H_ -#define _CONDITIONAL_PSEG_H_ - -#include -#include -#include -#include - -#include "m.h" -#include "prob.h" -#include "ccrp_nt.h" -#include "mfcr.h" -#include "trule.h" -#include "base_distributions.h" -#include "tdict.h" - -template -struct MConditionalTranslationModel { - explicit MConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0), d(0.5), strength(1.0), lambdas(1, prob_t::One()), p0s(1) {} - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << std::endl; - for (MFCR<1,TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl; - } - } - - double log_likelihood(const double& dd, const double& aa) const { - if (aa <= -dd) return -std::numeric_limits::infinity(); - //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); - double llh = Md::log_beta_density(dd, 1, 1) + - Md::log_gamma_density(dd + aa, 1, 1); - typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::const_iterator it; - for (it = r.begin(); it != r.end(); ++it) - llh += it->second.log_crp_prob(dd, aa); - return llh; - } - - struct DiscountResampler { - DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {} - const MConditionalTranslationModel& m_; - double operator()(const double& proposed_discount) const { - return m_.log_likelihood(proposed_discount, m_.strength); - } - }; - - struct AlphaResampler { - AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {} - const MConditionalTranslationModel& m_; - double operator()(const double& proposed_strength) const { - return m_.log_likelihood(m_.d, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng) { - typename std::tr1::unordered_map, MFCR<1,TRule>, boost::hash > >::iterator it; -#if 1 - for (it = r.begin(); it != r.end(); ++it) { - it->second.resample_hyperparameters(rng); - } -#else - const unsigned nloop = 5; - const unsigned niterations = 10; - DiscountResampler dr(*this); - AlphaResampler ar(*this); - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, 
niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - d = slice_sampler1d(dr, d, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - strength = slice_sampler1d(ar, strength, *rng, -d, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl; - for (it = r.begin(); it != r.end(); ++it) { - it->second.set_discount(d); - it->second.set_strength(strength); - } -#endif - } - - int DecrementRule(const TRule& rule, MT19937* rng) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - const TableCount delta = it->second.decrement(rule, rng); - if (delta.count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return delta.count; - } - - int IncrementRule(const TRule& rule, MT19937* rng) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - //it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first; - it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1,1,1,1,0.6, -0.12))).first; - } - p0s[0] = rp0(rule); - TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); - return delta.count; - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = r.find(rule.f_); - if (it == r.end()) { - p = rp0(rule); - } else { - p0s[0] = rp0(rule); - p = it->second.prob(rule, p0s.begin(), lambdas.begin()); - } - return p; - } - - prob_t Likelihood() const { - prob_t p; p.logeq(log_likelihood(d, strength)); - return p; - } - - const ConditionalBaseMeasure& rp0; - typedef std::tr1::unordered_map, - MFCR<1, TRule>, - boost::hash > > RuleModelHash; - RuleModelHash r; - double d, strength; - std::vector lambdas; - mutable std::vector p0s; -}; - -template -struct ConditionalTranslationModel { - explicit ConditionalTranslationModel(ConditionalBaseMeasure& rcp0) : - rp0(rcp0) {} - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second << '\t' << i2->first << std::endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) - it->second.resample_hyperparameters(rng); - } - - int DecrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - int count = it->second.decrement(rule); - if (count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return count; - } - - int IncrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - it = r.insert(make_pair(rule.f_, CCRP_NoTable(1.0, 1.0, 8.0))).first; - } - int count = it->second.increment(rule); - return count; - } - - void IncrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = 
r.find(rule.f_); - if (it == r.end()) { - p.logeq(log(rp0(rule))); - } else { - p.logeq(it->second.logprob(rule, log(rp0(rule)))); - } - return p; - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - p *= rp0(i2->first); - } - return p; - } - - const ConditionalBaseMeasure& rp0; - typedef std::tr1::unordered_map, - CCRP_NoTable, - boost::hash > > RuleModelHash; - RuleModelHash r; -}; - -template -struct ConditionalParallelSegementationModel { - explicit ConditionalParallelSegementationModel(ConditionalBaseMeasure& rcp0) : - tmodel(rcp0), base(prob_t::One()), aligns(1,1) {} - - ConditionalTranslationModel tmodel; - - void DecrementRule(const TRule& rule) { - tmodel.DecrementRule(rule); - } - - void IncrementRule(const TRule& rule) { - tmodel.IncrementRule(rule); - } - - void IncrementRulesAndAlignments(const std::vector& rules) { - tmodel.IncrementRules(rules); - for (int i = 0; i < rules.size(); ++i) { - IncrementAlign(rules[i]->f_.size()); - } - } - - void DecrementRulesAndAlignments(const std::vector& rules) { - tmodel.DecrementRules(rules); - for (int i = 0; i < rules.size(); ++i) { - DecrementAlign(rules[i]->f_.size()); - } - } - - prob_t RuleProbability(const TRule& rule) const { - return tmodel.RuleProbability(rule); - } - - void IncrementAlign(unsigned span) { - if (aligns.increment(span)) { - // TODO - } - } - - void DecrementAlign(unsigned span) { - if (aligns.decrement(span)) { - // TODO - } - } - - prob_t AlignProbability(unsigned span) const { - prob_t p; - p.logeq(aligns.logprob(span, Md::log_poisson(span, 1.0))); - return p; - } - - prob_t Likelihood() const { - prob_t p; p.logeq(aligns.log_crp_prob()); - p *= base; - p *= tmodel.Likelihood(); - return p; - } - - prob_t base; - CCRP_NoTable aligns; -}; - -#endif - diff --git a/gi/pf/condnaive.cc b/gi/pf/condnaive.cc deleted file mode 100644 index 419731ac..00000000 --- a/gi/pf/condnaive.cc +++ /dev/null @@ -1,298 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "conditional_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(4),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(4),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, 
dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -struct ModelAndData { - explicit ModelAndData(ConditionalParallelSegementationModel& m, const vector >& ce, const vector >& cf, const set& ve, const set& vf) : - model(m), - rng(&*prng), - corpuse(ce), - corpusf(cf), - vocabe(ve), - vocabf(vf), - mh_samples(), - mh_rejects(), - kX(-TD::Convert("X")), - derivations(corpuse.size()) {} - - void ResampleHyperparameters() { - } - - void InstantiateRule(const pair& from, - const pair& to, - const vector& sentf, - const vector& sente, - TRule* rule) const { - rule->f_.clear(); - rule->e_.clear(); - rule->lhs_ = kX; - for (short i = from.first; i < to.first; ++i) - rule->f_.push_back(sentf[i]); - for (short i = from.second; i < to.second; ++i) - rule->e_.push_back(sente[i]); - } - - void DecrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.DecrementRule(x); - model.DecrementAlign(x.f_.size()); - } - } - - void PrintDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - cerr << i << '/' << (d.size() - 1) << ": " << x << endl; - } - } - - void IncrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.IncrementRule(x); - model.IncrementAlign(x.f_.size()); - } - } - - prob_t Likelihood() const { - return model.Likelihood(); - } - - prob_t DerivationProposalProbability(const vector >& d, const vector& sentf, const vector& sente) const { - prob_t p = prob_t::One(); - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - p *= model.RuleProbability(x); - p *= model.AlignProbability(x.f_.size()); - } - return p; - } - - void Sample(); - - ConditionalParallelSegementationModel& model; - MT19937* rng; - const vector >& corpuse, corpusf; - const set& vocabe, vocabf; - unsigned mh_samples, mh_rejects; - const int kX; - vector > > derivations; -}; - -void ModelAndData::Sample() { - unsigned MAXK = kMAX_SRC_PHRASE; - unsigned MAXL = kMAX_TRG_PHRASE; - TRule x; - x.lhs_ = -TD::Convert("X"); - - for (int samples = 0; samples < 1000; ++samples) { - if (samples % 1 == 0 && samples > 0) { - //ResampleHyperparameters(); - cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; - for (int i = 0; i < 10; ++i) { - cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; - PrintDerivation(derivations[i], corpusf[i], corpuse[i]); - } - static TRule xx("[X] ||| w n ||| s h ||| X=0"); - const CCRP_NoTable& dcrp = model.tmodel.r.find(xx.f_)->second; - for (CCRP_NoTable::const_iterator it = dcrp.begin(); it != dcrp.end(); ++it) { - cerr << "\t" << it->second << "\t" << it->first << endl; - } - 
} - cerr << '.' << flush; - for (int s = 0; s < corpuse.size(); ++s) { - const vector& sentf = corpusf[s]; - const vector& sente = corpuse[s]; -// cerr << " CUSTOMERS: " << rules.num_customers() << endl; -// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - - vector >& deriv = derivations[s]; - const prob_t p_cur = Likelihood(); - DecrementDerivation(deriv, sentf, sente); - - boost::multi_array a(boost::extents[sentf.size() + 1][sente.size() + 1]); - boost::multi_array trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); - a[0][0] = prob_t::One(); - for (int i = 0; i < sentf.size(); ++i) { - for (int j = 0; j < sente.size(); ++j) { - const prob_t src_a = a[i][j]; - x.f_.clear(); - for (int k = 1; k <= MAXK; ++k) { - if (i + k > sentf.size()) break; - x.f_.push_back(sentf[i + k - 1]); - x.e_.clear(); - const prob_t p_span = model.AlignProbability(k); // prob of consuming this much source - for (int l = 1; l <= MAXL; ++l) { - if (j + l > sente.size()) break; - x.e_.push_back(sente[j + l - 1]); - trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * p_span; - a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; - } - } - } - } -// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; - const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - - vector > newderiv; - int cur_i = sentf.size(); - int cur_j = sente.size(); - while(cur_i > 0 && cur_j > 0) { - newderiv.push_back(pair(cur_i, cur_j)); -// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; - SampleSet ss; - vector > nexts; - for (int k = 1; k <= MAXK; ++k) { - const int hyp_i = cur_i - k; - if (hyp_i < 0) break; - for (int l = 1; l <= MAXL; ++l) { - const int hyp_j = cur_j - l; - if (hyp_j < 0) break; - const prob_t& inside = a[hyp_i][hyp_j]; - if (inside == prob_t::Zero()) continue; - const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; - if (transp == prob_t::Zero()) continue; - const prob_t p = inside * transp; - ss.add(p); - nexts.push_back(pair(hyp_i, hyp_j)); -// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl; - } - } -// cerr << " sample set has " << nexts.size() << " elements.\n"; - const int selected = rng->SelectSample(ss); - cur_i = nexts[selected].first; - cur_j = nexts[selected].second; - } - newderiv.push_back(pair(0,0)); - const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); - IncrementDerivation(newderiv, sentf, sente); -// cerr << "SANITY: " << q_new << " " <(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - Model1 m1(conf["model1"].as()); - - PhraseConditionalBase pcb0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - ConditionalParallelSegementationModel x(pcb0); - - ModelAndData posterior(x, corpuse, corpusf, vocabe, vocabf); - posterior.Sample(); - - TRule 
r1("[X] ||| x ||| l e ||| X=0"); - TRule r2("[X] ||| A ||| a d ||| X=0"); - TRule r3("[X] ||| n ||| e r ||| X=0"); - TRule r4("[X] ||| x A n ||| b l a g ||| X=0"); - - PhraseConditionalUninformativeBase u0(vocabe.size()); - - cerr << (pcb0(r1)*pcb0(r2)*pcb0(r3)) << endl; - cerr << (u0(r4)) << endl; - - return 0; -} - diff --git a/gi/pf/corpus.cc b/gi/pf/corpus.cc deleted file mode 100644 index cb6e4ed7..00000000 --- a/gi/pf/corpus.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "corpus.h" - -#include -#include -#include - -#include "tdict.h" -#include "filelib.h" - -using namespace std; - -namespace corpus { - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - ReadFile rf(filename); - istream* in = rf.stream(); - assert(*in); - string line; - unsigned lc = 0; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(getline(*in, line)) { - ++lc; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { - isf = false; - } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - if (cur == kDIV) { - cerr << "ERROR in " << lc << ": " << line << endl << endl; - abort(); - } - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } -} - -} - diff --git a/gi/pf/corpus.h b/gi/pf/corpus.h deleted file mode 100644 index e7febdb7..00000000 --- a/gi/pf/corpus.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _CORPUS_H_ -#define _CORPUS_H_ - -#include -#include -#include -#include "wordid.h" - -namespace corpus { - -void ReadParallelCorpus(const std::string& filename, - std::vector >* f, - std::vector >* e, - std::set* vocab_f, - std::set* vocab_e); - -} - -#endif diff --git a/gi/pf/dpnaive.cc b/gi/pf/dpnaive.cc deleted file mode 100644 index 75ccad72..00000000 --- a/gi/pf/dpnaive.cc +++ /dev/null @@ -1,301 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "corpus.h" - -using namespace std; -using namespace std::tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(4),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(4),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - 
po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -boost::shared_ptr prng; - -template -struct ModelAndData { - explicit ModelAndData(MonotonicParallelSegementationModel& m, const Base& b, const vector >& ce, const vector >& cf, const set& ve, const set& vf) : - model(m), - rng(&*prng), - p0(b), - baseprob(prob_t::One()), - corpuse(ce), - corpusf(cf), - vocabe(ve), - vocabf(vf), - mh_samples(), - mh_rejects(), - kX(-TD::Convert("X")), - derivations(corpuse.size()) {} - - void ResampleHyperparameters() { - } - - void InstantiateRule(const pair& from, - const pair& to, - const vector& sentf, - const vector& sente, - TRule* rule) const { - rule->f_.clear(); - rule->e_.clear(); - rule->lhs_ = kX; - for (short i = from.first; i < to.first; ++i) - rule->f_.push_back(sentf[i]); - for (short i = from.second; i < to.second; ++i) - rule->e_.push_back(sente[i]); - } - - void DecrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.DecrementRule(x); - model.DecrementContinue(); - } - model.DecrementStop(); - } - - void PrintDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - cerr << i << '/' << (d.size() - 1) << ": " << x << endl; - } - } - - void IncrementDerivation(const vector >& d, const vector& sentf, const vector& sente) { - if (d.size() < 2) return; - TRule x; - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - model.IncrementRule(x); - model.IncrementContinue(); - } - model.IncrementStop(); - } - - prob_t Likelihood() const { - return model.Likelihood(); - } - - prob_t DerivationProposalProbability(const vector >& d, const vector& sentf, const vector& sente) const { - prob_t p = model.StopProbability(); - if (d.size() < 2) return p; - TRule x; - const prob_t p_cont = model.ContinueProbability(); - for (int i = 1; i < d.size(); ++i) { - InstantiateRule(d[i], d[i-1], sentf, sente, &x); - p *= p_cont; - p *= model.RuleProbability(x); - } - return p; - } - - void Sample(); - - MonotonicParallelSegementationModel& model; - MT19937* rng; - const Base& p0; - prob_t baseprob; // cached value of generating the table table labels from p0 - // this can't be used if we go to a hierarchical prior! 
- const vector >& corpuse, corpusf; - const set& vocabe, vocabf; - unsigned mh_samples, mh_rejects; - const int kX; - vector > > derivations; -}; - -template -void ModelAndData::Sample() { - unsigned MAXK = kMAX_SRC_PHRASE; - unsigned MAXL = kMAX_TRG_PHRASE; - TRule x; - x.lhs_ = -TD::Convert("X"); - for (int samples = 0; samples < 1000; ++samples) { - if (samples % 1 == 0 && samples > 0) { - //ResampleHyperparameters(); - cerr << " [" << samples << " LLH=" << log(Likelihood()) << " MH=" << ((double)mh_rejects / mh_samples) << "]\n"; - for (int i = 0; i < 10; ++i) { - cerr << "SENTENCE: " << TD::GetString(corpusf[i]) << " ||| " << TD::GetString(corpuse[i]) << endl; - PrintDerivation(derivations[i], corpusf[i], corpuse[i]); - } - } - cerr << '.' << flush; - for (int s = 0; s < corpuse.size(); ++s) { - const vector& sentf = corpusf[s]; - const vector& sente = corpuse[s]; -// cerr << " CUSTOMERS: " << rules.num_customers() << endl; -// cerr << "SENTENCE: " << TD::GetString(sentf) << " ||| " << TD::GetString(sente) << endl; - - vector >& deriv = derivations[s]; - const prob_t p_cur = Likelihood(); - DecrementDerivation(deriv, sentf, sente); - - boost::multi_array a(boost::extents[sentf.size() + 1][sente.size() + 1]); - boost::multi_array trans(boost::extents[sentf.size() + 1][sente.size() + 1][MAXK][MAXL]); - a[0][0] = prob_t::One(); - const prob_t q_stop = model.StopProbability(); - const prob_t q_cont = model.ContinueProbability(); - for (int i = 0; i < sentf.size(); ++i) { - for (int j = 0; j < sente.size(); ++j) { - const prob_t src_a = a[i][j]; - x.f_.clear(); - for (int k = 1; k <= MAXK; ++k) { - if (i + k > sentf.size()) break; - x.f_.push_back(sentf[i + k - 1]); - x.e_.clear(); - for (int l = 1; l <= MAXL; ++l) { - if (j + l > sente.size()) break; - x.e_.push_back(sente[j + l - 1]); - const bool stop_now = ((j + l) == sente.size()) && ((i + k) == sentf.size()); - const prob_t& cp = stop_now ? 
q_stop : q_cont; - trans[i][j][k - 1][l - 1] = model.RuleProbability(x) * cp; - a[i + k][j + l] += src_a * trans[i][j][k - 1][l - 1]; - } - } - } - } -// cerr << "Inside: " << log(a[sentf.size()][sente.size()]) << endl; - const prob_t q_cur = DerivationProposalProbability(deriv, sentf, sente); - - vector > newderiv; - int cur_i = sentf.size(); - int cur_j = sente.size(); - while(cur_i > 0 && cur_j > 0) { - newderiv.push_back(pair(cur_i, cur_j)); -// cerr << "NODE: (" << cur_i << "," << cur_j << ")\n"; - SampleSet ss; - vector > nexts; - for (int k = 1; k <= MAXK; ++k) { - const int hyp_i = cur_i - k; - if (hyp_i < 0) break; - for (int l = 1; l <= MAXL; ++l) { - const int hyp_j = cur_j - l; - if (hyp_j < 0) break; - const prob_t& inside = a[hyp_i][hyp_j]; - if (inside == prob_t::Zero()) continue; - const prob_t& transp = trans[hyp_i][hyp_j][k - 1][l - 1]; - if (transp == prob_t::Zero()) continue; - const prob_t p = inside * transp; - ss.add(p); - nexts.push_back(pair(hyp_i, hyp_j)); -// cerr << " (" << hyp_i << "," << hyp_j << ") <--- " << log(p) << endl; - } - } -// cerr << " sample set has " << nexts.size() << " elements.\n"; - const int selected = rng->SelectSample(ss); - cur_i = nexts[selected].first; - cur_j = nexts[selected].second; - } - newderiv.push_back(pair(0,0)); - const prob_t q_new = DerivationProposalProbability(newderiv, sentf, sente); - IncrementDerivation(newderiv, sentf, sente); -// cerr << "SANITY: " << q_new << " " <(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (!conf.count("inverse_model1")) { - cerr << argv[0] << "Please use --inverse_model1 to specify inverse model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); -// MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); -// PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MonotonicParallelSegementationModel m(alp0); - - ModelAndData posterior(m, alp0, corpuse, corpusf, vocabe, vocabf); - posterior.Sample(); - - return 0; -} - diff --git a/gi/pf/guess-translits.pl b/gi/pf/guess-translits.pl deleted file mode 100755 index d00c2168..00000000 --- a/gi/pf/guess-translits.pl +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use utf8; - -my $MIN_PMI = -3; - -my %fs; -my %es; -my %ef; - -die "Usage: $0 < input.utf8.txt\n" if scalar @ARGV > 0; - -binmode(STDIN,":utf8"); -binmode(STDOUT,":utf8"); -binmode(STDERR,":utf8"); - -my $tot = 0; -print STDERR "Reading alignments from STDIN ...\n"; -while() { - chomp; - my ($fsent, $esent, $alsent) = split / \|\|\| /; - die "Format should be 'foreign sentence ||| english sentence ||| 0-0 1-1 ...'\n" unless defined $fsent && defined $esent && defined $alsent; - - my @fws = split /\s+/, $fsent; 
- my @ews = split /\s+/, $esent; - my @as = split /\s+/, $alsent; - my %a2b; - my %b2a; - for my $ap (@as) { - my ($a,$b) = split /-/, $ap; - die "BAD INPUT: $_\n" unless defined $a && defined $b; - $a2b{$a}->{$b} = 1; - $b2a{$b}->{$a} = 1; - } - for my $a (keys %a2b) { - my $bref = $a2b{$a}; - next unless scalar keys %$bref < 2; - my $b = (keys %$bref)[0]; - next unless scalar keys %{$b2a{$b}} < 2; - my $f = $fws[$a]; - next unless defined $f; - next unless length($f) > 3; - my $e = $ews[$b]; - next unless defined $e; - next unless length($e) > 3; - - $ef{$f}->{$e}++; - $es{$e}++; - $fs{$f}++; - $tot++; - } -} -my $ltot = log($tot); -my $num = 0; -print STDERR "Extracting pairs for PMI > $MIN_PMI ...\n"; -for my $f (keys %fs) { - my $logf = log($fs{$f}); - my $esref = $ef{$f}; - for my $e (keys %$esref) { - my $loge = log($es{$e}); - my $ef = $esref->{$e}; - my $logef = log($ef); - my $pmi = $logef - ($loge + $logf); - next if $pmi < $MIN_PMI; - my @flets = split //, $f; - my @elets = split //, $e; - print "@flets ||| @elets\n"; - $num++; - } -} -print STDERR "Extracted $num pairs.\n"; -print STDERR "Recommend running:\n ../../training/model1 -v -d -t -99999 output.txt\n"; diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc deleted file mode 100644 index f362d3f8..00000000 --- a/gi/pf/hpyp_tm.cc +++ /dev/null @@ -1,133 +0,0 @@ -#include "hpyp_tm.h" - -#include -#include -#include - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { - FreqBinner(const std::string& fname) { fd_.Load(fname); } - unsigned NumberOfBins() const { return fd_.Max() + 1; } - unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } - FreqDict fd_; -}; - -template -struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : - base(*b), - binner(bnr), - btr(binner ? binner->NumberOfBins() + 1u : 2u) {} - - void Summary() const { - cerr << "Number of conditioning contexts: " << r.size() << endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; - for (CCRP >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - cerr << " " << i2->second << endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - btr.ResampleHyperparameters(rng); - } - - prob_t Prob(const WordID src, const vector& trglets) const { - RuleModelHash::const_iterator it = r.find(src); - if (it == r.end()) { - return base(trglets); - } else { - return it->second.prob(trglets, base(trglets)); - } - } - - void Increment(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - if (it == r.end()) { - it = r.insert(make_pair(src, CCRP >(0.5,1.0))).first; - static const WordID kNULL = TD::Convert("NULL"); - unsigned bin = (src == kNULL ? 
0 : 1); - if (binner && bin) { bin = binner->Bin(src) + 1; } - btr.Add(bin, &it->second); - } - if (it->second.increment(trglets, base(trglets), rng)) - base.Increment(trglets, rng); - } - - void Decrement(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - assert(it != r.end()); - if (it->second.decrement(trglets, rng)) { - base.Decrement(trglets, rng); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - return p; - } - - unsigned UniqueConditioningContexts() const { - return r.size(); - } - - // TODO tie PYP hyperparameters based on source word frequency bins - Base& base; - const Binner* binner; - BinTiedResampler > > btr; - typedef unordered_map > > RuleModelHash; - RuleModelHash r; -}; - -HPYPLexicalTranslation::HPYPLexicalTranslation(const vector >& lets, - const unsigned vocab_size, - const unsigned num_letters) : - letters(lets), - base(vocab_size, num_letters, 5), - up0(new PYPWordModel(&base)), - tmodel(new ConditionalPYPWordModel >(up0, new FreqBinner("10k.freq"))), - kX(-TD::Convert("X")) {} - -void HPYPLexicalTranslation::Summary() const { - tmodel->Summary(); - up0->Summary(); -} - -prob_t HPYPLexicalTranslation::Likelihood() const { - prob_t p = up0->Likelihood(); - p *= tmodel->Likelihood(); - return p; -} - -void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { - tmodel->ResampleHyperparameters(rng); - up0->ResampleHyperparameters(rng); -} - -unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const { - return tmodel->UniqueConditioningContexts(); -} - -prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const { - return tmodel->Prob(src, letters[trg]); -} - -void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { - tmodel->Increment(src, letters[trg], rng); -} - -void HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { - tmodel->Decrement(src, letters[trg], rng); -} - diff --git a/gi/pf/hpyp_tm.h b/gi/pf/hpyp_tm.h deleted file mode 100644 index af3215ba..00000000 --- a/gi/pf/hpyp_tm.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef HPYP_LEX_TRANS -#define HPYP_LEX_TRANS - -#include -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template struct PYPWordModel; -template struct ConditionalPYPWordModel; - -struct HPYPLexicalTranslation { - explicit HPYPLexicalTranslation(const std::vector >& lets, - const unsigned vocab_size, - const unsigned num_letters); - - prob_t Likelihood() const; - - void ResampleHyperparameters(MT19937* rng); - prob_t Prob(WordID src, WordID trg) const; // return p(trg | src) - void Summary() const; - void Increment(WordID src, WordID trg, MT19937* rng); - void Decrement(WordID src, WordID trg, MT19937* rng); - unsigned UniqueConditioningContexts() const; - - private: - const std::vector >& letters; // spelling dictionary - PoissonUniformWordModel base; // "generator" of English types - PYPWordModel* up0; // model English lexicon - ConditionalPYPWordModel, FreqBinner>* tmodel; // translation distributions - // (model English word | French word) - const WordID kX; -}; - -#endif diff --git a/gi/pf/itg.cc b/gi/pf/itg.cc deleted file mode 100644 index 29ec3860..00000000 --- a/gi/pf/itg.cc +++ /dev/null @@ -1,275 +0,0 @@ -#include -#include -#include - -#include -#include -#include - 
-#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -ostream& operator<<(ostream& os, const vector& p) { - os << '['; - for (int i = 0; i < p.size(); ++i) - os << (i==0 ? "" : " ") << TD::Convert(p[i]); - return os << ']'; -} - -struct UnigramModel { - explicit UnigramModel(const string& fname, unsigned vocab_size, double p0null = 0.05) : - use_uniform_(fname.size() == 0), - p0null_(p0null), - uniform_((1.0 - p0null) / vocab_size), - probs_(TD::NumWords() + 1) { - if (fname.size() > 0) LoadUnigrams(fname); - probs_[0] = p0null_; - } - -// -// \data\ -// ngram 1=9295 -// -// \1-grams: -// -3.191193 " - - void LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - const WordID w = TD::Convert(line.substr(pos + 1)); - line[pos] = 0; - float p = atof(&line[0]); - const prob_t pnon_null(1.0 - p0null_.as_float()); - if (w < probs_.size()) probs_[w].logeq(p * log(10) + log(pnon_null)); else abort(); - } - } - - const prob_t& operator()(const WordID& w) const { - if (!w) return p0null_; - if (use_uniform_) return uniform_; - return probs_[w]; - } - - const bool use_uniform_; - const prob_t p0null_; - const prob_t uniform_; - vector probs_; -}; - -struct Model1 { - explicit Model1(const string& fname) : - kNULL(TD::Convert("")), - kZERO() { - LoadModel1(fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // returns prob 0 if src or trg is not found! 
- const prob_t& operator()(WordID src, WordID trg) const { - if (src == 0) src = kNULL; - if (src < ttable.size()) { - const map& cpd = ttable[src]; - const map::const_iterator it = cpd.find(trg); - if (it != cpd.end()) - return it->second; - } - return kZERO; - } - - const WordID kNULL; - const prob_t kZERO; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(25),"Number of particles") - ("input,i",po::value(),"Read parallel data from") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("src_unigram,u",po::value()->default_value(""),"Source unigram distribution; empty for uniform") - ("trg_unigram,U",po::value()->default_value(""),"Target unigram distribution; empty for uniform") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - TD::Convert(""); - TD::Convert(""); - TD::Convert(""); - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << 
"F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - UnigramModel src_unigram(conf["src_unigram"].as(), vocabf.size()); - UnigramModel trg_unigram(conf["trg_unigram"].as(), vocabe.size()); - const prob_t kHALF(0.5); - - const string kEMPTY = "NULL"; - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - for (int si = 0; si < conf["samples"].as(); ++si) { - cerr << '.' << flush; - for (int ci = 0; ci < corpusf.size(); ++ci) { - const vector& trg = corpuse[ci]; - const vector& src = corpusf[ci]; - for (int i = 0; i <= trg.size(); ++i) { - const WordID e_i = i > 0 ? trg[i-1] : 0; - for (int j = 0; j <= src.size(); ++j) { - const WordID f_j = j > 0 ? src[j-1] : 0; - if (e_i == 0 && f_j == 0) continue; - prob_t je = kHALF * src_unigram(f_j) * m1(f_j,e_i) + kHALF * trg_unigram(e_i) * invm1(e_i,f_j); - cerr << "p( " << (e_i ? TD::Convert(e_i) : kEMPTY) << " , " << (f_j ? TD::Convert(f_j) : kEMPTY) << " ) = " << log(je) << endl; - if (e_i && f_j) - cout << "[X] ||| " << TD::Convert(f_j) << " ||| " << TD::Convert(e_i) << " ||| LogProb=" << log(je) << endl; - } - } - } - } -} - diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc deleted file mode 100644 index 1d5126e4..00000000 --- a/gi/pf/learn_cfg.cc +++ /dev/null @@ -1,428 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "inside_outside.h" -#include "hg.h" -#include "bottom_up_parser.h" -#include "fdict.h" -#include "grammar.h" -#include "m.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; -vector nt_vocab; -vector nt_id_to_index; -static unsigned kMAX_RULE_SIZE = 0; -static unsigned kMAX_ARITY = 0; -static bool kALLOW_MIXED = true; // allow rules with mixed terminals and NTs -static bool kHIERARCHICAL_PRIOR = false; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_rule_size,m", po::value()->default_value(0), "Maximum rule size (0 for unlimited)") - ("max_arity,a", po::value()->default_value(0), "Maximum number of nonterminals in a rule (0 for unlimited)") - ("no_mixed_rules,M", "Do not mix terminals and nonterminals in a rule RHS") - ("nonterminals,n", po::value()->default_value(1), "Size of nonterminal vocabulary") - ("hierarchical_prior,h", "Use hierarchical prior") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << 
dcmdline_options << endl; - exit(1); - } -} - -unsigned ReadCorpus(const string& filename, - vector >* e, - set* vocab_e) { - e->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - unsigned toks = 0; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - vector& le = e->back(); - TD::ConvertSentence(line, &le); - for (unsigned i = 0; i < le.size(); ++i) - vocab_e->insert(le[i]); - toks += le.size(); - } - if (in != &cin) delete in; - return toks; -} - -struct Grid { - // a b c d e - // 0 - 0 - - - vector grid; -}; - -struct BaseRuleModel { - explicit BaseRuleModel(unsigned term_size, - unsigned nonterm_size = 1) : - unif_term(1.0 / term_size), - unif_nonterm(1.0 / nonterm_size) {} - prob_t operator()(const TRule& r) const { - prob_t p; p.logeq(Md::log_poisson(1.0, r.f_.size())); - const prob_t term_prob((2.0 + 0.01*r.f_.size()) / (r.f_.size() + 2)); - const prob_t nonterm_prob(1.0 - term_prob.as_float()); - for (unsigned i = 0; i < r.f_.size(); ++i) { - if (r.f_[i] <= 0) { // nonterminal - if (kALLOW_MIXED) p *= nonterm_prob; - p *= unif_nonterm; - } else { // terminal - if (kALLOW_MIXED) p *= term_prob; - p *= unif_term; - } - } - return p; - } - const prob_t unif_term, unif_nonterm; -}; - -struct HieroLMModel { - explicit HieroLMModel(unsigned vocab_size, unsigned num_nts = 1) : - base(vocab_size, num_nts), - q0(1,1,1,1), - nts(num_nts, CCRP(1,1,1,1)) {} - - prob_t Prob(const TRule& r) const { - return nts[nt_id_to_index[-r.lhs_]].prob(r, p0(r)); - } - - inline prob_t p0(const TRule& r) const { - if (kHIERARCHICAL_PRIOR) - return q0.prob(r, base(r)); - else - return base(r); - } - - int Increment(const TRule& r, MT19937* rng) { - const int delta = nts[nt_id_to_index[-r.lhs_]].increment(r, p0(r), rng); - if (kHIERARCHICAL_PRIOR && delta) - q0.increment(r, base(r), rng); - return delta; - // return x.increment(r); - } - - int Decrement(const TRule& r, MT19937* rng) { - const int delta = nts[nt_id_to_index[-r.lhs_]].decrement(r, rng); - if (kHIERARCHICAL_PRIOR && delta) - q0.decrement(r, rng); - return delta; - //return x.decrement(r); - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (unsigned i = 0; i < nts.size(); ++i) { - prob_t q; q.logeq(nts[i].log_crp_prob()); - p *= q; - for (CCRP::const_iterator it = nts[i].begin(); it != nts[i].end(); ++it) { - prob_t tp = p0(it->first); - tp.poweq(it->second.num_tables()); - p *= tp; - } - } - if (kHIERARCHICAL_PRIOR) { - prob_t q; q.logeq(q0.log_crp_prob()); - p *= q; - for (CCRP::const_iterator it = q0.begin(); it != q0.end(); ++it) { - prob_t tp = base(it->first); - tp.poweq(it->second.num_tables()); - p *= tp; - } - } - //for (CCRP_OneTable::const_iterator it = x.begin(); it != x.end(); ++it) - // p *= base(it->first); - return p; - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < nts.size(); ++i) - nts[i].resample_hyperparameters(rng); - if (kHIERARCHICAL_PRIOR) { - q0.resample_hyperparameters(rng); - cerr << "[base d=" << q0.discount() << ", s=" << q0.strength() << "]"; - } - cerr << " d=" << nts[0].discount() << ", s=" << nts[0].strength() << endl; - } - - const BaseRuleModel base; - CCRP q0; - vector > nts; - //CCRP_OneTable x; -}; - -vector tofreelist; - -HieroLMModel* plm; - -struct NPGrammarIter : public GrammarIter, public RuleBin { - NPGrammarIter() : arity() { tofreelist.push_back(this); } - NPGrammarIter(const TRulePtr& inr, const int 
a, int symbol) : arity(a) { - if (inr) { - r.reset(new TRule(*inr)); - } else { - r.reset(new TRule); - } - TRule& rr = *r; - rr.lhs_ = nt_vocab[0]; - rr.f_.push_back(symbol); - rr.e_.push_back(symbol < 0 ? (1-int(arity)) : symbol); - tofreelist.push_back(this); - } - inline static unsigned NextArity(int cur_a, int symbol) { - return cur_a + (symbol <= 0 ? 1 : 0); - } - virtual int GetNumRules() const { - if (r) return nt_vocab.size(); else return 0; - } - virtual TRulePtr GetIthRule(int i) const { - if (i == 0) return r; - TRulePtr nr(new TRule(*r)); - nr->lhs_ = nt_vocab[i]; - return nr; - } - virtual int Arity() const { - return arity; - } - virtual const RuleBin* GetRules() const { - if (!r) return NULL; else return this; - } - virtual const GrammarIter* Extend(int symbol) const { - const int next_arity = NextArity(arity, symbol); - if (kMAX_ARITY && next_arity > kMAX_ARITY) - return NULL; - if (!kALLOW_MIXED && r) { - bool t1 = r->f_.front() <= 0; - bool t2 = symbol <= 0; - if (t1 != t2) return NULL; - } - if (!kMAX_RULE_SIZE || !r || (r->f_.size() < kMAX_RULE_SIZE)) - return new NPGrammarIter(r, next_arity, symbol); - else - return NULL; - } - const unsigned char arity; - TRulePtr r; -}; - -struct NPGrammar : public Grammar { - virtual const GrammarIter* GetRoot() const { - return new NPGrammarIter; - } -}; - -prob_t TotalProb(const Hypergraph& hg) { - return Inside(hg); -} - -void SampleDerivation(const Hypergraph& hg, MT19937* rng, vector* sampled_deriv) { - vector node_probs; - Inside(hg, &node_probs); - queue q; - q.push(hg.nodes_.size() - 2); - while(!q.empty()) { - unsigned cur_node_id = q.front(); -// cerr << "NODE=" << cur_node_id << endl; - q.pop(); - const Hypergraph::Node& node = hg.nodes_[cur_node_id]; - const unsigned num_in_edges = node.in_edges_.size(); - unsigned sampled_edge = 0; - if (num_in_edges == 1) { - sampled_edge = node.in_edges_[0]; - } else { - //prob_t z; - assert(num_in_edges > 1); - SampleSet ss; - for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; - prob_t p = edge.edge_prob_; - for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) - p *= node_probs[edge.tail_nodes_[k]]; - ss.add(p); -// cerr << log(ss[j]) << " ||| " << edge.rule_->AsString() << endl; - //z += p; - } -// for (unsigned j = 0; j < num_in_edges; ++j) { -// const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; -// cerr << exp(log(ss[j] / z)) << " ||| " << edge.rule_->AsString() << endl; -// } -// cerr << " --- \n"; - sampled_edge = node.in_edges_[rng->SelectSample(ss)]; - } - sampled_deriv->push_back(sampled_edge); - const Hypergraph::Edge& edge = hg.edges_[sampled_edge]; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - q.push(edge.tail_nodes_[j]); - } - } - for (unsigned i = 0; i < sampled_deriv->size(); ++i) { - cerr << *hg.edges_[(*sampled_deriv)[i]].rule_ << endl; - } -} - -void IncrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Increment(*hg.edges_[d[i]].rule_, rng); -} - -void DecrementDerivation(const Hypergraph& hg, const vector& d, HieroLMModel* plm, MT19937* rng) { - for (unsigned i = 0; i < d.size(); ++i) - plm->Decrement(*hg.edges_[d[i]].rule_, rng); -} - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - nt_vocab.resize(conf["nonterminals"].as()); - assert(nt_vocab.size() > 0); - assert(nt_vocab.size() < 26); - { - string nt = "X"; - for (unsigned i = 0; i < 
nt_vocab.size(); ++i) { - if (nt_vocab.size() > 1) nt[0] = ('A' + i); - int pid = TD::Convert(nt); - nt_vocab[i] = -pid; - if (pid >= nt_id_to_index.size()) { - nt_id_to_index.resize(pid + 1, -1); - } - nt_id_to_index[pid] = i; - } - } - vector grammars; - grammars.push_back(GrammarPtr(new NPGrammar)); - - const unsigned samples = conf["samples"].as(); - kMAX_RULE_SIZE = conf["max_rule_size"].as(); - if (kMAX_RULE_SIZE == 1) { - cerr << "Invalid maximum rule size: must be 0 or >1\n"; - return 1; - } - kMAX_ARITY = conf["max_arity"].as(); - if (kMAX_ARITY == 1) { - cerr << "Invalid maximum arity: must be 0 or >1\n"; - return 1; - } - kALLOW_MIXED = !conf.count("no_mixed_rules"); - - kHIERARCHICAL_PRIOR = conf.count("hierarchical_prior"); - - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector > corpuse; - set vocabe; - cerr << "Reading corpus...\n"; - const unsigned toks = ReadCorpus(conf["input"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - HieroLMModel lm(vocabe.size(), nt_vocab.size()); - - plm = &lm; - ExhaustiveBottomUpParser parser(TD::Convert(-nt_vocab[0]), grammars); - - Hypergraph hg; - const int kGoal = -TD::Convert("Goal"); - const int kLP = FD::Convert("LogProb"); - SparseVector v; v.set_value(kLP, 1.0); - vector > derivs(corpuse.size()); - vector cl(corpuse.size()); - for (int ci = 0; ci < corpuse.size(); ++ci) { - vector& src = corpuse[ci]; - Lattice& lat = cl[ci]; - lat.resize(src.size()); - for (unsigned i = 0; i < src.size(); ++i) - lat[i].push_back(LatticeArc(src[i], 0.0, 1)); - } - for (int SS=0; SS < samples; ++SS) { - const bool is_last = ((samples - 1) == SS); - prob_t dlh = prob_t::One(); - for (int ci = 0; ci < corpuse.size(); ++ci) { - const vector& src = corpuse[ci]; - const Lattice& lat = cl[ci]; - cerr << TD::GetString(src) << endl; - hg.clear(); - parser.Parse(lat, &hg); // exhaustive parse - vector& d = derivs[ci]; - if (!is_last) DecrementDerivation(hg, d, &lm, &rng); - for (unsigned i = 0; i < hg.edges_.size(); ++i) { - TRule& r = *hg.edges_[i].rule_; - if (r.lhs_ == kGoal) - hg.edges_[i].edge_prob_ = prob_t::One(); - else - hg.edges_[i].edge_prob_ = lm.Prob(r); - } - if (!is_last) { - d.clear(); - SampleDerivation(hg, &rng, &d); - IncrementDerivation(hg, derivs[ci], &lm, &rng); - } else { - prob_t p = TotalProb(hg); - dlh *= p; - cerr << " p(sentence) = " << log(p) << "\t" << log(dlh) << endl; - } - if (tofreelist.size() > 200000) { - cerr << "Freeing ... 
"; - for (unsigned i = 0; i < tofreelist.size(); ++i) - delete tofreelist[i]; - tofreelist.clear(); - cerr << "Freed.\n"; - } - } - double llh = log(lm.Likelihood()); - cerr << "LLH=" << llh << "\tENTROPY=" << (-llh / log(2) / toks) << "\tPPL=" << pow(2, -llh / log(2) / toks) << endl; - if (SS % 10 == 9) lm.ResampleHyperparameters(&rng); - if (is_last) { - double z = log(dlh); - cerr << "TOTAL_PROB=" << z << "\tENTROPY=" << (-z / log(2) / toks) << "\tPPL=" << pow(2, -z / log(2) / toks) << endl; - } - } - for (unsigned i = 0; i < nt_vocab.size(); ++i) - cerr << lm.nts[i] << endl; - return 0; -} - diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl deleted file mode 100755 index fdcd3555..00000000 --- a/gi/pf/make-freq-bins.pl +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $BASE = 6; -my $CUTOFF = 3; - -my %d; -my $num = 0; -while(<>){ - chomp; - my @words = split /\s+/; - for my $w (@words) {$d{$w}++; $num++;} -} - -my @vocab = sort {$d{$b} <=> $d{$a}} keys %d; - -for (my $i=0; $i -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - - Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP(0.8, 0.5)) {} - - double p0(int x) const { - assert(x > 0); - assert(x < 5); - return 1.0/4.0; - } - - double llh() const { - double lh = bp + base.log_crp_prob(); - for (int ctx = 1; ctx < 5; ++ctx) - lh += ccrps[ctx].log_crp_prob(); - return lh; - } - - double prob(int ctx, int x) const { - assert(ctx > 0 && ctx < 5); - return ccrps[ctx].prob(x, base.prob(x, p0(x))); - } - - void increment(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { - if (base.increment(x, p0(x), &rng)) { - bp += log(1.0 / 4.0); - } - } - } - - // this is just a biased estimate - double est_base_prob(int x) { - return (x + 1) * x / 40.0; - } - - void increment_is(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - SampleSet ss; - const int PARTICLES = 25; - vector > s1s(PARTICLES, CCRP(0.5,0.5)); - vector > sbs(PARTICLES, CCRP(0.5,0.5)); - vector sp0s(PARTICLES); - - CCRP s1 = ccrps[ctx]; - CCRP sb = base; - double sp0 = bp; - for (int pp = 0; pp < PARTICLES; ++pp) { - if (pp > 0) { - ccrps[ctx] = s1; - base = sb; - bp = sp0; - } - - double q = 1; - double gamma = 1; - double est_p = est_base_prob(x); - //base.prob(x, p0(x)) + rng.next() * 0.1; - if (ccrps[ctx].increment(x, est_p, &rng, &q)) { - gamma = q * base.prob(x, p0(x)); - q *= est_p; - if (verbose) cerr << "(DP-base draw) "; - double qq = -1; - if (base.increment(x, p0(x), &rng, &qq)) { - if (verbose) cerr << "(G0 draw) "; - bp += log(p0(x)); - qq *= p0(x); - } - } else { gamma = q; } - double w = gamma / q; - if (verbose) - cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; - ss.add(w); - s1s[pp] = ccrps[ctx]; - sbs[pp] = base; - sp0s[pp] = bp; - } - int ps = rng.SelectSample(ss); - ccrps[ctx] = s1s[ps]; - base = sbs[ps]; - bp = sp0s[ps]; - if (verbose) { - cerr << "SELECTED: " << ps << endl; - static int cc = 0; cc++; if (cc ==10) exit(1); - } - } - - void decrement(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].decrement(x, &rng)) { - if (base.decrement(x, &rng)) { - bp -= log(p0(x)); - } - } - } - - double bp; - CCRP base; - vector > ccrps; - -}; - -int main(int argc, char** argv) { - if (argc > 1) { verbose = true; } - vector counts(15, 0); - vector tcounts(15, 0); - int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; - 
double tlh = 0; - double tt = 0; - for (int n = 0; n < 1000; ++n) { - if (n % 10 == 0) cerr << '.'; - if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; - Model m; - for (int *x = points; *x; x += 2) - m.increment(x[0], x[1]); - - for (int j = 0; j < 24; ++j) { - for (int *x = points; *x; x += 2) { - if (rng.next() < 0.8) { - m.decrement(x[0], x[1]); - m.increment_is(x[0], x[1]); - } - } - } - counts[m.base.num_customers()]++; - tcounts[m.base.num_tables()]++; - tlh += m.llh(); - tt += 1.0; - } - cerr << "mean LLH = " << (tlh / tt) << endl; - for (int i = 0; i < 15; ++i) - cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/monotonic_pseg.h b/gi/pf/monotonic_pseg.h deleted file mode 100644 index 10d171fe..00000000 --- a/gi/pf/monotonic_pseg.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef _MONOTONIC_PSEG_H_ -#define _MONOTONIC_PSEG_H_ - -#include - -#include "prob.h" -#include "ccrp_nt.h" -#include "trule.h" -#include "base_distributions.h" - -template -struct MonotonicParallelSegementationModel { - explicit MonotonicParallelSegementationModel(BaseMeasure& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), stop(1.0) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRulesAndStops(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - if (rules.size()) IncrementContinue(rules.size() - 1); - IncrementStop(); - } - - void DecrementRulesAndStops(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - if (rules.size()) { - DecrementContinue(rules.size() - 1); - DecrementStop(); - } - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - q.logeq(stop.log_crp_prob()); - p *= q; - return p; - } - - void IncrementStop() { - stop.increment(true); - } - - void IncrementContinue(int n = 1) { - for (int i = 0; i < n; ++i) - stop.increment(false); - } - - void DecrementStop() { - stop.decrement(true); - } - - void DecrementContinue(int n = 1) { - for (int i = 0; i < n; ++i) - stop.decrement(false); - } - - prob_t StopProbability() const { - return prob_t(stop.prob(true, 0.5)); - } - - prob_t ContinueProbability() const { - return prob_t(stop.prob(false, 0.5)); - } - - const BaseMeasure& rp0; - prob_t base; - CCRP_NoTable rules; - CCRP_NoTable stop; -}; - -#endif - diff --git a/gi/pf/ngram_base.cc b/gi/pf/ngram_base.cc deleted file mode 100644 index 1299f06f..00000000 --- a/gi/pf/ngram_base.cc +++ /dev/null @@ -1,69 +0,0 @@ -#include "ngram_base.h" - -#include "lm/model.hh" -#include "tdict.h" - -using namespace std; - -namespace { -struct GICSVMapper : public lm::EnumerateVocab { - GICSVMapper(vector* out) : out_(out), kLM_UNKNOWN_TOKEN(0) { out_->clear(); } - void Add(lm::WordIndex index, const StringPiece &str) { - const WordID cdec_id = TD::Convert(str.as_string()); - if (cdec_id >= out_->size()) - out_->resize(cdec_id + 1, kLM_UNKNOWN_TOKEN); - (*out_)[cdec_id] = index; - } - vector* out_; - const lm::WordIndex kLM_UNKNOWN_TOKEN; -}; -} - -struct FixedNgramBaseImpl { - FixedNgramBaseImpl(const string& param) { - GICSVMapper vm(&cdec2klm_map_); - lm::ngram::Config conf; - conf.enumerate_vocab = &vm; - cerr << "Reading character 
LM from " << param << endl; - model = new lm::ngram::ProbingModel(param.c_str(), conf); - order = model->Order(); - kEOS = MapWord(TD::Convert("")); - assert(kEOS > 0); - } - - lm::WordIndex MapWord(const WordID w) const { - if (w < cdec2klm_map_.size()) return cdec2klm_map_[w]; - return 0; - } - - ~FixedNgramBaseImpl() { delete model; } - - prob_t StringProbability(const vector& s) const { - lm::ngram::State state = model->BeginSentenceState(); - double prob = 0; - for (unsigned i = 0; i < s.size(); ++i) { - const lm::ngram::State scopy(state); - prob += model->Score(scopy, MapWord(s[i]), state); - } - const lm::ngram::State scopy(state); - prob += model->Score(scopy, kEOS, state); - prob_t p; p.logeq(prob * log(10)); - return p; - } - - lm::ngram::ProbingModel* model; - unsigned order; - vector cdec2klm_map_; - lm::WordIndex kEOS; -}; - -FixedNgramBase::~FixedNgramBase() { delete impl; } - -FixedNgramBase::FixedNgramBase(const string& lmfname) { - impl = new FixedNgramBaseImpl(lmfname); -} - -prob_t FixedNgramBase::StringProbability(const vector& s) const { - return impl->StringProbability(s); -} - diff --git a/gi/pf/ngram_base.h b/gi/pf/ngram_base.h deleted file mode 100644 index 4ea999f3..00000000 --- a/gi/pf/ngram_base.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NGRAM_BASE_H_ -#define _NGRAM_BASE_H_ - -#include -#include -#include "trule.h" -#include "wordid.h" -#include "prob.h" - -struct FixedNgramBaseImpl; -struct FixedNgramBase { - FixedNgramBase(const std::string& lmfname); - ~FixedNgramBase(); - prob_t StringProbability(const std::vector& s) const; - - prob_t operator()(const TRule& rule) const { - return StringProbability(rule.e_); - } - - private: - FixedNgramBaseImpl* impl; - -}; - -#endif diff --git a/gi/pf/nuisance_test.cc b/gi/pf/nuisance_test.cc deleted file mode 100644 index fc0af9cb..00000000 --- a/gi/pf/nuisance_test.cc +++ /dev/null @@ -1,161 +0,0 @@ -#include "ccrp.h" - -#include -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -ostream& operator<<(ostream&os, const vector& v) { - os << '[' << v[0]; - if (v.size() == 2) os << ' ' << v[1]; - return os << ']'; -} - -struct Base { - Base() : llh(), v(2), v1(1), v2(1), crp(0.25, 0.5) {} - inline double p0(const vector& x) const { - double p = 0.75; - if (x.size() == 2) p = 0.25; - p *= 1.0 / 3.0; - if (x.size() == 2) p *= 1.0 / 3.0; - return p; - } - double est_deriv_prob(int a, int b, int seg) const { - assert(a > 0 && a < 4); // a \in {1,2,3} - assert(b > 0 && b < 4); // b \in {1,2,3} - assert(seg == 0 || seg == 1); // seg \in {0,1} - if (seg == 0) { - v[0] = a; - v[1] = b; - return crp.prob(v, p0(v)); - } else { - v1[0] = a; - v2[0] = b; - return crp.prob(v1, p0(v1)) * crp.prob(v2, p0(v2)); - } - } - double est_marginal_prob(int a, int b) const { - return est_deriv_prob(a,b,0) + est_deriv_prob(a,b,1); - } - int increment(int a, int b, double* pw = NULL) { - double p1 = est_deriv_prob(a, b, 0); - double p2 = est_deriv_prob(a, b, 1); - //p1 = 0.5; p2 = 0.5; - int seg = rng.SelectSample(p1,p2); - double tmp = 0; - if (!pw) pw = &tmp; - double& w = *pw; - if (seg == 0) { - v[0] = a; - v[1] = b; - w = crp.prob(v, p0(v)) / p1; - if (crp.increment(v, p0(v), &rng)) { - llh += log(p0(v)); - } - } else { - v1[0] = a; - w = crp.prob(v1, p0(v1)) / p2; - if (crp.increment(v1, p0(v1), &rng)) { - llh += log(p0(v1)); - } - v2[0] = b; - w *= crp.prob(v2, p0(v2)); - if (crp.increment(v2, p0(v2), &rng)) { - llh += log(p0(v2)); - } - } - return seg; - } - void increment(int a, int b, 
int seg) { - if (seg == 0) { - v[0] = a; - v[1] = b; - if (crp.increment(v, p0(v), &rng)) { - llh += log(p0(v)); - } - } else { - v1[0] = a; - if (crp.increment(v1, p0(v1), &rng)) { - llh += log(p0(v1)); - } - v2[0] = b; - if (crp.increment(v2, p0(v2), &rng)) { - llh += log(p0(v2)); - } - } - } - void decrement(int a, int b, int seg) { - if (seg == 0) { - v[0] = a; - v[1] = b; - if (crp.decrement(v, &rng)) { - llh -= log(p0(v)); - } - } else { - v1[0] = a; - if (crp.decrement(v1, &rng)) { - llh -= log(p0(v1)); - } - v2[0] = b; - if (crp.decrement(v2, &rng)) { - llh -= log(p0(v2)); - } - } - } - double log_likelihood() const { - return llh + crp.log_crp_prob(); - } - double llh; - mutable vector v, v1, v2; - CCRP > crp; -}; - -int main(int argc, char** argv) { - double tl = 0; - const int ITERS = 1000; - const int PARTICLES = 20; - const int DATAPOINTS = 50; - WordID x = TD::Convert("souvenons"); - WordID y = TD::Convert("remember"); - vector src; TD::ConvertSentence("s o u v e n o n s", &src); - vector trg; TD::ConvertSentence("r e m e m b e r", &trg); -// Transliterations xx; -// xx.Initialize(x, src, y, trg); -// return 1; - - for (int j = 0; j < ITERS; ++j) { - Base b; - vector segs(DATAPOINTS); - SampleSet ss; - vector sss; - for (int i = 0; i < DATAPOINTS; i++) { - ss.clear(); - sss.clear(); - int x = ((i / 10) % 3) + 1; - int y = (i % 3) + 1; - //double ep = b.est_marginal_prob(x,y); - //cerr << "est p(" << x << "," << y << ") = " << ep << endl; - for (int n = 0; n < PARTICLES; ++n) { - double w; - int seg = b.increment(x,y,&w); - //cerr << seg << " w=" << w << endl; - ss.add(w); - sss.push_back(seg); - b.decrement(x,y,seg); - } - int seg = sss[rng.SelectSample(ss)]; - b.increment(x, y, seg); - //cerr << "Selected: " << seg << endl; - //return 1; - segs[i] = seg; - } - tl += b.log_likelihood(); - } - cerr << "LLH=" << tl / ITERS << endl; -} - diff --git a/gi/pf/os_phrase.h b/gi/pf/os_phrase.h deleted file mode 100644 index dfe40cb1..00000000 --- a/gi/pf/os_phrase.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _OS_PHRASE_H_ -#define _OS_PHRASE_H_ - -#include -#include -#include "tdict.h" - -inline std::ostream& operator<<(std::ostream& os, const std::vector& p) { - os << '['; - for (int i = 0; i < p.size(); ++i) - os << (i==0 ? 
"" : " ") << TD::Convert(p[i]); - return os << ']'; -} - -#endif diff --git a/gi/pf/pf.h b/gi/pf/pf.h deleted file mode 100644 index ede7cda8..00000000 --- a/gi/pf/pf.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef _PF_H_ -#define _PF_H_ - -#include -#include -#include "sampler.h" -#include "prob.h" - -template -struct ParticleRenormalizer { - void operator()(std::vector* pv) const { - if (pv->empty()) return; - prob_t z = prob_t::Zero(); - for (unsigned i = 0; i < pv->size(); ++i) - z += (*pv)[i].weight; - assert(z > prob_t::Zero()); - for (unsigned i = 0; i < pv->size(); ++i) - (*pv)[i].weight /= z; - } -}; - -template -struct MultinomialResampleFilter { - explicit MultinomialResampleFilter(MT19937* rng) : rng_(rng) {} - - void operator()(std::vector* pv) { - if (pv->empty()) return; - std::vector& ps = *pv; - SampleSet ss; - for (int i = 0; i < ps.size(); ++i) - ss.add(ps[i].weight); - std::vector nps; nps.reserve(ps.size()); - const prob_t uniform_weight(1.0 / ps.size()); - for (int i = 0; i < ps.size(); ++i) { - nps.push_back(ps[rng_->SelectSample(ss)]); - nps[i].weight = uniform_weight; - } - nps.swap(ps); - } - - private: - MT19937* rng_; -}; - -template -struct SystematicResampleFilter { - explicit SystematicResampleFilter(MT19937* rng) : rng_(rng), renorm_() {} - - void operator()(std::vector* pv) { - if (pv->empty()) return; - renorm_(pv); - std::vector& ps = *pv; - std::vector nps; nps.reserve(ps.size()); - double lower = 0, upper = 0; - const double skip = 1.0 / ps.size(); - double u_j = rng_->next() * skip; - //std::cerr << "u_0: " << u_j << std::endl; - int j = 0; - for (unsigned i = 0; i < ps.size(); ++i) { - upper += ps[i].weight.as_float(); - //std::cerr << "lower: " << lower << " upper: " << upper << std::endl; - // how many children does ps[i] have? 
- while (u_j < lower) { u_j += skip; ++j; } - while (u_j >= lower && u_j <= upper) { - assert(j < ps.size()); - nps.push_back(ps[i]); - u_j += skip; - //std::cerr << " add u_j=" << u_j << std::endl; - ++j; - } - lower = upper; - } - //std::cerr << ps.size() << " " << nps.size() << "\n"; - assert(ps.size() == nps.size()); - //exit(1); - ps.swap(nps); - } - - private: - MT19937* rng_; - ParticleRenormalizer renorm_; -}; - -#endif diff --git a/gi/pf/pf_test.cc b/gi/pf/pf_test.cc deleted file mode 100644 index 296e7285..00000000 --- a/gi/pf/pf_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -#include "ccrp.h" - -#include -#include - -#include "tdict.h" -#include "transliterations.h" - -using namespace std; - -MT19937 rng; - -static bool verbose = false; - -struct Model { - - Model() : bp(), base(0.2, 0.6) , ccrps(5, CCRP(0.8, 0.5)) {} - - double p0(int x) const { - assert(x > 0); - assert(x < 5); - return 1.0/4.0; - } - - double llh() const { - double lh = bp + base.log_crp_prob(); - for (int ctx = 1; ctx < 5; ++ctx) - lh += ccrps[ctx].log_crp_prob(); - return lh; - } - - double prob(int ctx, int x) const { - assert(ctx > 0 && ctx < 5); - return ccrps[ctx].prob(x, base.prob(x, p0(x))); - } - - void increment(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].increment(x, base.prob(x, p0(x)), &rng)) { - if (base.increment(x, p0(x), &rng)) { - bp += log(1.0 / 4.0); - } - } - } - - // this is just a biased estimate - double est_base_prob(int x) { - return (x + 1) * x / 40.0; - } - - void increment_is(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - SampleSet ss; - const int PARTICLES = 25; - vector > s1s(PARTICLES, CCRP(0.5,0.5)); - vector > sbs(PARTICLES, CCRP(0.5,0.5)); - vector sp0s(PARTICLES); - - CCRP s1 = ccrps[ctx]; - CCRP sb = base; - double sp0 = bp; - for (int pp = 0; pp < PARTICLES; ++pp) { - if (pp > 0) { - ccrps[ctx] = s1; - base = sb; - bp = sp0; - } - - double q = 1; - double gamma = 1; - double est_p = est_base_prob(x); - //base.prob(x, p0(x)) + rng.next() * 0.1; - if (ccrps[ctx].increment(x, est_p, &rng, &q)) { - gamma = q * base.prob(x, p0(x)); - q *= est_p; - if (verbose) cerr << "(DP-base draw) "; - double qq = -1; - if (base.increment(x, p0(x), &rng, &qq)) { - if (verbose) cerr << "(G0 draw) "; - bp += log(p0(x)); - qq *= p0(x); - } - } else { gamma = q; } - double w = gamma / q; - if (verbose) - cerr << "gamma=" << gamma << " q=" << q << "\tw=" << w << endl; - ss.add(w); - s1s[pp] = ccrps[ctx]; - sbs[pp] = base; - sp0s[pp] = bp; - } - int ps = rng.SelectSample(ss); - ccrps[ctx] = s1s[ps]; - base = sbs[ps]; - bp = sp0s[ps]; - if (verbose) { - cerr << "SELECTED: " << ps << endl; - static int cc = 0; cc++; if (cc ==10) exit(1); - } - } - - void decrement(int ctx, int x) { - assert(ctx > 0 && ctx < 5); - if (ccrps[ctx].decrement(x, &rng)) { - if (base.decrement(x, &rng)) { - bp -= log(p0(x)); - } - } - } - - double bp; - CCRP base; - vector > ccrps; - -}; - -int main(int argc, char** argv) { - if (argc > 1) { verbose = true; } - vector counts(15, 0); - vector tcounts(15, 0); - int points[] = {1,2, 2,2, 3,2, 4,1, 3, 4, 3, 3, 2, 3, 4, 1, 4, 1, 3, 2, 1, 3, 1, 4, 0, 0}; - double tlh = 0; - double tt = 0; - for (int n = 0; n < 1000; ++n) { - if (n % 10 == 0) cerr << '.'; - if ((n+1) % 400 == 0) cerr << " [" << (n+1) << "]\n"; - Model m; - for (int *x = points; *x; x += 2) - m.increment(x[0], x[1]); - - for (int j = 0; j < 24; ++j) { - for (int *x = points; *x; x += 2) { - if (rng.next() < 0.8) { - m.decrement(x[0], x[1]); - m.increment_is(x[0], x[1]); - } - } - } - 
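// Reader's note on the bookkeeping below (not part of the deleted file): after
// the sweeps of decrement()/increment_is() above, each run records the final
// state of the shared base CRP -- counts[k] is incremented when the run ends
// with k customers in the base restaurant, tcounts[k] when it ends with k
// tables. Dividing by the number of runs tt at the end turns both histograms
// into empirical distributions; an estimate of the expected table count, for
// instance, would be
//
//   double e_tables = 0;
//   for (int k = 0; k < 15; ++k) e_tables += k * (tcounts[k] / tt);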
counts[m.base.num_customers()]++; - tcounts[m.base.num_tables()]++; - tlh += m.llh(); - tt += 1.0; - } - cerr << "mean LLH = " << (tlh / tt) << endl; - for (int i = 0; i < 15; ++i) - cerr << i << ": " << (counts[i] / tt) << "\t" << (tcounts[i] / tt) << endl; -} - diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc deleted file mode 100644 index 832f22cf..00000000 --- a/gi/pf/pfbrat.cc +++ /dev/null @@ -1,543 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "cfg_wfst_composer.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -static unsigned kMAX_SRC_PHRASE; -static unsigned kMAX_TRG_PHRASE; -struct FSTState; - -double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - -struct ConditionalBase { - explicit ConditionalBase(const double m1mixture, const unsigned vocab_e_size, const string& model1fname) : - kM1MIXTURE(m1mixture), - kUNIFORM_MIXTURE(1.0 - m1mixture), - kUNIFORM_TARGET(1.0 / vocab_e_size), - kNULL(TD::Convert("")) { - assert(m1mixture >= 0.0 && m1mixture <= 1.0); - assert(vocab_e_size > 0); - LoadModel1(model1fname); - } - - void LoadModel1(const string& fname) { - cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - int cur = 0; - int start = 0; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - const WordID src = TD::Convert(&line[0]); - ++cur; - start = cur; - while(cur < line.size() && line[cur] != ' ') { ++cur; } - assert(cur != line.size()); - line[cur] = 0; - WordID trg = TD::Convert(&line[start]); - const double logprob = strtod(&line[cur + 1], NULL); - if (src >= ttable.size()) ttable.resize(src + 1); - ttable[src][trg].logeq(logprob); - } - cerr << " read " << lc << " parameters.\n"; - } - - // return logp0 of rule.e_ | rule.f_ - prob_t operator()(const TRule& rule) const { - const int flen = rule.f_.size(); - const int elen = rule.e_.size(); - prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); - prob_t p; - p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01) - for (int i = 0; i < elen; ++i) { // for each position i in e-RHS - const WordID trg = rule.e_[i]; - prob_t tp = prob_t::Zero(); - for (int j = -1; j < flen; ++j) { - const WordID src = j < 0 ? 
kNULL : rule.f_[j]; - const map::const_iterator it = ttable[src].find(trg); - if (it != ttable[src].end()) { - tp += kM1MIXTURE * it->second; - } - tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; - } - tp *= uniform_src_alignment; // draw a_i ~uniform - p *= tp; // draw e_i ~Model1(f_a_i) / uniform - } - return p; - } - - const prob_t kM1MIXTURE; // Model 1 mixture component - const prob_t kUNIFORM_MIXTURE; // uniform mixture component - const prob_t kUNIFORM_TARGET; - const WordID kNULL; - vector > ttable; -}; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(3),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(3),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -struct UniphraseLM { - UniphraseLM(const vector >& corpus, - const set& vocab, - const po::variables_map& conf) : - phrases_(1,1), - gen_(1,1), - corpus_(corpus), - uniform_word_(1.0 / vocab.size()), - gen_p0_(0.5), - p_end_(0.5), - use_poisson_(conf.count("poisson_length") > 0) {} - - void ResampleHyperparameters(MT19937* rng) { - phrases_.resample_hyperparameters(rng); - gen_.resample_hyperparameters(rng); - cerr << " " << phrases_.alpha(); - } - - CCRP_NoTable > phrases_; - CCRP_NoTable gen_; - vector > z_; // z_[i] is there a phrase boundary after the ith word - const vector >& corpus_; - const double uniform_word_; - const double gen_p0_; - const double p_end_; // in base length distribution, p of the end of 
a phrase - const bool use_poisson_; -}; - -struct Reachability { - boost::multi_array edges; // edges[src_covered][trg_covered][x][trg_delta] is this edge worth exploring? - boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; - }; - - struct NState { - NState() : next_src_covered(), next_trg_covered() {} - NState(int i, int j) : next_src_covered(i), next_trg_covered(j) {} - int next_src_covered; - int next_trg_covered; - }; - - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - cerr << "Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - assert(a[srclen][trglen].size() > 0); - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); -// typedef boost::multi_array, 2> narray_type; -// narray_type b(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } -// const NState nstate(i,j); - for (int k = 0; k < prevs.size(); ++k) { - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta = i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; -// b[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(nstate); - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - cerr << " MAX SRC DELTA[0][0] = " << max_src_delta[0][0] << endl; - assert(max_src_delta[0][0] > 0); - //cerr << "First cell contains " << b[0][0].size() << " forward pointers\n"; - //for (int i = 0; i < b[0][0].size(); ++i) { - // cerr << " -> (" << b[0][0][i].next_src_covered << "," << b[0][0][i].next_trg_covered << ")\n"; - //} - } -}; - -ostream& operator<<(ostream& os, const FSTState& q); -struct FSTState { - explicit FSTState(int src_size) : - trg_covered_(), - src_covered_(), - src_coverage_(src_size) {} - - FSTState(short trg_covered, short src_covered, const vector& src_coverage, const vector& src_prefix) : - trg_covered_(trg_covered), - src_covered_(src_covered), - src_coverage_(src_coverage), - src_prefix_(src_prefix) { - if (src_coverage_.size() == src_covered) { - assert(src_prefix.size() == 0); - } - } - - // if we 
extend by the word at src_position, what are - // the next states that are reachable and lie on a valid - // path to the final state? - vector Extensions(int src_position, int src_len, int trg_len, const Reachability& r) const { - assert(src_position < src_coverage_.size()); - if (src_coverage_[src_position]) { - cerr << "Trying to extend " << *this << " with position " << src_position << endl; - abort(); - } - vector ncvg = src_coverage_; - ncvg[src_position] = true; - - vector res; - const int trg_remaining = trg_len - trg_covered_; - if (trg_remaining <= 0) { - cerr << "Target appears to have been covered: " << *this << " (trg_len=" << trg_len << ",trg_covered=" << trg_covered_ << ")" << endl; - abort(); - } - const int src_remaining = src_len - src_covered_; - if (src_remaining <= 0) { - cerr << "Source appears to have been covered: " << *this << endl; - abort(); - } - - for (int tc = 1; tc <= kMAX_TRG_PHRASE; ++tc) { - if (r.edges[src_covered_][trg_covered_][src_prefix_.size() + 1][tc]) { - int nc = src_prefix_.size() + 1 + src_covered_; - res.push_back(FSTState(trg_covered_ + tc, nc, ncvg, vector())); - } - } - - if ((src_prefix_.size() + 1) < r.max_src_delta[src_covered_][trg_covered_]) { - vector nsp = src_prefix_; - nsp.push_back(src_position); - res.push_back(FSTState(trg_covered_, src_covered_, ncvg, nsp)); - } - - if (res.size() == 0) { - cerr << *this << " can't be extended!\n"; - abort(); - } - return res; - } - - short trg_covered_, src_covered_; - vector src_coverage_; - vector src_prefix_; -}; -bool operator<(const FSTState& q, const FSTState& r) { - if (q.trg_covered_ != r.trg_covered_) return q.trg_covered_ < r.trg_covered_; - if (q.src_covered_!= r.src_covered_) return q.src_covered_ < r.src_covered_; - if (q.src_coverage_ != r.src_coverage_) return q.src_coverage_ < r.src_coverage_; - return q.src_prefix_ < r.src_prefix_; -} - -ostream& operator<<(ostream& os, const FSTState& q) { - os << "[" << q.trg_covered_ << " : "; - for (int i = 0; i < q.src_coverage_.size(); ++i) - os << q.src_coverage_[i]; - os << " : <"; - for (int i = 0; i < q.src_prefix_.size(); ++i) { - if (i != 0) os << ' '; - os << q.src_prefix_[i]; - } - return os << ">]"; -} - -struct MyModel { - MyModel(ConditionalBase& rcp0) : rp0(rcp0) {} - typedef unordered_map, CCRP_NoTable, boost::hash > > SrcToRuleCRPMap; - - void DecrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - it->second.decrement(rule); - if (it->second.num_customers() == 0) rules.erase(it); - } - - void IncrementRule(const TRule& rule) { - SrcToRuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) { - CCRP_NoTable crp(1,1); - it = rules.insert(make_pair(rule.f_, crp)).first; - } - it->second.increment(rule); - } - - // conditioned on rule.f_ - prob_t RuleConditionalProbability(const TRule& rule) const { - const prob_t base = rp0(rule); - SrcToRuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) { - return base; - } else { - const double lp = it->second.logprob(rule, log(base)); - prob_t q; q.logeq(lp); - return q; - } - } - - const ConditionalBase& rp0; - SrcToRuleCRPMap rules; -}; - -struct MyFST : public WFST { - MyFST(const vector& ssrc, const vector& strg, MyModel* m) : - src(ssrc), trg(strg), - r(src.size(),trg.size(),kMAX_SRC_PHRASE, kMAX_TRG_PHRASE), - model(m) { - FSTState in(src.size()); - cerr << " INIT: " << in << endl; - init = GetNode(in); - for (int i = 0; i < in.src_coverage_.size(); ++i) in.src_coverage_[i] = true; 
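    // Note (not part of the deleted file): `in` was constructed as the initial
    // state with nothing covered; its coverage bits were all flipped to true
    // above, and the two assignments below set the covered counts to the full
    // source and target lengths, turning `in` into the unique final state whose
    // accepting node is then looked up with GetNode().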
- in.src_covered_ = src.size(); - in.trg_covered_ = trg.size(); - cerr << "FINAL: " << in << endl; - final = GetNode(in); - } - virtual const WFSTNode* Final() const; - virtual const WFSTNode* Initial() const; - - const WFSTNode* GetNode(const FSTState& q); - map > m; - const vector& src; - const vector& trg; - Reachability r; - const WFSTNode* init; - const WFSTNode* final; - MyModel* model; -}; - -struct MyNode : public WFSTNode { - MyNode(const FSTState& q, MyFST* fst) : state(q), container(fst) {} - virtual vector > ExtendInput(unsigned srcindex) const; - const FSTState state; - mutable MyFST* container; -}; - -vector > MyNode::ExtendInput(unsigned srcindex) const { - cerr << "EXTEND " << state << " with " << srcindex << endl; - vector ext = state.Extensions(srcindex, container->src.size(), container->trg.size(), container->r); - vector > res(ext.size()); - for (unsigned i = 0; i < ext.size(); ++i) { - res[i].first = container->GetNode(ext[i]); - if (ext[i].src_prefix_.size() == 0) { - const unsigned trg_from = state.trg_covered_; - const unsigned trg_to = ext[i].trg_covered_; - const unsigned prev_prfx_size = state.src_prefix_.size(); - res[i].second.reset(new TRule); - res[i].second->lhs_ = -TD::Convert("X"); - vector& src = res[i].second->f_; - vector& trg = res[i].second->e_; - src.resize(prev_prfx_size + 1); - for (unsigned j = 0; j < prev_prfx_size; ++j) - src[j] = container->src[state.src_prefix_[j]]; - src[prev_prfx_size] = container->src[srcindex]; - for (unsigned j = trg_from; j < trg_to; ++j) - trg.push_back(container->trg[j]); - res[i].second->scores_.set_value(FD::Convert("Proposal"), log(container->model->RuleConditionalProbability(*res[i].second))); - } - } - return res; -} - -const WFSTNode* MyFST::GetNode(const FSTState& q) { - boost::shared_ptr& res = m[q]; - if (!res) { - res.reset(new MyNode(q, this)); - } - return &*res; -} - -const WFSTNode* MyFST::Final() const { - return final; -} - -const WFSTNode* MyFST::Initial() const { - return init; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - boost::shared_ptr prng; - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "f-Corpus size: " << corpusf.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabf.size() << " types\n"; - cerr << "f-Corpus size: " << corpuse.size() << " sentences\n"; - cerr << "f-Vocabulary size: " << vocabe.size() << " types\n"; - assert(corpusf.size() == corpuse.size()); - - ConditionalBase lp0(conf["model1_interpolation_weight"].as(), - vocabe.size(), - conf["model1"].as()); - MyModel m(lp0); - - TRule x("[X] ||| kAnwntR myN ||| at the convent ||| 0"); - m.IncrementRule(x); - TRule y("[X] ||| nY dyN ||| gave ||| 0"); - m.IncrementRule(y); - - - MyFST fst(corpusf[0], corpuse[0], &m); - ifstream in("./kimura.g"); - assert(in); - CFG_WFSTComposer comp(fst); - Hypergraph hg; - bool succeed = comp.Compose(&in, &hg); - hg.PrintGraphviz(); - if (succeed) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } - -#if 0 - ifstream in2("./amnabooks.g"); - assert(in2); - MyFST 
fst2(corpusf[1], corpuse[1], &m); - CFG_WFSTComposer comp2(fst2); - Hypergraph hg2; - bool succeed2 = comp2.Compose(&in2, &hg2); - if (succeed2) { cerr << "SUCCESS.\n"; } else { cerr << "FAILURE REPORTED.\n"; } -#endif - - SparseVector w; w.set_value(FD::Convert("Proposal"), 1.0); - hg.Reweight(w); - cerr << ViterbiFTree(hg) << endl; - return 0; -} - diff --git a/gi/pf/pfdist.cc b/gi/pf/pfdist.cc deleted file mode 100644 index a3e46064..00000000 --- a/gi/pf/pfdist.cc +++ /dev/null @@ -1,598 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "pf.h" -#include "base_distributions.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(30),"Number of particles") - ("filter_frequency,f",po::value()->default_value(5),"Number of time steps between filterings") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if (filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -#if 0 -struct 
MyConditionalModel { - MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - prob_t srcp0(const vector& src) const { - prob_t p(1.0 / 3000.0); - p.poweq(src.size()); - prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); - p *= lenp; - return p; - } - - void DecrementRule(const TRule& rule) { - const RuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - if (it->second.decrement(rule)) { - base /= (*rp0)(rule); - if (it->second.num_customers() == 0) - rules.erase(it); - } - if (src_phrases.decrement(rule.f_)) - base /= srcp0(rule.f_); - } - - void IncrementRule(const TRule& rule) { - RuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) - it = rules.insert(make_pair(rule.f_, CCRP_NoTable(1,1))).first; - if (it->second.increment(rule)) { - base *= (*rp0)(rule); - } - if (src_phrases.increment(rule.f_)) - base *= srcp0(rule.f_); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - const prob_t p0 = (*rp0)(rule); - prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); - const RuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) return srcp * p0; - const double lp = it->second.logprob(rule, log(p0)); - prob_t q; q.logeq(lp); - return q * srcp; - } - - prob_t Likelihood() const { - prob_t p = base; - for (RuleCRPMap::const_iterator it = rules.begin(); - it != rules.end(); ++it) { - prob_t cl; cl.logeq(it->second.log_crp_prob()); - p *= cl; - } - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseConditionalBase* rp0; - prob_t base; - typedef unordered_map, CCRP_NoTable, boost::hash > > RuleCRPMap; - RuleCRPMap rules; - CCRP_NoTable > src_phrases; - vector > src_jumps; -}; - -#endif - -struct MyJointModel { - MyJointModel(PhraseJointBase& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const 
vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseJointBase& rp0; - prob_t base; - CCRP_NoTable rules; - vector > src_jumps; -}; - -struct BackwardEstimate { - BackwardEstimate(const Model1& m1, const vector& src, const vector& trg) : - model1_(m1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - r.push_back(0); // NULL word - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - } - return e; - } - const Model1& model1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) 
- p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} - prob_t weight; - prob_t gamma_last; - vector src_jumps; - vector rules; - vector src_cv; - int src_cov; - int trg_cov; - int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? '1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - const unsigned rejuv_freq = conf["filter_frequency"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - -#if 0 - PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - MyConditionalModel m(lp0); -#else - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MyJointModel m(lp0); -#endif - - MultinomialResampleFilter filter(&rng); - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci 
< corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRules(ps[ci].rules); - m.DecrementJumps(ps[ci].src_jumps, src.size()); - - //BackwardEstimate be(m1, src, trg); - BackwardEstimateSym be(m1, invm1, src, trg); - const Reachability& r = reaches[ci]; - vector lps(particles); - - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - p.src_cv.resize(src.size(), false); - } - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - filter(&lps); - - // loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - int first_uncovered = src.size(); - int last_uncovered = -1; - for (int i = 0; i < src.size(); ++i) { - const bool is_uncovered = !p.src_cv[i]; - if (i < first_uncovered && is_uncovered) first_uncovered = i; - if (is_uncovered && i > last_uncovered) last_uncovered = i; - } - assert(last_uncovered > -1); - assert(first_uncovered < src.size()); - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - const int last_possible_start = last_uncovered - src_len + 1; - assert(last_possible_start >= 0); - //cerr << src_len << "," << trg_len << " is allowed. E=" << TD::GetString(x.e_) << endl; - //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl; - for (int i = first_uncovered; i <= last_possible_start; ++i) { - if (p.src_cv[i]) continue; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - int gap_add = 0; - bool bad = false; - prob_t jp = prob_t::One(); - int prev_pos = p.prev_pos; - for (int j = 0; j < src_len; ++j) { - if ((j + i + gap_add) == src.size()) { bad = true; break; } - while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } - if ((j + i + gap_add) == src.size()) { bad = true; break; } - np.src_cv[i + j + gap_add] = true; - x.f_.push_back(src[i + j + gap_add]); - jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); - int jump = i + j + gap_add - prev_pos; - assert(jump != 0); - np.src_jumps.push_back(jump); - prev_pos = i + j + gap_add; - } - if (bad) continue; - np.prev_pos = prev_pos; - np.src_cov += x.f_.size(); - np.trg_cov += x.e_.size(); - if (x.f_.size() != src_len) continue; - prob_t rp = m.RuleProbability(x); - np.gamma_last = rp * jp; - const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - - const bool completed = (p.trg_cov == trg.size()); - if (completed) { - int last_jump = src.size() - p.prev_pos; - assert(last_jump > 0); - p.src_jumps.push_back(last_jump); - p.weight *= m.JumpProbability(last_jump, src.size()); - } - } - } - } - cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - 
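                // Note on the weight update below (not part of the deleted
                // file): the chosen extension was drawn from the proposal with
                // probability q_n = ss[sampled] / z, while its unnormalised
                // model score is gamma_last = RuleProbability(x) times the
                // jump probabilities of the extension, so the usual
                // sequential-importance-sampling correction
                //
                //   weight_new = weight_old * gamma_last / q_n
                //
                // is what the p.weight update just below applies.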
//m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; - cerr << p << endl; - } - } // loop over particles (pi = 0 .. particles) - if (done_nothing) all_complete = true; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRules(lps[sampled].rules); - m.IncrementJumps(lps[sampled].src_jumps, src.size()); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - for (int sni = 0; sni < 5; ++sni) { - for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } - } - } - return 0; -} - diff --git a/gi/pf/pfdist.new.cc b/gi/pf/pfdist.new.cc deleted file mode 100644 index 3169eb75..00000000 --- a/gi/pf/pfdist.new.cc +++ /dev/null @@ -1,620 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "base_measures.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -shared_ptr prng; - -size_t hash_value(const TRule& r) { - size_t h = boost::hash_value(r.e_); - boost::hash_combine(h, -r.lhs_); - boost::hash_combine(h, boost::hash_value(r.f_)); - return h; -} - -bool operator==(const TRule& a, const TRule& b) { - return (a.lhs_ == b.lhs_ && a.e_ == b.e_ && a.f_ == b.f_); -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(25),"Number of particles") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ReadParallelCorpus(const string& filename, - vector >* f, - vector >* e, - set* vocab_f, - set* vocab_e) { - f->clear(); - e->clear(); - vocab_f->clear(); - vocab_e->clear(); - istream* in; - if 
(filename == "-") - in = &cin; - else - in = new ifstream(filename.c_str()); - assert(*in); - string line; - const WordID kDIV = TD::Convert("|||"); - vector tmp; - while(*in) { - getline(*in, line); - if (line.empty() && !*in) break; - e->push_back(vector()); - f->push_back(vector()); - vector& le = e->back(); - vector& lf = f->back(); - tmp.clear(); - TD::ConvertSentence(line, &tmp); - bool isf = true; - for (unsigned i = 0; i < tmp.size(); ++i) { - const int cur = tmp[i]; - if (isf) { - if (kDIV == cur) { isf = false; } else { - lf.push_back(cur); - vocab_f->insert(cur); - } - } else { - assert(cur != kDIV); - le.push_back(cur); - vocab_e->insert(cur); - } - } - assert(isf == false); - } - if (in != &cin) delete in; -} - -#if 0 -struct MyConditionalModel { - MyConditionalModel(PhraseConditionalBase& rcp0) : rp0(&rcp0), base(prob_t::One()), src_phrases(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - prob_t srcp0(const vector& src) const { - prob_t p(1.0 / 3000.0); - p.poweq(src.size()); - prob_t lenp; lenp.logeq(log_poisson(src.size(), 1.0)); - p *= lenp; - return p; - } - - void DecrementRule(const TRule& rule) { - const RuleCRPMap::iterator it = rules.find(rule.f_); - assert(it != rules.end()); - if (it->second.decrement(rule)) { - base /= (*rp0)(rule); - if (it->second.num_customers() == 0) - rules.erase(it); - } - if (src_phrases.decrement(rule.f_)) - base /= srcp0(rule.f_); - } - - void IncrementRule(const TRule& rule) { - RuleCRPMap::iterator it = rules.find(rule.f_); - if (it == rules.end()) - it = rules.insert(make_pair(rule.f_, CCRP_NoTable(1,1))).first; - if (it->second.increment(rule)) { - base *= (*rp0)(rule); - } - if (src_phrases.increment(rule.f_)) - base *= srcp0(rule.f_); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - const prob_t p0 = (*rp0)(rule); - prob_t srcp; srcp.logeq(src_phrases.logprob(rule.f_, log(srcp0(rule.f_)))); - const RuleCRPMap::const_iterator it = rules.find(rule.f_); - if (it == rules.end()) return srcp * p0; - const double lp = it->second.logprob(rule, log(p0)); - prob_t q; q.logeq(lp); - return q * srcp; - } - - prob_t Likelihood() const { - prob_t p = base; - for (RuleCRPMap::const_iterator it = rules.begin(); - it != rules.end(); ++it) { - prob_t cl; cl.logeq(it->second.log_crp_prob()); - p *= cl; - } - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p 
*= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseConditionalBase* rp0; - prob_t base; - typedef unordered_map, CCRP_NoTable, boost::hash > > RuleCRPMap; - RuleCRPMap rules; - CCRP_NoTable > src_phrases; - vector > src_jumps; -}; - -#endif - -struct MyJointModel { - MyJointModel(PhraseJointBase& rcp0) : - rp0(rcp0), base(prob_t::One()), rules(1,1), src_jumps(200, CCRP_NoTable(1,1)) {} - - void DecrementRule(const TRule& rule) { - if (rules.decrement(rule)) - base /= rp0(rule); - } - - void IncrementRule(const TRule& rule) { - if (rules.increment(rule)) - base *= rp0(rule); - } - - void IncrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - void IncrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].increment(dist)) - base *= jp0(dist, src_len); - } - - void DecrementJump(int dist, unsigned src_len) { - assert(src_len > 0); - if (src_jumps[src_len].decrement(dist)) - base /= jp0(dist, src_len); - } - - void IncrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - IncrementJump(js[i], src_len); - } - - void DecrementJumps(const vector& js, unsigned src_len) { - for (unsigned i = 0; i < js.size(); ++i) - DecrementJump(js[i], src_len); - } - - // p(jump = dist | src_len , z) - prob_t JumpProbability(int dist, unsigned src_len) { - const prob_t p0 = jp0(dist, src_len); - const double lp = src_jumps[src_len].logprob(dist, log(p0)); - prob_t q; q.logeq(lp); - return q; - } - - // p(rule.f_ | z) * p(rule.e_ | rule.f_ , z) - prob_t RuleProbability(const TRule& rule) const { - prob_t p; p.logeq(rules.logprob(rule, log(rp0(rule)))); - return p; - } - - prob_t Likelihood() const { - prob_t p = base; - prob_t q; q.logeq(rules.log_crp_prob()); - p *= q; - for (unsigned l = 1; l < src_jumps.size(); ++l) { - if (src_jumps[l].num_customers() > 0) { - prob_t q; - q.logeq(src_jumps[l].log_crp_prob()); - p *= q; - } - } - return p; - } - - JumpBase jp0; - const PhraseJointBase& rp0; - prob_t base; - CCRP_NoTable rules; - vector > src_jumps; -}; - -struct BackwardEstimate { - BackwardEstimate(const Model1& m1, const vector& src, const vector& trg) : - model1_(m1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - r.push_back(0); // NULL word - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - } - return e; - } - const Model1& model1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - 
model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(const vector& src_cov, unsigned trg_cov) const { - assert(src_.size() == src_cov.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = 0; i < src_cov.size(); ++i) - if (!src_cov[i]) r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(log_poisson(r.size(), trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map, map, boost::hash > > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov(), prev_pos(-1) {} - prob_t weight; - prob_t gamma_last; - vector src_jumps; - vector rules; - vector src_cv; - int src_cov; - int trg_cov; - int prev_pos; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? 
'1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[cv=" << p.src_cv << " src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " last_pos=" << p.prev_pos << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -void FilterCrapParticlesAndReweight(vector* pps) { - vector& ps = *pps; - SampleSet ss; - for (int i = 0; i < ps.size(); ++i) - ss.add(ps[i].weight); - vector nps; nps.reserve(ps.size()); - const prob_t uniform_weight(1.0 / ps.size()); - for (int i = 0; i < ps.size(); ++i) { - nps.push_back(ps[prng->SelectSample(ss)]); - nps[i].weight = uniform_weight; - } - nps.swap(ps); -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - -#if 0 - PhraseConditionalBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size()); - MyConditionalModel m(lp0); -#else - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MyJointModel m(lp0); -#endif - - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRules(ps[ci].rules); - m.DecrementJumps(ps[ci].src_jumps, src.size()); - - //BackwardEstimate be(m1, src, trg); - BackwardEstimateSym be(m1, invm1, src, trg); - const Reachability& r = reaches[ci]; - vector lps(particles); - - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - p.src_cv.resize(src.size(), false); - } - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - FilterCrapParticlesAndReweight(&lps); - - // loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - const int rejuv_freq = 1; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - int first_uncovered = src.size(); - int last_uncovered = -1; - for (int i = 0; i < 
src.size(); ++i) { - const bool is_uncovered = !p.src_cv[i]; - if (i < first_uncovered && is_uncovered) first_uncovered = i; - if (is_uncovered && i > last_uncovered) last_uncovered = i; - } - assert(last_uncovered > -1); - assert(first_uncovered < src.size()); - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - const int last_possible_start = last_uncovered - src_len + 1; - assert(last_possible_start >= 0); - //cerr << src_len << "," << trg_len << " is allowed. E=" << TD::GetString(x.e_) << endl; - //cerr << " first_uncovered=" << first_uncovered << " last_possible_start=" << last_possible_start << endl; - for (int i = first_uncovered; i <= last_possible_start; ++i) { - if (p.src_cv[i]) continue; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - int gap_add = 0; - bool bad = false; - prob_t jp = prob_t::One(); - int prev_pos = p.prev_pos; - for (int j = 0; j < src_len; ++j) { - if ((j + i + gap_add) == src.size()) { bad = true; break; } - while ((i+j+gap_add) < src.size() && p.src_cv[i + j + gap_add]) { ++gap_add; } - if ((j + i + gap_add) == src.size()) { bad = true; break; } - np.src_cv[i + j + gap_add] = true; - x.f_.push_back(src[i + j + gap_add]); - jp *= m.JumpProbability(i + j + gap_add - prev_pos, src.size()); - int jump = i + j + gap_add - prev_pos; - assert(jump != 0); - np.src_jumps.push_back(jump); - prev_pos = i + j + gap_add; - } - if (bad) continue; - np.prev_pos = prev_pos; - np.src_cov += x.f_.size(); - np.trg_cov += x.e_.size(); - if (x.f_.size() != src_len) continue; - prob_t rp = m.RuleProbability(x); - np.gamma_last = rp * jp; - const prob_t u = pow(np.gamma_last * be(np.src_cv, np.trg_cov), 0.2); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << " jump=" << jp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - - const bool completed = (p.trg_cov == trg.size()); - if (completed) { - int last_jump = src.size() - p.prev_pos; - assert(last_jump > 0); - p.src_jumps.push_back(last_jump); - p.weight *= m.JumpProbability(last_jump, src.size()); - } - } - } - } - cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - //m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; - cerr << p << endl; - } - } // loop over particles (pi = 0 .. 
particles) - if (done_nothing) all_complete = true; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRules(lps[sampled].rules); - m.IncrementJumps(lps[sampled].src_jumps, src.size()); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - for (int sni = 0; sni < 5; ++sni) { - for (int i = 0; i < ps[sni].rules.size(); ++i) { cerr << "\t" << ps[sni].rules[i]->AsString() << endl; } - } - } - return 0; -} - diff --git a/gi/pf/pfnaive.cc b/gi/pf/pfnaive.cc deleted file mode 100644 index 958ec4e2..00000000 --- a/gi/pf/pfnaive.cc +++ /dev/null @@ -1,284 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "pf.h" -#include "base_distributions.h" -#include "monotonic_pseg.h" -#include "reachability.h" -#include "viterbi.h" -#include "hg.h" -#include "trule.h" -#include "tdict.h" -#include "filelib.h" -#include "dict.h" -#include "sampler.h" -#include "ccrp_nt.h" -#include "ccrp_onetable.h" -#include "corpus.h" - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,s",po::value()->default_value(1000),"Number of samples") - ("particles,p",po::value()->default_value(30),"Number of particles") - ("filter_frequency,f",po::value()->default_value(5),"Number of time steps between filterings") - ("input,i",po::value(),"Read parallel data from") - ("max_src_phrase",po::value()->default_value(5),"Maximum length of source language phrases") - ("max_trg_phrase",po::value()->default_value(5),"Maximum length of target language phrases") - ("model1,m",po::value(),"Model 1 parameters (used in base distribution)") - ("inverse_model1,M",po::value(),"Inverse Model 1 parameters (used in backward estimate)") - ("model1_interpolation_weight",po::value()->default_value(0.95),"Mixing proportion of model 1 with uniform target distribution") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("input") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct BackwardEstimateSym { - BackwardEstimateSym(const Model1& m1, - const Model1& invm1, const vector& src, const vector& trg) : - model1_(m1), invmodel1_(invm1), src_(src), trg_(trg) { - } - const prob_t& operator()(unsigned src_cov, unsigned trg_cov) const { - assert(src_cov <= src_.size()); - assert(trg_cov <= trg_.size()); - prob_t& e = cache_[src_cov][trg_cov]; - if (e.is_0()) { - if (trg_cov == trg_.size()) { e = prob_t::One(); return e; } - vector r(src_.size() + 1); r.clear(); - for (int i = src_cov; i < src_.size(); ++i) - 
r.push_back(src_[i]); - r.push_back(0); // NULL word - const prob_t uniform_alignment(1.0 / r.size()); - e.logeq(Md::log_poisson(trg_.size() - trg_cov, r.size() - 1)); // p(trg len remaining | src len remaining) - for (unsigned j = trg_cov; j < trg_.size(); ++j) { - prob_t p; - for (unsigned i = 0; i < r.size(); ++i) - p += model1_(r[i], trg_[j]); - if (p.is_0()) { - cerr << "ERROR: p(" << TD::Convert(trg_[j]) << " | " << TD::GetString(r) << ") = 0!\n"; - abort(); - } - p *= uniform_alignment; - e *= p; - } - r.pop_back(); - const prob_t inv_uniform(1.0 / (trg_.size() - trg_cov + 1.0)); - prob_t inv; - inv.logeq(Md::log_poisson(r.size(), trg_.size() - trg_cov)); - for (unsigned i = 0; i < r.size(); ++i) { - prob_t p; - for (unsigned j = trg_cov - 1; j < trg_.size(); ++j) - p += invmodel1_(j < trg_cov ? 0 : trg_[j], r[i]); - if (p.is_0()) { - cerr << "ERROR: p_inv(" << TD::Convert(r[i]) << " | " << TD::GetString(trg_) << ") = 0!\n"; - abort(); - } - p *= inv_uniform; - inv *= p; - } - prob_t x = pow(e * inv, 0.5); - e = x; - //cerr << "Forward: " << log(e) << "\tBackward: " << log(inv) << "\t prop: " << log(x) << endl; - } - return e; - } - const Model1& model1_; - const Model1& invmodel1_; - const vector& src_; - const vector& trg_; - mutable unordered_map > cache_; -}; - -struct Particle { - Particle() : weight(prob_t::One()), src_cov(), trg_cov() {} - prob_t weight; - prob_t gamma_last; - vector rules; - int src_cov; - int trg_cov; -}; - -ostream& operator<<(ostream& o, const vector& v) { - for (int i = 0; i < v.size(); ++i) - o << (v[i] ? '1' : '0'); - return o; -} -ostream& operator<<(ostream& o, const Particle& p) { - o << "[src_cov=" << p.src_cov << " trg_cov=" << p.trg_cov << " num_rules=" << p.rules.size() << " w=" << log(p.weight) << ']'; - return o; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const unsigned kMAX_TRG_PHRASE = conf["max_trg_phrase"].as(); - const unsigned kMAX_SRC_PHRASE = conf["max_src_phrase"].as(); - const unsigned particles = conf["particles"].as(); - const unsigned samples = conf["samples"].as(); - const unsigned rejuv_freq = conf["filter_frequency"].as(); - - if (!conf.count("model1")) { - cerr << argv[0] << "Please use --model1 to specify model 1 parameters\n"; - return 1; - } - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - - vector > corpuse, corpusf; - set vocabe, vocabf; - cerr << "Reading corpus...\n"; - corpus::ReadParallelCorpus(conf["input"].as(), &corpusf, &corpuse, &vocabf, &vocabe); - cerr << "F-corpus size: " << corpusf.size() << " sentences\t (" << vocabf.size() << " word types)\n"; - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - assert(corpusf.size() == corpuse.size()); - - const int kLHS = -TD::Convert("X"); - Model1 m1(conf["model1"].as()); - Model1 invm1(conf["inverse_model1"].as()); - - PhraseJointBase lp0(m1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - PhraseJointBase_BiDir alp0(m1, invm1, conf["model1_interpolation_weight"].as(), vocabe.size(), vocabf.size()); - MonotonicParallelSegementationModel m(alp0); - TRule xx("[X] ||| ms. kimura ||| MS. KIMURA ||| X=0"); - cerr << xx << endl << lp0(xx) << " " << alp0(xx) << endl; - TRule xx12("[X] ||| . ||| PHARMACY . ||| X=0"); - TRule xx21("[X] ||| pharmacy . ||| . ||| X=0"); -// TRule xx22("[X] ||| . ||| . ||| X=0"); - TRule xx22("[X] ||| . ||| THE . 
||| X=0"); - cerr << xx12 << "\t" << lp0(xx12) << " " << alp0(xx12) << endl; - cerr << xx21 << "\t" << lp0(xx21) << " " << alp0(xx21) << endl; - cerr << xx22 << "\t" << lp0(xx22) << " " << alp0(xx22) << endl; - - cerr << "Initializing reachability limits...\n"; - vector ps(corpusf.size()); - vector reaches; reaches.reserve(corpusf.size()); - for (int ci = 0; ci < corpusf.size(); ++ci) - reaches.push_back(Reachability(corpusf[ci].size(), - corpuse[ci].size(), - kMAX_SRC_PHRASE, - kMAX_TRG_PHRASE)); - cerr << "Sampling...\n"; - vector tmp_p(10000); // work space - SampleSet pfss; - SystematicResampleFilter filter(&rng); - // MultinomialResampleFilter filter(&rng); - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpusf.size(); ++ci) { - vector& src = corpusf[ci]; - vector& trg = corpuse[ci]; - m.DecrementRulesAndStops(ps[ci].rules); - const prob_t q_stop = m.StopProbability(); - const prob_t q_cont = m.ContinueProbability(); - cerr << "P(stop)=" << q_stop << "\tP(continue)=" < lps(particles); - - bool all_complete = false; - while(!all_complete) { - SampleSet ss; - - // all particles have now been extended a bit, we will reweight them now - if (lps[0].trg_cov > 0) - filter(&lps); - - // loop over all particles and extend them - bool done_nothing = true; - for (int pi = 0; pi < particles; ++pi) { - Particle& p = lps[pi]; - int tic = 0; - while(p.trg_cov < trg.size() && tic < rejuv_freq) { - ++tic; - done_nothing = false; - ss.clear(); - TRule x; x.lhs_ = kLHS; - prob_t z; - - for (int trg_len = 1; trg_len <= kMAX_TRG_PHRASE; ++trg_len) { - x.e_.push_back(trg[trg_len - 1 + p.trg_cov]); - for (int src_len = 1; src_len <= kMAX_SRC_PHRASE; ++src_len) { - if (!r.edges[p.src_cov][p.trg_cov][src_len][trg_len]) continue; - - int i = p.src_cov; - assert(ss.size() < tmp_p.size()); // if fails increase tmp_p size - Particle& np = tmp_p[ss.size()]; - np = p; - x.f_.clear(); - for (int j = 0; j < src_len; ++j) - x.f_.push_back(src[i + j]); - np.src_cov += x.f_.size(); - np.trg_cov += x.e_.size(); - const bool stop_now = (np.src_cov == src_len && np.trg_cov == trg_len); - prob_t rp = m.RuleProbability(x) * (stop_now ? q_stop : q_cont); - np.gamma_last = rp; - const prob_t u = pow(np.gamma_last * pow(be(np.src_cov, np.trg_cov), 1.2), 0.1); - //cerr << "**rule=" << x << endl; - //cerr << " u=" << log(u) << " rule=" << rp << endl; - ss.add(u); - np.rules.push_back(TRulePtr(new TRule(x))); - z += u; - } - } - //cerr << "number of edges to consider: " << ss.size() << endl; - const int sampled = rng.SelectSample(ss); - prob_t q_n = ss[sampled] / z; - p = tmp_p[sampled]; - //m.IncrementRule(*p.rules.back()); - p.weight *= p.gamma_last / q_n; - //cerr << "[w=" << log(p.weight) << "]\tsampled rule: " << p.rules.back()->AsString() << endl; - //cerr << p << endl; - } - } // loop over particles (pi = 0 .. 
particles) - if (done_nothing) all_complete = true; - prob_t wv = prob_t::Zero(); - for (int pp = 0; pp < lps.size(); ++pp) - wv += lps[pp].weight; - for (int pp = 0; pp < lps.size(); ++pp) - lps[pp].weight /= wv; - } - pfss.clear(); - for (int i = 0; i < lps.size(); ++i) - pfss.add(lps[i].weight); - const int sampled = rng.SelectSample(pfss); - ps[ci] = lps[sampled]; - m.IncrementRulesAndStops(lps[sampled].rules); - for (int i = 0; i < lps[sampled].rules.size(); ++i) { cerr << "S:\t" << lps[sampled].rules[i]->AsString() << "\n"; } - cerr << "tmp-LLH: " << log(m.Likelihood()) << endl; - } - cerr << "LLH: " << log(m.Likelihood()) << endl; - } - return 0; -} - diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h deleted file mode 100644 index 76204a0e..00000000 --- a/gi/pf/poisson_uniform_word_model.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _POISSON_UNIFORM_WORD_MODEL_H_ -#define _POISSON_UNIFORM_WORD_MODEL_H_ - -#include -#include -#include "prob.h" -#include "m.h" - -// len ~ Poisson(lambda) -// for (1..len) -// e_i ~ Uniform({Vocabulary}) -struct PoissonUniformWordModel { - explicit PoissonUniformWordModel(const unsigned vocab_size, - const unsigned alphabet_size, - const double mean_len = 5) : - lh(prob_t::One()), - v0(-std::log(vocab_size)), - u0(-std::log(alphabet_size)), - mean_length(mean_len) {} - - void ResampleHyperparameters(MT19937*) {} - - inline prob_t operator()(const std::vector& s) const { - prob_t p; - p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); - //p.logeq(v0); - return p; - } - - inline void Increment(const std::vector& w, MT19937*) { - lh *= (*this)(w); - } - - inline void Decrement(const std::vector& w, MT19937 *) { - lh /= (*this)(w); - } - - inline prob_t Likelihood() const { return lh; } - - void Summary() const {} - - private: - - prob_t lh; // keeps track of the draws from the base distribution - const double v0; // uniform log prob of generating a word - const double u0; // uniform log prob of generating a letter - const double mean_length; // mean length of a word in the base distribution -}; - -#endif diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc deleted file mode 100644 index 605d8206..00000000 --- a/gi/pf/pyp_lm.cc +++ /dev/null @@ -1,273 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#include "gamma_poisson.h" -#include "corpus_tools.h" -#include "m.h" -#include "tdict.h" -#include "sampler.h" -#include "ccrp.h" -#include "tied_resampler.h" - -// A not very memory-efficient implementation of an N-gram LM based on PYPs -// as described in Y.-W. Teh. (2006) A Hierarchical Bayesian Language Model -// based on Pitman-Yor Processes. In Proc. ACL. 
- -// I use templates to handle the recursive formalation of the prior, so -// the order of the model has to be specified here, at compile time: -#define kORDER 3 - -using namespace std; -using namespace tr1; -namespace po = boost::program_options; - -boost::shared_ptr prng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("samples,n",po::value()->default_value(300),"Number of samples") - ("train,i",po::value(),"Training data file") - ("test,T",po::value(),"Test data file") - ("discount_prior_a,a",po::value()->default_value(1.0), "discount ~ Beta(a,b): a=this") - ("discount_prior_b,b",po::value()->default_value(1.0), "discount ~ Beta(a,b): b=this") - ("strength_prior_s,s",po::value()->default_value(1.0), "strength ~ Gamma(s,r): s=this") - ("strength_prior_r,r",po::value()->default_value(1.0), "strength ~ Gamma(s,r): r=this") - ("random_seed,S",po::value(), "Random seed"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || (conf->count("train") == 0)) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -// uniform distribution over a fixed vocabulary -struct UniformVocabulary { - UniformVocabulary(unsigned vs, double, double, double, double) : p0(1.0 / vs), draws() {} - void increment(WordID, const vector&, MT19937*) { ++draws; } - void decrement(WordID, const vector&, MT19937*) { --draws; assert(draws >= 0); } - double prob(WordID, const vector&) const { return p0; } - void resample_hyperparameters(MT19937*) {} - double log_likelihood() const { return draws * log(p0); } - const double p0; - int draws; -}; - -// Lord Rothschild. 1986. THE DISTRIBUTION OF ENGLISH DICTIONARY WORD LENGTHS. 
-// Journal of Statistical Planning and Inference 14 (1986) 311-322 -struct PoissonLengthUniformCharWordModel { - explicit PoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : plen(5,5), uc(-log(95)), llh() {} - void increment(WordID w, const vector& v, MT19937*) { - llh += log(prob(w, v)); // this isn't quite right - plen.increment(TD::Convert(w).size() - 1); - } - void decrement(WordID w, const vector& v, MT19937*) { - plen.decrement(TD::Convert(w).size() - 1); - llh -= log(prob(w, v)); // this isn't quite right - } - double prob(WordID w, const vector&) const { - const unsigned len = TD::Convert(w).size(); - return plen.prob(len - 1) * exp(uc * len); - } - double log_likelihood() const { return llh; } - void resample_hyperparameters(MT19937*) {} - GammaPoisson plen; - const double uc; - double llh; -}; - -struct PYPAdaptedPoissonLengthUniformCharWordModel { - explicit PYPAdaptedPoissonLengthUniformCharWordModel(unsigned vocab_size, double, double, double, double) : - base(vocab_size,1,1,1,1), - crp(1,1,1,1) {} - void increment(WordID w, const vector& v, MT19937* rng) { - double p0 = base.prob(w, v); - if (crp.increment(w, p0, rng)) - base.increment(w, v, rng); - } - void decrement(WordID w, const vector& v, MT19937* rng) { - if (crp.decrement(w, rng)) - base.decrement(w, v, rng); - } - double prob(WordID w, const vector& v) const { - double p0 = base.prob(w, v); - return crp.prob(w, p0); - } - double log_likelihood() const { return crp.log_crp_prob() + base.log_likelihood(); } - void resample_hyperparameters(MT19937* rng) { crp.resample_hyperparameters(rng); } - PoissonLengthUniformCharWordModel base; - CCRP crp; -}; - -template struct PYPLM; - -#if 1 -template<> struct PYPLM<0> : public UniformVocabulary { - PYPLM(unsigned vs, double a, double b, double c, double d) : - UniformVocabulary(vs, a, b, c, d) {} -}; -#else -#if 0 -template<> struct PYPLM<0> : public PoissonLengthUniformCharWordModel { - PYPLM(unsigned vs, double a, double b, double c, double d) : - PoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#else -template<> struct PYPLM<0> : public PYPAdaptedPoissonLengthUniformCharWordModel { - PYPLM(unsigned vs, double a, double b, double c, double d) : - PYPAdaptedPoissonLengthUniformCharWordModel(vs, a, b, c, d) {} -}; -#endif -#endif - -// represents an N-gram LM -template struct PYPLM { - PYPLM(unsigned vs, double da, double db, double ss, double sr) : - backoff(vs, da, db, ss, sr), - tr(da, db, ss, sr, 0.8, 1.0), - lookup(N-1) {} - void increment(WordID w, const vector& context, MT19937* rng) { - const double bo = backoff.prob(w, context); - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); - if (it == p.end()) { - it = p.insert(make_pair(lookup, CCRP(0.5,1))).first; - tr.Add(&it->second); // add to resampler - } - if (it->second.increment(w, bo, rng)) - backoff.increment(w, context, rng); - } - void decrement(WordID w, const vector& context, MT19937* rng) { - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename unordered_map, CCRP, boost::hash > >::iterator it = p.find(lookup); - assert(it != p.end()); - if (it->second.decrement(w, rng)) - backoff.decrement(w, context, rng); - } - double prob(WordID w, const vector& context) const { - const double bo = backoff.prob(w, context); - for (unsigned i = 0; i < N-1; ++i) - lookup[i] = context[context.size() - 1 - i]; - typename 
unordered_map, CCRP, boost::hash > >::const_iterator it = p.find(lookup); - if (it == p.end()) return bo; - return it->second.prob(w, bo); - } - - double log_likelihood() const { - double llh = backoff.log_likelihood(); - typename unordered_map, CCRP, boost::hash > >::const_iterator it; - for (it = p.begin(); it != p.end(); ++it) - llh += it->second.log_crp_prob(); - llh += tr.LogLikelihood(); - return llh; - } - - void resample_hyperparameters(MT19937* rng) { - tr.ResampleHyperparameters(rng); - backoff.resample_hyperparameters(rng); - } - - PYPLM backoff; - TiedResampler > tr; - double discount_a, discount_b, strength_s, strength_r; - double d, strength; - mutable vector lookup; // thread-local - unordered_map, CCRP, boost::hash > > p; -}; - -int main(int argc, char** argv) { - po::variables_map conf; - - InitCommandLine(argc, argv, &conf); - const unsigned samples = conf["samples"].as(); - if (conf.count("random_seed")) - prng.reset(new MT19937(conf["random_seed"].as())); - else - prng.reset(new MT19937); - MT19937& rng = *prng; - vector > corpuse; - set vocabe; - const WordID kEOS = TD::Convert(""); - cerr << "Reading corpus...\n"; - CorpusTools::ReadFromFile(conf["train"].as(), &corpuse, &vocabe); - cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n"; - vector > test; - if (conf.count("test")) - CorpusTools::ReadFromFile(conf["test"].as(), &test); - else - test = corpuse; - PYPLM lm(vocabe.size(), - conf["discount_prior_a"].as(), - conf["discount_prior_b"].as(), - conf["strength_prior_s"].as(), - conf["strength_prior_r"].as()); - vector ctx(kORDER - 1, TD::Convert("")); - for (int SS=0; SS < samples; ++SS) { - for (int ci = 0; ci < corpuse.size(); ++ci) { - ctx.resize(kORDER - 1); - const vector& s = corpuse[ci]; - for (int i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? s[i] : kEOS); - if (SS > 0) lm.decrement(w, ctx, &rng); - lm.increment(w, ctx, &rng); - ctx.push_back(w); - } - } - if (SS % 10 == 9) { - cerr << " [LLH=" << lm.log_likelihood() << "]" << endl; - if (SS % 30 == 29) lm.resample_hyperparameters(&rng); - } else { cerr << '.' << flush; } - } - double llh = 0; - unsigned cnt = 0; - unsigned oovs = 0; - for (int ci = 0; ci < test.size(); ++ci) { - ctx.resize(kORDER - 1); - const vector& s = test[ci]; - for (int i = 0; i <= s.size(); ++i) { - WordID w = (i < s.size() ? 
s[i] : kEOS); - double lp = log(lm.prob(w, ctx)) / log(2); - if (i < s.size() && vocabe.count(w) == 0) { - cerr << "**OOV "; - ++oovs; - lp = 0; - } - cerr << "p(" << TD::Convert(w) << " |"; - for (int j = ctx.size() + 1 - kORDER; j < ctx.size(); ++j) - cerr << ' ' << TD::Convert(ctx[j]); - cerr << ") = " << lp << endl; - ctx.push_back(w); - llh -= lp; - cnt++; - } - } - cerr << " Log_10 prob: " << (-llh * log(2) / log(10)) << endl; - cerr << " Count: " << cnt << endl; - cerr << " OOVs: " << oovs << endl; - cerr << "Cross-entropy: " << (llh / cnt) << endl; - cerr << " Perplexity: " << pow(2, llh / cnt) << endl; - return 0; -} - - diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc deleted file mode 100644 index 37b9a604..00000000 --- a/gi/pf/pyp_tm.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "pyp_tm.h" - -#include -#include -#include - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { - FreqBinner(const std::string& fname) { fd_.Load(fname); } - unsigned NumberOfBins() const { return fd_.Max() + 1; } - unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } - FreqDict fd_; -}; - -template -struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : - base(*b), - binner(bnr), - btr(binner ? binner->NumberOfBins() + 1u : 2u) {} - - void Summary() const { - cerr << "Number of conditioning contexts: " << r.size() << endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; - for (CCRP >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - cerr << " " << i2->second << '\t' << TD::GetString(i2->first) << endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - btr.ResampleHyperparameters(rng); - } - - prob_t Prob(const WordID src, const vector& trglets) const { - RuleModelHash::const_iterator it = r.find(src); - if (it == r.end()) { - return base(trglets); - } else { - return it->second.prob(trglets, base(trglets)); - } - } - - void Increment(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - if (it == r.end()) { - it = r.insert(make_pair(src, CCRP >(0.5,1.0))).first; - static const WordID kNULL = TD::Convert("NULL"); - unsigned bin = (src == kNULL ? 
0 : 1); - if (binner && bin) { bin = binner->Bin(src) + 1; } - btr.Add(bin, &it->second); - } - if (it->second.increment(trglets, base(trglets), rng)) - base.Increment(trglets, rng); - } - - void Decrement(const WordID src, const vector& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - assert(it != r.end()); - if (it->second.decrement(trglets, rng)) { - base.Decrement(trglets, rng); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - return p; - } - - unsigned UniqueConditioningContexts() const { - return r.size(); - } - - // TODO tie PYP hyperparameters based on source word frequency bins - Base& base; - const Binner* binner; - BinTiedResampler > > btr; - typedef unordered_map > > RuleModelHash; - RuleModelHash r; -}; - -PYPLexicalTranslation::PYPLexicalTranslation(const vector >& lets, - const unsigned vocab_size, - const unsigned num_letters) : - letters(lets), - base(vocab_size, num_letters, 5), - tmodel(new ConditionalPYPWordModel(&base, new FreqBinner("10k.freq"))), - kX(-TD::Convert("X")) {} - -void PYPLexicalTranslation::Summary() const { - tmodel->Summary(); -} - -prob_t PYPLexicalTranslation::Likelihood() const { - return tmodel->Likelihood() * base.Likelihood(); -} - -void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { - tmodel->ResampleHyperparameters(rng); -} - -unsigned PYPLexicalTranslation::UniqueConditioningContexts() const { - return tmodel->UniqueConditioningContexts(); -} - -prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const { - return tmodel->Prob(src, letters[trg]); -} - -void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { - tmodel->Increment(src, letters[trg], rng); -} - -void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { - tmodel->Decrement(src, letters[trg], rng); -} - diff --git a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h deleted file mode 100644 index 2b076a25..00000000 --- a/gi/pf/pyp_tm.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef PYP_LEX_TRANS -#define PYP_LEX_TRANS - -#include -#include "wordid.h" -#include "prob.h" -#include "sampler.h" -#include "freqdict.h" -#include "poisson_uniform_word_model.h" - -struct FreqBinner; -template struct ConditionalPYPWordModel; - -struct PYPLexicalTranslation { - explicit PYPLexicalTranslation(const std::vector >& lets, - const unsigned vocab_size, - const unsigned num_letters); - - prob_t Likelihood() const; - - void ResampleHyperparameters(MT19937* rng); - prob_t Prob(WordID src, WordID trg) const; // return p(trg | src) - void Summary() const; - void Increment(WordID src, WordID trg, MT19937* rng); - void Decrement(WordID src, WordID trg, MT19937* rng); - unsigned UniqueConditioningContexts() const; - - private: - const std::vector >& letters; // spelling dictionary - PoissonUniformWordModel base; // "generator" of English types - ConditionalPYPWordModel* tmodel; // translation distributions - // (model English word | French word) - const WordID kX; -}; - -#endif diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h deleted file mode 100644 index 0bebb751..00000000 --- a/gi/pf/pyp_word_model.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _PYP_WORD_MODEL_H_ -#define _PYP_WORD_MODEL_H_ - -#include -#include -#include -#include "prob.h" -#include "ccrp.h" -#include "m.h" -#include "tdict.h" -#include "os_phrase.h" - -// PYP(d,s,poisson-uniform) represented as a CRP -template 
-struct PYPWordModel { - explicit PYPWordModel(Base* b) : - base(*b), - r(1,1,1,1,0.66,50.0) - {} - - void ResampleHyperparameters(MT19937* rng) { - r.resample_hyperparameters(rng); - std::cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n"; - } - - inline prob_t operator()(const std::vector& s) const { - return r.prob(s, base(s)); - } - - inline void Increment(const std::vector& s, MT19937* rng) { - if (r.increment(s, base(s), rng)) - base.Increment(s, rng); - } - - inline void Decrement(const std::vector& s, MT19937 *rng) { - if (r.decrement(s, rng)) - base.Decrement(s, rng); - } - - inline prob_t Likelihood() const { - prob_t p; p.logeq(r.log_crp_prob()); - p *= base.Likelihood(); - return p; - } - - void Summary() const { - std::cerr << "PYPWordModel: generations=" << r.num_customers() - << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << std::endl; - for (typename CCRP >::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << " " << it->second - << TD::GetString(it->first) << std::endl; - } - } - - private: - - Base& base; // keeps track of the draws from the base distribution - CCRP > r; -}; - -#endif diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h deleted file mode 100644 index 4075affe..00000000 --- a/gi/pf/quasi_model2.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef _QUASI_MODEL2_H_ -#define _QUASI_MODEL2_H_ - -#include -#include -#include -#include "boost/functional.hpp" -#include "prob.h" -#include "array2d.h" -#include "slice_sampler.h" -#include "m.h" -#include "have_64_bits.h" - -struct AlignmentObservation { - AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} - AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) : - src_len(sl), trg_len(tl), j(tw), a_j(sw) {} - unsigned short src_len; - unsigned short trg_len; - unsigned short j; - unsigned short a_j; -}; - -#ifdef HAVE_64_BITS -inline size_t hash_value(const AlignmentObservation& o) { - return reinterpret_cast(o); -} -inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) { - return hash_value(a) == hash_value(b); -} -#else -inline size_t hash_value(const AlignmentObservation& o) { - size_t h = 1; - boost::hash_combine(h, o.src_len); - boost::hash_combine(h, o.trg_len); - boost::hash_combine(h, o.j); - boost::hash_combine(h, o.a_j); - return h; -} -#endif - -struct QuasiModel2 { - explicit QuasiModel2(double alpha, double pnull = 0.1) : - alpha_(alpha), - pnull_(pnull), - pnotnull_(1 - pnull) {} - - // a_j = 0 => NULL; src_len does *not* include null - prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const { - if (!a_j) return pnull_; - return pnotnull_ * - prob_t(UnnormalizedProb(a_j, j, src_len, trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len)); - } - - void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { - assert(a_j <= src_len); - assert(j < trg_len); - ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)]; - } - - void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { - const AlignmentObservation ao(src_len, trg_len, j, a_j); - int &cc = obs_[ao]; - assert(cc > 0); - --cc; - if (!cc) obs_.erase(ao); - } - - struct PNullResampler { - PNullResampler(const QuasiModel2& m) : m_(m) {} - const QuasiModel2& m_; - double operator()(const double& proposed_pnull) const { - return log(m_.Likelihood(m_.alpha_, proposed_pnull)); - } - }; - - struct AlphaResampler { - AlphaResampler(const QuasiModel2& m) : m_(m) {} - const QuasiModel2& 
m_; - double operator()(const double& proposed_alpha) const { - return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); - } - }; - - void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - const PNullResampler dr(*this); - const AlphaResampler ar(*this); - for (unsigned i = 0; i < nloop; ++i) { - double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, - 1.0, 0.0, niterations, 100*niterations); - pnull_ = prob_t(pnull); - alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - } - std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" - << pnull_.as_float() << ") = " << Likelihood() << std::endl; - zcache_.clear(); - } - - prob_t Likelihood() const { - return Likelihood(alpha_, pnull_.as_float()); - } - - prob_t Likelihood(double alpha, double ppnull) const { - const prob_t pnull(ppnull); - const prob_t pnotnull(1 - ppnull); - - prob_t p; - p.logeq(Md::log_gamma_density(alpha, 0.1, 25)); // TODO configure - assert(!p.is_0()); - prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); - assert(!prob_of_ppnull.is_0()); - p *= prob_of_ppnull; - for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { - const AlignmentObservation& ao = it->first; - if (ao.a_j) { - prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); - prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); - prob_t pa(u / z); - pa *= pnotnull; - pa.poweq(it->second); - p *= pa; - } else { - p *= pnull.pow(it->second); - } - } - return p; - } - - private: - static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - prob_t p; - p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); - return p; - } - - static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - prob_t z = prob_t::Zero(); - for (int a_j = 1; a_j <= src_len; ++a_j) - z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); - return z; - } - - static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); - } - - static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { - double z = 0; - for (int a_j = 1; a_j <= src_len; ++a_j) - z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha); - return z; - } - - const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const { - if (src_len >= zcache_.size()) - zcache_.resize(src_len + 1); - if (trg_len >= zcache_[src_len].size()) - zcache_[src_len].resize(trg_len + 1); - std::vector& zv = zcache_[src_len][trg_len]; - if (zv.size() == 0) - zv.resize(trg_len); - double& z = zv[j]; - if (!z) - z = ComputeZ(j, src_len, trg_len, alpha_); - return z; - } - - double alpha_; - prob_t pnull_; - prob_t pnotnull_; - mutable std::vector > > zcache_; - typedef std::tr1::unordered_map > ObsCount; - ObsCount obs_; -}; - -#endif diff --git a/gi/pf/reachability.cc b/gi/pf/reachability.cc deleted file mode 100644 index 7d0d04ac..00000000 --- a/gi/pf/reachability.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "reachability.h" - -#include -#include - -using namespace std; - -struct SState { - SState() : prev_src_covered(), prev_trg_covered() {} - SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {} - int prev_src_covered; - int prev_trg_covered; -}; - 
-void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) { - typedef boost::multi_array, 2> array_type; - array_type a(boost::extents[srclen + 1][trglen + 1]); - a[0][0].push_back(SState()); - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (a[i][j].size() == 0) continue; - const SState prev(i,j); - for (int k = 1; k <= src_max_phrase_len; ++k) { - if ((i + k) > srclen) continue; - for (int l = 1; l <= trg_max_phrase_len; ++l) { - if ((j + l) > trglen) continue; - a[i + k][j + l].push_back(prev); - } - } - } - } - a[0][0].clear(); - //cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n"; - if (a[srclen][trglen].empty()) { - cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n"; - nodes = 0; - return; - } - - typedef boost::multi_array rarray_type; - rarray_type r(boost::extents[srclen + 1][trglen + 1]); - r[srclen][trglen] = true; - nodes = 0; - for (int i = srclen; i >= 0; --i) { - for (int j = trglen; j >= 0; --j) { - vector& prevs = a[i][j]; - if (!r[i][j]) { prevs.clear(); } - for (int k = 0; k < prevs.size(); ++k) { - r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true; - int src_delta = i - prevs[k].prev_src_covered; - edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true; - valid_deltas[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(make_pair(src_delta,j - prevs[k].prev_trg_covered)); - short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered]; - if (src_delta > msd) msd = src_delta; - } - } - } - assert(!edges[0][0][1][0]); - assert(!edges[0][0][0][1]); - assert(!edges[0][0][0][0]); - assert(max_src_delta[0][0] > 0); - nodes = 0; - for (int i = 0; i < srclen; ++i) { - for (int j = 0; j < trglen; ++j) { - if (valid_deltas[i][j].size() > 0) { - node_addresses[i][j] = nodes++; - } else { - node_addresses[i][j] = -1; - } - } - } - cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") has " << valid_deltas[0][0].size() << " out edges in its root node, " << nodes << " nodes in total, and outside estimate matrix will require " << sizeof(float)*nodes << " bytes\n"; - } - diff --git a/gi/pf/reachability.h b/gi/pf/reachability.h deleted file mode 100644 index 1e22c76a..00000000 --- a/gi/pf/reachability.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _REACHABILITY_H_ -#define _REACHABILITY_H_ - -#include "boost/multi_array.hpp" - -// determines minimum and maximum lengths of outgoing edges from all -// coverage positions such that the alignment path respects src and -// trg maximum phrase sizes -// -// runs in O(n^2 * src_max * trg_max) time but should be relatively fast -// -// currently forbids 0 -> n and n -> 0 alignments - -struct Reachability { - unsigned nodes; - boost::multi_array edges; // edges[src_covered][trg_covered][src_delta][trg_delta] is this edge worth exploring? 
- boost::multi_array max_src_delta; // msd[src_covered][trg_covered] -- the largest src delta that's valid - boost::multi_array node_addresses; // na[src_covered][trg_covered] -- the index of the node in a one-dimensional array (of size "nodes") - boost::multi_array >, 2> valid_deltas; // valid_deltas[src_covered][trg_covered] list of valid transitions leaving a particular node - - Reachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) : - nodes(), - edges(boost::extents[srclen][trglen][src_max_phrase_len+1][trg_max_phrase_len+1]), - max_src_delta(boost::extents[srclen][trglen]), - node_addresses(boost::extents[srclen][trglen]), - valid_deltas(boost::extents[srclen][trglen]) { - ComputeReachability(srclen, trglen, src_max_phrase_len, trg_max_phrase_len); - } - - private: - void ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len); -}; - -#endif diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h deleted file mode 100644 index a4f4af36..00000000 --- a/gi/pf/tied_resampler.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef _TIED_RESAMPLER_H_ -#define _TIED_RESAMPLER_H_ - -#include -#include -#include "sampler.h" -#include "slice_sampler.h" -#include "m.h" - -template -struct TiedResampler { - explicit TiedResampler(double da, double db, double ss, double sr, double d=0.5, double s=1.0) : - d_alpha(da), - d_beta(db), - s_shape(ss), - s_rate(sr), - discount(d), - strength(s) {} - - void Add(CRP* crp) { - crps.insert(crp); - crp->set_discount(discount); - crp->set_strength(strength); - assert(!crp->has_discount_prior()); - assert(!crp->has_strength_prior()); - } - - void Remove(CRP* crp) { - crps.erase(crp); - } - - size_t size() const { - return crps.size(); - } - - double LogLikelihood(double d, double s) const { - if (s <= -d) return -std::numeric_limits::infinity(); - double llh = Md::log_beta_density(d, d_alpha, d_beta) + - Md::log_gamma_density(d + s, s_shape, s_rate); - for (typename std::set::iterator it = crps.begin(); it != crps.end(); ++it) - llh += (*it)->log_crp_prob(d, s); - return llh; - } - - double LogLikelihood() const { - return LogLikelihood(discount, strength); - } - - struct DiscountResampler { - DiscountResampler(const TiedResampler& m) : m_(m) {} - const TiedResampler& m_; - double operator()(const double& proposed_discount) const { - return m_.LogLikelihood(proposed_discount, m_.strength); - } - }; - - struct AlphaResampler { - AlphaResampler(const TiedResampler& m) : m_(m) {} - const TiedResampler& m_; - double operator()(const double& proposed_strength) const { - return m_.LogLikelihood(m_.discount, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { - if (size() == 0) { std::cerr << "EMPTY - not resampling\n"; return; } - const DiscountResampler dr(*this); - const AlphaResampler ar(*this); - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - discount = slice_sampler1d(dr, discount, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } - strength = slice_sampler1d(ar, strength, *rng, -discount + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "TiedCRPs(d=" << discount << ",s=" - << 
strength << ") = " << LogLikelihood(discount, strength) << std::endl; - for (typename std::set::iterator it = crps.begin(); it != crps.end(); ++it) - (*it)->set_hyperparameters(discount, strength); - } - private: - std::set crps; - const double d_alpha, d_beta, s_shape, s_rate; - double discount, strength; -}; - -// split according to some criterion -template -struct BinTiedResampler { - explicit BinTiedResampler(unsigned nbins) : - resamplers(nbins, TiedResampler(1,1,1,1)) {} - - void Add(unsigned bin, CRP* crp) { - resamplers[bin].Add(crp); - } - - void Remove(unsigned bin, CRP* crp) { - resamplers[bin].Remove(crp); - } - - void ResampleHyperparameters(MT19937* rng) { - for (unsigned i = 0; i < resamplers.size(); ++i) { - std::cerr << "BIN " << i << " (" << resamplers[i].size() << " CRPs): " << std::flush; - resamplers[i].ResampleHyperparameters(rng); - } - } - - double LogLikelihood() const { - double llh = 0; - for (unsigned i = 0; i < resamplers.size(); ++i) - llh += resamplers[i].LogLikelihood(); - return llh; - } - - private: - std::vector > resamplers; -}; - -#endif diff --git a/gi/pf/tpf.cc b/gi/pf/tpf.cc deleted file mode 100644 index 7348d21c..00000000 --- a/gi/pf/tpf.cc +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include -#include - -#include "sampler.h" - -using namespace std; -using namespace tr1; - -shared_ptr prng; - -struct Particle { - Particle() : weight(prob_t::One()) {} - vector states; - prob_t weight; - prob_t gamma_last; -}; - -ostream& operator<<(ostream& os, const Particle& p) { - os << "["; - for (int i = 0; i < p.states.size(); ++i) os << p.states[i] << ' '; - os << "| w=" << log(p.weight) << ']'; - return os; -} - -void Rejuvenate(vector& pps) { - SampleSet ss; - vector nps(pps.size()); - for (int i = 0; i < pps.size(); ++i) { -// cerr << pps[i] << endl; - ss.add(pps[i].weight); - } -// cerr << "REJUVINATING...\n"; - for (int i = 0; i < pps.size(); ++i) { - nps[i] = pps[prng->SelectSample(ss)]; - nps[i].weight = prob_t(1.0 / pps.size()); -// cerr << nps[i] << endl; - } - nps.swap(pps); -// exit(1); -} - -int main(int argc, char** argv) { - const unsigned particles = 100; - prng.reset(new MT19937); - MT19937& rng = *prng; - - // q(a) = 0.8 - // q(b) = 0.8 - // q(c) = 0.4 - SampleSet ssq; - ssq.add(0.4); - ssq.add(0.6); - ssq.add(0); - double qz = 1; - - // p(a) = 0.2 - // p(b) = 0.8 - vector p(3); - p[0] = 0.2; - p[1] = 0.8; - p[2] = 0; - - vector counts(3); - int tot = 0; - - vector pps(particles); - SampleSet ppss; - int LEN = 12; - int PP = 1; - while (pps[0].states.size() < LEN) { - for (int pi = 0; pi < particles; ++pi) { - Particle& prt = pps[pi]; - - bool redo = true; - const Particle savedp = prt; - while (redo) { - redo = false; - for (int i = 0; i < PP; ++i) { - int s = rng.SelectSample(ssq); - double gamma_last = p[s]; - if (!gamma_last) { redo = true; break; } - double q = ssq[s] / qz; - prt.states.push_back(s); - prt.weight *= prob_t(gamma_last / q); - } - if (redo) { prt = savedp; continue; } - } - } - Rejuvenate(pps); - } - ppss.clear(); - for (int i = 0; i < particles; ++i) { ppss.add(pps[i].weight); } - int sp = rng.SelectSample(ppss); - cerr << pps[sp] << endl; - - return 0; -} - diff --git a/gi/pf/transliterations.cc b/gi/pf/transliterations.cc deleted file mode 100644 index b2996f65..00000000 --- a/gi/pf/transliterations.cc +++ /dev/null @@ -1,334 +0,0 @@ -#include "transliterations.h" - -#include -#include - -#include "boost/shared_ptr.hpp" - -#include "backward.h" -#include "filelib.h" -#include "tdict.h" -#include "trule.h" -#include 
"filelib.h" -#include "ccrp_nt.h" -#include "m.h" -#include "reachability.h" - -using namespace std; -using namespace std::tr1; - -struct TruncatedConditionalLengthModel { - TruncatedConditionalLengthModel(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : - plens(max_src_size+1, vector(max_trg_size+1, 0.0)) { - for (unsigned i = 1; i <= max_src_size; ++i) { - prob_t z = prob_t::Zero(); - for (unsigned j = 1; j <= max_trg_size; ++j) - z += (plens[i][j] = prob_t(0.01 + exp(Md::log_poisson(j, i * expected_src_to_trg_ratio)))); - for (unsigned j = 1; j <= max_trg_size; ++j) - plens[i][j] /= z; - //for (unsigned j = 1; j <= max_trg_size; ++j) - // cerr << "P(trg_len=" << j << " | src_len=" << i << ") = " << plens[i][j] << endl; - } - } - - // return p(tlen | slen) for *chunks* not full words - inline const prob_t& operator()(int slen, int tlen) const { - return plens[slen][tlen]; - } - - vector > plens; -}; - -struct CondBaseDist { - CondBaseDist(unsigned max_src_size, unsigned max_trg_size, double expected_src_to_trg_ratio) : - tclm(max_src_size, max_trg_size, expected_src_to_trg_ratio) {} - - prob_t operator()(const vector& src, unsigned sf, unsigned st, - const vector& trg, unsigned tf, unsigned tt) const { - prob_t p = tclm(st - sf, tt - tf); // target len | source length ~ TCLM(source len) - assert(!"not impl"); - return p; - } - inline prob_t operator()(const vector& src, const vector& trg) const { - return (*this)(src, 0, src.size(), trg, 0, trg.size()); - } - TruncatedConditionalLengthModel tclm; -}; - -// represents transliteration phrase probabilities, e.g. -// p( a l - | A l ) , p( o | A w ) , ... -struct TransliterationChunkConditionalModel { - explicit TransliterationChunkConditionalModel(const CondBaseDist& pp0) : - d(0.0), - strength(1.0), - rp0(pp0) { - } - - void Summary() const { - std::cerr << "Number of conditioning contexts: " << r.size() << std::endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl; - for (CCRP_NoTable::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - std::cerr << " " << i2->second << '\t' << i2->first << std::endl; - } - } - - int DecrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - assert(it != r.end()); - int count = it->second.decrement(rule); - if (count) { - if (it->second.num_customers() == 0) r.erase(it); - } - return count; - } - - int IncrementRule(const TRule& rule) { - RuleModelHash::iterator it = r.find(rule.f_); - if (it == r.end()) { - it = r.insert(make_pair(rule.f_, CCRP_NoTable(strength))).first; - } - int count = it->second.increment(rule); - return count; - } - - void IncrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - IncrementRule(*rules[i]); - } - - void DecrementRules(const std::vector& rules) { - for (int i = 0; i < rules.size(); ++i) - DecrementRule(*rules[i]); - } - - prob_t RuleProbability(const TRule& rule) const { - prob_t p; - RuleModelHash::const_iterator it = r.find(rule.f_); - if (it == r.end()) { - p = rp0(rule.f_, rule.e_); - } else { - p = it->second.prob(rule, rp0(rule.f_, rule.e_)); - } - return p; - } - - double LogLikelihood(const double& dd, const double& aa) const { - if (aa <= -dd) return -std::numeric_limits::infinity(); - //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); - double llh = 
//Md::log_beta_density(dd, 1, 1) + - Md::log_gamma_density(dd + aa, 1, 1); - std::tr1::unordered_map, CCRP_NoTable, boost::hash > >::const_iterator it; - for (it = r.begin(); it != r.end(); ++it) - llh += it->second.log_crp_prob(aa); - return llh; - } - - struct AlphaResampler { - AlphaResampler(const TransliterationChunkConditionalModel& m) : m_(m) {} - const TransliterationChunkConditionalModel& m_; - double operator()(const double& proposed_strength) const { - return m_.LogLikelihood(m_.d, proposed_strength); - } - }; - - void ResampleHyperparameters(MT19937* rng) { - std::tr1::unordered_map, CCRP_NoTable, boost::hash > >::iterator it; - //const unsigned nloop = 5; - const unsigned niterations = 10; - //DiscountResampler dr(*this); - AlphaResampler ar(*this); -#if 0 - for (int iter = 0; iter < nloop; ++iter) { - strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits::min(), - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - double min_discount = std::numeric_limits::min(); - if (strength < 0.0) min_discount -= strength; - d = slice_sampler1d(dr, d, *rng, min_discount, - 1.0, 0.0, niterations, 100*niterations); - } -#endif - strength = slice_sampler1d(ar, strength, *rng, -d, - std::numeric_limits::infinity(), 0.0, niterations, 100*niterations); - std::cerr << "CTMModel(alpha=" << strength << ") = " << LogLikelihood(d, strength) << std::endl; - for (it = r.begin(); it != r.end(); ++it) { -#if 0 - it->second.set_discount(d); -#endif - it->second.set_alpha(strength); - } - } - - prob_t Likelihood() const { - prob_t p; p.logeq(LogLikelihood(d, strength)); - return p; - } - - const CondBaseDist& rp0; - typedef std::tr1::unordered_map, - CCRP_NoTable, - boost::hash > > RuleModelHash; - RuleModelHash r; - double d, strength; -}; - -struct GraphStructure { - GraphStructure() : r() {} - // leak memory - these are basically static - const Reachability* r; - bool IsReachable() const { return r->nodes > 0; } -}; - -struct ProbabilityEstimates { - ProbabilityEstimates() : gs(), backward() {} - explicit ProbabilityEstimates(const GraphStructure& g) : - gs(&g), backward() { - if (g.r->nodes > 0) - backward = new float[g.r->nodes]; - } - // leak memory, these are static - - // returns an estimate of the marginal probability - double MarginalEstimate() const { - if (!backward) return 0; - return backward[0]; - } - - // returns an backward estimate - double Backward(int src_covered, int trg_covered) const { - if (!backward) return 0; - int ind = gs->r->node_addresses[src_covered][trg_covered]; - if (ind < 0) return 0; - return backward[ind]; - } - - prob_t estp; - float* backward; - private: - const GraphStructure* gs; -}; - -struct TransliterationsImpl { - TransliterationsImpl(int max_src, int max_trg, double sr, const BackwardEstimator& b) : - cp0(max_src, max_trg, sr), - tccm(cp0), - be(b), - kMAX_SRC_CHUNK(max_src), - kMAX_TRG_CHUNK(max_trg), - kS2T_RATIO(sr), - tot_pairs(), tot_mem() { - } - const CondBaseDist cp0; - TransliterationChunkConditionalModel tccm; - const BackwardEstimator& be; - - void Initialize(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - const size_t src_len = src_lets.size(); - const size_t trg_len = trg_lets.size(); - - // init graph structure - if (src_len >= graphs.size()) graphs.resize(src_len + 1); - if (trg_len >= graphs[src_len].size()) graphs[src_len].resize(trg_len + 1); - GraphStructure& gs = graphs[src_len][trg_len]; - if (!gs.r) { - double rat = exp(fabs(log(trg_len / (src_len * kS2T_RATIO)))); - if 
(rat > 1.5 || (rat > 2.4 && src_len < 6)) { - cerr << " ** Forbidding transliterations of size " << src_len << "," << trg_len << ": " << rat << endl; - gs.r = new Reachability(src_len, trg_len, 0, 0); - } else { - gs.r = new Reachability(src_len, trg_len, kMAX_SRC_CHUNK, kMAX_TRG_CHUNK); - } - } - - const Reachability& r = *gs.r; - - // init backward estimates - if (src >= ests.size()) ests.resize(src + 1); - unordered_map::iterator it = ests[src].find(trg); - if (it != ests[src].end()) return; // already initialized - - it = ests[src].insert(make_pair(trg, ProbabilityEstimates(gs))).first; - ProbabilityEstimates& est = it->second; - if (!gs.r->nodes) return; // not derivable subject to length constraints - - be.InitializeGrid(src_lets, trg_lets, r, kS2T_RATIO, est.backward); - cerr << TD::GetString(src_lets) << " ||| " << TD::GetString(trg_lets) << " ||| " << (est.backward[0] / trg_lets.size()) << endl; - tot_pairs++; - tot_mem += sizeof(float) * gs.r->nodes; - } - - void Forbid(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - const size_t src_len = src_lets.size(); - const size_t trg_len = trg_lets.size(); - // TODO - } - - prob_t EstimateProbability(WordID s, const vector& src, WordID t, const vector& trg) const { - assert(src.size() < graphs.size()); - const vector& tv = graphs[src.size()]; - assert(trg.size() < tv.size()); - const GraphStructure& gs = tv[trg.size()]; - if (gs.r->nodes == 0) - return prob_t::Zero(); - const unordered_map::const_iterator it = ests[s].find(t); - assert(it != ests[s].end()); - return it->second.estp; - } - - void GraphSummary() const { - double to = 0; - double tn = 0; - double tt = 0; - for (int i = 0; i < graphs.size(); ++i) { - const vector& vt = graphs[i]; - for (int j = 0; j < vt.size(); ++j) { - const GraphStructure& gs = vt[j]; - if (!gs.r) continue; - tt++; - for (int k = 0; k < i; ++k) { - for (int l = 0; l < j; ++l) { - size_t c = gs.r->valid_deltas[k][l].size(); - if (c) { - tn += 1; - to += c; - } - } - } - } - } - cerr << " Average nodes = " << (tn / tt) << endl; - cerr << "Average out-degree = " << (to / tn) << endl; - cerr << " Unique structures = " << tt << endl; - cerr << " Unique pairs = " << tot_pairs << endl; - cerr << " BEs size = " << (tot_mem / (1024.0*1024.0)) << " MB" << endl; - } - - const int kMAX_SRC_CHUNK; - const int kMAX_TRG_CHUNK; - const double kS2T_RATIO; - unsigned tot_pairs; - size_t tot_mem; - vector > graphs; // graphs[src_len][trg_len] - vector > ests; // ests[src][trg] -}; - -Transliterations::Transliterations(int max_src, int max_trg, double sr, const BackwardEstimator& be) : - pimpl_(new TransliterationsImpl(max_src, max_trg, sr, be)) {} -Transliterations::~Transliterations() { delete pimpl_; } - -void Transliterations::Initialize(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - pimpl_->Initialize(src, src_lets, trg, trg_lets); -} - -prob_t Transliterations::EstimateProbability(WordID s, const vector& src, WordID t, const vector& trg) const { - return pimpl_->EstimateProbability(s, src,t, trg); -} - -void Transliterations::Forbid(WordID src, const vector& src_lets, WordID trg, const vector& trg_lets) { - pimpl_->Forbid(src, src_lets, trg, trg_lets); -} - -void Transliterations::GraphSummary() const { - pimpl_->GraphSummary(); -} - diff --git a/gi/pf/transliterations.h b/gi/pf/transliterations.h deleted file mode 100644 index 49d14684..00000000 --- a/gi/pf/transliterations.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _TRANSLITERATIONS_H_ -#define 
_TRANSLITERATIONS_H_ - -#include -#include "wordid.h" -#include "prob.h" - -struct BackwardEstimator; -struct TransliterationsImpl; -struct Transliterations { - // max_src and max_trg indicate how big the transliteration phrases can be - // see reachability.h for information about filter_ratio - explicit Transliterations(int max_src, int max_trg, double s2t_rat, const BackwardEstimator& be); - ~Transliterations(); - void Initialize(WordID src, const std::vector& src_lets, WordID trg, const std::vector& trg_lets); - void Forbid(WordID src, const std::vector& src_lets, WordID trg, const std::vector& trg_lets); - void GraphSummary() const; - prob_t EstimateProbability(WordID s, const std::vector& src, WordID t, const std::vector& trg) const; - private: - TransliterationsImpl* pimpl_; -}; - -#endif - diff --git a/gi/pf/unigrams.cc b/gi/pf/unigrams.cc deleted file mode 100644 index 40829775..00000000 --- a/gi/pf/unigrams.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "unigrams.h" - -#include -#include - -#include "stringlib.h" -#include "filelib.h" - -using namespace std; - -void UnigramModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." << endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - const WordID w = TD::Convert(line.substr(pos + 1)); - line[pos] = 0; - float p = atof(&line[0]); - if (w < probs_.size()) probs_[w].logeq(p * log(10)); else cerr << "WARNING: don't know about '" << TD::Convert(w) << "'\n"; - } -} - -void UnigramWordModel::LoadUnigrams(const string& fname) { - cerr << "Loading unigram probabilities from " << fname << " ..." 
<< endl; - ReadFile rf(fname); - string line; - istream& in = *rf.stream(); - assert(in); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\data\\"); - getline(in, line); - size_t pos = line.find("ngram 1="); - assert(pos == 0); - assert(line.size() > 8); - const size_t num_unigrams = atoi(&line[8]); - getline(in, line); - assert(line.empty()); - getline(in, line); - assert(line == "\\1-grams:"); - for (size_t i = 0; i < num_unigrams; ++i) { - getline(in, line); - assert(line.size() > 0); - pos = line.find('\t'); - assert(pos > 0); - assert(pos + 1 < line.size()); - size_t cur = pos + 1; - vector w; - while (cur < line.size()) { - const size_t len = UTF8Len(line[cur]); - w.push_back(TD::Convert(line.substr(cur, len))); - cur += len; - } - line[pos] = 0; - float p = atof(&line[0]); - probs_[w].logeq(p * log(10.0)); - } -} - diff --git a/gi/pf/unigrams.h b/gi/pf/unigrams.h deleted file mode 100644 index 1660d1ed..00000000 --- a/gi/pf/unigrams.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _UNIGRAMS_H_ -#define _UNIGRAMS_H_ - -#include -#include -#include -#include - -#include "wordid.h" -#include "prob.h" -#include "tdict.h" - -struct UnigramModel { - explicit UnigramModel(const std::string& fname, unsigned vocab_size) : - use_uniform_(fname.size() == 0), - uniform_(1.0 / vocab_size), - probs_() { - if (fname.size() > 0) { - probs_.resize(TD::NumWords() + 1); - LoadUnigrams(fname); - } - } - - const prob_t& operator()(const WordID& w) const { - assert(w); - if (use_uniform_) return uniform_; - return probs_[w]; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - std::vector probs_; -}; - - -// reads an ARPA unigram file and converts words like 'cat' into a string 'c a t' -struct UnigramWordModel { - explicit UnigramWordModel(const std::string& fname) : - use_uniform_(false), - uniform_(1.0), - probs_() { - LoadUnigrams(fname); - } - - explicit UnigramWordModel(const unsigned vocab_size) : - use_uniform_(true), - uniform_(1.0 / vocab_size), - probs_() {} - - const prob_t& operator()(const std::vector& s) const { - if (use_uniform_) return uniform_; - const VectorProbHash::const_iterator it = probs_.find(s); - assert(it != probs_.end()); - return it->second; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - typedef std::tr1::unordered_map, prob_t, boost::hash > > VectorProbHash; - VectorProbHash probs_; -}; - -#endif diff --git a/gi/pipeline/OLD.clsp.config b/gi/pipeline/OLD.clsp.config deleted file mode 100644 index cd0f9d65..00000000 --- a/gi/pipeline/OLD.clsp.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM xfeats.grammar dev dev-refs test1 testt-eval.sh ... 
-btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz xgrammar/grammar.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al -aren /export/ws10smt/data/arabic-english corpus.ar-en.al -uren /export/ws10smt/data/urdu-english corpus.ur-en.al -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/OLD.evaluation-pipeline.pl b/gi/pipeline/OLD.evaluation-pipeline.pl deleted file mode 100755 index 49c303eb..00000000 --- a/gi/pipeline/OLD.evaluation-pipeline.pl +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my @DEFAULT_FEATS = qw( - LogRuleCount SingletonRule LexE2F LexF2E WordPenalty - LogFCount LanguageModel Glue GlueTop PassThrough SingletonF -); - -my %init_weights = qw( - LogRuleCount 0.2 - LexE2F -0.3 - LexF2E -0.3 - LogFCount 0.1 - WordPenalty -1.5 - LanguageModel 1.2 - Glue -1.0 - GlueTop 0.00001 - PassThrough -10.0 - SingletonRule -0.1 - X_EGivenF -0.3 - X_FGivenE -0.3 - X_LogECount -1 - X_LogFCount -0.1 - X_LogRuleCount 0.3 - X_SingletonE -0.1 - X_SingletonF -0.1 - X_SingletonRule -0.5 -); - -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTSCORE = "$EXTOOLS/filter_score_grammar"; -my $ADDXFEATS = "$SCRIPT_DIR/scripts/xfeats.pl"; -assert_exec($CDEC, $PARALLELIZE, $FILTSCORE, $DISTVEST, $ADDXFEATS); - -my $config = "$SCRIPT_DIR/OLD.clsp.config"; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my %xgrammars; -print STDERR " LANGUAGE PAIRS:"; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - s/^\s+//; - s/\s+$//; - my ($name, $path, $corpus, $lm, $xgrammar, $dev, $devref, @xtests) = split /\s+/; - $paths{$name} = $path; - $corpora{$name} = $corpus; - $lms{$name} = $lm; - $xgrammars{$name} = $xgrammar; - $devs{$name} = $dev; - $devrefs{$name} = $devref; - $tests{$name} = $xtests[0]; - $testevals{$name} = $xtests[1]; - print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $XFEATS; -my $EXTRA_FILTER = ''; -my $dataDir = '/export/ws10smt/data'; -if (GetOptions( - "data=s" => \$dataDir, - "xfeats" => \$XFEATS, -) == 0 || @ARGV!=2 || $help) { - print_help(); - exit; -} -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR " CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . 
$paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR " GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR " LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -die "Can't find test: $test\n" unless -f $test; -assert_exec($teval); - -if ($XFEATS) { - my $xgram = mydircat($corpdir, $xgrammars{$lp}); - die "Can't find x-grammar: $xgram" unless -f $xgram; - $EXTRA_FILTER = "$ADDXFEATS $xgram |"; - print STDERR "ADDING X-FEATS FROM $xgram\n"; -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -`mkdir -p $outdir`; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f $tuned_weights) { - print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { - my $cmd = "$DISTVEST --ref-files=$drefs --source-file=$dev --weights $weights $devini"; - print STDERR "MERT COMMAND: $cmd\n"; - `rm -rf $outdir/vest 2> /dev/null`; - chdir $outdir or die "Can't chdir to $outdir: $!"; - $weights = `$cmd`; - die "MERT reported non-zero exit code" unless $? == 0; - chomp $weights; - safesystem($tuned_weights, "cp $weights $tuned_weights"); - print STDERR "TUNED WEIGHTS: $tuned_weights\n"; - die "$tuned_weights is missing!" 
unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { - my ($file, @extras) = @_; - open F, ">$file" or die "Can't write $file: $!"; - my @feats = (@DEFAULT_FEATS, @extras); - if ($XFEATS) { - my @xfeats = qw( - X_LogRuleCount X_LogECount X_LogFCount X_EGivenF X_FGivenE X_SingletonRule X_SingletonE X_SingletonF - ); - @feats = (@feats, @xfeats); - } - for my $feat (@feats) { - my $r = rand(1.6); - my $w = $init_weights{$feat} * $r; - if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } - print F "$feat $w\n"; - } - close F; -} - -sub filter { - my ($grammar, $set, $name, $outdir) = @_; - my $outgrammar = mydircat($outdir, "$name.scfg.gz"); - if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { - my $cmd = "gunzip -c $grammar | $FILTSCORE -c $CORPUS -t $set | $EXTRA_FILTER gzip > $outgrammar"; - safesystem($outgrammar, $cmd) or die "Can't filter and score grammar!"; - } - return $outgrammar; -} - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } - my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { - my ($filename, $grammar_path) = (@_); - open CDECINI, ">$filename" or die "Can't write $filename: $!"; - print CDECINI <> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! $exitcode; - } -} - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - diff --git a/gi/pipeline/backoff-pipe.pl b/gi/pipeline/backoff-pipe.pl deleted file mode 100644 index ac103c8b..00000000 --- a/gi/pipeline/backoff-pipe.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my @grammars; -my $OUTPUTPREFIX = './giwork/bo.hier.grammar'; -safemkdir($OUTPUTPREFIX); -my $backoff_levels = 1; -my $glue_levels = 1; - -usage() unless &GetOptions('grmr=s@' => \ @grammars, - 'outprefix=s' => \ $OUTPUTPREFIX, - 'bo-lvls=i' => \ $backoff_levels, - 'glue-lvls=i' => \ $glue_levels, -); - -my $OUTDIR = $OUTPUTPREFIX . '/hier'; -print STDERR "@grammars\n"; - - -my %grmr = (); -foreach my $grammar (@grammars) { - $grammar =~ m/\/[^\/]*\.t(\d+)\.[^\/]*/; - $grmr{$1} = $grammar; -} - -my @index = sort keys %grmr; -$OUTDIR = $OUTDIR . join('-',@index); -safemkdir($OUTDIR); -my $BACKOFF_GRMR = $OUTDIR . '/backoff.hier.gz'; -safesystem("echo \"\" | gzip > $BACKOFF_GRMR"); -my $GLUE_GRMR = $OUTDIR . '/glue.hier.gz'; -safesystem("echo \"\" | gzip > $GLUE_GRMR"); -my $joinedgrammars = $OUTDIR . 
'/grammar.hier.gz'; - -join_grammars(); - -for my $i (0..(scalar @index)-2) { - my $freqs = extract_freqs($index[$i], $index[$i+1]); - if ($i < $backoff_levels) { - create_backoff_rules($index[$i],$index[$i+1],$freqs); - } - if ($i < $glue_levels) { - add_glue_rules($index[$i]); - } -} - -output_grammar_info(); - - -sub usage { - print <> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! $exitcode; - } -} - - -sub join_grammars { - print STDERR "\n!!! JOINING GRAMMARS\n"; - if(-e $joinedgrammars) { - print STDERR "$joinedgrammars exists, reusing...\n"; - return; - } - safesystem("echo \"\" | gzip > $joinedgrammars"); - foreach my $i (@index) { - my $g = $grmr{$i}; - safesystem("zcat $g | sed -r -e 's/X([0-9]+)/X$i\\1/g' - | gzip > $g.2.gz"); - safesystem("zcat $joinedgrammars $g.2.gz | gzip > $joinedgrammars.2.gz"); - safesystem("mv $joinedgrammars.2.gz $joinedgrammars"); - } -} - - -sub extract_freqs { - my($grmr1,$grmr2) = @_; - print STDERR "\n!!!EXTRACTING FREQUENCIES: $grmr1->$grmr2\n"; - my $IN_COARSE = substr($grmr{$grmr1},0,index($grmr{$grmr1},".grammar/")) . "/labeled_spans.txt"; - my $IN_FINE = substr($grmr{$grmr2},0,index($grmr{$grmr2},".grammar/")) . "/labeled_spans.txt"; - my $OUT_SPANS = "$OUTDIR/labeled_spans.hier$grmr1-$grmr2.txt"; - my $FREQS = "$OUTDIR/label_freq.hier$grmr1-$grmr2.txt"; - if(-e $OUT_SPANS && -e $FREQS) { - print STDERR "$OUT_SPANS exists, reusing...\n"; - print STDERR "$FREQS exists, reusing...\n"; - return $FREQS; - } - - safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); - - my %FREQ_HIER = (); - my %finehier = (); - - open SPANS, $OUT_SPANS or die $!; - while () { - my ($tmp, $coarse, $fine) = split /\|\|\|/; - my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; - my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; - - foreach my $i (0..(scalar @coarse_spans)-1) { - my $coarse_cat = $coarse_spans[$i]; - my $fine_cat = $fine_spans[$i]; - - $FREQ_HIER{$coarse_cat}{$fine_cat}++; - } - } - close SPANS; - foreach (values %FREQ_HIER) { - my $coarse_freq = $_; - my $total = 0; - $total+=$_ for (values %{ $coarse_freq }); - $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); - } - open FREQS, ">", $FREQS or die $!; - foreach my $coarse_cat (keys %FREQ_HIER) { - print FREQS "$coarse_cat |||"; - foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { - my $freq = $FREQ_HIER{$coarse_cat}{$fine_cat}; - print FREQS " $fine_cat:$freq"; - if(! exists $finehier{$fine_cat} || $finehier{$fine_cat} < $freq) { - $finehier{$fine_cat} = $coarse_cat; - } - } - print FREQS "\n"; - } -# foreach my $fine_cat (keys %finehier) { -# print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -# } - close FREQS; - return $FREQS; -} - - -sub create_backoff_rules { - print STDERR "\n!!! 
CREATING BACKOFF RULES\n"; - my ($grmr1, $grmr2, $freq) = @_; - my $OUTFILE = "$OUTDIR/backoff.hier$grmr1-$grmr2.txt"; - if(-e $OUTFILE) { - print STDERR "$OUTFILE exists, reusing...\n"; - return; - } - open FREQS, $freq or die $!; - open TMP, ">", $OUTFILE or die $!; - while () { - my $line = $_; - $line = m/^(\d+) \|\|\| (.+)$/; - my $coarse = $1; - $line = $2; - my @finefreq = $line =~ m/(\d+):(\S+)/g; - for(my $i = 0; $i < scalar @finefreq; $i+=2) { - my $finecat = $finefreq[$i]; - my $finefreq = $finefreq[$i+1]; - print TMP "[X$grmr1$coarse] ||| [X$grmr2$finecat,1]\t[1] ||| BackoffRule=$finefreq A=0-0\n"; - } - } - close TMP; - close FREQS; - safesystem("zcat $BACKOFF_GRMR | cat - $OUTFILE | gzip > $BACKOFF_GRMR.2.gz"); - safesystem("mv $BACKOFF_GRMR.2.gz $BACKOFF_GRMR"); -} - -sub add_glue_rules { - print STDERR "\n!!! CREATING GLUE RULES\n"; - my ($grmr) = @_; - my $OUTFILE = "$OUTDIR/glue.$grmr.gz"; - if (-e $OUTFILE) { - print STDERR "$OUTFILE exists, reusing...\n"; - return; - } - open TMP, ">", $OUTFILE or die $!; - for my $i (0..($grmr-1)) { - print TMP "[S] ||| [S,1] [X$grmr$i,2] ||| [1] [2] ||| Glue=1\n"; - print TMP "[S] ||| [X$grmr$i,1] ||| [1] ||| GlueTop=1\n"; - } - close TMP; - safesystem("zcat $GLUE_GRMR | cat - $OUTFILE | gzip > $GLUE_GRMR.2.gz"); - safesystem("mv $GLUE_GRMR.2.gz $GLUE_GRMR"); -} - -sub output_grammar_info { - print STDERR "\n!!! GRAMMAR INFORMATION\n"; - print STDOUT "GRAMMAR: \t$joinedgrammars\n"; - print STDOUT "GLUE: \t$GLUE_GRMR\n"; - print STDOUT "BACKOFF: \t$BACKOFF_GRMR\n"; -} diff --git a/gi/pipeline/blacklight.config b/gi/pipeline/blacklight.config deleted file mode 100644 index fc59a604..00000000 --- a/gi/pipeline/blacklight.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... -/usr/users/0/cdyer/ws10smt/data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /usr/users/0/cdyer/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config deleted file mode 100644 index c23d409f..00000000 --- a/gi/pipeline/clsp.config +++ /dev/null @@ -1,10 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/export/ws10smt/data -btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /export/ws10smt/data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /export/ws10smt/data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl deleted file mode 100755 index 4b4529d9..00000000 --- a/gi/pipeline/evaluation-pipeline.pl +++ /dev/null @@ -1,364 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use Getopt::Long; -use Cwd; -my $CWD = getcwd; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } -use LocalConfig; - -my $JOBS = 15; -my $PMEM = "9G"; -my $NUM_TRANSLATIONS = 50; -my $GOAL = "S"; - -# featurize_grammar may add multiple features from a single feature extractor -# the key in this map is the extractor name, the value is a list of the extracted features -my $feat_map = { - "LogRuleCount" => [ "LogRuleCount", "SingletonRule" ] , -# "XFeatures" => [ "XFE","XEF" ] , - "XFeatures" => [ "XFE","XEF","LabelledEF","LabelledFE"], # ,"XE_Singleton","XF_Singleton"] , - "LabelledRuleConditionals" => [ "LabelledFE","LabelledEF" ] , - "LexProb" => [ "LexE2F", "LexF2E" ] , - "BackoffRule" => [ "BackoffRule" ] , - "RulePenalty" => [ "RulePenalty" ] , - "LHSProb" => [ "LHSProb" ] , - "LabellingShape" => [ "LabellingShape" ] , - "GenerativeProb" => [ "GenerativeProb" ] , -}; - -my %init_weights = qw( - EGivenF -0.735245 - FGivenE -0.219391 - Glue -0.306709 - GlueTop 0.0473331 - LanguageModel 2.40403 - LexE2F -0.266989 - LexF2E -0.550373 - LogECount -0.129853 - LogFCount -0.194037 - LogRuleCount 0.256706 - BackoffRule 0.5 - XFE -0.256706 - XEF -0.256706 - XF_Singleton -0.05 - XE_Singleton -0.8 - LabelledFE -0.256706 - LabelledEF -0.256706 - PassThrough -0.9304905 - SingletonE -3.04161 - SingletonF 0.0714027 - SingletonRule -0.889377 - WordPenalty -1.99495 - RulePenalty -0.1 - LabellingShape -0.1 - LHSProb -0.1 - GenerativeProb -0.1 -); - - -# these features are included by default -my @DEFAULT_FEATS = qw( PassThrough Glue GlueTop LanguageModel WordPenalty ); - - - -my $FILTERBYF = "$SCRIPT_DIR/scripts/filter-by-f.pl"; -my $CDEC = "$SCRIPT_DIR/../../decoder/cdec"; -my $PARALLELIZE = "$SCRIPT_DIR/../../vest/parallelize.pl"; -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $VEST = "$SCRIPT_DIR/../../vest"; -die "Can't find vest: $VEST" unless -e $VEST && -d $VEST; -my $DISTVEST = "$VEST/dist-vest.pl"; -my $FILTER = "$EXTOOLS/filter_grammar"; -my $FEATURIZE = "$EXTOOLS/featurize_grammar"; -assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST, $FILTERBYF); - -my $numtopics = 25; - -my $config = "$SCRIPT_DIR/" . (lc environment_name()) . 
'.config'; -print STDERR "CORPORA CONFIGURATION: $config\n"; -open CONF, "<$config" or die "Can't read $config: $!"; -my %paths; -my %corpora; -my %lms; -my %devs; -my %devrefs; -my %tests; -my %testevals; -my $datadir; -print STDERR " LANGUAGE PAIRS:"; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - s/^\s+//; - s/\s+$//; - if (! defined $datadir) { $datadir = $_; next; } - my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/; - $paths{$name} = $path; - $corpora{$name} = $corpus; - $lms{$name} = $lm; - $devs{$name} = $dev; - $devrefs{$name} = $devref; - $tests{$name} = $xtests[0]; - $testevals{$name} = $xtests[1]; - print STDERR " $name"; -} -print STDERR "\n"; - -my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr ); - -my $outdir = "$CWD/exp"; -my $help; -my $FEATURIZER_OPTS = ''; -my $dataDir = '/export/ws10smt/data'; -my @features; -my $bkoffgram; -my $gluegram; -my $oovgram; -my $usefork; -my $lmorder = 3; -my $density; -if (GetOptions( - "backoff-grammar=s" => \$bkoffgram, - "density-prune=f" => \$density, - "glue-grammar=s" => \$gluegram, - "oov-grammar=s" => \$oovgram, - "data=s" => \$dataDir, - "pmem=s" => \$PMEM, - "n=i" => \$NUM_TRANSLATIONS, - "features=s@" => \@features, - "use-fork" => \$usefork, - "jobs=i" => \$JOBS, - "out-dir=s" => \$outdir, - "lmorder=i" => \$lmorder, - "goal=s" => \$GOAL, -) == 0 || @ARGV!=2 || $help) { - print_help(); - exit; -} -my $DENSITY_PRUNE = ''; -if ($density) { - $DENSITY_PRUNE = "--density-prune $density"; -} -if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; } -my @fkeys = keys %$feat_map; -die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0; -my @xfeats; -for my $feat (@features) { - my $rs = $feat_map->{$feat}; - if (!defined $rs) { die "DON'T KNOW ABOUT FEATURE $feat\n"; } - my @xfs = @$rs; - @xfeats = (@xfeats, @xfs); - $FEATURIZER_OPTS .= " -f $feat" unless $feat eq "BackoffRule"; -} -print STDERR "X-FEATS: @xfeats\n"; - -my $lp = $ARGV[0]; -my $grammar = $ARGV[1]; -print STDERR " CORPUS REPO: $dataDir\n"; -print STDERR " LANGUAGE PAIR: $lp\n"; -die "I don't know about that language pair\n" unless $paths{$lp}; -my $corpdir = "$dataDir"; -if ($paths{$lp} =~ /^\//) { $corpdir = $paths{$lp}; } else { $corpdir .= '/' . 
$paths{$lp}; } -die "I can't find the corpora directory: $corpdir" unless -d $corpdir; -print STDERR " GRAMMAR: $grammar\n"; -my $LANG_MODEL = mydircat($corpdir, $lms{$lp}); -print STDERR " LM: $LANG_MODEL\n"; -my $CORPUS = mydircat($corpdir, $corpora{$lp}); -die "Can't find corpus: $CORPUS" unless -f $CORPUS; - -my $dev = mydircat($corpdir, $devs{$lp}); -my $drefs = $devrefs{$lp}; -die "Can't find dev: $dev\n" unless -f $dev; -die "Dev refs not set" unless $drefs; -$drefs = mydircat($corpdir, $drefs); - -my $test = mydircat($corpdir, $tests{$lp}); -my $teval = mydircat($corpdir, $testevals{$lp}); -#die "Can't find test: $test\n" unless -f $test; -#assert_exec($teval); - -`mkdir -p $outdir`; - -# CREATE INIT WEIGHTS -print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n"; -my $weights = mydircat($outdir, "weights.init"); -write_random_weights_file($weights, @xfeats); - -my $bkoff_grmr; -my $glue_grmr; -if($bkoffgram) { - print STDERR "Placing backoff grammar…\n"; - $bkoff_grmr = mydircat($outdir, "backoff.scfg.gz"); - print STDERR "cp $bkoffgram $bkoff_grmr\n"; - safesystem(undef,"cp $bkoffgram $bkoff_grmr"); -} -if($gluegram) { - print STDERR "Placing glue grammar…\n"; - $glue_grmr = mydircat($outdir, "glue.bo.scfg.gz"); - print STDERR "cp $gluegram $glue_grmr\n"; - safesystem(undef,"cp $gluegram $glue_grmr"); -} - -# MAKE DEV -print STDERR "\nFILTERING FOR dev...\n"; -print STDERR "DEV: $dev (REFS=$drefs)\n"; -my $devgrammar = filter($grammar, $dev, 'dev', $outdir); -my $devini = mydircat($outdir, "cdec-dev.ini"); -write_cdec_ini($devini, $devgrammar); - - -# MAKE TEST -print STDERR "\nFILTERING FOR test...\n"; -print STDERR "TEST: $test (EVAL=$teval)\n"; -`mkdir -p $outdir`; -my $testgrammar = filter($grammar, $test, 'test', $outdir); -my $testini = mydircat($outdir, "cdec-test.ini"); -write_cdec_ini($testini, $testgrammar); - - -# VEST -print STDERR "\nMINIMUM ERROR TRAINING\n"; -my $tuned_weights = mydircat($outdir, 'weights.tuned'); -if (-f $tuned_weights) { - print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n"; -} else { - my $cmd = "$DISTVEST $usefork $DENSITY_PRUNE --decode-nodes $JOBS --pmem=$PMEM --ref-files=$drefs --source-file=$dev --weights $weights $devini"; - print STDERR "MERT COMMAND: $cmd\n"; - `rm -rf $outdir/vest 2> /dev/null`; - chdir $outdir or die "Can't chdir to $outdir: $!"; - $weights = `$cmd`; - die "MERT reported non-zero exit code" unless $? == 0; - chomp $weights; - safesystem($tuned_weights, "cp $weights $tuned_weights"); - print STDERR "TUNED WEIGHTS: $tuned_weights\n"; - die "$tuned_weights is missing!" 
unless -f $tuned_weights; -} - -# DECODE -print STDERR "\nDECODE TEST SET\n"; -my $decolog = mydircat($outdir, "test-decode.log"); -my $testtrans = mydircat($outdir, "test.trans"); -my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans"; -safesystem($testtrans, $cmd) or die "Failed to decode test set!"; - - -# EVALUATE -print STDERR "\nEVALUATE TEST SET\n"; -print STDERR "TEST: $testtrans\n"; -$cmd = "$teval $testtrans"; -safesystem(undef, $cmd) or die "Failed to evaluate!"; -exit 0; - - -sub write_random_weights_file { - my ($file, @extras) = @_; - if (-f $file) { - print STDERR "$file exists - REUSING!\n"; - return; - } - open F, ">$file" or die "Can't write $file: $!"; - my @feats = (@DEFAULT_FEATS, @extras); - for my $feat (@feats) { - my $r = rand(0.4) + 0.8; - my $w = $init_weights{$feat} * $r; - if ($w == 0) { $w = 0.0001; print STDERR "WARNING: $feat had no initial weight!\n"; } - print F "$feat $w\n"; - } - close F; -} - -sub filter { - my ($grammar, $set, $name, $outdir) = @_; - my $out1 = mydircat($outdir, "$name.filt.gz"); - my $out2 = mydircat($outdir, "$name.f_feat.gz"); - my $outgrammar = mydircat($outdir, "$name.scfg.gz"); - if (-f $outgrammar) { print STDERR "$outgrammar exists - REUSING!\n"; } else { - my $cmd = "gunzip -c $grammar | $FILTER -t $set | gzip > $out1"; - safesystem($out1, $cmd) or die "Filtering failed."; - $cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2"; - safesystem($out2, $cmd) or die "Featurizing failed"; - $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar"; - safesystem($outgrammar, $cmd) or die "Secondary filtering failed"; - } - return $outgrammar; -} - -sub mydircat { - my ($base, $suffix) = @_; - if ($suffix =~ /^\//) { return $suffix; } - my $res = $base . '/' . $suffix; - $res =~ s/\/\//\//g; - return $res; -} - -sub write_cdec_ini { - my ($filename, $grammar_path) = (@_); - open CDECINI, ">$filename" or die "Can't write $filename: $!"; - my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz"); - my $oov = ($oovgram ? "$oovgram" : "$datadir/oov.scfg.gz"); - print CDECINI <> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! 
$exitcode; - } -} - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl deleted file mode 100755 index e31167a2..00000000 --- a/gi/pipeline/local-gi-pipeline.pl +++ /dev/null @@ -1,465 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use File::Copy; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -use Getopt::Long "GetOptions"; - -my $GZIP = 'gzip'; -my $ZCAT = 'gunzip -c'; -my $SED = 'sed -e'; -my $BASE_PHRASE_MAX_SIZE = 10; -my $COMPLETE_CACHE = 1; -my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors -my $NUM_TOPICS = 50; -my $NUM_TOPICS_COARSE; -my $NUM_TOPICS_FINE = $NUM_TOPICS; -my $NUM_SAMPLES = 1000; -my $CONTEXT_SIZE = 1; -my $BIDIR = 0; -my $TOPICS_CONFIG = "pyp-topics.conf"; -my $LANGUAGE = "target"; -my $LABEL_THRESHOLD = "0"; -my $PRESERVE_PHRASES; - -my $MODEL = "pyp"; -my $NUM_ITERS = 100; -my $PR_SCALE_P = 0; -my $PR_SCALE_C = 0; -my $PR_FLAGS = ""; -my $MORFMARK = ""; - -my $EXTOOLS = "$SCRIPT_DIR/../../extools"; -die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS; -my $PYPTOOLS = "$SCRIPT_DIR/../pyp-topics/src"; -die "Can't find pyp-topics: $PYPTOOLS" unless -e $PYPTOOLS && -d $PYPTOOLS; -my $PYPSCRIPTS = "$SCRIPT_DIR/../pyp-topics/scripts"; -die "Can't find pyp-topics: $PYPSCRIPTS" unless -e $PYPSCRIPTS && -d $PYPSCRIPTS; -my $PRTOOLS = "$SCRIPT_DIR/../posterior-regularisation"; -die "Can't find posterior-regularisation: $PRTOOLS" unless -e $PRTOOLS && -d $PRTOOLS; -my $REDUCER = "$EXTOOLS/mr_stripe_rule_reduce"; -my $C2D = "$PYPSCRIPTS/contexts2documents.py"; -my $S2L = "$PYPSCRIPTS/spans2labels.py"; -my $SPLIT = "$SCRIPT_DIR/../posterior-regularisation/split-languages.py"; - -my $PREM_TRAIN="$PRTOOLS/prjava/train-PR-cluster.sh"; - -my $SORT_KEYS = "$SCRIPT_DIR/scripts/sort-by-key.sh"; -my $PATCH_CORPUS = "$SCRIPT_DIR/scripts/patch-corpus.pl"; -my $REMOVE_TAGS_CORPUS = "$SCRIPT_DIR/scripts/remove-tags-from-corpus.pl"; -my $REMOVE_TAGS_CONTEXT = "$SCRIPT_DIR/scripts/remove-tags-from-contexts.pl"; -my $EXTRACTOR = "$EXTOOLS/extractor"; -my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train"; -my $MORF_DOC_FILTER = "$SCRIPT_DIR/../morf-segmentation/filter_docs.pl"; - -assert_exec($PATCH_CORPUS, $SORT_KEYS, $REDUCER, $EXTRACTOR, - $S2L, $C2D, $TOPIC_TRAIN, $SPLIT, $REMOVE_TAGS_CONTEXT, $REMOVE_TAGS_CORPUS, $MORF_DOC_FILTER); - -my $BACKOFF_GRAMMAR; -my $DEFAULT_CAT; -my $HIER_CAT; -my %FREQ_HIER = (); -my $TAGGED_CORPUS; - -my $NAME_SHORTCUT; - -my $OUTPUT = './giwork'; -usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE, - 'backoff_grammar' => \$BACKOFF_GRAMMAR, - 'output=s' => \$OUTPUT, - 'model=s' => \$MODEL, - 'topics=i' => \$NUM_TOPICS_FINE, - 'coarse_topics=i' => \$NUM_TOPICS_COARSE, - 'trg_context=i' => \$CONTEXT_SIZE, - 'samples=i' => \$NUM_SAMPLES, - 'label_threshold=f' => \$LABEL_THRESHOLD, - 'use_default_cat' => \$DEFAULT_CAT, - 'topics-config=s' => \$TOPICS_CONFIG, - 'iterations=i' => \$NUM_ITERS, - 'pr-scale-phrase=f' => \$PR_SCALE_P, - 'pr-scale-context=f' => \$PR_SCALE_C, - 'pr-flags=s' => \$PR_FLAGS, - 'tagged_corpus=s' => \$TAGGED_CORPUS, - 'language=s' => \$LANGUAGE, - 'get_name_only' => \$NAME_SHORTCUT, - 'preserve_phrases' => \$PRESERVE_PHRASES, - 'morf=s' => \$MORFMARK, - ); -if ($NAME_SHORTCUT) { - $NUM_TOPICS = 
$NUM_TOPICS_FINE; - print STDERR labeled_dir(); - exit 0; -} -usage() unless scalar @ARGV == 1; -my $CORPUS = $ARGV[0]; -open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F; - -$NUM_TOPICS = $NUM_TOPICS_FINE; - -$HIER_CAT = ( $NUM_TOPICS_COARSE ? 1 : 0 ); - -print STDERR " Output: $OUTPUT\n"; -my $DATA_DIR = $OUTPUT . '/corpora'; -my $LEX_NAME = "corpus.f_e_a.$LANGUAGE.lex"; -my $CORPUS_LEX = $DATA_DIR . '/' . $LEX_NAME; # corpus used to extract rules -my $CORPUS_CLUSTER = $DATA_DIR . "/corpus.f_e_a.$LANGUAGE.cluster"; # corpus used for clustering (often identical) - -my $CONTEXT_DIR = $OUTPUT . '/' . context_dir(); -my $CLUSTER_DIR = $OUTPUT . '/' . cluster_dir(); -my $LABELED_DIR = $OUTPUT . '/' . labeled_dir(); -my $CLUSTER_DIR_C; -my $CLUSTER_DIR_F; -my $LABELED_DIR_C; -my $LABELED_DIR_F; -if($HIER_CAT) { - $CLUSTER_DIR_F = $CLUSTER_DIR; - $LABELED_DIR_F = $LABELED_DIR; - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR_C = $OUTPUT . '/' . cluster_dir(); - $LABELED_DIR_C = $OUTPUT . '/' . labeled_dir(); - $NUM_TOPICS = $NUM_TOPICS_FINE; -} -my $GRAMMAR_DIR = $OUTPUT . '/' . grammar_dir(); -print STDERR " Context: $CONTEXT_DIR\n Cluster: $CLUSTER_DIR\n Labeled: $LABELED_DIR\n Grammar: $GRAMMAR_DIR\n"; -safemkdir($OUTPUT) or die "Couldn't create output directory $OUTPUT: $!"; -safemkdir($DATA_DIR) or die "Couldn't create output directory $DATA_DIR: $!"; -safemkdir($CONTEXT_DIR) or die "Couldn't create output directory $CONTEXT_DIR: $!"; -safemkdir($CLUSTER_DIR) or die "Couldn't create output directory $CLUSTER_DIR: $!"; -if($HIER_CAT) { - safemkdir($CLUSTER_DIR_C) or die "Couldn't create output directory $CLUSTER_DIR_C: $!"; - safemkdir($LABELED_DIR_C) or die "Couldn't create output directory $LABELED_DIR_C: $!"; -} -safemkdir($LABELED_DIR) or die "Couldn't create output directory $LABELED_DIR: $!"; -safemkdir($GRAMMAR_DIR) or die "Couldn't create output directory $GRAMMAR_DIR: $!"; -if(-e $TOPICS_CONFIG) { - copy($TOPICS_CONFIG, $CLUSTER_DIR) or die "Copy failed: $!"; -} - -setup_data(); - -if (lc($MODEL) eq "blagree") { - extract_bilingual_context(); -} else { - extract_context(); -} - -if (lc($MODEL) eq "pyp") { - if($HIER_CAT) { - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR = $CLUSTER_DIR_C; - topic_train(); - $NUM_TOPICS = $NUM_TOPICS_FINE; - $CLUSTER_DIR = $CLUSTER_DIR_F; - topic_train(); - } else { - topic_train(); - } -} elsif (lc($MODEL) =~ /pr|em|agree/) { - prem_train(); -} else { die "Unsupported model type: $MODEL. Must be one of PYP or PREM.\n"; } -if($HIER_CAT) { - $NUM_TOPICS = $NUM_TOPICS_COARSE; - $CLUSTER_DIR = $CLUSTER_DIR_C; - $LABELED_DIR = $LABELED_DIR_C; - label_spans_with_topics(); - $NUM_TOPICS = $NUM_TOPICS_FINE; - $CLUSTER_DIR = $CLUSTER_DIR_F; - $LABELED_DIR = $LABELED_DIR_F; - label_spans_with_topics(); - extract_freqs(); -} else { - label_spans_with_topics(); -} -my $res; -if ($BIDIR) { - $res = grammar_extract_bidir(); -} else { - $res = grammar_extract(); -} -print STDERR "\n!!!COMPLETE!!!\n"; -print STDERR "GRAMMAR: $res\nYou should probably run: $SCRIPT_DIR/evaluation-pipeline.pl LANGPAIR giwork/ct1s0.L10.PYP.t4.s20.grammar/grammar.gz -f FEAT1 -f FEAT2\n\n"; -exit 0; - -sub setup_data { - print STDERR "\n!!!PREPARE CORPORA!!!\n"; - if (-f $CORPUS_LEX && $CORPUS_CLUSTER) { - print STDERR "$CORPUS_LEX and $CORPUS_CLUSTER exist, reusing...\n"; - return; - } - copy($CORPUS, $CORPUS_LEX); - if ($TAGGED_CORPUS) { - die "Can't find $TAGGED_CORPUS" unless -f $TAGGED_CORPUS; - my $opt=""; - $opt = "-s" if ($LANGUAGE eq "source"); - $opt = $opt . 
" -a" if ($PRESERVE_PHRASES); - my $cmd="$PATCH_CORPUS $opt $TAGGED_CORPUS $CORPUS_LEX > $CORPUS_CLUSTER"; - safesystem($cmd) or die "Failed to extract contexts."; - } else { - symlink($LEX_NAME, $CORPUS_CLUSTER); - } -} - -sub context_dir { - return "ct${CONTEXT_SIZE}s0.L$BASE_PHRASE_MAX_SIZE.l$LANGUAGE"; -} - -sub cluster_dir { - if (lc($MODEL) eq "pyp") { - return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES"; - } elsif (lc($MODEL) eq "em") { - return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_ITERS"; - } elsif (lc($MODEL) eq "pr") { - return context_dir() . ".PR.t$NUM_TOPICS.i$NUM_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C"; - } elsif (lc($MODEL) eq "agree") { - return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_ITERS"; - } elsif (lc($MODEL) eq "blagree") { - return context_dir() . ".BLAGREE.t$NUM_TOPICS.i$NUM_ITERS"; - } -} - -sub labeled_dir { - if (lc($MODEL) eq "pyp" && $LABEL_THRESHOLD ne "0") { - return cluster_dir() . "_lt$LABEL_THRESHOLD"; - } else { - return cluster_dir(); - } -} - -sub grammar_dir { - # TODO add grammar config options -- adjacent NTs, etc - if($HIER_CAT) { - return cluster_dir() . ".hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.grammar"; - } else { - return labeled_dir() . ".grammar"; - } -} - - - -sub safemkdir { - my $dir = shift; - if (-d $dir) { return 1; } - return mkdir($dir); -} - -sub usage { - print < $CLUSTER_DIR/clusters.txt") or die "Failed to unzip"; - safesystem("$EXTRACTOR --base_phrase_spans -i $CORPUS_CLUSTER -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -S $CONTEXT_SIZE | $S2L $CLUSTER_DIR/clusters.txt $CONTEXT_SIZE $LABEL_THRESHOLD $extra > $OUT_SPANS") or die "Failed to label spans"; - unlink("$CLUSTER_DIR/clusters.txt") or warn "Failed to remove $CLUSTER_DIR/clusters.txt"; - safesystem("paste -d ' ' $CORPUS_LEX $OUT_SPANS | sed 's/ *||| *\$//' > $LABELED_DIR/corpus.src_trg_al_label") or die "Couldn't paste"; - } -} - -sub extract_freqs { - print STDERR "\n!!!EXTRACTING FREQUENCIES\n"; - my $IN_COARSE = "$LABELED_DIR_C/labeled_spans.txt"; - my $IN_FINE = "$LABELED_DIR_F/labeled_spans.txt"; - my $OUT_SPANS = "$LABELED_DIR_F/labeled_spans.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; - my $FREQS = "$LABELED_DIR_F/label_freq.hier$NUM_TOPICS_COARSE-$NUM_TOPICS_FINE.txt"; - my $COARSE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1c/g\'"; #' - my $FINE_EXPR = "\'s/\\(X[0-9][0-9]*\\)/\\1f/g\'"; #' - my %finehier = (); - if (-e $OUT_SPANS) { - print STDERR "$OUT_SPANS exists, reusing...\n"; - } else { - safesystem("paste -d ' ' $IN_COARSE $IN_FINE > $OUT_SPANS"); - } - open SPANS, $OUT_SPANS or die $!; - while () { - my ($tmp, $coarse, $fine) = split /\|\|\|/; - my @coarse_spans = $coarse =~ /\d+-\d+:X(\d+)/g; - my @fine_spans = $fine =~ /\d+-\d+:X(\d+)/g; - - foreach my $i (0..(scalar @coarse_spans)-1) { - my $coarse_cat = $coarse_spans[$i]; - my $fine_cat = $fine_spans[$i]; - - $FREQ_HIER{$coarse_cat}{$fine_cat}++; - } - } - close SPANS; - foreach (values %FREQ_HIER) { - my $coarse_freq = $_; - my $total = 0; - $total+=$_ for (values %{ $coarse_freq }); - $coarse_freq->{$_}=log($coarse_freq->{$_}/$total) for (keys %{ $coarse_freq }); - } - open FREQS, ">", $FREQS or die $!; - foreach my $coarse_cat (keys %FREQ_HIER) { - print FREQS "$coarse_cat |||"; - foreach my $fine_cat (keys %{$FREQ_HIER{$coarse_cat}}) { - my $res = $FREQ_HIER{$coarse_cat}{$fine_cat}; - print FREQS " $fine_cat:$res"; - if(! 
exists $finehier{$fine_cat} || $finehier{$fine_cat} < $res) { - $finehier{$fine_cat} = $coarse_cat; - } - } - print FREQS "\n"; - } -# foreach my $fine_cat (keys %finehier) { -# print FREQS "$fine_cat -> $finehier{$fine_cat}\n"; -# } - close FREQS; - $CLUSTER_DIR = $CLUSTER_DIR_F; -} - -sub grammar_extract { - my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; - print STDERR "\n!!!EXTRACTING GRAMMAR\n"; - my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.gz"; - if (-e $OUTGRAMMAR) { - print STDERR "$OUTGRAMMAR exists, reusing...\n"; - } else { - my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : ""); - my $DEFAULT_CAT_ARG = ($DEFAULT_CAT ? "-d X" : ""); - safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -t $NUM_TOPICS $BACKOFF_ARG $DEFAULT_CAT_ARG | $SORT_KEYS | $REDUCER -p | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; - } - return $OUTGRAMMAR; -} - -sub grammar_extract_bidir { -#gzcat ex.output.gz | ./mr_stripe_rule_reduce -p -b | sort -t $'\t' -k 1 | ./mr_stripe_rule_reduce | gzip > phrase-table.gz - my $LABELED = "$LABELED_DIR/corpus.src_trg_al_label"; - print STDERR "\n!!!EXTRACTING GRAMMAR\n"; - my $OUTGRAMMAR = "$GRAMMAR_DIR/grammar.bidir.gz"; - if (-e $OUTGRAMMAR) { - print STDERR "$OUTGRAMMAR exists, reusing...\n"; - } else { - my $BACKOFF_ARG = ($BACKOFF_GRAMMAR ? "-g" : ""); - safesystem("$EXTRACTOR -i $LABELED -c $ITEMS_IN_MEMORY -L $BASE_PHRASE_MAX_SIZE -b -t $NUM_TOPICS $BACKOFF_ARG | $SORT_KEYS | $REDUCER -p -b | $SORT_KEYS | $REDUCER | $GZIP > $OUTGRAMMAR") or die "Couldn't extract grammar"; - } - return $OUTGRAMMAR; -} - -sub safesystem { - print STDERR "Executing: @_\n"; - system(@_); - if ($? == -1) { - print STDERR "ERROR: Failed to execute: @_\n $!\n"; - exit(1); - } - elsif ($? & 127) { - printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - exit(1); - } - else { - my $exitcode = $? >> 8; - print STDERR "Exit code: $exitcode\n" if $exitcode; - return ! $exitcode; - } -} - diff --git a/gi/pipeline/lticluster.config b/gi/pipeline/lticluster.config deleted file mode 100644 index 3e23c8cb..00000000 --- a/gi/pipeline/lticluster.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/home/cdyer/ws10smt-data -btec /home/cdyer/ws10smt-data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -zhen /home/cdyer/ws10smt-data/chinese-english corpus.zh-en.al lm/c2e.3gram.lm.gz dev_and_test/mt02.src.txt dev_and_test/mt02.ref.* dev_and_test/mt03.src.txt eval-mt03.sh -aren /home/cdyer/ws10smt-data/arabic-english corpus.ar-en-al lm/a2e.3gram.lm.gz dev_and_test/dev.src.txt dev_and_test/dev.ref.txt.* dev_and_test/mt05.src.txt eval-mt05.sh -uren /home/cdyer/ws10smt-data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/cdyer/ws10smt-data/dutch-french corpus.nl-fr.al - diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl deleted file mode 100755 index 0cef0606..00000000 --- a/gi/pipeline/scripts/filter-by-f.pl +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; } - -my $REKEY="$SCRIPT_DIR/rekey.pl"; -my $REFILTER="$SCRIPT_DIR/refilter.pl"; -my $SORT="$SCRIPT_DIR/sort-by-key.sh"; -assert_exec($REKEY, $REFILTER, $SORT); - - -die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3; -my $translations = shift @ARGV; -die "Need number: $translations" unless $translations > 0; -die unless $ARGV[0] =~ /\.gz$/; -die unless $ARGV[1] =~ /\.gz$/; -die if $ARGV[0] eq $ARGV[1]; -die "Can't find $ARGV[0]" unless -f $ARGV[0]; - -my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]"; -safesystem($ARGV[1], $cmd) or die "Filtering failed"; -exit 0; - -sub assert_exec { - my @files = @_; - for my $file (@files) { - die "Can't find $file - did you run make?\n" unless -e $file; - die "Can't execute $file" unless -e $file; - } -}; - -sub safesystem { - my $output = shift @_; - print STDERR "Executing: @_\n"; - system(@_); - if ($? == -1) { - print STDERR "ERROR: Failed to execute: @_\n $!\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - exit(1); - } - elsif ($? & 127) { - printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - exit(1); - } - else { - my $exitcode = $? >> 8; - if ($exitcode) { - print STDERR "Exit code: $exitcode\n"; - if (defined $output && -e $output) { printf STDERR "Removing $output\n"; `rm -rf $output`; } - } - return ! $exitcode; - } -} - diff --git a/gi/pipeline/scripts/patch-corpus.pl b/gi/pipeline/scripts/patch-corpus.pl deleted file mode 100755 index c0eec43e..00000000 --- a/gi/pipeline/scripts/patch-corpus.pl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $PATCH = shift @ARGV; -my $TGT = 1; -my $APPEND; -while ($PATCH eq "-s" || $PATCH eq "-a") { - if ($PATCH eq "-s") { - undef $TGT; - } else { - $APPEND = 1; - } - $PATCH = shift @ARGV; -} - -die "Usage: $0 [-s] [-a] tagged.en[_fr] < lexical.en_fr_al[_...]\n" unless $PATCH; - -open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!"; -my $first=
<P>
; close P; -my @fields = split / \|\|\| /, $first; -die "Bad format!" if (scalar @fields > 2); - -if (scalar @fields != 1) { - # TODO support this - die "Patching source and target not supported yet!"; -} - -my $line = 0; -open P, "<$PATCH" or die "Can't read tagged corpus $PATCH: $!"; -while(my $pline =
<P>
) { - chomp $pline; - $line++; - my $line = <>; - die "Too few lines in lexical corpus!" unless $line; - chomp $line; - @fields = split / \|\|\| /, $line; - my @pwords = split /\s+/, $pline; - if ($TGT) { - my @lwords = split /\s+/, $fields[1]; - die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords); - if ($APPEND) { - foreach my $i (0..(scalar @pwords-1)) { - $lwords[$i] = $lwords[$i] . '_' . $pwords[$i]; - } - $fields[1] = join ' ', @lwords; - } else { - $fields[1] = $pline; - } - } else { # source side - my @lwords = split /\s+/, $fields[0]; - die "Length mismatch in line $line!\n" unless (scalar @pwords == scalar @lwords); - if ($APPEND) { - foreach my $i (0..(scalar @pwords-1)) { - $lwords[$i] = $lwords[$i] . '_' . $pwords[$i]; - } - $fields[0] = join ' ', @lwords; - } else { - $fields[0] = $pline; - } - } - print join ' ||| ', @fields; - print "\n"; -} - - diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl deleted file mode 100755 index a783eb4e..00000000 --- a/gi/pipeline/scripts/refilter.pl +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $NUM_TRANSLATIONS = shift @ARGV; -unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; } -print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n"; - -my $pk = ''; -my %dict; -while(<>) { - s/^(.+)\t//; - my $key = $1; - if ($key ne $pk) { - if ($pk) { - emit_dict(); - } - %dict = (); - $pk = $key; - } - my ($lhs, $f, $e, $s) = split / \|\|\| /; - my $score = 0; - if ($s =~ /XEF=([^ ]+)/) { - $score += $1; - } else { die; } - if ($s =~ /GenerativeProb=([^ ]+)/) { - $score += ($1 / 10); - } else { die; } - $dict{"$lhs ||| $f ||| $e ||| $s"} = $score; -} -emit_dict(); - -sub emit_dict { - my $cc = 0; - for my $k (sort { $dict{$a} <=> $dict{$b} } keys %dict) { - print "$k"; - $cc++; - if ($cc >= $NUM_TRANSLATIONS) { last; } - } -} - diff --git a/gi/pipeline/scripts/rekey.pl b/gi/pipeline/scripts/rekey.pl deleted file mode 100755 index 31eb86b8..00000000 --- a/gi/pipeline/scripts/rekey.pl +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/perl - -while(<>) { - my ($lhs, $f, $e, $s) = split / \|\|\| /; - $f =~ s/\[X[0-9]+\]/\[X\]/g; - print "$f\t$_"; -} - diff --git a/gi/pipeline/scripts/remove-tags-from-contexts.pl b/gi/pipeline/scripts/remove-tags-from-contexts.pl deleted file mode 100755 index 20698816..00000000 --- a/gi/pipeline/scripts/remove-tags-from-contexts.pl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $PHRASE = 'tok'; -my $CONTEXT = 'tag'; - -die "Usage: $0 [--phrase=tok|tag] [--context=tok|tag] < corpus" - unless &GetOptions('phrase=s' => \$PHRASE, 'context=s' => \$CONTEXT); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - my @top = split /\t/, $line; - die unless (scalar @top == 2); - - my @pwords = split /\s+/, $top[0]; - foreach my $token (@pwords) { - #print $token . "\n"; - my @parts = split /_(?!.*_)/, $token; - die unless (scalar @parts == 2); - if ($PHRASE eq "tok") { - $token = $parts[0] - } elsif ($PHRASE eq "tag") { - $token = $parts[1] - } - } - - my @fields = split / \|\|\| /, $top[1]; - foreach my $i (0..((scalar @fields) / 2 - 1)) { - #print $i . ": " . $fields[2*$i] . " of " . (scalar @fields) . "\n"; - my @cwords = split /\s+/, $fields[2*$i]; - foreach my $token (@cwords) { - #print $i . ": " . $token . 
"\n"; - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - if ($CONTEXT eq "tok") { - $token = $parts[0] - } elsif ($CONTEXT eq "tag") { - $token = $parts[1] - } - } - } - $fields[2*$i] = join ' ', @cwords; - } - - print join ' ', @pwords; - print "\t"; - print join ' ||| ', @fields; - print "\n"; -} diff --git a/gi/pipeline/scripts/remove-tags-from-corpus.pl b/gi/pipeline/scripts/remove-tags-from-corpus.pl deleted file mode 100755 index be3e97c0..00000000 --- a/gi/pipeline/scripts/remove-tags-from-corpus.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $LANGUAGE = shift @ARGV; -$LANGUAGE = 'target' unless ($LANGUAGE); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - - my @fields = split / \|\|\| /, $line; - - if ($LANGUAGE eq "source" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[0]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[0] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - if ($LANGUAGE eq "target" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[1]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[1] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - print join ' ||| ', @fields; - print "\n"; -} diff --git a/gi/pipeline/scripts/sort-by-key.sh b/gi/pipeline/scripts/sort-by-key.sh deleted file mode 100755 index 7ae33e03..00000000 --- a/gi/pipeline/scripts/sort-by-key.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -export LANG=C -sort -t $'\t' -k 1 -T /tmp -S 6000000000 - diff --git a/gi/pipeline/scripts/xfeats.pl b/gi/pipeline/scripts/xfeats.pl deleted file mode 100755 index dc578513..00000000 --- a/gi/pipeline/scripts/xfeats.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 x-grammar.scfg[.gz] < cat-grammar.scfg\n" unless scalar @ARGV > 0; - -my $xgrammar = shift @ARGV; -die "Can't find $xgrammar" unless -f $xgrammar; -my $fh; -if ($xgrammar =~ /\.gz$/) { - open $fh, "gunzip -c $xgrammar|" or die "Can't fork: $!"; -} else { - open $fh, "<$xgrammar" or die "Can't read $xgrammar: $!"; -} -print STDERR "Reading X-feats from $xgrammar...\n"; -my %dict; -while(<$fh>) { - chomp; - my ($lhs, $f, $e, $feats) = split / \|\|\| /; - my $xfeats; - my $cc = 0; - my @xfeats = (); - while ($feats =~ /(EGivenF|FGivenE|LogRuleCount|LogECount|LogFCount|SingletonRule|SingletonE|SingletonF)=([^ ]+)( |$)/og) { - push @xfeats, "X_$1=$2"; - } - #print "$lhs ||| $f ||| $e ||| @xfeats\n"; - $dict{"$lhs ||| $f ||| $e"} = "@xfeats"; -} -close $fh; - -print STDERR "Add features...\n"; -while(<>) { - chomp; - my ($lhs, $f, $e) = split / \|\|\| /; - $f=~ s/\[[^]]+,([12])\]/\[X,$1\]/g; - my $xfeats = $dict{"[X] ||| $f ||| $e"}; - die "Can't find x features for: $_\n" unless $xfeats; - print "$_ $xfeats\n"; -} - diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config deleted file mode 100644 index e00a8485..00000000 --- a/gi/pipeline/valhalla.config +++ /dev/null @@ -1,9 +0,0 @@ -# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED -# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... 
-/home/chris/ws10smt/data -btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh -fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al -zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al -aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al -uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh -nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al diff --git a/gi/posterior-regularisation/Corpus.java b/gi/posterior-regularisation/Corpus.java deleted file mode 100644 index 07b27387..00000000 --- a/gi/posterior-regularisation/Corpus.java +++ /dev/null @@ -1,167 +0,0 @@ -import gnu.trove.TIntArrayList; - -import java.io.*; -import java.util.*; -import java.util.regex.Pattern; - -public class Corpus -{ - private Lexicon tokenLexicon = new Lexicon(); - private Lexicon phraseLexicon = new Lexicon(); - private Lexicon contextLexicon = new Lexicon(); - private List edges = new ArrayList(); - private List> phraseToContext = new ArrayList>(); - private List> contextToPhrase = new ArrayList>(); - - public class Edge - { - Edge(int phraseId, int contextId, int count) - { - this.phraseId = phraseId; - this.contextId = contextId; - this.count = count; - } - public int getPhraseId() - { - return phraseId; - } - public TIntArrayList getPhrase() - { - return phraseLexicon.lookup(phraseId); - } - public String getPhraseString() - { - StringBuffer b = new StringBuffer(); - for (int tid: getPhrase().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getContextId() - { - return contextId; - } - public TIntArrayList getContext() - { - return contextLexicon.lookup(contextId); - } - public String getContextString() - { - StringBuffer b = new StringBuffer(); - for (int tid: getContext().toNativeArray()) - { - if (b.length() > 0) - b.append(" "); - b.append(tokenLexicon.lookup(tid)); - } - return b.toString(); - } - public int getCount() - { - return count; - } - private int phraseId; - private int contextId; - private int count; - } - - List getEdges() - { - return edges; - } - - int getNumEdges() - { - return edges.size(); - } - - int getNumPhrases() - { - return phraseLexicon.size(); - } - - List getEdgesForPhrase(int phraseId) - { - return phraseToContext.get(phraseId); - } - - int getNumContexts() - { - return contextLexicon.size(); - } - - List getEdgesForContext(int contextId) - { - return contextToPhrase.get(contextId); - } - - int getNumTokens() - { - return tokenLexicon.size(); - } - - static Corpus readFromFile(Reader in) throws IOException - { - Corpus c = new Corpus(); - - // read in line-by-line - BufferedReader bin = new BufferedReader(in); - String line; - Pattern separator = Pattern.compile(" \\|\\|\\| "); - - while ((line = bin.readLine()) != null) - { - // split into phrase and contexts - StringTokenizer st = new StringTokenizer(line, "\t"); - assert (st.hasMoreTokens()); - String phraseToks = st.nextToken(); - assert (st.hasMoreTokens()); - String rest = st.nextToken(); - assert (!st.hasMoreTokens()); - - // process phrase - st = new StringTokenizer(phraseToks, " "); - TIntArrayList ptoks = new TIntArrayList(); - while (st.hasMoreTokens()) - ptoks.add(c.tokenLexicon.insert(st.nextToken())); - int phraseId = c.phraseLexicon.insert(ptoks); - if (phraseId == c.phraseToContext.size()) - 
c.phraseToContext.add(new ArrayList()); - - // process contexts - String[] parts = separator.split(rest); - assert (parts.length % 2 == 0); - for (int i = 0; i < parts.length; i += 2) - { - // process pairs of strings - context and count - TIntArrayList ctx = new TIntArrayList(); - String ctxString = parts[i]; - String countString = parts[i + 1]; - StringTokenizer ctxStrtok = new StringTokenizer(ctxString, " "); - while (ctxStrtok.hasMoreTokens()) - { - String token = ctxStrtok.nextToken(); - if (!token.equals("")) - ctx.add(c.tokenLexicon.insert(token)); - } - int contextId = c.contextLexicon.insert(ctx); - if (contextId == c.contextToPhrase.size()) - c.contextToPhrase.add(new ArrayList()); - - assert (countString.startsWith("C=")); - Edge e = c.new Edge(phraseId, contextId, - Integer.parseInt(countString.substring(2).trim())); - c.edges.add(e); - - // index the edge for fast phrase, context lookup - c.phraseToContext.get(phraseId).add(e); - c.contextToPhrase.get(contextId).add(e); - } - } - - return c; - } -} diff --git a/gi/posterior-regularisation/Lexicon.java b/gi/posterior-regularisation/Lexicon.java deleted file mode 100644 index 9f0245ee..00000000 --- a/gi/posterior-regularisation/Lexicon.java +++ /dev/null @@ -1,32 +0,0 @@ -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class Lexicon -{ - public int insert(T word) - { - Integer i = wordToIndex.get(word); - if (i == null) - { - i = indexToWord.size(); - wordToIndex.put(word, i); - indexToWord.add(word); - } - return i; - } - - public T lookup(int index) - { - return indexToWord.get(index); - } - - public int size() - { - return indexToWord.size(); - } - - private Map wordToIndex = new HashMap(); - private List indexToWord = new ArrayList(); -} \ No newline at end of file diff --git a/gi/posterior-regularisation/PhraseContextModel.java b/gi/posterior-regularisation/PhraseContextModel.java deleted file mode 100644 index 85bcfb89..00000000 --- a/gi/posterior-regularisation/PhraseContextModel.java +++ /dev/null @@ -1,466 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for tonight ? ||| C=1 ||| seats for ? ||| C=1 ||| i see ? ||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating q(t) = P(t|p,c) for all x,c -// - m-step is estimating model parameters P(c,t|p) = P(t) P(c|t) -// - PR uses alternate e-step, which first optimizes lambda -// min_q KL(q||p) + delta sum_pt max_c E_q[phi_ptc] -// where -// q(t|p,c) propto p(t,c|p) exp( -phi_ptc ) -// Then q is used to obtain expectations for vanilla M-step. 
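A minimal sketch of the alternate e-step described just above, assuming the standard L1/Linf posterior-regularisation dual; the notation n_{pc} (edge count), Z_{pc} (local normaliser) and delta (the constraint scale, constraintScale in the code) is introduced here only for illustration, and this is a reading of the formulation rather than a statement of the original implementation:

    \min_{\lambda \ge 0} \; \sum_{p,c} n_{pc} \log Z_{pc}(\lambda),
    \qquad Z_{pc}(\lambda) = \sum_t p(t \mid p,c)\, e^{-\lambda_{pct}},
    \qquad q(t \mid p,c) = \frac{p(t \mid p,c)\, e^{-\lambda_{pct}}}{Z_{pc}(\lambda)},
    \quad \text{s.t.} \; \sum_c \lambda_{pct} \le \delta \;\; \text{for every } (p,t),

with gradient \( \partial / \partial \lambda_{pct} \, \sum n \log Z = -\, n_{pc}\, q(t \mid p,c) \). This appears consistent with the projected gradient descent over a SimplexProjection of scale constraintScale used further down in this file, where the objective accumulates count-weighted log Z terms and the gradient accumulates -count * q.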
- -// Sexing it up: -// - learn p-specific conditionals P(t|p) -// - or generate phrase internals, e.g., generate edge words from -// different distribution to central words -// - agreement between phrase->context model and context->phrase model - -import java.io.*; -import optimization.gradientBasedMethods.*; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.gradientBasedMethods.stats.ProjectedOptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.GenericPickFirstStep; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.linesearch.WolfRuleLineSearch; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.NormalizedProjectedGradientL2Norm; -import optimization.stopCriteria.NormalizedValueDifference; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; -import optimization.util.MathUtils; -import java.util.*; -import java.util.regex.*; -import gnu.trove.TDoubleArrayList; -import gnu.trove.TIntArrayList; -import static java.lang.Math.*; - -class PhraseContextModel -{ - // model/optimisation configuration parameters - int numTags; - boolean posteriorRegularisation = true; - double constraintScale = 3; // FIXME: make configurable - - // copied from L1LMax in depparsing code - final double c1= 0.0001, c2=0.9, stoppingPrecision = 1e-5, maxStep = 10; - final int maxZoomEvals = 10, maxExtrapolationIters = 200; - int maxProjectionIterations = 200; - int minOccurrencesForProjection = 0; - - // book keeping - int numPositions; - Random rng = new Random(); - - // training set - Corpus training; - - // model parameters (learnt) - double emissions[][][]; // position in 0 .. 
3 x tag x word Pr(word | tag, position) - double prior[][]; // phrase x tag Pr(tag | phrase) - double lambda[]; // edge = (phrase, context) x tag flattened lagrange multipliers - - PhraseContextModel(Corpus training, int tags) - { - this.training = training; - this.numTags = tags; - assert (!training.getEdges().isEmpty()); - assert (numTags > 1); - - // now initialise emissions - numPositions = training.getEdges().get(0).getContext().size(); - assert (numPositions > 0); - - emissions = new double[numPositions][numTags][training.getNumTokens()]; - prior = new double[training.getNumEdges()][numTags]; - if (posteriorRegularisation) - lambda = new double[training.getNumEdges() * numTags]; - - for (double[][] emissionTW : emissions) - { - for (double[] emissionW : emissionTW) - { - randomise(emissionW); -// for (int i = 0; i < emissionW.length; ++i) -// emissionW[i] = i+1; -// normalise(emissionW); - } - } - - for (double[] priorTag : prior) - { - randomise(priorTag); -// for (int i = 0; i < priorTag.length; ++i) -// priorTag[i] = i+1; -// normalise(priorTag); - } - } - - void expectationMaximisation(int numIterations) - { - double lastLlh = Double.NEGATIVE_INFINITY; - - for (int iteration = 0; iteration < numIterations; ++iteration) - { - double emissionsCounts[][][] = new double[numPositions][numTags][training.getNumTokens()]; - double priorCounts[][] = new double[training.getNumPhrases()][numTags]; - - // E-step - double llh = 0; - if (posteriorRegularisation) - { - EStepDualObjective objective = new EStepDualObjective(); - - // copied from x2y2withconstraints -// LineSearchMethod ls = new ArmijoLineSearchMinimizationAlongProjectionArc(new InterpolationPickFirstStep(1)); -// OptimizerStats stats = new OptimizerStats(); -// ProjectedGradientDescent optimizer = new ProjectedGradientDescent(ls); -// CompositeStopingCriteria compositeStop = new CompositeStopingCriteria(); -// compositeStop.add(new ProjectedGradientL2Norm(0.001)); -// compositeStop.add(new ValueDifference(0.001)); -// optimizer.setMaxIterations(50); -// boolean succeed = optimizer.optimize(objective,stats,compositeStop); - - // copied from depparser l1lmaxobjective - ProjectedOptimizerStats stats = new ProjectedOptimizerStats(); - GenericPickFirstStep pickFirstStep = new GenericPickFirstStep(1); - LineSearchMethod linesearch = new WolfRuleLineSearch(pickFirstStep, c1, c2); - ProjectedGradientDescent optimizer = new ProjectedGradientDescent(linesearch); - optimizer.setMaxIterations(maxProjectionIterations); - CompositeStopingCriteria stop = new CompositeStopingCriteria(); - stop.add(new NormalizedProjectedGradientL2Norm(stoppingPrecision)); - stop.add(new NormalizedValueDifference(stoppingPrecision)); - boolean succeed = optimizer.optimize(objective, stats, stop); - - System.out.println("Ended optimzation Projected Gradient Descent\n" + stats.prettyPrint(1)); - //System.out.println("Solution: " + objective.parameters); - if (!succeed) - System.out.println("Failed to optimize"); - //System.out.println("Ended optimization in " + optimizer.getCurrentIteration()); - - //lambda = objective.getParameters(); - llh = objective.primal(); - - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - for (int t = 0; t < numTags; t++) - { - double p = objective.q.get(i).get(j).get(t); - priorCounts[i][t] += e.getCount() * p; - TIntArrayList tokens = e.getContext(); - for (int k = 0; k < tokens.size(); ++k) - 
emissionsCounts[k][t][tokens.get(k)] += e.getCount() * p; - } - } - } - } - else - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - priorCounts[i][t] += e.getCount() * probs[t]; - for (int k = 0; k < tokens.size(); ++k) - emissionsCounts[j][t][tokens.get(k)] += e.getCount() * probs[t]; - } - } - } - } - - // M-step: normalise - for (double[][] emissionTW : emissionsCounts) - for (double[] emissionW : emissionTW) - normalise(emissionW); - - for (double[] priorTag : priorCounts) - normalise(priorTag); - - emissions = emissionsCounts; - prior = priorCounts; - - System.out.println("Iteration " + iteration + " llh " + llh); - -// if (llh - lastLlh < 1e-4) -// break; -// else -// lastLlh = llh; - } - } - - static double normalise(double probs[]) - { - double z = 0; - for (double p : probs) - z += p; - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - return z; - } - - void randomise(double probs[]) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - z += probs[i]; - } - - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - static int argmax(double probs[]) - { - double m = Double.NEGATIVE_INFINITY; - int mi = -1; - for (int i = 0; i < probs.length; ++i) - { - if (probs[i] > m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - - double[] posterior(int phraseId, Corpus.Edge e) // unnormalised - { - double probs[] = new double[numTags]; - TIntArrayList tokens = e.getContext(); - for (int t = 0; t < numTags; ++t) - { - probs[t] = prior[phraseId][t]; - for (int k = 0; k < tokens.size(); ++k) - probs[t] *= emissions[k][t][tokens.get(k)]; - } - return probs; - } - - void displayPosterior() - { - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (Corpus.Edge e: edges) - { - double probs[] = posterior(i, e); - normalise(probs); - - // emit phrase - System.out.print(e.getPhraseString()); - System.out.print("\t"); - System.out.print(e.getContextString()); - System.out.print("||| C=" + e.getCount() + " |||"); - - int t = argmax(probs); - System.out.print(" " + t + " ||| " + probs[t]); - // for (int t = 0; t < numTags; ++t) - // System.out.print(" " + probs[t]); - System.out.println(); - } - } - } - - public static void main(String[] args) - { - assert (args.length >= 2); - try - { - Corpus corpus = Corpus.readFromFile(new FileReader(new File(args[0]))); - PhraseContextModel model = new PhraseContextModel(corpus, Integer.parseInt(args[1])); - model.expectationMaximisation(Integer.parseInt(args[2])); - model.displayPosterior(); - } - catch (IOException e) - { - System.out.println("Failed to read input file: " + args[0]); - e.printStackTrace(); - } - } - - class EStepDualObjective extends ProjectedObjective - { - List> conditionals; // phrase id x context # x tag - precomputed - List> q; // ditto, but including exp(-lambda) terms - double objective = 0; // log(z) - // Objective.gradient = d log(z) / d lambda = E_q[phi] - double llh = 0; - - public EStepDualObjective() - { - super(); - // compute conditionals p(context, tag | phrase) for all training instances - conditionals = new ArrayList>(training.getNumPhrases()); - q = new ArrayList>(training.getNumPhrases()); - 
for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - conditionals.add(new ArrayList(edges.size())); - q.add(new ArrayList(edges.size())); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - double probs[] = posterior(i, e); - double z = normalise(probs); - llh += log(z) * e.getCount(); - conditionals.get(i).add(new TDoubleArrayList(probs)); - q.get(i).add(new TDoubleArrayList(probs)); - } - } - - gradient = new double[training.getNumEdges()*numTags]; - setInitialParameters(lambda); - computeObjectiveAndGradient(); - } - - @Override - public double[] projectPoint(double[] point) - { - SimplexProjection p = new SimplexProjection(constraintScale); - - double[] newPoint = point.clone(); - int edgeIndex = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - for (int t = 0; t < numTags; t++) - { - double[] subPoint = new double[edges.size()]; - for (int j = 0; j < edges.size(); ++j) - subPoint[j] = point[edgeIndex+j*numTags+t]; - - p.project(subPoint); - for (int j = 0; j < edges.size(); ++j) - newPoint[edgeIndex+j*numTags+t] = subPoint[j]; - } - - edgeIndex += edges.size() * numTags; - } -// System.out.println("Proj from: " + Arrays.toString(point)); -// System.out.println("Proj to: " + Arrays.toString(newPoint)); - return newPoint; - } - - @Override - public void setParameters(double[] params) - { - super.setParameters(params); - computeObjectiveAndGradient(); - } - - @Override - public double[] getGradient() - { - gradientCalls += 1; - return gradient; - } - - @Override - public double getValue() - { - functionCalls += 1; - return objective; - } - - public void computeObjectiveAndGradient() - { - int edgeIndex = 0; - objective = 0; - Arrays.fill(gradient, 0); - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - - for (int j = 0; j < edges.size(); ++j) - { - Corpus.Edge e = edges.get(j); - - double z = 0; - for (int t = 0; t < numTags; t++) - { - double v = conditionals.get(i).get(j).get(t) * exp(-parameters[edgeIndex+t]); - q.get(i).get(j).set(t, v); - z += v; - } - objective += log(z) * e.getCount(); - - for (int t = 0; t < numTags; t++) - { - double v = q.get(i).get(j).get(t) / z; - q.get(i).get(j).set(t, v); - gradient[edgeIndex+t] -= e.getCount() * v; - } - - edgeIndex += numTags; - } - } -// System.out.println("computeObjectiveAndGradient logz=" + objective); -// System.out.println("lambda= " + Arrays.toString(parameters)); -// System.out.println("gradient=" + Arrays.toString(gradient)); - } - - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append(getClass().getCanonicalName()).append(" with "); - sb.append(parameters.length).append(" parameters and "); - sb.append(training.getNumPhrases() * numTags).append(" constraints"); - return sb.toString(); - } - - double primal() - { - // primal = llh + KL(q||p) + scale * sum_pt max_c E_q[phi_pct] - // kl = sum_Y q(Y) log q(Y) / p(Y|X) - // = sum_Y q(Y) { -lambda . phi(Y) - log Z } - // = -log Z - lambda . E_q[phi] - // = -objective + lambda . 
gradient - - double kl = -objective + MathUtils.dotProduct(parameters, gradient); - double l1lmax = 0; - for (int i = 0; i < training.getNumPhrases(); ++i) - { - List edges = training.getEdgesForPhrase(i); - for (int t = 0; t < numTags; t++) - { - double lmax = Double.NEGATIVE_INFINITY; - for (int j = 0; j < edges.size(); ++j) - lmax = max(lmax, q.get(i).get(j).get(t)); - l1lmax += lmax; - } - } - - return llh + kl + constraintScale * l1lmax; - } - } -} diff --git a/gi/posterior-regularisation/README b/gi/posterior-regularisation/README deleted file mode 100644 index a3d54ffc..00000000 --- a/gi/posterior-regularisation/README +++ /dev/null @@ -1,3 +0,0 @@ - 557 ./cdec_extools/extractor -i btec/split.zh-en.al -c 500000 -L 12 -C | sort -t $'\t' -k 1 | ./cdec_extools/mr_stripe_rule_reduce > btec.concordance - 559 wc -l btec.concordance - 588 cat btec.concordance | sed 's/.* //' | awk '{ for (i=1; i < NF; i++) { x=substr($i, 1, 2); if (x == "C=") printf "\n"; else if (x != "||") printf "%s ", $i; }; printf "\n"; }' | sort | uniq | wc -l diff --git a/gi/posterior-regularisation/alphabet.hh b/gi/posterior-regularisation/alphabet.hh deleted file mode 100644 index 1db928da..00000000 --- a/gi/posterior-regularisation/alphabet.hh +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _alphabet_hh -#define _alphabet_hh - -#include -#include -#include -#include -#include - -// Alphabet: indexes a set of types -template -class Alphabet: protected std::map -{ -public: - Alphabet() {}; - - bool empty() const { return std::map::empty(); } - int size() const { return std::map::size(); } - - int operator[](const T &k) const - { - typename std::map::const_iterator cit = find(k); - if (cit != std::map::end()) - return cit->second; - else - return -1; - } - - int lookup(const T &k) const { return (*this)[k]; } - - int insert(const T &k) - { - int sz = size(); - assert((unsigned) sz == _items.size()); - - std::pair::iterator, bool> - ins = std::map::insert(make_pair(k, sz)); - - if (ins.second) - _items.push_back(k); - - return ins.first->second; - } - - const T &type(int i) const - { - assert(i >= 0); - assert(i < size()); - return _items[i]; - } - - std::ostream &display(std::ostream &out, int i) const - { - return out << type(i); - } - -private: - std::vector _items; -}; - -#endif diff --git a/gi/posterior-regularisation/canned.concordance b/gi/posterior-regularisation/canned.concordance deleted file mode 100644 index 710973ff..00000000 --- a/gi/posterior-regularisation/canned.concordance +++ /dev/null @@ -1,4 +0,0 @@ -a 0 0 0 0 ||| C=1 ||| 1 1 1 1 ||| C=1 ||| 2 2 2 2 ||| C=1 -b 0 0 0 0 ||| C=1 ||| 1 1 1 1 ||| C=1 -c 2 2 2 2 ||| C=1 ||| 4 4 4 4 ||| C=1 ||| 5 5 5 5 ||| C=1 -d 4 4 4 4 ||| C=1 ||| 5 5 5 5 ||| C=1 diff --git a/gi/posterior-regularisation/em.cc b/gi/posterior-regularisation/em.cc deleted file mode 100644 index f6c9fd68..00000000 --- a/gi/posterior-regularisation/em.cc +++ /dev/null @@ -1,830 +0,0 @@ -// Input of the form: -// " the phantom of the opera " tickets for tonight ? ||| C=1 ||| seats for ? ||| C=1 ||| i see ? ||| C=1 -// phrase TAB [context]+ -// where context = phrase ||| C=... 
which are separated by ||| - -// Model parameterised as follows: -// - each phrase, p, is allocated a latent state, t -// - this is used to generate the contexts, c -// - each context is generated using 4 independent multinomials, one for each position LL, L, R, RR - -// Training with EM: -// - e-step is estimating P(t|p,c) for all x,c -// - m-step is estimating model parameters P(p,c,t) = P(t) P(p|t) P(c|t) - -// Sexing it up: -// - constrain the posteriors P(t|c) and P(t|p) to have few high-magnitude entries -// - improve the generation of phrase internals, e.g., generate edge words from -// different distribution to central words - -#include "alphabet.hh" -#include "log_add.hh" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace std::tr1; - -const int numTags = 5; -const int numIterations = 100; -const bool posterior_regularisation = true; -const double PHRASE_VIOLATION_WEIGHT = 10; -const double CONTEXT_VIOLATION_WEIGHT = 0; -const bool includePhraseProb = false; - -// Data structures: -Alphabet lexicon; -typedef vector Phrase; -typedef tuple Context; -Alphabet phrases; -Alphabet contexts; - -typedef map ContextCounts; -typedef map PhraseCounts; -typedef map PhraseToContextCounts; -typedef map ContextToPhraseCounts; - -PhraseToContextCounts concordancePhraseToContexts; -ContextToPhraseCounts concordanceContextToPhrases; - -typedef vector Dist; -typedef vector ConditionalDist; -Dist prior; // class -> P(class) -vector probCtx; // word -> class -> P(word | class), for each position of context word -ConditionalDist probPhrase; // class -> P(word | class) -Dist probPhraseLength; // class -> P(length | class) expressed as geometric distribution parameter - -mt19937 randomGenerator((size_t) time(NULL)); -uniform_real uniDist(0.0, 1e-1); -variate_generator< mt19937, uniform_real > rng(randomGenerator, uniDist); - -void addRandomNoise(Dist &d); -void normalise(Dist &d); -void addTo(Dist &d, const Dist &e); -int argmax(const Dist &d); - -map > lambda_indices; - -Dist conditional_probs(const Phrase &phrase, const Context &context, double *normalisation = 0); -template -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation); -//Dist penalised_conditionals(const Phrase &phrase, const Context &context, const double *lambda, double *normalisation = 0); -double penalised_log_likelihood(int n, const double *lambda, double *gradient, void *data); -void optimise_lambda(double delta, double gamma, vector &lambda); -double expected_violation_phrases(const double *lambda); -double expected_violation_contexts(const double *lambda); -double primal_kl_divergence(const double *lambda); -double dual(const double *lambda); -void print_primal_dual(const double *lambda, double delta, double gamma); - -ostream &operator<<(ostream &, const Phrase &); -ostream &operator<<(ostream &, const Context &); -ostream &operator<<(ostream &, const Dist &); -ostream &operator<<(ostream &, const ConditionalDist &); - -int -main(int argc, char *argv[]) -{ - randomGenerator.seed(time(NULL)); - - int edges = 0; - istream &input = cin; - while (input.good()) - { - // read the phrase - string phraseString; - Phrase phrase; - getline(input, phraseString, '\t'); - istringstream pinput(phraseString); - string token; - while (pinput >> token) - phrase.push_back(lexicon.insert(token)); - int phraseId = phrases.insert(phrase); - - // read the rest, storing each context - string 
remainder; - getline(input, remainder, '\n'); - istringstream rinput(remainder); - Context context(-1, -1, -1, -1); - int index = 0; - while (rinput >> token) - { - if (token != "|||" && token != "") - { - if (index < 4) - { - // eugh! damn templates - switch (index) - { - case 0: get<0>(context) = lexicon.insert(token); break; - case 1: get<1>(context) = lexicon.insert(token); break; - case 2: get<2>(context) = lexicon.insert(token); break; - case 3: get<3>(context) = lexicon.insert(token); break; - default: assert(false); - } - index += 1; - } - else if (token.find("C=") == 0) - { - int contextId = contexts.insert(context); - int count = atoi(token.substr(strlen("C=")).c_str()); - concordancePhraseToContexts[phraseId][contextId] += count; - concordanceContextToPhrases[contextId][phraseId] += count; - index = 0; - context = Context(-1, -1, -1, -1); - edges += 1; - } - } - } - - // trigger EOF - input >> ws; - } - - cout << "Read in " << phrases.size() << " phrases" - << " and " << contexts.size() << " contexts" - << " and " << edges << " edges" - << " and " << lexicon.size() << " word types\n"; - - // FIXME: filter out low count phrases and low count contexts (based on individual words?) - // now populate model parameters with uniform + random noise - prior.resize(numTags, 1.0); - addRandomNoise(prior); - normalise(prior); - - probCtx.resize(4, ConditionalDist(numTags, Dist(lexicon.size(), 1.0))); - if (includePhraseProb) - probPhrase.resize(numTags, Dist(lexicon.size(), 1.0)); - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - { - addRandomNoise(probCtx[j][t]); - normalise(probCtx[j][t]); - } - if (includePhraseProb) - { - addRandomNoise(probPhrase[t]); - normalise(probPhrase[t]); - } - } - if (includePhraseProb) - { - probPhraseLength.resize(numTags, 0.5); // geometric distribution p=0.5 - addRandomNoise(probPhraseLength); - } - - cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << endl; - - vector lambda; - - // now do EM training - for (int iteration = 0; iteration < numIterations; ++iteration) - { - cout << "EM iteration " << iteration << endl; - - if (posterior_regularisation) - optimise_lambda(PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT, lambda); - //cout << "\tlambda " << lambda << endl; - - Dist countsPrior(numTags, 0.0); - vector countsCtx(4, ConditionalDist(numTags, Dist(lexicon.size(), 1e-10))); - ConditionalDist countsPhrase(numTags, Dist(lexicon.size(), 1e-10)); - Dist countsPhraseLength(numTags, 0.0); - Dist nPhrases(numTags, 0.0); - - double llh = 0; - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - - // e-step: estimate latent class probs; compile (class,word) stats for m-step - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double z = 0; - Dist tagCounts; - if (!posterior_regularisation) - tagCounts = conditional_probs(phrase, context, &z); - else - tagCounts = penalised_conditionals(phrase, context, lambda, &z); - - llh += log(z) * ccit->second; - addTo(countsPrior, tagCounts); // FIXME: times ccit->secon - - for (int t = 0; t < numTags; ++t) - { - for (int j = 0; j < 4; ++j) - countsCtx[j][t][get<0>(context)] += tagCounts[t] * ccit->second; - - if 
(includePhraseProb) - { - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - countsPhrase[t][*pit] += tagCounts[t] * ccit->second; - countsPhraseLength[t] += phrase.size() * tagCounts[t] * ccit->second; - nPhrases[t] += tagCounts[t] * ccit->second; - } - } - } - } - - cout << "M-step\n"; - - // m-step: normalise prior and (class,word) stats and assign to model parameters - normalise(countsPrior); - prior = countsPrior; - for (int t = 0; t < numTags; ++t) - { - //cout << "\t\tt " << t << " prior " << countsPrior[t] << "\n"; - for (int j = 0; j < 4; ++j) - normalise(countsCtx[j][t]); - if (includePhraseProb) - { - normalise(countsPhrase[t]); - countsPhraseLength[t] = nPhrases[t] / countsPhraseLength[t]; - } - } - probCtx = countsCtx; - if (includePhraseProb) - { - probPhrase = countsPhrase; - probPhraseLength = countsPhraseLength; - } - - double *larray = new double[lambda.size()]; - copy(lambda.begin(), lambda.end(), larray); - print_primal_dual(larray, PHRASE_VIOLATION_WEIGHT, CONTEXT_VIOLATION_WEIGHT); - delete [] larray; - - //cout << "\tllh " << llh << endl; - //cout << "\tprior: " << prior << "\n"; - //cout << "\tcontext: " << probCtx << "\n"; - //cout << "\tphrase: " << probPhrase << "\n"; - //cout << "\tphraseLen: " << probPhraseLength << "\n"; - } - - // output class membership - for (PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.begin(); - pcit != concordancePhraseToContexts.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist tagCounts = conditional_probs(phrase, context, 0); - cout << phrase << " ||| " << context << " ||| " << argmax(tagCounts) << "\n"; - } - } - - return 0; -} - -void addRandomNoise(Dist &d) -{ - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit += rng(); -} - -void normalise(Dist &d) -{ - double z = 0; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - z += *dit; - for (Dist::iterator dit = d.begin(); dit != d.end(); ++dit) - *dit /= z; -} - -void addTo(Dist &d, const Dist &e) -{ - assert(d.size() == e.size()); - for (int i = 0; i < (int) d.size(); ++i) - d[i] += e[i]; -} - -int argmax(const Dist &d) -{ - double best = d[0]; - int index = 0; - for (int i = 1; i < (int) d.size(); ++i) - { - if (d[i] > best) - { - best = d[i]; - index = i; - } - } - return index; -} - -ostream &operator<<(ostream &out, const Phrase &phrase) -{ - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - lexicon.display(((pit == phrase.begin()) ? out : out << " "), *pit); - return out; -} - -ostream &operator<<(ostream &out, const Context &context) -{ - lexicon.display(out, get<0>(context)); - lexicon.display(out << " ", get<1>(context)); - lexicon.display(out << " ", get<2>(context)); - lexicon.display(out << " ", get<3>(context)); - return out; -} - -ostream &operator<<(ostream &out, const Dist &dist) -{ - for (Dist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : " ") << *dit; - return out; -} - -ostream &operator<<(ostream &out, const ConditionalDist &dist) -{ - for (ConditionalDist::const_iterator dit = dist.begin(); dit != dist.end(); ++dit) - out << ((dit == dist.begin()) ? "" : "; ") << *dit; - return out; -} - -// FIXME: slow - just use the phrase index, context index to do the mapping -// (n.b. 
it's a sparse setup, not just equal to 3d array index) -int -lambda_index(const Phrase &phrase, const Context &context, int tag) -{ - return lambda_indices[phrase][context] + tag; -} - -template -Dist -penalised_conditionals(const Phrase &phrase, const Context &context, - const T &lambda, double *normalisation) -{ - Dist d = conditional_probs(phrase, context, 0); - - double z = 0; - for (int t = 0; t < numTags; ++t) - { - d[t] *= exp(-lambda[lambda_index(phrase, context, t)]); - z += d[t]; - } - - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - d[t] /= z; - - return d; -} - -Dist -conditional_probs(const Phrase &phrase, const Context &context, double *normalisation) -{ - Dist tagCounts(numTags, 0.0); - double z = 0; - for (int t = 0; t < numTags; ++t) - { - double prob = prior[t]; - prob *= (probCtx[0][t][get<0>(context)] * probCtx[1][t][get<1>(context)] * - probCtx[2][t][get<2>(context)] * probCtx[3][t][get<3>(context)]); - - if (includePhraseProb) - { - prob *= pow(1 - probPhraseLength[t], phrase.size() - 1) * probPhraseLength[t]; - for (Phrase::const_iterator pit = phrase.begin(); pit != phrase.end(); ++pit) - prob *= probPhrase[t][*pit]; - } - - tagCounts[t] = prob; - z += prob; - } - if (normalisation) - *normalisation = z; - - for (int t = 0; t < numTags; ++t) - tagCounts[t] /= z; - - return tagCounts; -} - -double -penalised_log_likelihood(int n, const double *lambda, double *grad, void *) -{ - // return log Z(lambda, theta) over the corpus - // where theta are the global parameters (prior, probCtx*, probPhrase*) - // and lambda are lagrange multipliers for the posterior sparsity constraints - // - // this is formulated as: - // f = log Z(lambda) = sum_i log ( sum_i p_theta(t_i|p_i,c_i) exp [-lambda_{t_i,p_i,c_i}] ) - // where i indexes the training examples - specifying the (p, c) pair (which may occur with count > 1) - // - // with derivative: - // f'_{tpc} = frac { - count(t,p,c) p_theta(t|p,c) exp (-lambda_{t,p,c}) } - // { sum_t' p_theta(t'|p,c) exp (-lambda_{t',p,c}) } - - //cout << "penalised_log_likelihood with lambda "; - //copy(lambda, lambda+n, ostream_iterator(cout, " ")); - //cout << "\n"; - - double f = 0; - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double z = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - - f += ccit->second * log(z); - //cout << "\tphrase: " << phrase << " context: " << context << " count: " << ccit->second << " z " << z << endl; - //cout << "\t\tscores: " << scores << "\n"; - - if (grad) - { - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); // FIXME: redundant lookups - assert(grad[i] == 0.0); - grad[i] = - ccit->second * scores[t]; - } - } - } - } - - //cout << "penalised_log_likelihood returning " << f; - //if (grad) - //{ - //cout << "\ngradient: "; - //copy(grad, grad+n, ostream_iterator(cout, " ")); - //} - //cout << "\n"; - - return f; -} - -typedef struct -{ - // one of p or c should be set to -1, in which case it will be marginalised out - // i.e. 
sum_p' lambda_{p'ct} <= threshold - // or sum_c' lambda_{pc't} <= threshold - int p, c, t, threshold; -} constraint_data; - -double -constraint_and_gradient(int n, const double *lambda, double *grad, void *data) -{ - constraint_data *d = (constraint_data *) data; - assert(d->t >= 0); - assert(d->threshold >= 0); - - //cout << "constraint_and_gradient: t " << d->t << " p " << d->p << " c " << d->c << " tau " << d->threshold << endl; - //cout << "\tlambda "; - //copy(lambda, lambda+n, ostream_iterator(cout, " ")); - //cout << "\n"; - - // FIXME: it's crazy to use a dense gradient here => will only have a handful of non-zero entries - if (grad) - { - for (int i = 0; i < n; ++i) - grad[i] = 0.0; - } - - //cout << "constraint_and_gradient: " << d->p << "; " << d->c << "; " << d->t << "; " << d->threshold << endl; - - if (d->p >= 0) - { - assert(d->c < 0); - // sum_c lambda_pct <= delta [a.k.a. threshold] - // => sum_c lambda_pct - delta <= 0 - // derivative_pct = { 1, if p and t match; 0, otherwise } - - double val = -d->threshold; - - const Phrase &phrase = phrases.type(d->p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(d->p); - assert(pcit != concordancePhraseToContexts.end()); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } - else - { - assert(d->c >= 0); - assert(d->p < 0); - // sum_p lambda_pct <= gamma [a.k.a. threshold] - // => sum_p lambda_pct - gamma <= 0 - // derivative_pct = { 1, if c and t match; 0, otherwise } - - double val = -d->threshold; - - const Context &context = contexts.type(d->c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(d->c); - assert(cpit != concordanceContextToPhrases.end()); - for (PhraseCounts::iterator pcit = cpit->second.begin(); - pcit != cpit->second.end(); ++pcit) - { - const Phrase &phrase = phrases.type(pcit->first); - int i = lambda_index(phrase, context, d->t); - val += lambda[i]; - if (grad) grad[i] = 1; - } - //cout << "\treturning " << val << endl; - - return val; - } -} - -void -optimise_lambda(double delta, double gamma, vector &lambdav) -{ - int num_lambdas = lambdav.size(); - if (lambda_indices.empty() || lambdav.empty()) - { - lambda_indices.clear(); - lambdav.clear(); - - int i = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - lambda_indices[phrase][context] = i; - i += numTags; - } - } - num_lambdas = i; - lambdav.resize(num_lambdas); - } - //cout << "optimise_lambda: #langrange multipliers " << num_lambdas << endl; - - // FIXME: better to work with an implicit representation to save memory usage - int num_constraints = (((delta > 0) ? phrases.size() : 0) + ((gamma > 0) ? 
contexts.size() : 0)) * numTags; - //cout << "optimise_lambda: #constraints " << num_constraints << endl; - constraint_data *data = new constraint_data[num_constraints]; - int i = 0; - if (delta > 0) - { - for (int p = 0; p < phrases.size(); ++p) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = p; - d.c = -1; - d.t = t; - d.threshold = delta; - } - } - } - - if (gamma > 0) - { - for (int c = 0; c < contexts.size(); ++c) - { - for (int t = 0; t < numTags; ++t, ++i) - { - constraint_data &d = data[i]; - d.p = -1; - d.c = c; - d.t = t; - d.threshold = gamma; - } - } - } - assert(i == num_constraints); - - double lambda[num_lambdas]; - double lb[num_lambdas], ub[num_lambdas]; - for (i = 0; i < num_lambdas; ++i) - { - lambda[i] = lambdav[i]; // starting value - lb[i] = 0; // lower bound - if (delta <= 0) // upper bound - ub[i] = gamma; - else if (gamma <= 0) - ub[i] = delta; - else - assert(false); - } - - //print_primal_dual(lambda, delta, gamma); - - double minf; - int error_code = nlopt_minimize_constrained(NLOPT_LN_COBYLA, num_lambdas, penalised_log_likelihood, NULL, - num_constraints, constraint_and_gradient, data, sizeof(constraint_data), - lb, ub, lambda, &minf, -HUGE_VAL, 0.0, 0.0, 1e-4, NULL, 0, 0.0); - //cout << "optimise error code " << error_code << endl; - - //print_primal_dual(lambda, delta, gamma); - - delete [] data; - - if (error_code < 0) - cout << "WARNING: optimisation failed with error code: " << error_code << endl; - //else - //{ - //cout << "success; minf " << minf << endl; - //print_primal_dual(lambda, delta, gamma); - //} - - lambdav = vector(&lambda[0], &lambda[0] + num_lambdas); -} - -// FIXME: inefficient - cache the scores -double -expected_violation_phrases(const double *lambda) -{ - // sum_pt max_c E_q[phi_pct] - double violation = 0; - - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: inefficient - cache the scores -double -expected_violation_contexts(const double *lambda) -{ - // sum_ct max_p E_q[phi_pct] - double violation = 0; - - for (int c = 0; c < contexts.size(); ++c) - { - const Context &context = contexts.type(c); - ContextToPhraseCounts::iterator cpit = concordanceContextToPhrases.find(c); - - for (int t = 0; t < numTags; ++t) - { - double best = 0; - for (PhraseCounts::iterator pit = cpit->second.begin(); - pit != cpit->second.end(); ++pit) - { - const Phrase &phrase = phrases.type(pit->first); - Dist scores = penalised_conditionals(phrase, context, lambda, 0); - best = max(best, scores[t]); - } - violation += best; - } - } - - return violation; -} - -// FIXME: possibly inefficient -double -primal_likelihood() // FIXME: primal evaluation needs to use lambda and calculate l1linf terms -{ - double llh = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = 
contexts.type(ccit->first); - double z = 0; - Dist scores = conditional_probs(phrase, context, &z); - llh += ccit->second * log(z); - } - } - return llh; -} - -// FIXME: inefficient - cache the scores -double -primal_kl_divergence(const double *lambda) -{ - // return KL(q || p) = sum_y q(y) { log q(y) - log p(y | x) } - // = sum_y q(y) { log p(y | x) - lambda . phi(x, y) - log Z - log p(y | x) } - // = sum_y q(y) { - lambda . phi(x, y) } - log Z - // and q(y) factors with each edge, ditto for Z - - double feature_sum = 0, log_z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - - double local_z = 0; - double local_f = 0; - Dist d = conditional_probs(phrase, context, 0); - for (int t = 0; t < numTags; ++t) - { - int i = lambda_index(phrase, context, t); - double s = d[t] * exp(-lambda[i]); - local_f += lambda[i] * s; - local_z += s; - } - - log_z += ccit->second * log(local_z); - feature_sum += ccit->second * (local_f / local_z); - } - } - - return -feature_sum - log_z; -} - -// FIXME: inefficient - cache the scores -double -dual(const double *lambda) -{ - // return log(Z) = - log { sum_y p(y | x) exp( - lambda . phi(x, y) } - // n.b. have flipped the sign as we're minimising - - double z = 0; - for (int p = 0; p < phrases.size(); ++p) - { - const Phrase &phrase = phrases.type(p); - PhraseToContextCounts::const_iterator pcit = concordancePhraseToContexts.find(p); - for (ContextCounts::const_iterator ccit = pcit->second.begin(); - ccit != pcit->second.end(); ++ccit) - { - const Context &context = contexts.type(ccit->first); - double lz = 0; - Dist scores = penalised_conditionals(phrase, context, lambda, &z); - z += lz * ccit->second; - } - } - return log(z); -} - -void -print_primal_dual(const double *lambda, double delta, double gamma) -{ - double likelihood = primal_likelihood(); - double kl = primal_kl_divergence(lambda); - double sum_pt = expected_violation_phrases(lambda); - double sum_ct = expected_violation_contexts(lambda); - //double d = dual(lambda); - - cout << "\tllh=" << likelihood - << " kl=" << kl - << " violations phrases=" << sum_pt - << " contexts=" << sum_ct - //<< " primal=" << (kl + delta * sum_pt + gamma * sum_ct) - //<< " dual=" << d - << " objective=" << (likelihood - kl + delta * sum_pt + gamma * sum_ct) - << endl; -} diff --git a/gi/posterior-regularisation/invert.hh b/gi/posterior-regularisation/invert.hh deleted file mode 100644 index d06356e9..00000000 --- a/gi/posterior-regularisation/invert.hh +++ /dev/null @@ -1,45 +0,0 @@ -// The following code inverts the matrix input using LU-decomposition with -// backsubstitution of unit vectors. Reference: Numerical Recipies in C, 2nd -// ed., by Press, Teukolsky, Vetterling & Flannery. -// Code written by Fredrik Orderud. -// http://www.crystalclearsoftware.com/cgi-bin/boost_wiki/wiki.pl?LU_Matrix_Inversion - -#ifndef INVERT_MATRIX_HPP -#define INVERT_MATRIX_HPP - -// REMEMBER to update "lu.hpp" header includes from boost-CVS -#include -#include -#include -#include -#include -#include - -namespace ublas = boost::numeric::ublas; - -/* Matrix inversion routine. 
- Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ -template -bool invert_matrix(const ublas::matrix& input, ublas::matrix& inverse) -{ - using namespace boost::numeric::ublas; - typedef permutation_matrix pmatrix; - // create a working copy of the input - matrix A(input); - // create a permutation matrix for the LU-factorization - pmatrix pm(A.size1()); - - // perform LU-factorization - int res = lu_factorize(A,pm); - if( res != 0 ) return false; - - // create identity matrix of "inverse" - inverse.assign(ublas::identity_matrix(A.size1())); - - // backsubstitute to get the inverse - lu_substitute(A, pm, inverse); - - return true; -} - -#endif //INVERT_MATRIX_HPP diff --git a/gi/posterior-regularisation/linesearch.py b/gi/posterior-regularisation/linesearch.py deleted file mode 100644 index 5a3f2e9c..00000000 --- a/gi/posterior-regularisation/linesearch.py +++ /dev/null @@ -1,58 +0,0 @@ -## Automatically adapted for scipy Oct 07, 2005 by convertcode.py - -from scipy.optimize import minpack2 -import numpy - -import __builtin__ -pymin = __builtin__.min - -def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, - args=(), c1=1e-4, c2=0.9, amax=50): - - fc = 0 - gc = 0 - phi0 = old_fval - derphi0 = numpy.dot(gfk,pk) - alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0) - # trevor: added this test - alpha1 = pymin(alpha1,amax) - - if isinstance(myfprime,type(())): - eps = myfprime[1] - fprime = myfprime[0] - newargs = (f,eps) + args - gradient = False - else: - fprime = myfprime - newargs = args - gradient = True - - xtol = 1e-14 - amin = 1e-8 - isave = numpy.zeros((2,), numpy.intc) - dsave = numpy.zeros((13,), float) - task = 'START' - fval = old_fval - gval = gfk - - while 1: - stp,fval,derphi,task = minpack2.dcsrch(alpha1, phi0, derphi0, c1, c2, - xtol, task, amin, amax,isave,dsave) - #print 'minpack2.dcsrch', alpha1, phi0, derphi0, c1, c2, xtol, task, amin, amax,isave,dsave - #print 'returns', stp,fval,derphi,task - - if task[:2] == 'FG': - alpha1 = stp - fval = f(xk+stp*pk,*args) - fc += 1 - gval = fprime(xk+stp*pk,*newargs) - if gradient: gc += 1 - else: fc += len(xk) + 1 - phi0 = fval - derphi0 = numpy.dot(gval,pk) - else: - break - - if task[:5] == 'ERROR' or task[1:4] == 'WARN': - stp = None # failed - return stp, fc, gc, fval, old_fval, gval diff --git a/gi/posterior-regularisation/log_add.hh b/gi/posterior-regularisation/log_add.hh deleted file mode 100644 index e0620c5a..00000000 --- a/gi/posterior-regularisation/log_add.hh +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef log_add_hh -#define log_add_hh - -#include -#include -#include -#include - -template -struct Log -{ - static T zero() { return -std::numeric_limits::infinity(); } - - static T add(T l1, T l2) - { - if (l1 == zero()) return l2; - if (l1 > l2) - return l1 + std::log(1 + exp(l2 - l1)); - else - return l2 + std::log(1 + exp(l1 - l2)); - } - - static T subtract(T l1, T l2) - { - //std::assert(l1 >= l2); - return l1 + log(1 - exp(l2 - l1)); - } -}; - -#endif diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar deleted file mode 120000 index da8bf761..00000000 --- a/gi/posterior-regularisation/prjava.jar +++ /dev/null @@ -1 +0,0 @@ -prjava/prjava-20100708.jar \ No newline at end of file diff --git a/gi/posterior-regularisation/prjava/Makefile b/gi/posterior-regularisation/prjava/Makefile deleted file mode 100755 index bd3bfca0..00000000 --- a/gi/posterior-regularisation/prjava/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -all: - ant dist - -check: - echo no tests - 
-clean: - ant clean diff --git a/gi/posterior-regularisation/prjava/build.xml b/gi/posterior-regularisation/prjava/build.xml deleted file mode 100644 index 7222b3c8..00000000 --- a/gi/posterior-regularisation/prjava/build.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar b/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar deleted file mode 100644 index 43b4b369..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/commons-math-2.1.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar b/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar deleted file mode 100644 index 56373621..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/jopt-simple-3.2.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar b/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar deleted file mode 100644 index 3e59fbf3..00000000 Binary files a/gi/posterior-regularisation/prjava/lib/trove-2.0.2.jar and /dev/null differ diff --git a/gi/posterior-regularisation/prjava/src/arr/F.java b/gi/posterior-regularisation/prjava/src/arr/F.java deleted file mode 100644 index be0a6ed6..00000000 --- a/gi/posterior-regularisation/prjava/src/arr/F.java +++ /dev/null @@ -1,99 +0,0 @@ -package arr; - -import java.util.Arrays; -import java.util.Random; - -public class F { - public static Random rng = new Random(); - - public static void randomise(double probs[]) - { - randomise(probs, true); - } - - public static void randomise(double probs[], boolean normalise) - { - double z = 0; - for (int i = 0; i < probs.length; ++i) - { - probs[i] = 10 + rng.nextDouble(); - if (normalise) - z += probs[i]; - } - - if (normalise) - for (int i = 0; i < probs.length; ++i) - probs[i] /= z; - } - - public static void uniform(double probs[]) - { - for (int i = 0; i < probs.length; ++i) - probs[i] = 1.0 / probs.length; - } - - public static void l1normalize(double [] a){ - double sum=0; - for(int i=0;i m) - { - m = probs[i]; - mi = i; - } - } - return mi; - } - -} diff --git a/gi/posterior-regularisation/prjava/src/data/Corpus.java b/gi/posterior-regularisation/prjava/src/data/Corpus.java deleted file mode 100644 index 425ede11..00000000 --- a/gi/posterior-regularisation/prjava/src/data/Corpus.java +++ /dev/null @@ -1,233 +0,0 @@ -package data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Scanner; - -public class Corpus { - - public static final String alphaFilename="../posdata/corpus.alphabet"; - public static final String tagalphaFilename="../posdata/corpus.tag.alphabet"; - -// public static final String START_SYM=""; - public static final String END_SYM=""; - public static final String NUM_TOK=""; - - public static final String UNK_TOK=""; - - private ArrayListsent; - private ArrayListdata; - - public ArrayListtag; - public ArrayListtagData; - - public static boolean convertNumTok=true; - - private HashMapfreq; - public HashMapvocab; - - public HashMaptagVocab; - private int tagV; - - private int V; - - public static void main(String[] args) { - Corpus c=new Corpus("../posdata/en_test.conll"); - System.out.println( - Arrays.toString(c.get(0)) - ); - System.out.println( - Arrays.toString(c.getInt(0)) - ); - - System.out.println( - Arrays.toString(c.get(1)) - ); - System.out.println( - Arrays.toString(c.getInt(1)) - ); - } - - public Corpus(String 
filename,HashMapdict){ - V=0; - tagV=0; - freq=new HashMap(); - tagVocab=new HashMap(); - vocab=dict; - - sent=new ArrayList(); - tag=new ArrayList(); - - Scanner sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - // s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - sent.add(s.toArray(new String[0])); - s=new ArrayList(); - // s.add(START_SYM); - continue; - } - String tok=toks[1].toLowerCase(); - s.add(tok); - } - sc.close(); - - buildData(); - } - - public Corpus(String filename){ - V=0; - freq=new HashMap(); - vocab=new HashMap(); - tagVocab=new HashMap(); - - sent=new ArrayList(); - tag=new ArrayList(); - - System.out.println("Reading:"+filename); - - Scanner sc=io.FileUtil.openInFile(filename); - ArrayLists=new ArrayList(); - ArrayListtags=new ArrayList(); - //s.add(START_SYM); - while(sc.hasNextLine()){ - String line=sc.nextLine(); - String toks[]=line.split("\t"); - if(toks.length<2){ - s.add(END_SYM); - tags.add(END_SYM); - if(s.size()>2){ - sent.add(s.toArray(new String[0])); - tag.add(tags.toArray(new String [0])); - } - s=new ArrayList(); - tags=new ArrayList(); - // s.add(START_SYM); - continue; - } - - String tok=toks[1].toLowerCase(); - if(convertNumTok && tok.matches(".*\\d.*")){ - tok=NUM_TOK; - } - s.add(tok); - - if(toks.length>3){ - tok=toks[3].toLowerCase(); - }else{ - tok="_"; - } - tags.add(tok); - - } - sc.close(); - - for(int i=0;i(); - for(int i=0;i(); - for(int i=0;i2){ - vocab.put(key, V); - V++; - } - } - io.SerializedObjects.writeSerializedObject(vocab, alphaFilename); - io.SerializedObjects.writeSerializedObject(tagVocab,tagalphaFilename); - } - - private void addTag(String tag){ - Integer i=tagVocab.get(tag); - if(i==null){ - tagVocab.put(tag, tagV); - tagV++; - } - } - -} diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMM.java b/gi/posterior-regularisation/prjava/src/hmm/HMM.java deleted file mode 100644 index 17a4679f..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMM.java +++ /dev/null @@ -1,579 +0,0 @@ -package hmm; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Scanner; - -public class HMM { - - - //trans[i][j]=prob of going FROM i to j - double [][]trans; - double [][]emit; - double []pi; - int [][]data; - int [][]tagdata; - - double logtrans[][]; - - public HMMObjective o; - - public static void main(String[] args) { - - } - - public HMM(int n_state,int n_emit,int [][]data){ - trans=new double [n_state][n_state]; - emit=new double[n_state][n_emit]; - pi=new double [n_state]; - System.out.println(" random initial parameters"); - fillRand(trans); - fillRand(emit); - fillRand(pi); - - this.data=data; - - } - - private void fillRand(double [][] a){ - for(int i=0;i=0;n--){ - for(int i=0;imaxprob){ - maxprob=p[seq.length-1][i]; - maxIdx=i; - } - } - int ans[]=new int [seq.length]; - ans[seq.length-1]=maxIdx; - for(int i=seq.length-2;i>=0;i--){ - ans[i]=backp[i+1][ans[i+1]]; - } - return ans; - } - - public double l1norm(double a[]){ - double norm=0; - for(int i=0;i s=new ArrayList(); - int state=sample(pi); - int sym=sample(emit[state]); - while(sym!=terminalSym){ - s.add(sym); - state=sample(trans[state]); - sym=sample(emit[state]); - } - - int ans[]=new int [s.size()]; - for(int i=0;i=r){ - return i; - } - } - return p.length-1; - } - - public void train(int tagdata[][]){ - double trans_exp_cnt[][]=new 
double [trans.length][trans.length]; - double emit_exp_cnt[][]=new double[trans.length][emit[0].length]; - double start_exp_cnt[]=new double[trans.length]; - - for(int i=0;imaxwt[i][d[sentNum][n]]){ - maxwt[i][d[sentNum][n]]=py; - } - - } - } - - //the last state - int len=post.length; - for(int i=0;imaxwt[i][d[sentNum][len-1]]){ - maxwt[i][d[sentNum][len-1]]=py; - } - - } - - } - - } - -}//end of class diff --git a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java b/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java deleted file mode 100644 index 70b6c966..00000000 --- a/gi/posterior-regularisation/prjava/src/hmm/HMMObjective.java +++ /dev/null @@ -1,351 +0,0 @@ -package hmm; - -import gnu.trove.TIntArrayList; -import optimization.gradientBasedMethods.ProjectedGradientDescent; -import optimization.gradientBasedMethods.ProjectedObjective; -import optimization.gradientBasedMethods.stats.OptimizerStats; -import optimization.linesearch.ArmijoLineSearchMinimizationAlongProjectionArc; -import optimization.linesearch.InterpolationPickFirstStep; -import optimization.linesearch.LineSearchMethod; -import optimization.projections.SimplexProjection; -import optimization.stopCriteria.CompositeStopingCriteria; -import optimization.stopCriteria.ProjectedGradientL2Norm; -import optimization.stopCriteria.StopingCriteria; -import optimization.stopCriteria.ValueDifference; - -public class HMMObjective extends ProjectedObjective{ - - - private static final double GRAD_DIFF = 3; - public static double INIT_STEP_SIZE=10; - public static double VAL_DIFF=1000; - - private HMM hmm; - double[] newPoint ; - - //posterior[sent num][tok num][tag]=index into lambda - private int posteriorMap[][][]; - //projection[word][tag].get(occurence)=index into lambda - private TIntArrayList projectionMap[][]; - - //Size of the simplex - public double scale=10; - private SimplexProjection projection; - - private int wordFreq[]; - private static int MIN_FREQ=10; - private int numWordsToProject=0; - - private int n_param; - - public double loglikelihood; - - public HMMObjective(HMM h){ - hmm=h; - - countWords(); - buildMap(); - - gradient=new double [n_param]; - projection = new SimplexProjection(scale); - newPoint = new double[n_param]; - setInitialParameters(new double[n_param]); - - } - - /**@brief counts word frequency in the corpus - * - */ - private void countWords(){ - wordFreq=new int [hmm.emit[0].length]; - for(int i=0;i

- %(title)s - - Headers - - - Classes - - - Index - -
-''' % self.args ) - self.section = { - 'headers' : self._getChild('library-reference',id='%(id)s.headers' % self.args), - 'classes' : self._getChild('index',id='%(id)s.classes' % self.args), - 'index' : self._getChild('index',id='%(id)s.index' % self.args) - } - #~ Remove the index sections if we aren't generating it. - if not self.args['index']: - self.section['classes'].parentNode.removeChild(self.section['classes']) - self.section['classes'].unlink() - del self.section['classes'] - self.section['index'].parentNode.removeChild(self.section['index']) - self.section['index'].unlink() - del self.section['index'] - #~ The symbols, per Doxygen notion, that we translated. - self.symbols = {} - #~ Map of Doxygen IDs and BoostBook IDs, so we can translate as needed. - self.idmap = {} - #~ Marks generation, to prevent redoing it. - self.generated = False - - #~ Add an Doxygen generated XML document to the content we are translating. - def addDox( self, document ): - self._translateNode(document.documentElement) - - #~ Turns the internal XML tree into an output UTF-8 string. - def tostring( self ): - self._generate() - #~ return self.boostbook.toprettyxml(' ') - return self.boostbook.toxml('utf-8') - - #~ Does post-processing on the partial generated content to generate additional info - #~ now that we have the complete source documents. - def _generate( self ): - if not self.generated: - self.generated = True - symbols = self.symbols.keys() - symbols.sort() - #~ Populate the header section. - for symbol in symbols: - if self.symbols[symbol]['kind'] in ('header'): - self.section['headers'].appendChild(self.symbols[symbol]['dom']) - for symbol in symbols: - if self.symbols[symbol]['kind'] not in ('namespace', 'header'): - container = self._resolveContainer(self.symbols[symbol], - self.symbols[self.symbols[symbol]['header']]['dom']) - if container.nodeName != 'namespace': - ## The current BoostBook to Docbook translation doesn't - ## respect, nor assign, IDs to inner types of any kind. - ## So nuke the ID entry so as not create bogus links. - del self.idmap[self.symbols[symbol]['id']] - container.appendChild(self.symbols[symbol]['dom']) - self._rewriteIDs(self.boostbook.documentElement) - - #~ Rewrite the various IDs from Doxygen references to the newly created - #~ BoostBook references. - def _rewriteIDs( self, node ): - if node.nodeName in ('link'): - if (self.idmap.has_key(node.getAttribute('linkend'))): - #~ A link, and we have someplace to repoint it at. - node.setAttribute('linkend',self.idmap[node.getAttribute('linkend')]) - else: - #~ A link, but we don't have a generated target for it. - node.removeAttribute('linkend') - elif hasattr(node,'hasAttribute') and node.hasAttribute('id') and self.idmap.has_key(node.getAttribute('id')): - #~ Simple ID, and we have a translation. - node.setAttribute('id',self.idmap[node.getAttribute('id')]) - #~ Recurse, and iterate, depth-first traversal which turns out to be - #~ left-to-right and top-to-bottom for the document. 
- if node.firstChild: - self._rewriteIDs(node.firstChild) - if node.nextSibling: - self._rewriteIDs(node.nextSibling) - - def _resolveContainer( self, cpp, root ): - container = root - for ns in cpp['namespace']: - node = self._getChild('namespace',name=ns,root=container) - if not node: - node = container.appendChild( - self._createNode('namespace',name=ns)) - container = node - for inner in cpp['name'].split('::'): - node = self._getChild(name=inner,root=container) - if not node: - break - container = node - return container - - def _setID( self, id, name ): - self.idmap[id] = name.replace('::','.').replace('/','.') - #~ print '--| setID:',id,'::',self.idmap[id] - - #~ Translate a given node within a given context. - #~ The translation dispatches to a local method of the form - #~ "_translate[_context0,...,_contextN]", and the keyword args are - #~ passed along. If there is no translation handling method we - #~ return None. - def _translateNode( self, *context, **kwargs ): - node = None - names = [ ] - for c in context: - if c: - if not isinstance(c,xml.dom.Node): - suffix = '_'+c.replace('-','_') - else: - suffix = '_'+c.nodeName.replace('-','_') - node = c - names.append('_translate') - names = map(lambda x: x+suffix,names) - if node: - for name in names: - if hasattr(self,name): - return getattr(self,name)(node,**kwargs) - return None - - #~ Translates the children of the given parent node, appending the results - #~ to the indicated target. For nodes not translated by the translation method - #~ it copies the child over and recurses on that child to translate any - #~ possible interior nodes. Hence this will translate the entire subtree. - def _translateChildren( self, parent, **kwargs ): - target = kwargs['target'] - for n in parent.childNodes: - child = self._translateNode(n,target=target) - if child: - target.appendChild(child) - else: - child = n.cloneNode(False) - if hasattr(child,'data'): - child.data = re.sub(r'\s+',' ',child.data) - target.appendChild(child) - self._translateChildren(n,target=child) - - #~ Translate the given node as a description, into the description subnode - #~ of the target. If no description subnode is present in the target it - #~ is created. - def _translateDescription( self, node, target=None, tag='description', **kwargs ): - description = self._getChild(tag,root=target) - if not description: - description = target.appendChild(self._createNode(tag)) - self._translateChildren(node,target=description) - return description - - #~ Top level translation of: ..., - #~ translates the children. - def _translate_doxygen( self, node ): - #~ print '_translate_doxygen:', node.nodeName - result = [] - for n in node.childNodes: - newNode = self._translateNode(n) - if newNode: - result.append(newNode) - return result - - #~ Top level translation of: - #~ - #~ - #~ - #~ ... - #~ - #~ ... - #~ - #~ ... - #~ - #~ builds the class and symbol sections, if requested. - def _translate_doxygenindex( self, node ): - #~ print '_translate_doxygenindex:', node.nodeName - if self.args['index']: - entries = [] - classes = [] - #~ Accumulate all the index entries we care about. 
- for n in node.childNodes: - if n.nodeName == 'compound': - if n.getAttribute('kind') not in ('file','dir','define'): - cpp = self._cppName(self._getChildData('name',root=n)) - entry = { - 'name' : cpp['name'], - 'compoundname' : cpp['compoundname'], - 'id' : n.getAttribute('refid') - } - if n.getAttribute('kind') in ('class','struct'): - classes.append(entry) - entries.append(entry) - for m in n.childNodes: - if m.nodeName == 'member': - cpp = self._cppName(self._getChildData('name',root=m)) - entry = { - 'name' : cpp['name'], - 'compoundname' : cpp['compoundname'], - 'id' : n.getAttribute('refid') - } - if hasattr(m,'getAttribute') and m.getAttribute('kind') in ('class','struct'): - classes.append(entry) - entries.append(entry) - #~ Put them in a sensible order. - entries.sort(lambda x,y: cmp(x['name'].lower(),y['name'].lower())) - classes.sort(lambda x,y: cmp(x['name'].lower(),y['name'].lower())) - #~ And generate the BoostBook for them. - self._translate_index_(entries,target=self.section['index']) - self._translate_index_(classes,target=self.section['classes']) - return None - - #~ Translate a set of index entries in the BoostBook output. The output - #~ is grouped into groups of the first letter of the entry names. - def _translate_index_(self, entries, target=None, **kwargs ): - i = 0 - targetID = target.getAttribute('id') - while i < len(entries): - dividerKey = entries[i]['name'][0].upper() - divider = target.appendChild(self._createNode('indexdiv',id=targetID+'.'+dividerKey)) - divider.appendChild(self._createText('title',dividerKey)) - while i < len(entries) and dividerKey == entries[i]['name'][0].upper(): - iename = entries[i]['name'] - ie = divider.appendChild(self._createNode('indexentry')) - ie = ie.appendChild(self._createText('primaryie',iename)) - while i < len(entries) and entries[i]['name'] == iename: - ie.appendChild(self.boostbook.createTextNode(' (')) - ie.appendChild(self._createText( - 'link',entries[i]['compoundname'],linkend=entries[i]['id'])) - ie.appendChild(self.boostbook.createTextNode(')')) - i += 1 - - #~ Translate a ..., - #~ by retranslating with the "kind" of compounddef. - def _translate_compounddef( self, node, target=None, **kwargs ): - return self._translateNode(node,node.getAttribute('kind')) - - #~ Translate a .... For - #~ namespaces we just collect the information for later use as there is no - #~ currently namespaces are not included in the BoostBook format. In the future - #~ it might be good to generate a namespace index. - def _translate_compounddef_namespace( self, node, target=None, **kwargs ): - namespace = { - 'id' : node.getAttribute('id'), - 'kind' : 'namespace', - 'name' : self._getChildData('compoundname',root=node), - 'brief' : self._getChildData('briefdescription',root=node), - 'detailed' : self._getChildData('detaileddescription',root=node), - 'parsed' : False - } - if self.symbols.has_key(namespace['name']): - if not self.symbols[namespace['name']]['parsed']: - self.symbols[namespace['name']]['parsed'] = True - #~ for n in node.childNodes: - #~ if hasattr(n,'getAttribute'): - #~ self._translateNode(n,n.getAttribute('kind'),target=target,**kwargs) - else: - self.symbols[namespace['name']] = namespace - #~ self._setID(namespace['id'],namespace['name']) - return None - - #~ Translate a ..., which - #~ forwards to the kind=struct as they are the same. - def _translate_compounddef_class( self, node, target=None, **kwargs ): - return self._translate_compounddef_struct(node,tag='class',target=target,**kwargs) - - #~ Translate a ... 
into: - #~
- #~ - #~ ... - #~ - #~
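The struct/class translators below, like every other _translate_* handler in this file, are reached through the name-based dispatch performed by _translateNode earlier: candidate method names are built from the call context ("_translate_" plus nodeName/kind suffixes) and the first attribute that exists on the translator is invoked. A stripped-down sketch of that convention (a simplified stand-in, not the original method):

    def dispatch(translator, node, *context, **kwargs):
        # Each context element (a DOM node or a plain string such as a "kind")
        # contributes a suffix; every suffix run ending at the last element gives
        # a candidate name, longest first, e.g. _translate_memberdef_function_param,
        # _translate_function_param, _translate_param.
        suffixes = ['_' + (c if isinstance(c, str) else c.nodeName).replace('-', '_')
                    for c in context if c]
        names = []
        for s in suffixes:
            names = [n + s for n in names] + ['_translate' + s]
        for name in names:
            if hasattr(translator, name):
                return getattr(translator, name)(node, **kwargs)
        return None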
- def _translate_compounddef_struct( self, node, tag='struct', target=None, **kwargs ): - result = None - includes = self._getChild('includes',root=node) - if includes: - ## Add the header into the output table. - self._translate_compounddef_includes_(includes,includes,**kwargs) - ## Compounds are the declared symbols, classes, types, etc. - ## We add them to the symbol table, along with the partial DOM for them - ## so that they can be organized into the output later. - compoundname = self._getChildData('compoundname',root=node) - compoundname = self._cppName(compoundname) - self._setID(node.getAttribute('id'),compoundname['compoundname']) - struct = self._createNode(tag,name=compoundname['name'].split('::')[-1]) - self.symbols[compoundname['compoundname']] = { - 'header' : includes.firstChild.data, - 'namespace' : compoundname['namespace'], - 'id' : node.getAttribute('id'), - 'kind' : tag, - 'name' : compoundname['name'], - 'dom' : struct - } - ## Add the children which will be the members of the struct. - for n in node.childNodes: - self._translateNode(n,target=struct,scope=compoundname['compoundname']) - result = struct - return result - - #~ Translate a ..., - def _translate_compounddef_includes_( self, node, target=None, **kwargs ): - name = node.firstChild.data - if not self.symbols.has_key(name): - self._setID(node.getAttribute('refid'),name) - self.symbols[name] = { - 'kind' : 'header', - 'id' : node.getAttribute('refid'), - 'dom' : self._createNode('header', - id=node.getAttribute('refid'), - name=name) - } - return None - - #~ Translate a ... into: - #~ - #~ ... - #~ - def _translate_basecompoundref( self, ref, target=None, **kwargs ): - inherit = target.appendChild(self._createNode('inherit', - access=ref.getAttribute('prot'))) - self._translateChildren(ref,target=inherit) - return - - #~ Translate: - #~ - #~ - #~ ... - #~ ... - #~ ... - #~ ... - #~ - #~ ... - #~ - #~ Into: - #~ - def _translate_templateparamlist( self, templateparamlist, target=None, **kwargs ): - template = target.appendChild(self._createNode('template')) - for param in templateparamlist.childNodes: - if param.nodeName == 'param': - type = self._getChildData('type',root=param) - defval = self._getChild('defval',root=param) - paramKind = None - if type in ('class','typename'): - paramKind = 'template-type-parameter' - else: - paramKind = 'template-nontype-parameter' - templateParam = template.appendChild( - self._createNode(paramKind, - name=self._getChildData('declname',root=param))) - if paramKind == 'template-nontype-parameter': - template_type = templateParam.appendChild(self._createNode('type')) - self._translate_type( - self._getChild('type',root=param),target=template_type) - if defval: - value = self._getChildData('ref',root=defval.firstChild) - if not value: - value = self._getData(defval) - templateParam.appendChild(self._createText('default',value)) - return template - - #~ Translate: - #~ ... - #~ Into: - #~ ... - def _translate_briefdescription( self, brief, target=None, **kwargs ): - self._translateDescription(brief,target=target,**kwargs) - return self._translateDescription(brief,target=target,tag='purpose',**kwargs) - - #~ Translate: - #~ ... - #~ Into: - #~ ... - def _translate_detaileddescription( self, detailed, target=None, **kwargs ): - return self._translateDescription(detailed,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ With kind specific translation. 
- def _translate_sectiondef( self, sectiondef, target=None, **kwargs ): - self._translateNode(sectiondef,sectiondef.getAttribute('kind'),target=target,**kwargs) - - #~ Translate non-function sections. - def _translate_sectiondef_x_( self, sectiondef, target=None, **kwargs ): - for n in sectiondef.childNodes: - if hasattr(n,'getAttribute'): - self._translateNode(n,n.getAttribute('kind'),target=target,**kwargs) - return None - - #~ Translate: - #~ ... - def _translate_sectiondef_public_type( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_x_(sectiondef,target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_public_attrib( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_x_(sectiondef,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ All the various function group translations end up here for which - #~ they are translated into: - #~ - #~ ... - #~ - def _translate_sectiondef_func_( self, sectiondef, name='functions', target=None, **kwargs ): - members = target.appendChild(self._createNode('method-group',name=name)) - for n in sectiondef.childNodes: - if hasattr(n,'getAttribute'): - self._translateNode(n,n.getAttribute('kind'),target=members,**kwargs) - return members - - #~ Translate: - #~ ... - def _translate_sectiondef_public_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='public member functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_public_static_func( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_func_(sectiondef, - name='public static functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_protected_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='protected member functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_private_static_func( self, sectiondef, target=None, **kwargs): - return self._translate_sectiondef_func_(sectiondef, - name='private static functions',target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_sectiondef_private_func( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name='private member functions',target=target,**kwargs) - - #~ Translate: - #~
...
...
- def _translate_sectiondef_user_defined( self, sectiondef, target=None, **kwargs ): - return self._translate_sectiondef_func_(sectiondef, - name=self._getChildData('header', root=sectiondef),target=target,**kwargs) - - #~ Translate: - #~ - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_typedef( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - typedef = target.appendChild(self._createNode('typedef', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - typedef_type = typedef.appendChild(self._createNode('type')) - self._translate_type(self._getChild('type',root=memberdef),target=typedef_type) - return typedef - - #~ Translate: - #~ - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_function( self, memberdef, target=None, scope=None, **kwargs ): - name = self._getChildData('name',root=memberdef) - self._setID(memberdef.getAttribute('id'),scope+'::'+name) - ## Check if we have some specific kind of method. - if name == scope.split('::')[-1]: - kind = 'constructor' - target = target.parentNode - elif name == '~'+scope.split('::')[-1]: - kind = 'destructor' - target = target.parentNode - elif name == 'operator=': - kind = 'copy-assignment' - target = target.parentNode - else: - kind = 'method' - method = target.appendChild(self._createNode(kind, - # id=memberdef.getAttribute('id'), - name=name, - cv=' '.join([ - if_attribute(memberdef,'const','const','').strip() - ]), - specifiers=' '.join([ - if_attribute(memberdef,'static','static',''), - if_attribute(memberdef,'explicit','explicit',''), - if_attribute(memberdef,'inline','inline','') - ]).strip() - )) - ## We iterate the children to translate each part of the function. - for n in memberdef.childNodes: - self._translateNode(memberdef,'function',n,target=method) - return method - - #~ Translate: - #~ ... - def _translate_memberdef_function_templateparamlist( - self, templateparamlist, target=None, **kwargs ): - return self._translate_templateparamlist(templateparamlist,target=target,**kwargs) - - #~ Translate: - #~ ... - #~ To: - #~ ...? - def _translate_memberdef_function_type( self, resultType, target=None, **kwargs ): - methodType = self._createNode('type') - self._translate_type(resultType,target=methodType) - if methodType.hasChildNodes(): - target.appendChild(methodType) - return methodType - - #~ Translate: - #~ ... - def _translate_memberdef_function_briefdescription( self, description, target=None, **kwargs ): - result = self._translateDescription(description,target=target,**kwargs) - ## For functions if we translate the brief docs to the purpose they end up - ## right above the regular description. And since we just added the brief to that - ## on the previous line, don't bother with the repetition. - # result = self._translateDescription(description,target=target,tag='purpose',**kwargs) - return result - - #~ Translate: - #~ ... - def _translate_memberdef_function_detaileddescription( self, description, target=None, **kwargs ): - return self._translateDescription(description,target=target,**kwargs) - - #~ Translate: - #~ ... - def _translate_memberdef_function_inbodydescription( self, description, target=None, **kwargs ): - return self._translateDescription(description,target=target,**kwargs) - - #~ Translate: - #~ ... 
- def _translate_memberdef_function_param( self, param, target=None, **kwargs ): - return self._translate_param(param,target=target,**kwargs) - - #~ Translate: - #~ - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_variable( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - data_member = target.appendChild(self._createNode('data-member', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - data_member_type = data_member.appendChild(self._createNode('type')) - self._translate_type(self._getChild('type',root=memberdef),target=data_member_type) - - #~ Translate: - #~ - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_enum( self, memberdef, target=None, scope=None, **kwargs ): - self._setID(memberdef.getAttribute('id'), - scope+'::'+self._getChildData('name',root=memberdef)) - enum = target.appendChild(self._createNode('enum', - id=memberdef.getAttribute('id'), - name=self._getChildData('name',root=memberdef))) - for n in memberdef.childNodes: - self._translateNode(memberdef,'enum',n,target=enum,scope=scope,**kwargs) - return enum - - #~ Translate: - #~ - #~ - #~ ... - #~ ... - #~ - #~ - #~ To: - #~ - #~ ... - #~ - def _translate_memberdef_enum_enumvalue( self, enumvalue, target=None, scope=None, **kwargs ): - self._setID(enumvalue.getAttribute('id'), - scope+'::'+self._getChildData('name',root=enumvalue)) - value = target.appendChild(self._createNode('enumvalue', - id=enumvalue.getAttribute('id'), - name=self._getChildData('name',root=enumvalue))) - initializer = self._getChild('initializer',root=enumvalue) - if initializer: - self._translateChildren(initializer, - target=target.appendChild(self._createNode('default'))) - return value - - #~ Translate: - #~ - #~ ... - #~ ... - #~ ... - #~ - #~ To: - #~ - #~ ... - #~ ... - #~ - def _translate_param( self, param, target=None, **kwargs): - parameter = target.appendChild(self._createNode('parameter', - name=self._getChildData('declname',root=param))) - paramtype = parameter.appendChild(self._createNode('paramtype')) - self._translate_type(self._getChild('type',root=param),target=paramtype) - defval = self._getChild('defval',root=param) - if defval: - self._translateChildren(self._getChild('defval',root=param),target=parameter) - return parameter - - #~ Translate: - #~ ... - def _translate_ref( self, ref, **kwargs ): - return self._translateNode(ref,ref.getAttribute('kindref')) - - #~ Translate: - #~ ... - #~ To: - #~ ... - def _translate_ref_compound( self, ref, **kwargs ): - result = self._createNode('link',linkend=ref.getAttribute('refid')) - classname = result.appendChild(self._createNode('classname')) - self._translateChildren(ref,target=classname) - return result - - #~ Translate: - #~ ... - #~ To: - #~ ... - def _translate_ref_member( self, ref, **kwargs ): - result = self._createNode('link',linkend=ref.getAttribute('refid')) - self._translateChildren(ref,target=result) - return result - - #~ Translate: - #~ ... - def _translate_type( self, type, target=None, **kwargs ): - result = self._translateChildren(type,target=target,**kwargs) - #~ Filter types to clean up various readability problems, most notably - #~ with really long types. 
- xml = target.toxml('utf-8'); - if ( - xml.startswith('boost::mpl::') or - xml.startswith('BOOST_PP_') or - re.match('boost::(lazy_)?(enable|disable)_if',xml) - ): - while target.firstChild: - target.removeChild(target.firstChild) - target.appendChild(self._createText('emphasis','unspecified')) - return result - - def _getChild( self, tag = None, id = None, name = None, root = None ): - if not root: - root = self.boostbook.documentElement - for n in root.childNodes: - found = True - if tag and found: - found = found and tag == n.nodeName - if id and found: - if n.hasAttribute('id'): - found = found and n.getAttribute('id') == id - else: - found = found and n.hasAttribute('id') and n.getAttribute('id') == id - if name and found: - found = found and n.hasAttribute('name') and n.getAttribute('name') == name - if found: - #~ print '--|', n - return n - return None - - def _getChildData( self, tag, **kwargs ): - return self._getData(self._getChild(tag,**kwargs),**kwargs) - - def _getData( self, node, **kwargs ): - if node: - text = self._getChild('#text',root=node) - if text: - return text.data.strip() - return '' - - def _cppName( self, type ): - parts = re.search('^([^<]+)[<]?(.*)[>]?$',type.strip().strip(':')) - result = { - 'compoundname' : parts.group(1), - 'namespace' : parts.group(1).split('::')[0:-1], - 'name' : parts.group(1).split('::')[-1], - 'specialization' : parts.group(2) - } - if result['namespace'] and len(result['namespace']) > 0: - namespace = '::'.join(result['namespace']) - while ( - len(result['namespace']) > 0 and ( - not self.symbols.has_key(namespace) or - self.symbols[namespace]['kind'] != 'namespace') - ): - result['name'] = result['namespace'].pop()+'::'+result['name'] - namespace = '::'.join(result['namespace']) - return result - - def _createNode( self, tag, **kwargs ): - result = self.boostbook.createElement(tag) - for k in kwargs.keys(): - if kwargs[k] != '': - if k == 'id': - result.setAttribute('id',kwargs[k]) - else: - result.setAttribute(k,kwargs[k]) - return result - - def _createText( self, tag, data, **kwargs ): - result = self._createNode(tag,**kwargs) - data = data.strip() - if len(data) > 0: - result.appendChild(self.boostbook.createTextNode(data)) - return result - - -def main( xmldir=None, output=None, id=None, title=None, index=False ): - #~ print '--- main: xmldir = %s, output = %s' % (xmldir,output) - - input = glob.glob( os.path.abspath( os.path.join( xmldir, "*.xml" ) ) ) - input.sort - translator = Doxygen2BoostBook(id=id, title=title, index=index) - #~ Feed in the namespaces first to build up the set of namespaces - #~ and definitions so that lookup is unambiguous when reading in the definitions. 
- namespace_files = filter( - lambda x: - os.path.basename(x).startswith('namespace'), - input) - decl_files = filter( - lambda x: - not os.path.basename(x).startswith('namespace') and not os.path.basename(x).startswith('_'), - input) - for dox in namespace_files: - #~ print '--|',os.path.basename(dox) - translator.addDox(xml.dom.minidom.parse(dox)) - for dox in decl_files: - #~ print '--|',os.path.basename(dox) - translator.addDox(xml.dom.minidom.parse(dox)) - - if output: - output = open(output,'w') - else: - output = sys.stdout - if output: - output.write(translator.tostring()) - - -main( **get_args() ) diff --git a/jam-files/boost-build/tools/doxygen-config.jam b/jam-files/boost-build/tools/doxygen-config.jam deleted file mode 100644 index 2cd2ccae..00000000 --- a/jam-files/boost-build/tools/doxygen-config.jam +++ /dev/null @@ -1,11 +0,0 @@ -#~ Copyright 2005, 2006 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Doxygen tools. To use, just import this module. - -import toolset : using ; - -ECHO "warning: doxygen-config.jam is deprecated. Use 'using doxygen ;' instead." ; - -using doxygen ; diff --git a/jam-files/boost-build/tools/doxygen.jam b/jam-files/boost-build/tools/doxygen.jam deleted file mode 100644 index 8394848d..00000000 --- a/jam-files/boost-build/tools/doxygen.jam +++ /dev/null @@ -1,776 +0,0 @@ -# Copyright 2003, 2004 Douglas Gregor -# Copyright 2003, 2004, 2005 Vladimir Prus -# Copyright 2006 Rene Rivera -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines rules to handle generation of various outputs from source -# files documented with doxygen comments. The supported transformations are: -# -# * Source -> Doxygen XML -> BoostBook XML -# * Source -> Doxygen HTML -# -# The type of transformation is selected based on the target requested. For -# BoostBook XML, the default, specifying a target with an ".xml" suffix, or an -# empty suffix, will produce a .xml and .boostbook. For Doxygen -# HTML specifying a target with an ".html" suffix will produce a directory -# with the Doxygen html files, and a .html file redirecting to -# that directory. - -import "class" : new ; -import targets ; -import feature ; -import property ; -import generators ; -import boostbook ; -import type ; -import path ; -import print ; -import regex ; -import stage ; -import project ; -import xsltproc ; -import make ; -import os ; -import toolset : flags ; -import alias ; -import common ; -import modules ; -import project ; -import utility ; -import errors ; - - -# Use to specify extra configuration paramters. These get translated -# into a doxyfile which configures the building of the docs. -feature.feature doxygen:param : : free ; - -# Specify the "boost.doxygen.header.prefix" XSLT option. -feature.feature prefix : : free ; - -# Specify the "boost.doxygen.reftitle" XSLT option. -feature.feature reftitle : : free ; - -# Which processor to use for various translations from Doxygen. -feature.feature doxygen.processor : xsltproc doxproc : propagated implicit ; - -# To generate, or not, index sections. -feature.feature doxygen.doxproc.index : no yes : propagated incidental ; - -# The ID for the resulting BoostBook reference section. -feature.feature doxygen.doxproc.id : : free ; - -# The title for the resulting BoostBook reference section. 
-feature.feature doxygen.doxproc.title : : free ; - -# Location for images when generating XML -feature.feature doxygen:xml-imagedir : : free ; - -# Indicates whether the entire directory should be deleted -feature.feature doxygen.rmdir : off on : optional incidental ; - -# Doxygen configuration input file. -type.register DOXYFILE : doxyfile ; - -# Doxygen XML multi-file output. -type.register DOXYGEN_XML_MULTIFILE : xml-dir : XML ; - -# Doxygen XML coallesed output. -type.register DOXYGEN_XML : doxygen : XML ; - -# Doxygen HTML multifile directory. -type.register DOXYGEN_HTML_MULTIFILE : html-dir : HTML ; - -# Redirection HTML file to HTML multifile directory. -type.register DOXYGEN_HTML : : HTML ; - -type.register DOXYGEN_XML_IMAGES : doxygen-xml-images ; - -# Initialize the Doxygen module. Parameters are: -# name: the name of the 'doxygen' executable. If not specified, the name -# 'doxygen' will be used -# -rule init ( name ? ) -{ - if ! $(.initialized) - { - .initialized = true ; - - .doxproc = [ modules.binding $(__name__) ] ; - .doxproc = $(.doxproc:D)/doxproc.py ; - - generators.register-composing doxygen.headers-to-doxyfile - : H HPP CPP : DOXYFILE ; - generators.register-standard doxygen.run - : DOXYFILE : DOXYGEN_XML_MULTIFILE ; - generators.register-standard doxygen.xml-dir-to-boostbook - : DOXYGEN_XML_MULTIFILE : BOOSTBOOK : doxproc ; - generators.register-standard doxygen.xml-to-boostbook - : DOXYGEN_XML : BOOSTBOOK : xsltproc ; - generators.register-standard doxygen.collect - : DOXYGEN_XML_MULTIFILE : DOXYGEN_XML ; - generators.register-standard doxygen.run - : DOXYFILE : DOXYGEN_HTML_MULTIFILE ; - generators.register-standard doxygen.html-redirect - : DOXYGEN_HTML_MULTIFILE : DOXYGEN_HTML ; - generators.register-standard doxygen.copy-latex-pngs - : DOXYGEN_HTML : DOXYGEN_XML_IMAGES ; - - IMPORT $(__name__) : doxygen : : doxygen ; - } - - if $(name) - { - modify-config ; - .doxygen = $(name) ; - check-doxygen ; - } - - if ! $(.doxygen) - { - check-doxygen ; - } -} - -rule freeze-config ( ) -{ - if ! $(.initialized) - { - errors.user-error "doxygen must be initialized before it can be used." ; - } - if ! $(.config-frozen) - { - .config-frozen = true ; - - if [ .is-cygwin ] - { - .is-cygwin = true ; - } - } -} - -rule modify-config ( ) -{ - if $(.config-frozen) - { - errors.user-error "Cannot change doxygen after it has been used." ; - } -} - -rule check-doxygen ( ) -{ - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using doxygen ":" $(.doxygen) ; - } - local extra-paths ; - if [ os.name ] = NT - { - local ProgramFiles = [ modules.peek : ProgramFiles ] ; - if $(ProgramFiles) - { - extra-paths = "$(ProgramFiles:J= )" ; - } - else - { - extra-paths = "C:\\Program Files" ; - } - } - .doxygen = [ common.get-invocation-command doxygen : - doxygen : $(.doxygen) : $(extra-paths) ] ; -} - -rule name ( ) -{ - freeze-config ; - return $(.doxygen) ; -} - -rule .is-cygwin ( ) -{ - if [ os.on-windows ] - { - local file = [ path.make [ modules.binding $(__name__) ] ] ; - local dir = [ path.native - [ path.join [ path.parent $(file) ] doxygen ] ] ; - local command = - "cd \"$(dir)\" && \"$(.doxygen)\" windows-paths-check.doxyfile 2>&1" ; - result = [ SHELL $(command) ] ; - if [ MATCH "(Parsing file /)" : $(result) ] - { - return true ; - } - } -} - -# Runs Doxygen on the given Doxygen configuration file (the source) to generate -# the Doxygen files. The output is dumped according to the settings in the -# Doxygen configuration file, not according to the target! 
Because of this, we -# essentially "touch" the target file, in effect making it look like we have -# really written something useful to it. Anyone that uses this action must deal -# with this behavior. -# -actions doxygen-action -{ - $(RM) "$(*.XML)" & "$(NAME:E=doxygen)" "$(>)" && echo "Stamped" > "$(<)" -} - - -# Runs the Python doxproc XML processor. -# -actions doxproc -{ - python "$(DOXPROC)" "--xmldir=$(>)" "--output=$(<)" "$(OPTIONS)" "--id=$(ID)" "--title=$(TITLE)" -} - - -rule translate-path ( path ) -{ - freeze-config ; - if [ os.on-windows ] - { - if [ os.name ] = CYGWIN - { - if $(.is-cygwin) - { - return $(path) ; - } - else - { - return $(path:W) ; - } - } - else - { - if $(.is-cygwin) - { - match = [ MATCH ^(.):(.*) : $(path) ] ; - if $(match) - { - return /cygdrive/$(match[1])$(match[2]:T) ; - } - else - { - return $(path:T) ; - } - } - else - { - return $(path) ; - } - } - } - else - { - return $(path) ; - } -} - - -# Generates a doxygen configuration file (doxyfile) given a set of C++ sources -# and a property list that may contain features. -# -rule headers-to-doxyfile ( target : sources * : properties * ) -{ - local text "# Generated by Boost.Build version 2" ; - - local output-dir ; - - # Translate into command line flags. - for local param in [ feature.get-values : $(properties) ] - { - local namevalue = [ regex.match ([^=]*)=(.*) : $(param) ] ; - if $(namevalue[1]) = OUTPUT_DIRECTORY - { - output-dir = [ translate-path - [ utility.unquote $(namevalue[2]) ] ] ; - text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ; - } - else - { - text += "$(namevalue[1]) = $(namevalue[2])" ; - } - } - - if ! $(output-dir) - { - output-dir = [ translate-path [ on $(target) return $(LOCATE) ] ] ; - text += "OUTPUT_DIRECTORY = \"$(output-dir)\"" ; - } - - local headers = ; - for local header in $(sources:G=) - { - header = [ translate-path $(header) ] ; - headers += \"$(header)\" ; - } - - # Doxygen generates LaTex by default. So disable it unconditionally, or at - # least until someone needs, and hence writes support for, LaTex output. - text += "GENERATE_LATEX = NO" ; - text += "INPUT = $(headers:J= )" ; - print.output $(target) plain ; - print.text $(text) : true ; -} - - -# Run Doxygen. See doxygen-action for a description of the strange properties of -# this rule. -# -rule run ( target : source : properties * ) -{ - freeze-config ; - if on in $(properties) - { - local output-dir = - [ path.make - [ MATCH OUTPUT_DIRECTORY=\"?([^\"]*) : - $(properties) ] ] ; - local html-dir = - [ path.make - [ MATCH HTML_OUTPUT=(.*) : - $(properties) ] ] ; - if $(output-dir) && $(html-dir) && - [ path.glob $(output-dir) : $(html-dir) ] - { - HTMLDIR on $(target) = - [ path.native [ path.join $(output-dir) $(html-dir) ] ] ; - rm-htmldir $(target) ; - } - } - doxygen-action $(target) : $(source) ; - NAME on $(target) = $(.doxygen) ; - RM on $(target) = [ modules.peek common : RM ] ; - *.XML on $(target) = - [ path.native - [ path.join - [ path.make [ on $(target) return $(LOCATE) ] ] - $(target:B:S=) - *.xml ] ] ; -} - -if [ os.name ] = NT -{ - RMDIR = rmdir /s /q ; -} -else -{ - RMDIR = rm -rf ; -} - -actions quietly rm-htmldir -{ - $(RMDIR) $(HTMLDIR) -} - -# The rules below require Boost.Book stylesheets, so we need some code to check -# that the boostbook module has actualy been initialized. -# -rule check-boostbook ( ) -{ - if ! 
[ modules.peek boostbook : .initialized ] - { - ECHO "error: the boostbook module is not initialized" ; - ECHO "error: you've attempted to use the 'doxygen' toolset, " ; - ECHO "error: which requires Boost.Book," ; - ECHO "error: but never initialized Boost.Book." ; - EXIT "error: Hint: add 'using boostbook ;' to your user-config.jam" ; - } -} - - -# Collect the set of Doxygen XML files into a single XML source file that can be -# handled by an XSLT processor. The source is completely ignored (see -# doxygen-action), because this action picks up the Doxygen XML index file -# xml/index.xml. This is because we can not teach Doxygen to act like a NORMAL -# program and take a "-o output.xml" argument (grrrr). The target of the -# collection will be a single Doxygen XML file. -# -rule collect ( target : source : properties * ) -{ - check-boostbook ; - local collect-xsl-dir - = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen collect ] ] ; - local source-path - = [ path.make [ on $(source) return $(LOCATE) ] ] ; - local collect-path - = [ path.root [ path.join $(source-path) $(source:B) ] [ path.pwd ] ] ; - local native-path - = [ path.native $(collect-path) ] ; - local real-source - = [ path.native [ path.join $(collect-path) index.xml ] ] ; - xsltproc.xslt $(target) : $(real-source) $(collect-xsl-dir:S=.xsl) - : doxygen.xml.path=$(native-path) ; -} - - -# Translate Doxygen XML into BoostBook. -# -rule xml-to-boostbook ( target : source : properties * ) -{ - check-boostbook ; - local xsl-dir = [ boostbook.xsl-dir ] ; - local d2b-xsl = [ path.native [ path.join [ boostbook.xsl-dir ] doxygen - doxygen2boostbook.xsl ] ] ; - - local xslt-properties = $(properties) ; - for local prefix in [ feature.get-values : $(properties) ] - { - xslt-properties += "boost.doxygen.header.prefix=$(prefix)" ; - } - for local title in [ feature.get-values : $(properties) ] - { - xslt-properties += "boost.doxygen.reftitle=$(title)" ; - } - - xsltproc.xslt $(target) : $(source) $(d2b-xsl) : $(xslt-properties) ; -} - - -flags doxygen.xml-dir-to-boostbook OPTIONS yes : --enable-index ; -flags doxygen.xml-dir-to-boostbook ID ; -flags doxygen.xml-dir-to-boostbook TITLE ; - - -rule xml-dir-to-boostbook ( target : source : properties * ) -{ - DOXPROC on $(target) = $(.doxproc) ; - - LOCATE on $(source:S=) = [ on $(source) return $(LOCATE) ] ; - - doxygen.doxproc $(target) : $(source:S=) ; -} - - -# Generate the HTML redirect to HTML dir index.html file. -# -rule html-redirect ( target : source : properties * ) -{ - local uri = "$(target:B)/index.html" ; - print.output $(target) plain ; - print.text -" - - - - - - - - - Automatic redirection failed, please go to $(uri). - - -" - : true ; -} - -rule copy-latex-pngs ( target : source : requirements * ) -{ - local directory = [ path.native - [ feature.get-values : - $(requirements) ] ] ; - - local location = [ on $(target) return $(LOCATE) ] ; - - local pdf-location = - [ path.native - [ path.join - [ path.make $(location) ] - [ path.make $(directory) ] ] ] ; - local html-location = - [ path.native - [ path.join - . 
- html - [ path.make $(directory) ] ] ] ; - - common.MkDir $(pdf-location) ; - common.MkDir $(html-location) ; - - DEPENDS $(target) : $(pdf-location) $(html-location) ; - - if [ os.name ] = NT - { - CP on $(target) = copy /y ; - FROM on $(target) = \\*.png ; - TOHTML on $(target) = .\\html\\$(directory) ; - TOPDF on $(target) = \\$(directory) ; - } - else - { - CP on $(target) = cp ; - FROM on $(target) = /*.png ; - TOHTML on $(target) = ./html/$(directory) ; - TOPDF on $(target) = $(target:D)/$(directory) ; - } -} - -actions copy-latex-pngs -{ - $(CP) $(>:S=)$(FROM) $(TOHTML) - $(CP) $(>:S=)$(FROM) $(<:D)$(TOPDF) - echo "Stamped" > "$(<)" -} - -# building latex images for doxygen XML depends -# on latex, dvips, and ps being in your PATH. -# This is true for most Unix installs, but -# not on Win32, where you will need to install -# MkTex and Ghostscript and add these tools -# to your path. - -actions check-latex -{ - latex -version >$(<) -} - -actions check-dvips -{ - dvips -version >$(<) -} - -if [ os.name ] = "NT" -{ - actions check-gs - { - gswin32c -version >$(<) - } -} -else -{ - actions check-gs - { - gs -version >$(<) - } -} - -rule check-tools ( ) -{ - if ! $(.check-tools-targets) - { - # Find the root project. - local root-project = [ project.current ] ; - root-project = [ $(root-project).project-module ] ; - while - [ project.attribute $(root-project) parent-module ] && - [ project.attribute $(root-project) parent-module ] != user-config - { - root-project = - [ project.attribute $(root-project) parent-module ] ; - } - - .latex.check = [ new file-target latex.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-latex ] - : - ] ; - .dvips.check = [ new file-target dvips.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-dvips ] - : - ] ; - .gs.check = [ new file-target gs.check - : - : [ project.target $(root-project) ] - : [ new action : doxygen.check-gs ] - : - ] ; - .check-tools-targets = $(.latex.check) $(.dvips.check) $(.gs.check) ; - } - return $(.check-tools-targets) ; -} - -project.initialize $(__name__) ; -project doxygen ; - -class doxygen-check-tools-target-class : basic-target -{ - import doxygen ; - rule construct ( name : sources * : property-set ) - { - return [ property-set.empty ] [ doxygen.check-tools ] ; - } -} - -local project = [ project.current ] ; - -targets.main-target-alternative - [ new doxygen-check-tools-target-class check-tools : $(project) - : [ targets.main-target-sources : check-tools : no-renaming ] - : [ targets.main-target-requirements : $(project) ] - : [ targets.main-target-default-build : $(project) ] - : [ targets.main-target-usage-requirements : $(project) ] - ] ; - -# User-level rule to generate BoostBook XML from a set of headers via Doxygen. -# -rule doxygen ( target : sources * : requirements * : default-build * : usage-requirements * ) -{ - freeze-config ; - local project = [ project.current ] ; - - if $(target:S) = .html - { - # Build an HTML directory from the sources. - local html-location = [ feature.get-values : $(requirements) ] ; - local output-dir ; - if [ $(project).get build-dir ] - { - # Explicitly specified build dir. Add html at the end. - output-dir = [ path.join [ $(project).build-dir ] $(html-location:E=html) ] ; - } - else - { - # Trim 'bin' from implicit build dir, for no other reason that backward - # compatibility. 
- output-dir = [ path.join [ path.parent [ $(project).build-dir ] ] - $(html-location:E=html) ] ; - } - output-dir = [ path.root $(output-dir) [ path.pwd ] ] ; - local output-dir-native = [ path.native $(output-dir) ] ; - requirements = [ property.change $(requirements) : ] ; - - ## The doxygen configuration file. - targets.main-target-alternative - [ new typed-target $(target:S=.tag) : $(project) : DOXYFILE - : [ targets.main-target-sources $(sources) : $(target:S=.tag) ] - : [ targets.main-target-requirements $(requirements) - GENERATE_HTML=YES - GENERATE_XML=NO - "OUTPUT_DIRECTORY=\"$(output-dir-native)\"" - HTML_OUTPUT=$(target:B) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.tag) ; - - ## The html directory to generate by running doxygen. - targets.main-target-alternative - [ new typed-target $(target:S=.dir) : $(project) : DOXYGEN_HTML_MULTIFILE - : $(target:S=.tag) - : [ targets.main-target-requirements $(requirements) - "OUTPUT_DIRECTORY=\"$(output-dir-native)\"" - HTML_OUTPUT=$(target:B) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.dir) ; - - ## The redirect html file into the generated html. - targets.main-target-alternative - [ new typed-target $(target) : $(project) : DOXYGEN_HTML - : $(target:S=.dir) - : [ targets.main-target-requirements $(requirements) - $(output-dir) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - } - else - { - # Build a BoostBook XML file from the sources. - local location-xml = [ feature.get-values : $(requirements) ] ; - requirements = [ property.change $(requirements) : ] ; - local target-xml = $(target:B=$(target:B)-xml) ; - - # Check whether we need to build images - local images-location = - [ feature.get-values : $(requirements) ] ; - if $(images-location) - { - doxygen $(target).doxygen-xml-images.html : $(sources) - : $(requirements) - on - QUIET=YES - WARNINGS=NO - WARN_IF_UNDOCUMENTED=NO - /doxygen//check-tools ; - $(project).mark-target-as-explicit - $(target).doxygen-xml-images.html ; - - targets.main-target-alternative - [ new typed-target $(target).doxygen-xml-images - : $(project) : DOXYGEN_XML_IMAGES - : $(target).doxygen-xml-images.html - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) - : $(project) ] - ] ; - - $(project).mark-target-as-explicit - $(target).doxygen-xml-images ; - - if ! [ regex.match "^(.*/)$" : $(images-location) ] - { - images-location = $(images-location)/ ; - } - - requirements += - $(target).doxygen-xml-images - boost.doxygen.formuladir=$(images-location) ; - } - - ## The doxygen configuration file. - targets.main-target-alternative - [ new typed-target $(target-xml:S=.tag) : $(project) : DOXYFILE - : [ targets.main-target-sources $(sources) : $(target-xml:S=.tag) ] - : [ targets.main-target-requirements $(requirements) - GENERATE_HTML=NO - GENERATE_XML=YES - XML_OUTPUT=$(target-xml) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml:S=.tag) ; - - ## The Doxygen XML directory of the processed source files. 
- targets.main-target-alternative - [ new typed-target $(target-xml:S=.dir) : $(project) : DOXYGEN_XML_MULTIFILE - : $(target-xml:S=.tag) - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml:S=.dir) ; - - ## The resulting BoostBook file is generated by the processor tool. The - ## tool can be either the xsltproc plus accompanying XSL scripts. Or it - ## can be the python doxproc.py script. - targets.main-target-alternative - [ new typed-target $(target-xml) : $(project) : BOOSTBOOK - : $(target-xml:S=.dir) - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target-xml) ; - - targets.main-target-alternative - [ new install-target-class $(target:S=.xml) : $(project) - : $(target-xml) - : [ targets.main-target-requirements $(requirements) - $(location-xml:E=.) - $(target:S=.xml) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(target:S=.xml) ; - - targets.main-target-alternative - [ new alias-target-class $(target) : $(project) - : - : [ targets.main-target-requirements $(requirements) - : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) - $(target:S=.xml) - : $(project) ] - ] ; - } -} diff --git a/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile b/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile deleted file mode 100644 index 9b969df9..00000000 --- a/jam-files/boost-build/tools/doxygen/windows-paths-check.doxyfile +++ /dev/null @@ -1,3 +0,0 @@ -INPUT = windows-paths-check.hpp -GENERATE_HTML = NO -GENERATE_LATEX = NO diff --git a/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp b/jam-files/boost-build/tools/doxygen/windows-paths-check.hpp deleted file mode 100644 index e69de29b..00000000 diff --git a/jam-files/boost-build/tools/fop.jam b/jam-files/boost-build/tools/fop.jam deleted file mode 100644 index c24b8725..00000000 --- a/jam-files/boost-build/tools/fop.jam +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (C) 2003-2004 Doug Gregor and Dave Abrahams. Distributed -# under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) -# -# This module defines rules to handle generation of PDF and -# PostScript files from XSL Formatting Objects via Apache FOP - -import generators ; -import common ; -import boostbook ; - -generators.register-standard fop.render.pdf : FO : PDF ; -generators.register-standard fop.render.ps : FO : PS ; - -# Initializes the fop toolset. -# -rule init ( fop-command ? : java-home ? : java ? ) -{ - local has-command = $(.has-command) ; - - if $(fop-command) - { - .has-command = true ; - } - - if $(fop-command) || ! $(has-command) - { - fop-command = [ common.get-invocation-command fop : fop : $(fop-command) - : [ modules.peek : FOP_DIR ] ] ; - } - - if $(fop-command) - { - .FOP_COMMAND = $(fop-command) ; - } - - if $(java-home) || $(java) - { - .FOP_SETUP = ; - - - # JAVA_HOME is the location that java was installed to. 
- - if $(java-home) - { - .FOP_SETUP += [ common.variable-setting-command JAVA_HOME : $(java-home) ] ; - } - - # JAVACMD is the location that of the java executable, useful for a - # non-standard java installation, where the executable isn't at - # $JAVA_HOME/bin/java. - - if $(java) - { - .FOP_SETUP += [ common.variable-setting-command JAVACMD : $(java) ] ; - } - } -} - -actions render.pdf -{ - $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) $(<) -} - -actions render.ps -{ - $(.FOP_SETUP) $(.FOP_COMMAND:E=fop) $(>) -ps $(<) -} diff --git a/jam-files/boost-build/tools/fortran.jam b/jam-files/boost-build/tools/fortran.jam deleted file mode 100644 index 37665825..00000000 --- a/jam-files/boost-build/tools/fortran.jam +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# -# This file contains common settings for all fortran tools -# - -import "class" : new ; -import feature : feature ; - -import type ; -import generators ; -import common ; - -type.register FORTRAN : f F for f77 ; -type.register FORTRAN90 : f90 F90 ; - -feature fortran : : free ; -feature fortran90 : : free ; - -class fortran-compiling-generator : generator -{ - rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * ) - { - generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ; - } -} - -rule register-fortran-compiler ( id : source-types + : target-types + : requirements * : optional-properties * ) -{ - local g = [ new fortran-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ; - generators.register $(g) ; -} - -class fortran90-compiling-generator : generator -{ - rule __init__ ( id : source-types + : target-types + : requirements * : optional-properties * ) - { - generator.__init__ $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ; - } -} - -rule register-fortran90-compiler ( id : source-types + : target-types + : requirements * : optional-properties * ) -{ - local g = [ new fortran90-compiling-generator $(id) : $(source-types) : $(target-types) : $(requirements) : $(optional-properties) ] ; - generators.register $(g) ; -} - -# FIXME: this is ugly, should find a better way (we'd want client code to -# register all generators as "generator.some-rule", not with "some-module.some-rule".) -IMPORT $(__name__) : register-fortran-compiler : : generators.register-fortran-compiler ; -IMPORT $(__name__) : register-fortran90-compiler : : generators.register-fortran90-compiler ; diff --git a/jam-files/boost-build/tools/gcc.jam b/jam-files/boost-build/tools/gcc.jam deleted file mode 100644 index f7b0da54..00000000 --- a/jam-files/boost-build/tools/gcc.jam +++ /dev/null @@ -1,1185 +0,0 @@ -# Copyright 2001 David Abrahams. -# Copyright 2002-2006 Rene Rivera. -# Copyright 2002-2003 Vladimir Prus. -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov. -# Copyright 2007 Roland Schwarz -# Copyright 2007 Boris Gubenko. -# -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import pch ; -import property ; -import property-set ; -import toolset ; -import type ; -import rc ; -import regex ; -import set ; -import unix ; -import fortran ; - - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - - -feature.extend toolset : gcc ; -# feature.subfeature toolset gcc : flavor : : optional ; - -toolset.inherit-generators gcc : unix : unix.link unix.link.dll ; -toolset.inherit-flags gcc : unix ; -toolset.inherit-rules gcc : unix ; - -generators.override gcc.prebuilt : builtin.prebuilt ; -generators.override gcc.searched-lib-generator : searched-lib-generator ; - -# Make gcc toolset object files use the "o" suffix on all platforms. -type.set-generated-target-suffix OBJ : gcc : o ; -type.set-generated-target-suffix OBJ : gcc windows : o ; -type.set-generated-target-suffix OBJ : gcc cygwin : o ; - -# Initializes the gcc toolset for the given version. If necessary, command may -# be used to specify where the compiler is located. The parameter 'options' is a -# space-delimited list of options, each one specified as -# option-value. Valid option names are: cxxflags, linkflags and -# linker-type. Accepted linker-type values are aix, darwin, gnu, hpux, osf or -# sun and the default value will be selected based on the current OS. -# Example: -# using gcc : 3.4 : : foo bar sun ; -# -# The compiler command to use is detected in a three step manner: -# 1) If an explicit command is specified by the user, it will be used and must available. -# 2) If only a certain version is specified, it is enforced: -# - either a command 'g++-VERSION' must be available -# - or the default command 'g++' must be available and match the exact version. -# 3) Without user-provided restrictions use default 'g++' -rule init ( version ? : command * : options * ) -{ - #1): use user-provided command - local tool-command = ; - if $(command) - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ : $(command) ] ; - if ! $(tool-command) - { - errors.error "toolset gcc initialization:" : - "provided command '$(command)' not found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - #2): enforce user-provided version - else if $(version) - { - tool-command = [ common.get-invocation-command-nodefault gcc : "g++-$(version[1])" ] ; - - #2.1) fallback: check whether "g++" reports the requested version - if ! $(tool-command) - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ ] ; - if $(tool-command) - { - local tool-command-string = $(tool-command:J=" ") ; - local tool-version = [ MATCH "^([0-9.]+)" : [ SHELL "$(tool-command-string) -dumpversion" ] ] ; - if $(tool-version) != $(version) - { - # Permit a match betwen two-digit version specified by the user - # (e.g. 4.4) and 3-digit version reported by gcc. - # Since only two digits are present in binary name anyway, - # insisting that user specify 3-digit version when - # configuring Boost.Build while it's not required on - # command like would be strange. 
- local stripped = [ MATCH "^([0-9]+\.[0-9]+).*" : $(tool-version) ] ; - if $(stripped) != $(version) - { - errors.error "toolset gcc initialization:" : - "version '$(version)' requested but 'g++-$(version)' not found and version '$(tool-version)' of default '$(tool-command)' does not match" : - "initialized from" [ errors.nearest-user-location ] ; - tool-command = ; - } - # Use full 3-digit version to be compatible with the 'using gcc ;' case - version = $(tool-version) ; - } - } - else - { - errors.error "toolset gcc initialization:" : - "version '$(version)' requested but neither 'g++-$(version)' nor default 'g++' found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - } - #3) default: no command and no version specified, try using default command "g++" - else - { - tool-command = [ common.get-invocation-command-nodefault gcc : g++ ] ; - if ! $(tool-command) - { - errors.error "toolset gcc initialization:" : - "no command provided, default command 'g++' not found" : - "initialized from" [ errors.nearest-user-location ] ; - } - } - - - # Information about the gcc command... - # The command. - local command = $(tool-command) ; - # The root directory of the tool install. - local root = [ feature.get-values : $(options) ] ; - # The bin directory where to find the command to execute. - local bin ; - # The flavor of compiler. - local flavor = [ feature.get-values : $(options) ] ; - # Autodetect the root and bin dir if not given. - if $(command) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - } - # The 'command' variable can have multiple elements. When calling - # the SHELL builtin we need a single string. - local command-string = $(command:J=" ") ; - # Autodetect the version and flavor if not given. - if $(command) - { - local machine = [ MATCH "^([^ ]+)" - : [ SHELL "$(command-string) -dumpmachine" ] ] ; - version ?= [ MATCH "^([0-9.]+)" - : [ SHELL "$(command-string) -dumpversion" ] ] ; - switch $(machine:L) - { - case *mingw* : flavor ?= mingw ; - } - } - - local condition ; - if $(flavor) - { - condition = [ common.check-init-parameters gcc - : version $(version) - : flavor $(flavor) - ] ; - } - else - { - condition = [ common.check-init-parameters gcc - : version $(version) - ] ; - condition = $(condition) ; #/ ; - } - - common.handle-options gcc : $(condition) : $(command) : $(options) ; - - local linker = [ feature.get-values : $(options) ] ; - # The logic below should actually be keyed on - if ! $(linker) - { - if [ os.name ] = OSF - { - linker = osf ; - } - else if [ os.name ] = HPUX - { - linker = hpux ; - } - else if [ os.name ] = AIX - { - linker = aix ; - } - else if [ os.name ] = SOLARIS - { - linker = sun ; - } - else - { - linker = gnu ; - } - } - init-link-flags gcc $(linker) $(condition) ; - - - # If gcc is installed in non-standard location, we'd need to add - # LD_LIBRARY_PATH when running programs created with it (for unit-test/run - # rules). - if $(command) - { - # On multilib 64-bit boxes, there are both 32-bit and 64-bit libraries - # and all must be added to LD_LIBRARY_PATH. The linker will pick the - # right onces. Note that we don't provide a clean way to build 32-bit - # binary with 64-bit compiler, but user can always pass -m32 manually. 
- local lib_path = $(root)/bin $(root)/lib $(root)/lib32 $(root)/lib64 ; - if $(.debug-configuration) - { - ECHO notice: using gcc libraries :: $(condition) :: $(lib_path) ; - } - toolset.flags gcc.link RUN_PATH $(condition) : $(lib_path) ; - } - - # If it's not a system gcc install we should adjust the various programs as - # needed to prefer using the install specific versions. This is essential - # for correct use of MinGW and for cross-compiling. - - local nl = " -" ; - - # - The archive builder. - local archiver = [ common.get-invocation-command gcc - : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" : [ SHELL "$(command-string) -print-prog-name=ar" ] ] ] - : [ feature.get-values : $(options) ] - : $(bin) - : search-path ] ; - toolset.flags gcc.archive .AR $(condition) : $(archiver[1]) ; - if $(.debug-configuration) - { - ECHO notice: using gcc archiver :: $(condition) :: $(archiver[1]) ; - } - - # - Ranlib - local ranlib = [ common.get-invocation-command gcc - : [ NORMALIZE_PATH [ MATCH "(.*)[$(nl)]+" : [ SHELL "$(command-string) -print-prog-name=ranlib" ] ] ] - : [ feature.get-values : $(options) ] - : $(bin) - : search-path ] ; - toolset.flags gcc.archive .RANLIB $(condition) : $(ranlib[1]) ; - if $(.debug-configuration) - { - ECHO notice: using gcc ranlib :: $(condition) :: $(ranlib[1]) ; - } - - - # - The resource compiler. - local rc = - [ common.get-invocation-command-nodefault gcc - : windres : [ feature.get-values : $(options) ] : $(bin) : search-path ] ; - local rc-type = - [ feature.get-values : $(options) ] ; - rc-type ?= windres ; - if ! $(rc) - { - # If we can't find an RC compiler we fallback to a null RC compiler that - # creates empty object files. This allows the same Jamfiles to work - # across the board. The null RC uses the assembler to create the empty - # objects, so configure that. - rc = [ common.get-invocation-command gcc : as : : $(bin) : search-path ] ; - rc-type = null ; - } - rc.configure $(rc) : $(condition) : $(rc-type) ; -} - -if [ os.name ] = NT -{ - # This causes single-line command invocation to not go through .bat files, - # thus avoiding command-line length limitations. - JAMSHELL = % ; -} - -generators.register-c-compiler gcc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : gcc ; -generators.register-c-compiler gcc.compile.c.preprocess : C : PREPROCESSED_C : gcc ; -generators.register-c-compiler gcc.compile.c++ : CPP : OBJ : gcc ; -generators.register-c-compiler gcc.compile.c : C : OBJ : gcc ; -generators.register-c-compiler gcc.compile.asm : ASM : OBJ : gcc ; -generators.register-fortran-compiler gcc.compile.fortran : FORTRAN FORTRAN90 : OBJ : gcc ; - -# pch support - -# The compiler looks for a precompiled header in each directory just before it -# looks for the include file in that directory. The name searched for is the -# name specified in the #include directive with ".gch" suffix appended. The -# logic in gcc-pch-generator will make sure that BASE_PCH suffix is appended to -# full name of the header. - -type.set-generated-target-suffix PCH : gcc : gch ; - -# GCC-specific pch generator. -class gcc-pch-generator : pch-generator -{ - import project ; - import property-set ; - import type ; - - rule run-pch ( project name ? : property-set : sources + ) - { - # Find the header in sources. Ignore any CPP sources. - local header ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] H ] - { - header = $(s) ; - } - } - - # Error handling: Base header file name should be the same as the base - # precompiled header name. 
- local header-name = [ $(header).name ] ; - local header-basename = $(header-name:B) ; - if $(header-basename) != $(name) - { - local location = [ $(project).project-module ] ; - errors.user-error "in" $(location)": pch target name `"$(name)"' should be the same as the base name of header file `"$(header-name)"'" ; - } - - local pch-file = [ generator.run $(project) $(name) : $(property-set) - : $(header) ] ; - - # return result of base class and pch-file property as usage-requirements - return - [ property-set.create $(pch-file) -Winvalid-pch ] - $(pch-file) - ; - } - - # Calls the base version specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. - rule generated-targets ( sources + : property-set : project name ? ) - { - name = [ $(sources[1]).name ] ; - return [ generator.generated-targets $(sources) - : $(property-set) : $(project) $(name) ] ; - } -} - -# Note: the 'H' source type will catch both '.h' header and '.hpp' header. The -# latter have HPP type, but HPP type is derived from H. The type of compilation -# is determined entirely by the destination type. -generators.register [ new gcc-pch-generator gcc.compile.c.pch : H : C_PCH : on gcc ] ; -generators.register [ new gcc-pch-generator gcc.compile.c++.pch : H : CPP_PCH : on gcc ] ; - -# Override default do-nothing generators. -generators.override gcc.compile.c.pch : pch.default-c-pch-generator ; -generators.override gcc.compile.c++.pch : pch.default-cpp-pch-generator ; - -toolset.flags gcc.compile PCH_FILE on : ; - -# Declare flags and action for compilation. -toolset.flags gcc.compile OPTIONS off : -O0 ; -toolset.flags gcc.compile OPTIONS speed : -O3 ; -toolset.flags gcc.compile OPTIONS space : -Os ; - -toolset.flags gcc.compile OPTIONS off : -fno-inline ; -toolset.flags gcc.compile OPTIONS on : -Wno-inline ; -toolset.flags gcc.compile OPTIONS full : -finline-functions -Wno-inline ; - -toolset.flags gcc.compile OPTIONS off : -w ; -toolset.flags gcc.compile OPTIONS on : -Wall ; -toolset.flags gcc.compile OPTIONS all : -Wall -pedantic ; -toolset.flags gcc.compile OPTIONS on : -Werror ; - -toolset.flags gcc.compile OPTIONS on : -g ; -toolset.flags gcc.compile OPTIONS on : -pg ; -toolset.flags gcc.compile OPTIONS off : -fno-rtti ; - -rule setup-fpic ( targets * : sources * : properties * ) -{ - local link = [ feature.get-values link : $(properties) ] ; - if $(link) = shared - { - local target = [ feature.get-values target-os : $(properties) ] ; - - # This logic will add -fPIC for all compilations: - # - # lib a : a.cpp b ; - # obj b : b.cpp ; - # exe c : c.cpp a d ; - # obj d : d.cpp ; - # - # This all is fine, except that 'd' will be compiled with -fPIC even though - # it is not needed, as 'd' is used only in exe. However, it is hard to - # detect where a target is going to be used. Alternatively, we can set -fPIC - # only when main target type is LIB but than 'b' would be compiled without - # -fPIC which would lead to link errors on x86-64. So, compile everything - # with -fPIC. - # - # Yet another alternative would be to create a propagated - # feature and set it when building shared libraries, but that would be hard - # to implement and would increase the target path length even more. - - # On Windows, fPIC is default, specifying -fPIC explicitly leads to - # a warning. 
- if $(target) != cygwin && $(target) != windows - { - OPTIONS on $(targets) += -fPIC ; - } - } -} - -rule setup-address-model ( targets * : sources * : properties * ) -{ - local model = [ feature.get-values address-model : $(properties) ] ; - if $(model) - { - local option ; - local os = [ feature.get-values target-os : $(properties) ] ; - if $(os) = aix - { - if $(model) = 32 - { - option = -maix32 ; - } - else - { - option = -maix64 ; - } - } - else if $(os) = hpux - { - if $(model) = 32 - { - option = -milp32 ; - } - else - { - option = -mlp64 ; - } - } - else - { - if $(model) = 32 - { - option = -m32 ; - } - else if $(model) = 64 - { - option = -m64 ; - } - # For darwin, the model can be 32_64. darwin.jam will handle that - # on its own. - } - OPTIONS on $(targets) += $(option) ; - } -} - - -# FIXME: this should not use os.name. -if [ os.name ] != NT && [ os.name ] != OSF && [ os.name ] != HPUX && [ os.name ] != AIX -{ - # OSF does have an option called -soname but it does not seem to work as - # expected, therefore it has been disabled. - HAVE_SONAME = "" ; - SONAME_OPTION = -h ; -} - -# HPUX, for some reason, seem to use '+h', not '-h'. -if [ os.name ] = HPUX -{ - HAVE_SONAME = "" ; - SONAME_OPTION = +h ; -} - -toolset.flags gcc.compile USER_OPTIONS ; -toolset.flags gcc.compile.c++ USER_OPTIONS ; -toolset.flags gcc.compile DEFINES ; -toolset.flags gcc.compile INCLUDES ; -toolset.flags gcc.compile.c++ TEMPLATE_DEPTH ; -toolset.flags gcc.compile.fortran USER_OPTIONS ; - -rule compile.c++.pch ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c++.pch -{ - "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.c.pch ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c.pch -{ - "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.c++.preprocess ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. - if ! $(>:S) in .cc .cp .cxx .cpp .c++ .C - { - LANG on $(<) = "-x c++" ; - } - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.c.preprocess ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. 
- #if $(>:S) != .c - #{ - LANG on $(<) = "-x c" ; - #} - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.c++ ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. - if ! $(>:S) in .cc .cp .cxx .cpp .c++ .C - { - LANG on $(<) = "-x c++" ; - } - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; - - # Here we want to raise the template-depth parameter value to something - # higher than the default value of 17. Note that we could do this using the - # feature.set-default rule but we do not want to set the default value for - # all toolsets as well. - # - # TODO: This 'modified default' has been inherited from some 'older Boost - # Build implementation' and has most likely been added to make some Boost - # library parts compile correctly. We should see what exactly prompted this - # and whether we can get around the problem more locally. - local template-depth = [ on $(<) return $(TEMPLATE_DEPTH) ] ; - if ! $(template-depth) - { - TEMPLATE_DEPTH on $(<) = 128 ; - } -} - -rule compile.c ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. 
- #if $(>:S) != .c - #{ - LANG on $(<) = "-x c" ; - #} - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -rule compile.fortran ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c++ bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<:W)" "$(>:W)" -} - -actions compile.c bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++.preprocess bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>:W)" -E >"$(<:W)" -} - -actions compile.c.preprocess bind PCH_FILE -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" "$(>)" -E >$(<) -} - -actions compile.fortran -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.asm ( targets * : sources * : properties * ) -{ - setup-fpic $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - LANG on $(<) = "-x assembler-with-cpp" ; -} - -actions compile.asm -{ - "$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# The class which check that we don't try to use the static -# property while creating or using shared library, since it's not supported by -# gcc/libc. -class gcc-linking-generator : unix-linking-generator -{ - rule run ( project name ? : property-set : sources + ) - { - # TODO: Replace this with the use of a target-os property. - local no-static-link = ; - if [ modules.peek : UNIX ] - { - switch [ modules.peek : JAMUNAME ] - { - case * : no-static-link = true ; - } - } - - local properties = [ $(property-set).raw ] ; - local reason ; - if $(no-static-link) && static in $(properties) - { - if shared in $(properties) - { - reason = - "On gcc, DLL can't be build with 'static'." ; - } - else if [ type.is-derived $(self.target-types[1]) EXE ] - { - for local s in $(sources) - { - local type = [ $(s).type ] ; - if $(type) && [ type.is-derived $(type) SHARED_LIB ] - { - reason = - "On gcc, using DLLS together with the" - "static options is not possible " ; - } - } - } - } - if $(reason) - { - ECHO warning: - $(reason) ; - ECHO warning: - "It is suggested to use 'static' together" - "with 'static'." ; - return ; - } - else - { - local generated-targets = [ unix-linking-generator.run $(project) - $(name) : $(property-set) : $(sources) ] ; - return $(generated-targets) ; - } - } -} - -# The set of permissible input types is different on mingw. -# So, define two sets of generators, with mingw generators -# selected when target-os=windows. 
- -local g ; -g = [ new gcc-linking-generator gcc.mingw.link - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : EXE - : gcc windows ] ; -$(g).set-rule-name gcc.link ; -generators.register $(g) ; - -g = [ new gcc-linking-generator gcc.mingw.link.dll - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : IMPORT_LIB SHARED_LIB - : gcc windows ] ; -$(g).set-rule-name gcc.link.dll ; -generators.register $(g) ; - -generators.register - [ new gcc-linking-generator gcc.link - : LIB OBJ - : EXE - : gcc ] ; -generators.register - [ new gcc-linking-generator gcc.link.dll - : LIB OBJ - : SHARED_LIB - : gcc ] ; - -generators.override gcc.mingw.link : gcc.link ; -generators.override gcc.mingw.link.dll : gcc.link.dll ; - -# Cygwin is similar to msvc and mingw in that it uses import libraries. -# While in simple cases, it can directly link to a shared library, -# it is believed to be slower, and not always possible. Define cygwin-specific -# generators here. - -g = [ new gcc-linking-generator gcc.cygwin.link - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : EXE - : gcc cygwin ] ; -$(g).set-rule-name gcc.link ; -generators.register $(g) ; - -g = [ new gcc-linking-generator gcc.cygwin.link.dll - : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB - : IMPORT_LIB SHARED_LIB - : gcc cygwin ] ; -$(g).set-rule-name gcc.link.dll ; -generators.register $(g) ; - -generators.override gcc.cygwin.link : gcc.link ; -generators.override gcc.cygwin.link.dll : gcc.link.dll ; - -# Declare flags for linking. -# First, the common flags. -toolset.flags gcc.link OPTIONS on : -g ; -toolset.flags gcc.link OPTIONS on : -pg ; -toolset.flags gcc.link USER_OPTIONS ; -toolset.flags gcc.link LINKPATH ; -toolset.flags gcc.link FINDLIBS-ST ; -toolset.flags gcc.link FINDLIBS-SA ; -toolset.flags gcc.link LIBRARIES ; - -toolset.flags gcc.link.dll .IMPLIB-COMMAND windows : "-Wl,--out-implib," ; -toolset.flags gcc.link.dll .IMPLIB-COMMAND cygwin : "-Wl,--out-implib," ; - -# For static we made sure there are no dynamic libraries in the -# link. On HP-UX not all system libraries exist as archived libraries (for -# example, there is no libunwind.a), so, on this platform, the -static option -# cannot be specified. -if [ os.name ] != HPUX -{ - toolset.flags gcc.link OPTIONS static : -static ; -} - -# Now, the vendor specific flags. -# The parameter linker can be either aix, darwin, gnu, hpux, osf or sun. -rule init-link-flags ( toolset linker condition ) -{ - switch $(linker) - { - case aix : - { - # - # On AIX we *have* to use the native linker. - # - # Using -brtl, the AIX linker will look for libraries with both the .a - # and .so extensions, such as libfoo.a and libfoo.so. Without -brtl, the - # AIX linker looks only for libfoo.a. Note that libfoo.a is an archived - # file that may contain shared objects and is different from static libs - # as on Linux. - # - # The -bnoipath strips the prepending (relative) path of libraries from - # the loader section in the target library or executable. Hence, during - # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded - # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without - # this option, the prepending (relative) path + library name is - # hard-coded in the loader section, causing *only* this path to be - # searched during load-time. Note that the AIX linker does not have an - # -soname equivalent, this is as close as it gets. - # - # The above options are definately for AIX 5.x, and most likely also for - # AIX 4.x and AIX 6.x. 
For details about the AIX linker see: - # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf - # - - toolset.flags $(toolset).link OPTIONS : -Wl,-brtl -Wl,-bnoipath - : unchecked ; - } - - case darwin : - { - # On Darwin, the -s option to ld does not work unless we pass -static, - # and passing -static unconditionally is a bad idea. So, don't pass -s. - # at all, darwin.jam will use separate 'strip' invocation. - toolset.flags $(toolset).link RPATH $(condition) : : unchecked ; - toolset.flags $(toolset).link RPATH_LINK $(condition) : : unchecked ; - } - - case gnu : - { - # Strip the binary when no debugging is needed. We use --strip-all flag - # as opposed to -s since icc (intel's compiler) is generally - # option-compatible with and inherits from the gcc toolset, but does not - # support -s. - toolset.flags $(toolset).link OPTIONS $(condition)/on : -Wl,--strip-all : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : : unchecked ; - toolset.flags $(toolset).link RPATH_LINK $(condition) : : unchecked ; - toolset.flags $(toolset).link START-GROUP $(condition) : -Wl,--start-group : unchecked ; - toolset.flags $(toolset).link END-GROUP $(condition) : -Wl,--end-group : unchecked ; - - # gnu ld has the ability to change the search behaviour for libraries - # referenced by -l switch. These modifiers are -Bstatic and -Bdynamic - # and change search for -l switches that follow them. The following list - # shows the tried variants. - # The search stops at the first variant that has a match. - # *nix: -Bstatic -lxxx - # libxxx.a - # - # *nix: -Bdynamic -lxxx - # libxxx.so - # libxxx.a - # - # windows (mingw,cygwin) -Bstatic -lxxx - # libxxx.a - # xxx.lib - # - # windows (mingw,cygwin) -Bdynamic -lxxx - # libxxx.dll.a - # xxx.dll.a - # libxxx.a - # xxx.lib - # cygxxx.dll (*) - # libxxx.dll - # xxx.dll - # libxxx.a - # - # (*) This is for cygwin - # Please note that -Bstatic and -Bdynamic are not a guarantee that a - # static or dynamic lib indeed gets linked in. The switches only change - # search patterns! - - # On *nix mixing shared libs with static runtime is not a good idea. - toolset.flags $(toolset).link FINDLIBS-ST-PFX $(condition)/shared - : -Wl,-Bstatic : unchecked ; - toolset.flags $(toolset).link FINDLIBS-SA-PFX $(condition)/shared - : -Wl,-Bdynamic : unchecked ; - - # On windows allow mixing of static and dynamic libs with static - # runtime. - toolset.flags $(toolset).link FINDLIBS-ST-PFX $(condition)/static/windows - : -Wl,-Bstatic : unchecked ; - toolset.flags $(toolset).link FINDLIBS-SA-PFX $(condition)/static/windows - : -Wl,-Bdynamic : unchecked ; - toolset.flags $(toolset).link OPTIONS $(condition)/static/windows - : -Wl,-Bstatic : unchecked ; - } - - case hpux : - { - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link OPTIONS $(condition)/shared - : -fPIC : unchecked ; - } - - case osf : - { - # No --strip-all, just -s. - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : - : unchecked ; - # This does not supports -R. - toolset.flags $(toolset).link RPATH_OPTION $(condition) : -rpath - : unchecked ; - # -rpath-link is not supported at all. 
- } - - case sun : - { - toolset.flags $(toolset).link OPTIONS $(condition)/on - : -Wl,-s : unchecked ; - toolset.flags $(toolset).link RPATH $(condition) : - : unchecked ; - # Solaris linker does not have a separate -rpath-link, but allows to use - # -L for the same purpose. - toolset.flags $(toolset).link LINKPATH $(condition) : - : unchecked ; - - # This permits shared libraries with non-PIC code on Solaris. - # VP, 2004/09/07: Now that we have -fPIC hardcode in link.dll, the - # following is not needed. Whether -fPIC should be hardcoded, is a - # separate question. - # AH, 2004/10/16: it is still necessary because some tests link against - # static libraries that were compiled without PIC. - toolset.flags $(toolset).link OPTIONS $(condition)/shared - : -mimpure-text : unchecked ; - } - - case * : - { - errors.user-error - "$(toolset) initialization: invalid linker '$(linker)'" : - "The value '$(linker)' specified for is not recognized." : - "Possible values are 'aix', 'darwin', 'gnu', 'hpux', 'osf' or 'sun'" ; - } - } -} - -# Enclose the RPATH variable on 'targets' in (double) quotes, -# unless it's already enclosed in single quotes. -# This special casing is done because it's common to pass -# '$ORIGIN' to linker -- and it has to have single quotes -# to prevent expansion by shell -- and if we add double -# quotes then preventing properties of single quotes disappear. -rule quote-rpath ( targets * ) -{ - local r = [ on $(targets[1]) return $(RPATH) ] ; - if ! [ MATCH "('.*')" : $(r) ] - { - r = "\"$(r)\"" ; - } - RPATH on $(targets) = $(r) ; -} - -# Declare actions for linking. -rule link ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. For now, serialize only gcc links, it might be a - # good idea to serialize all links. - JAM_SEMAPHORE on $(targets) = gcc-link-semaphore ; - quote-rpath $(targets) ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS) - -} - -# Default value. Mostly for the sake of intel-linux that inherits from gcc, but -# does not have the same logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble as on Linux, 'ar' is -# always available. -.AR = ar ; -.RANLIB = ranlib ; - -toolset.flags gcc.archive AROPTIONS ; - -rule archive ( targets * : sources * : properties * ) -{ - # Always remove archive and start again. Here is the rationale from - # - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. I moved a1.c to - # a2.c, updated my Jamfiles and rebuilt. My program was crashing with absurd - # errors. After some debugging I traced it back to the fact that a1.o was - # *still* in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM onto the - # archive action. That makes archives fail to build on NT when they have - # many files because it will no longer execute the action directly and blow - # the line length limit. Instead we remove the file in a different action, - # just before building the archive. 
- # - local clean.a = $(targets[1])(clean) ; - TEMPORARY $(clean.a) ; - NOCARE $(clean.a) ; - LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ; - DEPENDS $(clean.a) : $(sources) ; - DEPENDS $(targets) : $(clean.a) ; - common.RmTemps $(clean.a) : $(targets) ; -} - -# Declare action for creating static libraries. -# The letter 'r' means to add files to the archive with replacement. Since we -# remove archive, we don't care about replacement, but there's no option "add -# without replacement". -# The letter 'c' suppresses the warning in case the archive does not exists yet. -# That warning is produced only on some platforms, for whatever reasons. -actions piecemeal archive -{ - "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)" - "$(.RANLIB)" "$(<)" -} - -rule link.dll ( targets * : sources * : properties * ) -{ - setup-threading $(targets) : $(sources) : $(properties) ; - setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = gcc-link-semaphore ; - quote-rpath $(targets) ; -} - -# Differs from 'link' above only by -shared. -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,$(RPATH) "$(.IMPLIB-COMMAND)$(<[1])" -o "$(<[-1])" $(HAVE_SONAME)-Wl,$(SONAME_OPTION)$(SPACE)-Wl,$(<[-1]:D=) -shared $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) -l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) $(OPTIONS) $(USER_OPTIONS) -} - -rule setup-threading ( targets * : sources * : properties * ) -{ - local threading = [ feature.get-values threading : $(properties) ] ; - if $(threading) = multi - { - local target = [ feature.get-values target-os : $(properties) ] ; - local option ; - local libs ; - - switch $(target) - { - case windows : - { - option = -mthreads ; - } - case cygwin : - { - option = -mthreads ; - } - case solaris : - { - option = -pthreads ; - libs = rt ; - } - case beos : - { - # BeOS has no threading options, so do not set anything here. - } - case *bsd : - { - option = -pthread ; - # There is no -lrt on BSD. - } - case sgi : - { - # gcc on IRIX does not support multi-threading so do not set anything - # here. - } - case darwin : - { - # Darwin has no threading options so do not set anything here. - } - case * : - { - option = -pthread ; - libs = rt ; - } - } - - if $(option) - { - OPTIONS on $(targets) += $(option) ; - } - if $(libs) - { - FINDLIBS-SA on $(targets) += $(libs) ; - } - } -} - -local rule cpu-flags ( toolset variable : architecture : instruction-set + : values + : default ? ) -{ - if $(default) - { - toolset.flags $(toolset) $(variable) - $(architecture)/ - : $(values) ; - } - toolset.flags $(toolset) $(variable) - /$(instruction-set) - $(architecture)/$(instruction-set) - : $(values) ; -} - -# Set architecture/instruction-set options. -# -# x86 and compatible -# The 'native' option appeared in gcc 4.2 so we cannot safely use it -# as default. Use conservative i386 instead. 
-cpu-flags gcc OPTIONS : x86 : native : -march=native ; -cpu-flags gcc OPTIONS : x86 : i386 : -march=i386 : default ; -cpu-flags gcc OPTIONS : x86 : i486 : -march=i486 ; -cpu-flags gcc OPTIONS : x86 : i586 : -march=i586 ; -cpu-flags gcc OPTIONS : x86 : i686 : -march=i686 ; -cpu-flags gcc OPTIONS : x86 : pentium : -march=pentium ; -cpu-flags gcc OPTIONS : x86 : pentium-mmx : -march=pentium-mmx ; -cpu-flags gcc OPTIONS : x86 : pentiumpro : -march=pentiumpro ; -cpu-flags gcc OPTIONS : x86 : pentium2 : -march=pentium2 ; -cpu-flags gcc OPTIONS : x86 : pentium3 : -march=pentium3 ; -cpu-flags gcc OPTIONS : x86 : pentium3m : -march=pentium3m ; -cpu-flags gcc OPTIONS : x86 : pentium-m : -march=pentium-m ; -cpu-flags gcc OPTIONS : x86 : pentium4 : -march=pentium4 ; -cpu-flags gcc OPTIONS : x86 : pentium4m : -march=pentium4m ; -cpu-flags gcc OPTIONS : x86 : prescott : -march=prescott ; -cpu-flags gcc OPTIONS : x86 : nocona : -march=nocona ; -cpu-flags gcc OPTIONS : x86 : core2 : -march=core2 ; -cpu-flags gcc OPTIONS : x86 : k6 : -march=k6 ; -cpu-flags gcc OPTIONS : x86 : k6-2 : -march=k6-2 ; -cpu-flags gcc OPTIONS : x86 : k6-3 : -march=k6-3 ; -cpu-flags gcc OPTIONS : x86 : athlon : -march=athlon ; -cpu-flags gcc OPTIONS : x86 : athlon-tbird : -march=athlon-tbird ; -cpu-flags gcc OPTIONS : x86 : athlon-4 : -march=athlon-4 ; -cpu-flags gcc OPTIONS : x86 : athlon-xp : -march=athlon-xp ; -cpu-flags gcc OPTIONS : x86 : athlon-mp : -march=athlon-mp ; -## -cpu-flags gcc OPTIONS : x86 : k8 : -march=k8 ; -cpu-flags gcc OPTIONS : x86 : opteron : -march=opteron ; -cpu-flags gcc OPTIONS : x86 : athlon64 : -march=athlon64 ; -cpu-flags gcc OPTIONS : x86 : athlon-fx : -march=athlon-fx ; -cpu-flags gcc OPTIONS : x86 : winchip-c6 : -march=winchip-c6 ; -cpu-flags gcc OPTIONS : x86 : winchip2 : -march=winchip2 ; -cpu-flags gcc OPTIONS : x86 : c3 : -march=c3 ; -cpu-flags gcc OPTIONS : x86 : c3-2 : -march=c3-2 ; -# Sparc -cpu-flags gcc OPTIONS : sparc : c3 : -mcpu=c3 : default ; -cpu-flags gcc OPTIONS : sparc : v7 : -mcpu=v7 ; -cpu-flags gcc OPTIONS : sparc : cypress : -mcpu=cypress ; -cpu-flags gcc OPTIONS : sparc : v8 : -mcpu=v8 ; -cpu-flags gcc OPTIONS : sparc : supersparc : -mcpu=supersparc ; -cpu-flags gcc OPTIONS : sparc : sparclite : -mcpu=sparclite ; -cpu-flags gcc OPTIONS : sparc : hypersparc : -mcpu=hypersparc ; -cpu-flags gcc OPTIONS : sparc : sparclite86x : -mcpu=sparclite86x ; -cpu-flags gcc OPTIONS : sparc : f930 : -mcpu=f930 ; -cpu-flags gcc OPTIONS : sparc : f934 : -mcpu=f934 ; -cpu-flags gcc OPTIONS : sparc : sparclet : -mcpu=sparclet ; -cpu-flags gcc OPTIONS : sparc : tsc701 : -mcpu=tsc701 ; -cpu-flags gcc OPTIONS : sparc : v9 : -mcpu=v9 ; -cpu-flags gcc OPTIONS : sparc : ultrasparc : -mcpu=ultrasparc ; -cpu-flags gcc OPTIONS : sparc : ultrasparc3 : -mcpu=ultrasparc3 ; -# RS/6000 & PowerPC -cpu-flags gcc OPTIONS : power : 403 : -mcpu=403 ; -cpu-flags gcc OPTIONS : power : 505 : -mcpu=505 ; -cpu-flags gcc OPTIONS : power : 601 : -mcpu=601 ; -cpu-flags gcc OPTIONS : power : 602 : -mcpu=602 ; -cpu-flags gcc OPTIONS : power : 603 : -mcpu=603 ; -cpu-flags gcc OPTIONS : power : 603e : -mcpu=603e ; -cpu-flags gcc OPTIONS : power : 604 : -mcpu=604 ; -cpu-flags gcc OPTIONS : power : 604e : -mcpu=604e ; -cpu-flags gcc OPTIONS : power : 620 : -mcpu=620 ; -cpu-flags gcc OPTIONS : power : 630 : -mcpu=630 ; -cpu-flags gcc OPTIONS : power : 740 : -mcpu=740 ; -cpu-flags gcc OPTIONS : power : 7400 : -mcpu=7400 ; -cpu-flags gcc OPTIONS : power : 7450 : -mcpu=7450 ; -cpu-flags gcc OPTIONS : power : 750 : -mcpu=750 ; 
-cpu-flags gcc OPTIONS : power : 801 : -mcpu=801 ; -cpu-flags gcc OPTIONS : power : 821 : -mcpu=821 ; -cpu-flags gcc OPTIONS : power : 823 : -mcpu=823 ; -cpu-flags gcc OPTIONS : power : 860 : -mcpu=860 ; -cpu-flags gcc OPTIONS : power : 970 : -mcpu=970 ; -cpu-flags gcc OPTIONS : power : 8540 : -mcpu=8540 ; -cpu-flags gcc OPTIONS : power : power : -mcpu=power ; -cpu-flags gcc OPTIONS : power : power2 : -mcpu=power2 ; -cpu-flags gcc OPTIONS : power : power3 : -mcpu=power3 ; -cpu-flags gcc OPTIONS : power : power4 : -mcpu=power4 ; -cpu-flags gcc OPTIONS : power : power5 : -mcpu=power5 ; -cpu-flags gcc OPTIONS : power : powerpc : -mcpu=powerpc ; -cpu-flags gcc OPTIONS : power : powerpc64 : -mcpu=powerpc64 ; -cpu-flags gcc OPTIONS : power : rios : -mcpu=rios ; -cpu-flags gcc OPTIONS : power : rios1 : -mcpu=rios1 ; -cpu-flags gcc OPTIONS : power : rios2 : -mcpu=rios2 ; -cpu-flags gcc OPTIONS : power : rsc : -mcpu=rsc ; -cpu-flags gcc OPTIONS : power : rs64a : -mcpu=rs64 ; -# AIX variant of RS/6000 & PowerPC -toolset.flags gcc AROPTIONS 64/aix : "-X 64" ; diff --git a/jam-files/boost-build/tools/gcc.py b/jam-files/boost-build/tools/gcc.py deleted file mode 100644 index 2a3e675e..00000000 --- a/jam-files/boost-build/tools/gcc.py +++ /dev/null @@ -1,796 +0,0 @@ -# Status: being ported by Steven Watanabe -# Base revision: 47077 -# TODO: common.jam needs to be ported -# TODO: generators.jam needs to have register_c_compiler. -# -# Copyright 2001 David Abrahams. -# Copyright 2002-2006 Rene Rivera. -# Copyright 2002-2003 Vladimir Prus. -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov. -# Copyright 2007 Roland Schwarz -# Copyright 2007 Boris Gubenko. -# Copyright 2008 Steven Watanabe -# -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import os -import subprocess -import re - -import bjam - -from b2.tools import unix, common, rc, pch, builtin -from b2.build import feature, type, toolset, generators -from b2.util.utility import os_name, on_windows -from b2.manager import get_manager -from b2.build.generators import Generator -from b2.build.toolset import flags -from b2.util.utility import to_seq - -__debug = None - -def debug(): - global __debug - if __debug is None: - __debug = "--debug-configuration" in bjam.variable("ARGV") - return __debug - -feature.extend('toolset', ['gcc']) - - -toolset.inherit_generators('gcc', [], 'unix', ['unix.link', 'unix.link.dll']) -toolset.inherit_flags('gcc', 'unix') -toolset.inherit_rules('gcc', 'unix') - -generators.override('gcc.prebuilt', 'builtin.prebuilt') -generators.override('gcc.searched-lib-generator', 'searched-lib-generator') - -# Target naming is determined by types/lib.jam and the settings below this -# comment. -# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (mingw): -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (cygwin) i.e. cygwin -# libxxx.a static library -# xxx.dll DLL -# libxxx.dll.a import library -# -# Note: user can always override by using the @rule -# This settings have been choosen, so that mingw -# is in line with msvc naming conventions. For -# cygwin the cygwin naming convention has been choosen. 
- -# Make the "o" suffix used for gcc toolset on all -# platforms -type.set_generated_target_suffix('OBJ', ['gcc'], 'o') -type.set_generated_target_suffix('STATIC_LIB', ['gcc', 'cygwin'], 'a') - -type.set_generated_target_suffix('IMPORT_LIB', ['gcc', 'cygwin'], 'dll.a') -type.set_generated_target_prefix('IMPORT_LIB', ['gcc', 'cygwin'], 'lib') - -__machine_match = re.compile('^([^ ]+)') -__version_match = re.compile('^([0-9.]+)') - -def init(version = None, command = None, options = None): - """ - Initializes the gcc toolset for the given version. If necessary, command may - be used to specify where the compiler is located. The parameter 'options' is a - space-delimited list of options, each one specified as - option-value. Valid option names are: cxxflags, linkflags and - linker-type. Accepted linker-type values are gnu, darwin, osf, hpux or sun - and the default value will be selected based on the current OS. - Example: - using gcc : 3.4 : : foo bar sun ; - """ - - options = to_seq(options) - command = to_seq(command) - - # Information about the gcc command... - # The command. - command = to_seq(common.get_invocation_command('gcc', 'g++', command)) - # The root directory of the tool install. - root = feature.get_values('', options) ; - # The bin directory where to find the command to execute. - bin = None - # The flavor of compiler. - flavor = feature.get_values('', options) - # Autodetect the root and bin dir if not given. - if command: - if not bin: - bin = common.get_absolute_tool_path(command[-1]) - if not root: - root = os.path.dirname(bin) - # Autodetect the version and flavor if not given. - if command: - machine_info = subprocess.Popen(command + ['-dumpmachine'], stdout=subprocess.PIPE).communicate()[0] - machine = __machine_match.search(machine_info).group(1) - - version_info = subprocess.Popen(command + ['-dumpversion'], stdout=subprocess.PIPE).communicate()[0] - version = __version_match.search(version_info).group(1) - if not flavor and machine.find('mingw') != -1: - flavor = 'mingw' - - condition = None - if flavor: - condition = common.check_init_parameters('gcc', None, - ('version', version), - ('flavor', flavor)) - else: - condition = common.check_init_parameters('gcc', None, - ('version', version)) - - if command: - command = command[0] - - common.handle_options('gcc', condition, command, options) - - linker = feature.get_values('', options) - if not linker: - if os_name() == 'OSF': - linker = 'osf' - elif os_name() == 'HPUX': - linker = 'hpux' ; - else: - linker = 'gnu' - - init_link_flags('gcc', linker, condition) - - # If gcc is installed in non-standard location, we'd need to add - # LD_LIBRARY_PATH when running programs created with it (for unit-test/run - # rules). - if command: - # On multilib 64-bit boxes, there are both 32-bit and 64-bit libraries - # and all must be added to LD_LIBRARY_PATH. The linker will pick the - # right onces. Note that we don't provide a clean way to build 32-bit - # binary with 64-bit compiler, but user can always pass -m32 manually. - lib_path = [os.path.join(root, 'bin'), - os.path.join(root, 'lib'), - os.path.join(root, 'lib32'), - os.path.join(root, 'lib64')] - if debug(): - print 'notice: using gcc libraries ::', condition, '::', lib_path - toolset.flags('gcc.link', 'RUN_PATH', condition, lib_path) - - # If it's not a system gcc install we should adjust the various programs as - # needed to prefer using the install specific versions. This is essential - # for correct use of MinGW and for cross-compiling. - - # - The archive builder. 
- archiver = common.get_invocation_command('gcc', - 'ar', feature.get_values('', options), [bin], path_last=True) - toolset.flags('gcc.archive', '.AR', condition, [archiver]) - if debug(): - print 'notice: using gcc archiver ::', condition, '::', archiver - - # - The resource compiler. - rc_command = common.get_invocation_command_nodefault('gcc', - 'windres', feature.get_values('', options), [bin], path_last=True) - rc_type = feature.get_values('', options) - - if not rc_type: - rc_type = 'windres' - - if not rc_command: - # If we can't find an RC compiler we fallback to a null RC compiler that - # creates empty object files. This allows the same Jamfiles to work - # across the board. The null RC uses the assembler to create the empty - # objects, so configure that. - rc_command = common.get_invocation_command('gcc', 'as', [], [bin], path_last=True) - rc_type = 'null' - rc.configure(rc_command, condition, '' + rc_type) - -###if [ os.name ] = NT -###{ -### # This causes single-line command invocation to not go through .bat files, -### # thus avoiding command-line length limitations. -### JAMSHELL = % ; -###} - -#FIXME: when register_c_compiler is moved to -# generators, these should be updated -builtin.register_c_compiler('gcc.compile.c++', ['CPP'], ['OBJ'], ['gcc']) -builtin.register_c_compiler('gcc.compile.c', ['C'], ['OBJ'], ['gcc']) -builtin.register_c_compiler('gcc.compile.asm', ['ASM'], ['OBJ'], ['gcc']) - -# pch support - -# The compiler looks for a precompiled header in each directory just before it -# looks for the include file in that directory. The name searched for is the -# name specified in the #include directive with ".gch" suffix appended. The -# logic in gcc-pch-generator will make sure that BASE_PCH suffix is appended to -# full name of the header. - -type.set_generated_target_suffix('PCH', ['gcc'], 'gch') - -# GCC-specific pch generator. -class GccPchGenerator(pch.PchGenerator): - - # Inherit the __init__ method - - def run_pch(self, project, name, prop_set, sources): - # Find the header in sources. Ignore any CPP sources. - header = None - for s in sources: - if type.is_derived(s.type, 'H'): - header = s - - # Error handling: Base header file name should be the same as the base - # precompiled header name. - header_name = header.name - header_basename = os.path.basename(header_name).rsplit('.', 1)[0] - if header_basename != name: - location = project.project_module - ###FIXME: - raise Exception() - ### errors.user-error "in" $(location)": pch target name `"$(name)"' should be the same as the base name of header file `"$(header-name)"'" ; - - pch_file = Generator.run(self, project, name, prop_set, [header]) - - # return result of base class and pch-file property as usage-requirements - # FIXME: what about multiple results from generator.run? - return (property_set.create('' + pch_file[0], '-Winvalid-pch'), - pch_file) - - # Calls the base version specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. - def generated_targets(self, sources, prop_set, project, name = None): - name = sources[0].name - return Generator.generated_targets(self, sources, - prop_set, project, name) - -# Note: the 'H' source type will catch both '.h' header and '.hpp' header. The -# latter have HPP type, but HPP type is derived from H. The type of compilation -# is determined entirely by the destination type. 
-generators.register(GccPchGenerator('gcc.compile.c.pch', False, ['H'], ['C_PCH'], ['on', 'gcc' ])) -generators.register(GccPchGenerator('gcc.compile.c++.pch', False, ['H'], ['CPP_PCH'], ['on', 'gcc' ])) - -# Override default do-nothing generators. -generators.override('gcc.compile.c.pch', 'pch.default-c-pch-generator') -generators.override('gcc.compile.c++.pch', 'pch.default-cpp-pch-generator') - -flags('gcc.compile', 'PCH_FILE', ['on'], ['']) - -# Declare flags and action for compilation -flags('gcc.compile', 'OPTIONS', ['off'], ['-O0']) -flags('gcc.compile', 'OPTIONS', ['speed'], ['-O3']) -flags('gcc.compile', 'OPTIONS', ['space'], ['-Os']) - -flags('gcc.compile', 'OPTIONS', ['off'], ['-fno-inline']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Wno-inline']) -flags('gcc.compile', 'OPTIONS', ['full'], ['-finline-functions', '-Wno-inline']) - -flags('gcc.compile', 'OPTIONS', ['off'], ['-w']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Wall']) -flags('gcc.compile', 'OPTIONS', ['all'], ['-Wall', '-pedantic']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-Werror']) - -flags('gcc.compile', 'OPTIONS', ['on'], ['-g']) -flags('gcc.compile', 'OPTIONS', ['on'], ['-pg']) -flags('gcc.compile', 'OPTIONS', ['off'], ['-fno-rtti']) - -# On cygwin and mingw, gcc generates position independent code by default, and -# warns if -fPIC is specified. This might not be the right way of checking if -# we're using cygwin. For example, it's possible to run cygwin gcc from NT -# shell, or using crosscompiling. But we'll solve that problem when it's time. -# In that case we'll just add another parameter to 'init' and move this login -# inside 'init'. -if not os_name () in ['CYGWIN', 'NT']: - # This logic will add -fPIC for all compilations: - # - # lib a : a.cpp b ; - # obj b : b.cpp ; - # exe c : c.cpp a d ; - # obj d : d.cpp ; - # - # This all is fine, except that 'd' will be compiled with -fPIC even though - # it's not needed, as 'd' is used only in exe. However, it's hard to detect - # where a target is going to be used. Alternative, we can set -fPIC only - # when main target type is LIB but than 'b' will be compiled without -fPIC. - # In x86-64 that will lead to link errors. So, compile everything with - # -fPIC. - # - # Yet another alternative would be to create propagated - # feature, and set it when building shared libraries, but that's hard to - # implement and will increase target path length even more. - flags('gcc.compile', 'OPTIONS', ['shared'], ['-fPIC']) - -if os_name() != 'NT' and os_name() != 'OSF' and os_name() != 'HPUX': - # OSF does have an option called -soname but it doesn't seem to work as - # expected, therefore it has been disabled. - HAVE_SONAME = '' - SONAME_OPTION = '-h' - - -flags('gcc.compile', 'USER_OPTIONS', [], ['']) -flags('gcc.compile.c++', 'USER_OPTIONS',[], ['']) -flags('gcc.compile', 'DEFINES', [], ['']) -flags('gcc.compile', 'INCLUDES', [], ['']) - -engine = get_manager().engine() - -engine.register_action('gcc.compile.c++.pch', - '"$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"') - -engine.register_action('gcc.compile.c.pch', - '"$(CONFIG_COMMAND)" -x c-header $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"') - - -def gcc_compile_cpp(targets, sources, properties): - # Some extensions are compiled as C++ by default. For others, we need to - # pass -x c++. We could always pass -x c++ but distcc does not work with it. 
- extension = os.path.splitext (sources [0]) [1] - lang = '' - if not extension in ['.cc', '.cp', '.cxx', '.cpp', '.c++', '.C']: - lang = '-x c++' - get_manager().engine().set_target_variable (targets, 'LANG', lang) - engine.add_dependency(targets, bjam.call('get-target-variable', targets, 'PCH_FILE')) - -def gcc_compile_c(targets, sources, properties): - engine = get_manager().engine() - # If we use the name g++ then default file suffix -> language mapping does - # not work. So have to pass -x option. Maybe, we can work around this by - # allowing the user to specify both C and C++ compiler names. - #if $(>:S) != .c - #{ - engine.set_target_variable (targets, 'LANG', '-x c') - #} - engine.add_dependency(targets, bjam.call('get-target-variable', targets, 'PCH_FILE')) - -engine.register_action( - 'gcc.compile.c++', - '"$(CONFIG_COMMAND)" $(LANG) -ftemplate-depth-128 $(OPTIONS) ' + - '$(USER_OPTIONS) -D$(DEFINES) -I"$(PCH_FILE:D)" -I"$(INCLUDES)" ' + - '-c -o "$(<:W)" "$(>:W)"', - function=gcc_compile_cpp, - bound_list=['PCH_FILE']) - -engine.register_action( - 'gcc.compile.c', - '"$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) ' + - '-I"$(PCH_FILE:D)" -I"$(INCLUDES)" -c -o "$(<)" "$(>)"', - function=gcc_compile_c, - bound_list=['PCH_FILE']) - -def gcc_compile_asm(targets, sources, properties): - get_manager().engine().set_target_variable(targets, 'LANG', '-x assembler-with-cpp') - -engine.register_action( - 'gcc.compile.asm', - '"$(CONFIG_COMMAND)" $(LANG) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)"', - function=gcc_compile_asm) - - -class GccLinkingGenerator(unix.UnixLinkingGenerator): - """ - The class which check that we don't try to use the static - property while creating or using shared library, since it's not supported by - gcc/libc. - """ - def run(self, project, name, ps, sources): - # TODO: Replace this with the use of a target-os property. - - no_static_link = False - if bjam.variable('UNIX'): - no_static_link = True; - ##FIXME: what does this mean? -## { -## switch [ modules.peek : JAMUNAME ] -## { -## case * : no-static-link = true ; -## } -## } - - reason = None - if no_static_link and ps.get('runtime-link') == 'static': - if ps.get('link') == 'shared': - reason = "On gcc, DLL can't be build with 'static'." - elif type.is_derived(self.target_types[0], 'EXE'): - for s in sources: - source_type = s.type() - if source_type and type.is_derived(source_type, 'SHARED_LIB'): - reason = "On gcc, using DLLS together with the " +\ - "static options is not possible " - if reason: - print 'warning:', reason - print 'warning:',\ - "It is suggested to use 'static' together",\ - "with 'static'." ; - return - else: - generated_targets = unix.UnixLinkingGenerator.run(self, project, - name, ps, sources) - return generated_targets - -if on_windows(): - flags('gcc.link.dll', '.IMPLIB-COMMAND', [], ['-Wl,--out-implib,']) - generators.register( - GccLinkingGenerator('gcc.link', True, - ['OBJ', 'SEARCHED_LIB', 'STATIC_LIB', 'IMPORT_LIB'], - [ 'EXE' ], - [ 'gcc' ])) - generators.register( - GccLinkingGenerator('gcc.link.dll', True, - ['OBJ', 'SEARCHED_LIB', 'STATIC_LIB', 'IMPORT_LIB'], - ['IMPORT_LIB', 'SHARED_LIB'], - ['gcc'])) -else: - generators.register( - GccLinkingGenerator('gcc.link', True, - ['LIB', 'OBJ'], - ['EXE'], - ['gcc'])) - generators.register( - GccLinkingGenerator('gcc.link.dll', True, - ['LIB', 'OBJ'], - ['SHARED_LIB'], - ['gcc'])) - -# Declare flags for linking. -# First, the common flags. 
-flags('gcc.link', 'OPTIONS', ['on'], ['-g']) -flags('gcc.link', 'OPTIONS', ['on'], ['-pg']) -flags('gcc.link', 'USER_OPTIONS', [], ['']) -flags('gcc.link', 'LINKPATH', [], ['']) -flags('gcc.link', 'FINDLIBS-ST', [], ['']) -flags('gcc.link', 'FINDLIBS-SA', [], ['']) -flags('gcc.link', 'LIBRARIES', [], ['']) - -# For static we made sure there are no dynamic libraries in the -# link. On HP-UX not all system libraries exist as archived libraries (for -# example, there is no libunwind.a), so, on this platform, the -static option -# cannot be specified. -if os_name() != 'HPUX': - flags('gcc.link', 'OPTIONS', ['static'], ['-static']) - -# Now, the vendor specific flags. -# The parameter linker can be either gnu, darwin, osf, hpux or sun. -def init_link_flags(toolset, linker, condition): - """ - Now, the vendor specific flags. - The parameter linker can be either gnu, darwin, osf, hpux or sun. - """ - toolset_link = toolset + '.link' - if linker == 'gnu': - # Strip the binary when no debugging is needed. We use --strip-all flag - # as opposed to -s since icc (intel's compiler) is generally - # option-compatible with and inherits from the gcc toolset, but does not - # support -s. - - # FIXME: what does unchecked translate to? - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,--strip-all']) # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - flags(toolset_link, 'RPATH_LINK', condition, ['']) # : unchecked ; - flags(toolset_link, 'START-GROUP', condition, ['-Wl,--start-group'])# : unchecked ; - flags(toolset_link, 'END-GROUP', condition, ['-Wl,--end-group']) # : unchecked ; - - # gnu ld has the ability to change the search behaviour for libraries - # referenced by -l switch. These modifiers are -Bstatic and -Bdynamic - # and change search for -l switches that follow them. The following list - # shows the tried variants. - # The search stops at the first variant that has a match. - # *nix: -Bstatic -lxxx - # libxxx.a - # - # *nix: -Bdynamic -lxxx - # libxxx.so - # libxxx.a - # - # windows (mingw,cygwin) -Bstatic -lxxx - # libxxx.a - # xxx.lib - # - # windows (mingw,cygwin) -Bdynamic -lxxx - # libxxx.dll.a - # xxx.dll.a - # libxxx.a - # xxx.lib - # cygxxx.dll (*) - # libxxx.dll - # xxx.dll - # libxxx.a - # - # (*) This is for cygwin - # Please note that -Bstatic and -Bdynamic are not a guarantee that a - # static or dynamic lib indeed gets linked in. The switches only change - # search patterns! - - # On *nix mixing shared libs with static runtime is not a good idea. - flags(toolset_link, 'FINDLIBS-ST-PFX', - map(lambda x: x + '/shared', condition), - ['-Wl,-Bstatic']) # : unchecked ; - flags(toolset_link, 'FINDLIBS-SA-PFX', - map(lambda x: x + '/shared', condition), - ['-Wl,-Bdynamic']) # : unchecked ; - - # On windows allow mixing of static and dynamic libs with static - # runtime. - flags(toolset_link, 'FINDLIBS-ST-PFX', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bstatic']) # : unchecked ; - flags(toolset_link, 'FINDLIBS-SA-PFX', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bdynamic']) # : unchecked ; - flags(toolset_link, 'OPTIONS', - map(lambda x: x + '/static/windows', condition), - ['-Wl,-Bstatic']) # : unchecked ; - - elif linker == 'darwin': - # On Darwin, the -s option to ld does not work unless we pass -static, - # and passing -static unconditionally is a bad idea. So, don't pass -s. - # at all, darwin.jam will use separate 'strip' invocation. 
- flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - flags(toolset_link, 'RPATH_LINK', condition, ['']) # : unchecked ; - - elif linker == 'osf': - # No --strip-all, just -s. - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,-s']) - # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - # This does not supports -R. - flags(toolset_link, 'RPATH_OPTION', condition, ['-rpath']) # : unchecked ; - # -rpath-link is not supported at all. - - elif linker == 'sun': - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), ['-Wl,-s']) - # : unchecked ; - flags(toolset_link, 'RPATH', condition, ['']) # : unchecked ; - # Solaris linker does not have a separate -rpath-link, but allows to use - # -L for the same purpose. - flags(toolset_link, 'LINKPATH', condition, ['']) # : unchecked ; - - # This permits shared libraries with non-PIC code on Solaris. - # VP, 2004/09/07: Now that we have -fPIC hardcode in link.dll, the - # following is not needed. Whether -fPIC should be hardcoded, is a - # separate question. - # AH, 2004/10/16: it is still necessary because some tests link against - # static libraries that were compiled without PIC. - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/shared', condition), ['-mimpure-text']) - # : unchecked ; - - elif linker == 'hpux': - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/off', condition), - ['-Wl,-s']) # : unchecked ; - flags(toolset_link, 'OPTIONS', map(lambda x: x + '/shared', condition), - ['-fPIC']) # : unchecked ; - - else: - # FIXME: - errors.user_error( - "$(toolset) initialization: invalid linker '$(linker)' " + - "The value '$(linker)' specified for is not recognized. " + - "Possible values are 'gnu', 'darwin', 'osf', 'hpux' or 'sun'") - -# Declare actions for linking. -def gcc_link(targets, sources, properties): - engine = get_manager().engine() - engine.set_target_variable(targets, 'SPACE', ' ') - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. For now, serialize only gcc links, it might be a - # good idea to serialize all links. - engine.set_target_variable(targets, 'JAM_SEMAPHORE', 'gcc-link-semaphore') - -engine.register_action( - 'gcc.link', - '"$(CONFIG_COMMAND)" -L"$(LINKPATH)" ' + - '-Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" ' + - '-Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" ' + - '$(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) ' + - '-l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) ' + - '$(OPTIONS) $(USER_OPTIONS)', - function=gcc_link, - bound_list=['LIBRARIES']) - -# Default value. Mostly for the sake of intel-linux that inherits from gcc, but -# does not have the same logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble as on Linux, 'ar' is -# always available. -__AR = 'ar' - -flags('gcc.archive', 'AROPTIONS', [], ['']) - -def gcc_archive(targets, sources, properties): - # Always remove archive and start again. Here's rationale from - # - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. I moved a1.c to - # a2.c, updated my Jamfiles and rebuilt. My program was crashing with absurd - # errors. After some debugging I traced it back to the fact that a1.o was - # *still* in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM onto the - # archive action. 
That makes archives fail to build on NT when they have - # many files because it will no longer execute the action directly and blow - # the line length limit. Instead we remove the file in a different action, - # just before building the archive. - clean = targets[0] + '(clean)' - bjam.call('TEMPORARY', clean) - bjam.call('NOCARE', clean) - engine = get_manager().engine() - engine.set_target_variable('LOCATE', clean, bjam.call('get-target-variable', targets, 'LOCATE')) - engine.add_dependency(clean, sources) - engine.add_dependency(targets, clean) - engine.set_update_action('common.RmTemps', clean, targets) - -# Declare action for creating static libraries. -# The letter 'r' means to add files to the archive with replacement. Since we -# remove archive, we don't care about replacement, but there's no option "add -# without replacement". -# The letter 'c' suppresses the warning in case the archive does not exists yet. -# That warning is produced only on some platforms, for whatever reasons. -engine.register_action('gcc.archive', - '"$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)"', - function=gcc_archive, - flags=['piecemeal']) - -def gcc_link_dll(targets, sources, properties): - engine = get_manager().engine() - engine.set_target_variable(targets, 'SPACE', ' ') - engine.set_target_variable(targets, 'JAM_SEMAPHORE', 'gcc-link-semaphore') - engine.set_target_variable(targets, "HAVE_SONAME", HAVE_SONAME) - engine.set_target_variable(targets, "SONAME_OPTION", SONAME_OPTION) - -engine.register_action( - 'gcc.link.dll', - # Differ from 'link' above only by -shared. - '"$(CONFIG_COMMAND)" -L"$(LINKPATH)" ' + - '-Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" ' + - '"$(.IMPLIB-COMMAND)$(<[1])" -o "$(<[-1])" ' + - '$(HAVE_SONAME)-Wl,$(SONAME_OPTION)$(SPACE)-Wl,$(<[-1]:D=) ' + - '-shared $(START-GROUP) "$(>)" "$(LIBRARIES)" $(FINDLIBS-ST-PFX) ' + - '-l$(FINDLIBS-ST) $(FINDLIBS-SA-PFX) -l$(FINDLIBS-SA) $(END-GROUP) ' + - '$(OPTIONS) $(USER_OPTIONS)', - function = gcc_link_dll, - bound_list=['LIBRARIES']) - -# Set up threading support. It's somewhat contrived, so perform it at the end, -# to avoid cluttering other code. - -if on_windows(): - flags('gcc', 'OPTIONS', ['multi'], ['-mthreads']) -elif bjam.variable('UNIX'): - jamuname = bjam.variable('JAMUNAME') - host_os_name = jamuname[0] - if host_os_name.startswith('SunOS'): - flags('gcc', 'OPTIONS', ['multi'], ['-pthreads']) - flags('gcc', 'FINDLIBS-SA', [], ['rt']) - elif host_os_name == 'BeOS': - # BeOS has no threading options, don't set anything here. - pass - elif host_os_name.endswith('BSD'): - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - # there is no -lrt on BSD - elif host_os_name == 'DragonFly': - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - # there is no -lrt on BSD - DragonFly is a FreeBSD variant, - # which anoyingly doesn't say it's a *BSD. - elif host_os_name == 'IRIX': - # gcc on IRIX does not support multi-threading, don't set anything here. - pass - elif host_os_name == 'Darwin': - # Darwin has no threading options, don't set anything here. - pass - else: - flags('gcc', 'OPTIONS', ['multi'], ['-pthread']) - flags('gcc', 'FINDLIBS-SA', [], ['rt']) - -def cpu_flags(toolset, variable, architecture, instruction_set, values, default=None): - #FIXME: for some reason this fails. 
Probably out of date feature code -## if default: -## flags(toolset, variable, -## ['' + architecture + '/'], -## values) - flags(toolset, variable, - #FIXME: same as above - [##'/' + instruction_set, - '' + architecture + '/' + instruction_set], - values) - -# Set architecture/instruction-set options. -# -# x86 and compatible -flags('gcc', 'OPTIONS', ['x86/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['x86/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i386', ['-march=i386'], default=True) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i486', ['-march=i486']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i586', ['-march=i586']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'i686', ['-march=i686']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium', ['-march=pentium']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium-mmx', ['-march=pentium-mmx']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentiumpro', ['-march=pentiumpro']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium2', ['-march=pentium2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium3', ['-march=pentium3']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium3m', ['-march=pentium3m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium-m', ['-march=pentium-m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium4', ['-march=pentium4']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'pentium4m', ['-march=pentium4m']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'prescott', ['-march=prescott']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'nocona', ['-march=nocona']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6', ['-march=k6']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6-2', ['-march=k6-2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'k6-3', ['-march=k6-3']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon', ['-march=athlon']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-tbird', ['-march=athlon-tbird']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-4', ['-march=athlon-4']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-xp', ['-march=athlon-xp']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-mp', ['-march=athlon-mp']) -## -cpu_flags('gcc', 'OPTIONS', 'x86', 'k8', ['-march=k8']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'opteron', ['-march=opteron']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon64', ['-march=athlon64']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'athlon-fx', ['-march=athlon-fx']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'winchip-c6', ['-march=winchip-c6']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'winchip2', ['-march=winchip2']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'c3', ['-march=c3']) -cpu_flags('gcc', 'OPTIONS', 'x86', 'c3-2', ['-march=c3-2']) -# Sparc -flags('gcc', 'OPTIONS', ['sparc/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['sparc/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'c3', ['-mcpu=c3'], default=True) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v7', ['-mcpu=v7']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'cypress', ['-mcpu=cypress']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v8', ['-mcpu=v8']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'supersparc', ['-mcpu=supersparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclite', ['-mcpu=sparclite']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'hypersparc', ['-mcpu=hypersparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclite86x', ['-mcpu=sparclite86x']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'f930', ['-mcpu=f930']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'f934', ['-mcpu=f934']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'sparclet', ['-mcpu=sparclet']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'tsc701', ['-mcpu=tsc701']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'v9', ['-mcpu=v9']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'ultrasparc', 
['-mcpu=ultrasparc']) -cpu_flags('gcc', 'OPTIONS', 'sparc', 'ultrasparc3', ['-mcpu=ultrasparc3']) -# RS/6000 & PowerPC -flags('gcc', 'OPTIONS', ['power/32'], ['-m32']) -flags('gcc', 'OPTIONS', ['power/64'], ['-m64']) -cpu_flags('gcc', 'OPTIONS', 'power', '403', ['-mcpu=403']) -cpu_flags('gcc', 'OPTIONS', 'power', '505', ['-mcpu=505']) -cpu_flags('gcc', 'OPTIONS', 'power', '601', ['-mcpu=601']) -cpu_flags('gcc', 'OPTIONS', 'power', '602', ['-mcpu=602']) -cpu_flags('gcc', 'OPTIONS', 'power', '603', ['-mcpu=603']) -cpu_flags('gcc', 'OPTIONS', 'power', '603e', ['-mcpu=603e']) -cpu_flags('gcc', 'OPTIONS', 'power', '604', ['-mcpu=604']) -cpu_flags('gcc', 'OPTIONS', 'power', '604e', ['-mcpu=604e']) -cpu_flags('gcc', 'OPTIONS', 'power', '620', ['-mcpu=620']) -cpu_flags('gcc', 'OPTIONS', 'power', '630', ['-mcpu=630']) -cpu_flags('gcc', 'OPTIONS', 'power', '740', ['-mcpu=740']) -cpu_flags('gcc', 'OPTIONS', 'power', '7400', ['-mcpu=7400']) -cpu_flags('gcc', 'OPTIONS', 'power', '7450', ['-mcpu=7450']) -cpu_flags('gcc', 'OPTIONS', 'power', '750', ['-mcpu=750']) -cpu_flags('gcc', 'OPTIONS', 'power', '801', ['-mcpu=801']) -cpu_flags('gcc', 'OPTIONS', 'power', '821', ['-mcpu=821']) -cpu_flags('gcc', 'OPTIONS', 'power', '823', ['-mcpu=823']) -cpu_flags('gcc', 'OPTIONS', 'power', '860', ['-mcpu=860']) -cpu_flags('gcc', 'OPTIONS', 'power', '970', ['-mcpu=970']) -cpu_flags('gcc', 'OPTIONS', 'power', '8540', ['-mcpu=8540']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power', ['-mcpu=power']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power2', ['-mcpu=power2']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power3', ['-mcpu=power3']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power4', ['-mcpu=power4']) -cpu_flags('gcc', 'OPTIONS', 'power', 'power5', ['-mcpu=power5']) -cpu_flags('gcc', 'OPTIONS', 'power', 'powerpc', ['-mcpu=powerpc']) -cpu_flags('gcc', 'OPTIONS', 'power', 'powerpc64', ['-mcpu=powerpc64']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios', ['-mcpu=rios']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios1', ['-mcpu=rios1']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rios2', ['-mcpu=rios2']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rsc', ['-mcpu=rsc']) -cpu_flags('gcc', 'OPTIONS', 'power', 'rs64a', ['-mcpu=rs64']) -# AIX variant of RS/6000 & PowerPC -flags('gcc', 'OPTIONS', ['power/32/aix'], ['-maix32']) -flags('gcc', 'OPTIONS', ['power/64/aix'], ['-maix64']) -flags('gcc', 'AROPTIONS', ['power/64/aix'], ['-X 64']) diff --git a/jam-files/boost-build/tools/generate.jam b/jam-files/boost-build/tools/generate.jam deleted file mode 100644 index 6732fa35..00000000 --- a/jam-files/boost-build/tools/generate.jam +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Declares main target 'generate' used to produce targets by calling a -# user-provided rule that takes and produces virtual targets. - -import "class" : new ; -import errors ; -import feature ; -import project ; -import property ; -import property-set ; -import targets ; -import regex ; - - -feature.feature generating-rule : : free ; - - -class generated-target-class : basic-target -{ - import errors ; - import indirect ; - import virtual-target ; - - rule __init__ ( name : project : sources * : requirements * - : default-build * : usage-requirements * ) - { - basic-target.__init__ $(name) : $(project) : $(sources) - : $(requirements) : $(default-build) : $(usage-requirements) ; - - if ! 
[ $(self.requirements).get ] - { - errors.user-error "The generate rule requires the " - "property to be set" ; - } - } - - rule construct ( name : sources * : property-set ) - { - local result ; - local gr = [ $(property-set).get ] ; - - # FIXME: this is a copy-paste from virtual-target.jam. We should add a - # utility rule to call a rule like this. - local rule-name = [ MATCH ^@(.*) : $(gr) ] ; - if $(rule-name) - { - if $(gr[2]) - { - local target-name = [ full-name ] ; - errors.user-error "Multiple properties" - "encountered for target $(target-name)." ; - } - - result = [ indirect.call $(rule-name) $(self.project) $(name) - : $(property-set) : $(sources) ] ; - - if ! $(result) - { - ECHO "warning: Unable to construct" [ full-name ] ; - } - } - - local ur ; - local targets ; - - if $(result) - { - if [ class.is-a $(result[1]) : property-set ] - { - ur = $(result[1]) ; - targets = $(result[2-]) ; - } - else - { - ur = [ property-set.empty ] ; - targets = $(result) ; - } - } - # FIXME: the following loop should be doable using sequence.transform or - # some similar utility rule. - local rt ; - for local t in $(targets) - { - rt += [ virtual-target.register $(t) ] ; - } - return $(ur) $(rt) ; - } -} - - -rule generate ( name : sources * : requirements * : default-build * - : usage-requirements * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new generated-target-class $(name) : $(project) - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) : $(project) ] - ] ; -} - -IMPORT $(__name__) : generate : : generate ; diff --git a/jam-files/boost-build/tools/gettext.jam b/jam-files/boost-build/tools/gettext.jam deleted file mode 100644 index 99a43ffe..00000000 --- a/jam-files/boost-build/tools/gettext.jam +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module support GNU gettext internationalization utilities. -# -# It provides two main target rules: 'gettext.catalog', used for -# creating machine-readable catalogs from translations files, and -# 'gettext.update', used for update translation files from modified -# sources. -# -# To add i18n support to your application you should follow these -# steps. -# -# - Decide on a file name which will contain translations and -# what main target name will be used to update it. For example:: -# -# gettext.update update-russian : russian.po a.cpp my_app ; -# -# - Create the initial translation file by running:: -# -# bjam update-russian -# -# - Edit russian.po. For example, you might change fields like LastTranslator. -# -# - Create a main target for final message catalog:: -# -# gettext.catalog russian : russian.po ; -# -# The machine-readable catalog will be updated whenever you update -# "russian.po". The "russian.po" file will be updated only on explicit -# request. When you're ready to update translations, you should -# -# - Run:: -# -# bjam update-russian -# -# - Edit "russian.po" in appropriate editor. -# -# The next bjam run will convert "russian.po" into machine-readable form. -# -# By default, translations are marked by 'i18n' call. The 'gettext.keyword' -# feature can be used to alter this. 
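
The workflow sketched in the header above comes down to three GNU gettext tool invocations, which the actions declared further down in this file wrap: xgettext extracts messages into a .pot template, msgmerge folds new messages into an existing .po translation, and msgfmt compiles the edited translation into a machine-readable catalog. As a rough illustration only (the helper names and file names are invented, this is not part of Boost.Build), the same pipeline in plain Python might look like:

    import os
    import shutil
    import subprocess

    def update_catalog(po_file, sources, keyword="i18n"):
        # Extract translatable strings into a .pot template, then either seed
        # the .po file from it (first run) or merge the new messages into it.
        pot_file = os.path.splitext(po_file)[0] + ".pot"
        subprocess.check_call(["xgettext", "-k" + keyword, "-o", pot_file] + sources)
        if not os.path.exists(po_file):
            shutil.copyfile(pot_file, po_file)
        else:
            subprocess.check_call(["msgmerge", "-U", po_file, pot_file])

    def compile_catalog(po_file, mo_file):
        # Produce the catalog consumed by the application at run time.
        subprocess.check_call(["msgfmt", "-o", mo_file, po_file])

    # e.g. update_catalog("russian.po", ["a.cpp"]); compile_catalog("russian.po", "russian.mo")
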
- - -import targets ; -import property-set ; -import virtual-target ; -import "class" : new ; -import project ; -import type ; -import generators ; -import errors ; -import feature : feature ; -import toolset : flags ; -import regex ; - -.path = "" ; - -# Initializes the gettext module. -rule init ( path ? # Path where all tools are located. If not specified, - # they should be in PATH. - ) -{ - if $(.initialized) && $(.path) != $(path) - { - errors.error "Attempt to reconfigure with different path" ; - } - .initialized = true ; - if $(path) - { - .path = $(path)/ ; - } -} - -# Creates a main target 'name', which, when updated, will cause -# file 'existing-translation' to be updated with translations -# extracted from 'sources'. It's possible to specify main target -# in sources --- it which case all target from dependency graph -# of those main targets will be scanned, provided they are of -# appropricate type. The 'gettext.types' feature can be used to -# control the types. -# -# The target will be updated only if explicitly requested on the -# command line. -rule update ( name : existing-translation sources + : requirements * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(name) : $(project) : gettext.UPDATE : - $(existing-translation) $(sources) - : [ targets.main-target-requirements $(requirements) : $(project) ] - ] ; - $(project).mark-target-as-explicit $(name) ; -} - - -# The human editable source, containing translation. -type.register gettext.PO : po ; -# The machine readable message catalog. -type.register gettext.catalog : mo ; -# Intermediate type produce by extracting translations from -# sources. -type.register gettext.POT : pot ; -# Pseudo type used to invoke update-translations generator -type.register gettext.UPDATE ; - -# Identifies the keyword that should be used when scanning sources. -# Default: i18n -feature gettext.keyword : : free ; -# Contains space-separated list of sources types which should be scanned. -# Default: "C CPP" -feature gettext.types : : free ; - -generators.register-standard gettext.compile : gettext.PO : gettext.catalog ; - -class update-translations-generator : generator -{ - import regex : split ; - import property-set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - # The rule should be called with at least two sources. The first source - # is the translation (.po) file to update. The remaining sources are targets - # which should be scanned for new messages. All sources files for those targets - # will be found and passed to the 'xgettext' utility, which extracts the - # messages for localization. Those messages will be merged to the .po file. - rule run ( project name ? : property-set : sources * : multiple ? ) - { - local types = [ $(property-set).get ] ; - types ?= "C CPP" ; - types = [ regex.split $(types) " " ] ; - - local keywords = [ $(property-set).get ] ; - property-set = [ property-set.create $(keywords:G=) ] ; - - # First deterime the list of sources that must be scanned for - # messages. - local all-sources ; - # CONSIDER: I'm not sure if the logic should be the same as for 'stage': - # i.e. following dependency properties as well. 
- for local s in $(sources[2-]) - { - all-sources += [ virtual-target.traverse $(s) : : include-sources ] ; - } - local right-sources ; - for local s in $(all-sources) - { - if [ $(s).type ] in $(types) - { - right-sources += $(s) ; - } - } - - local .constructed ; - if $(right-sources) - { - # Create the POT file, which will contain list of messages extracted - # from the sources. - local extract = - [ new action $(right-sources) : gettext.extract : $(property-set) ] ; - local new-messages = [ new file-target $(name) : gettext.POT - : $(project) : $(extract) ] ; - - # Create a notfile target which will update the existing translation file - # with new messages. - local a = [ new action $(sources[1]) $(new-messages) - : gettext.update-po-dispatch ] ; - local r = [ new notfile-target $(name) : $(project) : $(a) ] ; - .constructed = [ virtual-target.register $(r) ] ; - } - else - { - errors.error "No source could be scanned by gettext tools" ; - } - return $(.constructed) ; - } -} -generators.register [ new update-translations-generator gettext.update : : gettext.UPDATE ] ; - -flags gettext.extract KEYWORD ; -actions extract -{ - $(.path)xgettext -k$(KEYWORD:E=i18n) -o $(<) $(>) -} - -# Does realy updating of po file. The tricky part is that -# we're actually updating one of the sources: -# $(<) is the NOTFILE target we're updating -# $(>[1]) is the PO file to be really updated. -# $(>[2]) is the PO file created from sources. -# -# When file to be updated does not exist (during the -# first run), we need to copy the file created from sources. -# In all other cases, we need to update the file. -rule update-po-dispatch -{ - NOCARE $(>[1]) ; - gettext.create-po $(<) : $(>) ; - gettext.update-po $(<) : $(>) ; - _ on $(<) = " " ; - ok on $(<) = "" ; - EXISTING_PO on $(<) = $(>[1]) ; -} - -# Due to fancy interaction of existing and updated, this rule can be called with -# one source, in which case we copy the lonely source into EXISTING_PO, or with -# two sources, in which case the action body expands to nothing. I'd really like -# to have "missing" action modifier. -actions quietly existing updated create-po bind EXISTING_PO -{ - cp$(_)"$(>[1])"$(_)"$(EXISTING_PO)"$($(>[2]:E=ok)) -} - -actions updated update-po bind EXISTING_PO -{ - $(.path)msgmerge$(_)-U$(_)"$(EXISTING_PO)"$(_)"$(>[1])" -} - -actions gettext.compile -{ - $(.path)msgfmt -o $(<) $(>) -} - -IMPORT $(__name__) : update : : gettext.update ; diff --git a/jam-files/boost-build/tools/gfortran.jam b/jam-files/boost-build/tools/gfortran.jam deleted file mode 100644 index 0aa69b85..00000000 --- a/jam-files/boost-build/tools/gfortran.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? 
: command * : options * ) -{ -} - -# Declare flags and action for compilation -flags gfortran OPTIONS ; - -flags gfortran OPTIONS off : -O0 ; -flags gfortran OPTIONS speed : -O3 ; -flags gfortran OPTIONS space : -Os ; - -flags gfortran OPTIONS on : -g ; -flags gfortran OPTIONS on : -pg ; - -flags gfortran OPTIONS shared/LIB : -fPIC ; - -flags gfortran DEFINES ; -flags gfortran INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - gcc -Wall $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler gfortran.compile.fortran : FORTRAN FORTRAN90 : OBJ ; diff --git a/jam-files/boost-build/tools/hp_cxx.jam b/jam-files/boost-build/tools/hp_cxx.jam deleted file mode 100644 index 86cd783e..00000000 --- a/jam-files/boost-build/tools/hp_cxx.jam +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2001 David Abrahams. -# Copyright 2004, 2005 Markus Schoepflin. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# -# HP CXX compiler -# See http://h30097.www3.hp.com/cplus/?jumpid=reg_R1002_USEN -# -# -# Notes on this toolset: -# -# - Because of very subtle issues with the default ansi mode, strict_ansi mode -# is used for compilation. One example of things that don't work correctly in -# the default ansi mode is overload resolution of function templates when -# mixed with non-template functions. -# -# - For template instantiation "-timplicit_local" is used. Previously, -# "-tlocal" has been tried to avoid the need for a template repository -# but this doesn't work with manually instantiated templates. "-tweak" -# has not been used to avoid the stream of warning messages issued by -# ar or ld when creating a library or linking an application. -# -# - Debug symbols are generated with "-g3", as this works both in debug and -# release mode. When compiling C++ code without optimization, we additionally -# use "-gall", which generates full symbol table information for all classes, -# structs, and unions. As this turns off optimization, it can't be used when -# optimization is needed. -# - -import feature generators common ; -import toolset : flags ; - -feature.extend toolset : hp_cxx ; -feature.extend c++abi : cxxarm ; - -# Inherit from Unix toolset to get library ordering magic. -toolset.inherit hp_cxx : unix ; - -generators.override hp_cxx.prebuilt : builtin.lib-generator ; -generators.override hp_cxx.prebuilt : builtin.prebuilt ; -generators.override hp_cxx.searched-lib-generator : searched-lib-generator ; - - -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters hp_cxx : version $(version) ] ; - - local command = [ common.get-invocation-command hp_cxx : cxx : $(command) ] ; - - if $(command) - { - local root = [ common.get-absolute-tool-path $(command[-1]) ] ; - - if $(root) - { - flags hp_cxx .root $(condition) : "\"$(root)\"/" ; - } - } - # If we can't find 'cxx' anyway, at least show 'cxx' in the commands - command ?= cxx ; - - common.handle-options hp_cxx : $(condition) : $(command) : $(options) ; -} - -generators.register-c-compiler hp_cxx.compile.c++ : CPP : OBJ : hp_cxx ; -generators.register-c-compiler hp_cxx.compile.c : C : OBJ : hp_cxx ; - - - -# No static linking as far as I can tell. 
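
The flags declarations in gfortran.jam above, and in the other toolset files in this patch, all follow the same pattern: a variable such as OPTIONS accumulates extra command-line switches whenever the build property set satisfies a feature condition such as the optimization level or debug-symbols setting. A minimal model of that lookup, illustrative only and not Boost.Build's real flags machinery (the feature names are reconstructed as an assumption):

    # (variable, {feature: value} condition, options added when the condition holds)
    FLAG_TABLE = [
        ("OPTIONS", {"optimization": "off"},   ["-O0"]),
        ("OPTIONS", {"optimization": "speed"}, ["-O3"]),
        ("OPTIONS", {"optimization": "space"}, ["-Os"]),
        ("OPTIONS", {"debug-symbols": "on"},   ["-g"]),
        ("OPTIONS", {"profiling": "on"},       ["-pg"]),
    ]

    def resolve(variable, properties):
        # Collect every option whose condition is satisfied by the property set.
        out = []
        for var, condition, options in FLAG_TABLE:
            if var == variable and all(properties.get(f) == v for f, v in condition.items()):
                out.extend(options)
        return out

    # resolve("OPTIONS", {"optimization": "speed", "profiling": "on"}) -> ['-O3', '-pg']
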
-# flags cxx LINKFLAGS static : -bstatic ; -flags hp_cxx.compile OPTIONS on : -g3 ; -flags hp_cxx.compile OPTIONS off/on : -gall ; -flags hp_cxx.link OPTIONS on : -g ; -flags hp_cxx.link OPTIONS off : -s ; - -flags hp_cxx.compile OPTIONS off : -O0 ; -flags hp_cxx.compile OPTIONS speed/on : -O2 ; -flags hp_cxx.compile OPTIONS speed : -O2 ; - -# This (undocumented) macro needs to be defined to get all C function -# overloads required by the C++ standard. -flags hp_cxx.compile.c++ OPTIONS : -D__CNAME_OVERLOADS ; - -# Added for threading support -flags hp_cxx.compile OPTIONS multi : -pthread ; -flags hp_cxx.link OPTIONS multi : -pthread ; - -flags hp_cxx.compile OPTIONS space/on : size ; -flags hp_cxx.compile OPTIONS space : -O1 ; -flags hp_cxx.compile OPTIONS off : -inline none ; - -# The compiler versions tried (up to V6.5-040) hang when compiling Boost code -# with full inlining enabled. So leave it at the default level for now. -# -# flags hp_cxx.compile OPTIONS full : -inline all ; - -flags hp_cxx.compile OPTIONS on : -pg ; -flags hp_cxx.link OPTIONS on : -pg ; - -# Selection of the object model. This flag is needed on both the C++ compiler -# and linker command line. - -# Unspecified ABI translates to '-model ansi' as most -# standard-conforming. -flags hp_cxx.compile.c++ OPTIONS : -model ansi : : hack-hack ; -flags hp_cxx.compile.c++ OPTIONS cxxarm : -model arm ; -flags hp_cxx.link OPTIONS : -model ansi : : hack-hack ; -flags hp_cxx.link OPTIONS cxxarm : -model arm ; - -# Display a descriptive tag together with each compiler message. This tag can -# be used by the user to explicitely suppress the compiler message. -flags hp_cxx.compile OPTIONS : -msg_display_tag ; - -flags hp_cxx.compile OPTIONS ; -flags hp_cxx.compile.c++ OPTIONS ; -flags hp_cxx.compile DEFINES ; -flags hp_cxx.compile INCLUDES ; -flags hp_cxx.link OPTIONS ; - -flags hp_cxx.link LIBPATH ; -flags hp_cxx.link LIBRARIES ; -flags hp_cxx.link FINDLIBS-ST ; -flags hp_cxx.link FINDLIBS-SA ; - -flags hp_cxx.compile.c++ TEMPLATE_DEPTH ; - -actions link bind LIBRARIES -{ - $(CONFIG_COMMAND) -noimplicit_include $(OPTIONS) -o "$(<)" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lrt -lm -} - -# When creating dynamic libraries, we don't want to be warned about unresolved -# symbols, therefore all unresolved symbols are marked as expected by -# '-expect_unresolved *'. This also mirrors the behaviour of the GNU tool -# chain. - -actions link.dll bind LIBRARIES -{ - $(CONFIG_COMMAND) -shared -expect_unresolved \* -noimplicit_include $(OPTIONS) -o "$(<[1])" -L$(LIBPATH) "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) -lm -} - - -# Note: Relaxed ANSI mode (-std) is used for compilation because in strict ANSI -# C89 mode (-std1) the compiler doesn't accept C++ comments in C files. As -std -# is the default, no special flag is needed. -actions compile.c -{ - $(.root:E=)cc -c $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)" -} - -# Note: The compiler is forced to compile the files as C++ (-x cxx) because -# otherwise it will silently ignore files with no file extension. -# -# Note: We deliberately don't suppress any warnings on the compiler command -# line, the user can always do this in a customized toolset later on. - -rule compile.c++ -{ - # We preprocess the TEMPLATE_DEPTH command line option here because we found - # no way to do it correctly in the actual action code. 
There we either get - # the -pending_instantiations parameter when no c++-template-depth property - # has been specified or we get additional quotes around - # "-pending_instantiations ". - local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ; - TEMPLATE_DEPTH on $(1) = "-pending_instantiations "$(template-depth) ; -} - -actions compile.c++ -{ - $(CONFIG_COMMAND) -x cxx -c -std strict_ansi -nopure_cname -noimplicit_include -timplicit_local -ptr "$(<[1]:D)/cxx_repository" $(OPTIONS) $(TEMPLATE_DEPTH) -D$(DEFINES) -I"$(INCLUDES)" -o "$(<)" "$(>)" -} - -# Always create archive from scratch. See the gcc toolet for rationale. -RM = [ common.rm-command ] ; -actions together piecemeal archive -{ - $(RM) "$(<)" - ar rc $(<) $(>) -} diff --git a/jam-files/boost-build/tools/hpfortran.jam b/jam-files/boost-build/tools/hpfortran.jam deleted file mode 100644 index 96e8d18b..00000000 --- a/jam-files/boost-build/tools/hpfortran.jam +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? : command * : options * ) -{ -} - -# Declare flags and action for compilation -flags hpfortran OPTIONS off : -O0 ; -flags hpfortran OPTIONS speed : -O3 ; -flags hpfortran OPTIONS space : -O1 ; - -flags hpfortran OPTIONS on : -g ; -flags hpfortran OPTIONS on : -pg ; - -flags hpfortran DEFINES ; -flags hpfortran INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - f77 +DD64 $(OPTIONS) -D$(DEFINES) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler hpfortran.compile.fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/ifort.jam b/jam-files/boost-build/tools/ifort.jam deleted file mode 100644 index eb7c1988..00000000 --- a/jam-files/boost-build/tools/ifort.jam +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? : command * : options * ) -{ -} - -# Declare flags and action for compilation -flags ifort OPTIONS ; - -flags ifort OPTIONS off : /Od ; -flags ifort OPTIONS speed : /O3 ; -flags ifort OPTIONS space : /O1 ; - -flags ifort OPTIONS on : /debug:full ; -flags ifort OPTIONS on : /Qprof_gen ; - -flags ifort.compile FFLAGS off/shared : /MD ; -flags ifort.compile FFLAGS on/shared : /MDd ; -flags ifort.compile FFLAGS off/static/single : /ML ; -flags ifort.compile FFLAGS on/static/single : /MLd ; -flags ifort.compile FFLAGS off/static/multi : /MT ; -flags ifort.compile FFLAGS on/static/multi : /MTd ; - -flags ifort DEFINES ; -flags ifort INCLUDES ; - -rule compile.fortran -{ -} - -actions compile.fortran -{ - ifort $(FFLAGS) $(OPTIONS) /names:lowercase /D$(DEFINES) /I"$(INCLUDES)" /c /object:"$(<)" "$(>)" -} - -generators.register-fortran-compiler ifort.compile.fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/intel-darwin.jam b/jam-files/boost-build/tools/intel-darwin.jam deleted file mode 100644 index aa0fd8fb..00000000 --- a/jam-files/boost-build/tools/intel-darwin.jam +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Copyright Noel Belcourt 2007. 
-# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -import intel ; -import feature : feature ; -import os ; -import toolset ; -import toolset : flags ; -import gcc ; -import common ; -import errors ; -import generators ; - -feature.extend-subfeature toolset intel : platform : darwin ; - -toolset.inherit-generators intel-darwin - intel darwin - : gcc - # Don't inherit PCH generators. They were not tested, and probably - # don't work for this compiler. - : gcc.mingw.link gcc.mingw.link.dll gcc.compile.c.pch gcc.compile.c++.pch - ; - -generators.override intel-darwin.prebuilt : builtin.lib-generator ; -generators.override intel-darwin.prebuilt : builtin.prebuilt ; -generators.override intel-darwin.searched-lib-generator : searched-lib-generator ; - -toolset.inherit-rules intel-darwin : gcc ; -toolset.inherit-flags intel-darwin : gcc - : off on full space - off all on - x86/32 - x86/64 - ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# vectorization diagnostics -feature vectorize : off on full ; - -# Initializes the intel-darwin toolset -# version in mandatory -# name (default icc) is used to invoke the specified intel complier -# compile and link options allow you to specify addition command line options for each version -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters intel-darwin - : version $(version) ] ; - - command = [ common.get-invocation-command intel-darwin : icc - : $(command) : /opt/intel_cc_80/bin ] ; - - common.handle-options intel-darwin : $(condition) : $(command) : $(options) ; - - gcc.init-link-flags intel-darwin darwin $(condition) ; - - # handle - # local library-path = [ feature.get-values : $(options) ] ; - # flags intel-darwin.link USER_OPTIONS $(condition) : [ feature.get-values : $(options) ] ; - - local root = [ feature.get-values : $(options) ] ; - local bin ; - if $(command) || $(root) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - - if $(root) - { - # Libraries required to run the executable may be in either - # $(root)/lib (10.1 and earlier) - # or - # $(root)/lib/architecture-name (11.0 and later: - local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ; - if $(.debug-configuration) - { - ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ; - } - flags intel-darwin.link RUN_PATH $(condition) : $(lib_path) ; - } - } - - local m = [ MATCH (..).* : $(version) ] ; - local n = [ MATCH (.)\\. 
: $(m) ] ; - if $(n) { - m = $(n) ; - } - - local major = $(m) ; - - if $(major) = "9" { - flags intel-darwin.compile OPTIONS $(condition)/off : -Ob0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -Ob1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -Ob2 ; - flags intel-darwin.compile OPTIONS $(condition)/off : -vec-report0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -vec-report1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -vec-report5 ; - flags intel-darwin.link OPTIONS $(condition)/static : -static -static-libcxa -lstdc++ -lpthread ; - flags intel-darwin.link OPTIONS $(condition)/shared : -shared-libcxa -lstdc++ -lpthread ; - } - else { - flags intel-darwin.compile OPTIONS $(condition)/off : -inline-level=0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -inline-level=1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -inline-level=2 ; - flags intel-darwin.compile OPTIONS $(condition)/off : -vec-report0 ; - flags intel-darwin.compile OPTIONS $(condition)/on : -vec-report1 ; - flags intel-darwin.compile OPTIONS $(condition)/full : -vec-report5 ; - flags intel-darwin.link OPTIONS $(condition)/static : -static -static-intel -lstdc++ -lpthread ; - flags intel-darwin.link OPTIONS $(condition)/shared : -shared-intel -lstdc++ -lpthread ; - } - - local minor = [ MATCH ".*\\.(.).*" : $(version) ] ; - - # wchar_t char_traits workaround for compilers older than 10.2 - if $(major) = "9" || ( $(major) = "10" && ( $(minor) = "0" || $(minor) = "1" ) ) { - flags intel-darwin.compile DEFINES $(condition) : __WINT_TYPE__=int : unchecked ; - } -} - -SPACE = " " ; - -flags intel-darwin.compile OPTIONS ; -flags intel-darwin.compile OPTIONS ; -# flags intel-darwin.compile INCLUDES ; - -flags intel-darwin.compile OPTIONS space : -O1 ; # no specific space optimization flag in icc - -# -cpu-type-em64t = prescott nocona ; -flags intel-darwin.compile OPTIONS $(cpu-type-em64t)/32 : -m32 ; # -mcmodel=small ; -flags intel-darwin.compile OPTIONS $(cpu-type-em64t)/64 : -m64 ; # -mcmodel=large ; - -flags intel-darwin.compile.c OPTIONS off : -w0 ; -flags intel-darwin.compile.c OPTIONS on : -w1 ; -flags intel-darwin.compile.c OPTIONS all : -w2 ; - -flags intel-darwin.compile.c++ OPTIONS off : -w0 ; -flags intel-darwin.compile.c++ OPTIONS on : -w1 ; -flags intel-darwin.compile.c++ OPTIONS all : -w2 ; - -actions compile.c -{ - "$(CONFIG_COMMAND)" -xc $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -xc++ $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -flags intel-darwin ARFLAGS ; - -# Default value. Mostly for the sake of intel-linux -# that inherits from gcc, but does not has the same -# logic to set the .AR variable. We can put the same -# logic in intel-linux, but that's hardly worth the trouble -# as on Linux, 'ar' is always available. -.AR = ar ; - -rule archive ( targets * : sources * : properties * ) -{ - # Always remove archive and start again. Here's rationale from - # Andre Hentz: - # - # I had a file, say a1.c, that was included into liba.a. - # I moved a1.c to a2.c, updated my Jamfiles and rebuilt. - # My program was crashing with absurd errors. - # After some debugging I traced it back to the fact that a1.o was *still* - # in liba.a - # - # Rene Rivera: - # - # Originally removing the archive was done by splicing an RM - # onto the archive action. 
That makes archives fail to build on NT - # when they have many files because it will no longer execute the - # action directly and blow the line length limit. Instead we - # remove the file in a different action, just before the building - # of the archive. - # - local clean.a = $(targets[1])(clean) ; - TEMPORARY $(clean.a) ; - NOCARE $(clean.a) ; - LOCATE on $(clean.a) = [ on $(targets[1]) return $(LOCATE) ] ; - DEPENDS $(clean.a) : $(sources) ; - DEPENDS $(targets) : $(clean.a) ; - common.RmTemps $(clean.a) : $(targets) ; -} - -actions piecemeal archive -{ - "$(.AR)" $(AROPTIONS) rc "$(<)" "$(>)" - "ranlib" -cs "$(<)" -} - -flags intel-darwin.link USER_OPTIONS ; - -# Declare actions for linking -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since - # running N links in parallel is just slower. - JAM_SEMAPHORE on $(targets) = intel-darwin-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(USER_OPTIONS) -L"$(LINKPATH)" -o "$(<)" -single_module -dynamiclib -install_name "$(<[1]:D=)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) -} diff --git a/jam-files/boost-build/tools/intel-linux.jam b/jam-files/boost-build/tools/intel-linux.jam deleted file mode 100644 index d9164add..00000000 --- a/jam-files/boost-build/tools/intel-linux.jam +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (c) 2003 Michael Stevens -# Copyright (c) 2011 Bryce Lelbach -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import toolset ; -import feature ; -import toolset : flags ; - -import intel ; -import gcc ; -import common ; -import errors ; -import generators ; -import type ; -import numbers ; - -feature.extend-subfeature toolset intel : platform : linux ; - -toolset.inherit-generators intel-linux - intel linux : gcc : gcc.mingw.link gcc.mingw.link.dll ; -generators.override intel-linux.prebuilt : builtin.lib-generator ; -generators.override intel-linux.prebuilt : builtin.prebuilt ; -generators.override intel-linux.searched-lib-generator : searched-lib-generator ; - -# Override default do-nothing generators. -generators.override intel-linux.compile.c.pch : pch.default-c-pch-generator ; -generators.override intel-linux.compile.c++.pch : pch.default-cpp-pch-generator ; - -type.set-generated-target-suffix PCH : intel linux : pchi ; - -toolset.inherit-rules intel-linux : gcc ; -toolset.inherit-flags intel-linux : gcc - : off on full - space speed - off all on - ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Initializes the intel-linux toolset -# version in mandatory -# name (default icpc) is used to invoke the specified intel-linux complier -# compile and link options allow you to specify addition command line options for each version -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters intel-linux - : version $(version) ] ; - - if $(.debug-configuration) - { - ECHO "notice: intel-linux version is" $(version) ; - } - - local default_path ; - - # Intel C++ Composer XE 2011 for Linux, aka Intel C++ Compiler XE 12.0, - # aka intel-linux-12.0. 
In this version, Intel thankfully decides to install - # to a sane 'intel' folder in /opt. - if [ MATCH "(12[.]0|12)" : $(version) ] - { default_path = /opt/intel/bin ; } - # Intel C++ Compiler 11.1. - else if [ MATCH "(11[.]1)" : $(version) ] - { default_path = /opt/intel_cce_11.1.064.x86_64/bin ; } - # Intel C++ Compiler 11.0. - else if [ MATCH "(11[.]0|11)" : $(version) ] - { default_path = /opt/intel_cce_11.0.074.x86_64/bin ; } - # Intel C++ Compiler 10.1. - else if [ MATCH "(10[.]1)" : $(version) ] - { default_path = /opt/intel_cce_10.1.013_x64/bin ; } - # Intel C++ Compiler 9.1. - else if [ MATCH "(9[.]1)" : $(version) ] - { default_path = /opt/intel_cc_91/bin ; } - # Intel C++ Compiler 9.0. - else if [ MATCH "(9[.]0|9)" : $(version) ] - { default_path = /opt/intel_cc_90/bin ; } - # Intel C++ Compiler 8.1. - else if [ MATCH "(8[.]1)" : $(version) ] - { default_path = /opt/intel_cc_81/bin ; } - # Intel C++ Compiler 8.0 - this used to be the default, so now it's the - # fallback. - else - { default_path = /opt/intel_cc_80/bin ; } - - if $(.debug-configuration) - { - ECHO "notice: default search path for intel-linux is" $(default_path) ; - } - - command = [ common.get-invocation-command intel-linux : icpc - : $(command) : $(default_path) ] ; - - common.handle-options intel-linux : $(condition) : $(command) : $(options) ; - - gcc.init-link-flags intel-linux gnu $(condition) ; - - local root = [ feature.get-values : $(options) ] ; - local bin ; - if $(command) || $(root) - { - bin ?= [ common.get-absolute-tool-path $(command[-1]) ] ; - root ?= $(bin:D) ; - - local command-string = $(command:J=" ") ; - local version-output = [ SHELL "$(command-string) --version" ] ; - local real-version = [ MATCH "([0-9.]+)" : $(version-output) ] ; - local major = [ MATCH "([0-9]+).*" : $(real-version) ] ; - - # If we failed to determine major version, use the behaviour for - # the current compiler. 
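
The version handling above amounts to two lookups: a table mapping the requested intel-linux version to its conventional install directory, and a probe of the compiler's --version output to recover the real major version. A plain-Python paraphrase, illustrative only (the paths are the ones listed above; the helper names are invented, and the original patterns are substring matches rather than anchored ones):

    import re
    import subprocess

    DEFAULT_PATHS = [                        # first matching pattern wins
        (r"^(12\.0|12)$", "/opt/intel/bin"),
        (r"^11\.1$",      "/opt/intel_cce_11.1.064.x86_64/bin"),
        (r"^(11\.0|11)$", "/opt/intel_cce_11.0.074.x86_64/bin"),
        (r"^10\.1$",      "/opt/intel_cce_10.1.013_x64/bin"),
        (r"^9\.1$",       "/opt/intel_cc_91/bin"),
        (r"^(9\.0|9)$",   "/opt/intel_cc_90/bin"),
        (r"^8\.1$",       "/opt/intel_cc_81/bin"),
    ]

    def default_path(version):
        for pattern, path in DEFAULT_PATHS:
            if re.match(pattern, version):
                return path
        return "/opt/intel_cc_80/bin"        # 8.0 used to be the default, now the fallback

    def detected_major(command="icpc"):
        out = subprocess.check_output([command, "--version"], text=True)
        m = re.search(r"[0-9.]+", out)
        return int(m.group(0).split(".")[0]) if m else None
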
- if $(major) && [ numbers.less $(major) 10 ] - { - flags intel-linux.compile OPTIONS $(condition)/off : "-Ob0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-Ob1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-Ob2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-O1" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - else if $(major) && [ numbers.less $(major) 11 ] - { - flags intel-linux.compile OPTIONS $(condition)/off : "-inline-level=0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-inline-level=1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-inline-level=2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-O1" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - else # newer version of intel do have -Os (at least 11+, don't know about 10) - { - flags intel-linux.compile OPTIONS $(condition)/off : "-inline-level=0" ; - flags intel-linux.compile OPTIONS $(condition)/on : "-inline-level=1" ; - flags intel-linux.compile OPTIONS $(condition)/full : "-inline-level=2" ; - flags intel-linux.compile OPTIONS $(condition)/space : "-Os" ; - flags intel-linux.compile OPTIONS $(condition)/speed : "-O3 -ip" ; - } - - if $(root) - { - # Libraries required to run the executable may be in either - # $(root)/lib (10.1 and earlier) - # or - # $(root)/lib/architecture-name (11.0 and later: - local lib_path = $(root)/lib $(root:P)/lib/$(bin:B) ; - if $(.debug-configuration) - { - ECHO notice: using intel libraries :: $(condition) :: $(lib_path) ; - } - flags intel-linux.link RUN_PATH $(condition) : $(lib_path) ; - } - } -} - -SPACE = " " ; - -flags intel-linux.compile OPTIONS off : -w0 ; -flags intel-linux.compile OPTIONS on : -w1 ; -flags intel-linux.compile OPTIONS all : -w2 ; - -rule compile.c++ ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -actions compile.c++ bind PCH_FILE -{ - "$(CONFIG_COMMAND)" -c -xc++ $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)" -} - -rule compile.c ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - DEPENDS $(<) : [ on $(<) return $(PCH_FILE) ] ; -} - -actions compile.c bind PCH_FILE -{ - "$(CONFIG_COMMAND)" -c -xc $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -use-pch"$(PCH_FILE)" -c -o "$(<)" "$(>)" -} - -rule compile.c++.pch ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; -} -# -# Compiling a pch first deletes any existing *.pchi file, as Intel's compiler -# won't over-write an existing pch: instead it creates filename$1.pchi, filename$2.pchi -# etc - which appear not to do anything except take up disk space :-( -# -actions compile.c++.pch -{ - rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c++-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)" -} - -actions compile.fortran -{ - "ifort" -c $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o 
"$(<)" "$(>)" -} - -rule compile.c.pch ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-fpic $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; -} - -actions compile.c.pch -{ - rm -f "$(<)" && "$(CONFIG_COMMAND)" -x c-header $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -pch-create "$(<)" "$(>)" -} - -rule link ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = intel-linux-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS) -} - -rule link.dll ( targets * : sources * : properties * ) -{ - gcc.setup-threading $(targets) : $(sources) : $(properties) ; - gcc.setup-address-model $(targets) : $(sources) : $(properties) ; - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = intel-linux-link-semaphore ; -} - -# Differ from 'link' above only by -shared. -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) $(OPTIONS) $(USER_OPTIONS) -} - - - diff --git a/jam-files/boost-build/tools/intel-win.jam b/jam-files/boost-build/tools/intel-win.jam deleted file mode 100644 index 691b5dce..00000000 --- a/jam-files/boost-build/tools/intel-win.jam +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# Importing common is needed because the rules we inherit here depend on it. -# That is nasty. -import common ; -import errors ; -import feature ; -import intel ; -import msvc ; -import os ; -import toolset ; -import generators ; -import type ; - -feature.extend-subfeature toolset intel : platform : win ; - -toolset.inherit-generators intel-win intel win : msvc ; -toolset.inherit-flags intel-win : msvc : : YLOPTION ; -toolset.inherit-rules intel-win : msvc ; - -# Override default do-nothing generators. -generators.override intel-win.compile.c.pch : pch.default-c-pch-generator ; -generators.override intel-win.compile.c++.pch : pch.default-cpp-pch-generator ; -generators.override intel-win.compile.rc : rc.compile.resource ; -generators.override intel-win.compile.mc : mc.compile ; - -toolset.flags intel-win.compile PCH_SOURCE on : ; - -toolset.add-requirements intel-win,shared:multi ; - -# Initializes the intel toolset for windows -rule init ( version ? : # the compiler version - command * : # the command to invoke the compiler itself - options * # Additional option: - # either 'vc6', 'vc7', 'vc7.1' - # or 'native'(default). 
- ) -{ - local compatibility = - [ feature.get-values : $(options) ] ; - local condition = [ common.check-init-parameters intel-win - : version $(version) : compatibility $(compatibility) ] ; - - command = [ common.get-invocation-command intel-win : icl.exe : - $(command) ] ; - - common.handle-options intel-win : $(condition) : $(command) : $(options) ; - - local root ; - if $(command) - { - root = [ common.get-absolute-tool-path $(command[-1]) ] ; - root = $(root)/ ; - } - - local setup ; - setup = [ GLOB $(root) : iclvars_*.bat ] ; - if ! $(setup) - { - setup = $(root)/iclvars.bat ; - } - setup = "call \""$(setup)"\" > nul " ; - - if [ os.name ] = NT - { - setup = $(setup)" -" ; - } - else - { - setup = "cmd /S /C "$(setup)" \"&&\" " ; - } - - toolset.flags intel-win.compile .CC $(condition) : $(setup)icl ; - toolset.flags intel-win.link .LD $(condition) : $(setup)xilink ; - toolset.flags intel-win.archive .LD $(condition) : $(setup)xilink /lib ; - toolset.flags intel-win.link .MT $(condition) : $(setup)mt -nologo ; - toolset.flags intel-win.compile .MC $(condition) : $(setup)mc ; - toolset.flags intel-win.compile .RC $(condition) : $(setup)rc ; - - local m = [ MATCH (.).* : $(version) ] ; - local major = $(m[1]) ; - - local C++FLAGS ; - - C++FLAGS += /nologo ; - - # Reduce the number of spurious error messages - C++FLAGS += /Qwn5 /Qwd985 ; - - # Enable ADL - C++FLAGS += -Qoption,c,--arg_dep_lookup ; #"c" works for C++, too - - # Disable Microsoft "secure" overloads in Dinkumware libraries since they - # cause compile errors with Intel versions 9 and 10. - C++FLAGS += -D_SECURE_SCL=0 ; - - if $(major) > 5 - { - C++FLAGS += /Zc:forScope ; # Add support for correct for loop scoping. - } - - # Add options recognized only by intel7 and above. - if $(major) >= 7 - { - C++FLAGS += /Qansi_alias ; - } - - if $(compatibility) = vc6 - { - C++FLAGS += - # Emulate VC6 - /Qvc6 - - # No wchar_t support in vc6 dinkum library. Furthermore, in vc6 - # compatibility-mode, wchar_t is not a distinct type from unsigned - # short. - -DBOOST_NO_INTRINSIC_WCHAR_T - ; - } - else - { - if $(major) > 5 - { - # Add support for wchar_t - C++FLAGS += /Zc:wchar_t - # Tell the dinkumware library about it. - -D_NATIVE_WCHAR_T_DEFINED - ; - } - } - - if $(compatibility) && $(compatibility) != native - { - C++FLAGS += /Q$(base-vc) ; - } - else - { - C++FLAGS += - -Qoption,cpp,--arg_dep_lookup - # The following options were intended to disable the Intel compiler's - # 'bug-emulation' mode, but were later reported to be causing ICE with - # Intel-Win 9.0. It is not yet clear which options can be safely used. - # -Qoption,cpp,--const_string_literals - # -Qoption,cpp,--new_for_init - # -Qoption,cpp,--no_implicit_typename - # -Qoption,cpp,--no_friend_injection - # -Qoption,cpp,--no_microsoft_bugs - ; - } - - toolset.flags intel-win CFLAGS $(condition) : $(C++FLAGS) ; - # By default, when creating PCH, intel adds 'i' to the explicitly - # specified name of the PCH file. Of course, Boost.Build is not - # happy when compiler produces not the file it was asked for. - # The option below stops this behaviour. - toolset.flags intel-win CFLAGS : -Qpchi- ; - - if ! $(compatibility) - { - # If there's no backend version, assume 7.1. - compatibility = vc7.1 ; - } - - local extract-version = [ MATCH ^vc(.*) : $(compatibility) ] ; - if ! $(extract-version) - { - errors.user-error "Invalid value for compatibility option:" - $(compatibility) ; - } - - # Depending on the settings, running of tests require some runtime DLLs. 
- toolset.flags intel-win RUN_PATH $(condition) : $(root) ; - - msvc.configure-version-specific intel-win : $(extract-version[1]) : $(condition) ; -} - -toolset.flags intel-win.link LIBRARY_OPTION intel : "" ; - -toolset.flags intel-win YLOPTION ; - diff --git a/jam-files/boost-build/tools/intel.jam b/jam-files/boost-build/tools/intel.jam deleted file mode 100644 index 67038aa2..00000000 --- a/jam-files/boost-build/tools/intel.jam +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# This is a generic 'intel' toolset. Depending on the current -# system, it forwards either to 'intel-linux' or 'intel-win' -# modules. - -import feature ; -import os ; -import toolset ; - -feature.extend toolset : intel ; -feature.subfeature toolset intel : platform : : propagated link-incompatible ; - -rule init ( * : * ) -{ - if [ os.name ] = LINUX - { - toolset.using intel-linux : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else if [ os.name ] = MACOSX - { - toolset.using intel-darwin : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else - { - toolset.using intel-win : - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } -} diff --git a/jam-files/boost-build/tools/lex.jam b/jam-files/boost-build/tools/lex.jam deleted file mode 100644 index 75d64131..00000000 --- a/jam-files/boost-build/tools/lex.jam +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2003 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -import type ; -import generators ; -import feature ; -import property ; - - -feature.feature flex.prefix : : free ; -type.register LEX : l ; -type.register LEX++ : ll ; -generators.register-standard lex.lex : LEX : C ; -generators.register-standard lex.lex : LEX++ : CPP ; - -rule init ( ) -{ -} - -rule lex ( target : source : properties * ) -{ - local r = [ property.select flex.prefix : $(properties) ] ; - if $(r) - { - PREFIX on $(<) = $(r:G=) ; - } -} - -actions lex -{ - flex -P$(PREFIX) -o$(<) $(>) -} diff --git a/jam-files/boost-build/tools/make.jam b/jam-files/boost-build/tools/make.jam deleted file mode 100644 index 08567285..00000000 --- a/jam-files/boost-build/tools/make.jam +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2003 Douglas Gregor -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'make' main target rule. - -import "class" : new ; -import errors : error ; -import project ; -import property ; -import property-set ; -import regex ; -import targets ; - - -class make-target-class : basic-target -{ - import type regex virtual-target ; - import "class" : new ; - - rule __init__ ( name : project : sources * : requirements * - : default-build * : usage-requirements * ) - { - basic-target.__init__ $(name) : $(project) : $(sources) : - $(requirements) : $(default-build) : $(usage-requirements) ; - } - - rule construct ( name : source-targets * : property-set ) - { - local action-name = [ $(property-set).get ] ; - # 'm' will always be set -- we add '@' ourselves in the 'make' rule - # below. 
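
As the comment above notes, the generating rule is always stored with a leading '@' (the 'make' rule below adds one if the user did not), and the construct step strips it back off before creating the action. A small, purely illustrative sketch of that round trip; the <action> feature name is a reconstruction, not quoted from this hunk:

    def add_action_property(generating_rule):
        # What 'make' does to its generating-rule argument before adding it
        # to the requirements: ensure exactly one leading '@'.
        if not generating_rule.startswith("@"):
            generating_rule = "@" + generating_rule
        return "<action>" + generating_rule

    def rule_name_from_property(action_property):
        # What construct() recovers from the property: "<action>@gen" -> "gen".
        return action_property.split("<action>@", 1)[1]

    # add_action_property("gen-sources") -> "<action>@gen-sources"
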
- local m = [ MATCH ^@(.*) : $(action-name) ] ; - - local a = [ new action $(source-targets) : $(m[1]) : $(property-set) ] ; - local t = [ new file-target $(self.name) exact : [ type.type - $(self.name) ] : $(self.project) : $(a) ] ; - return [ property-set.empty ] [ virtual-target.register $(t) ] ; - } -} - - -# Declares the 'make' main target. -# -rule make ( target-name : sources * : generating-rule + : requirements * : - usage-requirements * ) -{ - local project = [ project.current ] ; - - # The '@' sign causes the feature.jam module to qualify rule name with the - # module name of current project, if needed. - local m = [ MATCH ^(@).* : $(generating-rule) ] ; - if ! $(m) - { - generating-rule = @$(generating-rule) ; - } - requirements += $(generating-rule) ; - - targets.main-target-alternative - [ new make-target-class $(target-name) : $(project) - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build : $(project) ] - : [ targets.main-target-usage-requirements $(usage-requirements) : - $(project) ] ] ; -} - - -IMPORT $(__name__) : make : : make ; diff --git a/jam-files/boost-build/tools/make.py b/jam-files/boost-build/tools/make.py deleted file mode 100644 index 10baa1cb..00000000 --- a/jam-files/boost-build/tools/make.py +++ /dev/null @@ -1,59 +0,0 @@ -# Status: ported. -# Base revision: 64068 - -# Copyright 2003 Dave Abrahams -# Copyright 2003 Douglas Gregor -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'make' main target rule. - -from b2.build.targets import BasicTarget -from b2.build.virtual_target import Action, FileTarget -from b2.build import type -from b2.manager import get_manager -import b2.build.property_set - - -class MakeTarget(BasicTarget): - - def construct(self, name, source_targets, property_set): - - action_name = property_set.get("")[0] - action = Action(get_manager(), source_targets, action_name[1:], property_set) - target = FileTarget(self.name(), type.type(self.name()), - self.project(), action, exact=True) - return [ b2.build.property_set.empty(), - [self.project().manager().virtual_targets().register(target)]] - -def make (target_name, sources, generating_rule, - requirements=None, usage_requirements=None): - - target_name = target_name[0] - generating_rule = generating_rule[0] - if generating_rule[0] != '@': - generating_rule = '@' + generating_rule - - if not requirements: - requirements = [] - - - requirements.append("%s" % generating_rule) - - m = get_manager() - targets = m.targets() - project = m.projects().current() - engine = m.engine() - engine.register_bjam_action(generating_rule) - - targets.main_target_alternative(MakeTarget( - target_name, project, - targets.main_target_sources(sources, target_name), - targets.main_target_requirements(requirements, project), - targets.main_target_default_build([], project), - targets.main_target_usage_requirements(usage_requirements or [], project))) - -get_manager().projects().add_rule("make", make) - diff --git a/jam-files/boost-build/tools/mc.jam b/jam-files/boost-build/tools/mc.jam deleted file mode 100644 index 57837773..00000000 --- a/jam-files/boost-build/tools/mc.jam +++ /dev/null @@ -1,44 +0,0 @@ -#~ Copyright 2005 Alexey Pakhunov. 
-#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Support for Microsoft message compiler tool. -# Notes: -# - there's just message compiler tool, there's no tool for -# extracting message strings from sources -# - This file allows to use Microsoft message compiler -# with any toolset. In msvc.jam, there's more specific -# message compiling action. - -import common ; -import generators ; -import feature : feature get-values ; -import toolset : flags ; -import type ; -import rc ; - -rule init ( ) -{ -} - -type.register MC : mc ; - - -# Command line options -feature mc-input-encoding : ansi unicode : free ; -feature mc-output-encoding : unicode ansi : free ; -feature mc-set-customer-bit : no yes : free ; - -flags mc.compile MCFLAGS ansi : -a ; -flags mc.compile MCFLAGS unicode : -u ; -flags mc.compile MCFLAGS ansi : -A ; -flags mc.compile MCFLAGS unicode : -U ; -flags mc.compile MCFLAGS no : ; -flags mc.compile MCFLAGS yes : -c ; - -generators.register-standard mc.compile : MC : H RC ; - -actions compile -{ - mc $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)" -} diff --git a/jam-files/boost-build/tools/message.jam b/jam-files/boost-build/tools/message.jam deleted file mode 100644 index 212d8542..00000000 --- a/jam-files/boost-build/tools/message.jam +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2008 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines main target type 'message', that prints a message when built for the -# first time. - -import project ; -import "class" : new ; -import targets ; -import property-set ; - -class message-target-class : basic-target -{ - rule __init__ ( name-and-dir : project : * ) - { - basic-target.__init__ $(name-and-dir) : $(project) ; - self.3 = $(3) ; - self.4 = $(4) ; - self.5 = $(5) ; - self.6 = $(6) ; - self.7 = $(7) ; - self.8 = $(8) ; - self.9 = $(9) ; - self.built = ; - } - - rule construct ( name : source-targets * : property-set ) - { - if ! $(self.built) - { - for i in 3 4 5 6 7 8 9 - { - if $(self.$(i)) - { - ECHO $(self.$(i)) ; - } - } - self.built = 1 ; - } - - return [ property-set.empty ] ; - } -} - - -rule message ( name : * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new message-target-class $(name) : $(project) - : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) ] ; -} -IMPORT $(__name__) : message : : message ; \ No newline at end of file diff --git a/jam-files/boost-build/tools/message.py b/jam-files/boost-build/tools/message.py deleted file mode 100644 index cc0b946f..00000000 --- a/jam-files/boost-build/tools/message.py +++ /dev/null @@ -1,46 +0,0 @@ -# Status: ported. -# Base revision: 64488. -# -# Copyright 2008, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines main target type 'message', that prints a message when built for the -# first time. 
- -import b2.build.targets as targets -import b2.build.property_set as property_set - -from b2.manager import get_manager - -class MessageTargetClass(targets.BasicTarget): - - def __init__(self, name, project, *args): - - targets.BasicTarget.__init__(self, name, project, []) - self.args = args - self.built = False - - def construct(self, name, sources, ps): - - if not self.built: - for arg in self.args: - if type(arg) == type([]): - arg = " ".join(arg) - print arg - self.built = True - - return (property_set.empty(), []) - -def message(name, *args): - - if type(name) == type([]): - name = name[0] - - t = get_manager().targets() - - project = get_manager().projects().current() - - return t.main_target_alternative(MessageTargetClass(*((name, project) + args))) - -get_manager().projects().add_rule("message", message) diff --git a/jam-files/boost-build/tools/midl.jam b/jam-files/boost-build/tools/midl.jam deleted file mode 100644 index 0aa5dda3..00000000 --- a/jam-files/boost-build/tools/midl.jam +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2005 Alexey Pakhunov. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Microsoft Interface Definition Language (MIDL) related routines - -import common ; -import generators ; -import feature : feature get-values ; -import os ; -import scanner ; -import toolset : flags ; -import type ; - -rule init ( ) -{ -} - -type.register IDL : idl ; - -# A type library (.tlb) is generated by MIDL compiler and can be included -# to resources of an application (.rc). In order to be found by a resource -# compiler its target type should be derived from 'H' - otherwise -# the property '' will be ignored. -type.register MSTYPELIB : tlb : H ; - - -# Register scanner for MIDL files -class midl-scanner : scanner -{ - import path property-set regex scanner type virtual-target ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - self.includes = $(includes) ; - - # List of quoted strings - self.re-strings = "[ \t]*\"([^\"]*)\"([ \t]*,[ \t]*\"([^\"]*)\")*[ \t]*" ; - - # 'import' and 'importlib' directives - self.re-import = "import"$(self.re-strings)"[ \t]*;" ; - self.re-importlib = "importlib[ \t]*[(]"$(self.re-strings)"[)][ \t]*;" ; - - # C preprocessor 'include' directive - self.re-include-angle = "#[ \t]*include[ \t]*<(.*)>" ; - self.re-include-quoted = "#[ \t]*include[ \t]*\"(.*)\"" ; - } - - rule pattern ( ) - { - # Match '#include', 'import' and 'importlib' directives - return "((#[ \t]*include|import(lib)?).+(<(.*)>|\"(.*)\").+)" ; - } - - rule process ( target : matches * : binding ) - { - local included-angle = [ regex.transform $(matches) : $(self.re-include-angle) : 1 ] ; - local included-quoted = [ regex.transform $(matches) : $(self.re-include-quoted) : 1 ] ; - local imported = [ regex.transform $(matches) : $(self.re-import) : 1 3 ] ; - local imported_tlbs = [ regex.transform $(matches) : $(self.re-importlib) : 1 3 ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. 
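
The midl-scanner above is driven by a handful of regular expressions for 'import', 'importlib' and the two forms of '#include'. They translate almost directly to Python; the snippet below is illustrative only (the sample IDL lines are invented) and simply shows what each pattern captures:

    import re

    RE_STRINGS       = r'[ \t]*"([^"]*)"([ \t]*,[ \t]*"([^"]*)")*[ \t]*'
    RE_IMPORT        = re.compile(r'import' + RE_STRINGS + r'[ \t]*;')
    RE_IMPORTLIB     = re.compile(r'importlib[ \t]*[(]' + RE_STRINGS + r'[)][ \t]*;')
    RE_INCLUDE_ANGLE = re.compile(r'#[ \t]*include[ \t]*<(.*)>')
    RE_INCLUDE_QUOTE = re.compile(r'#[ \t]*include[ \t]*"(.*)"')

    samples = [
        'import "oaidl.idl", "ocidl.idl";',   # first import target is captured
        'importlib("stdole2.tlb");',          # referenced type library
        '#include <windows.h>',               # angle-bracket include
        '#include "local.h"',                 # quoted include
    ]
    for line in samples:
        for name, rx in [("import", RE_IMPORT), ("importlib", RE_IMPORTLIB),
                         ("include<>", RE_INCLUDE_ANGLE), ('include""', RE_INCLUDE_QUOTE)]:
            m = rx.search(line)
            if m:
                print(name, "->", m.group(1))
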
- local g2 = $(g)"#"$(b) ; - - included-angle = $(included-angle:G=$(g)) ; - included-quoted = $(included-quoted:G=$(g2)) ; - imported = $(imported:G=$(g2)) ; - imported_tlbs = $(imported_tlbs:G=$(g2)) ; - - local all = $(included-angle) $(included-quoted) $(imported) ; - - INCLUDES $(target) : $(all) ; - DEPENDS $(target) : $(imported_tlbs) ; - NOCARE $(all) $(imported_tlbs) ; - SEARCH on $(included-angle) = $(self.includes:G=) ; - SEARCH on $(included-quoted) = $(b) $(self.includes:G=) ; - SEARCH on $(imported) = $(b) $(self.includes:G=) ; - SEARCH on $(imported_tlbs) = $(b) $(self.includes:G=) ; - - scanner.propagate - [ type.get-scanner CPP : [ property-set.create $(self.includes) ] ] : - $(included-angle) $(included-quoted) : $(target) ; - - scanner.propagate $(__name__) : $(imported) : $(target) ; - } -} - -scanner.register midl-scanner : include ; -type.set-scanner IDL : midl-scanner ; - - -# Command line options -feature midl-stubless-proxy : yes no : propagated ; -feature midl-robust : yes no : propagated ; - -flags midl.compile.idl MIDLFLAGS yes : /Oicf ; -flags midl.compile.idl MIDLFLAGS no : /Oic ; -flags midl.compile.idl MIDLFLAGS yes : /robust ; -flags midl.compile.idl MIDLFLAGS no : /no_robust ; - -# Architecture-specific options -architecture-x86 = x86 ; -address-model-32 = 32 ; -address-model-64 = 64 ; - -flags midl.compile.idl MIDLFLAGS $(architecture-x86)/$(address-model-32) : /win32 ; -flags midl.compile.idl MIDLFLAGS $(architecture-x86)/64 : /x64 ; -flags midl.compile.idl MIDLFLAGS ia64/$(address-model-64) : /ia64 ; - - -flags midl.compile.idl DEFINES ; -flags midl.compile.idl UNDEFS ; -flags midl.compile.idl INCLUDES ; - - -generators.register-c-compiler midl.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) ; - - -# MIDL does not always generate '%_proxy.c' and '%_dlldata.c'. This behavior -# depends on contents of the source IDL file. Calling TOUCH_FILE below ensures -# that both files will be created so bjam will not try to recreate them -# constantly. -TOUCH_FILE = [ common.file-touch-command ] ; - -actions compile.idl -{ - midl /nologo @"@($(<[1]:W).rsp:E=$(nl)"$(>:W)" $(nl)-D$(DEFINES) $(nl)"-I$(INCLUDES)" $(nl)-U$(UNDEFS) $(nl)$(MIDLFLAGS) $(nl)/tlb "$(<[1]:W)" $(nl)/h "$(<[2]:W)" $(nl)/iid "$(<[3]:W)" $(nl)/proxy "$(<[4]:W)" $(nl)/dlldata "$(<[5]:W)")" - $(TOUCH_FILE) "$(<[4]:W)" - $(TOUCH_FILE) "$(<[5]:W)" -} diff --git a/jam-files/boost-build/tools/mipspro.jam b/jam-files/boost-build/tools/mipspro.jam deleted file mode 100644 index 417eaefc..00000000 --- a/jam-files/boost-build/tools/mipspro.jam +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright Noel Belcourt 2007. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import fortran ; -import type ; -import common ; - -feature.extend toolset : mipspro ; -toolset.inherit mipspro : unix ; -generators.override mipspro.prebuilt : builtin.lib-generator ; -generators.override mipspro.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.sgi.com/products/software/irix/tools/ - -rule init ( version ? 
: command * : options * ) -{ - local condition = [ - common.check-init-parameters mipspro : version $(version) ] ; - - command = [ common.get-invocation-command mipspro : CC : $(command) ] ; - - common.handle-options mipspro : $(condition) : $(command) : $(options) ; - - command_c = $(command_c[1--2]) $(command[-1]:B=cc) ; - - toolset.flags mipspro CONFIG_C_COMMAND $(condition) : $(command_c) ; - - # fortran support - local command = [ - common.get-invocation-command mipspro : f77 : $(command) : $(install_dir) ] ; - - command_f = $(command_f[1--2]) $(command[-1]:B=f77) ; - toolset.flags mipspro CONFIG_F_COMMAND $(condition) : $(command_f) ; - - # set link flags - flags mipspro.link FINDLIBS-ST : [ - feature.get-values : $(options) ] : unchecked ; - - flags mipspro.link FINDLIBS-SA : [ - feature.get-values : $(options) ] : unchecked ; -} - -# Declare generators -generators.register-c-compiler mipspro.compile.c : C : OBJ : mipspro ; -generators.register-c-compiler mipspro.compile.c++ : CPP : OBJ : mipspro ; -generators.register-fortran-compiler mipspro.compile.fortran : FORTRAN : OBJ : mipspro ; - -cpu-arch-32 = - / - /32 ; - -cpu-arch-64 = - /64 ; - -flags mipspro.compile OPTIONS $(cpu-arch-32) : -n32 ; -flags mipspro.compile OPTIONS $(cpu-arch-64) : -64 ; - -# Declare flags and actions for compilation -flags mipspro.compile OPTIONS on : -g ; -# flags mipspro.compile OPTIONS on : -xprofile=tcov ; -flags mipspro.compile OPTIONS off : -w ; -flags mipspro.compile OPTIONS on : -ansiW -diag_suppress 1429 ; # suppress long long is nonstandard warning -flags mipspro.compile OPTIONS all : -fullwarn ; -flags mipspro.compile OPTIONS speed : -Ofast ; -flags mipspro.compile OPTIONS space : -O2 ; -flags mipspro.compile OPTIONS : -LANG:std ; -flags mipspro.compile.c++ OPTIONS off : -INLINE:none ; -flags mipspro.compile.c++ OPTIONS ; -flags mipspro.compile DEFINES ; -flags mipspro.compile INCLUDES ; - - -flags mipspro.compile.fortran OPTIONS ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags mipspro.link OPTIONS on : -g ; -# Strip the binary when no debugging is needed -# flags mipspro.link OPTIONS off : -s ; -# flags mipspro.link OPTIONS on : -xprofile=tcov ; -# flags mipspro.link OPTIONS multi : -mt ; - -flags mipspro.link OPTIONS $(cpu-arch-32) : -n32 ; -flags mipspro.link OPTIONS $(cpu-arch-64) : -64 ; - -flags mipspro.link OPTIONS speed : -Ofast ; -flags mipspro.link OPTIONS space : -O2 ; -flags mipspro.link OPTIONS ; -flags mipspro.link LINKPATH ; -flags mipspro.link FINDLIBS-ST ; -flags mipspro.link FINDLIBS-SA ; -flags mipspro.link FINDLIBS-SA multi : pthread ; -flags mipspro.link LIBRARIES ; -flags mipspro.link LINK-RUNTIME static : static ; -flags mipspro.link LINK-RUNTIME shared : dynamic ; -flags mipspro.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -FE:template_in_elf_section -ptused $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -lm -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) 
-{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Declare action for creating static libraries -actions piecemeal archive -{ - ar -cr "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/mpi.jam b/jam-files/boost-build/tools/mpi.jam deleted file mode 100644 index 0fe490be..00000000 --- a/jam-files/boost-build/tools/mpi.jam +++ /dev/null @@ -1,583 +0,0 @@ -# Support for the Message Passing Interface (MPI) -# -# (C) Copyright 2005, 2006 Trustees of Indiana University -# (C) Copyright 2005 Douglas Gregor -# -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.) -# -# Authors: Douglas Gregor -# Andrew Lumsdaine -# -# ==== MPI Configuration ==== -# -# For many users, MPI support can be enabled simply by adding the following -# line to your user-config.jam file: -# -# using mpi ; -# -# This should auto-detect MPI settings based on the MPI wrapper compiler in -# your path, e.g., "mpic++". If the wrapper compiler is not in your path, or -# has a different name, you can pass the name of the wrapper compiler as the -# first argument to the mpi module: -# -# using mpi : /opt/mpich2-1.0.4/bin/mpiCC ; -# -# If your MPI implementation does not have a wrapper compiler, or the MPI -# auto-detection code does not work with your MPI's wrapper compiler, -# you can pass MPI-related options explicitly via the second parameter to the -# mpi module: -# -# using mpi : : lammpio lammpi++ -# mpi lam -# dl ; -# -# To see the results of MPI auto-detection, pass "--debug-configuration" on -# the bjam command line. -# -# The (optional) fourth argument configures Boost.MPI for running -# regression tests. These parameters specify the executable used to -# launch jobs (default: "mpirun") followed by any necessary arguments -# to this to run tests and tell the program to expect the number of -# processors to follow (default: "-np"). With the default parameters, -# for instance, the test harness will execute, e.g., -# -# mpirun -np 4 all_gather_test -# -# ==== Linking Against the MPI Libraries === -# -# To link against the MPI libraries, import the "mpi" module and add the -# following requirement to your target: -# -# /mpi//mpi -# -# Since MPI support is not always available, you should check -# "mpi.configured" before trying to link against the MPI libraries. - -import "class" : new ; -import common ; -import feature : feature ; -import generators ; -import os ; -import project ; -import property ; -import testing ; -import toolset ; -import type ; -import path ; - -# Make this module a project -project.initialize $(__name__) ; -project mpi ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Assuming the first part of the command line is the given prefix -# followed by some non-empty value, remove the first argument. Returns -# either nothing (if there was no prefix or no value) or a pair -# -# value rest-of-cmdline -# -# This is a subroutine of cmdline_to_features -rule add_feature ( prefix name cmdline ) -{ - local match = [ MATCH "^$(prefix)([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - - # If there was no value associated with the prefix, abort - if ! 
$(match) { - return ; - } - - local value = $(match[1]) ; - - if [ MATCH " +" : $(value) ] { - value = "\"$(value)\"" ; - } - - return "<$(name)>$(value)" $(match[2]) ; -} - -# Strip any end-of-line characters off the given string and return the -# result. -rule strip-eol ( string ) -{ - local match = [ MATCH "^(([A-Za-z0-9~`\.!@#$%^&*()_+={};:'\",.<>/?\\| -]|[|])*).*$" : $(string) ] ; - - if $(match) - { - return $(match[1]) ; - } - else - { - return $(string) ; - } -} - -# Split a command-line into a set of features. Certain kinds of -# compiler flags are recognized (e.g., -I, -D, -L, -l) and replaced -# with their Boost.Build equivalents (e.g., , , -# , ). All other arguments are introduced -# using the features in the unknown-features parameter, because we -# don't know how to deal with them. For instance, if your compile and -# correct. The incoming command line should be a string starting with -# an executable (e.g., g++ -I/include/path") and may contain any -# number of command-line arguments thereafter. The result is a list of -# features corresponding to the given command line, ignoring the -# executable. -rule cmdline_to_features ( cmdline : unknown-features ? ) -{ - local executable ; - local features ; - local otherflags ; - local result ; - - unknown-features ?= ; - - # Pull the executable out of the command line. At this point, the - # executable is just thrown away. - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - executable = $(match[1]) ; - cmdline = $(match[2]) ; - - # List the prefix/feature pairs that we will be able to transform. - # Every kind of parameter not mentioned here will be placed in both - # cxxflags and linkflags, because we don't know where they should go. - local feature_kinds-D = "define" ; - local feature_kinds-I = "include" ; - local feature_kinds-L = "library-path" ; - local feature_kinds-l = "find-shared-library" ; - - while $(cmdline) { - - # Check for one of the feature prefixes we know about. If we - # find one (and the associated value is nonempty), convert it - # into a feature. - local match = [ MATCH "^(-.)(.*)" : $(cmdline) ] ; - local matched ; - if $(match) && $(match[2]) { - local prefix = $(match[1]) ; - if $(feature_kinds$(prefix)) { - local name = $(feature_kinds$(prefix)) ; - local add = [ add_feature $(prefix) $(name) $(cmdline) ] ; - - if $(add) { - - if $(add[1]) = pthread - { - # Uhm. It's not really nice that this MPI implementation - # uses -lpthread as opposed to -pthread. We do want to - # set multi, instead of -lpthread. - result += "multi" ; - MPI_EXTRA_REQUIREMENTS += "multi" ; - } - else - { - result += $(add[1]) ; - } - - cmdline = $(add[2]) ; - matched = yes ; - } - } - } - - # If we haven't matched a feature prefix, just grab the command-line - # argument itself. If we can map this argument to a feature - # (e.g., -pthread -> multi), then do so; otherwise, - # and add it to the list of "other" flags that we don't - # understand. - if ! $(matched) { - match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" : $(cmdline) ] ; - local value = $(match[1]) ; - cmdline = $(match[2]) ; - - # Check for multithreading support - if $(value) = "-pthread" || $(value) = "-pthreads" - { - result += "multi" ; - - # DPG: This is a hack intended to work around a BBv2 bug where - # requirements propagated from libraries are not checked for - # conflicts when BBv2 determines which "common" properties to - # apply to a target. 
In our case, the single property - # gets propagated from the common properties to Boost.MPI - # targets, even though multi is in the usage - # requirements of /mpi//mpi. - MPI_EXTRA_REQUIREMENTS += "multi" ; - } - else if [ MATCH "(.*[a-zA-Z0-9<>?-].*)" : $(value) ] { - otherflags += $(value) ; - } - } - } - - # If there are other flags that we don't understand, add them to the - # result as both and - if $(otherflags) { - for unknown in $(unknown-features) - { - result += "$(unknown)$(otherflags:J= )" ; - } - } - - return $(result) ; -} - -# Determine if it is safe to execute the given shell command by trying -# to execute it and determining whether the exit code is zero or -# not. Returns true for an exit code of zero, false otherwise. -local rule safe-shell-command ( cmdline ) -{ - local result = [ SHELL "$(cmdline) > /dev/null 2>/dev/null; if [ "$?" -eq "0" ]; then echo SSCOK; fi" ] ; - return [ MATCH ".*(SSCOK).*" : $(result) ] ; -} - -# Initialize the MPI module. -rule init ( mpicxx ? : options * : mpirun-with-options * ) -{ - if ! $(options) && $(.debug-configuration) - { - ECHO "===============MPI Auto-configuration===============" ; - } - - if ! $(mpicxx) && [ os.on-windows ] - { - # Try to auto-configure to the Microsoft Compute Cluster Pack - local cluster_pack_path_native = "C:\\Program Files\\Microsoft Compute Cluster Pack" ; - local cluster_pack_path = [ path.make $(cluster_pack_path_native) ] ; - if [ GLOB $(cluster_pack_path_native)\\Include : mpi.h ] - { - if $(.debug-configuration) - { - ECHO "Found Microsoft Compute Cluster Pack: $(cluster_pack_path_native)" ; - } - - # Pick up either the 32-bit or 64-bit library, depending on which address - # model the user has selected. Default to 32-bit. - options = $(cluster_pack_path)/Include - 64:$(cluster_pack_path)/Lib/amd64 - $(cluster_pack_path)/Lib/i386 - msmpi - msvc:_SECURE_SCL=0 - ; - - # Setup the "mpirun" equivalent (mpiexec) - .mpirun = "\"$(cluster_pack_path_native)\\Bin\\mpiexec.exe"\" ; - .mpirun_flags = -n ; - } - else if $(.debug-configuration) - { - ECHO "Did not find Microsoft Compute Cluster Pack in $(cluster_pack_path_native)." ; - } - } - - if ! $(options) - { - # Try to auto-detect options based on the wrapper compiler - local command = [ common.get-invocation-command mpi : mpic++ : $(mpicxx) ] ; - - if ! $(mpicxx) && ! $(command) - { - # Try "mpiCC", which is used by MPICH - command = [ common.get-invocation-command mpi : mpiCC ] ; - } - - if ! $(mpicxx) && ! $(command) - { - # Try "mpicxx", which is used by OpenMPI and MPICH2 - command = [ common.get-invocation-command mpi : mpicxx ] ; - } - - local result ; - local compile_flags ; - local link_flags ; - - if ! $(command) - { - # Do nothing: we'll complain later - } - # OpenMPI and newer versions of LAM-MPI have -showme:compile and - # -showme:link. - else if [ safe-shell-command "$(command) -showme:compile" ] && - [ safe-shell-command "$(command) -showme:link" ] - { - if $(.debug-configuration) - { - ECHO "Found recent LAM-MPI or Open MPI wrapper compiler: $(command)" ; - } - - compile_flags = [ SHELL "$(command) -showme:compile" ] ; - link_flags = [ SHELL "$(command) -showme:link" ] ; - - # Prepend COMPILER as the executable name, to match the format of - # other compilation commands. 
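# For illustration only (hypothetical wrapper output): a command line such as
#
#   mpic++ -I/opt/mpi/include -L/opt/mpi/lib -lmpi -pthread
#
# would be translated further below, via the cmdline_to_features rule defined
# above, into roughly
#
#   <include>/opt/mpi/include <library-path>/opt/mpi/lib
#   <find-shared-library>mpi <threading>multi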
- compile_flags = "COMPILER $(compile_flags)" ; - link_flags = "COMPILER $(link_flags)" ; - } - # Look for LAM-MPI's -showme - else if [ safe-shell-command "$(command) -showme" ] - { - if $(.debug-configuration) - { - ECHO "Found older LAM-MPI wrapper compiler: $(command)" ; - } - - result = [ SHELL "$(command) -showme" ] ; - } - # Look for MPICH - else if [ safe-shell-command "$(command) -show" ] - { - if $(.debug-configuration) - { - ECHO "Found MPICH wrapper compiler: $(command)" ; - } - compile_flags = [ SHELL "$(command) -compile_info" ] ; - link_flags = [ SHELL "$(command) -link_info" ] ; - } - # Sun HPC and Ibm POE - else if [ SHELL "$(command) -v 2>/dev/null" ] - { - compile_flags = [ SHELL "$(command) -c -v -xtarget=native64 2>/dev/null" ] ; - - local back = [ MATCH "--------------------(.*)" : $(compile_flags) ] ; - if $(back) - { - # Sun HPC - if $(.debug-configuration) - { - ECHO "Found Sun MPI wrapper compiler: $(command)" ; - } - - compile_flags = [ MATCH "(.*)--------------------" : $(back) ] ; - compile_flags = [ MATCH "(.*)-v" : $(compile_flags) ] ; - link_flags = [ SHELL "$(command) -v -xtarget=native64 2>/dev/null" ] ; - link_flags = [ MATCH "--------------------(.*)" : $(link_flags) ] ; - link_flags = [ MATCH "(.*)--------------------" : $(link_flags) ] ; - - # strip out -v from compile options - local front = [ MATCH "(.*)-v" : $(link_flags) ] ; - local back = [ MATCH "-v(.*)" : $(link_flags) ] ; - link_flags = "$(front) $(back)" ; - front = [ MATCH "(.*)-xtarget=native64" : $(link_flags) ] ; - back = [ MATCH "-xtarget=native64(.*)" : $(link_flags) ] ; - link_flags = "$(front) $(back)" ; - } - else - { - # Ibm POE - if $(.debug-configuration) - { - ECHO "Found IBM MPI wrapper compiler: $(command)" ; - } - - # - compile_flags = [ SHELL "$(command) -c -v 2>/dev/null" ] ; - compile_flags = [ MATCH "(.*)exec: export.*" : $(compile_flags) ] ; - local front = [ MATCH "(.*)-v" : $(compile_flags) ] ; - local back = [ MATCH "-v(.*)" : $(compile_flags) ] ; - compile_flags = "$(front) $(back)" ; - front = [ MATCH "(.*)-c" : $(compile_flags) ] ; - back = [ MATCH "-c(.*)" : $(compile_flags) ] ; - compile_flags = "$(front) $(back)" ; - link_flags = $(compile_flags) ; - - # get location of mpif.h from mpxlf - local f_flags = [ SHELL "mpxlf -v 2>/dev/null" ] ; - f_flags = [ MATCH "(.*)exec: export.*" : $(f_flags) ] ; - front = [ MATCH "(.*)-v" : $(f_flags) ] ; - back = [ MATCH "-v(.*)" : $(f_flags) ] ; - f_flags = "$(front) $(back)" ; - f_flags = [ MATCH "xlf_r(.*)" : $(f_flags) ] ; - f_flags = [ MATCH "-F:mpxlf_r(.*)" : $(f_flags) ] ; - compile_flags = [ strip-eol $(compile_flags) ] ; - compile_flags = "$(compile_flags) $(f_flags)" ; - } - } - - if $(result) || $(compile_flags) && $(link_flags) - { - if $(result) - { - result = [ strip-eol $(result) ] ; - options = [ cmdline_to_features $(result) ] ; - } - else - { - compile_flags = [ strip-eol $(compile_flags) ] ; - link_flags = [ strip-eol $(link_flags) ] ; - - # Separately process compilation and link features, then combine - # them at the end. - local compile_features = [ cmdline_to_features $(compile_flags) - : "" ] ; - local link_features = [ cmdline_to_features $(link_flags) - : "" ] ; - options = $(compile_features) $(link_features) ; - } - - # If requested, display MPI configuration information. 
- if $(.debug-configuration) - { - if $(result) - { - ECHO " Wrapper compiler command line: $(result)" ; - } - else - { - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" - : $(compile_flags) ] ; - ECHO "MPI compilation flags: $(match[2])" ; - local match = [ MATCH "^([^\" ]+|\"[^\"]+\") *(.*)$" - : $(link_flags) ] ; - ECHO "MPI link flags: $(match[2])" ; - } - } - } - else - { - if $(command) - { - ECHO "MPI auto-detection failed: unknown wrapper compiler $(command)" ; - ECHO "Please report this error to the Boost mailing list: http://www.boost.org" ; - } - else if $(mpicxx) - { - ECHO "MPI auto-detection failed: unable to find wrapper compiler $(mpicxx)" ; - } - else - { - ECHO "MPI auto-detection failed: unable to find wrapper compiler `mpic++' or `mpiCC'" ; - } - ECHO "You will need to manually configure MPI support." ; - } - - } - - # Find mpirun (or its equivalent) and its flags - if ! $(.mpirun) - { - .mpirun = - [ common.get-invocation-command mpi : mpirun : $(mpirun-with-options[1]) ] ; - .mpirun_flags = $(mpirun-with-options[2-]) ; - .mpirun_flags ?= -np ; - } - - if $(.debug-configuration) - { - if $(options) - { - echo "MPI build features: " ; - ECHO $(options) ; - } - - if $(.mpirun) - { - echo "MPI launcher: $(.mpirun) $(.mpirun_flags)" ; - } - - ECHO "====================================================" ; - } - - if $(options) - { - .configured = true ; - - # Set up the "mpi" alias - alias mpi : : : : $(options) ; - } -} - -# States whether MPI has bee configured -rule configured ( ) -{ - return $(.configured) ; -} - -# Returs the "extra" requirements needed to build MPI. These requirements are -# part of the /mpi//mpi library target, but they need to be added to anything -# that uses MPI directly to work around bugs in BBv2's propagation of -# requirements. -rule extra-requirements ( ) -{ - return $(MPI_EXTRA_REQUIREMENTS) ; -} - -# Support for testing; borrowed from Python -type.register RUN_MPI_OUTPUT ; -type.register RUN_MPI : : TEST ; - -class mpi-test-generator : generator -{ - import property-set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - self.composing = true ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - # Generate an executable from the sources. This is the executable we will run. - local executable = - [ generators.construct $(project) $(name) : EXE : $(property-set) : $(sources) ] ; - - result = - [ construct-result $(executable[2-]) : $(project) $(name)-run : $(property-set) ] ; - } -} - -# Use mpi-test-generator to generate MPI tests from sources -generators.register - [ new mpi-test-generator mpi.capture-output : : RUN_MPI_OUTPUT ] ; - -generators.register-standard testing.expect-success - : RUN_MPI_OUTPUT : RUN_MPI ; - -# The number of processes to spawn when executing an MPI test. -feature mpi:processes : : free incidental ; - -# The flag settings on testing.capture-output do not -# apply to mpi.capture output at the moment. -# Redo this explicitly. -toolset.flags mpi.capture-output ARGS ; -rule capture-output ( target : sources * : properties * ) -{ - # Use the standard capture-output rule to run the tests - testing.capture-output $(target) : $(sources[1]) : $(properties) ; - - # Determine the number of processes we should run on. 
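# For illustration only (hypothetical Jamfile line): a test declared as
#
#   mpi-test all_gather_test : : : 4 ;
#
# carries <mpi:processes>4 in its properties; the code below reads that value
# and hands it to the launcher, e.g. "mpirun -np 4 all_gather_test".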
- local num_processes = [ property.select : $(properties) ] ; - num_processes = $(num_processes:G=) ; - - # serialize the MPI tests to avoid overloading systems - JAM_SEMAPHORE on $(target) = mpi-run-semaphore ; - - # We launch MPI processes using the "mpirun" equivalent specified by the user. - LAUNCHER on $(target) = - [ on $(target) return $(.mpirun) $(.mpirun_flags) $(num_processes) ] ; -} - -# Creates a set of test cases to be run through the MPI launcher. The name, sources, -# and requirements are the same as for any other test generator. However, schedule is -# a list of numbers, which indicates how many processes each test run will use. For -# example, passing 1 2 7 will run the test with 1 process, then 2 processes, then 7 -# 7 processes. The name provided is just the base name: the actual tests will be -# the name followed by a hypen, then the number of processes. -rule mpi-test ( name : sources * : requirements * : schedule * ) -{ - sources ?= $(name).cpp ; - schedule ?= 1 2 3 4 7 8 13 17 ; - - local result ; - for processes in $(schedule) - { - result += [ testing.make-test - run-mpi : $(sources) /boost/mpi//boost_mpi - : $(requirements) msvc:static $(processes) : $(name)-$(processes) ] ; - } - return $(result) ; -} diff --git a/jam-files/boost-build/tools/msvc-config.jam b/jam-files/boost-build/tools/msvc-config.jam deleted file mode 100644 index 6c71e3b0..00000000 --- a/jam-files/boost-build/tools/msvc-config.jam +++ /dev/null @@ -1,12 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for VisualStudio toolset. To use, just import this module. - -import toolset : using ; - -ECHO "warning: msvc-config.jam is deprecated. Use 'using msvc : all ;' instead." ; - -using msvc : all ; - diff --git a/jam-files/boost-build/tools/msvc.jam b/jam-files/boost-build/tools/msvc.jam deleted file mode 100644 index e33a66d2..00000000 --- a/jam-files/boost-build/tools/msvc.jam +++ /dev/null @@ -1,1392 +0,0 @@ -# Copyright (c) 2003 David Abrahams. -# Copyright (c) 2005 Vladimir Prus. -# Copyright (c) 2005 Alexey Pakhunov. -# Copyright (c) 2006 Bojan Resnik. -# Copyright (c) 2006 Ilya Sokolov. -# Copyright (c) 2007 Rene Rivera -# Copyright (c) 2008 Jurko Gospodnetic -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -################################################################################ -# -# MSVC Boost Build toolset module. -# -------------------------------- -# -# All toolset versions need to have their location either auto-detected or -# explicitly specified except for the special 'default' version that expects the -# environment to find the needed tools or report an error. -# -################################################################################ - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import mc ; -import midl ; -import os ; -import path ; -import pch ; -import property ; -import rc ; -import toolset ; -import type ; - - -type.register MANIFEST : manifest ; -feature.feature embed-manifest : on off : incidental propagated ; - -type.register PDB : pdb ; - -################################################################################ -# -# Public rules. 
-# -################################################################################ - -# Initialize a specific toolset version configuration. As the result, path to -# compiler and, possible, program names are set up, and will be used when that -# version of compiler is requested. For example, you might have: -# -# using msvc : 6.5 : cl.exe ; -# using msvc : 7.0 : Y:/foo/bar/cl.exe ; -# -# The version parameter may be ommited: -# -# using msvc : : Z:/foo/bar/cl.exe ; -# -# The following keywords have special meanings when specified as versions: -# - all - all detected but not yet used versions will be marked as used -# with their default options. -# - default - this is an equivalent to an empty version. -# -# Depending on a supplied version, detected configurations and presence 'cl.exe' -# in the path different results may be achieved. The following table describes -# the possible scenarios: -# -# Nothing "x.y" -# Passed Nothing "x.y" detected, detected, -# version detected detected cl.exe in path cl.exe in path -# -# default Error Use "x.y" Create "default" Use "x.y" -# all None Use all None Use all -# x.y - Use "x.y" - Use "x.y" -# a.b Error Error Create "a.b" Create "a.b" -# -# "x.y" - refers to a detected version; -# "a.b" - refers to an undetected version. -# -# FIXME: Currently the command parameter and the property parameter -# seem to overlap in duties. Remove this duplication. This seems to be related -# to why someone started preparing to replace init with configure rules. -# -rule init ( - # The msvc version being configured. When omitted the tools invoked when no - # explicit version is given will be configured. - version ? - - # The command used to invoke the compiler. If not specified: - # - if version is given, default location for that version will be - # searched - # - # - if version is not given, default locations for MSVC 9.0, 8.0, 7.1, 7.0 - # and 6.* will be searched - # - # - if compiler is not found in the default locations, PATH will be - # searched. - : command * - - # Options may include: - # - # All options shared by multiple toolset types as handled by the - # common.handle-options() rule, e.g. , , , - # & . - # - # - # - # - # - # - # - # Exact tool names to be used by this msvc toolset configuration. - # - # - # Command through which to pipe the output of running the compiler. - # For example to pass the output to STLfilt. - # - # - # Global setup command to invoke before running any of the msvc tools. - # It will be passed additional option parameters depending on the actual - # target platform. - # - # - # - # - # Platform specific setup command to invoke before running any of the - # msvc tools used when builing a target for a specific platform, e.g. - # when building a 32 or 64 bit executable. - : options * -) -{ - if $(command) - { - options += $(command) ; - } - configure $(version) : $(options) ; -} - - -# 'configure' is a newer version of 'init'. The parameter 'command' is passed as -# a part of the 'options' list. See the 'init' rule comment for more detailed -# information. -# -rule configure ( version ? : options * ) -{ - switch $(version) - { - case "all" : - if $(options) - { - errors.error "MSVC toolset configuration: options should be" - "empty when '$(version)' is specified." ; - } - - # Configure (i.e. mark as used) all registered versions. - local all-versions = [ $(.versions).all ] ; - if ! 
$(all-versions) - { - if $(.debug-configuration) - { - ECHO "notice: [msvc-cfg] Asked to configure all registered" - "msvc toolset versions when there are none currently" - "registered." ; - } - } - else - { - for local v in $(all-versions) - { - # Note that there is no need to skip already configured - # versions here as this will request configure-really rule - # to configure the version using default options which will - # in turn cause it to simply do nothing in case the version - # has already been configured. - configure-really $(v) ; - } - } - - case "default" : - configure-really : $(options) ; - - case * : - configure-really $(version) : $(options) ; - } -} - - -# Sets up flag definitions dependent on the compiler version used. -# - 'version' is the version of compiler in N.M format. -# - 'conditions' is the property set to be used as flag conditions. -# - 'toolset' is the toolset for which flag settings are to be defined. -# This makes the rule reusable for other msvc-option-compatible compilers. -# -rule configure-version-specific ( toolset : version : conditions ) -{ - toolset.push-checking-for-flags-module unchecked ; - # Starting with versions 7.0, the msvc compiler have the /Zc:forScope and - # /Zc:wchar_t options that improve C++ standard conformance, but those - # options are off by default. If we are sure that the msvc version is at - # 7.*, add those options explicitly. We can be sure either if user specified - # version 7.* explicitly or if we auto-detected the version ourselves. - if ! [ MATCH ^(6\\.) : $(version) ] - { - toolset.flags $(toolset).compile CFLAGS $(conditions) : /Zc:forScope /Zc:wchar_t ; - toolset.flags $(toolset).compile.c++ C++FLAGS $(conditions) : /wd4675 ; - - # Explicitly disable the 'function is deprecated' warning. Some msvc - # versions have a bug, causing them to emit the deprecation warning even - # with /W0. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off : /wd4996 ; - - if [ MATCH ^([78]\\.) : $(version) ] - { - # 64-bit compatibility warning deprecated since 9.0, see - # http://msdn.microsoft.com/en-us/library/yt4xw8fh.aspx - toolset.flags $(toolset).compile CFLAGS $(conditions)/all : /Wp64 ; - } - } - - # - # Processor-specific optimization. - # - - if [ MATCH ^([67]) : $(version) ] - { - # 8.0 deprecates some of the options. - toolset.flags $(toolset).compile CFLAGS $(conditions)/speed $(conditions)/space : /Ogiy /Gs ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/speed : /Ot ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/space : /Os ; - - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/ : /GB ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/i386 : /G3 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/i486 : /G4 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g5) : /G5 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g6) : /G6 ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-i386)/$(.cpu-type-g7) : /G7 ; - - # Improve floating-point accuracy. Otherwise, some of C++ Boost's "math" - # tests will fail. - toolset.flags $(toolset).compile CFLAGS $(conditions) : /Op ; - - # 7.1 and below have single-threaded static RTL. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off/static/single : /ML ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/on/static/single : /MLd ; - } - else - { - # 8.0 and above adds some more options. 
- toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/ : /favor:blend ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/$(.cpu-type-em64t) : /favor:EM64T ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/$(.cpu-arch-amd64)/$(.cpu-type-amd64) : /favor:AMD64 ; - - # 8.0 and above only has multi-threaded static RTL. - toolset.flags $(toolset).compile CFLAGS $(conditions)/off/static/single : /MT ; - toolset.flags $(toolset).compile CFLAGS $(conditions)/on/static/single : /MTd ; - - # Specify target machine type so the linker will not need to guess. - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-amd64) : /MACHINE:X64 ; - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-i386) : /MACHINE:X86 ; - toolset.flags $(toolset).link LINKFLAGS $(conditions)/$(.cpu-arch-ia64) : /MACHINE:IA64 ; - - # Make sure that manifest will be generated even if there is no - # dependencies to put there. - toolset.flags $(toolset).link LINKFLAGS $(conditions)/off : /MANIFEST ; - } - toolset.pop-checking-for-flags-module ; -} - - -# Registers this toolset including all of its flags, features & generators. Does -# nothing on repeated calls. -# -rule register-toolset ( ) -{ - if ! msvc in [ feature.values toolset ] - { - register-toolset-really ; - } -} - - -# Declare action for creating static libraries. If library exists, remove it -# before adding files. See -# http://article.gmane.org/gmane.comp.lib.boost.build/4241 for rationale. -if [ os.name ] in NT -{ - # The 'DEL' command would issue a message to stdout if the file does not - # exist, so need a check. - actions archive - { - if exist "$(<[1])" DEL "$(<[1])" - $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } -} -else -{ - actions archive - { - $(.RM) "$(<[1])" - $(.LD) $(AROPTIONS) /out:"$(<[1])" @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } -} - - -# For the assembler the following options are turned on by default: -# -# -Zp4 align structures to 4 bytes -# -Cp preserve case of user identifiers -# -Cx preserve case in publics, externs -# -actions compile.asm -{ - $(.ASM) -c -Zp4 -Cp -Cx -D$(DEFINES) $(ASMFLAGS) $(USER_ASMFLAGS) -Fo "$(<:W)" "$(>:W)" -} - - -rule compile.c ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets) : -TC ; - compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c.preprocess ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets) : -TC ; - preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c.pch ( targets + : sources * : properties * ) -{ - C++FLAGS on $(targets[1]) = ; - get-rspline $(targets[1]) : -TC ; - get-rspline $(targets[2]) : -TC ; - local pch-source = [ on $(<) return $(PCH_SOURCE) ] ; - if $(pch-source) - { - DEPENDS $(<) : $(pch-source) ; - compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ; - } - else - { - compile-c-c++-pch $(targets) : $(sources) ; - } -} - -toolset.flags msvc YLOPTION : "-Yl" ; - -# Action for running the C/C++ compiler without using precompiled headers. 
-# -# WARNING: Synchronize any changes this in action with intel-win -# -# Notes regarding PDB generation, for when we use on/database -# -# 1. PDB_CFLAG is only set for on/database, ensuring that the /Fd flag is dropped if PDB_CFLAG is empty -# -# 2. When compiling executables's source files, PDB_NAME is set on a per-source file basis by rule compile-c-c++. -# The linker will pull these into the executable's PDB -# -# 3. When compiling library's source files, PDB_NAME is updated to .pdb for each source file by rule archive, -# as in this case the compiler must be used to create a single PDB for our library. -# -actions compile-c-c++ bind PDB_NAME -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -Fo"$(<[1]:W)" $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" $(.CC.FILTER) -} - -actions preprocess-c-c++ bind PDB_NAME -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[1]:W)" -E $(PDB_CFLAG)"$(PDB_NAME)" -Yu"$(>[3]:D=)" -Fp"$(>[2]:W)" $(CC_RSPLINE))" >"$(<[1]:W)" -} - -rule compile-c-c++ ( targets + : sources * ) -{ - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ; - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ; - PDB_NAME on $(<) = $(<:S=.pdb) ; -} - -rule preprocess-c-c++ ( targets + : sources * ) -{ - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_HEADER) ] ; - DEPENDS $(<[1]) : [ on $(<[1]) return $(PCH_FILE) ] ; - PDB_NAME on $(<) = $(<:S=.pdb) ; -} - -# Action for running the C/C++ compiler using precompiled headers. In addition -# to whatever else it needs to compile, this action also adds a temporary source -# .cpp file used to compile the precompiled headers themselves. -# -# The global .escaped-double-quote variable is used to avoid messing up Emacs -# syntax highlighting in the messy N-quoted code below. -actions compile-c-c++-pch -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" "@($(<[1]:W).cpp:E=#include $(.escaped-double-quote)$(>[1]:D=)$(.escaped-double-quote)$(.nl))" $(.CC.FILTER) -} - - -# Action for running the C/C++ compiler using precompiled headers. An already -# built source file for compiling the precompiled headers is expected to be -# given as one of the source parameters. -actions compile-c-c++-pch-s -{ - $(.CC) @"@($(<[1]:W).rsp:E="$(>[2]:W)" -Fo"$(<[2]:W)" -Yc"$(>[1]:D=)" $(YLOPTION)"__bjam_pch_symbol_$(>[1]:D=)" -Fp"$(<[1]:W)" $(CC_RSPLINE))" $(.CC.FILTER) -} - - -rule compile.c++ ( targets + : sources * : properties * ) -{ - get-rspline $(targets) : -TP ; - compile-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - -rule compile.c++.preprocess ( targets + : sources * : properties * ) -{ - get-rspline $(targets) : -TP ; - preprocess-c-c++ $(<) : $(>) [ on $(<) return $(PCH_FILE) ] [ on $(<) return $(PCH_HEADER) ] ; -} - - -rule compile.c++.pch ( targets + : sources * : properties * ) -{ - get-rspline $(targets[1]) : -TP ; - get-rspline $(targets[2]) : -TP ; - local pch-source = [ on $(<) return $(PCH_SOURCE) ] ; - if $(pch-source) - { - DEPENDS $(<) : $(pch-source) ; - compile-c-c++-pch-s $(targets) : $(sources) $(pch-source) ; - } - else - { - compile-c-c++-pch $(targets) : $(sources) ; - } -} - - -# See midl.jam for details. 
-# -actions compile.idl -{ - $(.IDL) /nologo @"@($(<[1]:W).rsp:E=$(.nl)"$(>:W)" $(.nl)-D$(DEFINES) $(.nl)"-I$(INCLUDES:W)" $(.nl)-U$(UNDEFS) $(.nl)$(MIDLFLAGS) $(.nl)/tlb "$(<[1]:W)" $(.nl)/h "$(<[2]:W)" $(.nl)/iid "$(<[3]:W)" $(.nl)/proxy "$(<[4]:W)" $(.nl)/dlldata "$(<[5]:W)")" - $(.TOUCH_FILE) "$(<[4]:W)" - $(.TOUCH_FILE) "$(<[5]:W)" -} - - -actions compile.mc -{ - $(.MC) $(MCFLAGS) -h "$(<[1]:DW)" -r "$(<[2]:DW)" "$(>:W)" -} - - -actions compile.rc -{ - $(.RC) -l 0x409 -U$(UNDEFS) -D$(DEFINES) -I"$(INCLUDES:W)" -fo "$(<:W)" "$(>:W)" -} - - -rule link ( targets + : sources * : properties * ) -{ - if on in $(properties) - { - msvc.manifest $(targets) : $(sources) : $(properties) ; - } -} - -rule link.dll ( targets + : sources * : properties * ) -{ - DEPENDS $(<) : [ on $(<) return $(DEF_FILE) ] ; - if on in $(properties) - { - msvc.manifest.dll $(targets) : $(sources) : $(properties) ; - } -} - -# Incremental linking a DLL causes no end of problems: if the actual exports do -# not change, the import .lib file is never updated. Therefore, the .lib is -# always out-of-date and gets rebuilt every time. I am not sure that incremental -# linking is such a great idea in general, but in this case I am sure we do not -# want it. - -# Windows manifest is a new way to specify dependencies on managed DotNet -# assemblies and Windows native DLLs. The manifests are embedded as resources -# and are useful in any PE target (both DLL and EXE). - -if [ os.name ] in NT -{ - actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% - } - - actions manifest - { - if exist "$(<[1]).manifest" ( - $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);1" - ) - } - - actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - if %ERRORLEVEL% NEQ 0 EXIT %ERRORLEVEL% - } - - actions manifest.dll - { - if exist "$(<[1]).manifest" ( - $(.MT) -manifest "$(<[1]).manifest" "-outputresource:$(<[1]);2" - ) - } -} -else -{ - actions link bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) $(LINKFLAGS) /out:"$(<[1]:W)" /LIBPATH:"$(LINKPATH:W)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } - - actions manifest - { - if test -e "$(<[1]).manifest"; then - $(.MT) -manifest "$(<[1]:W).manifest" "-outputresource:$(<[1]:W);1" - fi - } - - actions link.dll bind DEF_FILE LIBRARIES_MENTIONED_BY_FILE - { - $(.LD) /DLL $(LINKFLAGS) /out:"$(<[1]:W)" /IMPLIB:"$(<[2]:W)" /LIBPATH:"$(LINKPATH:W)" /def:"$(DEF_FILE)" $(OPTIONS) @"@($(<[1]:W).rsp:E=$(.nl)"$(>)" $(.nl)$(LIBRARIES_MENTIONED_BY_FILE) $(.nl)$(LIBRARIES) $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_ST).lib" $(.nl)"$(LIBRARY_OPTION)$(FINDLIBS_SA).lib")" - } - - actions manifest.dll - { - if test -e "$(<[1]).manifest"; then - $(.MT) -manifest "$(<[1]:W).manifest" "-outputresource:$(<[1]:W);2" - fi - } -} - -# this rule sets up the pdb file that 
will be used when generating static -# libraries and the debug-store option is database, so that the compiler -# puts all debug info into a single .pdb file named after the library -# -# Poking at source targets this way is probably not clean, but it's the -# easiest approach. -rule archive ( targets + : sources * : properties * ) -{ - PDB_NAME on $(>) = $(<:S=.pdb) ; -} - -################################################################################ -# -# Classes. -# -################################################################################ - -class msvc-pch-generator : pch-generator -{ - import property-set ; - - rule run-pch ( project name ? : property-set : sources * ) - { - # Searching for the header and source file in the sources. - local pch-header ; - local pch-source ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] H ] - { - pch-header = $(s) ; - } - else if - [ type.is-derived [ $(s).type ] CPP ] || - [ type.is-derived [ $(s).type ] C ] - { - pch-source = $(s) ; - } - } - - if ! $(pch-header) - { - errors.user-error "can not build pch without pch-header" ; - } - - # If we do not have the PCH source - that is fine. We will just create a - # temporary .cpp file in the action. - - local generated = [ generator.run $(project) $(name) - : [ property-set.create - # Passing of is a dirty trick, needed because - # non-composing generators with multiple inputs are subtly - # broken. For more detailed information see: - # https://zigzag.cs.msu.su:7813/boost.build/ticket/111 - $(pch-source) - [ $(property-set).raw ] ] - : $(pch-header) ] ; - - local pch-file ; - for local g in $(generated) - { - if [ type.is-derived [ $(g).type ] PCH ] - { - pch-file = $(g) ; - } - } - - return [ property-set.create $(pch-header) - $(pch-file) ] $(generated) ; - } -} - - -################################################################################ -# -# Local rules. -# -################################################################################ - -# Detects versions listed as '.known-versions' by checking registry information, -# environment variables & default paths. Supports both native Windows and -# Cygwin. -# -local rule auto-detect-toolset-versions ( ) -{ - if [ os.name ] in NT CYGWIN - { - # Get installation paths from the registry. - for local i in $(.known-versions) - { - if $(.version-$(i)-reg) - { - local vc-path ; - for local x in "" "Wow6432Node\\" - { - vc-path += [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\"$(x)"\\Microsoft\\"$(.version-$(i)-reg) - : "ProductDir" ] ; - } - - if $(vc-path) - { - vc-path = [ path.join [ path.make-NT $(vc-path[1]) ] "bin" ] ; - register-configuration $(i) : [ path.native $(vc-path[1]) ] ; - } - } - } - } - - # Check environment and default installation paths. - for local i in $(.known-versions) - { - if ! $(i) in [ $(.versions).all ] - { - register-configuration $(i) : [ default-path $(i) ] ; - } - } -} - - -# Worker rule for toolset version configuration. Takes an explicit version id or -# nothing in case it should configure the default toolset version (the first -# registered one or a new 'default' one in case no toolset versions have been -# registered yet). -# -local rule configure-really ( version ? : options * ) -{ - local v = $(version) ; - - # Decide what the 'default' version is. - if ! $(v) - { - # Take the first registered (i.e. auto-detected) version. 
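# For illustration only (hypothetical user-config.jam lines): both of the
# following end up in this rule -- the first with an empty version that is
# resolved to the first auto-detected one, the second with an explicit version:
#
#   using msvc ;
#   using msvc : 9.0 : Y:/foo/bar/cl.exe ;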
- version = [ $(.versions).all ] ; - version = $(version[1]) ; - v = $(version) ; - - # Note: 'version' can still be empty at this point if no versions have - # been auto-detected. - version ?= "default" ; - } - - # Version alias -> real version number. - if $(.version-alias-$(version)) - { - version = $(.version-alias-$(version)) ; - } - - # Check whether the selected configuration is already in use. - if $(version) in [ $(.versions).used ] - { - # Allow multiple 'toolset.using' calls for the same configuration if the - # identical sets of options are used. - if $(options) && ( $(options) != [ $(.versions).get $(version) : options ] ) - { - errors.error "MSVC toolset configuration: Toolset version" - "'$(version)' already configured." ; - } - } - else - { - # Register a new configuration. - $(.versions).register $(version) ; - - # Add user-supplied to auto-detected options. - options = [ $(.versions).get $(version) : options ] $(options) ; - - # Mark the configuration as 'used'. - $(.versions).use $(version) ; - - # Generate conditions and save them. - local conditions = [ common.check-init-parameters msvc : version $(v) ] - ; - - $(.versions).set $(version) : conditions : $(conditions) ; - - local command = [ feature.get-values : $(options) ] ; - - # If version is specified, we try to search first in default paths, and - # only then in PATH. - command = [ common.get-invocation-command msvc : cl.exe : $(command) : - [ default-paths $(version) ] : $(version) ] ; - - common.handle-options msvc : $(conditions) : $(command) : $(options) ; - - if ! $(version) - { - # Even if version is not explicitly specified, try to detect the - # version from the path. - # FIXME: We currently detect both Microsoft Visual Studio 9.0 and - # 9.0express as 9.0 here. - if [ MATCH "(Microsoft Visual Studio 10)" : $(command) ] - { - version = 10.0 ; - } - else if [ MATCH "(Microsoft Visual Studio 9)" : $(command) ] - { - version = 9.0 ; - } - else if [ MATCH "(Microsoft Visual Studio 8)" : $(command) ] - { - version = 8.0 ; - } - else if [ MATCH "(NET 2003[\/\\]VC7)" : $(command) ] - { - version = 7.1 ; - } - else if [ MATCH "(Microsoft Visual C\\+\\+ Toolkit 2003)" : - $(command) ] - { - version = 7.1toolkit ; - } - else if [ MATCH "(.NET[\/\\]VC7)" : $(command) ] - { - version = 7.0 ; - } - else - { - version = 6.0 ; - } - } - - # Generate and register setup command. - - local below-8.0 = [ MATCH ^([67]\\.) : $(version) ] ; - - local cpu = i386 amd64 ia64 ; - if $(below-8.0) - { - cpu = i386 ; - } - - local setup-amd64 ; - local setup-i386 ; - local setup-ia64 ; - - if $(command) - { - # TODO: Note that if we specify a non-existant toolset version then - # this rule may find and use a corresponding compiler executable - # belonging to an incorrect toolset version. For example, if you - # have only MSVC 7.1 installed, have its executable on the path and - # specify you want Boost Build to use MSVC 9.0, then you want Boost - # Build to report an error but this may cause it to silently use the - # MSVC 7.1 compiler even though it thinks it is using the msvc-9.0 - # toolset version. - command = [ common.get-absolute-tool-path $(command[-1]) ] ; - } - - if $(command) - { - local parent = [ path.make $(command) ] ; - parent = [ path.parent $(parent) ] ; - parent = [ path.native $(parent) ] ; - - # Setup will be used if the command name has been specified. If - # setup is not specified explicitly then a default setup script will - # be used instead. 
Setup scripts may be global or arhitecture/ - # /platform/cpu specific. Setup options are used only in case of - # global setup scripts. - - # Default setup scripts provided with different VC distributions: - # - # VC 7.1 had only the vcvars32.bat script specific to 32 bit i386 - # builds. It was located in the bin folder for the regular version - # and in the root folder for the free VC 7.1 tools. - # - # Later 8.0 & 9.0 versions introduce separate platform specific - # vcvars*.bat scripts (e.g. 32 bit, 64 bit AMD or 64 bit Itanium) - # located in or under the bin folder. Most also include a global - # vcvarsall.bat helper script located in the root folder which runs - # one of the aforementioned vcvars*.bat scripts based on the options - # passed to it. So far only the version coming with some PlatformSDK - # distributions does not include this top level script but to - # support those we need to fall back to using the worker scripts - # directly in case the top level script can not be found. - - local global-setup = [ feature.get-values : $(options) ] ; - global-setup = $(global-setup[1]) ; - if ! $(below-8.0) - { - global-setup ?= [ locate-default-setup $(command) : $(parent) : - vcvarsall.bat ] ; - } - - local default-setup-amd64 = vcvarsx86_amd64.bat ; - local default-setup-i386 = vcvars32.bat ; - local default-setup-ia64 = vcvarsx86_ia64.bat ; - - # http://msdn2.microsoft.com/en-us/library/x4d2c09s(VS.80).aspx and - # http://msdn2.microsoft.com/en-us/library/x4d2c09s(vs.90).aspx - # mention an x86_IPF option, that seems to be a documentation bug - # and x86_ia64 is the correct option. - local default-global-setup-options-amd64 = x86_amd64 ; - local default-global-setup-options-i386 = x86 ; - local default-global-setup-options-ia64 = x86_ia64 ; - - # When using 64-bit Windows, and targeting 64-bit, it is possible to - # use a native 64-bit compiler, selected by the "amd64" & "ia64" - # parameters to vcvarsall.bat. There are two variables we can use -- - # PROCESSOR_ARCHITECTURE and PROCESSOR_IDENTIFIER. The first is - # 'x86' when running 32-bit Windows, no matter which processor is - # used, and 'AMD64' on 64-bit windows on x86 (either AMD64 or EM64T) - # Windows. - # - if [ MATCH ^(AMD64) : [ os.environ PROCESSOR_ARCHITECTURE ] ] - { - default-global-setup-options-amd64 = amd64 ; - } - # TODO: The same 'native compiler usage' should be implemented for - # the Itanium platform by using the "ia64" parameter. For this - # though we need someone with access to this platform who can find - # out how to correctly detect this case. - else if $(somehow-detect-the-itanium-platform) - { - default-global-setup-options-ia64 = ia64 ; - } - - local setup-prefix = "call " ; - local setup-suffix = " >nul"$(.nl) ; - if ! [ os.name ] in NT - { - setup-prefix = "cmd.exe /S /C call " ; - setup-suffix = " \">nul\" \"&&\" " ; - } - - for local c in $(cpu) - { - local setup-options ; - - setup-$(c) = [ feature.get-values : $(options) ] ; - - if ! $(setup-$(c))-is-not-empty - { - if $(global-setup)-is-not-empty - { - setup-$(c) = $(global-setup) ; - - # If needed we can easily add using configuration flags - # here for overriding which options get passed to the - # global setup command for which target platform: - # setup-options = [ feature.get-values : $(options) ] ; - - setup-options ?= $(default-global-setup-options-$(c)) ; - } - else - { - setup-$(c) = [ locate-default-setup $(command) : $(parent) : $(default-setup-$(c)) ] ; - } - } - - # Cygwin to Windows path translation. 
- setup-$(c) = "\""$(setup-$(c):W)"\"" ; - - # Append setup options to the setup name and add the final setup - # prefix & suffix. - setup-options ?= "" ; - setup-$(c) = $(setup-prefix)$(setup-$(c):J=" ")" "$(setup-options:J=" ")$(setup-suffix) ; - } - } - - # Get tool names (if any) and finish setup. - - compiler = [ feature.get-values : $(options) ] ; - compiler ?= cl ; - - linker = [ feature.get-values : $(options) ] ; - linker ?= link ; - - resource-compiler = [ feature.get-values : $(options) ] ; - resource-compiler ?= rc ; - - # Turn on some options for i386 assembler - # -coff generate COFF format object file (compatible with cl.exe output) - local default-assembler-amd64 = ml64 ; - local default-assembler-i386 = "ml -coff" ; - local default-assembler-ia64 = ias ; - - assembler = [ feature.get-values : $(options) ] ; - - idl-compiler = [ feature.get-values : $(options) ] ; - idl-compiler ?= midl ; - - mc-compiler = [ feature.get-values : $(options) ] ; - mc-compiler ?= mc ; - - manifest-tool = [ feature.get-values : $(options) ] ; - manifest-tool ?= mt ; - - local cc-filter = [ feature.get-values : $(options) ] ; - - for local c in $(cpu) - { - # Setup script is not required in some configurations. - setup-$(c) ?= "" ; - - local cpu-conditions = $(conditions)/$(.cpu-arch-$(c)) ; - - if $(.debug-configuration) - { - for local cpu-condition in $(cpu-conditions) - { - ECHO "notice: [msvc-cfg] condition: '$(cpu-condition)', setup: '$(setup-$(c))'" ; - } - } - - local cpu-assembler = $(assembler) ; - cpu-assembler ?= $(default-assembler-$(c)) ; - - toolset.flags msvc.compile .CC $(cpu-conditions) : $(setup-$(c))$(compiler) /Zm800 -nologo ; - toolset.flags msvc.compile .RC $(cpu-conditions) : $(setup-$(c))$(resource-compiler) ; - toolset.flags msvc.compile .ASM $(cpu-conditions) : $(setup-$(c))$(cpu-assembler) -nologo ; - toolset.flags msvc.link .LD $(cpu-conditions) : $(setup-$(c))$(linker) /NOLOGO /INCREMENTAL:NO ; - toolset.flags msvc.archive .LD $(cpu-conditions) : $(setup-$(c))$(linker) /lib /NOLOGO ; - toolset.flags msvc.compile .IDL $(cpu-conditions) : $(setup-$(c))$(idl-compiler) ; - toolset.flags msvc.compile .MC $(cpu-conditions) : $(setup-$(c))$(mc-compiler) ; - - toolset.flags msvc.link .MT $(cpu-conditions) : $(setup-$(c))$(manifest-tool) -nologo ; - - if $(cc-filter) - { - toolset.flags msvc .CC.FILTER $(cpu-conditions) : "|" $(cc-filter) ; - } - } - - # Set version-specific flags. - configure-version-specific msvc : $(version) : $(conditions) ; - } -} - - -# Returns the default installation path for the given version. -# -local rule default-path ( version ) -{ - # Use auto-detected path if possible. - local path = [ feature.get-values : [ $(.versions).get $(version) - : options ] ] ; - - if $(path) - { - path = $(path:D) ; - } - else - { - # Check environment. - if $(.version-$(version)-env) - { - local vc-path = [ os.environ $(.version-$(version)-env) ] ; - if $(vc-path) - { - vc-path = [ path.make $(vc-path) ] ; - vc-path = [ path.join $(vc-path) $(.version-$(version)-envpath) ] ; - vc-path = [ path.native $(vc-path) ] ; - - path = $(vc-path) ; - } - } - - # Check default path. - if ! $(path) && $(.version-$(version)-path) - { - path = [ path.native [ path.join $(.ProgramFiles) $(.version-$(version)-path) ] ] ; - } - } - - return $(path) ; -} - - -# Returns either the default installation path (if 'version' is not empty) or -# list of all known default paths (if no version is given) -# -local rule default-paths ( version ? 
) -{ - local possible-paths ; - - if $(version) - { - possible-paths += [ default-path $(version) ] ; - } - else - { - for local i in $(.known-versions) - { - possible-paths += [ default-path $(i) ] ; - } - } - - return $(possible-paths) ; -} - - -rule get-rspline ( target : lang-opt ) -{ - CC_RSPLINE on $(target) = [ on $(target) return $(lang-opt) -U$(UNDEFS) - $(CFLAGS) $(C++FLAGS) $(OPTIONS) -c $(.nl)-D$(DEFINES) - $(.nl)\"-I$(INCLUDES:W)\" ] ; -} - -class msvc-linking-generator : linking-generator -{ - # Calls the base version. If necessary, also create a target for the - # manifest file.specifying source's name as the name of the created - # target. As result, the PCH will be named whatever.hpp.gch, and not - # whatever.gch. - rule generated-targets ( sources + : property-set : project name ? ) - { - local result = [ linking-generator.generated-targets $(sources) - : $(property-set) : $(project) $(name) ] ; - - if $(result) - { - local name-main = [ $(result[0]).name ] ; - local action = [ $(result[0]).action ] ; - - if [ $(property-set).get ] = "on" - { - # We force exact name on PDB. The reason is tagging -- the tag rule may - # reasonably special case some target types, like SHARED_LIB. The tag rule - # will not catch PDB, and it cannot even easily figure if PDB is paired with - # SHARED_LIB or EXE or something else. Because PDB always get the - # same name as the main target, with .pdb as extension, just force it. - local target = [ class.new file-target $(name-main:S=.pdb) exact : PDB : $(project) : $(action) ] ; - local registered-target = [ virtual-target.register $(target) ] ; - if $(target) != $(registered-target) - { - $(action).replace-targets $(target) : $(registered-target) ; - } - result += $(registered-target) ; - } - - if [ $(property-set).get ] = "off" - { - # Manifest is evil target. It has .manifest appened to the name of - # main target, including extension. E.g. a.exe.manifest. We use 'exact' - # name because to achieve this effect. - local target = [ class.new file-target $(name-main).manifest exact : MANIFEST : $(project) : $(action) ] ; - local registered-target = [ virtual-target.register $(target) ] ; - if $(target) != $(registered-target) - { - $(action).replace-targets $(target) : $(registered-target) ; - } - result += $(registered-target) ; - } - } - return $(result) ; - } -} - - - -# Unsafe worker rule for the register-toolset() rule. Must not be called -# multiple times. -# -local rule register-toolset-really ( ) -{ - feature.extend toolset : msvc ; - - # Intel and msvc supposedly have link-compatible objects. - feature.subfeature toolset msvc : vendor : intel : propagated optional ; - - # Inherit MIDL flags. - toolset.inherit-flags msvc : midl ; - - # Inherit MC flags. - toolset.inherit-flags msvc : mc ; - - # Dynamic runtime comes only in MT flavour. - toolset.add-requirements - msvc,shared:multi ; - - # Declare msvc toolset specific features. - { - feature.feature debug-store : object database : propagated ; - feature.feature pch-source : : dependency free ; - } - - # Declare generators. - { - # TODO: Is it possible to combine these? Make the generators - # non-composing so that they do not convert each source into a separate - # .rsp file. 
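A minimal sketch of the side-target naming convention the generator comment above describes (hypothetical Python helper, not Boost.Build code): the PDB replaces the main target's extension, while the manifest keeps the full name, extension included.

    import os

    def side_target_names(main_target):
        stem, _ext = os.path.splitext(main_target)
        return {"pdb": stem + ".pdb",                    # a.exe -> a.pdb
                "manifest": main_target + ".manifest"}   # a.exe -> a.exe.manifest

    assert side_target_names("a.exe") == {"pdb": "a.pdb", "manifest": "a.exe.manifest"}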
- generators.register [ new msvc-linking-generator - msvc.link : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : EXE : msvc ] ; - generators.register [ new msvc-linking-generator - msvc.link.dll : OBJ SEARCHED_LIB STATIC_LIB IMPORT_LIB : SHARED_LIB IMPORT_LIB : msvc ] ; - - generators.register-archiver msvc.archive : OBJ : STATIC_LIB : msvc ; - generators.register-c-compiler msvc.compile.c++ : CPP : OBJ : msvc ; - generators.register-c-compiler msvc.compile.c : C : OBJ : msvc ; - generators.register-c-compiler msvc.compile.c++.preprocess : CPP : PREPROCESSED_CPP : msvc ; - generators.register-c-compiler msvc.compile.c.preprocess : C : PREPROCESSED_C : msvc ; - - # Using 'register-c-compiler' adds the build directory to INCLUDES. - generators.register-c-compiler msvc.compile.rc : RC : OBJ(%_res) : msvc ; - generators.override msvc.compile.rc : rc.compile.resource ; - generators.register-standard msvc.compile.asm : ASM : OBJ : msvc ; - - generators.register-c-compiler msvc.compile.idl : IDL : MSTYPELIB H C(%_i) C(%_proxy) C(%_dlldata) : msvc ; - generators.override msvc.compile.idl : midl.compile.idl ; - - generators.register-standard msvc.compile.mc : MC : H RC : msvc ; - generators.override msvc.compile.mc : mc.compile ; - - # Note: the 'H' source type will catch both '.h' and '.hpp' headers as - # the latter have their HPP type derived from H. The type of compilation - # is determined entirely by the destination type. - generators.register [ new msvc-pch-generator msvc.compile.c.pch : H : C_PCH OBJ : on msvc ] ; - generators.register [ new msvc-pch-generator msvc.compile.c++.pch : H : CPP_PCH OBJ : on msvc ] ; - - generators.override msvc.compile.c.pch : pch.default-c-pch-generator ; - generators.override msvc.compile.c++.pch : pch.default-cpp-pch-generator ; - } - - toolset.flags msvc.compile PCH_FILE on : ; - toolset.flags msvc.compile PCH_SOURCE on : ; - toolset.flags msvc.compile PCH_HEADER on : ; - - # - # Declare flags for compilation. - # - - toolset.flags msvc.compile CFLAGS speed : /O2 ; - toolset.flags msvc.compile CFLAGS space : /O1 ; - - toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/$(.cpu-type-itanium) : /G1 ; - toolset.flags msvc.compile CFLAGS $(.cpu-arch-ia64)/$(.cpu-type-itanium2) : /G2 ; - - toolset.flags msvc.compile CFLAGS on/object : /Z7 ; - toolset.flags msvc.compile CFLAGS on/database : /Zi ; - toolset.flags msvc.compile CFLAGS off : /Od ; - toolset.flags msvc.compile CFLAGS off : /Ob0 ; - toolset.flags msvc.compile CFLAGS on : /Ob1 ; - toolset.flags msvc.compile CFLAGS full : /Ob2 ; - - toolset.flags msvc.compile CFLAGS on : /W3 ; - toolset.flags msvc.compile CFLAGS off : /W0 ; - toolset.flags msvc.compile CFLAGS all : /W4 ; - toolset.flags msvc.compile CFLAGS on : /WX ; - - toolset.flags msvc.compile C++FLAGS on/off/off : /EHs ; - toolset.flags msvc.compile C++FLAGS on/off/on : /EHsc ; - toolset.flags msvc.compile C++FLAGS on/on/off : /EHa ; - toolset.flags msvc.compile C++FLAGS on/on/on : /EHac ; - - # By default 8.0 enables rtti support while prior versions disabled it. We - # simply enable or disable it explicitly so we do not have to depend on this - # default behaviour. 
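The exception-handling switch assembled from the three properties listed a few lines above can be summarised as follows (illustrative sketch, assuming the usual cl.exe semantics: 's' synchronous C++ EH, 'a' asynchronous SEH, 'c' extern "C" functions assumed non-throwing):

    def eh_flag(asynch_exceptions, extern_c_nothrow):
        flag = "/EH" + ("a" if asynch_exceptions else "s")
        if extern_c_nothrow:
            flag += "c"
        return flag

    assert eh_flag(False, True) == "/EHsc"   # the common default
    assert eh_flag(True, True) == "/EHac"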
- toolset.flags msvc.compile CFLAGS on : /GR ; - toolset.flags msvc.compile CFLAGS off : /GR- ; - toolset.flags msvc.compile CFLAGS off/shared : /MD ; - toolset.flags msvc.compile CFLAGS on/shared : /MDd ; - - toolset.flags msvc.compile CFLAGS off/static/multi : /MT ; - toolset.flags msvc.compile CFLAGS on/static/multi : /MTd ; - - toolset.flags msvc.compile OPTIONS : ; - toolset.flags msvc.compile.c++ OPTIONS : ; - - toolset.flags msvc.compile PDB_CFLAG on/database : /Fd ; - - toolset.flags msvc.compile DEFINES ; - toolset.flags msvc.compile UNDEFS ; - toolset.flags msvc.compile INCLUDES ; - - # Declare flags for the assembler. - toolset.flags msvc.compile.asm USER_ASMFLAGS ; - - toolset.flags msvc.compile.asm ASMFLAGS on : "/Zi /Zd" ; - - toolset.flags msvc.compile.asm ASMFLAGS on : /W3 ; - toolset.flags msvc.compile.asm ASMFLAGS off : /W0 ; - toolset.flags msvc.compile.asm ASMFLAGS all : /W4 ; - toolset.flags msvc.compile.asm ASMFLAGS on : /WX ; - - toolset.flags msvc.compile.asm DEFINES ; - - # Declare flags for linking. - { - toolset.flags msvc.link PDB_LINKFLAG on/database : /PDB: ; # not used yet - toolset.flags msvc.link LINKFLAGS on : /DEBUG ; - toolset.flags msvc.link DEF_FILE ; - - # The linker disables the default optimizations when using /DEBUG so we - # have to enable them manually for release builds with debug symbols. - toolset.flags msvc LINKFLAGS on/off : /OPT:REF,ICF ; - - toolset.flags msvc LINKFLAGS console : /subsystem:console ; - toolset.flags msvc LINKFLAGS gui : /subsystem:windows ; - toolset.flags msvc LINKFLAGS wince : /subsystem:windowsce ; - toolset.flags msvc LINKFLAGS native : /subsystem:native ; - toolset.flags msvc LINKFLAGS auto : /subsystem:posix ; - - toolset.flags msvc.link OPTIONS ; - toolset.flags msvc.link LINKPATH ; - - toolset.flags msvc.link FINDLIBS_ST ; - toolset.flags msvc.link FINDLIBS_SA ; - toolset.flags msvc.link LIBRARY_OPTION msvc : "" : unchecked ; - toolset.flags msvc.link LIBRARIES_MENTIONED_BY_FILE : ; - } - - toolset.flags msvc.archive AROPTIONS ; -} - - -# Locates the requested setup script under the given folder and returns its full -# path or nothing in case the script can not be found. In case multiple scripts -# are found only the first one is returned. -# -# TODO: There used to exist a code comment for the msvc.init rule stating that -# we do not correctly detect the location of the vcvars32.bat setup script for -# the free VC7.1 tools in case user explicitly provides a path. This should be -# tested or simply remove this whole comment in case this toolset version is no -# longer important. -# -local rule locate-default-setup ( command : parent : setup-name ) -{ - local result = [ GLOB $(command) $(parent) : $(setup-name) ] ; - if $(result[1]) - { - return $(result[1]) ; - } -} - - -# Validates given path, registers found configuration and prints debug -# information about it. -# -local rule register-configuration ( version : path ? ) -{ - if $(path) - { - local command = [ GLOB $(path) : cl.exe ] ; - - if $(command) - { - if $(.debug-configuration) - { - ECHO "notice: [msvc-cfg] msvc-$(version) detected, command: '$(command)'" ; - } - - $(.versions).register $(version) ; - $(.versions).set $(version) : options : $(command) ; - } - } -} - - -################################################################################ -# -# Startup code executed when loading this module. 
-# -################################################################################ - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -# Miscellaneous constants. -.RM = [ common.rm-command ] ; -.nl = " -" ; -.ProgramFiles = [ path.make [ common.get-program-files-dir ] ] ; -.escaped-double-quote = "\"" ; -.TOUCH_FILE = [ common.file-touch-command ] ; - -# List of all registered configurations. -.versions = [ new configurations ] ; - -# Supported CPU architectures. -.cpu-arch-i386 = - / - /32 - x86/ - x86/32 ; - -.cpu-arch-amd64 = - /64 - x86/64 ; - -.cpu-arch-ia64 = - ia64/ - ia64/64 ; - - -# Supported CPU types (only Itanium optimization options are supported from -# VC++ 2005 on). See -# http://msdn2.microsoft.com/en-us/library/h66s5s0e(vs.90).aspx for more -# detailed information. -.cpu-type-g5 = i586 pentium pentium-mmx ; -.cpu-type-g6 = i686 pentiumpro pentium2 pentium3 pentium3m pentium-m k6 - k6-2 k6-3 winchip-c6 winchip2 c3 c3-2 ; -.cpu-type-em64t = prescott nocona conroe conroe-xe conroe-l allendale mermon - mermon-xe kentsfield kentsfield-xe penryn wolfdale - yorksfield nehalem ; -.cpu-type-amd64 = k8 opteron athlon64 athlon-fx ; -.cpu-type-g7 = pentium4 pentium4m athlon athlon-tbird athlon-4 athlon-xp - athlon-mp $(.cpu-type-em64t) $(.cpu-type-amd64) ; -.cpu-type-itanium = itanium itanium1 merced ; -.cpu-type-itanium2 = itanium2 mckinley ; - - -# Known toolset versions, in order of preference. -.known-versions = 10.0 10.0express 9.0 9.0express 8.0 8.0express 7.1 7.1toolkit 7.0 6.0 ; - -# Version aliases. -.version-alias-6 = 6.0 ; -.version-alias-6.5 = 6.0 ; -.version-alias-7 = 7.0 ; -.version-alias-8 = 8.0 ; -.version-alias-9 = 9.0 ; -.version-alias-10 = 10.0 ; - -# Names of registry keys containing the Visual C++ installation path (relative -# to "HKEY_LOCAL_MACHINE\SOFTWARE\\Microsoft"). -.version-6.0-reg = "VisualStudio\\6.0\\Setup\\Microsoft Visual C++" ; -.version-7.0-reg = "VisualStudio\\7.0\\Setup\\VC" ; -.version-7.1-reg = "VisualStudio\\7.1\\Setup\\VC" ; -.version-8.0-reg = "VisualStudio\\8.0\\Setup\\VC" ; -.version-8.0express-reg = "VCExpress\\8.0\\Setup\\VC" ; -.version-9.0-reg = "VisualStudio\\9.0\\Setup\\VC" ; -.version-9.0express-reg = "VCExpress\\9.0\\Setup\\VC" ; -.version-10.0-reg = "VisualStudio\\10.0\\Setup\\VC" ; -.version-10.0express-reg = "VCExpress\\10.0\\Setup\\VC" ; - -# Visual C++ Toolkit 2003 does not store its installation path in the registry. -# The environment variable 'VCToolkitInstallDir' and the default installation -# path will be checked instead. -.version-7.1toolkit-path = "Microsoft Visual C++ Toolkit 2003" "bin" ; -.version-7.1toolkit-env = VCToolkitInstallDir ; - -# Path to the folder containing "cl.exe" relative to the value of the -# corresponding environment variable. -.version-7.1toolkit-envpath = "bin" ; - - -# Auto-detect all the available msvc installations on the system. -auto-detect-toolset-versions ; - - -# And finally trigger the actual Boost Build toolset registration. -register-toolset ; diff --git a/jam-files/boost-build/tools/notfile.jam b/jam-files/boost-build/tools/notfile.jam deleted file mode 100644 index 97a5b0e8..00000000 --- a/jam-files/boost-build/tools/notfile.jam +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2005 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import generators ; -import project ; -import targets ; -import toolset ; -import type ; - - -type.register NOTFILE_MAIN ; - - -class notfile-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - local action ; - local action-name = [ $(property-set).get ] ; - - local m = [ MATCH ^@(.*) : $(action-name) ] ; - - if $(m) - { - action = [ new action $(sources) : $(m[1]) - : $(property-set) ] ; - } - else - { - action = [ new action $(sources) : notfile.run - : $(property-set) ] ; - } - return [ virtual-target.register - [ new notfile-target $(name) : $(project) : $(action) ] ] ; - } -} - - -generators.register [ new notfile-generator notfile.main : : NOTFILE_MAIN ] ; - - -toolset.flags notfile.run ACTION : ; - - -actions run -{ - $(ACTION) -} - - -rule notfile ( target-name : action + : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - requirements += $(action) ; - - targets.main-target-alternative - [ new typed-target $(target-name) : $(project) : NOTFILE_MAIN - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - -IMPORT $(__name__) : notfile : : notfile ; diff --git a/jam-files/boost-build/tools/notfile.py b/jam-files/boost-build/tools/notfile.py deleted file mode 100644 index afbf68fb..00000000 --- a/jam-files/boost-build/tools/notfile.py +++ /dev/null @@ -1,51 +0,0 @@ -# Status: ported. -# Base revision: 64429. -# -# Copyright (c) 2005-2010 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - - -import b2.build.type as type -import b2.build.generators as generators -import b2.build.virtual_target as virtual_target -import b2.build.toolset as toolset -import b2.build.targets as targets - -from b2.manager import get_manager -from b2.util import bjam_signature - -type.register("NOTFILE_MAIN") - -class NotfileGenerator(generators.Generator): - - def run(self, project, name, ps, sources): - pass - action_name = ps.get('action')[0] - if action_name[0] == '@': - action = virtual_target.Action(get_manager(), sources, action_name[1:], ps) - else: - action = virtual_target.Action(get_manager(), sources, "notfile.run", ps) - - return [get_manager().virtual_targets().register( - virtual_target.NotFileTarget(name, project, action))] - -generators.register(NotfileGenerator("notfile.main", False, [], ["NOTFILE_MAIN"])) - -toolset.flags("notfile.run", "ACTION", [], [""]) - -get_manager().engine().register_action("notfile.run", "$(ACTION)") - -@bjam_signature((["target_name"], ["action"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"])) -def notfile(target_name, action, sources, requirements, default_build): - - requirements.append("" + action) - - return targets.create_typed_metatarget(target_name, "NOTFILE_MAIN", sources, requirements, - default_build, []) - - -get_manager().projects().add_rule("notfile", notfile) diff --git a/jam-files/boost-build/tools/package.jam b/jam-files/boost-build/tools/package.jam deleted file mode 100644 index 198c2231..00000000 --- a/jam-files/boost-build/tools/package.jam +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2005 Vladimir Prus. -# Copyright 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Provides mechanism for installing whole packages into a specific directory -# structure. This is opposed to the 'install' rule, that installs a number of -# targets to a single directory, and does not care about directory structure at -# all. - -# Example usage: -# -# package.install boost : -# : -# : -# : -# ; -# -# This will install binaries, libraries and headers to the 'proper' location, -# given by command line options --prefix, --exec-prefix, --bindir, --libdir and -# --includedir. -# -# The rule is just a convenient wrapper, avoiding the need to define several -# 'install' targets. -# -# The only install-related feature is . It will apply to -# headers only and if present, paths of headers relatively to source root will -# be retained after installing. If it is not specified, then "." is assumed, so -# relative paths in headers are always preserved. - -import "class" : new ; -import option ; -import project ; -import feature ; -import property ; -import stage ; -import targets ; -import modules ; - -feature.feature install-default-prefix : : free incidental ; - -rule install ( name package-name ? : requirements * : binaries * : libraries * : headers * ) -{ - package-name ?= $(name) ; - if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ] - { - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of libdir/includir that - # is specified via options in config files. - option.set bindir : ; - option.set libdir : ; - option.set includedir : ; - } - - # If is not specified, all headers are installed to - # prefix/include, no matter what their relative path is. 
Sometimes that is - # what is needed. - local install-source-root = [ property.select : - $(requirements) ] ; - install-source-root = $(install-source-root:G=) ; - requirements = [ property.change $(requirements) : ] ; - - local install-header-subdir = [ property.select : - $(requirements) ] ; - install-header-subdir = /$(install-header-subdir:G=) ; - install-header-subdir ?= "" ; - requirements = [ property.change $(requirements) : ] - ; - - # First, figure out all locations. Use the default if no prefix option - # given. - local prefix = [ get-prefix $(name) : $(requirements) ] ; - - # Architecture dependent files. - local exec-locate = [ option.get exec-prefix : $(prefix) ] ; - - # Binaries. - local bin-locate = [ option.get bindir : $(prefix)/bin ] ; - - # Object code libraries. - local lib-locate = [ option.get libdir : $(prefix)/lib ] ; - - # Source header files. - local include-locate = [ option.get includedir : $(prefix)/include ] ; - - stage.install $(name)-bin : $(binaries) : $(requirements) - $(bin-locate) ; - alias $(name)-lib : $(name)-lib-shared $(name)-lib-static ; - - # Since the install location of shared libraries differs on universe - # and cygwin, use target alternatives to make different targets. - # We should have used indirection conditioanl requirements, but it's - # awkward to pass bin-locate and lib-locate from there to another rule. - alias $(name)-lib-shared : $(name)-lib-shared-universe ; - alias $(name)-lib-shared : $(name)-lib-shared-cygwin : cygwin ; - - # For shared libraries, we install both explicitly specified one and the - # shared libraries that the installed executables depend on. - stage.install $(name)-lib-shared-universe : $(binaries) $(libraries) : $(requirements) - $(lib-locate) on SHARED_LIB ; - stage.install $(name)-lib-shared-cygwin : $(binaries) $(libraries) : $(requirements) - $(bin-locate) on SHARED_LIB ; - - # For static libraries, we do not care about executable dependencies, since - # static libraries are already incorporated into them. - stage.install $(name)-lib-static : $(libraries) : $(requirements) - $(lib-locate) on STATIC_LIB ; - stage.install $(name)-headers : $(headers) : $(requirements) - $(include-locate)$(install-header-subdir) - $(install-source-root) ; - alias $(name) : $(name)-bin $(name)-lib $(name)-headers ; - - local c = [ project.current ] ; - local project-module = [ $(c).project-module ] ; - module $(project-module) - { - explicit $(1)-bin $(1)-lib $(1)-headers $(1) $(1)-lib-shared $(1)-lib-static - $(1)-lib-shared-universe $(1)-lib-shared-cygwin ; - } -} - -rule install-data ( target-name : package-name : data * : requirements * ) -{ - package-name ?= target-name ; - if [ MATCH --prefix=(.*) : [ modules.peek : ARGV ] ] - { - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of datarootdir - option.set datarootdir : ; - } - - local prefix = [ get-prefix $(package-name) : $(requirements) ] ; - local datadir = [ option.get datarootdir : $(prefix)/share ] ; - - stage.install $(target-name) - : $(data) - : $(requirements) $(datadir)/$(package-name) - ; - - local c = [ project.current ] ; - local project-module = [ $(c).project-module ] ; - module $(project-module) - { - explicit $(1) ; - } -} - -local rule get-prefix ( package-name : requirements * ) -{ - local prefix = [ option.get prefix : [ property.select - : $(requirements) ] ] ; - prefix = $(prefix:G=) ; - requirements = [ property.change $(requirements) : - ] ; - # Or some likely defaults if neither is given. - if ! 
$(prefix) - { - if [ modules.peek : NT ] { prefix = C:\\$(package-name) ; } - else if [ modules.peek : UNIX ] { prefix = /usr/local ; } - } - return $(prefix) ; -} - diff --git a/jam-files/boost-build/tools/package.py b/jam-files/boost-build/tools/package.py deleted file mode 100644 index aa081b4f..00000000 --- a/jam-files/boost-build/tools/package.py +++ /dev/null @@ -1,168 +0,0 @@ -# Status: ported -# Base revision: 64488 -# -# Copyright (c) 2005, 2010 Vladimir Prus. -# Copyright 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Provides mechanism for installing whole packages into a specific directory -# structure. This is opposed to the 'install' rule, that installs a number of -# targets to a single directory, and does not care about directory structure at -# all. - -# Example usage: -# -# package.install boost : -# : -# : -# : -# ; -# -# This will install binaries, libraries and headers to the 'proper' location, -# given by command line options --prefix, --exec-prefix, --bindir, --libdir and -# --includedir. -# -# The rule is just a convenient wrapper, avoiding the need to define several -# 'install' targets. -# -# The only install-related feature is . It will apply to -# headers only and if present, paths of headers relatively to source root will -# be retained after installing. If it is not specified, then "." is assumed, so -# relative paths in headers are always preserved. - -import b2.build.feature as feature -import b2.build.property as property -import b2.util.option as option -import b2.tools.stage as stage - -from b2.build.alias import alias - -from b2.manager import get_manager - -from b2.util import bjam_signature -from b2.util.utility import ungrist - - -import os - -feature.feature("install-default-prefix", [], ["free", "incidental"]) - -@bjam_signature((["name", "package_name", "?"], ["requirements", "*"], - ["binaries", "*"], ["libraries", "*"], ["headers", "*"])) -def install(name, package_name=None, requirements=[], binaries=[], libraries=[], headers=[]): - - requirements = requirements[:] - binaries = binaries[:] - libraries - - if not package_name: - package_name = name - - if option.get("prefix"): - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of libdir/includir that - # is specified via options in config files. - option.set("bindir", None) - option.set("libdir", None) - option.set("includedir", None) - - # If is not specified, all headers are installed to - # prefix/include, no matter what their relative path is. Sometimes that is - # what is needed. - install_source_root = property.select('install-source-root', requirements) - if install_source_root: - requirements = property.change(requirements, 'install-source-root', None) - - install_header_subdir = property.select('install-header-subdir', requirements) - if install_header_subdir: - install_header_subdir = ungrist(install_header_subdir[0]) - requirements = property.change(requirements, 'install-header-subdir', None) - - # First, figure out all locations. Use the default if no prefix option - # given. - prefix = get_prefix(name, requirements) - - # Architecture dependent files. - exec_locate = option.get("exec-prefix", prefix) - - # Binaries. - bin_locate = option.get("bindir", os.path.join(prefix, "bin")) - - # Object code libraries. 
- lib_locate = option.get("libdir", os.path.join(prefix, "lib")) - - # Source header files. - include_locate = option.get("includedir", os.path.join(prefix, "include")) - - stage.install(name + "-bin", binaries, requirements + ["" + bin_locate]) - - alias(name + "-lib", [name + "-lib-shared", name + "-lib-static"]) - - # Since the install location of shared libraries differs on universe - # and cygwin, use target alternatives to make different targets. - # We should have used indirection conditioanl requirements, but it's - # awkward to pass bin-locate and lib-locate from there to another rule. - alias(name + "-lib-shared", [name + "-lib-shared-universe"]) - alias(name + "-lib-shared", [name + "-lib-shared-cygwin"], ["cygwin"]) - - # For shared libraries, we install both explicitly specified one and the - # shared libraries that the installed executables depend on. - stage.install(name + "-lib-shared-universe", binaries + libraries, - requirements + ["" + lib_locate, "on", - "SHARED_LIB"]) - stage.install(name + "-lib-shared-cygwin", binaries + libraries, - requirements + ["" + bin_locate, "on", - "SHARED_LIB"]) - - # For static libraries, we do not care about executable dependencies, since - # static libraries are already incorporated into them. - stage.install(name + "-lib-static", libraries, requirements + - ["" + lib_locate, "on", "STATIC_LIB"]) - stage.install(name + "-headers", headers, requirements \ - + ["" + os.path.join(include_locate, s) for s in install_header_subdir] - + install_source_root) - - alias(name, [name + "-bin", name + "-lib", name + "-headers"]) - - pt = get_manager().projects().current() - - for subname in ["bin", "lib", "headers", "lib-shared", "lib-static", "lib-shared-universe", "lib-shared-cygwin"]: - pt.mark_targets_as_explicit([name + "-" + subname]) - -@bjam_signature((["target_name"], ["package_name"], ["data", "*"], ["requirements", "*"])) -def install_data(target_name, package_name, data, requirements): - if not package_name: - package_name = target_name - - if option.get("prefix"): - # If --prefix is explicitly specified on the command line, - # then we need wipe away any settings of datarootdir - option.set("datarootdir", None) - - prefix = get_prefix(package_name, requirements) - datadir = option.get("datarootdir", os.path.join(prefix, "share")) - - stage.install(target_name, data, - requirements + ["" + os.path.join(datadir, package_name)]) - - get_manager().projects().current().mark_targets_as_explicit([target_name]) - -def get_prefix(package_name, requirements): - - specified = property.select("install-default-prefix", requirements) - if specified: - specified = ungrist(specified[0]) - prefix = option.get("prefix", specified) - requirements = property.change(requirements, "install-default-prefix", None) - # Or some likely defaults if neither is given. - if not prefix: - if os.name == "nt": - prefix = "C:\\" + package_name - elif os.name == "posix": - prefix = "/usr/local" - - return prefix - diff --git a/jam-files/boost-build/tools/pathscale.jam b/jam-files/boost-build/tools/pathscale.jam deleted file mode 100644 index 454e3454..00000000 --- a/jam-files/boost-build/tools/pathscale.jam +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2006 Noel Belcourt -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import toolset : flags ; -import feature ; -import type ; -import common ; -import fortran ; - -feature.extend toolset : pathscale ; -toolset.inherit pathscale : unix ; -generators.override pathscale.prebuilt : builtin.prebuilt ; -generators.override pathscale.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.pathscale.com/docs.html - -rule init ( version ? : command * : options * ) -{ - command = [ common.get-invocation-command pathscale : pathCC : $(command) - : /opt/ekopath/bin ] ; - - # Determine the version - local command-string = $(command:J=" ") ; - if $(command) - { - version ?= [ MATCH "^([0-9.]+)" - : [ SHELL "$(command-string) -dumpversion" ] ] ; - } - - local condition = [ common.check-init-parameters pathscale - : version $(version) ] ; - - common.handle-options pathscale : $(condition) : $(command) : $(options) ; - - toolset.flags pathscale.compile.fortran90 OPTIONS $(condition) : - [ feature.get-values : $(options) ] : unchecked ; - - command_c = $(command_c[1--2]) $(command[-1]:B=pathcc) ; - - toolset.flags pathscale CONFIG_C_COMMAND $(condition) : $(command_c) ; - - # fortran support - local f-command = [ common.get-invocation-command pathscale : pathf90 : $(command) ] ; - local command_f = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ; - local command_f90 = $(command_f[1--2]) $(f-command[-1]:B=pathf90) ; - - toolset.flags pathscale CONFIG_F_COMMAND $(condition) : $(command_f) ; - toolset.flags pathscale CONFIG_F90_COMMAND $(condition) : $(command_f90) ; - - # always link lib rt to resolve clock_gettime() - flags pathscale.link FINDLIBS-SA : rt : unchecked ; -} - -# Declare generators -generators.register-c-compiler pathscale.compile.c : C : OBJ : pathscale ; -generators.register-c-compiler pathscale.compile.c++ : CPP : OBJ : pathscale ; -generators.register-fortran-compiler pathscale.compile.fortran : FORTRAN : OBJ : pathscale ; -generators.register-fortran90-compiler pathscale.compile.fortran90 : FORTRAN90 : OBJ : pathscale ; - -# Declare flags and actions for compilation -flags pathscale.compile OPTIONS off : -O0 ; -flags pathscale.compile OPTIONS speed : -O3 ; -flags pathscale.compile OPTIONS space : -Os ; - -flags pathscale.compile OPTIONS off : -noinline ; -flags pathscale.compile OPTIONS on : -inline ; -flags pathscale.compile OPTIONS full : -inline ; - -flags pathscale.compile OPTIONS off : -woffall ; -flags pathscale.compile OPTIONS on : -Wall ; -flags pathscale.compile OPTIONS all : -Wall -pedantic ; -flags pathscale.compile OPTIONS on : -Werror ; - -flags pathscale.compile OPTIONS on : -ggdb ; -flags pathscale.compile OPTIONS on : -pg ; -flags pathscale.compile OPTIONS shared : -fPIC ; -flags pathscale.compile OPTIONS 32 : -m32 ; -flags pathscale.compile OPTIONS 64 : -m64 ; - -flags pathscale.compile USER_OPTIONS ; -flags pathscale.compile.c++ USER_OPTIONS ; -flags pathscale.compile DEFINES ; -flags pathscale.compile INCLUDES ; - -flags pathscale.compile.fortran USER_OPTIONS ; -flags pathscale.compile.fortran90 USER_OPTIONS ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) $(USER_OPTIONS) 
-D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -rule compile.fortran90 ( targets * : sources * : properties * ) -{ - # the space rule inserts spaces between targets and it's necessary - SPACE on $(targets) = " " ; - # Serialize execution of the compile.fortran90 action - # F90 source must be compiled in a particular order so we - # serialize the build as a parallel F90 compile might fail - JAM_SEMAPHORE on $(targets) = pathscale-f90-semaphore ; -} - -actions compile.fortran90 -{ - "$(CONFIG_F90_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -module $(<[1]:D) -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags pathscale.link OPTIONS on : -ggdb -rdynamic ; -# Strip the binary when no debugging is needed -flags pathscale.link OPTIONS off : -g0 ; -flags pathscale.link OPTIONS on : -pg ; -flags pathscale.link USER_OPTIONS ; -flags pathscale.link LINKPATH ; -flags pathscale.link FINDLIBS-ST ; -flags pathscale.link FINDLIBS-SA ; -flags pathscale.link FINDLIBS-SA multi : pthread ; -flags pathscale.link LIBRARIES ; -flags pathscale.link LINK-RUNTIME static : static ; -flags pathscale.link LINK-RUNTIME shared : dynamic ; -flags pathscale.link RPATH ; -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags pathscale.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) $(USER_OPTIONS) -L"$(LINKPATH)" -Wl,$(RPATH_OPTION:E=-R)$(SPACE)-Wl,"$(RPATH)" -o "$(<)" -Wl,-soname$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-SA) -l$(FINDLIBS-ST) -} - -# Declare action for creating static libraries -# "$(CONFIG_COMMAND)" -ar -o "$(<)" "$(>)" -actions piecemeal archive -{ - ar $(ARFLAGS) ru "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/pch.jam b/jam-files/boost-build/tools/pch.jam deleted file mode 100644 index 0c6e98fa..00000000 --- a/jam-files/boost-build/tools/pch.jam +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##### Using Precompiled Headers (Quick Guide) ##### -# -# Make precompiled mypch.hpp: -# -# import pch ; -# -# cpp-pch mypch -# : # sources -# mypch.hpp -# : # requiremnts -# msvc:mypch.cpp -# ; -# -# Add cpp-pch to sources: -# -# exe hello -# : main.cpp hello.cpp mypch -# ; - -import "class" : new ; -import type ; -import feature ; -import generators ; - -type.register PCH : pch ; - -type.register C_PCH : : PCH ; -type.register CPP_PCH : : PCH ; - -# Control precompiled header (PCH) generation. -feature.feature pch : - on - off - : propagated ; - - -feature.feature pch-header : : free dependency ; -feature.feature pch-file : : free dependency ; - -# Base PCH generator. The 'run' method has the logic to prevent this generator -# from being run unless it's being used for a top-level PCH target. 
-class pch-generator : generator -{ - import property-set ; - - rule action-class ( ) - { - return compile-action ; - } - - rule run ( project name ? : property-set : sources + ) - { - if ! $(name) - { - # Unless this generator is invoked as the top-most generator for a - # main target, fail. This allows using 'H' type as input type for - # this generator, while preventing Boost.Build to try this generator - # when not explicitly asked for. - # - # One bad example is msvc, where pch generator produces both PCH - # target and OBJ target, so if there's any header generated (like by - # bison, or by msidl), we'd try to use pch generator to get OBJ from - # that H, which is completely wrong. By restricting this generator - # only to pch main target, such problem is solved. - } - else - { - local r = [ run-pch $(project) $(name) - : [ $(property-set).add-raw BOOST_BUILD_PCH_ENABLED ] - : $(sources) ] ; - return [ generators.add-usage-requirements $(r) - : BOOST_BUILD_PCH_ENABLED ] ; - } - } - - # This rule must be overridden by the derived classes. - rule run-pch ( project name ? : property-set : sources + ) - { - } -} - - -# NOTE: requirements are empty, default pch generator can be applied when -# pch=off. -generators.register - [ new dummy-generator pch.default-c-pch-generator : : C_PCH ] ; -generators.register - [ new dummy-generator pch.default-cpp-pch-generator : : CPP_PCH ] ; diff --git a/jam-files/boost-build/tools/pch.py b/jam-files/boost-build/tools/pch.py deleted file mode 100644 index 21d3db09..00000000 --- a/jam-files/boost-build/tools/pch.py +++ /dev/null @@ -1,83 +0,0 @@ -# Status: Being ported by Steven Watanabe -# Base revision: 47077 -# -# Copyright (c) 2005 Reece H. Dunn. -# Copyright 2006 Ilya Sokolov -# Copyright (c) 2008 Steven Watanabe -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##### Using Precompiled Headers (Quick Guide) ##### -# -# Make precompiled mypch.hpp: -# -# import pch ; -# -# cpp-pch mypch -# : # sources -# mypch.hpp -# : # requiremnts -# msvc:mypch.cpp -# ; -# -# Add cpp-pch to sources: -# -# exe hello -# : main.cpp hello.cpp mypch -# ; - -from b2.build import type, feature, generators - -type.register('PCH', ['pch']) -type.register('C_PCH', [], 'PCH') -type.register('CPP_PCH', [], 'PCH') - -# Control precompiled header (PCH) generation. -feature.feature('pch', - ['on', 'off'], - ['propagated']) - -feature.feature('pch-header', [], ['free', 'dependency']) -feature.feature('pch-file', [], ['free', 'dependency']) - -class PchGenerator(generators.Generator): - """ - Base PCH generator. The 'run' method has the logic to prevent this generator - from being run unless it's being used for a top-level PCH target. - """ - def action_class(self): - return 'compile-action' - - def run(self, project, name, prop_set, sources): - if not name: - # Unless this generator is invoked as the top-most generator for a - # main target, fail. This allows using 'H' type as input type for - # this generator, while preventing Boost.Build to try this generator - # when not explicitly asked for. - # - # One bad example is msvc, where pch generator produces both PCH - # target and OBJ target, so if there's any header generated (like by - # bison, or by msidl), we'd try to use pch generator to get OBJ from - # that H, which is completely wrong. By restricting this generator - # only to pch main target, such problem is solved. 
- pass - else: - r = self.run_pch(project, name, - prop_set.add_raw('BOOST_BUILD_PCH_ENABLED'), - sources) - return generators.add_usage_requirements( - r, ['BOOST_BUILD_PCH_ENABLED']) - - # This rule must be overridden by the derived classes. - def run_pch(self, project, name, prop_set, sources): - pass - -#FIXME: dummy-generator in builtins.jam needs to be ported. -# NOTE: requirements are empty, default pch generator can be applied when -# pch=off. -###generators.register( -### [ new dummy-generator pch.default-c-pch-generator : : C_PCH ] ; -###generators.register -### [ new dummy-generator pch.default-cpp-pch-generator : : CPP_PCH ] ; diff --git a/jam-files/boost-build/tools/pgi.jam b/jam-files/boost-build/tools/pgi.jam deleted file mode 100644 index 3a35c644..00000000 --- a/jam-files/boost-build/tools/pgi.jam +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright Noel Belcourt 2007. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import fortran ; -import type ; -import common ; -import gcc ; - -feature.extend toolset : pgi ; -toolset.inherit pgi : unix ; -generators.override pgi.prebuilt : builtin.lib-generator ; -generators.override pgi.searched-lib-generator : searched-lib-generator ; - -# Documentation and toolchain description located -# http://www.pgroup.com/resources/docs.htm - -rule init ( version ? : command * : options * ) -{ - local condition = [ common.check-init-parameters pgi : version $(version) ] ; - - local l_command = [ common.get-invocation-command pgi : pgCC : $(command) ] ; - - common.handle-options pgi : $(condition) : $(l_command) : $(options) ; - - command_c = $(command_c[1--2]) $(l_command[-1]:B=cc) ; - - toolset.flags pgi CONFIG_C_COMMAND $(condition) : $(command_c) ; - - flags pgi.compile DEFINES $(condition) : - [ feature.get-values : $(options) ] : unchecked ; - - # IOV_MAX support - flags pgi.compile DEFINES $(condition) : __need_IOV_MAX : unchecked ; - - # set link flags - flags pgi.link FINDLIBS-ST : [ - feature.get-values : $(options) ] : unchecked ; - - # always link lib rt to resolve clock_gettime() - flags pgi.link FINDLIBS-SA : rt [ - feature.get-values : $(options) ] : unchecked ; - - gcc.init-link-flags pgi gnu $(condition) ; -} - -# Declare generators -generators.register-c-compiler pgi.compile.c : C : OBJ : pgi ; -generators.register-c-compiler pgi.compile.c++ : CPP : OBJ : pgi ; -generators.register-fortran-compiler pgi.compile.fortran : FORTRAN : OBJ : pgi ; - -# Declare flags and actions for compilation -flags pgi.compile OPTIONS : -Kieee ; -flags pgi.compile OPTIONS shared : -fpic -fPIC ; -flags pgi.compile OPTIONS on : -gopt ; -flags pgi.compile OPTIONS on : -xprofile=tcov ; -flags pgi.compile OPTIONS speed : -fast -Mx,8,0x10000000 ; -flags pgi.compile OPTIONS space : -xO2 -xspace ; -# flags pgi.compile OPTIONS multi : -mt ; - -flags pgi.compile OPTIONS off : -Minform=severe ; -flags pgi.compile OPTIONS on : -Minform=warn ; - -flags pgi.compile.c++ OPTIONS off : -INLINE:none ; - -flags pgi.compile OPTIONS ; -flags pgi.compile.c++ OPTIONS ; -flags pgi.compile DEFINES ; -flags pgi.compile INCLUDES ; - -flags pgi.compile.fortran OPTIONS ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" 
"$(>)" -} - -actions compile.fortran -{ - "$(CONFIG_F_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags pgi.link OPTIONS on : -gopt ; -# Strip the binary when no debugging is needed -flags pgi.link OPTIONS off : -s ; -flags pgi.link OPTIONS on : -xprofile=tcov ; -flags pgi.link OPTIONS ; -flags pgi.link OPTIONS shared : -fpic -fPIC ; -flags pgi.link LINKPATH ; -flags pgi.link FINDLIBS-ST ; -flags pgi.link FINDLIBS-SA ; -flags pgi.link FINDLIBS-SA multi : pthread rt ; -flags pgi.link LIBRARIES ; -flags pgi.link LINK-RUNTIME static : static ; -flags pgi.link LINK-RUNTIME shared : dynamic ; -flags pgi.link RPATH ; - -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags pgi.link RPATH ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -# reddish can only link statically and, somehow, the presence of -Bdynamic on the link line -# marks the executable as a dynamically linked exec even though no dynamic libraries are supplied. -# Yod on redstorm refuses to load an executable that is dynamically linked. -# removing the dynamic link options should get us where we need to be on redstorm. -# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bstatic -l$(FINDLIBS-ST) -Bdynamic -l$(FINDLIBS-SA) -B$(LINK-RUNTIME) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -# "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -shared -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" -Wl,-h -Wl,$(<[1]:D=) "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -actions updated together piecemeal pgi.archive -{ - ar -rc$(ARFLAGS:E=) "$(<)" "$(>)" -} - diff --git a/jam-files/boost-build/tools/python-config.jam b/jam-files/boost-build/tools/python-config.jam deleted file mode 100644 index 40aa825b..00000000 --- a/jam-files/boost-build/tools/python-config.jam +++ /dev/null @@ -1,27 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Python tools and librries. To use, just import this module. 
- -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - for local R in 2.4 2.3 2.2 - { - local python-path = [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Python\\PythonCore\\$(R)\\InstallPath" ] ; - local python-version = $(R) ; - - if $(python-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using python ":" $(python-version) ":" $(python-path) ; - } - using python : $(python-version) : $(python-path) ; - } - } -} diff --git a/jam-files/boost-build/tools/python.jam b/jam-files/boost-build/tools/python.jam deleted file mode 100644 index 97a9f9a5..00000000 --- a/jam-files/boost-build/tools/python.jam +++ /dev/null @@ -1,1267 +0,0 @@ -# Copyright 2004 Vladimir Prus. -# Distributed under the Boost Software License, Version 1.0. (See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -# Support for Python and the the Boost.Python library. -# -# This module defines -# -# - a project 'python' with a target 'python' in it, that corresponds to the -# python library -# -# - a main target rule 'python-extension' which can be used to build a python -# extension. -# -# Extensions that use Boost.Python must explicitly link to it. - -import type ; -import testing ; -import generators ; -import project ; -import errors ; -import targets ; -import "class" : new ; -import os ; -import common ; -import toolset ; -import regex ; -import numbers ; -import string ; -import property ; -import sequence ; -import path ; -import feature ; -import set ; -import builtin ; -import version ; - - -# Make this module a project. -project.initialize $(__name__) ; -project python ; - -# Save the project so that if 'init' is called several times we define new -# targets in the python project, not in whatever project we were called by. -.project = [ project.current ] ; - -# Dynamic linker lib. Necessary to specify it explicitly on some platforms. -lib dl ; -# This contains 'openpty' function need by python. Again, on some system need to -# pass this to linker explicitly. -lib util ; -# Python uses pthread symbols. -lib pthread ; -# Extra library needed by phtread on some platforms. -lib rt ; - -# The pythonpath feature specifies additional elements for the PYTHONPATH -# environment variable, set by run-pyd. For example, pythonpath can be used to -# access Python modules that are part of the product being built, but are not -# installed in the development system's default paths. -feature.feature pythonpath : : free optional path ; - -# Initializes the Python toolset. Note that all parameters are optional. -# -# - version -- the version of Python to use. Should be in Major.Minor format, -# for example 2.3. Do not include the subminor version. -# -# - cmd-or-prefix: Preferably, a command that invokes a Python interpreter. -# Alternatively, the installation prefix for Python libraries and includes. If -# empty, will be guessed from the version, the platform's installation -# patterns, and the python executables that can be found in PATH. -# -# - includes: the include path to Python headers. If empty, will be guessed. -# -# - libraries: the path to Python library binaries. If empty, will be guessed. -# On MacOS/Darwin, you can also pass the path of the Python framework. -# -# - condition: if specified, should be a set of properties that are matched -# against the build configuration when Boost.Build selects a Python -# configuration to use. 
-# -# - extension-suffix: A string to append to the name of extension modules before -# the true filename extension. Ordinarily we would just compute this based on -# the value of the feature. However ubuntu's python-dbg -# package uses the windows convention of appending _d to debug-build extension -# modules. We have no way of detecting ubuntu, or of probing python for the -# "_d" requirement, and if you configure and build python using -# --with-pydebug, you'll be using the standard *nix convention. Defaults to "" -# (or "_d" when targeting windows and is set). -# -# Example usage: -# -# using python : 2.3 ; -# using python : 2.3 : /usr/local/bin/python ; -# -rule init ( version ? : cmd-or-prefix ? : includes * : libraries ? - : condition * : extension-suffix ? ) -{ - project.push-current $(.project) ; - - debug-message Configuring python... ; - for local v in version cmd-or-prefix includes libraries condition - { - if $($(v)) - { - debug-message " user-specified "$(v): \"$($(v))\" ; - } - } - - configure $(version) : $(cmd-or-prefix) : $(includes) : $(libraries) : $(condition) : $(extension-suffix) ; - - project.pop-current ; -} - -# A simpler version of SHELL that grabs stderr as well as stdout, but returns -# nothing if there was an error. -# -local rule shell-cmd ( cmd ) -{ - debug-message running command '$(cmd)" 2>&1"' ; - x = [ SHELL $(cmd)" 2>&1" : exit-status ] ; - if $(x[2]) = 0 - { - return $(x[1]) ; - } - else - { - return ; - } -} - - -# Try to identify Cygwin symlinks. Invoking such a file directly as an NT -# executable from a native Windows build of bjam would be fatal to the bjam -# process. One /can/ invoke them through sh.exe or bash.exe, if you can prove -# that those are not also symlinks. ;-) -# -# If a symlink is found returns non-empty; we try to extract the target of the -# symlink from the file and return that. -# -# Note: 1. only works on NT 2. path is a native path. -local rule is-cygwin-symlink ( path ) -{ - local is-symlink = ; - - # Look for a file with the given path having the S attribute set, as cygwin - # symlinks do. /-C means "do not use thousands separators in file sizes." - local dir-listing = [ shell-cmd "DIR /-C /A:S \""$(path)"\"" ] ; - - if $(dir-listing) - { - # Escape any special regex characters in the base part of the path. - local base-pat = [ regex.escape $(path:D=) : ].[()*+?|\\$^ : \\ ] ; - - # Extract the file's size from the directory listing. - local size-of-system-file = [ MATCH "([0-9]+) "$(base-pat) : $(dir-listing) : 1 ] ; - - # If the file has a reasonably small size, look for the special symlink - # identification text. - if $(size-of-system-file) && [ numbers.less $(size-of-system-file) 1000 ] - { - local link = [ SHELL "FIND /OFF \"!\" \""$(path)"\" 2>&1" ] ; - if $(link[2]) != 0 - { - local nl = " - -" ; - is-symlink = [ MATCH ".*!([^"$(nl)"]*)" : $(link[1]) : 1 ] ; - if $(is-symlink) - { - is-symlink = [ *nix-path-to-native $(is-symlink) ] ; - is-symlink = $(is-symlink:R=$(path:D)) ; - } - - } - } - } - return $(is-symlink) ; -} - - -# Append ext to each member of names that does not contain '.'. -# -local rule default-extension ( names * : ext * ) -{ - local result ; - for local n in $(names) - { - switch $(n) - { - case *.* : result += $(n) ; - case * : result += $(n)$(ext) ; - } - } - return $(result) ; -} - - -# Tries to determine whether invoking "cmd" would actually attempt to launch a -# cygwin symlink. -# -# Note: only works on NT. -# -local rule invokes-cygwin-symlink ( cmd ) -{ - local dirs = $(cmd:D) ; - if ! 
$(dirs) - { - dirs = . [ os.executable-path ] ; - } - local base = [ default-extension $(cmd:D=) : .exe .cmd .bat ] ; - local paths = [ GLOB $(dirs) : $(base) ] ; - if $(paths) - { - # Make sure we have not run into a Cygwin symlink. Invoking such a file - # as an NT executable would be fatal for the bjam process. - return [ is-cygwin-symlink $(paths[1]) ] ; - } -} - - -local rule debug-message ( message * ) -{ - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO notice: [python-cfg] $(message) ; - } -} - - -# Like W32_GETREG, except prepend HKEY_CURRENT_USER\SOFTWARE and -# HKEY_LOCAL_MACHINE\SOFTWARE to the first argument, returning the first result -# found. Also accounts for the fact that on 64-bit machines, 32-bit software has -# its own area, under SOFTWARE\Wow6432node. -# -local rule software-registry-value ( path : data ? ) -{ - local result ; - for local root in HKEY_CURRENT_USER HKEY_LOCAL_MACHINE - { - for local x64elt in "" Wow6432node\\ # Account for 64-bit windows - { - if ! $(result) - { - result = [ W32_GETREG $(root)\\SOFTWARE\\$(x64elt)$(path) : $(data) ] ; - } - } - - } - return $(result) ; -} - - -.windows-drive-letter-re = ^([A-Za-z]):[\\/](.*) ; -.cygwin-drive-letter-re = ^/cygdrive/([a-z])/(.*) ; - -.working-directory = [ PWD ] ; -.working-drive-letter = [ SUBST $(.working-directory) $(.windows-drive-letter-re) $1 ] ; -.working-drive-letter ?= [ SUBST $(.working-directory) $(.cygwin-drive-letter-re) $1 ] ; - - -local rule windows-to-cygwin-path ( path ) -{ - # If path is rooted with a drive letter, rewrite it using the /cygdrive - # mountpoint. - local p = [ SUBST $(path:T) $(.windows-drive-letter-re) /cygdrive/$1/$2 ] ; - - # Else if path is rooted without a drive letter, use the working directory. - p ?= [ SUBST $(path:T) ^/(.*) /cygdrive/$(.working-drive-letter:L)/$2 ] ; - - # Else return the path unchanged. - return $(p:E=$(path:T)) ; -} - - -# :W only works in Cygwin builds of bjam. This one works on NT builds as well. -# -local rule cygwin-to-windows-path ( path ) -{ - path = $(path:R="") ; # strip any trailing slash - - local drive-letter = [ SUBST $(path) $(.cygwin-drive-letter-re) $1:/$2 ] ; - if $(drive-letter) - { - path = $(drive-letter) ; - } - else if $(path:R=/x) = $(path) # already rooted? - { - # Look for a cygwin mount that includes each head sequence in $(path). - local head = $(path) ; - local tail = "" ; - - while $(head) - { - local root = [ software-registry-value - "Cygnus Solutions\\Cygwin\\mounts v2\\"$(head) : native ] ; - - if $(root) - { - path = $(tail:R=$(root)) ; - head = ; - } - tail = $(tail:R=$(head:D=)) ; - - if $(head) = / - { - head = ; - } - else - { - head = $(head:D) ; - } - } - } - return [ regex.replace $(path:R="") / \\ ] ; -} - - -# Convert a *nix path to native. -# -local rule *nix-path-to-native ( path ) -{ - if [ os.name ] = NT - { - path = [ cygwin-to-windows-path $(path) ] ; - } - return $(path) ; -} - - -# Convert an NT path to native. -# -local rule windows-path-to-native ( path ) -{ - if [ os.name ] = NT - { - return $(path) ; - } - else - { - return [ windows-to-cygwin-path $(path) ] ; - } -} - - -# Return nonempty if path looks like a windows path, i.e. it starts with a drive -# letter or contains backslashes. 
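A standalone sketch of the /cygdrive drive-letter translation implemented above (registry mount lookup omitted; illustrative Python, not the module's code):

    import re

    def windows_to_cygwin(path):
        m = re.match(r"^([A-Za-z]):[\\/](.*)", path)
        if not m:
            return path
        return "/cygdrive/%s/%s" % (m.group(1).lower(), m.group(2).replace("\\", "/"))

    def cygwin_to_windows(path):
        m = re.match(r"^/cygdrive/([a-z])/(.*)", path)
        if not m:
            return path
        return "%s:\\%s" % (m.group(1).upper(), m.group(2).replace("/", "\\"))

    assert windows_to_cygwin(r"C:\Tools\jam") == "/cygdrive/c/Tools/jam"
    assert cygwin_to_windows("/cygdrive/c/Tools/jam") == "C:\\Tools\\jam"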
-# -local rule guess-windows-path ( path ) -{ - return [ SUBST $(path) ($(.windows-drive-letter-re)|.*([\\]).*) $1 ] ; -} - - -local rule path-to-native ( paths * ) -{ - local result ; - - for local p in $(paths) - { - if [ guess-windows-path $(p) ] - { - result += [ windows-path-to-native $(p) ] ; - } - else - { - result += [ *nix-path-to-native $(p:T) ] ; - } - } - return $(result) ; -} - - -# Validate the version string and extract the major/minor part we care about. -# -local rule split-version ( version ) -{ - local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ; - if ! $(major-minor[2]) || $(major-minor[3]) - { - ECHO "Warning: \"using python\" expects a two part (major, minor) version number; got" $(version) instead ; - - # Add a zero to account for the missing digit if necessary. - major-minor += 0 ; - } - - return $(major-minor[1]) $(major-minor[2]) ; -} - - -# Build a list of versions from 3.0 down to 1.5. Because bjam can not enumerate -# registry sub-keys, we have no way of finding a version with a 2-digit minor -# version, e.g. 2.10 -- let us hope that never happens. -# -.version-countdown = ; -for local v in [ numbers.range 15 30 ] -{ - .version-countdown = [ SUBST $(v) (.)(.*) $1.$2 ] $(.version-countdown) ; -} - - -local rule windows-installed-pythons ( version ? ) -{ - version ?= $(.version-countdown) ; - local interpreters ; - - for local v in $(version) - { - local install-path = [ - software-registry-value "Python\\PythonCore\\"$(v)"\\InstallPath" ] ; - - if $(install-path) - { - install-path = [ windows-path-to-native $(install-path) ] ; - debug-message Registry indicates Python $(v) installed at \"$(install-path)\" ; - } - - interpreters += $(:E=python:R=$(install-path)) ; - } - return $(interpreters) ; -} - - -local rule darwin-installed-pythons ( version ? ) -{ - version ?= $(.version-countdown) ; - - local prefix - = [ GLOB /System/Library/Frameworks /Library/Frameworks - : Python.framework ] ; - - return $(prefix)/Versions/$(version)/bin/python ; -} - - -# Assume "python-cmd" invokes a python interpreter and invoke it to extract all -# the information we care about from its "sys" module. Returns void if -# unsuccessful. -# -local rule probe ( python-cmd ) -{ - # Avoid invoking a Cygwin symlink on NT. - local skip-symlink ; - if [ os.name ] = NT - { - skip-symlink = [ invokes-cygwin-symlink $(python-cmd) ] ; - } - - if $(skip-symlink) - { - debug-message -------------------------------------------------------------------- ; - debug-message \"$(python-cmd)\" would attempt to invoke a Cygwin symlink, ; - debug-message causing a bjam built for Windows to hang. ; - debug-message ; - debug-message If you intend to target a Cygwin build of Python, please ; - debug-message replace the path to the link with the path to a real executable ; - debug-message (guessing: \"$(skip-symlink)\") "in" your 'using python' line ; - debug-message "in" user-config.jam or site-config.jam. Do not forget to escape ; - debug-message backslashes ; - debug-message -------------------------------------------------------------------- ; - } - else - { - # Prepare a List of Python format strings and expressions that can be - # used to print the constants we want from the sys module. - - # We do not really want sys.version since that is a complicated string, - # so get the information from sys.version_info instead. 
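Conceptually the probe amounts to roughly the following (a condensed subprocess-based sketch with an abbreviated attribute list; the real rule also handles Cygwin symlinks and command quoting):

    import subprocess

    def probe(python_cmd):
        code = ("import sys; print('version=%d.%d' % sys.version_info[:2]); "
                "print('platform=' + sys.platform); print('prefix=' + sys.prefix)")
        out = subprocess.check_output([python_cmd, "-c", code], text=True)
        return dict(line.split("=", 1) for line in out.splitlines() if "=" in line)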
- local format = "version=%d.%d" ; - local exprs = "version_info[0]" "version_info[1]" ; - - for local s in $(sys-elements[2-]) - { - format += $(s)=%s ; - exprs += $(s) ; - } - - # Invoke Python and ask it for all those values. - if [ version.check-jam-version 3 1 17 ] || ( [ os.name ] != NT ) - { - # Prior to version 3.1.17 Boost Jam's SHELL command did not support - # quoted commands correctly on Windows. This means that on that - # platform we do not support using a Python command interpreter - # executable whose path contains a space character. - python-cmd = \"$(python-cmd)\" ; - } - local full-cmd = - $(python-cmd)" -c \"from sys import *; print('"$(format:J=\\n)"' % ("$(exprs:J=,)"))\"" ; - - local output = [ shell-cmd $(full-cmd) ] ; - if $(output) - { - # Parse the output to get all the results. - local nl = " - -" ; - for s in $(sys-elements) - { - # These variables are expected to be declared local in the - # caller, so Jam's dynamic scoping will set their values there. - sys.$(s) = [ SUBST $(output) \\<$(s)=([^$(nl)]+) $1 ] ; - } - } - return $(output) ; - } -} - - -# Make sure the "libraries" and "includes" variables (in an enclosing scope) -# have a value based on the information given. -# -local rule compute-default-paths ( target-os : version ? : prefix ? : - exec-prefix ? ) -{ - exec-prefix ?= $(prefix) ; - - if $(target-os) = windows - { - # The exec_prefix is where you're supposed to look for machine-specific - # libraries. - local default-library-path = $(exec-prefix)\\libs ; - local default-include-path = $(:E=Include:R=$(prefix)) ; - - # If the interpreter was found in a directory called "PCBuild" or - # "PCBuild8," assume we're looking at a Python built from the source - # distro, and go up one additional level to the default root. Otherwise, - # the default root is the directory where the interpreter was found. - - # We ask Python itself what the executable path is in case of - # intermediate symlinks or shell scripts. - local executable-dir = $(sys.executable:D) ; - - if [ MATCH ^(PCBuild) : $(executable-dir:D=) ] - { - debug-message "This Python appears to reside in a source distribution;" ; - debug-message "prepending \""$(executable-dir)"\" to default library search path" ; - - default-library-path = $(executable-dir) $(default-library-path) ; - - default-include-path = $(:E=PC:R=$(executable-dir:D)) $(default-include-path) ; - - debug-message "and \""$(default-include-path[1])"\" to default #include path" ; - } - - libraries ?= $(default-library-path) ; - includes ?= $(default-include-path) ; - } - else - { - includes ?= $(prefix)/include/python$(version) ; - - local lib = $(exec-prefix)/lib ; - libraries ?= $(lib)/python$(version)/config $(lib) ; - } -} - -# The version of the python interpreter to use. -feature.feature python : : propagated ; -feature.feature python.interpreter : : free ; - -toolset.flags python.capture-output PYTHON : ; - -# -# Support for Python configured --with-pydebug -# -feature.feature python-debugging : off on : propagated ; -builtin.variant debug-python : debug : on ; - - -# Return a list of candidate commands to try when looking for a Python -# interpreter. prefix is expected to be a native path. -# -local rule candidate-interpreters ( version ? : prefix ? : target-os ) -{ - local bin-path = bin ; - if $(target-os) = windows - { - # On Windows, look in the root directory itself and, to work with the - # result of a build-from-source, the PCBuild directory. 
- bin-path = PCBuild8 PCBuild "" ; - } - - bin-path = $(bin-path:R=$(prefix)) ; - - if $(target-os) in windows darwin - { - return # Search: - $(:E=python:R=$(bin-path)) # Relative to the prefix, if any - python # In the PATH - [ $(target-os)-installed-pythons $(version) ] # Standard install locations - ; - } - else - { - # Search relative to the prefix, or if none supplied, in PATH. - local unversioned = $(:E=python:R=$(bin-path:E=)) ; - - # If a version was specified, look for a python with that specific - # version appended before looking for one called, simply, "python" - return $(unversioned)$(version) $(unversioned) ; - } -} - - -# Compute system library dependencies for targets linking with static Python -# libraries. -# -# On many systems, Python uses libraries such as pthreads or libdl. Since static -# libraries carry no library dependency information of their own that the linker -# can extract, these extra dependencies have to be given explicitly on the link -# line of the client. The information about these dependencies is packaged into -# the "python" target below. -# -# Even where Python itself uses pthreads, it never allows extension modules to -# be entered concurrently (unless they explicitly give up the interpreter lock). -# Therefore, extension modules do not need the efficiency overhead of threadsafe -# code as produced by multi, and we handle libpthread along with -# other libraries here. Note: this optimization is based on an assumption that -# the compiler generates link-compatible code in both the single- and -# multi-threaded cases, and that system libraries do not change their ABIs -# either. -# -# Returns a list of usage-requirements that link to the necessary system -# libraries. -# -local rule system-library-dependencies ( target-os ) -{ - switch $(target-os) - { - case s[uo][nl]* : # solaris, sun, sunos - # Add a librt dependency for the gcc toolset on SunOS (the sun - # toolset adds -lrt unconditionally). While this appears to - # duplicate the logic already in gcc.jam, it does not as long as - # we are not forcing multi. - - # On solaris 10, distutils.sysconfig.get_config_var('LIBS') yields - # '-lresolv -lsocket -lnsl -lrt -ldl'. However, that does not seem - # to be the right list for extension modules. For example, on my - # installation, adding -ldl causes at least one test to fail because - # the library can not be found and removing it causes no failures. - - # Apparently, though, we need to add -lrt for gcc. - return gcc:rt ; - - case osf : return pthread gcc:rt ; - - case qnx* : return ; - case darwin : return ; - case windows : return ; - - case hpux : return rt ; - case *bsd : return pthread gcc:util ; - - case aix : return pthread dl ; - - case * : return pthread dl - gcc:util linux:util ; - } -} - - -# Declare a target to represent Python's library. -# -local rule declare-libpython-target ( version ? : requirements * ) -{ - # Compute the representation of Python version in the name of Python's - # library file. - local lib-version = $(version) ; - if windows in $(requirements) - { - local major-minor = [ split-version $(version) ] ; - lib-version = $(major-minor:J="") ; - if on in $(requirements) - { - lib-version = $(lib-version)_d ; - } - } - - if ! $(lib-version) - { - ECHO *** warning: could not determine Python version, which will ; - ECHO *** warning: probably prevent us from linking with the python ; - ECHO *** warning: library. Consider explicitly passing the version ; - ECHO *** warning: to 'using python'. ; - } - - # Declare it. 
- lib python.lib : : python$(lib-version) $(requirements) ; -} - - -# Implementation of init. -local rule configure ( version ? : cmd-or-prefix ? : includes * : libraries ? : - condition * : extension-suffix ? ) -{ - local prefix ; - local exec-prefix ; - local cmds-to-try ; - local interpreter-cmd ; - - local target-os = [ feature.get-values target-os : $(condition) ] ; - target-os ?= [ feature.defaults target-os ] ; - target-os = $(target-os:G=) ; - - if $(target-os) = windows && on in $(condition) - { - extension-suffix ?= _d ; - } - extension-suffix ?= "" ; - - # Normalize and dissect any version number. - local major-minor ; - if $(version) - { - major-minor = [ split-version $(version) ] ; - version = $(major-minor:J=.) ; - } - - local cmds-to-try ; - - if ! $(cmd-or-prefix) || [ GLOB $(cmd-or-prefix) : * ] - { - # If the user did not pass a command, whatever we got was a prefix. - prefix = $(cmd-or-prefix) ; - cmds-to-try = [ candidate-interpreters $(version) : $(prefix) : $(target-os) ] ; - } - else - { - # Work with the command the user gave us. - cmds-to-try = $(cmd-or-prefix) ; - - # On Windows, do not nail down the interpreter command just yet in case - # the user specified something that turns out to be a cygwin symlink, - # which could bring down bjam if we invoke it. - if $(target-os) != windows - { - interpreter-cmd = $(cmd-or-prefix) ; - } - } - - # Values to use in case we can not really find anything in the system. - local fallback-cmd = $(cmds-to-try[1]) ; - local fallback-version ; - - # Anything left to find or check? - if ! ( $(interpreter-cmd) && $(includes) && $(libraries) ) - { - # Values to be extracted from python's sys module. These will be set by - # the probe rule, above, using Jam's dynamic scoping. - local sys-elements = version platform prefix exec_prefix executable ; - local sys.$(sys-elements) ; - - # Compute the string Python's sys.platform needs to match. If not - # targeting Windows or cygwin we will assume only native builds can - # possibly run, so we will not require a match and we leave sys.platform - # blank. - local platform ; - switch $(target-os) - { - case windows : platform = win32 ; - case cygwin : platform = cygwin ; - } - - while $(cmds-to-try) - { - # Pop top command. - local cmd = $(cmds-to-try[1]) ; - cmds-to-try = $(cmds-to-try[2-]) ; - - debug-message Checking interpreter command \"$(cmd)\"... ; - if [ probe $(cmd) ] - { - fallback-version ?= $(sys.version) ; - - # Check for version/platform validity. - for local x in version platform - { - if $($(x)) && $($(x)) != $(sys.$(x)) - { - debug-message ...$(x) "mismatch (looking for" - $($(x)) but found $(sys.$(x))")" ; - cmd = ; - } - } - - if $(cmd) - { - debug-message ...requested configuration matched! ; - - exec-prefix = $(sys.exec_prefix) ; - - compute-default-paths $(target-os) : $(sys.version) : - $(sys.prefix) : $(sys.exec_prefix) ; - - version = $(sys.version) ; - interpreter-cmd ?= $(cmd) ; - cmds-to-try = ; # All done. - } - } - else - { - debug-message ...does not invoke a working interpreter ; - } - } - } - - # Anything left to compute? - if $(includes) && $(libraries) - { - .configured = true ; - } - else - { - version ?= $(fallback-version) ; - version ?= 2.5 ; - exec-prefix ?= $(prefix) ; - compute-default-paths $(target-os) : $(version) : $(prefix:E=) ; - } - - if ! $(interpreter-cmd) - { - fallback-cmd ?= python ; - debug-message No working Python interpreter found. ; - if [ os.name ] != NT || ! 
[ invokes-cygwin-symlink $(fallback-cmd) ] - { - interpreter-cmd = $(fallback-cmd) ; - debug-message falling back to \"$(interpreter-cmd)\" ; - } - } - - includes = [ path-to-native $(includes) ] ; - libraries = [ path-to-native $(libraries) ] ; - - debug-message "Details of this Python configuration:" ; - debug-message " interpreter command:" \"$(interpreter-cmd:E=)\" ; - debug-message " include path:" \"$(includes:E=)\" ; - debug-message " library path:" \"$(libraries:E=)\" ; - if $(target-os) = windows - { - debug-message " DLL search path:" \"$(exec-prefix:E=)\" ; - } - - # - # End autoconfiguration sequence. - # - local target-requirements = $(condition) ; - - # Add the version, if any, to the target requirements. - if $(version) - { - if ! $(version) in [ feature.values python ] - { - feature.extend python : $(version) ; - } - target-requirements += $(version:E=default) ; - } - - target-requirements += $(target-os) ; - - # See if we can find a framework directory on darwin. - local framework-directory ; - if $(target-os) = darwin - { - # Search upward for the framework directory. - local framework-directory = $(libraries[-1]) ; - while $(framework-directory:D=) && $(framework-directory:D=) != Python.framework - { - framework-directory = $(framework-directory:D) ; - } - - if $(framework-directory:D=) = Python.framework - { - debug-message framework directory is \"$(framework-directory)\" ; - } - else - { - debug-message "no framework directory found; using library path" ; - framework-directory = ; - } - } - - local dll-path = $(libraries) ; - - # Make sure that we can find the Python DLL on Windows. - if ( $(target-os) = windows ) && $(exec-prefix) - { - dll-path += $(exec-prefix) ; - } - - # - # Prepare usage requirements. - # - local usage-requirements = [ system-library-dependencies $(target-os) ] ; - usage-requirements += $(includes) $(interpreter-cmd) ; - if on in $(condition) - { - if $(target-os) = windows - { - # In pyconfig.h, Py_DEBUG is set if _DEBUG is set. If we define - # Py_DEBUG we will get multiple definition warnings. - usage-requirements += _DEBUG ; - } - else - { - usage-requirements += Py_DEBUG ; - } - } - - # Global, but conditional, requirements to give access to the interpreter - # for general utilities, like other toolsets, that run Python scripts. - toolset.add-requirements - $(target-requirements:J=,):$(interpreter-cmd) ; - - # Register the right suffix for extensions. - register-extension-suffix $(extension-suffix) : $(target-requirements) ; - - # - # Declare the "python" target. This should really be called - # python_for_embedding. - # - - if $(framework-directory) - { - alias python - : - : $(target-requirements) - : - : $(usage-requirements) $(framework-directory) - ; - } - else - { - declare-libpython-target $(version) : $(target-requirements) ; - - # This is an evil hack. On, Windows, when Python is embedded, nothing - # seems to set up sys.path to include Python's standard library - # (http://article.gmane.org/gmane.comp.python.general/544986). The evil - # here, aside from the workaround necessitated by Python's bug, is that: - # - # a. we're guessing the location of the python standard library from the - # location of pythonXX.lib - # - # b. we're hijacking the property to get the - # environment variable set up, and the user may want to use it for - # something else (e.g. launch the debugger). 
- local set-PYTHONPATH ; - if $(target-os) = windows - { - set-PYTHONPATH = [ common.prepend-path-variable-command PYTHONPATH : - $(libraries:D)/Lib ] ; - } - - alias python - : - : $(target-requirements) - : - # Why python.lib must be listed here instead of along with the - # system libs is a mystery, but if we do not do it, on cygwin, - # -lpythonX.Y never appears in the command line (although it does on - # linux). - : $(usage-requirements) - $(set-PYTHONPATH) - $(libraries) python.lib - ; - } - - # On *nix, we do not want to link either Boost.Python or Python extensions - # to libpython, because the Python interpreter itself provides all those - # symbols. If we linked to libpython, we would get duplicate symbols. So - # declare two targets -- one for building extensions and another for - # embedding. - # - # Unlike most *nix systems, Mac OS X's linker does not permit undefined - # symbols when linking a shared library. So, we still need to link against - # the Python framework, even when building extensions. Note that framework - # builds of Python always use shared libraries, so we do not need to worry - # about duplicate Python symbols. - if $(target-os) in windows cygwin darwin - { - alias python_for_extensions : python : $(target-requirements) ; - } - # On AIX we need Python extensions and Boost.Python to import symbols from - # the Python interpreter. Dynamic libraries opened with dlopen() do not - # inherit the symbols from the Python interpreter. - else if $(target-os) = aix - { - alias python_for_extensions - : - : $(target-requirements) - : - : $(usage-requirements) -Wl,-bI:$(libraries[1])/python.exp - ; - } - else - { - alias python_for_extensions - : - : $(target-requirements) - : - : $(usage-requirements) - ; - } -} - - -rule configured ( ) -{ - return $(.configured) ; -} - - -type.register PYTHON_EXTENSION : : SHARED_LIB ; - - -local rule register-extension-suffix ( root : condition * ) -{ - local suffix ; - - switch [ feature.get-values target-os : $(condition) ] - { - case windows : suffix = pyd ; - case cygwin : suffix = dll ; - case hpux : - { - if [ feature.get-values python : $(condition) ] in 1.5 1.6 2.0 2.1 2.2 2.3 2.4 - { - suffix = sl ; - } - else - { - suffix = so ; - } - } - case * : suffix = so ; - } - - type.set-generated-target-suffix PYTHON_EXTENSION : $(condition) : <$(root).$(suffix)> ; -} - - -# Unset 'lib' prefix for PYTHON_EXTENSION -type.set-generated-target-prefix PYTHON_EXTENSION : : "" ; - - -rule python-extension ( name : sources * : requirements * : default-build * : - usage-requirements * ) -{ - if [ configured ] - { - requirements += /python//python_for_extensions ; - } - requirements += true ; - - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(name) : $(project) : PYTHON_EXTENSION - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - -IMPORT python : python-extension : : python-extension ; - -rule py2to3 -{ - common.copy $(>) $(<) ; - 2to3 $(<) ; -} - -actions 2to3 -{ - 2to3 -wn "$(<)" - 2to3 -dwn "$(<)" -} - - -# Support for testing. 
-type.register PY : py ; -type.register RUN_PYD_OUTPUT ; -type.register RUN_PYD : : TEST ; - - -class python-test-generator : generator -{ - import set ; - - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - self.composing = true ; - } - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - local pyversion = [ $(property-set).get ] ; - local python ; - local other-pythons ; - - # Make new target that converting Python source by 2to3 when running with Python 3. - local rule make-2to3-source ( source ) - { - if $(pyversion) >= 3.0 - { - local a = [ new action $(source) : python.py2to3 : $(property-set) ] ; - local t = [ utility.basename [ $(s).name ] ] ; - local p = [ new file-target $(t) : PY : $(project) : $(a) ] ; - return $(p) ; - } - else - { - return $(source) ; - } - } - - for local s in $(sources) - { - if [ $(s).type ] = PY - { - if ! $(python) - { - # First Python source ends up on command line. - python = [ make-2to3-source $(s) ] ; - - } - else - { - # Other Python sources become dependencies. - other-pythons += [ make-2to3-source $(s) ] ; - } - } - } - - local extensions ; - for local s in $(sources) - { - if [ $(s).type ] = PYTHON_EXTENSION - { - extensions += $(s) ; - } - } - - local libs ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] LIB ] - && ! $(s) in $(extensions) - { - libs += $(s) ; - } - } - - local new-sources ; - for local s in $(sources) - { - if [ type.is-derived [ $(s).type ] CPP ] - { - local name = [ utility.basename [ $(s).name ] ] ; - if $(name) = [ utility.basename [ $(python).name ] ] - { - name = $(name)_ext ; - } - local extension = [ generators.construct $(project) $(name) : - PYTHON_EXTENSION : $(property-set) : $(s) $(libs) ] ; - - # The important part of usage requirements returned from - # PYTHON_EXTENSION generator are xdll-path properties that will - # allow us to find the python extension at runtime. - property-set = [ $(property-set).add $(extension[1]) ] ; - - # Ignore usage requirements. We're a top-level generator and - # nobody is going to use what we generate. - new-sources += $(extension[2-]) ; - } - } - - property-set = [ $(property-set).add-raw $(other-pythons) ] ; - - result = [ construct-result $(python) $(extensions) $(new-sources) : - $(project) $(name) : $(property-set) ] ; - } -} - - -generators.register - [ new python-test-generator python.capture-output : : RUN_PYD_OUTPUT ] ; - -generators.register-standard testing.expect-success - : RUN_PYD_OUTPUT : RUN_PYD ; - - -# There are two different ways of spelling OS names. One is used for [ os.name ] -# and the other is used for the and properties. Until that -# is remedied, this sets up a crude mapping from the latter to the former, that -# will work *for the purposes of cygwin/NT cross-builds only*. Could not think -# of a better name than "translate". -# -.translate-os-windows = NT ; -.translate-os-cygwin = CYGWIN ; -local rule translate-os ( src-os ) -{ - local x = $(.translate-os-$(src-os)) [ os.name ] ; - return $(x[1]) ; -} - - -# Extract the path to a single ".pyd" source. This is used to build the -# PYTHONPATH for running bpl tests. -# -local rule pyd-pythonpath ( source ) -{ - return [ on $(source) return $(LOCATE) $(SEARCH) ] ; -} - - -# The flag settings on testing.capture-output do not apply to python.capture -# output at the moment. Redo this explicitly. 
-toolset.flags python.capture-output ARGS ; - - -rule capture-output ( target : sources * : properties * ) -{ - # Setup up a proper DLL search path. Here, $(sources[1]) is a python module - # and $(sources[2]) is a DLL. Only $(sources[1]) is passed to - # testing.capture-output, so RUN_PATH variable on $(sources[2]) is not - # consulted. Move it over explicitly. - RUN_PATH on $(sources[1]) = [ on $(sources[2-]) return $(RUN_PATH) ] ; - - PYTHONPATH = [ sequence.transform pyd-pythonpath : $(sources[2-]) ] ; - PYTHONPATH += [ feature.get-values pythonpath : $(properties) ] ; - - # After test is run, we remove the Python module, but not the Python script. - testing.capture-output $(target) : $(sources[1]) : $(properties) : - $(sources[2-]) ; - - # PYTHONPATH is different; it will be interpreted by whichever Python is - # invoked and so must follow path rules for the target os. The only OSes - # where we can run python for other OSes currently are NT and CYGWIN so we - # only need to handle those cases. - local target-os = [ feature.get-values target-os : $(properties) ] ; - # Oddly, host-os is not in properties, so grab the default value. - local host-os = [ feature.defaults host-os ] ; - host-os = $(host-os:G=) ; - if $(target-os) != $(host-os) - { - PYTHONPATH = [ sequence.transform $(host-os)-to-$(target-os)-path : - $(PYTHONPATH) ] ; - } - local path-separator = [ os.path-separator [ translate-os $(target-os) ] ] ; - local set-PYTHONPATH = [ common.variable-setting-command PYTHONPATH : - $(PYTHONPATH:J=$(path-separator)) ] ; - LAUNCHER on $(target) = $(set-PYTHONPATH) [ on $(target) return \"$(PYTHON)\" ] ; -} - - -rule bpl-test ( name : sources * : requirements * ) -{ - local s ; - sources ?= $(name).py $(name).cpp ; - return [ testing.make-test run-pyd : $(sources) /boost/python//boost_python - : $(requirements) : $(name) ] ; -} - - -IMPORT $(__name__) : bpl-test : : bpl-test ; diff --git a/jam-files/boost-build/tools/qcc.jam b/jam-files/boost-build/tools/qcc.jam deleted file mode 100644 index 4f2a4fc1..00000000 --- a/jam-files/boost-build/tools/qcc.jam +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright (c) 2001 David Abrahams. -# Copyright (c) 2002-2003 Rene Rivera. -# Copyright (c) 2002-2003 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import "class" : new ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import property ; -import set ; -import toolset ; -import type ; -import unix ; - -feature.extend toolset : qcc ; - -toolset.inherit-generators qcc : unix : unix.link unix.link.dll ; -generators.override builtin.lib-generator : qcc.prebuilt ; -toolset.inherit-flags qcc : unix ; -toolset.inherit-rules qcc : unix ; - -# Initializes the qcc toolset for the given version. If necessary, command may -# be used to specify where the compiler is located. The parameter 'options' is a -# space-delimited list of options, each one being specified as -# option-value. Valid option names are: cxxflags, linkflags and -# linker-type. Accepted values for linker-type are gnu and sun, gnu being the -# default. -# -# Example: -# using qcc : 3.4 : : foo bar sun ; -# -rule init ( version ? 
: command * : options * ) -{ - local condition = [ common.check-init-parameters qcc : version $(version) ] ; - local command = [ common.get-invocation-command qcc : QCC : $(command) ] ; - common.handle-options qcc : $(condition) : $(command) : $(options) ; -} - - -generators.register-c-compiler qcc.compile.c++ : CPP : OBJ : qcc ; -generators.register-c-compiler qcc.compile.c : C : OBJ : qcc ; -generators.register-c-compiler qcc.compile.asm : ASM : OBJ : qcc ; - - -# Declare flags for compilation. -toolset.flags qcc.compile OPTIONS on : -gstabs+ ; - -# Declare flags and action for compilation. -toolset.flags qcc.compile OPTIONS off : -O0 ; -toolset.flags qcc.compile OPTIONS speed : -O3 ; -toolset.flags qcc.compile OPTIONS space : -Os ; - -toolset.flags qcc.compile OPTIONS off : -Wc,-fno-inline ; -toolset.flags qcc.compile OPTIONS on : -Wc,-Wno-inline ; -toolset.flags qcc.compile OPTIONS full : -Wc,-finline-functions -Wc,-Wno-inline ; - -toolset.flags qcc.compile OPTIONS off : -w ; -toolset.flags qcc.compile OPTIONS all : -Wc,-Wall ; -toolset.flags qcc.compile OPTIONS on : -Wc,-Werror ; - -toolset.flags qcc.compile OPTIONS on : -p ; - -toolset.flags qcc.compile OPTIONS ; -toolset.flags qcc.compile.c++ OPTIONS ; -toolset.flags qcc.compile DEFINES ; -toolset.flags qcc.compile INCLUDES ; - -toolset.flags qcc.compile OPTIONS shared : -shared ; - -toolset.flags qcc.compile.c++ TEMPLATE_DEPTH ; - - -rule compile.c++ -{ - # Here we want to raise the template-depth parameter value to something - # higher than the default value of 17. Note that we could do this using the - # feature.set-default rule but we do not want to set the default value for - # all toolsets as well. - # - # TODO: This 'modified default' has been inherited from some 'older Boost - # Build implementation' and has most likely been added to make some Boost - # library parts compile correctly. We should see what exactly prompted this - # and whether we can get around the problem more locally. - local template-depth = [ on $(1) return $(TEMPLATE_DEPTH) ] ; - if ! $(template-depth) - { - TEMPLATE_DEPTH on $(1) = 128 ; - } -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" -Wc,-ftemplate-depth-$(TEMPLATE_DEPTH) $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.asm -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - - -# The class checking that we do not try to use the static property -# while creating or using a shared library, since it is not supported by qcc/ -# /libc. -# -class qcc-linking-generator : unix-linking-generator -{ - rule generated-targets ( sources + : property-set : project name ? ) - { - if static in [ $(property-set).raw ] - { - local m ; - if [ id ] = "qcc.link.dll" - { - m = "on qcc, DLL can't be build with static" ; - } - if ! $(m) - { - for local s in $(sources) - { - local type = [ $(s).type ] ; - if $(type) && [ type.is-derived $(type) SHARED_LIB ] - { - m = "on qcc, using DLLS together with the static options is not possible " ; - } - } - } - if $(m) - { - errors.user-error $(m) : "It is suggested to use" - "static together with static." 
; - } - } - - return [ unix-linking-generator.generated-targets - $(sources) : $(property-set) : $(project) $(name) ] ; - } -} - -generators.register [ new qcc-linking-generator qcc.link : LIB OBJ : EXE - : qcc ] ; - -generators.register [ new qcc-linking-generator qcc.link.dll : LIB OBJ - : SHARED_LIB : qcc ] ; - -generators.override qcc.prebuilt : builtin.prebuilt ; -generators.override qcc.searched-lib-generator : searched-lib-generator ; - - -# Declare flags for linking. -# First, the common flags. -toolset.flags qcc.link OPTIONS on : -gstabs+ ; -toolset.flags qcc.link OPTIONS on : -p ; -toolset.flags qcc.link OPTIONS ; -toolset.flags qcc.link LINKPATH ; -toolset.flags qcc.link FINDLIBS-ST ; -toolset.flags qcc.link FINDLIBS-SA ; -toolset.flags qcc.link LIBRARIES ; - -toolset.flags qcc.link FINDLIBS-SA : m ; - -# For static we made sure there are no dynamic libraries in the -# link. -toolset.flags qcc.link OPTIONS static : -static ; - -# Assuming this is just like with gcc. -toolset.flags qcc.link RPATH : : unchecked ; -toolset.flags qcc.link RPATH_LINK : : unchecked ; - - -# Declare actions for linking. -# -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - # Serialize execution of the 'link' action, since running N links in - # parallel is just slower. For now, serialize only qcc links while it might - # be a good idea to serialize all links. - JAM_SEMAPHORE on $(targets) = qcc-link-semaphore ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -Wl,-rpath-link$(SPACE)-Wl,"$(RPATH_LINK)" -o "$(<)" "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS) -} - - -# Always remove archive and start again. Here is the rationale from Andre Hentz: -# I had a file, say a1.c, that was included into liba.a. I moved a1.c to a2.c, -# updated my Jamfiles and rebuilt. My program was crashing with absurd errors. -# After some debugging I traced it back to the fact that a1.o was *still* in -# liba.a -RM = [ common.rm-command ] ; -if [ os.name ] = NT -{ - RM = "if exist \"$(<[1])\" DEL \"$(<[1])\"" ; -} - - -# Declare action for creating static libraries. The 'r' letter means to add -# files to the archive with replacement. Since we remove the archive, we do not -# care about replacement, but there is no option to "add without replacement". -# The 'c' letter suppresses warnings in case the archive does not exists yet. -# That warning is produced only on some platforms, for whatever reasons. -# -actions piecemeal archive -{ - $(RM) "$(<)" - ar rc "$(<)" "$(>)" -} - - -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; - JAM_SEMAPHORE on $(targets) = qcc-link-semaphore ; -} - - -# Differ from 'link' above only by -shared. -# -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" -L"$(LINKPATH)" -Wl,-R$(SPACE)-Wl,"$(RPATH)" -o "$(<)" $(HAVE_SONAME)-Wl,-h$(SPACE)-Wl,$(<[1]:D=) -shared "$(>)" "$(LIBRARIES)" -l$(FINDLIBS-ST) -l$(FINDLIBS-SA) $(OPTIONS) -} diff --git a/jam-files/boost-build/tools/qt.jam b/jam-files/boost-build/tools/qt.jam deleted file mode 100644 index 8aa7ca26..00000000 --- a/jam-files/boost-build/tools/qt.jam +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2006 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Forwarning toolset file to Qt GUI library. 
Forwards to the toolset file -# for the current version of Qt. - -import qt4 ; - -rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * ) -{ - qt4.init $(prefix) : $(full_bin) : $(full_inc) : $(full_lib) : $(version) : $(condition) ; -} - - diff --git a/jam-files/boost-build/tools/qt3.jam b/jam-files/boost-build/tools/qt3.jam deleted file mode 100644 index f82cf0ac..00000000 --- a/jam-files/boost-build/tools/qt3.jam +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Support for the Qt GUI library version 3 -# (http://www.trolltech.com/products/qt3/index.html). -# For new developments, it is recommended to use Qt4 via the qt4 Boost.Build -# module. - -import modules ; -import feature ; -import errors ; -import type ; -import "class" : new ; -import generators ; -import project ; -import toolset : flags ; - -# Convert this module into a project, so that we can declare targets here. -project.initialize $(__name__) ; -project qt3 ; - - -# Initialized the QT support module. The 'prefix' parameter tells where QT is -# installed. When not given, environmental variable QTDIR should be set. -# -rule init ( prefix ? ) -{ - if ! $(prefix) - { - prefix = [ modules.peek : QTDIR ] ; - if ! $(prefix) - { - errors.error - "QT installation prefix not given and QTDIR variable is empty" ; - } - } - - if $(.initialized) - { - if $(prefix) != $(.prefix) - { - errors.error - "Attempt the reinitialize QT with different installation prefix" ; - } - } - else - { - .initialized = true ; - .prefix = $(prefix) ; - - generators.register-standard qt3.moc : H : CPP(moc_%) : qt3 ; - # Note: the OBJ target type here is fake, take a look at - # qt4.jam/uic-h-generator for explanations that apply in this case as - # well. - generators.register [ new moc-h-generator-qt3 - qt3.moc.cpp : MOCCABLE_CPP : OBJ : qt3 ] ; - - # The UI type is defined in types/qt.jam, and UIC_H is only used in - # qt.jam, but not in qt4.jam, so define it here. - type.register UIC_H : : H ; - - generators.register-standard qt3.uic-h : UI : UIC_H : qt3 ; - - # The following generator is used to convert UI files to CPP. It creates - # UIC_H from UI, and constructs CPP from UI/UIC_H. In addition, it also - # returns UIC_H target, so that it can be mocced. - class qt::uic-cpp-generator : generator - { - rule __init__ ( ) - { - generator.__init__ qt3.uic-cpp : UI UIC_H : CPP : qt3 ; - } - - rule run ( project name ? : properties * : sources + ) - { - # Consider this: - # obj test : test_a.cpp : off ; - # - # This generator will somehow be called in this case, and, - # will fail -- which is okay. However, if there are - # properties they will be converted to sources, so the size of - # 'sources' will be more than 1. In this case, the base generator - # will just crash -- and that's not good. Just use a quick test - # here. - - local result ; - if ! $(sources[2]) - { - # Construct CPP as usual - result = [ generator.run $(project) $(name) - : $(properties) : $(sources) ] ; - - # If OK, process UIC_H with moc. It's pretty clear that - # the object generated with UIC will have Q_OBJECT macro. 
- if $(result) - { - local action = [ $(result[1]).action ] ; - local sources = [ $(action).sources ] ; - local mocced = [ generators.construct $(project) $(name) - : CPP : $(properties) : $(sources[2]) ] ; - result += $(mocced[2-]) ; - } - } - - return $(result) ; - } - } - - generators.register [ new qt::uic-cpp-generator ] ; - - # Finally, declare prebuilt target for QT library. - local usage-requirements = - $(.prefix)/include - $(.prefix)/lib - $(.prefix)/lib - qt3 - ; - lib qt : : qt-mt multi : : $(usage-requirements) ; - lib qt : : qt single : : $(usage-requirements) ; - } -} - -class moc-h-generator-qt3 : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP - { - name = [ $(sources[1]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt3.moc.cpp : - $(property-set) ] ; - - local target = [ - new file-target $(name) : MOC : $(project) : $(a) ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that the dependency from - # sources to this generated header is detected -- if Jam does not - # know about this target, it won't do anything. - DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } - } -} - - -# Query the installation directory. This is needed in at least two scenarios. -# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt -# plugins to the Qt-Tree. -# -rule directory -{ - return $(.prefix) ; -} - -# -f forces moc to include the processed source file. Without it, it would think -# that .qpp is not a header and would not include it from the generated file. -# -actions moc -{ - $(.prefix)/bin/moc -f $(>) -o $(<) -} - -# When moccing .cpp files, we don't need -f, otherwise generated code will -# include .cpp and we'll get duplicated symbols. -# -actions moc.cpp -{ - $(.prefix)/bin/moc $(>) -o $(<) -} - - -space = " " ; - -# Sometimes it's required to make 'plugins' available during uic invocation. To -# help with this we add paths to all dependency libraries to uic commane line. -# The intention is that it's possible to write -# -# exe a : ... a.ui ... : some_plugin ; -# -# and have everything work. We'd add quite a bunch of unrelated paths but it -# won't hurt. -# -flags qt3.uic-h LIBRARY_PATH ; -actions uic-h -{ - $(.prefix)/bin/uic $(>) -o $(<) -L$(space)$(LIBRARY_PATH) -} - - -flags qt3.uic-cpp LIBRARY_PATH ; -# The second target is uic-generated header name. It's placed in build dir, but -# we want to include it using only basename. -actions uic-cpp -{ - $(.prefix)/bin/uic $(>[1]) -i $(>[2]:D=) -o $(<) -L$(space)$(LIBRARY_PATH) -} diff --git a/jam-files/boost-build/tools/qt4.jam b/jam-files/boost-build/tools/qt4.jam deleted file mode 100644 index 71d1b762..00000000 --- a/jam-files/boost-build/tools/qt4.jam +++ /dev/null @@ -1,724 +0,0 @@ -# Copyright 2002-2006 Vladimir Prus -# Copyright 2005 Alo Sarv -# Copyright 2005-2009 Juergen Hunold -# -# Distributed under the Boost Software License, Version 1.0. 
(See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -# Qt4 library support module -# -# The module attempts to auto-detect QT installation location from QTDIR -# environment variable; failing that, installation location can be passed as -# argument: -# -# toolset.using qt4 : /usr/local/Trolltech/Qt-4.0.0 ; -# -# The module supports code generation from .ui and .qrc files, as well as -# running the moc preprocessor on headers. Note that you must list all your -# moc-able headers in sources. -# -# Example: -# -# exe myapp : myapp.cpp myapp.h myapp.ui myapp.qrc -# /qt4//QtGui /qt4//QtNetwork ; -# -# It's also possible to run moc on cpp sources: -# -# import cast ; -# -# exe myapp : myapp.cpp [ cast _ moccable-cpp : myapp.cpp ] /qt4//QtGui ; -# -# When moccing source file myapp.cpp you need to include "myapp.moc" from -# myapp.cpp. When moccing .h files, the output of moc will be automatically -# compiled and linked in, you don't need any includes. -# -# This is consistent with Qt guidelines: -# http://doc.trolltech.com/4.0/moc.html - -import modules ; -import feature ; -import errors ; -import type ; -import "class" : new ; -import generators ; -import project ; -import toolset : flags ; -import os ; -import virtual-target ; -import scanner ; - -# Qt3Support control feature -# -# Qt4 configure defaults to build Qt4 libraries with Qt3Support. -# The autodetection is missing, so we default to disable Qt3Support. -# This prevents the user from inadvertedly using a deprecated API. -# -# The Qt3Support library can be activated by adding -# "on" to requirements -# -# Use "on:QT3_SUPPORT_WARNINGS" -# to get warnings about deprecated Qt3 support funtions and classes. -# Files ported by the "qt3to4" conversion tool contain _tons_ of -# warnings, so this define is not set as default. -# -# Todo: Detect Qt3Support from Qt's configure data. -# Or add more auto-configuration (like python). -feature.feature qt3support : off on : propagated link-incompatible ; - -# The Qt version used for requirements -# Valid are 4.4 or 4.5.0 -# Auto-detection via qmake sets 'major.minor.patch' -feature.feature qt : : propagated ; - -project.initialize $(__name__) ; -project qt ; - -# Save the project so that we tolerate 'import + using' combo. -.project = [ project.current ] ; - -# Helper utils for easy debug output -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = TRUE ; -} - -local rule debug-message ( message * ) -{ - if $(.debug-configuration) = TRUE - { - ECHO notice: [qt4-cfg] $(message) ; - } -} - -# Capture qmake output line by line -local rule read-output ( content ) -{ - local lines ; - local nl = " -" ; - local << = "([^$(nl)]*)[$(nl)](.*)" ; - local line+ = [ MATCH "$(<<)" : "$(content)" ] ; - while $(line+) - { - lines += $(line+[1]) ; - line+ = [ MATCH "$(<<)" : "$(line+[2])" ] ; - } - return $(lines) ; -} - -# Capture Qt version from qmake -local rule check-version ( bin_prefix ) -{ - full-cmd = $(bin_prefix)"/qmake -v" ; - debug-message Running '$(full-cmd)' ; - local output = [ SHELL $(full-cmd) ] ; - for line in [ read-output $(output) ] - { - # Parse the output to get all the results. - if [ MATCH "QMake" : $(line) ] - { - # Skip first line of output - } - else - { - temp = [ MATCH "([0-9]*)\\.([0-9]*)\\.([0-9]*)" : $(line) ] ; - } - } - return $(temp) ; -} - -# Validate the version string and extract the major/minor part we care about. 
-# -local rule split-version ( version ) -{ - local major-minor = [ MATCH ^([0-9]+)\.([0-9]+)(.*)$ : $(version) : 1 2 3 ] ; - if ! $(major-minor[2]) || $(major-minor[3]) - { - ECHO "Warning: 'using qt' expects a two part (major, minor) version number; got" $(version) instead ; - - # Add a zero to account for the missing digit if necessary. - major-minor += 0 ; - } - - return $(major-minor[1]) $(major-minor[2]) ; -} - -# Initialize the QT support module. -# Parameters: -# - 'prefix' parameter tells where Qt is installed. -# - 'full_bin' optional full path to Qt binaries (qmake,moc,uic,rcc) -# - 'full_inc' optional full path to Qt top-level include directory -# - 'full_lib' optional full path to Qt library directory -# - 'version' optional version of Qt, else autodetected via 'qmake -v' -# - 'condition' optional requirements -rule init ( prefix : full_bin ? : full_inc ? : full_lib ? : version ? : condition * ) -{ - project.push-current $(.project) ; - - debug-message "==== Configuring Qt ... ====" ; - for local v in version cmd-or-prefix includes libraries condition - { - if $($(v)) - { - debug-message " user-specified "$(v): '$($(v))' ; - } - } - - # Needed as default value - .prefix = $(prefix) ; - - # pre-build paths to detect reinitializations changes - local inc_prefix lib_prefix bin_prefix ; - if $(full_inc) - { - inc_prefix = $(full_inc) ; - } - else - { - inc_prefix = $(prefix)/include ; - } - if $(full_lib) - { - lib_prefix = $(full_lib) ; - } - else - { - lib_prefix = $(prefix)/lib ; - } - if $(full_bin) - { - bin_prefix = $(full_bin) ; - } - else - { - bin_prefix = $(prefix)/bin ; - } - - # Globally needed variables - .incprefix = $(inc_prefix) ; - .libprefix = $(lib_prefix) ; - .binprefix = $(bin_prefix) ; - - if ! $(.initialized) - { - # Make sure this is initialised only once - .initialized = true ; - - # Generates cpp files from header files using "moc" tool - generators.register-standard qt4.moc : H : CPP(moc_%) : qt4 ; - - # The OBJ result type is a fake, 'H' will be really produced. See - # comments on the generator class, defined below the 'init' function. - generators.register [ new uic-generator qt4.uic : UI : OBJ : - qt4 ] ; - - # The OBJ result type is a fake here too. - generators.register [ new moc-h-generator - qt4.moc.inc : MOCCABLE_CPP : OBJ : qt4 ] ; - - generators.register [ new moc-inc-generator - qt4.moc.inc : MOCCABLE_H : OBJ : qt4 ] ; - - # Generates .cpp files from .qrc files. - generators.register-standard qt4.rcc : QRC : CPP(qrc_%) ; - - # dependency scanner for wrapped files. - type.set-scanner QRC : qrc-scanner ; - - # Save value of first occuring prefix - .PREFIX = $(prefix) ; - } - - if $(version) - { - major-minor = [ split-version $(version) ] ; - version = $(major-minor:J=.) ; - } - else - { - version = [ check-version $(bin_prefix) ] ; - if $(version) - { - version = $(version:J=.) ; - } - debug-message Detected version '$(version)' ; - } - - local target-requirements = $(condition) ; - - # Add the version, if any, to the target requirements. - if $(version) - { - if ! $(version) in [ feature.values qt ] - { - feature.extend qt : $(version) ; - } - target-requirements += $(version:E=default) ; - } - - local target-os = [ feature.get-values target-os : $(condition) ] ; - if ! 
$(target-os) - { - target-os ?= [ feature.defaults target-os ] ; - target-os = $(target-os:G=) ; - target-requirements += $(target-os) ; - } - - # Build exact requirements for the tools - local tools-requirements = $(target-requirements:J=/) ; - - debug-message "Details of this Qt configuration:" ; - debug-message " prefix: " '$(prefix:E=)' ; - debug-message " binary path: " '$(bin_prefix:E=)' ; - debug-message " include path:" '$(inc_prefix:E=)' ; - debug-message " library path:" '$(lib_prefix:E=)' ; - debug-message " target requirements:" '$(target-requirements)' ; - debug-message " tool requirements: " '$(tools-requirements)' ; - - # setup the paths for the tools - toolset.flags qt4.moc .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - toolset.flags qt4.rcc .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - toolset.flags qt4.uic .BINPREFIX $(tools-requirements) : $(bin_prefix) ; - - # TODO: 2009-02-12: Better support for directories - # Most likely needed are separate getters for: include,libraries,binaries and sources. - toolset.flags qt4.directory .PREFIX $(tools-requirements) : $(prefix) ; - - # Test for a buildable Qt. - if [ glob $(.prefix)/Jamroot ] - { - .bjam-qt = true - - # this will declare QtCore (and qtmain on windows) - add-shared-library QtCore ; - } - else - # Setup common pre-built Qt. - # Special setup for QtCore on which everything depends - { - local usage-requirements = - $(.incprefix) - $(.libprefix) - $(.libprefix) - multi - qt4 ; - - local suffix ; - - # Since Qt-4.2, debug versions on unix have to be built - # separately and therefore have no suffix. - .suffix_version = "" ; - .suffix_debug = "" ; - - # Control flag for auto-configuration of the debug libraries. - # This setup requires Qt 'configure -debug-and-release'. - # Only available on some platforms. - # ToDo: 2009-02-12: Maybe throw this away and - # require separate setup with debug as condition. - .have_separate_debug = FALSE ; - - # Setup other platforms - if $(target-os) in windows cygwin - { - .have_separate_debug = TRUE ; - - # On NT, the libs have "4" suffix, and "d" suffix in debug builds. - .suffix_version = "4" ; - .suffix_debug = "d" ; - - # On Windows we must link against the qtmain library - lib qtmain - : # sources - : # requirements - qtmain$(.suffix_debug) - debug - $(target-requirements) - ; - - lib qtmain - : # sources - : # requirements - qtmain - $(target-requirements) - ; - } - else if $(target-os) = darwin - { - # On MacOS X, both debug and release libraries are available. - .suffix_debug = "_debug" ; - - .have_separate_debug = TRUE ; - - alias qtmain ; - } - else - { - alias qtmain : : $(target-requirements) ; - } - - lib QtCore : qtmain - : # requirements - QtCore$(.suffix_version) - $(target-requirements) - : # default-build - : # usage-requirements - QT_CORE_LIB - QT_NO_DEBUG - $(.incprefix)/QtCore - $(usage-requirements) - ; - - if $(.have_separate_debug) = TRUE - { - debug-message Configure debug libraries with suffix '$(.suffix_debug)' ; - - lib QtCore : $(main) - : # requirements - QtCore$(.suffix_debug)$(.suffix_version) - debug - $(target-requirements) - : # default-build - : # usage-requirements - QT_CORE_LIB - $(.incprefix)/QtCore - $(usage-requirements) - ; - } - } - - # Initialising the remaining libraries is canonical - # parameters 'module' : 'depends-on' : 'usage-define' : 'requirements' : 'include' - # 'include' only for non-canonical include paths. 
- add-shared-library QtGui : QtCore : QT_GUI_LIB : $(target-requirements) ; - add-shared-library QtNetwork : QtCore : QT_NETWORK_LIB : $(target-requirements) ; - add-shared-library QtSql : QtCore : QT_SQL_LIB : $(target-requirements) ; - add-shared-library QtXml : QtCore : QT_XML_LIB : $(target-requirements) ; - - add-shared-library Qt3Support : QtGui QtNetwork QtXml QtSql - : QT_QT3SUPPORT_LIB QT3_SUPPORT - : on $(target-requirements) ; - - # Dummy target to enable "off" and - # "/qt//Qt3Support" at the same time. This enables quick - # switching from one to the other for test/porting purposes. - alias Qt3Support : : off $(target-requirements) ; - - # OpenGl Support - add-shared-library QtOpenGL : QtGui : QT_OPENGL_LIB : $(target-requirements) ; - - # SVG-Support (Qt 4.1) - add-shared-library QtSvg : QtXml QtOpenGL : QT_SVG_LIB : $(target-requirements) ; - - # Test-Support (Qt 4.1) - add-shared-library QtTest : QtCore : : $(target-requirements) ; - - # Qt designer library - add-shared-library QtDesigner : QtGui QtXml : : $(target-requirements) ; - add-shared-library QtDesignerComponents : QtGui QtXml : : $(target-requirements) ; - - # Support for dynamic Widgets (Qt 4.1) - add-static-library QtUiTools : QtGui QtXml : $(target-requirements) ; - - # DBus-Support (Qt 4.2) - add-shared-library QtDBus : QtXml : : $(target-requirements) ; - - # Script-Engine (Qt 4.3) - add-shared-library QtScript : QtGui QtXml : QT_SCRIPT_LIB : $(target-requirements) ; - - # Tools for the Script-Engine (Qt 4.5) - add-shared-library QtScriptTools : QtScript : QT_SCRIPTTOOLS_LIB : $(target-requirements) ; - - # WebKit (Qt 4.4) - add-shared-library QtWebKit : QtGui : QT_WEBKIT_LIB : $(target-requirements) ; - - # Phonon Multimedia (Qt 4.4) - add-shared-library phonon : QtGui QtXml : QT_PHONON_LIB : $(target-requirements) ; - - # Multimedia engine (Qt 4.6) - add-shared-library QtMultimedia : QtGui : QT_MULTIMEDIA_LIB : $(target-requirements) ; - - # XmlPatterns-Engine (Qt 4.4) - add-shared-library QtXmlPatterns : QtNetwork : QT_XMLPATTERNS_LIB : $(target-requirements) ; - - # Help-Engine (Qt 4.4) - add-shared-library QtHelp : QtGui QtSql QtXml : : $(target-requirements) ; - add-shared-library QtCLucene : QCore QtSql QtXml : : $(target-requirements) ; - - # QML-Engine (Qt 4.7) - add-shared-library QtDeclarative : QtGui QtXml : : $(target-requirements) ; - - # AssistantClient Support - # Compat library removed in 4.7.0 - # Pre-4.4 help system, use QtHelp for new programs - if $(version) < "4.7" - { - add-shared-library QtAssistantClient : QtGui : : $(target-requirements) : QtAssistant ; - } - debug-message "==== Configured Qt-$(version) ====" ; - - project.pop-current ; -} - -rule initialized ( ) -{ - return $(.initialized) ; -} - - - -# This custom generator is needed because in QT4, UI files are translated only -# into H files, and no C++ files are created. Further, the H files need not be -# passed via MOC. The header is used only via inclusion. If we define a standard -# UI -> H generator, Boost.Build will run MOC on H, and then compile the -# resulting cpp. It will give a warning, since output from moc will be empty. -# -# This generator is declared with a UI -> OBJ signature, so it gets invoked when -# linking generator tries to convert sources to OBJ, but it produces target of -# type H. This is non-standard, but allowed. That header won't be mocced. 
-# -class uic-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(name) - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - } - - local a = [ new action $(sources[1]) : qt4.uic : $(property-set) ] ; - - # The 'ui_' prefix is to match qmake's default behavior. - local target = [ new file-target ui_$(name) : H : $(project) : $(a) ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, we - # need the target to be seen by bjam, so that dependency from sources to - # this generated header is detected -- if jam does not know about this - # target, it won't do anything. - DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } -} - - -class moc-h-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_CPP - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt4.moc.inc : - $(property-set) ] ; - - local target = [ new file-target $(name) : MOC : $(project) : $(a) - ] ; - - local r = [ virtual-target.register $(target) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that dependency from - # sources to this generated header is detected -- if jam does not - # know about this target, it won't do anything. - DEPENDS all : [ $(r).actualize ] ; - - return $(r) ; - } - } -} - - -class moc-inc-generator : generator -{ - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? : property-set : sources * ) - { - if ! $(sources[2]) && [ $(sources[1]).type ] = MOCCABLE_H - { - name = [ $(sources[0]).name ] ; - name = $(name:B) ; - - local a = [ new action $(sources[1]) : qt4.moc.inc : - $(property-set) ] ; - - local target = [ new file-target moc_$(name) : CPP : $(project) : - $(a) ] ; - - # Since this generator will return a H target, the linking generator - # won't use it at all, and won't set any dependency on it. However, - # we need the target to be seen by bjam, so that dependency from - # sources to this generated header is detected -- if jam does not - # know about this target, it won't do anything. - DEPENDS all : [ $(target).actualize ] ; - - return [ virtual-target.register $(target) ] ; - } - } -} - - -# Query the installation directory. This is needed in at least two scenarios. -# First, when re-using sources from the Qt-Tree. Second, to "install" custom Qt -# plugins to the Qt-Tree. -# -rule directory -{ - return $(.PREFIX) ; -} - -# Add a shared Qt library. -rule add-shared-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? ) -{ - add-library $(lib-name) : $(.suffix_version) : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ; -} - -# Add a static Qt library. -rule add-static-library ( lib-name : depends-on * : usage-defines * : requirements * : include ? 
) -{ - add-library $(lib-name) : : $(depends-on) : $(usage-defines) : $(requirements) : $(include) ; -} - -# Add a Qt library. -# Static libs are unversioned, whereas shared libs have the major number as suffix. -# Creates both release and debug versions on platforms where both are enabled by Qt configure. -# Flags: -# - lib-name Qt library Name -# - version Qt major number used as shared library suffix (QtCore4.so) -# - depends-on other Qt libraries -# - usage-defines those are set by qmake, so set them when using this library -# - requirements addional requirements -# - include non-canonical include path. The canonical path is $(.incprefix)/$(lib-name). -rule add-library ( lib-name : version ? : depends-on * : usage-defines * : requirements * : include ? ) -{ - if $(.bjam-qt) - { - # Import Qt module - # Eveything will be setup there - alias $(lib-name) - : $(.prefix)//$(lib-name) - : - : - : qt4 ; - } - else - { - local real_include ; - real_include ?= $(include) ; - real_include ?= $(lib-name) ; - - lib $(lib-name) - : # sources - $(depends-on) - : # requirements - $(lib-name)$(version) - $(requirements) - : # default-build - : # usage-requirements - $(usage-defines) - $(.incprefix)/$(real_include) - ; - - if $(.have_separate_debug) = TRUE - { - lib $(lib-name) - : # sources - $(depends-on) - : # requirements - $(lib-name)$(.suffix_debug)$(version) - $(requirements) - debug - : # default-build - : # usage-requirements - $(usage-defines) - $(.incprefix)/$(real_include) - ; - } - } - - # Make library explicit so that a simple qt4 will not bring in everything. - # And some components like QtDBus/Phonon may not be available on all platforms. - explicit $(lib-name) ; -} - -# Use $(.BINPREFIX[-1]) for the paths as several tools-requirements can match. -# The exact match is the last one. - -# Get and from current toolset. -flags qt4.moc INCLUDES ; -flags qt4.moc DEFINES ; - -# need a newline for expansion of DEFINES and INCLUDES in the response file. -.nl = " -" ; - -# Processes headers to create Qt MetaObject information. Qt4-moc has its -# c++-parser, so pass INCLUDES and DEFINES. -# We use response file with one INCLUDE/DEFINE per line -# -actions moc -{ - $(.BINPREFIX[-1])/moc -f $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))" -} - -# When moccing files for include only, we don't need -f, otherwise the generated -# code will include the .cpp and we'll get duplicated symbols. -# -actions moc.inc -{ - $(.BINPREFIX[-1])/moc $(>) -o $(<) @"@($(<).rsp:E=-D$(DEFINES)$(.nl) -I$(INCLUDES:T)$(.nl))" -} - - -# Generates source files from resource files. -# -actions rcc -{ - $(.BINPREFIX[-1])/rcc $(>) -name $(>:B) -o $(<) -} - - -# Generates user-interface source from .ui files. -# -actions uic -{ - $(.BINPREFIX[-1])/uic $(>) -o $(<) -} - - -# Scanner for .qrc files. Look for the CDATA section of the tag. Ignore -# the "alias" attribute. See http://doc.trolltech.com/qt/resources.html for -# detailed documentation of the Qt Resource System. -# -class qrc-scanner : common-scanner -{ - rule pattern ( ) - { - return "(.*)" ; - } -} - - -# Wrapped files are "included". -scanner.register qrc-scanner : include ; diff --git a/jam-files/boost-build/tools/quickbook-config.jam b/jam-files/boost-build/tools/quickbook-config.jam deleted file mode 100644 index e983a78a..00000000 --- a/jam-files/boost-build/tools/quickbook-config.jam +++ /dev/null @@ -1,44 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. 
-#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for BoostBook tools. To use, just import this module. - -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - local boost-dir = ; - for local R in snapshot cvs 1.33.0 - { - boost-dir += [ W32_GETREG - "HKEY_LOCAL_MACHINE\\SOFTWARE\\Boost.org\\$(R)" - : "InstallRoot" ] ; - } - local quickbook-path = [ GLOB "$(boost-dir)\\bin" "\\Boost\\bin" : quickbook.exe ] ; - quickbook-path = $(quickbook-path[1]) ; - - if $(quickbook-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using quickbook ":" $(quickbook-path) ; - } - using quickbook : $(quickbook-path) ; - } -} -else -{ - local quickbook-path = [ GLOB "/usr/local/bin" "/usr/bin" "/opt/bin" : quickbook ] ; - quickbook-path = $(quickbook-path[1]) ; - - if $(quickbook-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using quickbook ":" $(quickbook-path) ; - } - using quickbook : $(quickbook-path) ; - } -} diff --git a/jam-files/boost-build/tools/quickbook.jam b/jam-files/boost-build/tools/quickbook.jam deleted file mode 100644 index 6de2d42f..00000000 --- a/jam-files/boost-build/tools/quickbook.jam +++ /dev/null @@ -1,361 +0,0 @@ -# -# Copyright (c) 2005 Joo Abecasis -# Copyright (c) 2005 Vladimir Prus -# Copyright (c) 2006 Rene Rivera -# -# Distributed under the Boost Software License, Version 1.0. (See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) -# - -# This toolset defines a generator to translate QuickBook to BoostBook. It can -# be used to generate nice (!) user documentation in different formats -# (pdf/html/...), from a single text file with simple markup. -# -# The toolset defines the QUICKBOOK type (file extension 'qbk') and -# a QUICKBOOK to XML (BOOSTBOOK) generator. -# -# -# =========================================================================== -# Q & A -# =========================================================================== -# -# If you don't know what this is all about, some Q & A will hopefully get you -# up to speed with QuickBook and this toolset. -# -# -# What is QuickBook ? -# -# QuickBook is a WikiWiki style documentation tool geared towards C++ -# documentation using simple rules and markup for simple formatting tasks. -# QuickBook extends the WikiWiki concept. Like the WikiWiki, QuickBook -# documents are simple text files. A single QuickBook document can -# generate a fully linked set of nice HTML and PostScript/PDF documents -# complete with images and syntax-colorized source code. -# -# -# Where can I get QuickBook ? -# -# Quickbook can be found in Boost's repository, under the tools/quickbook -# directory it was added there on Jan 2005, some time after the release of -# Boost v1.32.0 and has been an integral part of the Boost distribution -# since v1.33. -# -# Here's a link to the SVN repository: -# https://svn.boost.org/svn/boost/trunk/tools/quickbook -# -# And to QuickBook's QuickBook-generated docs: -# http://www.boost.org/doc/libs/release/tools/quickbook/index.html -# -# -# How do I use QuickBook and this toolset in my projects ? -# -# The minimal example is: -# -# using boostbook ; -# import quickbook ; -# -# boostbook my_docs : my_docs_source.qbk ; -# -# where my_docs is a target name and my_docs_source.qbk is a QuickBook -# file. The documentation format to be generated is determined by the -# boostbook toolset. 
By default html documentation should be generated, -# but you should check BoostBook's docs to be sure. -# -# -# What do I need ? -# -# You should start by setting up the BoostBook toolset. Please refer to -# boostbook.jam and the BoostBook documentation for information on how to -# do this. -# -# A QuickBook executable is also needed. The toolset will generate this -# executable if it can find the QuickBook sources. The following -# directories will be searched: -# -# BOOST_ROOT/tools/quickbook/ -# BOOST_BUILD_PATH/../../quickbook/ -# -# (BOOST_ROOT and BOOST_BUILD_PATH are environment variables) -# -# If QuickBook sources are not found the toolset will then try to use -# the shell command 'quickbook'. -# -# -# How do I provide a custom QuickBook executable ? -# -# You may put the following in your user-config.jam or site-config.jam: -# -# using quickbook : /path/to/quickbook ; -# -# or, if 'quickbook' can be found in your PATH, -# -# using quickbook : quickbook ; -# -# -# For convenience three alternatives are tried to get a QuickBook executable: -# -# 1. If the user points us to the a QuickBook executable, that is used. -# -# 2. Otherwise, we search for the QuickBook sources and compile QuickBook -# using the default toolset. -# -# 3. As a last resort, we rely on the shell for finding 'quickbook'. -# - -import boostbook ; -import "class" : new ; -import feature ; -import generators ; -import toolset ; -import type ; -import scanner ; -import project ; -import targets ; -import build-system ; -import path ; -import common ; -import errors ; - -# The one and only QUICKBOOK type! -type.register QUICKBOOK : qbk ; - -# shell command to run QuickBook -# targets to build QuickBook from sources. -feature.feature : : free ; -feature.feature : : free dependency ; -feature.feature : : free ; -feature.feature : : free ; -feature.feature : : free ; - - -# quickbook-binary-generator handles generation of the QuickBook executable, by -# marking it as a dependency for QuickBook docs. -# -# If the user supplied the QuickBook command that will be used. -# -# Otherwise we search some sensible places for the QuickBook sources and compile -# from scratch using the default toolset. -# -# As a last resort we rely on the shell to find 'quickbook'. -# -class quickbook-binary-generator : generator -{ - import modules path targets quickbook ; - - rule run ( project name ? : property-set : sources * : multiple ? ) - { - quickbook.freeze-config ; - # QuickBook invocation command and dependencies. - local quickbook-binary = [ modules.peek quickbook : .quickbook-binary ] ; - local quickbook-binary-dependencies ; - - if ! $(quickbook-binary) - { - # If the QuickBook source directory was found, mark its main target - # as a dependency for the current project. Otherwise, try to find - # 'quickbook' in user's PATH - local quickbook-dir = [ modules.peek quickbook : .quickbook-dir ] ; - if $(quickbook-dir) - { - # Get the main-target in QuickBook directory. - local quickbook-main-target = [ targets.resolve-reference $(quickbook-dir) : $(project) ] ; - - # The first element are actual targets, the second are - # properties found in target-id. We do not care about these - # since we have passed the id ourselves. - quickbook-main-target = - [ $(quickbook-main-target[1]).main-target quickbook ] ; - - quickbook-binary-dependencies = - [ $(quickbook-main-target).generate [ $(property-set).propagated ] ] ; - - # Ignore usage-requirements returned as first element. 
- quickbook-binary-dependencies = $(quickbook-binary-dependencies[2-]) ; - - # Some toolsets generate extra targets (e.g. RSP). We must mark - # all targets as dependencies for the project, but we will only - # use the EXE target for quickbook-to-boostbook translation. - for local target in $(quickbook-binary-dependencies) - { - if [ $(target).type ] = EXE - { - quickbook-binary = - [ path.native - [ path.join - [ $(target).path ] - [ $(target).name ] - ] - ] ; - } - } - } - } - - # Add $(quickbook-binary-dependencies) as a dependency of the current - # project and set it as the feature for the - # quickbook-to-boostbook rule, below. - property-set = [ $(property-set).add-raw - $(quickbook-binary-dependencies) - $(quickbook-binary) - $(quickbook-binary-dependencies) - ] ; - - return [ generator.run $(project) $(name) : $(property-set) : $(sources) : $(multiple) ] ; - } -} - - -# Define a scanner for tracking QBK include dependencies. -# -class qbk-scanner : common-scanner -{ - rule pattern ( ) - { - return "\\[[ ]*include[ ]+([^]]+)\\]" - "\\[[ ]*include:[a-zA-Z0-9_]+[ ]+([^]]+)\\]" - "\\[[ ]*import[ ]+([^]]+)\\]" ; - } -} - - -scanner.register qbk-scanner : include ; - -type.set-scanner QUICKBOOK : qbk-scanner ; - - -# Initialization of toolset. -# -# Parameters: -# command ? -> path to QuickBook executable. -# -# When command is not supplied toolset will search for QuickBook directory and -# compile the executable from source. If that fails we still search the path for -# 'quickbook'. -# -rule init ( - command ? # path to the QuickBook executable. - ) -{ - if $(command) - { - if $(.config-frozen) - { - errors.user-error "quickbook: configuration cannot be changed after it has been used." ; - } - .command = $(command) ; - } -} - -rule freeze-config ( ) -{ - if ! $(.config-frozen) - { - .config-frozen = true ; - - # QuickBook invocation command and dependencies. - - .quickbook-binary = $(.command) ; - - if $(.quickbook-binary) - { - # Use user-supplied command. - .quickbook-binary = [ common.get-invocation-command quickbook : quickbook : $(.quickbook-binary) ] ; - } - else - { - # Search for QuickBook sources in sensible places, like - # $(BOOST_ROOT)/tools/quickbook - # $(BOOST_BUILD_PATH)/../../quickbook - - # And build quickbook executable from sources. - - local boost-root = [ modules.peek : BOOST_ROOT ] ; - local boost-build-path = [ build-system.location ] ; - - if $(boost-root) - { - .quickbook-dir += [ path.join $(boost-root) tools ] ; - } - - if $(boost-build-path) - { - .quickbook-dir += $(boost-build-path)/../.. ; - } - - .quickbook-dir = [ path.glob $(.quickbook-dir) : quickbook ] ; - - # If the QuickBook source directory was found, mark its main target - # as a dependency for the current project. Otherwise, try to find - # 'quickbook' in user's PATH - if $(.quickbook-dir) - { - .quickbook-dir = [ path.make $(.quickbook-dir[1]) ] ; - } - else - { - ECHO "QuickBook warning: The path to the quickbook executable was" ; - ECHO " not provided. Additionally, couldn't find QuickBook" ; - ECHO " sources searching in" ; - ECHO " * BOOST_ROOT/tools/quickbook" ; - ECHO " * BOOST_BUILD_PATH/../../quickbook" ; - ECHO " Will now try to find a precompiled executable by searching" ; - ECHO " the PATH for 'quickbook'." 
; - ECHO " To disable this warning in the future, or to completely" ; - ECHO " avoid compilation of quickbook, you can explicitly set the" ; - ECHO " path to a quickbook executable command in user-config.jam" ; - ECHO " or site-config.jam with the call" ; - ECHO " using quickbook : /path/to/quickbook ;" ; - - # As a last resort, search for 'quickbook' command in path. Note - # that even if the 'quickbook' command is not found, - # get-invocation-command will still return 'quickbook' and might - # generate an error while generating the virtual-target. - - .quickbook-binary = [ common.get-invocation-command quickbook : quickbook ] ; - } - } - } -} - - -generators.register [ new quickbook-binary-generator quickbook.quickbook-to-boostbook : QUICKBOOK : XML ] ; - - -# shell command to run QuickBook -# targets to build QuickBook from sources. -toolset.flags quickbook.quickbook-to-boostbook QB-COMMAND ; -toolset.flags quickbook.quickbook-to-boostbook QB-DEPENDENCIES ; -toolset.flags quickbook.quickbook-to-boostbook INCLUDES ; -toolset.flags quickbook.quickbook-to-boostbook QB-DEFINES ; -toolset.flags quickbook.quickbook-to-boostbook QB-INDENT ; -toolset.flags quickbook.quickbook-to-boostbook QB-LINE-WIDTH ; - - -rule quickbook-to-boostbook ( target : source : properties * ) -{ - # Signal dependency of quickbook sources on - # upon invocation of quickbook-to-boostbook. - DEPENDS $(target) : [ on $(target) return $(QB-DEPENDENCIES) ] ; -} - - -actions quickbook-to-boostbook -{ - "$(QB-COMMAND)" -I"$(INCLUDES)" -D"$(QB-DEFINES)" --indent="$(QB-INDENT)" --linewidth="$(QB-LINE-WIDTH)" --output-file="$(1)" "$(2)" -} - - -# Declare a main target to convert a quickbook source into a boostbook XML file. -# -rule to-boostbook ( target-name : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new typed-target $(target-name) : $(project) : XML - : [ targets.main-target-sources $(sources) : $(target-name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} diff --git a/jam-files/boost-build/tools/rc.jam b/jam-files/boost-build/tools/rc.jam deleted file mode 100644 index 9964d339..00000000 --- a/jam-files/boost-build/tools/rc.jam +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (C) Andre Hentz 2003. Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. -# -# Copyright (c) 2006 Rene Rivera. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -import type ; -import generators ; -import feature ; -import errors ; -import scanner ; -import toolset : flags ; - -if [ MATCH (--debug-configuration) : [ modules.peek : ARGV ] ] -{ - .debug-configuration = true ; -} - -type.register RC : rc ; - -rule init ( ) -{ -} - -# Configures a new resource compilation command specific to a condition, -# usually a toolset selection condition. The possible options are: -# -# * (rc|windres) - Indicates the type of options the command -# accepts. -# -# Even though the arguments are all optional, only when a command, condition, -# and at minimum the rc-type option are given will the command be configured. 
-# This is so that callers don't have to check auto-configuration values -# before calling this. And still get the functionality of build failures when -# the resource compiler can't be found. -# -rule configure ( command ? : condition ? : options * ) -{ - local rc-type = [ feature.get-values : $(options) ] ; - - if $(command) && $(condition) && $(rc-type) - { - flags rc.compile.resource .RC $(condition) : $(command) ; - flags rc.compile.resource .RC_TYPE $(condition) : $(rc-type:L) ; - flags rc.compile.resource DEFINES ; - flags rc.compile.resource INCLUDES ; - if $(.debug-configuration) - { - ECHO notice: using rc compiler :: $(condition) :: $(command) ; - } - } -} - -rule compile.resource ( target : sources * : properties * ) -{ - local rc-type = [ on $(target) return $(.RC_TYPE) ] ; - rc-type ?= null ; - compile.resource.$(rc-type) $(target) : $(sources[1]) ; -} - -actions compile.resource.rc -{ - "$(.RC)" -l 0x409 "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -fo "$(<)" "$(>)" -} - -actions compile.resource.windres -{ - "$(.RC)" "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -o "$(<)" -i "$(>)" -} - -actions quietly compile.resource.null -{ - as /dev/null -o "$(<)" -} - -# Since it's a common practice to write -# exe hello : hello.cpp hello.rc -# we change the name of object created from RC file, to -# avoid conflict with hello.cpp. -# The reason we generate OBJ and not RES, is that gcc does not -# seem to like RES files, but works OK with OBJ. -# See http://article.gmane.org/gmane.comp.lib.boost.build/5643/ -# -# Using 'register-c-compiler' adds the build directory to INCLUDES -generators.register-c-compiler rc.compile.resource : RC : OBJ(%_res) ; - -# Register scanner for resources -class res-scanner : scanner -{ - import regex virtual-target path scanner ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - self.includes = $(includes) ; - } - - rule pattern ( ) - { - return "(([^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+([^ \"]+|\"[^\"]+\"))|(#include[ ]*(<[^<]+>|\"[^\"]+\")))" ; - } - - rule process ( target : matches * : binding ) - { - local angle = [ regex.transform $(matches) : "#include[ ]*<([^<]+)>" ] ; - local quoted = [ regex.transform $(matches) : "#include[ ]*\"([^\"]+)\"" ] ; - local res = [ regex.transform $(matches) : "[^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)[ ]+(([^ \"]+)|\"([^\"]+)\")" : 3 4 ] ; - - # Icons and other includes may referenced as - # - # IDR_MAINFRAME ICON "res\\icon.ico" - # - # so we have to replace double backslashes to single ones. - res = [ regex.replace-list $(res) : "\\\\\\\\" : "/" ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. - # We don't need this extra information for angle includes, - # since they should not depend on including file (we can't - # get literal "." in include path). 
- local g2 = $(g)"#"$(b) ; - - angle = $(angle:G=$(g)) ; - quoted = $(quoted:G=$(g2)) ; - res = $(res:G=$(g2)) ; - - local all = $(angle) $(quoted) ; - - INCLUDES $(target) : $(all) ; - DEPENDS $(target) : $(res) ; - NOCARE $(all) $(res) ; - SEARCH on $(angle) = $(self.includes:G=) ; - SEARCH on $(quoted) = $(b) $(self.includes:G=) ; - SEARCH on $(res) = $(b) $(self.includes:G=) ; - - # Just propagate current scanner to includes, in a hope - # that includes do not change scanners. - scanner.propagate $(__name__) : $(angle) $(quoted) : $(target) ; - } -} - -scanner.register res-scanner : include ; -type.set-scanner RC : res-scanner ; diff --git a/jam-files/boost-build/tools/rc.py b/jam-files/boost-build/tools/rc.py deleted file mode 100644 index 0b82d231..00000000 --- a/jam-files/boost-build/tools/rc.py +++ /dev/null @@ -1,189 +0,0 @@ -# Status: being ported by Steven Watanabe -# Base revision: 47077 -# -# Copyright (C) Andre Hentz 2003. Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. -# -# Copyright (c) 2006 Rene Rivera. -# -# Copyright (c) 2008 Steven Watanabe -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -##import type ; -##import generators ; -##import feature ; -##import errors ; -##import scanner ; -##import toolset : flags ; - -from b2.build import type, toolset, generators, scanner, feature -from b2.tools import builtin -from b2.util import regex -from b2.build.toolset import flags -from b2.manager import get_manager - -__debug = None - -def debug(): - global __debug - if __debug is None: - __debug = "--debug-configuration" in bjam.variable("ARGV") - return __debug - -type.register('RC', ['rc']) - -def init(): - pass - -def configure (command = None, condition = None, options = None): - """ - Configures a new resource compilation command specific to a condition, - usually a toolset selection condition. The possible options are: - - * (rc|windres) - Indicates the type of options the command - accepts. - - Even though the arguments are all optional, only when a command, condition, - and at minimum the rc-type option are given will the command be configured. - This is so that callers don't have to check auto-configuration values - before calling this. And still get the functionality of build failures when - the resource compiler can't be found. - """ - rc_type = feature.get_values('', options) - if rc_type: - assert(len(rc_type) == 1) - rc_type = rc_type[0] - - if command and condition and rc_type: - flags('rc.compile.resource', '.RC', condition, command) - flags('rc.compile.resource', '.RC_TYPE', condition, rc_type.lower()) - flags('rc.compile.resource', 'DEFINES', [], ['']) - flags('rc.compile.resource', 'INCLUDES', [], ['']) - if debug(): - print 'notice: using rc compiler ::', condition, '::', command - -engine = get_manager().engine() - -class RCAction: - """Class representing bjam action defined from Python. 
- The function must register the action to execute.""" - - def __init__(self, action_name, function): - self.action_name = action_name - self.function = function - - def __call__(self, targets, sources, property_set): - if self.function: - self.function(targets, sources, property_set) - -# FIXME: What is the proper way to dispatch actions? -def rc_register_action(action_name, function = None): - global engine - if engine.actions.has_key(action_name): - raise "Bjam action %s is already defined" % action_name - engine.actions[action_name] = RCAction(action_name, function) - -def rc_compile_resource(targets, sources, properties): - rc_type = bjam.call('get-target-variable', targets, '.RC_TYPE') - global engine - engine.set_update_action('rc.compile.resource.' + rc_type, targets, sources, properties) - -rc_register_action('rc.compile.resource', rc_compile_resource) - - -engine.register_action( - 'rc.compile.resource.rc', - '"$(.RC)" -l 0x409 "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -fo "$(<)" "$(>)"') - -engine.register_action( - 'rc.compile.resource.windres', - '"$(.RC)" "-U$(UNDEFS)" "-D$(DEFINES)" -I"$(>:D)" -I"$(<:D)" -I"$(INCLUDES)" -o "$(<)" -i "$(>)"') - -# FIXME: this was originally declared quietly -engine.register_action( - 'compile.resource.null', - 'as /dev/null -o "$(<)"') - -# Since it's a common practice to write -# exe hello : hello.cpp hello.rc -# we change the name of object created from RC file, to -# avoid conflict with hello.cpp. -# The reason we generate OBJ and not RES, is that gcc does not -# seem to like RES files, but works OK with OBJ. -# See http://article.gmane.org/gmane.comp.lib.boost.build/5643/ -# -# Using 'register-c-compiler' adds the build directory to INCLUDES -# FIXME: switch to generators -builtin.register_c_compiler('rc.compile.resource', ['RC'], ['OBJ(%_res)'], []) - -__angle_include_re = "#include[ ]*<([^<]+)>" - -# Register scanner for resources -class ResScanner(scanner.Scanner): - - def __init__(self, includes): - scanner.__init__ ; - self.includes = includes - - def pattern(self): - return "(([^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)" +\ - "[ ]+([^ \"]+|\"[^\"]+\"))|(#include[ ]*(<[^<]+>|\"[^\"]+\")))" ; - - def process(self, target, matches, binding): - - angle = regex.transform(matches, "#include[ ]*<([^<]+)>") - quoted = regex.transform(matches, "#include[ ]*\"([^\"]+)\"") - res = regex.transform(matches, - "[^ ]+[ ]+(BITMAP|CURSOR|FONT|ICON|MESSAGETABLE|RT_MANIFEST)" +\ - "[ ]+(([^ \"]+)|\"([^\"]+)\")", [3, 4]) - - # Icons and other includes may referenced as - # - # IDR_MAINFRAME ICON "res\\icon.ico" - # - # so we have to replace double backslashes to single ones. - res = [ re.sub(r'\\\\', '/', match) for match in res ] - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - g = bjam.call('get-target-variable', target, 'HDRGRIST') - b = os.path.normalize_path(os.path.dirname(binding)) - - # Attach binding of including file to included targets. - # When target is directly created from virtual target - # this extra information is unnecessary. But in other - # cases, it allows to distinguish between two headers of the - # same name included from different places. - # We don't need this extra information for angle includes, - # since they should not depend on including file (we can't - # get literal "." in include path). 
- g2 = g + "#" + b - - g = "<" + g + ">" - g2 = "<" + g2 + ">" - angle = [g + x for x in angle] - quoted = [g2 + x for x in quoted] - res = [g2 + x for x in res] - - all = angle + quoted - - bjam.call('mark-included', target, all) - - engine = get_manager().engine() - - engine.add_dependency(target, res) - bjam.call('NOCARE', all + res) - engine.set_target_variable(angle, 'SEARCH', ungrist(self.includes)) - engine.set_target_variable(quoted, 'SEARCH', b + ungrist(self.includes)) - engine.set_target_variable(res, 'SEARCH', b + ungrist(self.includes)) ; - - # Just propagate current scanner to includes, in a hope - # that includes do not change scanners. - get_manager().scanners().propagate(self, angle + quoted) - -scanner.register(ResScanner, 'include') -type.set_scanner('RC', ResScanner) diff --git a/jam-files/boost-build/tools/stage.jam b/jam-files/boost-build/tools/stage.jam deleted file mode 100644 index 296e7558..00000000 --- a/jam-files/boost-build/tools/stage.jam +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2005, 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'install' rule, used to copy a set of targets to a -# single location. - -import targets ; -import "class" : new ; -import errors ; -import type ; -import generators ; -import feature ; -import project ; -import virtual-target ; -import path ; -import types/register ; - - -feature.feature : off on : incidental ; -feature.feature : : free incidental ; -feature.feature : : free path ; -feature.feature : : free incidental ; - -# If 'on', version symlinks for shared libraries will not be created. Affects -# Unix builds only. -feature.feature : on : optional incidental ; - - -class install-target-class : basic-target -{ - import feature ; - import project ; - import type ; - import errors ; - import generators ; - import path ; - import stage ; - import "class" : new ; - import property ; - import property-set ; - - rule __init__ ( name-and-dir : project : sources * : requirements * : default-build * ) - { - basic-target.__init__ $(name-and-dir) : $(project) : $(sources) : - $(requirements) : $(default-build) ; - } - - # If is not set, sets it based on the project data. - # - rule update-location ( property-set ) - { - local loc = [ $(property-set).get ] ; - if ! $(loc) - { - loc = [ path.root $(self.name) [ $(self.project).get location ] ] ; - property-set = [ $(property-set).add-raw $(loc:G=) ] ; - } - - return $(property-set) ; - } - - # Takes a target that is installed and a property set which is used when - # installing. - # - rule adjust-properties ( target : build-property-set ) - { - local ps-raw ; - local a = [ $(target).action ] ; - if $(a) - { - local ps = [ $(a).properties ] ; - ps-raw = [ $(ps).raw ] ; - - # Unless true is in properties, which can happen - # only if the user has explicitly requested it, nuke all - # properties. - if [ $(build-property-set).get ] != true - { - ps-raw = [ property.change $(ps-raw) : ] ; - } - - # If any properties were specified for installing, add - # them. - local l = [ $(build-property-set).get ] ; - ps-raw += $(l:G=) ; - - # Also copy feature from current build set, to be used - # for relinking. - local l = [ $(build-property-set).get ] ; - ps-raw += $(l:G=) ; - - # Remove the feature on original targets. 
- ps-raw = [ property.change $(ps-raw) : ] ; - - # And . If stage target has another stage target in - # sources, then we shall get virtual targets with the - # property set. - ps-raw = [ property.change $(ps-raw) : ] ; - } - - local d = [ $(build-property-set).get ] ; - ps-raw += $(d:G=) ; - - local d = [ $(build-property-set).get ] ; - ps-raw += $(d:G=) ; - - local ns = [ $(build-property-set).get ] ; - ps-raw += $(ns:G=) ; - - local d = [ $(build-property-set).get ] ; - # Make the path absolute: we shall use it to compute relative paths and - # making the path absolute will help. - if $(d) - { - d = [ path.root $(d) [ path.pwd ] ] ; - ps-raw += $(d:G=) ; - } - - if $(ps-raw) - { - return [ property-set.create $(ps-raw) ] ; - } - else - { - return [ property-set.empty ] ; - } - } - - rule construct ( name : source-targets * : property-set ) - { - source-targets = [ targets-to-stage $(source-targets) : - $(property-set) ] ; - - property-set = [ update-location $(property-set) ] ; - - local ename = [ $(property-set).get ] ; - - if $(ename) && $(source-targets[2]) - { - errors.error "When property is used in 'install', only one" - "source is allowed" ; - } - - local result ; - for local i in $(source-targets) - { - local staged-targets ; - - local new-properties = [ adjust-properties $(i) : - $(property-set) ] ; - - # See if something special should be done when staging this type. It - # is indicated by the presence of a special "INSTALLED_" type. - local t = [ $(i).type ] ; - if $(t) && [ type.registered INSTALLED_$(t) ] - { - if $(ename) - { - errors.error "In 'install': property specified with target that requires relinking." ; - } - else - { - local targets = [ generators.construct $(self.project) - $(name) : INSTALLED_$(t) : $(new-properties) : $(i) ] ; - staged-targets += $(targets[2-]) ; - } - } - else - { - staged-targets = [ stage.copy-file $(self.project) $(ename) : - $(i) : $(new-properties) ] ; - } - - if ! $(staged-targets) - { - errors.error "Unable to generate staged version of " [ $(source).str ] ; - } - - for t in $(staged-targets) - { - result += [ virtual-target.register $(t) ] ; - } - } - - return [ property-set.empty ] $(result) ; - } - - # Given the list of source targets explicitly passed to 'stage', returns the - # list of targets which must be staged. - # - rule targets-to-stage ( source-targets * : property-set ) - { - local result ; - - # Traverse the dependencies, if needed. - if [ $(property-set).get ] = "on" - { - source-targets = [ collect-targets $(source-targets) ] ; - } - - # Filter the target types, if needed. - local included-types = [ $(property-set).get ] ; - for local r in $(source-targets) - { - local ty = [ $(r).type ] ; - if $(ty) - { - # Do not stage searched libs. - if $(ty) != SEARCHED_LIB - { - if $(included-types) - { - if [ include-type $(ty) : $(included-types) ] - { - result += $(r) ; - } - } - else - { - result += $(r) ; - } - } - } - else if ! $(included-types) - { - # Don't install typeless target if there is an explicit list of - # allowed types. - result += $(r) ; - } - } - - return $(result) ; - } - - # CONSIDER: figure out why we can not use virtual-target.traverse here. 
- # - rule collect-targets ( targets * ) - { - # Find subvariants - local s ; - for local t in $(targets) - { - s += [ $(t).creating-subvariant ] ; - } - s = [ sequence.unique $(s) ] ; - - local result = [ new set ] ; - $(result).add $(targets) ; - - for local i in $(s) - { - $(i).all-referenced-targets $(result) ; - } - local result2 ; - for local r in [ $(result).list ] - { - if $(r:G) != - { - result2 += $(r:G=) ; - } - } - DELETE_MODULE $(result) ; - result = [ sequence.unique $(result2) ] ; - } - - # Returns true iff 'type' is subtype of some element of 'types-to-include'. - # - local rule include-type ( type : types-to-include * ) - { - local found ; - while $(types-to-include) && ! $(found) - { - if [ type.is-subtype $(type) $(types-to-include[1]) ] - { - found = true ; - } - types-to-include = $(types-to-include[2-]) ; - } - - return $(found) ; - } -} - - -# Creates a copy of target 'source'. The 'properties' object should have a -# property which specifies where the target must be placed. -# -rule copy-file ( project name ? : source : properties ) -{ - name ?= [ $(source).name ] ; - local relative ; - - local new-a = [ new non-scanning-action $(source) : common.copy : - $(properties) ] ; - local source-root = [ $(properties).get ] ; - if $(source-root) - { - # Get the real path of the target. We probably need to strip relative - # path from the target name at construction. - local path = [ $(source).path ] ; - path = [ path.root $(name:D) $(path) ] ; - # Make the path absolute. Otherwise, it would be hard to compute the - # relative path. The 'source-root' is already absolute, see the - # 'adjust-properties' method above. - path = [ path.root $(path) [ path.pwd ] ] ; - - relative = [ path.relative-to $(source-root) $(path) ] ; - } - - # Note: Using $(name:D=$(relative)) might be faster here, but then we would - # need to explicitly check that relative is not ".", otherwise we might get - # paths like '/boost/.', try to create it and mkdir would obviously - # fail. - name = [ path.join $(relative) $(name:D=) ] ; - - return [ new file-target $(name) exact : [ $(source).type ] : $(project) : - $(new-a) ] ; -} - - -rule symlink ( name : project : source : properties ) -{ - local a = [ new action $(source) : symlink.ln : $(properties) ] ; - return [ new file-target $(name) exact : [ $(source).type ] : $(project) : - $(a) ] ; -} - - -rule relink-file ( project : source : property-set ) -{ - local action = [ $(source).action ] ; - local cloned-action = [ virtual-target.clone-action $(action) : $(project) : - "" : $(property-set) ] ; - return [ $(cloned-action).targets ] ; -} - - -# Declare installed version of the EXE type. Generator for this type will cause -# relinking to the new location. -type.register INSTALLED_EXE : : EXE ; - - -class installed-exe-generator : generator -{ - import type ; - import property-set ; - import modules ; - import stage ; - - rule __init__ ( ) - { - generator.__init__ install-exe : EXE : INSTALLED_EXE ; - } - - rule run ( project name ? : property-set : source : multiple ? ) - { - local need-relink ; - - if [ $(property-set).get ] in NT CYGWIN || - [ $(property-set).get ] in windows cygwin - { - } - else - { - # See if the dll-path properties are not changed during - # install. If so, copy, don't relink. 
- local a = [ $(source).action ] ; - local p = [ $(a).properties ] ; - local original = [ $(p).get ] ; - local current = [ $(property-set).get ] ; - - if $(current) != $(original) - { - need-relink = true ; - } - } - - - if $(need-relink) - { - return [ stage.relink-file $(project) - : $(source) : $(property-set) ] ; - } - else - { - return [ stage.copy-file $(project) - : $(source) : $(property-set) ] ; - } - } -} - - -generators.register [ new installed-exe-generator ] ; - - -# Installing a shared link on Unix might cause a creation of versioned symbolic -# links. -type.register INSTALLED_SHARED_LIB : : SHARED_LIB ; - - -class installed-shared-lib-generator : generator -{ - import type ; - import property-set ; - import modules ; - import stage ; - - rule __init__ ( ) - { - generator.__init__ install-shared-lib : SHARED_LIB - : INSTALLED_SHARED_LIB ; - } - - rule run ( project name ? : property-set : source : multiple ? ) - { - if [ $(property-set).get ] in NT CYGWIN || - [ $(property-set).get ] in windows cygwin - { - local copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - return [ virtual-target.register $(copied) ] ; - } - else - { - local a = [ $(source).action ] ; - local copied ; - if ! $(a) - { - # Non-derived file, just copy. - copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - } - else - { - local cp = [ $(a).properties ] ; - local current-dll-path = [ $(cp).get ] ; - local new-dll-path = [ $(property-set).get ] ; - - if $(current-dll-path) != $(new-dll-path) - { - # Rpath changed, need to relink. - copied = [ stage.relink-file $(project) : $(source) : - $(property-set) ] ; - } - else - { - copied = [ stage.copy-file $(project) : $(source) : - $(property-set) ] ; - } - } - - copied = [ virtual-target.register $(copied) ] ; - - local result = $(copied) ; - # If the name is in the form NNN.XXX.YYY.ZZZ, where all 'X', 'Y' and - # 'Z' are numbers, we need to create NNN.XXX and NNN.XXX.YYY - # symbolic links. - local m = [ MATCH (.*)\\.([0123456789]+)\\.([0123456789]+)\\.([0123456789]+)$ - : [ $(copied).name ] ] ; - if $(m) - { - # Symlink without version at all is used to make - # -lsome_library work. - result += [ stage.symlink $(m[1]) : $(project) : $(copied) : - $(property-set) ] ; - - # Symlinks of some libfoo.N and libfoo.N.M are used so that - # library can found at runtime, if libfoo.N.M.X has soname of - # libfoo.N. That happens when the library makes some binary - # compatibility guarantees. If not, it is possible to skip those - # symlinks. - local suppress = - [ $(property-set).get ] ; - - if $(suppress) != "on" - { - result += [ stage.symlink $(m[1]).$(m[2]) : $(project) - : $(copied) : $(property-set) ] ; - result += [ stage.symlink $(m[1]).$(m[2]).$(m[3]) : $(project) - : $(copied) : $(property-set) ] ; - } - } - - return $(result) ; - } - } -} - -generators.register [ new installed-shared-lib-generator ] ; - - -# Main target rule for 'install'. -# -rule install ( name : sources * : requirements * : default-build * ) -{ - local project = [ project.current ] ; - - # Unless the user has explicitly asked us to hardcode dll paths, add - # false in requirements, to override default value. - if ! 
true in $(requirements) - { - requirements += false ; - } - - if in $(requirements:G) - { - errors.user-error - "The property is not allowed for the 'install' rule" ; - } - - targets.main-target-alternative - [ new install-target-class $(name) : $(project) - : [ targets.main-target-sources $(sources) : $(name) ] - : [ targets.main-target-requirements $(requirements) : $(project) ] - : [ targets.main-target-default-build $(default-build) : $(project) ] - ] ; -} - - -IMPORT $(__name__) : install : : install ; -IMPORT $(__name__) : install : : stage ; diff --git a/jam-files/boost-build/tools/stage.py b/jam-files/boost-build/tools/stage.py deleted file mode 100644 index 25eccbe5..00000000 --- a/jam-files/boost-build/tools/stage.py +++ /dev/null @@ -1,350 +0,0 @@ -# Status: ported. -# Base revision 64444. -# -# Copyright 2003 Dave Abrahams -# Copyright 2005, 2006 Rene Rivera -# Copyright 2002, 2003, 2004, 2005, 2006, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module defines the 'install' rule, used to copy a set of targets to a -# single location. - -import b2.build.feature as feature -import b2.build.targets as targets -import b2.build.property as property -import b2.build.property_set as property_set -import b2.build.generators as generators -import b2.build.virtual_target as virtual_target - -from b2.manager import get_manager -from b2.util.sequence import unique -from b2.util import bjam_signature - -import b2.build.type - -import os.path -import re -import types - -feature.feature('install-dependencies', ['off', 'on'], ['incidental']) -feature.feature('install-type', [], ['free', 'incidental']) -feature.feature('install-source-root', [], ['free', 'path']) -feature.feature('so-version', [], ['free', 'incidental']) - -# If 'on', version symlinks for shared libraries will not be created. Affects -# Unix builds only. -feature.feature('install-no-version-symlinks', ['on'], ['optional', 'incidental']) - -class InstallTargetClass(targets.BasicTarget): - - def update_location(self, ps): - """If is not set, sets it based on the project data.""" - - loc = ps.get('location') - if not loc: - loc = os.path.join(self.project().get('location'), self.name()) - ps = ps.add_raw(["" + loc]) - - return ps - - def adjust_properties(self, target, build_ps): - a = target.action() - properties = [] - if a: - ps = a.properties() - properties = ps.all() - - # Unless true is in properties, which can happen - # only if the user has explicitly requested it, nuke all - # properties. - - if build_ps.get('hardcode-dll-paths') != ['true']: - properties = [p for p in properties if p.feature().name() != 'dll-path'] - - # If any properties were specified for installing, add - # them. - properties.extend(build_ps.get_properties('dll-path')) - - # Also copy feature from current build set, to be used - # for relinking. - properties.extend(build_ps.get_properties('linkflags')) - - # Remove the feature on original targets. - # And . If stage target has another stage target in - # sources, then we shall get virtual targets with the - # property set. 
- properties = [p for p in properties - if not p.feature().name() in ['tag', 'location']] - - properties.extend(build_ps.get_properties('dependency')) - - properties.extend(build_ps.get_properties('location')) - - - properties.extend(build_ps.get_properties('install-no-version-symlinks')) - - d = build_ps.get_properties('install-source-root') - - # Make the path absolute: we shall use it to compute relative paths and - # making the path absolute will help. - if d: - p = d[0] - properties.append(property.Property(p.feature(), os.path.abspath(p.value()))) - - return property_set.create(properties) - - - def construct(self, name, source_targets, ps): - - source_targets = self.targets_to_stage(source_targets, ps) - ps = self.update_location(ps) - - ename = ps.get('name') - if ename: - ename = ename[0] - if ename and len(source_targets) > 1: - get_manager().errors()("When property is used in 'install', only one source is allowed") - - result = [] - - for i in source_targets: - - staged_targets = [] - new_ps = self.adjust_properties(i, ps) - - # See if something special should be done when staging this type. It - # is indicated by the presence of a special "INSTALLED_" type. - t = i.type() - if t and b2.build.type.registered("INSTALLED_" + t): - - if ename: - get_manager().errors()("In 'install': property specified with target that requires relinking.") - else: - (r, targets) = generators.construct(self.project(), name, "INSTALLED_" + t, - new_ps, [i]) - assert isinstance(r, property_set.PropertySet) - staged_targets.extend(targets) - - else: - staged_targets.append(copy_file(self.project(), ename, i, new_ps)) - - if not staged_targets: - get_manager().errors()("Unable to generate staged version of " + i) - - result.extend(get_manager().virtual_targets().register(t) for t in staged_targets) - - return (property_set.empty(), result) - - def targets_to_stage(self, source_targets, ps): - """Given the list of source targets explicitly passed to 'stage', returns the - list of targets which must be staged.""" - - result = [] - - # Traverse the dependencies, if needed. - if ps.get('install-dependencies') == ['on']: - source_targets = self.collect_targets(source_targets) - - # Filter the target types, if needed. - included_types = ps.get('install-type') - for r in source_targets: - ty = r.type() - if ty: - # Do not stage searched libs. - if ty != "SEARCHED_LIB": - if included_types: - if self.include_type(ty, included_types): - result.append(r) - else: - result.append(r) - elif not included_types: - # Don't install typeless target if there is an explicit list of - # allowed types. - result.append(r) - - return result - - # CONSIDER: figure out why we can not use virtual-target.traverse here. - # - def collect_targets(self, targets): - - s = [t.creating_subvariant() for t in targets] - s = unique(s) - - result = set(targets) - for i in s: - i.all_referenced_targets(result) - - result2 = [] - for r in result: - if isinstance(r, property.Property): - - if r.feature().name() != 'use': - result2.append(r.value()) - else: - result2.append(r) - result2 = unique(result2) - return result2 - - # Returns true iff 'type' is subtype of some element of 'types-to-include'. - # - def include_type(self, type, types_to_include): - return any(b2.build.type.is_subtype(type, ti) for ti in types_to_include) - -# Creates a copy of target 'source'. The 'properties' object should have a -# property which specifies where the target must be placed. 
-# -def copy_file(project, name, source, ps): - - if not name: - name = source.name() - - relative = "" - - new_a = virtual_target.NonScanningAction([source], "common.copy", ps) - source_root = ps.get('install-source-root') - if source_root: - source_root = source_root[0] - # Get the real path of the target. We probably need to strip relative - # path from the target name at construction. - path = os.path.join(source.path(), os.path.dirname(name)) - # Make the path absolute. Otherwise, it would be hard to compute the - # relative path. The 'source-root' is already absolute, see the - # 'adjust-properties' method above. - path = os.path.abspath(path) - - relative = os.path.relpath(path, source_root) - - name = os.path.join(relative, os.path.basename(name)) - return virtual_target.FileTarget(name, source.type(), project, new_a, exact=True) - -def symlink(name, project, source, ps): - a = virtual_target.Action([source], "symlink.ln", ps) - return virtual_target.FileTarget(name, source.type(), project, a, exact=True) - -def relink_file(project, source, ps): - action = source.action() - cloned_action = virtual_target.clone_action(action, project, "", ps) - targets = cloned_action.targets() - # We relink only on Unix, where exe or shared lib is always a single file. - assert len(targets) == 1 - return targets[0] - - -# Declare installed version of the EXE type. Generator for this type will cause -# relinking to the new location. -b2.build.type.register('INSTALLED_EXE', [], 'EXE') - -class InstalledExeGenerator(generators.Generator): - - def __init__(self): - generators.Generator.__init__(self, "install-exe", False, ['EXE'], ['INSTALLED_EXE']) - - def run(self, project, name, ps, source): - - need_relink = False; - - if ps.get('os') in ['NT', 'CYGWIN'] or ps.get('target-os') in ['windows', 'cygwin']: - # Never relink - pass - else: - # See if the dll-path properties are not changed during - # install. If so, copy, don't relink. - need_relink = ps.get('dll-path') != source[0].action().properties().get('dll-path') - - if need_relink: - return [relink_file(project, source, ps)] - else: - return [copy_file(project, None, source[0], ps)] - -generators.register(InstalledExeGenerator()) - - -# Installing a shared link on Unix might cause a creation of versioned symbolic -# links. -b2.build.type.register('INSTALLED_SHARED_LIB', [], 'SHARED_LIB') - -class InstalledSharedLibGenerator(generators.Generator): - - def __init__(self): - generators.Generator.__init__(self, 'install-shared-lib', False, ['SHARED_LIB'], ['INSTALLED_SHARED_LIB']) - - def run(self, project, name, ps, source): - - source = source[0] - if ps.get('os') in ['NT', 'CYGWIN'] or ps.get('target-os') in ['windows', 'cygwin']: - copied = copy_file(project, None, source, ps) - return [get_manager().virtual_targets().register(copied)] - else: - a = source.action() - if not a: - # Non-derived file, just copy. - copied = copy_file(project, source, ps) - else: - - need_relink = ps.get('dll-path') != source.action().properties().get('dll-path') - - if need_relink: - # Rpath changed, need to relink. - copied = relink_file(project, source, ps) - else: - copied = copy_file(project, None, source, ps) - - result = [get_manager().virtual_targets().register(copied)] - # If the name is in the form NNN.XXX.YYY.ZZZ, where all 'X', 'Y' and - # 'Z' are numbers, we need to create NNN.XXX and NNN.XXX.YYY - # symbolic links. 
- m = re.match("(.*)\\.([0123456789]+)\\.([0123456789]+)\\.([0123456789]+)$", - copied.name()); - if m: - # Symlink without version at all is used to make - # -lsome_library work. - result.append(symlink(m.group(1), project, copied, ps)) - - # Symlinks of some libfoo.N and libfoo.N.M are used so that - # library can found at runtime, if libfoo.N.M.X has soname of - # libfoo.N. That happens when the library makes some binary - # compatibility guarantees. If not, it is possible to skip those - # symlinks. - if ps.get('install-no-version-symlinks') != ['on']: - - result.append(symlink(m.group(1) + '.' + m.group(2), project, copied, ps)) - result.append(symlink(m.group(1) + '.' + m.group(2) + '.' + m.group(3), - project, copied, ps)) - - return result - -generators.register(InstalledSharedLibGenerator()) - - -# Main target rule for 'install'. -# -@bjam_signature((["name"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"], ["usage_requirements", "*"])) -def install(name, sources, requirements=[], default_build=[], usage_requirements=[]): - - requirements = requirements[:] - # Unless the user has explicitly asked us to hardcode dll paths, add - # false in requirements, to override default value. - if not 'true' in requirements: - requirements.append('false') - - if any(r.startswith('') for r in requirements): - get_manager().errors()("The property is not allowed for the 'install' rule") - - from b2.manager import get_manager - t = get_manager().targets() - - project = get_manager().projects().current() - - return t.main_target_alternative( - InstallTargetClass(name, project, - t.main_target_sources(sources, name), - t.main_target_requirements(requirements, project), - t.main_target_default_build(default_build, project), - t.main_target_usage_requirements(usage_requirements, project))) - -get_manager().projects().add_rule("install", install) -get_manager().projects().add_rule("stage", install) - diff --git a/jam-files/boost-build/tools/stlport.jam b/jam-files/boost-build/tools/stlport.jam deleted file mode 100644 index 62eebda5..00000000 --- a/jam-files/boost-build/tools/stlport.jam +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright Gennadiy Rozental -# Copyright 2006 Rene Rivera -# Copyright 2003, 2004, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# The STLPort is usable by means of 'stdlib' feature. When -# stdlib=stlport is specified, default version of STLPort will be used, -# while stdlib=stlport-4.5 will use specific version. -# The subfeature value 'hostios' means to use host compiler's iostreams. -# -# The specific version of stlport is selected by features: -# The feature selects between static and shared library -# The on selects STLPort with debug symbols -# and stl debugging. -# There's no way to use STLPort with debug symbols but without -# stl debugging. - -# TODO: must implement selection of different STLPort installations based -# on used toolset. 
-# Also, finish various flags: -# -# This is copied from V1 toolset, "+" means "implemented" -#+flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_OWN_IOSTREAMS=1 _STLP_HAS_NO_NEW_IOSTREAMS=1 ; -#+flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_EXTENSIONS=1 ; -# flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_ANACHRONISMS=1 ; -# flags $(CURR_TOOLSET) DEFINES global : _STLP_VENDOR_GLOBAL_CSTD=1 ; -# flags $(CURR_TOOLSET) DEFINES off : _STLP_NO_EXCEPTIONS=1 ; -# flags $(CURR_TOOLSET) DEFINES on : _STLP_DEBUG_ALLOC=1 ; -#+flags $(CURR_TOOLSET) DEFINES debug : _STLP_DEBUG=1 _STLP_DEBUG_UNINITIALIZED=1 ; -#+flags $(CURR_TOOLSET) DEFINES dynamic : _STLP_USE_DYNAMIC_LIB=1 ; - - -import feature : feature subfeature ; -import project ; -import "class" : new ; -import targets ; -import property-set ; -import common ; -import type ; - -# Make this module into a project. -project.initialize $(__name__) ; -project stlport ; - -# The problem: how to request to use host compiler's iostreams? -# -# Solution 1: Global 'stlport-iostream' feature. -# That's ugly. Subfeature make more sense for stlport-specific thing. -# Solution 2: Use subfeature with two values, one of which ("use STLPort iostream") -# is default. -# The problem is that such subfeature will appear in target paths, and that's ugly -# Solution 3: Use optional subfeature with only one value. - -feature.extend stdlib : stlport ; -feature.compose stlport : /stlport//stlport ; - -# STLport iostreams or native iostreams -subfeature stdlib stlport : iostream : hostios : optional propagated ; - -# STLport extensions -subfeature stdlib stlport : extensions : noext : optional propagated ; - -# STLport anachronisms -- NOT YET SUPPORTED -# subfeature stdlib stlport : anachronisms : on off ; - -# STLport debug allocation -- NOT YET SUPPORTED -#subfeature stdlib stlport : debug-alloc : off on ; - -# Declare a special target class to handle the creation of search-lib-target -# instances for STLport. We need a special class, because otherwise we'll have -# - declare prebuilt targets for all possible toolsets. And by the time 'init' -# is called we don't even know the list of toolsets that are registered -# - when host iostreams are used, we really should produce nothing. It would -# be hard/impossible to achieve this using prebuilt targets. - -class stlport-target-class : basic-target -{ - import feature project type errors generators ; - import set : difference ; - - rule __init__ ( project : headers ? : libraries * : version ? ) - { - basic-target.__init__ stlport : $(project) ; - self.headers = $(headers) ; - self.libraries = $(libraries) ; - self.version = $(version) ; - self.version.5 = [ MATCH "^(5[.][0123456789]+).*" : $(version) ] ; - - local requirements ; - requirements += $(self.version) ; - self.requirements = [ property-set.create $(requirements) ] ; - } - - rule generate ( property-set ) - { - # Since this target is built with stlport, it will also - # have /stlport//stlport in requirements, which will - # cause a loop in main target references. Remove that property - # manually. - - property-set = [ property-set.create - [ difference - [ $(property-set).raw ] : - /stlport//stlport - stlport - ] - ] ; - return [ basic-target.generate $(property-set) ] ; - } - - rule construct ( name : source-targets * : property-set ) - { - # Deduce the name of stlport library, based on toolset and - # debug setting. 
- local raw = [ $(property-set).raw ] ; - local hostios = [ feature.get-values : $(raw) ] ; - local toolset = [ feature.get-values : $(raw) ] ; - - if $(self.version.5) - { - # Version 5.x - - # STLport host IO streams no longer supported. So we always - # need libraries. - - # name: stlport(stl)?[dg]?(_static)?.M.R - local name = stlport ; - if [ feature.get-values : $(raw) ] = "on" - { - name += stl ; - switch $(toolset) - { - case gcc* : name += g ; - case darwin* : name += g ; - case * : name += d ; - } - } - - if [ feature.get-values : $(raw) ] = "static" - { - name += _static ; - } - - # Starting with version 5.2.0, the STLport static libraries no longer - # include a version number in their name - local version.pre.5.2 = [ MATCH "^(5[.][01]+).*" : $(version) ] ; - if $(version.pre.5.2) || [ feature.get-values : $(raw) ] != "static" - { - name += .$(self.version.5) ; - } - - name = $(name:J=) ; - - if [ feature.get-values : $(raw) ] = "on" - { - #~ Allow explicitly asking to install the STLport lib by - #~ refering to it directly: /stlport//stlport/on - #~ This allows for install packaging of all libs one might need for - #~ a standalone distribution. - import path : make : path-make ; - local runtime-link - = [ feature.get-values : $(raw) ] ; - local lib-file.props - = [ property-set.create $(raw) $(runtime-link) ] ; - local lib-file.prefix - = [ type.generated-target-prefix $(runtime-link:U)_LIB : $(lib-file.props) ] ; - local lib-file.suffix - = [ type.generated-target-suffix $(runtime-link:U)_LIB : $(lib-file.props) ] ; - lib-file.prefix - ?= "" "lib" ; - lib-file.suffix - ?= "" ; - local lib-file - = [ GLOB $(self.libraries) [ modules.peek : PATH ] : - $(lib-file.prefix)$(name).$(lib-file.suffix) ] ; - lib-file - = [ new file-reference [ path-make $(lib-file[1]) ] : $(self.project) ] ; - lib-file - = [ $(lib-file).generate "" ] ; - local lib-file.requirements - = [ targets.main-target-requirements - [ $(lib-file.props).raw ] $(lib-file[-1]) - : $(self.project) ] ; - return [ generators.construct $(self.project) $(name) : LIB : $(lib-file.requirements) ] ; - } - else - { - #~ Otherwise, it's just a regular usage of the library. - return [ generators.construct - $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ; - } - } - else if ! $(hostios) && $(toolset) != msvc - { - # We don't need libraries if host istreams are used. For - # msvc, automatic library selection will be used. - - # name: stlport_(_stldebug)? - local name = stlport ; - name = $(name)_$(toolset) ; - if [ feature.get-values : $(raw) ] = "on" - { - name = $(name)_stldebug ; - } - - return [ generators.construct - $(self.project) $(name) : SEARCHED_LIB : $(property-set) ] ; - } - else - { - return [ property-set.empty ] ; - } - } - - rule compute-usage-requirements ( subvariant ) - { - local usage-requirements = - $(self.headers) - $(self.libraries) - $(self.libraries) - ; - - local rproperties = [ $(subvariant).build-properties ] ; - # CONSIDER: should this "if" sequence be replaced with - # some use of 'property-map' class? 
- if [ $(rproperties).get ] = "on" - { - usage-requirements += - _STLP_DEBUG=1 - _STLP_DEBUG_UNINITIALIZED=1 ; - } - if [ $(rproperties).get ] = "shared" - { - usage-requirements += - _STLP_USE_DYNAMIC_LIB=1 ; - } - if [ $(rproperties).get ] = noext - { - usage-requirements += - _STLP_NO_EXTENSIONS=1 ; - } - if [ $(rproperties).get ] = hostios - { - usage-requirements += - _STLP_NO_OWN_IOSTREAMS=1 - _STLP_HAS_NO_NEW_IOSTREAMS=1 ; - } - if $(self.version.5) - { - # Version 5.x - if [ $(rproperties).get ] = "single" - { - # Since STLport5 doesn't normally support single-thread - # we force STLport5 into the multi-thread mode. Hence - # getting what other libs provide of single-thread code - # linking against a multi-thread lib. - usage-requirements += - _STLP_THREADS=1 ; - } - } - - return [ property-set.create $(usage-requirements) ] ; - } -} - -rule stlport-target ( headers ? : libraries * : version ? ) -{ - local project = [ project.current ] ; - - targets.main-target-alternative - [ new stlport-target-class $(project) : $(headers) : $(libraries) - : $(version) - ] ; -} - -local .version-subfeature-defined ; - -# Initialize stlport support. -rule init ( - version ? : - headers : # Location of header files - libraries * # Location of libraries, lib and bin subdirs of STLport. - ) -{ - # FIXME: need to use common.check-init-parameters here. - # At the moment, that rule always tries to define subfeature - # of the 'toolset' feature, while we need to define subfeature - # of stlport, so tweaks to check-init-parameters are needed. - if $(version) - { - if ! $(.version-subfeature-defined) - { - feature.subfeature stdlib stlport : version : : propagated ; - .version-subfeature-defined = true ; - } - feature.extend-subfeature stdlib stlport : version : $(version) ; - } - - # Declare the main target for this STLPort version. - stlport-target $(headers) : $(libraries) : $(version) ; -} - diff --git a/jam-files/boost-build/tools/sun.jam b/jam-files/boost-build/tools/sun.jam deleted file mode 100644 index 0ca927d3..00000000 --- a/jam-files/boost-build/tools/sun.jam +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) Christopher Currie 2003. Permission to copy, use, -# modify, sell and distribute this software is granted provided this -# copyright notice appears in all copies. This software is provided -# "as is" without express or implied warranty, and with no claim as -# to its suitability for any purpose. - -import property ; -import generators ; -import os ; -import toolset : flags ; -import feature ; -import type ; -import common ; - -feature.extend toolset : sun ; -toolset.inherit sun : unix ; -generators.override sun.prebuilt : builtin.lib-generator ; -generators.override sun.prebuilt : builtin.prebuilt ; -generators.override sun.searched-lib-generator : searched-lib-generator ; - -feature.extend stdlib : sun-stlport ; -feature.compose sun-stlport - : -library=stlport4 -library=stlport4 - ; - -rule init ( version ? : command * : options * ) -{ - local condition = [ - common.check-init-parameters sun : version $(version) ] ; - - command = [ common.get-invocation-command sun : CC - : $(command) : "/opt/SUNWspro/bin" ] ; - - # Even if the real compiler is not found, put CC to - # command line so that user see command line that would have being executed. 
- command ?= CC ; - - common.handle-options sun : $(condition) : $(command) : $(options) ; - - command_c = $(command[1--2]) $(command[-1]:B=cc) ; - - toolset.flags sun CONFIG_C_COMMAND $(condition) : $(command_c) ; -} - -# Declare generators -generators.register-c-compiler sun.compile.c : C : OBJ : sun ; -generators.register-c-compiler sun.compile.c++ : CPP : OBJ : sun ; - -# Declare flags and actions for compilation -flags sun.compile OPTIONS on : -g ; -flags sun.compile OPTIONS on : -xprofile=tcov ; -flags sun.compile OPTIONS speed : -xO4 ; -flags sun.compile OPTIONS space : -xO2 -xspace ; -flags sun.compile OPTIONS multi : -mt ; -flags sun.compile OPTIONS off : -erroff ; -flags sun.compile OPTIONS on : -erroff=%none ; -flags sun.compile OPTIONS all : -erroff=%none ; -flags sun.compile OPTIONS on : -errwarn ; - -flags sun.compile.c++ OPTIONS off : +d ; - -# The -m32 and -m64 options are supported starting -# with Sun Studio 12. On earlier compilers, the -# 'address-model' feature is not supported and should not -# be used. Instead, use -xarch=generic64 command line -# option. -# See http://svn.boost.org/trac/boost/ticket/1186 -# for details. -flags sun OPTIONS 32 : -m32 ; -flags sun OPTIONS 64 : -m64 ; -# On sparc, there's a difference between -Kpic -# and -KPIC. The first is slightly more efficient, -# but has the limits on the size of GOT table. -# For minimal fuss on user side, we use -KPIC here. -# See http://svn.boost.org/trac/boost/ticket/1186#comment:6 -# for detailed explanation. -flags sun OPTIONS shared : -KPIC ; - -flags sun.compile OPTIONS ; -flags sun.compile.c++ OPTIONS ; -flags sun.compile DEFINES ; -flags sun.compile INCLUDES ; - -actions compile.c -{ - "$(CONFIG_C_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -actions compile.c++ -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -D$(DEFINES) -I"$(INCLUDES)" -c -o "$(<)" "$(>)" -} - -# Declare flags and actions for linking -flags sun.link OPTIONS on : -g ; -# Strip the binary when no debugging is needed -flags sun.link OPTIONS off : -s ; -flags sun.link OPTIONS on : -xprofile=tcov ; -flags sun.link OPTIONS multi : -mt ; -flags sun.link OPTIONS ; -flags sun.link LINKPATH ; -flags sun.link FINDLIBS-ST ; -flags sun.link FINDLIBS-SA ; -flags sun.link LIBRARIES ; -flags sun.link LINK-RUNTIME static : static ; -flags sun.link LINK-RUNTIME shared : dynamic ; -flags sun.link RPATH ; -# On gcc, there are separate options for dll path at runtime and -# link time. On Solaris, there's only one: -R, so we have to use -# it, even though it's bad idea. -flags sun.link RPATH ; - -# The POSIX real-time library is always needed (nanosleep, clock_gettime etc.) 
-flags sun.link FINDLIBS-SA : rt ; - -rule link ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Slight mods for dlls -rule link.dll ( targets * : sources * : properties * ) -{ - SPACE on $(targets) = " " ; -} - -actions link.dll bind LIBRARIES -{ - "$(CONFIG_COMMAND)" $(OPTIONS) -L"$(LINKPATH)" -R"$(RPATH)" -o "$(<)" -h$(<[1]:D=) -G "$(>)" "$(LIBRARIES)" -Bdynamic -l$(FINDLIBS-SA) -Bstatic -l$(FINDLIBS-ST) -B$(LINK-RUNTIME) -} - -# Declare action for creating static libraries -actions piecemeal archive -{ - "$(CONFIG_COMMAND)" -xar -o "$(<)" "$(>)" -} - diff --git a/jam-files/boost-build/tools/symlink.jam b/jam-files/boost-build/tools/symlink.jam deleted file mode 100644 index b33e8260..00000000 --- a/jam-files/boost-build/tools/symlink.jam +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004, 2005 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines the "symlink" special target. 'symlink' targets make symbolic links -# to the sources. - -import targets modules path class os feature project property-set ; - -.count = 0 ; - -feature.feature symlink-location : project-relative build-relative : incidental ; - -# The class representing "symlink" targets. -# -class symlink-targets : basic-target -{ - import numbers modules class property project path ; - - rule __init__ ( - project - : targets * - : sources * - ) - { - # Generate a fake name for now. Need unnamed targets eventually. - local c = [ modules.peek symlink : .count ] ; - modules.poke symlink : .count : [ numbers.increment $(c) ] ; - local fake-name = symlink#$(c) ; - - basic-target.__init__ $(fake-name) : $(project) : $(sources) ; - - # Remember the targets to map the sources onto. Pad or truncate - # to fit the sources given. - self.targets = ; - for local source in $(sources) - { - if $(targets) - { - self.targets += $(targets[1]) ; - targets = $(targets[2-]) ; - } - else - { - self.targets += $(source) ; - } - } - - # The virtual targets corresponding to the given targets. - self.virtual-targets = ; - } - - rule construct ( name : source-targets * : property-set ) - { - local i = 1 ; - for local t in $(source-targets) - { - local s = $(self.targets[$(i)]) ; - local a = [ class.new action $(t) : symlink.ln : $(property-set) ] ; - local vt = [ class.new file-target $(s:D=) - : [ $(t).type ] : $(self.project) : $(a) ] ; - - # Place the symlink in the directory relative to the project - # location, instead of placing it in the build directory. - if [ property.select : [ $(property-set).raw ] ] = project-relative - { - $(vt).set-path [ path.root $(s:D) [ $(self.project).get location ] ] ; - } - - self.virtual-targets += $(vt) ; - i = [ numbers.increment $(i) ] ; - } - return [ property-set.empty ] $(self.virtual-targets) ; - } -} - -# Creates a symbolic link from a set of targets to a set of sources. -# The targets and sources map one to one. The symlinks generated are -# limited to be the ones given as the sources. That is, the targets -# are either padded or trimmed to equate to the sources. The padding -# is done with the name of the corresponding source. 
For example:: -# -# symlink : one two ; -# -# Is equal to:: -# -# symlink one two : one two ; -# -# Names for symlink are relative to the project location. They cannot -# include ".." path components. -rule symlink ( - targets * - : sources * - ) -{ - local project = [ project.current ] ; - - return [ targets.main-target-alternative - [ class.new symlink-targets $(project) : $(targets) : - # Note: inline targets are not supported for symlink, intentionally, - # since it's used to linking existing non-local targets. - $(sources) ] ] ; -} - -rule ln -{ - local os ; - if [ modules.peek : UNIX ] { os = UNIX ; } - else { os ?= [ os.name ] ; } - # Remember the path to make the link relative to where the symlink is located. - local path-to-source = [ path.relative-to - [ path.make [ on $(<) return $(LOCATE) ] ] - [ path.make [ on $(>) return $(LOCATE) ] ] ] ; - if $(path-to-source) = . - { - PATH_TO_SOURCE on $(<) = "" ; - } - else - { - PATH_TO_SOURCE on $(<) = [ path.native $(path-to-source) ] ; - } - ln-$(os) $(<) : $(>) ; -} - -actions ln-UNIX -{ - ln -f -s '$(>:D=:R=$(PATH_TO_SOURCE))' '$(<)' -} - -# there is a way to do this; we fall back to a copy for now -actions ln-NT -{ - echo "NT symlinks not supported yet, making copy" - del /f /q "$(<)" 2>nul >nul - copy "$(>)" "$(<)" $(NULL_OUT) -} - -IMPORT $(__name__) : symlink : : symlink ; diff --git a/jam-files/boost-build/tools/symlink.py b/jam-files/boost-build/tools/symlink.py deleted file mode 100644 index 6345ded6..00000000 --- a/jam-files/boost-build/tools/symlink.py +++ /dev/null @@ -1,112 +0,0 @@ -# Status: ported. -# Base revision: 64488. - -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004, 2005 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Defines the "symlink" special target. 'symlink' targets make symbolic links -# to the sources. - -import b2.build.feature as feature -import b2.build.targets as targets -import b2.build.property_set as property_set -import b2.build.virtual_target as virtual_target -import b2.build.targets - -from b2.manager import get_manager - -import bjam - -import os - - -feature.feature("symlink-location", ["project-relative", "build-relative"], ["incidental"]) - -class SymlinkTarget(targets.BasicTarget): - - _count = 0 - - def __init__(self, project, targets, sources): - - # Generate a fake name for now. Need unnamed targets eventually. - fake_name = "symlink#%s" % SymlinkTarget._count - SymlinkTarget._count = SymlinkTarget._count + 1 - - b2.build.targets.BasicTarget.__init__(self, fake_name, project, sources) - - # Remember the targets to map the sources onto. Pad or truncate - # to fit the sources given. - assert len(targets) <= len(sources) - self.targets = targets[:] + sources[len(targets):] - - # The virtual targets corresponding to the given targets. - self.virtual_targets = [] - - def construct(self, name, source_targets, ps): - i = 0 - for t in source_targets: - s = self.targets[i] - a = virtual_target.Action(self.manager(), [t], "symlink.ln", ps) - vt = virtual_target.FileTarget(os.path.basename(s), t.type(), self.project(), a) - - # Place the symlink in the directory relative to the project - # location, instead of placing it in the build directory. 
- if not ps.get('symlink-location') == "project-relative": - vt.set_path(os.path.join(self.project().get('location'), os.path.dirname(s))) - - vt = get_manager().virtual_targets().register(vt) - self.virtual_targets.append(vt) - i = i + 1 - - return (property_set.empty(), self.virtual_targets) - -# Creates a symbolic link from a set of targets to a set of sources. -# The targets and sources map one to one. The symlinks generated are -# limited to be the ones given as the sources. That is, the targets -# are either padded or trimmed to equate to the sources. The padding -# is done with the name of the corresponding source. For example:: -# -# symlink : one two ; -# -# Is equal to:: -# -# symlink one two : one two ; -# -# Names for symlink are relative to the project location. They cannot -# include ".." path components. -def symlink(targets, sources): - - from b2.manager import get_manager - t = get_manager().targets() - p = get_manager().projects().current() - - return t.main_target_alternative( - SymlinkTarget(p, targets, - # Note: inline targets are not supported for symlink, intentionally, - # since it's used to linking existing non-local targets. - sources)) - - -def setup_ln(targets, sources, ps): - - source_path = bjam.call("get-target-variable", sources[0], "LOCATE")[0] - target_path = bjam.call("get-target-variable", targets[0], "LOCATE")[0] - rel = os.path.relpath(source_path, target_path) - if rel == ".": - bjam.call("set-target-variable", targets, "PATH_TO_SOURCE", "") - else: - bjam.call("set-target-variable", targets, "PATH_TO_SOURCE", rel) - -if os.name == 'nt': - ln_action = """echo "NT symlinks not supported yet, making copy" -del /f /q "$(<)" 2>nul >nul -copy "$(>)" "$(<)" $(NULL_OUT)""" -else: - ln_action = "ln -f -s '$(>:D=:R=$(PATH_TO_SOURCE))' '$(<)'" - -get_manager().engine().register_action("symlink.ln", ln_action, function=setup_ln) - -get_manager().projects().add_rule("symlink", symlink) diff --git a/jam-files/boost-build/tools/testing-aux.jam b/jam-files/boost-build/tools/testing-aux.jam deleted file mode 100644 index 525dafd0..00000000 --- a/jam-files/boost-build/tools/testing-aux.jam +++ /dev/null @@ -1,210 +0,0 @@ -# This module is imported by testing.py. The definitions here are -# too tricky to do in Python - -# Causes the 'target' to exist after bjam invocation if and only if all the -# dependencies were successfully built. -# -rule expect-success ( target : dependency + : requirements * ) -{ - **passed** $(target) : $(sources) ; -} -IMPORT testing : expect-success : : testing.expect-success ; - -# Causes the 'target' to exist after bjam invocation if and only if all some of -# the dependencies were not successfully built. -# -rule expect-failure ( target : dependency + : properties * ) -{ - local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ; - local marker = $(dependency:G=$(grist)*fail) ; - (failed-as-expected) $(marker) ; - FAIL_EXPECTED $(dependency) ; - LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ; - RMOLD $(marker) ; - DEPENDS $(marker) : $(dependency) ; - DEPENDS $(target) : $(marker) ; - **passed** $(target) : $(marker) ; -} -IMPORT testing : expect-failure : : testing.expect-failure ; - -# The rule/action combination used to report successful passing of a test. -# -rule **passed** -{ - # Force deletion of the target, in case any dependencies failed to build. - RMOLD $(<) ; -} - - -# Used to create test files signifying passed tests. 
-# -actions **passed** -{ - echo passed > "$(<)" -} - - -# Used to create replacement object files that do not get created during tests -# that are expected to fail. -# -actions (failed-as-expected) -{ - echo failed as expected > "$(<)" -} - -# Runs executable 'sources' and stores stdout in file 'target'. Unless -# --preserve-test-targets command line option has been specified, removes the -# executable. The 'target-to-remove' parameter controls what should be removed: -# - if 'none', does not remove anything, ever -# - if empty, removes 'source' -# - if non-empty and not 'none', contains a list of sources to remove. -# -rule capture-output ( target : source : properties * : targets-to-remove * ) -{ - output-file on $(target) = $(target:S=.output) ; - LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ; - - # The INCLUDES kill a warning about independent target... - INCLUDES $(target) : $(target:S=.output) ; - # but it also puts .output into dependency graph, so we must tell jam it is - # OK if it cannot find the target or updating rule. - NOCARE $(target:S=.output) ; - - # This has two-fold effect. First it adds input files to the dependendency - # graph, preventing a warning. Second, it causes input files to be bound - # before target is created. Therefore, they are bound using SEARCH setting - # on them and not LOCATE setting of $(target), as in other case (due to jam - # bug). - DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ; - - if $(targets-to-remove) = none - { - targets-to-remove = ; - } - else if ! $(targets-to-remove) - { - targets-to-remove = $(source) ; - } - - if [ on $(target) return $(REMOVE_TEST_TARGETS) ] - { - TEMPORARY $(targets-to-remove) ; - # Set a second action on target that will be executed after capture - # output action. The 'RmTemps' rule has the 'ignore' modifier so it is - # always considered succeeded. This is needed for 'run-fail' test. For - # that test the target will be marked with FAIL_EXPECTED, and without - # 'ignore' successful execution will be negated and be reported as - # failure. With 'ignore' we do not detect a case where removing files - # fails, but it is not likely to happen. - RmTemps $(target) : $(targets-to-remove) ; - } -} - - -if [ os.name ] = NT -{ - .STATUS = %status% ; - .SET_STATUS = "set status=%ERRORLEVEL%" ; - .RUN_OUTPUT_NL = "echo." ; - .STATUS_0 = "%status% EQU 0 (" ; - .STATUS_NOT_0 = "%status% NEQ 0 (" ; - .VERBOSE = "%verbose% EQU 1 (" ; - .ENDIF = ")" ; - .SHELL_SET = "set " ; - .CATENATE = type ; - .CP = copy ; -} -else -{ - .STATUS = "$status" ; - .SET_STATUS = "status=$?" 
; - .RUN_OUTPUT_NL = "echo" ; - .STATUS_0 = "test $status -eq 0 ; then" ; - .STATUS_NOT_0 = "test $status -ne 0 ; then" ; - .VERBOSE = "test $verbose -eq 1 ; then" ; - .ENDIF = "fi" ; - .SHELL_SET = "" ; - .CATENATE = cat ; - .CP = cp ; -} - - -.VERBOSE_TEST = 0 ; -if --verbose-test in [ modules.peek : ARGV ] -{ - .VERBOSE_TEST = 1 ; -} - - -.RM = [ common.rm-command ] ; - - -actions capture-output bind INPUT_FILES output-file -{ - $(PATH_SETUP) - $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1 - $(.SET_STATUS) - $(.RUN_OUTPUT_NL) >> "$(output-file)" - echo EXIT STATUS: $(.STATUS) >> "$(output-file)" - if $(.STATUS_0) - $(.CP) "$(output-file)" "$(<)" - $(.ENDIF) - $(.SHELL_SET)verbose=$(.VERBOSE_TEST) - if $(.STATUS_NOT_0) - $(.SHELL_SET)verbose=1 - $(.ENDIF) - if $(.VERBOSE) - echo ====== BEGIN OUTPUT ====== - $(.CATENATE) "$(output-file)" - echo ====== END OUTPUT ====== - $(.ENDIF) - exit $(.STATUS) -} - -IMPORT testing : capture-output : : testing.capture-output ; - - -actions quietly updated ignore piecemeal together RmTemps -{ - $(.RM) "$(>)" -} - - -.MAKE_FILE = [ common.file-creation-command ] ; - -actions unit-test -{ - $(PATH_SETUP) - $(LAUNCHER) $(>) $(ARGS) && $(.MAKE_FILE) $(<) -} - -rule record-time ( target : source : start end user system ) -{ - local src-string = [$(source:G=:J=",")"] " ; - USER_TIME on $(target) += $(src-string)$(user) ; - SYSTEM_TIME on $(target) += $(src-string)$(system) ; -} - -# Calling this rule requests that Boost Build time how long it taks to build the -# 'source' target and display the results both on the standard output and in the -# 'target' file. -# -rule time ( target : source : properties * ) -{ - # Set up rule for recording timing information. - __TIMING_RULE__ on $(source) = testing.record-time $(target) ; - - # Make sure that the source is rebuilt any time we need to retrieve that - # information. - REBUILDS $(target) : $(source) ; -} - - -actions time -{ - echo user: $(USER_TIME) - echo system: $(SYSTEM_TIME) - - echo user: $(USER_TIME)" seconds" > "$(<)" - echo system: $(SYSTEM_TIME)" seconds" >> "$(<)" -} diff --git a/jam-files/boost-build/tools/testing.jam b/jam-files/boost-build/tools/testing.jam deleted file mode 100644 index c42075b7..00000000 --- a/jam-files/boost-build/tools/testing.jam +++ /dev/null @@ -1,581 +0,0 @@ -# Copyright 2005 Dave Abrahams -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module implements regression testing framework. It declares a number of -# main target rules which perform some action and, if the results are OK, -# creates an output file. -# -# The exact list of rules is: -# 'compile' -- creates .test file if compilation of sources was -# successful. -# 'compile-fail' -- creates .test file if compilation of sources failed. -# 'run' -- creates .test file is running of executable produced from -# sources was successful. Also leaves behind .output file -# with the output from program run. -# 'run-fail' -- same as above, but .test file is created if running fails. -# -# In all cases, presence of .test file is an indication that the test passed. -# For more convenient reporting, you might want to use C++ Boost regression -# testing utilities (see http://www.boost.org/more/regression.html). -# -# For historical reason, a 'unit-test' rule is available which has the same -# syntax as 'exe' and behaves just like 'run'. 
- -# Things to do: -# - Teach compiler_status handle Jamfile.v2. -# Notes: -# - is not implemented, since it is Como-specific, and it is not -# clear how to implement it -# - std::locale-support is not implemented (it is used in one test). - - -import alias ; -import "class" ; -import common ; -import errors ; -import feature ; -import generators ; -import os ; -import path ; -import project ; -import property ; -import property-set ; -import regex ; -import sequence ; -import targets ; -import toolset ; -import type ; -import virtual-target ; - - -rule init ( ) -{ -} - - -# Feature controling the command used to lanch test programs. -feature.feature testing.launcher : : free optional ; - -feature.feature test-info : : free incidental ; -feature.feature testing.arg : : free incidental ; -feature.feature testing.input-file : : free dependency ; - -feature.feature preserve-test-targets : on off : incidental propagated ; - -# Register target types. -type.register TEST : test ; -type.register COMPILE : : TEST ; -type.register COMPILE_FAIL : : TEST ; -type.register RUN_OUTPUT : run ; -type.register RUN : : TEST ; -type.register RUN_FAIL : : TEST ; -type.register LINK_FAIL : : TEST ; -type.register LINK : : TEST ; -type.register UNIT_TEST : passed : TEST ; - - -# Declare the rules which create main targets. While the 'type' module already -# creates rules with the same names for us, we need extra convenience: default -# name of main target, so write our own versions. - -# Helper rule. Create a test target, using basename of first source if no target -# name is explicitly passed. Remembers the created target in a global variable. -# -rule make-test ( target-type : sources + : requirements * : target-name ? ) -{ - target-name ?= $(sources[1]:D=:S=) ; - - # Having periods (".") in the target name is problematic because the typed - # generator will strip the suffix and use the bare name for the file - # targets. Even though the location-prefix averts problems most times it - # does not prevent ambiguity issues when referring to the test targets. For - # example when using the XML log output. So we rename the target to remove - # the periods, and provide an alias for users. - local real-name = [ regex.replace $(target-name) "[.]" "~" ] ; - - local project = [ project.current ] ; - # The forces the build system for generate paths in the - # form '$build_dir/array1.test/gcc/debug'. This is necessary to allow - # post-processing tools to work. - local t = [ targets.create-typed-target [ type.type-from-rule-name - $(target-type) ] : $(project) : $(real-name) : $(sources) : - $(requirements) $(real-name).test ] ; - - # The alias to the real target, per period replacement above. - if $(real-name) != $(target-name) - { - alias $(target-name) : $(t) ; - } - - # Remember the test (for --dump-tests). A good way would be to collect all - # given a project. This has some technical problems: e.g. we can not call - # this dump from a Jamfile since projects referred by 'build-project' are - # not available until the whole Jamfile has been loaded. - .all-tests += $(t) ; - return $(t) ; -} - - -# Note: passing more that one cpp file here is known to fail. Passing a cpp file -# and a library target works. -# -rule compile ( sources + : requirements * : target-name ? ) -{ - return [ make-test compile : $(sources) : $(requirements) : $(target-name) ] - ; -} - - -rule compile-fail ( sources + : requirements * : target-name ? 
) -{ - return [ make-test compile-fail : $(sources) : $(requirements) : - $(target-name) ] ; -} - - -rule link ( sources + : requirements * : target-name ? ) -{ - return [ make-test link : $(sources) : $(requirements) : $(target-name) ] ; -} - - -rule link-fail ( sources + : requirements * : target-name ? ) -{ - return [ make-test link-fail : $(sources) : $(requirements) : $(target-name) - ] ; -} - - -rule handle-input-files ( input-files * ) -{ - if $(input-files[2]) - { - # Check that sorting made when creating property-set instance will not - # change the ordering. - if [ sequence.insertion-sort $(input-files) ] != $(input-files) - { - errors.user-error "Names of input files must be sorted alphabetically" - : "due to internal limitations" ; - } - } - return $(input-files) ; -} - - -rule run ( sources + : args * : input-files * : requirements * : target-name ? : - default-build * ) -{ - requirements += $(args:J=" ") ; - requirements += [ handle-input-files $(input-files) ] ; - return [ make-test run : $(sources) : $(requirements) : $(target-name) ] ; -} - - -rule run-fail ( sources + : args * : input-files * : requirements * : - target-name ? : default-build * ) -{ - requirements += $(args:J=" ") ; - requirements += [ handle-input-files $(input-files) ] ; - return [ make-test run-fail : $(sources) : $(requirements) : $(target-name) - ] ; -} - - -# Use 'test-suite' as a synonym for 'alias', for backward compatibility. -IMPORT : alias : : test-suite ; - - -# For all main targets in 'project-module', which are typed targets with type -# derived from 'TEST', produce some interesting information. -# -rule dump-tests -{ - for local t in $(.all-tests) - { - dump-test $(t) ; - } -} - - -# Given a project location in normalized form (slashes are forward), compute the -# name of the Boost library. -# -local rule get-library-name ( path ) -{ - # Path is in normalized form, so all slashes are forward. - local match1 = [ MATCH /(tools|libs)/(.*)/(test|example) : $(path) ] ; - local match2 = [ MATCH /(tools|libs)/(.*)$ : $(path) ] ; - local match3 = [ MATCH (/status$) : $(path) ] ; - - if $(match1) { return $(match1[2]) ; } - else if $(match2) { return $(match2[2]) ; } - else if $(match3) { return "" ; } - else if --dump-tests in [ modules.peek : ARGV ] - { - # The 'run' rule and others might be used outside boost. In that case, - # just return the path, since the 'library name' makes no sense. - return $(path) ; - } -} - - -# Was an XML dump requested? -.out-xml = [ MATCH --out-xml=(.*) : [ modules.peek : ARGV ] ] ; - - -# Takes a target (instance of 'basic-target') and prints -# - its type -# - its name -# - comments specified via the property -# - relative location of all source from the project root. 
-# -rule dump-test ( target ) -{ - local type = [ $(target).type ] ; - local name = [ $(target).name ] ; - local project = [ $(target).project ] ; - - local project-root = [ $(project).get project-root ] ; - local library = [ get-library-name [ path.root [ $(project).get location ] - [ path.pwd ] ] ] ; - if $(library) - { - name = $(library)/$(name) ; - } - - local sources = [ $(target).sources ] ; - local source-files ; - for local s in $(sources) - { - if [ class.is-a $(s) : file-reference ] - { - local location = [ path.root [ path.root [ $(s).name ] - [ $(s).location ] ] [ path.pwd ] ] ; - - source-files += [ path.relative-to [ path.root $(project-root) - [ path.pwd ] ] $(location) ] ; - } - } - - local target-name = [ $(project).get location ] // [ $(target).name ] .test - ; - target-name = $(target-name:J=) ; - - local r = [ $(target).requirements ] ; - # Extract values of the feature. - local test-info = [ $(r).get ] ; - - # If the user requested XML output on the command-line, add the test info to - # that XML file rather than dumping them to stdout. - if $(.out-xml) - { - local nl = " -" ; - .contents on $(.out-xml) += - "$(nl) " - "$(nl) " - "$(nl) " - "$(nl) " - "$(nl) " - ; - } - else - { - # Format them into a single string of quoted strings. - test-info = \"$(test-info:J=\"\ \")\" ; - - ECHO boost-test($(type)) \"$(name)\" [$(test-info)] ":" - \"$(source-files)\" ; - } -} - - -# Register generators. Depending on target type, either 'expect-success' or -# 'expect-failure' rule will be used. -generators.register-standard testing.expect-success : OBJ : COMPILE ; -generators.register-standard testing.expect-failure : OBJ : COMPILE_FAIL ; -generators.register-standard testing.expect-success : RUN_OUTPUT : RUN ; -generators.register-standard testing.expect-failure : RUN_OUTPUT : RUN_FAIL ; -generators.register-standard testing.expect-failure : EXE : LINK_FAIL ; -generators.register-standard testing.expect-success : EXE : LINK ; - -# Generator which runs an EXE and captures output. -generators.register-standard testing.capture-output : EXE : RUN_OUTPUT ; - -# Generator which creates a target if sources run successfully. Differs from RUN -# in that run output is not captured. The reason why it exists is that the 'run' -# rule is much better for automated testing, but is not user-friendly (see -# http://article.gmane.org/gmane.comp.lib.boost.build/6353). -generators.register-standard testing.unit-test : EXE : UNIT_TEST ; - - -# The action rules called by generators. - -# Causes the 'target' to exist after bjam invocation if and only if all the -# dependencies were successfully built. -# -rule expect-success ( target : dependency + : requirements * ) -{ - **passed** $(target) : $(sources) ; -} - - -# Causes the 'target' to exist after bjam invocation if and only if all some of -# the dependencies were not successfully built. -# -rule expect-failure ( target : dependency + : properties * ) -{ - local grist = [ MATCH ^<(.*)> : $(dependency:G) ] ; - local marker = $(dependency:G=$(grist)*fail) ; - (failed-as-expected) $(marker) ; - FAIL_EXPECTED $(dependency) ; - LOCATE on $(marker) = [ on $(dependency) return $(LOCATE) ] ; - RMOLD $(marker) ; - DEPENDS $(marker) : $(dependency) ; - DEPENDS $(target) : $(marker) ; - **passed** $(target) : $(marker) ; -} - - -# The rule/action combination used to report successful passing of a test. -# -rule **passed** -{ - # Dump all the tests, if needed. 
We do it here, since dump should happen - # only after all Jamfiles have been read, and there is no such place - # currently defined (but there should be). - if ! $(.dumped-tests) && ( --dump-tests in [ modules.peek : ARGV ] ) - { - .dumped-tests = true ; - dump-tests ; - } - - # Force deletion of the target, in case any dependencies failed to build. - RMOLD $(<) ; -} - - -# Used to create test files signifying passed tests. -# -actions **passed** -{ - echo passed > "$(<)" -} - - -# Used to create replacement object files that do not get created during tests -# that are expected to fail. -# -actions (failed-as-expected) -{ - echo failed as expected > "$(<)" -} - - -rule run-path-setup ( target : source : properties * ) -{ - # For testing, we need to make sure that all dynamic libraries needed by the - # test are found. So, we collect all paths from dependency libraries (via - # xdll-path property) and add whatever explicit dll-path user has specified. - # The resulting paths are added to the environment on each test invocation. - local dll-paths = [ feature.get-values : $(properties) ] ; - dll-paths += [ feature.get-values : $(properties) ] ; - dll-paths += [ on $(source) return $(RUN_PATH) ] ; - dll-paths = [ sequence.unique $(dll-paths) ] ; - if $(dll-paths) - { - dll-paths = [ sequence.transform path.native : $(dll-paths) ] ; - PATH_SETUP on $(target) = [ common.prepend-path-variable-command - [ os.shared-library-path-variable ] : $(dll-paths) ] ; - } -} - - -local argv = [ modules.peek : ARGV ] ; - -toolset.flags testing.capture-output ARGS ; -toolset.flags testing.capture-output INPUT_FILES ; -toolset.flags testing.capture-output LAUNCHER ; - - -# Runs executable 'sources' and stores stdout in file 'target'. Unless -# --preserve-test-targets command line option has been specified, removes the -# executable. The 'target-to-remove' parameter controls what should be removed: -# - if 'none', does not remove anything, ever -# - if empty, removes 'source' -# - if non-empty and not 'none', contains a list of sources to remove. -# -rule capture-output ( target : source : properties * : targets-to-remove * ) -{ - output-file on $(target) = $(target:S=.output) ; - LOCATE on $(target:S=.output) = [ on $(target) return $(LOCATE) ] ; - - # The INCLUDES kill a warning about independent target... - INCLUDES $(target) : $(target:S=.output) ; - # but it also puts .output into dependency graph, so we must tell jam it is - # OK if it cannot find the target or updating rule. - NOCARE $(target:S=.output) ; - - # This has two-fold effect. First it adds input files to the dependendency - # graph, preventing a warning. Second, it causes input files to be bound - # before target is created. Therefore, they are bound using SEARCH setting - # on them and not LOCATE setting of $(target), as in other case (due to jam - # bug). - DEPENDS $(target) : [ on $(target) return $(INPUT_FILES) ] ; - - if $(targets-to-remove) = none - { - targets-to-remove = ; - } - else if ! $(targets-to-remove) - { - targets-to-remove = $(source) ; - } - - run-path-setup $(target) : $(source) : $(properties) ; - - if [ feature.get-values preserve-test-targets : $(properties) ] = off - { - TEMPORARY $(targets-to-remove) ; - # Set a second action on target that will be executed after capture - # output action. The 'RmTemps' rule has the 'ignore' modifier so it is - # always considered succeeded. This is needed for 'run-fail' test. 
For - # that test the target will be marked with FAIL_EXPECTED, and without - # 'ignore' successful execution will be negated and be reported as - # failure. With 'ignore' we do not detect a case where removing files - # fails, but it is not likely to happen. - RmTemps $(target) : $(targets-to-remove) ; - } -} - - -if [ os.name ] = NT -{ - .STATUS = %status% ; - .SET_STATUS = "set status=%ERRORLEVEL%" ; - .RUN_OUTPUT_NL = "echo." ; - .STATUS_0 = "%status% EQU 0 (" ; - .STATUS_NOT_0 = "%status% NEQ 0 (" ; - .VERBOSE = "%verbose% EQU 1 (" ; - .ENDIF = ")" ; - .SHELL_SET = "set " ; - .CATENATE = type ; - .CP = copy ; -} -else -{ - .STATUS = "$status" ; - .SET_STATUS = "status=$?" ; - .RUN_OUTPUT_NL = "echo" ; - .STATUS_0 = "test $status -eq 0 ; then" ; - .STATUS_NOT_0 = "test $status -ne 0 ; then" ; - .VERBOSE = "test $verbose -eq 1 ; then" ; - .ENDIF = "fi" ; - .SHELL_SET = "" ; - .CATENATE = cat ; - .CP = cp ; -} - - -.VERBOSE_TEST = 0 ; -if --verbose-test in [ modules.peek : ARGV ] -{ - .VERBOSE_TEST = 1 ; -} - - -.RM = [ common.rm-command ] ; - - -actions capture-output bind INPUT_FILES output-file -{ - $(PATH_SETUP) - $(LAUNCHER) "$(>)" $(ARGS) "$(INPUT_FILES)" > "$(output-file)" 2>&1 - $(.SET_STATUS) - $(.RUN_OUTPUT_NL) >> "$(output-file)" - echo EXIT STATUS: $(.STATUS) >> "$(output-file)" - if $(.STATUS_0) - $(.CP) "$(output-file)" "$(<)" - $(.ENDIF) - $(.SHELL_SET)verbose=$(.VERBOSE_TEST) - if $(.STATUS_NOT_0) - $(.SHELL_SET)verbose=1 - $(.ENDIF) - if $(.VERBOSE) - echo ====== BEGIN OUTPUT ====== - $(.CATENATE) "$(output-file)" - echo ====== END OUTPUT ====== - $(.ENDIF) - exit $(.STATUS) -} - - -actions quietly updated ignore piecemeal together RmTemps -{ - $(.RM) "$(>)" -} - - -.MAKE_FILE = [ common.file-creation-command ] ; - -toolset.flags testing.unit-test LAUNCHER ; -toolset.flags testing.unit-test ARGS ; - - -rule unit-test ( target : source : properties * ) -{ - run-path-setup $(target) : $(source) : $(properties) ; -} - - -actions unit-test -{ - $(PATH_SETUP) - $(LAUNCHER) $(>) $(ARGS) && $(.MAKE_FILE) $(<) -} - - -IMPORT $(__name__) : compile compile-fail run run-fail link link-fail - : : compile compile-fail run run-fail link link-fail ; - - -type.register TIME : time ; -generators.register-standard testing.time : : TIME ; - - -rule record-time ( target : source : start end user system ) -{ - local src-string = [$(source:G=:J=",")"] " ; - USER_TIME on $(target) += $(src-string)$(user) ; - SYSTEM_TIME on $(target) += $(src-string)$(system) ; -} - - -IMPORT testing : record-time : : testing.record-time ; - - -# Calling this rule requests that Boost Build time how long it taks to build the -# 'source' target and display the results both on the standard output and in the -# 'target' file. -# -rule time ( target : source : properties * ) -{ - # Set up rule for recording timing information. - __TIMING_RULE__ on $(source) = testing.record-time $(target) ; - - # Make sure that the source is rebuilt any time we need to retrieve that - # information. 
- REBUILDS $(target) : $(source) ; -} - - -actions time -{ - echo user: $(USER_TIME) - echo system: $(SYSTEM_TIME) - - echo user: $(USER_TIME)" seconds" > "$(<)" - echo system: $(SYSTEM_TIME)" seconds" >> "$(<)" -} diff --git a/jam-files/boost-build/tools/testing.py b/jam-files/boost-build/tools/testing.py deleted file mode 100644 index 3b53500c..00000000 --- a/jam-files/boost-build/tools/testing.py +++ /dev/null @@ -1,342 +0,0 @@ -# Status: ported, except for --out-xml -# Base revision: 64488 -# -# Copyright 2005 Dave Abrahams -# Copyright 2002, 2003, 2004, 2005, 2010 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This module implements regression testing framework. It declares a number of -# main target rules which perform some action and, if the results are OK, -# creates an output file. -# -# The exact list of rules is: -# 'compile' -- creates .test file if compilation of sources was -# successful. -# 'compile-fail' -- creates .test file if compilation of sources failed. -# 'run' -- creates .test file is running of executable produced from -# sources was successful. Also leaves behind .output file -# with the output from program run. -# 'run-fail' -- same as above, but .test file is created if running fails. -# -# In all cases, presence of .test file is an indication that the test passed. -# For more convenient reporting, you might want to use C++ Boost regression -# testing utilities (see http://www.boost.org/more/regression.html). -# -# For historical reason, a 'unit-test' rule is available which has the same -# syntax as 'exe' and behaves just like 'run'. - -# Things to do: -# - Teach compiler_status handle Jamfile.v2. -# Notes: -# - is not implemented, since it is Como-specific, and it is not -# clear how to implement it -# - std::locale-support is not implemented (it is used in one test). - -import b2.build.feature as feature -import b2.build.type as type -import b2.build.targets as targets -import b2.build.generators as generators -import b2.build.toolset as toolset -import b2.tools.common as common -import b2.util.option as option -import b2.build_system as build_system - - - -from b2.manager import get_manager -from b2.util import stem, bjam_signature -from b2.util.sequence import unique - -import bjam - -import re -import os.path -import sys - -def init(): - pass - -# Feature controling the command used to lanch test programs. -feature.feature("testing.launcher", [], ["free", "optional"]) - -feature.feature("test-info", [], ["free", "incidental"]) -feature.feature("testing.arg", [], ["free", "incidental"]) -feature.feature("testing.input-file", [], ["free", "dependency"]) - -feature.feature("preserve-test-targets", ["on", "off"], ["incidental", "propagated"]) - -# Register target types. -type.register("TEST", ["test"]) -type.register("COMPILE", [], "TEST") -type.register("COMPILE_FAIL", [], "TEST") - -type.register("RUN_OUTPUT", ["run"]) -type.register("RUN", [], "TEST") -type.register("RUN_FAIL", [], "TEST") - -type.register("LINK", [], "TEST") -type.register("LINK_FAIL", [], "TEST") -type.register("UNIT_TEST", ["passed"], "TEST") - -__all_tests = [] - -# Declare the rules which create main targets. While the 'type' module already -# creates rules with the same names for us, we need extra convenience: default -# name of main target, so write our own versions. - -# Helper rule. 
Create a test target, using basename of first source if no target -# name is explicitly passed. Remembers the created target in a global variable. -def make_test(target_type, sources, requirements, target_name=None): - - if not target_name: - target_name = stem(os.path.basename(sources[0])) - - # Having periods (".") in the target name is problematic because the typed - # generator will strip the suffix and use the bare name for the file - # targets. Even though the location-prefix averts problems most times it - # does not prevent ambiguity issues when referring to the test targets. For - # example when using the XML log output. So we rename the target to remove - # the periods, and provide an alias for users. - real_name = target_name.replace(".", "~") - - project = get_manager().projects().current() - # The forces the build system for generate paths in the - # form '$build_dir/array1.test/gcc/debug'. This is necessary to allow - # post-processing tools to work. - t = get_manager().targets().create_typed_target( - type.type_from_rule_name(target_type), project, real_name, sources, - requirements + ["" + real_name + ".test"], [], []) - - # The alias to the real target, per period replacement above. - if real_name != target_name: - get_manager().projects().project_rules().all_names_["alias"]( - target_name, [t]) - - # Remember the test (for --dump-tests). A good way would be to collect all - # given a project. This has some technical problems: e.g. we can not call - # this dump from a Jamfile since projects referred by 'build-project' are - # not available until the whole Jamfile has been loaded. - __all_tests.append(t) - return t - - -# Note: passing more that one cpp file here is known to fail. Passing a cpp file -# and a library target works. -# -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def compile(sources, requirements, target_name=None): - return make_test("compile", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def compile_fail(sources, requirements, target_name=None): - return make_test("compile-fail", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def link(sources, requirements, target_name=None): - return make_test("link", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["requirements", "*"], ["target_name", "?"])) -def link_fail(sources, requirements, target_name=None): - return make_test("link-fail", sources, requirements, target_name) - -def handle_input_files(input_files): - if len(input_files) > 1: - # Check that sorting made when creating property-set instance will not - # change the ordering. 
- if sorted(input_files) != input_files: - get_manager().errors()("Names of input files must be sorted alphabetically\n" + - "due to internal limitations") - return ["" + f for f in input_files] - -@bjam_signature((["sources", "*"], ["args", "*"], ["input_files", "*"], - ["requirements", "*"], ["target_name", "?"], - ["default_build", "*"])) -def run(sources, args, input_files, requirements, target_name=None, default_build=[]): - if args: - requirements.append("" + " ".join(args)) - requirements.extend(handle_input_files(input_files)) - return make_test("run", sources, requirements, target_name) - -@bjam_signature((["sources", "*"], ["args", "*"], ["input_files", "*"], - ["requirements", "*"], ["target_name", "?"], - ["default_build", "*"])) -def run_fail(sources, args, input_files, requirements, target_name=None, default_build=[]): - if args: - requirements.append("" + " ".join(args)) - requirements.extend(handle_input_files(input_files)) - return make_test("run-fail", sources, requirements, target_name) - -# Register all the rules -for name in ["compile", "compile-fail", "link", "link-fail", "run", "run-fail"]: - get_manager().projects().add_rule(name, getattr(sys.modules[__name__], name.replace("-", "_"))) - -# Use 'test-suite' as a synonym for 'alias', for backward compatibility. -from b2.build.alias import alias -get_manager().projects().add_rule("test-suite", alias) - -# For all main targets in 'project-module', which are typed targets with type -# derived from 'TEST', produce some interesting information. -# -def dump_tests(): - for t in __all_tests: - dump_test(t) - -# Given a project location in normalized form (slashes are forward), compute the -# name of the Boost library. -# -__ln1 = re.compile("/(tools|libs)/(.*)/(test|example)") -__ln2 = re.compile("/(tools|libs)/(.*)$") -__ln3 = re.compile("(/status$)") -def get_library_name(path): - - path = path.replace("\\", "/") - match1 = __ln1.match(path) - match2 = __ln2.match(path) - match3 = __ln3.match(path) - - if match1: - return match1.group(2) - elif match2: - return match2.group(2) - elif match3: - return "" - elif option.get("dump-tests", False, True): - # The 'run' rule and others might be used outside boost. In that case, - # just return the path, since the 'library name' makes no sense. - return path - -# Was an XML dump requested? -__out_xml = option.get("out-xml", False, True) - -# Takes a target (instance of 'basic-target') and prints -# - its type -# - its name -# - comments specified via the property -# - relative location of all source from the project root. -# -def dump_test(target): - type = target.type() - name = target.name() - project = target.project() - - project_root = project.get('project-root') - library = get_library_name(os.path.abspath(project.get('location'))) - if library: - name = library + "/" + name - - sources = target.sources() - source_files = [] - for s in sources: - if isinstance(s, targets.FileReference): - location = os.path.abspath(os.path.join(s.location(), s.name())) - source_files.append(os.path.relpath(location, os.path.abspath(project_root))) - - target_name = project.get('location') + "//" + target.name() + ".test" - - test_info = target.requirements().get('test-info') - test_info = " ".join('"' + ti + '"' for ti in test_info) - - # If the user requested XML output on the command-line, add the test info to - # that XML file rather than dumping them to stdout. 
- #if $(.out-xml) - #{ -# local nl = " -#" ; -# .contents on $(.out-xml) += -# "$(nl) " -# "$(nl) " -# "$(nl) " -# "$(nl) " -# "$(nl) " -# ; -# } -# else - - source_files = " ".join('"' + s + '"' for s in source_files) - if test_info: - print 'boost-test(%s) "%s" [%s] : %s' % (type, name, test_info, source_files) - else: - print 'boost-test(%s) "%s" : %s' % (type, name, source_files) - -# Register generators. Depending on target type, either 'expect-success' or -# 'expect-failure' rule will be used. -generators.register_standard("testing.expect-success", ["OBJ"], ["COMPILE"]) -generators.register_standard("testing.expect-failure", ["OBJ"], ["COMPILE_FAIL"]) -generators.register_standard("testing.expect-success", ["RUN_OUTPUT"], ["RUN"]) -generators.register_standard("testing.expect-failure", ["RUN_OUTPUT"], ["RUN_FAIL"]) -generators.register_standard("testing.expect-success", ["EXE"], ["LINK"]) -generators.register_standard("testing.expect-failure", ["EXE"], ["LINK_FAIL"]) - -# Generator which runs an EXE and captures output. -generators.register_standard("testing.capture-output", ["EXE"], ["RUN_OUTPUT"]) - -# Generator which creates a target if sources run successfully. Differs from RUN -# in that run output is not captured. The reason why it exists is that the 'run' -# rule is much better for automated testing, but is not user-friendly (see -# http://article.gmane.org/gmane.comp.lib.boost.build/6353). -generators.register_standard("testing.unit-test", ["EXE"], ["UNIT_TEST"]) - -# FIXME: if those calls are after bjam.call, then bjam will crash -# when toolset.flags calls bjam.caller. -toolset.flags("testing.capture-output", "ARGS", [], [""]) -toolset.flags("testing.capture-output", "INPUT_FILES", [], [""]) -toolset.flags("testing.capture-output", "LAUNCHER", [], [""]) - -toolset.flags("testing.unit-test", "LAUNCHER", [], [""]) -toolset.flags("testing.unit-test", "ARGS", [], [""]) - -type.register("TIME", ["time"]) -generators.register_standard("testing.time", [], ["TIME"]) - - -# The following code sets up actions for this module. It's pretty convoluted, -# but the basic points is that we most of actions are defined by Jam code -# contained in testing-aux.jam, which we load into Jam module named 'testing' - -def run_path_setup(target, sources, ps): - - # For testing, we need to make sure that all dynamic libraries needed by the - # test are found. So, we collect all paths from dependency libraries (via - # xdll-path property) and add whatever explicit dll-path user has specified. - # The resulting paths are added to the environment on each test invocation. 
- dll_paths = ps.get('dll-path') - dll_paths.extend(ps.get('xdll-path')) - dll_paths.extend(bjam.call("get-target-variable", sources, "RUN_PATH")) - dll_paths = unique(dll_paths) - if dll_paths: - bjam.call("set-target-variable", target, "PATH_SETUP", - common.prepend_path_variable_command( - common.shared_library_path_variable(), dll_paths)) - -def capture_output_setup(target, sources, ps): - run_path_setup(target, sources, ps) - - if ps.get('preserve-test-targets') == ['off']: - bjam.call("set-target-variable", target, "REMOVE_TEST_TARGETS", "1") - -get_manager().engine().register_bjam_action("testing.capture-output", - capture_output_setup) - - -path = os.path.dirname(get_manager().projects().loaded_tool_module_path_[__name__]) -import b2.util.os_j -get_manager().projects().project_rules()._import_rule("testing", "os.name", - b2.util.os_j.name) -import b2.tools.common -get_manager().projects().project_rules()._import_rule("testing", "common.rm-command", - b2.tools.common.rm_command) -get_manager().projects().project_rules()._import_rule("testing", "common.file-creation-command", - b2.tools.common.file_creation_command) - -bjam.call("load", "testing", os.path.join(path, "testing-aux.jam")) - - -for name in ["expect-success", "expect-failure", "time"]: - get_manager().engine().register_bjam_action("testing." + name) - -get_manager().engine().register_bjam_action("testing.unit-test", - run_path_setup) - -if option.get("dump-tests", False, True): - build_system.add_pre_build_hook(dump_tests) diff --git a/jam-files/boost-build/tools/types/__init__.py b/jam-files/boost-build/tools/types/__init__.py deleted file mode 100644 index f972b714..00000000 --- a/jam-files/boost-build/tools/types/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -__all__ = [ - 'asm', - 'cpp', - 'exe', - 'html', - 'lib', - 'obj', - 'rsp', -] - -def register_all (): - for i in __all__: - m = __import__ (__name__ + '.' + i) - reg = i + '.register ()' - #exec (reg) - -# TODO: (PF) I thought these would be imported automatically. Anyone knows why they aren't? -register_all () diff --git a/jam-files/boost-build/tools/types/asm.jam b/jam-files/boost-build/tools/types/asm.jam deleted file mode 100644 index a340db36..00000000 --- a/jam-files/boost-build/tools/types/asm.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright Craig Rodrigues 2005. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type ASM : s S asm ; diff --git a/jam-files/boost-build/tools/types/asm.py b/jam-files/boost-build/tools/types/asm.py deleted file mode 100644 index b4e1c30e..00000000 --- a/jam-files/boost-build/tools/types/asm.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright Craig Rodrigues 2005. -# Copyright (c) 2008 Steven Watanabe -# -# Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register(): - type.register_type('ASM', ['s', 'S', 'asm']) - -register() diff --git a/jam-files/boost-build/tools/types/cpp.jam b/jam-files/boost-build/tools/types/cpp.jam deleted file mode 100644 index 3159cdd7..00000000 --- a/jam-files/boost-build/tools/types/cpp.jam +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright David Abrahams 2004. -# Copyright 2002, 2003, 2004, 2005, 2006 Vladimir Prus -# Copyright 2010 Rene Rivera -# Distributed under the Boost Software License, Version 1.0. 
-# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) -import type ; -import scanner ; - -class c-scanner : scanner -{ - import path ; - import regex ; - import scanner ; - import sequence ; - import virtual-target ; - - rule __init__ ( includes * ) - { - scanner.__init__ ; - - for local i in $(includes) - { - self.includes += [ sequence.transform path.native - : [ regex.split $(i:G=) "&&" ] ] ; - } - } - - rule pattern ( ) - { - return "#[ \t]*include[ ]*(<(.*)>|\"(.*)\")" ; - } - - rule process ( target : matches * : binding ) - { - local angle = [ regex.transform $(matches) : "<(.*)>" ] ; - angle = [ sequence.transform path.native : $(angle) ] ; - local quoted = [ regex.transform $(matches) : "\"(.*)\"" ] ; - quoted = [ sequence.transform path.native : $(quoted) ] ; - - # CONSIDER: the new scoping rule seem to defeat "on target" variables. - local g = [ on $(target) return $(HDRGRIST) ] ; - local b = [ NORMALIZE_PATH $(binding:D) ] ; - - # Attach binding of including file to included targets. When a target is - # directly created from virtual target this extra information is - # unnecessary. But in other cases, it allows us to distinguish between - # two headers of the same name included from different places. We do not - # need this extra information for angle includes, since they should not - # depend on including file (we can not get literal "." in include path). - local g2 = $(g)"#"$(b) ; - - angle = $(angle:G=$(g)) ; - quoted = $(quoted:G=$(g2)) ; - - local all = $(angle) $(quoted) ; - - INCLUDES $(target) : $(all) ; - NOCARE $(all) ; - SEARCH on $(angle) = $(self.includes:G=) ; - SEARCH on $(quoted) = $(b) $(self.includes:G=) ; - - # Just propagate the current scanner to includes in hope that includes - # do not change scanners. - scanner.propagate $(__name__) : $(angle) $(quoted) : $(target) ; - - ISFILE $(angle) $(quoted) ; - } -} - -scanner.register c-scanner : include ; - -type.register CPP : cpp cxx cc ; -type.register H : h ; -type.register HPP : hpp : H ; -type.register C : c ; - -# It most cases where a CPP file or a H file is a source of some action, we -# should rebuild the result if any of files included by CPP/H are changed. One -# case when this is not needed is installation, which is handled specifically. -type.set-scanner CPP : c-scanner ; -type.set-scanner C : c-scanner ; -# One case where scanning of H/HPP files is necessary is PCH generation -- if -# any header included by HPP being precompiled changes, we need to recompile the -# header. -type.set-scanner H : c-scanner ; -type.set-scanner HPP : c-scanner ; diff --git a/jam-files/boost-build/tools/types/cpp.py b/jam-files/boost-build/tools/types/cpp.py deleted file mode 100644 index 7b56111c..00000000 --- a/jam-files/boost-build/tools/types/cpp.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('CPP', ['cpp', 'cxx', 'cc']) - -register () diff --git a/jam-files/boost-build/tools/types/exe.jam b/jam-files/boost-build/tools/types/exe.jam deleted file mode 100644 index 47109513..00000000 --- a/jam-files/boost-build/tools/types/exe.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. 
(See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register EXE ; -type.set-generated-target-suffix EXE : windows : "exe" ; -type.set-generated-target-suffix EXE : cygwin : "exe" ; diff --git a/jam-files/boost-build/tools/types/exe.py b/jam-files/boost-build/tools/types/exe.py deleted file mode 100644 index a4935e24..00000000 --- a/jam-files/boost-build/tools/types/exe.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('EXE', ['exe'], None, ['NT', 'CYGWIN']) - type.register_type ('EXE', [], None, []) - -register () diff --git a/jam-files/boost-build/tools/types/html.jam b/jam-files/boost-build/tools/types/html.jam deleted file mode 100644 index 5cd337d0..00000000 --- a/jam-files/boost-build/tools/types/html.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type HTML : html ; diff --git a/jam-files/boost-build/tools/types/html.py b/jam-files/boost-build/tools/types/html.py deleted file mode 100644 index 63af4d90..00000000 --- a/jam-files/boost-build/tools/types/html.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('HTML', ['html']) - -register () diff --git a/jam-files/boost-build/tools/types/lib.jam b/jam-files/boost-build/tools/types/lib.jam deleted file mode 100644 index 854ab8fd..00000000 --- a/jam-files/boost-build/tools/types/lib.jam +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; # for set-generated-target-suffix -import os ; - -# The following naming scheme is used for libraries. -# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (msvc) -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (mingw): -# libxxx.a static library -# libxxx.dll DLL -# libxxx.dll.a import library -# -# On cygwin i.e. cygwin -# libxxx.a static library -# cygxxx.dll DLL -# libxxx.dll.a import library -# - -type.register LIB ; - -# FIXME: should not register both extensions on both platforms. -type.register STATIC_LIB : a lib : LIB ; - -# The 'lib' prefix is used everywhere -type.set-generated-target-prefix STATIC_LIB : : lib ; - -# Use '.lib' suffix for windows -type.set-generated-target-suffix STATIC_LIB : windows : lib ; - -# Except with gcc. 
-type.set-generated-target-suffix STATIC_LIB : gcc windows : a ; - -# Use xxx.lib for import libs -type IMPORT_LIB : : STATIC_LIB ; -type.set-generated-target-prefix IMPORT_LIB : : "" ; -type.set-generated-target-suffix IMPORT_LIB : : lib ; - -# Except with gcc (mingw or cygwin), where use libxxx.dll.a -type.set-generated-target-prefix IMPORT_LIB : gcc : lib ; -type.set-generated-target-suffix IMPORT_LIB : gcc : dll.a ; - -type.register SHARED_LIB : so dll dylib : LIB ; - -# Both mingw and cygwin use libxxx.dll naming scheme. -# On Linux, use "lib" prefix -type.set-generated-target-prefix SHARED_LIB : : lib ; -# But don't use it on windows -type.set-generated-target-prefix SHARED_LIB : windows : "" ; -# But use it again on mingw -type.set-generated-target-prefix SHARED_LIB : gcc windows : lib ; -# And use 'cyg' on cygwin -type.set-generated-target-prefix SHARED_LIB : cygwin : cyg ; - - -type.set-generated-target-suffix SHARED_LIB : windows : dll ; -type.set-generated-target-suffix SHARED_LIB : cygwin : dll ; -type.set-generated-target-suffix SHARED_LIB : darwin : dylib ; - -type SEARCHED_LIB : : LIB ; -# This is needed so that when we create a target of SEARCHED_LIB -# type, there's no prefix or suffix automatically added. -type.set-generated-target-prefix SEARCHED_LIB : : "" ; -type.set-generated-target-suffix SEARCHED_LIB : : "" ; diff --git a/jam-files/boost-build/tools/types/lib.py b/jam-files/boost-build/tools/types/lib.py deleted file mode 100644 index d0ec1fb5..00000000 --- a/jam-files/boost-build/tools/types/lib.py +++ /dev/null @@ -1,77 +0,0 @@ -# Status: ported -# Base revision: 64456. -# Copyright David Abrahams 2004. -# Copyright Vladimir Prus 2010. -# Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import b2.build.type as type - -# The following naming scheme is used for libraries. -# -# On *nix: -# libxxx.a static library -# libxxx.so shared library -# -# On windows (msvc) -# libxxx.lib static library -# xxx.dll DLL -# xxx.lib import library -# -# On windows (mingw): -# libxxx.a static library -# libxxx.dll DLL -# libxxx.dll.a import library -# -# On cygwin i.e. cygwin -# libxxx.a static library -# cygxxx.dll DLL -# libxxx.dll.a import library -# - -type.register('LIB') - -# FIXME: should not register both extensions on both platforms. -type.register('STATIC_LIB', ['a', 'lib'], 'LIB') - -# The 'lib' prefix is used everywhere -type.set_generated_target_prefix('STATIC_LIB', [], 'lib') - -# Use '.lib' suffix for windows -type.set_generated_target_suffix('STATIC_LIB', ['windows'], 'lib') - -# Except with gcc. -type.set_generated_target_suffix('STATIC_LIB', ['gcc', 'windows'], 'a') - -# Use xxx.lib for import libs -type.register('IMPORT_LIB', [], 'STATIC_LIB') -type.set_generated_target_prefix('IMPORT_LIB', [], '') -type.set_generated_target_suffix('IMPORT_LIB', [], 'lib') - -# Except with gcc (mingw or cygwin), where use libxxx.dll.a -type.set_generated_target_prefix('IMPORT_LIB', ['gcc'], 'lib') -type.set_generated_target_suffix('IMPORT_LIB', ['gcc'], 'dll.a') - -type.register('SHARED_LIB', ['so', 'dll', 'dylib'], 'LIB') - -# Both mingw and cygwin use libxxx.dll naming scheme. 
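The naming-scheme comment repeated in lib.jam and lib.py above (libxxx.a / libxxx.so on *nix, xxx.lib and xxx.dll with msvc, libxxx.dll plus libxxx.dll.a with mingw, cygxxx.dll on cygwin) amounts to a small lookup table. A rough Python sketch of that table; the function name and platform labels are assumptions made for illustration:

    def library_filename(name, kind, platform):
        """Map a library base name to a file name, following the naming scheme
        described in the lib.jam/lib.py comments above (sketch only)."""
        table = {
            # (platform, kind): (prefix, suffix)
            ("linux",  "static"): ("lib", ".a"),
            ("linux",  "shared"): ("lib", ".so"),
            ("darwin", "shared"): ("lib", ".dylib"),
            ("msvc",   "static"): ("lib", ".lib"),
            ("msvc",   "shared"): ("",    ".dll"),
            ("msvc",   "import"): ("",    ".lib"),
            ("mingw",  "static"): ("lib", ".a"),
            ("mingw",  "shared"): ("lib", ".dll"),
            ("mingw",  "import"): ("lib", ".dll.a"),
            ("cygwin", "static"): ("lib", ".a"),
            ("cygwin", "shared"): ("cyg", ".dll"),
            ("cygwin", "import"): ("lib", ".dll.a"),
        }
        prefix, suffix = table[(platform, kind)]
        return prefix + name + suffix

    # library_filename("z", "shared", "cygwin") -> "cygz.dll"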
-# On Linux, use "lib" prefix -type.set_generated_target_prefix('SHARED_LIB', [], 'lib') -# But don't use it on windows -type.set_generated_target_prefix('SHARED_LIB', ['windows'], '') -# But use it again on mingw -type.set_generated_target_prefix('SHARED_LIB', ['gcc', 'windows'], 'lib') -# And use 'cyg' on cygwin -type.set_generated_target_prefix('SHARED_LIB', ['cygwin'], 'cyg') - - -type.set_generated_target_suffix('SHARED_LIB', ['windows'], 'dll') -type.set_generated_target_suffix('SHARED_LIB', ['cygwin'], 'dll') -type.set_generated_target_suffix('SHARED_LIB', ['darwin'], 'dylib') - -type.register('SEARCHED_LIB', [], 'LIB') -# This is needed so that when we create a target of SEARCHED_LIB -# type, there's no prefix or suffix automatically added. -type.set_generated_target_prefix('SEARCHED_LIB', [], '') -type.set_generated_target_suffix('SEARCHED_LIB', [], '') diff --git a/jam-files/boost-build/tools/types/obj.jam b/jam-files/boost-build/tools/types/obj.jam deleted file mode 100644 index 6afbcaa6..00000000 --- a/jam-files/boost-build/tools/types/obj.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register OBJ : o obj ; -type.set-generated-target-suffix OBJ : windows : obj ; -type.set-generated-target-suffix OBJ : cygwin : obj ; diff --git a/jam-files/boost-build/tools/types/obj.py b/jam-files/boost-build/tools/types/obj.py deleted file mode 100644 index e61e99a8..00000000 --- a/jam-files/boost-build/tools/types/obj.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('OBJ', ['obj'], None, ['NT', 'CYGWIN']) - type.register_type ('OBJ', ['o']) - -register () diff --git a/jam-files/boost-build/tools/types/objc.jam b/jam-files/boost-build/tools/types/objc.jam deleted file mode 100644 index 709cbd0c..00000000 --- a/jam-files/boost-build/tools/types/objc.jam +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright Rene Rivera 2008, 2010. -# Distributed under the Boost Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -import type ; -import scanner ; -import types/cpp ; - -class objc-scanner : c-scanner -{ - rule __init__ ( includes * ) - { - c-scanner.__init__ $(includes) ; - } - - rule pattern ( ) - { - return "#[ \t]*include|import[ ]*(<(.*)>|\"(.*)\")" ; - } -} - -scanner.register objc-scanner : include ; - -type.register OBJECTIVE_C : m ; -type.register OBJECTIVE_CPP : mm ; -type.set-scanner OBJECTIVE_C : objc-scanner ; -type.set-scanner OBJECTIVE_CPP : objc-scanner ; diff --git a/jam-files/boost-build/tools/types/preprocessed.jam b/jam-files/boost-build/tools/types/preprocessed.jam deleted file mode 100644 index c9187ba6..00000000 --- a/jam-files/boost-build/tools/types/preprocessed.jam +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright Steven Watanabe 2011 -# Distributed under the Boost Software License Version 1.0. 
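The objc-scanner above widens the include-scanning pattern so that Objective-C '#import' directives are picked up as well as '#include'. A short sketch of the widened pattern; note that the alternation is grouped here as (include|import), which is how the rule appears to be intended to read:

    import re

    # Matches both '#include' and '#import', with either angle or quoted targets.
    OBJC_INCLUDE_RE = re.compile(
        r'#[ \t]*(?:include|import)[ ]*(?:<([^>]*)>|"([^"]*)")')

    print(OBJC_INCLUDE_RE.findall('#import <Foundation/Foundation.h>\n#include "foo.h"'))
    # -> [('Foundation/Foundation.h', ''), ('', 'foo.h')]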
(See -# accompanying file LICENSE_1_0.txt or copy at -# http://www.boost.org/LICENSE_1_0.txt) - -import type ; - -type.register PREPROCESSED_C : i : C ; -type.register PREPROCESSED_CPP : ii : CPP ; diff --git a/jam-files/boost-build/tools/types/qt.jam b/jam-files/boost-build/tools/types/qt.jam deleted file mode 100644 index 6d1dfbd4..00000000 --- a/jam-files/boost-build/tools/types/qt.jam +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright Vladimir Prus 2005. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -type UI : ui ; -type QRC : qrc ; -type MOCCABLE_CPP ; -type MOCCABLE_H ; -# Result of running moc. -type MOC : moc : H ; diff --git a/jam-files/boost-build/tools/types/register.jam b/jam-files/boost-build/tools/types/register.jam deleted file mode 100644 index 203992ca..00000000 --- a/jam-files/boost-build/tools/types/register.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -# This module's job is to automatically import all the type -# registration modules in its directory. -import type os path modules ; - -# Register the given type on the specified OSes, or on remaining OSes -# if os is not specified. This rule is injected into each of the type -# modules for the sake of convenience. -local rule type ( type : suffixes * : base-type ? : os * ) -{ - if ! [ type.registered $(type) ] - { - if ( ! $(os) ) || [ os.name ] in $(os) - { - type.register $(type) : $(suffixes) : $(base-type) ; - } - } -} - -.this-module's-file = [ modules.binding $(__name__) ] ; -.this-module's-dir = [ path.parent $(.this-module's-file) ] ; -.sibling-jamfiles = [ path.glob $(.this-module's-dir) : *.jam ] ; -.sibling-modules = [ MATCH ^(.*)\.jam$ : $(.sibling-jamfiles) ] ; - -# A loop over all modules in this directory -for m in $(.sibling-modules) -{ - m = [ path.basename $(m) ] ; - m = types/$(m) ; - - # Inject the type rule into the new module - IMPORT $(__name__) : type : $(m) : type ; - import $(m) ; -} - - diff --git a/jam-files/boost-build/tools/types/rsp.jam b/jam-files/boost-build/tools/types/rsp.jam deleted file mode 100644 index bdf8a7c9..00000000 --- a/jam-files/boost-build/tools/types/rsp.jam +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -type RSP : rsp ; diff --git a/jam-files/boost-build/tools/types/rsp.py b/jam-files/boost-build/tools/types/rsp.py deleted file mode 100644 index ccb379e9..00000000 --- a/jam-files/boost-build/tools/types/rsp.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright David Abrahams 2004. Distributed under the Boost -# Software License, Version 1.0. (See accompanying -# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -from b2.build import type - -def register (): - type.register_type ('RSP', ['rsp']) - -register () diff --git a/jam-files/boost-build/tools/unix.jam b/jam-files/boost-build/tools/unix.jam deleted file mode 100644 index 75949851..00000000 --- a/jam-files/boost-build/tools/unix.jam +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright (c) 2004 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. 
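register.jam above globs its sibling type modules and injects a convenience 'type' rule into each of them, so that a type is registered only once and only on the operating systems it names. A minimal sketch of that convenience logic; the function, the registry dict and the OS strings are illustrative assumptions (the original uses names such as NT and CYGWIN):

    import platform

    _registered = {}   # type name -> suffixes it was registered with

    def register_type(name, suffixes=(), base=None, oses=()):
        """Register 'name' unless it already exists, and only when the current
        OS is listed in 'oses' (an empty 'oses' means every OS)."""
        if name in _registered:
            return                                   # first registration wins
        if oses and platform.system() not in oses:
            return                                   # not meant for this OS
        _registered[name] = list(suffixes)

    # The obj.py registrations above then read roughly as:
    register_type("OBJ", ["obj"], oses=["Windows"])  # 'NT', 'CYGWIN' in the original
    register_type("OBJ", ["o"])                      # used where the first call was skipped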
(See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# This file implements linking semantic common to all unixes. On unix, static -# libraries must be specified in a fixed order on the linker command line. Generators -# declared there store information about the order and use it property. - -import feature ; -import "class" : new ; -import generators ; -import type ; -import set ; -import order ; -import builtin ; - -class unix-linking-generator : linking-generator -{ - import property-set ; - import type ; - import unix ; - - rule __init__ ( id - composing ? : # Specify if generator is composing. The generator will be - # composing if non-empty string is passed, or parameter is - # not given. To make generator non-composing, pass empty - # string ("") - source-types + : target-types + : - requirements * ) - { - composing ?= true ; - generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) : - $(requirements) ; - } - - rule run ( project name ? : property-set : sources + ) - { - local result = [ linking-generator.run $(project) $(name) : $(property-set) - : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - } - - rule generated-targets ( sources + : property-set : project name ? ) - { - local sources2 ; - local libraries ; - for local l in $(sources) - { - if [ type.is-derived [ $(l).type ] LIB ] - { - libraries += $(l) ; - } - else - { - sources2 += $(l) ; - } - } - - sources = $(sources2) [ unix.order-libraries $(libraries) ] ; - - return [ linking-generator.generated-targets $(sources) : $(property-set) - : $(project) $(name) ] ; - } - -} - -class unix-archive-generator : archive-generator -{ - import unix ; - - rule __init__ ( id composing ? : source-types + : target-types + : - requirements * ) - { - composing ?= true ; - archive-generator.__init__ $(id) $(composing) : $(source-types) : $(target-types) : - $(requirements) ; - } - - rule run ( project name ? : property-set : sources + ) - { - local result = [ archive-generator.run $(project) $(name) : $(property-set) - : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - - } -} - -class unix-searched-lib-generator : searched-lib-generator -{ - import unix ; - rule __init__ ( * : * ) - { - generator.__init__ - $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule optional-properties ( ) - { - return $(self.requirements) ; - } - - rule run ( project name ? : property-set : sources * ) - { - local result = [ searched-lib-generator.run $(project) $(name) - : $(property-set) : $(sources) ] ; - - unix.set-library-order $(sources) : $(property-set) : $(result[2-]) ; - - return $(result) ; - } -} - -class unix-prebuilt-lib-generator : generator -{ - import unix ; - rule __init__ ( * : * ) - { - generator.__init__ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - - rule run ( project name ? 
: property-set : sources * ) - { - local f = [ $(property-set).get ] ; - unix.set-library-order-aux $(f) : $(sources) ; - return $(f) $(sources) ; - } -} - -generators.register - [ new unix-prebuilt-lib-generator unix.prebuilt : : LIB - : unix ] ; - -generators.override unix.prebuilt : builtin.lib-generator ; - - -# Declare generators -generators.register [ new unix-linking-generator unix.link : LIB OBJ : EXE - : unix ] ; - -generators.register [ new unix-archive-generator unix.archive : OBJ : STATIC_LIB - : unix ] ; - -generators.register [ new unix-linking-generator unix.link.dll : LIB OBJ : SHARED_LIB - : unix ] ; - -generators.register [ new unix-searched-lib-generator - unix.searched-lib-generator : : SEARCHED_LIB : unix ] ; - - -# The derived toolset must specify their own actions. -actions link { -} - -actions link.dll { -} - -actions archive { -} - -actions searched-lib-generator { -} - -actions prebuilt { -} - - - - - -.order = [ new order ] ; - -rule set-library-order-aux ( from * : to * ) -{ - for local f in $(from) - { - for local t in $(to) - { - if $(f) != $(t) - { - $(.order).add-pair $(f) $(t) ; - } - } - } -} - -rule set-library-order ( sources * : property-set : result * ) -{ - local used-libraries ; - local deps = [ $(property-set).dependency ] ; - for local l in $(sources) $(deps:G=) - { - if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ] - { - used-libraries += $(l) ; - } - } - - local created-libraries ; - for local l in $(result) - { - if [ $(l).type ] && [ type.is-derived [ $(l).type ] LIB ] - { - created-libraries += $(l) ; - } - } - - created-libraries = [ set.difference $(created-libraries) : $(used-libraries) ] ; - set-library-order-aux $(created-libraries) : $(used-libraries) ; -} - -rule order-libraries ( libraries * ) -{ - local r = [ $(.order).order $(libraries) ] ; - return $(r) ; -} - \ No newline at end of file diff --git a/jam-files/boost-build/tools/unix.py b/jam-files/boost-build/tools/unix.py deleted file mode 100644 index d409c2e4..00000000 --- a/jam-files/boost-build/tools/unix.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2004 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -""" This file implements linking semantics common to all unixes. On unix, static - libraries must be specified in a fixed order on the linker command line. Generators - declared there store information about the order and use it properly. 
-""" - -import builtin -from b2.build import generators, type -from b2.util.utility import * -from b2.util import set, sequence - -class UnixLinkingGenerator (builtin.LinkingGenerator): - - def __init__ (self, id, composing, source_types, target_types, requirements): - builtin.LinkingGenerator.__init__ (self, id, composing, source_types, target_types, requirements) - - def run (self, project, name, prop_set, sources): - result = builtin.LinkingGenerator.run (self, project, name, prop_set, sources) - if result: - set_library_order (project.manager (), sources, prop_set, result [1]) - - return result - - def generated_targets (self, sources, prop_set, project, name): - sources2 = [] - libraries = [] - for l in sources: - if type.is_derived (l.type (), 'LIB'): - libraries.append (l) - - else: - sources2.append (l) - - sources = sources2 + order_libraries (libraries) - - return builtin.LinkingGenerator.generated_targets (self, sources, prop_set, project, name) - - -class UnixArchiveGenerator (builtin.ArchiveGenerator): - def __init__ (self, id, composing, source_types, target_types_and_names, requirements): - builtin.ArchiveGenerator.__init__ (self, id, composing, source_types, target_types_and_names, requirements) - - def run (self, project, name, prop_set, sources): - result = builtin.ArchiveGenerator.run(self, project, name, prop_set, sources) - set_library_order(project.manager(), sources, prop_set, result) - return result - -class UnixSearchedLibGenerator (builtin.SearchedLibGenerator): - - def __init__ (self): - builtin.SearchedLibGenerator.__init__ (self) - - def optional_properties (self): - return self.requirements () - - def run (self, project, name, prop_set, sources, multiple): - result = SearchedLibGenerator.run (project, name, prop_set, sources, multiple) - - set_library_order (sources, prop_set, result) - - return result - -class UnixPrebuiltLibGenerator (generators.Generator): - def __init__ (self, id, composing, source_types, target_types_and_names, requirements): - generators.Generator.__init__ (self, id, composing, source_types, target_types_and_names, requirements) - - def run (self, project, name, prop_set, sources, multiple): - f = prop_set.get ('') - set_library_order_aux (f, sources) - return (f, sources) - -### # The derived toolset must specify their own rules and actions. -# FIXME: restore? -# action.register ('unix.prebuilt', None, None) - - -generators.register (UnixPrebuiltLibGenerator ('unix.prebuilt', False, [], ['LIB'], ['', 'unix'])) - - - - - -### # Declare generators -### generators.register [ new UnixLinkingGenerator unix.link : LIB OBJ : EXE -### : unix ] ; -generators.register (UnixArchiveGenerator ('unix.archive', True, ['OBJ'], ['STATIC_LIB'], ['unix'])) - -### generators.register [ new UnixLinkingGenerator unix.link.dll : LIB OBJ : SHARED_LIB -### : unix ] ; -### -### generators.register [ new UnixSearchedLibGenerator -### unix.SearchedLibGenerator : : SEARCHED_LIB : unix ] ; -### -### -### # The derived toolset must specify their own actions. -### actions link { -### } -### -### actions link.dll { -### } - -def unix_archive (manager, targets, sources, properties): - pass - -# FIXME: restore? 
-#action.register ('unix.archive', unix_archive, ['']) - -### actions searched-lib-generator { -### } -### -### actions prebuilt { -### } - - -from b2.util.order import Order -__order = Order () - -def set_library_order_aux (from_libs, to_libs): - for f in from_libs: - for t in to_libs: - if f != t: - __order.add_pair (f, t) - -def set_library_order (manager, sources, prop_set, result): - used_libraries = [] - deps = prop_set.dependency () - - sources.extend(d.value() for d in deps) - sources = sequence.unique(sources) - - for l in sources: - if l.type () and type.is_derived (l.type (), 'LIB'): - used_libraries.append (l) - - created_libraries = [] - for l in result: - if l.type () and type.is_derived (l.type (), 'LIB'): - created_libraries.append (l) - - created_libraries = set.difference (created_libraries, used_libraries) - set_library_order_aux (created_libraries, used_libraries) - -def order_libraries (libraries): - return __order.order (libraries) - diff --git a/jam-files/boost-build/tools/vacpp.jam b/jam-files/boost-build/tools/vacpp.jam deleted file mode 100644 index f4080fc0..00000000 --- a/jam-files/boost-build/tools/vacpp.jam +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright Vladimir Prus 2004. -# Copyright Toon Knapen 2004. -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt -# or copy at http://www.boost.org/LICENSE_1_0.txt) - -# -# Boost.Build V2 toolset for the IBM XL C++ compiler -# - -import toolset : flags ; -import feature ; -import common ; -import generators ; -import os ; - -feature.extend toolset : vacpp ; -toolset.inherit vacpp : unix ; -generators.override vacpp.prebuilt : builtin.prebuilt ; -generators.override vacpp.searched-lib-generator : searched-lib-generator ; - -# Configure the vacpp toolset -rule init ( version ? : command * : options * ) -{ - local condition = [ - common.check-init-parameters vacpp : version $(version) ] ; - - command = [ common.get-invocation-command vacpp : xlC - : $(command) : "/usr/vacpp/bin/xlC" ] ; - - common.handle-options vacpp : $(condition) : $(command) : $(options) ; -} - -# Declare generators -generators.register-c-compiler vacpp.compile.c : C : OBJ : vacpp ; -generators.register-c-compiler vacpp.compile.c++ : CPP : OBJ : vacpp ; - -# Allow C++ style comments in C files -flags vacpp CFLAGS : -qcpluscmt ; - -# Declare flags -flags vacpp CFLAGS off : -qNOOPTimize ; -flags vacpp CFLAGS speed : -O3 -qstrict ; -flags vacpp CFLAGS space : -O2 -qcompact ; - -# Discretionary inlining (not recommended) -flags vacpp CFLAGS off : -qnoinline ; -flags vacpp CFLAGS on : -qinline ; -#flags vacpp CFLAGS full : -qinline ; -flags vacpp CFLAGS full : ; - -# Exception handling -flags vacpp C++FLAGS off : -qnoeh ; -flags vacpp C++FLAGS on : -qeh ; - -# Run-time Type Identification -flags vacpp C++FLAGS off : -qnortti ; -flags vacpp C++FLAGS on : -qrtti ; - -# Enable 64-bit memory addressing model -flags vacpp CFLAGS 64 : -q64 ; -flags vacpp LINKFLAGS 64 : -q64 ; -flags vacpp ARFLAGS aix/64 : -X 64 ; - -# Use absolute path when generating debug information -flags vacpp CFLAGS on : -g -qfullpath ; -flags vacpp LINKFLAGS on : -g -qfullpath ; -flags vacpp LINKFLAGS off : -s ; - -if [ os.name ] = AIX -{ - flags vacpp.compile C++FLAGS : -qfuncsect ; - - # The -bnoipath strips the prepending (relative) path of libraries from - # the loader section in the target library or executable. 
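set_library_order above records, for every library created by a link or archive step, that it must precede the libraries it was linked against, and order_libraries later replays those constraints on the link command line. A self-contained sketch of that bookkeeping; this is a naive stand-in for the Order class imported above, with no real cycle handling:

    class LibraryOrder:
        """Collect 'a must precede b' pairs and emit libraries in a compatible order."""

        def __init__(self):
            self.must_precede = {}          # lib -> set of libs that must come after it

        def add_pair(self, first, second):
            self.must_precede.setdefault(first, set()).add(second)
            self.must_precede.setdefault(second, set())

        def order(self, libs):
            remaining = list(libs)
            result = []
            while remaining:
                for lib in remaining:
                    # Pick a library that no other remaining library must precede.
                    if not any(lib in self.must_precede.get(other, ())
                               for other in remaining if other != lib):
                        result.append(lib)
                        remaining.remove(lib)
                        break
                else:
                    result.extend(remaining)   # constraint cycle: keep the given order
                    break
            return result

    order = LibraryOrder()
    order.add_pair("libapp_util", "libz")        # app_util was linked against z
    print(order.order(["libz", "libapp_util"]))  # -> ['libapp_util', 'libz']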
Hence, during - # load-time LIBPATH (identical to LD_LIBRARY_PATH) or a hard-coded - # -blibpath (*similar* to -lrpath/-lrpath-link) is searched. Without - # this option, the prepending (relative) path + library name is - # hard-coded in the loader section, causing *only* this path to be - # searched during load-time. Note that the AIX linker does not have an - # -soname equivalent, this is as close as it gets. - # - # The above options are definately for AIX 5.x, and most likely also for - # AIX 4.x and AIX 6.x. For details about the AIX linker see: - # http://download.boulder.ibm.com/ibmdl/pub/software/dw/aix/es-aix_ll.pdf - # - flags vacpp.link LINKFLAGS shared : -bnoipath ; - - # Run-time linking - flags vacpp.link EXE-LINKFLAGS shared : -brtl ; -} -else -{ - # Linux PPC - flags vacpp.compile CFLAGS shared : -qpic=large ; - flags vacpp FINDLIBS : rt ; -} - -# Profiling -flags vacpp CFLAGS on : -pg ; -flags vacpp LINKFLAGS on : -pg ; - -flags vacpp.compile OPTIONS ; -flags vacpp.compile.c++ OPTIONS ; -flags vacpp DEFINES ; -flags vacpp UNDEFS ; -flags vacpp HDRS ; -flags vacpp STDHDRS ; -flags vacpp.link OPTIONS ; -flags vacpp ARFLAGS ; - -flags vacpp LIBPATH ; -flags vacpp NEEDLIBS ; -flags vacpp FINDLIBS ; -flags vacpp FINDLIBS ; - -# Select the compiler name according to the threading model. -flags vacpp VA_C_COMPILER single : xlc ; -flags vacpp VA_C_COMPILER multi : xlc_r ; -flags vacpp VA_CXX_COMPILER single : xlC ; -flags vacpp VA_CXX_COMPILER multi : xlC_r ; - -SPACE = " " ; - -flags vacpp.link.dll HAVE_SONAME linux : "" ; - -actions vacpp.link bind NEEDLIBS -{ - $(VA_CXX_COMPILER) $(EXE-LINKFLAGS) $(LINKFLAGS) -o "$(<[1])" -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS) -} - -actions vacpp.link.dll bind NEEDLIBS -{ - xlC_r -G $(LINKFLAGS) -o "$(<[1])" $(HAVE_SONAME)-Wl,-soname$(SPACE)-Wl,$(<[-1]:D=) -L$(LIBPATH) -L$(STDLIBPATH) "$(>)" "$(NEEDLIBS)" "$(NEEDLIBS)" -l$(FINDLIBS) $(OPTIONS) $(USER_OPTIONS) -} - -actions vacpp.compile.c -{ - $(VA_C_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" -} - -actions vacpp.compile.c++ -{ - $(VA_CXX_COMPILER) -c $(OPTIONS) $(USER_OPTIONS) -I$(BOOST_ROOT) -U$(UNDEFS) -D$(DEFINES) $(CFLAGS) $(C++FLAGS) -I"$(HDRS)" -I"$(STDHDRS)" -o "$(<)" "$(>)" -} - -actions updated together piecemeal vacpp.archive -{ - ar $(ARFLAGS) ru "$(<)" "$(>)" -} diff --git a/jam-files/boost-build/tools/whale.jam b/jam-files/boost-build/tools/whale.jam deleted file mode 100644 index 9335ff0c..00000000 --- a/jam-files/boost-build/tools/whale.jam +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (C) Vladimir Prus 2002-2005. - -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# This module implements support for Whale/Dolphin/WD parser/lexer tools. -# See http://www.cs.queensu.ca/home/okhotin/whale/ for details. 
-# -# There are three interesting target types: -# - WHL (the parser sources), that are converted to CPP and H -# - DLP (the lexer sources), that are converted to CPP and H -# - WD (combined parser/lexer sources), that are converted to WHL + DLP - -import type ; -import generators ; -import path ; -import "class" : new ; -import errors ; - -rule init ( path # path the Whale/Dolphin/WD binaries - ) -{ - if $(.configured) && $(.path) != $(path) - { - errors.user-error "Attempt to reconfigure Whale support" : - "Previously configured with path \"$(.path:E=)\"" : - "Now configuring with path \"$(path:E=)\"" ; - - } - .configured = true ; - .path = $(path) ; - - .whale = [ path.join $(path) whale ] ; - .dolphin = [ path.join $(path) dolphin ] ; - .wd = [ path.join $(path) wd ] ; -} - - -# Declare the types. -type.register WHL : whl ; -type.register DLP : dlp ; -type.register WHL_LR0 : lr0 ; -type.register WD : wd ; - -# Declare standard generators. -generators.register-standard whale.whale : WHL : CPP H H(%_symbols) ; -generators.register-standard whale.dolphin : DLP : CPP H ; -generators.register-standard whale.wd : WD : WHL(%_parser) DLP(%_lexer) ; - -# The conversions defines above a ambiguious when we generated CPP from WD. -# We can either go via WHL type, or via DLP type. -# The following custom generator handles this by running both conversions. - -class wd-to-cpp : generator -{ - rule __init__ ( * : * : * ) - { - generator.__init__ $(1) : $(2) : $(3) ; - } - - rule run ( project name ? : property-set : source * ) - { - if ! $(source[2]) - { - local new-sources ; - if ! [ $(source).type ] in WHL DLP - { - local r1 = [ generators.construct $(project) $(name) - : WHL : $(property-set) : $(source) ] ; - local r2 = [ generators.construct $(project) $(name) - : DLP : $(property-set) : $(source) ] ; - - new-sources = [ sequence.unique $(r1[2-]) $(r2[2-]) ] ; - } - else - { - new-sources = $(source) ; - } - - local result ; - for local i in $(new-sources) - { - local t = [ generators.construct $(project) $(name) : CPP - : $(property-set) : $(i) ] ; - result += $(t[2-]) ; - } - return $(result) ; - } - } - -} - - -generators.override whale.wd-to-cpp : whale.whale ; -generators.override whale.wd-to-cpp : whale.dolphin ; - - -generators.register [ new wd-to-cpp whale.wd-to-cpp : : CPP ] ; - - -actions whale -{ - $(.whale) -d $(<[1]:D) $(>) -} - -actions dolphin -{ - $(.dolphin) -d $(<[1]:D) $(>) -} - -actions wd -{ - $(.wd) -d $(<[1]:D) -g $(>) -} - diff --git a/jam-files/boost-build/tools/xlf.jam b/jam-files/boost-build/tools/xlf.jam deleted file mode 100644 index e7fcc608..00000000 --- a/jam-files/boost-build/tools/xlf.jam +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2004 Toon Knapen -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# -# toolset configuration for the IBM Fortran compiler (xlf) -# - -import toolset : flags ; -import feature ; -import fortran ; - -rule init ( version ? 
: command * : options * ) -{ -} - -# Declare flags and action for compilation -flags xlf OPTIONS off : -O0 ; -flags xlf OPTIONS speed : -O3 ; -flags xlf OPTIONS space : -Os ; - -flags xlf OPTIONS on : -g ; -flags xlf OPTIONS on : -pg ; - -flags xlf DEFINES ; -flags xlf INCLUDES ; - -rule compile-fortran -{ -} - -actions compile-fortran -{ - xlf $(OPTIONS) -I$(INCLUDES) -c -o "$(<)" "$(>)" -} - -generators.register-fortran-compiler xlf.compile-fortran : FORTRAN : OBJ ; diff --git a/jam-files/boost-build/tools/xsltproc-config.jam b/jam-files/boost-build/tools/xsltproc-config.jam deleted file mode 100644 index de54a2eb..00000000 --- a/jam-files/boost-build/tools/xsltproc-config.jam +++ /dev/null @@ -1,37 +0,0 @@ -#~ Copyright 2005 Rene Rivera. -#~ Distributed under the Boost Software License, Version 1.0. -#~ (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Automatic configuration for Python tools and librries. To use, just import this module. - -import os ; -import toolset : using ; - -if [ os.name ] = NT -{ - local xsltproc-path = [ GLOB [ modules.peek : PATH ] "C:\\Boost\\bin" : xsltproc\.exe ] ; - xsltproc-path = $(xsltproc-path[1]) ; - - if $(xsltproc-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using xsltproc ":" $(xsltproc-path) ; - } - using xsltproc : $(xsltproc-path) ; - } -} -else -{ - local xsltproc-path = [ GLOB [ modules.peek : PATH ] : xsltproc ] ; - xsltproc-path = $(xsltproc-path[1]) ; - - if $(xsltproc-path) - { - if --debug-configuration in [ modules.peek : ARGV ] - { - ECHO "notice:" using xsltproc ":" $(xsltproc-path) ; - } - using xsltproc : $(xsltproc-path) ; - } -} diff --git a/jam-files/boost-build/tools/xsltproc.jam b/jam-files/boost-build/tools/xsltproc.jam deleted file mode 100644 index 96f5170b..00000000 --- a/jam-files/boost-build/tools/xsltproc.jam +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (C) 2003 Doug Gregor. Permission to copy, use, modify, sell and -# distribute this software is granted provided this copyright notice appears in -# all copies. This software is provided "as is" without express or implied -# warranty, and with no claim as to its suitability for any purpose. - -# This module defines rules to apply an XSLT stylesheet to an XML file using the -# xsltproc driver, part of libxslt. -# -# Note: except for 'init', this modules does not provide any rules for end -# users. - -import feature ; -import regex ; -import sequence ; -import common ; -import os ; -import modules ; -import path ; -import errors ; - -feature.feature xsl:param : : free ; -feature.feature xsl:path : : free ; -feature.feature catalog : : free ; - - -# Initialize xsltproc support. The parameters are: -# xsltproc: The xsltproc executable -# -rule init ( xsltproc ? ) -{ - if $(xsltproc) - { - modify-config ; - .xsltproc = $(xsltproc) ; - check-xsltproc ; - } -} - -rule freeze-config ( ) -{ - if ! $(.config-frozen) - { - .config-frozen = true ; - .xsltproc ?= [ modules.peek : XSLTPROC ] ; - .xsltproc ?= xsltproc ; - check-xsltproc ; - .is-cygwin = [ .is-cygwin $(.xsltproc) ] ; - } -} - -rule modify-config -{ - if $(.config-frozen) - { - errors.user-error "xsltproc: Cannot change xsltproc command after it has been used." ; - } -} - -rule check-xsltproc ( ) -{ - if $(.xsltproc) - { - local status = [ SHELL "\"$(.xsltproc)\" -V" : no-output : exit-status ] ; - if $(status[2]) != "0" - { - errors.user-error "xsltproc: Could not run \"$(.xsltproc)\" -V." 
; - } - } -} - -# Returns a non-empty string if a cygwin xsltproc binary was specified. -rule is-cygwin ( ) -{ - freeze-config ; - return $(.is-cygwin) ; -} - -rule .is-cygwin ( xsltproc ) -{ - if [ os.on-windows ] - { - local file = [ path.make [ modules.binding $(__name__) ] ] ; - local dir = [ path.native - [ path.join [ path.parent $(file) ] xsltproc ] ] ; - if [ os.name ] = CYGWIN - { - dir = $(dir:W) ; - } - local command = - "\"$(xsltproc)\" \"$(dir)\\test.xsl\" \"$(dir)\\test.xml\" 2>&1" ; - local status = [ SHELL $(command) : no-output : exit-status ] ; - if $(status[2]) != "0" - { - return true ; - } - } -} - -rule compute-xslt-flags ( target : properties * ) -{ - local flags ; - - # Raw flags. - flags += [ feature.get-values : $(properties) ] ; - - # Translate into command line flags. - for local param in [ feature.get-values : $(properties) ] - { - local namevalue = [ regex.split $(param) "=" ] ; - flags += --stringparam $(namevalue[1]) \"$(namevalue[2])\" ; - } - - # Translate . - for local path in [ feature.get-values : $(properties) ] - { - flags += --path \"$(path:G=)\" ; - } - - # Take care of implicit dependencies. - local other-deps ; - for local dep in [ feature.get-values : $(properties) ] - { - other-deps += [ $(dep:G=).creating-subvariant ] ; - } - - local implicit-target-directories ; - for local dep in [ sequence.unique $(other-deps) ] - { - implicit-target-directories += [ $(dep).all-target-directories ] ; - } - - for local dir in $(implicit-target-directories) - { - flags += --path \"$(dir:T)\" ; - } - - return $(flags) ; -} - - -local rule .xsltproc ( target : source stylesheet : properties * : dirname ? : action ) -{ - freeze-config ; - STYLESHEET on $(target) = $(stylesheet) ; - FLAGS on $(target) += [ compute-xslt-flags $(target) : $(properties) ] ; - NAME on $(target) = $(.xsltproc) ; - - for local catalog in [ feature.get-values : $(properties) ] - { - CATALOG = [ common.variable-setting-command XML_CATALOG_FILES : $(catalog:T) ] ; - } - - if [ os.on-windows ] && ! 
[ is-cygwin ] - { - action = $(action).windows ; - } - - $(action) $(target) : $(source) ; -} - - -rule xslt ( target : source stylesheet : properties * ) -{ - return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) : : xslt-xsltproc ] ; -} - - -rule xslt-dir ( target : source stylesheet : properties * : dirname ) -{ - return [ .xsltproc $(target) : $(source) $(stylesheet) : $(properties) : $(dirname) : xslt-xsltproc-dir ] ; -} - -actions xslt-xsltproc.windows -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:W)" "$(>:W)" -} - - -actions xslt-xsltproc bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<)" "$(STYLESHEET:T)" "$(>:T)" -} - - -actions xslt-xsltproc-dir.windows bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:W)" "$(>:W)" -} - - -actions xslt-xsltproc-dir bind STYLESHEET -{ - $(CATALOG) "$(NAME:E=xsltproc)" $(FLAGS) --xinclude -o "$(<:D)/" "$(STYLESHEET:T)" "$(>:T)" -} diff --git a/jam-files/boost-build/tools/xsltproc/included.xsl b/jam-files/boost-build/tools/xsltproc/included.xsl deleted file mode 100644 index ef86394a..00000000 --- a/jam-files/boost-build/tools/xsltproc/included.xsl +++ /dev/null @@ -1,11 +0,0 @@ - - - - diff --git a/jam-files/boost-build/tools/xsltproc/test.xml b/jam-files/boost-build/tools/xsltproc/test.xml deleted file mode 100644 index 57c8ba18..00000000 --- a/jam-files/boost-build/tools/xsltproc/test.xml +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/jam-files/boost-build/tools/xsltproc/test.xsl b/jam-files/boost-build/tools/xsltproc/test.xsl deleted file mode 100644 index a142c91d..00000000 --- a/jam-files/boost-build/tools/xsltproc/test.xsl +++ /dev/null @@ -1,12 +0,0 @@ - - - - - diff --git a/jam-files/boost-build/tools/zlib.jam b/jam-files/boost-build/tools/zlib.jam deleted file mode 100644 index f9138fd5..00000000 --- a/jam-files/boost-build/tools/zlib.jam +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2010 Vladimir Prus. -# -# Use, modification and distribution is subject to the Boost Software -# License Version 1.0. (See accompanying file LICENSE_1_0.txt or -# http://www.boost.org/LICENSE_1_0.txt) - -# Supports the zlib library -# -# After 'using zlib', the following targets are available: -# -# /zlib//zlib -- The zlib library - - -# In addition to direct purpose of supporting zlib, this module also -# serves as canonical example of how third-party condiguration works -# in Boost.Build. The operation is as follows -# -# - For each 'using zlib : condition ... : ...' we create a target alternative -# for zlib, with the specified condition. -# - There's one target alternative for 'zlib' with no specific condition -# properties. -# -# Two invocations of 'using zlib' with the same condition but different -# properties are not permitted, e.g.: -# -# using zlib : condition windows : include foo ; -# using zlib : condition windows : include bar ; -# -# is in error. One exception is for empty condition, 'using' without any -# parameters is overridable. That is: -# -# using zlib ; -# using zlib : include foo ; -# -# Is OK then the first 'using' is ignored. Likewise if the order of the statements -# is reversed. -# -# When 'zlib' target is built, a target alternative is selected as usual for -# Boost.Build. The selected alternative is a custom target class, which: -# -# - calls ac.find-include-path to find header path. 
If explicit path is provided -# in 'using', only that path is checked, and if no header is found there, error -# is emitted. Otherwise, we check a directory specified using ZLIB_INCLUDE -# environment variable, and failing that, in standard directories. -# [TODO: document sysroot handling] -# - calls ac.find-library to find the library, in an identical fashion. -# - -import project ; -import ac ; -import errors ; -import "class" : new ; -import targets ; - -project.initialize $(__name__) ; -project = [ project.current ] ; -project zlib ; - -header = zlib.h ; -names = z zlib zll zdll ; - -.default-alternative = [ new ac-library zlib : $(project) ] ; -$(.default-alternative).set-header $(header) ; -$(.default-alternative).set-default-names $(names) ; -targets.main-target-alternative $(.default-alternative) ; - -rule init ( * : * ) -{ - if ! $(condition) - { - # Special case the no-condition case so that 'using' without parameters - # can mix with more specific 'using'. - $(.default-alternative).reconfigure $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ; - } - else - { - # FIXME: consider if we should allow overriding definitions for a given - # condition -- e.g. project-config.jam might want to override whatever is - # in user-config.jam. - local mt = [ new ac-library zlib : $(project) - : $(1) : $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - $(mt).set-header $(header) ; - $(mt).set-default-names $(names) ; - targets.main-target-alternative $(mt) ; - } -} - - - - - - diff --git a/jam-files/boost-build/user-config.jam b/jam-files/boost-build/user-config.jam deleted file mode 100644 index fbbf13fd..00000000 --- a/jam-files/boost-build/user-config.jam +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2003, 2005 Douglas Gregor -# Copyright 2004 John Maddock -# Copyright 2002, 2003, 2004, 2007 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# This file is used to configure your Boost.Build installation. You can modify -# this file in place, or you can place it in a permanent location so that it -# does not get overwritten should you get a new version of Boost.Build. See: -# -# http://www.boost.org/boost-build2/doc/html/bbv2/overview/configuration.html -# -# for documentation about possible permanent locations. - -# This file specifies which toolsets (C++ compilers), libraries, and other -# tools are available. Often, you should be able to just uncomment existing -# example lines and adjust them to taste. The complete list of supported tools, -# and configuration instructions can be found at: -# -# http://boost.org/boost-build2/doc/html/bbv2/reference/tools.html -# - -# This file uses Jam language syntax to describe available tools. Mostly, -# there are 'using' lines, that contain the name of the used tools, and -# parameters to pass to those tools -- where paremeters are separated by -# semicolons. Important syntax notes: -# -# - Both ':' and ';' must be separated from other tokens by whitespace -# - The '\' symbol is a quote character, so when specifying Windows paths you -# should use '/' or '\\' instead. -# -# More details about the syntax can be found at: -# -# http://boost.org/boost-build2/doc/html/bbv2/advanced.html#bbv2.advanced.jam_language -# - -# ------------------ -# GCC configuration. -# ------------------ - -# Configure gcc (default version). -# using gcc ; - -# Configure specific gcc version, giving alternative name to use. 
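The zlib.jam comments above describe how the selected target alternative locates the header: an explicit include path given to 'using zlib' is tried exclusively, otherwise the ZLIB_INCLUDE environment variable is consulted, and finally standard directories. A minimal sketch of that search order; the function name and the list of standard directories are assumptions:

    import os

    def find_zlib_include(explicit_path=None,
                          standard_dirs=("/usr/include", "/usr/local/include")):
        """Return a directory containing zlib.h, following the search order
        described in the zlib.jam comments above (sketch only)."""
        if explicit_path is not None:
            # Only the explicitly configured path is checked; failure is an error.
            if os.path.exists(os.path.join(explicit_path, "zlib.h")):
                return explicit_path
            raise RuntimeError("zlib.h not found in %s" % explicit_path)

        env_dir = os.environ.get("ZLIB_INCLUDE")
        candidates = ([env_dir] if env_dir else []) + list(standard_dirs)
        for d in candidates:
            if os.path.exists(os.path.join(d, "zlib.h")):
                return d
        return None   # let the caller report the failure

    # find_zlib_include()          -> e.g. '/usr/include' on most Linux systems
    # find_zlib_include("/opt/z")  -> '/opt/z', or an error if zlib.h is missing there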
-# using gcc : 3.2 : g++-3.2 ; - - -# ------------------- -# MSVC configuration. -# ------------------- - -# Configure msvc (default version, searched for in standard locations and PATH). -# using msvc ; - -# Configure specific msvc version (searched for in standard locations and PATH). -# using msvc : 8.0 ; - - -# ---------------------- -# Borland configuration. -# ---------------------- -# using borland ; - - -# ---------------------- -# STLPort configuration. -# ---------------------- - -# Configure specifying location of STLPort headers. Libraries must be either -# not needed or available to the compiler by default. -# using stlport : : /usr/include/stlport ; - -# Configure specifying location of both headers and libraries explicitly. -# using stlport : : /usr/include/stlport /usr/lib ; - - -# ----------------- -# QT configuration. -# ----------------- - -# Configure assuming QTDIR gives the installation prefix. -# using qt ; - -# Configure with an explicit installation prefix. -# using qt : /usr/opt/qt ; - -# --------------------- -# Python configuration. -# --------------------- - -# Configure specific Python version. -# using python : 3.1 : /usr/bin/python3 : /usr/include/python3.1 : /usr/lib ; diff --git a/jam-files/boost-build/util/__init__.py b/jam-files/boost-build/util/__init__.py deleted file mode 100644 index f80fe70e..00000000 --- a/jam-files/boost-build/util/__init__.py +++ /dev/null @@ -1,136 +0,0 @@ - -import bjam -import re -import types - -# Decorator the specifies bjam-side prototype for a Python function -def bjam_signature(s): - - def wrap(f): - f.bjam_signature = s - return f - - return wrap - -def metatarget(f): - - f.bjam_signature = (["name"], ["sources", "*"], ["requirements", "*"], - ["default_build", "*"], ["usage_requirements", "*"]) - return f - -class cached(object): - - def __init__(self, function): - self.function = function - self.cache = {} - - def __call__(self, *args): - try: - return self.cache[args] - except KeyError: - v = self.function(*args) - self.cache[args] = v - return v - - def __get__(self, instance, type): - return types.MethodType(self, instance, type) - -def unquote(s): - if s and s[0] == '"' and s[-1] == '"': - return s[1:-1] - else: - return s - -_extract_jamfile_and_rule = re.compile("(Jamfile<.*>)%(.*)") - -def qualify_jam_action(action_name, context_module): - - if action_name.startswith("###"): - # Callable exported from Python. Don't touch - return action_name - elif _extract_jamfile_and_rule.match(action_name): - # Rule is already in indirect format - return action_name - else: - ix = action_name.find('.') - if ix != -1 and action_name[:ix] == context_module: - return context_module + '%' + action_name[ix+1:] - - return context_module + '%' + action_name - - -def set_jam_action(name, *args): - - m = _extract_jamfile_and_rule.match(name) - if m: - args = ("set-update-action-in-module", m.group(1), m.group(2)) + args - else: - args = ("set-update-action", name) + args - - return bjam.call(*args) - - -def call_jam_function(name, *args): - - m = _extract_jamfile_and_rule.match(name) - if m: - args = ("call-in-module", m.group(1), m.group(2)) + args - return bjam.call(*args) - else: - return bjam.call(*((name,) + args)) - -__value_id = 0 -__python_to_jam = {} -__jam_to_python = {} - -def value_to_jam(value, methods=False): - """Makes a token to refer to a Python value inside Jam language code. - - The token is merely a string that can be passed around in Jam code and - eventually passed back. 
For example, we might want to pass PropertySet - instance to a tag function and it might eventually call back - to virtual_target.add_suffix_and_prefix, passing the same instance. - - For values that are classes, we'll also make class methods callable - from Jam. - - Note that this is necessary to make a bit more of existing Jamfiles work. - This trick should not be used to much, or else the performance benefits of - Python port will be eaten. - """ - - global __value_id - - r = __python_to_jam.get(value, None) - if r: - return r - - exported_name = '###_' + str(__value_id) - __value_id = __value_id + 1 - __python_to_jam[value] = exported_name - __jam_to_python[exported_name] = value - - if methods and type(value) == types.InstanceType: - for field_name in dir(value): - field = getattr(value, field_name) - if callable(field) and not field_name.startswith("__"): - bjam.import_rule("", exported_name + "." + field_name, field) - - return exported_name - -def record_jam_to_value_mapping(jam_value, python_value): - __jam_to_python[jam_value] = python_value - -def jam_to_value_maybe(jam_value): - - if type(jam_value) == type(""): - return __jam_to_python.get(jam_value, jam_value) - else: - return jam_value - -def stem(filename): - i = filename.find('.') - if i != -1: - return filename[0:i] - else: - return filename diff --git a/jam-files/boost-build/util/assert.jam b/jam-files/boost-build/util/assert.jam deleted file mode 100644 index abedad52..00000000 --- a/jam-files/boost-build/util/assert.jam +++ /dev/null @@ -1,336 +0,0 @@ -# Copyright 2001, 2002, 2003 Dave Abrahams -# Copyright 2006 Rene Rivera -# Copyright 2002, 2003 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -import errors ; -import modules ; - - -################################################################################ -# -# Private implementation details. -# -################################################################################ - -# Rule added as a replacement for the regular Jam = operator but which does not -# ignore trailing empty string elements. -# -local rule exact-equal-test ( lhs * : rhs * ) -{ - local lhs_extended = $(lhs) xxx ; - local rhs_extended = $(rhs) xxx ; - if $(lhs_extended) = $(rhs_extended) - { - return true ; - } -} - - -# Two lists are considered set-equal if they contain the same elements, ignoring -# duplicates and ordering. -# -local rule set-equal-test ( set1 * : set2 * ) -{ - if ( $(set1) in $(set2) ) && ( $(set2) in $(set1) ) - { - return true ; - } -} - - -################################################################################ -# -# Public interface. -# -################################################################################ - -# Assert the equality of A and B, ignoring trailing empty string elements. -# -rule equal ( a * : b * ) -{ - if $(a) != $(b) - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\" - (ignoring trailing empty strings) ; - } -} - - -# Assert that the result of calling RULE-NAME on the given arguments has a false -# logical value (is either an empty list or all empty strings). 
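assert.jam above defines two comparison helpers: exact-equal-test, which pads both lists so that trailing empty strings stay significant, and set-equal-test, which treats lists as equal when each is contained in the other. Their behavior can be mirrored in a few lines of Python; the helper names are made up for illustration:

    def exact_equal(lhs, rhs):
        # Python list equality is already "exact": ['x'] != ['x', ''].  The Jam
        # rule above needs the extra 'xxx' sentinel only because plain Jam '='
        # ignores trailing empty strings.
        return list(lhs) == list(rhs)

    def set_equal(a, b):
        # Equal as sets: duplicates and ordering are ignored, as in set-equal-test.
        return set(a) == set(b)

    assert not exact_equal(["x"], ["x", ""])              # trailing "" matters
    assert set_equal(["a", "b", "c"], ["c", "a", "b", "a"])
    assert not set_equal(["a", "b"], ["a", "b", "c"])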
-# -rule false ( rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) - : $(7) : $(8) : $(9) ] ; - } - - if $(result) - { - errors.error-skip-frames 3 assertion failure: Expected false result from - "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) : - $(5) : $(6) : $(7) : $(8) : $(9) ] "]" : Got: "[" \"$(result)\" "]" ; - } -} - - -# Assert that ELEMENT is present in LIST. -# -rule "in" ( element : list * ) -{ - if ! $(element) in $(list) - { - errors.error-skip-frames 3 assertion failure: Expected \"$(element)\" in - "[" \"$(list)\" "]" ; - } -} - - -# Assert the inequality of A and B, ignoring trailing empty string elements. -# -rule not-equal ( a * : b * ) -{ - if $(a) = $(b) - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\" - (ignoring trailing empty strings) ; - } -} - - -# Assert that ELEMENT is not present in LIST. -# -rule not-in ( element : list * ) -{ - if $(element) in $(list) - { - errors.error-skip-frames 3 assertion failure: Did not expect - \"$(element)\" in "[" \"$(list)\" "]" ; - } -} - - -# Assert the inequality of A and B as sets. -# -rule not-set-equal ( a * : b * ) -{ - if [ set-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]" - and "[" \"$(b)\" "]" to not be equal as sets ; - } -} - - -# Assert that A and B are not exactly equal, not ignoring trailing empty string -# elements. -# -rule not-exact-equal ( a * : b * ) -{ - if [ exact-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "!=" \"$(b)\" ; - } -} - - -# Assert that EXPECTED is the result of calling RULE-NAME with the given -# arguments. -# -rule result ( expected * : rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) - : $(8) : $(9) ] ; - } - - if ! [ exact-equal-test $(result) : $(expected) ] - { - errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [ - errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : - $(9) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "[" - \"$(result)\" "]" ; - } -} - - -# Assert that EXPECTED is set-equal (i.e. duplicates and ordering are ignored) -# to the result of calling RULE-NAME with the given arguments. Note that rules -# called this way may accept at most 8 parameters. -# -rule result-set-equal ( expected * : rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(2) : $(3) : $(4) : $(5) : $(6) : $(7) - : $(8) : $(9) ] ; - } - - if ! [ set-equal-test $(result) : $(expected) ] - { - errors.error-skip-frames 3 assertion failure: "[" $(rule-name) [ - errors.lol->list $(args) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : - $(9) ] "]" : Expected: "[" \"$(expected)\" "]" : Got: "[" - \"$(result)\" "]" ; - } -} - - -# Assert the equality of A and B as sets. -# -rule set-equal ( a * : b * ) -{ - if ! [ set-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: Expected "[" \"$(a)\" "]" - and "[" \"$(b)\" "]" to be equal as sets ; - } -} - - -# Assert that the result of calling RULE-NAME on the given arguments has a true -# logical value (is neither an empty list nor all empty strings). 
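The assert.result rule above runs a rule in the caller's module and reports a formatted failure when the outcome differs from the expected value. A small Python analogue of that pattern, with a hypothetical helper name and a message format loosely modeled on the errors above:

    def assert_result(expected, func, *args):
        """Call 'func' and fail loudly when its result differs from 'expected',
        in the spirit of the assert.result rule above (sketch only)."""
        got = func(*args)
        if got != expected:
            raise AssertionError(
                "assertion failure: [ %s %s ] : Expected: %r : Got: %r"
                % (func.__name__, " ".join(map(repr, args)), expected, got))

    assert_result([3, 4, 6], sorted, [4, 3, 6])   # passes silently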
-# -rule true ( rule-name args * : * ) -{ - local result ; - module [ CALLER_MODULE ] - { - modules.poke assert : result : [ $(1) : $(2) : $(3) : $(4) : $(5) : $(6) - : $(7) : $(8) : $(9) ] ; - } - - if ! $(result) - { - errors.error-skip-frames 3 assertion failure: Expected true result from - "[" $(rule-name) [ errors.lol->list $(args) : $(2) : $(3) : $(4) : - $(5) : $(6) : $(7) : $(8) : $(9) ] "]" ; - } -} - - -# Assert the exact equality of A and B, not ignoring trailing empty string -# elements. -# -rule exact-equal ( a * : b * ) -{ - if ! [ exact-equal-test $(a) : $(b) ] - { - errors.error-skip-frames 3 assertion failure: \"$(a)\" "==" \"$(b)\" ; - } -} - - -# Assert that the given variable is not an empty list. -# -rule variable-not-empty ( name ) -{ - local value = [ modules.peek [ CALLER_MODULE ] : $(name) ] ; - if ! $(value)-is-not-empty - { - errors.error-skip-frames 3 assertion failure: Expected variable - \"$(name)\" not to be an empty list ; - } -} - - -rule __test__ ( ) -{ - # Helper rule used to avoid test duplication related to different list - # equality test rules. - # - local rule run-equality-test ( equality-assert : ignore-trailing-empty-strings ? ) - { - local not-equality-assert = not-$(equality-assert) ; - - # When the given equality test is expected to ignore trailing empty - # strings some of the test results should be inverted. - local not-equality-assert-i = not-$(equality-assert) ; - if $(ignore-trailing-empty-strings) - { - not-equality-assert-i = $(equality-assert) ; - } - - $(equality-assert) : ; - $(equality-assert) "" "" : "" "" ; - $(not-equality-assert-i) : "" "" ; - $(equality-assert) x : x ; - $(not-equality-assert) : x ; - $(not-equality-assert) "" : x ; - $(not-equality-assert) "" "" : x ; - $(not-equality-assert-i) x : x "" ; - $(equality-assert) x "" : x "" ; - $(not-equality-assert) x : "" x ; - $(equality-assert) "" x : "" x ; - - $(equality-assert) 1 2 3 : 1 2 3 ; - $(not-equality-assert) 1 2 3 : 3 2 1 ; - $(not-equality-assert) 1 2 3 : 1 5 3 ; - $(not-equality-assert) 1 2 3 : 1 "" 3 ; - $(not-equality-assert) 1 2 3 : 1 1 2 3 ; - $(not-equality-assert) 1 2 3 : 1 2 2 3 ; - $(not-equality-assert) 1 2 3 : 5 6 7 ; - - # Extra variables used here just to make sure Boost Jam or Boost Build - # do not handle lists with empty strings differently depending on - # whether they are literals or stored in variables. 
- - local empty = ; - local empty-strings = "" "" ; - local x-empty-strings = x "" "" ; - local empty-strings-x = "" "" x ; - - $(equality-assert) : $(empty) ; - $(not-equality-assert-i) "" : $(empty) ; - $(not-equality-assert-i) "" "" : $(empty) ; - $(not-equality-assert-i) : $(empty-strings) ; - $(not-equality-assert-i) "" : $(empty-strings) ; - $(equality-assert) "" "" : $(empty-strings) ; - $(equality-assert) $(empty) : $(empty) ; - $(equality-assert) $(empty-strings) : $(empty-strings) ; - $(not-equality-assert-i) $(empty) : $(empty-strings) ; - $(equality-assert) $(x-empty-strings) : $(x-empty-strings) ; - $(equality-assert) $(empty-strings-x) : $(empty-strings-x) ; - $(not-equality-assert) $(empty-strings-x) : $(x-empty-strings) ; - $(not-equality-assert-i) x : $(x-empty-strings) ; - $(not-equality-assert) x : $(empty-strings-x) ; - $(not-equality-assert-i) x : $(x-empty-strings) ; - $(not-equality-assert-i) x "" : $(x-empty-strings) ; - $(equality-assert) x "" "" : $(x-empty-strings) ; - $(not-equality-assert) x : $(empty-strings-x) ; - $(not-equality-assert) "" x : $(empty-strings-x) ; - $(equality-assert) "" "" x : $(empty-strings-x) ; - } - - - # --------------- - # Equality tests. - # --------------- - - run-equality-test equal : ignore-trailing-empty-strings ; - run-equality-test exact-equal ; - - - # ------------------------- - # assert.set-equal() tests. - # ------------------------- - - set-equal : ; - not-set-equal "" "" : ; - set-equal "" "" : "" ; - set-equal "" "" : "" "" ; - set-equal a b c : a b c ; - set-equal a b c : b c a ; - set-equal a b c a : a b c ; - set-equal a b c : a b c a ; - not-set-equal a b c : a b c d ; - not-set-equal a b c d : a b c ; -} diff --git a/jam-files/boost-build/util/container.jam b/jam-files/boost-build/util/container.jam deleted file mode 100644 index dd496393..00000000 --- a/jam-files/boost-build/util/container.jam +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright 2003 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Copyright 2002, 2003, 2004 Vladimir Prus -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -# Various container classes. - -# Base for container objects. This lets us construct recursive structures. That -# is containers with containers in them, specifically so we can tell literal -# values from node values. -# -class node -{ - rule __init__ ( - value ? # Optional value to set node to initially. - ) - { - self.value = $(value) ; - } - - # Set the value of this node, passing nothing will clear it. - # - rule set ( value * ) - { - self.value = $(value) ; - } - - # Get the value of this node. - # - rule get ( ) - { - return $(self.value) ; - } -} - - -# A simple vector. Interface mimics the C++ std::vector and std::list, with the -# exception that indices are one (1) based to follow Jam standard. -# -# TODO: Possibly add assertion checks. -# -class vector : node -{ - import numbers ; - import utility ; - import sequence ; - - rule __init__ ( - values * # Initial contents of vector. - ) - { - node.__init__ ; - self.value = $(values) ; - } - - # Get the value of the first element. - # - rule front ( ) - { - return $(self.value[1]) ; - } - - # Get the value of the last element. - # - rule back ( ) - { - return $(self.value[-1]) ; - } - - # Get the value of the element at the given index, one based. Access to - # elements of recursive structures is supported directly. 
Specifying - # additional index values recursively accesses the elements as containers. - # For example: [ $(v).at 1 : 2 ] would retrieve the second element of our - # first element, assuming the first element is a container. - # - rule at ( - index # The element index, one based. - : * # Additional indices to access recursively. - ) - { - local r = $(self.value[$(index)]) ; - if $(2) - { - r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - } - return $(r) ; - } - - # Get the value contained in the given element. This has the same - # functionality and interface as "at" but in addition gets the value of the - # referenced element, assuming it is a "node". - # - rule get-at ( - index # The element index, one based. - : * # Additional indices to access recursively. - ) - { - local r = $(self.value[$(index)]) ; - if $(2) - { - r = [ $(r).at $(2) : $(3) : $(4) : $(5) : $(6) : $(7) : $(8) : $(9) ] ; - } - return [ $(r).get ] ; - } - - # Insert the given value into the front of the vector pushing the rest of - # the elements back. - # - rule push-front ( - value # Value to become first element. - ) - { - self.value = $(value) $(self.value) ; - } - - # Remove the front element from the vector. Does not return the value. No - # effect if vector is empty. - # - rule pop-front ( ) - { - self.value = $(self.value[2-]) ; - } - - # Add the given value at the end of the vector. - # - rule push-back ( - value # Value to become back element. - ) - { - self.value += $(value) ; - } - - # Remove the back element from the vector. Does not return the value. No - # effect if vector is empty. - # - rule pop-back ( ) - { - self.value = $(self.value[1--2]) ; - } - - # Insert the given value at the given index, one based. The values at and to - # the right of the index are pushed back to make room for the new value. - # If the index is passed the end of the vector the element is added to the - # end. - # - rule insert ( - index # The index to insert at, one based. - : value # The value to insert. - ) - { - local left = $(self.value[1-$(index)]) ; - local right = $(self.value[$(index)-]) ; - if $(right)-is-not-empty - { - left = $(left[1--2]) ; - } - self.value = $(left) $(value) $(right) ; - } - - # Remove one or more elements from the vector. The range is inclusive, and - # not specifying an end is equivalent to the [start, start] range. - # - rule erase ( - start # Index of first element to remove. - end ? # Optional, index of last element to remove. - ) - { - end ?= $(start) ; - local left = $(self.value[1-$(start)]) ; - left = $(left[1--2]) ; - local right = $(self.value[$(end)-]) ; - right = $(right[2-]) ; - self.value = $(left) $(right) ; - } - - # Remove all elements from the vector. - # - rule clear ( ) - { - self.value = ; - } - - # The number of elements in the vector. - # - rule size ( ) - { - return [ sequence.length $(self.value) ] ; - } - - # Returns "true" if there are NO elements in the vector, empty otherwise. - # - rule empty ( ) - { - if ! $(self.value)-is-not-empty - { - return true ; - } - } - - # Returns the textual representation of content. - # - rule str ( ) - { - return "[" [ sequence.transform utility.str : $(self.value) ] "]" ; - } - - # Sorts the vector inplace, calling 'utility.less' for comparisons. - # - rule sort ( ) - { - self.value = [ sequence.insertion-sort $(self.value) : utility.less ] ; - } - - # Returns true if content is equal to the content of other vector. Uses - # 'utility.equal' for comparison. 
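The vector class above mimics std::vector but with one-based indices, an insert that shifts the tail to the right (appending when the index is past the end), and an inclusive erase range. A compact Python sketch of those semantics; the class is hypothetical and not the Jam implementation:

    class OneBasedVector:
        """A list with the one-based at/insert/erase semantics documented above."""

        def __init__(self, values=()):
            self._v = list(values)

        def at(self, index):                 # one-based access
            return self._v[index - 1]

        def insert(self, index, value):      # shift elements at 'index' to the right
            self._v.insert(index - 1, value)

        def erase(self, start, end=None):    # inclusive range; default is [start, start]
            end = start if end is None else end
            del self._v[start - 1:end]

        def equal(self, other):              # element-wise, and sizes must match
            return self._v == other._v

    v = OneBasedVector(["a", "b", "d"])
    v.insert(3, "c")                  # -> a b c d
    v.erase(2, 3)                     # -> a d
    assert v.at(2) == "d"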
-    #
-    rule equal ( another )
-    {
-        local mismatch ;
-        local size = [ size ] ;
-        if $(size) = [ $(another).size ]
-        {
-            for local i in [ numbers.range 1 $(size) ]
-            {
-                if ! [ utility.equal [ at $(i) ] [ $(another).at $(i) ] ]
-                {
-                    mismatch = true ;
-                }
-            }
-        }
-        else
-        {
-            mismatch = true ;
-        }
-
-        if ! $(mismatch)
-        {
-            return true ;
-        }
-    }
-}
-
-
-rule __test__ ( )
-{
-    import assert ;
-    import "class" : new ;
-
-    local v1 = [ new vector ] ;
-    assert.true $(v1).equal $(v1) ;
-    assert.true $(v1).empty ;
-    assert.result 0 : $(v1).size ;
-    assert.result "[" "]" : $(v1).str ;
-    $(v1).push-back b ;
-    $(v1).push-front a ;
-    assert.result "[" a b "]" : $(v1).str ;
-    assert.result a : $(v1).front ;
-    assert.result b : $(v1).back ;
-    $(v1).insert 2 : d ;
-    $(v1).insert 2 : c ;
-    $(v1).insert 4 : f ;
-    $(v1).insert 4 : e ;
-    $(v1).pop-back ;
-    assert.result 5 : $(v1).size ;
-    assert.result d : $(v1).at 3 ;
-    $(v1).pop-front ;
-    assert.result c : $(v1).front ;
-    assert.false $(v1).empty ;
-    $(v1).erase 3 4 ;
-    assert.result 2 : $(v1).size ;
-
-    local v2 = [ new vector q w e r t y ] ;
-    assert.result 6 : $(v2).size ;
-    $(v1).push-back $(v2) ;
-    assert.result 3 : $(v1).size ;
-    local v2-alias = [ $(v1).back ] ;
-    assert.result e : $(v2-alias).at 3 ;
-    $(v1).clear ;
-    assert.true $(v1).empty ;
-    assert.false $(v2-alias).empty ;
-    $(v2).pop-back ;
-    assert.result t : $(v2-alias).back ;
-
-    local v3 = [ new vector ] ;
-    $(v3).push-back [ new vector 1 2 3 4 5 ] ;
-    $(v3).push-back [ new vector a b c ] ;
-    assert.result "[" "[" 1 2 3 4 5 "]" "[" a b c "]" "]" : $(v3).str ;
-    $(v3).push-back [ new vector [ new vector x y z ] [ new vector 7 8 9 ] ] ;
-    assert.result 1 : $(v3).at 1 : 1 ;
-    assert.result b : $(v3).at 2 : 2 ;
-    assert.result a b c : $(v3).get-at 2 ;
-    assert.result 7 8 9 : $(v3).get-at 3 : 2 ;
-
-    local v4 = [ new vector 4 3 6 ] ;
-    $(v4).sort ;
-    assert.result 3 4 6 : $(v4).get ;
-    assert.false $(v4).equal $(v3) ;
-
-    local v5 = [ new vector 3 4 6 ] ;
-    assert.true $(v4).equal $(v5) ;
-    # Check that vectors of different sizes are considered non-equal.
-    $(v5).pop-back ;
-    assert.false $(v4).equal $(v5) ;
-
-    local v6 = [ new vector [ new vector 1 2 3 ] ] ;
-    assert.true $(v6).equal [ new vector [ new vector 1 2 3 ] ] ;
-
-    local v7 = [ new vector 111 222 333 ] ;
-    assert.true $(v7).equal $(v7) ;
-    $(v7).insert 4 : 444 ;
-    assert.result 111 222 333 444 : $(v7).get ;
-    $(v7).insert 999 : xxx ;
-    assert.result 111 222 333 444 xxx : $(v7).get ;
-
-    local v8 = [ new vector "" "" "" ] ;
-    assert.true $(v8).equal $(v8) ;
-    assert.false $(v8).empty ;
-    assert.result 3 : $(v8).size ;
-    assert.result "" : $(v8).at 1 ;
-    assert.result "" : $(v8).at 2 ;
-    assert.result "" : $(v8).at 3 ;
-    assert.result : $(v8).at 4 ;
-    $(v8).insert 2 : 222 ;
-    assert.result 4 : $(v8).size ;
-    assert.result "" 222 "" "" : $(v8).get ;
-    $(v8).insert 999 : "" ;
-    assert.result 5 : $(v8).size ;
-    assert.result "" 222 "" "" "" : $(v8).get ;
-    $(v8).insert 999 : xxx ;
-    assert.result 6 : $(v8).size ;
-    assert.result "" 222 "" "" "" xxx : $(v8).get ;
-
-    # Regression test for a bug causing vector.equal to compare only the first
-    # and the last element in the given vectors.
-    local v9 = [ new vector 111 xxx 222 ] ;
-    local v10 = [ new vector 111 yyy 222 ] ;
-    assert.false $(v9).equal $(v10) ;
-}
diff --git a/jam-files/boost-build/util/doc.jam b/jam-files/boost-build/util/doc.jam
deleted file mode 100644
index a7515588..00000000
--- a/jam-files/boost-build/util/doc.jam
+++ /dev/null
@@ -1,997 +0,0 @@
-# Copyright 2002, 2005 Dave Abrahams
-# Copyright 2002, 2003, 2006 Rene Rivera
-# Copyright 2003 Vladimir Prus
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
-
-# Documentation system, handles --help requests.
-# It defines rules that attach documentation to modules, rules, and variables.
-# Collects and generates documentation for the various parts of the build
-# system. The documentation is collected from comments integrated into the code.
-
-import modules ;
-import print ;
-import set ;
-import container ;
-import "class" ;
-import sequence ;
-import path ;
-
-
-# The type of output to generate.
-# "console" is formated text echoed to the console (the default);
-# "text" is formated text appended to the output file;
-# "html" is HTML output to the file.
-#
-help-output = console ;
-
-
-# The file to output documentation to when generating "text" or "html" help.
-# This is without extension as the extension is determined by the type of
-# output.
-#
-help-output-file = help ;
-
-# Whether to include local rules in help output.
-#
-.option.show-locals ?= ;
-
-# When showing documentation for a module, whether to also generate
-# automatically the detailed docs for each item in the module.
-#
-.option.detailed ?= ;
-
-# Generate debug output as the help is generated and modules are parsed.
-#
-.option.debug ?= ;
-
-# Enable or disable a documentation option.
-#
-local rule set-option (
-    option # The option name.
-    : value ? # Enabled (non-empty), or disabled (empty)
-)
-{
-    .option.$(option) = $(value) ;
-}
-
-
-# Set the type of output.
-#
-local rule set-output ( type )
-{
-    help-output = $(type) ;
-}
-
-
-# Set the output to a file.
-#
-local rule set-output-file ( file )
-{
-    help-output-file = $(file) ;
-}
-
-
-# Extracts the brief comment from a complete comment. The brief comment is the
-# first sentence.
-#
-local rule brief-comment (
-    docs * # The comment documentation.
-)
-{
-    local d = $(docs:J=" ") ;
-    local p = [ MATCH ".*([.])$" : $(d) ] ;
-    if ! $(p) { d = $(d)"." ; }
-    d = $(d)" " ;
-    local m = [ MATCH "^([^.]+[.])(.*)" : $(d) ] ;
-    local brief = $(m[1]) ;
-    while $(m[2]) && [ MATCH "^([^ ])" : $(m[2]) ]
-    {
-        m = [ MATCH "^([^.]+[.])(.*)" : $(m[2]) ] ;
-        brief += $(m[1]) ;
-    }
-    return $(brief:J="") ;
-}
-
-
-# Specifies the documentation for the current module.
-#
-local rule set-module-doc (
-    module-name ? # The name of the module to document.
-    : docs * # The documentation for the module.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).brief = [ brief-comment $(docs) ] ;
-    $(module-name).docs = $(docs) ;
-
-    if ! $(module-name) in $(documented-modules)
-    {
-        documented-modules += $(module-name) ;
-    }
-}
-
-
-# Specifies the documentation for the current module.
-#
-local rule set-module-copyright (
-    module-name ? # The name of the module to document.
-    : copyright * # The copyright for the module.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).copy-brief = [ brief-comment $(copyright) ] ;
-    $(module-name).copy-docs = $(docs) ;
-
-    if ! $(module-name) in $(documented-modules)
-    {
-        documented-modules += $(module-name) ;
-    }
-}
-
-
-# Specifies the documentation for a rule in the current module. If called in the
-# global module, this documents a global rule.
-#
-local rule set-rule-doc (
-    name # The name of the rule.
-    module-name ? # The name of the module to document.
-    is-local ? # Whether the rule is local to the module.
-    : docs * # The documentation for the rule.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).$(name).brief = [ brief-comment $(docs) ] ;
-    $(module-name).$(name).docs = $(docs) ;
-    $(module-name).$(name).is-local = $(is-local) ;
-
-    if ! $(name) in $($(module-name).rules)
-    {
-        $(module-name).rules += $(name) ;
-    }
-}
-
-
-# Specify a class, will turn a rule into a class.
-#
-local rule set-class-doc (
-    name # The name of the class.
-    module-name ? # The name of the module to document.
-    : super-name ? # The super class name.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).$(name).is-class = true ;
-    $(module-name).$(name).super-name = $(super-name) ;
-    $(module-name).$(name).class-rules =
-        [ MATCH "^($(name)[.].*)" : $($(module-name).rules) ] ;
-    $(module-name).$($(module-name).$(name).class-rules).is-class-rule = true ;
-
-    $(module-name).classes += $(name) ;
-    $(module-name).class-rules += $($(module-name).$(name).class-rules) ;
-    $(module-name).rules =
-        [ set.difference $($(module-name).rules) :
-            $(name) $($(module-name).$(name).class-rules) ] ;
-}
-
-
-# Set the argument call signature of a rule.
-#
-local rule set-rule-arguments-signature (
-    name # The name of the rule.
-    module-name ? # The name of the module to document.
-    : signature * # The arguments signature.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).$(name).signature = $(signature) ;
-}
-
-
-# Specifies the documentation for an argument of a rule.
-#
-local rule set-argument-doc (
-    name # The name of the argument.
-    qualifier # Argument syntax qualifier, "*", "+", etc.
-    rule-name # The name of the rule.
-    module-name ? # THe optional name of the module.
-    : docs * # The documentation.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).$(rule-name).args.$(name).qualifier = $(qualifier) ;
-    $(module-name).$(rule-name).args.$(name).docs = $(docs) ;
-
-    if ! $(name) in $($(module-name).$(rule-name).args)
-    {
-        $(module-name).$(rule-name).args += $(name) ;
-    }
-}
-
-
-# Specifies the documentation for a variable in the current module. If called in
-# the global module, the global variable is documented.
-#
-local rule set-variable-doc (
-    name # The name of the variable.
-    default # The default value.
-    initial # The initial value.
-    module-name ? # The name of the module to document.
-    : docs * # The documentation for the variable.
-)
-{
-    module-name ?= * ;
-
-    $(module-name).$(name).brief = [ brief-comment $(docs) ] ;
-    $(module-name).$(name).default = $(default) ;
-    $(module-name).$(name).initial = $(initial) ;
-    $(module-name).$(name).docs = $(docs) ;
-
-    if ! $(name) in $($(module-name).variables)
-    {
-        $(module-name).variables += $(name) ;
-    }
-}
-
-
-# Generates a general description of the documentation and help system.
-#
-local rule print-help-top ( )
-{
-    print.section "General command line usage" ;
-
-    print.text "    bjam [options] [properties] [targets]
-
-    Options, properties and targets can be specified in any order.
-    " ;
-
-    print.section "Important Options" ;
-
-    print.list-start ;
-    print.list-item "--clean Remove targets instead of building" ;
-    print.list-item "-a Rebuild everything" ;
-    print.list-item "-n Don't execute the commands, only print them" ;
-    print.list-item "-d+2 Show commands as they are executed" ;
-    print.list-item "-d0 Supress all informational messages" ;
-    print.list-item "-q Stop at first error" ;
-    print.list-item "--debug-configuration Diagnose configuration" ;
-    print.list-item "--debug-building Report which targets are built with what properties" ;
-    print.list-item "--debug-generator Diagnose generator search/execution" ;
-    print.list-end ;
-
-    print.section "Further Help"
-        The following options can be used to obtain additional documentation.
-        ;
-
-    print.list-start ;
-    print.list-item "--help-options Print more obscure command line options." ;
-    print.list-item "--help-internal Boost.Build implementation details." ;
-    print.list-item "--help-doc-options Implementation details doc formatting." ;
-    print.list-end ;
-}
-
-
-# Generate Jam/Boost.Jam command usage information.
-#
-local rule print-help-usage ( )
-{
-    print.section "Boost.Jam Usage"
-        "bjam [ options... ] targets..."
-        ;
-    print.list-start ;
-    print.list-item -a;
-        Build all targets, even if they are current. ;
-    print.list-item -fx;
-        Read '"x"' as the Jamfile for building instead of searching for the
-        Boost.Build system. ;
-    print.list-item -jx;
-        Run up to '"x"' commands concurrently. ;
-    print.list-item -n;
-        Do not execute build commands. Instead print out the commands as they
-        would be executed if building. ;
-    print.list-item -ox;
-        Output the used build commands to file '"x"'. ;
-    print.list-item -q;
-        Quit as soon as a build failure is encountered. Without this option
-        Boost.Jam will continue building as many targets as it can.
-    print.list-item -sx=y;
-        Sets a Jam variable '"x"' to the value '"y"', overriding any value that
-        variable would have from the environment. ;
-    print.list-item -tx;
-        Rebuild the target '"x"', even if it is up-to-date. ;
-    print.list-item -v;
-        Display the version of bjam. ;
-    print.list-item --x;
-        Any option not explicitly handled by Boost.Jam remains available to
-        build scripts using the '"ARGV"' variable. ;
-    print.list-item -dn;
-        Enables output of diagnostic messages. The debug level '"n"' and all
-        below it are enabled by this option. ;
-    print.list-item -d+n;
-        Enables output of diagnostic messages. Only the output for debug level
-        '"n"' is enabled. ;
-    print.list-end ;
-    print.section "Debug Levels"
-        Each debug level shows a different set of information. Usually with
-        higher levels producing more verbose information. The following levels
-        are supported: ;
-    print.list-start ;
-    print.list-item 0;
-        Turn off all diagnostic output. Only errors are reported. ;
-    print.list-item 1;
-        Show the actions taken for building targets, as they are executed. ;
-    print.list-item 2;
-        Show "quiet" actions and display all action text, as they are executed. ;
-    print.list-item 3;
-        Show dependency analysis, and target/source timestamps/paths. ;
-    print.list-item 4;
-        Show arguments of shell invocations. ;
-    print.list-item 5;
-        Show rule invocations and variable expansions. ;
-    print.list-item 6;
-        Show directory/header file/archive scans, and attempts at binding to targets. ;
-    print.list-item 7;
-        Show variable settings. ;
-    print.list-item 8;
-        Show variable fetches, variable expansions, and evaluation of '"if"' expressions. ;
-    print.list-item 9;
-        Show variable manipulation, scanner tokens, and memory usage. ;
-    print.list-item 10;
-        Show execution times for rules. ;
-    print.list-item 11;
-        Show parsing progress of Jamfiles. ;
-    print.list-item 12;
-        Show graph for target dependencies. ;
-    print.list-item 13;
-        Show changes in target status (fate). ;
-    print.list-end ;
-}
-
-
-# Generates description of options controlling the help system. This
-# automatically reads the options as all variables in the doc module of the form
-# ".option.*".
-#
-local rule print-help-options (
-    module-name # The doc module.
-)
-{
-    print.section "Help Options"
-        These are all the options available for enabling or disabling to control
-        the help system in various ways. Options can be enabled or disabled with
-        '"--help-enable-