From 6da0534d0335e60e349582590e1aa60907c84171 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 8 May 2012 14:57:45 -0400 Subject: Output separate files for target hypergraphs. --- decoder/decoder.cc | 4 ++-- decoder/hg_io.cc | 17 +++++++++++------ decoder/hg_io.h | 3 ++- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index ec6f75f7..4ce2ba86 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -407,7 +407,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("show_partition,z", "Compute and show the partition (inside score)") ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation") ("show_cfg_search_space", "Show the search space as a CFG") - ("show_target_graph", "Output the target hypergraph") + ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to") ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)") ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found") ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -816,7 +816,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } if (conf.count("show_target_graph")) - HypergraphIO::WriteTarget(forest); + HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest); for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index d416dbf6..734c2ce8 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -1,5 +1,6 @@ #include "hg_io.h" +#include #include #include @@ -651,22 +652,26 @@ void HypergraphIO::WriteAsCFG(const Hypergraph& hg) { * for each downward edge: * RHS with [vertex_index] for NTs ||| scores */ -void HypergraphIO::WriteTarget(const Hypergraph& hg) { - cout << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; +void HypergraphIO::WriteTarget(const std::string &base, unsigned int id, const Hypergraph& hg) { + std::string name(base); + name += '/'; + name += boost::lexical_cast<std::string>(id); + std::fstream out(name.c_str(), std::fstream::out); + out << hg.nodes_.size() << ' ' << hg.edges_.size() << '\n'; for (unsigned int i = 0; i < hg.nodes_.size(); ++i) { const Hypergraph::EdgesVector &edges = hg.nodes_[i].in_edges_; - cout << edges.size() << '\n'; + out << edges.size() << '\n'; for (unsigned int j = 0; j < edges.size(); ++j) { const Hypergraph::Edge &edge = hg.edges_[edges[j]]; const std::vector<WordID> &e = edge.rule_->e(); for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) { if (*word <= 0) { - cout << '[' << edge.tail_nodes_[-*word] << "] "; + out << '[' << edge.tail_nodes_[-*word] << "] "; } else { - cout << TD::Convert(*word) << ' '; + out << TD::Convert(*word) << ' '; } } - cout << "||| " << edge.rule_->scores_ << '\n'; + out << "||| " << edge.rule_->scores_ << '\n'; } } } diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 4e502a0c..58af8132 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -2,6 +2,7 @@ #define _HG_IO_H_ #include +#include #include "lattice.h" class Hypergraph; @@ -24,7 +25,7 @@ struct HypergraphIO { static void WriteAsCFG(const Hypergraph& hg); // Write only the target size information in bottom-up order.
- static void WriteTarget(const Hypergraph& hg); + static void WriteTarget(const std::string &base, unsigned int sent_id, const Hypergraph& hg); // serialization utils static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0); -- cgit v1.2.3 From b84c2f7a650949dd8fa2e77ccf0362fc025b3ca6 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 11 May 2012 18:50:23 -0400 Subject: Pedantic rename to .ll for C++ lex files --- decoder/Makefile.am | 2 +- decoder/rule_lexer.l | 316 -------------------------------------------------- decoder/rule_lexer.ll | 316 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 317 insertions(+), 317 deletions(-) delete mode 100644 decoder/rule_lexer.l create mode 100644 decoder/rule_lexer.ll (limited to 'decoder') diff --git a/decoder/Makefile.am b/decoder/Makefile.am index d16a9147..00d01e53 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -21,7 +21,7 @@ cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libkl AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -rule_lexer.cc: rule_lexer.l +rule_lexer.cc: rule_lexer.ll $(LEX) -s -CF -8 -o$@ $< noinst_LIBRARIES = libcdec.a diff --git a/decoder/rule_lexer.l b/decoder/rule_lexer.l deleted file mode 100644 index 083a5bb1..00000000 --- a/decoder/rule_lexer.l +++ /dev/null @@ -1,316 +0,0 @@ -%option nounput -%{ -#include "rule_lexer.h" - -#include -#include -#include -#include -#include -#include -#include "tdict.h" -#include "fdict.h" -#include "trule.h" -#include "verbose.h" - -int lex_line = 0; -std::istream* scfglex_stream = NULL; -RuleLexer::RuleCallback rule_callback = NULL; -void* rule_callback_extra = NULL; -std::vector scfglex_phrase_fnames; - -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) (result = scfglex_stream->read(buf, max_size).gcount()) - -#define YY_SKIP_YYWRAP 1 -int num_rules = 0; -int yywrap() { return 1; } -bool fl = true; -#define MAX_TOKEN_SIZE 255 -std::string scfglex_tmp_token(MAX_TOKEN_SIZE, '\0'); - -#define MAX_RULE_SIZE 200 -WordID scfglex_src_rhs[MAX_RULE_SIZE]; -WordID scfglex_trg_rhs[MAX_RULE_SIZE]; -int scfglex_src_rhs_size; -int scfglex_trg_rhs_size; -WordID scfglex_lhs; -int scfglex_src_arity; -int scfglex_trg_arity; - -#define MAX_FEATS 100 -int scfglex_feat_ids[MAX_FEATS]; -double scfglex_feat_vals[MAX_FEATS]; -int scfglex_num_feats; - -#define MAX_ARITY 200 -int scfglex_nt_sanity[MAX_ARITY]; -int scfglex_src_nts[MAX_ARITY]; -// float scfglex_nt_size_means[MAX_ARITY]; -// float scfglex_nt_size_vars[MAX_ARITY]; -std::stack ctf_rule_stack; -unsigned int ctf_level = 0; - -#define MAX_ALS 200 -AlignmentPoint scfglex_als[MAX_ALS]; -int scfglex_num_als; - -void sanity_check_trg_symbol(WordID nt, int index) { - if (scfglex_src_nts[index-1] != nt) { - std::cerr << "Target symbol with index " << index << " is of type " << TD::Convert(nt*-1) - << " but corresponding source is of type " - << TD::Convert(scfglex_src_nts[index-1] * -1) << std::endl; - abort(); - } -} - -void sanity_check_trg_index(int index) { - if (index > scfglex_src_arity) { - std::cerr << "Target index " << index << " exceeds source arity " << scfglex_src_arity << std::endl; - abort(); - } - int& flag = scfglex_nt_sanity[index - 1]; - if (flag) { - std::cerr << "Target index " << index << " used multiple times!" 
<< std::endl; - abort(); - } - flag = 1; -} - -void scfglex_reset() { - scfglex_src_arity = 0; - scfglex_trg_arity = 0; - scfglex_num_feats = 0; - scfglex_src_rhs_size = 0; - scfglex_trg_rhs_size = 0; - scfglex_num_als = 0; -} - -void check_and_update_ctf_stack(const TRulePtr& rp) { - if (ctf_level > ctf_rule_stack.size()){ - std::cerr << "Found rule at projection level " << ctf_level << " but previous rule was at level " - << ctf_rule_stack.size()-1 << " (cannot exceed previous level by more than one; line " << lex_line << ")" << std::endl; - abort(); - } - while (ctf_rule_stack.size() > ctf_level) - ctf_rule_stack.pop(); - // ensure that rule has the same signature as parent (coarse) rule. Rules may *only* - // differ by the rhs nonterminals, not terminals or permutation of nonterminals. - if (ctf_rule_stack.size() > 0) { - TRulePtr& coarse_rp = ctf_rule_stack.top(); - if (rp->f_.size() != coarse_rp->f_.size() || rp->e_ != coarse_rp->e_) { - std::cerr << "Rule " << (rp->AsString()) << " is not a projection of " << - (coarse_rp->AsString()) << std::endl; - abort(); - } - for (int i=0; if_.size(); ++i) { - if (((rp->f_[i]<0) != (coarse_rp->f_[i]<0)) || - ((rp->f_[i]>0) && (rp->f_[i] != coarse_rp->f_[i]))) { - std::cerr << "Rule " << (rp->AsString()) << " is not a projection of " << - (coarse_rp->AsString()) << std::endl; - abort(); - } - } - } -} - -%} - -REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf -NT [^\t \[\],]+ - -%x LHS_END SRC TRG FEATS FEATVAL ALIGNS -%% - -[ \t] { - ctf_level++; - }; - -\[{NT}\] { - scfglex_tmp_token.assign(yytext + 1, yyleng - 2); - scfglex_lhs = -TD::Convert(scfglex_tmp_token); - // std::cerr << scfglex_tmp_token << "\n"; - BEGIN(LHS_END); - } - -\[{NT}\] { - scfglex_tmp_token.assign(yytext + 1, yyleng - 2); - scfglex_src_nts[scfglex_src_arity] = scfglex_src_rhs[scfglex_src_rhs_size] = -TD::Convert(scfglex_tmp_token); - ++scfglex_src_arity; - ++scfglex_src_rhs_size; - } - -\[{NT},[1-9][0-9]?\] { - int index = yytext[yyleng - 2] - '0'; - if (yytext[yyleng - 3] == ',') { - scfglex_tmp_token.assign(yytext + 1, yyleng - 4); - } else { - scfglex_tmp_token.assign(yytext + 1, yyleng - 5); - index += 10 * (yytext[yyleng - 3] - '0'); - } - if ((scfglex_src_arity+1) != index) { - std::cerr << "Src indices must go in order: expected " << scfglex_src_arity << " but got " << index << std::endl; - abort(); - } - scfglex_src_nts[scfglex_src_arity] = scfglex_src_rhs[scfglex_src_rhs_size] = -TD::Convert(scfglex_tmp_token); - ++scfglex_src_rhs_size; - ++scfglex_src_arity; - } - -\[{NT},[1-9][0-9]?\] { - int index = yytext[yyleng - 2] - '0'; - if (yytext[yyleng - 3] == ',') { - scfglex_tmp_token.assign(yytext + 1, yyleng - 4); - } else { - scfglex_tmp_token.assign(yytext + 1, yyleng - 5); - index += 10 * (yytext[yyleng - 3] - '0'); - } - ++scfglex_trg_arity; - // std::cerr << "TRG INDEX: " << index << std::endl; - sanity_check_trg_symbol(-TD::Convert(scfglex_tmp_token), index); - sanity_check_trg_index(index); - scfglex_trg_rhs[scfglex_trg_rhs_size] = 1 - index; - ++scfglex_trg_rhs_size; -} - -\[[1-9][0-9]?\] { - int index = yytext[yyleng - 2] - '0'; - if (yyleng == 4) { - index += 10 * (yytext[yyleng - 3] - '0'); - } - ++scfglex_trg_arity; - sanity_check_trg_index(index); - scfglex_trg_rhs[scfglex_trg_rhs_size] = 1 - index; - ++scfglex_trg_rhs_size; -} - -[ \t] { ; } -\|\|\| { - scfglex_reset(); - BEGIN(SRC); - } -. 
{ - std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; - abort(); - } - -\|\|\| { - memset(scfglex_nt_sanity, 0, scfglex_src_arity * sizeof(int)); - BEGIN(TRG); - } -[^ \t]+ { - scfglex_tmp_token.assign(yytext, yyleng); - scfglex_src_rhs[scfglex_src_rhs_size] = TD::Convert(scfglex_tmp_token); - ++scfglex_src_rhs_size; - } -[ \t]+ { ; } - -\|\|\| { - BEGIN(FEATS); - } -[^ \t]+ { - scfglex_tmp_token.assign(yytext, yyleng); - scfglex_trg_rhs[scfglex_trg_rhs_size] = TD::Convert(scfglex_tmp_token); - ++scfglex_trg_rhs_size; - } -[ \t]+ { ; } - -\n { - if (scfglex_src_arity != scfglex_trg_arity) { - std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; - abort(); - } - // const bool ignore_grammar_features = false; - // if (ignore_grammar_features) scfglex_num_feats = 0; - TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als)); - check_and_update_ctf_stack(rp); - TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); - rule_callback(rp, ctf_level, coarse_rp, rule_callback_extra); - ctf_rule_stack.push(rp); - // std::cerr << rp->AsString() << std::endl; - num_rules++; - lex_line++; - if (!SILENT) { - if (num_rules % 50000 == 0) { std::cerr << '.' << std::flush; fl = true; } - if (num_rules % 2000000 == 0) { std::cerr << " [" << num_rules << "]\n"; fl = false; } - } - ctf_level = 0; - BEGIN(INITIAL); - } - -[ \t;] { ; } -[^ \t=;]+= { - scfglex_tmp_token.assign(yytext, yyleng - 1); - const int fid = FD::Convert(scfglex_tmp_token); - if (fid < 1) { - std::cerr << "\nUNWEIGHED FEATURE " << scfglex_tmp_token << std::endl; - abort(); - } - scfglex_feat_ids[scfglex_num_feats] = fid; - BEGIN(FEATVAL); - } -\|\|\| { - BEGIN(ALIGNS); - } -{REAL} { - scfglex_feat_vals[scfglex_num_feats] = strtod(yytext, NULL); - ++scfglex_num_feats; - BEGIN(FEATS); - } -. { - std::cerr << "Line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl; - abort(); - } -{REAL} { - scfglex_feat_ids[scfglex_num_feats] = scfglex_phrase_fnames[scfglex_num_feats]; - scfglex_feat_vals[scfglex_num_feats] = strtod(yytext, NULL); - ++scfglex_num_feats; - } -. { - std::cerr << "Line " << lex_line << " unexpected input in features: " << yytext << std::endl; - abort(); - } -[0-9]+-[0-9]+ { - int i = 0; - int a = 0; - int b = 0; - while (i < yyleng) { - char c = yytext[i]; - if (c == '-') break; - a *= 10; - a += c - '0'; - ++i; - } - ++i; - while (i < yyleng) { - b *= 10; - b += yytext[i] - '0'; - ++i; - } - scfglex_als[scfglex_num_als++]=AlignmentPoint(a,b); - } -[ \t] ; -. 
{ - std::cerr << "Line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl; - abort(); - } -%% - -#include "filelib.h" - -void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* extra) { - if (scfglex_phrase_fnames.empty()) { - scfglex_phrase_fnames.resize(100); - for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) { - std::ostringstream os; - os << "PhraseModel_" << i; - scfglex_phrase_fnames[i] = FD::Convert(os.str()); - } - } - lex_line = 1; - scfglex_stream = in; - rule_callback_extra = extra, - rule_callback = func; - yylex(); -} - diff --git a/decoder/rule_lexer.ll b/decoder/rule_lexer.ll new file mode 100644 index 00000000..083a5bb1 --- /dev/null +++ b/decoder/rule_lexer.ll @@ -0,0 +1,316 @@ +%option nounput +%{ +#include "rule_lexer.h" + +#include +#include +#include +#include +#include +#include +#include "tdict.h" +#include "fdict.h" +#include "trule.h" +#include "verbose.h" + +int lex_line = 0; +std::istream* scfglex_stream = NULL; +RuleLexer::RuleCallback rule_callback = NULL; +void* rule_callback_extra = NULL; +std::vector scfglex_phrase_fnames; + +#undef YY_INPUT +#define YY_INPUT(buf, result, max_size) (result = scfglex_stream->read(buf, max_size).gcount()) + +#define YY_SKIP_YYWRAP 1 +int num_rules = 0; +int yywrap() { return 1; } +bool fl = true; +#define MAX_TOKEN_SIZE 255 +std::string scfglex_tmp_token(MAX_TOKEN_SIZE, '\0'); + +#define MAX_RULE_SIZE 200 +WordID scfglex_src_rhs[MAX_RULE_SIZE]; +WordID scfglex_trg_rhs[MAX_RULE_SIZE]; +int scfglex_src_rhs_size; +int scfglex_trg_rhs_size; +WordID scfglex_lhs; +int scfglex_src_arity; +int scfglex_trg_arity; + +#define MAX_FEATS 100 +int scfglex_feat_ids[MAX_FEATS]; +double scfglex_feat_vals[MAX_FEATS]; +int scfglex_num_feats; + +#define MAX_ARITY 200 +int scfglex_nt_sanity[MAX_ARITY]; +int scfglex_src_nts[MAX_ARITY]; +// float scfglex_nt_size_means[MAX_ARITY]; +// float scfglex_nt_size_vars[MAX_ARITY]; +std::stack ctf_rule_stack; +unsigned int ctf_level = 0; + +#define MAX_ALS 200 +AlignmentPoint scfglex_als[MAX_ALS]; +int scfglex_num_als; + +void sanity_check_trg_symbol(WordID nt, int index) { + if (scfglex_src_nts[index-1] != nt) { + std::cerr << "Target symbol with index " << index << " is of type " << TD::Convert(nt*-1) + << " but corresponding source is of type " + << TD::Convert(scfglex_src_nts[index-1] * -1) << std::endl; + abort(); + } +} + +void sanity_check_trg_index(int index) { + if (index > scfglex_src_arity) { + std::cerr << "Target index " << index << " exceeds source arity " << scfglex_src_arity << std::endl; + abort(); + } + int& flag = scfglex_nt_sanity[index - 1]; + if (flag) { + std::cerr << "Target index " << index << " used multiple times!" << std::endl; + abort(); + } + flag = 1; +} + +void scfglex_reset() { + scfglex_src_arity = 0; + scfglex_trg_arity = 0; + scfglex_num_feats = 0; + scfglex_src_rhs_size = 0; + scfglex_trg_rhs_size = 0; + scfglex_num_als = 0; +} + +void check_and_update_ctf_stack(const TRulePtr& rp) { + if (ctf_level > ctf_rule_stack.size()){ + std::cerr << "Found rule at projection level " << ctf_level << " but previous rule was at level " + << ctf_rule_stack.size()-1 << " (cannot exceed previous level by more than one; line " << lex_line << ")" << std::endl; + abort(); + } + while (ctf_rule_stack.size() > ctf_level) + ctf_rule_stack.pop(); + // ensure that rule has the same signature as parent (coarse) rule. Rules may *only* + // differ by the rhs nonterminals, not terminals or permutation of nonterminals. 
+ if (ctf_rule_stack.size() > 0) { + TRulePtr& coarse_rp = ctf_rule_stack.top(); + if (rp->f_.size() != coarse_rp->f_.size() || rp->e_ != coarse_rp->e_) { + std::cerr << "Rule " << (rp->AsString()) << " is not a projection of " << + (coarse_rp->AsString()) << std::endl; + abort(); + } + for (int i=0; if_.size(); ++i) { + if (((rp->f_[i]<0) != (coarse_rp->f_[i]<0)) || + ((rp->f_[i]>0) && (rp->f_[i] != coarse_rp->f_[i]))) { + std::cerr << "Rule " << (rp->AsString()) << " is not a projection of " << + (coarse_rp->AsString()) << std::endl; + abort(); + } + } + } +} + +%} + +REAL [\-+]?[0-9]+(\.[0-9]*([eE][-+]*[0-9]+)?)?|inf|[\-+]inf +NT [^\t \[\],]+ + +%x LHS_END SRC TRG FEATS FEATVAL ALIGNS +%% + +[ \t] { + ctf_level++; + }; + +\[{NT}\] { + scfglex_tmp_token.assign(yytext + 1, yyleng - 2); + scfglex_lhs = -TD::Convert(scfglex_tmp_token); + // std::cerr << scfglex_tmp_token << "\n"; + BEGIN(LHS_END); + } + +\[{NT}\] { + scfglex_tmp_token.assign(yytext + 1, yyleng - 2); + scfglex_src_nts[scfglex_src_arity] = scfglex_src_rhs[scfglex_src_rhs_size] = -TD::Convert(scfglex_tmp_token); + ++scfglex_src_arity; + ++scfglex_src_rhs_size; + } + +\[{NT},[1-9][0-9]?\] { + int index = yytext[yyleng - 2] - '0'; + if (yytext[yyleng - 3] == ',') { + scfglex_tmp_token.assign(yytext + 1, yyleng - 4); + } else { + scfglex_tmp_token.assign(yytext + 1, yyleng - 5); + index += 10 * (yytext[yyleng - 3] - '0'); + } + if ((scfglex_src_arity+1) != index) { + std::cerr << "Src indices must go in order: expected " << scfglex_src_arity << " but got " << index << std::endl; + abort(); + } + scfglex_src_nts[scfglex_src_arity] = scfglex_src_rhs[scfglex_src_rhs_size] = -TD::Convert(scfglex_tmp_token); + ++scfglex_src_rhs_size; + ++scfglex_src_arity; + } + +\[{NT},[1-9][0-9]?\] { + int index = yytext[yyleng - 2] - '0'; + if (yytext[yyleng - 3] == ',') { + scfglex_tmp_token.assign(yytext + 1, yyleng - 4); + } else { + scfglex_tmp_token.assign(yytext + 1, yyleng - 5); + index += 10 * (yytext[yyleng - 3] - '0'); + } + ++scfglex_trg_arity; + // std::cerr << "TRG INDEX: " << index << std::endl; + sanity_check_trg_symbol(-TD::Convert(scfglex_tmp_token), index); + sanity_check_trg_index(index); + scfglex_trg_rhs[scfglex_trg_rhs_size] = 1 - index; + ++scfglex_trg_rhs_size; +} + +\[[1-9][0-9]?\] { + int index = yytext[yyleng - 2] - '0'; + if (yyleng == 4) { + index += 10 * (yytext[yyleng - 3] - '0'); + } + ++scfglex_trg_arity; + sanity_check_trg_index(index); + scfglex_trg_rhs[scfglex_trg_rhs_size] = 1 - index; + ++scfglex_trg_rhs_size; +} + +[ \t] { ; } +\|\|\| { + scfglex_reset(); + BEGIN(SRC); + } +. 
{ + std::cerr << "Line " << lex_line << ": unexpected input in LHS: " << yytext << std::endl; + abort(); + } + +\|\|\| { + memset(scfglex_nt_sanity, 0, scfglex_src_arity * sizeof(int)); + BEGIN(TRG); + } +[^ \t]+ { + scfglex_tmp_token.assign(yytext, yyleng); + scfglex_src_rhs[scfglex_src_rhs_size] = TD::Convert(scfglex_tmp_token); + ++scfglex_src_rhs_size; + } +[ \t]+ { ; } + +\|\|\| { + BEGIN(FEATS); + } +[^ \t]+ { + scfglex_tmp_token.assign(yytext, yyleng); + scfglex_trg_rhs[scfglex_trg_rhs_size] = TD::Convert(scfglex_tmp_token); + ++scfglex_trg_rhs_size; + } +[ \t]+ { ; } + +\n { + if (scfglex_src_arity != scfglex_trg_arity) { + std::cerr << "Line " << lex_line << ": LHS and RHS arity mismatch!\n"; + abort(); + } + // const bool ignore_grammar_features = false; + // if (ignore_grammar_features) scfglex_num_feats = 0; + TRulePtr rp(new TRule(scfglex_lhs, scfglex_src_rhs, scfglex_src_rhs_size, scfglex_trg_rhs, scfglex_trg_rhs_size, scfglex_feat_ids, scfglex_feat_vals, scfglex_num_feats, scfglex_src_arity, scfglex_als, scfglex_num_als)); + check_and_update_ctf_stack(rp); + TRulePtr coarse_rp = ((ctf_level == 0) ? TRulePtr() : ctf_rule_stack.top()); + rule_callback(rp, ctf_level, coarse_rp, rule_callback_extra); + ctf_rule_stack.push(rp); + // std::cerr << rp->AsString() << std::endl; + num_rules++; + lex_line++; + if (!SILENT) { + if (num_rules % 50000 == 0) { std::cerr << '.' << std::flush; fl = true; } + if (num_rules % 2000000 == 0) { std::cerr << " [" << num_rules << "]\n"; fl = false; } + } + ctf_level = 0; + BEGIN(INITIAL); + } + +[ \t;] { ; } +[^ \t=;]+= { + scfglex_tmp_token.assign(yytext, yyleng - 1); + const int fid = FD::Convert(scfglex_tmp_token); + if (fid < 1) { + std::cerr << "\nUNWEIGHED FEATURE " << scfglex_tmp_token << std::endl; + abort(); + } + scfglex_feat_ids[scfglex_num_feats] = fid; + BEGIN(FEATVAL); + } +\|\|\| { + BEGIN(ALIGNS); + } +{REAL} { + scfglex_feat_vals[scfglex_num_feats] = strtod(yytext, NULL); + ++scfglex_num_feats; + BEGIN(FEATS); + } +. { + std::cerr << "Line " << lex_line << ": unexpected input in feature value: " << yytext << std::endl; + abort(); + } +{REAL} { + scfglex_feat_ids[scfglex_num_feats] = scfglex_phrase_fnames[scfglex_num_feats]; + scfglex_feat_vals[scfglex_num_feats] = strtod(yytext, NULL); + ++scfglex_num_feats; + } +. { + std::cerr << "Line " << lex_line << " unexpected input in features: " << yytext << std::endl; + abort(); + } +[0-9]+-[0-9]+ { + int i = 0; + int a = 0; + int b = 0; + while (i < yyleng) { + char c = yytext[i]; + if (c == '-') break; + a *= 10; + a += c - '0'; + ++i; + } + ++i; + while (i < yyleng) { + b *= 10; + b += yytext[i] - '0'; + ++i; + } + scfglex_als[scfglex_num_als++]=AlignmentPoint(a,b); + } +[ \t] ; +. { + std::cerr << "Line " << lex_line << ": unexpected input in alignment: " << yytext << std::endl; + abort(); + } +%% + +#include "filelib.h" + +void RuleLexer::ReadRules(std::istream* in, RuleLexer::RuleCallback func, void* extra) { + if (scfglex_phrase_fnames.empty()) { + scfglex_phrase_fnames.resize(100); + for (int i = 0; i < scfglex_phrase_fnames.size(); ++i) { + std::ostringstream os; + os << "PhraseModel_" << i; + scfglex_phrase_fnames[i] = FD::Convert(os.str()); + } + } + lex_line = 1; + scfglex_stream = in; + rule_callback_extra = extra, + rule_callback = func; + yylex(); +} + -- cgit v1.2.3 From e1cc454db3be1a5eedc3dba7022e813a772696d7 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 11 May 2012 19:00:26 -0400 Subject: Add just enough jam to compile the decoder. 
--- .gitignore | 2 + Jamroot | 29 +++++++++ decoder/Jamfile | 72 ++++++++++++++++++++++ klm/lm/Jamfile | 14 +++++ klm/util/Jamfile | 10 +++ mteval/Jamfile | 8 +++ sanity.jam | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ utils/Jamfile | 34 ++++++++++ 8 files changed, 353 insertions(+) create mode 100644 Jamroot create mode 100644 decoder/Jamfile create mode 100644 klm/lm/Jamfile create mode 100644 klm/util/Jamfile create mode 100644 mteval/Jamfile create mode 100644 sanity.jam create mode 100644 utils/Jamfile (limited to 'decoder') diff --git a/.gitignore b/.gitignore index 638ad83f..fcacb135 100644 --- a/.gitignore +++ b/.gitignore @@ -197,3 +197,5 @@ mteval/scorer_test phrasinator/gibbs_train_plm phrasinator/gibbs_train_plm_notables .* +previous.sh +dist diff --git a/Jamroot b/Jamroot new file mode 100644 index 00000000..ad09509d --- /dev/null +++ b/Jamroot @@ -0,0 +1,29 @@ +path-constant TOP : . ; +include $(TOP)/sanity.jam ; +boost 104400 ; +external-lib z ; + +if [ test_header google/dense_hash_map ] { + requirements += HAVE_SPARSEHASH ; +} + +if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serialization ] { + requirements += HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP ; +} + +project : requirements $(requirements) ; +project : default-build single on release ; + +install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec ; + +build-project mteval ; +build-project decoder ; + +#Compile everything ending with _test.cc into a test and run it. +rule all_tests ( targets * : dependencies ) { + targets ?= [ glob *_test.cc ] ; + for t in $(targets) { + local base = [ MATCH "^(.*).cc$" : $(t) ] ; + unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework ; + } +} diff --git a/decoder/Jamfile b/decoder/Jamfile new file mode 100644 index 00000000..d7d42f28 --- /dev/null +++ b/decoder/Jamfile @@ -0,0 +1,72 @@ +import testing ; +import lex ; +import option ; + +if [ option.get "with-glc" ] { + glc = ff_glc.cc string_util.cc feature-factory.cc ; +} + +lib cdec_lib : + forest_writer.cc + maxtrans_blunsom.cc + cdec_ff.cc + cfg.cc + dwarf.cc + ff_dwarf.cc + rule_lexer.ll + fst_translator.cc + csplit.cc + translator.cc + scfg_translator.cc + hg.cc + hg_io.cc + decoder.cc + hg_intersect.cc + hg_sampler.cc + factored_lexicon_helper.cc + viterbi.cc + lattice.cc + aligner.cc + apply_models.cc + earley_composer.cc + phrasetable_fst.cc + trule.cc + ff.cc + ff_rules.cc + ff_wordset.cc + ff_context.cc + ff_charset.cc + ff_lm.cc + ff_klm.cc + ff_ngrams.cc + ff_spans.cc + ff_ruleshape.cc + ff_wordalign.cc + ff_csplit.cc + ff_tagger.cc + ff_source_syntax.cc + ff_bleu.cc + ff_factory.cc + lexalign.cc + lextrans.cc + tagger.cc + bottom_up_parser.cc + phrasebased_translator.cc + JSON_parser.c + json_parse.cc + grammar.cc + $(glc) + ..//utils + ..//mteval + ../klm/lm//kenlm + ..//boost_program_options + : . + ; + +exe cdec : cdec.cc cdec_lib ; + +all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib ; + +install legacy : cdec + : $(TOP)/cdec EXE on shared:$(TOP)/cdec shared:LIB ; + diff --git a/klm/lm/Jamfile b/klm/lm/Jamfile new file mode 100644 index 00000000..b84dbb35 --- /dev/null +++ b/klm/lm/Jamfile @@ -0,0 +1,14 @@ +lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc virtual_interface.cc vocab.cc ../util//kenutil : .. : : .. 
../util//kenutil ; + +import testing ; + +run left_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa ; +run model_test.cc ../util//kenutil kenlm ../..//boost_unit_test_framework : : test.arpa test_nounk.arpa ; + +exe query : ngram_query.cc kenlm ../util//kenutil ; +exe build_binary : build_binary.cc kenlm ../util//kenutil ; + +install legacy : build_binary query + : $(TOP)/klm/lm EXE on shared:$(TOP)/klm/lm shared:LIB ; + +alias programs : build_binary query ; diff --git a/klm/util/Jamfile b/klm/util/Jamfile new file mode 100644 index 00000000..00eefc22 --- /dev/null +++ b/klm/util/Jamfile @@ -0,0 +1,10 @@ +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ../..///z : .. : : .. ; + +import testing ; + +unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ; +run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ; +unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ; diff --git a/mteval/Jamfile b/mteval/Jamfile new file mode 100644 index 00000000..f671134a --- /dev/null +++ b/mteval/Jamfile @@ -0,0 +1,8 @@ +import testing ; + +lib mteval : ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ..//utils : . : : . ; +exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; +exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; +alias programs : fast_score mbr_kbest ; + +unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework ; diff --git a/sanity.jam b/sanity.jam new file mode 100644 index 00000000..1b37a4e7 --- /dev/null +++ b/sanity.jam @@ -0,0 +1,184 @@ +import modules ; +import option ; +import os ; +import path ; +import project ; + +#Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php +rule trim-nl ( str extras * ) { +return [ MATCH "([^ +]*)" : $(str) ] $(extras) ; +} +rule _shell ( cmd : extras * ) { + return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ; +} + +#Run g++ with empty main and these arguments to see if it passes. +rule test_flags ( flags * ) { + local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o /dev/null >/dev/null 2>/dev/null\"" ; + local ret = [ SHELL $(cmd) : exit-status ] ; + if --debug-configuration in [ modules.peek : ARGV ] { + echo $(cmd) ; + echo $(ret) ; + } + if $(ret[2]) = 0 { + return true ; + } else { + return ; + } +} + +rule test_header ( name ) { + return [ test_flags "-include $(name)" ] ; +} + +rule test_library ( name ) { + return [ test_flags "-l$(name)" ] ; +} + +{ + local cleaning = [ option.get "clean" : : yes ] ; + cleaning ?= [ option.get "clean-all" : no : yes ] ; + if "clean" in [ modules.peek : ARGV ] { + cleaning = yes ; + } + constant CLEANING : $(cleaning) ; +} + +#Determine if a library can be compiled statically. +rule auto-shared ( name : additional * ) { + additional ?= "" ; + if [ test_flags $(additional)" -static -l"$(name) ] { + return ; + } else { + return "shared" ; + } +} + +# MacPorts' default location is /opt/local -- use this if no path is given. 
+with-macports = [ option.get "with-macports" : : "/opt/local" ] ; +if $(with-macports) { + using darwin ; + ECHO "Using --with-macports=$(with-macports), implying use of darwin GCC" ; + + L-boost-search = -L$(with-macports)/lib ; + boost-search = $(with-macports)/lib ; + I-boost-include = -I$(with-macports)/include ; + boost-include = $(with-macports)/include ; +} +else { + with-boost = [ option.get "with-boost" ] ; + if $(with-boost) { + L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ; + boost-search = $(with-boost)/lib $(with-boost)/lib64 ; + I-boost-include = -I$(with-boost)/include ; + boost-include = $(with-boost)/include ; + } else { + L-boost-search = "" ; + boost-search = ; + I-boost-include = "" ; + boost-include = ; + } +} + +#Are we linking static binaries against shared boost? +boost-auto-shared = [ auto-shared "boost_program_options" : $(L-boost-search) ] ; +#Convenience rule for boost libraries. Defines library boost_$(name). +rule boost-lib ( name macro ) { + #Link multi-threaded programs against the -mt version if available. Old + #versions of boost do not have -mt tagged versions of all libraries. Sadly, + #boost.jam does not handle this correctly. + if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt" ] { + lib inner_boost_$(name) : : single $(boost-search) boost_$(name) ; + lib inner_boost_$(name) : : multi $(boost-search) boost_$(name)-mt ; + } else { + lib inner_boost_$(name) : : $(boost-search) boost_$(name) ; + } + + alias boost_$(name) : inner_boost_$(name) : $(boost-auto-shared) : : shared:BOOST_$(macro) $(boost-include) ; +} + +#Argument is e.g. 103600 +rule boost ( min-version ) { + local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_VERSION '\"" ; + local boost-shell = [ SHELL "$(cmd)" : exit-status ] ; + if $(boost-shell[2]) != 0 && $(CLEANING) = no { + echo Failed to run "$(cmd)" ; + exit Boost does not seem to be installed or g++ is confused. : 1 ; + } + boost-version = [ MATCH "#define BOOST_VERSION ([0-9]*)" : $(boost-shell[1]) ] ; + if $(boost-version) < $(min-version) && $(CLEANING) = no { + exit You have Boost $(boost-version). This package requires Boost at least $(min-version) (and preferably newer). : 1 ; + } + #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define. + boost-lib thread THREAD_DYN_DLL ; + boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ; + boost-lib unit_test_framework TEST_DYN_LINK ; + boost-lib iostreams IOSTREAMS_DYN_LINK ; + boost-lib serialization SERIALIZATION_DYN_LINK ; + boost-lib mpi MPI_DYN_LINK ; +} + +#Link normally to a library, but sometimes static isn't installed so fall back to dynamic. +rule external-lib ( name : search-path * ) { + lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] $(search-path) ; +} + +#Write the current command line to previous.sh. This does not do shell escaping. +{ + local build-log = $(TOP)/previous.sh ; + if ! [ path.exists $(build-log) ] { + SHELL "touch $(build-log) && chmod +x $(build-log)" ; + } + local script = [ modules.peek : ARGV ] ; + if $(script[1]) = "./jam-files/bjam" { + #The ./bjam shell script calls ./jam-files/bjam so that appears in argv but + #we want ./bjam to appear so the environment variables are set correctly. 
+ script = "./bjam "$(script[2-]:J=" ") ; + } else { + script = $(script:J=" ") ; + } + script = "#!/bin/sh\n$(script)\n" ; + local ignored = @($(build-log):E=$(script)) ; +} + +requirements = ; +{ + local cxxflags = [ os.environ "CXXFLAGS" ] ; + local cflags = [ os.environ "CFLAGS" ] ; + local ldflags = [ os.environ "LDFLAGS" ] ; + + #Boost jam's static clang is buggy. + requirements += $(cxxflags) $(cflags) $(ldflags) clang:shared ; + + #libSegFault prints a stack trace on segfault. Link against it if available. + if [ test_flags "-lSegFault" ] { + external-lib SegFault ; + requirements += SegFault ; + } +} + +if [ option.get "git" : : "yes" ] { + local revision = [ _shell "git rev-parse --verify HEAD |head -c 7" ] ; + constant GITTAG : "/"$(revision) ; +} else { + constant GITTAG : "" ; +} + +prefix = [ option.get "prefix" ] ; +if $(prefix) { + prefix = [ path.root $(prefix) [ path.pwd ] ] ; +} else { + prefix = $(TOP)/dist$(GITTAG) ; +} +rule install-bin-libs ( deps * ) { + local bindir = [ option.get "bindir" : $(prefix)/bin ] ; + local libdir = [ option.get "libdir" : $(prefix)/lib ] ; + install prefix-bin : $(deps) : $(bindir) on EXE shared:$(libdir) ; + install prefix-lib : $(deps) : $(libdir) on LIB shared:$(libdir) ; +} +rule install-headers ( name : list * : source-root ? ) { + local includedir = [ option.get "includedir" : $(prefix)/include ] ; + source-root ?= "." ; + install $(name) : $(list) : $(includedir) $(source-root) ; +} diff --git a/utils/Jamfile b/utils/Jamfile new file mode 100644 index 00000000..53a51277 --- /dev/null +++ b/utils/Jamfile @@ -0,0 +1,34 @@ +import testing ; +import option ; + +additional = ; +if [ option.get with-cmph ] { + additional += perfect_hash.cc ; +} + +lib utils : + alignment_io.cc + b64tools.cc + corpus_tools.cc + dict.cc + tdict.cc + fdict.cc + gzstream.cc + filelib.cc + stringlib.cc + sparse_vector.cc + timing_stats.cc + verbose.cc + weights.cc + $(additional) + ..//z + : .. . : : .. . ; + +exe ts : ts.cc utils ; +exe atools : atools.cc utils ..//boost_program_options ; +exe phmt : phmt.cc utils ; +exe reconstruct_weights : reconstruct_weights.cc utils ..//boost_program_options ; + +alias programs : reconstruct_weights atools ; + +all_tests : utils ; -- cgit v1.2.3 From 708ad7d7dbfc8f8d52312c3989f86b183e12af45 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 11 May 2012 20:31:57 -0400 Subject: Fix relative paths in tests. --- Jamroot | 9 ++++----- decoder/Jamfile | 2 +- decoder/grammar_test.cc | 6 ++++-- mteval/Jamfile | 2 +- mteval/scorer_test.cc | 9 +++++---- sanity.jam | 12 +++++++----- 6 files changed, 22 insertions(+), 18 deletions(-) (limited to 'decoder') diff --git a/Jamroot b/Jamroot index ad09509d..c9a8ee76 100644 --- a/Jamroot +++ b/Jamroot @@ -14,16 +14,15 @@ if [ test_header boost/serialization/map.hpp ] && [ test_library boost_serializa project : requirements $(requirements) ; project : default-build single on release ; -install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec ; +install-bin-libs utils//programs mteval//programs klm/lm//programs decoder//cdec phrasinator//programs ; -build-project mteval ; -build-project decoder ; +build-projects mteval decoder klm/lm ; #Compile everything ending with _test.cc into a test and run it. 
-rule all_tests ( targets * : dependencies ) { +rule all_tests ( targets * : dependencies : properties * ) { targets ?= [ glob *_test.cc ] ; for t in $(targets) { local base = [ MATCH "^(.*).cc$" : $(t) ] ; - unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework ; + unit-test $(base) : $(t) $(dependencies) ..//boost_unit_test_framework : $(properties) ; } } diff --git a/decoder/Jamfile b/decoder/Jamfile index d7d42f28..f8112cae 100644 --- a/decoder/Jamfile +++ b/decoder/Jamfile @@ -65,7 +65,7 @@ lib cdec_lib : exe cdec : cdec.cc cdec_lib ; -all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib ; +all_tests [ glob *_test.cc : cfg_test.cc ] : cdec_lib : $(TOP)/decoder/test_data ; install legacy : cdec : $(TOP)/cdec EXE on shared:$(TOP)/cdec shared:LIB ; diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index e1a94709..4500490a 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -17,7 +17,8 @@ using namespace std; struct GrammarTest { GrammarTest() { - Weights::InitFromFile("test_data/weights.gt", &wts); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + Weights::InitFromFile(path + "/weights.gt", &wts); } vector wts; }; @@ -40,7 +41,8 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) { } BOOST_AUTO_TEST_CASE(TestTextGrammarFile) { - GrammarPtr g(new TextGrammar("./test_data/grammar.prune")); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + GrammarPtr g(new TextGrammar(path + "/grammar.prune")); vector grammars(1, g); LatticeArc a(TD::Convert("ein"), 0.0, 1); diff --git a/mteval/Jamfile b/mteval/Jamfile index f671134a..24a95e8f 100644 --- a/mteval/Jamfile +++ b/mteval/Jamfile @@ -5,4 +5,4 @@ exe fast_score : fast_score.cc mteval ..//utils ..//boost_program_options ; exe mbr_kbest : mbr_kbest.cc mteval ..//utils ..//boost_program_options ; alias programs : fast_score mbr_kbest ; -unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework ; +unit-test scorer_test : scorer_test.cc mteval ..//utils ..//z ..//boost_unit_test_framework : $(TOP)/mteval/test_data ; diff --git a/mteval/scorer_test.cc b/mteval/scorer_test.cc index 0e400a3a..9b765d0f 100644 --- a/mteval/scorer_test.cc +++ b/mteval/scorer_test.cc @@ -36,11 +36,12 @@ struct Stuff { BOOST_FIXTURE_TEST_SUITE( s, Stuff ); BOOST_AUTO_TEST_CASE(TestCreateFromFiles) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); vector files; - files.push_back("test_data/re.txt.0"); - files.push_back("test_data/re.txt.1"); - files.push_back("test_data/re.txt.2"); - files.push_back("test_data/re.txt.3"); + files.push_back(path + "/re.txt.0"); + files.push_back(path + "/re.txt.1"); + files.push_back(path + "/re.txt.2"); + files.push_back(path + "/re.txt.3"); DocScorer ds(IBM_BLEU, files); } diff --git a/sanity.jam b/sanity.jam index 1b37a4e7..a89e95f9 100644 --- a/sanity.jam +++ b/sanity.jam @@ -21,11 +21,7 @@ rule test_flags ( flags * ) { echo $(cmd) ; echo $(ret) ; } - if $(ret[2]) = 0 { - return true ; - } else { - return ; - } + return $(ret[2]) == 0 ; } rule test_header ( name ) { @@ -182,3 +178,9 @@ rule install-headers ( name : list * : source-root ? ) { source-root ?= "." 
; install $(name) : $(list) : $(includedir) $(source-root) ; } + +rule build-projects ( projects * ) { + for p in $(projects) { + build-project $(p) ; + } +} -- cgit v1.2.3 From 5d2fba19f7989b8a2c55834a5735f5fd5b60197c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sat, 12 May 2012 14:44:02 -0400 Subject: Build fixes: correct config error, fix paths for tests --- decoder/cfg_test.cc | 3 ++- decoder/hg_test.cc | 3 ++- decoder/hg_test.h | 6 +++--- jam-files/sanity.jam | 6 +++++- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'decoder') diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index c61f9f2c..b8f4cf11 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -33,7 +33,8 @@ struct CFGTest : public TestWithParam { istringstream ws(wts); EXPECT_TRUE(ws>>featw); CSHOW(featw) - HGSetup::JsonTestFile(&hg,file); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + HGSetup::JsonTestFile(&hg,path,file); hg.Reweight(featw); cfg.Init(hg,true,true,false); } diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 8455a865..92ed98b2 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -335,7 +335,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { BOOST_AUTO_TEST_CASE(Small) { Hypergraph hg; - CreateSmallHG(&hg); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + CreateSmallHG(&hg, path); SparseVector wts; wts.set_value(FD::Convert("Model_0"), -2.0); wts.set_value(FD::Convert("Model_1"), -0.5); diff --git a/decoder/hg_test.h b/decoder/hg_test.h index 043f970a..2e308c37 100644 --- a/decoder/hg_test.h +++ b/decoder/hg_test.h @@ -46,10 +46,10 @@ struct HGSetup { ReadFile rf(f); HypergraphIO::ReadFromJSON(rf.stream(), hg); } - static void JsonTestFile(Hypergraph *hg,std::string n) { - JsonFile(hg,"test_data/"+n); + static void JsonTestFile(Hypergraph *hg,std::string path,std::string n) { + JsonFile(hg,path + "/"+n); } - static void CreateSmallHG(Hypergraph *hg) { JsonTestFile(hg,small_json); } + static void CreateSmallHG(Hypergraph *hg, std::string path) { JsonTestFile(hg,path,small_json); } }; namespace { diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam index a89e95f9..eeb59f6e 100644 --- a/jam-files/sanity.jam +++ b/jam-files/sanity.jam @@ -21,7 +21,11 @@ rule test_flags ( flags * ) { echo $(cmd) ; echo $(ret) ; } - return $(ret[2]) == 0 ; + if $(ret[2]) = 0 { + return true ; + } else { + return ; + } } rule test_header ( name ) { -- cgit v1.2.3