summary refs log tree commit diff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rwxr-xr-x decoder/apply_fsa_models.cc 20
-rwxr-xr-x decoder/apply_fsa_models.h 27
-rw-r--r-- decoder/cdec.cc 2
-rwxr-xr-x decoder/cfg.cc 22
-rwxr-xr-x decoder/cfg.h 16
-rwxr-xr-x decoder/cfg_options.h 1
-rwxr-xr-x decoder/program_options.h 25
7 files changed, 87 insertions, 26 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 1c30eb90..2854b28b 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -13,6 +13,8 @@
using namespace std;
+DEFINE_NAMED_ENUM(FSA_BY)
+
struct ApplyFsa {
ApplyFsa(HgCFG &i,
const SentenceMetadata& smeta,
@@ -74,6 +76,7 @@ void ApplyFsaModels(HgCFG &i,
a.Compute();
}
+/*
namespace {
char const* anames[]={
"BU_CUBE",
@@ -82,14 +85,18 @@ char const* anames[]={
0
};
}
+*/
//TODO: named enum type in boost?
std::string ApplyFsaBy::name() const {
- return anames[algorithm];
+// return anames[algorithm];
+ return GetName(algorithm);
}
std::string ApplyFsaBy::all_names() {
+ return FsaByNames(" ");
+ /*
std::ostringstream o;
for (int i=0;i<N_ALGORITHMS;++i) {
assert(anames[i]);
@@ -97,19 +104,24 @@ std::string ApplyFsaBy::all_names() {
o<<anames[i];
}
return o.str();
+ */
}
ApplyFsaBy::ApplyFsaBy(std::string const& n, int pop_limit) : pop_limit(pop_limit) {
- algorithm=0;
std::string uname=toupper(n);
+ algorithm=GetFsaBy(uname);
+/*anames=0;
while(anames[algorithm] && anames[algorithm] != uname) ++algorithm;
if (!anames[algorithm])
throw std::runtime_error("Unknown ApplyFsaBy type: "+n+" - legal types: "+all_names());
+*/
}
-ApplyFsaBy::ApplyFsaBy(int i, int pop_limit) : pop_limit(pop_limit) {
- if (i<0 || i>=N_ALGORITHMS)
+ApplyFsaBy::ApplyFsaBy(FsaBy i, int pop_limit) : pop_limit(pop_limit) {
+/* if (i<0 || i>=N_ALGORITHMS)
throw std::runtime_error("Unknown ApplyFsaBy type id: "+itos(i)+" - legal types: "+all_names());
+*/
+ GetName(i); // checks validity
algorithm=i;
}
diff --git a/decoder/apply_fsa_models.h b/decoder/apply_fsa_models.h
index 5120fb4e..6561c70c 100755
--- a/decoder/apply_fsa_models.h
+++ b/decoder/apply_fsa_models.h
@@ -4,25 +4,36 @@
#include <string>
#include <iostream>
#include "feature_vector.h"
+#include "named_enum.h"
struct FsaFeatureFunction;
struct Hypergraph;
struct SentenceMetadata;
struct HgCFG;
+
+#define FSA_BY(X,t) \
+ X(t,BU_CUBE,) \
+ X(t,BU_FULL,) \
+ X(t,EARLEY,) \
+
+#define FSA_BY_TYPE FsaBy
+
+DECLARE_NAMED_ENUM(FSA_BY)
+
struct ApplyFsaBy {
- enum {
- BU_CUBE,
- BU_FULL,
- EARLEY,
- N_ALGORITHMS
- };
+/*enum {
+ BU_CUBE,
+ BU_FULL,
+ EARLEY,
+ N_ALGORITHMS
+ };*/
int pop_limit; // only applies to BU_FULL so far
bool IsBottomUp() const {
return algorithm==BU_FULL || algorithm==BU_CUBE;
}
int BottomUpAlgorithm() const;
- int algorithm;
+ FsaBy algorithm;
std::string name() const;
friend inline std::ostream &operator << (std::ostream &o,ApplyFsaBy const& c) {
o << c.name();
@@ -30,7 +41,7 @@ struct ApplyFsaBy {
o << "("<<c.pop_limit<<")";
return o;
}
- explicit ApplyFsaBy(int alg, int poplimit=200);
+ explicit ApplyFsaBy(FsaBy alg, int poplimit=200);
ApplyFsaBy(std::string const& name, int poplimit=200);
ApplyFsaBy(const ApplyFsaBy &o) : algorithm(o.algorithm) { }
static std::string all_names(); // space separated
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 3633febd..5898b245 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -193,7 +193,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
dconfig_options.add(opts).add(cfgo);
//add(opts).add(cfgo)
dcmdline_options.add(dconfig_options).add(clo);
-
+ argv_minus_to_underscore(argc,argv);
po::store(parse_command_line(argc, argv, dcmdline_options), conf);
if (conf.count("compgen")) {
print_options(cout,dcmdline_options);
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index 81a17355..aa9e5f30 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -11,13 +11,21 @@ using namespace std;
namespace {
CFG::BinRhs nullrhs(std::numeric_limits<int>::min(),std::numeric_limits<int>::min());
-}
-
-WordID CFG::BinName(BinRhs const& b)
+// index i >= N.size()? then it's in M[i-N.size()]
+WordID BinName(CFG::BinRhs const& b,CFG::NTs const& N,CFG::NTs const& M)
{
+ int nn=N.size();
ostringstream o;
-#define BinNameOWORD(w) do { int n=w; if (n>0) o << TD::Convert(n); else { o << 'V' << -n; } } while(0)
+#define BinNameOWORD(w) \
+ do { \
+ int n=w; if (n>0) o << TD::Convert(n); \
+ else { \
+ int i=-n; \
+ CFG::NT const&nt = i<nn?N[i]:M[i-nn]; \
+ o << nt.from << i; } \
+ } while(0)
+
BinNameOWORD(b.first);
o<<'+';
BinNameOWORD(b.second);
@@ -25,6 +33,10 @@ WordID CFG::BinName(BinRhs const& b)
return TD::Convert(o.str());
}
+}
+
+
+
void CFG::Binarize(CFGBinarize const& b) {
if (!b.Binarizing()) return;
if (!b.bin_l2r) {
@@ -57,7 +69,7 @@ void CFG::Binarize(CFGBinarize const& b) {
new_nts.back().ruleids.push_back(newruleid);
new_rules.push_back(Rule(newnt,bin));
if (b.bin_name_nts)
- new_nts.back().from.nt=BinName(bin);
+ new_nts.back().from.nt=BinName(bin,nts,new_nts);
++newnt;++newruleid;
}
}
diff --git a/decoder/cfg.h b/decoder/cfg.h
index 64924f14..e1f818e8 100755
--- a/decoder/cfg.h
+++ b/decoder/cfg.h
@@ -40,11 +40,10 @@ struct CFG {
typedef std::vector<RuleHandle> Ruleids;
void print_nt_name(std::ostream &o,NTHandle n) const {
- o << nts[n].from;
+ o << nts[n].from << n;
}
typedef std::pair<int,int> BinRhs;
- WordID BinName(BinRhs const& b);
struct Rule {
// for binarizing - no costs/probs
@@ -106,16 +105,17 @@ struct CFG {
swap(goal_nt,o.goal_nt);
}
void Binarize(CFGBinarize const& binarize_options);
+
+ typedef std::vector<NT> NTs;
+ NTs nts;
+ typedef std::vector<Rule> Rules;
+ Rules rules;
+ int goal_nt;
+ prob_t goal_inside,pushed_inside; // when we push viterbi weights to goal, we store the removed probability in pushed_inside
protected:
bool uninit;
Hypergraph const* hg_; // shouldn't be used for anything, esp. after binarization
- prob_t goal_inside,pushed_inside; // when we push viterbi weights to goal, we store the removed probability in pushed_inside
// rules/nts will have same index as hg edges/nodes
- typedef std::vector<Rule> Rules;
- Rules rules;
- typedef std::vector<NT> NTs;
- NTs nts;
- int goal_nt;
};
inline void swap(CFG &a,CFG &b) {
diff --git a/decoder/cfg_options.h b/decoder/cfg_options.h
index 956586f0..acd8d05b 100755
--- a/decoder/cfg_options.h
+++ b/decoder/cfg_options.h
@@ -28,6 +28,7 @@ struct CFGOptions {
void Validate() {
format.Validate();
binarize.Validate();
+// if (cfg_output.empty()) binarize.bin_name_nts=false;
}
char const* description() const {
return "CFG output options";
diff --git a/decoder/program_options.h b/decoder/program_options.h
index 251f5680..87afb320 100755
--- a/decoder/program_options.h
+++ b/decoder/program_options.h
@@ -13,6 +13,31 @@
#include <iosfwd>
+// Rewrites one argument string in place: changes --opt-name=x to --opt_name=x for all strings x. Only '-' characters after the leading "--" and before the first '=' are replaced; strings that do not start with "--" are left untouched. Returns the number of characters replaced. danger: probably the argv from int main isn't supposed to be modified? NOTE(review): ISO C states the argv strings are modifiable by the program - confirm this holds for the C++ toolchains in use.
+inline int arg_minusto_underscore(char *s) {
+ if (!*s || *s++ != '-') return 0; // empty string, or first character is not '-': nothing to do
+ if (!*s || *s++ != '-') return 0; // second character must also be '-', i.e. only "--..." arguments are rewritten
+ int chars_replaced=0;
+ for(;*s;++s) { // s now points just past the leading "--"
+ if (*s=='=') // stop at the first '=' so the text after it (the x in --opt-name=x) is never modified
+ break;
+ if (*s=='-') {
+ *s='_'; // modifies the caller's buffer directly
+ ++chars_replaced;
+ }
+ }
+ return chars_replaced;
+}
+
+inline // applies arg_minusto_underscore to argv[1] .. argv[argc-1] in place; returns the total number of characters replaced
+int argv_minus_to_underscore(int argc, char **argv) {
+ int chars_replaced=0;
+ for (int i=1;i<argc;++i) { // starts at 1, so argv[0] is never touched
+ chars_replaced+=arg_minusto_underscore(argv[i]); // NOTE(review): a bare "--" argument does not stop the loop; later arguments beginning with "--" are still rewritten
+ }
+ return chars_replaced;
+}
+
template <class T>
boost::program_options::typed_value<T>*
defaulted_value(T *v)