summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
authorarmatthews <armatthe@cmu.edu>2015-02-04 18:45:05 -0500
committerarmatthews <armatthe@cmu.edu>2015-02-04 18:45:05 -0500
commit8957ca4266a31e067fe139acd0b82d669a33287c (patch)
treedcc5ff7bf55037e042b66ade587248b19166903a /decoder
parentc7b2a39958912d7b85a384a871609e6db73042c7 (diff)
Added dictionary support to PLF
Diffstat (limited to 'decoder')
-rw-r--r--decoder/hg_io.cc80
1 files changed, 56 insertions, 24 deletions
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index d97ab3dc..59d0b851 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -134,7 +134,10 @@ inline void eatws(const std::string& in, int& c) {
std::string getEscapedString(const std::string& in, int &c)
{
eatws(in,c);
- if (get(in,c++) != quote) return "ERROR";
+ if (get(in,c++) != quote) {
+ cerr << "Expected escaped string to begin with " << quote << ". Got " << get(in, c - 1) << "\n";
+ abort();
+ }
std::string res;
char cur = 0;
do {
@@ -152,7 +155,7 @@ float getFloat(const std::string& in, int &c)
{
std::string tmp;
eatws(in,c);
- while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',') {
+ while (c < (int)in.size() && get(in,c) != ' ' && get(in,c) != ')' && get(in,c) != ',' && get(in,c) != '}') {
tmp += get(in,c++);
}
eatws(in,c);
@@ -177,7 +180,18 @@ int getInt(const std::string& in, int &c)
// maximum number of nodes permitted
#define MAX_NODES 100000000
-// parse ('foo', 0.23)
+
+void ReadPLFFeature(const std::string& in, int &c, map<string, float>& features) {
+ eatws(in,c);
+ string name = getEscapedString(in,c);
+ eatws(in,c);
+ if (get(in,c++) != ':') { cerr << "PCN/PLF parse error: expected : after feature name " << name << "\n"; abort(); }
+ float value = getFloat(in, c);
+ eatws(in,c);
+ features[name] = value;
+}
+
+// parse ('foo', 0.23, 1)
void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) {
if (get(in,c++) != '(') { cerr << "PCN/PLF parse error: expected (\n"; abort(); }
vector<WordID> ewords(2, 0);
@@ -186,22 +200,46 @@ void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) {
r->ComputeArity();
// cerr << "RULE: " << r->AsString() << endl;
if (get(in,c++) != ',') { cerr << in << endl; cerr << "PCN/PLF parse error: expected , after string\n"; abort(); }
+ eatws(in,c);
+
+ map<string, float> features;
size_t cnNext = 1;
- std::vector<float> probs;
- probs.push_back(getFloat(in,c));
- while (get(in,c) == ',') {
+ // Read in sparse feature format
+ if (get(in,c) == '{') {
c++;
- float val = getFloat(in,c);
- probs.push_back(val);
- // cerr << val << endl; //REMO
+ ReadPLFFeature(in, c, features);
+ while (get(in,c) == ',') {
+ c++;
+ if (get(in,c) == '}') { break; }
+ ReadPLFFeature(in, c, features);
+ }
+ if (get(in,c++) != '}') { cerr << "PCN/PLF parse error: expected } after feature dictionary\n"; abort(); }
+ eatws(in,c);
+ if (get(in, c++) != ',') { cerr << "PCN/PLF parse error: expected , after feature dictionary\n"; abort(); }
+ cnNext = static_cast<size_t>(getFloat(in, c));
}
- //if we read more than one prob, this was a lattice, last item was column increment
- if (probs.size()>1) {
+ // Read in dense feature format
+ else {
+ std::vector<float> probs;
+ probs.push_back(getFloat(in,c));
+ while (get(in,c) == ',') {
+ c++;
+ float val = getFloat(in,c);
+ probs.push_back(val);
+ // cerr << val << endl; //REMO
+ }
+ if (probs.size() == 0) { cerr << "PCN/PLF parse error: missing destination state increment\n"; abort(); }
+
+ // the last item was column increment
cnNext = static_cast<size_t>(probs.back());
probs.pop_back();
- if (cnNext < 1) { cerr << cnNext << endl << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; abort(); }
+
+ for (unsigned i = 0; i < probs.size(); ++i) {
+ features["LatticeCost_" + to_string(i)] = probs[i];
+ }
}
- if (get(in,c++) != ')') { cerr << "PCN/PLF parse error: expected ) at end of cn alt block\n"; abort(); }
+ if (get(in,c++) != ')') { cerr << "PCN/PLF parse error: expected ) at end of cn alt block. Got " << get(in, c-1) << "\n"; abort(); }
+ if (cnNext < 1) { cerr << cnNext << endl << "PCN/PLF parse error: bad link length at last element of cn alt block\n"; abort(); }
eatws(in,c);
Hypergraph::TailNodeVector tail(1, cur_node);
Hypergraph::Edge* edge = hg->AddEdge(r, tail);
@@ -210,21 +248,15 @@ void ReadPLFEdge(const std::string& in, int &c, int cur_node, Hypergraph* hg) {
assert(head_node < MAX_NODES); // prevent malicious PLFs from using all the memory
if (hg->nodes_.size() < (head_node + 1)) { hg->ResizeNodes(head_node + 1); }
hg->ConnectEdgeToHeadNode(edge, &hg->nodes_[head_node]);
- if (probs.size() != 0) {
- if (probs.size() == 1) {
- edge->feature_values_.set_value(FD::Convert("LatticeCost"), probs[0]);
- } else {
- cerr << "Don't know how to deal with multiple lattice edge features: implement Python dictionary format.\n";
- abort();
- }
+ for (map<string, float>::iterator it = features.begin(); it != features.end(); ++it) {
+ edge->feature_values_.set_value(FD::Convert(it->first), it->second);
}
}
-// parse (('foo', 0.23), ('bar', 0.77))
+// parse (('foo', 0.23, 1), ('bar', 0.77, 1))
void ReadPLFNode(const std::string& in, int &c, int cur_node, int line, Hypergraph* hg) {
- //cerr << "PLF READING NODE " << cur_node << endl;
if (hg->nodes_.size() < (cur_node + 1)) { hg->ResizeNodes(cur_node + 1); }
- if (get(in,c++) != '(') { cerr << line << ": Syntax error 1\n"; abort(); }
+ if (get(in,c++) != '(') { cerr << line << ": Syntax error 1 in PLF\n"; abort(); }
eatws(in,c);
while (1) {
if (c > (int)in.size()) { break; }
@@ -249,7 +281,7 @@ void HypergraphIO::ReadFromPLF(const std::string& in, Hypergraph* hg, int line)
hg->clear();
int c = 0;
int cur_node = 0;
- if (in[c++] != '(') { cerr << line << ": Syntax error!\n"; abort(); }
+ if (in[c++] != '(') { cerr << line << ": Syntax error in PLF!\n"; abort(); }
while (1) {
if (c > (int)in.size()) { break; }
if (PLF::get(in,c) == ')') {