From ffe002f8792dd8693c12e9bc6a7f715ca170acfc Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 7 Jul 2010 20:59:59 +0000 Subject: fix bugs git-svn-id: https://ws10smt.googlecode.com/svn/trunk@180 ec762483-ff6d-05da-a07a-a48fb63a330f --- extools/featurize_grammar.cc | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'extools/featurize_grammar.cc') diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc index 8be057b0..771948ce 100644 --- a/extools/featurize_grammar.cc +++ b/extools/featurize_grammar.cc @@ -228,15 +228,11 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; } if (ptr == start) {cerr << "Warning! empty token.\n"; return ptr; } const WordID w = TD::Convert(string(buf, start, ptr - start)); - - if((IsBracket(buf[start]) and IsBracket(buf[ptr-1])) or( w == kDIV)) - p->push_back(1 * w); - else { - if (w == kDIV) return ptr; - p->push_back(w); - } + if (w == kDIV) return ptr; + p->push_back(w); } - return ptr; + assert(p->size() > 0); + return ptr; } void ParseLine(const char* buf, vector* cur_key, ID2RuleStatistics* counts) { @@ -251,8 +247,10 @@ void ParseLine(const char* buf, vector* cur_key, ID2RuleStatistics* coun cur_key->clear(); // key is: "[X] ||| word word word" int tmpp = ReadPhraseUntilDividerOrEnd(buf, 0, ptr, cur_key); - cur_key->push_back(kDIV); - ReadPhraseUntilDividerOrEnd(buf, tmpp, ptr, cur_key); + if (buf[tmpp] != '\t') { + cur_key->push_back(kDIV); + ReadPhraseUntilDividerOrEnd(buf, tmpp, ptr, cur_key); + } ++ptr; int start = ptr; int end = ptr; @@ -294,7 +292,6 @@ void ParseLine(const char* buf, vector* cur_key, ID2RuleStatistics* coun } } - void LexTranslationTable::createTTable(const char* buf){ AnnotatedParallelSentence sent; sent.ParseInputLine(buf); @@ -657,20 +654,11 @@ int main(int argc, char** argv){ fs1.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - //src.resize(cur_key.size() - 4); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src.at(i) = cur_key.at(i+2); - cerr << "Key: "; for (vector::const_iterator wit=cur_key.begin(); wit!=cur_key.end(); ++wit) cerr << TD::Convert(*wit) << " "; cerr << endl; - lhs = cur_key[0]; - cerr << buf << endl; for (ID2RuleStatistics::const_iterator it = cur_counts.begin(); it != cur_counts.end(); ++it) { - - cerr << "READ: <"; for (vector::const_iterator wit=src.begin(); wit!=src.end(); ++wit) cerr << TD::Convert(*wit) << " "; - cerr << "|||"; for (vector::const_iterator wit=it->first.begin(); wit!=it->first.end(); ++wit) cerr << " " << TD::Convert(*wit); - cerr << ">\n"; - for (int i = 0; i < extractors.size(); ++i) extractors[i]->ObserveFilteredRule(lhs, src, it->first); } @@ -681,7 +669,7 @@ int main(int argc, char** argv){ cin.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src[i] = cur_key[i+2]; lhs = cur_key[0]; for (ID2RuleStatistics::const_iterator it = cur_counts.begin(); it != cur_counts.end(); ++it) { @@ -697,7 +685,7 @@ int main(int argc, char** argv){ fs2.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src[i] = cur_key[i+2]; lhs = cur_key[0]; -- cgit v1.2.3