diff options
Diffstat (limited to 'extools')
-rw-r--r-- | extools/featurize_grammar.cc | 34 | ||||
-rw-r--r-- | extools/filter_grammar.cc | 11 |
2 files changed, 14 insertions, 31 deletions
diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc index 8be057b0..771948ce 100644 --- a/extools/featurize_grammar.cc +++ b/extools/featurize_grammar.cc @@ -228,15 +228,11 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; } if (ptr == start) {cerr << "Warning! empty token.\n"; return ptr; } const WordID w = TD::Convert(string(buf, start, ptr - start)); - - if((IsBracket(buf[start]) and IsBracket(buf[ptr-1])) or( w == kDIV)) - p->push_back(1 * w); - else { - if (w == kDIV) return ptr; - p->push_back(w); - } + if (w == kDIV) return ptr; + p->push_back(w); } - return ptr; + assert(p->size() > 0); + return ptr; } void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* counts) { @@ -251,8 +247,10 @@ void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* coun cur_key->clear(); // key is: "[X] ||| word word word" int tmpp = ReadPhraseUntilDividerOrEnd(buf, 0, ptr, cur_key); - cur_key->push_back(kDIV); - ReadPhraseUntilDividerOrEnd(buf, tmpp, ptr, cur_key); + if (buf[tmpp] != '\t') { + cur_key->push_back(kDIV); + ReadPhraseUntilDividerOrEnd(buf, tmpp, ptr, cur_key); + } ++ptr; int start = ptr; int end = ptr; @@ -294,7 +292,6 @@ void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* coun } } - void LexTranslationTable::createTTable(const char* buf){ AnnotatedParallelSentence sent; sent.ParseInputLine(buf); @@ -657,20 +654,11 @@ int main(int argc, char** argv){ fs1.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - //src.resize(cur_key.size() - 4); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src.at(i) = cur_key.at(i+2); - cerr << "Key: "; for (vector<WordID>::const_iterator wit=cur_key.begin(); wit!=cur_key.end(); ++wit) cerr << TD::Convert(*wit) << " "; cerr << endl; - lhs = cur_key[0]; - cerr << buf << endl; for (ID2RuleStatistics::const_iterator it = cur_counts.begin(); it != cur_counts.end(); ++it) { - - cerr << "READ: <"; for (vector<WordID>::const_iterator wit=src.begin(); wit!=src.end(); ++wit) cerr << TD::Convert(*wit) << " "; - cerr << "|||"; for (vector<WordID>::const_iterator wit=it->first.begin(); wit!=it->first.end(); ++wit) cerr << " " << TD::Convert(*wit); - cerr << ">\n"; - for (int i = 0; i < extractors.size(); ++i) extractors[i]->ObserveFilteredRule(lhs, src, it->first); } @@ -681,7 +669,7 @@ int main(int argc, char** argv){ cin.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src[i] = cur_key[i+2]; lhs = cur_key[0]; for (ID2RuleStatistics::const_iterator it = cur_counts.begin(); it != cur_counts.end(); ++it) { @@ -697,7 +685,7 @@ int main(int argc, char** argv){ fs2.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; ParseLine(buf, &cur_key, &cur_counts); - src.resize(cur_key.size() - 3); + src.resize(cur_key.size() - 2); for (int i = 0; i < src.size(); ++i) src[i] = cur_key[i+2]; lhs = cur_key[0]; diff --git a/extools/filter_grammar.cc b/extools/filter_grammar.cc index a2992f7d..6f0dcdfc 100644 --- a/extools/filter_grammar.cc +++ b/extools/filter_grammar.cc @@ -53,7 +53,6 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { } namespace { inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; } - inline bool IsBracket(char c){return c == '[' || c == ']';} inline void SkipWhitespace(const char* buf, int* ptr) { while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); } } @@ -68,14 +67,10 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; } if (ptr == start) {cerr << "Warning! empty token.\n"; return ptr; } const WordID w = TD::Convert(string(buf, start, ptr - start)); - - if((IsBracket(buf[start]) and IsBracket(buf[ptr-1])) or( w == kDIV)) - p->push_back(1 * w); - else { - if (w == kDIV) return ptr; - p->push_back(w); - } + if (w == kDIV) return ptr; + p->push_back(w); } + assert(p->size() > 0); return ptr; } |