From f47330182fdf2e44eb28d39d8db55deb31b54d1f Mon Sep 17 00:00:00 2001 From: redpony Date: Tue, 13 Jul 2010 06:29:00 +0000 Subject: start moving toward striped grammar lexer git-svn-id: https://ws10smt.googlecode.com/svn/trunk@233 ec762483-ff6d-05da-a07a-a48fb63a330f --- extools/striped_grammar.cc | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 extools/striped_grammar.cc (limited to 'extools/striped_grammar.cc') diff --git a/extools/striped_grammar.cc b/extools/striped_grammar.cc new file mode 100644 index 00000000..accf44eb --- /dev/null +++ b/extools/striped_grammar.cc @@ -0,0 +1,67 @@ +#include "striped_grammar.h" + +#include + +#include "sentence_pair.h" + +using namespace std; + +namespace { + inline bool IsWhitespace(char c) { return c == ' ' || c == '\t'; } + + inline void SkipWhitespace(const char* buf, int* ptr) { + while (buf[*ptr] && IsWhitespace(buf[*ptr])) { ++(*ptr); } + } +} + +void RuleStatistics::ParseRuleStatistics(const char* buf, int start, int end) { + int ptr = start; + counts.clear(); + aligns.clear(); + while (ptr < end) { + SkipWhitespace(buf, &ptr); + int vstart = ptr; + while(ptr < end && buf[ptr] != '=') ++ptr; + assert(buf[ptr] == '='); + assert(ptr > vstart); + if (buf[vstart] == 'A' && buf[vstart+1] == '=') { + ++ptr; + while (ptr < end && !IsWhitespace(buf[ptr])) { + while(ptr < end && buf[ptr] == ',') { ++ptr; } + assert(ptr < end); + vstart = ptr; + while(ptr < end && buf[ptr] != ',' && !IsWhitespace(buf[ptr])) { ++ptr; } + if (ptr > vstart) { + short a, b; + AnnotatedParallelSentence::ReadAlignmentPoint(buf, vstart, ptr, false, &a, &b); + aligns.push_back(make_pair(a,b)); + } + } + } else { + int name = FD::Convert(string(buf,vstart,ptr-vstart)); + ++ptr; + vstart = ptr; + while(ptr < end && !IsWhitespace(buf[ptr])) { ++ptr; } + assert(ptr > vstart); + counts.set_value(name, strtod(buf + vstart, NULL)); + } + } +} + +ostream& operator<<(ostream& os, const RuleStatistics& s) { + bool needspace = false; + for (SparseVector::const_iterator it = s.counts.begin(); it != s.counts.end(); ++it) { + if (needspace) os << ' '; else needspace = true; + os << FD::Convert(it->first) << '=' << it->second; + } + if (s.aligns.size() > 0) { + os << " A="; + needspace = false; + for (int i = 0; i < s.aligns.size(); ++i) { + if (needspace) os << ','; else needspace = true; + os << s.aligns[i].first << '-' << s.aligns[i].second; + } + } + return os; +} + -- cgit v1.2.3