summaryrefslogtreecommitdiff
path: root/extools/striped_grammar.h
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 06:29:00 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 06:29:00 +0000
commit f47330182fdf2e44eb28d39d8db55deb31b54d1f (patch)
tree 4b074d1f5a22d899a55ba2017ebc3ce5e65693ba /extools/striped_grammar.h
parent 2dc76ceae3dfbe333b6b404e5b1298be99b211c9 (diff)
start moving toward striped grammar lexer
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@233 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'extools/striped_grammar.h')
-rw-r--r-- extools/striped_grammar.h 54
1 files changed, 54 insertions, 0 deletions
diff --git a/extools/striped_grammar.h b/extools/striped_grammar.h
new file mode 100644
index 00000000..cdf529d6
--- /dev/null
+++ b/extools/striped_grammar.h
@@ -0,0 +1,54 @@
+#ifndef _STRIPED_GRAMMAR_H_
+#define _STRIPED_GRAMMAR_H_
+
+#include <iostream>
+#include <boost/functional/hash.hpp>
+#include <vector>
+#include <tr1/unordered_map>
+#include "sparse_vector.h"
+#include "wordid.h"
+#include "tdict.h"
+
+// Statistics / information recorded for a rule pair: a sparse vector of
+// float-valued counts plus a list of (short, short) alignment pairs.
+struct RuleStatistics {
+  SparseVector<float> counts;
+  std::vector<std::pair<short,short> > aligns;
+
+  // Leaves both members default-constructed.
+  RuleStatistics() {}
+
+  // Copies `al` into `aligns`, then stores `val` in `counts` under the id
+  // `name`.
+  RuleStatistics(int name,
+                 float val,
+                 const std::vector<std::pair<short,short> >& al)
+      : aligns(al) {
+    this->counts.set_value(name, val);
+  }
+
+  // Declared here, defined out of line. Presumably fills this object from the
+  // portion of `buf` delimited by `start` and `end` -- TODO confirm the exact
+  // range convention (inclusive/exclusive) against the definition.
+  void ParseRuleStatistics(const char* buf, int start, int end);
+
+  // Adds rhs.counts into this object's counts and returns *this.
+  // `aligns` is left untouched: alignments from `rhs` are not merged in.
+  RuleStatistics& operator+=(const RuleStatistics& rhs) {
+    this->counts += rhs.counts;
+    return *this;
+  }
+};
+// Stream-insertion operator for RuleStatistics. Only declared in this header;
+// the definition (and therefore the output format) lives elsewhere.
+std::ostream& operator<<(std::ostream& os, const RuleStatistics& s);
+
+// Writes the symbols of `v` to `*os`, separated by single spaces.
+//   - a negative id is written as '[' TD::Convert(-id) ']'
+//   - any other id is written as TD::Convert(id)
+// Nothing is written for an empty vector. `os` is dereferenced
+// unconditionally, so it must not be null.
+inline void WriteNamed(const std::vector<WordID>& v, std::ostream* os) {
+  // Use the container's own (unsigned) size type for the index so the loop
+  // bound is not a signed/unsigned comparison.
+  for (std::vector<WordID>::size_type i = 0; i < v.size(); ++i) {
+    // Separator goes before every element except the first.
+    if (i > 0) { (*os) << ' '; }
+    if (v[i] < 0) {
+      (*os) << '[' << TD::Convert(-v[i]) << ']';
+    } else {
+      (*os) << TD::Convert(v[i]);
+    }
+  }
+}
+
+// Writes the symbols of `v` to `*os`, separated by single spaces.
+//   - an id <= 0 is written as the bracketed number '[' (1 - id) ']',
+//     so 0 prints as [1], -1 as [2], and so on
+//   - a positive id is written as TD::Convert(id)
+// Nothing is written for an empty vector. `os` is dereferenced
+// unconditionally, so it must not be null.
+inline void WriteAnonymous(const std::vector<WordID>& v, std::ostream* os) {
+  // Use the container's own (unsigned) size type for the index so the loop
+  // bound is not a signed/unsigned comparison.
+  for (std::vector<WordID>::size_type i = 0; i < v.size(); ++i) {
+    // Separator goes before every element except the first.
+    if (i > 0) { (*os) << ' '; }
+    if (v[i] <= 0) {
+      (*os) << '[' << (1-v[i]) << ']';
+    } else {
+      (*os) << TD::Convert(v[i]);
+    }
+  }
+}
+
+// Hash map from a sequence of WordIDs to the RuleStatistics stored for that
+// sequence; keys are hashed with boost::hash.
+typedef std::tr1::unordered_map<std::vector<WordID>, RuleStatistics, boost::hash<std::vector<WordID> > > ID2RuleStatistics;
+
+// Groups the striped-grammar reading entry point with its callback type.
+struct StripedGrammarLexer {
+  // Signature of the function pointer accepted by ReadStripedGrammar. It is
+  // handed an lhs WordID, a source-side rhs sequence, a map of rule
+  // statistics, and an opaque `extra` pointer.
+  typedef void (*GrammarCallback)(WordID lhs,
+                                  const std::vector<WordID>& src_rhs,
+                                  const ID2RuleStatistics& rules,
+                                  void *extra);
+
+  // Declared here, defined elsewhere. Presumably reads a striped grammar from
+  // `*in` and invokes `func`, forwarding `extra` to it -- TODO confirm the
+  // invocation granularity and input format against the definition.
+  static void ReadStripedGrammar(std::istream* in,
+                                 GrammarCallback func,
+                                 void* extra);
+};
+
+#endif