diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-13 06:29:00 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-13 06:29:00 +0000 |
commit | f47330182fdf2e44eb28d39d8db55deb31b54d1f (patch) | |
tree | 4b074d1f5a22d899a55ba2017ebc3ce5e65693ba /extools/striped_grammar.h | |
parent | 2dc76ceae3dfbe333b6b404e5b1298be99b211c9 (diff) |
start moving toward striped grammar lexer
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@233 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'extools/striped_grammar.h')
-rw-r--r-- | extools/striped_grammar.h | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/extools/striped_grammar.h b/extools/striped_grammar.h new file mode 100644 index 00000000..cdf529d6 --- /dev/null +++ b/extools/striped_grammar.h @@ -0,0 +1,54 @@ +#ifndef _STRIPED_GRAMMAR_H_ +#define _STRIPED_GRAMMAR_H_ + +#include <iostream> +#include <boost/functional/hash.hpp> +#include <vector> +#include <tr1/unordered_map> +#include "sparse_vector.h" +#include "wordid.h" +#include "tdict.h" + +// represents statistics / information about a rule pair +struct RuleStatistics { + SparseVector<float> counts; + std::vector<std::pair<short,short> > aligns; + RuleStatistics() {} + RuleStatistics(int name, float val, const std::vector<std::pair<short,short> >& al) : + aligns(al) { + counts.set_value(name, val); + } + void ParseRuleStatistics(const char* buf, int start, int end); + RuleStatistics& operator+=(const RuleStatistics& rhs) { + counts += rhs.counts; + return *this; + } +}; +std::ostream& operator<<(std::ostream& os, const RuleStatistics& s); + +inline void WriteNamed(const std::vector<WordID>& v, std::ostream* os) { + bool first = true; + for (int i = 0; i < v.size(); ++i) { + if (first) { first = false; } else { (*os) << ' '; } + if (v[i] < 0) { (*os) << '[' << TD::Convert(-v[i]) << ']'; } + else (*os) << TD::Convert(v[i]); + } +} + +inline void WriteAnonymous(const std::vector<WordID>& v, std::ostream* os) { + bool first = true; + for (int i = 0; i < v.size(); ++i) { + if (first) { first = false; } else { (*os) << ' '; } + if (v[i] <= 0) { (*os) << '[' << (1-v[i]) << ']'; } + else (*os) << TD::Convert(v[i]); + } +} + +typedef std::tr1::unordered_map<std::vector<WordID>, RuleStatistics, boost::hash<std::vector<WordID> > > ID2RuleStatistics; + +struct StripedGrammarLexer { + typedef void (*GrammarCallback)(WordID lhs, const std::vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void *extra); + static void ReadStripedGrammar(std::istream* in, GrammarCallback func, void* extra); +}; + +#endif |