summary | refs | log | tree | commit | diff
path: root/extractor/grammar_extractor.h
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-03-10 01:01:01 +0000
committer Paul Baltescu <pauldb89@gmail.com> 2013-03-10 01:01:01 +0000
commit 6d43674e6b224281e43ccefc87224a7ba2fbb99a (patch)
tree 36e0128b60755e47d217825fca13ccfb3f701158 /extractor/grammar_extractor.h
parent 1b9ca189fd0549bd6d969edf618f92ea59184b12 (diff)
Added comments. Hooray!
Diffstat (limited to 'extractor/grammar_extractor.h')
-rw-r--r-- extractor/grammar_extractor.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/extractor/grammar_extractor.h b/extractor/grammar_extractor.h
index 6b1dcf98..b36ceeb9 100644
--- a/extractor/grammar_extractor.h
+++ b/extractor/grammar_extractor.h
@@ -19,6 +19,10 @@ class Scorer;
class SuffixArray;
class Vocabulary;
+/**
+ * Class wrapping all the logic for extracting synchronous context-free
+ * grammars.
+ */
class GrammarExtractor {
public:
GrammarExtractor(
@@ -38,11 +42,15 @@ class GrammarExtractor {
GrammarExtractor(shared_ptr<Vocabulary> vocabulary,
shared_ptr<HieroCachingRuleFactory> rule_factory);
+ // Converts the sentence to a vector of word ids and uses the RuleFactory to
+ // extract the SCFG rules which may be used to decode the sentence.
Grammar GetGrammar(const string& sentence);
private:
+ // Splits the sentence into a vector of words.
vector<string> TokenizeSentence(const string& sentence);
+ // Maps the words to word ids.
vector<int> AnnotateWords(const vector<string>& words);
shared_ptr<Vocabulary> vocabulary;