summary | refs | log | tree | commit | diff
path: root/extractor/grammar_extractor.h
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/grammar_extractor.h')
-rw-r--r-- extractor/grammar_extractor.h 39
1 files changed, 39 insertions, 0 deletions
diff --git a/extractor/grammar_extractor.h b/extractor/grammar_extractor.h
new file mode 100644
index 00000000..05e153fc
--- /dev/null
+++ b/extractor/grammar_extractor.h
@@ -0,0 +1,39 @@
+#ifndef _GRAMMAR_EXTRACTOR_H_
+#define _GRAMMAR_EXTRACTOR_H_
+
+#include <string>
+#include <vector>
+
+#include "rule_factory.h"
+#include "vocabulary.h"
+
+using namespace std;  // NOTE(review): header-scope using-directive; shared_ptr, string and vector below are spelled unqualified and rely on it, and every includer inherits it.
+
+class Alignment;  // Forward declaration; in this header it is only used as a const reference constructor parameter.
+class DataArray;  // Forward declaration; in this header it is only used as a shared_ptr template argument.
+class Precomputation;  // Forward declaration; in this header it is only used as a const reference constructor parameter.
+class SuffixArray;  // Forward declaration; in this header it is only used as a shared_ptr template argument.
+
+class GrammarExtractor {  // Constructed from a suffix array, a data array, an alignment and a precomputation; exposes GetGrammar(). Member definitions are not in this header.
+ public:
+ GrammarExtractor(  // Declaration only; how the arguments are used is defined elsewhere.
+ shared_ptr<SuffixArray> source_suffix_array,  // Shared-ownership handle, taken by value.
+ shared_ptr<DataArray> target_data_array,  // Shared-ownership handle, taken by value.
+ const Alignment& alignment,  // Borrowed by const reference; NOTE(review): required lifetime is not visible here -- confirm in the definition.
+ const Precomputation& precomputation,  // Borrowed by const reference; NOTE(review): required lifetime is not visible here -- confirm in the definition.
+ int min_gap_size,  // NOTE(review): presumably an extraction limit forwarded to rule_factory -- confirm.
+ int max_rule_span,  // NOTE(review): presumably an extraction limit forwarded to rule_factory -- confirm.
+ int max_nonterminals,  // NOTE(review): presumably an extraction limit forwarded to rule_factory -- confirm.
+ int max_rule_symbols,  // NOTE(review): presumably an extraction limit forwarded to rule_factory -- confirm.
+ bool use_baeza_yates);  // NOTE(review): presumably toggles a Baeza-Yates style algorithm variant -- confirm.
+
+ void GetGrammar(const string& sentence);  // Takes one sentence by const reference and returns nothing; where the resulting grammar goes is not visible in this header.
+
+ private:
+ vector<int> AnnotateWords(const vector<string>& words);  // Returns a vector<int> for a list of words; NOTE(review): presumably one vocabulary id per word -- confirm.
+
+ shared_ptr<Vocabulary> vocabulary;  // Shared-ownership handle to a Vocabulary (type presumably provided by vocabulary.h).
+ HieroCachingRuleFactory rule_factory;  // Held by value (type presumably provided by rule_factory.h).
+};
+
+#endif