summary | refs | log | tree | commit | diff
path: root/extractor/rule_extractor.h
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/rule_extractor.h')
-rw-r--r-- extractor/rule_extractor.h | 16
1 file changed, 16 insertions, 0 deletions
diff --git a/extractor/rule_extractor.h b/extractor/rule_extractor.h
index 8b6daeea..bfec0225 100644
--- a/extractor/rule_extractor.h
+++ b/extractor/rule_extractor.h
@@ -22,6 +22,10 @@ class RuleExtractorHelper;
class Scorer;
class TargetPhraseExtractor;
+/**
+ * Structure containing data about the occurrences of a source-target phrase pair
+ * in the parallel corpus.
+ */
struct Extract {
Extract(const Phrase& source_phrase, const Phrase& target_phrase,
double pairs_count, const PhraseAlignment& alignment) :
@@ -34,6 +38,9 @@ struct Extract {
PhraseAlignment alignment;
};
+/**
+ * Component for extracting synchronous context-free grammar (SCFG) rules.
+ */
class RuleExtractor {
public:
RuleExtractor(shared_ptr<DataArray> source_data_array,
@@ -64,6 +71,8 @@ class RuleExtractor {
virtual ~RuleExtractor();
+ // Extracts SCFG rules given a source phrase and a set of its occurrences
+ // in the source data.
virtual vector<Rule> ExtractRules(const Phrase& phrase,
const PhraseLocation& location) const;
@@ -71,15 +80,22 @@ class RuleExtractor {
RuleExtractor();
private:
+ // Finds all target phrases that can be aligned with the source phrase for a
+ // particular occurrence in the data.
vector<Extract> ExtractAlignments(const Phrase& phrase,
const vector<int>& matching) const;
+ // Extracts all target phrases for a given occurrence of the source phrase in
+ // the data. Constructs a vector of Extracts using these target phrases.
void AddExtracts(
vector<Extract>& extracts, const Phrase& source_phrase,
const unordered_map<int, int>& source_indexes,
const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
int target_phrase_low, int target_phrase_high, int sentence_id) const;
+ // Adds a leading and/or trailing nonterminal to the source phrase and
+ // extracts target phrases that can be aligned with the extended source
+ // phrase.
void AddNonterminalExtremities(
vector<Extract>& extracts, const vector<int>& matching,
const vector<int>& chunklen, const Phrase& source_phrase,