From 6d43674e6b224281e43ccefc87224a7ba2fbb99a Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Sun, 10 Mar 2013 01:01:01 +0000 Subject: Added comments. Hooray! --- extractor/rule_factory.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'extractor/rule_factory.h') diff --git a/extractor/rule_factory.h b/extractor/rule_factory.h index d8dc2ccc..52e8712a 100644 --- a/extractor/rule_factory.h +++ b/extractor/rule_factory.h @@ -25,6 +25,17 @@ class State; class SuffixArray; class Vocabulary; +/** + * Component containing most of the logic for extracting SCFG rules for a given + * sentence. + * + * Given a sentence (as a vector of word ids), this class constructs all the + * possible source phrases starting from this sentence. For each source phrase, + * it finds all its occurrences in the source data and samples some of these + * occurrences to extract aligned source-target phrase pairs. A trie cache is + * used to avoid unnecessary computations if a source phrase can be constructed + * more than once (e.g. some words occur more than once in the sentence). + */ class HieroCachingRuleFactory { public: HieroCachingRuleFactory( @@ -58,21 +69,30 @@ class HieroCachingRuleFactory { virtual ~HieroCachingRuleFactory(); + // Constructs SCFG rules for a given sentence. + // (See class description for more details.) virtual Grammar GetGrammar(const vector& word_ids); protected: HieroCachingRuleFactory(); private: + // Checks if the phrase (if previously encountered) or its prefix have any + // occurrences in the source data. bool CannotHaveMatchings(shared_ptr node, int word_id); + // Checks if the phrase has previously been analyzed. bool RequiresLookup(shared_ptr node, int word_id); + // Creates a new state in the trie that corresponds to adding a trailing + // nonterminal to the current phrase. void AddTrailingNonterminal(vector symbols, const Phrase& prefix, const shared_ptr& prefix_node, bool starts_with_x); + // Extends the current state by possibly adding a nonterminal followed by a + // terminal. vector ExtendState(const vector& word_ids, const State& state, vector symbols, -- cgit v1.2.3