From da176941c1f481f14e93bd7d055cc29cac0ea8c8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 12 Aug 2012 23:33:21 -0400 Subject: use new union api --- extools/extract.h | 94 ------------------------------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 extools/extract.h (limited to 'extools/extract.h') diff --git a/extools/extract.h b/extools/extract.h deleted file mode 100644 index e9ea5e65..00000000 --- a/extools/extract.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef _EXTRACT_H_ -#define _EXTRACT_H_ - -#include -#include -#include -#include -#include "array2d.h" -#include "wordid.h" -#include "sparse_vector.h" - -struct AnnotatedParallelSentence; - -// usually represents a consistent phrase, which may -// be annotated with a type (cat) -// inside the rule extractor, this class is also used to represent a word -// in a partial rule. -struct ParallelSpan { - // i1 = i of f side - // i2 = j of f side - // j1 = i of e side - // j2 = j of e side - short i1,i2,j1,j2; - // cat is set by AnnotatePhrasesWithCategoryTypes, otherwise it's 0 - WordID cat; // category type of span (also overloaded by RuleItem class - // to be a word ID) - ParallelSpan() : i1(-1), i2(-1), j1(-1), j2(-1), cat() {} - // used by Rule class to represent a terminal symbol: - explicit ParallelSpan(WordID w) : i1(-1), i2(-1), j1(-1), j2(-1), cat(w) {} - ParallelSpan(int pi1, int pi2, int pj1, int pj2) : i1(pi1), i2(pi2), j1(pj1), j2(pj2), cat() {} - ParallelSpan(int pi1, int pi2, int pj1, int pj2, WordID c) : i1(pi1), i2(pi2), j1(pj1), j2(pj2), cat(c) {} - - // ParallelSpan is used in the Rule class where it is - // overloaded to also represent terminal symbols - inline bool IsVariable() const { return i1 != -1; } -}; - -// rule extraction logic lives here. this has no data, it's just got -// static member functions. -struct Extract { - // RuleObserver's CountRule is called for each rule extracted - // implement CountRuleImpl to do things like count the rules, - // write them to a file, etc. - struct RuleObserver { - RuleObserver() : count() {} - virtual void CountRule(WordID lhs, - const std::vector& rhs_f, - const std::vector& rhs_e, - const std::vector >& fe_terminal_alignments) { - ++count; - CountRuleImpl(lhs, rhs_f, rhs_e, fe_terminal_alignments); - } - virtual ~RuleObserver(); - - protected: - virtual void CountRuleImpl(WordID lhs, - const std::vector& rhs_f, - const std::vector& rhs_e, - const std::vector >& fe_terminal_alignments) = 0; - private: - int count; - }; - - // given a set of "tight" phrases and the aligned sentence they were - // extracted from, "loosen" them - static void LoosenPhraseBounds(const AnnotatedParallelSentence& sentence, - const int max_base_phrase_size, - std::vector* phrases); - - // extract all consistent phrase pairs, up to size max_base_phrase_size - // (on the source side). these phrases will be "tight". - static void ExtractBasePhrases(const int max_base_phrase_size, - const AnnotatedParallelSentence& sentence, - std::vector* phrases); - - // this uses the TARGET span (i,j) to annotate phrases, will copy - // phrases if there is more than one annotation. - static void AnnotatePhrasesWithCategoryTypes(const WordID default_cat, - const std::map< boost::tuple, std::vector > &types, - std::vector* phrases); - - // use the Chiang (2007) extraction logic to extract consistent subphrases - // observer->CountRule is called once for each rule extracted - static void ExtractConsistentRules(const AnnotatedParallelSentence& sentence, - const std::vector& phrases, - const int max_vars, - const int max_syms, - const bool permit_adjacent_nonterminals, - const bool require_aligned_terminal, - RuleObserver* observer, - std::vector* all_cats); -}; - -#endif -- cgit v1.2.3