diff options
Diffstat (limited to 'extractor/features')
-rw-r--r-- | extractor/features/count_source_target.h | 3 | ||||
-rw-r--r-- | extractor/features/feature.h | 6 | ||||
-rw-r--r-- | extractor/features/is_source_singleton.h | 3 | ||||
-rw-r--r-- | extractor/features/is_source_target_singleton.h | 3 | ||||
-rw-r--r-- | extractor/features/max_lex_source_given_target.h | 3 | ||||
-rw-r--r-- | extractor/features/max_lex_target_given_source.h | 3 | ||||
-rw-r--r-- | extractor/features/sample_source_count.h | 4 | ||||
-rw-r--r-- | extractor/features/target_given_source_coherent.h | 4 |
8 files changed, 29 insertions, 0 deletions
diff --git a/extractor/features/count_source_target.h b/extractor/features/count_source_target.h index dec78883..8747fa60 100644 --- a/extractor/features/count_source_target.h +++ b/extractor/features/count_source_target.h @@ -6,6 +6,9 @@ namespace extractor { namespace features { +/** + * Feature for the number of times a word pair was found in the bitext. + */ class CountSourceTarget : public Feature { public: double Score(const FeatureContext& context) const; diff --git a/extractor/features/feature.h b/extractor/features/feature.h index 6693ccbf..36ea504a 100644 --- a/extractor/features/feature.h +++ b/extractor/features/feature.h @@ -10,6 +10,9 @@ using namespace std; namespace extractor { namespace features { +/** + * Structure providing context for computing feature scores. + */ struct FeatureContext { FeatureContext(const Phrase& source_phrase, const Phrase& target_phrase, double source_phrase_count, int pair_count, int num_samples) : @@ -24,6 +27,9 @@ struct FeatureContext { int num_samples; }; +/** + * Base class for features. + */ class Feature { public: virtual double Score(const FeatureContext& context) const = 0; diff --git a/extractor/features/is_source_singleton.h b/extractor/features/is_source_singleton.h index 30f76c6d..b8352d0e 100644 --- a/extractor/features/is_source_singleton.h +++ b/extractor/features/is_source_singleton.h @@ -6,6 +6,9 @@ namespace extractor { namespace features { +/** + * Boolean feature checking if the source phrase occurs only once in the data. + */ class IsSourceSingleton : public Feature { public: double Score(const FeatureContext& context) const; diff --git a/extractor/features/is_source_target_singleton.h b/extractor/features/is_source_target_singleton.h index 12fb6ee6..dacfebba 100644 --- a/extractor/features/is_source_target_singleton.h +++ b/extractor/features/is_source_target_singleton.h @@ -6,6 +6,9 @@ namespace extractor { namespace features { +/** + * Boolean feature checking if the phrase pair occurs only once in the data. + */ class IsSourceTargetSingleton : public Feature { public: double Score(const FeatureContext& context) const; diff --git a/extractor/features/max_lex_source_given_target.h b/extractor/features/max_lex_source_given_target.h index bfa7ef1b..461b0ebf 100644 --- a/extractor/features/max_lex_source_given_target.h +++ b/extractor/features/max_lex_source_given_target.h @@ -13,6 +13,9 @@ class TranslationTable; namespace features { +/** + * Feature computing max(p(f | e)) across all pairs of words in the phrase pair. + */ class MaxLexSourceGivenTarget : public Feature { public: MaxLexSourceGivenTarget(shared_ptr<TranslationTable> table); diff --git a/extractor/features/max_lex_target_given_source.h b/extractor/features/max_lex_target_given_source.h index 66cf0914..c3c87327 100644 --- a/extractor/features/max_lex_target_given_source.h +++ b/extractor/features/max_lex_target_given_source.h @@ -13,6 +13,9 @@ class TranslationTable; namespace features { +/** + * Feature computing max(p(e | f)) across all pairs of words in the phrase pair. + */ class MaxLexTargetGivenSource : public Feature { public: MaxLexTargetGivenSource(shared_ptr<TranslationTable> table); diff --git a/extractor/features/sample_source_count.h b/extractor/features/sample_source_count.h index 53c7f954..ee6e59a0 100644 --- a/extractor/features/sample_source_count.h +++ b/extractor/features/sample_source_count.h @@ -6,6 +6,10 @@ namespace extractor { namespace features { +/** + * Feature scoring the number of times the source phrase occurs in the sampled + * set. + */ class SampleSourceCount : public Feature { public: double Score(const FeatureContext& context) const; diff --git a/extractor/features/target_given_source_coherent.h b/extractor/features/target_given_source_coherent.h index 80d9f617..e66d70a5 100644 --- a/extractor/features/target_given_source_coherent.h +++ b/extractor/features/target_given_source_coherent.h @@ -6,6 +6,10 @@ namespace extractor { namespace features { +/** + * Feature computing the ratio of the phrase pair count over all source phrase + * occurrences (sampled). + */ class TargetGivenSourceCoherent : public Feature { public: double Score(const FeatureContext& context) const; |