summary | refs | log | tree | commit | diff
path: root/extractor/features
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/features')
-rw-r--r-- extractor/features/count_source_target.h | 3
-rw-r--r-- extractor/features/feature.h | 6
-rw-r--r-- extractor/features/is_source_singleton.h | 3
-rw-r--r-- extractor/features/is_source_target_singleton.h | 3
-rw-r--r-- extractor/features/max_lex_source_given_target.h | 3
-rw-r--r-- extractor/features/max_lex_target_given_source.h | 3
-rw-r--r-- extractor/features/sample_source_count.h | 4
-rw-r--r-- extractor/features/target_given_source_coherent.h | 4
8 files changed, 29 insertions, 0 deletions
diff --git a/extractor/features/count_source_target.h b/extractor/features/count_source_target.h
index dec78883..8747fa60 100644
--- a/extractor/features/count_source_target.h
+++ b/extractor/features/count_source_target.h
@@ -6,6 +6,9 @@
namespace extractor {
namespace features {
+/**
+ * Feature for the number of times a word pair was found in the bitext.
+ */
class CountSourceTarget : public Feature {
public:
double Score(const FeatureContext& context) const;
diff --git a/extractor/features/feature.h b/extractor/features/feature.h
index 6693ccbf..36ea504a 100644
--- a/extractor/features/feature.h
+++ b/extractor/features/feature.h
@@ -10,6 +10,9 @@ using namespace std;
namespace extractor {
namespace features {
+/**
+ * Structure providing context for computing feature scores.
+ */
struct FeatureContext {
FeatureContext(const Phrase& source_phrase, const Phrase& target_phrase,
double source_phrase_count, int pair_count, int num_samples) :
@@ -24,6 +27,9 @@ struct FeatureContext {
int num_samples;
};
+/**
+ * Base class for features.
+ */
class Feature {
public:
virtual double Score(const FeatureContext& context) const = 0;
diff --git a/extractor/features/is_source_singleton.h b/extractor/features/is_source_singleton.h
index 30f76c6d..b8352d0e 100644
--- a/extractor/features/is_source_singleton.h
+++ b/extractor/features/is_source_singleton.h
@@ -6,6 +6,9 @@
namespace extractor {
namespace features {
+/**
+ * Boolean feature checking if the source phrase occurs only once in the data.
+ */
class IsSourceSingleton : public Feature {
public:
double Score(const FeatureContext& context) const;
diff --git a/extractor/features/is_source_target_singleton.h b/extractor/features/is_source_target_singleton.h
index 12fb6ee6..dacfebba 100644
--- a/extractor/features/is_source_target_singleton.h
+++ b/extractor/features/is_source_target_singleton.h
@@ -6,6 +6,9 @@
namespace extractor {
namespace features {
+/**
+ * Boolean feature checking if the phrase pair occurs only once in the data.
+ */
class IsSourceTargetSingleton : public Feature {
public:
double Score(const FeatureContext& context) const;
diff --git a/extractor/features/max_lex_source_given_target.h b/extractor/features/max_lex_source_given_target.h
index bfa7ef1b..461b0ebf 100644
--- a/extractor/features/max_lex_source_given_target.h
+++ b/extractor/features/max_lex_source_given_target.h
@@ -13,6 +13,9 @@ class TranslationTable;
namespace features {
+/**
+ * Feature computing max(p(f | e)) across all pairs of words in the phrase pair.
+ */
class MaxLexSourceGivenTarget : public Feature {
public:
MaxLexSourceGivenTarget(shared_ptr<TranslationTable> table);
diff --git a/extractor/features/max_lex_target_given_source.h b/extractor/features/max_lex_target_given_source.h
index 66cf0914..c3c87327 100644
--- a/extractor/features/max_lex_target_given_source.h
+++ b/extractor/features/max_lex_target_given_source.h
@@ -13,6 +13,9 @@ class TranslationTable;
namespace features {
+/**
+ * Feature computing max(p(e | f)) across all pairs of words in the phrase pair.
+ */
class MaxLexTargetGivenSource : public Feature {
public:
MaxLexTargetGivenSource(shared_ptr<TranslationTable> table);
diff --git a/extractor/features/sample_source_count.h b/extractor/features/sample_source_count.h
index 53c7f954..ee6e59a0 100644
--- a/extractor/features/sample_source_count.h
+++ b/extractor/features/sample_source_count.h
@@ -6,6 +6,10 @@
namespace extractor {
namespace features {
+/**
+ * Feature scoring the number of times the source phrase occurs in the sampled
+ * set.
+ */
class SampleSourceCount : public Feature {
public:
double Score(const FeatureContext& context) const;
diff --git a/extractor/features/target_given_source_coherent.h b/extractor/features/target_given_source_coherent.h
index 80d9f617..e66d70a5 100644
--- a/extractor/features/target_given_source_coherent.h
+++ b/extractor/features/target_given_source_coherent.h
@@ -6,6 +6,10 @@
namespace extractor {
namespace features {
+/**
+ * Feature computing the ratio of the phrase pair count over all source phrase
+ * occurrences (sampled).
+ */
class TargetGivenSourceCoherent : public Feature {
public:
double Score(const FeatureContext& context) const;