diff options
| author | Paul Baltescu <pauldb89@gmail.com> | 2013-03-07 14:38:23 +0000 | 
|---|---|---|
| committer | Paul Baltescu <pauldb89@gmail.com> | 2013-03-07 14:39:05 +0000 | 
| commit | b34c347cd7f4f8965e4d943543a31f9a4e886f54 (patch) | |
| tree | 6fdcf61ef3aacce6315c5d8447f9b0e612b25271 /extractor | |
| parent | e65a84b9dd9dd4ac294fef8b09bec7f97bd2dec9 (diff) | |
Added unit test for loose phrases.
Diffstat (limited to 'extractor')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | extractor/data_array.h | 2 |
| -rw-r--r-- | extractor/features/is_source_singleton.cc | 2 |
| -rw-r--r-- | extractor/run_extractor.cc | 17 |
| -rw-r--r-- | extractor/target_phrase_extractor_test.cc | 28 |

4 files changed, 35 insertions, 14 deletions
```diff
diff --git a/extractor/data_array.h b/extractor/data_array.h
index 42e12135..a26bbecf 100644
--- a/extractor/data_array.h
+++ b/extractor/data_array.h
@@ -17,7 +17,7 @@ enum Side {
   TARGET
 };
-// TODO: This class has features for both the source and target data arrays.
+// Note: This class has features for both the source and target data arrays.
 // Maybe we can save some memory by having more specific implementations (e.g.
 // sentence_id is only needed for the source data array).
 class DataArray {
diff --git a/extractor/features/is_source_singleton.cc b/extractor/features/is_source_singleton.cc
index ab54e51a..1abb486f 100644
--- a/extractor/features/is_source_singleton.cc
+++ b/extractor/features/is_source_singleton.cc
@@ -6,7 +6,7 @@ namespace extractor {
 namespace features {
 double IsSourceSingleton::Score(const FeatureContext& context) const {
-  return context.source_phrase_count == 1;
+  return fabs(context.source_phrase_count - 1) < 1e-6;
 }
 string IsSourceSingleton::GetName() const {
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 0f91236d..ae3a875e 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -60,7 +60,6 @@ int main(int argc, char** argv) {
         "Minimum number of occurences for a pharse to be considered frequent")
     ("max_samples", po::value<int>()->default_value(300),
         "Maximum number of samples")
-    // TODO(pauldb): Check if this works when set to false.
     ("tight_phrases", po::value<bool>()->default_value(true),
         "False if phrases may be loose (better, but slower)");
@@ -144,17 +143,15 @@ int main(int argc, char** argv) {
        << GetDuration(preprocess_start_time, preprocess_stop_time)
        << " seconds" << endl;
-  cerr << "creating grammar extractor" << endl;
-
   Clock::time_point extraction_start_time = Clock::now();
   vector<shared_ptr<Feature> > features = {
-//      make_shared<TargetGivenSourceCoherent>(),
-//      make_shared<SampleSourceCount>(),
-//      make_shared<CountSourceTarget>(),
-//      make_shared<MaxLexSourceGivenTarget>(table),
-//      make_shared<MaxLexTargetGivenSource>(table),
-//      make_shared<IsSourceSingleton>(),
-//      make_shared<IsSourceTargetSingleton>()
+      make_shared<TargetGivenSourceCoherent>(),
+      make_shared<SampleSourceCount>(),
+      make_shared<CountSourceTarget>(),
+      make_shared<MaxLexSourceGivenTarget>(table),
+      make_shared<MaxLexTargetGivenSource>(table),
+      make_shared<IsSourceSingleton>(),
+      make_shared<IsSourceTargetSingleton>()
   };
   shared_ptr<Scorer> scorer = make_shared<Scorer>(features);
diff --git a/extractor/target_phrase_extractor_test.cc b/extractor/target_phrase_extractor_test.cc
index a686d20b..80927dee 100644
--- a/extractor/target_phrase_extractor_test.cc
+++ b/extractor/target_phrase_extractor_test.cc
@@ -111,8 +111,32 @@ TEST_F(TargetPhraseExtractorTest, TestExtractPhrasesTightPhrasesFalse) {
       target_gaps, target_low, 1, 5, source_indexes, 0);
   EXPECT_EQ(10, results.size());
-  // TODO(pauldb): Finish unit test once it's clear how these alignments should
-  // look like.
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 4; j <= 6; ++j) {
+      for (int k = 4; k <= j; ++k) {
+        vector<string> expected_words;
+        for (int l = i; l < 2; ++l) {
+          expected_words.push_back(target_words[l]);
+        }
+        for (int l = k; l < j; ++l) {
+          expected_words.push_back(target_words[l]);
+        }
+
+        PhraseAlignment expected_alignment;
+        expected_alignment.push_back(make_pair(1, 1 - i));
+
+        bool found_expected_pair = false;
+        for (auto result: results) {
+          if (result.first.GetWords() == expected_words &&
+              result.second == expected_alignment) {
+            found_expected_pair = true;
+          }
+        }
+
+        EXPECT_TRUE(found_expected_pair);
+      }
+    }
+  }
 }
 } // namespace
```
