From 6d43674e6b224281e43ccefc87224a7ba2fbb99a Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Sun, 10 Mar 2013 01:01:01 +0000 Subject: Added comments. Hooray! --- extractor/translation_table.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'extractor/translation_table.cc') diff --git a/extractor/translation_table.cc b/extractor/translation_table.cc index 1852a357..45da707a 100644 --- a/extractor/translation_table.cc +++ b/extractor/translation_table.cc @@ -23,6 +23,8 @@ TranslationTable::TranslationTable(shared_ptr<DataArray> source_data_array, unordered_map<int, int> target_links_count; unordered_map<pair<int, int>, int, PairHash> links_count; + // For each pair of aligned source target words increment their link count by + // 1. Unaligned words are paired with the NULL token. for (size_t i = 0; i < source_data_array->GetNumSentences(); ++i) { vector<pair<int, int> > links = alignment->GetLinks(i); int source_start = source_data_array->GetSentenceStart(i); @@ -40,25 +42,28 @@ TranslationTable::TranslationTable(shared_ptr<DataArray> source_data_array, for (pair<int, int> link: links) { source_linked_words[link.first] = 1; target_linked_words[link.second] = 1; - IncreaseLinksCount(source_links_count, target_links_count, links_count, + IncrementLinksCount(source_links_count, target_links_count, links_count, source_sentence[link.first], target_sentence[link.second]); } for (size_t i = 0; i < source_sentence.size(); ++i) { if (!source_linked_words[i]) { - IncreaseLinksCount(source_links_count, target_links_count, links_count, - source_sentence[i], DataArray::NULL_WORD); + IncrementLinksCount(source_links_count, target_links_count, links_count, + source_sentence[i], DataArray::NULL_WORD); } } for (size_t i = 0; i < target_sentence.size(); ++i) { if (!target_linked_words[i]) { - IncreaseLinksCount(source_links_count, target_links_count, links_count, - DataArray::NULL_WORD, target_sentence[i]); + IncrementLinksCount(source_links_count, target_links_count, links_count, + DataArray::NULL_WORD, 
target_sentence[i]); } } } + // Calculating: + // p(e | f) = count(e, f) / count(f) + // p(f | e) = count(e, f) / count(e) for (pair<pair<int, int>, int> link_count: links_count) { int source_word = link_count.first.first; int target_word = link_count.first.second; @@ -72,7 +77,7 @@ TranslationTable::TranslationTable() {} TranslationTable::~TranslationTable() {} -void TranslationTable::IncreaseLinksCount( +void TranslationTable::IncrementLinksCount( unordered_map<int, int>& source_links_count, unordered_map<int, int>& target_links_count, unordered_map<pair<int, int>, int, PairHash>& links_count, -- cgit v1.2.3