summaryrefslogtreecommitdiff
path: root/extractor/translation_table.h
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-05-02 09:09:59 +0200
committerPatrick Simianer <p@simianer.de>2013-05-02 09:09:59 +0200
commit0ce66778da6079506896739e9d97dc7dff83cd72 (patch)
treef435457bb23dab0c566c9896f9d38cece9d15885 /extractor/translation_table.h
parentb6754386f1109b960b05cdf2eabbc97bdd38e8df (diff)
parentb7ea2615bc9bb69031ff714ddce1539c9f1bda2d (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/translation_table.h')
-rw-r--r--extractor/translation_table.h63
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/translation_table.h b/extractor/translation_table.h
new file mode 100644
index 00000000..10504d3b
--- /dev/null
+++ b/extractor/translation_table.h
@@ -0,0 +1,63 @@
+#ifndef _TRANSLATION_TABLE_
+#define _TRANSLATION_TABLE_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include <boost/filesystem.hpp>
+#include <boost/functional/hash.hpp>
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+namespace extractor {
+
+typedef boost::hash<pair<int, int> > PairHash;
+
+class Alignment;
+class DataArray;
+
+/**
+ * Bilexical table with conditional probabilities.
+ */
+class TranslationTable {
+ public:
+ TranslationTable(
+ shared_ptr<DataArray> source_data_array,
+ shared_ptr<DataArray> target_data_array,
+ shared_ptr<Alignment> alignment);
+
+ virtual ~TranslationTable();
+
+ // Returns p(e | f).
+ virtual double GetTargetGivenSourceScore(const string& source_word,
+ const string& target_word);
+
+ // Returns p(f | e).
+ virtual double GetSourceGivenTargetScore(const string& source_word,
+ const string& target_word);
+
+ void WriteBinary(const fs::path& filepath) const;
+
+ protected:
+ TranslationTable();
+
+ private:
+ // Increment links count for the given (f, e) word pair.
+ void IncrementLinksCount(
+ unordered_map<int, int>& source_links_count,
+ unordered_map<int, int>& target_links_count,
+ unordered_map<pair<int, int>, int, PairHash>& links_count,
+ int source_word_id,
+ int target_word_id) const;
+
+ shared_ptr<DataArray> source_data_array;
+ shared_ptr<DataArray> target_data_array;
+ unordered_map<pair<int, int>, pair<double, double>, PairHash>
+ translation_probabilities;
+};
+
+} // namespace extractor
+
+#endif