summary | refs | log | tree | commit | diff
path: root/extractor/translation_table.h
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>, 2013-04-23 19:35:18 -0400
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>, 2013-04-23 19:35:18 -0400
commit: 6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree: 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/translation_table.h
parent: d11b76def6899790161c47a73018146311356d8b (diff)
parent: 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/translation_table.h')
-rw-r--r-- extractor/translation_table.h | 63
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/translation_table.h b/extractor/translation_table.h
new file mode 100644
index 00000000..10504d3b
--- /dev/null
+++ b/extractor/translation_table.h
@@ -0,0 +1,63 @@
+#ifndef _TRANSLATION_TABLE_
+#define _TRANSLATION_TABLE_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include <boost/filesystem.hpp>
+#include <boost/functional/hash.hpp>
+
+using namespace std;  // NOTE(review): using-directive at header scope; every file that includes this header also sees std names unqualified.
+namespace fs = boost::filesystem;  // Short alias; used below as fs::path in WriteBinary.
+
+namespace extractor {
+
+typedef boost::hash<pair<int, int> > PairHash;  // Hash functor for pair<int, int>; supplied as the hasher of the pair-keyed unordered_maps below.
+
+class Alignment;  // Forward declaration: this header only names it through shared_ptr<Alignment>.
+class DataArray;  // Forward declaration: this header only names it through shared_ptr<DataArray>.
+
+/**
+ * Bilexical table with conditional probabilities: exposes p(e | f) and p(f | e) lookups for a (source word f, target word e) pair.
+ */
+class TranslationTable {
+ public:
+ TranslationTable(  // Takes the source and target data arrays and the alignment as shared_ptr values; the definition is not in this header.
+ shared_ptr<DataArray> source_data_array,
+ shared_ptr<DataArray> target_data_array,
+ shared_ptr<Alignment> alignment);
+
+ virtual ~TranslationTable();  // Virtual, so a derived table can be destroyed through a TranslationTable pointer.
+
+ // Returns p(e | f) for source word f and target word e. Virtual, so subclasses may override the lookup.
+ virtual double GetTargetGivenSourceScore(const string& source_word,
+ const string& target_word);
+
+ // Returns p(f | e) for source word f and target word e. Virtual, so subclasses may override the lookup.
+ virtual double GetSourceGivenTargetScore(const string& source_word,
+ const string& target_word);
+
+ void WriteBinary(const fs::path& filepath) const;  // Const member taking an output path; presumably serializes the table to that file (confirm in the .cc).
+
+ protected:
+ TranslationTable();  // Default constructor reachable only from derived classes (presumably for mocks/tests; confirm).
+
+ private:
+ // Increment links count for the given (f, e) word pair. The three count maps are caller-owned and passed by non-const reference; the method itself is const.
+ void IncrementLinksCount(
+ unordered_map<int, int>& source_links_count,
+ unordered_map<int, int>& target_links_count,
+ unordered_map<pair<int, int>, int, PairHash>& links_count,
+ int source_word_id,
+ int target_word_id) const;
+
+ shared_ptr<DataArray> source_data_array;  // Shared handle to the source-side data array.
+ shared_ptr<DataArray> target_data_array;  // Shared handle to the target-side data array.
+ unordered_map<pair<int, int>, pair<double, double>, PairHash>  // Presumably keyed by (source_word_id, target_word_id), holding the two conditional scores; order of the pair to be confirmed in the .cc.
+ translation_probabilities;
+};
+
+} // namespace extractor
+
+#endif