diff options
author | Patrick Simianer <p@simianer.de> | 2013-05-02 09:09:59 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-05-02 09:09:59 +0200 |
commit | 0ce66778da6079506896739e9d97dc7dff83cd72 (patch) | |
tree | f435457bb23dab0c566c9896f9d38cece9d15885 /extractor/translation_table.h | |
parent | b6754386f1109b960b05cdf2eabbc97bdd38e8df (diff) | |
parent | b7ea2615bc9bb69031ff714ddce1539c9f1bda2d (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/translation_table.h')
-rw-r--r-- | extractor/translation_table.h | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/translation_table.h b/extractor/translation_table.h new file mode 100644 index 00000000..10504d3b --- /dev/null +++ b/extractor/translation_table.h @@ -0,0 +1,63 @@ +#ifndef _TRANSLATION_TABLE_ +#define _TRANSLATION_TABLE_ + +#include <memory> +#include <string> +#include <unordered_map> + +#include <boost/filesystem.hpp> +#include <boost/functional/hash.hpp> + +using namespace std; +namespace fs = boost::filesystem; + +namespace extractor { + +typedef boost::hash<pair<int, int> > PairHash; + +class Alignment; +class DataArray; + +/** + * Bilexical table with conditional probabilities. + */ +class TranslationTable { + public: + TranslationTable( + shared_ptr<DataArray> source_data_array, + shared_ptr<DataArray> target_data_array, + shared_ptr<Alignment> alignment); + + virtual ~TranslationTable(); + + // Returns p(e | f). + virtual double GetTargetGivenSourceScore(const string& source_word, + const string& target_word); + + // Returns p(f | e). + virtual double GetSourceGivenTargetScore(const string& source_word, + const string& target_word); + + void WriteBinary(const fs::path& filepath) const; + + protected: + TranslationTable(); + + private: + // Increment links count for the given (f, e) word pair. + void IncrementLinksCount( + unordered_map<int, int>& source_links_count, + unordered_map<int, int>& target_links_count, + unordered_map<pair<int, int>, int, PairHash>& links_count, + int source_word_id, + int target_word_id) const; + + shared_ptr<DataArray> source_data_array; + shared_ptr<DataArray> target_data_array; + unordered_map<pair<int, int>, pair<double, double>, PairHash> + translation_probabilities; +}; + +} // namespace extractor + +#endif |