diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | 6d347f1ce078dede3da0e1498f75e357351c6543 (patch) | |
tree | 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/translation_table.h | |
parent | d11b76def6899790161c47a73018146311356d8b (diff) | |
parent | 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/translation_table.h')
-rw-r--r-- | extractor/translation_table.h | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/extractor/translation_table.h b/extractor/translation_table.h new file mode 100644 index 00000000..10504d3b --- /dev/null +++ b/extractor/translation_table.h @@ -0,0 +1,63 @@ +#ifndef _TRANSLATION_TABLE_ +#define _TRANSLATION_TABLE_ + +#include <memory> +#include <string> +#include <unordered_map> + +#include <boost/filesystem.hpp> +#include <boost/functional/hash.hpp> + +using namespace std; +namespace fs = boost::filesystem; + +namespace extractor { + +typedef boost::hash<pair<int, int> > PairHash; + +class Alignment; +class DataArray; + +/** + * Bilexical table with conditional probabilities. + */ +class TranslationTable { + public: + TranslationTable( + shared_ptr<DataArray> source_data_array, + shared_ptr<DataArray> target_data_array, + shared_ptr<Alignment> alignment); + + virtual ~TranslationTable(); + + // Returns p(e | f). + virtual double GetTargetGivenSourceScore(const string& source_word, + const string& target_word); + + // Returns p(f | e). + virtual double GetSourceGivenTargetScore(const string& source_word, + const string& target_word); + + void WriteBinary(const fs::path& filepath) const; + + protected: + TranslationTable(); + + private: + // Increment links count for the given (f, e) word pair. + void IncrementLinksCount( + unordered_map<int, int>& source_links_count, + unordered_map<int, int>& target_links_count, + unordered_map<pair<int, int>, int, PairHash>& links_count, + int source_word_id, + int target_word_id) const; + + shared_ptr<DataArray> source_data_array; + shared_ptr<DataArray> target_data_array; + unordered_map<pair<int, int>, pair<double, double>, PairHash> + translation_probabilities; +}; + +} // namespace extractor + +#endif |