summaryrefslogtreecommitdiff
path: root/extools/build_lexical_translation.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extools/build_lexical_translation.cc')
-rw-r--r--extools/build_lexical_translation.cc104
1 files changed, 0 insertions, 104 deletions
diff --git a/extools/build_lexical_translation.cc b/extools/build_lexical_translation.cc
deleted file mode 100644
index f609f56a..00000000
--- a/extools/build_lexical_translation.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Build lexical translation table from alignment file to use for lexical translation probabilties when scoring a grammar
- *
- * Ported largely from the train-factored-phrase-model.perl script by Philipp Koehn
- */
-#include <iostream>
-#include <string>
-#include <map>
-#include <vector>
-#include <utility>
-#include <cstdlib>
-#include <fstream>
-#include <tr1/unordered_map>
-
-#include "sentence_pair.h"
-#include "extract.h"
-#include "fdict.h"
-#include "tdict.h"
-
-#include <boost/functional/hash.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-using namespace std;
-using namespace std::tr1;
-
-static const size_t MAX_LINE_LENGTH = 64000000;
-
-int main(int argc, char* argv[]){
-
- bool DEBUG = false;
-
- map <WordID, map<WordID, int> > word_translation;
- map <WordID, int> total_foreign;
- map <WordID, int> total_english;
-
- AnnotatedParallelSentence sent;
- char* buf = new char[MAX_LINE_LENGTH];
- while(cin)
- {
- cin.getline(buf, MAX_LINE_LENGTH);
- if (buf[0] == 0) continue;
-
- sent.ParseInputLine(buf);
-
- map <WordID, int> foreign_aligned;
- map <WordID, int> english_aligned;
-
- //iterate over the alignment to compute aligned words
-
- for(int i =0;i<sent.aligned.width();i++)
- {
- for (int j=0;j<sent.aligned.height();j++)
- {
- if (DEBUG) cout << sent.aligned(i,j) << " ";
- if( sent.aligned(i,j))
- {
- if (DEBUG) cout << TD::Convert(sent.f[i]) << " aligned to " << TD::Convert(sent.e[j]);
- //local counts
- ++foreign_aligned[sent.f[i]];
- ++english_aligned[sent.e[j]];
-
- //global counts
- ++word_translation[sent.f[i]][sent.e[j]];
- ++total_foreign[sent.f[i]];
- ++total_english[sent.e[j]];
- }
- }
- if (DEBUG) cout << endl;
- }
- if (DEBUG) cout << endl;
-
- static const WordID NULL_ = TD::Convert("NULL");
- //handle unaligned words - align them to null
- map<WordID, int>& nullcounts = word_translation[NULL_];
- for (int j =0; j < sent.e_len; j++)
- {
- if (english_aligned.count(sent.e[j])) continue;
- ++nullcounts[sent.e[j]];
- ++total_foreign[NULL_];
- ++total_english[sent.e[j]];
- }
-
- for (int i =0; i < sent.f_len; i++)
- {
- if (foreign_aligned.count(sent.f[i])) continue;
- ++word_translation[sent.f[i]][NULL_];
- ++total_english[NULL_];
- ++total_foreign[sent.f[i]];
- }
-
- }
-
- for(map < WordID, map<WordID,int> >::iterator it = word_translation.begin(); it != word_translation.end(); ++it)
- {
- const map<WordID, int>& trans = it->second;
- for (map<WordID,int>::const_iterator iit = trans.begin(); iit != trans.end(); ++iit) {
- cout << TD::Convert(it->first) << "," << TD::Convert(iit->first) << "=" << iit->second << "/" << total_foreign[it->first] << endl;
- }
- }
-
-
- return 0;
-}