/*
 * Build lexical translation table from alignment file to use for lexical
 * translation probabilities when scoring a grammar
 *
 * Ported largely from the train-factored-phrase-model.perl script by Philipp Koehn
 *
 * Overview of main(), as far as the visible code shows:
 *  - Reads standard input one line at a time into a heap buffer of
 *    MAX_LINE_LENGTH characters; a line whose first character is 0 (empty
 *    line) is skipped. Every other line is handed to sent.ParseInputLine().
 *  - Per sentence, foreign_aligned / english_aligned are consulted with
 *    count() to decide which words are unaligned.
 *  - Each English word sent.e[j] that is absent from english_aligned is
 *    counted as a translation of NULL_: word_translation[NULL_][e],
 *    total_foreign[NULL_] and total_english[e] are incremented.
 *  - Each foreign word sent.f[i] that is absent from foreign_aligned is
 *    counted as translating to NULL_: word_translation[f][NULL_],
 *    total_english[NULL_] and total_foreign[f] are incremented.
 *  - After input is exhausted, one line is written to standard output for
 *    every (first, second) entry of word_translation, in the form
 *        first,second=count/total_foreign[first]
 *    with both word ids converted through TD::Convert().
 *  - total_english is accumulated but is not part of the printed output.
 *  - buf is allocated with new[]; no matching delete[] is visible.
 *
 * NOTE(review): this copy of the file looks damaged and should be compared
 * against the upstream version before building:
 *  - all line breaks have been lost, so the "//" comment further down
 *    swallows the rest of the line as written;
 *  - text that originally sat between angle brackets appears to be missing:
 *    the targets of the bare #include directives, the template arguments of
 *    every map declaration, and the body of the alignment loop that starts
 *    at "for(int i =0;i" (presumably where foreign_aligned, english_aligned
 *    and the aligned-pair counts are filled, and where NULL_ is defined --
 *    TODO confirm).
 */ #include #include #include #include #include #include #include #include #include "sentence_pair.h" #include "extract.h" #include "fdict.h" #include "tdict.h" #include #include #include using namespace std; using namespace std::tr1; static const size_t MAX_LINE_LENGTH = 64000000; int main(int argc, char* argv[]){ bool DEBUG = false; map > word_translation; map total_foreign; map total_english; AnnotatedParallelSentence sent; char* buf = new char[MAX_LINE_LENGTH]; while(cin) { cin.getline(buf, MAX_LINE_LENGTH); if (buf[0] == 0) continue; sent.ParseInputLine(buf); map foreign_aligned; map english_aligned; //iterate over the alignment to compute aligned words for(int i =0;i& nullcounts = word_translation[NULL_]; for (int j =0; j < sent.e_len; j++) { if (english_aligned.count(sent.e[j])) continue; ++nullcounts[sent.e[j]]; ++total_foreign[NULL_]; ++total_english[sent.e[j]]; } for (int i =0; i < sent.f_len; i++) { if (foreign_aligned.count(sent.f[i])) continue; ++word_translation[sent.f[i]][NULL_]; ++total_english[NULL_]; ++total_foreign[sent.f[i]]; } } for(map < WordID, map >::iterator it = word_translation.begin(); it != word_translation.end(); ++it) { const map& trans = it->second; for (map::const_iterator iit = trans.begin(); iit != trans.end(); ++iit) { cout << TD::Convert(it->first) << "," << TD::Convert(iit->first) << "=" << iit->second << "/" << total_foreign[it->first] << endl; } } return 0; }