summaryrefslogtreecommitdiff
path: root/decoder/freqdict.cc
diff options
context:
space:
mode:
authorredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-06-22 05:12:27 +0000
committerredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-06-22 05:12:27 +0000
commit0172721855098ca02b207231a654dffa5e4eb1c9 (patch)
tree8069c3a62e2d72bd64a2cdeee9724b2679c8a56b /decoder/freqdict.cc
parent37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff)
initial checkin
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/freqdict.cc')
-rw-r--r--decoder/freqdict.cc29
1 files changed, 29 insertions, 0 deletions
diff --git a/decoder/freqdict.cc b/decoder/freqdict.cc
new file mode 100644
index 00000000..9e25d346
--- /dev/null
+++ b/decoder/freqdict.cc
@@ -0,0 +1,29 @@
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include "freqdict.h"
+#include "tdict.h"
+#include "filelib.h"
+
+using namespace std;
+
+// Loads word frequencies from the file `fname` into counts_.
+//
+// The input is read as whitespace-separated "<word> <count>" pairs. A token
+// beginning with '#' starts a comment that runs to the end of its line.
+// Each word is mapped through TD::Convert and its count stored in counts_;
+// a word that appears more than once keeps the last count read.
+//
+// Every count must parse as a number and be strictly positive. This is
+// asserted; in builds where assertions are compiled out (NDEBUG) the
+// offending entry is reported on stderr and skipped instead of being stored.
+// Progress dots (one per 10000 entries) and a summary are written to stderr.
+void FreqDict::Load(const std::string& fname) {
+  std::cerr << "Reading word frequencies: " << fname << std::endl;
+  ReadFile rf(fname);
+  std::istream& ifs = *rf.stream();
+  int cc = 0;
+  std::string word;
+  // Extracting in the loop condition ends the loop at EOF or on a stream
+  // error, and guarantees `word` is non-empty inside the body.
+  while (ifs >> word) {
+    if (word[0] == '#') {
+      // Drop the remainder of the comment line so that multi-word comments
+      // are not misread as "<word> <count>" entries.
+      std::string rest_of_line;
+      std::getline(ifs, rest_of_line);
+      continue;
+    }
+    double count = 0;
+    // Fails both when the count cannot be parsed and when it is not positive.
+    const bool ok = (ifs >> count) && count > 0.0;
+    assert(ok);  // use -log(f)
+    if (!ok) {
+      // Only reachable when assertions are disabled: never store a bad count.
+      std::cerr << "Skipping invalid frequency entry for '" << word << "' in "
+                << fname << "\n";
+      continue;
+    }
+    counts_[TD::Convert(word)] = count;
+    ++cc;
+    if (cc % 10000 == 0) { std::cerr << "."; }
+  }
+  std::cerr << "\n";
+  std::cerr << "Loaded " << cc << " words\n";
+}