summary | refs | log | tree | commit | diff
path: root/src/freqdict.cc
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2009-12-06 22:25:25 -0500
committer Chris Dyer <redpony@gmail.com> 2009-12-06 22:25:25 -0500
commit 2a18010e255810cc2b5bcbe688f3db8eabda23ca (patch)
tree e310286257e5445072303dcca03acb85a865c26a /src/freqdict.cc
parent 59ea352f3dcf3bf58969f404615fed4ff6b931f7 (diff)
add compound splitting logic and features (Dyer 2008, NAACL)
Diffstat (limited to 'src/freqdict.cc')
-rw-r--r-- src/freqdict.cc 14
1 files changed, 10 insertions, 4 deletions
diff --git a/src/freqdict.cc b/src/freqdict.cc
index 4cfffe58..9e25d346 100644
--- a/src/freqdict.cc
+++ b/src/freqdict.cc
@@ -2,11 +2,17 @@
#include <fstream>
#include <cassert>
#include "freqdict.h"
+#include "tdict.h"
+#include "filelib.h"
-void FreqDict::load(const std::string& fname) {
- std::ifstream ifs(fname.c_str());
+using namespace std;
+
+void FreqDict::Load(const std::string& fname) {
+ cerr << "Reading word frequencies: " << fname << endl;
+ ReadFile rf(fname);
+ istream& ifs = *rf.stream();
int cc=0;
- while (!ifs.eof()) {
+ while (ifs) {
std::string word;
ifs >> word;
if (word.size() == 0) continue;
@@ -14,7 +20,7 @@ void FreqDict::load(const std::string& fname) {
double count = 0;
ifs >> count;
assert(count > 0.0); // use -log(f)
- counts_[word]=count;
+ counts_[TD::Convert(word)]=count;
++cc;
if (cc % 10000 == 0) { std::cerr << "."; }
}