diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-06 22:25:25 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-06 22:25:25 -0500 |
commit | 2a18010e255810cc2b5bcbe688f3db8eabda23ca (patch) | |
tree | e310286257e5445072303dcca03acb85a865c26a /src/freqdict.cc | |
parent | 59ea352f3dcf3bf58969f404615fed4ff6b931f7 (diff) |
add compound splitting logic and features (Dyer 2008, NAACL)
Diffstat (limited to 'src/freqdict.cc')
-rw-r--r-- | src/freqdict.cc | 14 |
1 file changed, 10 insertions, 4 deletions
diff --git a/src/freqdict.cc b/src/freqdict.cc index 4cfffe58..9e25d346 100644 --- a/src/freqdict.cc +++ b/src/freqdict.cc @@ -2,11 +2,17 @@ #include <fstream> #include <cassert> #include "freqdict.h" +#include "tdict.h" +#include "filelib.h" -void FreqDict::load(const std::string& fname) { - std::ifstream ifs(fname.c_str()); +using namespace std; + +void FreqDict::Load(const std::string& fname) { + cerr << "Reading word frequencies: " << fname << endl; + ReadFile rf(fname); + istream& ifs = *rf.stream(); int cc=0; - while (!ifs.eof()) { + while (ifs) { std::string word; ifs >> word; if (word.size() == 0) continue; @@ -14,7 +20,7 @@ void FreqDict::load(const std::string& fname) { double count = 0; ifs >> count; assert(count > 0.0); // use -log(f) - counts_[word]=count; + counts_[TD::Convert(word)]=count; ++cc; if (cc % 10000 == 0) { std::cerr << "."; } } |