blob: 9e25d34606e715a2bf433d871bed4a773be55227 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include "freqdict.h"
#include "tdict.h"
#include "filelib.h"
using namespace std;
// Loads word frequencies from the file `fname` into counts_.
//
// The input is read as whitespace-separated tokens forming "word count"
// pairs. A token that starts with '#' begins a comment, and everything from
// that token to the end of its line is ignored. Each count must be strictly
// positive; this is enforced with assert(), so the check disappears in
// NDEBUG builds. Progress (one '.' per 10000 entries) and a final summary
// are written to stderr.
void FreqDict::Load(const std::string& fname) {
  cerr << "Reading word frequencies: " << fname << endl;
  ReadFile rf(fname);
  istream& ifs = *rf.stream();
  int cc = 0;
  std::string word;
  // operator>> never yields an empty token on success, so the loop ends
  // cleanly at end of input without a separate empty-word check.
  while (ifs >> word) {
    if (word[0] == '#') {
      // Drop the remainder of the comment line. Skipping only the '#' token
      // would cause the following words of the comment to be parsed as a
      // word/count pair.
      std::string rest_of_line;
      std::getline(ifs, rest_of_line);
      continue;
    }
    double count = 0;
    ifs >> count;
    // A missing or non-numeric count leaves `count` at 0 and fails here.
    assert(count > 0.0); // use -log(f)
    counts_[TD::Convert(word)] = count;
    ++cc;
    if (cc % 10000 == 0) { std::cerr << "."; }
  }
  std::cerr << "\n";
  std::cerr << "Loaded " << cc << " words\n";
}
|