summaryrefslogtreecommitdiff
path: root/src/freqdict.cc
blob: 9e25d34606e715a2bf433d871bed4a773be55227 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#include <iostream>
#include <fstream>
#include <cassert>
#include "freqdict.h"
#include "tdict.h"
#include "filelib.h"

using namespace std;

// Loads word/count entries from the file `fname` into counts_.
//
// The input is consumed as whitespace-separated tokens: each entry is a word
// followed by a numeric count, which must be strictly positive (checked by
// assert). A token whose first character is '#' starts a comment that extends
// to the end of its line. Each word is mapped through TD::Convert before being
// used as the key into counts_; a later entry for the same key overwrites an
// earlier one.
//
// Progress (one '.' per 10000 entries) and a final summary go to stderr.
// If a word is not followed by a parsable count, the problem is reported on
// stderr and loading stops; entries read up to that point are kept.
void FreqDict::Load(const std::string& fname) {
  cerr << "Reading word frequencies: " << fname << endl;
  ReadFile rf(fname);
  istream& ifs = *rf.stream();
  int cc = 0;
  std::string word;
  // A successful extraction always yields a non-empty token, so no separate
  // empty-word check is needed.
  while (ifs >> word) {
    if (word[0] == '#') {
      // Drop the remainder of the comment line; otherwise the words of the
      // comment would be misread as word/count pairs.
      std::string rest_of_line;
      std::getline(ifs, rest_of_line);
      continue;
    }
    double count = 0;
    if (!(ifs >> count)) {
      // Missing or non-numeric count: do not store a bogus 0 for this word.
      std::cerr << "\nMissing or malformed count for word '" << word
                << "' in " << fname << "\n";
      assert(!"malformed entry in frequency file");
      break;
    }
    assert(count > 0.0);  // use -log(f)
    counts_[TD::Convert(word)] = count;
    ++cc;
    if (cc % 10000 == 0) { std::cerr << "."; }
  }
  std::cerr << "\n";
  std::cerr << "Loaded " << cc << " words\n";
}