summaryrefslogtreecommitdiff
path: root/decoder/freqdict.h
blob: 4e03fadd00c8b4237bc0411c05d41f85012f2283 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#ifndef _FREQDICT_H_
#define _FREQDICT_H_

#include <iostream>
#include <map>
#include <string>
#include "wordid.h"
#include "filelib.h"
#include "tdict.h"

// Table of per-word statistics (e.g. frequencies) keyed by WordID.
//
// T is the numeric type used to store each word's count (float by default).
// The table is filled by Load() and queried with LookUp() / Max().
template <typename T = float>
class FreqDict {
 public:
  FreqDict() : max_() {}

  // Returns the largest count seen by Load() so far, or a value-initialized T
  // if nothing larger than that has been loaded.
  T Max() const { return max_; }

  // Reads whitespace-separated "word count" pairs from fname and stores them
  // in the table, replacing the count of any word that is already present.
  //
  // A token starting with '#' begins a comment; the rest of that line is
  // ignored. Loading stops at end of input or at the first word whose count
  // is missing or cannot be parsed as T (a warning is printed in that case).
  // Progress and a final summary are written to std::cerr.
  void Load(const std::string& fname) {
    std::cerr << "Reading word statistics from: " << fname << std::endl;
    ReadFile rf(fname);
    std::istream& ifs = *rf.stream();
    int cc=0;
    std::string word;
    // Test the extraction itself rather than the stream state beforehand:
    // when the read fails at EOF, `word` still holds the previous token, and
    // processing it again would overwrite the last real entry with a count
    // of 0. A successful extraction always yields a non-empty token.
    while (ifs >> word) {
      if (word[0] == '#') {
        // Comment: discard everything up to the end of the current line so
        // the comment's remaining words are not parsed as entries.
        std::string rest_of_line;
        std::getline(ifs, rest_of_line);
        continue;
      }
      T count = 0;
      if (!(ifs >> count)) {
        // Do not record a bogus zero for a truncated or malformed entry.
        std::cerr << "\nWarning: missing or malformed count for word '"
                  << word << "' in " << fname << "; stopping\n";
        break;
      }
      if (count > max_) max_ = count;
      counts_[TD::Convert(word)]=count;
      ++cc;
      if (cc % 10000 == 0) { std::cerr << "."; }
    }
    std::cerr << "\n";
    std::cerr << "Loaded " << cc << " words\n";
  }

  // Returns the count stored for word, or a value-initialized T if the word
  // is not in the table.
  T LookUp(const WordID& word) const {
    typename std::map<WordID,T>::const_iterator i = counts_.find(word);
    if (i == counts_.end()) return T();
    return i->second;
  }
 private:
  T max_;                       // largest count encountered while loading
  std::map<WordID, T> counts_;  // word id -> count
};

#endif