summary | refs | log | tree | commit | diff
path: root/decoder/freqdict.h
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
committer: Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
commit: ef6085e558e26c8819f1735425761103021b6470 (patch)
tree: 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /decoder/freqdict.h
parent: 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff)
parent: dfbc278c1057555fda9312291c8024049e00b7d8 (diff)
merge with upstream
Diffstat (limited to 'decoder/freqdict.h')
-rw-r--r-- decoder/freqdict.h 37
1 files changed, 32 insertions, 5 deletions
diff --git a/decoder/freqdict.h b/decoder/freqdict.h
index 9acf0c33..4e03fadd 100644
--- a/decoder/freqdict.h
+++ b/decoder/freqdict.h
@@ -1,20 +1,47 @@
#ifndef _FREQDICT_H_
#define _FREQDICT_H_
+#include <iostream>
#include <map>
#include <string>
#include "wordid.h"
+#include "filelib.h"
+#include "tdict.h"
+template <typename T = float>
class FreqDict {
public:
- void Load(const std::string& fname);
- float LookUp(const WordID& word) const {
- std::map<WordID,float>::const_iterator i = counts_.find(word);
- if (i == counts_.end()) return 0;
+ FreqDict() : max_() {}
+ T Max() const { return max_; }
+ void Load(const std::string& fname) {
+ std::cerr << "Reading word statistics from: " << fname << std::endl;
+ ReadFile rf(fname);
+ std::istream& ifs = *rf.stream();
+ int cc=0;
+ std::string word;
+ while (ifs) {
+ ifs >> word;
+ if (word.size() == 0) continue;
+ if (word[0] == '#') continue;
+ T count = 0;
+ ifs >> count;
+ if (count > max_) max_ = count;
+ counts_[TD::Convert(word)]=count;
+ ++cc;
+ if (cc % 10000 == 0) { std::cerr << "."; }
+ }
+ std::cerr << "\n";
+ std::cerr << "Loaded " << cc << " words\n";
+ }
+
+ T LookUp(const WordID& word) const {
+ typename std::map<WordID,T>::const_iterator i = counts_.find(word);
+ if (i == counts_.end()) return T();
return i->second;
}
private:
- std::map<WordID, float> counts_;
+ T max_;
+ std::map<WordID, T> counts_;
};
#endif