summary | refs | log | tree | commit | diff
path: root/utils/fdict.h
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 13:25:46 +0100
committer: Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 13:25:46 +0100
commit: 38a5bee71f6b49515cd105a9467ff602ff9dee64 (patch)
tree: 9e17da6cf072beb0150d82fee1d8e4756c8f0253 /utils/fdict.h
parent: b09ca8a5e6f5e8c1840e51a93c9f8e6b8c4bcc33 (diff)
optional support for doing perfect hashing of feature strings to save lots of memory
Diffstat (limited to 'utils/fdict.h')
-rw-r--r-- utils/fdict.h 36
1 files changed, 36 insertions, 0 deletions
diff --git a/utils/fdict.h b/utils/fdict.h
index f9673023..771e8b91 100644
--- a/utils/fdict.h
+++ b/utils/fdict.h
@@ -1,23 +1,56 @@
#ifndef _FDICT_H_
#define _FDICT_H_
+#include "config.h"
+
+#include <iostream>
#include <string>
#include <vector>
#include "dict.h"
+#ifdef HAVE_CMPH
+#include "perfect_hash.h"
+#include "string_to.h"
+#endif
+
struct FD {
// Freezes the feature dictionary by setting the static flag frozen_ to true.
// once the FD is frozen, new features not already in the
// dictionary will return 0
// The flag is consumed by Convert(const std::string&), which forwards it to
// dict_.Convert(s, frozen_) on the non-hashed path.
static void Freeze() {
frozen_ = true;
}
// Reports whether a perfect hash function is currently installed.
// With HAVE_CMPH defined the result is hash_ converted to bool, i.e. true
// exactly when the pointer is non-null. Without HAVE_CMPH the function
// always returns false.
+ static bool UsingPerfectHashFunction() {
+#ifdef HAVE_CMPH
+ return hash_;
+#else
+ return false;
+#endif
+ }
// Installs a perfect hash function constructed from cmph_file.
// With HAVE_CMPH defined, a PerfectHashFunction is heap-allocated with
// cmph_file as its constructor argument and stored in hash_; from then on the
// `if (hash_)` checks in NumFeats and the Convert functions defined in this
// header take the hashed path.
// Without HAVE_CMPH the body is empty: the call does nothing and cmph_file is
// unused, so a caller gets no indication that hashing was not enabled.
// NOTE(review): the previous value of hash_ is overwritten without being
// deleted, so calling this twice would leak the earlier object if hash_ is
// the only owner -- confirm whether repeated calls are expected.
+ static void EnableHash(const std::string& cmph_file) {
+#ifdef HAVE_CMPH
+ hash_ = new PerfectHashFunction(cmph_file);
+#endif
+ }
// Returns the size of the feature id space.
// When a perfect hash function is installed (HAVE_CMPH builds only) this is
// hash_->number_of_keys(); otherwise it is dict_.max() + 1.
// NOTE(review): the +1 presumably accounts for id 0 so the result can be used
// as an array size indexed by id -- confirm against Dict::max().
static inline int NumFeats() {
+#ifdef HAVE_CMPH
// Hashed path: dict_ is not consulted at all.
+ if (hash_) return hash_->number_of_keys();
+#endif
return dict_.max() + 1;
}
// Maps the feature name s to a WordID.
// When a perfect hash function is installed (HAVE_CMPH builds only) the id is
// the result of (*hash_)(s); dict_ and frozen_ play no part on that path.
// Otherwise the lookup is delegated to dict_.Convert(s, frozen_), so the
// frozen_ flag set by Freeze() governs how the dictionary treats s.
static inline WordID Convert(const std::string& s) {
+#ifdef HAVE_CMPH
// Hashed path takes priority over the dictionary.
+ if (hash_) return (*hash_)(s);
+#endif
return dict_.Convert(s, frozen_);
}
// Maps the WordID w back to a string, returned by const reference.
// Non-hashed path: returns dict_.Convert(w).
// Hashed path (HAVE_CMPH builds with hash_ set): dict_ is not consulted; the
// result is to_string(w) stored in a function-local static buffer, and a
// reference to that buffer is returned. The referenced string is therefore
// overwritten by the next hashed-path call, so callers that need the value
// afterwards must copy it.
// NOTE(review): to_string presumably renders the numeric id as text, meaning
// the original feature name is not recovered here -- confirm in string_to.h.
// NOTE(review): the buffer is named `tls` but is declared plain `static`, not
// thread-local, so concurrent callers would share it -- confirm whether this
// is ever called from multiple threads.
static inline const std::string& Convert(const WordID& w) {
+#ifdef HAVE_CMPH
+ if (hash_) {
// Single shared buffer reused on every hashed-path call.
+ static std::string tls;
+ tls = to_string(w);
+ return tls;
+ }
+#endif
return dict_.Convert(w);
}
// Overload taking two WordID pointers and returning a std::string by value.
// Only the declaration appears here; the definition is not in this section.
// NOTE(review): presumably [i, e) is a half-open range of ids to render as
// one string -- confirm against the definition.
static std::string Convert(WordID const *i,WordID const* e);
@@ -29,6 +62,9 @@ struct FD {
// Dictionary used by NumFeats and the Convert functions whenever no perfect
// hash function is installed.
static Dict dict_;
private:
// Set to true by Freeze() and passed to dict_.Convert(s, frozen_).
static bool frozen_;
+#ifdef HAVE_CMPH
// Perfect hash function assigned by EnableHash(); the functions above test it
// for null before using it.
// NOTE(review): the definition and initial value live outside this header --
// presumably it starts out null; confirm.
+ static PerfectHashFunction* hash_;
+#endif
};
#endif