summaryrefslogtreecommitdiff
path: root/utils/phmt.cc
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 13:25:46 +0100
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 13:25:46 +0100
commit 38a5bee71f6b49515cd105a9467ff602ff9dee64 (patch)
tree 9e17da6cf072beb0150d82fee1d8e4756c8f0253 /utils/phmt.cc
parent b09ca8a5e6f5e8c1840e51a93c9f8e6b8c4bcc33 (diff)
optional support for doing perfect hashing of feature strings to save lots of memory
Diffstat (limited to 'utils/phmt.cc')
-rw-r--r-- utils/phmt.cc 44
1 files changed, 44 insertions, 0 deletions
diff --git a/utils/phmt.cc b/utils/phmt.cc
new file mode 100644
index 00000000..1f59afaf
--- /dev/null
+++ b/utils/phmt.cc
@@ -0,0 +1,44 @@
+#include "config.h"
+
+#ifndef HAVE_CMPH
+int main() {  // Stub entry point compiled when HAVE_CMPH is not defined: does nothing.
+ return 0;  // always exits with status 0
+}
+#else
+
+#include <iostream>
+#include "weights.h"
+#include "fdict.h"
+
+using namespace std;
+
+int main(int argc, char** argv) {  // Round trip: store two weights in a Weights object, write weights.bin, read it back, compare.
+ if (argc != 2) { cerr << "Usage: " << argv[0] << " file.mphf\n"; return 1; }  // exactly one argument is required; otherwise print usage and exit 1
+ FD::EnableHash(argv[1]);  // passes the file path to FD; presumably loads the perfect-hash function -- confirm in fdict.h
+ cerr << "Number of keys: " << FD::NumFeats() << endl;
+ cerr << "LexFE = " << FD::Convert("LexFE") << endl;  // prints whatever FD::Convert returns for the name
+ cerr << "LexEF = " << FD::Convert("LexEF") << endl;
+ {  // Phase 1: fill a weight vector and write it to disk.
+ Weights w;
+ vector<weight_t> v(FD::NumFeats());  // FD::NumFeats() elements; NOTE(review): this file has no <vector> include -- presumably pulled in via weights.h, confirm
+ v[FD::Convert("LexFE")] = 1.0;  // NOTE(review): unchecked index; assumes FD::Convert(...) < FD::NumFeats() -- confirm
+ v[FD::Convert("LexEF")] = 0.5;
+ w.InitFromVector(v);
+ cerr << "Writing...\n";
+ w.WriteToFile("weights.bin");  // fixed file name, relative to the current working directory
+ cerr << "Done.\n";
+ }
+ {  // Phase 2: a fresh Weights object reads the same file back.
+ Weights w;
+ vector<weight_t> v(FD::NumFeats());
+ cerr << "Reading...\n";
+ w.InitFromFile("weights.bin");
+ cerr << "Done.\n";
+ w.InitVector(&v);  // presumably copies the loaded weights into v -- confirm in weights.h
+ assert(v[FD::Convert("LexFE")] == 1.0);  // NOTE(review): this file has no <cassert> include, and the check is compiled out under NDEBUG
+ assert(v[FD::Convert("LexEF")] == 0.5);  // exact comparison against the 0.5 stored in phase 1
+ }
+}
+
+#endif
+