summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 20:16:17 +0100
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-09-13 20:16:17 +0100
commit a882bb5879638329d7c1cd5486f47bb3cee0d198 (patch)
tree 39894d32445255ae0532b6953e3d5093f60645f6 /utils
parent 722c38fbc448aa7ffd80f0af99972b6ec6f2e9d9 (diff)
tool to reconstruct text weights from a hash function, key file, and (binary) weights file
Diffstat (limited to 'utils')
-rw-r--r-- utils/Makefile.am 5
-rw-r--r-- utils/reconstruct_weights.cc 68
2 files changed, 73 insertions, 0 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index c50747bf..df667655 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,3 +1,6 @@
+
+bin_PROGRAMS = reconstruct_weights
+
noinst_PROGRAMS = ts phmt
TESTS = ts phmt
@@ -11,6 +14,8 @@ noinst_PROGRAMS += \
TESTS += small_vector_test logval_test weights_test dict_test
endif
+reconstruct_weights_SOURCES = reconstruct_weights.cc
+
noinst_LIBRARIES = libutils.a
libutils_a_SOURCES = \
diff --git a/utils/reconstruct_weights.cc b/utils/reconstruct_weights.cc
new file mode 100644
index 00000000..d32e4f67
--- /dev/null
+++ b/utils/reconstruct_weights.cc
@@ -0,0 +1,68 @@
+#include <iostream>
+#include <vector>
+#include <cassert>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "fdict.h"
+#include "weights.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+// Parses the program options from the command line and, when --config is
+// given, from that configuration file as well, storing the result in *conf.
+//
+// Returns true only when all three required options (-w, -k and -h) are
+// present. Otherwise -- including when --help is requested or the
+// configuration file cannot be opened -- a message is written to stderr and
+// false is returned.
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  // Options that may appear either on the command line or in the config file.
+  po::options_description opts("Configuration options");
+  opts.add_options()
+    ("weights,w",po::value<string>(),"Input feature weights file")
+    ("keys,k",po::value<string>(),"Keys file (list of features with dummy value at start)")
+    ("cmph_perfect_hash_file,h",po::value<string>(),"cmph perfect hash function file");
+  // Options that are only accepted on the command line.
+  po::options_description clo("Command line options");
+  clo.add_options()
+    ("config", po::value<string>(), "Configuration file")
+    ("help,?", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);
+  dcmdline_options.add(opts).add(clo);
+
+  // The command line is stored first; the config file is stored afterwards.
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    const string config_path = (*conf)["config"].as<string>();
+    ifstream config(config_path.c_str());
+    // Report an unreadable config file instead of silently parsing nothing.
+    if (!config) {
+      cerr << "Failed to open configuration file: " << config_path << endl;
+      return false;
+    }
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+
+  if (conf->count("help") || !conf->count("cmph_perfect_hash_file") || !conf->count("weights") || !conf->count("keys")) {
+    cerr << "Generate a text format weights file. Options -w -k and -h are required.\n";
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+// Writes a text-format weights listing to stdout: one "<key> <weight>" line
+// for every line of the keys file after the first (the first line is the
+// dummy entry mentioned in the --keys help text). Line n of the keys file
+// (1-based) is paired with weights[n - 1].
+//
+// Exit status is 0 on success and 1 when the options are incomplete, help
+// was requested, or the keys file has more entries than there are weights.
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  // `return false` would convert to exit status 0 and report success to the
+  // shell, so return a non-zero status explicitly.
+  if (!InitCommandLine(argc, argv, &conf))
+    return 1;
+
+  FD::EnableHash(conf["cmph_perfect_hash_file"].as<string>());
+
+  // load weights
+  vector<weight_t> weights;
+  Weights::InitFromFile(conf["weights"].as<string>(), &weights);
+
+  ReadFile rf(conf["keys"].as<string>());
+  istream& in = *rf.stream();
+  string key;
+  size_t lc = 0;
+  while (getline(in, key)) {
+    ++lc;
+    if (lc == 1) continue;  // skip the dummy first line of the keys file
+    // Checked at run time rather than with assert(), which is compiled out
+    // under NDEBUG and would leave the index below unchecked.
+    if (lc > weights.size()) {
+      cerr << "Keys file line " << lc << " has no corresponding weight (only "
+           << weights.size() << " weights were loaded)\n";
+      return 1;
+    }
+    // "\n" rather than endl: avoids flushing stdout once per feature.
+    cout << key << " " << weights[lc - 1] << "\n";
+  }
+
+  return 0;
+}
+