From 671c21451542e2dd20e45b4033d44d8e8735f87b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 3 Dec 2009 16:33:55 -0500 Subject: initial check in --- vest/union_forests.cc | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 vest/union_forests.cc (limited to 'vest/union_forests.cc') diff --git a/vest/union_forests.cc b/vest/union_forests.cc new file mode 100644 index 00000000..207ecb5c --- /dev/null +++ b/vest/union_forests.cc @@ -0,0 +1,73 @@ +#include +#include +#include + +#include +#include + +#include "hg.h" +#include "hg_io.h" +#include "filelib.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("dev_set_size,s",po::value(),"[REQD] Development set size (# of parallel sentences)") + ("forest_repository,r",po::value(),"[REQD] Path to forest repository") + ("new_forest_repository,n",po::value(),"[REQD] Path to new forest repository") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (conf->count("dev_set_size") == 0) { + cerr << "Please specify the size of the development set using -d N\n"; + flag = true; + } + if (conf->count("new_forest_repository") == 0) { + cerr << "Please specify the starting-point weights using -n PATH\n"; + flag = true; + } + if (conf->count("forest_repository") == 0) { + cerr << "Please specify the forest repository location using -r PATH\n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const int size = conf["dev_set_size"].as(); + const string repo = conf["forest_repository"].as(); + const string new_repo = conf["new_forest_repository"].as(); + for (int i = 0; i < size; ++i) { + ostringstream sfin, sfout; + sfin << new_repo << '/' << i << ".json.gz"; + sfout << repo << '/' << i << ".json.gz"; + const string fin = sfin.str(); + const string fout = sfout.str(); + Hypergraph existing_hg; + cerr << "Processing " << fin << endl; + assert(FileExists(fin)); + if (FileExists(fout)) { + ReadFile rf(fout); + assert(HypergraphIO::ReadFromJSON(rf.stream(), &existing_hg)); + } + Hypergraph new_hg; + if (true) { + ReadFile rf(fin); + assert(HypergraphIO::ReadFromJSON(rf.stream(), &new_hg)); + } + existing_hg.Union(new_hg); + WriteFile wf(fout); + assert(HypergraphIO::WriteToJSON(existing_hg, false, wf.stream())); + } + return 0; +} -- cgit v1.2.3