diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-26 15:01:14 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-26 15:01:14 +0000 |
commit | bed3e4b867e4132917fa0640956e8ce713f0e451 (patch) | |
tree | a442aa2233c85313aebcf364ec0a0804922d2db7 /extractor/run_extractor.cc | |
parent | e633526bc2ba1f73e88989f495d70c0d2ec84a97 (diff) |
Script for grammar extraction only.
Diffstat (limited to 'extractor/run_extractor.cc')
-rw-r--r-- | extractor/run_extractor.cc | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc index 6b22a302..f1aa5e35 100644 --- a/extractor/run_extractor.cc +++ b/extractor/run_extractor.cc @@ -5,10 +5,10 @@ #include <string> #include <vector> -#include <omp.h> #include <boost/filesystem.hpp> #include <boost/program_options.hpp> #include <boost/program_options/variables_map.hpp> +#include <omp.h> #include "alignment.h" #include "data_array.h" @@ -78,7 +78,8 @@ int main(int argc, char** argv) { ("tight_phrases", po::value<bool>()->default_value(true), "False if phrases may be loose (better, but slower)") ("leave_one_out", po::value<bool>()->zero_tokens(), - "do leave-one-out estimation of grammars (e.g. for extracting grammars for the training set"); + "do leave-one-out estimation of grammars " + "(e.g. for extracting grammars for the training set"); po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); @@ -99,11 +100,6 @@ int main(int argc, char** argv) { return 1; } - bool leave_one_out = false; - if (vm.count("leave_one_out")) { - leave_one_out = true; - } - int num_threads = vm["threads"].as<int>(); cerr << "Grammar extraction will use " << num_threads << " threads." << endl; @@ -178,8 +174,8 @@ int main(int argc, char** argv) { << GetDuration(preprocess_start_time, preprocess_stop_time) << " seconds" << endl; - // Features used to score each grammar rule. Clock::time_point extraction_start_time = Clock::now(); + // Features used to score each grammar rule. vector<shared_ptr<Feature>> features = { make_shared<TargetGivenSourceCoherent>(), make_shared<SampleSourceCount>(), @@ -206,9 +202,6 @@ int main(int argc, char** argv) { vm["max_samples"].as<int>(), vm["tight_phrases"].as<bool>()); - // Releases extra memory used by the initial precomputation. - precomputation.reset(); - // Creates the grammars directory if it doesn't exist. 
fs::path grammar_path = vm["grammars"].as<string>(); if (!fs::is_directory(grammar_path)) { @@ -224,6 +217,7 @@ int main(int argc, char** argv) { } // Extracts the grammar for each sentence and saves it to a file. + bool leave_one_out = vm.count("leave_one_out"); vector<string> suffixes(sentences.size()); #pragma omp parallel for schedule(dynamic) num_threads(num_threads) for (size_t i = 0; i < sentences.size(); ++i) { @@ -236,7 +230,9 @@ int main(int argc, char** argv) { suffixes[i] = suffix; unordered_set<int> blacklisted_sentence_ids; - if (leave_one_out) blacklisted_sentence_ids.insert(i); + if (leave_one_out) { + blacklisted_sentence_ids.insert(i); + } Grammar grammar = extractor.GetGrammar( sentences[i], blacklisted_sentence_ids); ofstream output(GetGrammarFilePath(grammar_path, i).c_str()); |