Script for grammar extraction only.

author: Paul Baltescu <pauldb89@gmail.com> 2013-11-26 15:01:14 +0000
committer: Paul Baltescu <pauldb89@gmail.com> 2013-11-26 15:01:14 +0000
commit: a3826db61847a55f59bb9666f61fd1bb88888085 (patch)
tree: 022475bafbf71ba6aaeb98efdbafcde24f7e60a5 /extractor/run_extractor.cc
parent: 1cd86c44e1799c441cdcda2a022be0ee6e52d38c (diff)
1 file changed, 8 insertions, 12 deletions
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 6b22a302..f1aa5e35 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -5,10 +5,10 @@
 #include <string>
 #include <vector>
 
-#include <omp.h>
 #include <boost/filesystem.hpp>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
+#include <omp.h>
 
 #include "alignment.h"
 #include "data_array.h"
@@ -78,7 +78,8 @@ int main(int argc, char** argv) {
     ("tight_phrases", po::value<bool>()->default_value(true),
         "False if phrases may be loose (better, but slower)")
     ("leave_one_out", po::value<bool>()->zero_tokens(),
-        "do leave-one-out estimation of grammars (e.g. for extracting grammars for the training set");
+        "do leave-one-out estimation of grammars "
+        "(e.g. for extracting grammars for the training set");
 
   po::variables_map vm;
   po::store(po::parse_command_line(argc, argv, desc), vm);
@@ -99,11 +100,6 @@ int main(int argc, char** argv) {
     return 1;
   }
 
-  bool leave_one_out = false;
-  if (vm.count("leave_one_out")) {
-    leave_one_out = true;
-  }
-
   int num_threads = vm["threads"].as<int>();
   cerr << "Grammar extraction will use " << num_threads << " threads." << endl;
 
@@ -178,8 +174,8 @@ int main(int argc, char** argv) {
        << GetDuration(preprocess_start_time, preprocess_stop_time)
        << " seconds" << endl;
 
-  // Features used to score each grammar rule.
   Clock::time_point extraction_start_time = Clock::now();
+  // Features used to score each grammar rule.
   vector<shared_ptr<Feature>> features = {
       make_shared<TargetGivenSourceCoherent>(),
       make_shared<SampleSourceCount>(),
@@ -206,9 +202,6 @@ int main(int argc, char** argv) {
       vm["max_samples"].as<int>(),
       vm["tight_phrases"].as<bool>());
 
-  // Releases extra memory used by the initial precomputation.
-  precomputation.reset();
-
   // Creates the grammars directory if it doesn't exist.
   fs::path grammar_path = vm["grammars"].as<string>();
   if (!fs::is_directory(grammar_path)) {
@@ -224,6 +217,7 @@ int main(int argc, char** argv) {
   }
 
   // Extracts the grammar for each sentence and saves it to a file.
+  bool leave_one_out = vm.count("leave_one_out");
   vector<string> suffixes(sentences.size());
   #pragma omp parallel for schedule(dynamic) num_threads(num_threads)
   for (size_t i = 0; i < sentences.size(); ++i) {
@@ -236,7 +230,9 @@ int main(int argc, char** argv) {
     suffixes[i] = suffix;
 
     unordered_set<int> blacklisted_sentence_ids;
-    if (leave_one_out) blacklisted_sentence_ids.insert(i);
+    if (leave_one_out) {
+      blacklisted_sentence_ids.insert(i);
+    }
     Grammar grammar = extractor.GetGrammar(
         sentences[i], blacklisted_sentence_ids);
     ofstream output(GetGrammarFilePath(grammar_path, i).c_str());
author	Paul Baltescu <pauldb89@gmail.com>	2013-11-26 15:01:14 +0000
committer	Paul Baltescu <pauldb89@gmail.com>	2013-11-26 15:01:14 +0000
commit	a3826db61847a55f59bb9666f61fd1bb88888085 (patch)
tree	022475bafbf71ba6aaeb98efdbafcde24f7e60a5 /extractor/run_extractor.cc
parent	1cd86c44e1799c441cdcda2a022be0ee6e52d38c (diff)