summary refs log tree commit diff
path: root/extractor/run_extractor.cc
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2013-11-13 11:22:24 -0800
committer Chris Dyer <redpony@gmail.com> 2013-11-13 11:22:24 -0800
commit 9be8d89b5a4065a81b26d8af1f3443d152e7922a (patch)
tree 237090ff519a0419c3ba379ec3a6884f05caa6c2 /extractor/run_extractor.cc
parent 8a24bb77bc2e9fd17a6f6529a2942cde96a6af49 (diff)
parent 4a9449a564e626fe004200b730bfaa44d6152e0f (diff)
Merge pull request #27 from pks/master
Tidying (soft) syntax features; loo for C++ extractor; updates for dtrain
Diffstat (limited to 'extractor/run_extractor.cc')
-rw-r--r-- extractor/run_extractor.cc 13
1 files changed, 11 insertions, 2 deletions
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 8a9ca89d..6eb55073 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -75,7 +75,9 @@ int main(int argc, char** argv) {
("max_samples", po::value<int>()->default_value(300),
"Maximum number of samples")
("tight_phrases", po::value<bool>()->default_value(true),
- "False if phrases may be loose (better, but slower)");
+ "False if phrases may be loose (better, but slower)")
+ ("leave_one_out", po::value<bool>()->zero_tokens(),
+ "do leave-one-out estimation of grammars (e.g. for extracting grammars for the training set");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
@@ -96,6 +98,11 @@ int main(int argc, char** argv) {
return 1;
}
+ bool leave_one_out = false;
+ if (vm.count("leave_one_out")) {
+ leave_one_out = true;
+ }
+
int num_threads = vm["threads"].as<int>();
cerr << "Grammar extraction will use " << num_threads << " threads." << endl;
@@ -223,7 +230,9 @@ int main(int argc, char** argv) {
}
suffixes[i] = suffix;
- Grammar grammar = extractor.GetGrammar(sentences[i]);
+ unordered_set<int> blacklisted_sentence_ids;
+ if (leave_one_out) blacklisted_sentence_ids.insert(i);
+ Grammar grammar = extractor.GetGrammar(sentences[i], blacklisted_sentence_ids, source_data_array);
ofstream output(GetGrammarFilePath(grammar_path, i).c_str());
output << grammar;
}