summary | refs | log | tree | commit | diff
path: root/extractor
diff options
context:
space:
mode:
author: Paul Baltescu <pauldb89@gmail.com> 2013-11-26 16:03:16 +0000
committer: Paul Baltescu <pauldb89@gmail.com> 2013-11-26 16:03:16 +0000
commit: 3ece9d5cf8c58edd55da35ac96f1a5869d7d6608 (patch)
tree: 3f94bd94d3dac4a145e2b913429c6d72ac10c10d /extractor
parent: a3826db61847a55f59bb9666f61fd1bb88888085 (diff)
Write config file after compiling data structures.
Diffstat (limited to 'extractor')
-rw-r--r-- extractor/compile.cc 30
-rw-r--r-- extractor/extract.cc 4
2 files changed, 25 insertions, 9 deletions
diff --git a/extractor/compile.cc b/extractor/compile.cc
index 9e8044ad..3ee668ce 100644
--- a/extractor/compile.cc
+++ b/extractor/compile.cc
@@ -30,6 +30,8 @@ int main(int argc, char** argv) {
("bitext,b", po::value<string>(), "Parallel text (source ||| target)")
("alignment,a", po::value<string>()->required(), "Bitext word alignment")
("output,o", po::value<string>()->required(), "Output path")
+ ("config,c", po::value<string>()->required(),
+ "Path where the config file will be generated")
("frequent", po::value<int>()->default_value(100),
"Number of precomputed frequent patterns")
("super_frequent", po::value<int>()->default_value(10),
@@ -82,8 +84,12 @@ int main(int argc, char** argv) {
target_data_array = make_shared<DataArray>(vm["target"].as<string>());
}
+ ofstream config_stream(vm["config"].as<string>());
+
Clock::time_point start_write = Clock::now();
- ofstream target_fstream((output_dir / fs::path("target.bin")).string());
+ string target_path = (output_dir / fs::path("target.bin")).string();
+ config_stream << "target = " << target_path << endl;
+ ofstream target_fstream(target_path);
ar::binary_oarchive target_stream(target_fstream);
target_stream << *target_data_array;
Clock::time_point stop_write = Clock::now();
@@ -100,7 +106,9 @@ int main(int argc, char** argv) {
make_shared<SuffixArray>(source_data_array);
start_write = Clock::now();
- ofstream source_fstream((output_dir / fs::path("source.bin")).string());
+ string source_path = (output_dir / fs::path("source.bin")).string();
+ config_stream << "source = " << source_path << endl;
+ ofstream source_fstream(source_path);
ar::binary_oarchive output_stream(source_fstream);
output_stream << *source_suffix_array;
stop_write = Clock::now();
@@ -116,7 +124,9 @@ int main(int argc, char** argv) {
make_shared<Alignment>(vm["alignment"].as<string>());
start_write = Clock::now();
- ofstream alignment_fstream((output_dir / fs::path("alignment.bin")).string());
+ string alignment_path = (output_dir / fs::path("alignment.bin")).string();
+ config_stream << "alignment = " << alignment_path << endl;
+ ofstream alignment_fstream(alignment_path);
ar::binary_oarchive alignment_stream(alignment_fstream);
alignment_stream << *alignment;
stop_write = Clock::now();
@@ -126,7 +136,7 @@ int main(int argc, char** argv) {
cerr << "Reading alignment took "
<< GetDuration(start_time, stop_time) << " seconds" << endl;
- shared_ptr<Vocabulary> vocabulary;
+ shared_ptr<Vocabulary> vocabulary = make_shared<Vocabulary>();
start_time = Clock::now();
cerr << "Precomputing collocations..." << endl;
@@ -142,11 +152,15 @@ int main(int argc, char** argv) {
vm["min_frequency"].as<int>());
start_write = Clock::now();
- ofstream precomp_fstream((output_dir / fs::path("precomp.bin")).string());
+ string precomputation_path = (output_dir / fs::path("precomp.bin")).string();
+ config_stream << "precomputation = " << precomputation_path << endl;
+ ofstream precomp_fstream(precomputation_path);
ar::binary_oarchive precomp_stream(precomp_fstream);
precomp_stream << precomputation;
- ofstream vocab_fstream((output_dir / fs::path("vocab.bin")).string());
+ string vocabulary_path = (output_dir / fs::path("vocab.bin")).string();
+ config_stream << "vocabulary = " << vocabulary_path << endl;
+ ofstream vocab_fstream(vocabulary_path);
ar::binary_oarchive vocab_stream(vocab_fstream);
vocab_stream << *vocabulary;
stop_write = Clock::now();
@@ -161,7 +175,9 @@ int main(int argc, char** argv) {
TranslationTable table(source_data_array, target_data_array, alignment);
start_write = Clock::now();
- ofstream table_fstream((output_dir / fs::path("bilex.bin")).string());
+ string table_path = (output_dir / fs::path("bilex.bin")).string();
+ config_stream << "ttable = " << table_path << endl;
+ ofstream table_fstream(table_path);
ar::binary_oarchive table_stream(table_fstream);
table_stream << table;
stop_write = Clock::now();
diff --git a/extractor/extract.cc b/extractor/extract.cc
index 2d5831fa..387cbe9b 100644
--- a/extractor/extract.cc
+++ b/extractor/extract.cc
@@ -72,7 +72,7 @@ int main(int argc, char** argv) {
po::options_description cmdline_options("Command line options");
cmdline_options.add_options()
("help", "Show available options")
- ("config", po::value<string>()->required(), "Path to config file");
+ ("config,c", po::value<string>()->required(), "Path to config file");
cmdline_options.add(general_options);
po::options_description config_options("Config file options");
@@ -236,7 +236,7 @@ int main(int argc, char** argv) {
Grammar grammar = extractor.GetGrammar(
sentences[i], blacklisted_sentence_ids);
ofstream output(GetGrammarFilePath(grammar_path, i).c_str());
- // output << grammar;
+ output << grammar;
}
for (size_t i = 0; i < sentences.size(); ++i) {