From deb555e5ab40a62738269050b43b412335d4b66a Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 12 Apr 2016 10:07:48 +0200
Subject: extractor: gzip compressed grammars
---
extractor/extract.cc | 2 +-
extractor/run_extractor.cc | 13 ++++++++-----
2 files changed, 9 insertions(+), 6 deletions(-)
(limited to 'extractor')
diff --git a/extractor/extract.cc b/extractor/extract.cc
index 08f209cc..b16a4e1c 100644
--- a/extractor/extract.cc
+++ b/extractor/extract.cc
@@ -14,7 +14,6 @@
const unsigned omp_get_num_threads() { return 1; }
#endif
-#include "filelib.h"
#include "alignment.h"
#include "data_array.h"
#include "features/count_source_target.h"
@@ -25,6 +24,7 @@
#include "features/max_lex_target_given_source.h"
#include "features/sample_source_count.h"
#include "features/target_given_source_coherent.h"
+#include "filelib.h"
#include "grammar.h"
#include "grammar_extractor.h"
#include "precomputation.h"
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 00564a36..81d0d8be 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -24,6 +24,7 @@
#include "features/max_lex_target_given_source.h"
#include "features/sample_source_count.h"
#include "features/target_given_source_coherent.h"
+#include "filelib.h"
#include "grammar.h"
#include "grammar_extractor.h"
#include "precomputation.h"
@@ -41,8 +42,8 @@ using namespace extractor;
using namespace features;
// Returns the file path in which a given grammar should be written.
-fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number) {
- string file_name = "grammar." + to_string(file_number);
+fs::path GetGrammarFilePath(const fs::path& grammar_path, int file_number, bool use_zip) {
+ string file_name = "grammar." + to_string(file_number) + (use_zip ? ".gz" : "");
return grammar_path / file_name;
}
@@ -61,6 +62,7 @@ int main(int argc, char** argv) {
("bitext,b", po::value