summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- extractor/matchings_trie.cc 3
-rw-r--r-- extractor/mocks/mock_precomputation.h 1
-rw-r--r-- extractor/precomputation.cc 19
-rw-r--r-- extractor/precomputation.h 2
-rw-r--r-- extractor/precomputation_test.cc 34
-rw-r--r-- extractor/run_extractor.cc 15
-rw-r--r-- python/pkg/cdec/sa/compile.py 11
-rw-r--r-- python/src/sa/_sa.c 2
8 files changed, 11 insertions, 76 deletions
diff --git a/extractor/matchings_trie.cc b/extractor/matchings_trie.cc
index 921ec582..8ea795db 100644
--- a/extractor/matchings_trie.cc
+++ b/extractor/matchings_trie.cc
@@ -14,6 +14,9 @@ void MatchingsTrie::ResetTree(shared_ptr<TrieNode> root) {
for (auto child: root->children) {
ResetTree(child.second);
}
+ if (root->suffix_link != NULL) {
+ root->suffix_link.reset();
+ }
root.reset();
}
}
diff --git a/extractor/mocks/mock_precomputation.h b/extractor/mocks/mock_precomputation.h
index 987bdb2f..9bc72235 100644
--- a/extractor/mocks/mock_precomputation.h
+++ b/extractor/mocks/mock_precomputation.h
@@ -4,6 +4,5 @@
class MockPrecomputation : public Precomputation {
public:
- MOCK_CONST_METHOD0(GetInvertedIndex, const Index&());
MOCK_CONST_METHOD0(GetCollocations, const Index&());
};
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 8a76beb1..189ac42c 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -41,7 +41,6 @@ Precomputation::Precomputation(
for (int j = 1; j <= max_frequent_phrase_len && i + j <= data.size(); ++j) {
pattern.push_back(data[i + j - 1]);
if (frequent_patterns_set.count(pattern)) {
- inverted_index[pattern].push_back(i);
int is_super_frequent = super_frequent_patterns_set.count(pattern);
matchings.push_back(make_tuple(i, j, is_super_frequent));
} else {
@@ -158,19 +157,7 @@ void Precomputation::WriteBinary(const fs::path& filepath) const {
FILE* file = fopen(filepath.string().c_str(), "w");
// TODO(pauldb): Refactor this code.
- int size = inverted_index.size();
- fwrite(&size, sizeof(int), 1, file);
- for (auto entry: inverted_index) {
- size = entry.first.size();
- fwrite(&size, sizeof(int), 1, file);
- fwrite(entry.first.data(), sizeof(int), size, file);
-
- size = entry.second.size();
- fwrite(&size, sizeof(int), 1, file);
- fwrite(entry.second.data(), sizeof(int), size, file);
- }
-
- size = collocations.size();
+ int size = collocations.size();
fwrite(&size, sizeof(int), 1, file);
for (auto entry: collocations) {
size = entry.first.size();
@@ -183,10 +170,6 @@ void Precomputation::WriteBinary(const fs::path& filepath) const {
}
}
-const Index& Precomputation::GetInvertedIndex() const {
- return inverted_index;
-}
-
const Index& Precomputation::GetCollocations() const {
return collocations;
}
diff --git a/extractor/precomputation.h b/extractor/precomputation.h
index 28426bfa..3d44c2a6 100644
--- a/extractor/precomputation.h
+++ b/extractor/precomputation.h
@@ -30,7 +30,6 @@ class Precomputation {
void WriteBinary(const fs::path& filepath) const;
- virtual const Index& GetInvertedIndex() const;
virtual const Index& GetCollocations() const;
static int NON_TERMINAL;
@@ -49,7 +48,6 @@ class Precomputation {
void AddStartPositions(vector<int>& positions, int pos1, int pos2);
void AddStartPositions(vector<int>& positions, int pos1, int pos2, int pos3);
- Index inverted_index;
Index collocations;
};
diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc
index 9edb29db..6b77b9c0 100644
--- a/extractor/precomputation_test.cc
+++ b/extractor/precomputation_test.cc
@@ -35,40 +35,6 @@ class PrecomputationTest : public Test {
shared_ptr<MockSuffixArray> suffix_array;
};
-TEST_F(PrecomputationTest, TestInvertedIndex) {
- Precomputation precomputation(suffix_array, 100, 3, 10, 5, 1, 4, 2);
- Index inverted_index = precomputation.GetInvertedIndex();
-
- EXPECT_EQ(8, inverted_index.size());
- vector<int> key = {2};
- vector<int> expected_value = {1, 5, 8, 11};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {3};
- expected_value = {2, 6, 9};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {4};
- expected_value = {0, 10};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {5};
- expected_value = {3, 7};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {4, 2};
- expected_value = {0, 10};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {2, 3};
- expected_value = {1, 5, 8};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {3, 5};
- expected_value = {2, 6};
- EXPECT_EQ(expected_value, inverted_index[key]);
- key = {2, 3, 5};
- expected_value = {1, 5};
- EXPECT_EQ(expected_value, inverted_index[key]);
-
- key = {2, 4};
- EXPECT_EQ(0, inverted_index.count(key));
-}
-
TEST_F(PrecomputationTest, TestCollocations) {
Precomputation precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2);
Index collocations = precomputation.GetCollocations();
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 36dbd7a0..5255737d 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -31,14 +31,6 @@ namespace fs = boost::filesystem;
namespace po = boost::program_options;
using namespace std;
-void my_pause() {
- cerr << "pausing..." << endl;
- for (int i = 0; i < 10000000; ++i) {
- cerr << endl;
- }
- cerr << "end pause" << endl;
-}
-
int main(int argc, char** argv) {
// TODO(pauldb): Also take arguments from config file.
po::options_description desc("Command line options");
@@ -122,7 +114,7 @@ int main(int argc, char** argv) {
cerr << "Reading alignment took "
<< GetDuration(start_time, stop_time) << " seconds" << endl;
- cerr << "Precomputating collocations..." << endl;
+ cerr << "Precomputing collocations..." << endl;
start_time = Clock::now();
shared_ptr<Precomputation> precomputation = make_shared<Precomputation>(
source_suffix_array,
@@ -150,6 +142,8 @@ int main(int argc, char** argv) {
<< GetDuration(preprocess_start_time, preprocess_stop_time)
<< " seconds" << endl;
+ cerr << "creating grammar extractor" << endl;
+
Clock::time_point extraction_start_time = Clock::now();
vector<shared_ptr<Feature> > features = {
make_shared<TargetGivenSourceCoherent>(),
@@ -176,6 +170,9 @@ int main(int argc, char** argv) {
vm["max_samples"].as<int>(),
vm["tight_phrases"].as<bool>());
+ // Release extra memory used by the initial precomputation.
+ precomputation.reset();
+
int grammar_id = 0;
fs::path grammar_path = vm["grammars"].as<string>();
if (!fs::is_directory(grammar_path)) {
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py
index 1362e7b5..d4cd8387 100644
--- a/python/pkg/cdec/sa/compile.py
+++ b/python/pkg/cdec/sa/compile.py
@@ -21,10 +21,6 @@ def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2, tight_phr
train_min_gap_size=min_gap)
return precomp
-def my_pause():
- for i in range(10000000):
- print >> sys.stderr, ""
-
def main():
preprocess_start_time = monitor_cpu()
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
@@ -89,8 +85,6 @@ def main():
stop_time = monitor_cpu()
logger.info('Compiling source suffix array took %f seconds', stop_time - start_time)
- my_pause()
-
start_time = monitor_cpu()
logger.info('Compiling target data array')
if args.bitext:
@@ -100,7 +94,6 @@ def main():
e.write_binary(e_bin)
stop_time = monitor_cpu()
logger.info('Compiling target data array took %f seconds', stop_time - start_time)
- my_pause()
start_time = monitor_cpu()
logger.info('Precomputing frequent phrases')
@@ -108,15 +101,12 @@ def main():
stop_time = monitor_cpu()
logger.info('Compiling precomputations took %f seconds', stop_time - start_time)
- my_pause()
-
start_time = monitor_cpu()
logger.info('Compiling alignment')
a = cdec.sa.Alignment(from_text=args.alignment)
a.write_binary(a_bin)
stop_time = monitor_cpu()
logger.info('Compiling alignment took %f seonds', stop_time - start_time)
- my_pause()
start_time = monitor_cpu()
logger.info('Compiling bilexical dictionary')
@@ -124,7 +114,6 @@ def main():
lex.write_binary(lex_bin)
stop_time = monitor_cpu()
logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time)
- my_pause()
# Write configuration
config = cdec.configobj.ConfigObj(args.config, unrepr=True)
diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c
index 92abd4c3..4bdabc17 100644
--- a/python/src/sa/_sa.c
+++ b/python/src/sa/_sa.c
@@ -1,4 +1,4 @@
-/* Generated by Cython 0.16 on Fri Feb 22 16:52:09 2013 */
+/* Generated by Cython 0.16 on Wed Mar 6 11:06:18 2013 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"