summaryrefslogtreecommitdiff
path: root/extractor/fast_intersector.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-11-24 13:19:28 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-11-24 16:26:49 +0000
commitf528ac27dab11770f01595b043675dba2947a263 (patch)
tree0f340a74879772fc8a5d8c4484df27457f25f11e /extractor/fast_intersector.cc
parent79206291f78fba893fda6a61ff0ae9264d00bb82 (diff)
Reduce memory overhead for constructing the intersector.
Diffstat (limited to 'extractor/fast_intersector.cc')
-rw-r--r--extractor/fast_intersector.cc40
1 files changed, 11 insertions, 29 deletions
diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc
index a8591a72..0d1fa6d8 100644
--- a/extractor/fast_intersector.cc
+++ b/extractor/fast_intersector.cc
@@ -11,41 +11,22 @@
namespace extractor {
-FastIntersector::FastIntersector(shared_ptr<SuffixArray> suffix_array,
- shared_ptr<Precomputation> precomputation,
- shared_ptr<Vocabulary> vocabulary,
- int max_rule_span,
- int min_gap_size) :
+FastIntersector::FastIntersector(
+ shared_ptr<SuffixArray> suffix_array,
+ shared_ptr<Precomputation> precomputation,
+ shared_ptr<Vocabulary> vocabulary,
+ int max_rule_span,
+ int min_gap_size) :
suffix_array(suffix_array),
+ precomputation(precomputation),
vocabulary(vocabulary),
max_rule_span(max_rule_span),
- min_gap_size(min_gap_size) {
- Index precomputed_collocations = precomputation->GetCollocations();
- for (pair<vector<int>, vector<int>> entry: precomputed_collocations) {
- vector<int> phrase = ConvertPhrase(entry.first);
- collocations[phrase] = entry.second;
- }
-}
+ min_gap_size(min_gap_size) {}
FastIntersector::FastIntersector() {}
FastIntersector::~FastIntersector() {}
-vector<int> FastIntersector::ConvertPhrase(const vector<int>& old_phrase) {
- vector<int> new_phrase;
- new_phrase.reserve(old_phrase.size());
- shared_ptr<DataArray> data_array = suffix_array->GetData();
- for (int word_id: old_phrase) {
- if (word_id < 0) {
- new_phrase.push_back(word_id);
- } else {
- new_phrase.push_back(
- vocabulary->GetTerminalIndex(data_array->GetWord(word_id)));
- }
- }
- return new_phrase;
-}
-
PhraseLocation FastIntersector::Intersect(
PhraseLocation& prefix_location,
PhraseLocation& suffix_location,
@@ -59,8 +40,9 @@ PhraseLocation FastIntersector::Intersect(
assert(vocabulary->IsTerminal(symbols.front())
&& vocabulary->IsTerminal(symbols.back()));
- if (collocations.count(symbols)) {
- return PhraseLocation(collocations[symbols], phrase.Arity() + 1);
+ if (precomputation->Contains(symbols)) {
+ return PhraseLocation(precomputation->GetCollocations(symbols),
+ phrase.Arity() + 1);
}
bool prefix_ends_with_x =