summary | refs | log | tree | commit | diff
path: root/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'extractor')
-rw-r--r--  extractor/fast_intersector.cc  7
-rw-r--r--  extractor/precomputation.cc    7
-rw-r--r--  extractor/precomputation.h     3
-rw-r--r--  extractor/run_extractor.cc     1
-rw-r--r--  extractor/sampler.cc           3
5 files changed, 9 insertions, 12 deletions
diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc
index cec3d30b..1b8c32b1 100644
--- a/extractor/fast_intersector.cc
+++ b/extractor/fast_intersector.cc
@@ -35,12 +35,9 @@ vector<int> FastIntersector::ConvertPhrase(const vector<int>& old_phrase) {
vector<int> new_phrase;
new_phrase.reserve(old_phrase.size());
shared_ptr<DataArray> data_array = suffix_array->GetData();
- int num_nonterminals = 0;
for (int word_id: old_phrase) {
- // TODO(pauldb): Remove overhead for relabelling the nonterminals here.
- if (word_id == Precomputation::NON_TERMINAL) {
- ++num_nonterminals;
- new_phrase.push_back(vocabulary->GetNonterminalIndex(num_nonterminals));
+ if (word_id < 0) {
+ new_phrase.push_back(word_id);
} else {
new_phrase.push_back(
vocabulary->GetTerminalIndex(data_array->GetWord(word_id)));
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 8cc32ffd..e29018c2 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -10,7 +10,8 @@ using namespace std;
namespace extractor {
-int Precomputation::NON_TERMINAL = -1;
+int Precomputation::FIRST_NONTERMINAL = -1;
+int Precomputation::SECOND_NONTERMINAL = -2;
Precomputation::Precomputation(
shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns,
@@ -112,13 +113,13 @@ void Precomputation::AddCollocations(
&& size1 + size2 + 1 <= max_rule_symbols) {
vector<int> pattern(data.begin() + start1,
data.begin() + start1 + size1);
- pattern.push_back(Precomputation::NON_TERMINAL);
+ pattern.push_back(Precomputation::FIRST_NONTERMINAL);
pattern.insert(pattern.end(), data.begin() + start2,
data.begin() + start2 + size2);
AddStartPositions(collocations[pattern], start1, start2);
if (is_super2) {
- pattern.push_back(Precomputation::NON_TERMINAL);
+ pattern.push_back(Precomputation::SECOND_NONTERMINAL);
for (size_t k = j + 1; k < matchings.size(); ++k) {
int start3, size3, is_super3;
tie(start3, size3, is_super3) = matchings[k];
diff --git a/extractor/precomputation.h b/extractor/precomputation.h
index dbd99c14..2c1eccf8 100644
--- a/extractor/precomputation.h
+++ b/extractor/precomputation.h
@@ -34,7 +34,8 @@ class Precomputation {
virtual const Index& GetCollocations() const;
- static int NON_TERMINAL;
+ static int FIRST_NONTERMINAL;
+ static int SECOND_NONTERMINAL;
protected:
Precomputation();
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index c701c8d0..0f91236d 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -35,7 +35,6 @@ using namespace extractor;
using namespace features;
int main(int argc, char** argv) {
- // TODO(pauldb): Also take arguments from config file.
po::options_description desc("Command line options");
desc.add_options()
("help,h", "Show available options")
diff --git a/extractor/sampler.cc b/extractor/sampler.cc
index d128913f..f64a408c 100644
--- a/extractor/sampler.cc
+++ b/extractor/sampler.cc
@@ -38,8 +38,7 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location) const {
}
int Sampler::Round(double x) const {
- // TODO(pauldb): Remove EPS.
- return x + 0.5 + 1e-8;
+ return x + 0.5;
}
} // namespace extractor