From 389f77ddd6a8eabe357691a9476f061f6262e563 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 25 Apr 2013 23:32:02 -0400 Subject: only turn on c++11 for extractor --- extractor/Makefile.am | 2 +- m4/ax_cxx_compile_stdcxx_11.m4 | 9 +++++++-- utils/filelib.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/extractor/Makefile.am b/extractor/Makefile.am index d8239b7d..fc799f74 100644 --- a/extractor/Makefile.am +++ b/extractor/Makefile.am @@ -145,5 +145,5 @@ libextractor_a_SOURCES = \ translation_table.cc \ vocabulary.cc -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -std=c++0x -fopenmp $(GTEST_CPPFLAGS) $(GMOCK_CPPFLAGS) +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(CXX11_SWITCH) -fopenmp $(GTEST_CPPFLAGS) $(GMOCK_CPPFLAGS) AM_LDFLAGS = -fopenmp diff --git a/m4/ax_cxx_compile_stdcxx_11.m4 b/m4/ax_cxx_compile_stdcxx_11.m4 index 1bc31128..f6cf4a15 100644 --- a/m4/ax_cxx_compile_stdcxx_11.m4 +++ b/m4/ax_cxx_compile_stdcxx_11.m4 @@ -74,6 +74,7 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl ac_success=yes fi + restore_it="$CXXFLAGS" m4_if([$1], [noext], [], [dnl if test x$ac_success = xno; then for switch in -std=gnu++11 -std=gnu++0x; do @@ -87,7 +88,8 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl [eval $cachevar=no]) CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" + CXXFLAGS="$CXXFLAGS" + c11switch="$switch" ac_success=yes break fi @@ -107,12 +109,15 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl [eval $cachevar=no]) CXXFLAGS="$ac_save_CXXFLAGS"]) if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" + CXXFLAGS="$CXXFLAGS" + c11switch="$switch" ac_success=yes break fi done fi]) + CXXFLAGS="$restore_it" + AC_SUBST([CXX11_SWITCH], ["$c11switch"]) AC_LANG_POP([C++]) if test x$ax_cxx_compile_cxx11_required = xtrue; then if test x$ac_success = xno; then diff --git a/utils/filelib.h b/utils/filelib.h index bb6e7415..b9ea3940 100644 --- a/utils/filelib.h +++ b/utils/filelib.h @@ -27,7 +27,7 @@ struct BaseFile { } bool is_null() const { return !ps_; } operator bool() const { - return ps_; + return ps_.get(); } S* stream() { return ps_.get(); } S* operator->() { return ps_.get(); } // compat with old ReadFile * -> new Readfile. remove? -- cgit v1.2.3 From 3d3f7439963bf978b4bec6e3e021b850bcd5822f Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 27 Apr 2013 13:03:30 -0400 Subject: fix build if you don't have c++11 --- extractor/Makefile.am | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extractor/Makefile.am b/extractor/Makefile.am index fc799f74..e94a9b91 100644 --- a/extractor/Makefile.am +++ b/extractor/Makefile.am @@ -1,3 +1,5 @@ +if HAVE_CXX11 + bin_PROGRAMS = compile run_extractor EXTRA_PROGRAMS = alignment_test \ @@ -147,3 +149,4 @@ libextractor_a_SOURCES = \ AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(CXX11_SWITCH) -fopenmp $(GTEST_CPPFLAGS) $(GMOCK_CPPFLAGS) AM_LDFLAGS = -fopenmp +endif -- cgit v1.2.3 From b7ea2615bc9bb69031ff714ddce1539c9f1bda2d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 1 May 2013 17:09:20 -0400 Subject: fix wu ke's unique k-best extraction bug --- decoder/kbest.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/decoder/kbest.h b/decoder/kbest.h index 9a55f653..44c23151 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -6,6 +6,7 @@ #include #include +#include #include "wordid.h" #include "hg.h" @@ -134,7 +135,7 @@ namespace KBest { } add_next = false; - if (cand.size() > 0) { + while (!add_next && cand.size() > 0) { std::pop_heap(cand.begin(), cand.end(), HeapCompare()); Derivation* d = cand.back(); cand.pop_back(); @@ -145,10 +146,15 @@ namespace KBest { if (!filter(d->yield)) { D.push_back(d); add_next = true; + } else { + // just because a node already derived a string (or whatever + // equivalent derivation class), you need to add its successors + // to the node's candidate pool + LazyNext(d, &cand, &s.ds); } - } else { - break; } + if (!add_next) + break; } if (k < D.size()) return D[k]; else return NULL; } @@ -184,7 +190,11 @@ namespace KBest { s.cand.push_back(d); } - const unsigned effective_k = std::min(k_prime, s.cand.size()); + unsigned effective_k = s.cand.size(); + if (boost::is_same >::value) { + // if there's no filter you can use this optimization + effective_k = std::min(k_prime, s.cand.size()); + } const typename CandidateHeap::iterator kth = s.cand.begin() + effective_k; std::nth_element(s.cand.begin(), kth, s.cand.end(), DerivationCompare()); s.cand.resize(effective_k); -- cgit v1.2.3 From 2e4b60f35c40af366be40fe7089f21a5c1ad8e71 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 6 May 2013 22:18:08 -0400 Subject: add passthrough length features --- decoder/scfg_translator.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 3b43b586..6f0b003b 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -12,6 +12,7 @@ #include "grammar.h" #include "bottom_up_parser.h" #include "sentence_metadata.h" +#include "stringlib.h" #include "tdict.h" #include "viterbi.h" #include "verbose.h" @@ -68,7 +69,11 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const int j = alts[k].dist2next + i; const string& src = TD::Convert(alts[k].label); if (ss.count(alts[k].label) == 0) { - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + int length = static_cast(log(UTF8StringLen(src)) / log(1.6)) + 1; + if (length > 6) length = 6; + string len_feat = "PassThrough_0=1"; + len_feat[12] += length; + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1 " + len_feat)); pt->a_.push_back(AlignmentPoint(0,0)); AddRule(pt); RefineRule(pt, ctf_level); -- cgit v1.2.3 From bed260cb57fc8fb604a8a9e391321f777a697ec4 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 6 May 2013 22:22:36 -0400 Subject: fix --- utils/stringlib.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/utils/stringlib.h b/utils/stringlib.h index ff5dc89d..1bb533d8 100644 --- a/utils/stringlib.h +++ b/utils/stringlib.h @@ -268,6 +268,16 @@ inline unsigned int UTF8Len(unsigned char x) { else return 0; } +inline unsigned int UTF8StringLen(const std::string& x) { + unsigned pos = 0; + int len = 0; + while(pos < x.size()) { + ++len; + pos += UTF8Len(x[pos]); + } + return len; +} + std::string md5(const std::string& in); #endif -- cgit v1.2.3 From fc3d47b81448c7537ed7951aea81ddcbd95bc18a Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Mon, 13 May 2013 22:32:44 +0100 Subject: Replace > > with >>. --- extractor/alignment.cc | 6 +++--- extractor/alignment.h | 4 ++-- extractor/alignment_test.cc | 2 +- extractor/fast_intersector.cc | 4 ++-- extractor/fast_intersector.h | 2 +- extractor/matchings_trie.h | 2 +- extractor/mocks/mock_alignment.h | 2 +- extractor/mocks/mock_rule_extractor_helper.h | 12 ++++++------ extractor/mocks/mock_target_phrase_extractor.h | 2 +- extractor/phrase_location.cc | 2 +- extractor/phrase_location.h | 2 +- extractor/precomputation.cc | 12 ++++++------ extractor/precomputation.h | 6 +++--- extractor/rule.cc | 2 +- extractor/rule.h | 4 ++-- extractor/rule_extractor.cc | 8 ++++---- extractor/rule_extractor.h | 6 +++--- extractor/rule_extractor_helper.cc | 6 +++--- extractor/rule_extractor_helper.h | 4 ++-- extractor/rule_extractor_helper_test.cc | 26 +++++++++++++------------- extractor/rule_extractor_test.cc | 10 +++++----- extractor/rule_factory_test.cc | 2 +- extractor/run_extractor.cc | 2 +- extractor/scorer.cc | 2 +- extractor/scorer.h | 4 ++-- extractor/scorer_test.cc | 2 +- extractor/target_phrase_extractor.cc | 18 +++++++++--------- extractor/target_phrase_extractor.h | 10 +++++----- extractor/target_phrase_extractor_test.cc | 14 +++++++------- extractor/translation_table.cc | 2 +- extractor/translation_table.h | 2 +- extractor/translation_table_test.cc | 6 +++--- 32 files changed, 94 insertions(+), 94 deletions(-) diff --git a/extractor/alignment.cc b/extractor/alignment.cc index 1aea34b3..b187c03a 100644 --- a/extractor/alignment.cc +++ b/extractor/alignment.cc @@ -21,7 +21,7 @@ Alignment::Alignment(const string& filename) { while (getline(infile, line)) { vector items; boost::split(items, line, boost::is_any_of(" -")); - vector > alignment; + vector> alignment; alignment.reserve(items.size() / 2); for (size_t i = 0; i < items.size(); i += 2) { alignment.push_back(make_pair(stoi(items[i]), stoi(items[i + 1]))); @@ -35,7 +35,7 @@ Alignment::Alignment() {} Alignment::~Alignment() {} -vector > Alignment::GetLinks(int sentence_index) const { +vector> Alignment::GetLinks(int sentence_index) const { return alignments[sentence_index]; } @@ -43,7 +43,7 @@ void Alignment::WriteBinary(const fs::path& filepath) { FILE* file = fopen(filepath.string().c_str(), "w"); int size = alignments.size(); fwrite(&size, sizeof(int), 1, file); - for (vector > alignment: alignments) { + for (vector> alignment: alignments) { size = alignment.size(); fwrite(&size, sizeof(int), 1, file); fwrite(alignment.data(), sizeof(pair), size, file); diff --git a/extractor/alignment.h b/extractor/alignment.h index e9292121..4596f92b 100644 --- a/extractor/alignment.h +++ b/extractor/alignment.h @@ -20,7 +20,7 @@ class Alignment { Alignment(const string& filename); // Returns the alignment for a given sentence. - virtual vector > GetLinks(int sentence_index) const; + virtual vector> GetLinks(int sentence_index) const; // Writes alignment to file in binary format. void WriteBinary(const fs::path& filepath); @@ -31,7 +31,7 @@ class Alignment { Alignment(); private: - vector > > alignments; + vector>> alignments; }; } // namespace extractor diff --git a/extractor/alignment_test.cc b/extractor/alignment_test.cc index a7defb66..43c37ebd 100644 --- a/extractor/alignment_test.cc +++ b/extractor/alignment_test.cc @@ -21,7 +21,7 @@ class AlignmentTest : public Test { }; TEST_F(AlignmentTest, TestGetLinks) { - vector > expected_links = { + vector> expected_links = { make_pair(0, 0), make_pair(1, 1), make_pair(2, 2) }; EXPECT_EQ(expected_links, alignment->GetLinks(0)); diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc index 2a7693b2..a8591a72 100644 --- a/extractor/fast_intersector.cc +++ b/extractor/fast_intersector.cc @@ -21,7 +21,7 @@ FastIntersector::FastIntersector(shared_ptr suffix_array, max_rule_span(max_rule_span), min_gap_size(min_gap_size) { Index precomputed_collocations = precomputation->GetCollocations(); - for (pair, vector > entry: precomputed_collocations) { + for (pair, vector> entry: precomputed_collocations) { vector phrase = ConvertPhrase(entry.first); collocations[phrase] = entry.second; } @@ -177,7 +177,7 @@ void FastIntersector::ExtendPhraseLocation(PhraseLocation& location) const { } location.num_subpatterns = 1; - location.matchings = make_shared >(); + location.matchings = make_shared>(); for (int i = location.sa_low; i < location.sa_high; ++i) { location.matchings->push_back(suffix_array->GetSuffix(i)); } diff --git a/extractor/fast_intersector.h b/extractor/fast_intersector.h index f950a2a9..2819d239 100644 --- a/extractor/fast_intersector.h +++ b/extractor/fast_intersector.h @@ -11,7 +11,7 @@ using namespace std; namespace extractor { -typedef boost::hash > VectorHash; +typedef boost::hash> VectorHash; typedef unordered_map, vector, VectorHash> Index; class Phrase; diff --git a/extractor/matchings_trie.h b/extractor/matchings_trie.h index 1fb29693..3bd146d1 100644 --- a/extractor/matchings_trie.h +++ b/extractor/matchings_trie.h @@ -39,7 +39,7 @@ struct TrieNode { shared_ptr suffix_link; Phrase phrase; PhraseLocation matchings; - unordered_map > children; + unordered_map> children; }; /** diff --git a/extractor/mocks/mock_alignment.h b/extractor/mocks/mock_alignment.h index 299c3d1c..1d123cd8 100644 --- a/extractor/mocks/mock_alignment.h +++ b/extractor/mocks/mock_alignment.h @@ -4,7 +4,7 @@ namespace extractor { -typedef vector > SentenceLinks; +typedef vector> SentenceLinks; class MockAlignment : public Alignment { public: diff --git a/extractor/mocks/mock_rule_extractor_helper.h b/extractor/mocks/mock_rule_extractor_helper.h index 468468f6..b5ab323f 100644 --- a/extractor/mocks/mock_rule_extractor_helper.h +++ b/extractor/mocks/mock_rule_extractor_helper.h @@ -18,7 +18,7 @@ class MockRuleExtractorHelper : public RuleExtractorHelper { const vector&, const vector&, int)); MOCK_CONST_METHOD4(CheckTightPhrases, bool(const vector&, const vector&, const vector&, int)); - MOCK_CONST_METHOD1(GetGapOrder, vector(const vector >&)); + MOCK_CONST_METHOD1(GetGapOrder, vector(const vector>&)); MOCK_CONST_METHOD4(GetSourceIndexes, Indexes(const vector&, const vector&, int, int)); @@ -36,8 +36,8 @@ class MockRuleExtractorHelper : public RuleExtractorHelper { return find_fix_point; } - bool GetGaps(vector >& source_gaps, - vector >& target_gaps, + bool GetGaps(vector>& source_gaps, + vector>& target_gaps, const vector&, const vector&, const vector&, const vector&, const vector&, const vector&, int, int, int, int, int, int, int& num_symbols, @@ -52,7 +52,7 @@ class MockRuleExtractorHelper : public RuleExtractorHelper { void SetUp( int target_phrase_low, int target_phrase_high, int source_back_low, int source_back_high, bool find_fix_point, - vector > source_gaps, vector > target_gaps, + vector> source_gaps, vector> target_gaps, int num_symbols, bool met_constraints, bool get_gaps) { this->target_phrase_low = target_phrase_low; this->target_phrase_high = target_phrase_high; @@ -72,8 +72,8 @@ class MockRuleExtractorHelper : public RuleExtractorHelper { int source_back_low; int source_back_high; bool find_fix_point; - vector > source_gaps; - vector > target_gaps; + vector> source_gaps; + vector> target_gaps; int num_symbols; bool met_constraints; bool get_gaps; diff --git a/extractor/mocks/mock_target_phrase_extractor.h b/extractor/mocks/mock_target_phrase_extractor.h index 6aad853c..a8d45631 100644 --- a/extractor/mocks/mock_target_phrase_extractor.h +++ b/extractor/mocks/mock_target_phrase_extractor.h @@ -9,7 +9,7 @@ typedef pair PhraseExtract; class MockTargetPhraseExtractor : public TargetPhraseExtractor { public: MOCK_CONST_METHOD6(ExtractPhrases, vector( - const vector > &, const vector&, int, int, + const vector>&, const vector&, int, int, const unordered_map&, int)); }; diff --git a/extractor/phrase_location.cc b/extractor/phrase_location.cc index 678ae270..13140cac 100644 --- a/extractor/phrase_location.cc +++ b/extractor/phrase_location.cc @@ -8,7 +8,7 @@ PhraseLocation::PhraseLocation(int sa_low, int sa_high) : PhraseLocation::PhraseLocation(const vector& matchings, int num_subpatterns) : sa_low(0), sa_high(0), - matchings(make_shared >(matchings)), + matchings(make_shared>(matchings)), num_subpatterns(num_subpatterns) {} bool PhraseLocation::IsEmpty() const { diff --git a/extractor/phrase_location.h b/extractor/phrase_location.h index 91950e03..f8bcabb4 100644 --- a/extractor/phrase_location.h +++ b/extractor/phrase_location.h @@ -32,7 +32,7 @@ struct PhraseLocation { friend bool operator==(const PhraseLocation& a, const PhraseLocation& b); int sa_low, sa_high; - shared_ptr > matchings; + shared_ptr> matchings; int num_subpatterns; }; diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc index b3906943..ee4ba42c 100644 --- a/extractor/precomputation.cc +++ b/extractor/precomputation.cc @@ -19,7 +19,7 @@ Precomputation::Precomputation( int max_rule_symbols, int min_gap_size, int max_frequent_phrase_len, int min_frequency) { vector data = suffix_array->GetData()->GetData(); - vector > frequent_patterns = FindMostFrequentPatterns( + vector> frequent_patterns = FindMostFrequentPatterns( suffix_array, data, num_frequent_patterns, max_frequent_phrase_len, min_frequency); @@ -34,7 +34,7 @@ Precomputation::Precomputation( } } - vector > matchings; + vector> matchings; for (size_t i = 0; i < data.size(); ++i) { // If the sentence is over, add all the discontiguous frequent patterns to // the index. @@ -64,14 +64,14 @@ Precomputation::Precomputation() {} Precomputation::~Precomputation() {} -vector > Precomputation::FindMostFrequentPatterns( +vector> Precomputation::FindMostFrequentPatterns( shared_ptr suffix_array, const vector& data, int num_frequent_patterns, int max_frequent_phrase_len, int min_frequency) { vector lcp = suffix_array->BuildLCPArray(); vector run_start(max_frequent_phrase_len); // Find all the patterns occurring at least min_frequency times. - priority_queue > > heap; + priority_queue>> heap; for (size_t i = 1; i < lcp.size(); ++i) { for (int len = lcp[i]; len < max_frequent_phrase_len; ++len) { int frequency = i - run_start[len]; @@ -84,7 +84,7 @@ vector > Precomputation::FindMostFrequentPatterns( } // Extract the most frequent patterns. - vector > frequent_patterns; + vector> frequent_patterns; while (frequent_patterns.size() < num_frequent_patterns && !heap.empty()) { int start = heap.top().second.first; int len = heap.top().second.second; @@ -100,7 +100,7 @@ vector > Precomputation::FindMostFrequentPatterns( } void Precomputation::AddCollocations( - const vector >& matchings, const vector& data, + const vector>& matchings, const vector& data, int max_rule_span, int min_gap_size, int max_rule_symbols) { // Select the leftmost subpattern. for (size_t i = 0; i < matchings.size(); ++i) { diff --git a/extractor/precomputation.h b/extractor/precomputation.h index e3c4d26a..3e792ac7 100644 --- a/extractor/precomputation.h +++ b/extractor/precomputation.h @@ -15,7 +15,7 @@ using namespace std; namespace extractor { -typedef boost::hash > VectorHash; +typedef boost::hash> VectorHash; typedef unordered_map, vector, VectorHash> Index; class SuffixArray; @@ -54,7 +54,7 @@ class Precomputation { private: // Finds the most frequent contiguous collocations. - vector > FindMostFrequentPatterns( + vector> FindMostFrequentPatterns( shared_ptr suffix_array, const vector& data, int num_frequent_patterns, int max_frequent_phrase_len, int min_frequency); @@ -63,7 +63,7 @@ class Precomputation { // it adds new entries to the index for each discontiguous collocation // matching the criteria specified in the class description. void AddCollocations( - const vector >& matchings, const vector& data, + const vector>& matchings, const vector& data, int max_rule_span, int min_gap_size, int max_rule_symbols); // Adds an occurrence of a binary collocation. diff --git a/extractor/rule.cc b/extractor/rule.cc index b6c7d783..945abaee 100644 --- a/extractor/rule.cc +++ b/extractor/rule.cc @@ -5,7 +5,7 @@ namespace extractor { Rule::Rule(const Phrase& source_phrase, const Phrase& target_phrase, const vector& scores, - const vector >& alignment) : + const vector>& alignment) : source_phrase(source_phrase), target_phrase(target_phrase), scores(scores), diff --git a/extractor/rule.h b/extractor/rule.h index bc95709e..3a568593 100644 --- a/extractor/rule.h +++ b/extractor/rule.h @@ -14,12 +14,12 @@ namespace extractor { */ struct Rule { Rule(const Phrase& source_phrase, const Phrase& target_phrase, - const vector& scores, const vector >& alignment); + const vector& scores, const vector>& alignment); Phrase source_phrase; Phrase target_phrase; vector scores; - vector > alignment; + vector> alignment; }; } // namespace extractor diff --git a/extractor/rule_extractor.cc b/extractor/rule_extractor.cc index fa7386a4..d9181175 100644 --- a/extractor/rule_extractor.cc +++ b/extractor/rule_extractor.cc @@ -81,7 +81,7 @@ vector RuleExtractor::ExtractRules(const Phrase& phrase, // Calculate statistics for the (sampled) occurrences of the source phrase. map source_phrase_counter; - map > > alignments_counter; + map>> alignments_counter; for (auto i = matchings.begin(); i != matchings.end(); i += num_subpatterns) { vector matching(i, i + num_subpatterns); vector extracts = ExtractAlignments(phrase, matching); @@ -165,7 +165,7 @@ vector RuleExtractor::ExtractAlignments( // Get spans for nonterminal gaps. bool met_constraints = true; int num_symbols = phrase.GetNumSymbols(); - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; if (!helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, source_phrase_low, source_phrase_high, source_back_low, source_back_high, @@ -210,7 +210,7 @@ vector RuleExtractor::ExtractAlignments( void RuleExtractor::AddExtracts( vector& extracts, const Phrase& source_phrase, const unordered_map& source_indexes, - const vector >& target_gaps, const vector& target_low, + const vector>& target_gaps, const vector& target_low, int target_phrase_low, int target_phrase_high, int sentence_id) const { auto target_phrases = target_phrase_extractor->ExtractPhrases( target_gaps, target_low, target_phrase_low, target_phrase_high, @@ -232,7 +232,7 @@ void RuleExtractor::AddNonterminalExtremities( const vector& chunklen, const Phrase& source_phrase, int source_back_low, int source_back_high, const vector& source_low, const vector& source_high, const vector& target_low, - const vector& target_high, vector > target_gaps, + const vector& target_high, vector> target_gaps, int sentence_id, int source_sent_start, int starts_with_x, int ends_with_x, int extend_left, int extend_right) const { int source_x_low = source_back_low, source_x_high = source_back_high; diff --git a/extractor/rule_extractor.h b/extractor/rule_extractor.h index 26e6f21c..20866279 100644 --- a/extractor/rule_extractor.h +++ b/extractor/rule_extractor.h @@ -11,7 +11,7 @@ using namespace std; namespace extractor { -typedef vector > PhraseAlignment; +typedef vector> PhraseAlignment; class Alignment; class DataArray; @@ -90,7 +90,7 @@ class RuleExtractor { void AddExtracts( vector& extracts, const Phrase& source_phrase, const unordered_map& source_indexes, - const vector >& target_gaps, const vector& target_low, + const vector>& target_gaps, const vector& target_low, int target_phrase_low, int target_phrase_high, int sentence_id) const; // Adds a leading and/or trailing nonterminal to the source phrase and @@ -101,7 +101,7 @@ class RuleExtractor { const vector& chunklen, const Phrase& source_phrase, int source_back_low, int source_back_high, const vector& source_low, const vector& source_high, const vector& target_low, - const vector& target_high, vector > target_gaps, + const vector& target_high, vector> target_gaps, int sentence_id, int source_sent_start, int starts_with_x, int ends_with_x, int extend_left, int extend_right) const; diff --git a/extractor/rule_extractor_helper.cc b/extractor/rule_extractor_helper.cc index 8a9516f2..d7e332dc 100644 --- a/extractor/rule_extractor_helper.cc +++ b/extractor/rule_extractor_helper.cc @@ -37,7 +37,7 @@ void RuleExtractorHelper::GetLinksSpans( target_low = vector(target_sent_len, -1); target_high = vector(target_sent_len, -1); - vector > links = alignment->GetLinks(sentence_id); + vector> links = alignment->GetLinks(sentence_id); for (auto link: links) { if (source_low[link.first] == -1 || source_low[link.first] > link.second) { source_low[link.first] = link.second; @@ -264,7 +264,7 @@ void RuleExtractorHelper::FindProjection( } bool RuleExtractorHelper::GetGaps( - vector >& source_gaps, vector >& target_gaps, + vector>& source_gaps, vector>& target_gaps, const vector& matching, const vector& chunklen, const vector& source_low, const vector& source_high, const vector& target_low, const vector& target_high, @@ -330,7 +330,7 @@ bool RuleExtractorHelper::GetGaps( } vector RuleExtractorHelper::GetGapOrder( - const vector >& gaps) const { + const vector>& gaps) const { vector gap_order(gaps.size()); for (size_t i = 0; i < gap_order.size(); ++i) { for (size_t j = 0; j < i; ++j) { diff --git a/extractor/rule_extractor_helper.h b/extractor/rule_extractor_helper.h index d4ae45d4..716d7df5 100644 --- a/extractor/rule_extractor_helper.h +++ b/extractor/rule_extractor_helper.h @@ -59,7 +59,7 @@ class RuleExtractorHelper { // Find the gap spans for each nonterminal in the source phrase. virtual bool GetGaps( - vector >& source_gaps, vector >& target_gaps, + vector>& source_gaps, vector>& target_gaps, const vector& matching, const vector& chunklen, const vector& source_low, const vector& source_high, const vector& target_low, const vector& target_high, @@ -68,7 +68,7 @@ class RuleExtractorHelper { int& num_symbols, bool& met_constraints) const; // Get the order of the nonterminals in the target phrase. - virtual vector GetGapOrder(const vector >& gaps) const; + virtual vector GetGapOrder(const vector>& gaps) const; // Map each terminal symbol with its position in the source phrase. virtual unordered_map GetSourceIndexes( diff --git a/extractor/rule_extractor_helper_test.cc b/extractor/rule_extractor_helper_test.cc index 9b82abb1..3f9ae6d7 100644 --- a/extractor/rule_extractor_helper_test.cc +++ b/extractor/rule_extractor_helper_test.cc @@ -23,7 +23,7 @@ class RuleExtractorHelperTest : public Test { EXPECT_CALL(*target_data_array, GetSentenceLength(_)) .WillRepeatedly(Return(12)); - vector > links = { + vector> links = { make_pair(0, 0), make_pair(0, 1), make_pair(2, 2), make_pair(3, 1) }; alignment = make_shared(); @@ -394,7 +394,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapOrder) { helper = make_shared(source_data_array, target_data_array, alignment, 10, 5, true, true, true); - vector > gaps = + vector> gaps = {make_pair(0, 3), make_pair(5, 8), make_pair(11, 12), make_pair(15, 17)}; vector expected_gap_order = {0, 1, 2, 3}; EXPECT_EQ(expected_gap_order, helper->GetGapOrder(gaps)); @@ -425,7 +425,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapsExceedNumSymbols) { int source_back_low = 0, source_back_high = 6; vector matching = {11, 13, 15}; vector chunklen = {1, 1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 5; EXPECT_FALSE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, @@ -464,7 +464,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapsExtensionsNotTight) { int source_back_low = 0, source_back_high = 6; vector matching = {11, 13, 15}; vector chunklen = {1, 1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 5; EXPECT_FALSE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, @@ -499,7 +499,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapsNotTightExtremities) { int source_back_low = 1, source_back_high = 6; vector matching = {11, 13, 15}; vector chunklen = {1, 1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 5; EXPECT_TRUE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, @@ -507,7 +507,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapsNotTightExtremities) { source_back_low, source_back_high, 5, 10, num_symbols, met_constraints)); EXPECT_FALSE(met_constraints); - vector > expected_gaps = {make_pair(2, 3), make_pair(4, 5)}; + vector> expected_gaps = {make_pair(2, 3), make_pair(4, 5)}; EXPECT_EQ(expected_gaps, source_gaps); EXPECT_EQ(expected_gaps, target_gaps); @@ -545,18 +545,18 @@ TEST_F(RuleExtractorHelperTest, TestGetGapsWithExtensions) { int source_back_low = 1, source_back_high = 6; vector matching = {12, 14}; vector chunklen = {1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 3; EXPECT_TRUE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, source_phrase_low, source_phrase_high, source_back_low, source_back_high, 5, 10, num_symbols, met_constraints)); - vector > expected_source_gaps = { + vector> expected_source_gaps = { make_pair(1, 2), make_pair(3, 4), make_pair(5, 6) }; EXPECT_EQ(expected_source_gaps, source_gaps); - vector > expected_target_gaps = { + vector> expected_target_gaps = { make_pair(5, 6), make_pair(3, 4), make_pair(1, 2) }; EXPECT_EQ(expected_target_gaps, target_gaps); @@ -579,18 +579,18 @@ TEST_F(RuleExtractorHelperTest, TestGetGaps) { int source_back_low = 1, source_back_high = 6; vector matching = {11, 13, 15}; vector chunklen = {1, 1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 5; EXPECT_TRUE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, source_phrase_low, source_phrase_high, source_back_low, source_back_high, 5, 10, num_symbols, met_constraints)); - vector > expected_source_gaps = { + vector> expected_source_gaps = { make_pair(2, 3), make_pair(4, 5) }; EXPECT_EQ(expected_source_gaps, source_gaps); - vector > expected_target_gaps = { + vector> expected_target_gaps = { make_pair(4, 5), make_pair(2, 3) }; EXPECT_EQ(expected_target_gaps, target_gaps); @@ -613,7 +613,7 @@ TEST_F(RuleExtractorHelperTest, TestGetGapIntegrityChecksFailed) { int source_back_low = 2, source_back_high = 5; vector matching = {12, 14}; vector chunklen = {1, 1}; - vector > source_gaps, target_gaps; + vector> source_gaps, target_gaps; int num_symbols = 3; EXPECT_FALSE(helper->GetGaps(source_gaps, target_gaps, matching, chunklen, source_low, source_high, target_low, target_high, diff --git a/extractor/rule_extractor_test.cc b/extractor/rule_extractor_test.cc index 5c1501c7..0587276a 100644 --- a/extractor/rule_extractor_test.cc +++ b/extractor/rule_extractor_test.cc @@ -49,7 +49,7 @@ class RuleExtractorTest : public Test { PhraseAlignment phrase_alignment = {make_pair(0, 0)}; target_phrase_extractor = make_shared(); - vector > target_phrases = { + vector> target_phrases = { make_pair(target_phrase, phrase_alignment) }; EXPECT_CALL(*target_phrase_extractor, ExtractPhrases(_, _, _, _, _, _)) @@ -104,7 +104,7 @@ TEST_F(RuleExtractorTest, TestExtractRulesNoFixPoint) { EXPECT_CALL(*helper, GetLinksSpans(_, _, _, _, _)).Times(1); // Set FindFixPoint to return false. - vector > gaps; + vector> gaps; helper->SetUp(0, 0, 0, 0, false, gaps, gaps, 0, true, true); vector rules = extractor->ExtractRules(phrase, phrase_location); @@ -119,7 +119,7 @@ TEST_F(RuleExtractorTest, TestExtractRulesGapsFail) { EXPECT_CALL(*helper, GetLinksSpans(_, _, _, _, _)).Times(1); // Set CheckGaps to return false. - vector > gaps; + vector> gaps; helper->SetUp(0, 0, 0, 0, true, gaps, gaps, 0, true, false); vector rules = extractor->ExtractRules(phrase, phrase_location); @@ -133,7 +133,7 @@ TEST_F(RuleExtractorTest, TestExtractRulesNoExtremities) { PhraseLocation phrase_location(matching, 1); EXPECT_CALL(*helper, GetLinksSpans(_, _, _, _, _)).Times(1); - vector > gaps(3); + vector> gaps(3); // Set FindFixPoint to return true. The number of gaps equals the number of // nonterminals, so we won't add any extremities. helper->SetUp(0, 0, 0, 0, true, gaps, gaps, 0, true, true); @@ -155,7 +155,7 @@ TEST_F(RuleExtractorTest, TestExtractRulesAddExtremities) { SetArgReferee<2>(links), SetArgReferee<3>(links))); - vector > gaps; + vector> gaps; // Set FindFixPoint to return true. The number of gaps equals the number of // nonterminals, so we won't add any extremities. helper->SetUp(0, 0, 2, 3, true, gaps, gaps, 0, true, true); diff --git a/extractor/rule_factory_test.cc b/extractor/rule_factory_test.cc index 2129dfa0..08af3dcd 100644 --- a/extractor/rule_factory_test.cc +++ b/extractor/rule_factory_test.cc @@ -45,7 +45,7 @@ class RuleFactoryTest : public Test { Phrase phrase; vector scores = {0.5}; - vector > phrase_alignment = {make_pair(0, 0)}; + vector> phrase_alignment = {make_pair(0, 0)}; vector rules = {Rule(phrase, phrase, scores, phrase_alignment)}; extractor = make_shared(); EXPECT_CALL(*extractor, ExtractRules(_, _)) diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc index aec83e3b..2fc6f724 100644 --- a/extractor/run_extractor.cc +++ b/extractor/run_extractor.cc @@ -169,7 +169,7 @@ int main(int argc, char** argv) { // Features used to score each grammar rule. Clock::time_point extraction_start_time = Clock::now(); - vector > features = { + vector> features = { make_shared(), make_shared(), make_shared(), diff --git a/extractor/scorer.cc b/extractor/scorer.cc index d3ebf1c9..33f5cb8c 100644 --- a/extractor/scorer.cc +++ b/extractor/scorer.cc @@ -4,7 +4,7 @@ namespace extractor { -Scorer::Scorer(const vector >& features) : +Scorer::Scorer(const vector>& features) : features(features) {} Scorer::Scorer() {} diff --git a/extractor/scorer.h b/extractor/scorer.h index af8a3b10..fc118767 100644 --- a/extractor/scorer.h +++ b/extractor/scorer.h @@ -19,7 +19,7 @@ namespace features { */ class Scorer { public: - Scorer(const vector >& features); + Scorer(const vector>& features); virtual ~Scorer(); @@ -33,7 +33,7 @@ class Scorer { Scorer(); private: - vector > features; + vector> features; }; } // namespace extractor diff --git a/extractor/scorer_test.cc b/extractor/scorer_test.cc index 3a09c9cc..bf77f7ef 100644 --- a/extractor/scorer_test.cc +++ b/extractor/scorer_test.cc @@ -24,7 +24,7 @@ class ScorerTest : public Test { EXPECT_CALL(*feature2, Score(_)).WillRepeatedly(Return(-1.3)); EXPECT_CALL(*feature2, GetName()).WillRepeatedly(Return("f2")); - vector > features = {feature1, feature2}; + vector> features = {feature1, feature2}; scorer = make_shared(features); } diff --git a/extractor/target_phrase_extractor.cc b/extractor/target_phrase_extractor.cc index 2b8a2e4a..48e30643 100644 --- a/extractor/target_phrase_extractor.cc +++ b/extractor/target_phrase_extractor.cc @@ -33,8 +33,8 @@ TargetPhraseExtractor::TargetPhraseExtractor() {} TargetPhraseExtractor::~TargetPhraseExtractor() {} -vector > TargetPhraseExtractor::ExtractPhrases( - const vector >& target_gaps, const vector& target_low, +vector> TargetPhraseExtractor::ExtractPhrases( + const vector>& target_gaps, const vector& target_low, int target_phrase_low, int target_phrase_high, const unordered_map& source_indexes, int sentence_id) const { int target_sent_len = target_data_array->GetSentenceLength(sentence_id); @@ -57,7 +57,7 @@ vector > TargetPhraseExtractor::ExtractPhrases( } } - vector > gaps(target_gaps.size()); + vector> gaps(target_gaps.size()); for (size_t i = 0; i < gaps.size(); ++i) { gaps[i] = target_gaps[target_gap_order[i]]; if (!require_tight_phrases) { @@ -77,7 +77,7 @@ vector > TargetPhraseExtractor::ExtractPhrases( // Compute the range in which each chunk may start or end. (Even indexes // represent the range in which the chunk may start, odd indexes represent the // range in which the chunk may end.) - vector > ranges(2 * gaps.size() + 2); + vector> ranges(2 * gaps.size() + 2); ranges.front() = make_pair(target_x_low, target_phrase_low); ranges.back() = make_pair(target_phrase_high, target_x_high); for (size_t i = 0; i < gaps.size(); ++i) { @@ -86,7 +86,7 @@ vector > TargetPhraseExtractor::ExtractPhrases( ranges[i * 2 + 2] = make_pair(target_gaps[j].second, gaps[i].second); } - vector > target_phrases; + vector> target_phrases; vector subpatterns(ranges.size()); GeneratePhrases(target_phrases, ranges, 0, subpatterns, target_gap_order, target_phrase_low, target_phrase_high, source_indexes, @@ -95,8 +95,8 @@ vector > TargetPhraseExtractor::ExtractPhrases( } void TargetPhraseExtractor::GeneratePhrases( - vector >& target_phrases, - const vector >& ranges, int index, vector& subpatterns, + vector>& target_phrases, + const vector>& ranges, int index, vector& subpatterns, const vector& target_gap_order, int target_phrase_low, int target_phrase_high, const unordered_map& source_indexes, int sentence_id) const { @@ -124,8 +124,8 @@ void TargetPhraseExtractor::GeneratePhrases( } // Construct the alignment between the source and the target phrase. - vector > links = alignment->GetLinks(sentence_id); - vector > alignment; + vector> links = alignment->GetLinks(sentence_id); + vector> alignment; for (pair link: links) { if (target_indexes.count(link.second)) { alignment.push_back(make_pair(source_indexes.find(link.first)->second, diff --git a/extractor/target_phrase_extractor.h b/extractor/target_phrase_extractor.h index 289bae2f..644493cd 100644 --- a/extractor/target_phrase_extractor.h +++ b/extractor/target_phrase_extractor.h @@ -9,7 +9,7 @@ using namespace std; namespace extractor { -typedef vector > PhraseAlignment; +typedef vector> PhraseAlignment; class Alignment; class DataArray; @@ -32,8 +32,8 @@ class TargetPhraseExtractor { // Finds all the target phrases that can extracted from a span in the // target sentence (matching the given set of target phrase gaps). - virtual vector > ExtractPhrases( - const vector >& target_gaps, const vector& target_low, + virtual vector> ExtractPhrases( + const vector>& target_gaps, const vector& target_low, int target_phrase_low, int target_phrase_high, const unordered_map& source_indexes, int sentence_id) const; @@ -44,8 +44,8 @@ class TargetPhraseExtractor { // Computes the cartesian product over the sets of possible target phrase // chunks. void GeneratePhrases( - vector >& target_phrases, - const vector >& ranges, int index, + vector>& target_phrases, + const vector>& ranges, int index, vector& subpatterns, const vector& target_gap_order, int target_phrase_low, int target_phrase_high, const unordered_map& source_indexes, int sentence_id) const; diff --git a/extractor/target_phrase_extractor_test.cc b/extractor/target_phrase_extractor_test.cc index 80927dee..501a473b 100644 --- a/extractor/target_phrase_extractor_test.cc +++ b/extractor/target_phrase_extractor_test.cc @@ -50,7 +50,7 @@ TEST_F(TargetPhraseExtractorTest, TestExtractTightPhrasesTrue) { .WillRepeatedly(Return(target_words[i])); } - vector > links = { + vector> links = { make_pair(0, 0), make_pair(1, 3), make_pair(2, 2), make_pair(3, 1), make_pair(4, 4) }; @@ -62,18 +62,18 @@ TEST_F(TargetPhraseExtractorTest, TestExtractTightPhrasesTrue) { extractor = make_shared( data_array, alignment, phrase_builder, helper, vocabulary, 10, true); - vector > target_gaps = {make_pair(3, 4), make_pair(1, 2)}; + vector> target_gaps = {make_pair(3, 4), make_pair(1, 2)}; vector target_low = {0, 3, 2, 1, 4}; unordered_map source_indexes = {{0, 0}, {2, 2}, {4, 4}}; - vector > results = extractor->ExtractPhrases( + vector> results = extractor->ExtractPhrases( target_gaps, target_low, 0, 5, source_indexes, 1); EXPECT_EQ(1, results.size()); vector expected_symbols = {20, -2, 22, -1, 24}; EXPECT_EQ(expected_symbols, results[0].first.Get()); vector expected_words = {"a", "c", "e"}; EXPECT_EQ(expected_words, results[0].first.GetWords()); - vector > expected_alignment = { + vector> expected_alignment = { make_pair(0, 0), make_pair(2, 2), make_pair(4, 4) }; EXPECT_EQ(expected_alignment, results[0].second); @@ -94,7 +94,7 @@ TEST_F(TargetPhraseExtractorTest, TestExtractPhrasesTightPhrasesFalse) { .WillRepeatedly(Return(target_words[i])); } - vector > links = {make_pair(1, 1)}; + vector> links = {make_pair(1, 1)}; EXPECT_CALL(*alignment, GetLinks(0)).WillRepeatedly(Return(links)); vector gap_order = {0}; @@ -103,11 +103,11 @@ TEST_F(TargetPhraseExtractorTest, TestExtractPhrasesTightPhrasesFalse) { extractor = make_shared( data_array, alignment, phrase_builder, helper, vocabulary, 10, false); - vector > target_gaps = {make_pair(2, 4)}; + vector> target_gaps = {make_pair(2, 4)}; vector target_low = {-1, 1, -1, -1, -1, -1}; unordered_map source_indexes = {{1, 1}}; - vector > results = extractor->ExtractPhrases( + vector> results = extractor->ExtractPhrases( target_gaps, target_low, 1, 5, source_indexes, 0); EXPECT_EQ(10, results.size()); diff --git a/extractor/translation_table.cc b/extractor/translation_table.cc index 45da707a..adb59cb5 100644 --- a/extractor/translation_table.cc +++ b/extractor/translation_table.cc @@ -26,7 +26,7 @@ TranslationTable::TranslationTable(shared_ptr source_data_array, // For each pair of aligned source target words increment their link count by // 1. Unaligned words are paired with the NULL token. for (size_t i = 0; i < source_data_array->GetNumSentences(); ++i) { - vector > links = alignment->GetLinks(i); + vector> links = alignment->GetLinks(i); int source_start = source_data_array->GetSentenceStart(i); int target_start = target_data_array->GetSentenceStart(i); // Ignore END_OF_LINE markers. diff --git a/extractor/translation_table.h b/extractor/translation_table.h index 10504d3b..ed43ad72 100644 --- a/extractor/translation_table.h +++ b/extractor/translation_table.h @@ -13,7 +13,7 @@ namespace fs = boost::filesystem; namespace extractor { -typedef boost::hash > PairHash; +typedef boost::hash> PairHash; class Alignment; class DataArray; diff --git a/extractor/translation_table_test.cc b/extractor/translation_table_test.cc index 051b5715..d14f2f89 100644 --- a/extractor/translation_table_test.cc +++ b/extractor/translation_table_test.cc @@ -55,12 +55,12 @@ TEST(TranslationTableTest, TestScores) { EXPECT_CALL(*target_data_array, HasWord("d")) .WillRepeatedly(Return(false)); - vector > links1 = { + vector> links1 = { make_pair(0, 0), make_pair(1, 1), make_pair(2, 2), make_pair(3, 3), make_pair(4, 4), make_pair(4, 5) }; - vector > links2 = {make_pair(1, 0), make_pair(2, 1)}; - vector > links3 = {make_pair(0, 0), make_pair(2, 1)}; + vector> links2 = {make_pair(1, 0), make_pair(2, 1)}; + vector> links3 = {make_pair(0, 0), make_pair(2, 1)}; shared_ptr alignment = make_shared(); EXPECT_CALL(*alignment, GetLinks(0)).WillRepeatedly(Return(links1)); EXPECT_CALL(*alignment, GetLinks(1)).WillRepeatedly(Return(links2)); -- cgit v1.2.3