From 783c57b2d3312738ddcf992ac55ff750afe7cb47 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 27 Jan 2014 17:42:19 -0800 Subject: KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591 --- klm/lm/filter/phrase.cc | 59 +++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'klm/lm/filter/phrase.cc') diff --git a/klm/lm/filter/phrase.cc b/klm/lm/filter/phrase.cc index 1bef2a3f..e2946b14 100644 --- a/klm/lm/filter/phrase.cc +++ b/klm/lm/filter/phrase.cc @@ -48,21 +48,21 @@ unsigned int ReadMultiple(std::istream &in, Substrings &out) { return sentence_id + sentence_content; } -namespace detail { const StringPiece kEndSentence(""); } - namespace { - typedef unsigned int Sentence; typedef std::vector Sentences; +} // namespace -class Vertex; +namespace detail { + +const StringPiece kEndSentence(""); class Arc { public: Arc() {} // For arcs from one vertex to another. - void SetPhrase(Vertex &from, Vertex &to, const Sentences &intersect) { + void SetPhrase(detail::Vertex &from, detail::Vertex &to, const Sentences &intersect) { Set(to, intersect); from_ = &from; } @@ -71,7 +71,7 @@ class Arc { * aligned). These have no from_ vertex; it implictly matches every * sentence. This also handles when the n-gram is a substring of a phrase. */ - void SetRight(Vertex &to, const Sentences &complete) { + void SetRight(detail::Vertex &to, const Sentences &complete) { Set(to, complete); from_ = NULL; } @@ -97,11 +97,11 @@ class Arc { void LowerBound(const Sentence to); private: - void Set(Vertex &to, const Sentences &sentences); + void Set(detail::Vertex &to, const Sentences &sentences); const Sentence *current_; const Sentence *last_; - Vertex *from_; + detail::Vertex *from_; }; struct ArcGreater : public std::binary_function { @@ -183,7 +183,13 @@ void Vertex::LowerBound(const Sentence to) { } } -void BuildGraph(const Substrings &phrase, const std::vector &hashes, Vertex *const vertices, Arc *free_arc) { +} // namespace detail + +namespace { + +void BuildGraph(const Substrings &phrase, const std::vector &hashes, detail::Vertex *const vertices, detail::Arc *free_arc) { + using detail::Vertex; + using detail::Arc; assert(!hashes.empty()); const Hash *const first_word = &*hashes.begin(); @@ -231,17 +237,29 @@ void BuildGraph(const Substrings &phrase, const std::vector &hashes, Verte namespace detail { -} // namespace detail +// Here instead of header due to forward declaration. +ConditionCommon::ConditionCommon(const Substrings &substrings) : substrings_(substrings) {} -bool Union::Evaluate() { +// Rest of the variables are temporaries anyway +ConditionCommon::ConditionCommon(const ConditionCommon &from) : substrings_(from.substrings_) {} + +ConditionCommon::~ConditionCommon() {} + +detail::Vertex &ConditionCommon::MakeGraph() { assert(!hashes_.empty()); - // Usually there are at most 6 words in an n-gram, so stack allocation is reasonable. - Vertex vertices[hashes_.size()]; + vertices_.clear(); + vertices_.resize(hashes_.size()); + arcs_.clear(); // One for every substring. - Arc arcs[((hashes_.size() + 1) * hashes_.size()) / 2]; - BuildGraph(substrings_, hashes_, vertices, arcs); - Vertex &last_vertex = vertices[hashes_.size() - 1]; + arcs_.resize(((hashes_.size() + 1) * hashes_.size()) / 2); + BuildGraph(substrings_, hashes_, &*vertices_.begin(), &*arcs_.begin()); + return vertices_[hashes_.size() - 1]; +} + +} // namespace detail +bool Union::Evaluate() { + detail::Vertex &last_vertex = MakeGraph(); unsigned int lower = 0; while (true) { last_vertex.LowerBound(lower); @@ -252,14 +270,7 @@ bool Union::Evaluate() { } template void Multiple::Evaluate(const StringPiece &line, Output &output) { - assert(!hashes_.empty()); - // Usually there are at most 6 words in an n-gram, so stack allocation is reasonable. - Vertex vertices[hashes_.size()]; - // One for every substring. - Arc arcs[((hashes_.size() + 1) * hashes_.size()) / 2]; - BuildGraph(substrings_, hashes_, vertices, arcs); - Vertex &last_vertex = vertices[hashes_.size() - 1]; - + detail::Vertex &last_vertex = MakeGraph(); unsigned int lower = 0; while (true) { last_vertex.LowerBound(lower); -- cgit v1.2.3