summaryrefslogtreecommitdiff
path: root/klm/lm/filter/phrase.cc
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-01-28 15:35:31 +0100
committerPatrick Simianer <p@simianer.de>2014-01-28 15:35:31 +0100
commit9c9ba8954358f791a818b3eefda2c0eb805bbd97 (patch)
tree0153350406bf1c9c6aafa8e5da5d4b3feb9cd0c9 /klm/lm/filter/phrase.cc
parent1b0d40959f529b67db3b9d10dbf93101e0c65c7c (diff)
parent19de646f60d4fb52ddd26d25e06f50f8717fd988 (diff)
resolv conflict in mira
Diffstat (limited to 'klm/lm/filter/phrase.cc')
-rw-r--r--klm/lm/filter/phrase.cc59
1 files changed, 35 insertions, 24 deletions
diff --git a/klm/lm/filter/phrase.cc b/klm/lm/filter/phrase.cc
index 1bef2a3f..e2946b14 100644
--- a/klm/lm/filter/phrase.cc
+++ b/klm/lm/filter/phrase.cc
@@ -48,21 +48,21 @@ unsigned int ReadMultiple(std::istream &in, Substrings &out) {
return sentence_id + sentence_content;
}
-namespace detail { const StringPiece kEndSentence("</s>"); }
-
namespace {
-
typedef unsigned int Sentence;
typedef std::vector<Sentence> Sentences;
+} // namespace
-class Vertex;
+namespace detail {
+
+const StringPiece kEndSentence("</s>");
class Arc {
public:
Arc() {}
// For arcs from one vertex to another.
- void SetPhrase(Vertex &from, Vertex &to, const Sentences &intersect) {
+ void SetPhrase(detail::Vertex &from, detail::Vertex &to, const Sentences &intersect) {
Set(to, intersect);
from_ = &from;
}
@@ -71,7 +71,7 @@ class Arc {
* aligned). These have no from_ vertex; it implictly matches every
* sentence. This also handles when the n-gram is a substring of a phrase.
*/
- void SetRight(Vertex &to, const Sentences &complete) {
+ void SetRight(detail::Vertex &to, const Sentences &complete) {
Set(to, complete);
from_ = NULL;
}
@@ -97,11 +97,11 @@ class Arc {
void LowerBound(const Sentence to);
private:
- void Set(Vertex &to, const Sentences &sentences);
+ void Set(detail::Vertex &to, const Sentences &sentences);
const Sentence *current_;
const Sentence *last_;
- Vertex *from_;
+ detail::Vertex *from_;
};
struct ArcGreater : public std::binary_function<const Arc *, const Arc *, bool> {
@@ -183,7 +183,13 @@ void Vertex::LowerBound(const Sentence to) {
}
}
-void BuildGraph(const Substrings &phrase, const std::vector<Hash> &hashes, Vertex *const vertices, Arc *free_arc) {
+} // namespace detail
+
+namespace {
+
+void BuildGraph(const Substrings &phrase, const std::vector<Hash> &hashes, detail::Vertex *const vertices, detail::Arc *free_arc) {
+ using detail::Vertex;
+ using detail::Arc;
assert(!hashes.empty());
const Hash *const first_word = &*hashes.begin();
@@ -231,17 +237,29 @@ void BuildGraph(const Substrings &phrase, const std::vector<Hash> &hashes, Verte
namespace detail {
-} // namespace detail
+// Here instead of header due to forward declaration.
+ConditionCommon::ConditionCommon(const Substrings &substrings) : substrings_(substrings) {}
-bool Union::Evaluate() {
+// Rest of the variables are temporaries anyway
+ConditionCommon::ConditionCommon(const ConditionCommon &from) : substrings_(from.substrings_) {}
+
+ConditionCommon::~ConditionCommon() {}
+
+detail::Vertex &ConditionCommon::MakeGraph() {
assert(!hashes_.empty());
- // Usually there are at most 6 words in an n-gram, so stack allocation is reasonable.
- Vertex vertices[hashes_.size()];
+ vertices_.clear();
+ vertices_.resize(hashes_.size());
+ arcs_.clear();
// One for every substring.
- Arc arcs[((hashes_.size() + 1) * hashes_.size()) / 2];
- BuildGraph(substrings_, hashes_, vertices, arcs);
- Vertex &last_vertex = vertices[hashes_.size() - 1];
+ arcs_.resize(((hashes_.size() + 1) * hashes_.size()) / 2);
+ BuildGraph(substrings_, hashes_, &*vertices_.begin(), &*arcs_.begin());
+ return vertices_[hashes_.size() - 1];
+}
+
+} // namespace detail
+bool Union::Evaluate() {
+ detail::Vertex &last_vertex = MakeGraph();
unsigned int lower = 0;
while (true) {
last_vertex.LowerBound(lower);
@@ -252,14 +270,7 @@ bool Union::Evaluate() {
}
template <class Output> void Multiple::Evaluate(const StringPiece &line, Output &output) {
- assert(!hashes_.empty());
- // Usually there are at most 6 words in an n-gram, so stack allocation is reasonable.
- Vertex vertices[hashes_.size()];
- // One for every substring.
- Arc arcs[((hashes_.size() + 1) * hashes_.size()) / 2];
- BuildGraph(substrings_, hashes_, vertices, arcs);
- Vertex &last_vertex = vertices[hashes_.size() - 1];
-
+ detail::Vertex &last_vertex = MakeGraph();
unsigned int lower = 0;
while (true) {
last_vertex.LowerBound(lower);