summaryrefslogtreecommitdiff
path: root/klm/lm/builder/joint_order.hh
diff options
context:
space:
mode:
authorAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
committerAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
commit5b8253e0e1f1393a509fb9975ba8c1347af758ed (patch)
tree1790470b1d07a0b4973ebce19192e896566ea60b /klm/lm/builder/joint_order.hh
parent2389a5a8a43dda87c355579838559515b0428421 (diff)
parentb203f8c5dc8cff1b9c9c2073832b248fcad0765a (diff)
fixed conflicts
Diffstat (limited to 'klm/lm/builder/joint_order.hh')
-rw-r--r--klm/lm/builder/joint_order.hh43
1 files changed, 43 insertions, 0 deletions
diff --git a/klm/lm/builder/joint_order.hh b/klm/lm/builder/joint_order.hh
new file mode 100644
index 00000000..b5620144
--- /dev/null
+++ b/klm/lm/builder/joint_order.hh
@@ -0,0 +1,43 @@
+#ifndef LM_BUILDER_JOINT_ORDER__
+#define LM_BUILDER_JOINT_ORDER__
+
+#include "lm/builder/multi_stream.hh"
+#include "lm/lm_exception.hh"
+
+#include <string.h>
+
+namespace lm { namespace builder {
+
+template <class Callback, class Compare> void JointOrder(const ChainPositions &positions, Callback &callback) {
+ // Allow matching to reference streams[-1].
+ NGramStreams streams_with_dummy;
+ streams_with_dummy.InitWithDummy(positions);
+ NGramStream *streams = streams_with_dummy.begin() + 1;
+
+ unsigned int order;
+ for (order = 0; order < positions.size() && streams[order]; ++order) {}
+ assert(order); // should always have <unk>.
+ unsigned int current = 0;
+ while (true) {
+ // Does the context match the lower one?
+ if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
+ callback.Enter(current, *streams[current]);
+ // Transition to looking for extensions.
+ if (++current < order) continue;
+ }
+ // No extension left.
+ while(true) {
+ assert(current > 0);
+ --current;
+ callback.Exit(current, *streams[current]);
+ if (++streams[current]) break;
+ UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix");
+ order = current;
+ if (!order) return;
+ }
+ }
+}
+
+}} // namespaces
+
+#endif // LM_BUILDER_JOINT_ORDER__