summaryrefslogtreecommitdiff
path: root/klm/util/multi_intersection.hh
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2013-01-18 17:12:51 +0000
committerKenneth Heafield <github@kheafield.com>2013-01-18 17:12:51 +0000
commitd884099e0db8b4510847ec106b59ef7dca3c245b (patch)
treeb45a3f17eb002e224a7b728e0f985a15e2503196 /klm/util/multi_intersection.hh
parentbae5fe99037ae7e101953ad0df118127191c711c (diff)
KenLM dffafbf with lmplz source (but not built)
Diffstat (limited to 'klm/util/multi_intersection.hh')
-rw-r--r--klm/util/multi_intersection.hh80
1 files changed, 80 insertions, 0 deletions
diff --git a/klm/util/multi_intersection.hh b/klm/util/multi_intersection.hh
new file mode 100644
index 00000000..8334d39d
--- /dev/null
+++ b/klm/util/multi_intersection.hh
@@ -0,0 +1,80 @@
+#ifndef UTIL_MULTI_INTERSECTION__
+#define UTIL_MULTI_INTERSECTION__
+
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+namespace detail {
+template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
+ bool operator()(const Range &left, const Range &right) const {
+ return left.size() < right.size();
+ }
+};
+
+/* Takes sets specified by their iterators and a boost::optional containing
+ * the lowest intersection if any. Each set must be sorted in increasing
+ * order. sets is changed to truncate the beginning of each sequence to the
+ * location of the match or an empty set. Precondition: sets is not empty
+ * since the intersection over null is the universe and this function does not
+ * know the universe.
+ */
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
+ typedef std::vector<boost::iterator_range<Iterator> > Sets;
+ typedef typename std::iterator_traits<Iterator>::value_type Value;
+
+ assert(!sets.empty());
+
+ if (sets.front().empty()) return boost::optional<Value>();
+ // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
+ Value highest(sets.front().front());
+ for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
+ i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
+ if (i->empty()) return boost::optional<Value>();
+ if (less(highest, i->front())) {
+ highest = i->front();
+ // start over
+ i = sets.begin();
+ } else {
+ ++i;
+ }
+ }
+ return boost::optional<Value>(highest);
+}
+
+} // namespace detail
+
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
+ assert(!sets.empty());
+
+ std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+ return detail::FirstIntersectionSorted(sets, less);
+}
+
+template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
+ return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
+ typedef typename std::iterator_traits<Iterator>::value_type Value;
+ assert(!sets.empty());
+
+ std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+ boost::optional<Value> ret;
+ for (boost::optional<Value> ret; ret = detail::FirstIntersectionSorted(sets, less); sets.front().advance_begin(1)) {
+ out(*ret);
+ }
+}
+
+template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
+ AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+} // namespace util
+
+#endif // UTIL_MULTI_INTERSECTION__