From d884099e0db8b4510847ec106b59ef7dca3c245b Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 18 Jan 2013 17:12:51 +0000 Subject: KenLM dffafbf with lmplz source (but not built) --- klm/util/multi_intersection.hh | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 klm/util/multi_intersection.hh (limited to 'klm/util/multi_intersection.hh') diff --git a/klm/util/multi_intersection.hh b/klm/util/multi_intersection.hh new file mode 100644 index 00000000..8334d39d --- /dev/null +++ b/klm/util/multi_intersection.hh @@ -0,0 +1,80 @@ +#ifndef UTIL_MULTI_INTERSECTION__ +#define UTIL_MULTI_INTERSECTION__ + +#include +#include + +#include +#include +#include + +namespace util { + +namespace detail { +template struct RangeLessBySize : public std::binary_function { + bool operator()(const Range &left, const Range &right) const { + return left.size() < right.size(); + } +}; + +/* Takes sets specified by their iterators and a boost::optional containing + * the lowest intersection if any. Each set must be sorted in increasing + * order. sets is changed to truncate the beginning of each sequence to the + * location of the match or an empty set. Precondition: sets is not empty + * since the intersection over null is the universe and this function does not + * know the universe. + */ +template boost::optional::value_type> FirstIntersectionSorted(std::vector > &sets, const Less &less = std::less::value_type>()) { + typedef std::vector > Sets; + typedef typename std::iterator_traits::value_type Value; + + assert(!sets.empty()); + + if (sets.front().empty()) return boost::optional(); + // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster. + Value highest(sets.front().front()); + for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) { + i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin()); + if (i->empty()) return boost::optional(); + if (less(highest, i->front())) { + highest = i->front(); + // start over + i = sets.begin(); + } else { + ++i; + } + } + return boost::optional(highest); +} + +} // namespace detail + +template boost::optional::value_type> FirstIntersection(std::vector > &sets, const Less less) { + assert(!sets.empty()); + + std::sort(sets.begin(), sets.end(), detail::RangeLessBySize >()); + return detail::FirstIntersectionSorted(sets, less); +} + +template boost::optional::value_type> FirstIntersection(std::vector > &sets) { + return FirstIntersection(sets, std::less::value_type>()); +} + +template void AllIntersection(std::vector > &sets, Output &out, const Less less) { + typedef typename std::iterator_traits::value_type Value; + assert(!sets.empty()); + + std::sort(sets.begin(), sets.end(), detail::RangeLessBySize >()); + boost::optional ret; + for (boost::optional ret; ret = detail::FirstIntersectionSorted(sets, less); sets.front().advance_begin(1)) { + out(*ret); + } +} + +template void AllIntersection(std::vector > &sets, Output &out) { + AllIntersection(sets, out, std::less::value_type>()); +} + +} // namespace util + +#endif // UTIL_MULTI_INTERSECTION__ -- cgit v1.2.3