summaryrefslogtreecommitdiff
path: root/gi
diff options
context:
space:
mode:
Diffstat (limited to 'gi')
-rw-r--r--gi/clda/Makefile.am5
-rw-r--r--gi/clda/configure.ac17
-rw-r--r--gi/clda/m4/boost.m41035
-rw-r--r--gi/clda/src/Makefile.am6
-rw-r--r--gi/clda/src/clda.cc140
-rw-r--r--gi/clda/src/crp.h216
-rw-r--r--gi/clda/src/dict.h43
-rw-r--r--gi/clda/src/logval.h157
-rw-r--r--gi/clda/src/prob.h8
-rw-r--r--gi/clda/src/sampler.h138
-rw-r--r--gi/clda/src/tdict.h49
-rw-r--r--gi/clda/src/timer.h18
-rw-r--r--gi/clda/src/wordid.h6
13 files changed, 1838 insertions, 0 deletions
diff --git a/gi/clda/Makefile.am b/gi/clda/Makefile.am
new file mode 100644
index 00000000..936b6ae3
--- /dev/null
+++ b/gi/clda/Makefile.am
@@ -0,0 +1,5 @@
+SUBDIRS = src
+AUTOMAKE_OPTIONS = foreign
+
+ACLOCAL_AMFLAGS = -I m4
+
diff --git a/gi/clda/configure.ac b/gi/clda/configure.ac
new file mode 100644
index 00000000..8469ee09
--- /dev/null
+++ b/gi/clda/configure.ac
@@ -0,0 +1,17 @@
+AC_INIT
+AM_INIT_AUTOMAKE(cdec,0.1)
+AC_CONFIG_HEADERS(config.h)
+AC_PROG_LIBTOOL
+AC_PROG_CC
+AC_PROG_CXX
+AC_LANG_CPLUSPLUS
+BOOST_REQUIRE
+BOOST_PROGRAM_OPTIONS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS"
+LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS"
+
+AC_PROG_INSTALL
+
+AC_OUTPUT(Makefile src/Makefile)
+
diff --git a/gi/clda/m4/boost.m4 b/gi/clda/m4/boost.m4
new file mode 100644
index 00000000..7e0ed075
--- /dev/null
+++ b/gi/clda/m4/boost.m4
@@ -0,0 +1,1035 @@
+# boost.m4: Locate Boost headers and libraries for autoconf-based projects.
+# Copyright (C) 2007, 2008, 2009 Benoit Sigoure <tsuna@lrde.epita.fr>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Additional permission under section 7 of the GNU General Public
+# License, version 3 ("GPLv3"):
+#
+# If you convey this file as part of a work that contains a
+# configuration script generated by Autoconf, you may do so under
+# terms of your choice.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+m4_define([_BOOST_SERIAL], [m4_translit([
+# serial 12
+], [#
+], [])])
+
+# Original sources can be found at http://github.com/tsuna/boost.m4
+# You can fetch the latest version of the script by doing:
+# wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4
+
+# ------ #
+# README #
+# ------ #
+
+# This file provides several macros to use the various Boost libraries.
+# The first macro is BOOST_REQUIRE. It will simply check if it's possible to
+# find the Boost headers of a given (optional) minimum version and it will
+# define BOOST_CPPFLAGS accordingly. It will add an option --with-boost to
+# your configure so that users can specify non standard locations.
+# If the user's environment contains BOOST_ROOT and --with-boost was not
+# specified, --with-boost=$BOOST_ROOT is implicitly used.
+# For more README and documentation, go to http://github.com/tsuna/boost.m4
+# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL. If you don't, don't worry,
+# simply read the README, it will show you what to do step by step.
+
+m4_pattern_forbid([^_?BOOST_])
+
+
+# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM,
+# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# --------------------------------------------------------
+# Same as AC_EGREP_CPP, but leave the result in conftest.i.
+# PATTERN is *not* overquoted, as in AC_EGREP_CPP. It could be useful
+# to turn this into a macro which extracts the value of any macro.
+m4_define([_BOOST_SED_CPP],
+[AC_LANG_PREPROC_REQUIRE()dnl
+AC_REQUIRE([AC_PROG_SED])dnl
+AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])])
+AS_IF([dnl eval is necessary to expand ac_cpp.
+dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell.
+dnl Beware of Windows end-of-lines, for instance if we are running
+dnl some Windows programs under Wine. In that case, boost/version.hpp
+dnl is certainly using "\r\n", but the regular Unix shell will only
+dnl strip `\n' with backquotes, not the `\r'. This results in
+dnl boost_cv_lib_version='1_37\r' for instance, which breaks
+dnl everything else.
+dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK
+(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD |
+ tr -d '\r' |
+ $SED -n -e "$1" >conftest.i 2>&1],
+ [$3],
+ [$4])
+rm -rf conftest*
+])# AC_EGREP_CPP
+
+
+
+# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND])
+# -----------------------------------------------
+# Look for Boost. If version is given, it must either be a literal of the form
+# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a
+# variable "$var".
+# Defines the value BOOST_CPPFLAGS. This macro only checks for headers with
+# the required version, it does not check for any of the Boost libraries.
+# On # success, defines HAVE_BOOST. On failure, calls the optional
+# ACTION-IF-NOT-FOUND action if one was supplied.
+# Otherwise aborts with an error message.
+AC_DEFUN([BOOST_REQUIRE],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_PROG_GREP])dnl
+echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD
+boost_save_IFS=$IFS
+boost_version_req=$1
+IFS=.
+set x $boost_version_req 0 0 0
+IFS=$boost_save_IFS
+shift
+boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"`
+AC_ARG_WITH([boost],
+ [AS_HELP_STRING([--with-boost=DIR],
+ [prefix of Boost $1 @<:@guess@:>@])])dnl
+AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl
+# If BOOST_ROOT is set and the user has not provided a value to
+# --with-boost, then treat BOOST_ROOT as if it the user supplied it.
+if test x"$BOOST_ROOT" != x; then
+ if test x"$with_boost" = x; then
+ AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT])
+ with_boost=$BOOST_ROOT
+ else
+ AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost])
+ fi
+fi
+AC_SUBST([DISTCHECK_CONFIGURE_FLAGS],
+ ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"])
+boost_save_CPPFLAGS=$CPPFLAGS
+ AC_CACHE_CHECK([for Boost headers version >= $boost_version_req],
+ [boost_cv_inc_path],
+ [boost_cv_inc_path=no
+AC_LANG_PUSH([C++])dnl
+m4_pattern_allow([^BOOST_VERSION$])dnl
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <boost/version.hpp>
+#if !defined BOOST_VERSION
+# error BOOST_VERSION is not defined
+#elif BOOST_VERSION < $boost_version_req
+# error Boost headers version < $boost_version_req
+#endif
+]])])
+ # If the user provided a value to --with-boost, use it and only it.
+ case $with_boost in #(
+ ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \
+ /usr/include C:/Boost/include;; #(
+ *) set x "$with_boost/include" "$with_boost";;
+ esac
+ shift
+ for boost_dir
+ do
+ # Without --layout=system, Boost (or at least some versions) installs
+ # itself in <prefix>/include/boost-<version>. This inner loop helps to
+ # find headers in such directories.
+ #
+ # Any ${boost_dir}/boost-x_xx directories are searched in reverse version
+ # order followed by ${boost_dir}. The final '.' is a sentinel for
+ # searching $boost_dir" itself. Entries are whitespace separated.
+ #
+ # I didn't indent this loop on purpose (to avoid over-indented code)
+ boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \
+ && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \
+ && echo .`
+ for boost_inc in $boost_layout_system_search_list
+ do
+ if test x"$boost_inc" != x.; then
+ boost_inc="$boost_dir/$boost_inc"
+ else
+ boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list
+ fi
+ if test x"$boost_inc" != x; then
+ # We are going to check whether the version of Boost installed
+ # in $boost_inc is usable by running a compilation that
+ # #includes it. But if we pass a -I/some/path in which Boost
+ # is not installed, the compiler will just skip this -I and
+ # use other locations (either from CPPFLAGS, or from its list
+ # of system include directories). As a result we would use
+ # header installed on the machine instead of the /some/path
+ # specified by the user. So in that precise case (trying
+ # $boost_inc), make sure the version.hpp exists.
+ #
+ # Use test -e as there can be symlinks.
+ test -e "$boost_inc/boost/version.hpp" || continue
+ CPPFLAGS="$CPPFLAGS -I$boost_inc"
+ fi
+ AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no])
+ if test x"$boost_cv_inc_path" = xyes; then
+ if test x"$boost_inc" != x; then
+ boost_cv_inc_path=$boost_inc
+ fi
+ break 2
+ fi
+ done
+ done
+AC_LANG_POP([C++])dnl
+ ])
+ case $boost_cv_inc_path in #(
+ no)
+ boost_errmsg="cannot find Boost headers version >= $boost_version_req"
+ m4_if([$2], [], [AC_MSG_ERROR([$boost_errmsg])],
+ [AC_MSG_NOTICE([$boost_errmsg])])
+ $2
+ ;;#(
+ yes)
+ BOOST_CPPFLAGS=
+ AC_DEFINE([HAVE_BOOST], [1],
+ [Defined if the requested minimum BOOST version is satisfied])
+ ;;#(
+ *)
+ AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"])
+ ;;
+ esac
+ AC_CACHE_CHECK([for Boost's header version],
+ [boost_cv_lib_version],
+ [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl
+ _BOOST_SED_CPP([/^boost-lib-version = /{s///;s/\"//g;p;g;}],
+ [#include <boost/version.hpp>
+boost-lib-version = BOOST_LIB_VERSION],
+ [boost_cv_lib_version=`cat conftest.i`])])
+ # e.g. "134" for 1_34_1 or "135" for 1_35
+ boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'`
+ case $boost_major_version in #(
+ '' | *[[!0-9]]*)
+ AC_MSG_ERROR([invalid value: boost_major_version=$boost_major_version])
+ ;;
+ esac
+CPPFLAGS=$boost_save_CPPFLAGS
+])# BOOST_REQUIRE
+
+# BOOST_STATIC()
+# --------------
+# Add the "--enable-static-boost" configure argument. If this argument is given
+# on the command line, static versions of the libraries will be looked up.
+AC_DEFUN([BOOST_STATIC],
+ [AC_ARG_ENABLE([static-boost],
+ [AC_HELP_STRING([--enable-static-boost],
+ [Prefer the static boost libraries over the shared ones [no]])],
+ [enable_static_boost=yes],
+ [enable_static_boost=no])])# BOOST_STATIC
+
+# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND])
+# --------------------------------------------------------------------------
+# Wrapper around AC_CHECK_HEADER for Boost headers. Useful to check for
+# some parts of the Boost library which are only made of headers and don't
+# require linking (such as Boost.Foreach).
+#
+# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be
+# found in the first place, in which case by default a notice is issued to the
+# user. Presumably if we haven't died already it's because it's OK to not have
+# Boost, which is why only a notice is issued instead of a hard error.
+#
+# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_<HEADER-NAME> in
+# case of success # (where HEADER-NAME is written LIKE_THIS, e.g.,
+# HAVE_BOOST_FOREACH_HPP).
+AC_DEFUN([BOOST_FIND_HEADER],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+if test x"$boost_cv_inc_path" = xno; then
+ m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])])
+else
+AC_LANG_PUSH([C++])dnl
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+AC_CHECK_HEADER([$1],
+ [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1],
+ [Define to 1 if you have <$1>])])],
+ [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])])
+CPPFLAGS=$boost_save_CPPFLAGS
+AC_LANG_POP([C++])dnl
+fi
+])# BOOST_FIND_HEADER
+
+
+# BOOST_FIND_LIB([LIB-NAME], [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST],
+# [CXX-PROLOGUE])
+# -------------------------------------------------------------------------
+# Look for the Boost library LIB-NAME (e.g., LIB-NAME = `thread', for
+# libboost_thread). Check that HEADER-NAME works and check that
+# libboost_LIB-NAME can link with the code CXX-TEST. The optional argument
+# CXX-PROLOGUE can be used to include some C++ code before the `main'
+# function.
+#
+# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above).
+#
+# Boost libraries typically come compiled with several flavors (with different
+# runtime options) so PREFERRED-RT-OPT is the preferred suffix. A suffix is one
+# or more of the following letters: sgdpn (in that order). s = static
+# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build,
+# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n'
+# must always be used along with `p'). Additionally, PREFERRED-RT-OPT can
+# start with `mt-' to indicate that there is a preference for multi-thread
+# builds. Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp
+# ... If you want to make sure you have a specific version of Boost
+# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro.
+AC_DEFUN([BOOST_FIND_LIB],
+[AC_REQUIRE([BOOST_REQUIRE])dnl
+AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl
+AC_REQUIRE([BOOST_STATIC])dnl
+AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl
+if test x"$boost_cv_inc_path" = xno; then
+ AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library])
+else
+dnl The else branch is huge and wasn't intended on purpose.
+AC_LANG_PUSH([C++])dnl
+AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl
+AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl
+AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl
+BOOST_FIND_HEADER([$3])
+boost_save_CPPFLAGS=$CPPFLAGS
+CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
+# Now let's try to find the library. The algorithm is as follows: first look
+# for a given library name according to the user's PREFERRED-RT-OPT. For each
+# library name, we prefer to use the ones that carry the tag (toolset name).
+# Each library is searched through the various standard paths were Boost is
+# usually installed. If we can't find the standard variants, we try to
+# enforce -mt (for instance on MacOSX, libboost_threads.dylib doesn't exist
+# but there's -obviously- libboost_threads-mt.dylib).
+AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib],
+ [Boost_lib=no
+ case "$2" in #(
+ mt | mt-) boost_mt=-mt; boost_rtopt=;; #(
+ mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$2" : 'Xmt-*\(.*\)'`;; #(
+ *) boost_mt=; boost_rtopt=$2;;
+ esac
+ if test $enable_static_boost = yes; then
+ boost_rtopt="s$boost_rtopt"
+ fi
+ # Find the proper debug variant depending on what we've been asked to find.
+ case $boost_rtopt in #(
+ *d*) boost_rt_d=$boost_rtopt;; #(
+ *[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn')
+ boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1\2/'`;; #(
+ *) boost_rt_d='-d';;
+ esac
+ # If the PREFERRED-RT-OPT are not empty, prepend a `-'.
+ test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt"
+ $boost_guess_use_mt && boost_mt=-mt
+ # Look for the abs path the static archive.
+ # $libext is computed by Libtool but let's make sure it's non empty.
+ test -z "$libext" &&
+ AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?])
+ boost_save_ac_objext=$ac_objext
+ # Generate the test file.
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$3>
+$5], [$4])])
+dnl Optimization hacks: compiling C++ is slow, especially with Boost. What
+dnl we're trying to do here is guess the right combination of link flags
+dnl (LIBS / LDFLAGS) to use a given library. This can take several
+dnl iterations before it succeeds and is thus *very* slow. So what we do
+dnl instead is that we compile the code first (and thus get an object file,
+dnl typically conftest.o). Then we try various combinations of link flags
+dnl until we succeed to link conftest.o in an executable. The problem is
+dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always
+dnl remove all the temporary files including conftest.o. So the trick here
+dnl is to temporarily change the value of ac_objext so that conftest.o is
+dnl preserved accross tests. This is obviously fragile and I will burn in
+dnl hell for not respecting Autoconf's documented interfaces, but in the
+dnl mean time, it optimizes the macro by a factor of 5 to 30.
+dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left
+dnl empty because the test file is generated only once above (before we
+dnl start the for loops).
+ AC_COMPILE_IFELSE([],
+ [ac_objext=do_not_rm_me_plz],
+ [AC_MSG_ERROR([cannot compile a test that uses Boost $1])])
+ ac_objext=$boost_save_ac_objext
+ boost_failed_libs=
+# Don't bother to ident the 6 nested for loops, only the 2 innermost ones
+# matter.
+for boost_tag_ in -$boost_cv_lib_tag ''; do
+for boost_ver_ in -$boost_cv_lib_version ''; do
+for boost_mt_ in $boost_mt -mt ''; do
+for boost_rtopt_ in $boost_rtopt '' -d; do
+ for boost_lib in \
+ boost_$1$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \
+ boost_$1$boost_tag_$boost_rtopt_$boost_ver_ \
+ boost_$1$boost_tag_$boost_mt_$boost_ver_ \
+ boost_$1$boost_tag_$boost_ver_
+ do
+ # Avoid testing twice the same lib
+ case $boost_failed_libs in #(
+ *@$boost_lib@*) continue;;
+ esac
+ # If with_boost is empty, we'll search in /lib first, which is not quite
+ # right so instead we'll try to a location based on where the headers are.
+ boost_tmp_lib=$with_boost
+ test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include}
+ for boost_ldpath in "$boost_tmp_lib/lib" '' \
+ /opt/local/lib /usr/local/lib /opt/lib /usr/lib \
+ "$with_boost" C:/Boost/lib /lib /usr/lib64 /lib64
+ do
+ test -e "$boost_ldpath" || continue
+ boost_save_LDFLAGS=$LDFLAGS
+ # Are we looking for a static library?
+ case $boost_ldpath:$boost_rtopt_ in #(
+ *?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt)
+ Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext"
+ test -e "$Boost_lib_LIBS" || continue;; #(
+ *) # No: use -lboost_foo to find the shared library.
+ Boost_lib_LIBS="-l$boost_lib";;
+ esac
+ boost_save_LIBS=$LIBS
+ LIBS="$Boost_lib_LIBS $LIBS"
+ test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath"
+dnl First argument of AC_LINK_IFELSE left empty because the test file is
+dnl generated only once above (before we start the for loops).
+ _BOOST_AC_LINK_IFELSE([],
+ [Boost_lib=yes], [Boost_lib=no])
+ ac_objext=$boost_save_ac_objext
+ LDFLAGS=$boost_save_LDFLAGS
+ LIBS=$boost_save_LIBS
+ if test x"$Boost_lib" = xyes; then
+ Boost_lib_LDFLAGS="-L$boost_ldpath -R$boost_ldpath"
+ break 6
+ else
+ boost_failed_libs="$boost_failed_libs@$boost_lib@"
+ fi
+ done
+ done
+done
+done
+done
+done
+rm -f conftest.$ac_objext
+])
+case $Boost_lib in #(
+ no) _AC_MSG_LOG_CONFTEST
+ AC_MSG_ERROR([cannot not find the flags to link with Boost $1])
+ ;;
+esac
+AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS])
+AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS])
+CPPFLAGS=$boost_save_CPPFLAGS
+AS_VAR_POPDEF([Boost_lib])dnl
+AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl
+AS_VAR_POPDEF([Boost_lib_LIBS])dnl
+AC_LANG_POP([C++])dnl
+fi
+])# BOOST_FIND_LIB
+
+
+# --------------------------------------- #
+# Checks for the various Boost libraries. #
+# --------------------------------------- #
+
+# List of boost libraries: http://www.boost.org/libs/libraries.htm
+# The page http://beta.boost.org/doc/libs is useful: it gives the first release
+# version of each library (among other things).
+
+# BOOST_ARRAY()
+# -------------
+# Look for Boost.Array
+AC_DEFUN([BOOST_ARRAY],
+[BOOST_FIND_HEADER([boost/array.hpp])])
+
+
+# BOOST_ASIO()
+# ------------
+# Look for Boost.Asio (new in Boost 1.35).
+AC_DEFUN([BOOST_ASIO],
+[AC_REQUIRE([BOOST_SYSTEM])dnl
+BOOST_FIND_HEADER([boost/asio.hpp])])
+
+
+# BOOST_BIND()
+# ------------
+# Look for Boost.Bind
+AC_DEFUN([BOOST_BIND],
+[BOOST_FIND_HEADER([boost/bind.hpp])])
+
+
+# BOOST_CONVERSION()
+# ------------------
+# Look for Boost.Conversion (cast / lexical_cast)
+AC_DEFUN([BOOST_CONVERSION],
+[BOOST_FIND_HEADER([boost/cast.hpp])
+BOOST_FIND_HEADER([boost/lexical_cast.hpp])
+])# BOOST_CONVERSION
+
+
+# BOOST_DATE_TIME([PREFERRED-RT-OPT])
+# -----------------------------------
+# Look for Boost.Date_Time. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_DATE_TIME],
+[BOOST_FIND_LIB([date_time], [$1],
+ [boost/date_time/posix_time/posix_time.hpp],
+ [boost::posix_time::ptime t;])
+])# BOOST_DATE_TIME
+
+
+# BOOST_FILESYSTEM([PREFERRED-RT-OPT])
+# ------------------------------------
+# Look for Boost.Filesystem. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+# Do not check for boost/filesystem.hpp because this file was introduced in
+# 1.34.
+AC_DEFUN([BOOST_FILESYSTEM],
+[# Do we have to check for Boost.System? This link-time dependency was
+# added as of 1.35.0. If we have a version <1.35, we must not attempt to
+# find Boost.System as it didn't exist by then.
+if test $boost_major_version -ge 135; then
+BOOST_SYSTEM([$1])
+fi # end of the Boost.System check.
+boost_filesystem_save_LIBS=$LIBS
+boost_filesystem_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS"
+BOOST_FIND_LIB([filesystem], [$1],
+ [boost/filesystem/path.hpp], [boost::filesystem::path p;])
+LIBS=$boost_filesystem_save_LIBS
+LDFLAGS=$boost_filesystem_save_LDFLAGS
+])# BOOST_FILESYSTEM
+
+
+# BOOST_FOREACH()
+# ---------------
+# Look for Boost.Foreach
+AC_DEFUN([BOOST_FOREACH],
+[BOOST_FIND_HEADER([boost/foreach.hpp])])
+
+
+# BOOST_FORMAT()
+# --------------
+# Look for Boost.Format
+# Note: we can't check for boost/format/format_fwd.hpp because the header isn't
+# standalone. It can't be compiled because it triggers the following error:
+# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std'
+# does not name a type
+AC_DEFUN([BOOST_FORMAT],
+[BOOST_FIND_HEADER([boost/format.hpp])])
+
+
+# BOOST_FUNCTION()
+# ----------------
+# Look for Boost.Function
+AC_DEFUN([BOOST_FUNCTION],
+[BOOST_FIND_HEADER([boost/function.hpp])])
+
+
+# BOOST_GRAPH([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Graphs. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_GRAPH],
+[BOOST_FIND_LIB([graph], [$1],
+ [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;])
+])# BOOST_GRAPH
+
+
+# BOOST_IOSTREAMS([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.IOStreams. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_IOSTREAMS],
+[BOOST_FIND_LIB([iostreams], [$1],
+ [boost/iostreams/device/file_descriptor.hpp],
+ [boost::iostreams::file_descriptor fd(0); fd.close();])
+])# BOOST_IOSTREAMS
+
+
+# BOOST_HASH()
+# ------------
+# Look for Boost.Functional/Hash
+AC_DEFUN([BOOST_HASH],
+[BOOST_FIND_HEADER([boost/functional/hash.hpp])])
+
+
+# BOOST_LAMBDA()
+# --------------
+# Look for Boost.Lambda
+AC_DEFUN([BOOST_LAMBDA],
+[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])])
+
+
+# BOOST_MATH()
+# ------------
+# Look for Boost.Math
+# TODO: This library isn't header-only but it comes in multiple different
+# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99,
+# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1,
+# libboost_math_tr1f, libboost_math_tr1l). This macro must be fixed to do the
+# right thing anyway.
+AC_DEFUN([BOOST_MATH],
+[BOOST_FIND_HEADER([boost/math/special_functions.hpp])])
+
+
+# BOOST_MULTIARRAY()
+# ------------------
+# Look for Boost.MultiArray
+AC_DEFUN([BOOST_MULTIARRAY],
+[BOOST_FIND_HEADER([boost/multi_array.hpp])])
+
+
+# BOOST_NUMERIC_CONVERSION()
+# --------------------------
+# Look for Boost.NumericConversion (policy-based numeric conversion)
+AC_DEFUN([BOOST_NUMERIC_CONVERSION],
+[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp])
+])# BOOST_NUMERIC_CONVERSION
+
+
+# BOOST_OPTIONAL()
+# ----------------
+# Look for Boost.Optional
+AC_DEFUN([BOOST_OPTIONAL],
+[BOOST_FIND_HEADER([boost/optional.hpp])])
+
+
+# BOOST_PREPROCESSOR()
+# --------------------
+# Look for Boost.Preprocessor
+AC_DEFUN([BOOST_PREPROCESSOR],
+[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])])
+
+
+# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT])
+# -----------------------------------------
+# Look for Boost.Program_options. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_PROGRAM_OPTIONS],
+[BOOST_FIND_LIB([program_options], [$1],
+ [boost/program_options.hpp],
+ [boost::program_options::options_description d("test");])
+])# BOOST_PROGRAM_OPTIONS
+
+
+# BOOST_REF()
+# -----------
+# Look for Boost.Ref
+AC_DEFUN([BOOST_REF],
+[BOOST_FIND_HEADER([boost/ref.hpp])])
+
+
+# BOOST_REGEX([PREFERRED-RT-OPT])
+# -------------------------------
+# Look for Boost.Regex. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_REGEX],
+[BOOST_FIND_LIB([regex], [$1],
+ [boost/regex.hpp],
+ [boost::regex exp("*"); boost::regex_match("foo", exp);])
+])# BOOST_REGEX
+
+
+# BOOST_SERIALIZATION([PREFERRED-RT-OPT])
+# ---------------------------------------
+# Look for Boost.Serialization. For the documentation of PREFERRED-RT-OPT, see
+# the documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_SERIALIZATION],
+[BOOST_FIND_LIB([serialization], [$1],
+ [boost/archive/text_oarchive.hpp],
+ [std::ostream* o = 0; // Cheap way to get an ostream...
+ boost::archive::text_oarchive t(*o);])
+])# BOOST_SIGNALS
+
+
+# BOOST_SIGNALS([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Signals. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_SIGNALS],
+[BOOST_FIND_LIB([signals], [$1],
+ [boost/signal.hpp],
+ [boost::signal<void ()> s;])
+])# BOOST_SIGNALS
+
+
+# BOOST_SMART_PTR()
+# -----------------
+# Look for Boost.SmartPtr
+AC_DEFUN([BOOST_SMART_PTR],
+[BOOST_FIND_HEADER([boost/scoped_ptr.hpp])
+BOOST_FIND_HEADER([boost/shared_ptr.hpp])
+])
+
+
+# BOOST_STATICASSERT()
+# --------------------
+# Look for Boost.StaticAssert
+AC_DEFUN([BOOST_STATICASSERT],
+[BOOST_FIND_HEADER([boost/static_assert.hpp])])
+
+
+# BOOST_STRING_ALGO()
+# -------------------
+# Look for Boost.StringAlgo
+AC_DEFUN([BOOST_STRING_ALGO],
+[BOOST_FIND_HEADER([boost/algorithm/string.hpp])
+])
+
+
+# BOOST_SYSTEM([PREFERRED-RT-OPT])
+# --------------------------------
+# Look for Boost.System. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above. This library was introduced in Boost
+# 1.35.0.
+AC_DEFUN([BOOST_SYSTEM],
+[BOOST_FIND_LIB([system], [$1],
+ [boost/system/error_code.hpp],
+ [boost::system::error_code e; e.clear();])
+])# BOOST_SYSTEM
+
+
+# BOOST_TEST([PREFERRED-RT-OPT])
+# ------------------------------
+# Look for Boost.Test. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_TEST],
+[m4_pattern_allow([^BOOST_CHECK$])dnl
+BOOST_FIND_LIB([unit_test_framework], [$1],
+ [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);],
+ [using boost::unit_test::test_suite;
+ test_suite* init_unit_test_suite(int argc, char ** argv)
+ { return NULL; }])
+])# BOOST_TEST
+
+
+# BOOST_THREADS([PREFERRED-RT-OPT])
+# ---------------------------------
+# Look for Boost.Thread. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+# FIXME: Provide an alias "BOOST_THREAD".
+AC_DEFUN([BOOST_THREADS],
+[dnl Having the pthread flag is required at least on GCC3 where
+dnl boost/thread.hpp would complain if we try to compile without
+dnl -pthread on GNU/Linux.
+AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl
+boost_threads_save_LIBS=$LIBS
+boost_threads_save_CPPFLAGS=$CPPFLAGS
+LIBS="$LIBS $boost_cv_pthread_flag"
+# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3,
+# boost/thread.hpp will trigger a #error if -pthread isn't used:
+# boost/config/requires_threads.hpp:47:5: #error "Compiler threading support
+# is not turned on. Please set the correct command line options for
+# threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)"
+CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag"
+BOOST_FIND_LIB([thread], [$1],
+ [boost/thread.hpp], [boost::thread t; boost::mutex m;])
+BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $boost_cv_pthread_flag"
+BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag"
+LIBS=$boost_threads_save_LIBS
+CPPFLAGS=$boost_threads_save_CPPFLAGS
+])# BOOST_THREADS
+
+
+# BOOST_TOKENIZER()
+# -----------------
+# Look for Boost.Tokenizer
+AC_DEFUN([BOOST_TOKENIZER],
+[BOOST_FIND_HEADER([boost/tokenizer.hpp])])
+
+
+# BOOST_TRIBOOL()
+# ---------------
+# Look for Boost.Tribool
+AC_DEFUN([BOOST_TRIBOOL],
+[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp])
+BOOST_FIND_HEADER([boost/logic/tribool.hpp])
+])
+
+
+# BOOST_TUPLE()
+# -------------
+# Look for Boost.Tuple
+AC_DEFUN([BOOST_TUPLE],
+[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])])
+
+
+# BOOST_TYPETRAITS()
+# --------------------
+# Look for Boost.TypeTraits
+AC_DEFUN([BOOST_TYPETRAITS],
+[BOOST_FIND_HEADER([boost/type_traits.hpp])])
+
+
+# BOOST_UTILITY()
+# ---------------
+# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom,
+# etc.)
+AC_DEFUN([BOOST_UTILITY],
+[BOOST_FIND_HEADER([boost/utility.hpp])])
+
+
+# BOOST_VARIANT()
+# ---------------
+# Look for Boost.Variant.
+AC_DEFUN([BOOST_VARIANT],
+[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp])
+BOOST_FIND_HEADER([boost/variant.hpp])])
+
+
+# BOOST_WAVE([PREFERRED-RT-OPT])
+# ------------------------------
+# NOTE: If you intend to use Wave/Spirit with thread support, make sure you
+# call BOOST_THREADS first.
+# Look for Boost.Wave. For the documentation of PREFERRED-RT-OPT, see the
+# documentation of BOOST_FIND_LIB above.
+AC_DEFUN([BOOST_WAVE],
+[AC_REQUIRE([BOOST_FILESYSTEM])dnl
+AC_REQUIRE([BOOST_DATE_TIME])dnl
+boost_wave_save_LIBS=$LIBS
+boost_wave_save_LDFLAGS=$LDFLAGS
+m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl
+LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS\
+$BOOST_THREAD_LIBS"
+LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS\
+$BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS"
+BOOST_FIND_LIB([wave], [$1],
+ [boost/wave.hpp],
+ [boost::wave::token_id id; get_token_name(id);])
+LIBS=$boost_wave_save_LIBS
+LDFLAGS=$boost_wave_save_LDFLAGS
+])# BOOST_WAVE
+
+
+# BOOST_XPRESSIVE()
+# -----------------
+# Look for Boost.Xpressive (new since 1.36.0).
+AC_DEFUN([BOOST_XPRESSIVE],
+[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])])
+
+
+# ----------------- #
+# Internal helpers. #
+# ----------------- #
+
+
+# _BOOST_PTHREAD_FLAG()
+# ---------------------
+# Internal helper for BOOST_THREADS. Based on ACX_PTHREAD:
+# http://autoconf-archive.cryp.to/acx_pthread.html
+AC_DEFUN([_BOOST_PTHREAD_FLAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_LANG_PUSH([C++])dnl
+AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag],
+[ boost_cv_pthread_flag=
+ # The ordering *is* (sometimes) important. Some notes on the
+ # individual items follow:
+ # (none): in case threads are in libc; should be tried before -Kthread and
+ # other compiler flags to prevent continual compiler warnings
+ # -lpthreads: AIX (must check this before -lpthread)
+ # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+ # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+ # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+ # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads)
+ # -pthreads: Solaris/GCC
+ # -mthreads: MinGW32/GCC, Lynx/GCC
+ # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+ # doesn't hurt to check since this sometimes defines pthreads too;
+ # also defines -D_REENTRANT)
+ # ... -mt is also the pthreads flag for HP/aCC
+ # -lpthread: GNU Linux, etc.
+ # --thread-safe: KAI C++
+ case $host_os in #(
+ *solaris*)
+ # On Solaris (at least, for some versions), libc contains stubbed
+ # (non-functional) versions of the pthreads routines, so link-based
+ # tests will erroneously succeed. (We need to link with -pthreads/-mt/
+ # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather
+ # a function called by this macro, so we could check for that, but
+ # who knows whether they'll stub that too in a future libc.) So,
+ # we'll just look for -pthreads and -lpthread first:
+ boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #(
+ *)
+ boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \
+ -pthreads -mthreads -lpthread --thread-safe -mt";;
+ esac
+ # Generate the test file.
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <pthread.h>],
+ [pthread_t th; pthread_join(th, 0);
+ pthread_attr_init(0); pthread_cleanup_push(0, 0);
+ pthread_create(0,0,0,0); pthread_cleanup_pop(0);])])
+ for boost_pthread_flag in '' $boost_pthread_flags; do
+ boost_pthread_ok=false
+dnl Re-use the test file already generated.
+ boost_pthreads__save_LIBS=$LIBS
+ LIBS="$LIBS $boost_pthread_flag"
+ AC_LINK_IFELSE([],
+ [if grep ".*$boost_pthread_flag" conftest.err; then
+ echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD
+ else
+ boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag
+ fi])
+ LIBS=$boost_pthreads__save_LIBS
+ $boost_pthread_ok && break
+ done
+])
+AC_LANG_POP([C++])dnl
+])# _BOOST_PTHREAD_FLAG
+
+
+# _BOOST_gcc_test(MAJOR, MINOR)
+# -----------------------------
+# Internal helper for _BOOST_FIND_COMPILER_TAG.
+m4_define([_BOOST_gcc_test],
+["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl
+
+
+# _BOOST_FIND_COMPILER_TAG()
+# --------------------------
+# Internal. When Boost is installed without --layout=system, each library
+# filename will hold a suffix that encodes the compiler used during the
+# build. The Boost build system seems to call this a `tag'.
+AC_DEFUN([_BOOST_FIND_COMPILER_TAG],
+[AC_REQUIRE([AC_PROG_CXX])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_CACHE_CHECK([for the toolset name used by Boost for $CXX], [boost_cv_lib_tag],
+[AC_LANG_PUSH([C++])dnl
+ boost_cv_lib_tag=unknown
+ # The following tests are mostly inspired by boost/config/auto_link.hpp
+ # The list is sorted to most recent/common to oldest compiler (in order
+ # to increase the likelihood of finding the right compiler with the
+ # least number of compilation attempt).
+ # Beware that some tests are sensible to the order (for instance, we must
+ # look for MinGW before looking for GCC3).
+ # I used one compilation test per compiler with a #error to recognize
+ # each compiler so that it works even when cross-compiling (let me know
+ # if you know a better approach).
+ # Known missing tags (known from Boost's tools/build/v2/tools/common.jam):
+ # como, edg, kcc, bck, mp, sw, tru, xlc
+ # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines
+ # the same defines as GCC's).
+ # TODO: Move the test on GCC 4.4 up once it's released.
+ for i in \
+ _BOOST_gcc_test(4, 3) \
+ _BOOST_gcc_test(4, 2) \
+ _BOOST_gcc_test(4, 1) \
+ _BOOST_gcc_test(4, 0) \
+ "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \
+ && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
+ || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \
+ _BOOST_gcc_test(3, 4) \
+ _BOOST_gcc_test(3, 3) \
+ "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \
+ "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \
+ _BOOST_gcc_test(3, 2) \
+ "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \
+ _BOOST_gcc_test(3, 1) \
+ _BOOST_gcc_test(3, 0) \
+ "defined __BORLANDC__ @ bcb" \
+ "defined __ICC && (defined __unix || defined __unix__) @ il" \
+ "defined __ICL @ iw" \
+ "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \
+ _BOOST_gcc_test(4, 4) \
+ _BOOST_gcc_test(2, 95) \
+ "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \
+ "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \
+ "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \
+ "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8"
+ do
+ boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '`
+ boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'`
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if $boost_tag_test
+/* OK */
+#else
+# error $boost_tag_test
+#endif
+]])], [boost_cv_lib_tag=$boost_tag; break], [])
+ done
+AC_LANG_POP([C++])dnl
+ case $boost_cv_lib_tag in #(
+ # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed
+ # to "gcc41" for instance.
+ *-gcc | *'-gcc ') :;; #( Don't re-add -gcc: it's already in there.
+ gcc*)
+ boost_tag_x=
+ case $host_os in #(
+ darwin*)
+ if test $boost_major_version -ge 136; then
+ # The `x' added in r46793 of Boost.
+ boost_tag_x=x
+ fi;;
+ esac
+ # We can specify multiple tags in this variable because it's used by
+ # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ...
+ boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc"
+ ;; #(
+ unknown)
+ AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]])
+ boost_cv_lib_tag=
+ ;;
+ esac
+])dnl end of AC_CACHE_CHECK
+])# _BOOST_FIND_COMPILER_TAG
+
+
+# _BOOST_GUESS_WHETHER_TO_USE_MT()
+# --------------------------------
+# Compile a small test to try to guess whether we should favor MT (Multi
+# Thread) flavors of Boost. Sets boost_guess_use_mt accordingly.
+AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT],
+[# Check whether we do better use `mt' even though we weren't ask to.
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#if defined _REENTRANT || defined _MT || defined __MT__
+/* use -mt */
+#else
+# error MT not needed
+#endif
+]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false])
+])
+
+# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# -------------------------------------------------------------------
+# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls. Fragile,
+# will break when Autoconf changes its internals. Requires that you manually
+# rm -f conftest.$ac_objext in between to really different tests, otherwise
+# you will try to link a conftest.o left behind by a previous test.
+# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this
+# macro).
+#
+# Don't use "break" in the actions, as it would short-circuit some code
+# this macro runs after the actions.
+m4_define([_BOOST_AC_LINK_IFELSE],
+[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl
+rm -f conftest$ac_exeext
+boost_save_ac_ext=$ac_ext
+boost_use_source=:
+# If we already have a .o, re-use it. We change $ac_ext so that $ac_link
+# tries to link the existing object file instead of compiling from source.
+test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false &&
+ _AS_ECHO_LOG([re-using the existing conftest.$ac_objext])
+AS_IF([_AC_DO_STDERR($ac_link) && {
+ test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_executable_p conftest$ac_exeext
+dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough.
+ }],
+ [$2],
+ [if $boost_use_source; then
+ _AC_MSG_LOG_CONFTEST
+ fi
+ $3])
+ac_objext=$boost_save_ac_objext
+ac_ext=$boost_save_ac_ext
+dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization)
+dnl information created by the PGI compiler (conftest_ipa8_conftest.oo),
+dnl as it would interfere with the next link command.
+rm -f core conftest.err conftest_ipa8_conftest.oo \
+ conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl
+])# _BOOST_AC_LINK_IFELSE
+
+# Local Variables:
+# mode: autoconf
+# End:
diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am
new file mode 100644
index 00000000..ebb016db
--- /dev/null
+++ b/gi/clda/src/Makefile.am
@@ -0,0 +1,6 @@
+bin_PROGRAMS = clda
+
+clda_SOURCES = clda.cc
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS)
+AM_LDFLAGS = -lz
diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc
new file mode 100644
index 00000000..49702df3
--- /dev/null
+++ b/gi/clda/src/clda.cc
@@ -0,0 +1,140 @@
+#include <iostream>
+#include <vector>
+#include <map>
+
+#include "timer.h"
+#include "crp.h"
+#include "sampler.h"
+#include "tdict.h"
+Dict TD::dict_;
+std::string TD::empty = "";
+std::string TD::space = " ";
+
+using namespace std;
+
+void ShowTopWords(const map<WordID, int>& counts) {
+ multimap<int, WordID> ms;
+ for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it)
+ ms.insert(make_pair(it->second, it->first));
+}
+
+int main(int argc, char** argv) {
+ if (argc != 2) {
+ cerr << "Usage: " << argv[0] << " num-classes\n";
+ return 1;
+ }
+ const int num_classes = atoi(argv[1]);
+ if (num_classes < 2) {
+ cerr << "Must request more than 1 class\n";
+ return 1;
+ }
+ cerr << "CLASSES: " << num_classes << endl;
+ char* buf = new char[800000];
+ vector<vector<int> > wji; // w[j][i] - observed word i of doc j
+ vector<vector<int> > zji; // z[j][i] - topic assignment for word i of doc j
+ cerr << "READING DOCUMENTS\n";
+ while(cin) {
+ cin.getline(buf, 800000);
+ if (buf[0] == 0) continue;
+ wji.push_back(vector<WordID>());
+ TD::ConvertSentence(buf, &wji.back());
+ }
+ cerr << "READ " << wji.size() << " DOCUMENTS\n";
+ MT19937 rng;
+ cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n";
+ zji.resize(wji.size());
+ double beta = 0.01;
+ double alpha = 0.001;
+ vector<CRP<int> > dr(zji.size(), CRP<int>(beta)); // dr[i] describes the probability of using a topic in document i
+ vector<CRP<int> > wr(num_classes, CRP<int>(alpha)); // wr[k] describes the probability of generating a word in topic k
+ int random_topic = rng.next() * num_classes;
+ for (int j = 0; j < zji.size(); ++j) {
+ const size_t num_words = wji[j].size();
+ vector<int>& zj = zji[j];
+ const vector<int>& wj = wji[j];
+ zj.resize(num_words);
+ for (int i = 0; i < num_words; ++i) {
+ if (random_topic == num_classes) { --random_topic; }
+ zj[i] = random_topic;
+ const int word = wj[i];
+ dr[j].increment(random_topic);
+ wr[random_topic].increment(word);
+ }
+ }
+ cerr << "SAMPLING\n";
+ vector<map<WordID, int> > t2w(num_classes);
+ const int num_iterations = 1000;
+ const int burnin_size = 800;
+ bool needline = false;
+ Timer timer;
+ SampleSet ss;
+ ss.resize(num_classes);
+ double total_time = 0;
+ for (int iter = 0; iter < num_iterations; ++iter) {
+ if (iter && iter % 10 == 0) {
+ total_time += timer.Elapsed();
+ timer.Reset();
+ cerr << '.'; needline=true;
+ prob_t lh = prob_t::One();
+ for (int j = 0; j < zji.size(); ++j) {
+ const size_t num_words = wji[j].size();
+ vector<int>& zj = zji[j];
+ const vector<int>& wj = wji[j];
+ for (int i = 0; i < num_words; ++i) {
+ const int word = wj[i];
+ const int cur_topic = zj[i];
+ lh *= dr[j].prob(cur_topic);
+ lh *= wr[cur_topic].prob(word);
+ if (iter > burnin_size) {
+ ++t2w[cur_topic][word];
+ }
+ }
+ }
+ if (iter && iter % 200 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 200) << " LLH=" << log(lh) << "]\n"; needline=false; total_time=0; }
+ //cerr << "ITERATION " << iter << " LOG LIKELIHOOD: " << log(lh) << endl;
+ }
+ for (int j = 0; j < zji.size(); ++j) {
+ const size_t num_words = wji[j].size();
+ vector<int>& zj = zji[j];
+ const vector<int>& wj = wji[j];
+ for (int i = 0; i < num_words; ++i) {
+ const int word = wj[i];
+ const int cur_topic = zj[i];
+ dr[j].decrement(cur_topic);
+ wr[cur_topic].decrement(word);
+
+ for (int k = 0; k < num_classes; ++k) {
+ ss[k]= dr[j].prob(k) * wr[k].prob(word);
+ }
+ const int new_topic = rng.SelectSample(ss);
+ dr[j].increment(new_topic);
+ wr[new_topic].increment(word);
+ zj[i] = new_topic;
+ }
+ }
+ }
+ if (needline) cerr << endl;
+ for (int j = 0; j < zji.size(); ++j) {
+ const size_t num_words = wji[j].size();
+ vector<int>& zj = zji[j];
+ const vector<int>& wj = wji[j];
+ zj.resize(num_words);
+ for (int i = 0; i < num_words; ++i) {
+ cout << TD::Convert(wj[i]) << '(' << zj[i] << ") ";
+ }
+ cout << endl;
+ }
+ for (int i = 0; i < num_classes; ++i) {
+ ShowTopWords(t2w[i]);
+ }
+ for (map<int,int>::iterator it = t2w[0].begin(); it != t2w[0].end(); ++it)
+ cerr << TD::Convert(it->first) << " " << it->second << endl;
+ cerr << "---------------------------------\n";
+ for (map<int,int>::iterator it = t2w[1].begin(); it != t2w[1].end(); ++it)
+ cerr << TD::Convert(it->first) << " " << it->second << endl;
+ cerr << "---------------------------------\n";
+ for (map<int,int>::iterator it = t2w[2].begin(); it != t2w[2].end(); ++it)
+ cerr << TD::Convert(it->first) << " " << it->second << endl;
+ return 0;
+}
+
diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h
new file mode 100644
index 00000000..13596cbf
--- /dev/null
+++ b/gi/clda/src/crp.h
@@ -0,0 +1,216 @@
+#ifndef _CRP_H_
+#define _CRP_H_
+
+// shamelessly adapted from code by Phil Blunsom and Trevor Cohn
+// There are TWO CRP classes here: CRPWithTableTracking tracks the
+// (expected) number of customers per table, and CRP just tracks
+// the number of customers / dish.
+// If you are implementing a HDP model, you should use CRP for the
+// base distribution and CRPWithTableTracking for the dependent
+// distribution.
+
+#include <iostream>
+#include <map>
+#include <boost/functional/hash.hpp>
+#include <tr1/unordered_map>
+
+#include "prob.h"
+#include "sampler.h" // RNG
+
+template <typename DishType, typename Hash = boost::hash<DishType> >
+class CRP {
+ public:
+ CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {}
+ void increment(const DishType& dish);
+ void decrement(const DishType& dish);
+ void erase(const DishType& dish) {
+ counts_.erase(dish);
+ }
+ inline int count(const DishType& dish) const {
+ const typename MapType::const_iterator i = counts_.find(dish);
+ if (i == counts_.end()) return 0; else return i->second;
+ }
+ inline prob_t prob(const DishType& dish) const {
+ return (prob_t(count(dish) + alpha_)) / prob_t(total_customers_ + alpha_);
+ }
+ inline prob_t prob(const DishType& dish, const prob_t& p0) const {
+ return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_);
+ }
+ private:
+ typedef std::tr1::unordered_map<DishType, int, Hash> MapType;
+ MapType counts_;
+ const double alpha_;
+ const prob_t palpha_;
+ int total_customers_;
+};
+
+template <typename Dish, typename Hash>
+void CRP<Dish,Hash>::increment(const Dish& dish) {
+ ++counts_[dish];
+ ++total_customers_;
+}
+
+template <typename Dish, typename Hash>
+void CRP<Dish,Hash>::decrement(const Dish& dish) {
+ typename MapType::iterator i = counts_.find(dish);
+ assert(i != counts_.end());
+ if (--i->second == 0)
+ counts_.erase(i);
+ --total_customers_;
+}
+
+template <typename DishType, typename Hash = boost::hash<DishType>, typename RNG = MT19937>
+class CRPWithTableTracking {
+ public:
+ CRPWithTableTracking(double alpha, RNG* rng) :
+ alpha_(alpha), palpha_(alpha), total_customers_(),
+ total_tables_(), rng_(rng) {}
+
+ // seat a customer for dish d, returns the delta in tables
+ // with customers
+ int increment(const DishType& d, const prob_t& p0 = prob_t::One());
+ int decrement(const DishType& d);
+ void erase(const DishType& dish);
+
+ inline int count(const DishType& dish) const {
+ const typename MapType::const_iterator i = counts_.find(dish);
+ if (i == counts_.end()) return 0; else return i->second.count_;
+ }
+ inline prob_t prob(const DishType& dish) const {
+ return (prob_t(count(dish) + alpha_)) / prob_t(total_customers_ + alpha_);
+ }
+ inline prob_t prob(const DishType& dish, const prob_t& p0) const {
+ return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_);
+ }
+ private:
+ struct TableInfo {
+ TableInfo() : count_(), tables_() {}
+ int count_; // total customers eating dish
+ int tables_; // total tables labeled with dish
+ std::map<int, int> table_histogram_; // num customers at table -> number tables
+ };
+ typedef std::tr1::unordered_map<DishType, TableInfo, Hash> MapType;
+
+ inline prob_t prob_share_table(const double& customer_count) const {
+ if (customer_count)
+ return prob_t(customer_count) / prob_t(customer_count + alpha_);
+ else
+ return prob_t::Zero();
+ }
+ inline prob_t prob_new_table(const double& customer_count, const prob_t& p0) const {
+ if (customer_count)
+ return palpha_ * p0 / prob_t(customer_count + alpha_);
+ else
+ return p0;
+ }
+
+ MapType counts_;
+ const double alpha_;
+ const prob_t palpha_;
+ int total_customers_;
+ int total_tables_;
+ RNG* rng_;
+};
+
+template <typename Dish, typename Hash, typename RNG>
+int CRPWithTableTracking<Dish,Hash,RNG>::increment(const Dish& dish, const prob_t& p0) {
+ TableInfo& tc = counts_[dish];
+
+ //std::cerr << "\nincrement for " << dish << " with p0 " << p0 << "\n";
+ //std::cerr << "\tBEFORE histogram: " << tc.table_histogram_ << " ";
+ //std::cerr << "count: " << tc.count_ << " ";
+ //std::cerr << "tables: " << tc.tables_ << "\n";
+
+ // seated at a new or existing table?
+ prob_t pshare = prob_share_table(tc.count_);
+ prob_t pnew = prob_new_table(tc.count_, p0);
+
+ //std::cerr << "\t\tP0 " << p0 << " count(dish) " << count(dish)
+ // << " tables " << tc
+ // << " p(share) " << pshare << " p(new) " << pnew << "\n";
+
+ int delta = 0;
+ if (tc.count_ == 0 || rng_->SelectSample(pshare, pnew) == 1) {
+ // assign to a new table
+ ++tc.tables_;
+ ++tc.table_histogram_[1];
+ ++total_tables_;
+ delta = 1;
+ } else {
+ // can't share a table if there are no other customers
+ assert(tc.count_ > 0);
+
+ // randomly assign to an existing table
+ // remove constant denominator from inner loop
+ int r = static_cast<int>(rng_->next() * tc.count_);
+ for (std::map<int,int>::iterator hit = tc.table_histogram_.begin();
+ hit != tc.table_histogram_.end(); ++hit) {
+ r -= hit->first * hit->second;
+ if (r <= 0) {
+ ++tc.table_histogram_[hit->first+1];
+ --hit->second;
+ if (hit->second == 0)
+ tc.table_histogram_.erase(hit);
+ break;
+ }
+ }
+ if (r > 0) {
+ std::cerr << "CONSISTENCY ERROR: " << tc.count_ << std::endl;
+ std::cerr << pshare << std::endl;
+ std::cerr << pnew << std::endl;
+ std::cerr << r << std::endl;
+ abort();
+ }
+ }
+ ++tc.count_;
+ ++total_customers_;
+ return delta;
+}
+
+template <typename Dish, typename Hash, typename RNG>
+int CRPWithTableTracking<Dish,Hash,RNG>::decrement(const Dish& dish) {
+ typename MapType::iterator i = counts_.find(dish);
+ if(i == counts_.end()) {
+ std::cerr << "MISSING DISH: " << dish << std::endl;
+ abort();
+ }
+
+ int delta = 0;
+ TableInfo &tc = i->second;
+
+ //std::cout << "\ndecrement for " << dish << " with p0 " << p0 << "\n";
+ //std::cout << "\tBEFORE histogram: " << tc.table_histogram << " ";
+ //std::cout << "count: " << count(dish) << " ";
+ //std::cout << "tables: " << tc.tables << "\n";
+
+ int r = static_cast<int>(rng_->next() * tc.count_);
+ //std::cerr << "FOO: " << r << std::endl;
+ for (std::map<int,int>::iterator hit = tc.table_histogram_.begin();
+ hit != tc.table_histogram_.end(); ++hit) {
+ r -= (hit->first * hit->second);
+ if (r <= 0) {
+ if (hit->first > 1)
+ tc.table_histogram_[hit->first-1] += 1;
+ else {
+ --delta;
+ --tc.tables_;
+ --total_tables_;
+ }
+
+ --hit->second;
+ if (hit->second == 0) tc.table_histogram_.erase(hit);
+ break;
+ }
+ }
+
+ assert(r <= 0);
+
+ // remove the customer
+ --tc.count_;
+ --total_customers_;
+ assert(tc.count_ >= 0);
+ if (tc.count_ == 0) counts_.erase(i);
+ return delta;
+}
+
+#endif
diff --git a/gi/clda/src/dict.h b/gi/clda/src/dict.h
new file mode 100644
index 00000000..72e82e6d
--- /dev/null
+++ b/gi/clda/src/dict.h
@@ -0,0 +1,43 @@
+#ifndef DICT_H_
+#define DICT_H_
+
+#include <cassert>
+#include <cstring>
+#include <tr1/unordered_map>
+#include <string>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+
+#include "wordid.h"
+
+class Dict {
+ typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map;
+ public:
+ Dict() : b0_("<bad0>") { words_.reserve(1000); }
+ inline int max() const { return words_.size(); }
+ inline WordID Convert(const std::string& word, bool frozen = false) {
+ Map::iterator i = d_.find(word);
+ if (i == d_.end()) {
+ if (frozen)
+ return 0;
+ words_.push_back(word);
+ d_[word] = words_.size();
+ return words_.size();
+ } else {
+ return i->second;
+ }
+ }
+ inline const std::string& Convert(const WordID& id) const {
+ if (id == 0) return b0_;
+ assert(id <= words_.size());
+ return words_[id-1];
+ }
+ void clear() { words_.clear(); d_.clear(); }
+ private:
+ const std::string b0_;
+ std::vector<std::string> words_;
+ Map d_;
+};
+
+#endif
diff --git a/gi/clda/src/logval.h b/gi/clda/src/logval.h
new file mode 100644
index 00000000..7099b9be
--- /dev/null
+++ b/gi/clda/src/logval.h
@@ -0,0 +1,157 @@
+#ifndef LOGVAL_H_
+#define LOGVAL_H_
+
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <limits>
+
+template <typename T>
+class LogVal {
+ public:
+ LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}
+ explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+ static LogVal<T> One() { return LogVal(1); }
+ static LogVal<T> Zero() { return LogVal(); }
+
+ void logeq(const T& v) { s_ = false; v_ = v; }
+
+ LogVal& operator+=(const LogVal& a) {
+ if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;
+ if (a.s_ == s_) {
+ if (a.v_ < v_) {
+ v_ = v_ + log1p(std::exp(a.v_ - v_));
+ } else {
+ v_ = a.v_ + log1p(std::exp(v_ - a.v_));
+ }
+ } else {
+ if (a.v_ < v_) {
+ v_ = v_ + log1p(-std::exp(a.v_ - v_));
+ } else {
+ v_ = a.v_ + log1p(-std::exp(v_ - a.v_));
+ s_ = !s_;
+ }
+ }
+ return *this;
+ }
+
+ LogVal& operator*=(const LogVal& a) {
+ s_ = (s_ != a.s_);
+ v_ += a.v_;
+ return *this;
+ }
+
+ LogVal& operator/=(const LogVal& a) {
+ s_ = (s_ != a.s_);
+ v_ -= a.v_;
+ return *this;
+ }
+
+ LogVal& operator-=(const LogVal& a) {
+ LogVal b = a;
+ b.invert();
+ return *this += b;
+ }
+
+ LogVal& poweq(const T& power) {
+ if (s_) {
+ std::cerr << "poweq(T) not implemented when s_ is true\n";
+ std::abort();
+ } else {
+ v_ *= power;
+ }
+ return *this;
+ }
+
+ void invert() { s_ = !s_; }
+
+ LogVal pow(const T& power) const {
+ LogVal res = *this;
+ res.poweq(power);
+ return res;
+ }
+
+ operator T() const {
+ if (s_) return -std::exp(v_); else return std::exp(v_);
+ }
+
+ bool s_;
+ T v_;
+};
+
+template<typename T>
+LogVal<T> operator+(const LogVal<T>& o1, const LogVal<T>& o2) {
+ LogVal<T> res(o1);
+ res += o2;
+ return res;
+}
+
+template<typename T>
+LogVal<T> operator*(const LogVal<T>& o1, const LogVal<T>& o2) {
+ LogVal<T> res(o1);
+ res *= o2;
+ return res;
+}
+
+template<typename T>
+LogVal<T> operator/(const LogVal<T>& o1, const LogVal<T>& o2) {
+ LogVal<T> res(o1);
+ res /= o2;
+ return res;
+}
+
+template<typename T>
+LogVal<T> operator-(const LogVal<T>& o1, const LogVal<T>& o2) {
+ LogVal<T> res(o1);
+ res -= o2;
+ return res;
+}
+
+template<typename T>
+T log(const LogVal<T>& o) {
+ if (o.s_) return log(-1.0);
+ return o.v_;
+}
+
+template <typename T>
+LogVal<T> pow(const LogVal<T>& b, const T& e) {
+ return b.pow(e);
+}
+
+template <typename T>
+bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ if (lhs.s_ == rhs.s_) {
+ return (lhs.v_ < rhs.v_);
+ } else {
+ return lhs.s_ > rhs.s_;
+ }
+}
+
+#if 0
+template <typename T>
+bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ return (lhs.v_ <= rhs.v_);
+}
+
+template <typename T>
+bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ return (lhs.v_ > rhs.v_);
+}
+
+template <typename T>
+bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ return (lhs.v_ >= rhs.v_);
+}
+#endif
+
+template <typename T>
+bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);
+}
+
+template <typename T>
+bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
+ return !(lhs == rhs);
+}
+
+#endif
diff --git a/gi/clda/src/prob.h b/gi/clda/src/prob.h
new file mode 100644
index 00000000..bc297870
--- /dev/null
+++ b/gi/clda/src/prob.h
@@ -0,0 +1,8 @@
+#ifndef _PROB_H_
+#define _PROB_H_
+
+#include "logval.h"
+
+typedef LogVal<double> prob_t;
+
+#endif
diff --git a/gi/clda/src/sampler.h b/gi/clda/src/sampler.h
new file mode 100644
index 00000000..4d0b2e64
--- /dev/null
+++ b/gi/clda/src/sampler.h
@@ -0,0 +1,138 @@
+#ifndef SAMPLER_H_
+#define SAMPLER_H_
+
+#include <algorithm>
+#include <functional>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real.hpp>
+#include <boost/random/variate_generator.hpp>
+#include <boost/random/normal_distribution.hpp>
+#include <boost/random/poisson_distribution.hpp>
+
+#include "prob.h"
+
+struct SampleSet;
+
+template <typename RNG>
+struct RandomNumberGenerator {
+ static uint32_t GetTrulyRandomSeed() {
+ uint32_t seed;
+ std::ifstream r("/dev/urandom");
+ if (r) {
+ r.read((char*)&seed,sizeof(uint32_t));
+ }
+ if (r.fail() || !r) {
+ std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl;
+ seed = time(NULL);
+ }
+ std::cerr << "Seeding random number sequence to " << seed << std::endl;
+ return seed;
+ }
+
+ RandomNumberGenerator() : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
+ uint32_t seed = GetTrulyRandomSeed();
+ m_generator.seed(seed);
+ }
+ explicit RandomNumberGenerator(uint32_t seed) : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
+ if (!seed) seed = GetTrulyRandomSeed();
+ m_generator.seed(seed);
+ }
+
+ size_t SelectSample(const prob_t& a, const prob_t& b, double T = 1.0) {
+ if (T == 1.0) {
+ if (this->next() > (a / (a + b))) return 1; else return 0;
+ } else {
+ assert(!"not implemented");
+ }
+ }
+
+ // T is the annealing temperature, if desired
+ size_t SelectSample(const SampleSet& ss, double T = 1.0);
+
+ // draw a value from U(0,1)
+ double next() {return m_random();}
+
+ // draw a value from N(mean,var)
+ double NextNormal(double mean, double var) {
+ return boost::normal_distribution<double>(mean, var)(m_random);
+ }
+
+ // draw a value from a Poisson distribution
+ // lambda must be greater than 0
+ int NextPoisson(int lambda) {
+ return boost::poisson_distribution<int>(lambda)(m_random);
+ }
+
+ bool AcceptMetropolisHastings(const prob_t& p_cur,
+ const prob_t& p_prev,
+ const prob_t& q_cur,
+ const prob_t& q_prev) {
+ const prob_t a = (p_cur / p_prev) * (q_prev / q_cur);
+ if (log(a) >= 0.0) return true;
+ return (prob_t(this->next()) < a);
+ }
+
+ private:
+ boost::uniform_real<> m_dist;
+ RNG m_generator;
+ boost::variate_generator<RNG&, boost::uniform_real<> > m_random;
+};
+
+typedef RandomNumberGenerator<boost::mt19937> MT19937;
+
+class SampleSet {
+ public:
+ const prob_t& operator[](int i) const { return m_scores[i]; }
+ prob_t& operator[](int i) { return m_scores[i]; }
+ bool empty() const { return m_scores.empty(); }
+ void add(const prob_t& s) { m_scores.push_back(s); }
+ void clear() { m_scores.clear(); }
+ size_t size() const { return m_scores.size(); }
+ void resize(int size) { m_scores.resize(size); }
+ std::vector<prob_t> m_scores;
+};
+
+template <typename RNG>
+size_t RandomNumberGenerator<RNG>::SelectSample(const SampleSet& ss, double T) {
+ assert(T > 0.0);
+ assert(ss.m_scores.size() > 0);
+ if (ss.m_scores.size() == 1) return 0;
+ const prob_t annealing_factor(1.0 / T);
+ const bool anneal = (annealing_factor != prob_t::One());
+ prob_t sum = prob_t::Zero();
+ if (anneal) {
+ for (int i = 0; i < ss.m_scores.size(); ++i)
+ sum += ss.m_scores[i].pow(annealing_factor); // p^(1/T)
+ } else {
+ sum = std::accumulate(ss.m_scores.begin(), ss.m_scores.end(), prob_t::Zero());
+ }
+ //for (size_t i = 0; i < ss.m_scores.size(); ++i) std::cerr << ss.m_scores[i] << ",";
+ //std::cerr << std::endl;
+
+ prob_t random(this->next()); // random number between 0 and 1
+ random *= sum; // scale with normalization factor
+ //std::cerr << "Random number " << random << std::endl;
+
+ //now figure out which sample
+ size_t position = 1;
+ sum = ss.m_scores[0];
+ if (anneal) {
+ sum.poweq(annealing_factor);
+ for (; position < ss.m_scores.size() && sum < random; ++position)
+ sum += ss.m_scores[position].pow(annealing_factor);
+ } else {
+ for (; position < ss.m_scores.size() && sum < random; ++position)
+ sum += ss.m_scores[position];
+ }
+ //std::cout << "random: " << random << " sample: " << position << std::endl;
+ //std::cerr << "Sample: " << position-1 << std::endl;
+ //exit(1);
+ return position-1;
+}
+
+#endif
diff --git a/gi/clda/src/tdict.h b/gi/clda/src/tdict.h
new file mode 100644
index 00000000..97f145a1
--- /dev/null
+++ b/gi/clda/src/tdict.h
@@ -0,0 +1,49 @@
+#ifndef _TDICT_H_
+#define _TDICT_H_
+
+#include <string>
+#include <vector>
+#include "wordid.h"
+#include "dict.h"
+
+class Vocab;
+
+struct TD {
+
+ static Dict dict_;
+ static std::string empty;
+ static std::string space;
+
+ static std::string GetString(const std::vector<WordID>& str) {
+ std::string res;
+ for (std::vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i)
+ res += (i == str.begin() ? empty : space) + TD::Convert(*i);
+ return res;
+ }
+
+ static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids) {
+ std::string s = sent;
+ int last = 0;
+ ids->clear();
+ for (int i=0; i < s.size(); ++i)
+ if (s[i] == 32 || s[i] == '\t') {
+ s[i]=0;
+ if (last != i) {
+ ids->push_back(Convert(&s[last]));
+ }
+ last = i + 1;
+ }
+ if (last != s.size())
+ ids->push_back(Convert(&s[last]));
+ }
+
+ static WordID Convert(const std::string& s) {
+ return dict_.Convert(s);
+ }
+
+ static const std::string& Convert(const WordID& w) {
+ return dict_.Convert(w);
+ }
+};
+
+#endif
diff --git a/gi/clda/src/timer.h b/gi/clda/src/timer.h
new file mode 100644
index 00000000..ca26b304
--- /dev/null
+++ b/gi/clda/src/timer.h
@@ -0,0 +1,18 @@
+#ifndef _TIMER_STATS_H_
+#define _TIMER_STATS_H_
+
+struct Timer {
+ Timer() { Reset(); }
+ void Reset() {
+ start_t = clock();
+ }
+ double Elapsed() const {
+ const clock_t end_t = clock();
+ const double elapsed = (end_t - start_t) / 1000000.0;
+ return elapsed;
+ }
+ private:
+ clock_t start_t;
+};
+
+#endif
diff --git a/gi/clda/src/wordid.h b/gi/clda/src/wordid.h
new file mode 100644
index 00000000..fb50bcc1
--- /dev/null
+++ b/gi/clda/src/wordid.h
@@ -0,0 +1,6 @@
+#ifndef _WORD_ID_H_
+#define _WORD_ID_H_
+
+typedef int WordID;
+
+#endif