diff options
| -rw-r--r-- | gi/clda/Makefile.am | 5 | ||||
| -rw-r--r-- | gi/clda/configure.ac | 17 | ||||
| -rw-r--r-- | gi/clda/m4/boost.m4 | 1035 | ||||
| -rw-r--r-- | gi/clda/src/Makefile.am | 6 | ||||
| -rw-r--r-- | gi/clda/src/clda.cc | 140 | ||||
| -rw-r--r-- | gi/clda/src/crp.h | 216 | ||||
| -rw-r--r-- | gi/clda/src/dict.h | 43 | ||||
| -rw-r--r-- | gi/clda/src/logval.h | 157 | ||||
| -rw-r--r-- | gi/clda/src/prob.h | 8 | ||||
| -rw-r--r-- | gi/clda/src/sampler.h | 138 | ||||
| -rw-r--r-- | gi/clda/src/tdict.h | 49 | ||||
| -rw-r--r-- | gi/clda/src/timer.h | 18 | ||||
| -rw-r--r-- | gi/clda/src/wordid.h | 6 | 
13 files changed, 1838 insertions, 0 deletions
| diff --git a/gi/clda/Makefile.am b/gi/clda/Makefile.am new file mode 100644 index 00000000..936b6ae3 --- /dev/null +++ b/gi/clda/Makefile.am @@ -0,0 +1,5 @@ +SUBDIRS = src +AUTOMAKE_OPTIONS = foreign + +ACLOCAL_AMFLAGS = -I m4 + diff --git a/gi/clda/configure.ac b/gi/clda/configure.ac new file mode 100644 index 00000000..8469ee09 --- /dev/null +++ b/gi/clda/configure.ac @@ -0,0 +1,17 @@ +AC_INIT +AM_INIT_AUTOMAKE(cdec,0.1) +AC_CONFIG_HEADERS(config.h) +AC_PROG_LIBTOOL +AC_PROG_CC +AC_PROG_CXX +AC_LANG_CPLUSPLUS +BOOST_REQUIRE +BOOST_PROGRAM_OPTIONS +CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" +LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS" +LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS" + +AC_PROG_INSTALL + +AC_OUTPUT(Makefile src/Makefile) + diff --git a/gi/clda/m4/boost.m4 b/gi/clda/m4/boost.m4 new file mode 100644 index 00000000..7e0ed075 --- /dev/null +++ b/gi/clda/m4/boost.m4 @@ -0,0 +1,1035 @@ +# boost.m4: Locate Boost headers and libraries for autoconf-based projects. +# Copyright (C) 2007, 2008, 2009  Benoit Sigoure <tsuna@lrde.epita.fr> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Additional permission under section 7 of the GNU General Public +# License, version 3 ("GPLv3"): +# +# If you convey this file as part of a work that contains a +# configuration script generated by Autoconf, you may do so under +# terms of your choice. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ +m4_define([_BOOST_SERIAL], [m4_translit([ +# serial 12 +], [# +], [])]) + +# Original sources can be found at http://github.com/tsuna/boost.m4 +# You can fetch the latest version of the script by doing: +#   wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4 + +# ------ # +# README # +# ------ # + +# This file provides several macros to use the various Boost libraries. +# The first macro is BOOST_REQUIRE.  It will simply check if it's possible to +# find the Boost headers of a given (optional) minimum version and it will +# define BOOST_CPPFLAGS accordingly.  It will add an option --with-boost to +# your configure so that users can specify non standard locations. +# If the user's environment contains BOOST_ROOT and --with-boost was not +# specified, --with-boost=$BOOST_ROOT is implicitly used. +# For more README and documentation, go to http://github.com/tsuna/boost.m4 +# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL.  If you don't, don't worry, +# simply read the README, it will show you what to do step by step. + +m4_pattern_forbid([^_?BOOST_]) + + +# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM, +#                [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# -------------------------------------------------------- +# Same as AC_EGREP_CPP, but leave the result in conftest.i. +# PATTERN is *not* overquoted, as in AC_EGREP_CPP.  It could be useful +# to turn this into a macro which extracts the value of any macro. +m4_define([_BOOST_SED_CPP], +[AC_LANG_PREPROC_REQUIRE()dnl +AC_REQUIRE([AC_PROG_SED])dnl +AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])]) +AS_IF([dnl eval is necessary to expand ac_cpp. +dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell. +dnl Beware of Windows end-of-lines, for instance if we are running +dnl some Windows programs under Wine.  In that case, boost/version.hpp +dnl is certainly using "\r\n", but the regular Unix shell will only +dnl strip `\n' with backquotes, not the `\r'.  
This results in +dnl boost_cv_lib_version='1_37\r' for instance, which breaks +dnl everything else. +dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK +(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD | +  tr -d '\r' | +  $SED -n -e "$1" >conftest.i 2>&1], +  [$3], +  [$4]) +rm -rf conftest* +])# _BOOST_SED_CPP + + + +# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND]) +# ----------------------------------------------- +# Look for Boost.  If version is given, it must either be a literal of the form +# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a +# variable "$var". +# Defines the value BOOST_CPPFLAGS.  This macro only checks for headers with +# the required version, it does not check for any of the Boost libraries. +# On success, defines HAVE_BOOST.  On failure, calls the optional +# ACTION-IF-NOT-FOUND action if one was supplied. +# Otherwise aborts with an error message. +AC_DEFUN([BOOST_REQUIRE], +[AC_REQUIRE([AC_PROG_CXX])dnl +AC_REQUIRE([AC_PROG_GREP])dnl +echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD +boost_save_IFS=$IFS +boost_version_req=$1 +IFS=. +set x $boost_version_req 0 0 0 +IFS=$boost_save_IFS +shift +boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"` +AC_ARG_WITH([boost], +   [AS_HELP_STRING([--with-boost=DIR], +                   [prefix of Boost $1 @<:@guess@:>@])])dnl +AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl +# If BOOST_ROOT is set and the user has not provided a value to +# --with-boost, then treat BOOST_ROOT as if the user supplied it. 
+if test x"$BOOST_ROOT" != x; then +  if test x"$with_boost" = x; then +    AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT]) +    with_boost=$BOOST_ROOT +  else +    AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost]) +  fi +fi +AC_SUBST([DISTCHECK_CONFIGURE_FLAGS], +         ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"]) +boost_save_CPPFLAGS=$CPPFLAGS +  AC_CACHE_CHECK([for Boost headers version >= $boost_version_req], +    [boost_cv_inc_path], +    [boost_cv_inc_path=no +AC_LANG_PUSH([C++])dnl +m4_pattern_allow([^BOOST_VERSION$])dnl +    AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include <boost/version.hpp> +#if !defined BOOST_VERSION +# error BOOST_VERSION is not defined +#elif BOOST_VERSION < $boost_version_req +# error Boost headers version < $boost_version_req +#endif +]])]) +    # If the user provided a value to --with-boost, use it and only it. +    case $with_boost in #( +      ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \ +                 /usr/include C:/Boost/include;; #( +      *)      set x "$with_boost/include" "$with_boost";; +    esac +    shift +    for boost_dir +    do +    # Without --layout=system, Boost (or at least some versions) installs +    # itself in <prefix>/include/boost-<version>.  This inner loop helps to +    # find headers in such directories. +    # +    # Any ${boost_dir}/boost-x_xx directories are searched in reverse version +    # order followed by ${boost_dir}.  The final '.' is a sentinel for +    # searching $boost_dir" itself.  Entries are whitespace separated. 
+    # +    # I didn't indent this loop on purpose (to avoid over-indented code) +    boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \ +        && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \ +        && echo .` +    for boost_inc in $boost_layout_system_search_list +    do +      if test x"$boost_inc" != x.; then +        boost_inc="$boost_dir/$boost_inc" +      else +        boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list +      fi +      if test x"$boost_inc" != x; then +        # We are going to check whether the version of Boost installed +        # in $boost_inc is usable by running a compilation that +        # #includes it.  But if we pass a -I/some/path in which Boost +        # is not installed, the compiler will just skip this -I and +        # use other locations (either from CPPFLAGS, or from its list +        # of system include directories).  As a result we would use +        # header installed on the machine instead of the /some/path +        # specified by the user.  So in that precise case (trying +        # $boost_inc), make sure the version.hpp exists. +        # +        # Use test -e as there can be symlinks. 
+        test -e "$boost_inc/boost/version.hpp" || continue +        CPPFLAGS="$CPPFLAGS -I$boost_inc" +      fi +      AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no]) +      if test x"$boost_cv_inc_path" = xyes; then +        if test x"$boost_inc" != x; then +          boost_cv_inc_path=$boost_inc +        fi +        break 2 +      fi +    done +    done +AC_LANG_POP([C++])dnl +    ]) +    case $boost_cv_inc_path in #( +      no) +        boost_errmsg="cannot find Boost headers version >= $boost_version_req" +        m4_if([$2], [],  [AC_MSG_ERROR([$boost_errmsg])], +                        [AC_MSG_NOTICE([$boost_errmsg])]) +        $2 +        ;;#( +      yes) +        BOOST_CPPFLAGS= +        AC_DEFINE([HAVE_BOOST], [1], +                  [Defined if the requested minimum BOOST version is satisfied]) +        ;;#( +      *) +        AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"]) +        ;; +    esac +  AC_CACHE_CHECK([for Boost's header version], +    [boost_cv_lib_version], +    [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl +     _BOOST_SED_CPP([/^boost-lib-version = /{s///;s/\"//g;p;g;}], +                    [#include <boost/version.hpp> +boost-lib-version = BOOST_LIB_VERSION], +    [boost_cv_lib_version=`cat conftest.i`])]) +    # e.g. "134" for 1_34_1 or "135" for 1_35 +    boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'` +    case $boost_major_version in #( +      '' | *[[!0-9]]*) +        AC_MSG_ERROR([invalid value: boost_major_version=$boost_major_version]) +        ;; +    esac +CPPFLAGS=$boost_save_CPPFLAGS +])# BOOST_REQUIRE + +# BOOST_STATIC() +# -------------- +# Add the "--enable-static-boost" configure argument. If this argument is given +# on the command line, static versions of the libraries will be looked up. 
+AC_DEFUN([BOOST_STATIC], +  [AC_ARG_ENABLE([static-boost], +     [AC_HELP_STRING([--enable-static-boost], +               [Prefer the static boost libraries over the shared ones [no]])], +     [enable_static_boost=yes], +     [enable_static_boost=no])])# BOOST_STATIC + +# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND]) +# -------------------------------------------------------------------------- +# Wrapper around AC_CHECK_HEADER for Boost headers.  Useful to check for +# some parts of the Boost library which are only made of headers and don't +# require linking (such as Boost.Foreach). +# +# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be +# found in the first place, in which case by default a notice is issued to the +# user.  Presumably if we haven't died already it's because it's OK to not have +# Boost, which is why only a notice is issued instead of a hard error. +# +# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_<HEADER-NAME> in +# case of success # (where HEADER-NAME is written LIKE_THIS, e.g., +# HAVE_BOOST_FOREACH_HPP). +AC_DEFUN([BOOST_FIND_HEADER], +[AC_REQUIRE([BOOST_REQUIRE])dnl +if test x"$boost_cv_inc_path" = xno; then +  m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])]) +else +AC_LANG_PUSH([C++])dnl +boost_save_CPPFLAGS=$CPPFLAGS +CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" +AC_CHECK_HEADER([$1], +  [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1], +                               [Define to 1 if you have <$1>])])], +  [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])]) +CPPFLAGS=$boost_save_CPPFLAGS +AC_LANG_POP([C++])dnl +fi +])# BOOST_FIND_HEADER + + +# BOOST_FIND_LIB([LIB-NAME], [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST], +#                [CXX-PROLOGUE]) +# ------------------------------------------------------------------------- +# Look for the Boost library LIB-NAME (e.g., LIB-NAME = `thread', for +# libboost_thread).  
Check that HEADER-NAME works and check that +# libboost_LIB-NAME can link with the code CXX-TEST.  The optional argument +# CXX-PROLOGUE can be used to include some C++ code before the `main' +# function. +# +# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above). +# +# Boost libraries typically come compiled with several flavors (with different +# runtime options) so PREFERRED-RT-OPT is the preferred suffix.  A suffix is one +# or more of the following letters: sgdpn (in that order).  s = static +# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build, +# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n' +# must always be used along with `p').  Additionally, PREFERRED-RT-OPT can +# start with `mt-' to indicate that there is a preference for multi-thread +# builds.  Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp +# ...  If you want to make sure you have a specific version of Boost +# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro. +AC_DEFUN([BOOST_FIND_LIB], +[AC_REQUIRE([BOOST_REQUIRE])dnl +AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl +AC_REQUIRE([BOOST_STATIC])dnl +AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl +if test x"$boost_cv_inc_path" = xno; then +  AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library]) +else +dnl The else branch is huge and wasn't indented on purpose. +AC_LANG_PUSH([C++])dnl +AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl +AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl +AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl +BOOST_FIND_HEADER([$3]) +boost_save_CPPFLAGS=$CPPFLAGS +CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" +# Now let's try to find the library.  The algorithm is as follows: first look +# for a given library name according to the user's PREFERRED-RT-OPT.  For each +# library name, we prefer to use the ones that carry the tag (toolset name). 
+# Each library is searched through the various standard paths were Boost is +# usually installed.  If we can't find the standard variants, we try to +# enforce -mt (for instance on MacOSX, libboost_threads.dylib doesn't exist +# but there's -obviously- libboost_threads-mt.dylib). +AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib], +  [Boost_lib=no +  case "$2" in #( +    mt | mt-) boost_mt=-mt; boost_rtopt=;; #( +    mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$2" : 'Xmt-*\(.*\)'`;; #( +    *) boost_mt=; boost_rtopt=$2;; +  esac +  if test $enable_static_boost = yes; then +    boost_rtopt="s$boost_rtopt" +  fi +  # Find the proper debug variant depending on what we've been asked to find. +  case $boost_rtopt in #( +    *d*) boost_rt_d=$boost_rtopt;; #( +    *[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn') +      boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1\2/'`;; #( +    *) boost_rt_d='-d';; +  esac +  # If the PREFERRED-RT-OPT are not empty, prepend a `-'. +  test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt" +  $boost_guess_use_mt && boost_mt=-mt +  # Look for the abs path the static archive. +  # $libext is computed by Libtool but let's make sure it's non empty. +  test -z "$libext" && +    AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?]) +  boost_save_ac_objext=$ac_objext +  # Generate the test file. +  AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$3> +$5], [$4])]) +dnl Optimization hacks: compiling C++ is slow, especially with Boost.  What +dnl we're trying to do here is guess the right combination of link flags +dnl (LIBS / LDFLAGS) to use a given library.  This can take several +dnl iterations before it succeeds and is thus *very* slow.  So what we do +dnl instead is that we compile the code first (and thus get an object file, +dnl typically conftest.o).  Then we try various combinations of link flags +dnl until we succeed to link conftest.o in an executable.  
The problem is +dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always +dnl remove all the temporary files including conftest.o.  So the trick here +dnl is to temporarily change the value of ac_objext so that conftest.o is +dnl preserved across tests.  This is obviously fragile and I will burn in +dnl hell for not respecting Autoconf's documented interfaces, but in the +dnl mean time, it optimizes the macro by a factor of 5 to 30. +dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left +dnl empty because the test file is generated only once above (before we +dnl start the for loops). +  AC_COMPILE_IFELSE([], +    [ac_objext=do_not_rm_me_plz], +    [AC_MSG_ERROR([cannot compile a test that uses Boost $1])]) +  ac_objext=$boost_save_ac_objext +  boost_failed_libs= +# Don't bother to indent the 6 nested for loops, only the 2 innermost ones +# matter. +for boost_tag_ in -$boost_cv_lib_tag ''; do +for boost_ver_ in -$boost_cv_lib_version ''; do +for boost_mt_ in $boost_mt -mt ''; do +for boost_rtopt_ in $boost_rtopt '' -d; do +  for boost_lib in \ +    boost_$1$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \ +    boost_$1$boost_tag_$boost_rtopt_$boost_ver_ \ +    boost_$1$boost_tag_$boost_mt_$boost_ver_ \ +    boost_$1$boost_tag_$boost_ver_ +  do +    # Avoid testing twice the same lib +    case $boost_failed_libs in #( +      *@$boost_lib@*) continue;; +    esac +    # If with_boost is empty, we'll search in /lib first, which is not quite +    # right so instead we'll try a location based on where the headers are. +    boost_tmp_lib=$with_boost +    test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include} +    for boost_ldpath in "$boost_tmp_lib/lib" '' \ +             /opt/local/lib /usr/local/lib /opt/lib /usr/lib \ +             "$with_boost" C:/Boost/lib /lib /usr/lib64 /lib64 +    do +      test -e "$boost_ldpath" || continue +      boost_save_LDFLAGS=$LDFLAGS +      # Are we looking for a static library? 
+      case $boost_ldpath:$boost_rtopt_ in #( +        *?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt) +          Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext" +          test -e "$Boost_lib_LIBS" || continue;; #( +        *) # No: use -lboost_foo to find the shared library. +          Boost_lib_LIBS="-l$boost_lib";; +      esac +      boost_save_LIBS=$LIBS +      LIBS="$Boost_lib_LIBS $LIBS" +      test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath" +dnl First argument of AC_LINK_IFELSE left empty because the test file is +dnl generated only once above (before we start the for loops). +      _BOOST_AC_LINK_IFELSE([], +                            [Boost_lib=yes], [Boost_lib=no]) +      ac_objext=$boost_save_ac_objext +      LDFLAGS=$boost_save_LDFLAGS +      LIBS=$boost_save_LIBS +      if test x"$Boost_lib" = xyes; then +        Boost_lib_LDFLAGS="-L$boost_ldpath -R$boost_ldpath" +        break 6 +      else +        boost_failed_libs="$boost_failed_libs@$boost_lib@" +      fi +    done +  done +done +done +done +done +rm -f conftest.$ac_objext +]) +case $Boost_lib in #( +  no) _AC_MSG_LOG_CONFTEST +    AC_MSG_ERROR([cannot not find the flags to link with Boost $1]) +    ;; +esac +AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS]) +AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS]) +CPPFLAGS=$boost_save_CPPFLAGS +AS_VAR_POPDEF([Boost_lib])dnl +AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl +AS_VAR_POPDEF([Boost_lib_LIBS])dnl +AC_LANG_POP([C++])dnl +fi +])# BOOST_FIND_LIB + + +# --------------------------------------- # +# Checks for the various Boost libraries. # +# --------------------------------------- # + +# List of boost libraries: http://www.boost.org/libs/libraries.htm +# The page http://beta.boost.org/doc/libs is useful: it gives the first release +# version of each library (among other things). 
+ +# BOOST_ARRAY() +# ------------- +# Look for Boost.Array +AC_DEFUN([BOOST_ARRAY], +[BOOST_FIND_HEADER([boost/array.hpp])]) + + +# BOOST_ASIO() +# ------------ +# Look for Boost.Asio (new in Boost 1.35). +AC_DEFUN([BOOST_ASIO], +[AC_REQUIRE([BOOST_SYSTEM])dnl +BOOST_FIND_HEADER([boost/asio.hpp])]) + + +# BOOST_BIND() +# ------------ +# Look for Boost.Bind +AC_DEFUN([BOOST_BIND], +[BOOST_FIND_HEADER([boost/bind.hpp])]) + + +# BOOST_CONVERSION() +# ------------------ +# Look for Boost.Conversion (cast / lexical_cast) +AC_DEFUN([BOOST_CONVERSION], +[BOOST_FIND_HEADER([boost/cast.hpp]) +BOOST_FIND_HEADER([boost/lexical_cast.hpp]) +])# BOOST_CONVERSION + + +# BOOST_DATE_TIME([PREFERRED-RT-OPT]) +# ----------------------------------- +# Look for Boost.Date_Time.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. +AC_DEFUN([BOOST_DATE_TIME], +[BOOST_FIND_LIB([date_time], [$1], +                [boost/date_time/posix_time/posix_time.hpp], +                [boost::posix_time::ptime t;]) +])# BOOST_DATE_TIME + + +# BOOST_FILESYSTEM([PREFERRED-RT-OPT]) +# ------------------------------------ +# Look for Boost.Filesystem.  For the documentation of PREFERRED-RT-OPT, see +# the documentation of BOOST_FIND_LIB above. +# Do not check for boost/filesystem.hpp because this file was introduced in +# 1.34. +AC_DEFUN([BOOST_FILESYSTEM], +[# Do we have to check for Boost.System?  This link-time dependency was +# added as of 1.35.0.  If we have a version <1.35, we must not attempt to +# find Boost.System as it didn't exist by then. +if test $boost_major_version -ge 135; then +BOOST_SYSTEM([$1]) +fi # end of the Boost.System check. 
+boost_filesystem_save_LIBS=$LIBS +boost_filesystem_save_LDFLAGS=$LDFLAGS +m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl +LIBS="$LIBS $BOOST_SYSTEM_LIBS" +LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS" +BOOST_FIND_LIB([filesystem], [$1], +                [boost/filesystem/path.hpp], [boost::filesystem::path p;]) +LIBS=$boost_filesystem_save_LIBS +LDFLAGS=$boost_filesystem_save_LDFLAGS +])# BOOST_FILESYSTEM + + +# BOOST_FOREACH() +# --------------- +# Look for Boost.Foreach +AC_DEFUN([BOOST_FOREACH], +[BOOST_FIND_HEADER([boost/foreach.hpp])]) + + +# BOOST_FORMAT() +# -------------- +# Look for Boost.Format +# Note: we can't check for boost/format/format_fwd.hpp because the header isn't +# standalone.  It can't be compiled because it triggers the following error: +# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std' +#                                                  does not name a type +AC_DEFUN([BOOST_FORMAT], +[BOOST_FIND_HEADER([boost/format.hpp])]) + + +# BOOST_FUNCTION() +# ---------------- +# Look for Boost.Function +AC_DEFUN([BOOST_FUNCTION], +[BOOST_FIND_HEADER([boost/function.hpp])]) + + +# BOOST_GRAPH([PREFERRED-RT-OPT]) +# ------------------------------- +# Look for Boost.Graphs.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. +AC_DEFUN([BOOST_GRAPH], +[BOOST_FIND_LIB([graph], [$1], +                [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;]) +])# BOOST_GRAPH + + +# BOOST_IOSTREAMS([PREFERRED-RT-OPT]) +# ------------------------------- +# Look for Boost.IOStreams.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. 
+AC_DEFUN([BOOST_IOSTREAMS], +[BOOST_FIND_LIB([iostreams], [$1], +                [boost/iostreams/device/file_descriptor.hpp], +                [boost::iostreams::file_descriptor fd(0); fd.close();]) +])# BOOST_IOSTREAMS + + +# BOOST_HASH() +# ------------ +# Look for Boost.Functional/Hash +AC_DEFUN([BOOST_HASH], +[BOOST_FIND_HEADER([boost/functional/hash.hpp])]) + + +# BOOST_LAMBDA() +# -------------- +# Look for Boost.Lambda +AC_DEFUN([BOOST_LAMBDA], +[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])]) + + +# BOOST_MATH() +# ------------ +# Look for Boost.Math +# TODO: This library isn't header-only but it comes in multiple different +# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99, +# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1, +# libboost_math_tr1f, libboost_math_tr1l).  This macro must be fixed to do the +# right thing anyway. +AC_DEFUN([BOOST_MATH], +[BOOST_FIND_HEADER([boost/math/special_functions.hpp])]) + + +# BOOST_MULTIARRAY() +# ------------------ +# Look for Boost.MultiArray +AC_DEFUN([BOOST_MULTIARRAY], +[BOOST_FIND_HEADER([boost/multi_array.hpp])]) + + +# BOOST_NUMERIC_CONVERSION() +# -------------------------- +# Look for Boost.NumericConversion (policy-based numeric conversion) +AC_DEFUN([BOOST_NUMERIC_CONVERSION], +[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp]) +])# BOOST_NUMERIC_CONVERSION + + +# BOOST_OPTIONAL() +# ---------------- +# Look for Boost.Optional +AC_DEFUN([BOOST_OPTIONAL], +[BOOST_FIND_HEADER([boost/optional.hpp])]) + + +# BOOST_PREPROCESSOR() +# -------------------- +# Look for Boost.Preprocessor +AC_DEFUN([BOOST_PREPROCESSOR], +[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])]) + + +# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT]) +# ----------------------------------------- +# Look for Boost.Program_options.  For the documentation of PREFERRED-RT-OPT, see +# the documentation of BOOST_FIND_LIB above. 
+AC_DEFUN([BOOST_PROGRAM_OPTIONS], +[BOOST_FIND_LIB([program_options], [$1], +                [boost/program_options.hpp], +                [boost::program_options::options_description d("test");]) +])# BOOST_PROGRAM_OPTIONS + + +# BOOST_REF() +# ----------- +# Look for Boost.Ref +AC_DEFUN([BOOST_REF], +[BOOST_FIND_HEADER([boost/ref.hpp])]) + + +# BOOST_REGEX([PREFERRED-RT-OPT]) +# ------------------------------- +# Look for Boost.Regex.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. +AC_DEFUN([BOOST_REGEX], +[BOOST_FIND_LIB([regex], [$1], +                [boost/regex.hpp], +                [boost::regex exp("*"); boost::regex_match("foo", exp);]) +])# BOOST_REGEX + + +# BOOST_SERIALIZATION([PREFERRED-RT-OPT]) +# --------------------------------------- +# Look for Boost.Serialization.  For the documentation of PREFERRED-RT-OPT, see +# the documentation of BOOST_FIND_LIB above. +AC_DEFUN([BOOST_SERIALIZATION], +[BOOST_FIND_LIB([serialization], [$1], +                [boost/archive/text_oarchive.hpp], +                [std::ostream* o = 0; // Cheap way to get an ostream... +                boost::archive::text_oarchive t(*o);]) +])# BOOST_SERIALIZATION + + +# BOOST_SIGNALS([PREFERRED-RT-OPT]) +# --------------------------------- +# Look for Boost.Signals.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. 
+AC_DEFUN([BOOST_SIGNALS], +[BOOST_FIND_LIB([signals], [$1], +                [boost/signal.hpp], +                [boost::signal<void ()> s;]) +])# BOOST_SIGNALS + + +# BOOST_SMART_PTR() +# ----------------- +# Look for Boost.SmartPtr +AC_DEFUN([BOOST_SMART_PTR], +[BOOST_FIND_HEADER([boost/scoped_ptr.hpp]) +BOOST_FIND_HEADER([boost/shared_ptr.hpp]) +]) + + +# BOOST_STATICASSERT() +# -------------------- +# Look for Boost.StaticAssert +AC_DEFUN([BOOST_STATICASSERT], +[BOOST_FIND_HEADER([boost/static_assert.hpp])]) + + +# BOOST_STRING_ALGO() +# ------------------- +# Look for Boost.StringAlgo +AC_DEFUN([BOOST_STRING_ALGO], +[BOOST_FIND_HEADER([boost/algorithm/string.hpp]) +]) + + +# BOOST_SYSTEM([PREFERRED-RT-OPT]) +# -------------------------------- +# Look for Boost.System.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above.  This library was introduced in Boost +# 1.35.0. +AC_DEFUN([BOOST_SYSTEM], +[BOOST_FIND_LIB([system], [$1], +                [boost/system/error_code.hpp], +                [boost::system::error_code e; e.clear();]) +])# BOOST_SYSTEM + + +# BOOST_TEST([PREFERRED-RT-OPT]) +# ------------------------------ +# Look for Boost.Test.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. +AC_DEFUN([BOOST_TEST], +[m4_pattern_allow([^BOOST_CHECK$])dnl +BOOST_FIND_LIB([unit_test_framework], [$1], +               [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);], +               [using boost::unit_test::test_suite; +               test_suite* init_unit_test_suite(int argc, char ** argv) +               { return NULL; }]) +])# BOOST_TEST + + +# BOOST_THREADS([PREFERRED-RT-OPT]) +# --------------------------------- +# Look for Boost.Thread.  For the documentation of PREFERRED-RT-OPT, see the +# documentation of BOOST_FIND_LIB above. +# FIXME: Provide an alias "BOOST_THREAD". 
+AC_DEFUN([BOOST_THREADS], +[dnl Having the pthread flag is required at least on GCC3 where +dnl boost/thread.hpp would complain if we try to compile without +dnl -pthread on GNU/Linux. +AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl +boost_threads_save_LIBS=$LIBS +boost_threads_save_CPPFLAGS=$CPPFLAGS +LIBS="$LIBS $boost_cv_pthread_flag" +# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3, +# boost/thread.hpp will trigger a #error if -pthread isn't used: +#   boost/config/requires_threads.hpp:47:5: #error "Compiler threading support +#   is not turned on. Please set the correct command line options for +#   threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)" +CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag" +BOOST_FIND_LIB([thread], [$1], +                [boost/thread.hpp], [boost::thread t; boost::mutex m;]) +BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $boost_cv_pthread_flag" +BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag" +LIBS=$boost_threads_save_LIBS +CPPFLAGS=$boost_threads_save_CPPFLAGS +])# BOOST_THREADS + + +# BOOST_TOKENIZER() +# ----------------- +# Look for Boost.Tokenizer +AC_DEFUN([BOOST_TOKENIZER], +[BOOST_FIND_HEADER([boost/tokenizer.hpp])]) + + +# BOOST_TRIBOOL() +# --------------- +# Look for Boost.Tribool +AC_DEFUN([BOOST_TRIBOOL], +[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp]) +BOOST_FIND_HEADER([boost/logic/tribool.hpp]) +]) + + +# BOOST_TUPLE() +# ------------- +# Look for Boost.Tuple +AC_DEFUN([BOOST_TUPLE], +[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])]) + + +# BOOST_TYPETRAITS() +# -------------------- +# Look for Boost.TypeTraits +AC_DEFUN([BOOST_TYPETRAITS], +[BOOST_FIND_HEADER([boost/type_traits.hpp])]) + + +# BOOST_UTILITY() +# --------------- +# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom, +# etc.) +AC_DEFUN([BOOST_UTILITY], +[BOOST_FIND_HEADER([boost/utility.hpp])]) + + +# BOOST_VARIANT() +# --------------- +# Look for Boost.Variant. 
AC_DEFUN([BOOST_VARIANT],
[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp])
BOOST_FIND_HEADER([boost/variant.hpp])])


# BOOST_WAVE([PREFERRED-RT-OPT])
# ------------------------------
# NOTE: If you intend to use Wave/Spirit with thread support, make sure you
# call BOOST_THREADS first.
# Look for Boost.Wave.  For the documentation of PREFERRED-RT-OPT, see the
# documentation of BOOST_FIND_LIB above.
AC_DEFUN([BOOST_WAVE],
[AC_REQUIRE([BOOST_FILESYSTEM])dnl
AC_REQUIRE([BOOST_DATE_TIME])dnl
dnl LIBS and LDFLAGS are extended only for the duration of the probe and
dnl restored afterwards.
boost_wave_save_LIBS=$LIBS
boost_wave_save_LDFLAGS=$LDFLAGS
m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl
LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS\
$BOOST_THREAD_LIBS"
LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS\
$BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS"
BOOST_FIND_LIB([wave], [$1],
                [boost/wave.hpp],
                [boost::wave::token_id id; get_token_name(id);])
LIBS=$boost_wave_save_LIBS
LDFLAGS=$boost_wave_save_LDFLAGS
])# BOOST_WAVE


# BOOST_XPRESSIVE()
# -----------------
# Look for Boost.Xpressive (new since 1.36.0).
AC_DEFUN([BOOST_XPRESSIVE],
[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])])


# ----------------- #
# Internal helpers. #
# ----------------- #


# _BOOST_PTHREAD_FLAG()
# ---------------------
# Internal helper for BOOST_THREADS.  Based on ACX_PTHREAD:
# http://autoconf-archive.cryp.to/acx_pthread.html
# The result is cached in boost_cv_pthread_flag; it is left empty when the
# test program links without any extra flag or when no candidate works.
AC_DEFUN([_BOOST_PTHREAD_FLAG],
[AC_REQUIRE([AC_PROG_CXX])dnl
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_LANG_PUSH([C++])dnl
AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag],
[ boost_cv_pthread_flag=
  # The ordering *is* (sometimes) important.  Some notes on the
  # individual items follow:
  # (none): in case threads are in libc; should be tried before -Kthread and
  #       other compiler flags to prevent continual compiler warnings
  # -lpthreads: AIX (must check this before -lpthread)
  # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
  # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
  # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
  # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads)
  # -pthreads: Solaris/GCC
  # -mthreads: MinGW32/GCC, Lynx/GCC
  # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
  #      doesn't hurt to check since this sometimes defines pthreads too;
  #      also defines -D_REENTRANT)
  #      ... -mt is also the pthreads flag for HP/aCC
  # -lpthread: GNU Linux, etc.
  # --thread-safe: KAI C++
  case $host_os in #(
    *solaris*)
      # On Solaris (at least, for some versions), libc contains stubbed
      # (non-functional) versions of the pthreads routines, so link-based
      # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
      # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
      # a function called by this macro, so we could check for that, but
      # who knows whether they'll stub that too in a future libc.)  So,
      # we'll just look for -pthreads and -lpthread first:
      boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #(
    *)
      boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \
                           -pthreads -mthreads -lpthread --thread-safe -mt";;
  esac
  # Generate the test file.
  AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <pthread.h>],
    [pthread_t th; pthread_join(th, 0);
    pthread_attr_init(0); pthread_cleanup_push(0, 0);
    pthread_create(0,0,0,0); pthread_cleanup_pop(0);])])
dnl The empty flag is tried first, then each candidate in order.  The loop
dnl stops at the first flag for which the link succeeds and grep finds no
dnl line of conftest.err matching the flag.
  for boost_pthread_flag in '' $boost_pthread_flags; do
    boost_pthread_ok=false
dnl Re-use the test file already generated.
    boost_pthreads__save_LIBS=$LIBS
    LIBS="$LIBS $boost_pthread_flag"
    AC_LINK_IFELSE([],
      [if grep ".*$boost_pthread_flag" conftest.err; then
         echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD
       else
         boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag
       fi])
    LIBS=$boost_pthreads__save_LIBS
    $boost_pthread_ok && break
  done
])
AC_LANG_POP([C++])dnl
])# _BOOST_PTHREAD_FLAG


# _BOOST_gcc_test(MAJOR, MINOR)
# -----------------------------
# Internal helper for _BOOST_FIND_COMPILER_TAG.
# Expands to one quoted list item of the form: preprocessor condition,
# an at-sign separator, then the tag gccMAJORMINOR.
m4_define([_BOOST_gcc_test],
["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl
# _BOOST_FIND_COMPILER_TAG()
# --------------------------
# Internal.  When Boost is installed without --layout=system, each library
# filename will hold a suffix that encodes the compiler used during the
# build.  The Boost build system seems to call this a `tag'.
# The result is cached in boost_cv_lib_tag (empty when no test matched).
AC_DEFUN([_BOOST_FIND_COMPILER_TAG],
[AC_REQUIRE([AC_PROG_CXX])dnl
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_CACHE_CHECK([for the toolset name used by Boost for $CXX], [boost_cv_lib_tag],
[AC_LANG_PUSH([C++])dnl
  boost_cv_lib_tag=unknown
  # The following tests are mostly inspired by boost/config/auto_link.hpp
  # The list is sorted to most recent/common to oldest compiler (in order
  # to increase the likelihood of finding the right compiler with the
  # smallest number of compilation attempts).
  # Beware that some tests are sensitive to the order (for instance, we must
  # look for MinGW before looking for GCC3).
  # I used one compilation test per compiler with a #error to recognize
  # each compiler so that it works even when cross-compiling (let me know
  # if you know a better approach).
  # Known missing tags (known from Boost's tools/build/v2/tools/common.jam):
  #   como, edg, kcc, bck, mp, sw, tru, xlc
  # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines
  # the same defines as GCC's).
  # TODO: Move the test on GCC 4.4 up once it's released.
  for i in \
    _BOOST_gcc_test(4, 3) \
    _BOOST_gcc_test(4, 2) \
    _BOOST_gcc_test(4, 1) \
    _BOOST_gcc_test(4, 0) \
    "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \
     && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \
         || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \
    _BOOST_gcc_test(3, 4) \
    _BOOST_gcc_test(3, 3) \
    "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \
    "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \
    _BOOST_gcc_test(3, 2) \
    "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \
    _BOOST_gcc_test(3, 1) \
    _BOOST_gcc_test(3, 0) \
    "defined __BORLANDC__ @ bcb" \
    "defined __ICC && (defined __unix || defined __unix__) @ il" \
    "defined __ICL @ iw" \
    "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \
    _BOOST_gcc_test(4, 4) \
    _BOOST_gcc_test(2, 95) \
    "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \
    "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \
    "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \
    "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8"
  do
dnl Each list item is split at its at-sign separator: the left part is the
dnl preprocessor condition to compile, the right part is the tag to record.
    boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '`
    boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'`
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#if $boost_tag_test
/* OK */
#else
# error $boost_tag_test
#endif
]])], [boost_cv_lib_tag=$boost_tag; break], [])
  done
AC_LANG_POP([C++])dnl
  case $boost_cv_lib_tag in #(
    # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed
    # to "gcc41" for instance.
    *-gcc | *'-gcc ') :;; #(  Don't re-add -gcc: it's already in there.
    gcc*)
      boost_tag_x=
      case $host_os in #(
        darwin*)
          if test $boost_major_version -ge 136; then
            # The `x' added in r46793 of Boost.
            boost_tag_x=x
          fi;;
      esac
      # We can specify multiple tags in this variable because it's used by
      # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ...
      boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc"
      ;; #(
    unknown)
      AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]])
      boost_cv_lib_tag=
      ;;
  esac
])dnl end of AC_CACHE_CHECK
])# _BOOST_FIND_COMPILER_TAG


# _BOOST_GUESS_WHETHER_TO_USE_MT()
# --------------------------------
# Compile a small test to try to guess whether we should favor MT (Multi
# Thread) flavors of Boost.  Sets boost_guess_use_mt accordingly.
AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT],
[# Check whether we do better use `mt' even though we weren't asked to.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#if defined _REENTRANT || defined _MT || defined __MT__
/* use -mt */
#else
# error MT not needed
#endif
]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false])
])
# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
# -------------------------------------------------------------------
# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls.  Fragile,
# will break when Autoconf changes its internals.  Requires that you manually
# rm -f conftest.$ac_objext in between two really different tests, otherwise
# you will try to link a conftest.o left behind by a previous test.
# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this
# macro).
#
# Don't use "break" in the actions, as it would short-circuit some code
# this macro runs after the actions.
m4_define([_BOOST_AC_LINK_IFELSE],
[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl
rm -f conftest$ac_exeext
boost_save_ac_ext=$ac_ext
boost_use_source=:
# If we already have a .o, re-use it.  We change $ac_ext so that $ac_link
# tries to link the existing object file instead of compiling from source.
test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false &&
  _AS_ECHO_LOG([re-using the existing conftest.$ac_objext])
AS_IF([_AC_DO_STDERR($ac_link) && {
	 test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" ||
	 test ! -s conftest.err
       } && test -s conftest$ac_exeext && {
	 test "$cross_compiling" = yes ||
	 $as_executable_p conftest$ac_exeext
dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough.
       }],
      [$2],
      [if $boost_use_source; then
         _AC_MSG_LOG_CONFTEST
       fi
       $3])
dnl NOTE(review): boost_save_ac_objext is not assigned anywhere inside this
dnl macro; presumably the caller saves ac_objext into it before expanding
dnl this macro - confirm against the call sites.
ac_objext=$boost_save_ac_objext
ac_ext=$boost_save_ac_ext
dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization)
dnl information created by the PGI compiler (conftest_ipa8_conftest.oo),
dnl as it would interfere with the next link command.
rm -f core conftest.err conftest_ipa8_conftest.oo \
      conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl
])# _BOOST_AC_LINK_IFELSE

# Local Variables:
# mode: autoconf
# End:

# ---- gi/clda/src/Makefile.am (new file in the same patch) ----
bin_PROGRAMS = clda

clda_SOURCES = clda.cc

AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS)
AM_LDFLAGS = -lz
+rm -f core conftest.err conftest_ipa8_conftest.oo \ +      conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl +])# _BOOST_AC_LINK_IFELSE + +# Local Variables: +# mode: autoconf +# End: diff --git a/gi/clda/src/Makefile.am b/gi/clda/src/Makefile.am new file mode 100644 index 00000000..ebb016db --- /dev/null +++ b/gi/clda/src/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = clda + +clda_SOURCES = clda.cc + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) +AM_LDFLAGS = -lz diff --git a/gi/clda/src/clda.cc b/gi/clda/src/clda.cc new file mode 100644 index 00000000..49702df3 --- /dev/null +++ b/gi/clda/src/clda.cc @@ -0,0 +1,140 @@ +#include <iostream> +#include <vector> +#include <map> + +#include "timer.h" +#include "crp.h" +#include "sampler.h" +#include "tdict.h" +Dict TD::dict_; +std::string TD::empty = ""; +std::string TD::space = " "; + +using namespace std; + +void ShowTopWords(const map<WordID, int>& counts) { +  multimap<int, WordID> ms; +  for (map<WordID,int>::const_iterator it = counts.begin(); it != counts.end(); ++it) +    ms.insert(make_pair(it->second, it->first)); +} + +int main(int argc, char** argv) { +  if (argc != 2) { +    cerr << "Usage: " << argv[0] << " num-classes\n"; +    return 1; +  } +  const int num_classes = atoi(argv[1]); +  if (num_classes < 2) { +    cerr << "Must request more than 1 class\n"; +    return 1; +  } +  cerr << "CLASSES: " << num_classes << endl; +  char* buf = new char[800000]; +  vector<vector<int> > wji;   // w[j][i] - observed word i of doc j +  vector<vector<int> > zji;   // z[j][i] - topic assignment for word i of doc j +  cerr << "READING DOCUMENTS\n"; +  while(cin) { +    cin.getline(buf, 800000); +    if (buf[0] == 0) continue; +    wji.push_back(vector<WordID>()); +    TD::ConvertSentence(buf, &wji.back()); +  } +  cerr << "READ " << wji.size() << " DOCUMENTS\n"; +  MT19937 rng; +  cerr << "INITIALIZING RANDOM TOPIC ASSIGNMENTS\n"; +  zji.resize(wji.size()); +  double beta = 0.01; +  double alpha = 
0.001; +  vector<CRP<int> > dr(zji.size(), CRP<int>(beta)); // dr[i] describes the probability of using a topic in document i +  vector<CRP<int> > wr(num_classes, CRP<int>(alpha)); // wr[k] describes the probability of generating a word in topic k +      int random_topic = rng.next() * num_classes; +  for (int j = 0; j < zji.size(); ++j) { +    const size_t num_words = wji[j].size(); +    vector<int>& zj = zji[j]; +    const vector<int>& wj = wji[j]; +    zj.resize(num_words); +    for (int i = 0; i < num_words; ++i) { +      if (random_topic == num_classes) { --random_topic; } +      zj[i] = random_topic; +      const int word = wj[i]; +      dr[j].increment(random_topic); +      wr[random_topic].increment(word); +    } +  } +  cerr << "SAMPLING\n"; +  vector<map<WordID, int> > t2w(num_classes); +  const int num_iterations = 1000; +  const int burnin_size = 800; +  bool needline = false; +  Timer timer; +  SampleSet ss; +  ss.resize(num_classes); +  double total_time = 0; +  for (int iter = 0; iter < num_iterations; ++iter) { +    if (iter && iter % 10 == 0) { +      total_time += timer.Elapsed(); +      timer.Reset(); +      cerr << '.'; needline=true; +      prob_t lh = prob_t::One(); +      for (int j = 0; j < zji.size(); ++j) { +        const size_t num_words = wji[j].size(); +        vector<int>& zj = zji[j]; +        const vector<int>& wj = wji[j]; +        for (int i = 0; i < num_words; ++i) { +          const int word = wj[i]; +          const int cur_topic = zj[i]; +          lh *= dr[j].prob(cur_topic); +          lh *= wr[cur_topic].prob(word); +          if (iter > burnin_size) { +            ++t2w[cur_topic][word]; +          } +        } +      } +      if (iter && iter % 200 == 0) { cerr << " [ITER=" << iter << " SEC/SAMPLE=" << (total_time / 200) << " LLH=" << log(lh) << "]\n"; needline=false; total_time=0; } +      //cerr << "ITERATION " << iter << " LOG LIKELIHOOD: " << log(lh) << endl; +    } +    for (int j = 0; j < zji.size(); ++j) { +      
const size_t num_words = wji[j].size(); +      vector<int>& zj = zji[j]; +      const vector<int>& wj = wji[j]; +      for (int i = 0; i < num_words; ++i) { +        const int word = wj[i]; +        const int cur_topic = zj[i]; +        dr[j].decrement(cur_topic); +        wr[cur_topic].decrement(word); +  +        for (int k = 0; k < num_classes; ++k) { +          ss[k]= dr[j].prob(k) * wr[k].prob(word); +        } +        const int new_topic = rng.SelectSample(ss); +        dr[j].increment(new_topic); +        wr[new_topic].increment(word); +        zj[i] = new_topic; +      } +    } +  } +  if (needline) cerr << endl; +  for (int j = 0; j < zji.size(); ++j) { +    const size_t num_words = wji[j].size(); +    vector<int>& zj = zji[j]; +    const vector<int>& wj = wji[j]; +    zj.resize(num_words); +    for (int i = 0; i < num_words; ++i) { +      cout << TD::Convert(wj[i]) << '(' << zj[i] << ") "; +    } +    cout << endl; +  } +  for (int i = 0; i < num_classes; ++i) { +    ShowTopWords(t2w[i]); +  } +  for (map<int,int>::iterator it = t2w[0].begin(); it != t2w[0].end(); ++it) +    cerr << TD::Convert(it->first) << " " << it->second << endl; +  cerr << "---------------------------------\n"; +  for (map<int,int>::iterator it = t2w[1].begin(); it != t2w[1].end(); ++it) +    cerr << TD::Convert(it->first) << " " << it->second << endl; +  cerr << "---------------------------------\n"; +  for (map<int,int>::iterator it = t2w[2].begin(); it != t2w[2].end(); ++it) +    cerr << TD::Convert(it->first) << " " << it->second << endl; +  return 0; +} + diff --git a/gi/clda/src/crp.h b/gi/clda/src/crp.h new file mode 100644 index 00000000..13596cbf --- /dev/null +++ b/gi/clda/src/crp.h @@ -0,0 +1,216 @@ +#ifndef _CRP_H_ +#define _CRP_H_ + +// shamelessly adapted from code by Phil Blunsom and Trevor Cohn +// There are TWO CRP classes here: CRPWithTableTracking tracks the +// (expected) number of customers per table, and CRP just tracks +// the number of customers / dish. 
+// If you are implementing a HDP model, you should use CRP for the +// base distribution and CRPWithTableTracking for the dependent +// distribution. + +#include <iostream> +#include <map> +#include <boost/functional/hash.hpp> +#include <tr1/unordered_map> + +#include "prob.h" +#include "sampler.h"   // RNG + +template <typename DishType, typename Hash = boost::hash<DishType> > +class CRP { + public: +  CRP(double alpha) : alpha_(alpha), palpha_(alpha), total_customers_() {} +  void increment(const DishType& dish); +  void decrement(const DishType& dish); +  void erase(const DishType& dish) { +    counts_.erase(dish); +  } +  inline int count(const DishType& dish) const { +    const typename MapType::const_iterator i = counts_.find(dish); +    if (i == counts_.end()) return 0; else return i->second; +  } +  inline prob_t prob(const DishType& dish) const { +    return (prob_t(count(dish) + alpha_)) / prob_t(total_customers_ + alpha_); +  } +  inline prob_t prob(const DishType& dish, const prob_t& p0) const { +    return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_); +  } + private: +  typedef std::tr1::unordered_map<DishType, int, Hash> MapType; +  MapType counts_; +  const double alpha_; +  const prob_t palpha_; +  int total_customers_; +}; + +template <typename Dish, typename Hash> +void CRP<Dish,Hash>::increment(const Dish& dish) { +  ++counts_[dish]; +  ++total_customers_; +} + +template <typename Dish, typename Hash> +void CRP<Dish,Hash>::decrement(const Dish& dish) { +  typename MapType::iterator i = counts_.find(dish); +  assert(i != counts_.end()); +  if (--i->second == 0) +    counts_.erase(i); +  --total_customers_; +} + +template <typename DishType, typename Hash = boost::hash<DishType>, typename RNG = MT19937> +class CRPWithTableTracking { + public: +  CRPWithTableTracking(double alpha, RNG* rng) : +    alpha_(alpha), palpha_(alpha), total_customers_(), +    total_tables_(), rng_(rng) {} + +  // seat a customer for dish d, 
returns the delta in tables +  // with customers +  int increment(const DishType& d, const prob_t& p0 = prob_t::One()); +  int decrement(const DishType& d); +  void erase(const DishType& dish); + +  inline int count(const DishType& dish) const { +    const typename MapType::const_iterator i = counts_.find(dish); +    if (i == counts_.end()) return 0; else return i->second.count_; +  } +  inline prob_t prob(const DishType& dish) const { +    return (prob_t(count(dish) + alpha_)) / prob_t(total_customers_ + alpha_); +  } +  inline prob_t prob(const DishType& dish, const prob_t& p0) const { +    return (prob_t(count(dish)) + palpha_ * p0) / prob_t(total_customers_ + alpha_); +  } + private: +  struct TableInfo { +    TableInfo() : count_(), tables_() {} +    int count_;          // total customers eating dish +    int tables_;         // total tables labeled with dish +    std::map<int, int> table_histogram_; // num customers at table -> number tables +  }; +  typedef std::tr1::unordered_map<DishType, TableInfo, Hash> MapType; + +  inline prob_t prob_share_table(const double& customer_count) const { +    if (customer_count) +      return prob_t(customer_count) / prob_t(customer_count + alpha_); +    else +      return prob_t::Zero(); +  } +  inline prob_t prob_new_table(const double& customer_count, const prob_t& p0) const { +    if (customer_count) +      return palpha_ * p0 / prob_t(customer_count + alpha_); +    else +      return p0; +  } + +  MapType counts_; +  const double alpha_; +  const prob_t palpha_; +  int total_customers_; +  int total_tables_; +  RNG* rng_; +}; + +template <typename Dish, typename Hash, typename RNG> +int CRPWithTableTracking<Dish,Hash,RNG>::increment(const Dish& dish, const prob_t& p0) { +  TableInfo& tc = counts_[dish]; + +  //std::cerr << "\nincrement for " << dish << " with p0 " << p0 << "\n"; +  //std::cerr << "\tBEFORE histogram: " << tc.table_histogram_ << " "; +  //std::cerr << "count: " << tc.count_ << " "; +  //std::cerr << 
"tables: " << tc.tables_ << "\n"; + +  // seated at a new or existing table? +  prob_t pshare = prob_share_table(tc.count_); +  prob_t pnew = prob_new_table(tc.count_, p0); + +  //std::cerr << "\t\tP0 " << p0 << " count(dish) " << count(dish) +  //  << " tables " << tc +  //  << " p(share) " << pshare << " p(new) " << pnew << "\n"; + +  int delta = 0; +  if (tc.count_ == 0 || rng_->SelectSample(pshare, pnew) == 1) { +    // assign to a new table +    ++tc.tables_; +    ++tc.table_histogram_[1]; +    ++total_tables_; +    delta = 1; +  } else { +    // can't share a table if there are no other customers +    assert(tc.count_ > 0); + +    // randomly assign to an existing table +    // remove constant denominator from inner loop +    int r = static_cast<int>(rng_->next() * tc.count_); +    for (std::map<int,int>::iterator hit = tc.table_histogram_.begin(); +         hit != tc.table_histogram_.end(); ++hit) { +      r -= hit->first * hit->second; +      if (r <= 0) { +        ++tc.table_histogram_[hit->first+1]; +        --hit->second; +        if (hit->second == 0) +          tc.table_histogram_.erase(hit); +        break; +      } +    } +    if (r > 0) {  +      std::cerr << "CONSISTENCY ERROR: " << tc.count_ << std::endl; +      std::cerr << pshare << std::endl; +      std::cerr << pnew << std::endl; +      std::cerr << r << std::endl; +      abort(); +    } +  } +  ++tc.count_; +  ++total_customers_; +  return delta; +} + +template <typename Dish, typename Hash, typename RNG> +int CRPWithTableTracking<Dish,Hash,RNG>::decrement(const Dish& dish) { +  typename MapType::iterator i = counts_.find(dish); +  if(i == counts_.end()) { +    std::cerr << "MISSING DISH: " << dish << std::endl; +    abort(); +  } + +  int delta = 0; +  TableInfo &tc = i->second; + +  //std::cout << "\ndecrement for " << dish << " with p0 " << p0 << "\n"; +  //std::cout << "\tBEFORE histogram: " << tc.table_histogram << " "; +  //std::cout << "count: " << count(dish) << " "; +  //std::cout << 
"tables: " << tc.tables << "\n"; + +  int r = static_cast<int>(rng_->next() * tc.count_); +  //std::cerr << "FOO: " << r << std::endl; +  for (std::map<int,int>::iterator hit = tc.table_histogram_.begin(); +       hit != tc.table_histogram_.end(); ++hit) { +    r -= (hit->first * hit->second); +    if (r <= 0) { +      if (hit->first > 1) +        tc.table_histogram_[hit->first-1] += 1; +      else { +        --delta; +        --tc.tables_; +        --total_tables_; +      } + +      --hit->second; +      if (hit->second == 0) tc.table_histogram_.erase(hit); +      break; +    } +  } + +  assert(r <= 0); + +  // remove the customer +  --tc.count_; +  --total_customers_; +  assert(tc.count_ >= 0); +  if (tc.count_ == 0) counts_.erase(i); +  return delta; +} + +#endif diff --git a/gi/clda/src/dict.h b/gi/clda/src/dict.h new file mode 100644 index 00000000..72e82e6d --- /dev/null +++ b/gi/clda/src/dict.h @@ -0,0 +1,43 @@ +#ifndef DICT_H_ +#define DICT_H_ + +#include <cassert> +#include <cstring> +#include <tr1/unordered_map> +#include <string> +#include <vector> + +#include <boost/functional/hash.hpp> + +#include "wordid.h" + +class Dict { + typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map; + public: +  Dict() : b0_("<bad0>") { words_.reserve(1000); } +  inline int max() const { return words_.size(); } +  inline WordID Convert(const std::string& word, bool frozen = false) { +    Map::iterator i = d_.find(word); +    if (i == d_.end()) { +      if (frozen) +        return 0; +      words_.push_back(word); +      d_[word] = words_.size(); +      return words_.size(); +    } else { +      return i->second; +    } +  } +  inline const std::string& Convert(const WordID& id) const { +    if (id == 0) return b0_; +    assert(id <= words_.size()); +    return words_[id-1]; +  } +  void clear() { words_.clear(); d_.clear(); } + private: +  const std::string b0_; +  std::vector<std::string> words_; +  Map d_; +}; + +#endif diff --git 
// gi/clda/src/logval.h
#ifndef LOGVAL_H_
#define LOGVAL_H_

#include <iostream>
#include <cstdlib>
#include <cmath>
#include <limits>

// A real number stored as a sign flag plus the natural log of its
// magnitude.  Multiplication and division become additions of logs.
//   s_ == true  means the value is negative
//   v_          is log(|value|); -infinity encodes zero
template <typename T>
class LogVal {
 public:
  // Zero.
  LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}
  // Converts from the linear domain.  s_ is declared before v_, so it is
  // already initialised when the initialiser of v_ reads it.
  explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
  static LogVal<T> One() { return LogVal(1); }
  static LogVal<T> Zero() { return LogVal(); }

  // Sets the value to +exp(v).
  void logeq(const T& v) { s_ = false; v_ = v; }

  LogVal& operator+=(const LogVal& a) {
    if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;  // adding zero
    if (a.s_ == s_) {
      // Same sign: log(x + y) = log(big) + log1p(small / big).
      if (a.v_ < v_) {
        v_ = v_ + std::log1p(std::exp(a.v_ - v_));
      } else {
        v_ = a.v_ + std::log1p(std::exp(v_ - a.v_));
      }
    } else {
      // Opposite signs: subtract magnitudes; the larger magnitude decides
      // the sign of the result.
      if (a.v_ < v_) {
        v_ = v_ + std::log1p(-std::exp(a.v_ - v_));
      } else {
        v_ = a.v_ + std::log1p(-std::exp(v_ - a.v_));
        s_ = !s_;
      }
    }
    return *this;
  }

  LogVal& operator*=(const LogVal& a) {
    s_ = (s_ != a.s_);
    v_ += a.v_;
    return *this;
  }

  LogVal& operator/=(const LogVal& a) {
    s_ = (s_ != a.s_);
    v_ -= a.v_;
    return *this;
  }

  LogVal& operator-=(const LogVal& a) {
    LogVal b = a;
    b.invert();
    return *this += b;
  }

  // Raises the value to `power` in place.  Aborts for negative values.
  LogVal& poweq(const T& power) {
    if (s_) {
      std::cerr << "poweq(T) not implemented when s_ is true\n";
      std::abort();
    } else {
      v_ *= power;
    }
    return *this;
  }

  // Flips the sign.
  void invert() { s_ = !s_; }

  LogVal pow(const T& power) const {
    LogVal res = *this;
    res.poweq(power);
    return res;
  }

  // Converts back to the linear domain.
  operator T() const {
    if (s_) return -std::exp(v_); else return std::exp(v_);
  }

  bool s_;
  T v_;
};

template<typename T>
LogVal<T> operator+(const LogVal<T>& o1, const LogVal<T>& o2) {
  LogVal<T> res(o1);
  res += o2;
  return res;
}

template<typename T>
LogVal<T> operator*(const LogVal<T>& o1, const LogVal<T>& o2) {
  LogVal<T> res(o1);
  res *= o2;
  return res;
}

template<typename T>
LogVal<T> operator/(const LogVal<T>& o1, const LogVal<T>& o2) {
  LogVal<T> res(o1);
  res /= o2;
  return res;
}

template<typename T>
LogVal<T> operator-(const LogVal<T>& o1, const LogVal<T>& o2) {
  LogVal<T> res(o1);
  res -= o2;
  return res;
}

// Natural log of the represented value; NaN for negative values.
template<typename T>
T log(const LogVal<T>& o) {
  if (o.s_) return std::numeric_limits<T>::quiet_NaN();
  return o.v_;
}

template <typename T>
LogVal<T> pow(const LogVal<T>& b, const T& e) {
  return b.pow(e);
}

// Orders by represented value.
template <typename T>
bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  if (lhs.s_ != rhs.s_) {
    // Signs differ: lhs is smaller exactly when it is the negative one.
    return lhs.s_;
  }
  if (lhs.s_) {
    // Both negative: the larger magnitude is the smaller value.
    return lhs.v_ > rhs.v_;
  }
  return lhs.v_ < rhs.v_;
}

// Disabled in the original: these comparisons ignore the sign flag.
#if 0
template <typename T>
bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  return (lhs.v_ <= rhs.v_);
}

template <typename T>
bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  return (lhs.v_ > rhs.v_);
}

template <typename T>
bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  return (lhs.v_ >= rhs.v_);
}
#endif

template <typename T>
bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);
}

template <typename T>
bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
  return !(lhs == rhs);
}

#endif
<algorithm> +#include <functional> +#include <numeric> +#include <iostream> +#include <fstream> +#include <vector> + +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_real.hpp> +#include <boost/random/variate_generator.hpp> +#include <boost/random/normal_distribution.hpp> +#include <boost/random/poisson_distribution.hpp> + +#include "prob.h" + +struct SampleSet; + +template <typename RNG> +struct RandomNumberGenerator { +  static uint32_t GetTrulyRandomSeed() { +    uint32_t seed; +    std::ifstream r("/dev/urandom"); +    if (r) { +      r.read((char*)&seed,sizeof(uint32_t)); +    } +    if (r.fail() || !r) { +      std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl; +      seed = time(NULL); +    } +    std::cerr << "Seeding random number sequence to " << seed << std::endl; +    return seed; +  } + +  RandomNumberGenerator() : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) { +    uint32_t seed = GetTrulyRandomSeed(); +    m_generator.seed(seed); +  } +  explicit RandomNumberGenerator(uint32_t seed) : m_dist(0,1), m_generator(), m_random(m_generator,m_dist) { +    if (!seed) seed = GetTrulyRandomSeed(); +    m_generator.seed(seed); +  } + +  size_t SelectSample(const prob_t& a, const prob_t& b, double T = 1.0) { +    if (T == 1.0) { +      if (this->next() > (a / (a + b))) return 1; else return 0; +    } else { +      assert(!"not implemented"); +    } +  } + +  // T is the annealing temperature, if desired +  size_t SelectSample(const SampleSet& ss, double T = 1.0); + +  // draw a value from U(0,1) +  double next() {return m_random();} + +  // draw a value from N(mean,var) +  double NextNormal(double mean, double var) { +    return boost::normal_distribution<double>(mean, var)(m_random); +  } + +  // draw a value from a Poisson distribution +  // lambda must be greater than 0 +  int NextPoisson(int lambda) { +    return boost::poisson_distribution<int>(lambda)(m_random); +  } + +  bool 
AcceptMetropolisHastings(const prob_t& p_cur, +                                const prob_t& p_prev, +                                const prob_t& q_cur, +                                const prob_t& q_prev) { +    const prob_t a = (p_cur / p_prev) * (q_prev / q_cur); +    if (log(a) >= 0.0) return true; +    return (prob_t(this->next()) < a); +  } + + private: +  boost::uniform_real<> m_dist; +  RNG m_generator; +  boost::variate_generator<RNG&, boost::uniform_real<> > m_random; +}; + +typedef RandomNumberGenerator<boost::mt19937> MT19937; + +class SampleSet { + public: +  const prob_t& operator[](int i) const { return m_scores[i]; } +  prob_t& operator[](int i) { return m_scores[i]; } +  bool empty() const { return m_scores.empty(); } +  void add(const prob_t& s) { m_scores.push_back(s); } +  void clear() { m_scores.clear(); } +  size_t size() const { return m_scores.size(); } +  void resize(int size) { m_scores.resize(size); } +  std::vector<prob_t> m_scores; +}; + +template <typename RNG> +size_t RandomNumberGenerator<RNG>::SelectSample(const SampleSet& ss, double T) { +  assert(T > 0.0); +  assert(ss.m_scores.size() > 0); +  if (ss.m_scores.size() == 1) return 0; +  const prob_t annealing_factor(1.0 / T); +  const bool anneal = (annealing_factor != prob_t::One()); +  prob_t sum = prob_t::Zero(); +  if (anneal) { +    for (int i = 0; i < ss.m_scores.size(); ++i) +      sum += ss.m_scores[i].pow(annealing_factor);  // p^(1/T) +  } else { +    sum = std::accumulate(ss.m_scores.begin(), ss.m_scores.end(), prob_t::Zero()); +  } +  //for (size_t i = 0; i < ss.m_scores.size(); ++i) std::cerr << ss.m_scores[i] << ","; +  //std::cerr << std::endl; +  +  prob_t random(this->next());    // random number between 0 and 1 +  random *= sum;                  // scale with normalization factor +  //std::cerr << "Random number " << random << std::endl; + +  //now figure out which sample +  size_t position = 1; +  sum = ss.m_scores[0]; +  if (anneal) { +    
sum.poweq(annealing_factor); +    for (; position < ss.m_scores.size() && sum < random; ++position)  +      sum += ss.m_scores[position].pow(annealing_factor); +  } else { +    for (; position < ss.m_scores.size() && sum < random; ++position)  +      sum += ss.m_scores[position]; +  } +  //std::cout << "random: " << random <<  " sample: " << position << std::endl; +  //std::cerr << "Sample: " << position-1 << std::endl; +  //exit(1); +  return position-1; +} + +#endif diff --git a/gi/clda/src/tdict.h b/gi/clda/src/tdict.h new file mode 100644 index 00000000..97f145a1 --- /dev/null +++ b/gi/clda/src/tdict.h @@ -0,0 +1,49 @@ +#ifndef _TDICT_H_ +#define _TDICT_H_ + +#include <string> +#include <vector> +#include "wordid.h" +#include "dict.h" + +class Vocab; + +struct TD { + +  static Dict dict_; +  static std::string empty; +  static std::string space; + +  static std::string GetString(const std::vector<WordID>& str) { +    std::string res; +    for (std::vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i) +      res += (i == str.begin() ? 
// gi/clda/src/timer.h
#ifndef _TIMER_STATS_H_
#define _TIMER_STATS_H_

#include <ctime>

// Stopwatch over std::clock(), which reports processor time used by the
// program rather than wall-clock time.
struct Timer {
  Timer() { Reset(); }
  // Restarts the measurement from now.
  void Reset() {
    start_t = std::clock();
  }
  // Seconds of processor time since construction or the last Reset().
  double Elapsed() const {
    const std::clock_t end_t = std::clock();
    // CLOCKS_PER_SEC is the tick rate of std::clock() on this platform.
    const double elapsed = static_cast<double>(end_t - start_t) / CLOCKS_PER_SEC;
    return elapsed;
  }
 private:
  std::clock_t start_t;
};

#endif

// gi/clda/src/wordid.h
#ifndef _WORD_ID_H_
#define _WORD_ID_H_

// Integer handle for a word.
typedef int WordID;

#endif
