summaryrefslogtreecommitdiff
path: root/training
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-05-08 15:19:40 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-05-08 15:19:40 -0400
commit4ecae3b2e34a45dfdf22f4f244fbbcd66c8635b0 (patch)
tree2bdbb243cede55e2277b497aaa97f1f707d2989e /training
parentce9a18da4516f53ecf23fb7522562aef2d470397 (diff)
add liblbfgs, which is much less crappy than the current one
Diffstat (limited to 'training')
-rw-r--r--training/liblbfgs/Makefile.am13
-rw-r--r--training/liblbfgs/arithmetic_ansi.h133
-rw-r--r--training/liblbfgs/arithmetic_sse_double.h294
-rw-r--r--training/liblbfgs/arithmetic_sse_float.h298
-rw-r--r--training/liblbfgs/lbfgs++.h129
-rw-r--r--training/liblbfgs/lbfgs.c1371
-rw-r--r--training/liblbfgs/lbfgs.h745
-rw-r--r--training/liblbfgs/ll_test.cc32
8 files changed, 3015 insertions, 0 deletions
diff --git a/training/liblbfgs/Makefile.am b/training/liblbfgs/Makefile.am
new file mode 100644
index 00000000..9327c47f
--- /dev/null
+++ b/training/liblbfgs/Makefile.am
@@ -0,0 +1,13 @@
+TESTS = ll_test
+noinst_PROGRAMS = ll_test
+ll_test_SOURCES = ll_test.cc
+
+noinst_LIBRARIES = liblbfgs.a
+
+liblbfgs_a_SOURCES = lbfgs.c
+
+################################################################
+# do NOT NOT NOT add any other -I includes NO NO NO NO NO ######
+AM_LDFLAGS = liblbfgs.a -lz
+AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -I. -I..
+################################################################
diff --git a/training/liblbfgs/arithmetic_ansi.h b/training/liblbfgs/arithmetic_ansi.h
new file mode 100644
index 00000000..fa390da3
--- /dev/null
+++ b/training/liblbfgs/arithmetic_ansi.h
@@ -0,0 +1,133 @@
+/*
+ * ANSI C implementation of vector operations.
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#include <memory.h>
+
+#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+ void *memblock = malloc(size);
+ if (memblock) {
+ memset(memblock, 0, size);
+ }
+ return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+ free(memblock);
+}
+
+inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ x[i] = c;
+ }
+}
+
+inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ y[i] = x[i];
+ }
+}
+
+inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ y[i] = -x[i];
+ }
+}
+
+inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ y[i] += c * x[i];
+ }
+}
+
+inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ z[i] = x[i] - y[i];
+ }
+}
+
+inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ y[i] *= c;
+ }
+}
+
+inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+ int i;
+
+ for (i = 0;i < n;++i) {
+ y[i] *= x[i];
+ }
+}
+
+inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+ int i;
+ *s = 0.;
+ for (i = 0;i < n;++i) {
+ *s += x[i] * y[i];
+ }
+}
+
+inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+ vecdot(s, x, x, n);
+ *s = (lbfgsfloatval_t)sqrt(*s);
+}
+
+inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+ vec2norm(s, x, n);
+ *s = (lbfgsfloatval_t)(1.0 / *s);
+}
diff --git a/training/liblbfgs/arithmetic_sse_double.h b/training/liblbfgs/arithmetic_sse_double.h
new file mode 100644
index 00000000..83405eeb
--- /dev/null
+++ b/training/liblbfgs/arithmetic_sse_double.h
@@ -0,0 +1,294 @@
+/*
+ * SSE2 implementation of vector oprations (64bit double).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#ifndef __APPLE__
+#include <malloc.h>
+#endif
+#include <memory.h>
+
+#if 1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*1400 <= _MSC_VER*/
+
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif/*HAVE_EMMINTRIN_H*/
+
+inline static void* vecalloc(size_t size)
+{
+#if defined(_MSC_VER)
+ void *memblock = _aligned_malloc(size, 16);
+#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
+ void *memblock = malloc(size);
+#else
+ void *memblock = NULL, *p = NULL;
+ if (posix_memalign(&p, 16, size) == 0) {
+ memblock = p;
+ }
+#endif
+ if (memblock != NULL) {
+ memset(memblock, 0, size);
+ }
+ return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+#ifdef _MSC_VER
+ _aligned_free(memblock);
+#else
+ free(memblock);
+#endif
+}
+
+#define fsigndiff(x, y) \
+ ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
+
+#define vecset(x, c, n) \
+{ \
+ int i; \
+ __m128d XMM0 = _mm_set1_pd(c); \
+ for (i = 0;i < (n);i += 8) { \
+ _mm_store_pd((x)+i , XMM0); \
+ _mm_store_pd((x)+i+2, XMM0); \
+ _mm_store_pd((x)+i+4, XMM0); \
+ _mm_store_pd((x)+i+6, XMM0); \
+ } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 8) { \
+ __m128d XMM0 = _mm_load_pd((x)+i ); \
+ __m128d XMM1 = _mm_load_pd((x)+i+2); \
+ __m128d XMM2 = _mm_load_pd((x)+i+4); \
+ __m128d XMM3 = _mm_load_pd((x)+i+6); \
+ _mm_store_pd((y)+i , XMM0); \
+ _mm_store_pd((y)+i+2, XMM1); \
+ _mm_store_pd((y)+i+4, XMM2); \
+ _mm_store_pd((y)+i+6, XMM3); \
+ } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 8) { \
+ __m128d XMM0 = _mm_setzero_pd(); \
+ __m128d XMM1 = _mm_setzero_pd(); \
+ __m128d XMM2 = _mm_setzero_pd(); \
+ __m128d XMM3 = _mm_setzero_pd(); \
+ __m128d XMM4 = _mm_load_pd((x)+i ); \
+ __m128d XMM5 = _mm_load_pd((x)+i+2); \
+ __m128d XMM6 = _mm_load_pd((x)+i+4); \
+ __m128d XMM7 = _mm_load_pd((x)+i+6); \
+ XMM0 = _mm_sub_pd(XMM0, XMM4); \
+ XMM1 = _mm_sub_pd(XMM1, XMM5); \
+ XMM2 = _mm_sub_pd(XMM2, XMM6); \
+ XMM3 = _mm_sub_pd(XMM3, XMM7); \
+ _mm_store_pd((y)+i , XMM0); \
+ _mm_store_pd((y)+i+2, XMM1); \
+ _mm_store_pd((y)+i+4, XMM2); \
+ _mm_store_pd((y)+i+6, XMM3); \
+ } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+ int i; \
+ __m128d XMM7 = _mm_set1_pd(c); \
+ for (i = 0;i < (n);i += 4) { \
+ __m128d XMM0 = _mm_load_pd((x)+i ); \
+ __m128d XMM1 = _mm_load_pd((x)+i+2); \
+ __m128d XMM2 = _mm_load_pd((y)+i ); \
+ __m128d XMM3 = _mm_load_pd((y)+i+2); \
+ XMM0 = _mm_mul_pd(XMM0, XMM7); \
+ XMM1 = _mm_mul_pd(XMM1, XMM7); \
+ XMM2 = _mm_add_pd(XMM2, XMM0); \
+ XMM3 = _mm_add_pd(XMM3, XMM1); \
+ _mm_store_pd((y)+i , XMM2); \
+ _mm_store_pd((y)+i+2, XMM3); \
+ } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 8) { \
+ __m128d XMM0 = _mm_load_pd((x)+i ); \
+ __m128d XMM1 = _mm_load_pd((x)+i+2); \
+ __m128d XMM2 = _mm_load_pd((x)+i+4); \
+ __m128d XMM3 = _mm_load_pd((x)+i+6); \
+ __m128d XMM4 = _mm_load_pd((y)+i ); \
+ __m128d XMM5 = _mm_load_pd((y)+i+2); \
+ __m128d XMM6 = _mm_load_pd((y)+i+4); \
+ __m128d XMM7 = _mm_load_pd((y)+i+6); \
+ XMM0 = _mm_sub_pd(XMM0, XMM4); \
+ XMM1 = _mm_sub_pd(XMM1, XMM5); \
+ XMM2 = _mm_sub_pd(XMM2, XMM6); \
+ XMM3 = _mm_sub_pd(XMM3, XMM7); \
+ _mm_store_pd((z)+i , XMM0); \
+ _mm_store_pd((z)+i+2, XMM1); \
+ _mm_store_pd((z)+i+4, XMM2); \
+ _mm_store_pd((z)+i+6, XMM3); \
+ } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+ int i; \
+ __m128d XMM7 = _mm_set1_pd(c); \
+ for (i = 0;i < (n);i += 4) { \
+ __m128d XMM0 = _mm_load_pd((y)+i ); \
+ __m128d XMM1 = _mm_load_pd((y)+i+2); \
+ XMM0 = _mm_mul_pd(XMM0, XMM7); \
+ XMM1 = _mm_mul_pd(XMM1, XMM7); \
+ _mm_store_pd((y)+i , XMM0); \
+ _mm_store_pd((y)+i+2, XMM1); \
+ } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 8) { \
+ __m128d XMM0 = _mm_load_pd((x)+i ); \
+ __m128d XMM1 = _mm_load_pd((x)+i+2); \
+ __m128d XMM2 = _mm_load_pd((x)+i+4); \
+ __m128d XMM3 = _mm_load_pd((x)+i+6); \
+ __m128d XMM4 = _mm_load_pd((y)+i ); \
+ __m128d XMM5 = _mm_load_pd((y)+i+2); \
+ __m128d XMM6 = _mm_load_pd((y)+i+4); \
+ __m128d XMM7 = _mm_load_pd((y)+i+6); \
+ XMM4 = _mm_mul_pd(XMM4, XMM0); \
+ XMM5 = _mm_mul_pd(XMM5, XMM1); \
+ XMM6 = _mm_mul_pd(XMM6, XMM2); \
+ XMM7 = _mm_mul_pd(XMM7, XMM3); \
+ _mm_store_pd((y)+i , XMM4); \
+ _mm_store_pd((y)+i+2, XMM5); \
+ _mm_store_pd((y)+i+4, XMM6); \
+ _mm_store_pd((y)+i+6, XMM7); \
+ } \
+}
+
+
+
+#if 3 <= __SSE__ || defined(__SSE3__)
+/*
+ Horizontal add with haddps SSE3 instruction. The work register (rw)
+ is unused.
+ */
+#define __horizontal_sum(r, rw) \
+ r = _mm_hadd_ps(r, r); \
+ r = _mm_hadd_ps(r, r);
+
+#else
+/*
+ Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+ rw = r; \
+ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+ r = _mm_add_ps(r, rw); \
+ rw = r; \
+ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+ r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+ int i; \
+ __m128d XMM0 = _mm_setzero_pd(); \
+ __m128d XMM1 = _mm_setzero_pd(); \
+ __m128d XMM2, XMM3, XMM4, XMM5; \
+ for (i = 0;i < (n);i += 4) { \
+ XMM2 = _mm_load_pd((x)+i ); \
+ XMM3 = _mm_load_pd((x)+i+2); \
+ XMM4 = _mm_load_pd((y)+i ); \
+ XMM5 = _mm_load_pd((y)+i+2); \
+ XMM2 = _mm_mul_pd(XMM2, XMM4); \
+ XMM3 = _mm_mul_pd(XMM3, XMM5); \
+ XMM0 = _mm_add_pd(XMM0, XMM2); \
+ XMM1 = _mm_add_pd(XMM1, XMM3); \
+ } \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ _mm_store_sd((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+ int i; \
+ __m128d XMM0 = _mm_setzero_pd(); \
+ __m128d XMM1 = _mm_setzero_pd(); \
+ __m128d XMM2, XMM3, XMM4, XMM5; \
+ for (i = 0;i < (n);i += 4) { \
+ XMM2 = _mm_load_pd((x)+i ); \
+ XMM3 = _mm_load_pd((x)+i+2); \
+ XMM4 = XMM2; \
+ XMM5 = XMM3; \
+ XMM2 = _mm_mul_pd(XMM2, XMM4); \
+ XMM3 = _mm_mul_pd(XMM3, XMM5); \
+ XMM0 = _mm_add_pd(XMM0, XMM2); \
+ XMM1 = _mm_add_pd(XMM1, XMM3); \
+ } \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ XMM0 = _mm_sqrt_pd(XMM0); \
+ _mm_store_sd((s), XMM0); \
+}
+
+
+#define vec2norminv(s, x, n) \
+{ \
+ int i; \
+ __m128d XMM0 = _mm_setzero_pd(); \
+ __m128d XMM1 = _mm_setzero_pd(); \
+ __m128d XMM2, XMM3, XMM4, XMM5; \
+ for (i = 0;i < (n);i += 4) { \
+ XMM2 = _mm_load_pd((x)+i ); \
+ XMM3 = _mm_load_pd((x)+i+2); \
+ XMM4 = XMM2; \
+ XMM5 = XMM3; \
+ XMM2 = _mm_mul_pd(XMM2, XMM4); \
+ XMM3 = _mm_mul_pd(XMM3, XMM5); \
+ XMM0 = _mm_add_pd(XMM0, XMM2); \
+ XMM1 = _mm_add_pd(XMM1, XMM3); \
+ } \
+ XMM2 = _mm_set1_pd(1.0); \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+ XMM0 = _mm_add_pd(XMM0, XMM1); \
+ XMM0 = _mm_sqrt_pd(XMM0); \
+ XMM2 = _mm_div_pd(XMM2, XMM0); \
+ _mm_store_sd((s), XMM2); \
+}
diff --git a/training/liblbfgs/arithmetic_sse_float.h b/training/liblbfgs/arithmetic_sse_float.h
new file mode 100644
index 00000000..835b8aa8
--- /dev/null
+++ b/training/liblbfgs/arithmetic_sse_float.h
@@ -0,0 +1,298 @@
+/*
+ * SSE/SSE3 implementation of vector oprations (32bit float).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#ifndef __APPLE__
+#include <malloc.h>
+#endif
+#include <memory.h>
+
+#if 1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*_MSC_VER*/
+
+#if HAVE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif/*HAVE_XMMINTRIN_H*/
+
+#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+#if defined(_MSC_VER)
+ void *memblock = _aligned_malloc(size, 16);
+#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
+ void *memblock = malloc(size);
+#else
+ void *memblock = NULL, *p = NULL;
+ if (posix_memalign(&p, 16, size) == 0) {
+ memblock = p;
+ }
+#endif
+ if (memblock != NULL) {
+ memset(memblock, 0, size);
+ }
+ return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+ _aligned_free(memblock);
+}
+
+#define vecset(x, c, n) \
+{ \
+ int i; \
+ __m128 XMM0 = _mm_set_ps1(c); \
+ for (i = 0;i < (n);i += 16) { \
+ _mm_store_ps((x)+i , XMM0); \
+ _mm_store_ps((x)+i+ 4, XMM0); \
+ _mm_store_ps((x)+i+ 8, XMM0); \
+ _mm_store_ps((x)+i+12, XMM0); \
+ } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 16) { \
+ __m128 XMM0 = _mm_load_ps((x)+i ); \
+ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+ __m128 XMM3 = _mm_load_ps((x)+i+12); \
+ _mm_store_ps((y)+i , XMM0); \
+ _mm_store_ps((y)+i+ 4, XMM1); \
+ _mm_store_ps((y)+i+ 8, XMM2); \
+ _mm_store_ps((y)+i+12, XMM3); \
+ } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+ int i; \
+ const uint32_t mask = 0x80000000; \
+ __m128 XMM4 = _mm_load_ps1((float*)&mask); \
+ for (i = 0;i < (n);i += 16) { \
+ __m128 XMM0 = _mm_load_ps((x)+i ); \
+ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+ __m128 XMM3 = _mm_load_ps((x)+i+12); \
+ XMM0 = _mm_xor_ps(XMM0, XMM4); \
+ XMM1 = _mm_xor_ps(XMM1, XMM4); \
+ XMM2 = _mm_xor_ps(XMM2, XMM4); \
+ XMM3 = _mm_xor_ps(XMM3, XMM4); \
+ _mm_store_ps((y)+i , XMM0); \
+ _mm_store_ps((y)+i+ 4, XMM1); \
+ _mm_store_ps((y)+i+ 8, XMM2); \
+ _mm_store_ps((y)+i+12, XMM3); \
+ } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+ int i; \
+ __m128 XMM7 = _mm_set_ps1(c); \
+ for (i = 0;i < (n);i += 8) { \
+ __m128 XMM0 = _mm_load_ps((x)+i ); \
+ __m128 XMM1 = _mm_load_ps((x)+i+4); \
+ __m128 XMM2 = _mm_load_ps((y)+i ); \
+ __m128 XMM3 = _mm_load_ps((y)+i+4); \
+ XMM0 = _mm_mul_ps(XMM0, XMM7); \
+ XMM1 = _mm_mul_ps(XMM1, XMM7); \
+ XMM2 = _mm_add_ps(XMM2, XMM0); \
+ XMM3 = _mm_add_ps(XMM3, XMM1); \
+ _mm_store_ps((y)+i , XMM2); \
+ _mm_store_ps((y)+i+4, XMM3); \
+ } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 16) { \
+ __m128 XMM0 = _mm_load_ps((x)+i ); \
+ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+ __m128 XMM3 = _mm_load_ps((x)+i+12); \
+ __m128 XMM4 = _mm_load_ps((y)+i ); \
+ __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+ __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+ __m128 XMM7 = _mm_load_ps((y)+i+12); \
+ XMM0 = _mm_sub_ps(XMM0, XMM4); \
+ XMM1 = _mm_sub_ps(XMM1, XMM5); \
+ XMM2 = _mm_sub_ps(XMM2, XMM6); \
+ XMM3 = _mm_sub_ps(XMM3, XMM7); \
+ _mm_store_ps((z)+i , XMM0); \
+ _mm_store_ps((z)+i+ 4, XMM1); \
+ _mm_store_ps((z)+i+ 8, XMM2); \
+ _mm_store_ps((z)+i+12, XMM3); \
+ } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+ int i; \
+ __m128 XMM7 = _mm_set_ps1(c); \
+ for (i = 0;i < (n);i += 8) { \
+ __m128 XMM0 = _mm_load_ps((y)+i ); \
+ __m128 XMM1 = _mm_load_ps((y)+i+4); \
+ XMM0 = _mm_mul_ps(XMM0, XMM7); \
+ XMM1 = _mm_mul_ps(XMM1, XMM7); \
+ _mm_store_ps((y)+i , XMM0); \
+ _mm_store_ps((y)+i+4, XMM1); \
+ } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+ int i; \
+ for (i = 0;i < (n);i += 16) { \
+ __m128 XMM0 = _mm_load_ps((x)+i ); \
+ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+ __m128 XMM3 = _mm_load_ps((x)+i+12); \
+ __m128 XMM4 = _mm_load_ps((y)+i ); \
+ __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+ __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+ __m128 XMM7 = _mm_load_ps((y)+i+12); \
+ XMM4 = _mm_mul_ps(XMM4, XMM0); \
+ XMM5 = _mm_mul_ps(XMM5, XMM1); \
+ XMM6 = _mm_mul_ps(XMM6, XMM2); \
+ XMM7 = _mm_mul_ps(XMM7, XMM3); \
+ _mm_store_ps((y)+i , XMM4); \
+ _mm_store_ps((y)+i+ 4, XMM5); \
+ _mm_store_ps((y)+i+ 8, XMM6); \
+ _mm_store_ps((y)+i+12, XMM7); \
+ } \
+}
+
+
+
+#if 3 <= __SSE__ || defined(__SSE3__)
+/*
+ Horizontal add with haddps SSE3 instruction. The work register (rw)
+ is unused.
+ */
+#define __horizontal_sum(r, rw) \
+ r = _mm_hadd_ps(r, r); \
+ r = _mm_hadd_ps(r, r);
+
+#else
+/*
+ Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+ rw = r; \
+ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+ r = _mm_add_ps(r, rw); \
+ rw = r; \
+ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+ r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+ int i; \
+ __m128 XMM0 = _mm_setzero_ps(); \
+ __m128 XMM1 = _mm_setzero_ps(); \
+ __m128 XMM2, XMM3, XMM4, XMM5; \
+ for (i = 0;i < (n);i += 8) { \
+ XMM2 = _mm_load_ps((x)+i ); \
+ XMM3 = _mm_load_ps((x)+i+4); \
+ XMM4 = _mm_load_ps((y)+i ); \
+ XMM5 = _mm_load_ps((y)+i+4); \
+ XMM2 = _mm_mul_ps(XMM2, XMM4); \
+ XMM3 = _mm_mul_ps(XMM3, XMM5); \
+ XMM0 = _mm_add_ps(XMM0, XMM2); \
+ XMM1 = _mm_add_ps(XMM1, XMM3); \
+ } \
+ XMM0 = _mm_add_ps(XMM0, XMM1); \
+ __horizontal_sum(XMM0, XMM1); \
+ _mm_store_ss((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+ int i; \
+ __m128 XMM0 = _mm_setzero_ps(); \
+ __m128 XMM1 = _mm_setzero_ps(); \
+ __m128 XMM2, XMM3; \
+ for (i = 0;i < (n);i += 8) { \
+ XMM2 = _mm_load_ps((x)+i ); \
+ XMM3 = _mm_load_ps((x)+i+4); \
+ XMM2 = _mm_mul_ps(XMM2, XMM2); \
+ XMM3 = _mm_mul_ps(XMM3, XMM3); \
+ XMM0 = _mm_add_ps(XMM0, XMM2); \
+ XMM1 = _mm_add_ps(XMM1, XMM3); \
+ } \
+ XMM0 = _mm_add_ps(XMM0, XMM1); \
+ __horizontal_sum(XMM0, XMM1); \
+ XMM2 = XMM0; \
+ XMM1 = _mm_rsqrt_ss(XMM0); \
+ XMM3 = XMM1; \
+ XMM1 = _mm_mul_ss(XMM1, XMM1); \
+ XMM1 = _mm_mul_ss(XMM1, XMM3); \
+ XMM1 = _mm_mul_ss(XMM1, XMM0); \
+ XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+ XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+ XMM3 = _mm_add_ss(XMM3, XMM1); \
+ XMM3 = _mm_mul_ss(XMM3, XMM2); \
+ _mm_store_ss((s), XMM3); \
+}
+
+#define vec2norminv(s, x, n) \
+{ \
+ int i; \
+ __m128 XMM0 = _mm_setzero_ps(); \
+ __m128 XMM1 = _mm_setzero_ps(); \
+ __m128 XMM2, XMM3; \
+ for (i = 0;i < (n);i += 16) { \
+ XMM2 = _mm_load_ps((x)+i ); \
+ XMM3 = _mm_load_ps((x)+i+4); \
+ XMM2 = _mm_mul_ps(XMM2, XMM2); \
+ XMM3 = _mm_mul_ps(XMM3, XMM3); \
+ XMM0 = _mm_add_ps(XMM0, XMM2); \
+ XMM1 = _mm_add_ps(XMM1, XMM3); \
+ } \
+ XMM0 = _mm_add_ps(XMM0, XMM1); \
+ __horizontal_sum(XMM0, XMM1); \
+ XMM2 = XMM0; \
+ XMM1 = _mm_rsqrt_ss(XMM0); \
+ XMM3 = XMM1; \
+ XMM1 = _mm_mul_ss(XMM1, XMM1); \
+ XMM1 = _mm_mul_ss(XMM1, XMM3); \
+ XMM1 = _mm_mul_ss(XMM1, XMM0); \
+ XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+ XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+ XMM3 = _mm_add_ss(XMM3, XMM1); \
+ _mm_store_ss((s), XMM3); \
+}
diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
new file mode 100644
index 00000000..119511e5
--- /dev/null
+++ b/training/liblbfgs/lbfgs++.h
@@ -0,0 +1,129 @@
+// THIS IS CDEC'S C++ WRAPPER AROUND LIBLBFGS
+// liblbfgs is
+// Copyright (c) 1990, Jorge Nocedal
+// Copyright (c) 2007-2010, Naoaki Okazaki
+//
+// see https://github.com/chokkan/liblbfgs for more details
+//
+#ifndef __LBFGSPP_H__
+#define __LBFGSPP_H__
+
+#include <vector>
+#include "liblbfgs/lbfgs.h"
+
+// Function must be lbfgsfloatval_t f(const double* x_start, double* g_start)
+template <typename Function>
+class LBFGS {
+ public:
+ LBFGS(size_t n, // number of variables
+ const Function& f, // function to optimize
+ double l1_c = 0.0, // l1 penalty strength
+ size_t m = 10 // number of memory buffers
+ // TODO should use custom allocator here:
+ ) : p_x(new std::vector<lbfgsfloatval_t>(n, 0.0)),
+ owned(true),
+ m_x(*p_x),
+ func(f) {
+ Init(m, l1_c);
+ }
+
+ // constructor where external vector storage for variables is used
+ LBFGS(std::vector<lbfgsfloatval_t>* px,
+ const Function& f,
+ double l1_c = 0.0, // l1 penalty strength
+ size_t m = 10
+ ) : p_x(px),
+ owned(false),
+ m_x(*p_x),
+ func(f) {
+ Init(m, l1_c);
+ }
+
+ ~LBFGS() {
+ if (owned) delete p_x;
+ }
+ const lbfgsfloatval_t& operator[](size_t i) const { return m_x[i]; }
+ lbfgsfloatval_t& operator[](size_t i) { return m_x[i]; }
+ size_t size() const { return m_x.size(); }
+
+ int Optimize() {
+ lbfgsfloatval_t fx;
+ int ret = lbfgs(m_x.size(), &m_x[0], &fx, _evaluate, _progress, this, &param);
+ std::cerr << "L-BFGS optimization terminated with status code = " << ret << std::endl;
+ std::cerr << " fx = " << fx << std::endl;
+ return ret;
+ }
+
+ private:
+ void Init(size_t m, double l1_c) {
+ lbfgs_parameter_init(&param);
+ param.m = m;
+ if (l1_c > 0.0) {
+ param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
+ param.orthantwise_c = 1.0;
+ }
+ }
+
+ static lbfgsfloatval_t _evaluate(
+ void *instance,
+ const lbfgsfloatval_t *x,
+ lbfgsfloatval_t *g,
+ const int n,
+ const lbfgsfloatval_t step) {
+ return reinterpret_cast<LBFGS<Function>*>(instance)->evaluate(x, g, n, step);
+ }
+
+ lbfgsfloatval_t evaluate(const lbfgsfloatval_t *x,
+ lbfgsfloatval_t *g,
+ const int n,
+ const lbfgsfloatval_t step) {
+ (void) n;
+ (void) step;
+ return func(x, g);
+ }
+
+ static int _progress(
+ void *instance,
+ const lbfgsfloatval_t *x,
+ const lbfgsfloatval_t *g,
+ const lbfgsfloatval_t fx,
+ const lbfgsfloatval_t xnorm,
+ const lbfgsfloatval_t gnorm,
+ const lbfgsfloatval_t step,
+ int n,
+ int k,
+ int ls
+ )
+ {
+ return reinterpret_cast<LBFGS<Function>*>(instance)
+ ->progress(x, g, fx, xnorm, gnorm, step, n, k, ls);
+ }
+
+ int progress(
+ const lbfgsfloatval_t *x,
+ const lbfgsfloatval_t *g,
+ const lbfgsfloatval_t fx,
+ const lbfgsfloatval_t xnorm,
+ const lbfgsfloatval_t gnorm,
+ const lbfgsfloatval_t step,
+ int n,
+ int k,
+ int ls
+ )
+ {
+ (void) n;
+ (void) k;
+ std::cerr << "Iteration " << k << ':' << std::endl;
+ std::cerr << " fx = " << fx << ", x[0] = " << x[0] << ", x[1] = " << x[1] << std::endl;
+ std::cerr << " xnorm = " << xnorm << ", gnorm = " << gnorm << ", step = " << step << std::endl << std::endl;
+ return 0;
+ }
+ std::vector<lbfgsfloatval_t>* p_x;
+ const bool owned;
+ std::vector<lbfgsfloatval_t>& m_x;
+ const Function& func;
+ lbfgs_parameter_t param;
+
+};
+
+#endif
diff --git a/training/liblbfgs/lbfgs.c b/training/liblbfgs/lbfgs.c
new file mode 100644
index 00000000..0add48f4
--- /dev/null
+++ b/training/liblbfgs/lbfgs.c
@@ -0,0 +1,1371 @@
+/*
+ * Limited memory BFGS (L-BFGS).
+ *
+ * Copyright (c) 1990, Jorge Nocedal
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+/*
+This library is a C port of the FORTRAN implementation of Limited-memory
+Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
+The original FORTRAN source code is available at:
+http://www.ece.northwestern.edu/~nocedal/lbfgs.html
+
+The L-BFGS algorithm is described in:
+ - Jorge Nocedal.
+ Updating Quasi-Newton Matrices with Limited Storage.
+ <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
+ - Dong C. Liu and Jorge Nocedal.
+ On the limited memory BFGS method for large scale optimization.
+ <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
+
+The line search algorithms used in this implementation are described in:
+ - John E. Dennis and Robert B. Schnabel.
+ <i>Numerical Methods for Unconstrained Optimization and Nonlinear
+ Equations</i>, Englewood Cliffs, 1983.
+ - Jorge J. More and David J. Thuente.
+ Line search algorithm with guaranteed sufficient decrease.
+ <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
+ pp. 286-307, 1994.
+
+This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)
+method presented in:
+ - Galen Andrew and Jianfeng Gao.
+ Scalable training of L1-regularized log-linear models.
+ In <i>Proceedings of the 24th International Conference on Machine
+ Learning (ICML 2007)</i>, pp. 33-40, 2007.
+
+I would like to thank the original author, Jorge Nocedal, who has been
+distributing the effieicnt and explanatory implementation in an open source
+licence.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif/*HAVE_CONFIG_H*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "liblbfgs/lbfgs.h"
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif/*_MSC_VER*/
+
+#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
+/* Use SSE2 optimization for 64bit double precision. */
+#include "arithmetic_sse_double.h"
+
+#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
+/* Use SSE optimization for 32bit float precision. */
+#include "arithmetic_sse_float.h"
+
+#else
+/* No CPU specific optimization. */
+#include "arithmetic_ansi.h"
+
+#endif
+
+#define min2(a, b) ((a) <= (b) ? (a) : (b))
+#define max2(a, b) ((a) >= (b) ? (a) : (b))
+#define max3(a, b, c) max2(max2((a), (b)), (c));
+
+struct tag_callback_data {
+ int n;
+ void *instance;
+ lbfgs_evaluate_t proc_evaluate;
+ lbfgs_progress_t proc_progress;
+};
+typedef struct tag_callback_data callback_data_t;
+
+struct tag_iteration_data {
+ lbfgsfloatval_t alpha;
+ lbfgsfloatval_t *s; /* [n] */
+ lbfgsfloatval_t *y; /* [n] */
+ lbfgsfloatval_t ys; /* vecdot(y, s) */
+};
+typedef struct tag_iteration_data iteration_data_t;
+
+static const lbfgs_parameter_t _defparam = {
+ 6, 1e-5, 0, 1e-5,
+ 0, LBFGS_LINESEARCH_DEFAULT, 40,
+ 1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16,
+ 0.0, 0, -1,
+};
+
+/* Forward function declarations. */
+
+typedef int (*line_search_proc)(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wa,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ );
+
+static int line_search_backtracking(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wa,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ );
+
+static int line_search_backtracking_owlqn(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wp,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ );
+
+static int line_search_morethuente(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wa,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ );
+
+static int update_trial_interval(
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *fx,
+ lbfgsfloatval_t *dx,
+ lbfgsfloatval_t *y,
+ lbfgsfloatval_t *fy,
+ lbfgsfloatval_t *dy,
+ lbfgsfloatval_t *t,
+ lbfgsfloatval_t *ft,
+ lbfgsfloatval_t *dt,
+ const lbfgsfloatval_t tmin,
+ const lbfgsfloatval_t tmax,
+ int *brackt
+ );
+
+static lbfgsfloatval_t owlqn_x1norm(
+ const lbfgsfloatval_t* x,
+ const int start,
+ const int n
+ );
+
+static void owlqn_pseudo_gradient(
+ lbfgsfloatval_t* pg,
+ const lbfgsfloatval_t* x,
+ const lbfgsfloatval_t* g,
+ const int n,
+ const lbfgsfloatval_t c,
+ const int start,
+ const int end
+ );
+
+static void owlqn_project(
+ lbfgsfloatval_t* d,
+ const lbfgsfloatval_t* sign,
+ const int start,
+ const int end
+ );
+
+
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+static int round_out_variables(int n)
+{
+ n += 7;
+ n /= 8;
+ n *= 8;
+ return n;
+}
+#endif/*defined(USE_SSE)*/
+
+lbfgsfloatval_t* lbfgs_malloc(int n)
+{
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+ n = round_out_variables(n);
+#endif/*defined(USE_SSE)*/
+ return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n);
+}
+
+void lbfgs_free(lbfgsfloatval_t *x)
+{
+ vecfree(x);
+}
+
+void lbfgs_parameter_init(lbfgs_parameter_t *param)
+{
+ memcpy(param, &_defparam, sizeof(*param));
+}
+
+int lbfgs(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *ptr_fx,
+ lbfgs_evaluate_t proc_evaluate,
+ lbfgs_progress_t proc_progress,
+ void *instance,
+ lbfgs_parameter_t *_param
+ )
+{
+ int ret;
+ int i, j, k, ls, end, bound;
+ lbfgsfloatval_t step;
+
+ /* Constant parameters and their default values. */
+ lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
+ const int m = param.m;
+
+ lbfgsfloatval_t *xp = NULL;
+ lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;
+ lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;
+ iteration_data_t *lm = NULL, *it = NULL;
+ lbfgsfloatval_t ys, yy;
+ lbfgsfloatval_t xnorm, gnorm, beta;
+ lbfgsfloatval_t fx = 0.;
+ lbfgsfloatval_t rate = 0.;
+ line_search_proc linesearch = line_search_morethuente;
+
+ /* Construct a callback data. */
+ callback_data_t cd;
+ cd.n = n;
+ cd.instance = instance;
+ cd.proc_evaluate = proc_evaluate;
+ cd.proc_progress = proc_progress;
+
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+ /* Round out the number of variables. */
+ n = round_out_variables(n);
+#endif/*defined(USE_SSE)*/
+
+ /* Check the input parameters for errors. */
+ if (n <= 0) {
+ return LBFGSERR_INVALID_N;
+ }
+#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
+ if (n % 8 != 0) {
+ return LBFGSERR_INVALID_N_SSE;
+ }
+ if ((uintptr_t)(const void*)x % 16 != 0) {
+ return LBFGSERR_INVALID_X_SSE;
+ }
+#endif/*defined(USE_SSE)*/
+ if (param.epsilon < 0.) {
+ return LBFGSERR_INVALID_EPSILON;
+ }
+ if (param.past < 0) {
+ return LBFGSERR_INVALID_TESTPERIOD;
+ }
+ if (param.delta < 0.) {
+ return LBFGSERR_INVALID_DELTA;
+ }
+ if (param.min_step < 0.) {
+ return LBFGSERR_INVALID_MINSTEP;
+ }
+ if (param.max_step < param.min_step) {
+ return LBFGSERR_INVALID_MAXSTEP;
+ }
+ if (param.ftol < 0.) {
+ return LBFGSERR_INVALID_FTOL;
+ }
+ if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
+ param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
+ if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
+ return LBFGSERR_INVALID_WOLFE;
+ }
+ }
+ if (param.gtol < 0.) {
+ return LBFGSERR_INVALID_GTOL;
+ }
+ if (param.xtol < 0.) {
+ return LBFGSERR_INVALID_XTOL;
+ }
+ if (param.max_linesearch <= 0) {
+ return LBFGSERR_INVALID_MAXLINESEARCH;
+ }
+ if (param.orthantwise_c < 0.) {
+ return LBFGSERR_INVALID_ORTHANTWISE;
+ }
+ if (param.orthantwise_start < 0 || n < param.orthantwise_start) {
+ return LBFGSERR_INVALID_ORTHANTWISE_START;
+ }
+ if (param.orthantwise_end < 0) {
+ param.orthantwise_end = n;
+ }
+ if (n < param.orthantwise_end) {
+ return LBFGSERR_INVALID_ORTHANTWISE_END;
+ }
+ if (param.orthantwise_c != 0.) {
+ switch (param.linesearch) {
+ case LBFGS_LINESEARCH_BACKTRACKING:
+ linesearch = line_search_backtracking_owlqn;
+ break;
+ default:
+ /* Only the backtracking method is available. */
+ return LBFGSERR_INVALID_LINESEARCH;
+ }
+ } else {
+ switch (param.linesearch) {
+ case LBFGS_LINESEARCH_MORETHUENTE:
+ linesearch = line_search_morethuente;
+ break;
+ case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
+ case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
+ case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
+ linesearch = line_search_backtracking;
+ break;
+ default:
+ return LBFGSERR_INVALID_LINESEARCH;
+ }
+ }
+
+ /* Allocate working space. */
+ xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
+ ret = LBFGSERR_OUTOFMEMORY;
+ goto lbfgs_exit;
+ }
+
+ if (param.orthantwise_c != 0.) {
+ /* Allocate working space for OW-LQN. */
+ pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ if (pg == NULL) {
+ ret = LBFGSERR_OUTOFMEMORY;
+ goto lbfgs_exit;
+ }
+ }
+
+ /* Allocate limited memory storage. */
+ lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
+ if (lm == NULL) {
+ ret = LBFGSERR_OUTOFMEMORY;
+ goto lbfgs_exit;
+ }
+
+ /* Initialize the limited memory. */
+ for (i = 0;i < m;++i) {
+ it = &lm[i];
+ it->alpha = 0;
+ it->ys = 0;
+ it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
+ if (it->s == NULL || it->y == NULL) {
+ ret = LBFGSERR_OUTOFMEMORY;
+ goto lbfgs_exit;
+ }
+ }
+
+ /* Allocate an array for storing previous values of the objective function. */
+ if (0 < param.past) {
+ pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t));
+ }
+
+ /* Evaluate the function value and its gradient. */
+ fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
+ if (0. != param.orthantwise_c) {
+ /* Compute the L1 norm of the variable and add it to the object value. */
+ xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
+ fx += xnorm * param.orthantwise_c;
+ owlqn_pseudo_gradient(
+ pg, x, g, n,
+ param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
+ );
+ }
+
+ /* Store the initial value of the objective function. */
+ if (pf != NULL) {
+ pf[0] = fx;
+ }
+
+ /*
+ Compute the direction;
+ we assume the initial hessian matrix H_0 as the identity matrix.
+ */
+ if (param.orthantwise_c == 0.) {
+ vecncpy(d, g, n);
+ } else {
+ vecncpy(d, pg, n);
+ }
+
+ /*
+ Make sure that the initial variables are not a minimizer.
+ */
+ vec2norm(&xnorm, x, n);
+ if (param.orthantwise_c == 0.) {
+ vec2norm(&gnorm, g, n);
+ } else {
+ vec2norm(&gnorm, pg, n);
+ }
+ if (xnorm < 1.0) xnorm = 1.0;
+ if (gnorm / xnorm <= param.epsilon) {
+ ret = LBFGS_ALREADY_MINIMIZED;
+ goto lbfgs_exit;
+ }
+
+ /* Compute the initial step:
+ step = 1.0 / sqrt(vecdot(d, d, n))
+ */
+ vec2norminv(&step, d, n);
+
+ k = 1;
+ end = 0;
+ for (;;) {
+ /* Store the current position and gradient vectors. */
+ veccpy(xp, x, n);
+ veccpy(gp, g, n);
+
+ /* Search for an optimal step. */
+ if (param.orthantwise_c == 0.) {
+ ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
+ } else {
+ ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
+ owlqn_pseudo_gradient(
+ pg, x, g, n,
+ param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
+ );
+ }
+ if (ls < 0) {
+ /* Revert to the previous point. */
+ veccpy(x, xp, n);
+ veccpy(g, gp, n);
+ ret = ls;
+ goto lbfgs_exit;
+ }
+
+ /* Compute x and g norms. */
+ vec2norm(&xnorm, x, n);
+ if (param.orthantwise_c == 0.) {
+ vec2norm(&gnorm, g, n);
+ } else {
+ vec2norm(&gnorm, pg, n);
+ }
+
+ /* Report the progress. */
+ if (cd.proc_progress) {
+ if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
+ goto lbfgs_exit;
+ }
+ }
+
+ /*
+ Convergence test.
+ The criterion is given by the following formula:
+ |g(x)| / \max(1, |x|) < \epsilon
+ */
+ if (xnorm < 1.0) xnorm = 1.0;
+ if (gnorm / xnorm <= param.epsilon) {
+ /* Convergence. */
+ ret = LBFGS_SUCCESS;
+ break;
+ }
+
+ /*
+ Test for stopping criterion.
+ The criterion is given by the following formula:
+ (f(past_x) - f(x)) / f(x) < \delta
+ */
+ if (pf != NULL) {
+ /* We don't test the stopping criterion while k < past. */
+ if (param.past <= k) {
+ /* Compute the relative improvement from the past. */
+ rate = (pf[k % param.past] - fx) / fx;
+
+ /* The stopping criterion. */
+ if (rate < param.delta) {
+ ret = LBFGS_STOP;
+ break;
+ }
+ }
+
+ /* Store the current value of the objective function. */
+ pf[k % param.past] = fx;
+ }
+
+ if (param.max_iterations != 0 && param.max_iterations < k+1) {
+ /* Maximum number of iterations. */
+ ret = LBFGSERR_MAXIMUMITERATION;
+ break;
+ }
+
+ /*
+ Update vectors s and y:
+ s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
+ y_{k+1} = g_{k+1} - g_{k}.
+ */
+ it = &lm[end];
+ vecdiff(it->s, x, xp, n);
+ vecdiff(it->y, g, gp, n);
+
+ /*
+ Compute scalars ys and yy:
+ ys = y^t \cdot s = 1 / \rho.
+ yy = y^t \cdot y.
+ Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
+ */
+ vecdot(&ys, it->y, it->s, n);
+ vecdot(&yy, it->y, it->y, n);
+ it->ys = ys;
+
+ /*
+ Recursive formula to compute dir = -(H \cdot g).
+ This is described in page 779 of:
+ Jorge Nocedal.
+ Updating Quasi-Newton Matrices with Limited Storage.
+ Mathematics of Computation, Vol. 35, No. 151,
+ pp. 773--782, 1980.
+ */
+ bound = (m <= k) ? m : k;
+ ++k;
+ end = (end + 1) % m;
+
+ /* Compute the steepest direction. */
+ if (param.orthantwise_c == 0.) {
+ /* Compute the negative of gradients. */
+ vecncpy(d, g, n);
+ } else {
+ vecncpy(d, pg, n);
+ }
+
+ j = end;
+ for (i = 0;i < bound;++i) {
+ j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
+ it = &lm[j];
+ /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
+ vecdot(&it->alpha, it->s, d, n);
+ it->alpha /= it->ys;
+ /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
+ vecadd(d, it->y, -it->alpha, n);
+ }
+
+ vecscale(d, ys / yy, n);
+
+ for (i = 0;i < bound;++i) {
+ it = &lm[j];
+ /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
+ vecdot(&beta, it->y, d, n);
+ beta /= it->ys;
+ /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
+ vecadd(d, it->s, it->alpha - beta, n);
+ j = (j + 1) % m; /* if (++j == m) j = 0; */
+ }
+
+ /*
+ Constrain the search direction for orthant-wise updates.
+ */
+ if (param.orthantwise_c != 0.) {
+ for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
+ if (d[i] * pg[i] >= 0) {
+ d[i] = 0;
+ }
+ }
+ }
+
+ /*
+ Now the search direction d is ready. We try step = 1 first.
+ */
+ step = 1.0;
+ }
+
+lbfgs_exit:
+ /* Return the final value of the objective function. */
+ if (ptr_fx != NULL) {
+ *ptr_fx = fx;
+ }
+
+ vecfree(pf);
+
+ /* Free memory blocks used by this function. */
+ if (lm != NULL) {
+ for (i = 0;i < m;++i) {
+ vecfree(lm[i].s);
+ vecfree(lm[i].y);
+ }
+ vecfree(lm);
+ }
+ vecfree(pg);
+ vecfree(w);
+ vecfree(d);
+ vecfree(gp);
+ vecfree(g);
+ vecfree(xp);
+
+ return ret;
+}
+
+
+
+static int line_search_backtracking(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wp,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ )
+{
+ int count = 0;
+ lbfgsfloatval_t width, dg;
+ lbfgsfloatval_t finit, dginit = 0., dgtest;
+ const lbfgsfloatval_t dec = 0.5, inc = 2.1;
+
+ /* Check the input parameters for errors. */
+ if (*stp <= 0.) {
+ return LBFGSERR_INVALIDPARAMETERS;
+ }
+
+ /* Compute the initial gradient in the search direction. */
+ vecdot(&dginit, g, s, n);
+
+ /* Make sure that s points to a descent direction. */
+ if (0 < dginit) {
+ return LBFGSERR_INCREASEGRADIENT;
+ }
+
+ /* The initial value of the objective function. */
+ finit = *f;
+ dgtest = param->ftol * dginit;
+
+ for (;;) {
+ veccpy(x, xp, n);
+ vecadd(x, s, *stp, n);
+
+ /* Evaluate the function and gradient values. */
+ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+
+ ++count;
+
+ if (*f > finit + *stp * dgtest) {
+ width = dec;
+ } else {
+ /* The sufficient decrease condition (Armijo condition). */
+ if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) {
+ /* Exit with the Armijo condition. */
+ return count;
+ }
+
+ /* Check the Wolfe condition. */
+ vecdot(&dg, g, s, n);
+ if (dg < param->wolfe * dginit) {
+ width = inc;
+ } else {
+ if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) {
+ /* Exit with the regular Wolfe condition. */
+ return count;
+ }
+
+ /* Check the strong Wolfe condition. */
+ if(dg > -param->wolfe * dginit) {
+ width = dec;
+ } else {
+ /* Exit with the strong Wolfe condition. */
+ return count;
+ }
+ }
+ }
+
+ if (*stp < param->min_step) {
+ /* The step is the minimum value. */
+ return LBFGSERR_MINIMUMSTEP;
+ }
+ if (*stp > param->max_step) {
+ /* The step is the maximum value. */
+ return LBFGSERR_MAXIMUMSTEP;
+ }
+ if (param->max_linesearch <= count) {
+ /* Maximum number of iteration. */
+ return LBFGSERR_MAXIMUMLINESEARCH;
+ }
+
+ (*stp) *= width;
+ }
+}
+
+
+
+static int line_search_backtracking_owlqn(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wp,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ )
+{
+ int i, count = 0;
+ lbfgsfloatval_t width = 0.5, norm = 0.;
+ lbfgsfloatval_t finit = *f, dgtest;
+
+ /* Check the input parameters for errors. */
+ if (*stp <= 0.) {
+ return LBFGSERR_INVALIDPARAMETERS;
+ }
+
+ /* Choose the orthant for the new point. */
+ for (i = 0;i < n;++i) {
+ wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i];
+ }
+
+ for (;;) {
+ /* Update the current point. */
+ veccpy(x, xp, n);
+ vecadd(x, s, *stp, n);
+
+ /* The current point is projected onto the orthant. */
+ owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end);
+
+ /* Evaluate the function and gradient values. */
+ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+
+ /* Compute the L1 norm of the variables and add it to the object value. */
+ norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end);
+ *f += norm * param->orthantwise_c;
+
+ ++count;
+
+ dgtest = 0.;
+ for (i = 0;i < n;++i) {
+ dgtest += (x[i] - xp[i]) * gp[i];
+ }
+
+ if (*f <= finit + param->ftol * dgtest) {
+ /* The sufficient decrease condition. */
+ return count;
+ }
+
+ if (*stp < param->min_step) {
+ /* The step is the minimum value. */
+ return LBFGSERR_MINIMUMSTEP;
+ }
+ if (*stp > param->max_step) {
+ /* The step is the maximum value. */
+ return LBFGSERR_MAXIMUMSTEP;
+ }
+ if (param->max_linesearch <= count) {
+ /* Maximum number of iteration. */
+ return LBFGSERR_MAXIMUMLINESEARCH;
+ }
+
+ (*stp) *= width;
+ }
+}
+
+
+
+static int line_search_morethuente(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *f,
+ lbfgsfloatval_t *g,
+ lbfgsfloatval_t *s,
+ lbfgsfloatval_t *stp,
+ const lbfgsfloatval_t* xp,
+ const lbfgsfloatval_t* gp,
+ lbfgsfloatval_t *wa,
+ callback_data_t *cd,
+ const lbfgs_parameter_t *param
+ )
+{
+ int count = 0;
+ int brackt, stage1, uinfo = 0;
+ lbfgsfloatval_t dg;
+ lbfgsfloatval_t stx, fx, dgx;
+ lbfgsfloatval_t sty, fy, dgy;
+ lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
+ lbfgsfloatval_t finit, ftest1, dginit, dgtest;
+ lbfgsfloatval_t width, prev_width;
+ lbfgsfloatval_t stmin, stmax;
+
+ /* Check the input parameters for errors. */
+ if (*stp <= 0.) {
+ return LBFGSERR_INVALIDPARAMETERS;
+ }
+
+ /* Compute the initial gradient in the search direction. */
+ vecdot(&dginit, g, s, n);
+
+ /* Make sure that s points to a descent direction. */
+ if (0 < dginit) {
+ return LBFGSERR_INCREASEGRADIENT;
+ }
+
+ /* Initialize local variables. */
+ brackt = 0;
+ stage1 = 1;
+ finit = *f;
+ dgtest = param->ftol * dginit;
+ width = param->max_step - param->min_step;
+ prev_width = 2.0 * width;
+
+ /*
+ The variables stx, fx, dgx contain the values of the step,
+ function, and directional derivative at the best step.
+ The variables sty, fy, dgy contain the value of the step,
+ function, and derivative at the other endpoint of
+ the interval of uncertainty.
+ The variables stp, f, dg contain the values of the step,
+ function, and derivative at the current step.
+ */
+ stx = sty = 0.;
+ fx = fy = finit;
+ dgx = dgy = dginit;
+
+ for (;;) {
+ /*
+ Set the minimum and maximum steps to correspond to the
+ present interval of uncertainty.
+ */
+ if (brackt) {
+ stmin = min2(stx, sty);
+ stmax = max2(stx, sty);
+ } else {
+ stmin = stx;
+ stmax = *stp + 4.0 * (*stp - stx);
+ }
+
+ /* Clip the step in the range of [stpmin, stpmax]. */
+ if (*stp < param->min_step) *stp = param->min_step;
+ if (param->max_step < *stp) *stp = param->max_step;
+
+ /*
+ If an unusual termination is to occur then let
+ stp be the lowest point obtained so far.
+ */
+ if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
+ *stp = stx;
+ }
+
+ /*
+ Compute the current value of x:
+ x <- x + (*stp) * s.
+ */
+ veccpy(x, xp, n);
+ vecadd(x, s, *stp, n);
+
+ /* Evaluate the function and gradient values. */
+ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
+ vecdot(&dg, g, s, n);
+
+ ftest1 = finit + *stp * dgtest;
+ ++count;
+
+ /* Test for errors and convergence. */
+ if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
+ /* Rounding errors prevent further progress. */
+ return LBFGSERR_ROUNDING_ERROR;
+ }
+ if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
+ /* The step is the maximum value. */
+ return LBFGSERR_MAXIMUMSTEP;
+ }
+ if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
+ /* The step is the minimum value. */
+ return LBFGSERR_MINIMUMSTEP;
+ }
+ if (brackt && (stmax - stmin) <= param->xtol * stmax) {
+ /* Relative width of the interval of uncertainty is at most xtol. */
+ return LBFGSERR_WIDTHTOOSMALL;
+ }
+ if (param->max_linesearch <= count) {
+ /* Maximum number of iteration. */
+ return LBFGSERR_MAXIMUMLINESEARCH;
+ }
+ if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
+ /* The sufficient decrease condition and the directional derivative condition hold. */
+ return count;
+ }
+
+ /*
+ In the first stage we seek a step for which the modified
+ function has a nonpositive value and nonnegative derivative.
+ */
+ if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
+ stage1 = 0;
+ }
+
+ /*
+ A modified function is used to predict the step only if
+ we have not obtained a step for which the modified
+ function has a nonpositive function value and nonnegative
+ derivative, and if a lower function value has been
+ obtained but the decrease is not sufficient.
+ */
+ if (stage1 && ftest1 < *f && *f <= fx) {
+ /* Define the modified function and derivative values. */
+ fm = *f - *stp * dgtest;
+ fxm = fx - stx * dgtest;
+ fym = fy - sty * dgtest;
+ dgm = dg - dgtest;
+ dgxm = dgx - dgtest;
+ dgym = dgy - dgtest;
+
+ /*
+ Call update_trial_interval() to update the interval of
+ uncertainty and to compute the new step.
+ */
+ uinfo = update_trial_interval(
+ &stx, &fxm, &dgxm,
+ &sty, &fym, &dgym,
+ stp, &fm, &dgm,
+ stmin, stmax, &brackt
+ );
+
+ /* Reset the function and gradient values for f. */
+ fx = fxm + stx * dgtest;
+ fy = fym + sty * dgtest;
+ dgx = dgxm + dgtest;
+ dgy = dgym + dgtest;
+ } else {
+ /*
+ Call update_trial_interval() to update the interval of
+ uncertainty and to compute the new step.
+ */
+ uinfo = update_trial_interval(
+ &stx, &fx, &dgx,
+ &sty, &fy, &dgy,
+ stp, f, &dg,
+ stmin, stmax, &brackt
+ );
+ }
+
+ /*
+ Force a sufficient decrease in the interval of uncertainty.
+ */
+ if (brackt) {
+ if (0.66 * prev_width <= fabs(sty - stx)) {
+ *stp = stx + 0.5 * (sty - stx);
+ }
+ prev_width = width;
+ width = fabs(sty - stx);
+ }
+ }
+
+ return LBFGSERR_LOGICERROR;
+}
+
+
+
+/**
+ * Define the local variables for computing minimizers.
+ */
+#define USES_MINIMIZER \
+ lbfgsfloatval_t a, d, gamma, theta, p, q, r, s;
+
+/**
+ * Find a minimizer of an interpolated cubic function.
+ * @param cm The minimizer of the interpolated cubic.
+ * @param u The value of one point, u.
+ * @param fu The value of f(u).
+ * @param du The value of f'(u).
+ * @param v The value of another point, v.
+ * @param fv The value of f(v).
+ * @param du The value of f'(v).
+ */
+#define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \
+ d = (v) - (u); \
+ theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
+ p = fabs(theta); \
+ q = fabs(du); \
+ r = fabs(dv); \
+ s = max3(p, q, r); \
+ /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
+ a = theta / s; \
+ gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \
+ if ((v) < (u)) gamma = -gamma; \
+ p = gamma - (du) + theta; \
+ q = gamma - (du) + gamma + (dv); \
+ r = p / q; \
+ (cm) = (u) + r * d;
+
+/**
+ * Find a minimizer of an interpolated cubic function.
+ * @param cm The minimizer of the interpolated cubic.
+ * @param u The value of one point, u.
+ * @param fu The value of f(u).
+ * @param du The value of f'(u).
+ * @param v The value of another point, v.
+ * @param fv The value of f(v).
+ * @param du The value of f'(v).
+ * @param xmin The maximum value.
+ * @param xmin The minimum value.
+ */
+#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \
+ d = (v) - (u); \
+ theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
+ p = fabs(theta); \
+ q = fabs(du); \
+ r = fabs(dv); \
+ s = max3(p, q, r); \
+ /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
+ a = theta / s; \
+ gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \
+ if ((u) < (v)) gamma = -gamma; \
+ p = gamma - (dv) + theta; \
+ q = gamma - (dv) + gamma + (du); \
+ r = p / q; \
+ if (r < 0. && gamma != 0.) { \
+ (cm) = (v) - r * d; \
+ } else if (a < 0) { \
+ (cm) = (xmax); \
+ } else { \
+ (cm) = (xmin); \
+ }
+
+/**
+ * Find a minimizer of an interpolated quadratic function.
+ * @param qm The minimizer of the interpolated quadratic.
+ * @param u The value of one point, u.
+ * @param fu The value of f(u).
+ * @param du The value of f'(u).
+ * @param v The value of another point, v.
+ * @param fv The value of f(v).
+ */
+#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \
+ a = (v) - (u); \
+ (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a;
+
+/**
+ * Find a minimizer of an interpolated quadratic function.
+ * @param qm The minimizer of the interpolated quadratic.
+ * @param u The value of one point, u.
+ * @param du The value of f'(u).
+ * @param v The value of another point, v.
+ * @param dv The value of f'(v).
+ */
+#define QUARD_MINIMIZER2(qm, u, du, v, dv) \
+ a = (u) - (v); \
+ (qm) = (v) + (dv) / ((dv) - (du)) * a;
+
+/**
+ * Update a safeguarded trial value and interval for line search.
+ *
+ * The parameter x represents the step with the least function value.
+ * The parameter t represents the current step. This function assumes
+ * that the derivative at the point of x in the direction of the step.
+ * If the bracket is set to true, the minimizer has been bracketed in
+ * an interval of uncertainty with endpoints between x and y.
+ *
+ * @param x The pointer to the value of one endpoint.
+ * @param fx The pointer to the value of f(x).
+ * @param dx The pointer to the value of f'(x).
+ * @param y The pointer to the value of another endpoint.
+ * @param fy The pointer to the value of f(y).
+ * @param dy The pointer to the value of f'(y).
+ * @param t The pointer to the value of the trial value, t.
+ * @param ft The pointer to the value of f(t).
+ * @param dt The pointer to the value of f'(t).
+ * @param tmin The minimum value for the trial value, t.
+ * @param tmax The maximum value for the trial value, t.
+ * @param brackt The pointer to the predicate if the trial value is
+ * bracketed.
+ * @retval int Status value. Zero indicates a normal termination.
+ *
+ * @see
+ * Jorge J. More and David J. Thuente. Line search algorithm with
+ * guaranteed sufficient decrease. ACM Transactions on Mathematical
+ * Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
+ */
+static int update_trial_interval(
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *fx,
+ lbfgsfloatval_t *dx,
+ lbfgsfloatval_t *y,
+ lbfgsfloatval_t *fy,
+ lbfgsfloatval_t *dy,
+ lbfgsfloatval_t *t,
+ lbfgsfloatval_t *ft,
+ lbfgsfloatval_t *dt,
+ const lbfgsfloatval_t tmin,
+ const lbfgsfloatval_t tmax,
+ int *brackt
+ )
+{
+ int bound;
+ int dsign = fsigndiff(dt, dx);
+ lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */
+ lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */
+ lbfgsfloatval_t newt; /* new trial value. */
+ USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */
+
+ /* Check the input parameters for errors. */
+ if (*brackt) {
+ if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) {
+ /* The trival value t is out of the interval. */
+ return LBFGSERR_OUTOFINTERVAL;
+ }
+ if (0. <= *dx * (*t - *x)) {
+ /* The function must decrease from x. */
+ return LBFGSERR_INCREASEGRADIENT;
+ }
+ if (tmax < tmin) {
+ /* Incorrect tmin and tmax specified. */
+ return LBFGSERR_INCORRECT_TMINMAX;
+ }
+ }
+
+ /*
+ Trial value selection.
+ */
+ if (*fx < *ft) {
+ /*
+ Case 1: a higher function value.
+ The minimum is brackt. If the cubic minimizer is closer
+ to x than the quadratic one, the cubic one is taken, else
+ the average of the minimizers is taken.
+ */
+ *brackt = 1;
+ bound = 1;
+ CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
+ QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
+ if (fabs(mc - *x) < fabs(mq - *x)) {
+ newt = mc;
+ } else {
+ newt = mc + 0.5 * (mq - mc);
+ }
+ } else if (dsign) {
+ /*
+ Case 2: a lower function value and derivatives of
+ opposite sign. The minimum is brackt. If the cubic
+ minimizer is closer to x than the quadratic (secant) one,
+ the cubic one is taken, else the quadratic one is taken.
+ */
+ *brackt = 1;
+ bound = 0;
+ CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
+ QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
+ if (fabs(mc - *t) > fabs(mq - *t)) {
+ newt = mc;
+ } else {
+ newt = mq;
+ }
+ } else if (fabs(*dt) < fabs(*dx)) {
+ /*
+ Case 3: a lower function value, derivatives of the
+ same sign, and the magnitude of the derivative decreases.
+ The cubic minimizer is only used if the cubic tends to
+ infinity in the direction of the minimizer or if the minimum
+ of the cubic is beyond t. Otherwise the cubic minimizer is
+ defined to be either tmin or tmax. The quadratic (secant)
+ minimizer is also computed and if the minimum is brackt
+ then the the minimizer closest to x is taken, else the one
+ farthest away is taken.
+ */
+ bound = 1;
+ CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax);
+ QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
+ if (*brackt) {
+ if (fabs(*t - mc) < fabs(*t - mq)) {
+ newt = mc;
+ } else {
+ newt = mq;
+ }
+ } else {
+ if (fabs(*t - mc) > fabs(*t - mq)) {
+ newt = mc;
+ } else {
+ newt = mq;
+ }
+ }
+ } else {
+ /*
+ Case 4: a lower function value, derivatives of the
+ same sign, and the magnitude of the derivative does
+ not decrease. If the minimum is not brackt, the step
+ is either tmin or tmax, else the cubic minimizer is taken.
+ */
+ bound = 0;
+ if (*brackt) {
+ CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy);
+ } else if (*x < *t) {
+ newt = tmax;
+ } else {
+ newt = tmin;
+ }
+ }
+
+ /*
+ Update the interval of uncertainty. This update does not
+ depend on the new step or the case analysis above.
+
+ - Case a: if f(x) < f(t),
+ x <- x, y <- t.
+ - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0,
+ x <- t, y <- y.
+ - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0,
+ x <- t, y <- x.
+ */
+ if (*fx < *ft) {
+ /* Case a */
+ *y = *t;
+ *fy = *ft;
+ *dy = *dt;
+ } else {
+ /* Case c */
+ if (dsign) {
+ *y = *x;
+ *fy = *fx;
+ *dy = *dx;
+ }
+ /* Cases b and c */
+ *x = *t;
+ *fx = *ft;
+ *dx = *dt;
+ }
+
+ /* Clip the new trial value in [tmin, tmax]. */
+ if (tmax < newt) newt = tmax;
+ if (newt < tmin) newt = tmin;
+
+ /*
+ Redefine the new trial value if it is close to the upper bound
+ of the interval.
+ */
+ if (*brackt && bound) {
+ mq = *x + 0.66 * (*y - *x);
+ if (*x < *y) {
+ if (mq < newt) newt = mq;
+ } else {
+ if (newt < mq) newt = mq;
+ }
+ }
+
+ /* Return the new trial value. */
+ *t = newt;
+ return 0;
+}
+
+
+
+
+
+static lbfgsfloatval_t owlqn_x1norm(
+ const lbfgsfloatval_t* x,
+ const int start,
+ const int n
+ )
+{
+ int i;
+ lbfgsfloatval_t norm = 0.;
+
+ for (i = start;i < n;++i) {
+ norm += fabs(x[i]);
+ }
+
+ return norm;
+}
+
+static void owlqn_pseudo_gradient(
+ lbfgsfloatval_t* pg,
+ const lbfgsfloatval_t* x,
+ const lbfgsfloatval_t* g,
+ const int n,
+ const lbfgsfloatval_t c,
+ const int start,
+ const int end
+ )
+{
+ int i;
+
+ /* Compute the negative of gradients. */
+ for (i = 0;i < start;++i) {
+ pg[i] = g[i];
+ }
+
+ /* Compute the psuedo-gradients. */
+ for (i = start;i < end;++i) {
+ if (x[i] < 0.) {
+ /* Differentiable. */
+ pg[i] = g[i] - c;
+ } else if (0. < x[i]) {
+ /* Differentiable. */
+ pg[i] = g[i] + c;
+ } else {
+ if (g[i] < -c) {
+ /* Take the right partial derivative. */
+ pg[i] = g[i] + c;
+ } else if (c < g[i]) {
+ /* Take the left partial derivative. */
+ pg[i] = g[i] - c;
+ } else {
+ pg[i] = 0.;
+ }
+ }
+ }
+
+ for (i = end;i < n;++i) {
+ pg[i] = g[i];
+ }
+}
+
+static void owlqn_project(
+ lbfgsfloatval_t* d,
+ const lbfgsfloatval_t* sign,
+ const int start,
+ const int end
+ )
+{
+ int i;
+
+ for (i = start;i < end;++i) {
+ if (d[i] * sign[i] <= 0) {
+ d[i] = 0;
+ }
+ }
+}
diff --git a/training/liblbfgs/lbfgs.h b/training/liblbfgs/lbfgs.h
new file mode 100644
index 00000000..cd944a33
--- /dev/null
+++ b/training/liblbfgs/lbfgs.h
@@ -0,0 +1,745 @@
+/*
+ * C library of Limited memory BFGS (L-BFGS).
+ *
+ * Copyright (c) 1990, Jorge Nocedal
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#ifndef __LBFGS_H__
+#define __LBFGS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif/*__cplusplus*/
+
+/*
+ * The default precision of floating point values is 64bit (double).
+ */
+#ifndef LBFGS_FLOAT
+#define LBFGS_FLOAT 64
+#endif/*LBFGS_FLOAT*/
+
+/*
+ * Activate optimization routines for IEEE754 floating point values.
+ */
+#ifndef LBFGS_IEEE_FLOAT
+#define LBFGS_IEEE_FLOAT 1
+#endif/*LBFGS_IEEE_FLOAT*/
+
+#if LBFGS_FLOAT == 32
+typedef float lbfgsfloatval_t;
+
+#elif LBFGS_FLOAT == 64
+typedef double lbfgsfloatval_t;
+
+#else
+#error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only."
+
+#endif
+
+
+/**
+ * \addtogroup liblbfgs_api libLBFGS API
+ * @{
+ *
+ * The libLBFGS API.
+ */
+
+/**
+ * Return values of lbfgs().
+ *
+ * Roughly speaking, a negative value indicates an error.
+ */
+enum {
+ /** L-BFGS reaches convergence. */
+ LBFGS_SUCCESS = 0,
+ LBFGS_CONVERGENCE = 0,
+ LBFGS_STOP,
+ /** The initial variables already minimize the objective function. */
+ LBFGS_ALREADY_MINIMIZED,
+
+ /** Unknown error. */
+ LBFGSERR_UNKNOWNERROR = -1024,
+ /** Logic error. */
+ LBFGSERR_LOGICERROR,
+ /** Insufficient memory. */
+ LBFGSERR_OUTOFMEMORY,
+ /** The minimization process has been canceled. */
+ LBFGSERR_CANCELED,
+ /** Invalid number of variables specified. */
+ LBFGSERR_INVALID_N,
+ /** Invalid number of variables (for SSE) specified. */
+ LBFGSERR_INVALID_N_SSE,
+ /** The array x must be aligned to 16 (for SSE). */
+ LBFGSERR_INVALID_X_SSE,
+ /** Invalid parameter lbfgs_parameter_t::epsilon specified. */
+ LBFGSERR_INVALID_EPSILON,
+ /** Invalid parameter lbfgs_parameter_t::past specified. */
+ LBFGSERR_INVALID_TESTPERIOD,
+ /** Invalid parameter lbfgs_parameter_t::delta specified. */
+ LBFGSERR_INVALID_DELTA,
+ /** Invalid parameter lbfgs_parameter_t::linesearch specified. */
+ LBFGSERR_INVALID_LINESEARCH,
+ /** Invalid parameter lbfgs_parameter_t::max_step specified. */
+ LBFGSERR_INVALID_MINSTEP,
+ /** Invalid parameter lbfgs_parameter_t::max_step specified. */
+ LBFGSERR_INVALID_MAXSTEP,
+ /** Invalid parameter lbfgs_parameter_t::ftol specified. */
+ LBFGSERR_INVALID_FTOL,
+ /** Invalid parameter lbfgs_parameter_t::wolfe specified. */
+ LBFGSERR_INVALID_WOLFE,
+ /** Invalid parameter lbfgs_parameter_t::gtol specified. */
+ LBFGSERR_INVALID_GTOL,
+ /** Invalid parameter lbfgs_parameter_t::xtol specified. */
+ LBFGSERR_INVALID_XTOL,
+ /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */
+ LBFGSERR_INVALID_MAXLINESEARCH,
+ /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */
+ LBFGSERR_INVALID_ORTHANTWISE,
+ /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */
+ LBFGSERR_INVALID_ORTHANTWISE_START,
+ /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */
+ LBFGSERR_INVALID_ORTHANTWISE_END,
+ /** The line-search step went out of the interval of uncertainty. */
+ LBFGSERR_OUTOFINTERVAL,
+ /** A logic error occurred; alternatively, the interval of uncertainty
+ became too small. */
+ LBFGSERR_INCORRECT_TMINMAX,
+ /** A rounding error occurred; alternatively, no line-search step
+ satisfies the sufficient decrease and curvature conditions. */
+ LBFGSERR_ROUNDING_ERROR,
+ /** The line-search step became smaller than lbfgs_parameter_t::min_step. */
+ LBFGSERR_MINIMUMSTEP,
+ /** The line-search step became larger than lbfgs_parameter_t::max_step. */
+ LBFGSERR_MAXIMUMSTEP,
+ /** The line-search routine reaches the maximum number of evaluations. */
+ LBFGSERR_MAXIMUMLINESEARCH,
+ /** The algorithm routine reaches the maximum number of iterations. */
+ LBFGSERR_MAXIMUMITERATION,
+ /** Relative width of the interval of uncertainty is at most
+ lbfgs_parameter_t::xtol. */
+ LBFGSERR_WIDTHTOOSMALL,
+ /** A logic error (negative line-search step) occurred. */
+ LBFGSERR_INVALIDPARAMETERS,
+ /** The current search direction increases the objective function value. */
+ LBFGSERR_INCREASEGRADIENT,
+};
+
+/**
+ * Line search algorithms.
+ */
+enum {
+ /** The default algorithm (MoreThuente method). */
+ LBFGS_LINESEARCH_DEFAULT = 0,
+ /** MoreThuente method proposd by More and Thuente. */
+ LBFGS_LINESEARCH_MORETHUENTE = 0,
+ /**
+ * Backtracking method with the Armijo condition.
+ * The backtracking method finds the step length such that it satisfies
+ * the sufficient decrease (Armijo) condition,
+ * - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d,
+ *
+ * where x is the current point, d is the current search direction, and
+ * a is the step length.
+ */
+ LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,
+ /** The backtracking method with the defualt (regular Wolfe) condition. */
+ LBFGS_LINESEARCH_BACKTRACKING = 2,
+ /**
+ * Backtracking method with regular Wolfe condition.
+ * The backtracking method finds the step length such that it satisfies
+ * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
+ * and the curvature condition,
+ * - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d,
+ *
+ * where x is the current point, d is the current search direction, and
+ * a is the step length.
+ */
+ LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,
+ /**
+ * Backtracking method with strong Wolfe condition.
+ * The backtracking method finds the step length such that it satisfies
+ * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
+ * and the following condition,
+ * - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|,
+ *
+ * where x is the current point, d is the current search direction, and
+ * a is the step length.
+ */
+ LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3,
+};
+
+/**
+ * L-BFGS optimization parameters.
+ * Call lbfgs_parameter_init() function to initialize parameters to the
+ * default values.
+ */
+typedef struct {
+ /**
+ * The number of corrections to approximate the inverse hessian matrix.
+ * The L-BFGS routine stores the computation results of previous \ref m
+ * iterations to approximate the inverse hessian matrix of the current
+ * iteration. This parameter controls the size of the limited memories
+ * (corrections). The default value is \c 6. Values less than \c 3 are
+ * not recommended. Large values will result in excessive computing time.
+ */
+ int m;
+
+ /**
+ * Epsilon for convergence test.
+ * This parameter determines the accuracy with which the solution is to
+ * be found. A minimization terminates when
+ * ||g|| < \ref epsilon * max(1, ||x||),
+ * where ||.|| denotes the Euclidean (L2) norm. The default value is
+ * \c 1e-5.
+ */
+ lbfgsfloatval_t epsilon;
+
+ /**
+ * Distance for delta-based convergence test.
+ * This parameter determines the distance, in iterations, to compute
+ * the rate of decrease of the objective function. If the value of this
+ * parameter is zero, the library does not perform the delta-based
+ * convergence test. The default value is \c 0.
+ */
+ int past;
+
+ /**
+ * Delta for convergence test.
+ * This parameter determines the minimum rate of decrease of the
+ * objective function. The library stops iterations when the
+ * following condition is met:
+ * (f' - f) / f < \ref delta,
+ * where f' is the objective value of \ref past iterations ago, and f is
+ * the objective value of the current iteration.
+ * The default value is \c 0.
+ */
+ lbfgsfloatval_t delta;
+
+ /**
+ * The maximum number of iterations.
+ * The lbfgs() function terminates an optimization process with
+ * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
+ * exceedes this parameter. Setting this parameter to zero continues an
+ * optimization process until a convergence or error. The default value
+ * is \c 0.
+ */
+ int max_iterations;
+
+ /**
+ * The line search algorithm.
+ * This parameter specifies a line search algorithm to be used by the
+ * L-BFGS routine.
+ */
+ int linesearch;
+
+ /**
+ * The maximum number of trials for the line search.
+ * This parameter controls the number of function and gradients evaluations
+ * per iteration for the line search routine. The default value is \c 20.
+ */
+ int max_linesearch;
+
+ /**
+ * The minimum step of the line search routine.
+ * The default value is \c 1e-20. This value need not be modified unless
+ * the exponents are too large for the machine being used, or unless the
+ * problem is extremely badly scaled (in which case the exponents should
+ * be increased).
+ */
+ lbfgsfloatval_t min_step;
+
+ /**
+ * The maximum step of the line search.
+ * The default value is \c 1e+20. This value need not be modified unless
+ * the exponents are too large for the machine being used, or unless the
+ * problem is extremely badly scaled (in which case the exponents should
+ * be increased).
+ */
+ lbfgsfloatval_t max_step;
+
+ /**
+ * A parameter to control the accuracy of the line search routine.
+ * The default value is \c 1e-4. This parameter should be greater
+ * than zero and smaller than \c 0.5.
+ */
+ lbfgsfloatval_t ftol;
+
+ /**
+ * A coefficient for the Wolfe condition.
+ * This parameter is valid only when the backtracking line-search
+ * algorithm is used with the Wolfe condition,
+ * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or
+ * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE .
+ * The default value is \c 0.9. This parameter should be greater
+ * the \ref ftol parameter and smaller than \c 1.0.
+ */
+ lbfgsfloatval_t wolfe;
+
+ /**
+ * A parameter to control the accuracy of the line search routine.
+ * The default value is \c 0.9. If the function and gradient
+ * evaluations are inexpensive with respect to the cost of the
+ * iteration (which is sometimes the case when solving very large
+ * problems) it may be advantageous to set this parameter to a small
+ * value. A typical small value is \c 0.1. This parameter shuold be
+ * greater than the \ref ftol parameter (\c 1e-4) and smaller than
+ * \c 1.0.
+ */
+ lbfgsfloatval_t gtol;
+
+ /**
+ * The machine precision for floating-point values.
+ * This parameter must be a positive value set by a client program to
+ * estimate the machine precision. The line search routine will terminate
+ * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
+ * of the interval of uncertainty is less than this parameter.
+ */
+ lbfgsfloatval_t xtol;
+
+ /**
+ * Coeefficient for the L1 norm of variables.
+ * This parameter should be set to zero for standard minimization
+ * problems. Setting this parameter to a positive value activates
+ * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
+ * minimizes the objective function F(x) combined with the L1 norm |x|
+ * of the variables, {F(x) + C |x|}. This parameter is the coeefficient
+ * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
+ * zero, the library modifies function and gradient evaluations from
+ * a client program suitably; a client program thus have only to return
+ * the function value F(x) and gradients G(x) as usual. The default value
+ * is zero.
+ */
+ lbfgsfloatval_t orthantwise_c;
+
+ /**
+ * Start index for computing L1 norm of the variables.
+ * This parameter is valid only for OWL-QN method
+ * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N)
+ * specifies the index number from which the library computes the
+ * L1 norm of the variables x,
+ * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
+ * In other words, variables x_1, ..., x_{b-1} are not used for
+ * computing the L1 norm. Setting b (0 < b < N), one can protect
+ * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
+ * regression) from being regularized. The default value is zero.
+ */
+ int orthantwise_start;
+
+ /**
+ * End index for computing L1 norm of the variables.
+ * This parameter is valid only for OWL-QN method
+ * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N)
+ * specifies the index number at which the library stops computing the
+ * L1 norm of the variables x,
+ */
+ int orthantwise_end;
+} lbfgs_parameter_t;
+
+
+/**
+ * Callback interface to provide objective function and gradient evaluations.
+ *
+ * The lbfgs() function call this function to obtain the values of objective
+ * function and its gradients when needed. A client program must implement
+ * this function to evaluate the values of the objective function and its
+ * gradients, given current values of variables.
+ *
+ * @param instance The user data sent for lbfgs() function by the client.
+ * @param x The current values of variables.
+ * @param g The gradient vector. The callback function must compute
+ * the gradient values for the current variables.
+ * @param n The number of variables.
+ * @param step The current step of the line search routine.
+ * @retval lbfgsfloatval_t The value of the objective function for the current
+ * variables.
+ */
+typedef lbfgsfloatval_t (*lbfgs_evaluate_t)(
+ void *instance,
+ const lbfgsfloatval_t *x,
+ lbfgsfloatval_t *g,
+ const int n,
+ const lbfgsfloatval_t step
+ );
+
+/**
+ * Callback interface to receive the progress of the optimization process.
+ *
+ * The lbfgs() function call this function for each iteration. Implementing
+ * this function, a client program can store or display the current progress
+ * of the optimization process.
+ *
+ * @param instance The user data sent for lbfgs() function by the client.
+ * @param x The current values of variables.
+ * @param g The current gradient values of variables.
+ * @param fx The current value of the objective function.
+ * @param xnorm The Euclidean norm of the variables.
+ * @param gnorm The Euclidean norm of the gradients.
+ * @param step The line-search step used for this iteration.
+ * @param n The number of variables.
+ * @param k The iteration count.
+ * @param ls The number of evaluations called for this iteration.
+ * @retval int Zero to continue the optimization process. Returning a
+ * non-zero value will cancel the optimization process.
+ */
+typedef int (*lbfgs_progress_t)(
+ void *instance,
+ const lbfgsfloatval_t *x,
+ const lbfgsfloatval_t *g,
+ const lbfgsfloatval_t fx,
+ const lbfgsfloatval_t xnorm,
+ const lbfgsfloatval_t gnorm,
+ const lbfgsfloatval_t step,
+ int n,
+ int k,
+ int ls
+ );
+
+/*
+A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation
+callback) and pass the pointer to the callback function to lbfgs() arguments.
+Similarly, a user can implement a function compatible with ::lbfgs_progress_t
+(progress callback) to obtain the current progress (e.g., variables, function
+value, ||G||, etc) and to cancel the iteration process if necessary.
+Implementation of a progress callback is optional: a user can pass \c NULL if
+progress notification is not necessary.
+
+In addition, a user must preserve two requirements:
+ - The number of variables must be multiples of 16 (this is not 4).
+ - The memory block of variable array ::x must be aligned to 16.
+
+This algorithm terminates an optimization
+when:
+
+ ||G|| < \epsilon \cdot \max(1, ||x||) .
+
+In this formula, ||.|| denotes the Euclidean norm.
+*/
+
+/**
+ * Start a L-BFGS optimization.
+ *
+ * @param n The number of variables.
+ * @param x The array of variables. A client program can set
+ * default values for the optimization and receive the
+ * optimization result through this array. This array
+ * must be allocated by ::lbfgs_malloc function
+ * for libLBFGS built with SSE/SSE2 optimization routine
+ * enabled. The library built without SSE/SSE2
+ * optimization does not have such a requirement.
+ * @param ptr_fx The pointer to the variable that receives the final
+ * value of the objective function for the variables.
+ * This argument can be set to \c NULL if the final
+ * value of the objective function is unnecessary.
+ * @param proc_evaluate The callback function to provide function and
+ * gradient evaluations given a current values of
+ * variables. A client program must implement a
+ * callback function compatible with \ref
+ * lbfgs_evaluate_t and pass the pointer to the
+ * callback function.
+ * @param proc_progress The callback function to receive the progress
+ * (the number of iterations, the current value of
+ * the objective function) of the minimization
+ * process. This argument can be set to \c NULL if
+ * a progress report is unnecessary.
+ * @param instance A user data for the client program. The callback
+ * functions will receive the value of this argument.
+ * @param param The pointer to a structure representing parameters for
+ * L-BFGS optimization. A client program can set this
+ * parameter to \c NULL to use the default parameters.
+ * Call lbfgs_parameter_init() function to fill a
+ * structure with the default values.
+ * @retval int The status code. This function returns zero if the
+ * minimization process terminates without an error. A
+ * non-zero value indicates an error.
+ */
+int lbfgs(
+ int n,
+ lbfgsfloatval_t *x,
+ lbfgsfloatval_t *ptr_fx,
+ lbfgs_evaluate_t proc_evaluate,
+ lbfgs_progress_t proc_progress,
+ void *instance,
+ lbfgs_parameter_t *param
+ );
+
+/**
+ * Initialize L-BFGS parameters to the default values.
+ *
+ * Call this function to fill a parameter structure with the default values
+ * and overwrite parameter values if necessary.
+ *
+ * @param param The pointer to the parameter structure.
+ */
+void lbfgs_parameter_init(lbfgs_parameter_t *param);
+
+/**
+ * Allocate an array for variables.
+ *
+ * This function allocates an array of variables for the convenience of
+ * ::lbfgs function; the function has a requreiemt for a variable array
+ * when libLBFGS is built with SSE/SSE2 optimization routines. A user does
+ * not have to use this function for libLBFGS built without SSE/SSE2
+ * optimization.
+ *
+ * @param n The number of variables.
+ */
+lbfgsfloatval_t* lbfgs_malloc(int n);
+
+/**
+ * Free an array of variables.
+ *
+ * @param x The array of variables allocated by ::lbfgs_malloc
+ * function.
+ */
+void lbfgs_free(lbfgsfloatval_t *x);
+
+/** @} */
+
+#ifdef __cplusplus
+}
+#endif/*__cplusplus*/
+
+
+
+/**
+@mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)
+
+@section intro Introduction
+
+This library is a C port of the implementation of Limited-memory
+Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
+The original FORTRAN source code is available at:
+http://www.ece.northwestern.edu/~nocedal/lbfgs.html
+
+The L-BFGS method solves the unconstrainted minimization problem,
+
+<pre>
+ minimize F(x), x = (x1, x2, ..., xN),
+</pre>
+
+only if the objective function F(x) and its gradient G(x) are computable. The
+well-known Newton's method requires computation of the inverse of the hessian
+matrix of the objective function. However, the computational cost for the
+inverse hessian matrix is expensive especially when the objective function
+takes a large number of variables. The L-BFGS method iteratively finds a
+minimizer by approximating the inverse hessian matrix by information from last
+m iterations. This innovation saves the memory storage and computational time
+drastically for large-scaled problems.
+
+Among the various ports of L-BFGS, this library provides several features:
+- <b>Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton
+ (OWL-QN) method)</b>:
+ In addition to standard minimization problems, the library can minimize
+ a function F(x) combined with L1-norm |x| of the variables,
+ {F(x) + C |x|}, where C is a constant scalar parameter. This feature is
+ useful for estimating parameters of sparse log-linear models (e.g.,
+ logistic regression and maximum entropy) with L1-regularization (or
+ Laplacian prior).
+- <b>Clean C code</b>:
+ Unlike C codes generated automatically by f2c (Fortran 77 into C converter),
+ this port includes changes based on my interpretations, improvements,
+ optimizations, and clean-ups so that the ported code would be well-suited
+ for a C code. In addition to comments inherited from the original code,
+ a number of comments were added through my interpretations.
+- <b>Callback interface</b>:
+ The library receives function and gradient values via a callback interface.
+ The library also notifies the progress of the optimization by invoking a
+ callback function. In the original implementation, a user had to set
+ function and gradient values every time the function returns for obtaining
+ updated values.
+- <b>Thread safe</b>:
+ The library is thread-safe, which is the secondary gain from the callback
+ interface.
+- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
+ Studio 2010, GNU C Compiler (gcc), etc.
+- <b>Configurable precision</b>: A user can choose single-precision (float)
+ or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro.
+- <b>SSE/SSE2 optimization</b>:
+ This library includes SSE/SSE2 optimization (written in compiler intrinsics)
+ for vector arithmetic operations on Intel/AMD processors. The library uses
+ SSE for float values and SSE2 for double values. The SSE/SSE2 optimization
+ routine is disabled by default.
+
+This library is used by:
+- <a href="http://www.chokkan.org/software/crfsuite/">CRFsuite: A fast implementation of Conditional Random Fields (CRFs)</a>
+- <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a>
+- <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a>
+- <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a>
+- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a>
+- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a>
+
+@section download Download
+
+- <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a>
+- <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a>
+
+libLBFGS is distributed under the term of the
+<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
+
+@section changelog History
+- Version 1.10 (2010-12-22):
+ - Fixed compiling errors on Mac OS X; this patch was kindly submitted by
+ Nic Schraudolph.
+ - Reduced compiling warnings on Mac OS X; this patch was kindly submitted
+ by Tamas Nepusz.
+ - Replaced memalign() with posix_memalign().
+ - Updated solution and project files for Microsoft Visual Studio 2010.
+- Version 1.9 (2010-01-29):
+ - Fixed a mistake in checking the validity of the parameters "ftol" and
+ "wolfe"; this was discovered by Kevin S. Van Horn.
+- Version 1.8 (2009-07-13):
+ - Accepted the patch submitted by Takashi Imamichi;
+ the backtracking method now has three criteria for choosing the step
+ length:
+ - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo)
+ condition only
+ - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition
+ (sufficient decrease condition + curvature condition)
+ - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition
+ - Updated the documentation to explain the above three criteria.
+- Version 1.7 (2009-02-28):
+ - Improved OWL-QN routines for stability.
+ - Removed the support of OWL-QN method in MoreThuente algorithm because
+ it accidentally fails in early stages of iterations for some objectives.
+ Because of this change, <b>the OW-LQN method must be used with the
+ backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING)</b>, or the
+ library returns ::LBFGSERR_INVALID_LINESEARCH.
+ - Renamed line search algorithms as follows:
+ - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition.
+ - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition.
+ - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition.
+ - Source code clean-up.
+- Version 1.6 (2008-11-02):
+ - Improved line-search algorithm with strong Wolfe condition, which was
+ contributed by Takashi Imamichi. This routine is now default for
+ ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm
+ with regular Wolfe condition is still available as
+ ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE.
+ - Configurable stop index for L1-norm computation. A member variable
+ ::lbfgs_parameter_t::orthantwise_end was added to specify the index
+ number at which the library stops computing the L1 norm of the
+ variables. This is useful to prevent some variables from being
+ regularized by the OW-LQN method.
+ - A sample program written in C++ (sample/sample.cpp).
+- Version 1.5 (2008-07-10):
+ - Configurable starting index for L1-norm computation. A member variable
+ ::lbfgs_parameter_t::orthantwise_start was added to specify the index
+ number from which the library computes the L1 norm of the variables.
+ This is useful to prevent some variables from being regularized by the
+ OWL-QN method.
+ - Fixed a zero-division error when the initial variables have already
+ been a minimizer (reported by Takashi Imamichi). In this case, the
+ library returns ::LBFGS_ALREADY_MINIMIZED status code.
+ - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants,
+ LBFGSFALSE and LBFGSTRUE.
+ - Fixed a compile error in an implicit down-cast.
+- Version 1.4 (2008-04-25):
+ - Configurable line search algorithms. A member variable
+ ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente
+ method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm
+ (::LBFGS_LINESEARCH_BACKTRACKING).
+ - Fixed a bug: the previous version did not compute psuedo-gradients
+ properly in the line search routines for OWL-QN. This bug might quit
+ an iteration process too early when the OWL-QN routine was activated
+ (0 < ::lbfgs_parameter_t::orthantwise_c).
+ - Configure script for POSIX environments.
+ - SSE/SSE2 optimizations with GCC.
+ - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines
+ transparently. It is uncessary to use these functions for libLBFGS built
+ without SSE/SSE2 routines; you can still use any memory allocators if
+ SSE/SSE2 routines are disabled in libLBFGS.
+- Version 1.3 (2007-12-16):
+ - An API change. An argument was added to lbfgs() function to receive the
+ final value of the objective function. This argument can be set to
+ \c NULL if the final value is unnecessary.
+ - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi).
+ - Added build scripts for Microsoft Visual Studio 2005 and GCC.
+ - Added README file.
+- Version 1.2 (2007-12-13):
+ - Fixed a serious bug in orthant-wise L-BFGS.
+ An important variable was used without initialization.
+- Version 1.1 (2007-12-01):
+ - Implemented orthant-wise L-BFGS.
+ - Implemented lbfgs_parameter_init() function.
+ - Fixed several bugs.
+ - API documentation.
+- Version 1.0 (2007-09-20):
+ - Initial release.
+
+@section api Documentation
+
+- @ref liblbfgs_api "libLBFGS API"
+
+@section sample Sample code
+
+@include sample.c
+
+@section ack Acknowledgements
+
+The L-BFGS algorithm is described in:
+ - Jorge Nocedal.
+ Updating Quasi-Newton Matrices with Limited Storage.
+ <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
+ - Dong C. Liu and Jorge Nocedal.
+ On the limited memory BFGS method for large scale optimization.
+ <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
+
+The line search algorithms used in this implementation are described in:
+ - John E. Dennis and Robert B. Schnabel.
+ <i>Numerical Methods for Unconstrained Optimization and Nonlinear
+ Equations</i>, Englewood Cliffs, 1983.
+ - Jorge J. More and David J. Thuente.
+ Line search algorithm with guaranteed sufficient decrease.
+ <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
+ pp. 286-307, 1994.
+
+This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)
+method presented in:
+ - Galen Andrew and Jianfeng Gao.
+ Scalable training of L1-regularized log-linear models.
+ In <i>Proceedings of the 24th International Conference on Machine
+ Learning (ICML 2007)</i>, pp. 33-40, 2007.
+
+Special thanks go to:
+ - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
+ OWL-QN
+ - Takashi Imamichi for the useful enhancements of the backtracking method
+ - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes
+
+Finally I would like to thank the original author, Jorge Nocedal, who has been
+distributing the effieicnt and explanatory implementation in an open source
+licence.
+
+@section reference Reference
+
+- <a href="http://www.ece.northwestern.edu/~nocedal/lbfgs.html">L-BFGS</a> by Jorge Nocedal.
+- <a href="http://research.microsoft.com/en-us/downloads/b1eb1016-1738-4bd5-83a9-370c9d498a03/default.aspx">Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives</a> by Galen Andrew.
+- <a href="http://chasen.org/~taku/software/misc/lbfgs/">C port (via f2c)</a> by Taku Kudo.
+- <a href="http://www.alglib.net/optimization/lbfgs.php">C#/C++/Delphi/VisualBasic6 port</a> in ALGLIB.
+- <a href="http://cctbx.sourceforge.net/">Computational Crystallography Toolbox</a> includes
+ <a href="http://cctbx.sourceforge.net/current_cvs/c_plus_plus/namespacescitbx_1_1lbfgs.html">scitbx::lbfgs</a>.
+*/
+
+#endif/*__LBFGS_H__*/
diff --git a/training/liblbfgs/ll_test.cc b/training/liblbfgs/ll_test.cc
new file mode 100644
index 00000000..058db716
--- /dev/null
+++ b/training/liblbfgs/ll_test.cc
@@ -0,0 +1,32 @@
+#include <iostream>
+#include <liblbfgs/lbfgs++.h>
+
+using namespace std;
+
+// Function must be lbfgsfloatval_t f(x.begin, x.end, g.begin)
+lbfgsfloatval_t func(const lbfgsfloatval_t* x, lbfgsfloatval_t* g) {
+ int i;
+ lbfgsfloatval_t fx = 0.0;
+ int n = 4;
+
+ for (i = 0;i < n;i += 2) {
+ lbfgsfloatval_t t1 = 1.0 - x[i];
+ lbfgsfloatval_t t2 = 10.0 * (x[i+1] - x[i] * x[i]);
+ g[i+1] = 20.0 * t2;
+ g[i] = -2.0 * (x[i] * g[i+1] + t1);
+ fx += t1 * t1 + t2 * t2;
+ }
+ return fx;
+}
+
+template<typename F>
+void Opt(F& f) {
+ LBFGS<F> lbfgs(4, f, 1.0);
+ lbfgs.Optimize();
+}
+
+int main(int argc, char** argv) {
+ Opt(func);
+ return 0;
+}
+