From b56da6f08c4f59b562a102671ac3deb135b0538a Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sun, 13 May 2012 16:18:43 -0700
Subject: fast creg training code for univariate linear and logistic regression

---
 training/liblbfgs/lbfgs++.h  | 29 ++++++++++++++++++-----------
 training/liblbfgs/ll_test.cc |  4 ++--
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'training/liblbfgs')
diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
index 342f9b0e..92ead955 100644
--- a/training/liblbfgs/lbfgs++.h
+++ b/training/liblbfgs/lbfgs++.h
@@ -16,28 +16,33 @@
 template <typename Function>
 class LBFGS {
  public:
-  LBFGS(size_t n,            // number of variables
-        const Function& f,   // function to optimize
-        double l1_c = 0.0,   // l1 penalty strength
-        size_t m = 10        // number of memory buffers
-                             // TODO should use custom allocator here:
+  LBFGS(size_t n,              // number of variables
+        const Function& f,     // function to optimize
+        size_t m = 10,         // number of memory buffers
+        double l1_c = 0.0,     // l1 penalty strength
+        unsigned l1_start = 0, // l1 penalty starting index
+        double eps = 1e-5      // convergence epsilon
+                               // TODO should use custom allocator here:
         ) : p_x(new std::vector<lbfgsfloatval_t>(n, 0.0)),
                              owned(true),
                              m_x(*p_x),
                              func(f) {
-    Init(m, l1_c);
+    Init(m, l1_c, l1_start, eps);
   }
 
   // constructor where external vector storage for variables is used
   LBFGS(std::vector<lbfgsfloatval_t>* px,
         const Function& f,
-        double l1_c = 0.0,   // l1 penalty strength
-        size_t m = 10
+        size_t m = 10,         // number of memory buffers
+        double l1_c = 0.0,     // l1 penalty strength
+        unsigned l1_start = 0, // l1 penalty starting index
+        double eps = 1e-5      // convergence epsilon
+                               // TODO should use custom allocator here:
         ) : p_x(px),
                              owned(false),
                              m_x(*p_x),
                              func(f) {
-    Init(m, l1_c);
+    Init(m, l1_c, l1_start, eps);
   }
 
   ~LBFGS() {
@@ -60,12 +65,14 @@ class LBFGS {
   }
 
  private:
-  void Init(size_t m, double l1_c) {
+  void Init(size_t m, double l1_c, unsigned l1_start, double eps) {
     lbfgs_parameter_init(&param);
     param.m = m;
+    param.epsilon = eps;
     if (l1_c > 0.0) {
       param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
-      param.orthantwise_c = 1.0;
+      param.orthantwise_c = l1_c;
+      param.orthantwise_start = l1_start;
     }
     silence = false;
   }
diff --git a/training/liblbfgs/ll_test.cc b/training/liblbfgs/ll_test.cc
index 43c0f214..48bc0366 100644
--- a/training/liblbfgs/ll_test.cc
+++ b/training/liblbfgs/ll_test.cc
@@ -5,7 +5,7 @@ using namespace std;
 
 // Function must be lbfgsfloatval_t f(x.begin, x.end, g.begin)
 lbfgsfloatval_t func(const vector<lbfgsfloatval_t>& x, lbfgsfloatval_t* g) {
-    int i;
+    unsigned i;
     lbfgsfloatval_t fx = 0.0;
 
     for (i = 0;i < x.size();i += 2) {
@@ -24,7 +24,7 @@ void Opt(F& f) {
   lbfgs.MinimizeFunction();
 }
 
-int main(int argc, char** argv) {
+int main() {
   Opt(func);
   return 0;
 }
-- 
cgit v1.2.3


From 7001792f10cb17d88ed2d4c58364b6304bbd0816 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sun, 13 May 2012 17:09:34 -0700
Subject: put creg in its own top-level folder

---
 creg/Makefile.am                |  11 ++
 creg/README                     |   7 +
 creg/creg.cc                    | 334 ++++++++++++++++++++++++++++++++++++++++
 creg/json_feature_map_lexer.h   |  15 ++
 creg/json_feature_map_lexer.ll  | 132 ++++++++++++++++
 creg/test_data/iris.testfeat    |  50 ++++++
 creg/test_data/iris.testresp    |  50 ++++++
 creg/test_data/iris.trainfeat   | 100 ++++++++++++
 creg/test_data/iris.trainresp   | 100 ++++++++++++
 training/Makefile.am            |   4 -
 training/creg.cc                | 334 ----------------------------------------
 training/liblbfgs/lbfgs++.h     |   1 +
 utils/Makefile.am               |   4 -
 utils/json_feature_map_lexer.h  |  15 --
 utils/json_feature_map_lexer.ll | 132 ----------------
 15 files changed, 800 insertions(+), 489 deletions(-)
 create mode 100644 creg/Makefile.am
 create mode 100644 creg/README
 create mode 100644 creg/creg.cc
 create mode 100644 creg/json_feature_map_lexer.h
 create mode 100644 creg/json_feature_map_lexer.ll
 create mode 100644 creg/test_data/iris.testfeat
 create mode 100644 creg/test_data/iris.testresp
 create mode 100644 creg/test_data/iris.trainfeat
 create mode 100644 creg/test_data/iris.trainresp
 delete mode 100644 training/creg.cc
 delete mode 100644 utils/json_feature_map_lexer.h
 delete mode 100644 utils/json_feature_map_lexer.ll

(limited to 'training/liblbfgs')

diff --git a/creg/Makefile.am b/creg/Makefile.am
new file mode 100644
index 00000000..9e25b838
--- /dev/null
+++ b/creg/Makefile.am
@@ -0,0 +1,11 @@
+bin_PROGRAMS = \
+  creg
+
+creg_SOURCES = creg.cc json_feature_map_lexer.cc
+creg_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
+
+json_feature_map_lexer.cc: json_feature_map_lexer.ll
+	$(LEX) -s -8 -CF -o$@ $<
+
+AM_CPPFLAGS = -W -Wall -DNDEBUG -I$(top_srcdir)/utils -I$(top_srcdir)/training
+
diff --git a/creg/README b/creg/README
new file mode 100644
index 00000000..2c04c83b
--- /dev/null
+++ b/creg/README
@@ -0,0 +1,7 @@
+creg is a fast tool for training linear and logistic regression models with
+l_1 and l_2 regularization. Its data (feature and response) format is compatible
+with ARKRegression.
+
+Example invocation:
+$ ./creg -x test_data/iris.trainfeat -y test_data/iris.trainresp --l2 100
+
diff --git a/creg/creg.cc b/creg/creg.cc
new file mode 100644
index 00000000..43f01bc4
--- /dev/null
+++ b/creg/creg.cc
@@ -0,0 +1,334 @@
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <tr1/unordered_map>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "json_feature_map_lexer.h"
+#include "prob.h"
+#include "filelib.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "liblbfgs/lbfgs++.h"
+
+using namespace std;
+using namespace std::tr1;
+namespace po = boost::program_options;
+
+// Parses the command line into *conf. Prints the option summary to stderr and
+// exits with status 1 when --help is given or either training file is missing.
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description config_opts("Configuration options");
+  config_opts.add_options()
+        ("training_features,x", po::value<string>(), "File containing training instance features (ARKRegression format)")
+        ("training_responses,y", po::value<string>(), "File containing training response features (ARKRegression format)")
+        ("linear,n", "Linear (rather than logistic) regression")
+        ("l1",po::value<double>()->default_value(0.0), "l_1 regularization strength")
+        ("l2",po::value<double>()->default_value(0.0), "l_2 regularization strength")
+        ("weights,w", po::value<string>(), "Initial weights")
+        ("epsilon,e", po::value<double>()->default_value(1e-4), "Epsilon for convergence test. Terminates when ||g|| < epsilon * max(1, ||w||)")
+        ("memory_buffers,m",po::value<unsigned>()->default_value(40), "Number of memory buffers for LBFGS")
+        ("help,h", "Help");
+  po::options_description all_opts;
+  all_opts.add(config_opts);
+  po::store(parse_command_line(argc, argv, all_opts), *conf);
+  const bool have_inputs = conf->count("training_features") && conf->count("training_responses");
+  if (conf->count("help") || !have_inputs) { cerr << all_opts << endl; exit(1); }
+}
+
+struct TrainingInstance {  // one training example: sparse features x and its response y
+  SparseVector<float> x;   // assigned by ReaderCB when the feature file is read
+  union {                  // which member is set depends on is_continuous in ReadLabeledInstances
+    unsigned label;  // for categorical predictions
+    float value;     // for continuous predictions
+  } y;
+};
+
+struct ReaderHelper {  // state handed to ReaderCB through its void* argument
+  explicit ReaderHelper(vector<TrainingInstance>* xyp) : xy_pairs(xyp), lc(), flag() {}
+  unordered_map<string, unsigned> id2ind;  // instance id -> 1-based index into *xy_pairs (0 = not seen)
+  vector<TrainingInstance>* xy_pairs;      // instances to fill in; never freed by this struct
+  int lc;                                  // count of feature records processed so far
+  bool flag;                               // true while a progress dot is pending a final newline
+};
+
+// Lexer callback: stores the features of instance `id` in the slot created for it when responses were read.
+void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
+  ReaderHelper* const helper = reinterpret_cast<ReaderHelper*>(extra);
+  const int n = ++helper->lc;
+  if (n % 1000 == 0) { cerr << '.'; helper->flag = true; }  // progress dot
+  if (n % 40000 == 0) { cerr << " [" << n << "]\n"; helper->flag = false; }
+  const unordered_map<string, unsigned>::iterator pos = helper->id2ind.find(id);
+  if (pos == helper->id2ind.end()) {  // a feature record without a response is fatal
+    cerr << "Unlabeled example in line " << n << endl; abort();
+  }
+  (*helper->xy_pairs)[pos->second - 1].x = fmap;  // id2ind stores 1-based indices
+}
+
+// Reads responses ("id<ws>response" per line) from fresp, then features from ffeats; aborts on duplicate ids, missing responses or unlabeled feature records.
+void ReadLabeledInstances(const string& ffeats, const string& fresp,
+                 const bool is_continuous,
+                 vector<TrainingInstance>* xy_pairs,
+                 vector<string>* labels) {
+  bool flag = false;  // true while a progress dot is pending a final newline
+  xy_pairs->clear();
+  int lc = 0;         // response-file line counter (includes skipped lines)
+  ReaderHelper rh(xy_pairs);
+  unordered_map<string, unsigned> label2id;
+  cerr << "Reading training responses from " << fresp << " ..." << endl;
+  ReadFile fr(fresp);
+  for (unsigned i = 0; i < labels->size(); ++i)  // honor any labels supplied by the caller
+    label2id[(*labels)[i]] = i;
+  istream& in = *fr.stream();
+  string line;
+  while(getline(in, line)) {
+    ++lc;
+    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
+    if (line.size() == 0) continue;  // blank line
+    if (line[0] == '#') continue;    // comment line
+    unsigned p = 0;
+    while (p < line.size() && line[p] != ' ' && line[p] != '\t') { ++p; }  // p = end of the id field
+    unsigned& ind = rh.id2ind[line.substr(0, p)];  // 0 (value-initialized) means the id is new
+    if (ind != 0) { cerr << "ID " << line.substr(0, p) << " duplicated in line " << lc << endl; abort(); }
+    while (p < line.size() && (line[p] == ' ' || line[p] == '\t')) { ++p; }  // p = start of the response field
+    if (p >= line.size()) { cerr << "Missing response in line " << lc << endl; abort(); }  // explicit check: assert() is compiled out by -DNDEBUG
+    xy_pairs->push_back(TrainingInstance());
+    ind = xy_pairs->size();  // 1-based index of the instance just added
+    if (is_continuous) {
+      xy_pairs->back().y.value = strtof(&line[p], 0);
+    } else { // categorical predictions
+      unordered_map<string, unsigned>::iterator it = label2id.find(line.substr(p));
+      if (it == label2id.end()) {  // first occurrence: assign the next label id
+        const string label = line.substr(p);
+        it = label2id.insert(make_pair(label, labels->size())).first;
+        labels->push_back(label);
+      }
+      xy_pairs->back().y.label = it->second;  // label id
+    }
+  }
+  if (flag) cerr << endl;
+  if (!is_continuous) {
+    cerr << "LABELS:";
+    for (unsigned j = 0; j < labels->size(); ++j)
+      cerr << " " << (*labels)[j];
+    cerr << endl;
+  }
+  cerr << "Reading training features from " << ffeats << " ..." << endl;
+  ReadFile ff(ffeats);
+  JSONFeatureMapLexer::ReadRules(ff.stream(), ReaderCB, &rh);  // ReaderCB fills in .x for each id
+  if (rh.flag) cerr << endl;
+}
+
+// helper base class (not polymorphic- just a container and some helper functions) for loss functions
+// real loss functions should implement double operator()(const vector<double>& x, double* g),
+// which should evaluate f(x) and g = f'(x)
+struct BaseLoss {
+  // dimp1 = number of categorical outputs possible for logistic regression
+  // for linear regression, it should be 1 more than the dimension of the response variable
+  BaseLoss(
+      const vector<TrainingInstance>& tr,
+      unsigned dimp1,
+      unsigned numfeats,
+      double ll2) : training(tr), K(dimp1), p(numfeats), l2(ll2) {}  // ll2 must be floating point: an integral parameter would truncate e.g. 0.5 to 0
+
+  // weight vector layout for K classes, with p features (inclusive ranges)
+  //   w[0 : K-2] = bias weights, one per non-reference class
+  //   w[y*p + K-1 : y*p + K-1 + p-1] = feature weights for y^th class
+  // this representation is used in ComputeDotProducts and GradAdd
+  void ComputeDotProducts(const SparseVector<float>& fx,  // feature vector of x
+                          const vector<double>& w,         // full weight vector
+                          vector<double>* pdotprods) const {  // out: resized to K-1 scores
+    vector<double>& dotprods = *pdotprods;
+    const unsigned km1 = K - 1;
+    dotprods.resize(km1);
+    for (unsigned y = 0; y < km1; ++y)
+      dotprods[y] = w[y];  // bias terms
+    for (SparseVector<float>::const_iterator it = fx.begin(); it != fx.end(); ++it) {
+      const float fval = it->second;
+      const unsigned fid = it->first;
+      for (unsigned y = 0; y < km1; ++y)
+        dotprods[y] += w[fid + y * p + km1] * fval;
+    }
+  }
+
+  double ApplyRegularizationTerms(const vector<double>& weights,
+                                  double* g) const {  // returns the penalty and adds its gradient into g
+    double reg = 0;
+    for (size_t i = K - 1; i < weights.size(); ++i) {  // starts at K-1: bias weights are not penalized
+      const double& w_i = weights[i];
+      reg += l2 * w_i * w_i;
+      g[i] += 2 * l2 * w_i;
+    }
+    return reg;
+  }
+
+  void GradAdd(const SparseVector<float>& fx,
+               const unsigned y,
+               const double scale,
+               double* acc) const {  // acc += scale * d(score_y)/dw, using the layout above
+    acc[y] += scale; // class bias
+    for (SparseVector<float>::const_iterator it = fx.begin();
+         it != fx.end(); ++it)
+      acc[it->first + y * p + K - 1] += it->second * scale;
+  }
+
+  const vector<TrainingInstance>& training;  // reference only; the caller's vector must outlive this object
+  const unsigned K, p;                       // K = dimp1, p = number of features
+  const double l2;                           // l2 regularization strength
+};
+
+struct UnivariateSquaredLoss : public BaseLoss {
+  // K = 2 gives BaseLoss a single output (K - 1 = 1): one bias plus one weight per feature.
+  UnivariateSquaredLoss(const vector<TrainingInstance>& tr,
+                        unsigned numfeats,
+                        const double l2) : BaseLoss(tr, 2, numfeats, l2) {}
+
+  // Returns the summed squared error plus the regularization term at weights x
+  // and overwrites g[0 .. x.size()) with the gradient.
+  double operator()(const vector<double>& x, double* g) const {
+    vector<double> yhat(1);  // single predicted value
+    double sq_err = 0;
+    fill(g, g + x.size(), 0.0);
+    typedef vector<TrainingInstance>::const_iterator InstIter;
+    for (InstIter ex = training.begin(); ex != training.end(); ++ex) {
+      ComputeDotProducts(ex->x, x, &yhat);
+      const double target = ex->y.value;
+      const double residual = yhat[0] - target;
+      sq_err += residual * residual;
+      // d/dw residual^2 = 2 * residual * d(prediction)/dw
+      GradAdd(ex->x, 0, 2 * residual, g);
+    }
+    const double penalty = ApplyRegularizationTerms(x, g);
+    return sq_err + penalty;
+  }
+};
+
+struct MulticlassLogLoss : public BaseLoss {
+  // k is the number of classes; the last class gets no weights of its own (its score is fixed).
+  MulticlassLogLoss(const vector<TrainingInstance>& tr,
+                    unsigned k,
+                    unsigned numfeats,
+                    const double l2) : BaseLoss(tr, k, numfeats, l2) {}
+
+  // Returns the negative conditional log likelihood plus the regularization term
+  // at weights x and overwrites g[0 .. x.size()) with the gradient.
+  double operator()(const vector<double>& x, double* g) const {
+    vector<double> scores(K - 1);  // K-1 degrees of freedom
+    vector<prob_t> posterior(K);
+    double neg_ll = 0;
+    fill(g, g + x.size(), 0.0);
+    typedef vector<TrainingInstance>::const_iterator InstIter;
+    for (InstIter ex = training.begin(); ex != training.end(); ++ex) {
+      const unsigned gold = ex->y.label;
+      ComputeDotProducts(ex->x, x, &scores);
+      // unnormalized scores; init_lnx presumably marks the argument as a log-domain value (confirm in prob.h)
+      prob_t norm;
+      for (unsigned c = 0; c < scores.size(); ++c) {
+        posterior[c] = prob_t(scores[c], init_lnx());
+        norm += posterior[c];
+      }
+      norm += (posterior.back() = prob_t::One());  // last class has a fixed unnormalized score of One()
+      for (unsigned c = 0; c < posterior.size(); ++c)
+        posterior[c] /= norm;
+      neg_ll -= log(posterior[gold]);  // log p(y | x)
+      // gradient w.r.t. class c's score is p(c | x) - [c == gold]
+      for (unsigned c = 0; c < scores.size(); ++c) {
+        double scale = posterior[c].as_float();
+        if (c == gold) { scale -= 1.0; }
+        GradAdd(ex->x, c, scale, g);
+      }
+    }
+    const double penalty = ApplyRegularizationTerms(x, g);
+    return neg_ll + penalty;
+  }
+};
+
+// Minimizes `loss` with L-BFGS, using *px as the externally owned variable vector.
+template <class LossFunction>
+double LearnParameters(LossFunction& loss,
+                       const double l1, const unsigned l1_start,
+                       const unsigned memory_buffers, const double eps,
+                       vector<double>* px) {
+  // argument order follows LBFGS(px, f, m, l1_c, l1_start, eps)
+  LBFGS<LossFunction> optimizer(px, loss, memory_buffers, l1, l1_start, eps);
+  optimizer.MinimizeFunction();
+  return 0;  // always 0: the optimizer's result is not propagated
+}
+
+// Trains a linear (--linear) or multiclass logistic regression model and writes the learned weights to stdout.
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  vector<TrainingInstance> training;
+  const string xfile = conf["training_features"].as<string>();
+  const string yfile = conf["training_responses"].as<string>();
+  double l1 = conf["l1"].as<double>();
+  double l2 = conf["l2"].as<double>();
+  const unsigned memory_buffers = conf["memory_buffers"].as<unsigned>();
+  const double epsilon = conf["epsilon"].as<double>();
+  if (l1 < 0.0) {
+    cerr << "L1 strength must be >= 0\n";
+    return 1;
+  }
+  if (l2 < 0.0) {
+    cerr << "L2 strength must be >= 0\n";
+    return 2;
+  }
+
+  const bool is_continuous = conf.count("linear");
+  vector<string> labels; // only populated for non-continuous models
+  ReadLabeledInstances(xfile, yfile, is_continuous, &training, &labels);
+
+  if (conf.count("weights")) {
+    cerr << "Initial weights are not implemented, please implement." << endl;
+    // TODO read weights for categorical and continuous predictions
+    // can't use normal cdec weight framework
+    abort();
+  }
+
+  cerr << "         Number of features: " << FD::NumFeats() << endl;
+  cerr << "Number of training examples: " << training.size() << endl;
+  const unsigned p = FD::NumFeats();
+  cout.precision(15);
+
+  if (is_continuous) {  // linear regression
+    vector<double> weights(1 + FD::NumFeats(), 0.0);  // [bias, one weight per feature]
+    cerr << "       Number of parameters: " << weights.size() << endl;
+    UnivariateSquaredLoss loss(training, p, l2);
+    LearnParameters(loss, l1, 1, memory_buffers, epsilon, &weights);  // l1 penalty starts at index 1, skipping the bias
+    cout << p << "\t***CONTINUOUS***" << endl;
+    cout << "***BIAS***\t" << weights[0] << endl;
+    for (unsigned f = 0; f < p; ++f) {
+      const double w = weights[1 + f];
+      if (w)  // only non-zero weights are written
+        cout << FD::Convert(f) << "\t" << w << endl;
+    }
+  } else {                     // logistic regression
+    if (labels.size() < 2) { cerr << "Logistic regression requires at least 2 distinct labels\n"; return 3; }  // otherwise K - 1 is 0 or wraps around
+    vector<double> weights((1 + FD::NumFeats()) * (labels.size() - 1), 0.0);
+    cerr << "       Number of parameters: " << weights.size() << endl;
+    cerr << "           Number of labels: " << labels.size() << endl;
+    const unsigned K = labels.size();
+    const unsigned km1 = K - 1;
+    MulticlassLogLoss loss(training, K, p, l2);
+    LearnParameters(loss, l1, km1, memory_buffers, epsilon, &weights);  // l1 penalty skips the km1 bias weights
+    cout << p << "\t***CATEGORICAL***";
+    for (unsigned y = 0; y < K; ++y)
+      cout << '\t' << labels[y];
+    cout << endl;
+    for (unsigned y = 0; y < km1; ++y)
+      cout << labels[y] << "\t***BIAS***\t" << weights[y] << endl;
+    for (unsigned y = 0; y < km1; ++y) {
+      for (unsigned f = 0; f < p; ++f) {
+        const double w = weights[km1 + y * p + f];  // same layout as BaseLoss
+        if (w)
+          cout << labels[y] << "\t" << FD::Convert(f) << "\t" << w << endl;
+      }
+    }
+  }
+
+  return 0;
+}
+
diff --git a/creg/json_feature_map_lexer.h b/creg/json_feature_map_lexer.h
new file mode 100644
index 00000000..3324aa29
--- /dev/null
+++ b/creg/json_feature_map_lexer.h
@@ -0,0 +1,15 @@
+#ifndef JSON_FEATURE_MAP_LEXER_H_
+#define JSON_FEATURE_MAP_LEXER_H_
+
+#include <iostream>
+#include <string>
+
+#include "sparse_vector.h"
+// Scans "<id> {json feature map}" records from a stream and invokes a callback once per record.
+struct JSONFeatureMapLexer {
+  typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra);
+  static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra);  // extra is passed through to func unchanged
+};
+
+#endif
+
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
new file mode 100644
index 00000000..372b52f5
--- /dev/null
+++ b/creg/json_feature_map_lexer.ll
@@ -0,0 +1,132 @@
+%option nounput
+%{
+
+#include "json_feature_map_lexer.h"
+#include "fdict.h"
+#include "fast_sparse_vector.h"
+
+#define YY_DECL int json_fmap_yylex (void)
+#undef YY_INPUT
+#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount())
+#define YY_SKIP_YYWRAP 1
+int yywrap() { return 1; }
+
+JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL;  // set by ReadRules; invoked when a record's closing brace is scanned
+void* json_fmap_callback_extra = NULL;  // opaque pointer passed back to the callback
+std::istream* jfmap_stream = NULL;      // input source read by YY_INPUT
+bool fl = true;                         // NOTE(review): not referenced in the visible part of this file
+unsigned spos = 0;                      // next write position in featname
+char featname[16000];                   // feature name currently being scanned (NUL-terminated at the closing quote)
+#define MAX_FEATS 20000
+std::pair<int, float> featmap[MAX_FEATS];  // (feature id, value) pairs of the current record
+unsigned curfeat = 0;                   // number of completed pairs in featmap for the current record
+std::string instid;                     // id of the record currently being scanned
+
+// Converts one UTF-16 code unit w1 (or the surrogate pair w1 = high, w2 = low) to UTF-8.
+// Writes 1-4 bytes to putf8 (no NUL terminator) and returns the number of bytes written.
+// Aborts on an unpaired surrogate, which has no valid UTF-8 encoding.
+inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) {
+  uint32_t cp;
+  if((w1 & 0xfc00) == 0xd800) {         // high surrogate: needs a low surrogate in w2
+    if((w2 & 0xfc00) != 0xdc00) abort();
+    cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff));
+  } else if((w1 & 0xfc00) == 0xdc00) {  // low surrogate without a preceding high one
+    abort();
+  } else {
+    cp = w1;
+  }
+
+  if(cp < 0x80) {                       // 0xxxxxxx
+    putf8[0] = static_cast<char>(cp);
+    return 1;
+  } else if(cp < 0x0800) {              // 110xxxxx 10xxxxxx
+    putf8[0] = 0xc0 | ((cp >> 6) & 0x1f);
+    putf8[1] = 0x80 | (cp & 0x3f);
+    return 2;
+  } else if(cp < 0x10000) {             // 1110xxxx 10xxxxxx 10xxxxxx
+    putf8[0] = 0xe0 | ((cp >> 12) & 0x0f);  // lead byte carries bits 12-15 (was wrongly bits 6-9)
+    putf8[1] = 0x80 | ((cp >> 6) & 0x3f);
+    putf8[2] = 0x80 | (cp & 0x3f);
+    return 3;
+  } else if(cp <= 0x10ffff) {           // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    putf8[0] = 0xf0 | ((cp >> 18) & 0x07);
+    putf8[1] = 0x80 | ((cp >> 12) & 0x3f);
+    putf8[2] = 0x80 | ((cp >> 6) & 0x3f);
+    putf8[3] = 0x80 | (cp & 0x3f);
+    return 4;
+  }
+  abort();  // not reachable: two 16-bit units cannot exceed U+10FFFF
+  return 0;
+}
+
+%}
+
+ID [A-Za-z_0-9]+
+HEX_D [a-fA-F0-9]
+INT [-]?[0-9]+
+DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?)
+WS [ \t\r\n]
+LCB [{]
+RCB [}]
+UNESCAPED_CH [^\"\\\b\n\r\f\t]
+
+%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE
+%%
+
+<INITIAL>{ID}                            { instid = yytext; BEGIN(JSON); }
+
+<JSON>{WS}*{LCB}{WS}*                    { BEGIN(PREVAL); }
+
+<PREVAL>\"                               { BEGIN(STRING); spos=0; }
+
+<STRING>\"                               { featname[spos] = 0;  /* closing quote: the name is complete */
+                                           if (curfeat >= MAX_FEATS) { std::cerr << "too many features for instance " << instid << std::endl; abort(); }
+                                           featmap[curfeat].first = FD::Convert(featname);
+                                           BEGIN(JSONVAL); }
+<STRING>{UNESCAPED_CH}                   { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = yytext[0]; }
+<STRING>\\\"                             { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '"'; }
+<STRING>\\\\                             { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\\'; }
+<STRING>\\\/                             { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '/'; }
+<STRING>\\b                              { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\b'; }
+<STRING>\\f                              { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\f'; }
+<STRING>\\n                              { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\n'; }
+<STRING>\\r                              { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\r'; }
+<STRING>\\t                              { if (spos + 1 >= sizeof(featname)) abort(); featname[spos++] = '\t'; }
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D}  { std::cerr << "\\u escapes are not supported in feature names" << std::endl; abort();
+                                         }
+
+<JSONVAL>{WS}*:{WS}*                     { BEGIN(DOUBLE); }
+<DOUBLE>{DOUBLE}                         { featmap[curfeat++].second = strtod(yytext, 0);  /* curfeat < MAX_FEATS was checked at the closing quote */
+                                           BEGIN(POSTVAL); }
+
+<POSTVAL>{WS}*,{WS}*                     { BEGIN(PREVAL); }
+<POSTVAL>{WS}*{RCB}\n*                   {
+                                           const SparseVector<float> x(&featmap[0], &featmap[curfeat]);
+                                           json_fmap_callback(instid, x, json_fmap_callback_extra);
+                                           curfeat = 0;  /* start collecting the next record */
+                                           BEGIN(INITIAL);
+                                         }
+
+<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); }
+
+%%
+
+void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) {
+  jfmap_stream = in;                 // source consumed by YY_INPUT
+  json_fmap_callback_extra = extra;  // handed back to func unchanged
+  json_fmap_callback = func;         // invoked once per completed record
+  json_fmap_yylex();                 // run the scanner over the stream
+}
+
+#if 0
+void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) {
+  (void) extra;
+  static int cc = 0;
+  cc++;
+}
+
+int main() {
+  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
+}
+#endif
+
diff --git a/creg/test_data/iris.testfeat b/creg/test_data/iris.testfeat
new file mode 100644
index 00000000..f7528f81
--- /dev/null
+++ b/creg/test_data/iris.testfeat
@@ -0,0 +1,50 @@
+100	{"sepal-length": 4.9, "sepal-width": 2.5, "petal-length": 4.5, "petal-width": 1.7}
+101	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.0}
+102	{"sepal-length": 4.4, "sepal-width": 3.0, "petal-length": 1.3, "petal-width": 0.2}
+103	{"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2}
+104	{"sepal-length": 5.0, "sepal-width": 3.0, "petal-length": 1.6, "petal-width": 0.2}
+105	{"sepal-length": 5.1, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2}
+106	{"sepal-length": 5.5, "sepal-width": 2.3, "petal-length": 4.0, "petal-width": 1.3}
+107	{"sepal-length": 5.5, "sepal-width": 2.6, "petal-length": 4.4, "petal-width": 1.2}
+108	{"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.7, "petal-width": 0.2}
+109	{"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.7, "petal-width": 1.0}
+110	{"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.0, "petal-width": 1.7}
+111	{"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.2}
+112	{"sepal-length": 5.5, "sepal-width": 4.2, "petal-length": 1.4, "petal-width": 0.2}
+113	{"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.5}
+114	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+115	{"sepal-length": 7.7, "sepal-width": 2.6, "petal-length": 6.9, "petal-width": 2.3}
+116	{"sepal-length": 5.0, "sepal-width": 3.6, "petal-length": 1.4, "petal-width": 0.2}
+117	{"sepal-length": 6.3, "sepal-width": 2.3, "petal-length": 4.4, "petal-width": 1.3}
+118	{"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.1}
+119	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9}
+120	{"sepal-length": 5.2, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.4}
+121	{"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.6, "petal-width": 0.6}
+122	{"sepal-length": 5.0, "sepal-width": 3.2, "petal-length": 1.2, "petal-width": 0.2}
+123	{"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.3}
+124	{"sepal-length": 5.5, "sepal-width": 2.5, "petal-length": 4.0, "petal-width": 1.3}
+125	{"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5}
+126	{"sepal-length": 6.6, "sepal-width": 3.0, "petal-length": 4.4, "petal-width": 1.4}
+127	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.6, "petal-width": 0.2}
+128	{"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 5.1, "petal-width": 1.8}
+129	{"sepal-length": 6.2, "sepal-width": 3.4, "petal-length": 5.4, "petal-width": 2.3}
+130	{"sepal-length": 5.6, "sepal-width": 2.8, "petal-length": 4.9, "petal-width": 2.0}
+131	{"sepal-length": 5.7, "sepal-width": 2.9, "petal-length": 4.2, "petal-width": 1.3}
+132	{"sepal-length": 6.2, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3}
+133	{"sepal-length": 6.0, "sepal-width": 3.4, "petal-length": 4.5, "petal-width": 1.6}
+134	{"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.7, "petal-width": 0.4}
+135	{"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 6.0, "petal-width": 2.5}
+136	{"sepal-length": 6.5, "sepal-width": 3.2, "petal-length": 5.1, "petal-width": 2.0}
+137	{"sepal-length": 5.1, "sepal-width": 2.5, "petal-length": 3.0, "petal-width": 1.1}
+138	{"sepal-length": 4.3, "sepal-width": 3.0, "petal-length": 1.1, "petal-width": 0.1}
+139	{"sepal-length": 5.7, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 2.0}
+140	{"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 5.0, "petal-width": 1.5}
+141	{"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 5.3, "petal-width": 2.3}
+142	{"sepal-length": 6.5, "sepal-width": 2.8, "petal-length": 4.6, "petal-width": 1.5}
+143	{"sepal-length": 5.5, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.2}
+144	{"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2}
+145	{"sepal-length": 4.6, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.3}
+146	{"sepal-length": 5.7, "sepal-width": 2.6, "petal-length": 3.5, "petal-width": 1.0}
+147	{"sepal-length": 5.8, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 2.4}
+148	{"sepal-length": 7.7, "sepal-width": 2.8, "petal-length": 6.7, "petal-width": 2.0}
+149	{"sepal-length": 6.3, "sepal-width": 2.9, "petal-length": 5.6, "petal-width": 1.8}
diff --git a/creg/test_data/iris.testresp b/creg/test_data/iris.testresp
new file mode 100644
index 00000000..0952e4da
--- /dev/null
+++ b/creg/test_data/iris.testresp
@@ -0,0 +1,50 @@
+100	Iris-virginica
+101	Iris-virginica
+102	Iris-setosa
+103	Iris-setosa
+104	Iris-setosa
+105	Iris-setosa
+106	Iris-versicolor
+107	Iris-versicolor
+108	Iris-setosa
+109	Iris-versicolor
+110	Iris-versicolor
+111	Iris-virginica
+112	Iris-setosa
+113	Iris-versicolor
+114	Iris-setosa
+115	Iris-virginica
+116	Iris-setosa
+117	Iris-versicolor
+118	Iris-virginica
+119	Iris-virginica
+120	Iris-versicolor
+121	Iris-setosa
+122	Iris-setosa
+123	Iris-virginica
+124	Iris-versicolor
+125	Iris-versicolor
+126	Iris-versicolor
+127	Iris-setosa
+128	Iris-virginica
+129	Iris-virginica
+130	Iris-virginica
+131	Iris-versicolor
+132	Iris-versicolor
+133	Iris-versicolor
+134	Iris-setosa
+135	Iris-virginica
+136	Iris-virginica
+137	Iris-versicolor
+138	Iris-setosa
+139	Iris-virginica
+140	Iris-virginica
+141	Iris-virginica
+142	Iris-versicolor
+143	Iris-setosa
+144	Iris-setosa
+145	Iris-setosa
+146	Iris-versicolor
+147	Iris-virginica
+148	Iris-virginica
+149	Iris-virginica
diff --git a/creg/test_data/iris.trainfeat b/creg/test_data/iris.trainfeat
new file mode 100644
index 00000000..a930a446
--- /dev/null
+++ b/creg/test_data/iris.trainfeat
@@ -0,0 +1,100 @@
+0	{"sepal-length": 5.4, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5}
+1	{"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.4}
+2	{"sepal-length": 5.0, "sepal-width": 3.3, "petal-length": 1.4, "petal-width": 0.2}
+3	{"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.5, "petal-width": 1.3}
+4	{"sepal-length": 6.4, "sepal-width": 3.1, "petal-length": 5.5, "petal-width": 1.8}
+5	{"sepal-length": 7.9, "sepal-width": 3.8, "petal-length": 6.4, "petal-width": 2.0}
+6	{"sepal-length": 5.9, "sepal-width": 3.2, "petal-length": 4.8, "petal-width": 1.8}
+7	{"sepal-length": 6.7, "sepal-width": 2.5, "petal-length": 5.8, "petal-width": 1.8}
+8	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.4, "petal-width": 1.4}
+9	{"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 4.9, "petal-width": 1.5}
+10	{"sepal-length": 6.1, "sepal-width": 2.9, "petal-length": 4.7, "petal-width": 1.4}
+11	{"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 4.7, "petal-width": 1.6}
+12	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.7, "petal-width": 1.5}
+13	{"sepal-length": 6.2, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.8}
+14	{"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.3}
+15	{"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.3, "petal-width": 0.4}
+16	{"sepal-length": 7.4, "sepal-width": 2.8, "petal-length": 6.1, "petal-width": 1.9}
+17	{"sepal-length": 7.2, "sepal-width": 3.2, "petal-length": 6.0, "petal-width": 1.8}
+18	{"sepal-length": 5.7, "sepal-width": 3.8, "petal-length": 1.7, "petal-width": 0.3}
+19	{"sepal-length": 4.5, "sepal-width": 2.3, "petal-length": 1.3, "petal-width": 0.3}
+20	{"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.1, "petal-width": 1.3}
+21	{"sepal-length": 6.8, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 2.1}
+22	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 2.2}
+23	{"sepal-length": 4.4, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2}
+24	{"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 1.9}
+25	{"sepal-length": 4.4, "sepal-width": 2.9, "petal-length": 1.4, "petal-width": 0.2}
+26	{"sepal-length": 4.9, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.2}
+27	{"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.4}
+28	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.2}
+29	{"sepal-length": 5.6, "sepal-width": 2.5, "petal-length": 3.9, "petal-width": 1.1}
+30	{"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.3}
+31	{"sepal-length": 5.6, "sepal-width": 2.7, "petal-length": 4.2, "petal-width": 1.3}
+32	{"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.2}
+33	{"sepal-length": 6.4, "sepal-width": 2.7, "petal-length": 5.3, "petal-width": 1.9}
+34	{"sepal-length": 5.8, "sepal-width": 4.0, "petal-length": 1.2, "petal-width": 0.2}
+35	{"sepal-length": 5.2, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.2}
+36	{"sepal-length": 7.6, "sepal-width": 3.0, "petal-length": 6.6, "petal-width": 2.1}
+37	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9}
+38	{"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 4.0, "petal-width": 1.0}
+39	{"sepal-length": 7.7, "sepal-width": 3.0, "petal-length": 6.1, "petal-width": 2.3}
+40	{"sepal-length": 5.1, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.4}
+41	{"sepal-length": 6.1, "sepal-width": 2.6, "petal-length": 5.6, "petal-width": 1.4}
+42	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 5.6, "petal-width": 2.4}
+43	{"sepal-length": 7.7, "sepal-width": 3.8, "petal-length": 6.7, "petal-width": 2.2}
+44	{"sepal-length": 5.1, "sepal-width": 3.3, "petal-length": 1.7, "petal-width": 0.5}
+45	{"sepal-length": 6.3, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 1.5}
+46	{"sepal-length": 5.0, "sepal-width": 2.0, "petal-length": 3.5, "petal-width": 1.0}
+47	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.5, "petal-width": 0.3}
+48	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+49	{"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.9, "petal-width": 1.8}
+50	{"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.1}
+51	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 1.8}
+52	{"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.7, "petal-width": 1.2}
+53	{"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.0, "petal-width": 1.3}
+54	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+55	{"sepal-length": 6.8, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.4}
+56	{"sepal-length": 6.3, "sepal-width": 2.7, "petal-length": 4.9, "petal-width": 1.8}
+57	{"sepal-length": 4.6, "sepal-width": 3.2, "petal-length": 1.4, "petal-width": 0.2}
+58	{"sepal-length": 6.3, "sepal-width": 3.4, "petal-length": 5.6, "petal-width": 2.4}
+59	{"sepal-length": 5.7, "sepal-width": 4.4, "petal-length": 1.5, "petal-width": 0.4}
+60	{"sepal-length": 6.4, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3}
+61	{"sepal-length": 7.2, "sepal-width": 3.6, "petal-length": 6.1, "petal-width": 2.5}
+62	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 4.1, "petal-width": 1.0}
+63	{"sepal-length": 6.0, "sepal-width": 3.0, "petal-length": 4.8, "petal-width": 1.8}
+64	{"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.6, "petal-width": 0.2}
+65	{"sepal-length": 6.9, "sepal-width": 3.2, "petal-length": 5.7, "petal-width": 2.3}
+66	{"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 4.5, "petal-width": 1.5}
+67	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.4, "petal-width": 2.1}
+68	{"sepal-length": 5.2, "sepal-width": 3.5, "petal-length": 1.5, "petal-width": 0.2}
+69	{"sepal-length": 5.3, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2}
+70	{"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.8, "petal-width": 1.1}
+71	{"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.9, "petal-width": 0.2}
+72	{"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.1, "petal-width": 1.3}
+73	{"sepal-length": 4.9, "sepal-width": 2.4, "petal-length": 3.3, "petal-width": 1.0}
+74	{"sepal-length": 6.2, "sepal-width": 2.2, "petal-length": 4.5, "petal-width": 1.5}
+75	{"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.5}
+76	{"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.6, "petal-width": 1.4}
+77	{"sepal-length": 4.6, "sepal-width": 3.6, "petal-length": 1.0, "petal-width": 0.2}
+78	{"sepal-length": 7.0, "sepal-width": 3.2, "petal-length": 4.7, "petal-width": 1.4}
+79	{"sepal-length": 6.6, "sepal-width": 2.9, "petal-length": 4.6, "petal-width": 1.3}
+80	{"sepal-length": 5.4, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2}
+81	{"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.3}
+82	{"sepal-length": 7.2, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 1.6}
+83	{"sepal-length": 7.1, "sepal-width": 3.0, "petal-length": 5.9, "petal-width": 2.1}
+84	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 4.9, "petal-width": 1.5}
+85	{"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.1}
+86	{"sepal-length": 7.3, "sepal-width": 2.9, "petal-length": 6.3, "petal-width": 1.8}
+87	{"sepal-length": 6.0, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.6}
+88	{"sepal-length": 6.8, "sepal-width": 3.2, "petal-length": 5.9, "petal-width": 2.3}
+89	{"sepal-length": 4.6, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.2}
+90	{"sepal-length": 4.8, "sepal-width": 3.1, "petal-length": 1.6, "petal-width": 0.2}
+91	{"sepal-length": 5.0, "sepal-width": 2.3, "petal-length": 3.3, "petal-width": 1.0}
+92	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.1, "petal-width": 2.3}
+93	{"sepal-length": 5.7, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.2}
+94	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.9, "petal-width": 0.4}
+95	{"sepal-length": 6.0, "sepal-width": 2.9, "petal-length": 4.5, "petal-width": 1.5}
+96	{"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.2}
+97	{"sepal-length": 5.2, "sepal-width": 4.1, "petal-length": 1.5, "petal-width": 0.1}
+98	{"sepal-length": 5.6, "sepal-width": 2.9, "petal-length": 3.6, "petal-width": 1.3}
+99	{"sepal-length": 5.8, "sepal-width": 2.6, "petal-length": 4.0, "petal-width": 1.2}
diff --git a/creg/test_data/iris.trainresp b/creg/test_data/iris.trainresp
new file mode 100644
index 00000000..d77bc6a2
--- /dev/null
+++ b/creg/test_data/iris.trainresp
@@ -0,0 +1,100 @@
+0	Iris-versicolor
+1	Iris-setosa
+2	Iris-setosa
+3	Iris-versicolor
+4	Iris-virginica
+5	Iris-virginica
+6	Iris-versicolor
+7	Iris-virginica
+8	Iris-versicolor
+9	Iris-versicolor
+10	Iris-versicolor
+11	Iris-versicolor
+12	Iris-versicolor
+13	Iris-virginica
+14	Iris-setosa
+15	Iris-setosa
+16	Iris-virginica
+17	Iris-virginica
+18	Iris-setosa
+19	Iris-setosa
+20	Iris-versicolor
+21	Iris-virginica
+22	Iris-virginica
+23	Iris-setosa
+24	Iris-virginica
+25	Iris-setosa
+26	Iris-setosa
+27	Iris-setosa
+28	Iris-versicolor
+29	Iris-versicolor
+30	Iris-setosa
+31	Iris-versicolor
+32	Iris-setosa
+33	Iris-virginica
+34	Iris-setosa
+35	Iris-setosa
+36	Iris-virginica
+37	Iris-virginica
+38	Iris-versicolor
+39	Iris-virginica
+40	Iris-setosa
+41	Iris-virginica
+42	Iris-virginica
+43	Iris-virginica
+44	Iris-setosa
+45	Iris-virginica
+46	Iris-versicolor
+47	Iris-setosa
+48	Iris-setosa
+49	Iris-virginica
+50	Iris-virginica
+51	Iris-virginica
+52	Iris-versicolor
+53	Iris-versicolor
+54	Iris-setosa
+55	Iris-versicolor
+56	Iris-virginica
+57	Iris-setosa
+58	Iris-virginica
+59	Iris-setosa
+60	Iris-versicolor
+61	Iris-virginica
+62	Iris-versicolor
+63	Iris-virginica
+64	Iris-setosa
+65	Iris-virginica
+66	Iris-versicolor
+67	Iris-virginica
+68	Iris-setosa
+69	Iris-setosa
+70	Iris-versicolor
+71	Iris-setosa
+72	Iris-versicolor
+73	Iris-versicolor
+74	Iris-versicolor
+75	Iris-virginica
+76	Iris-versicolor
+77	Iris-setosa
+78	Iris-versicolor
+79	Iris-versicolor
+80	Iris-setosa
+81	Iris-setosa
+82	Iris-virginica
+83	Iris-virginica
+84	Iris-versicolor
+85	Iris-setosa
+86	Iris-virginica
+87	Iris-versicolor
+88	Iris-virginica
+89	Iris-setosa
+90	Iris-setosa
+91	Iris-versicolor
+92	Iris-virginica
+93	Iris-versicolor
+94	Iris-setosa
+95	Iris-versicolor
+96	Iris-setosa
+97	Iris-setosa
+98	Iris-versicolor
+99	Iris-versicolor
diff --git a/training/Makefile.am b/training/Makefile.am
index 4b69ea94..991ac210 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -1,6 +1,5 @@
 bin_PROGRAMS = \
   model1 \
-  creg \
   lbl_model \
   test_ngram \
   mr_em_map_adapter \
@@ -24,9 +23,6 @@ noinst_PROGRAMS = \
 
 TESTS = lbfgs_test optimize_test
 
-creg_SOURCES = creg.cc
-creg_LDADD = ./liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
-
 mpi_online_optimize_SOURCES = mpi_online_optimize.cc online_optimizer.cc
 mpi_online_optimize_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 
diff --git a/training/creg.cc b/training/creg.cc
deleted file mode 100644
index 58adea00..00000000
--- a/training/creg.cc
+++ /dev/null
@@ -1,334 +0,0 @@
-#include <cstdlib>
-#include <iostream>
-#include <vector>
-#include <tr1/unordered_map>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "json_feature_map_lexer.h"
-#include "prob.h"
-#include "filelib.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "liblbfgs/lbfgs++.h"
-
-using namespace std;
-using namespace std::tr1;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("training_features,x", po::value<string>(), "File containing training instance features (ARKRegression format)")
-        ("training_responses,y", po::value<string>(), "File containing training response features (ARKRegression format)")
-        ("linear,n", "Linear (rather than logistic) regression")
-        ("l1",po::value<double>()->default_value(0.0), "l_1 regularization strength")
-        ("l2",po::value<double>()->default_value(0.0), "l_2 regularization strength")
-        ("weights,w", po::value<string>(), "Initial weights")
-        ("epsilon,e", po::value<double>()->default_value(1e-4), "Epsilon for convergence test. Terminates when ||g|| < epsilon * max(1, ||x||)")
-        ("memory_buffers,m",po::value<unsigned>()->default_value(40), "Number of memory buffers for LBFGS")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("help") || !conf->count("training_features") || !conf->count("training_responses")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct TrainingInstance {
-  SparseVector<float> x;
-  union {
-    unsigned label;  // for categorical predictions
-    float value;     // for continuous predictions
-  } y;
-};
-
-struct ReaderHelper {
-  explicit ReaderHelper(vector<TrainingInstance>* xyp) : xy_pairs(xyp), lc(), flag() {}
-  unordered_map<string, unsigned> id2ind;
-  vector<TrainingInstance>* xy_pairs;
-  int lc;
-  bool flag;
-};
-
-void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
-  ReaderHelper& rh = *reinterpret_cast<ReaderHelper*>(extra);
-  ++rh.lc;
-  if (rh.lc % 1000 == 0) { cerr << '.'; rh.flag = true; }
-  if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; }
-  const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id);
-  if (it == rh.id2ind.end()) {
-    cerr << "Unlabeled example in line " << rh.lc << endl;
-    abort();
-  }
-  (*rh.xy_pairs)[it->second - 1].x = fmap;
-}
-
-void ReadLabeledInstances(const string& ffeats,
-                 const string& fresp,
-                 const bool is_continuous,
-                 vector<TrainingInstance>* xy_pairs,
-                 vector<string>* labels) {
-  bool flag = false;
-  xy_pairs->clear();
-  int lc = 0;
-  ReaderHelper rh(xy_pairs);
-  unordered_map<string, unsigned> label2id;
-  cerr << "Reading training responses from " << fresp << " ..." << endl;
-  ReadFile fr(fresp);
-  for (unsigned i = 0; i < labels->size(); ++i)
-    label2id[(*labels)[i]] = i;
-  istream& in = *fr.stream();
-  string line;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
-    if (line.size() == 0) continue;
-    if (line[0] == '#') continue;
-    unsigned p = 0;
-    while (p < line.size() && line[p] != ' ' && line[p] != '\t') { ++p; }
-    unsigned& ind = rh.id2ind[line.substr(0, p)];
-    if (ind != 0) { cerr << "ID " << line.substr(0, p) << " duplicated in line " << lc << endl; abort(); }
-    while (p < line.size() && (line[p] == ' ' || line[p] == '\t')) { ++p; }
-    assert(p < line.size());
-    xy_pairs->push_back(TrainingInstance());
-    ind = xy_pairs->size();
-    if (is_continuous) {
-      xy_pairs->back().y.value = strtof(&line[p], 0);
-    } else { // categorical predictions
-      unordered_map<string, unsigned>::iterator it = label2id.find(line.substr(p));
-      if (it == label2id.end()) {
-        const string label = line.substr(p);
-        it = label2id.insert(make_pair(label, labels->size())).first;
-        labels->push_back(label);
-      }
-      xy_pairs->back().y.label = it->second;  // label id
-    }
-  }
-  if (flag) cerr << endl;
-  if (!is_continuous) {
-    cerr << "LABELS:";
-    for (unsigned j = 0; j < labels->size(); ++j)
-      cerr << " " << (*labels)[j];
-    cerr << endl;
-  }
-  cerr << "Reading training features from " << ffeats << " ..." << endl;
-  ReadFile ff(ffeats);
-  JSONFeatureMapLexer::ReadRules(ff.stream(), ReaderCB, &rh);
-  if (rh.flag) cerr << endl;
-}
-
-// helper base class (not polymorphic- just a container and some helper functions) for loss functions
-// real loss functions should implement double operator()(const vector<double>& x, double* g),
-// which should evaluate f(x) and g = f'(x)
-struct BaseLoss {
-  // dimp1 = number of categorial outputs possible for logistic regression
-  // for linear regression, it should be 1 more than the dimension of the response variable
-  BaseLoss(
-      const vector<TrainingInstance>& tr,
-      unsigned dimp1,
-      unsigned numfeats,
-      unsigned ll2) : training(tr), K(dimp1), p(numfeats), l2(ll2) {}
-
-  // weight vector layout for K classes, with p features
-  //   w[0 : K-1] = bias weights
-  //   w[y*p + K : y*p + K + p - 1] = feature weights for y^th class
-  // this representation is used in ComputeDotProducts and GradAdd
-  void ComputeDotProducts(const SparseVector<float>& fx,  // feature vector of x
-                          const vector<double>& w,         // full weight vector
-                          vector<double>* pdotprods) const {
-    vector<double>& dotprods = *pdotprods;
-    const unsigned km1 = K - 1;
-    dotprods.resize(km1);
-    for (unsigned y = 0; y < km1; ++y)
-      dotprods[y] = w[y];  // bias terms
-    for (SparseVector<float>::const_iterator it = fx.begin(); it != fx.end(); ++it) {
-      const float fval = it->second;
-      const unsigned fid = it->first;
-      for (unsigned y = 0; y < km1; ++y)
-        dotprods[y] += w[fid + y * p + km1] * fval;
-    }
-  }
-
-  double ApplyRegularizationTerms(const vector<double>& weights,
-                                  double* g) const {
-    double reg = 0;
-    for (size_t i = K - 1; i < weights.size(); ++i) {
-      const double& w_i = weights[i];
-      reg += l2 * w_i * w_i;
-      g[i] += 2 * l2 * w_i;
-    }
-    return reg;
-  }
-
-  void GradAdd(const SparseVector<float>& fx,
-               const unsigned y,
-               const double scale,
-               double* acc) const {
-    acc[y] += scale; // class bias
-    for (SparseVector<float>::const_iterator it = fx.begin();
-         it != fx.end(); ++it)
-      acc[it->first + y * p + K - 1] += it->second * scale;
-  }
-
-  const vector<TrainingInstance>& training;
-  const unsigned K, p;
-  const double l2;
-};
-
-struct UnivariateSquaredLoss : public BaseLoss {
-  UnivariateSquaredLoss(
-          const vector<TrainingInstance>& tr,
-          unsigned numfeats,
-          const double l2) : BaseLoss(tr, 2, numfeats, l2) {}
-
-  // evaluate squared loss and gradient
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    double cll = 0;
-    vector<double> dotprods(1);  // univariate prediction
-    for (int i = 0; i < training.size(); ++i) {
-      const SparseVector<float>& fmapx = training[i].x;
-      const double refy = training[i].y.value;
-      ComputeDotProducts(fmapx, x, &dotprods);
-      double diff = dotprods[0] - refy;
-      cll += diff * diff;
-
-      double scale = 2 * diff;
-      GradAdd(fmapx, 0, scale, g);
-    }
-    double reg = ApplyRegularizationTerms(x, g);
-    return cll + reg;
-  }
-};
-
-struct MulticlassLogLoss : public BaseLoss {
-  MulticlassLogLoss(
-          const vector<TrainingInstance>& tr,
-          unsigned k,
-          unsigned numfeats,
-          const double l2) : BaseLoss(tr, k, numfeats, l2) {}
-
-  // evaluate log loss and gradient
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    vector<double> dotprods(K - 1);  // K-1 degrees of freedom
-    vector<prob_t> probs(K);
-    double cll = 0;
-    for (int i = 0; i < training.size(); ++i) {
-      const SparseVector<float>& fmapx = training[i].x;
-      const unsigned refy = training[i].y.label;
-      //cerr << "FMAP: " << fmapx << endl;
-      ComputeDotProducts(fmapx, x, &dotprods);
-      prob_t z;
-      for (unsigned j = 0; j < dotprods.size(); ++j)
-        z += (probs[j] = prob_t(dotprods[j], init_lnx()));
-      z += (probs.back() = prob_t::One());
-      for (unsigned y = 0; y < probs.size(); ++y) {
-        probs[y] /= z;
-        //cerr << "  p(y=" << y << ")=" << probs[y].as_float() << "\tz=" << z << endl;
-      }
-      cll -= log(probs[refy]);  // log p(y | x)
-
-      for (unsigned y = 0; y < dotprods.size(); ++y) {
-        double scale = probs[y].as_float();
-        if (y == refy) { scale -= 1.0; }
-        GradAdd(fmapx, y, scale, g);
-      }
-    }
-    double reg = ApplyRegularizationTerms(x, g);
-    return cll + reg;
-  }
-};
-
-template <class LossFunction>
-double LearnParameters(LossFunction& loss,
-                       const double l1,
-                       const unsigned l1_start,
-                       const unsigned memory_buffers,
-                       const double eps,
-                       vector<double>* px) {
-  LBFGS<LossFunction> lbfgs(px, loss, memory_buffers, l1, l1_start, eps);
-  lbfgs.MinimizeFunction();
-  return 0;
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  string line;
-  vector<TrainingInstance> training;
-  const string xfile = conf["training_features"].as<string>();
-  const string yfile = conf["training_responses"].as<string>();
-  double l1 = conf["l1"].as<double>();
-  double l2 = conf["l2"].as<double>();
-  const unsigned memory_buffers = conf["memory_buffers"].as<unsigned>();
-  const double epsilon = conf["epsilon"].as<double>();
-  if (l1 < 0.0) {
-    cerr << "L1 strength must be >= 0\n";
-    return 1;
-  }
-  if (l2 < 0.0) {
-    cerr << "L2 strength must be >= 0\n";
-    return 2;
-  }
-
-  const bool is_continuous = conf.count("linear");
-  vector<string> labels; // only populated for non-continuous models
-  ReadLabeledInstances(xfile, yfile, is_continuous, &training, &labels);
-
-  if (conf.count("weights")) {
-    cerr << "Initial weights are not implemented, please implement." << endl;
-    // TODO read weights for categorical and continuous predictions
-    // can't use normal cdec weight framework
-    abort();
-  }
-
-  cerr << "         Number of features: " << FD::NumFeats() << endl;
-  cerr << "Number of training examples: " << training.size() << endl;
-  const unsigned p = FD::NumFeats();
-  cout.precision(15);
-
-  if (conf.count("linear")) {  // linear regression
-    vector<double> weights(1 + FD::NumFeats(), 0.0);
-    cerr << "       Number of parameters: " << weights.size() << endl;
-    UnivariateSquaredLoss loss(training, p, l2);
-    LearnParameters(loss, l1, 1, memory_buffers, epsilon, &weights);
-    cout << p << "\t***CONTINUOUS***" << endl;
-    cout << "***BIAS***\t" << weights[0] << endl;
-    for (unsigned f = 0; f < p; ++f) {
-      const double w = weights[1 + f];
-      if (w)
-        cout << FD::Convert(f) << "\t" << w << endl;
-    }
-  } else {                     // logistic regression
-    vector<double> weights((1 + FD::NumFeats()) * (labels.size() - 1), 0.0);
-    cerr << "       Number of parameters: " << weights.size() << endl;
-    cerr << "           Number of labels: " << labels.size() << endl;
-    const unsigned K = labels.size();
-    const unsigned km1 = K - 1;
-    MulticlassLogLoss loss(training, K, p, l2);
-    LearnParameters(loss, l1, km1, memory_buffers, epsilon, &weights);
-
-    cout << p << "\t***CATEGORICAL***";
-    for (unsigned y = 0; y < K; ++y)
-      cout << '\t' << labels[y];
-    cout << endl;
-    for (unsigned y = 0; y < km1; ++y)
-      cout << labels[y] << "\t***BIAS***\t" << weights[y] << endl;
-    for (unsigned y = 0; y < km1; ++y) {
-      for (unsigned f = 0; f < p; ++f) {
-        const double w = weights[km1 + y * p + f];
-        if (w)
-          cout << labels[y] << "\t" << FD::Convert(f) << "\t" << w << endl;
-      }
-    }
-  }
-
-  return 0;
-}
-
diff --git a/training/liblbfgs/lbfgs++.h b/training/liblbfgs/lbfgs++.h
index 92ead955..2b40c19b 100644
--- a/training/liblbfgs/lbfgs++.h
+++ b/training/liblbfgs/lbfgs++.h
@@ -90,6 +90,7 @@ class LBFGS {
                              lbfgsfloatval_t *g,
                              const int n,
                              const lbfgsfloatval_t step) {
+      (void) x;
       (void) n;
       (void) step;
       if (!silence) { ec++; std::cerr << '.'; }
diff --git a/utils/Makefile.am b/utils/Makefile.am
index b7da0f06..46650c75 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -33,16 +33,12 @@ libutils_a_SOURCES = \
   sparse_vector.cc \
   timing_stats.cc \
   verbose.cc \
-  json_feature_map_lexer.cc \
   weights.cc
 
 if HAVE_CMPH
   libutils_a_SOURCES += perfect_hash.cc
 endif
 
-json_feature_map_lexer.cc: json_feature_map_lexer.ll
-	$(LEX) -s -8 -CF -o$@ $<
-
 phmt_SOURCES = phmt.cc
 ts_SOURCES = ts.cc
 m_test_SOURCES = m_test.cc
diff --git a/utils/json_feature_map_lexer.h b/utils/json_feature_map_lexer.h
deleted file mode 100644
index 3324aa29..00000000
--- a/utils/json_feature_map_lexer.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _RULE_LEXER_H_
-#define _RULE_LEXER_H_
-
-#include <iostream>
-#include <string>
-
-#include "sparse_vector.h"
-
-struct JSONFeatureMapLexer {
-  typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra);
-  static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra);
-};
-
-#endif
-
diff --git a/utils/json_feature_map_lexer.ll b/utils/json_feature_map_lexer.ll
deleted file mode 100644
index 372b52f5..00000000
--- a/utils/json_feature_map_lexer.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-%option nounput
-%{
-
-#include "json_feature_map_lexer.h"
-#include "fdict.h"
-#include "fast_sparse_vector.h"
-
-#define YY_DECL int json_fmap_yylex (void)
-#undef YY_INPUT
-#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount())
-#define YY_SKIP_YYWRAP 1
-int yywrap() { return 1; }
-
-JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL;
-void* json_fmap_callback_extra = NULL;
-std::istream* jfmap_stream = NULL;
-bool fl = true;
-unsigned spos = 0;
-char featname[16000];
-#define MAX_FEATS 20000
-std::pair<int, float> featmap[MAX_FEATS];
-unsigned curfeat = 0;
-std::string instid;
-
-inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) {
-  uint32_t cp;
-  if((w1 & 0xfc00) == 0xd800) {
-    if((w2 & 0xfc00) == 0xdc00) {
-      cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff));
-    } else {
-      abort();
-    }
-  } else {
-    cp = w1;
-  }
-  
-  
-  if(cp < 0x80) {
-    putf8[0] = static_cast<char>(cp);
-    return 1;
-  } else if(cp < 0x0800) {
-    putf8[0] = 0xc0 | ((cp >> 6) & 0x1f);
-    putf8[1] = 0x80 | (cp & 0x3f);
-    return 2;
-  } else if(cp < 0x10000) {
-    putf8[0] = 0xe0 | ((cp >> 6) & 0x0f);
-    putf8[1] = 0x80 | ((cp >> 6) & 0x3f);
-    putf8[2] = 0x80 | (cp & 0x3f);
-    return 3;
-  } else if(cp < 0x1fffff) {
-    putf8[0] = 0xf0 | ((cp >> 18) & 0x07);
-    putf8[1] = 0x80 | ((cp >> 12) & 0x3f);
-    putf8[2] = 0x80 | ((cp >> 6) & 0x3f);
-    putf8[3] = 0x80 | (cp & 0x3f);
-    return 4;
-  } else {
-    abort();
-  } 
-  return 0;
-}
-
-%}
-
-ID [A-Za-z_0-9]+
-HEX_D [a-fA-F0-9]
-INT [-]?[0-9]+
-DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?)
-WS [ \t\r\n]
-LCB [{]
-RCB [}]
-UNESCAPED_CH [^\"\\\b\n\r\f\t]
-
-%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE
-%%
-
-<INITIAL>{ID}                            { instid = yytext; BEGIN(JSON); }
-
-<JSON>{WS}*{LCB}{WS}*                    { BEGIN(PREVAL); }
-
-<PREVAL>\"                               { BEGIN(STRING); spos=0; }
-
-<STRING>\"                               { featname[spos] = 0;
-                                           featmap[curfeat].first = FD::Convert(featname);
-                                           BEGIN(JSONVAL);
-                                         }
-<STRING>{UNESCAPED_CH}                   { featname[spos++] = yytext[0]; }
-<STRING>\\\"                             { featname[spos++] = '"'; }
-<STRING>\\\\                             { featname[spos++] = '\\'; }
-<STRING>\\\/                             { featname[spos++] = '/'; }
-<STRING>\\b                              { }
-<STRING>\\f                              { }
-<STRING>\\n                              { }
-<STRING>\\r                              { }
-<STRING>\\t                              { }
-<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D}  { abort();
-                                         }
-
-<JSONVAL>{WS}*:{WS}*                     { BEGIN(DOUBLE); }
-<DOUBLE>{DOUBLE}                         { featmap[curfeat++].second = strtod(yytext, 0);
-                                           BEGIN(POSTVAL); }
-
-<POSTVAL>{WS}*,{WS}*                     { BEGIN(PREVAL); }
-<POSTVAL>{WS}*{RCB}\n*                   {
-                                           const SparseVector<float> x(&featmap[0], &featmap[curfeat]);
-                                           json_fmap_callback(instid, x, json_fmap_callback_extra);
-                                           curfeat = 0;
-                                           BEGIN(INITIAL);
-                                         }
-
-<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); }
-
-%%
-
-void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) {
-  json_fmap_callback = func;
-  json_fmap_callback_extra = extra;
-  jfmap_stream = in;
-  json_fmap_yylex();
-}
-
-#if 0
-void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) {
-  (void) extra;
-  static int cc = 0;
-  cc++;
-}
-
-int main() {
-  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
-}
-#endif
-
-- 
cgit v1.2.3