summaryrefslogtreecommitdiff
path: root/creg
diff options
context:
space:
mode:
Diffstat (limited to 'creg')
-rw-r--r--creg/Makefile.am11
-rw-r--r--creg/README7
-rw-r--r--creg/creg.cc334
-rw-r--r--creg/json_feature_map_lexer.h15
-rw-r--r--creg/json_feature_map_lexer.ll132
-rw-r--r--creg/test_data/iris.testfeat50
-rw-r--r--creg/test_data/iris.testresp50
-rw-r--r--creg/test_data/iris.trainfeat100
-rw-r--r--creg/test_data/iris.trainresp100
9 files changed, 799 insertions, 0 deletions
diff --git a/creg/Makefile.am b/creg/Makefile.am
new file mode 100644
index 00000000..9e25b838
--- /dev/null
+++ b/creg/Makefile.am
@@ -0,0 +1,11 @@
+# Build and install the creg regression trainer.
+bin_PROGRAMS = \
+ creg
+
+# creg is built from the driver plus the generated scanner, and links the
+# L-BFGS and utils static archives together with zlib (-lz).
+creg_SOURCES = creg.cc json_feature_map_lexer.cc
+creg_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
+
+# Generate the C++ scanner from the lex specification.
+# flex options: -s suppresses the default (echo) rule, -8 produces an 8-bit
+# scanner, -CF selects the alternate full-table representation.
+json_feature_map_lexer.cc: json_feature_map_lexer.ll
+ $(LEX) -s -8 -CF -o$@ $<
+
+# NOTE: -DNDEBUG compiles out every assert() in the creg sources.
+AM_CPPFLAGS = -W -Wall -DNDEBUG -I$(top_srcdir)/utils -I$(top_srcdir)/training
+
diff --git a/creg/README b/creg/README
new file mode 100644
index 00000000..2c04c83b
--- /dev/null
+++ b/creg/README
@@ -0,0 +1,7 @@
+creg is a fast tool for training linear and logistic regression models with
+l_1 and l_2 regularization. Its data (feature and response) format is compatible
+with ARKRegression.
+
+Example invocation:
+$ ./creg -x test_data/iris.trainfeat -y test_data/iris.trainresp --l2 100
+
diff --git a/creg/creg.cc b/creg/creg.cc
new file mode 100644
index 00000000..43f01bc4
--- /dev/null
+++ b/creg/creg.cc
@@ -0,0 +1,334 @@
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <tr1/unordered_map>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "json_feature_map_lexer.h"
+#include "prob.h"
+#include "filelib.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "liblbfgs/lbfgs++.h"
+
+using namespace std;
+using namespace std::tr1;
+namespace po = boost::program_options;
+
+// Parses the command line into *conf.
+// If --help is given, or either of the two required training-file options is
+// missing, the option summary is written to stderr and the process exits with
+// status 1.
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description config_opts("Configuration options");
+  config_opts.add_options()
+    ("training_features,x", po::value<string>(), "File containing training instance features (ARKRegression format)")
+    ("training_responses,y", po::value<string>(), "File containing training response features (ARKRegression format)")
+    ("linear,n", "Linear (rather than logistic) regression")
+    ("l1",po::value<double>()->default_value(0.0), "l_1 regularization strength")
+    ("l2",po::value<double>()->default_value(0.0), "l_2 regularization strength")
+    ("weights,w", po::value<string>(), "Initial weights")
+    ("epsilon,e", po::value<double>()->default_value(1e-4), "Epsilon for convergence test. Terminates when ||g|| < epsilon * max(1, ||w||)")
+    ("memory_buffers,m",po::value<unsigned>()->default_value(40), "Number of memory buffers for LBFGS")
+    ("help,h", "Help");
+  po::options_description all_opts;
+  all_opts.add(config_opts);
+  po::store(parse_command_line(argc, argv, all_opts), *conf);
+
+  const bool wants_help = conf->count("help") > 0;
+  const bool has_features = conf->count("training_features") > 0;
+  const bool has_responses = conf->count("training_responses") > 0;
+  if (!wants_help && has_features && has_responses)
+    return;
+
+  cerr << all_opts << endl;
+  exit(1);
+}
+
+// One training example: a sparse feature vector x paired with its response y.
+struct TrainingInstance {
+ SparseVector<float> x;
+ // The two members share storage, so only the one that was last written
+ // (label for categorical models, value for continuous ones) is meaningful.
+ union {
+ unsigned label; // for categorical predictions
+ float value; // for continuous predictions
+ } y;
+};
+
+// State shared between ReadLabeledInstances and the feature-reader callback
+// (ReaderCB), passed to the callback through its void* argument.
+struct ReaderHelper {
+ explicit ReaderHelper(vector<TrainingInstance>* xyp) : xy_pairs(xyp), lc(), flag() {}
+ // Maps an instance id to its 1-based position in *xy_pairs; a value of 0
+ // means the id has not been assigned a position.
+ unordered_map<string, unsigned> id2ind;
+ // Destination for the loaded instances (not owned).
+ vector<TrainingInstance>* xy_pairs;
+ // Number of feature records seen so far by ReaderCB.
+ int lc;
+ // True while progress dots have been printed without a terminating newline.
+ bool flag;
+};
+
+// Feature-reader callback: attaches the feature map `fmap` to the training
+// instance previously registered under `id`.
+// `extra` must point to the ReaderHelper that owns the id -> index table.
+// Prints a progress dot every 1000 records and a running count every 40000.
+// Aborts if `id` has no registered response.
+void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) {
+  ReaderHelper* const helper = static_cast<ReaderHelper*>(extra);
+  const int seen = ++helper->lc;
+  if (seen % 1000 == 0) {
+    cerr << '.';
+    helper->flag = true;
+  }
+  if (seen % 40000 == 0) {
+    cerr << " [" << seen << "]\n";
+    helper->flag = false;
+  }
+  const unordered_map<string, unsigned>::iterator pos = helper->id2ind.find(id);
+  if (pos == helper->id2ind.end()) {
+    cerr << "Unlabeled example in line " << seen << endl;
+    abort();
+  }
+  // Stored positions are 1-based, hence the -1.
+  TrainingInstance& instance = (*helper->xy_pairs)[pos->second - 1];
+  instance.x = fmap;
+}
+
+// Loads a training set from a response file and a feature file.
+//
+//   ffeats        - path of the feature file (read through JSONFeatureMapLexer)
+//   fresp         - path of the response file; each non-empty, non-'#' line is
+//                   "<id><space or tab><response>"
+//   is_continuous - true: responses are parsed as floats;
+//                   false: responses are category labels
+//   xy_pairs      - cleared, then filled with one instance per response line
+//   labels        - existing entries keep their ids; labels seen for the
+//                   first time are appended (categorical mode only)
+//
+// Aborts on a duplicated id, on a response line that has an id but no
+// response, and (via ReaderCB) on a feature record whose id has no response.
+void ReadLabeledInstances(const string& ffeats,
+                          const string& fresp,
+                          const bool is_continuous,
+                          vector<TrainingInstance>* xy_pairs,
+                          vector<string>* labels) {
+  bool flag = false;
+  xy_pairs->clear();
+  int lc = 0;
+  ReaderHelper rh(xy_pairs);
+  unordered_map<string, unsigned> label2id;
+  cerr << "Reading training responses from " << fresp << " ..." << endl;
+  ReadFile fr(fresp);
+  for (unsigned i = 0; i < labels->size(); ++i)
+    label2id[(*labels)[i]] = i;
+  istream& in = *fr.stream();
+  string line;
+  while (getline(in, line)) {
+    ++lc;
+    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
+    if (line.empty() || line[0] == '#') continue;
+
+    // The id is everything up to the first space or tab.
+    string::size_type p = 0;
+    while (p < line.size() && line[p] != ' ' && line[p] != '\t') { ++p; }
+    const string id = line.substr(0, p);
+    unsigned& ind = rh.id2ind[id];  // value-initialized to 0 for a new id
+    if (ind != 0) { cerr << "ID " << id << " duplicated in line " << lc << endl; abort(); }
+
+    // Skip the separator; the response must follow.
+    while (p < line.size() && (line[p] == ' ' || line[p] == '\t')) { ++p; }
+    if (p >= line.size()) {
+      // Explicit check rather than assert(): the build defines NDEBUG, so an
+      // assert here would be compiled out.
+      cerr << "Missing response for ID " << id << " in line " << lc << endl;
+      abort();
+    }
+
+    xy_pairs->push_back(TrainingInstance());
+    ind = xy_pairs->size();  // 1-based position, 0 is reserved for "unseen"
+    if (is_continuous) {
+      xy_pairs->back().y.value = strtof(line.c_str() + p, 0);
+    } else {  // categorical predictions
+      const string label = line.substr(p);
+      unordered_map<string, unsigned>::iterator it = label2id.find(label);
+      if (it == label2id.end()) {
+        it = label2id.insert(make_pair(label, labels->size())).first;
+        labels->push_back(label);
+      }
+      xy_pairs->back().y.label = it->second;  // label id
+    }
+  }
+  if (flag) cerr << endl;
+  if (!is_continuous) {
+    cerr << "LABELS:";
+    for (unsigned j = 0; j < labels->size(); ++j)
+      cerr << " " << (*labels)[j];
+    cerr << endl;
+  }
+  cerr << "Reading training features from " << ffeats << " ..." << endl;
+  ReadFile ff(ffeats);
+  JSONFeatureMapLexer::ReadRules(ff.stream(), ReaderCB, &rh);
+  if (rh.flag) cerr << endl;
+}
+
+// Helper base class (not polymorphic -- just a container and some helper functions) for loss functions.
+// Real loss functions should implement double operator()(const vector<double>& x, double* g),
+// which should evaluate f(x) and g = f'(x).
+struct BaseLoss {
+  // tr       - training instances (held by reference, so they must outlive this object)
+  // dimp1    - number of categorical outputs possible for logistic regression;
+  //            for linear regression, 1 more than the dimension of the response variable
+  // numfeats - number of features p
+  // ll2      - l_2 regularization strength. Taken as double: an unsigned
+  //            parameter would truncate fractional strengths such as 0.5 to 0.
+  BaseLoss(
+      const vector<TrainingInstance>& tr,
+      unsigned dimp1,
+      unsigned numfeats,
+      double ll2) : training(tr), K(dimp1), p(numfeats), l2(ll2) {}
+
+  // Weight vector layout for K classes, with p features:
+  //   w[0 : K-2]                             = bias weights (one per non-reference class)
+  //   w[y*p + K-1 : y*p + K-1 + p - 1]       = feature weights for the y^th class
+  // This representation is used in ComputeDotProducts and GradAdd.
+
+  // Resizes *pdotprods to K-1 entries and sets entry y to
+  // bias_y + sum over the features in fx of weight * feature value.
+  void ComputeDotProducts(const SparseVector<float>& fx,  // feature vector of x
+                          const vector<double>& w,         // full weight vector
+                          vector<double>* pdotprods) const {
+    vector<double>& dotprods = *pdotprods;
+    const unsigned km1 = K - 1;
+    dotprods.resize(km1);
+    for (unsigned y = 0; y < km1; ++y)
+      dotprods[y] = w[y];  // bias terms
+    for (SparseVector<float>::const_iterator it = fx.begin(); it != fx.end(); ++it) {
+      const float fval = it->second;
+      const unsigned fid = it->first;
+      for (unsigned y = 0; y < km1; ++y)
+        dotprods[y] += w[fid + y * p + km1] * fval;
+    }
+  }
+
+  // Adds the l_2 penalty gradient (2 * l2 * w_i) to g and returns the penalty
+  // l2 * sum w_i^2. The first K-1 entries (the biases) are not regularized.
+  double ApplyRegularizationTerms(const vector<double>& weights,
+                                  double* g) const {
+    double reg = 0;
+    for (size_t i = K - 1; i < weights.size(); ++i) {
+      const double& w_i = weights[i];
+      reg += l2 * w_i * w_i;
+      g[i] += 2 * l2 * w_i;
+    }
+    return reg;
+  }
+
+  // Accumulates scale * (1, fx) into the slots of acc that belong to class y:
+  // the bias slot acc[y] and the feature slots of the y^th class.
+  void GradAdd(const SparseVector<float>& fx,
+               const unsigned y,
+               const double scale,
+               double* acc) const {
+    acc[y] += scale;  // class bias
+    for (SparseVector<float>::const_iterator it = fx.begin();
+         it != fx.end(); ++it)
+      acc[it->first + y * p + K - 1] += it->second * scale;
+  }
+
+  const vector<TrainingInstance>& training;
+  const unsigned K, p;
+  const double l2;
+};
+
+// Sum-of-squares loss for a single continuous response, plus the
+// regularization term supplied by BaseLoss.
+struct UnivariateSquaredLoss : public BaseLoss {
+  UnivariateSquaredLoss(
+      const vector<TrainingInstance>& tr,
+      unsigned numfeats,
+      const double l2) : BaseLoss(tr, 2, numfeats, l2) {}
+
+  // Evaluates the squared loss at weights x and writes its gradient to g
+  // (g must have room for x.size() entries). Returns loss + regularizer.
+  double operator()(const vector<double>& x, double* g) const {
+    fill(g, g + x.size(), 0.0);
+    double sq_err = 0;
+    vector<double> prediction(1);  // univariate prediction
+    for (vector<TrainingInstance>::const_iterator ex = training.begin();
+         ex != training.end(); ++ex) {
+      ComputeDotProducts(ex->x, x, &prediction);
+      const double target = ex->y.value;
+      const double residual = prediction[0] - target;
+      sq_err += residual * residual;
+      // d/dw (residual^2) = 2 * residual * d(prediction)/dw
+      GradAdd(ex->x, 0, 2 * residual, g);
+    }
+    const double penalty = ApplyRegularizationTerms(x, g);
+    return sq_err + penalty;
+  }
+};
+
+// Multiclass logistic (negative conditional log-likelihood) loss over K
+// classes, plus the regularization term supplied by BaseLoss. The last class
+// is the reference class: it has no weights and an unnormalized score of one.
+struct MulticlassLogLoss : public BaseLoss {
+  MulticlassLogLoss(
+      const vector<TrainingInstance>& tr,
+      unsigned k,
+      unsigned numfeats,
+      const double l2) : BaseLoss(tr, k, numfeats, l2) {}
+
+  // Evaluates the log loss at weights x and writes its gradient to g
+  // (g must have room for x.size() entries). Returns loss + regularizer.
+  double operator()(const vector<double>& x, double* g) const {
+    fill(g, g + x.size(), 0.0);
+    vector<double> scores(K - 1);  // K-1 degrees of freedom
+    vector<prob_t> posterior(K);
+    double neg_ll = 0;
+    for (vector<TrainingInstance>::const_iterator ex = training.begin();
+         ex != training.end(); ++ex) {
+      const SparseVector<float>& feats = ex->x;
+      const unsigned gold = ex->y.label;
+      ComputeDotProducts(feats, x, &scores);
+
+      // Unnormalized class scores and their sum z.
+      prob_t z;
+      for (unsigned c = 0; c < scores.size(); ++c) {
+        posterior[c] = prob_t(scores[c], init_lnx());
+        z += posterior[c];
+      }
+      posterior.back() = prob_t::One();
+      z += posterior.back();
+
+      // Normalize into p(y | x).
+      for (unsigned c = 0; c < posterior.size(); ++c)
+        posterior[c] /= z;
+
+      neg_ll -= log(posterior[gold]);  // log p(y | x)
+
+      // Gradient for each weighted class: p(c | x) - [c == gold].
+      for (unsigned c = 0; c < scores.size(); ++c) {
+        const double p_c = posterior[c].as_float();
+        GradAdd(feats, c, (c == gold) ? p_c - 1.0 : p_c, g);
+      }
+    }
+    const double penalty = ApplyRegularizationTerms(x, g);
+    return neg_ll + penalty;
+  }
+};
+
+// Minimizes `loss` over the weight vector *px by constructing an
+// LBFGS<LossFunction> optimizer with the given settings and running
+// MinimizeFunction() on it.
+//   l1, l1_start   - forwarded to the optimizer; callers in this file pass the
+//                    number of bias weights as l1_start (presumably the first
+//                    index subject to the l_1 penalty -- confirm in lbfgs++.h)
+//   memory_buffers - forwarded to the optimizer
+//   eps            - forwarded to the optimizer
+// Always returns 0; whatever MinimizeFunction() returns is not propagated.
+template <class LossFunction>
+double LearnParameters(LossFunction& loss,
+ const double l1,
+ const unsigned l1_start,
+ const unsigned memory_buffers,
+ const double eps,
+ vector<double>* px) {
+ LBFGS<LossFunction> lbfgs(px, loss, memory_buffers, l1, l1_start, eps);
+ lbfgs.MinimizeFunction();
+ return 0;
+}
+
+// Entry point: reads the training data, fits a linear (--linear) or
+// multiclass logistic regression model, and writes the learned weights to
+// stdout.
+// Exit status: 1 for a negative l1, 2 for a negative l2, 3 when a categorical
+// model is requested but no labels were read, 0 on success.
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  vector<TrainingInstance> training;
+  const string xfile = conf["training_features"].as<string>();
+  const string yfile = conf["training_responses"].as<string>();
+  double l1 = conf["l1"].as<double>();
+  double l2 = conf["l2"].as<double>();
+  const unsigned memory_buffers = conf["memory_buffers"].as<unsigned>();
+  const double epsilon = conf["epsilon"].as<double>();
+  if (l1 < 0.0) {
+    cerr << "L1 strength must be >= 0\n";
+    return 1;
+  }
+  if (l2 < 0.0) {
+    cerr << "L2 strength must be >= 0\n";
+    return 2;
+  }
+
+  const bool is_continuous = conf.count("linear") > 0;
+  vector<string> labels;  // only populated for non-continuous models
+  ReadLabeledInstances(xfile, yfile, is_continuous, &training, &labels);
+
+  if (conf.count("weights")) {
+    cerr << "Initial weights are not implemented, please implement." << endl;
+    // TODO read weights for categorical and continuous predictions
+    // can't use normal cdec weight framework
+    abort();
+  }
+
+  cerr << " Number of features: " << FD::NumFeats() << endl;
+  cerr << "Number of training examples: " << training.size() << endl;
+  const unsigned p = FD::NumFeats();
+  cout.precision(15);
+
+  if (is_continuous) {  // linear regression
+    // Layout: weights[0] is the bias, weights[1 + f] belongs to feature f.
+    vector<double> weights(1 + FD::NumFeats(), 0.0);
+    cerr << " Number of parameters: " << weights.size() << endl;
+    UnivariateSquaredLoss loss(training, p, l2);
+    LearnParameters(loss, l1, 1, memory_buffers, epsilon, &weights);
+    cout << p << "\t***CONTINUOUS***" << endl;
+    cout << "***BIAS***\t" << weights[0] << endl;
+    for (unsigned f = 0; f < p; ++f) {
+      const double w = weights[1 + f];
+      if (w)
+        cout << FD::Convert(f) << "\t" << w << endl;
+    }
+  } else {  // logistic regression
+    if (labels.empty()) {
+      // Without this guard labels.size() - 1 wraps around and the weight
+      // vector below is sized from the wrapped value.
+      cerr << "No labels were read from " << yfile << "; cannot train a categorical model\n";
+      return 3;
+    }
+    // Layout: K-1 bias weights first, then p feature weights per non-reference class.
+    vector<double> weights((1 + FD::NumFeats()) * (labels.size() - 1), 0.0);
+    cerr << " Number of parameters: " << weights.size() << endl;
+    cerr << " Number of labels: " << labels.size() << endl;
+    const unsigned K = labels.size();
+    const unsigned km1 = K - 1;
+    MulticlassLogLoss loss(training, K, p, l2);
+    LearnParameters(loss, l1, km1, memory_buffers, epsilon, &weights);
+
+    cout << p << "\t***CATEGORICAL***";
+    for (unsigned y = 0; y < K; ++y)
+      cout << '\t' << labels[y];
+    cout << endl;
+    for (unsigned y = 0; y < km1; ++y)
+      cout << labels[y] << "\t***BIAS***\t" << weights[y] << endl;
+    for (unsigned y = 0; y < km1; ++y) {
+      for (unsigned f = 0; f < p; ++f) {
+        const double w = weights[km1 + y * p + f];
+        if (w)
+          cout << labels[y] << "\t" << FD::Convert(f) << "\t" << w << endl;
+      }
+    }
+  }
+
+  return 0;
+}
+
diff --git a/creg/json_feature_map_lexer.h b/creg/json_feature_map_lexer.h
new file mode 100644
index 00000000..3324aa29
--- /dev/null
+++ b/creg/json_feature_map_lexer.h
@@ -0,0 +1,15 @@
+#ifndef JSON_FEATURE_MAP_LEXER_H_
+#define JSON_FEATURE_MAP_LEXER_H_
+
+#include <iostream>
+#include <string>
+
+#include "sparse_vector.h"
+
+// Scanner for a stream of "<id> {<json feature map>}" records.
+struct JSONFeatureMapLexer {
+  // Called once per record with the record id, its features as a sparse
+  // vector, and the opaque `extra` pointer given to ReadRules.
+  typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra);
+  // Scans `in` and invokes `func` for every record, passing `extra` through
+  // unchanged.
+  static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra);
+};
+
+#endif  // JSON_FEATURE_MAP_LEXER_H_
+
diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll
new file mode 100644
index 00000000..372b52f5
--- /dev/null
+++ b/creg/json_feature_map_lexer.ll
@@ -0,0 +1,132 @@
+%option nounput
+%{
+
+#include "json_feature_map_lexer.h"
+#include "fdict.h"
+#include "fast_sparse_vector.h"
+
+// Rename the generated scanner function so it does not use the default yylex name.
+#define YY_DECL int json_fmap_yylex (void)
+// Read scanner input from jfmap_stream (a std::istream) instead of flex's
+// default input source; `result` receives the number of bytes actually read.
+#undef YY_INPUT
+#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount())
+#define YY_SKIP_YYWRAP 1
+// Returning 1 tells the scanner there is no further input after end of stream.
+int yywrap() { return 1; }
+
+// Scanner state. These are plain globals, so the scanner keeps a single
+// shared state across calls.
+// Callback (and its opaque argument) invoked for each completed record.
+JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL;
+void* json_fmap_callback_extra = NULL;
+// Stream the scanner reads from (see YY_INPUT above).
+std::istream* jfmap_stream = NULL;
+// NOTE(review): fl is not referenced anywhere else in this file.
+bool fl = true;
+// Write position inside featname for the feature name being scanned.
+unsigned spos = 0;
+// Buffer for the current feature name (NUL-terminated when the closing quote is seen).
+char featname[16000];
+// Capacity of featmap: the maximum number of features stored per record.
+#define MAX_FEATS 20000
+// (feature id, value) pairs collected for the current record.
+std::pair<int, float> featmap[MAX_FEATS];
+// Number of entries of featmap filled so far for the current record.
+unsigned curfeat = 0;
+// Id of the record currently being scanned.
+std::string instid;
+
+// Encodes the code point described by one or two UTF-16 code units as UTF-8.
+//   w1    - first code unit; if it is a high surrogate (0xD800-0xDBFF), w2
+//           must be the matching low surrogate (0xDC00-0xDFFF)
+//   w2    - second code unit; only examined when w1 is a high surrogate
+//   putf8 - output buffer with room for at least 4 bytes (not NUL-terminated)
+// Returns the number of bytes written (1-4).
+// Aborts if w1 is a high surrogate and w2 is not a low surrogate.
+inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) {
+  uint32_t cp;
+  if ((w1 & 0xfc00) == 0xd800) {
+    if ((w2 & 0xfc00) != 0xdc00)
+      abort();
+    cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff));
+  } else {
+    cp = w1;
+  }
+
+  if (cp < 0x80) {
+    putf8[0] = static_cast<char>(cp);
+    return 1;
+  } else if (cp < 0x0800) {
+    putf8[0] = static_cast<char>(0xc0 | ((cp >> 6) & 0x1f));
+    putf8[1] = static_cast<char>(0x80 | (cp & 0x3f));
+    return 2;
+  } else if (cp < 0x10000) {
+    // The lead byte of a 3-byte sequence carries bits 12-15 of the code point.
+    putf8[0] = static_cast<char>(0xe0 | ((cp >> 12) & 0x0f));
+    putf8[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
+    putf8[2] = static_cast<char>(0x80 | (cp & 0x3f));
+    return 3;
+  } else if (cp < 0x1fffff) {
+    putf8[0] = static_cast<char>(0xf0 | ((cp >> 18) & 0x07));
+    putf8[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
+    putf8[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
+    putf8[3] = static_cast<char>(0x80 | (cp & 0x3f));
+    return 4;
+  } else {
+    abort();
+  }
+  return 0;
+}
+
+%}
+
+ID [A-Za-z_0-9]+
+HEX_D [a-fA-F0-9]
+INT [-]?[0-9]+
+DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?)
+WS [ \t\r\n]
+LCB [{]
+RCB [}]
+UNESCAPED_CH [^\"\\\b\n\r\f\t]
+
+%{
+/* Appends one byte to the feature name being scanned. Aborts instead of
+   writing past the end of featname; one byte is always kept free for the
+   terminating NUL written when the closing quote is seen. */
+static inline void jfmap_append_name_char(char c) {
+  if (spos + 1 >= sizeof(featname)) {
+    std::cerr << "feature name too long (max " << (sizeof(featname) - 1)
+              << " bytes) in instance " << instid << std::endl;
+    abort();
+  }
+  featname[spos++] = c;
+}
+%}
+
+%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE
+%%
+
+<INITIAL>{ID} { /* record id; the JSON object follows */
+                instid = yytext; BEGIN(JSON); }
+
+<JSON>{WS}*{LCB}{WS}* { BEGIN(PREVAL); }
+
+<PREVAL>\" { /* opening quote of a feature name */
+             BEGIN(STRING); spos=0; }
+
+<STRING>\" { /* closing quote: intern the name as the next feature id */
+             featname[spos] = 0;
+             if (curfeat >= MAX_FEATS) {
+               std::cerr << "too many features (max " << MAX_FEATS
+                         << ") in instance " << instid << std::endl;
+               abort();
+             }
+             featmap[curfeat].first = FD::Convert(featname);
+             BEGIN(JSONVAL);
+           }
+<STRING>{UNESCAPED_CH} { jfmap_append_name_char(yytext[0]); }
+<STRING>\\\" { jfmap_append_name_char('"'); }
+<STRING>\\\\ { jfmap_append_name_char('\\'); }
+<STRING>\\\/ { jfmap_append_name_char('/'); }
+<STRING>\\b { /* control-character escapes are dropped from the name */ }
+<STRING>\\f { }
+<STRING>\\n { }
+<STRING>\\r { }
+<STRING>\\t { }
+<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D} { /* \uXXXX escapes are not supported */
+                                          abort();
+                                        }
+
+<JSONVAL>{WS}*:{WS}* { BEGIN(DOUBLE); }
+<DOUBLE>{DOUBLE} { /* value of the feature whose name was just interned */
+                   featmap[curfeat++].second = strtod(yytext, 0);
+                   BEGIN(POSTVAL); }
+
+<POSTVAL>{WS}*,{WS}* { BEGIN(PREVAL); }
+<POSTVAL>{WS}*{RCB}\n* { /* end of record: hand the collected features to the callback */
+                         const SparseVector<float> x(&featmap[0], &featmap[curfeat]);
+                         json_fmap_callback(instid, x, json_fmap_callback_extra);
+                         curfeat = 0;
+                         BEGIN(INITIAL);
+                       }
+
+<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); }
+
+%%
+
+// Scans `in` to completion, calling `func(id, features, extra)` for each record.
+// The stream, callback and `extra` are stored in file-level globals before
+// the generated scanner is run.
+// NOTE(review): the scanner globals (e.g. curfeat, start condition) are not
+// reset here, so calling this more than once per process may not work -- confirm
+// before reusing.
+void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) {
+ json_fmap_callback = func;
+ json_fmap_callback_extra = extra;
+ jfmap_stream = in;
+ json_fmap_yylex();
+}
+
+// Disabled stand-alone driver: scans stdin and counts records in a callback.
+// Excluded from the build by the #if 0 guard.
+#if 0
+void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) {
+ (void) extra;
+ static int cc = 0;
+ cc++;
+}
+
+int main() {
+ JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL);
+}
+#endif
+
diff --git a/creg/test_data/iris.testfeat b/creg/test_data/iris.testfeat
new file mode 100644
index 00000000..f7528f81
--- /dev/null
+++ b/creg/test_data/iris.testfeat
@@ -0,0 +1,50 @@
+100 {"sepal-length": 4.9, "sepal-width": 2.5, "petal-length": 4.5, "petal-width": 1.7}
+101 {"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.0}
+102 {"sepal-length": 4.4, "sepal-width": 3.0, "petal-length": 1.3, "petal-width": 0.2}
+103 {"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2}
+104 {"sepal-length": 5.0, "sepal-width": 3.0, "petal-length": 1.6, "petal-width": 0.2}
+105 {"sepal-length": 5.1, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2}
+106 {"sepal-length": 5.5, "sepal-width": 2.3, "petal-length": 4.0, "petal-width": 1.3}
+107 {"sepal-length": 5.5, "sepal-width": 2.6, "petal-length": 4.4, "petal-width": 1.2}
+108 {"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.7, "petal-width": 0.2}
+109 {"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.7, "petal-width": 1.0}
+110 {"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.0, "petal-width": 1.7}
+111 {"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.2}
+112 {"sepal-length": 5.5, "sepal-width": 4.2, "petal-length": 1.4, "petal-width": 0.2}
+113 {"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.5}
+114 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+115 {"sepal-length": 7.7, "sepal-width": 2.6, "petal-length": 6.9, "petal-width": 2.3}
+116 {"sepal-length": 5.0, "sepal-width": 3.6, "petal-length": 1.4, "petal-width": 0.2}
+117 {"sepal-length": 6.3, "sepal-width": 2.3, "petal-length": 4.4, "petal-width": 1.3}
+118 {"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.1}
+119 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9}
+120 {"sepal-length": 5.2, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.4}
+121 {"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.6, "petal-width": 0.6}
+122 {"sepal-length": 5.0, "sepal-width": 3.2, "petal-length": 1.2, "petal-width": 0.2}
+123 {"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.3}
+124 {"sepal-length": 5.5, "sepal-width": 2.5, "petal-length": 4.0, "petal-width": 1.3}
+125 {"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5}
+126 {"sepal-length": 6.6, "sepal-width": 3.0, "petal-length": 4.4, "petal-width": 1.4}
+127 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.6, "petal-width": 0.2}
+128 {"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 5.1, "petal-width": 1.8}
+129 {"sepal-length": 6.2, "sepal-width": 3.4, "petal-length": 5.4, "petal-width": 2.3}
+130 {"sepal-length": 5.6, "sepal-width": 2.8, "petal-length": 4.9, "petal-width": 2.0}
+131 {"sepal-length": 5.7, "sepal-width": 2.9, "petal-length": 4.2, "petal-width": 1.3}
+132 {"sepal-length": 6.2, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3}
+133 {"sepal-length": 6.0, "sepal-width": 3.4, "petal-length": 4.5, "petal-width": 1.6}
+134 {"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.7, "petal-width": 0.4}
+135 {"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 6.0, "petal-width": 2.5}
+136 {"sepal-length": 6.5, "sepal-width": 3.2, "petal-length": 5.1, "petal-width": 2.0}
+137 {"sepal-length": 5.1, "sepal-width": 2.5, "petal-length": 3.0, "petal-width": 1.1}
+138 {"sepal-length": 4.3, "sepal-width": 3.0, "petal-length": 1.1, "petal-width": 0.1}
+139 {"sepal-length": 5.7, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 2.0}
+140 {"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 5.0, "petal-width": 1.5}
+141 {"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 5.3, "petal-width": 2.3}
+142 {"sepal-length": 6.5, "sepal-width": 2.8, "petal-length": 4.6, "petal-width": 1.5}
+143 {"sepal-length": 5.5, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.2}
+144 {"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2}
+145 {"sepal-length": 4.6, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.3}
+146 {"sepal-length": 5.7, "sepal-width": 2.6, "petal-length": 3.5, "petal-width": 1.0}
+147 {"sepal-length": 5.8, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 2.4}
+148 {"sepal-length": 7.7, "sepal-width": 2.8, "petal-length": 6.7, "petal-width": 2.0}
+149 {"sepal-length": 6.3, "sepal-width": 2.9, "petal-length": 5.6, "petal-width": 1.8}
diff --git a/creg/test_data/iris.testresp b/creg/test_data/iris.testresp
new file mode 100644
index 00000000..0952e4da
--- /dev/null
+++ b/creg/test_data/iris.testresp
@@ -0,0 +1,50 @@
+100 Iris-virginica
+101 Iris-virginica
+102 Iris-setosa
+103 Iris-setosa
+104 Iris-setosa
+105 Iris-setosa
+106 Iris-versicolor
+107 Iris-versicolor
+108 Iris-setosa
+109 Iris-versicolor
+110 Iris-versicolor
+111 Iris-virginica
+112 Iris-setosa
+113 Iris-versicolor
+114 Iris-setosa
+115 Iris-virginica
+116 Iris-setosa
+117 Iris-versicolor
+118 Iris-virginica
+119 Iris-virginica
+120 Iris-versicolor
+121 Iris-setosa
+122 Iris-setosa
+123 Iris-virginica
+124 Iris-versicolor
+125 Iris-versicolor
+126 Iris-versicolor
+127 Iris-setosa
+128 Iris-virginica
+129 Iris-virginica
+130 Iris-virginica
+131 Iris-versicolor
+132 Iris-versicolor
+133 Iris-versicolor
+134 Iris-setosa
+135 Iris-virginica
+136 Iris-virginica
+137 Iris-versicolor
+138 Iris-setosa
+139 Iris-virginica
+140 Iris-virginica
+141 Iris-virginica
+142 Iris-versicolor
+143 Iris-setosa
+144 Iris-setosa
+145 Iris-setosa
+146 Iris-versicolor
+147 Iris-virginica
+148 Iris-virginica
+149 Iris-virginica
diff --git a/creg/test_data/iris.trainfeat b/creg/test_data/iris.trainfeat
new file mode 100644
index 00000000..a930a446
--- /dev/null
+++ b/creg/test_data/iris.trainfeat
@@ -0,0 +1,100 @@
+0 {"sepal-length": 5.4, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5}
+1 {"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.4}
+2 {"sepal-length": 5.0, "sepal-width": 3.3, "petal-length": 1.4, "petal-width": 0.2}
+3 {"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.5, "petal-width": 1.3}
+4 {"sepal-length": 6.4, "sepal-width": 3.1, "petal-length": 5.5, "petal-width": 1.8}
+5 {"sepal-length": 7.9, "sepal-width": 3.8, "petal-length": 6.4, "petal-width": 2.0}
+6 {"sepal-length": 5.9, "sepal-width": 3.2, "petal-length": 4.8, "petal-width": 1.8}
+7 {"sepal-length": 6.7, "sepal-width": 2.5, "petal-length": 5.8, "petal-width": 1.8}
+8 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.4, "petal-width": 1.4}
+9 {"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 4.9, "petal-width": 1.5}
+10 {"sepal-length": 6.1, "sepal-width": 2.9, "petal-length": 4.7, "petal-width": 1.4}
+11 {"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 4.7, "petal-width": 1.6}
+12 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.7, "petal-width": 1.5}
+13 {"sepal-length": 6.2, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.8}
+14 {"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.3}
+15 {"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.3, "petal-width": 0.4}
+16 {"sepal-length": 7.4, "sepal-width": 2.8, "petal-length": 6.1, "petal-width": 1.9}
+17 {"sepal-length": 7.2, "sepal-width": 3.2, "petal-length": 6.0, "petal-width": 1.8}
+18 {"sepal-length": 5.7, "sepal-width": 3.8, "petal-length": 1.7, "petal-width": 0.3}
+19 {"sepal-length": 4.5, "sepal-width": 2.3, "petal-length": 1.3, "petal-width": 0.3}
+20 {"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.1, "petal-width": 1.3}
+21 {"sepal-length": 6.8, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 2.1}
+22 {"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 2.2}
+23 {"sepal-length": 4.4, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2}
+24 {"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 1.9}
+25 {"sepal-length": 4.4, "sepal-width": 2.9, "petal-length": 1.4, "petal-width": 0.2}
+26 {"sepal-length": 4.9, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.2}
+27 {"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.4}
+28 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.2}
+29 {"sepal-length": 5.6, "sepal-width": 2.5, "petal-length": 3.9, "petal-width": 1.1}
+30 {"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.3}
+31 {"sepal-length": 5.6, "sepal-width": 2.7, "petal-length": 4.2, "petal-width": 1.3}
+32 {"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.2}
+33 {"sepal-length": 6.4, "sepal-width": 2.7, "petal-length": 5.3, "petal-width": 1.9}
+34 {"sepal-length": 5.8, "sepal-width": 4.0, "petal-length": 1.2, "petal-width": 0.2}
+35 {"sepal-length": 5.2, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.2}
+36 {"sepal-length": 7.6, "sepal-width": 3.0, "petal-length": 6.6, "petal-width": 2.1}
+37 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9}
+38 {"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 4.0, "petal-width": 1.0}
+39 {"sepal-length": 7.7, "sepal-width": 3.0, "petal-length": 6.1, "petal-width": 2.3}
+40 {"sepal-length": 5.1, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.4}
+41 {"sepal-length": 6.1, "sepal-width": 2.6, "petal-length": 5.6, "petal-width": 1.4}
+42 {"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 5.6, "petal-width": 2.4}
+43 {"sepal-length": 7.7, "sepal-width": 3.8, "petal-length": 6.7, "petal-width": 2.2}
+44 {"sepal-length": 5.1, "sepal-width": 3.3, "petal-length": 1.7, "petal-width": 0.5}
+45 {"sepal-length": 6.3, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 1.5}
+46 {"sepal-length": 5.0, "sepal-width": 2.0, "petal-length": 3.5, "petal-width": 1.0}
+47 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.5, "petal-width": 0.3}
+48 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+49 {"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.9, "petal-width": 1.8}
+50 {"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.1}
+51 {"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 1.8}
+52 {"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.7, "petal-width": 1.2}
+53 {"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.0, "petal-width": 1.3}
+54 {"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1}
+55 {"sepal-length": 6.8, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.4}
+56 {"sepal-length": 6.3, "sepal-width": 2.7, "petal-length": 4.9, "petal-width": 1.8}
+57 {"sepal-length": 4.6, "sepal-width": 3.2, "petal-length": 1.4, "petal-width": 0.2}
+58 {"sepal-length": 6.3, "sepal-width": 3.4, "petal-length": 5.6, "petal-width": 2.4}
+59 {"sepal-length": 5.7, "sepal-width": 4.4, "petal-length": 1.5, "petal-width": 0.4}
+60 {"sepal-length": 6.4, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3}
+61 {"sepal-length": 7.2, "sepal-width": 3.6, "petal-length": 6.1, "petal-width": 2.5}
+62 {"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 4.1, "petal-width": 1.0}
+63 {"sepal-length": 6.0, "sepal-width": 3.0, "petal-length": 4.8, "petal-width": 1.8}
+64 {"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.6, "petal-width": 0.2}
+65 {"sepal-length": 6.9, "sepal-width": 3.2, "petal-length": 5.7, "petal-width": 2.3}
+66 {"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 4.5, "petal-width": 1.5}
+67 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.4, "petal-width": 2.1}
+68 {"sepal-length": 5.2, "sepal-width": 3.5, "petal-length": 1.5, "petal-width": 0.2}
+69 {"sepal-length": 5.3, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2}
+70 {"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.8, "petal-width": 1.1}
+71 {"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.9, "petal-width": 0.2}
+72 {"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.1, "petal-width": 1.3}
+73 {"sepal-length": 4.9, "sepal-width": 2.4, "petal-length": 3.3, "petal-width": 1.0}
+74 {"sepal-length": 6.2, "sepal-width": 2.2, "petal-length": 4.5, "petal-width": 1.5}
+75 {"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.5}
+76 {"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.6, "petal-width": 1.4}
+77 {"sepal-length": 4.6, "sepal-width": 3.6, "petal-length": 1.0, "petal-width": 0.2}
+78 {"sepal-length": 7.0, "sepal-width": 3.2, "petal-length": 4.7, "petal-width": 1.4}
+79 {"sepal-length": 6.6, "sepal-width": 2.9, "petal-length": 4.6, "petal-width": 1.3}
+80 {"sepal-length": 5.4, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2}
+81 {"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.3}
+82 {"sepal-length": 7.2, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 1.6}
+83 {"sepal-length": 7.1, "sepal-width": 3.0, "petal-length": 5.9, "petal-width": 2.1}
+84 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 4.9, "petal-width": 1.5}
+85 {"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.1}
+86 {"sepal-length": 7.3, "sepal-width": 2.9, "petal-length": 6.3, "petal-width": 1.8}
+87 {"sepal-length": 6.0, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.6}
+88 {"sepal-length": 6.8, "sepal-width": 3.2, "petal-length": 5.9, "petal-width": 2.3}
+89 {"sepal-length": 4.6, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.2}
+90 {"sepal-length": 4.8, "sepal-width": 3.1, "petal-length": 1.6, "petal-width": 0.2}
+91 {"sepal-length": 5.0, "sepal-width": 2.3, "petal-length": 3.3, "petal-width": 1.0}
+92 {"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.1, "petal-width": 2.3}
+93 {"sepal-length": 5.7, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.2}
+94 {"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.9, "petal-width": 0.4}
+95 {"sepal-length": 6.0, "sepal-width": 2.9, "petal-length": 4.5, "petal-width": 1.5}
+96 {"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.2}
+97 {"sepal-length": 5.2, "sepal-width": 4.1, "petal-length": 1.5, "petal-width": 0.1}
+98 {"sepal-length": 5.6, "sepal-width": 2.9, "petal-length": 3.6, "petal-width": 1.3}
+99 {"sepal-length": 5.8, "sepal-width": 2.6, "petal-length": 4.0, "petal-width": 1.2}
diff --git a/creg/test_data/iris.trainresp b/creg/test_data/iris.trainresp
new file mode 100644
index 00000000..d77bc6a2
--- /dev/null
+++ b/creg/test_data/iris.trainresp
@@ -0,0 +1,100 @@
+0 Iris-versicolor
+1 Iris-setosa
+2 Iris-setosa
+3 Iris-versicolor
+4 Iris-virginica
+5 Iris-virginica
+6 Iris-versicolor
+7 Iris-virginica
+8 Iris-versicolor
+9 Iris-versicolor
+10 Iris-versicolor
+11 Iris-versicolor
+12 Iris-versicolor
+13 Iris-virginica
+14 Iris-setosa
+15 Iris-setosa
+16 Iris-virginica
+17 Iris-virginica
+18 Iris-setosa
+19 Iris-setosa
+20 Iris-versicolor
+21 Iris-virginica
+22 Iris-virginica
+23 Iris-setosa
+24 Iris-virginica
+25 Iris-setosa
+26 Iris-setosa
+27 Iris-setosa
+28 Iris-versicolor
+29 Iris-versicolor
+30 Iris-setosa
+31 Iris-versicolor
+32 Iris-setosa
+33 Iris-virginica
+34 Iris-setosa
+35 Iris-setosa
+36 Iris-virginica
+37 Iris-virginica
+38 Iris-versicolor
+39 Iris-virginica
+40 Iris-setosa
+41 Iris-virginica
+42 Iris-virginica
+43 Iris-virginica
+44 Iris-setosa
+45 Iris-virginica
+46 Iris-versicolor
+47 Iris-setosa
+48 Iris-setosa
+49 Iris-virginica
+50 Iris-virginica
+51 Iris-virginica
+52 Iris-versicolor
+53 Iris-versicolor
+54 Iris-setosa
+55 Iris-versicolor
+56 Iris-virginica
+57 Iris-setosa
+58 Iris-virginica
+59 Iris-setosa
+60 Iris-versicolor
+61 Iris-virginica
+62 Iris-versicolor
+63 Iris-virginica
+64 Iris-setosa
+65 Iris-virginica
+66 Iris-versicolor
+67 Iris-virginica
+68 Iris-setosa
+69 Iris-setosa
+70 Iris-versicolor
+71 Iris-setosa
+72 Iris-versicolor
+73 Iris-versicolor
+74 Iris-versicolor
+75 Iris-virginica
+76 Iris-versicolor
+77 Iris-setosa
+78 Iris-versicolor
+79 Iris-versicolor
+80 Iris-setosa
+81 Iris-setosa
+82 Iris-virginica
+83 Iris-virginica
+84 Iris-versicolor
+85 Iris-setosa
+86 Iris-virginica
+87 Iris-versicolor
+88 Iris-virginica
+89 Iris-setosa
+90 Iris-setosa
+91 Iris-versicolor
+92 Iris-virginica
+93 Iris-versicolor
+94 Iris-setosa
+95 Iris-versicolor
+96 Iris-setosa
+97 Iris-setosa
+98 Iris-versicolor
+99 Iris-versicolor