diff options
Diffstat (limited to 'creg')
| -rw-r--r-- | creg/Makefile.am | 11 | ||||
| -rw-r--r-- | creg/README | 7 | ||||
| -rw-r--r-- | creg/creg.cc | 334 | ||||
| -rw-r--r-- | creg/json_feature_map_lexer.h | 15 | ||||
| -rw-r--r-- | creg/json_feature_map_lexer.ll | 132 | ||||
| -rw-r--r-- | creg/test_data/iris.testfeat | 50 | ||||
| -rw-r--r-- | creg/test_data/iris.testresp | 50 | ||||
| -rw-r--r-- | creg/test_data/iris.trainfeat | 100 | ||||
| -rw-r--r-- | creg/test_data/iris.trainresp | 100 | 
9 files changed, 799 insertions, 0 deletions
| diff --git a/creg/Makefile.am b/creg/Makefile.am new file mode 100644 index 00000000..9e25b838 --- /dev/null +++ b/creg/Makefile.am @@ -0,0 +1,11 @@ +bin_PROGRAMS = \ +  creg + +creg_SOURCES = creg.cc json_feature_map_lexer.cc +creg_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz + +json_feature_map_lexer.cc: json_feature_map_lexer.ll +	$(LEX) -s -8 -CF -o$@ $< + +AM_CPPFLAGS = -W -Wall -DNDEBUG -I$(top_srcdir)/utils -I$(top_srcdir)/training + diff --git a/creg/README b/creg/README new file mode 100644 index 00000000..2c04c83b --- /dev/null +++ b/creg/README @@ -0,0 +1,7 @@ +creg is a fast tool for training linear and logistic regression models with +l_1 and l_2 regularization. Its data (feature and response) format is compatible +with ARKRegression. + +Example invocation: +$ ./creg -x test_data/iris.trainfeat -y test_data/iris.trainresp --l2 100 + diff --git a/creg/creg.cc b/creg/creg.cc new file mode 100644 index 00000000..43f01bc4 --- /dev/null +++ b/creg/creg.cc @@ -0,0 +1,334 @@ +#include <cstdlib> +#include <iostream> +#include <vector> +#include <tr1/unordered_map> + +#include <boost/program_options.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "json_feature_map_lexer.h" +#include "prob.h" +#include "filelib.h" +#include "weights.h" +#include "sparse_vector.h" +#include "liblbfgs/lbfgs++.h" + +using namespace std; +using namespace std::tr1; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +  po::options_description opts("Configuration options"); +  opts.add_options() +        ("training_features,x", po::value<string>(), "File containing training instance features (ARKRegression format)") +        ("training_responses,y", po::value<string>(), "File containing training response features (ARKRegression format)") +        ("linear,n", "Linear (rather than logistic) regression") +        ("l1",po::value<double>()->default_value(0.0), 
"l_1 regularization strength") +        ("l2",po::value<double>()->default_value(0.0), "l_2 regularization strength") +        ("weights,w", po::value<string>(), "Initial weights") +        ("epsilon,e", po::value<double>()->default_value(1e-4), "Epsilon for convergence test. Terminates when ||g|| < epsilon * max(1, ||w||)") +        ("memory_buffers,m",po::value<unsigned>()->default_value(40), "Number of memory buffers for LBFGS") +        ("help,h", "Help"); +  po::options_description dcmdline_options; +  dcmdline_options.add(opts); +  po::store(parse_command_line(argc, argv, dcmdline_options), *conf); +  if (conf->count("help") || !conf->count("training_features") || !conf->count("training_responses")) { +    cerr << dcmdline_options << endl; +    exit(1); +  } +} + +struct TrainingInstance { +  SparseVector<float> x; +  union { +    unsigned label;  // for categorical predictions +    float value;     // for continuous predictions +  } y; +}; + +struct ReaderHelper { +  explicit ReaderHelper(vector<TrainingInstance>* xyp) : xy_pairs(xyp), lc(), flag() {} +  unordered_map<string, unsigned> id2ind; +  vector<TrainingInstance>* xy_pairs; +  int lc; +  bool flag; +}; + +void ReaderCB(const string& id, const SparseVector<float>& fmap, void* extra) { +  ReaderHelper& rh = *reinterpret_cast<ReaderHelper*>(extra); +  ++rh.lc; +  if (rh.lc % 1000 == 0) { cerr << '.'; rh.flag = true; } +  if (rh.lc % 40000 == 0) { cerr << " [" << rh.lc << "]\n"; rh.flag = false; } +  const unordered_map<string, unsigned>::iterator it = rh.id2ind.find(id); +  if (it == rh.id2ind.end()) { +    cerr << "Unlabeled example in line " << rh.lc << endl; +    abort(); +  } +  (*rh.xy_pairs)[it->second - 1].x = fmap; +} + +void ReadLabeledInstances(const string& ffeats, +                 const string& fresp, +                 const bool is_continuous, +                 vector<TrainingInstance>* xy_pairs, +                 vector<string>* labels) { +  bool flag = false; +  xy_pairs->clear(); +  int 
lc = 0; +  ReaderHelper rh(xy_pairs); +  unordered_map<string, unsigned> label2id; +  cerr << "Reading training responses from " << fresp << " ..." << endl; +  ReadFile fr(fresp); +  for (unsigned i = 0; i < labels->size(); ++i) +    label2id[(*labels)[i]] = i; +  istream& in = *fr.stream(); +  string line; +  while(getline(in, line)) { +    ++lc; +    if (lc % 1000 == 0) { cerr << '.'; flag = true; } +    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } +    if (line.size() == 0) continue; +    if (line[0] == '#') continue; +    unsigned p = 0; +    while (p < line.size() && line[p] != ' ' && line[p] != '\t') { ++p; } +    unsigned& ind = rh.id2ind[line.substr(0, p)]; +    if (ind != 0) { cerr << "ID " << line.substr(0, p) << " duplicated in line " << lc << endl; abort(); } +    while (p < line.size() && (line[p] == ' ' || line[p] == '\t')) { ++p; } +    if (p >= line.size()) { cerr << "Missing response in line " << lc << endl; abort(); }  // explicit check: an assert here is compiled out under -DNDEBUG +    xy_pairs->push_back(TrainingInstance()); +    ind = xy_pairs->size(); +    if (is_continuous) { +      xy_pairs->back().y.value = strtof(&line[p], 0); +    } else { // categorical predictions +      unordered_map<string, unsigned>::iterator it = label2id.find(line.substr(p)); +      if (it == label2id.end()) { +        const string label = line.substr(p); +        it = label2id.insert(make_pair(label, labels->size())).first; +        labels->push_back(label); +      } +      xy_pairs->back().y.label = it->second;  // label id +    } +  } +  if (flag) cerr << endl; +  if (!is_continuous) { +    cerr << "LABELS:"; +    for (unsigned j = 0; j < labels->size(); ++j) +      cerr << " " << (*labels)[j]; +    cerr << endl; +  } +  cerr << "Reading training features from " << ffeats << " ..." 
<< endl; +  ReadFile ff(ffeats); +  JSONFeatureMapLexer::ReadRules(ff.stream(), ReaderCB, &rh); +  if (rh.flag) cerr << endl; +} + +// helper base class (not polymorphic- just a container and some helper functions) for loss functions +// real loss functions should implement double operator()(const vector<double>& x, double* g), +// which should evaluate f(x) and g = f'(x) +struct BaseLoss { +  // dimp1 = number of categorical outputs possible for logistic regression +  // for linear regression, it should be 1 more than the dimension of the response variable +  BaseLoss( +      const vector<TrainingInstance>& tr, +      unsigned dimp1, +      unsigned numfeats, +      double ll2) : training(tr), K(dimp1), p(numfeats), l2(ll2) {}  // ll2 is a double so fractional l_2 strengths are not truncated + +  // weight vector layout for K classes, with p features (inclusive index ranges) +  //   w[0 : K-2] = bias weights +  //   w[y*p + K-1 : y*p + K-1 + p-1] = feature weights for y^th class +  // this representation is used in ComputeDotProducts and GradAdd +  void ComputeDotProducts(const SparseVector<float>& fx,  // feature vector of x +                          const vector<double>& w,         // full weight vector +                          vector<double>* pdotprods) const { +    vector<double>& dotprods = *pdotprods; +    const unsigned km1 = K - 1; +    dotprods.resize(km1); +    for (unsigned y = 0; y < km1; ++y) +      dotprods[y] = w[y];  // bias terms +    for (SparseVector<float>::const_iterator it = fx.begin(); it != fx.end(); ++it) { +      const float fval = it->second; +      const unsigned fid = it->first; +      for (unsigned y = 0; y < km1; ++y) +        dotprods[y] += w[fid + y * p + km1] * fval; +    } +  } + +  double ApplyRegularizationTerms(const vector<double>& weights, +                                  double* g) const { +    double reg = 0; +    for (size_t i = K - 1; i < weights.size(); ++i) { +      const double& w_i = weights[i]; +      reg += l2 * w_i * w_i; +      g[i] += 2 * l2 * w_i; +    } +    return reg; +  } + +  void 
GradAdd(const SparseVector<float>& fx, +               const unsigned y, +               const double scale, +               double* acc) const { +    acc[y] += scale; // class bias +    for (SparseVector<float>::const_iterator it = fx.begin(); +         it != fx.end(); ++it) +      acc[it->first + y * p + K - 1] += it->second * scale; +  } + +  const vector<TrainingInstance>& training; +  const unsigned K, p; +  const double l2; +}; + +struct UnivariateSquaredLoss : public BaseLoss { +  UnivariateSquaredLoss( +          const vector<TrainingInstance>& tr, +          unsigned numfeats, +          const double l2) : BaseLoss(tr, 2, numfeats, l2) {} + +  // evaluate squared loss and gradient +  double operator()(const vector<double>& x, double* g) const { +    fill(g, g + x.size(), 0.0); +    double cll = 0; +    vector<double> dotprods(1);  // univariate prediction +    for (unsigned i = 0; i < training.size(); ++i) { +      const SparseVector<float>& fmapx = training[i].x; +      const double refy = training[i].y.value; +      ComputeDotProducts(fmapx, x, &dotprods); +      double diff = dotprods[0] - refy; +      cll += diff * diff; + +      double scale = 2 * diff; +      GradAdd(fmapx, 0, scale, g); +    } +    double reg = ApplyRegularizationTerms(x, g); +    return cll + reg; +  } +}; + +struct MulticlassLogLoss : public BaseLoss { +  MulticlassLogLoss( +          const vector<TrainingInstance>& tr, +          unsigned k, +          unsigned numfeats, +          const double l2) : BaseLoss(tr, k, numfeats, l2) {} + +  // evaluate log loss and gradient +  double operator()(const vector<double>& x, double* g) const { +    fill(g, g + x.size(), 0.0); +    vector<double> dotprods(K - 1);  // K-1 degrees of freedom +    vector<prob_t> probs(K); +    double cll = 0; +    for (unsigned i = 0; i < training.size(); ++i) { +      const SparseVector<float>& fmapx = training[i].x; +      const unsigned refy = training[i].y.label; +      //cerr << "FMAP: " << fmapx << 
endl; +      ComputeDotProducts(fmapx, x, &dotprods); +      prob_t z; +      for (unsigned j = 0; j < dotprods.size(); ++j) +        z += (probs[j] = prob_t(dotprods[j], init_lnx())); +      z += (probs.back() = prob_t::One()); +      for (unsigned y = 0; y < probs.size(); ++y) { +        probs[y] /= z; +        //cerr << "  p(y=" << y << ")=" << probs[y].as_float() << "\tz=" << z << endl; +      } +      cll -= log(probs[refy]);  // log p(y | x) + +      for (unsigned y = 0; y < dotprods.size(); ++y) { +        double scale = probs[y].as_float(); +        if (y == refy) { scale -= 1.0; } +        GradAdd(fmapx, y, scale, g); +      } +    } +    double reg = ApplyRegularizationTerms(x, g); +    return cll + reg; +  } +}; + +template <class LossFunction> +double LearnParameters(LossFunction& loss, +                       const double l1, +                       const unsigned l1_start, +                       const unsigned memory_buffers, +                       const double eps, +                       vector<double>* px) { +  LBFGS<LossFunction> lbfgs(px, loss, memory_buffers, l1, l1_start, eps); +  lbfgs.MinimizeFunction(); +  return 0; +} + +int main(int argc, char** argv) { +  po::variables_map conf; +  InitCommandLine(argc, argv, &conf); +  string line; +  vector<TrainingInstance> training; +  const string xfile = conf["training_features"].as<string>(); +  const string yfile = conf["training_responses"].as<string>(); +  double l1 = conf["l1"].as<double>(); +  double l2 = conf["l2"].as<double>(); +  const unsigned memory_buffers = conf["memory_buffers"].as<unsigned>(); +  const double epsilon = conf["epsilon"].as<double>(); +  if (l1 < 0.0) { +    cerr << "L1 strength must be >= 0\n"; +    return 1; +  } +  if (l2 < 0.0) { +    cerr << "L2 strength must be >= 0\n"; +    return 2; +  } + +  const bool is_continuous = conf.count("linear"); +  vector<string> labels; // only populated for non-continuous models +  ReadLabeledInstances(xfile, yfile, is_continuous, 
&training, &labels); + +  if (conf.count("weights")) { +    cerr << "Initial weights are not implemented, please implement." << endl; +    // TODO read weights for categorical and continuous predictions +    // can't use normal cdec weight framework +    abort(); +  } + +  cerr << "         Number of features: " << FD::NumFeats() << endl; +  cerr << "Number of training examples: " << training.size() << endl; +  const unsigned p = FD::NumFeats(); +  cout.precision(15); + +  if (conf.count("linear")) {  // linear regression +    vector<double> weights(1 + FD::NumFeats(), 0.0); +    cerr << "       Number of parameters: " << weights.size() << endl; +    UnivariateSquaredLoss loss(training, p, l2); +    LearnParameters(loss, l1, 1, memory_buffers, epsilon, &weights); +    cout << p << "\t***CONTINUOUS***" << endl; +    cout << "***BIAS***\t" << weights[0] << endl; +    for (unsigned f = 0; f < p; ++f) { +      const double w = weights[1 + f]; +      if (w) +        cout << FD::Convert(f) << "\t" << w << endl; +    } +  } else {                     // logistic regression +    vector<double> weights((1 + FD::NumFeats()) * (labels.size() - 1), 0.0); +    cerr << "       Number of parameters: " << weights.size() << endl; +    cerr << "           Number of labels: " << labels.size() << endl; +    const unsigned K = labels.size(); +    const unsigned km1 = K - 1; +    MulticlassLogLoss loss(training, K, p, l2); +    LearnParameters(loss, l1, km1, memory_buffers, epsilon, &weights); + +    cout << p << "\t***CATEGORICAL***"; +    for (unsigned y = 0; y < K; ++y) +      cout << '\t' << labels[y]; +    cout << endl; +    for (unsigned y = 0; y < km1; ++y) +      cout << labels[y] << "\t***BIAS***\t" << weights[y] << endl; +    for (unsigned y = 0; y < km1; ++y) { +      for (unsigned f = 0; f < p; ++f) { +        const double w = weights[km1 + y * p + f]; +        if (w) +          cout << labels[y] << "\t" << FD::Convert(f) << "\t" << w << endl; +      } +    } +  } + +  
return 0; +} + diff --git a/creg/json_feature_map_lexer.h b/creg/json_feature_map_lexer.h new file mode 100644 index 00000000..3324aa29 --- /dev/null +++ b/creg/json_feature_map_lexer.h @@ -0,0 +1,15 @@ +#ifndef _RULE_LEXER_H_ +#define _RULE_LEXER_H_ + +#include <iostream> +#include <string> + +#include "sparse_vector.h" + +struct JSONFeatureMapLexer { +  typedef void (*FeatureMapCallback)(const std::string& id, const SparseVector<float>& fmap, void* extra); +  static void ReadRules(std::istream* in, FeatureMapCallback func, void* extra); +}; + +#endif + diff --git a/creg/json_feature_map_lexer.ll b/creg/json_feature_map_lexer.ll new file mode 100644 index 00000000..372b52f5 --- /dev/null +++ b/creg/json_feature_map_lexer.ll @@ -0,0 +1,132 @@ +%option nounput +%{ + +#include "json_feature_map_lexer.h" +#include "fdict.h" +#include "fast_sparse_vector.h" + +#define YY_DECL int json_fmap_yylex (void) +#undef YY_INPUT +#define YY_INPUT(buf, result, max_size) (result = jfmap_stream->read(buf, max_size).gcount()) +#define YY_SKIP_YYWRAP 1 +int yywrap() { return 1; } + +JSONFeatureMapLexer::FeatureMapCallback json_fmap_callback = NULL; +void* json_fmap_callback_extra = NULL; +std::istream* jfmap_stream = NULL; +bool fl = true; +unsigned spos = 0; +char featname[16000]; +#define MAX_FEATS 20000 +std::pair<int, float> featmap[MAX_FEATS]; +unsigned curfeat = 0; +std::string instid; + +inline unsigned unicode_escape_to_utf8(uint16_t w1, uint16_t w2, char* putf8) { +  uint32_t cp; +  if((w1 & 0xfc00) == 0xd800) { +    if((w2 & 0xfc00) == 0xdc00) { +      cp = 0x10000 + (((static_cast<uint32_t>(w1) & 0x3ff) << 10) | (w2 & 0x3ff)); +    } else { +      abort(); +    } +  } else { +    cp = w1; +  } +   +   +  if(cp < 0x80) { +    putf8[0] = static_cast<char>(cp); +    return 1; +  } else if(cp < 0x0800) { +    putf8[0] = 0xc0 | ((cp >> 6) & 0x1f); +    putf8[1] = 0x80 | (cp & 0x3f); +    return 2; +  } else if(cp < 0x10000) { +    putf8[0] = 0xe0 | ((cp >> 12) & 0x0f);  // lead byte of a 3-byte sequence carries bits 12-15 of the code point +    
putf8[1] = 0x80 | ((cp >> 6) & 0x3f); +    putf8[2] = 0x80 | (cp & 0x3f); +    return 3; +  } else if(cp < 0x1fffff) { +    putf8[0] = 0xf0 | ((cp >> 18) & 0x07); +    putf8[1] = 0x80 | ((cp >> 12) & 0x3f); +    putf8[2] = 0x80 | ((cp >> 6) & 0x3f); +    putf8[3] = 0x80 | (cp & 0x3f); +    return 4; +  } else { +    abort(); +  }  +  return 0; +} + +%} + +ID [A-Za-z_0-9]+ +HEX_D [a-fA-F0-9] +INT [-]?[0-9]+ +DOUBLE {INT}((\.[0-9]+)?([eE][-+]?[0-9]+)?) +WS [ \t\r\n] +LCB [{] +RCB [}] +UNESCAPED_CH [^\"\\\b\n\r\f\t] + +%x JSON PREVAL STRING JSONVAL POSTVAL DOUBLE +%% + +<INITIAL>{ID}                            { instid = yytext; BEGIN(JSON); } + +<JSON>{WS}*{LCB}{WS}*                    { BEGIN(PREVAL); } + +<PREVAL>\"                               { BEGIN(STRING); spos=0; } + +<STRING>\"                               { featname[spos] = 0; +                                           featmap[curfeat].first = FD::Convert(featname); +                                           BEGIN(JSONVAL); +                                         } +<STRING>{UNESCAPED_CH}                   { featname[spos++] = yytext[0]; } +<STRING>\\\"                             { featname[spos++] = '"'; } +<STRING>\\\\                             { featname[spos++] = '\\'; } +<STRING>\\\/                             { featname[spos++] = '/'; } +<STRING>\\b                              { } +<STRING>\\f                              { } +<STRING>\\n                              { } +<STRING>\\r                              { } +<STRING>\\t                              { } +<STRING>\\u{HEX_D}{HEX_D}{HEX_D}{HEX_D}  { abort(); +                                         } + +<JSONVAL>{WS}*:{WS}*                     { BEGIN(DOUBLE); } +<DOUBLE>{DOUBLE}                         { featmap[curfeat++].second = strtod(yytext, 0); +                                           BEGIN(POSTVAL); } + +<POSTVAL>{WS}*,{WS}*                     { BEGIN(PREVAL); } +<POSTVAL>{WS}*{RCB}\n*                   { +                 
                          const SparseVector<float> x(&featmap[0], &featmap[curfeat]); +                                           json_fmap_callback(instid, x, json_fmap_callback_extra); +                                           curfeat = 0; +                                           BEGIN(INITIAL); +                                         } + +<PREVAL,POSTVAL,DOUBLE,JSONVAL,INITIAL>. { std::cerr << "bad input: " << yytext << std::endl; abort(); } + +%% + +void JSONFeatureMapLexer::ReadRules(std::istream* in, FeatureMapCallback func, void* extra) { +  json_fmap_callback = func; +  json_fmap_callback_extra = extra; +  jfmap_stream = in; +  json_fmap_yylex(); +} + +#if 0 +void cb(const std::string& id, const SparseVector<float>& fmap, void* extra) { +  (void) extra; +  static int cc = 0; +  cc++; +} + +int main() { +  JSONFeatureMapLexer::ReadRules(&std::cin, cb, NULL); +} +#endif + diff --git a/creg/test_data/iris.testfeat b/creg/test_data/iris.testfeat new file mode 100644 index 00000000..f7528f81 --- /dev/null +++ b/creg/test_data/iris.testfeat @@ -0,0 +1,50 @@ +100	{"sepal-length": 4.9, "sepal-width": 2.5, "petal-length": 4.5, "petal-width": 1.7} +101	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.0} +102	{"sepal-length": 4.4, "sepal-width": 3.0, "petal-length": 1.3, "petal-width": 0.2} +103	{"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2} +104	{"sepal-length": 5.0, "sepal-width": 3.0, "petal-length": 1.6, "petal-width": 0.2} +105	{"sepal-length": 5.1, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.2} +106	{"sepal-length": 5.5, "sepal-width": 2.3, "petal-length": 4.0, "petal-width": 1.3} +107	{"sepal-length": 5.5, "sepal-width": 2.6, "petal-length": 4.4, "petal-width": 1.2} +108	{"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.7, "petal-width": 0.2} +109	{"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.7, "petal-width": 1.0} +110	{"sepal-length": 6.7, 
"sepal-width": 3.0, "petal-length": 5.0, "petal-width": 1.7} +111	{"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.2} +112	{"sepal-length": 5.5, "sepal-width": 4.2, "petal-length": 1.4, "petal-width": 0.2} +113	{"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.5} +114	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} +115	{"sepal-length": 7.7, "sepal-width": 2.6, "petal-length": 6.9, "petal-width": 2.3} +116	{"sepal-length": 5.0, "sepal-width": 3.6, "petal-length": 1.4, "petal-width": 0.2} +117	{"sepal-length": 6.3, "sepal-width": 2.3, "petal-length": 4.4, "petal-width": 1.3} +118	{"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.1} +119	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9} +120	{"sepal-length": 5.2, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.4} +121	{"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.6, "petal-width": 0.6} +122	{"sepal-length": 5.0, "sepal-width": 3.2, "petal-length": 1.2, "petal-width": 0.2} +123	{"sepal-length": 6.7, "sepal-width": 3.0, "petal-length": 5.2, "petal-width": 2.3} +124	{"sepal-length": 5.5, "sepal-width": 2.5, "petal-length": 4.0, "petal-width": 1.3} +125	{"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5} +126	{"sepal-length": 6.6, "sepal-width": 3.0, "petal-length": 4.4, "petal-width": 1.4} +127	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.6, "petal-width": 0.2} +128	{"sepal-length": 5.9, "sepal-width": 3.0, "petal-length": 5.1, "petal-width": 1.8} +129	{"sepal-length": 6.2, "sepal-width": 3.4, "petal-length": 5.4, "petal-width": 2.3} +130	{"sepal-length": 5.6, "sepal-width": 2.8, "petal-length": 4.9, "petal-width": 2.0} +131	{"sepal-length": 5.7, "sepal-width": 2.9, "petal-length": 4.2, "petal-width": 1.3} +132	{"sepal-length": 6.2, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3} 
+133	{"sepal-length": 6.0, "sepal-width": 3.4, "petal-length": 4.5, "petal-width": 1.6} +134	{"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.7, "petal-width": 0.4} +135	{"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 6.0, "petal-width": 2.5} +136	{"sepal-length": 6.5, "sepal-width": 3.2, "petal-length": 5.1, "petal-width": 2.0} +137	{"sepal-length": 5.1, "sepal-width": 2.5, "petal-length": 3.0, "petal-width": 1.1} +138	{"sepal-length": 4.3, "sepal-width": 3.0, "petal-length": 1.1, "petal-width": 0.1} +139	{"sepal-length": 5.7, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 2.0} +140	{"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 5.0, "petal-width": 1.5} +141	{"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 5.3, "petal-width": 2.3} +142	{"sepal-length": 6.5, "sepal-width": 2.8, "petal-length": 4.6, "petal-width": 1.5} +143	{"sepal-length": 5.5, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.2} +144	{"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2} +145	{"sepal-length": 4.6, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.3} +146	{"sepal-length": 5.7, "sepal-width": 2.6, "petal-length": 3.5, "petal-width": 1.0} +147	{"sepal-length": 5.8, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 2.4} +148	{"sepal-length": 7.7, "sepal-width": 2.8, "petal-length": 6.7, "petal-width": 2.0} +149	{"sepal-length": 6.3, "sepal-width": 2.9, "petal-length": 5.6, "petal-width": 1.8} diff --git a/creg/test_data/iris.testresp b/creg/test_data/iris.testresp new file mode 100644 index 00000000..0952e4da --- /dev/null +++ b/creg/test_data/iris.testresp @@ -0,0 +1,50 @@ +100	Iris-virginica +101	Iris-virginica +102	Iris-setosa +103	Iris-setosa +104	Iris-setosa +105	Iris-setosa +106	Iris-versicolor +107	Iris-versicolor +108	Iris-setosa +109	Iris-versicolor +110	Iris-versicolor +111	Iris-virginica +112	Iris-setosa +113	Iris-versicolor +114	Iris-setosa +115	Iris-virginica +116	
Iris-setosa +117	Iris-versicolor +118	Iris-virginica +119	Iris-virginica +120	Iris-versicolor +121	Iris-setosa +122	Iris-setosa +123	Iris-virginica +124	Iris-versicolor +125	Iris-versicolor +126	Iris-versicolor +127	Iris-setosa +128	Iris-virginica +129	Iris-virginica +130	Iris-virginica +131	Iris-versicolor +132	Iris-versicolor +133	Iris-versicolor +134	Iris-setosa +135	Iris-virginica +136	Iris-virginica +137	Iris-versicolor +138	Iris-setosa +139	Iris-virginica +140	Iris-virginica +141	Iris-virginica +142	Iris-versicolor +143	Iris-setosa +144	Iris-setosa +145	Iris-setosa +146	Iris-versicolor +147	Iris-virginica +148	Iris-virginica +149	Iris-virginica diff --git a/creg/test_data/iris.trainfeat b/creg/test_data/iris.trainfeat new file mode 100644 index 00000000..a930a446 --- /dev/null +++ b/creg/test_data/iris.trainfeat @@ -0,0 +1,100 @@ +0	{"sepal-length": 5.4, "sepal-width": 3.0, "petal-length": 4.5, "petal-width": 1.5} +1	{"sepal-length": 5.0, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.4} +2	{"sepal-length": 5.0, "sepal-width": 3.3, "petal-length": 1.4, "petal-width": 0.2} +3	{"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.5, "petal-width": 1.3} +4	{"sepal-length": 6.4, "sepal-width": 3.1, "petal-length": 5.5, "petal-width": 1.8} +5	{"sepal-length": 7.9, "sepal-width": 3.8, "petal-length": 6.4, "petal-width": 2.0} +6	{"sepal-length": 5.9, "sepal-width": 3.2, "petal-length": 4.8, "petal-width": 1.8} +7	{"sepal-length": 6.7, "sepal-width": 2.5, "petal-length": 5.8, "petal-width": 1.8} +8	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.4, "petal-width": 1.4} +9	{"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 4.9, "petal-width": 1.5} +10	{"sepal-length": 6.1, "sepal-width": 2.9, "petal-length": 4.7, "petal-width": 1.4} +11	{"sepal-length": 6.3, "sepal-width": 3.3, "petal-length": 4.7, "petal-width": 1.6} +12	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 4.7, "petal-width": 1.5} +13	{"sepal-length": 6.2, 
"sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.8} +14	{"sepal-length": 5.0, "sepal-width": 3.5, "petal-length": 1.3, "petal-width": 0.3} +15	{"sepal-length": 5.4, "sepal-width": 3.9, "petal-length": 1.3, "petal-width": 0.4} +16	{"sepal-length": 7.4, "sepal-width": 2.8, "petal-length": 6.1, "petal-width": 1.9} +17	{"sepal-length": 7.2, "sepal-width": 3.2, "petal-length": 6.0, "petal-width": 1.8} +18	{"sepal-length": 5.7, "sepal-width": 3.8, "petal-length": 1.7, "petal-width": 0.3} +19	{"sepal-length": 4.5, "sepal-width": 2.3, "petal-length": 1.3, "petal-width": 0.3} +20	{"sepal-length": 5.6, "sepal-width": 3.0, "petal-length": 4.1, "petal-width": 1.3} +21	{"sepal-length": 6.8, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 2.1} +22	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 2.2} +23	{"sepal-length": 4.4, "sepal-width": 3.2, "petal-length": 1.3, "petal-width": 0.2} +24	{"sepal-length": 6.3, "sepal-width": 2.5, "petal-length": 5.0, "petal-width": 1.9} +25	{"sepal-length": 4.4, "sepal-width": 2.9, "petal-length": 1.4, "petal-width": 0.2} +26	{"sepal-length": 4.9, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.2} +27	{"sepal-length": 5.4, "sepal-width": 3.4, "petal-length": 1.5, "petal-width": 0.4} +28	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 3.9, "petal-width": 1.2} +29	{"sepal-length": 5.6, "sepal-width": 2.5, "petal-length": 3.9, "petal-width": 1.1} +30	{"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.3} +31	{"sepal-length": 5.6, "sepal-width": 2.7, "petal-length": 4.2, "petal-width": 1.3} +32	{"sepal-length": 5.1, "sepal-width": 3.5, "petal-length": 1.4, "petal-width": 0.2} +33	{"sepal-length": 6.4, "sepal-width": 2.7, "petal-length": 5.3, "petal-width": 1.9} +34	{"sepal-length": 5.8, "sepal-width": 4.0, "petal-length": 1.2, "petal-width": 0.2} +35	{"sepal-length": 5.2, "sepal-width": 3.4, "petal-length": 1.4, "petal-width": 0.2} +36	{"sepal-length": 
7.6, "sepal-width": 3.0, "petal-length": 6.6, "petal-width": 2.1} +37	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.9} +38	{"sepal-length": 6.0, "sepal-width": 2.2, "petal-length": 4.0, "petal-width": 1.0} +39	{"sepal-length": 7.7, "sepal-width": 3.0, "petal-length": 6.1, "petal-width": 2.3} +40	{"sepal-length": 5.1, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.4} +41	{"sepal-length": 6.1, "sepal-width": 2.6, "petal-length": 5.6, "petal-width": 1.4} +42	{"sepal-length": 6.7, "sepal-width": 3.1, "petal-length": 5.6, "petal-width": 2.4} +43	{"sepal-length": 7.7, "sepal-width": 3.8, "petal-length": 6.7, "petal-width": 2.2} +44	{"sepal-length": 5.1, "sepal-width": 3.3, "petal-length": 1.7, "petal-width": 0.5} +45	{"sepal-length": 6.3, "sepal-width": 2.8, "petal-length": 5.1, "petal-width": 1.5} +46	{"sepal-length": 5.0, "sepal-width": 2.0, "petal-length": 3.5, "petal-width": 1.0} +47	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.5, "petal-width": 0.3} +48	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} +49	{"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.9, "petal-width": 1.8} +50	{"sepal-length": 6.4, "sepal-width": 2.8, "petal-length": 5.6, "petal-width": 2.1} +51	{"sepal-length": 6.5, "sepal-width": 3.0, "petal-length": 5.5, "petal-width": 1.8} +52	{"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.7, "petal-width": 1.2} +53	{"sepal-length": 6.1, "sepal-width": 2.8, "petal-length": 4.0, "petal-width": 1.3} +54	{"sepal-length": 4.9, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.1} +55	{"sepal-length": 6.8, "sepal-width": 2.8, "petal-length": 4.8, "petal-width": 1.4} +56	{"sepal-length": 6.3, "sepal-width": 2.7, "petal-length": 4.9, "petal-width": 1.8} +57	{"sepal-length": 4.6, "sepal-width": 3.2, "petal-length": 1.4, "petal-width": 0.2} +58	{"sepal-length": 6.3, "sepal-width": 3.4, "petal-length": 5.6, "petal-width": 2.4} +59	
{"sepal-length": 5.7, "sepal-width": 4.4, "petal-length": 1.5, "petal-width": 0.4} +60	{"sepal-length": 6.4, "sepal-width": 2.9, "petal-length": 4.3, "petal-width": 1.3} +61	{"sepal-length": 7.2, "sepal-width": 3.6, "petal-length": 6.1, "petal-width": 2.5} +62	{"sepal-length": 5.8, "sepal-width": 2.7, "petal-length": 4.1, "petal-width": 1.0} +63	{"sepal-length": 6.0, "sepal-width": 3.0, "petal-length": 4.8, "petal-width": 1.8} +64	{"sepal-length": 4.7, "sepal-width": 3.2, "petal-length": 1.6, "petal-width": 0.2} +65	{"sepal-length": 6.9, "sepal-width": 3.2, "petal-length": 5.7, "petal-width": 2.3} +66	{"sepal-length": 6.4, "sepal-width": 3.2, "petal-length": 4.5, "petal-width": 1.5} +67	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.4, "petal-width": 2.1} +68	{"sepal-length": 5.2, "sepal-width": 3.5, "petal-length": 1.5, "petal-width": 0.2} +69	{"sepal-length": 5.3, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2} +70	{"sepal-length": 5.5, "sepal-width": 2.4, "petal-length": 3.8, "petal-width": 1.1} +71	{"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.9, "petal-width": 0.2} +72	{"sepal-length": 5.7, "sepal-width": 2.8, "petal-length": 4.1, "petal-width": 1.3} +73	{"sepal-length": 4.9, "sepal-width": 2.4, "petal-length": 3.3, "petal-width": 1.0} +74	{"sepal-length": 6.2, "sepal-width": 2.2, "petal-length": 4.5, "petal-width": 1.5} +75	{"sepal-length": 6.7, "sepal-width": 3.3, "petal-length": 5.7, "petal-width": 2.5} +76	{"sepal-length": 6.1, "sepal-width": 3.0, "petal-length": 4.6, "petal-width": 1.4} +77	{"sepal-length": 4.6, "sepal-width": 3.6, "petal-length": 1.0, "petal-width": 0.2} +78	{"sepal-length": 7.0, "sepal-width": 3.2, "petal-length": 4.7, "petal-width": 1.4} +79	{"sepal-length": 6.6, "sepal-width": 2.9, "petal-length": 4.6, "petal-width": 1.3} +80	{"sepal-length": 5.4, "sepal-width": 3.7, "petal-length": 1.5, "petal-width": 0.2} +81	{"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.3} 
+82	{"sepal-length": 7.2, "sepal-width": 3.0, "petal-length": 5.8, "petal-width": 1.6} +83	{"sepal-length": 7.1, "sepal-width": 3.0, "petal-length": 5.9, "petal-width": 2.1} +84	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 4.9, "petal-width": 1.5} +85	{"sepal-length": 4.8, "sepal-width": 3.0, "petal-length": 1.4, "petal-width": 0.1} +86	{"sepal-length": 7.3, "sepal-width": 2.9, "petal-length": 6.3, "petal-width": 1.8} +87	{"sepal-length": 6.0, "sepal-width": 2.7, "petal-length": 5.1, "petal-width": 1.6} +88	{"sepal-length": 6.8, "sepal-width": 3.2, "petal-length": 5.9, "petal-width": 2.3} +89	{"sepal-length": 4.6, "sepal-width": 3.1, "petal-length": 1.5, "petal-width": 0.2} +90	{"sepal-length": 4.8, "sepal-width": 3.1, "petal-length": 1.6, "petal-width": 0.2} +91	{"sepal-length": 5.0, "sepal-width": 2.3, "petal-length": 3.3, "petal-width": 1.0} +92	{"sepal-length": 6.9, "sepal-width": 3.1, "petal-length": 5.1, "petal-width": 2.3} +93	{"sepal-length": 5.7, "sepal-width": 3.0, "petal-length": 4.2, "petal-width": 1.2} +94	{"sepal-length": 5.1, "sepal-width": 3.8, "petal-length": 1.9, "petal-width": 0.4} +95	{"sepal-length": 6.0, "sepal-width": 2.9, "petal-length": 4.5, "petal-width": 1.5} +96	{"sepal-length": 4.8, "sepal-width": 3.4, "petal-length": 1.6, "petal-width": 0.2} +97	{"sepal-length": 5.2, "sepal-width": 4.1, "petal-length": 1.5, "petal-width": 0.1} +98	{"sepal-length": 5.6, "sepal-width": 2.9, "petal-length": 3.6, "petal-width": 1.3} +99	{"sepal-length": 5.8, "sepal-width": 2.6, "petal-length": 4.0, "petal-width": 1.2} diff --git a/creg/test_data/iris.trainresp b/creg/test_data/iris.trainresp new file mode 100644 index 00000000..d77bc6a2 --- /dev/null +++ b/creg/test_data/iris.trainresp @@ -0,0 +1,100 @@ +0	Iris-versicolor +1	Iris-setosa +2	Iris-setosa +3	Iris-versicolor +4	Iris-virginica +5	Iris-virginica +6	Iris-versicolor +7	Iris-virginica +8	Iris-versicolor +9	Iris-versicolor +10	Iris-versicolor +11	Iris-versicolor +12	Iris-versicolor +13	
Iris-virginica +14	Iris-setosa +15	Iris-setosa +16	Iris-virginica +17	Iris-virginica +18	Iris-setosa +19	Iris-setosa +20	Iris-versicolor +21	Iris-virginica +22	Iris-virginica +23	Iris-setosa +24	Iris-virginica +25	Iris-setosa +26	Iris-setosa +27	Iris-setosa +28	Iris-versicolor +29	Iris-versicolor +30	Iris-setosa +31	Iris-versicolor +32	Iris-setosa +33	Iris-virginica +34	Iris-setosa +35	Iris-setosa +36	Iris-virginica +37	Iris-virginica +38	Iris-versicolor +39	Iris-virginica +40	Iris-setosa +41	Iris-virginica +42	Iris-virginica +43	Iris-virginica +44	Iris-setosa +45	Iris-virginica +46	Iris-versicolor +47	Iris-setosa +48	Iris-setosa +49	Iris-virginica +50	Iris-virginica +51	Iris-virginica +52	Iris-versicolor +53	Iris-versicolor +54	Iris-setosa +55	Iris-versicolor +56	Iris-virginica +57	Iris-setosa +58	Iris-virginica +59	Iris-setosa +60	Iris-versicolor +61	Iris-virginica +62	Iris-versicolor +63	Iris-virginica +64	Iris-setosa +65	Iris-virginica +66	Iris-versicolor +67	Iris-virginica +68	Iris-setosa +69	Iris-setosa +70	Iris-versicolor +71	Iris-setosa +72	Iris-versicolor +73	Iris-versicolor +74	Iris-versicolor +75	Iris-virginica +76	Iris-versicolor +77	Iris-setosa +78	Iris-versicolor +79	Iris-versicolor +80	Iris-setosa +81	Iris-setosa +82	Iris-virginica +83	Iris-virginica +84	Iris-versicolor +85	Iris-setosa +86	Iris-virginica +87	Iris-versicolor +88	Iris-virginica +89	Iris-setosa +90	Iris-setosa +91	Iris-versicolor +92	Iris-virginica +93	Iris-versicolor +94	Iris-setosa +95	Iris-versicolor +96	Iris-setosa +97	Iris-setosa +98	Iris-versicolor +99	Iris-versicolor | 
