From 16827862bcc4f04ada087abc255c6604d88076c1 Mon Sep 17 00:00:00 2001
From: "Wu, Ke" <wuke@cs.umd.edu>
Date: Sat, 6 Dec 2014 12:17:27 -0500
Subject: Move non-MaxEnt code out of utils

1. alignment.h, argument_reorder_model.h, src_sentence.h, tree.h,
   tsuruoka_maxent.h -> decoder/ff_const_reorder_common.h.

2. Trainers source files (argument_reorder_model.cc and
   constituent_reorder_model.cc) are moved to training/const_reorder.
---
 training/Makefile.am                               |   4 +-
 training/const_reorder/Makefile.am                 |   8 +
 training/const_reorder/argument_reorder_model.cc   | 307 ++++++++++
 .../const_reorder/constituent_reorder_model.cc     | 636 +++++++++++++++++++++
 4 files changed, 953 insertions(+), 2 deletions(-)
 create mode 100644 training/const_reorder/Makefile.am
 create mode 100644 training/const_reorder/argument_reorder_model.cc
 create mode 100644 training/const_reorder/constituent_reorder_model.cc

(limited to 'training')
diff --git a/training/Makefile.am b/training/Makefile.am
index 8ef3c939..2812a9be 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -8,5 +8,5 @@ SUBDIRS = \
   dtrain \
   latent_svm \
   mira \
-  rampion
-
+  rampion \
+  const_reorder
diff --git a/training/const_reorder/Makefile.am b/training/const_reorder/Makefile.am
new file mode 100644
index 00000000..2e81e588
--- /dev/null
+++ b/training/const_reorder/Makefile.am
@@ -0,0 +1,8 @@
+bin_PROGRAMS = const_reorder_model_trainer argument_reorder_model_trainer
+
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder
+
+const_reorder_model_trainer_SOURCES = constituent_reorder_model.cc
+const_reorder_model_trainer_LDADD = ../../utils/libutils.a
+argument_reorder_model_trainer_SOURCES = argument_reorder_model.cc
+argument_reorder_model_trainer_LDADD = ../../utils/libutils.a
diff --git a/training/const_reorder/argument_reorder_model.cc b/training/const_reorder/argument_reorder_model.cc
new file mode 100644
index 00000000..54402436
--- /dev/null
+++ b/training/const_reorder/argument_reorder_model.cc
@@ -0,0 +1,307 @@
+/*
+ * argument_reorder_model.cc
+ *
+ *  Created on: Dec 15, 2013
+ *      Author: lijunhui
+ */
+
+#include <boost/program_options.hpp>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "utils/filelib.h"
+
+#include "decoder/ff_const_reorder_common.h"
+
+using namespace std;
+using namespace const_reorder;
+
+inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
+                                    const char* pszNewFName) {
+  // Pass 1: count how often every whitespace-separated token occurs.
+  Map tokenCount;
+  {
+    ReadFile counting_input(pszFName);
+    auto& src = *counting_input.stream();
+    for (string row; getline(src, row);) {
+      if (row.empty()) continue;
+      vector<string> tokens;
+      SplitOnWhitespace(row, &tokens);
+      for (size_t k = 0; k < tokens.size(); ++k) tokenCount[tokens[k]] += 1;
+    }
+  }
+
+  // Pass 2: copy the file, keeping only tokens seen at least iCutoff times.
+  // Each kept token is followed by a single space; a row that keeps no token
+  // at all produces no output line.
+  ReadFile filtering_input(pszFName);
+  WriteFile filtered_output(pszNewFName);
+  auto& src = *filtering_input.stream();
+  auto& dst = *filtered_output.stream();
+  for (string row; getline(src, row);) {
+    if (row.empty()) continue;
+    vector<string> tokens;
+    SplitOnWhitespace(row, &tokens);
+    size_t kept = 0;
+    for (size_t k = 0; k < tokens.size(); ++k) {
+      const string& token = tokens[k];
+      // operator[] is safe here: pass 1 has already counted every token.
+      if (tokenCount[token] < iCutoff) continue;
+      dst << token << " ";
+      ++kept;
+    }
+    if (kept != 0) dst << "\n";
+  }
+}
+
+// Generates the training instances for the argument (SRL) reordering model
+// and trains its ".left" and ".right" maxent classifiers. All the work is
+// done by the constructor; the struct has no data members.
+struct SArgumentReorderTrainer {
+  SArgumentReorderTrainer(
+      const char* pszSRLFname,           // source-side srl tree file name
+      const char* pszAlignFname,         // alignment filename
+      const char* pszSourceFname,        // source file name
+      const char* pszTargetFname,        // target file name
+      const char* pszTopPredicateFname,  // top predicate file name, or NULL
+      const char* pszInstanceFname,      // training instance file name
+      const char* pszModelFname,         // classifier model file name
+      int iCutoff) {                     // feature count threshold
+    fnGenerateInstanceFiles(pszSRLFname, pszAlignFname, pszSourceFname,
+                            pszTargetFname, pszTopPredicateFname,
+                            pszInstanceFname);
+
+    string strInstanceFname, strModelFname;
+    strInstanceFname = string(pszInstanceFname) + string(".left");
+    strModelFname = string(pszModelFname) + string(".left");
+    fnTraining(strInstanceFname.c_str(), strModelFname.c_str(), iCutoff);
+    strInstanceFname = string(pszInstanceFname) + string(".right");
+    strModelFname = string(pszModelFname) + string(".right");
+    fnTraining(strInstanceFname.c_str(), strModelFname.c_str(), iCutoff);
+  }
+
+  ~SArgumentReorderTrainer() {}
+
+ private:
+  // Trains one maxent model from pszInstanceFname into pszModelFname. When
+  // iCutoff > 0 the instances are first filtered into "<instance>.tmp" by
+  // fnPreparingTrainingdata and that temporary file is deleted afterwards.
+  void fnTraining(const char* pszInstanceFname, const char* pszModelFname,
+                  int iCutoff) {
+    string strTrainFname(pszInstanceFname);
+    const bool bFiltered = iCutoff > 0;
+    if (bFiltered) {
+      strTrainFname += ".tmp";
+      fnPreparingTrainingdata(pszInstanceFname, iCutoff, strTrainFname.c_str());
+    }
+
+    Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL);
+    pMaxent->fnTrain(strTrainFname.c_str(), "l1", pszModelFname, 300);
+    delete pMaxent;
+
+    // remove() instead of system("rm ..."): no shell, so odd file names are safe
+    if (bFiltered) remove(strTrainFname.c_str());
+  }
+
+  // Reads the SRL, alignment, source and target files in parallel (one
+  // sentence per record) and writes one instance per adjacent pair of SRL
+  // items to "<instance>.left" and "<instance>.right".
+  void fnGenerateInstanceFiles(
+      const char* pszSRLFname,     // source-side flattened parse tree file name
+      const char* pszAlignFname,   // alignment filename
+      const char* pszSourceFname,  // source file name
+      const char* pszTargetFname,  // target file name
+      const char* pszTopPredicateFname,  // top predicate file name (we only
+                                         // consider predicates with 100+
+                                         // occurrences); NULL keeps them all
+      const char* pszInstanceFname       // training instance file name
+      ) {
+    SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
+    SSrlSentenceReader* pSRLReader = new SSrlSentenceReader(pszSRLFname);
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
+
+    Map* pMapPredicate = fnLoadTopPredicates(pszTopPredicateFname);
+    string line;
+
+    WriteFile left_file(pszInstanceFname + string(".left"));
+    WriteFile right_file(pszInstanceFname + string(".right"));
+
+    // read sentence by sentence
+    SAlignment* pAlign;
+    SSrlSentence* pSRL;
+    SParsedTree* pTree;
+    int iSentNum = 0;
+    while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
+      pSRL = pSRLReader->fnReadNextSrlSentence();
+      assert(pSRL != NULL);
+      pTree = pSRL->m_pTree;
+      // Read outside of assert(): the reads must also happen under NDEBUG.
+      if (!getline(*source_file.stream(), line)) {
+        fprintf(stderr, "source file ended before the alignment file\n");
+        exit(1);
+      }
+      vector<string> vecSTerms;
+      SplitOnWhitespace(line, &vecSTerms);
+      if (!getline(*target_file.stream(), line)) {
+        fprintf(stderr, "target file ended before the alignment file\n");
+        exit(1);
+      }
+      vector<string> vecTTerms;
+      SplitOnWhitespace(line, &vecTTerms);
+
+      if (pTree != NULL) {
+        for (size_t i = 0; i < pSRL->m_vecPred.size(); i++) {
+          SPredicate* pPred = pSRL->m_vecPred[i];
+          if (strcmp(pTree->m_vecTerminals[pPred->m_iPosition]
+                         ->m_ptParent->m_pszTerm,
+                     "VA") == 0)
+            continue;
+          string strPred =
+              string(pTree->m_vecTerminals[pPred->m_iPosition]->m_pszTerm);
+          // With a top-predicate list, skip predicates that are not on it.
+          if (pMapPredicate != NULL &&
+              pMapPredicate->find(strPred) == pMapPredicate->end())
+            continue;
+
+          SPredicateItem* pPredItem = new SPredicateItem(pTree, pPred);
+          vector<string> vecStrBlock;
+          for (size_t j = 0; j < pPredItem->vec_items_.size(); j++) {
+            SSRLItem* pItem1 = pPredItem->vec_items_[j];
+            vecStrBlock.push_back(SArgumentReorderModel::fnGetBlockOutcome(
+                pItem1->tree_item_->m_iBegin, pItem1->tree_item_->m_iEnd,
+                pAlign));
+          }
+
+          vector<string> vecStrLeftReorderType;
+          vector<string> vecStrRightReorderType;
+          SArgumentReorderModel::fnGetReorderType(
+              pPredItem, pAlign, vecStrLeftReorderType, vecStrRightReorderType);
+          for (int j = 1; j < pPredItem->vec_items_.size(); j++) {
+            string strLeftOutcome, strRightOutcome;
+            strLeftOutcome = vecStrLeftReorderType[j - 1];
+            strRightOutcome = vecStrRightReorderType[j - 1];
+            ostringstream ostr;
+            SArgumentReorderModel::fnGenerateFeature(pTree, pPred, pPredItem, j,
+                                                     vecStrBlock[j - 1],
+                                                     vecStrBlock[j], ostr);
+            (*left_file.stream()) << ostr.str() << " " << strLeftOutcome
+                                  << "\n";
+            (*right_file.stream()) << ostr.str() << " " << strRightOutcome
+                                   << "\n";
+          }
+          delete pPredItem;  // was leaked once per predicate
+        }
+      }
+      delete pSRL;
+      delete pAlign;
+      iSentNum++;
+      if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
+    }
+
+    delete pMapPredicate;  // deleting NULL is a no-op
+    delete pAlignReader;
+    delete pSRLReader;
+  }
+
+  // Loads "<predicate> <count>" lines into a map keyed by the predicate.
+  // Empty lines and lines starting with '#' are skipped; reading stops at the
+  // first count below 100. Returns NULL for a NULL name; caller owns the map.
+  Map* fnLoadTopPredicates(const char* pszTopPredicateFname) {
+    if (pszTopPredicateFname == NULL) return NULL;
+
+    Map* pMapPredicate = new Map();
+    ReadFile in(pszTopPredicateFname);
+    string line;
+    int iNumCount = 0;
+    while (getline(*in.stream(), line)) {
+      if (line.empty() || line[0] == '#') continue;
+      auto p = line.find(' ');
+      assert(p != string::npos);
+      int iCount = atoi(line.substr(p + 1).c_str());
+      if (iCount < 100) break;
+      // Key on the predicate only: lookups are done with the bare word.
+      (*pMapPredicate)[line.substr(0, p)] = iNumCount++;
+    }
+    return pMapPredicate;
+  }
+};
+
+namespace po = boost::program_options;
+
+// Prints every long option name as one double-quoted, space-separated line,
+// e.g. "--srl_file --align_file ... --help".
+inline void print_options(std::ostream& out,
+                          po::options_description const& opts) {
+  typedef std::vector<boost::shared_ptr<po::option_description> > Ds;
+  Ds const& ds = opts.options();
+  out << '"';
+  for (unsigned i = 0; i < ds.size(); ++i)
+    out << (i ? " --" : "--") << ds[i]->long_name();
+  out << "\"\n";  // close the quote opened above (it was left unbalanced)
+}
+inline string str(char const* name, po::variables_map const& conf) {
+  return conf[string(name)].as<string>();  // value of option `name`
+}
+
+// --srl_file /scratch0/mt_exp/gale-align/gale-align.nw.srl.cn --align_file
+// /scratch0/mt_exp/gale-align/gale-align.nw.al --source_file
+// /scratch0/mt_exp/gale-align/gale-align.nw.cn --target_file
+// /scratch0/mt_exp/gale-align/gale-align.nw.en --instance_file
+// /scratch0/mt_exp/gale-align/gale-align.nw.argreorder.instance --model_prefix
+// /scratch0/mt_exp/gale-align/gale-align.nw.argreorder.model --feature_cutoff 2
+// --srl_file /scratch0/mt_exp/gale-ctb/gale-ctb.srl.cn --align_file
+// /scratch0/mt_exp/gale-ctb/gale-ctb.align --source_file
+// /scratch0/mt_exp/gale-ctb/gale-ctb.cn --target_file
+// /scratch0/mt_exp/gale-ctb/gale-ctb.en0 --instance_file
+// /scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.instance --model_prefix
+// /scratch0/mt_exp/gale-ctb/gale-ctb.argreorder.model --feature_cutoff 2
+// Parses the command line, writes the instance files and trains both models.
+// Exit status: 0 on success or --help, 1 if a required option is missing.
+int main(int argc, char** argv) {
+  po::options_description opts("Configuration options");
+  opts.add_options()("srl_file", po::value<string>(), "srl file path (input)")(
+      "align_file", po::value<string>(), "Alignment file path (input)")(
+      "source_file", po::value<string>(), "Source text file path (input)")(
+      "target_file", po::value<string>(), "Target text file path (input)")(
+      "instance_file", po::value<string>(), "Instance file path (output)")(
+      "model_prefix", po::value<string>(),
+      "Model file path prefix (output): three files will be generated")(
+      "feature_cutoff", po::value<int>()->default_value(100),
+      "Feature cutoff threshold")("help", "produce help message");
+
+  po::variables_map vm;
+  if (argc) {
+    po::store(po::parse_command_line(argc, argv, opts), vm);
+    po::notify(vm);
+  }
+
+  if (vm.count("help")) {
+    print_options(cout, opts);
+    return 0;
+  }
+
+  // Report every required option that is absent, not just the first one.
+  static const char* const kRequired[] = {"srl_file",      "align_file",
+                                          "source_file",   "target_file",
+                                          "instance_file", "model_prefix"};
+  bool bMissing = false;
+  for (const char* pszName : kRequired) {
+    if (vm.count(pszName)) continue;
+    if (!bMissing) print_options(cout, opts);
+    bMissing = true;
+    cout << "--" << pszName << " NOT FOUND\n";
+  }
+  if (bMissing) return 1;
+
+  // The constructor does all the work; no top-predicate file is used (NULL).
+  SArgumentReorderTrainer trainer(
+      str("srl_file", vm).c_str(), str("align_file", vm).c_str(),
+      str("source_file", vm).c_str(), str("target_file", vm).c_str(), NULL,
+      str("instance_file", vm).c_str(), str("model_prefix", vm).c_str(),
+      vm["feature_cutoff"].as<int>());
+  return 0;
+}
diff --git a/training/const_reorder/constituent_reorder_model.cc b/training/const_reorder/constituent_reorder_model.cc
new file mode 100644
index 00000000..6bec3f0b
--- /dev/null
+++ b/training/const_reorder/constituent_reorder_model.cc
@@ -0,0 +1,636 @@
+/*
+ * constituent_reorder_model.cc
+ *
+ *  Created on: Jul 10, 2013
+ *      Author: junhuili
+ */
+
+#include <string>
+#include <unordered_map>
+
+#include <boost/program_options.hpp>
+
+#include "utils/filelib.h"
+
+#include "decoder/ff_const_reorder_common.h"
+
+using namespace std;
+using namespace const_reorder;
+
+typedef std::unordered_map<std::string, int> Map;
+typedef std::unordered_map<std::string, int>::iterator Iterator;
+
+namespace po = boost::program_options;
+
+inline void fnPreparingTrainingdata(const char* pszFName, int iCutoff,
+                                    const char* pszNewFName) {
+  // Pass 1: count how often every whitespace-separated token occurs.
+  Map tokenCount;
+  {
+    ReadFile counting_input(pszFName);
+    auto& src = *counting_input.stream();
+    for (string row; getline(src, row);) {
+      if (row.empty()) continue;
+      vector<string> tokens;
+      SplitOnWhitespace(row, &tokens);
+      for (size_t k = 0; k < tokens.size(); ++k) tokenCount[tokens[k]] += 1;
+    }
+  }
+
+  // Pass 2: copy the file, keeping only tokens seen at least iCutoff times.
+  // Each kept token is followed by a single space; a row that keeps no token
+  // at all produces no output line.
+  ReadFile filtering_input(pszFName);
+  WriteFile filtered_output(pszNewFName);
+  auto& src = *filtering_input.stream();
+  auto& dst = *filtered_output.stream();
+  for (string row; getline(src, row);) {
+    if (row.empty()) continue;
+    vector<string> tokens;
+    SplitOnWhitespace(row, &tokens);
+    size_t kept = 0;
+    for (size_t k = 0; k < tokens.size(); ++k) {
+      const string& token = tokens[k];
+      // operator[] is safe here: pass 1 has already counted every token.
+      if (tokenCount[token] < iCutoff) continue;
+      dst << token << " ";
+      ++kept;
+    }
+    if (kept != 0) dst << "\n";
+  }
+}
+
+struct SConstReorderTrainer {
+  SConstReorderTrainer(
+      const char* pszSynFname,       // flattened source-side parse trees
+      const char* pszAlignFname,     // word alignments
+      const char* pszSourceFname,    // source sentences
+      const char* pszTargetFname,    // target sentences
+      const char* pszInstanceFname,  // prefix of the instance files to write
+      const char* pszModelPrefix,    // prefix of the model files to write
+      int iCutoff,                   // feature count threshold
+      const char* /*pszOption*/      // unused (was: svmlight parameters)
+      ) {
+    // Step 1: dump "<instance>.left" / "<instance>.right".
+    fnGenerateInstanceFile(pszSynFname, pszAlignFname, pszSourceFname,
+                           pszTargetFname, pszInstanceFname);
+    // Step 2: train one classifier per side, left first.
+    const string instancePrefix(pszInstanceFname);
+    const string modelPrefix(pszModelPrefix);
+    const string leftInstances = instancePrefix + ".left";
+    const string leftModel = modelPrefix + ".left";
+    fprintf(stdout, "...Training the left ordering model\n");
+    fnTraining(leftInstances.c_str(), leftModel.c_str(), iCutoff);
+
+    const string rightInstances = instancePrefix + ".right";
+    const string rightModel = modelPrefix + ".right";
+    fprintf(stdout, "...Training the right ordering model\n");
+    fnTraining(rightInstances.c_str(), rightModel.c_str(), iCutoff);
+  }
+  ~SConstReorderTrainer() {}  // empty body: nothing is released here
+
+ private:
+  // Trains one maxent model from pszInstanceFname and stores it in
+  // pszModelFname. With iCutoff > 0 the instances are first filtered by
+  // fnPreparingTrainingdata into "<instance>.tmp", which is removed again
+  // once training has finished.
+  void fnTraining(const char* pszInstanceFname, const char* pszModelFname,
+                  int iCutoff) {
+    string strTrainFname(pszInstanceFname);
+    const bool bFiltered = iCutoff > 0;
+    if (bFiltered) {
+      strTrainFname += ".tmp";
+      fnPreparingTrainingdata(pszInstanceFname, iCutoff, strTrainFname.c_str());
+    }
+
+    // "l1" and 300 are passed through to Tsuruoka_Maxent::fnTrain unchanged.
+    Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL);
+    pMaxent->fnTrain(strTrainFname.c_str(), "l1", pszModelFname, 300);
+    delete pMaxent;
+
+    // Delete the temporary file with remove() rather than system("rm ..."):
+    // no shell is involved, so file names containing spaces or shell
+    // metacharacters are handled correctly, and a failure can be reported.
+    if (bFiltered && remove(strTrainFname.c_str()) != 0)
+      fprintf(stderr, "cannot remove %s\n", strTrainFname.c_str());
+  }
+
+  inline bool fnIsVerbPOS(const char* pszTerm) {
+    // Verb tags are exactly two characters: 'V' followed by V, A, C or E.
+    if (pszTerm[0] != 'V' || pszTerm[1] == '\0' || pszTerm[2] != '\0')
+      return false;
+    return strchr("VACE", pszTerm[1]) != NULL;
+  }
+
+  // Classifies how the target spans [iL1, iR1] of XP1 and [iL2, iR2] of XP2
+  // relate to each other and stores the label in strOutcome. A left bound of
+  // -1 marks an untranslated constituent. The alignment argument is unused.
+  inline void fnGetOutcome(int iL1, int iR1, int iL2, int iR2,
+                           const SAlignment* /*pAlign*/, string& strOutcome) {
+    const bool bNo1 = (iL1 == -1);
+    const bool bNo2 = (iL2 == -1);
+    if (bNo1 || bNo2) {
+      strOutcome = bNo1 ? (bNo2 ? "BU" : "1U") : "2U";  // cases 1-3
+      return;
+    }
+    if (iL1 == iL2 && iR1 == iR2) {
+      strOutcome = "SS";  // 4. Have same scope
+      return;
+    }
+    if (iL1 <= iL2 && iR1 >= iR2) {
+      strOutcome = "1C2";  // 5. XP1's translation covers XP2's
+      return;
+    }
+    if (iL1 >= iL2 && iR1 <= iR2) {
+      strOutcome = "2C1";  // 6. XP2's translation covers XP1's
+      return;
+    }
+    if (iR1 < iL2) {
+      // 7. Monotone when adjacent, 8. Discontinuous monotone otherwise
+      strOutcome = (iR1 + 1 == iL2) ? "M" : "DM";
+      return;
+    }
+    if (iL1 < iL2 && iL2 <= iR1 && iR1 < iR2) {
+      strOutcome = "OM";  // 9. Overlap monotone
+      return;
+    }
+    if (iR2 < iL1) {
+      // 10. Swap when adjacent, 11. Discontinuous swap otherwise
+      strOutcome = (iR2 + 1 == iL1) ? "S" : "DS";
+      return;
+    }
+    if (iL2 < iL1 && iL1 <= iR2 && iR2 < iR1) {
+      strOutcome = "OS";  // 12. Overlap swap
+      return;
+    }
+    assert(false);
+  }
+
+  // Labels the relative order of two distinct positions: "M" / "S" when i2
+  // directly follows / precedes i1, "DM" / "DS" when there is a gap.
+  inline void fnGetOutcome(int i1, int i2, string& strOutcome) {
+    assert(i1 != i2);
+    const bool bMonotone = i1 < i2;
+    const int iLow = bMonotone ? i1 : i2;
+    const int iHigh = bMonotone ? i2 : i1;
+    const bool bGap = iHigh > iLow + 1;
+    if (bMonotone) {
+      strOutcome = bGap ? "DM" : "M";
+    } else {
+      strOutcome = bGap ? "DS" : "S";
+    }
+  }
+
+  // For every entry of vecLeft computes its rank (0-based) in ascending
+  // order and stores the ranks in vecPosition; ties go to the earlier
+  // index. An entry of -1 is ranked just after the raw value of its left
+  // neighbour (value + 0.1), or as -1 when it is the first entry.
+  inline void fnGetRelativePosition(const vector<int>& vecLeft,
+                                    vector<int>& vecPosition) {
+    vecPosition.clear();
+    const size_t n = vecLeft.size();
+
+    // Sort keys: the value itself, or the substitute described above.
+    vector<float> keys(n);
+    for (size_t k = 0; k < n; ++k) {
+      if (vecLeft[k] != -1)
+        keys[k] = vecLeft[k];
+      else
+        keys[k] = (k == 0) ? -1 : vecLeft[k - 1] + 0.1;
+    }
+
+    for (size_t cur = 0; cur < n; ++cur) {
+      int rank = 0;
+      for (size_t other = 0; other < n; ++other) {
+        if (other == cur) continue;
+        const bool bSmaller = keys[other] < keys[cur];
+        const bool bTieBefore = keys[other] == keys[cur] && other < cur;
+        if (bSmaller || bTieBefore) ++rank;
+      }
+      vecPosition.push_back(rank);
+    }
+  }
+
+  /*
+   * features:
+   * f1: (left_label, right_label, parent_label)
+   * f2: (left_label, right_label, parent_label, other_right_sibling_label)
+   * f3: (left_label, right_label, parent_label, other_left_sibling_label)
+   * f4: (left_label, right_label, left_head_pos)
+   * f5: (left_label, right_label, left_head_word)
+   * f6: (left_label, right_label, right_head_pos)
+   * f7: (left_label, right_label, right_head_word)
+   * f8: (left_label, right_label, left_chunk_status)
+   * f9: (left_label, right_label, right_chunk_status)
+   * f10: (left_label, parent_label)
+   * f11: (right_label, parent_label)
+   */
+  // Writes the features describing the adjacent children iPos - 1 (left)
+  // and iPos (right) of pParent to ostr, space separated, and stores the
+  // order label of the pair in strOutcome. pTree supplies the terminals
+  // used for the head-word features; vecTTerms is not used.
+  void fnGenerateInstance(const SParsedTree* pTree, const STreeItem* pParent,
+                          int iPos, const vector<string>& vecChunkStatus,
+                          const vector<int>& vecPosition,
+                          const vector<string>& vecSTerms,
+                          const vector<string>& /*vecTTerms*/, string& strOutcome,
+                          ostringstream& ostr) {
+    const auto& siblings = pParent->m_vecChildren;
+    const STreeItem* pLeft = siblings[iPos - 1];
+    const STreeItem* pRight = siblings[iPos];
+    // The label comes from the relative target-side ranks of the two children.
+    fnGetOutcome(vecPosition[iPos - 1], vecPosition[iPos], strOutcome);
+
+    // Label combinations shared by several features.
+    const string left_label(pLeft->m_pszTerm);
+    const string right_label(pRight->m_pszTerm);
+    const string parent_label(pParent->m_pszTerm);
+    const string pair_label = left_label + "_" + right_label;
+    const string triple_label = pair_label + "_" + parent_label;
+
+    // f1
+    ostr << "f1=" << triple_label;
+
+    // f2: one feature per sibling to the right of the pair ("NULL" if none)
+    const size_t iFirstRight = iPos + 1;
+    if (iFirstRight >= siblings.size()) {
+      ostr << " f2=" << triple_label << "_" << "NULL";
+    }
+    for (size_t k = iFirstRight; k < siblings.size(); ++k) {
+      ostr << " f2=" << triple_label << "_" << string(siblings[k]->m_pszTerm);
+    }
+
+    // f3: one feature per sibling to the left of the pair ("NULL" if none)
+    const int iNumLeft = iPos - 1;
+    if (iNumLeft <= 0) {
+      ostr << " f3=" << triple_label << "_" << "NULL";
+    }
+    for (int k = 0; k < iNumLeft; ++k) {
+      ostr << " f3=" << triple_label << "_" << string(siblings[k]->m_pszTerm);
+    }
+
+    // f4, f5: label of the node above the left child's head word, and the
+    // head word itself
+    ostr << " f4=" << pair_label << "_"
+         << pTree->m_vecTerminals[pLeft->m_iHeadWord]->m_ptParent->m_pszTerm;
+    ostr << " f5=" << pair_label << "_" << vecSTerms[pLeft->m_iHeadWord];
+
+    // f6, f7: the same for the right child
+    ostr << " f6=" << pair_label << "_"
+         << pTree->m_vecTerminals[pRight->m_iHeadWord]->m_ptParent->m_pszTerm;
+    ostr << " f7=" << pair_label << "_" << vecSTerms[pRight->m_iHeadWord];
+
+    // f8, f9: chunk status of the left and of the right child
+    ostr << " f8=" << pair_label << "_" << vecChunkStatus[iPos - 1];
+    ostr << " f9=" << pair_label << "_" << vecChunkStatus[iPos];
+
+    // f10, f11: each child label paired with the parent label
+    ostr << " f10=" << left_label << "_" << parent_label;
+    ostr << " f11=" << right_label << "_" << parent_label;
+  }
+
+  /*
+   * Source side (11 features):
+   * f1: the categories of XP1 and XP2 (f1_1, f1_2)
+   * f2: the head words of XP1 and XP2 (f2_1, f2_2)
+   * f3: the first and last word of XP1 (f3_f, f3_l)
+   * f4: the first and last word of XP2 (f4_f, f4_l)
+   * f5: is XP1 or XP2 the head node (f5_1, f5_2)
+   * f6: the category of the common parent
+   * Target side (6 features):
+   * f7: the first and the last word of XP1's translation (f7_f, f7_l)
+   * f8: the first and the last word of XP2's translation (f8_f, f8_l)
+   * f9: the translation of XP1's and XP2's head word (f9_1, f9_2)
+   */
+  void fnGenerateInstance(const SParsedTree* /*pTree*/, const STreeItem* pParent,
+                          const STreeItem* pCon1, const STreeItem* pCon2,
+                          const SAlignment* pAlign,
+                          const vector<string>& vecSTerms,
+                          const vector<string>& /*vecTTerms*/, string& strOutcome,
+                          ostringstream& ostr) {
+    // Target-side spans of XP1 and XP2 (bounds filled in by fnGetLeftRightMost).
+    int iLeft1, iRight1, iLeft2, iRight2;
+    pAlign->fnGetLeftRightMost(pCon1->m_iBegin, pCon1->m_iEnd, true, iLeft1,
+                               iRight1);
+    pAlign->fnGetLeftRightMost(pCon2->m_iBegin, pCon2->m_iEnd, true, iLeft2,
+                               iRight2);
+
+    fnGetOutcome(iLeft1, iRight1, iLeft2, iRight2, pAlign, strOutcome);
+
+    // generate features
+    // f1
+    ostr << "f1_1=" << pCon1->m_pszTerm << " f1_2=" << pCon2->m_pszTerm;
+    // f2 (the '=' after f2_2 used to be missing, fusing name and value)
+    ostr << " f2_1=" << vecSTerms[pCon1->m_iHeadWord] << " f2_2="
+         << vecSTerms[pCon2->m_iHeadWord];
+    // f3
+    ostr << " f3_f=" << vecSTerms[pCon1->m_iBegin]
+         << " f3_l=" << vecSTerms[pCon1->m_iEnd];
+    // f4
+    ostr << " f4_f=" << vecSTerms[pCon2->m_iBegin]
+         << " f4_l=" << vecSTerms[pCon2->m_iEnd];
+    // f5
+    if (pParent->m_iHeadChild == pCon1->m_iBrotherIndex)
+      ostr << " f5_1=1";
+    else
+      ostr << " f5_1=0";
+    if (pParent->m_iHeadChild == pCon2->m_iBrotherIndex)
+      ostr << " f5_2=1";
+    else
+      ostr << " f5_2=0";
+    // f6
+    ostr << " f6=" << pParent->m_pszTerm;
+
+    /*//f7
+    if (iLeft1 != -1) {
+            ostr << " f7_f=" << vecTTerms[iLeft1] << " f7_l=" <<
+    vecTTerms[iRight1];
+    }
+    if (iLeft2 != -1) {
+            ostr << " f8_f=" << vecTTerms[iLeft2] << " f8_l=" <<
+    vecTTerms[iRight2];
+    }
+
+    const vector<int>* pvecTarget =
+    pAlign->fnGetSingleWordAlign(pCon1->m_iHeadWord, true);
+    string str = "";
+    for (size_t i = 0; pvecTarget != NULL && i < pvecTarget->size(); i++) {
+            str += vecTTerms[(*pvecTarget)[i]] + "_";
+    }
+    if (str.length() > 0) {
+            ostr << " f9_1=" << str.substr(0, str.size()-1);
+    }
+    pvecTarget = pAlign->fnGetSingleWordAlign(pCon2->m_iHeadWord, true);
+    str = "";
+    for (size_t i = 0; pvecTarget != NULL && i < pvecTarget->size(); i++) {
+            str += vecTTerms[(*pvecTarget)[i]] + "_";
+    }
+    if (str.length() > 0) {
+            ostr << " f9_2=" << str.substr(0, str.size()-1);
+    } */
+  }
+
+  // Appends to vecFocused every visited node with at least two children.
+  // Each terminal walks up through its ancestors and stops after the first
+  // one whose m_iBrotherIndex is non-zero.
+  void fnGetFocusedParentNodes(const SParsedTree* pTree,
+                               vector<STreeItem*>& vecFocused) {
+    const size_t nTerminals = pTree->m_vecTerminals.size();
+    for (size_t t = 0; t < nTerminals; ++t) {
+      for (STreeItem* pNode = pTree->m_vecTerminals[t]->m_ptParent;
+           pNode != NULL; pNode = pNode->m_ptParent) {
+        const bool bBranching = pNode->m_vecChildren.size() > 1;
+        if (bBranching) vecFocused.push_back(pNode);
+        // Only nodes with brother index 0 (presumably the leftmost child)
+        // keep climbing towards the root.
+        if (pNode->m_iBrotherIndex != 0) break;
+      }
+    }
+  }
+
+  // Reads parse trees, alignments and the source/target sentences in
+  // parallel and writes, for every pair of adjacent children of each focused
+  // node, one instance to "<instance>.left" and one to "<instance>.right".
+  void fnGenerateInstanceFile(
+      const char* pszSynFname,     // source-side flattened parse tree file name
+      const char* pszAlignFname,   // alignment filename
+      const char* pszSourceFname,  // source file name
+      const char* pszTargetFname,  // target file name
+      const char* pszInstanceFname  // training instance file name
+      ) {
+    SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
+    SParseReader* pParseReader = new SParseReader(pszSynFname, false);
+
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
+    string strInstanceLeftFname = string(pszInstanceFname) + string(".left");
+    string strInstanceRightFname = string(pszInstanceFname) + string(".right");
+    WriteFile left_file(strInstanceLeftFname);
+    WriteFile right_file(strInstanceRightFname);
+
+    // read sentence by sentence
+    SAlignment* pAlign;
+    SParsedTree* pTree;
+    string line;
+    int iSentNum = 0;
+    while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
+      pTree = pParseReader->fnReadNextParseTree();
+
+      // Read outside of assert(): the reads must also happen under NDEBUG.
+      if (!getline(*source_file.stream(), line)) {
+        fprintf(stderr, "source file ended before the alignment file\n");
+        exit(1);
+      }
+      vector<string> vecSTerms;
+      SplitOnWhitespace(line, &vecSTerms);
+      if (!getline(*target_file.stream(), line)) {
+        fprintf(stderr, "target file ended before the alignment file\n");
+        exit(1);
+      }
+      vector<string> vecTTerms;
+      SplitOnWhitespace(line, &vecTTerms);
+
+      if (pTree != NULL) {
+        vector<STreeItem*> vecFocused;
+        fnGetFocusedParentNodes(pTree, vecFocused);
+
+        for (size_t i = 0; i < vecFocused.size(); i++) {
+          STreeItem* pParent = vecFocused[i];
+          vector<int> vecLeft, vecRight;
+          for (size_t j = 0; j < pParent->m_vecChildren.size(); j++) {
+            STreeItem* pCon1 = pParent->m_vecChildren[j];
+            int iLeft1, iRight1;
+            pAlign->fnGetLeftRightMost(pCon1->m_iBegin, pCon1->m_iEnd, true,
+                                       iLeft1, iRight1);
+            vecLeft.push_back(iLeft1);
+            vecRight.push_back(iRight1);
+          }
+          vector<int> vecLeftPosition;
+          fnGetRelativePosition(vecLeft, vecLeftPosition);
+          vector<int> vecRightPosition;
+          fnGetRelativePosition(vecRight, vecRightPosition);
+
+          vector<string> vecChunkStatus;
+          for (size_t j = 0; j < pParent->m_vecChildren.size(); j++) {
+            string strOutcome =
+                pAlign->fnIsContinuous(pParent->m_vecChildren[j]->m_iBegin,
+                                       pParent->m_vecChildren[j]->m_iEnd);
+            vecChunkStatus.push_back(strOutcome);
+          }
+
+          for (size_t j = 1; j < pParent->m_vecChildren.size(); j++) {
+            // children[j-1] vs. children[j] reordering
+            string strLeftOutcome;
+            ostringstream ostr;
+            fnGenerateInstance(pTree, pParent, j, vecChunkStatus,
+                               vecLeftPosition, vecSTerms, vecTTerms,
+                               strLeftOutcome, ostr);
+            string ostr_str = ostr.str();
+            (*left_file.stream()) << ostr_str << " " << strLeftOutcome << "\n";
+            // The right-order instance carries the left outcome as a feature.
+            string strRightOutcome;
+            fnGetOutcome(vecRightPosition[j - 1], vecRightPosition[j],
+                         strRightOutcome);
+            (*right_file.stream()) << ostr_str
+                                   << " LeftOrder=" << strLeftOutcome << " "
+                                   << strRightOutcome << "\n";
+          }
+        }
+        delete pTree;
+      }
+
+      delete pAlign;
+      iSentNum++;
+
+      if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
+    }
+
+    delete pAlignReader;
+    delete pParseReader;
+  }
+
+  // Writes one instance per adjacent child pair of each focused parent node.
+  void fnGenerateInstanceFile2(
+      const char* pszSynFname,     // source-side flattened parse tree file name
+      const char* pszAlignFname,   // alignment filename
+      const char* pszSourceFname,  // source file name
+      const char* pszTargetFname,  // target file name
+      const char* pszInstanceFname  // training instance file name
+      ) {
+    SAlignmentReader* pAlignReader = new SAlignmentReader(pszAlignFname);
+    SParseReader* pParseReader = new SParseReader(pszSynFname, false);
+
+    ReadFile source_file(pszSourceFname);
+    ReadFile target_file(pszTargetFname);
+    WriteFile output_file(pszInstanceFname);
+
+    // read sentence by sentence
+    SAlignment* pAlign;
+    SParsedTree* pTree;
+    string line;
+    int iSentNum = 0;
+    while ((pAlign = pAlignReader->fnReadNextAlignment()) != NULL) {
+      pTree = pParseReader->fnReadNextParseTree();
+      // Read outside assert(): under NDEBUG the getline() would be compiled out.
+      if (!getline(*source_file.stream(), line)) {
+        fprintf(stderr, "Too few lines in %s\n", pszSourceFname);
+        exit(1);
+      }
+      vector<string> vecSTerms;
+      SplitOnWhitespace(line, &vecSTerms);
+
+      if (!getline(*target_file.stream(), line)) {
+        fprintf(stderr, "Too few lines in %s\n", pszTargetFname);
+        exit(1);
+      }
+      vector<string> vecTTerms;
+      SplitOnWhitespace(line, &vecTTerms);
+
+      if (pTree != NULL) {
+        vector<STreeItem*> vecFocused;
+        fnGetFocusedParentNodes(pTree, vecFocused);
+
+        for (size_t i = 0;
+             i < vecFocused.size() && pTree->m_vecTerminals.size() > 10; i++) {
+          STreeItem* pParent = vecFocused[i];
+          for (size_t j = 1; j < pParent->m_vecChildren.size(); j++) {
+            // children[j-1] vs. children[j] reordering
+            string strOutcome;
+            ostringstream ostr;
+
+            fnGenerateInstance(pTree, pParent, pParent->m_vecChildren[j - 1],
+                               pParent->m_vecChildren[j], pAlign, vecSTerms,
+                               vecTTerms, strOutcome, ostr);
+            (*output_file.stream()) << ostr.str() << " " << strOutcome << "\n";
+          }
+        }
+        delete pTree;
+      }
+
+      delete pAlign;
+      iSentNum++;
+
+      if (iSentNum % 100000 == 0) fprintf(stderr, "#%d\n", iSentNum);
+    }
+
+    delete pAlignReader;
+    delete pParseReader;
+  }
+};
+// Prints every long option name on one line, wrapped in double quotes.
+inline void print_options(std::ostream& out,
+                          po::options_description const& opts) {
+  typedef std::vector<boost::shared_ptr<po::option_description> > Ds;
+  Ds const& ds = opts.options();
+  out << '"';
+  for (unsigned i = 0; i < ds.size(); ++i) {
+    if (i) out << ' ';
+    out << "--" << ds[i]->long_name();
+  }
+  out << "\"\n";  // close the quote opened above
+}
+inline const string& str(char const* name, po::variables_map const& conf) {
+  return conf[name].as<string>();  // reference into conf, valid while it lives
+}
+
+// Example: --parse_file /scratch0/mt_exp/gq-ctb/data/train.srl.cn
+//   --align_file /scratch0/mt_exp/gq-ctb/data/aligned.grow-diag-final-and
+//   --source_file /scratch0/mt_exp/gq-ctb/data/train.cn
+//   --target_file /scratch0/mt_exp/gq-ctb/data/train.en
+//   --instance_file /scratch0/mt_exp/gq-ctb/data/srl-instance
+//   --model_prefix /scratch0/mt_exp/gq-ctb/data/srl-instance --feature_cutoff 10
+int main(int argc, char** argv) {
+  // Declare the options: the six file/prefix options are required (checked
+  // below), feature_cutoff defaults to 100, svm_option is optional.
+  po::options_description opts("Configuration options");
+  opts.add_options()("parse_file", po::value<string>(),
+                     "parse file path (input)")(
+      "align_file", po::value<string>(), "Alignment file path (input)")(
+      "source_file", po::value<string>(), "Source text file path (input)")(
+      "target_file", po::value<string>(), "Target text file path (input)")(
+      "instance_file", po::value<string>(), "Instance file path (output)")(
+      "model_prefix", po::value<string>(),
+      "Model file path prefix (output): three files will be generated")(
+      "feature_cutoff", po::value<int>()->default_value(100),
+      "Feature cutoff threshold")("svm_option", po::value<string>(),
+                                  "Parameters for SVMLight classifier")(
+      "help", "produce help message");
+
+  po::variables_map vm;
+  if (argc) {
+    po::store(po::parse_command_line(argc, argv, opts), vm);
+    po::notify(vm);
+  }
+
+  if (vm.count("help")) {
+    print_options(cout, opts);
+    return 1;
+  }
+
+  if (!vm.count("parse_file") || !vm.count("align_file") ||
+      !vm.count("source_file") || !vm.count("target_file") ||
+      !vm.count("instance_file") || !vm.count("model_prefix")) {
+    print_options(cout, opts);
+    if (!vm.count("parse_file")) cout << "--parse_file NOT FOUND\n";
+    if (!vm.count("align_file")) cout << "--align_file NOT FOUND\n";
+    if (!vm.count("source_file")) cout << "--source_file NOT FOUND\n";
+    if (!vm.count("target_file")) cout << "--target_file NOT FOUND\n";
+    if (!vm.count("instance_file")) cout << "--instance_file NOT FOUND\n";
+    if (!vm.count("model_prefix")) cout << "--model_prefix NOT FOUND\n";
+    return 1;  // missing required options is an error, not a success
+  }
+
+  // as<string>() returns a reference into vm, so c_str() stays valid while vm
+  // lives; taking c_str() of a by-value copy left pOption dangling.
+  const char* pOption =
+      vm.count("svm_option") ? vm["svm_option"].as<string>().c_str() : NULL;
+
+  SConstReorderTrainer* pTrainer = new SConstReorderTrainer(
+      str("parse_file", vm).c_str(), str("align_file", vm).c_str(),
+      str("source_file", vm).c_str(), str("target_file", vm).c_str(),
+      str("instance_file", vm).c_str(), str("model_prefix", vm).c_str(),
+      vm["feature_cutoff"].as<int>(), pOption);
+  delete pTrainer;
+
+  return 0;
+}
-- 
cgit v1.2.3


From 62249e8de1be27057649aa787b715af5727f8a7c Mon Sep 17 00:00:00 2001
From: "Wu, Ke" <wuke@cs.umd.edu>
Date: Wed, 17 Dec 2014 15:41:32 -0500
Subject: Move training routine out of ff_const_reorder_common.h

---
 decoder/ff_const_reorder_common.h                  | 93 ----------------------
 training/const_reorder/Makefile.am                 |  8 +-
 training/const_reorder/argument_reorder_model.cc   |  6 +-
 .../const_reorder/constituent_reorder_model.cc     |  6 +-
 training/const_reorder/trainer.cc                  | 67 ++++++++++++++++
 training/const_reorder/trainer.h                   | 12 +++
 6 files changed, 91 insertions(+), 101 deletions(-)
 create mode 100644 training/const_reorder/trainer.cc
 create mode 100644 training/const_reorder/trainer.h

(limited to 'training')

diff --git a/decoder/ff_const_reorder_common.h b/decoder/ff_const_reorder_common.h
index 7c111de3..b124ce47 100644
--- a/decoder/ff_const_reorder_common.h
+++ b/decoder/ff_const_reorder_common.h
@@ -1091,99 +1091,6 @@ struct Tsuruoka_Maxent {
     if (m_pModel != NULL) delete m_pModel;
   }
 
-  void fnTrain(const char* pszInstanceFName, const char* pszAlgorithm,
-               const char* pszModelFName, int /*iNumIteration*/) {
-    assert(strcmp(pszAlgorithm, "l1") == 0 || strcmp(pszAlgorithm, "l2") == 0 ||
-           strcmp(pszAlgorithm, "sgd") == 0 ||
-           strcmp(pszAlgorithm, "SGD") == 0);
-    FILE* fpIn = fopen(pszInstanceFName, "r");
-
-    ME_Model* pModel = new ME_Model();
-
-    char* pszLine = new char[100001];
-    int iNumInstances = 0;
-    int iLen;
-    while (!feof(fpIn)) {
-      pszLine[0] = '\0';
-      fgets(pszLine, 20000, fpIn);
-      if (strlen(pszLine) == 0) {
-        continue;
-      }
-
-      iLen = strlen(pszLine);
-      while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) {
-        pszLine[iLen - 1] = '\0';
-        iLen--;
-      }
-
-      iNumInstances++;
-
-      ME_Sample* pmes = new ME_Sample();
-
-      char* p = strrchr(pszLine, ' ');
-      assert(p != NULL);
-      p[0] = '\0';
-      p++;
-      std::vector<std::string> vecContext;
-      SplitOnWhitespace(std::string(pszLine), &vecContext);
-
-      pmes->label = std::string(p);
-      for (size_t i = 0; i < vecContext.size(); i++)
-        pmes->add_feature(vecContext[i]);
-      pModel->add_training_sample((*pmes));
-      if (iNumInstances % 100000 == 0)
-        fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances);
-      delete pmes;
-    }
-    fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances);
-    fclose(fpIn);
-
-    if (strcmp(pszAlgorithm, "l1") == 0)
-      pModel->use_l1_regularizer(1.0);
-    else if (strcmp(pszAlgorithm, "l2") == 0)
-      pModel->use_l2_regularizer(1.0);
-    else
-      pModel->use_SGD();
-
-    pModel->train();
-    pModel->save_to_file(pszModelFName);
-
-    delete pModel;
-    fprintf(stdout, "......Finished Training\n");
-    fprintf(stdout, "......Model saved as %s\n", pszModelFName);
-    delete[] pszLine;
-  }
-
-  double fnEval(const char* pszContext, const char* pszOutcome) const {
-    std::vector<std::string> vecContext;
-    ME_Sample* pmes = new ME_Sample();
-    SplitOnWhitespace(std::string(pszContext), &vecContext);
-
-    for (size_t i = 0; i < vecContext.size(); i++)
-      pmes->add_feature(vecContext[i]);
-    std::vector<double> vecProb = m_pModel->classify(*pmes);
-    delete pmes;
-    int iLableID = m_pModel->get_class_id(pszOutcome);
-    return vecProb[iLableID];
-  }
-  void fnEval(const char* pszContext,
-              std::vector<std::pair<std::string, double> >& vecOutput) const {
-    std::vector<std::string> vecContext;
-    ME_Sample* pmes = new ME_Sample();
-    SplitOnWhitespace(std::string(pszContext), &vecContext);
-
-    vecOutput.clear();
-
-    for (size_t i = 0; i < vecContext.size(); i++)
-      pmes->add_feature(vecContext[i]);
-    std::vector<double> vecProb = m_pModel->classify(*pmes);
-
-    for (size_t i = 0; i < vecProb.size(); i++) {
-      std::string label = m_pModel->get_class_label(i);
-      vecOutput.push_back(make_pair(label, vecProb[i]));
-    }
-    delete pmes;
-  }
   void fnEval(const char* pszContext, std::vector<double>& vecOutput) const {
     std::vector<std::string> vecContext;
     ME_Sample* pmes = new ME_Sample();
diff --git a/training/const_reorder/Makefile.am b/training/const_reorder/Makefile.am
index 2e81e588..367ac904 100644
--- a/training/const_reorder/Makefile.am
+++ b/training/const_reorder/Makefile.am
@@ -1,8 +1,12 @@
+noinst_LIBRARIES = libtrainer.a
+
+libtrainer_a_SOURCES = trainer.h trainer.cc
+
 bin_PROGRAMS = const_reorder_model_trainer argument_reorder_model_trainer
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder
 
 const_reorder_model_trainer_SOURCES = constituent_reorder_model.cc
-const_reorder_model_trainer_LDADD = ../../utils/libutils.a
+const_reorder_model_trainer_LDADD = ../../utils/libutils.a libtrainer.a
 argument_reorder_model_trainer_SOURCES = argument_reorder_model.cc
-argument_reorder_model_trainer_LDADD = ../../utils/libutils.a
+argument_reorder_model_trainer_LDADD = ../../utils/libutils.a libtrainer.a
diff --git a/training/const_reorder/argument_reorder_model.cc b/training/const_reorder/argument_reorder_model.cc
index 54402436..87f2ce2f 100644
--- a/training/const_reorder/argument_reorder_model.cc
+++ b/training/const_reorder/argument_reorder_model.cc
@@ -14,7 +14,7 @@
 
 #include "utils/filelib.h"
 
-#include "decoder/ff_const_reorder_common.h"
+#include "trainer.h"
 
 using namespace std;
 using namespace const_reorder;
@@ -93,8 +93,8 @@ struct SArgumentReorderTrainer {
       strcpy(pszNewInstanceFName, pszInstanceFname);
     }
 
-    Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL);
-    pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname, 300);
+    Tsuruoka_Maxent_Trainer* pMaxent = new Tsuruoka_Maxent_Trainer;
+    pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname);
     delete pMaxent;
 
     if (strcmp(pszNewInstanceFName, pszInstanceFname) != 0) {
diff --git a/training/const_reorder/constituent_reorder_model.cc b/training/const_reorder/constituent_reorder_model.cc
index 6bec3f0b..d3ad0f2b 100644
--- a/training/const_reorder/constituent_reorder_model.cc
+++ b/training/const_reorder/constituent_reorder_model.cc
@@ -12,7 +12,7 @@
 
 #include "utils/filelib.h"
 
-#include "decoder/ff_const_reorder_common.h"
+#include "trainer.h"
 
 using namespace std;
 using namespace const_reorder;
@@ -104,8 +104,8 @@ struct SConstReorderTrainer {
 pZhangleMaxent->fnTrain(pszInstanceFname, "lbfgs", pszModelFname, 100, 2.0);
 delete pZhangleMaxent;*/
 
-    Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL);
-    pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname, 300);
+    Tsuruoka_Maxent_Trainer* pMaxent = new Tsuruoka_Maxent_Trainer;
+    pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname);
     delete pMaxent;
 
     if (strcmp(pszNewInstanceFName, pszInstanceFname) != 0) {
diff --git a/training/const_reorder/trainer.cc b/training/const_reorder/trainer.cc
new file mode 100644
index 00000000..e22a8a66
--- /dev/null
+++ b/training/const_reorder/trainer.cc
@@ -0,0 +1,67 @@
+#include "trainer.h"
+
+Tsuruoka_Maxent_Trainer::Tsuruoka_Maxent_Trainer()
+    : const_reorder::Tsuruoka_Maxent(NULL) {}  // NULL: no model file is loaded
+// Trains a maxent model on pszInstanceFName and saves it to pszModelFName.
+void Tsuruoka_Maxent_Trainer::fnTrain(const char* pszInstanceFName,
+                                      const char* pszAlgorithm,
+                                      const char* pszModelFName) {
+  assert(strcmp(pszAlgorithm, "l1") == 0 || strcmp(pszAlgorithm, "l2") == 0 ||
+         strcmp(pszAlgorithm, "sgd") == 0 || strcmp(pszAlgorithm, "SGD") == 0);
+  FILE* fpIn = fopen(pszInstanceFName, "r");
+
+  ME_Model* pModel = new ME_Model();
+
+  char* pszLine = new char[100001];
+  int iNumInstances = 0;
+  if (fpIn == NULL) {  // feof(NULL) would crash; report and train nothing
+    perror(pszInstanceFName);
+    delete pModel;
+    delete[] pszLine;
+    return;
+  }
+  while (fgets(pszLine, 100001, fpIn) != NULL) {  // full buffer, was 20000
+    if (pszLine[0] == '\0') continue;
+    int iLen = strlen(pszLine);
+    while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) {
+      pszLine[iLen - 1] = '\0';
+      iLen--;
+    }
+
+    iNumInstances++;
+
+    ME_Sample* pmes = new ME_Sample();
+
+    char* p = strrchr(pszLine, ' ');
+    assert(p != NULL);
+    p[0] = '\0';  // cut the line at its last space: features | label
+    p++;
+    std::vector<std::string> vecContext;
+    SplitOnWhitespace(std::string(pszLine), &vecContext);
+
+    pmes->label = std::string(p);
+    for (size_t i = 0; i < vecContext.size(); i++)
+      pmes->add_feature(vecContext[i]);
+    pModel->add_training_sample((*pmes));
+    if (iNumInstances % 100000 == 0)
+      fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances);
+    delete pmes;
+  }
+  fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances);
+  fclose(fpIn);
+
+  if (strcmp(pszAlgorithm, "l1") == 0)
+    pModel->use_l1_regularizer(1.0);
+  else if (strcmp(pszAlgorithm, "l2") == 0)
+    pModel->use_l2_regularizer(1.0);
+  else  // any other name; the assert above only admits "sgd" / "SGD"
+    pModel->use_SGD();
+
+  pModel->train();
+  pModel->save_to_file(pszModelFName);
+
+  delete pModel;
+  fprintf(stdout, "......Finished Training\n");
+  fprintf(stdout, "......Model saved as %s\n", pszModelFName);
+  delete[] pszLine;
+}
diff --git a/training/const_reorder/trainer.h b/training/const_reorder/trainer.h
new file mode 100644
index 00000000..e574a536
--- /dev/null
+++ b/training/const_reorder/trainer.h
@@ -0,0 +1,12 @@
+#ifndef TRAINING_CONST_REORDER_TRAINER_H_
+#define TRAINING_CONST_REORDER_TRAINER_H_
+
+#include "decoder/ff_const_reorder_common.h"
+// Extends const_reorder::Tsuruoka_Maxent with a training entry point.
+struct Tsuruoka_Maxent_Trainer : const_reorder::Tsuruoka_Maxent {
+  Tsuruoka_Maxent_Trainer();
+  void fnTrain(const char* pszInstanceFName, const char* pszAlgorithm,
+               const char* pszModelFName);
+};
+
+#endif  // TRAINING_CONST_REORDER_TRAINER_H_
-- 
cgit v1.2.3


From b6dd5a683db9dda2d634dd2fdb76606819594901 Mon Sep 17 00:00:00 2001
From: "Wu, Ke" <wuke@cs.umd.edu>
Date: Wed, 17 Dec 2014 16:00:04 -0500
Subject: Combine everything related to maxent to a single file

---
 decoder/ff_const_reorder_common.h |   6 +-
 training/const_reorder/trainer.cc |   4 +-
 utils/Makefile.am                 |   5 -
 utils/lbfgs.cpp                   | 108 ----------
 utils/lbfgs.h                     |  20 --
 utils/mathvec.h                   |  87 --------
 utils/maxent.cpp                  | 427 +++++++++++++++++++++++++++++++++++++-
 utils/maxent.h                    |  95 ++++++++-
 utils/owlqn.cpp                   | 127 ------------
 utils/sgd.cpp                     | 193 -----------------
 10 files changed, 516 insertions(+), 556 deletions(-)
 delete mode 100644 utils/lbfgs.cpp
 delete mode 100644 utils/lbfgs.h
 delete mode 100644 utils/mathvec.h
 delete mode 100644 utils/owlqn.cpp
 delete mode 100644 utils/sgd.cpp

(limited to 'training')

diff --git a/decoder/ff_const_reorder_common.h b/decoder/ff_const_reorder_common.h
index b124ce47..755fd948 100644
--- a/decoder/ff_const_reorder_common.h
+++ b/decoder/ff_const_reorder_common.h
@@ -1081,7 +1081,7 @@ typedef std::unordered_map<std::string, int>::iterator Iterator;
 struct Tsuruoka_Maxent {
   Tsuruoka_Maxent(const char* pszModelFName) {
     if (pszModelFName != NULL) {
-      m_pModel = new ME_Model();
+      m_pModel = new maxent::ME_Model();
       m_pModel->load_from_file(pszModelFName);
     } else
       m_pModel = NULL;
@@ -1093,7 +1093,7 @@ struct Tsuruoka_Maxent {
 
   void fnEval(const char* pszContext, std::vector<double>& vecOutput) const {
     std::vector<std::string> vecContext;
-    ME_Sample* pmes = new ME_Sample();
+    maxent::ME_Sample* pmes = new maxent::ME_Sample();
     SplitOnWhitespace(std::string(pszContext), &vecContext);
 
     vecOutput.clear();
@@ -1113,7 +1113,7 @@ struct Tsuruoka_Maxent {
   }
 
  private:
-  ME_Model* m_pModel;
+  maxent::ME_Model* m_pModel;
 };
 
 // an argument item or a predicate item (the verb itself)
diff --git a/training/const_reorder/trainer.cc b/training/const_reorder/trainer.cc
index e22a8a66..89bd7479 100644
--- a/training/const_reorder/trainer.cc
+++ b/training/const_reorder/trainer.cc
@@ -10,7 +10,7 @@ void Tsuruoka_Maxent_Trainer::fnTrain(const char* pszInstanceFName,
          strcmp(pszAlgorithm, "sgd") == 0 || strcmp(pszAlgorithm, "SGD") == 0);
   FILE* fpIn = fopen(pszInstanceFName, "r");
 
-  ME_Model* pModel = new ME_Model();
+  maxent::ME_Model* pModel = new maxent::ME_Model();
 
   char* pszLine = new char[100001];
   int iNumInstances = 0;
@@ -30,7 +30,7 @@ void Tsuruoka_Maxent_Trainer::fnTrain(const char* pszInstanceFName,
 
     iNumInstances++;
 
-    ME_Sample* pmes = new ME_Sample();
+    maxent::ME_Sample* pmes = new maxent::ME_Sample();
 
     char* p = strrchr(pszLine, ' ');
     assert(p != NULL);
diff --git a/utils/Makefile.am b/utils/Makefile.am
index fabb4454..e0221e64 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -38,11 +38,8 @@ libutils_a_SOURCES = \
   have_64_bits.h \
   indices_after.h \
   kernel_string_subseq.h \
-  lbfgs.h \
-  lbfgs.cpp \
   logval.h \
   m.h \
-  mathvec.h \
   maxent.h \
   maxent.cpp \
   murmur_hash3.h \
@@ -50,8 +47,6 @@ libutils_a_SOURCES = \
   named_enum.h \
   null_deleter.h \
   null_traits.h \
-  owlqn.cpp \
-  sgd.cpp \
   perfect_hash.h \
   prob.h \
   sampler.h \
diff --git a/utils/lbfgs.cpp b/utils/lbfgs.cpp
deleted file mode 100644
index bd26f048..00000000
--- a/utils/lbfgs.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-#include <vector>
-#include <iostream>
-#include <cmath>
-#include <stdio.h>
-#include "mathvec.h"
-#include "lbfgs.h"
-#include "maxent.h"
-
-using namespace std;
-
-const static int M = LBFGS_M;
-const static double LINE_SEARCH_ALPHA = 0.1;
-const static double LINE_SEARCH_BETA = 0.5;
-
-// stopping criteria
-int LBFGS_MAX_ITER = 300;
-const static double MIN_GRAD_NORM = 0.0001;
-
-double ME_Model::backtracking_line_search(const Vec& x0, const Vec& grad0,
-                                          const double f0, const Vec& dx,
-                                          Vec& x, Vec& grad1) {
-  double t = 1.0 / LINE_SEARCH_BETA;
-
-  double f;
-  do {
-    t *= LINE_SEARCH_BETA;
-    x = x0 + t * dx;
-    f = FunctionGradient(x.STLVec(), grad1.STLVec());
-    //        cout << "*";
-  } while (f > f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0));
-
-  return f;
-}
-
-//
-// Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited Storage",
-// Mathematics of Computation, Vol. 35, No. 151, pp. 773-782, 1980.
-//
-Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
-                   const Vec y[], const double z[]) {
-  int offset, bound;
-  if (iter <= M) {
-    offset = 0;
-    bound = iter;
-  } else {
-    offset = iter - M;
-    bound = M;
-  }
-
-  Vec q = grad;
-  double alpha[M], beta[M];
-  for (int i = bound - 1; i >= 0; i--) {
-    const int j = (i + offset) % M;
-    alpha[i] = z[j] * dot_product(s[j], q);
-    q += -alpha[i] * y[j];
-  }
-  if (iter > 0) {
-    const int j = (iter - 1) % M;
-    const double gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
-    //    static double gamma;
-    //    if (gamma == 0) gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
-    q *= gamma;
-  }
-  for (int i = 0; i <= bound - 1; i++) {
-    const int j = (i + offset) % M;
-    beta[i] = z[j] * dot_product(y[j], q);
-    q += s[j] * (alpha[i] - beta[i]);
-  }
-
-  return q;
-}
-
-vector<double> ME_Model::perform_LBFGS(const vector<double>& x0) {
-  const size_t dim = x0.size();
-  Vec x = x0;
-
-  Vec grad(dim), dx(dim);
-  double f = FunctionGradient(x.STLVec(), grad.STLVec());
-
-  Vec s[M], y[M];
-  double z[M];  // rho
-
-  for (int iter = 0; iter < LBFGS_MAX_ITER; iter++) {
-
-    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
-    if (_nheldout > 0) {
-      const double heldout_logl = heldout_likelihood();
-      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
-              _heldout_error);
-    }
-    fprintf(stderr, "\n");
-
-    if (sqrt(dot_product(grad, grad)) < MIN_GRAD_NORM) break;
-
-    dx = -1 * approximate_Hg(iter, grad, s, y, z);
-
-    Vec x1(dim), grad1(dim);
-    f = backtracking_line_search(x, grad, f, dx, x1, grad1);
-
-    s[iter % M] = x1 - x;
-    y[iter % M] = grad1 - grad;
-    z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
-    x = x1;
-    grad = grad1;
-  }
-
-  return x.STLVec();
-}
diff --git a/utils/lbfgs.h b/utils/lbfgs.h
deleted file mode 100644
index 4d706f7a..00000000
--- a/utils/lbfgs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _LBFGS_H_
-#define _LBFGS_H_
-
-#include <vector>
-
-// template<class FuncGrad>
-// std::vector<double>
-// perform_LBFGS(FuncGrad func_grad, const std::vector<double> & x0);
-
-std::vector<double> perform_LBFGS(
-    double (*func_grad)(const std::vector<double> &, std::vector<double> &),
-    const std::vector<double> &x0);
-
-std::vector<double> perform_OWLQN(
-    double (*func_grad)(const std::vector<double> &, std::vector<double> &),
-    const std::vector<double> &x0, const double C);
-
-const int LBFGS_M = 10;
-
-#endif
diff --git a/utils/mathvec.h b/utils/mathvec.h
deleted file mode 100644
index f8c60e5d..00000000
--- a/utils/mathvec.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef _MATH_VECTOR_H_
-#define _MATH_VECTOR_H_
-
-#include <vector>
-#include <iostream>
-#include <cassert>
-
-class Vec {
- private:
-  std::vector<double> _v;
-
- public:
-  Vec(const size_t n = 0, const double val = 0) { _v.resize(n, val); }
-  Vec(const std::vector<double>& v) : _v(v) {}
-  const std::vector<double>& STLVec() const { return _v; }
-  std::vector<double>& STLVec() { return _v; }
-  size_t Size() const { return _v.size(); }
-  double& operator[](int i) { return _v[i]; }
-  const double& operator[](int i) const { return _v[i]; }
-  Vec& operator+=(const Vec& b) {
-    assert(b.Size() == _v.size());
-    for (size_t i = 0; i < _v.size(); i++) {
-      _v[i] += b[i];
-    }
-    return *this;
-  }
-  Vec& operator*=(const double c) {
-    for (size_t i = 0; i < _v.size(); i++) {
-      _v[i] *= c;
-    }
-    return *this;
-  }
-  void Project(const Vec& y) {
-    for (size_t i = 0; i < _v.size(); i++) {
-      //      if (sign(_v[i]) != sign(y[i])) _v[i] = 0;
-      if (_v[i] * y[i] <= 0) _v[i] = 0;
-    }
-  }
-};
-
-inline double dot_product(const Vec& a, const Vec& b) {
-  double sum = 0;
-  for (size_t i = 0; i < a.Size(); i++) {
-    sum += a[i] * b[i];
-  }
-  return sum;
-}
-
-inline std::ostream& operator<<(std::ostream& s, const Vec& a) {
-  s << "(";
-  for (size_t i = 0; i < a.Size(); i++) {
-    if (i != 0) s << ", ";
-    s << a[i];
-  }
-  s << ")";
-  return s;
-}
-
-inline const Vec operator+(const Vec& a, const Vec& b) {
-  Vec v(a.Size());
-  assert(a.Size() == b.Size());
-  for (size_t i = 0; i < a.Size(); i++) {
-    v[i] = a[i] + b[i];
-  }
-  return v;
-}
-
-inline const Vec operator-(const Vec& a, const Vec& b) {
-  Vec v(a.Size());
-  assert(a.Size() == b.Size());
-  for (size_t i = 0; i < a.Size(); i++) {
-    v[i] = a[i] - b[i];
-  }
-  return v;
-}
-
-inline const Vec operator*(const Vec& a, const double c) {
-  Vec v(a.Size());
-  for (size_t i = 0; i < a.Size(); i++) {
-    v[i] = a[i] * c;
-  }
-  return v;
-}
-
-inline const Vec operator*(const double c, const Vec& a) { return a * c; }
-
-#endif
diff --git a/utils/maxent.cpp b/utils/maxent.cpp
index 0f49ee9d..fd772e08 100644
--- a/utils/maxent.cpp
+++ b/utils/maxent.cpp
@@ -3,12 +3,15 @@
  */
 
 #include "maxent.h"
+
+#include <vector>
+#include <iostream>
 #include <cmath>
 #include <cstdio>
-#include "lbfgs.h"
 
 using namespace std;
 
+namespace maxent {
 double ME_Model::FunctionGradient(const vector<double>& x,
                                   vector<double>& grad) {
   assert((int)_fb.Size() == x.size());
@@ -601,6 +604,428 @@ vector<double> ME_Model::classify(ME_Sample& mes) const {
   return vp;
 }
 
+// template<class FuncGrad>
+// std::vector<double>
+// perform_LBFGS(FuncGrad func_grad, const std::vector<double> & x0);
+// NOTE(review): prototypes only; no definition is visible in this chunk.
+std::vector<double> perform_LBFGS(
+    double (*func_grad)(const std::vector<double> &, std::vector<double> &),
+    const std::vector<double> &x0);
+
+std::vector<double> perform_OWLQN(
+    double (*func_grad)(const std::vector<double> &, std::vector<double> &),
+    const std::vector<double> &x0, const double C);
+
+const int LBFGS_M = 10;
+
+const static int M = LBFGS_M;  // length of the s[], y[], z[] history arrays
+const static double LINE_SEARCH_ALPHA = 0.1;  // sufficient-decrease factor
+const static double LINE_SEARCH_BETA = 0.5;  // step multiplier per retry
+
+// stopping criteria
+int LBFGS_MAX_ITER = 300;  // iteration cap in perform_LBFGS
+const static double MIN_GRAD_NORM = 0.0001;  // stop below this gradient norm
+
+// LBFGS
+// Shrinks step t from 1 until f decreases enough; outputs x, grad1, returns f.
+double ME_Model::backtracking_line_search(const Vec& x0, const Vec& grad0,
+                                          const double f0, const Vec& dx,
+                                          Vec& x, Vec& grad1) {
+  double t = 1.0 / LINE_SEARCH_BETA;  // pre-divided: the first trial uses t = 1
+
+  double f;
+  do {
+    t *= LINE_SEARCH_BETA;  // shrink the step on each retry
+    x = x0 + t * dx;
+    f = FunctionGradient(x.STLVec(), grad1.STLVec());
+    //        cout << "*";
+  } while (f > f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0));
+
+  return f;
+}
+
+// Two-loop recursion: returns the approximate inverse Hessian times grad.
+// Jorge Nocedal, "Updating Quasi-Newton Matrices With Limited Storage",
+// Mathematics of Computation, Vol. 35, No. 151, pp. 773-782, 1980.
+//
+Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
+                   const Vec y[], const double z[]) {
+  int offset, bound;
+  if (iter <= M) {  // at most M updates stored: use slots 0..iter-1
+    offset = 0;
+    bound = iter;
+  } else {  // ring buffer has wrapped: the oldest slot is iter % M
+    offset = iter - M;
+    bound = M;
+  }
+
+  Vec q = grad;
+  double alpha[M], beta[M];
+  for (int i = bound - 1; i >= 0; i--) {  // newest to oldest
+    const int j = (i + offset) % M;
+    alpha[i] = z[j] * dot_product(s[j], q);
+    q += -alpha[i] * y[j];
+  }
+  if (iter > 0) {  // scale q using the most recently stored slot
+    const int j = (iter - 1) % M;
+    const double gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
+    //    static double gamma;
+    //    if (gamma == 0) gamma = ((1.0 / z[j]) / dot_product(y[j], y[j]));
+    q *= gamma;
+  }
+  for (int i = 0; i <= bound - 1; i++) {  // oldest to newest
+    const int j = (i + offset) % M;
+    beta[i] = z[j] * dot_product(y[j], q);
+    q += s[j] * (alpha[i] - beta[i]);
+  }
+
+  return q;
+}
+// Runs L-BFGS from x0 on FunctionGradient(); returns the final point.
+vector<double> ME_Model::perform_LBFGS(const vector<double>& x0) {
+  const size_t dim = x0.size();
+  Vec x = x0;
+
+  Vec grad(dim), dx(dim);
+  double f = FunctionGradient(x.STLVec(), grad.STLVec());
+
+  Vec s[M], y[M];
+  double z[M];  // rho
+
+  for (int iter = 0; iter < LBFGS_MAX_ITER; iter++) {
+    // Progress line: negated objective plus training (and held-out) error.
+    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
+    if (_nheldout > 0) {
+      const double heldout_logl = heldout_likelihood();
+      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
+              _heldout_error);
+    }
+    fprintf(stderr, "\n");
+
+    if (sqrt(dot_product(grad, grad)) < MIN_GRAD_NORM) break;
+
+    dx = -1 * approximate_Hg(iter, grad, s, y, z);
+
+    Vec x1(dim), grad1(dim);
+    f = backtracking_line_search(x, grad, f, dx, x1, grad1);
+
+    s[iter % M] = x1 - x;  // step taken
+    y[iter % M] = grad1 - grad;  // gradient change over that step
+    z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
+    x = x1;
+    grad = grad1;
+  }
+
+  return x.STLVec();
+}
+
+// OWLQN
+
+// stopping criteria
+int OWLQN_MAX_ITER = 300;  // iteration cap in perform_OWLQN
+// Redundant prototype: approximate_Hg is already defined above in this file.
+Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
+                   const Vec y[], const double z[]);
+
+inline int sign(double x) {  // -1, 0 or +1 according to the sign of x
+  if (x > 0) return 1;
+  if (x < 0) return -1;
+  return 0;
+};
+// Returns grad0 adjusted for the C * |x[i]| penalty, component by component.
+static Vec pseudo_gradient(const Vec& x, const Vec& grad0, const double C) {
+  Vec grad = grad0;
+  for (size_t i = 0; i < x.Size(); i++) {
+    if (x[i] != 0) {  // nonzero weight: add C * sign(x[i])
+      grad[i] += C * sign(x[i]);
+      continue;
+    }
+    const double gm = grad0[i] - C;  // x[i] == 0 from here on
+    if (gm > 0) {
+      grad[i] = gm;
+      continue;
+    }
+    const double gp = grad0[i] + C;
+    if (gp < 0) {
+      grad[i] = gp;
+      continue;
+    }
+    grad[i] = 0;  // neither grad0 - C > 0 nor grad0 + C < 0
+  }
+
+  return grad;
+}
+// Returns FunctionGradient(x, grad) + C * sum|x[i]|; grad gets no L1 term.
+double ME_Model::regularized_func_grad(const double C, const Vec& x,
+                                       Vec& grad) {
+  double f = FunctionGradient(x.STLVec(), grad.STLVec());
+  for (size_t i = 0; i < x.Size(); i++) {
+    f += C * fabs(x[i]);
+  }
+
+  return f;
+}
+// Backtracking search on the L1-regularized objective; returns its value at x.
+double ME_Model::constrained_line_search(double C, const Vec& x0,
+                                         const Vec& grad0, const double f0,
+                                         const Vec& dx, Vec& x, Vec& grad1) {
+  // compute the orthant to explore
+  Vec orthant = x0;
+  for (size_t i = 0; i < orthant.Size(); i++) {
+    if (orthant[i] == 0) orthant[i] = -grad0[i];
+  }
+
+  double t = 1.0 / LINE_SEARCH_BETA;  // pre-divided: the first trial uses t = 1
+
+  double f;
+  do {
+    t *= LINE_SEARCH_BETA;
+    x = x0 + t * dx;
+    x.Project(orthant);  // presumably zeroes components outside the orthant
+    //    for (size_t i = 0; i < x.Size(); i++) {
+    //      if (x0[i] != 0 && sign(x[i]) != sign(x0[i])) x[i] = 0;
+    //    }
+
+    f = regularized_func_grad(C, x, grad1);
+    //        cout << "*";
+  } while (f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0));
+
+  return f;
+}
+// Runs OWL-QN from x0 with L1 weight C on FunctionGradient(); returns final x.
+vector<double> ME_Model::perform_OWLQN(const vector<double>& x0,
+                                       const double C) {
+  const size_t dim = x0.size();
+  Vec x = x0;
+
+  Vec grad(dim), dx(dim);
+  double f = regularized_func_grad(C, x, grad);
+
+  Vec s[M], y[M];
+  double z[M];  // rho
+
+  for (int iter = 0; iter < OWLQN_MAX_ITER; iter++) {
+    Vec pg = pseudo_gradient(x, grad, C);
+
+    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
+    if (_nheldout > 0) {
+      const double heldout_logl = heldout_likelihood();
+      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
+              _heldout_error);
+    }
+    fprintf(stderr, "\n");
+
+    if (sqrt(dot_product(pg, pg)) < MIN_GRAD_NORM) break;
+
+    dx = -1 * approximate_Hg(iter, pg, s, y, z);
+    if (dot_product(dx, pg) >= 0) dx.Project(-1 * pg);
+
+    Vec x1(dim), grad1(dim);
+    f = constrained_line_search(C, x, pg, f, dx, x1, grad1);
+
+    s[iter % M] = x1 - x;  // step taken
+    y[iter % M] = grad1 - grad;  // uses the plain gradients, not pg
+    z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
+
+    x = x1;
+    grad = grad1;
+  }
+
+  return x.STLVec();
+}
+
+// SGD
+
+// const double SGD_ETA0 = 1;
+// const double SGD_ITER = 30;
+// const double SGD_ALPHA = 0.85;
+
+//#define FOLOS_NAIVE
+//#define FOLOS_LAZY
+#define SGD_CP
+// Pulls weight _vl[i] toward zero (clamped at 0); q[i] accumulates the change.
+inline void apply_l1_penalty(const int i, const double u, vector<double>& _vl,
+                             vector<double>& q) {
+  double& w = _vl[i];
+  const double z = w;  // weight before the penalty is applied
+  double& qi = q[i];
+  if (w > 0) {
+    w = max(0.0, w - (u + qi));
+  } else if (w < 0) {
+    w = min(0.0, w + (u - qi));
+  }
+  qi += w - z;
+}
+
+static double l1norm(const vector<double>& v) {  // returns sum of |v[i]|
+  double sum = 0;
+  for (size_t i = 0; i < v.size(); i++) sum += fabs(v[i]);  // not ::abs(int)
+  return sum;
+}
+
+// Lazy FOLOS step for weight k: applies in one go the L1 shrinkage recorded
+// in sum_eta since k was last touched, clipping the weight at zero.
+inline void update_folos_lazy(const int iter_sample, const int k,
+                              vector<double>& _vl,
+                              const vector<double>& sum_eta,
+                              vector<int>& last_updated) {
+  const int prev = last_updated[k];
+  const double owed = sum_eta[iter_sample] - sum_eta[prev];
+  const double w = _vl[k];
+  _vl[k] = (w > 0) ? max(0.0, w - owed) : min(0.0, w + owed);
+  last_updated[k] = iter_sample;
+}
+
+int ME_Model::perform_SGD() {
+  // Trains the weights _vl by stochastic gradient descent: SGD_ITER passes
+  // over _vs in shuffled order, with optional L1 regularization (_l1reg).
+  // Logs one line per pass to stderr.  Returns 0; calls exit(1) when L2
+  // regularization is requested, which this mode does not implement.
+  if (_l2reg > 0) {
+    cerr << "error: L2 regularization is currently not supported in SGD mode."
+         << endl;
+    exit(1);
+  }
+
+  cerr << "performing SGD" << endl;
+
+  const double l1param = _l1reg;
+  const int d = _fb.Size();
+
+  // Visiting order of the training samples; reshuffled on every pass.
+  vector<int> ri(_vs.size());
+  for (size_t i = 0; i < ri.size(); i++) ri[i] = i;
+
+  int iter_sample = 0;  // samples processed so far, across all passes
+  const double eta0 = SGD_ETA0;
+
+  //  cerr << "l1param = " << l1param << endl;
+  cerr << "eta0 = " << eta0 << " alpha = " << SGD_ALPHA << endl;
+
+  double u = 0;            // running sum of eta * l1param (used by SGD_CP)
+  vector<double> q(d, 0);  // per-weight state kept by apply_l1_penalty
+  vector<int> last_updated(d, 0);  // read/written by update_folos_lazy
+  vector<double> sum_eta;          // prefix sums of eta * l1param
+  sum_eta.push_back(0);
+
+  for (int iter = 0; iter < SGD_ITER; iter++) {
+
+    random_shuffle(ri.begin(), ri.end());
+
+    double logl = 0;
+    int ncorrect = 0, ntotal = 0;
+    for (size_t i = 0; i < _vs.size(); i++, ntotal++, iter_sample++) {
+      const Sample& s = _vs[ri[i]];
+
+#ifdef FOLOS_LAZY
+      for (vector<int>::const_iterator j = s.positive_features.begin();
+           j != s.positive_features.end(); j++) {
+        for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+             k != _feature2mef[*j].end(); k++) {
+          update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
+        }
+      }
+#endif
+
+      vector<double> membp(_num_classes);
+      const int max_label = conditional_probability(s, membp);
+
+      const double eta =
+          eta0 * pow(SGD_ALPHA,
+                     (double)iter_sample / _vs.size());  // exponential decay
+      //      const double eta = eta0 / (1.0 + (double)iter_sample /
+      // _vs.size());
+
+      //      if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta <<
+      // endl;
+      u += eta * l1param;
+
+      sum_eta.push_back(sum_eta.back() + eta * l1param);
+
+      logl += log(membp[s.label]);
+      if (max_label == s.label) ncorrect++;
+
+      // binary features
+      for (vector<int>::const_iterator j = s.positive_features.begin();
+           j != s.positive_features.end(); j++) {
+        for (vector<int>::const_iterator k = _feature2mef[*j].begin();
+             k != _feature2mef[*j].end(); k++) {
+          const double me = membp[_fb.Feature(*k).label()];
+          const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+          const double grad = (me - ee);
+          _vl[*k] -= eta * grad;
+#ifdef SGD_CP
+          apply_l1_penalty(*k, u, _vl, q);
+#endif
+        }
+      }
+      // real-valued features
+      for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin();
+           j != s.rvfeatures.end(); j++) {
+        for (vector<int>::const_iterator k = _feature2mef[j->first].begin();
+             k != _feature2mef[j->first].end(); k++) {
+          const double me = membp[_fb.Feature(*k).label()];
+          const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
+          const double grad = (me - ee) * j->second;
+          _vl[*k] -= eta * grad;
+#ifdef SGD_CP
+          apply_l1_penalty(*k, u, _vl, q);
+#endif
+        }
+      }
+
+#ifdef FOLOS_NAIVE
+      for (int j = 0; j < d; j++) {
+        double& x = _vl[j];
+        if (x > 0)
+          x = max(0.0, x - eta * l1param);
+        else
+          x = min(0.0, x + eta * l1param);
+      }
+#endif
+    }
+    logl /= _vs.size();
+//    fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl,
+// (double)ncorrect / ntotal);
+
+#ifdef FOLOS_LAZY
+    if (l1param > 0) {
+      for (int j = 0; j < d; j++)
+        update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
+    }
+#endif
+
+    // Reported objective: this pass's mean log-probability minus the L1 term.
+    double f = logl;
+    if (l1param > 0) {
+      const double l1 =
+          l1norm(_vl);  // this is not accurate when lazy update is used
+      //      cerr << "f0 = " <<  update_model_expectation() - l1param * l1 << "
+      // ";
+      f -= l1param * l1;
+      //      cerr << " f = " << f << " l1 = " << l1 << endl;
+    }
+    //    fprintf(stderr, "%4d  obj = %7.3f acc = %6.4f", iter+1, f,
+    // (double)ncorrect/ntotal);
+    //    fprintf(stderr, "%4d  obj = %f", iter+1, f);
+    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, f,
+            1 - (double)ncorrect / ntotal);
+
+    if (_nheldout > 0) {
+      double heldout_logl = heldout_likelihood();
+      //      fprintf(stderr, "  heldout_logl = %f  acc = %6.4f\n",
+      // heldout_logl, 1 - _heldout_error);
+      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
+              _heldout_error);
+    }
+    fprintf(stderr, "\n");
+  }
+
+  return 0;
+}
+
+}  // namespace maxent
+
 /*
  * $Log: maxent.cpp,v $
  * Revision 1.1.1.1  2007/05/15 08:30:35  kyoshida
diff --git a/utils/maxent.h b/utils/maxent.h
index b1efd88e..74d13a6f 100644
--- a/utils/maxent.h
+++ b/utils/maxent.h
@@ -5,21 +5,95 @@
 #ifndef __MAXENT_H_
 #define __MAXENT_H_
 
-#include <string>
-#include <vector>
-#include <list>
-#include <map>
 #include <algorithm>
 #include <iostream>
+#include <list>
+#include <map>
 #include <string>
+#include <unordered_map>
+#include <vector>
+
 #include <cassert>
-#include "mathvec.h"
 
-#define USE_HASH_MAP  // if you encounter errors with hash, try commenting out
-                      // this line. (the program will be a bit slower, though)
-#ifdef USE_HASH_MAP
-#include <unordered_map>
-#endif
+namespace maxent {
+// Dense vector of doubles wrapping std::vector<double>, with the element-wise
+// operations the optimizers need.  operator+= and Project assert equal sizes.
+class Vec {
+ private:
+  std::vector<double> _v;
+
+ public:
+  Vec(const size_t n = 0, const double val = 0) : _v(n, val) {}
+  Vec(const std::vector<double>& v) : _v(v) {}  // implicit by design
+  const std::vector<double>& STLVec() const { return _v; }
+  std::vector<double>& STLVec() { return _v; }
+  size_t Size() const { return _v.size(); }
+  double& operator[](int i) { return _v[i]; }
+  const double& operator[](int i) const { return _v[i]; }
+  Vec& operator+=(const Vec& b) {
+    assert(b.Size() == _v.size());
+    for (size_t i = 0; i < _v.size(); i++) _v[i] += b[i];
+    return *this;
+  }
+  Vec& operator*=(const double c) {
+    for (double& e : _v) e *= c;
+    return *this;
+  }
+  // Zeroes every component whose sign differs from y's (or where either is 0).
+  void Project(const Vec& y) {
+    assert(y.Size() == _v.size());
+    for (size_t i = 0; i < _v.size(); i++) {
+      //      if (sign(_v[i]) != sign(y[i])) _v[i] = 0;
+      if (_v[i] * y[i] <= 0) _v[i] = 0;
+    }
+  }
+};
+
+// Inner product of a and b, which must have the same size (asserted).
+inline double dot_product(const Vec& a, const Vec& b) {
+  assert(a.Size() == b.Size());
+  double sum = 0;
+  for (size_t i = 0; i < a.Size(); i++) sum += a[i] * b[i];
+  return sum;
+}
+
+// Writes a to s as "(v0, v1, ...)", elements separated by ", "; an empty Vec
+// is written as "()".  Returns s so that insertions can be chained.
+inline std::ostream& operator<<(std::ostream& s, const Vec& a) {
+  s << "(";
+  const size_t count = a.Size();
+  for (size_t idx = 0; idx < count; idx++) (idx ? s << ", " : s) << a[idx];
+  s << ")";
+  return s;
+}
+
+// Element-wise sum of two equally sized vectors (size equality is asserted).
+// Returned as a plain, non-const Vec so callers can move from the temporary.
+inline Vec operator+(const Vec& a, const Vec& b) {
+  assert(a.Size() == b.Size());
+  Vec v(a.Size());
+  for (size_t i = 0; i < a.Size(); i++) v[i] = a[i] + b[i];
+  return v;
+}
+
+// Element-wise difference a - b of two equally sized vectors (asserted).
+// Returned as a plain, non-const Vec so callers can move from the temporary.
+inline Vec operator-(const Vec& a, const Vec& b) {
+  assert(a.Size() == b.Size());
+  Vec v(a.Size());
+  for (size_t i = 0; i < a.Size(); i++) v[i] = a[i] - b[i];
+  return v;
+}
+
+// Returns a copy of a with every element multiplied by c.  The result is a
+// plain, non-const Vec so callers can move from the temporary.
+inline Vec operator*(const Vec& a, const double c) {
+  Vec v(a.Size());
+  for (size_t i = 0; i < a.Size(); i++) v[i] = a[i] * c;
+  return v;
+}
+// Scalar-on-the-left form; forwards to operator*(const Vec&, double).
+inline Vec operator*(const double c, const Vec& a) { return a * c; }
 
 //
 // data format for each sample for training/testing
@@ -309,6 +383,7 @@ class ME_Model {
   static double FunctionGradientWrapper(const std::vector<double>& x,
                                         std::vector<double>& grad);
 };
+}  // namespace maxent
 
 #endif
 
diff --git a/utils/owlqn.cpp b/utils/owlqn.cpp
deleted file mode 100644
index c3a0f0da..00000000
--- a/utils/owlqn.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <vector>
-#include <iostream>
-#include <cmath>
-#include <stdio.h>
-#include "mathvec.h"
-#include "lbfgs.h"
-#include "maxent.h"
-
-using namespace std;
-
-const static int M = LBFGS_M;
-const static double LINE_SEARCH_ALPHA = 0.1;
-const static double LINE_SEARCH_BETA = 0.5;
-
-// stopping criteria
-int OWLQN_MAX_ITER = 300;
-const static double MIN_GRAD_NORM = 0.0001;
-
-Vec approximate_Hg(const int iter, const Vec& grad, const Vec s[],
-                   const Vec y[], const double z[]);
-
-inline int sign(double x) {
-  if (x > 0) return 1;
-  if (x < 0) return -1;
-  return 0;
-};
-
-static Vec pseudo_gradient(const Vec& x, const Vec& grad0, const double C) {
-  Vec grad = grad0;
-  for (size_t i = 0; i < x.Size(); i++) {
-    if (x[i] != 0) {
-      grad[i] += C * sign(x[i]);
-      continue;
-    }
-    const double gm = grad0[i] - C;
-    if (gm > 0) {
-      grad[i] = gm;
-      continue;
-    }
-    const double gp = grad0[i] + C;
-    if (gp < 0) {
-      grad[i] = gp;
-      continue;
-    }
-    grad[i] = 0;
-  }
-
-  return grad;
-}
-
-double ME_Model::regularized_func_grad(const double C, const Vec& x,
-                                       Vec& grad) {
-  double f = FunctionGradient(x.STLVec(), grad.STLVec());
-  for (size_t i = 0; i < x.Size(); i++) {
-    f += C * fabs(x[i]);
-  }
-
-  return f;
-}
-
-double ME_Model::constrained_line_search(double C, const Vec& x0,
-                                         const Vec& grad0, const double f0,
-                                         const Vec& dx, Vec& x, Vec& grad1) {
-  // compute the orthant to explore
-  Vec orthant = x0;
-  for (size_t i = 0; i < orthant.Size(); i++) {
-    if (orthant[i] == 0) orthant[i] = -grad0[i];
-  }
-
-  double t = 1.0 / LINE_SEARCH_BETA;
-
-  double f;
-  do {
-    t *= LINE_SEARCH_BETA;
-    x = x0 + t * dx;
-    x.Project(orthant);
-    //    for (size_t i = 0; i < x.Size(); i++) {
-    //      if (x0[i] != 0 && sign(x[i]) != sign(x0[i])) x[i] = 0;
-    //    }
-
-    f = regularized_func_grad(C, x, grad1);
-    //        cout << "*";
-  } while (f > f0 + LINE_SEARCH_ALPHA * dot_product(x - x0, grad0));
-
-  return f;
-}
-
-vector<double> ME_Model::perform_OWLQN(const vector<double>& x0,
-                                       const double C) {
-  const size_t dim = x0.size();
-  Vec x = x0;
-
-  Vec grad(dim), dx(dim);
-  double f = regularized_func_grad(C, x, grad);
-
-  Vec s[M], y[M];
-  double z[M];  // rho
-
-  for (int iter = 0; iter < OWLQN_MAX_ITER; iter++) {
-    Vec pg = pseudo_gradient(x, grad, C);
-
-    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, -f, _train_error);
-    if (_nheldout > 0) {
-      const double heldout_logl = heldout_likelihood();
-      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
-              _heldout_error);
-    }
-    fprintf(stderr, "\n");
-
-    if (sqrt(dot_product(pg, pg)) < MIN_GRAD_NORM) break;
-
-    dx = -1 * approximate_Hg(iter, pg, s, y, z);
-    if (dot_product(dx, pg) >= 0) dx.Project(-1 * pg);
-
-    Vec x1(dim), grad1(dim);
-    f = constrained_line_search(C, x, pg, f, dx, x1, grad1);
-
-    s[iter % M] = x1 - x;
-    y[iter % M] = grad1 - grad;
-    z[iter % M] = 1.0 / dot_product(y[iter % M], s[iter % M]);
-
-    x = x1;
-    grad = grad1;
-  }
-
-  return x.STLVec();
-}
diff --git a/utils/sgd.cpp b/utils/sgd.cpp
deleted file mode 100644
index 8613edca..00000000
--- a/utils/sgd.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-#include "maxent.h"
-#include <cmath>
-#include <stdio.h>
-
-using namespace std;
-
-// const double SGD_ETA0 = 1;
-// const double SGD_ITER = 30;
-// const double SGD_ALPHA = 0.85;
-
-//#define FOLOS_NAIVE
-//#define FOLOS_LAZY
-#define SGD_CP
-
-inline void apply_l1_penalty(const int i, const double u, vector<double>& _vl,
-                             vector<double>& q) {
-  double& w = _vl[i];
-  const double z = w;
-  double& qi = q[i];
-  if (w > 0) {
-    w = max(0.0, w - (u + qi));
-  } else if (w < 0) {
-    w = min(0.0, w + (u - qi));
-  }
-  qi += w - z;
-}
-
-static double l1norm(const vector<double>& v) {
-  double sum = 0;
-  for (size_t i = 0; i < v.size(); i++) sum += abs(v[i]);
-  return sum;
-}
-
-inline void update_folos_lazy(const int iter_sample, const int k,
-                              vector<double>& _vl,
-                              const vector<double>& sum_eta,
-                              vector<int>& last_updated) {
-  const double penalty = sum_eta[iter_sample] - sum_eta[last_updated[k]];
-  double& x = _vl[k];
-  if (x > 0)
-    x = max(0.0, x - penalty);
-  else
-    x = min(0.0, x + penalty);
-  last_updated[k] = iter_sample;
-}
-
-int ME_Model::perform_SGD() {
-  if (_l2reg > 0) {
-    cerr << "error: L2 regularization is currently not supported in SGD mode."
-         << endl;
-    exit(1);
-  }
-
-  cerr << "performing SGD" << endl;
-
-  const double l1param = _l1reg;
-
-  const int d = _fb.Size();
-
-  vector<int> ri(_vs.size());
-  for (size_t i = 0; i < ri.size(); i++) ri[i] = i;
-
-  vector<double> grad(d);
-  int iter_sample = 0;
-  const double eta0 = SGD_ETA0;
-
-  //  cerr << "l1param = " << l1param << endl;
-  cerr << "eta0 = " << eta0 << " alpha = " << SGD_ALPHA << endl;
-
-  double u = 0;
-  vector<double> q(d, 0);
-  vector<int> last_updated(d, 0);
-  vector<double> sum_eta;
-  sum_eta.push_back(0);
-
-  for (int iter = 0; iter < SGD_ITER; iter++) {
-
-    random_shuffle(ri.begin(), ri.end());
-
-    double logl = 0;
-    int ncorrect = 0, ntotal = 0;
-    for (size_t i = 0; i < _vs.size(); i++, ntotal++, iter_sample++) {
-      const Sample& s = _vs[ri[i]];
-
-#ifdef FOLOS_LAZY
-      for (vector<int>::const_iterator j = s.positive_features.begin();
-           j != s.positive_features.end(); j++) {
-        for (vector<int>::const_iterator k = _feature2mef[*j].begin();
-             k != _feature2mef[*j].end(); k++) {
-          update_folos_lazy(iter_sample, *k, _vl, sum_eta, last_updated);
-        }
-      }
-#endif
-
-      vector<double> membp(_num_classes);
-      const int max_label = conditional_probability(s, membp);
-
-      const double eta =
-          eta0 * pow(SGD_ALPHA,
-                     (double)iter_sample / _vs.size());  // exponential decay
-      //      const double eta = eta0 / (1.0 + (double)iter_sample /
-      // _vs.size());
-
-      //      if (iter_sample % _vs.size() == 0) cerr << "eta = " << eta <<
-      // endl;
-      u += eta * l1param;
-
-      sum_eta.push_back(sum_eta.back() + eta * l1param);
-
-      logl += log(membp[s.label]);
-      if (max_label == s.label) ncorrect++;
-
-      // binary features
-      for (vector<int>::const_iterator j = s.positive_features.begin();
-           j != s.positive_features.end(); j++) {
-        for (vector<int>::const_iterator k = _feature2mef[*j].begin();
-             k != _feature2mef[*j].end(); k++) {
-          const double me = membp[_fb.Feature(*k).label()];
-          const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
-          const double grad = (me - ee);
-          _vl[*k] -= eta * grad;
-#ifdef SGD_CP
-          apply_l1_penalty(*k, u, _vl, q);
-#endif
-        }
-      }
-      // real-valued features
-      for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin();
-           j != s.rvfeatures.end(); j++) {
-        for (vector<int>::const_iterator k = _feature2mef[j->first].begin();
-             k != _feature2mef[j->first].end(); k++) {
-          const double me = membp[_fb.Feature(*k).label()];
-          const double ee = (_fb.Feature(*k).label() == s.label ? 1.0 : 0);
-          const double grad = (me - ee) * j->second;
-          _vl[*k] -= eta * grad;
-#ifdef SGD_CP
-          apply_l1_penalty(*k, u, _vl, q);
-#endif
-        }
-      }
-
-#ifdef FOLOS_NAIVE
-      for (size_t j = 0; j < d; j++) {
-        double& x = _vl[j];
-        if (x > 0)
-          x = max(0.0, x - eta * l1param);
-        else
-          x = min(0.0, x + eta * l1param);
-      }
-#endif
-    }
-    logl /= _vs.size();
-//    fprintf(stderr, "%4d logl = %8.3f acc = %6.4f ", iter, logl,
-// (double)ncorrect / ntotal);
-
-#ifdef FOLOS_LAZY
-    if (l1param > 0) {
-      for (size_t j = 0; j < d; j++)
-        update_folos_lazy(iter_sample, j, _vl, sum_eta, last_updated);
-    }
-#endif
-
-    double f = logl;
-    if (l1param > 0) {
-      const double l1 =
-          l1norm(_vl);  // this is not accurate when lazy update is used
-      //      cerr << "f0 = " <<  update_model_expectation() - l1param * l1 << "
-      // ";
-      f -= l1param * l1;
-      int nonzero = 0;
-      for (int j = 0; j < d; j++)
-        if (_vl[j] != 0) nonzero++;
-      //      cerr << " f = " << f << " l1 = " << l1 << " nonzero_features = "
-      // << nonzero << endl;
-    }
-    //    fprintf(stderr, "%4d  obj = %7.3f acc = %6.4f", iter+1, f,
-    // (double)ncorrect/ntotal);
-    //    fprintf(stderr, "%4d  obj = %f", iter+1, f);
-    fprintf(stderr, "%3d  obj(err) = %f (%6.4f)", iter + 1, f,
-            1 - (double)ncorrect / ntotal);
-
-    if (_nheldout > 0) {
-      double heldout_logl = heldout_likelihood();
-      //      fprintf(stderr, "  heldout_logl = %f  acc = %6.4f\n",
-      // heldout_logl, 1 - _heldout_error);
-      fprintf(stderr, "  heldout_logl(err) = %f (%6.4f)", heldout_logl,
-              _heldout_error);
-    }
-    fprintf(stderr, "\n");
-  }
-
-  return 0;
-}
-- 
cgit v1.2.3