diff options
author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 15:41:32 -0500 |
---|---|---|
committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 15:41:32 -0500 |
commit | f2d50c333d0dde8a5ef211bc31b4978a3d8911cf (patch) | |
tree | 524139e2845f1a507af6284124f1ac5483e0931e /training/const_reorder | |
parent | 008317586752d71d1f30dd8fea1de7319ffc29ea (diff) |
Move training routine out of ff_const_reorder_common.h
Diffstat (limited to 'training/const_reorder')
-rw-r--r-- | training/const_reorder/Makefile.am | 8 |
-rw-r--r-- | training/const_reorder/argument_reorder_model.cc | 6 |
-rw-r--r-- | training/const_reorder/constituent_reorder_model.cc | 6 |
-rw-r--r-- | training/const_reorder/trainer.cc | 67 |
-rw-r--r-- | training/const_reorder/trainer.h | 12 |
5 files changed, 91 insertions, 8 deletions
diff --git a/training/const_reorder/Makefile.am b/training/const_reorder/Makefile.am index 2e81e588..367ac904 100644 --- a/training/const_reorder/Makefile.am +++ b/training/const_reorder/Makefile.am @@ -1,8 +1,12 @@ +noinst_LIBRARIES = libtrainer.a + +libtrainer_a_SOURCES = trainer.h trainer.cc + bin_PROGRAMS = const_reorder_model_trainer argument_reorder_model_trainer AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder const_reorder_model_trainer_SOURCES = constituent_reorder_model.cc -const_reorder_model_trainer_LDADD = ../../utils/libutils.a +const_reorder_model_trainer_LDADD = ../../utils/libutils.a libtrainer.a argument_reorder_model_trainer_SOURCES = argument_reorder_model.cc -argument_reorder_model_trainer_LDADD = ../../utils/libutils.a +argument_reorder_model_trainer_LDADD = ../../utils/libutils.a libtrainer.a diff --git a/training/const_reorder/argument_reorder_model.cc b/training/const_reorder/argument_reorder_model.cc index 54402436..87f2ce2f 100644 --- a/training/const_reorder/argument_reorder_model.cc +++ b/training/const_reorder/argument_reorder_model.cc @@ -14,7 +14,7 @@ #include "utils/filelib.h" -#include "decoder/ff_const_reorder_common.h" +#include "trainer.h" using namespace std; using namespace const_reorder; @@ -93,8 +93,8 @@ struct SArgumentReorderTrainer { strcpy(pszNewInstanceFName, pszInstanceFname); } - Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL); - pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname, 300); + Tsuruoka_Maxent_Trainer* pMaxent = new Tsuruoka_Maxent_Trainer; + pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname); delete pMaxent; if (strcmp(pszNewInstanceFName, pszInstanceFname) != 0) { diff --git a/training/const_reorder/constituent_reorder_model.cc b/training/const_reorder/constituent_reorder_model.cc index 6bec3f0b..d3ad0f2b 100644 --- a/training/const_reorder/constituent_reorder_model.cc +++ b/training/const_reorder/constituent_reorder_model.cc @@ -12,7 +12,7 @@ #include 
"utils/filelib.h" -#include "decoder/ff_const_reorder_common.h" +#include "trainer.h" using namespace std; using namespace const_reorder; @@ -104,8 +104,8 @@ struct SConstReorderTrainer { pZhangleMaxent->fnTrain(pszInstanceFname, "lbfgs", pszModelFname, 100, 2.0); delete pZhangleMaxent;*/ - Tsuruoka_Maxent* pMaxent = new Tsuruoka_Maxent(NULL); - pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname, 300); + Tsuruoka_Maxent_Trainer* pMaxent = new Tsuruoka_Maxent_Trainer; + pMaxent->fnTrain(pszNewInstanceFName, "l1", pszModelFname); delete pMaxent; if (strcmp(pszNewInstanceFName, pszInstanceFname) != 0) { diff --git a/training/const_reorder/trainer.cc b/training/const_reorder/trainer.cc new file mode 100644 index 00000000..e22a8a66 --- /dev/null +++ b/training/const_reorder/trainer.cc @@ -0,0 +1,67 @@ +#include "trainer.h" + +Tsuruoka_Maxent_Trainer::Tsuruoka_Maxent_Trainer() + : const_reorder::Tsuruoka_Maxent(NULL) {} + +void Tsuruoka_Maxent_Trainer::fnTrain(const char* pszInstanceFName, + const char* pszAlgorithm, + const char* pszModelFName) { + assert(strcmp(pszAlgorithm, "l1") == 0 || strcmp(pszAlgorithm, "l2") == 0 || + strcmp(pszAlgorithm, "sgd") == 0 || strcmp(pszAlgorithm, "SGD") == 0); + FILE* fpIn = fopen(pszInstanceFName, "r"); + + ME_Model* pModel = new ME_Model(); + + char* pszLine = new char[100001]; + int iNumInstances = 0; + int iLen; + while (!feof(fpIn)) { + pszLine[0] = '\0'; + fgets(pszLine, 20000, fpIn); + if (strlen(pszLine) == 0) { + continue; + } + + iLen = strlen(pszLine); + while (iLen > 0 && pszLine[iLen - 1] > 0 && pszLine[iLen - 1] < 33) { + pszLine[iLen - 1] = '\0'; + iLen--; + } + + iNumInstances++; + + ME_Sample* pmes = new ME_Sample(); + + char* p = strrchr(pszLine, ' '); + assert(p != NULL); + p[0] = '\0'; + p++; + std::vector<std::string> vecContext; + SplitOnWhitespace(std::string(pszLine), &vecContext); + + pmes->label = std::string(p); + for (size_t i = 0; i < vecContext.size(); i++) + pmes->add_feature(vecContext[i]); + 
pModel->add_training_sample((*pmes)); + if (iNumInstances % 100000 == 0) + fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances); + delete pmes; + } + fprintf(stdout, "......Reading #Instances: %1d\n", iNumInstances); + fclose(fpIn); + + if (strcmp(pszAlgorithm, "l1") == 0) + pModel->use_l1_regularizer(1.0); + else if (strcmp(pszAlgorithm, "l2") == 0) + pModel->use_l2_regularizer(1.0); + else + pModel->use_SGD(); + + pModel->train(); + pModel->save_to_file(pszModelFName); + + delete pModel; + fprintf(stdout, "......Finished Training\n"); + fprintf(stdout, "......Model saved as %s\n", pszModelFName); + delete[] pszLine; +} diff --git a/training/const_reorder/trainer.h b/training/const_reorder/trainer.h new file mode 100644 index 00000000..e574a536 --- /dev/null +++ b/training/const_reorder/trainer.h @@ -0,0 +1,12 @@ +#ifndef TRAINING_CONST_REORDER_TRAINER_H_ +#define TRAINING_CONST_REORDER_TRAINER_H_ + +#include "decoder/ff_const_reorder_common.h" + +struct Tsuruoka_Maxent_Trainer : const_reorder::Tsuruoka_Maxent { + Tsuruoka_Maxent_Trainer(); + void fnTrain(const char* pszInstanceFName, const char* pszAlgorithm, + const char* pszModelFName); +}; + +#endif // TRAINING_CONST_REORDER_TRAINER_H_ |